1 /* $Id: roff.c,v 1.405 2025/04/08 14:05:09 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2025 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each type is a separate bit, so that several types can be
 * combined into one mask, as ROFFDEF_ANY does.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)  /* Any of the four above. */
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
54
55 /* --- data types --------------------------------------------------------- */
56
/*
 * An incredibly-simple string buffer.
 * Used for the keys and values in struct roffkv, for the keys in
 * struct roffreg, and for the entries of the xtab array in struct roff.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
64
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * The strtab, rentab, and xmbtab members of struct roff
 * each point to the head of such a list.
 */
struct	roffkv {
	struct roffstr	 key; /* name the entry is found by */
	struct roffstr	 val; /* content associated with the key */
	struct roffkv	*next; /* next in list */
};
73
/*
 * A single number register as part of a singly-linked list.
 * The regtab member of struct roff points to the head of the list.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* presumably the current value - TODO confirm */
	int		 step; /* presumably the auto-increment - TODO confirm */
	struct roffreg	*next; /* next in list */
};
83
/*
 * Association of request and macro names with token IDs.
 * These are the entries of the hash tables built by roffhash_alloc(),
 * which allocates each entry together with room for the name.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID belonging to the name */
	char		 name[]; /* nil-terminated name, the hash key */
};
91
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The mstack member of struct roff is an array of these.
 */
struct	mctx {
	char		**argv; /* array freed by roff_free() */
	int		 argc; /* presumably the entries in use - TODO confirm */
	int		 argsz; /* presumably the allocated size - TODO confirm */
};
101
/*
 * The state of the roff(7) parser.
 * Created by roff_alloc(), cleared for reuse by roff_reset(),
 * and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr'), 128 entries */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack, -1 if empty */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack, -1 if empty */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character, '\0' after reset */
	char		 escape; /* escape character, initially '\\' */
};
129
/*
 * A macro definition, condition, or ignored block.
 * The nodes form a stack linked by their parent pointers, with
 * the last member of struct roff pointing to the innermost node;
 * see roffnode_push() and roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name; freed on pop */
	char		*end; /* custom end macro of the block; freed on pop */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
143
/*
 * The parameter list shared by all functions of the type roffproc.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* The type of the handler functions stored in struct roffmac. */
typedef	int (*roffproc)(ROFF_ARGS);
153
/*
 * The handler functions for one request.
 * This is the element type of the roffs[] table;
 * handlers that a request does not need are NULL there.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise OR of the ROFFMAC_* constants */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
161
/*
 * One predefined string; the element type of the predefs[] table.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to the initializer of one struct predef, including the
 * trailing comma.
 * NOTE(review): presumably invoked by the lines of predefs.in,
 * which is not visible from here - confirm.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
169
170 /* --- function prototypes ------------------------------------------------ */
171
/*
 * Forward declarations of the functions local to this file.
 * Those declared with ROFF_ARGS have the signature of the type
 * roffproc, so they can be stored in the roffs[] table.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalpar(int, const char *, int *, int *,
				char, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_expand_patch(struct buf *, int,
				const char *, int);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, char, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_mc(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parse_comment(struct roff *, struct buf *,
				int, int, char);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_req_or_macro(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
256
257 /* --- constant data ------------------------------------------------------ */
258
/*
 * The names of the requests and macros.
 * roffhash_alloc() reads roff_name[tok] for each token it is asked
 * to register and skips the entries that are NULL.
 * NOTE(review): the order presumably has to match enum roff_tok,
 * with the NULL entries at the positions of tokens that have no
 * name - confirm against roff.h before inserting or removing names.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		"MR",
	NULL
};
/* Read-only alias of the table above. */
const	char *const *roff_name = __roff_name;
369
/*
 * The handlers of the roff(7) requests, one element for each token
 * below TOKEN_NONE.  The comment at the end of each line names the
 * request that the element is intended for.
 * NOTE(review): the order presumably has to match both enum roff_tok
 * and the first part of __roff_name[] - confirm before inserting
 * or removing entries.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_mc, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* presumably renamed requests */
	{ roff_userdef, NULL, NULL, 0 }  /* presumably user-defined macros */
};
616
/*
 * Array of injected predefined strings.
 * The initializers are included from predefs.in.
 * NOTE(review): PREDEFS_MAX presumably has to be at least the number
 * of entries in predefs.in, which is not visible from here - confirm
 * when adding entries there.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};

/*
 * Parser state kept at file scope rather than in struct roff.
 * roff_reset() clears all four variables.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
627
628
629 /* --- request table ------------------------------------------------------ */
630
631 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)632 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
633 {
634 struct ohash *htab;
635 struct roffreq *req;
636 enum roff_tok tok;
637 size_t sz;
638 unsigned int slot;
639
640 htab = mandoc_malloc(sizeof(*htab));
641 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
642
643 for (tok = mintok; tok < maxtok; tok++) {
644 if (roff_name[tok] == NULL)
645 continue;
646 sz = strlen(roff_name[tok]);
647 req = mandoc_malloc(sizeof(*req) + sz + 1);
648 req->tok = tok;
649 memcpy(req->name, roff_name[tok], sz + 1);
650 slot = ohash_qlookup(htab, req->name);
651 ohash_insert(htab, slot, req);
652 }
653 return htab;
654 }
655
/*
 * Release a hash table built by roffhash_alloc(),
 * including all the entries stored in it.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
670
671 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)672 roffhash_find(struct ohash *htab, const char *name, size_t sz)
673 {
674 struct roffreq *req;
675 const char *end;
676
677 if (sz) {
678 end = name + sz;
679 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
680 } else
681 req = ohash_find(htab, ohash_qlookup(htab, name));
682 return req == NULL ? TOKEN_NONE : req->tok;
683 }
684
685 /* --- stack of request blocks -------------------------------------------- */
686
687 /*
688 * Pop the current node off of the stack of roff instructions currently
689 * pending. Return 1 if it is a loop or 0 otherwise.
690 */
691 static int
roffnode_pop(struct roff * r)692 roffnode_pop(struct roff *r)
693 {
694 struct roffnode *p;
695 int inloop;
696
697 p = r->last;
698 inloop = p->tok == ROFF_while;
699 r->last = p->parent;
700 free(p->name);
701 free(p->end);
702 free(p);
703 return inloop;
704 }
705
706 /*
707 * Push a roff node onto the instruction stack. This must later be
708 * removed with roffnode_pop().
709 */
710 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)711 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
712 int line, int col)
713 {
714 struct roffnode *p;
715
716 p = mandoc_calloc(1, sizeof(struct roffnode));
717 p->tok = tok;
718 if (name)
719 p->name = mandoc_strdup(name);
720 p->parent = r->last;
721 p->line = line;
722 p->col = col;
723 p->rule = p->parent ? p->parent->rule : 0;
724
725 r->last = p;
726 }
727
728 /* --- roff parser state data management ---------------------------------- */
729
730 static void
roff_free1(struct roff * r)731 roff_free1(struct roff *r)
732 {
733 int i;
734
735 tbl_free(r->first_tbl);
736 r->first_tbl = r->last_tbl = r->tbl = NULL;
737
738 eqn_free(r->last_eqn);
739 r->last_eqn = r->eqn = NULL;
740
741 while (r->mstackpos >= 0)
742 roff_userret(r);
743
744 while (r->last)
745 roffnode_pop(r);
746
747 free (r->rstack);
748 r->rstack = NULL;
749 r->rstacksz = 0;
750 r->rstackpos = -1;
751
752 roff_freereg(r->regtab);
753 r->regtab = NULL;
754
755 roff_freestr(r->strtab);
756 roff_freestr(r->rentab);
757 roff_freestr(r->xmbtab);
758 r->strtab = r->rentab = r->xmbtab = NULL;
759
760 if (r->xtab)
761 for (i = 0; i < 128; i++)
762 free(r->xtab[i].p);
763 free(r->xtab);
764 r->xtab = NULL;
765 }
766
767 void
roff_reset(struct roff * r)768 roff_reset(struct roff *r)
769 {
770 roff_free1(r);
771 r->options |= MPARSE_COMMENT;
772 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
773 r->control = '\0';
774 r->escape = '\\';
775 roffce_lines = 0;
776 roffce_node = NULL;
777 roffit_lines = 0;
778 roffit_macro = NULL;
779 }
780
781 void
roff_free(struct roff * r)782 roff_free(struct roff *r)
783 {
784 int i;
785
786 roff_free1(r);
787 for (i = 0; i < r->mstacksz; i++)
788 free(r->mstack[i].argv);
789 free(r->mstack);
790 roffhash_free(r->reqtab);
791 free(r);
792 }
793
794 struct roff *
roff_alloc(int options)795 roff_alloc(int options)
796 {
797 struct roff *r;
798
799 r = mandoc_calloc(1, sizeof(struct roff));
800 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
801 r->options = options | MPARSE_COMMENT;
802 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
803 r->mstackpos = -1;
804 r->rstackpos = -1;
805 r->escape = '\\';
806 return r;
807 }
808
809 /* --- syntax tree state data management ---------------------------------- */
810
811 static void
roff_man_free1(struct roff_man * man)812 roff_man_free1(struct roff_man *man)
813 {
814 if (man->meta.first != NULL)
815 roff_node_delete(man, man->meta.first);
816 free(man->meta.msec);
817 free(man->meta.vol);
818 free(man->meta.os);
819 free(man->meta.arch);
820 free(man->meta.title);
821 free(man->meta.name);
822 free(man->meta.date);
823 free(man->meta.sodest);
824 }
825
826 void
roff_state_reset(struct roff_man * man)827 roff_state_reset(struct roff_man *man)
828 {
829 man->last = man->meta.first;
830 man->last_es = NULL;
831 man->flags = 0;
832 man->lastsec = man->lastnamed = SEC_NONE;
833 man->next = ROFF_NEXT_CHILD;
834 roff_setreg(man->roff, "nS", 0, '=');
835 }
836
837 static void
roff_man_alloc1(struct roff_man * man)838 roff_man_alloc1(struct roff_man *man)
839 {
840 memset(&man->meta, 0, sizeof(man->meta));
841 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
842 man->meta.first->type = ROFFT_ROOT;
843 man->meta.macroset = MACROSET_NONE;
844 roff_state_reset(man);
845 }
846
/*
 * Discard the syntax tree and the meta data and start over
 * with a tree consisting of a new root node only.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Release first, because roff_man_alloc1() overwrites. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
853
854 void
roff_man_free(struct roff_man * man)855 roff_man_free(struct roff_man *man)
856 {
857 roff_man_free1(man);
858 free(man->os_r);
859 free(man);
860 }
861
862 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)863 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
864 {
865 struct roff_man *man;
866
867 man = mandoc_calloc(1, sizeof(*man));
868 man->roff = roff;
869 man->os_s = os_s;
870 man->quick = quick;
871 roff_man_alloc1(man);
872 roff->man = man;
873 return man;
874 }
875
876 /* --- syntax tree handling ----------------------------------------------- */
877
878 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)879 roff_node_alloc(struct roff_man *man, int line, int pos,
880 enum roff_type type, int tok)
881 {
882 struct roff_node *n;
883
884 n = mandoc_calloc(1, sizeof(*n));
885 n->line = line;
886 n->pos = pos;
887 n->tok = tok;
888 n->type = type;
889 n->sec = man->lastsec;
890
891 if (man->flags & MDOC_SYNOPSIS)
892 n->flags |= NODE_SYNPRETTY;
893 else
894 n->flags &= ~NODE_SYNPRETTY;
895 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
896 n->flags |= NODE_NOFILL;
897 else
898 n->flags &= ~NODE_NOFILL;
899 if (man->flags & MDOC_NEWLINE)
900 n->flags |= NODE_LINE;
901 man->flags &= ~MDOC_NEWLINE;
902
903 return n;
904 }
905
906 void
roff_node_append(struct roff_man * man,struct roff_node * n)907 roff_node_append(struct roff_man *man, struct roff_node *n)
908 {
909
910 switch (man->next) {
911 case ROFF_NEXT_SIBLING:
912 if (man->last->next != NULL) {
913 n->next = man->last->next;
914 man->last->next->prev = n;
915 } else
916 man->last->parent->last = n;
917 man->last->next = n;
918 n->prev = man->last;
919 n->parent = man->last->parent;
920 break;
921 case ROFF_NEXT_CHILD:
922 if (man->last->child != NULL) {
923 n->next = man->last->child;
924 man->last->child->prev = n;
925 } else
926 man->last->last = n;
927 man->last->child = n;
928 n->parent = man->last;
929 break;
930 default:
931 abort();
932 }
933 man->last = n;
934
935 switch (n->type) {
936 case ROFFT_HEAD:
937 n->parent->head = n;
938 break;
939 case ROFFT_BODY:
940 if (n->end != ENDBODY_NOT)
941 return;
942 n->parent->body = n;
943 break;
944 case ROFFT_TAIL:
945 n->parent->tail = n;
946 break;
947 default:
948 return;
949 }
950
951 /*
952 * Copy over the normalised-data pointer of our parent. Not
953 * everybody has one, but copying a null pointer is fine.
954 */
955
956 n->norm = n->parent->norm;
957 assert(n->parent->type == ROFFT_BLOCK);
958 }
959
960 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)961 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
962 {
963 struct roff_node *n;
964
965 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
966 n->string = roff_strdup(man->roff, word);
967 roff_node_append(man, n);
968 n->flags |= NODE_VALID | NODE_ENDED;
969 man->next = ROFF_NEXT_SIBLING;
970 }
971
972 void
roff_word_append(struct roff_man * man,const char * word)973 roff_word_append(struct roff_man *man, const char *word)
974 {
975 struct roff_node *n;
976 char *addstr, *newstr;
977
978 n = man->last;
979 addstr = roff_strdup(man->roff, word);
980 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
981 free(addstr);
982 free(n->string);
983 n->string = newstr;
984 man->next = ROFF_NEXT_SIBLING;
985 }
986
987 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)988 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
989 {
990 struct roff_node *n;
991
992 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
993 roff_node_append(man, n);
994 man->next = ROFF_NEXT_CHILD;
995 }
996
997 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)998 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
999 {
1000 struct roff_node *n;
1001
1002 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1003 roff_node_append(man, n);
1004 man->next = ROFF_NEXT_CHILD;
1005 return n;
1006 }
1007
1008 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1009 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1010 {
1011 struct roff_node *n;
1012
1013 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1014 roff_node_append(man, n);
1015 man->next = ROFF_NEXT_CHILD;
1016 return n;
1017 }
1018
1019 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1020 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1021 {
1022 struct roff_node *n;
1023
1024 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1025 roff_node_append(man, n);
1026 man->next = ROFF_NEXT_CHILD;
1027 return n;
1028 }
1029
1030 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1031 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1032 {
1033 struct roff_node *n;
1034 struct tbl_span *span;
1035
1036 if (man->meta.macroset == MACROSET_MAN)
1037 man_breakscope(man, ROFF_TS);
1038 while ((span = tbl_span(tbl)) != NULL) {
1039 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1040 n->span = span;
1041 roff_node_append(man, n);
1042 n->flags |= NODE_VALID | NODE_ENDED;
1043 man->next = ROFF_NEXT_SIBLING;
1044 }
1045 }
1046
1047 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1048 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1049 {
1050
1051 /* Adjust siblings. */
1052
1053 if (n->prev)
1054 n->prev->next = n->next;
1055 if (n->next)
1056 n->next->prev = n->prev;
1057
1058 /* Adjust parent. */
1059
1060 if (n->parent != NULL) {
1061 if (n->parent->child == n)
1062 n->parent->child = n->next;
1063 if (n->parent->last == n)
1064 n->parent->last = n->prev;
1065 }
1066
1067 /* Adjust parse point. */
1068
1069 if (man == NULL)
1070 return;
1071 if (man->last == n) {
1072 if (n->prev == NULL) {
1073 man->last = n->parent;
1074 man->next = ROFF_NEXT_CHILD;
1075 } else {
1076 man->last = n->prev;
1077 man->next = ROFF_NEXT_SIBLING;
1078 }
1079 }
1080 if (man->meta.first == n)
1081 man->meta.first = NULL;
1082 }
1083
1084 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1085 roff_node_relink(struct roff_man *man, struct roff_node *n)
1086 {
1087 roff_node_unlink(man, n);
1088 n->prev = n->next = NULL;
1089 roff_node_append(man, n);
1090 }
1091
1092 void
roff_node_free(struct roff_node * n)1093 roff_node_free(struct roff_node *n)
1094 {
1095
1096 if (n->args != NULL)
1097 mdoc_argv_free(n->args);
1098 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1099 free(n->norm);
1100 eqn_box_free(n->eqn);
1101 free(n->string);
1102 free(n->tag);
1103 free(n);
1104 }
1105
1106 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1107 roff_node_delete(struct roff_man *man, struct roff_node *n)
1108 {
1109
1110 while (n->child != NULL)
1111 roff_node_delete(man, n->child);
1112 roff_node_unlink(man, n);
1113 roff_node_free(n);
1114 }
1115
1116 int
roff_node_transparent(struct roff_node * n)1117 roff_node_transparent(struct roff_node *n)
1118 {
1119 if (n == NULL)
1120 return 0;
1121 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1122 return 1;
1123 return roff_tok_transparent(n->tok);
1124 }
1125
1126 int
roff_tok_transparent(enum roff_tok tok)1127 roff_tok_transparent(enum roff_tok tok)
1128 {
1129 switch (tok) {
1130 case ROFF_ft:
1131 case ROFF_ll:
1132 case ROFF_mc:
1133 case ROFF_po:
1134 case ROFF_ta:
1135 case MDOC_Db:
1136 case MDOC_Es:
1137 case MDOC_Sm:
1138 case MDOC_Tg:
1139 case MAN_DT:
1140 case MAN_UC:
1141 case MAN_PD:
1142 case MAN_AT:
1143 return 1;
1144 default:
1145 return 0;
1146 }
1147 }
1148
1149 struct roff_node *
roff_node_child(struct roff_node * n)1150 roff_node_child(struct roff_node *n)
1151 {
1152 for (n = n->child; roff_node_transparent(n); n = n->next)
1153 continue;
1154 return n;
1155 }
1156
1157 struct roff_node *
roff_node_prev(struct roff_node * n)1158 roff_node_prev(struct roff_node *n)
1159 {
1160 do {
1161 n = n->prev;
1162 } while (roff_node_transparent(n));
1163 return n;
1164 }
1165
1166 struct roff_node *
roff_node_next(struct roff_node * n)1167 roff_node_next(struct roff_node *n)
1168 {
1169 do {
1170 n = n->next;
1171 } while (roff_node_transparent(n));
1172 return n;
1173 }
1174
1175 void
deroff(char ** dest,const struct roff_node * n)1176 deroff(char **dest, const struct roff_node *n)
1177 {
1178 char *cp;
1179 size_t sz;
1180
1181 if (n->string == NULL) {
1182 for (n = n->child; n != NULL; n = n->next)
1183 deroff(dest, n);
1184 return;
1185 }
1186
1187 /* Skip leading whitespace. */
1188
1189 for (cp = n->string; *cp != '\0'; cp++) {
1190 if (cp[0] == '\\' && cp[1] != '\0' &&
1191 strchr(" %&0^|~", cp[1]) != NULL)
1192 cp++;
1193 else if ( ! isspace((unsigned char)*cp))
1194 break;
1195 }
1196
1197 /* Skip trailing backslash. */
1198
1199 sz = strlen(cp);
1200 if (sz > 0 && cp[sz - 1] == '\\')
1201 sz--;
1202
1203 /* Skip trailing whitespace. */
1204
1205 for (; sz; sz--)
1206 if ( ! isspace((unsigned char)cp[sz-1]))
1207 break;
1208
1209 /* Skip empty strings. */
1210
1211 if (sz == 0)
1212 return;
1213
1214 if (*dest == NULL) {
1215 *dest = mandoc_strndup(cp, sz);
1216 return;
1217 }
1218
1219 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1220 free(*dest);
1221 *dest = cp;
1222 }
1223
1224 /* --- main functions of the roff parser ---------------------------------- */
1225
/*
 * Save comments preceding the title macro, for example in order to
 * preserve Copyright and license headers in HTML output,
 * provide diagnostics about RCS ids and trailing whitespace in comments,
 * then discard comments including preceding whitespace.
 * This function also handles input line continuation.
 *
 * The line in buf is scanned starting at pos and modified in place;
 * ec is the escape character and ln the line number for messages.
 * Return ROFF_CONT if parsing of the line can proceed, or
 * ROFF_IGN | ROFF_APPEND if the next input line has to be appended
 * because the line ends with the escape character or with a
 * comment introduced by the escape character followed by '#'.
 */
static int
roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	const char	*cp;	/* for RCS id parsing */
	char		*stesc;	/* start of an escape sequence ('\\') */
	char		*ep;	/* end of comment string */
	int		 rcsid;	/* kind of RCS id seen */

	/* Search for the first escape sequence starting a comment. */

	for (start = stesc = buf->buf + pos;; stesc++) {
		/*
		 * XXX Ugly hack: Remove the newline character that
		 * mparse_buf_r() appended to mark the end of input
		 * if it is not preceded by an escape character.
		 */
		if (stesc[0] == '\n') {
			assert(stesc[1] == '\0');
			stesc[0] = '\0';
		}

		/* The line ends without continuation or comment. */
		if (stesc[0] == '\0')
			return ROFF_CONT;

		/* Unescaped byte: skip it. */
		if (stesc[0] != ec)
			continue;

		/*
		 * XXX Ugly hack: Do not attempt to append another line
		 * if the function mparse_buf_r() appended a newline
		 * character to indicate the end of input.
		 */
		if (stesc[1] == '\n') {
			assert(stesc[2] == '\0');
			stesc[0] = '\0';
			return ROFF_CONT;
		}

		/*
		 * An escape character at the end of an input line
		 * requests line continuation.
		 */
		if (stesc[1] == '\0') {
			stesc[0] = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Found a comment: process it. */
		if (stesc[1] == '"' || stesc[1] == '#')
			break;

		/* Escaped escape character: skip them both. */
		if (stesc[1] == ec)
			stesc++;
	}

	/*
	 * Look for an RCS id in the comment.
	 * The string literals are split such that revision control
	 * systems do not mistake them for keywords to be expanded.
	 */

	rcsid = 0;
	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_OPENBSD;
		cp += 8;
	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_NETBSD;
		cp += 7;
	}

	/*
	 * Only accept the keyword if it is not followed by an
	 * alphanumeric character and if a closing '$' exists.
	 * Complain if the same kind of RCS id was already seen.
	 */

	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
	    strchr(cp, '$') != NULL) {
		if (r->man->meta.rcsids & rcsid)
			mandoc_msg(MANDOCERR_RCS_REP, ln,
			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
		r->man->meta.rcsids |= rcsid;
	}

	/* Warn about trailing whitespace at the end of the comment. */

	ep = strchr(stesc + 2, '\0') - 1;
	if (*ep == '\n')
		*ep-- = '\0';
	if (*ep == ' ' || *ep == '\t')
		mandoc_msg(MANDOCERR_SPACE_EOL,
		    ln, (int)(ep - buf->buf), NULL);

	/* Save comments preceding the title macro in the syntax tree. */

	if (r->options & MPARSE_COMMENT) {
		/* Strip the trailing whitespace before saving. */
		while (*ep == ' ' || *ep == '\t')
			ep--;
		ep[1] = '\0';
		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
		    ROFFT_COMMENT, TOKEN_NONE);
		n->string = mandoc_strdup(stesc + 2);
		roff_node_append(r->man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	/* The comment requests line continuation. */

	if (stesc[1] == '#') {
		*stesc = '\0';
		return ROFF_IGN | ROFF_APPEND;
	}

	/*
	 * Discard the comment including preceding whitespace.
	 * A blank that is itself preceded by a backslash is kept.
	 */

	while (stesc > start && stesc[-1] == ' ' &&
	    (stesc == start + 1 || stesc[-2] != '\\'))
		stesc--;
	*stesc = '\0';
	return ROFF_CONT;
}
1347
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The line in buf is processed starting at pos; the buffer may be
 * reallocated and buf->sz updated.  The argument ec is the escape
 * character and ln the line number for messages.
 * Return ROFF_CONT on success, or ROFF_IGN after reporting
 * MANDOCERR_ROFFLOOP if more than EXPAND_LIMIT expansions were
 * needed or the line would grow beyond SHRT_MAX bytes.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	char		 ubuf[24];	/* buffer to print a number */
	struct mctx	*ctx;		/* current macro call context */
	const char	*res;		/* the string to be pasted */
	const char	*src;		/* source for copying */
	char		*dst;		/* destination for copying */
	enum mandoc_esc	 subtype;	/* return value from roff_escape */
	int		 iesc;		/* index of leading escape char */
	int		 inam;		/* index of the escape name */
	int		 iarg;		/* index beginning the argument */
	int		 iendarg;	/* index right after the argument */
	int		 iend;		/* index right after the sequence */
	int		 isrc, idst;	/* to reduce \\ and \. in names */
	int		 deftype;	/* type of definition to paste */
	int		 argi;		/* macro argument index */
	int		 quote_args;	/* true for \\$@, false for \\$* */
	int		 asz;		/* length of the replacement */
	int		 rsz;		/* length of the rest of the string */
	int		 npos;		/* position in numeric expression */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;
	while (buf->buf[pos] != '\0') {

		/*
		 * Skip plain ASCII characters.
		 * If we have a non-standard escape character,
		 * escape literal backslashes because all processing in
		 * subsequent functions uses the standard escaping rules.
		 */

		if (buf->buf[pos] != ec) {
			if (buf->buf[pos] == '\\') {
				roff_expand_patch(buf, pos, "\\e", pos + 1);
				pos++;
			}
			pos++;
			continue;
		}

		/*
		 * Parse escape sequences,
		 * issue diagnostic messages when appropriate,
		 * and skip sequences that do not need expansion.
		 * If we have a non-standard escape character, translate
		 * it to backslashes and translate backslashes to \e.
		 */

		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
			while (pos < iend) {
				if (buf->buf[pos] == ec) {
					buf->buf[pos] = '\\';
					if (pos + 1 < iend)
						pos++;
				} else if (buf->buf[pos] == '\\') {
					roff_expand_patch(buf,
					    pos, "\\e", pos + 1);
					pos++;
					/* The patch inserted one byte. */
					iend++;
				}
				pos++;
			}
			continue;
		}

		/* Reduce \\ and \. in names. */

		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
			isrc = idst = iarg;
			while (isrc < iendarg) {
				if (isrc + 1 < iendarg &&
				    buf->buf[isrc] == '\\' &&
				    (buf->buf[isrc + 1] == '\\' ||
				     buf->buf[isrc + 1] == '.'))
					isrc++;
				buf->buf[idst++] = buf->buf[isrc++];
			}
			/* The name shrank by the number of bytes dropped. */
			iendarg -= isrc - idst;
		}

		/* Handle expansion. */

		res = NULL;
		switch (buf->buf[inam]) {
		case '*':
			/* String expansion; an empty name expands to nothing. */
			if (iendarg == iarg)
				break;
			deftype = ROFFDEF_USER | ROFFDEF_PRE;
			if ((res = roff_getstrn(r, buf->buf + iarg,
			    iendarg - iarg, &deftype)) != NULL)
				break;

			/*
			 * If not overridden,
			 * let \*(.T through to the formatters.
			 */

			if (iendarg - iarg == 2 &&
			    buf->buf[iarg] == '.' &&
			    buf->buf[iarg + 1] == 'T') {
				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
				pos = iend;
				continue;
			}

			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
			    "%.*s", iendarg - iarg, buf->buf + iarg);
			break;

		case '$':
			/* Macro arguments only exist while a macro runs. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;

			/* The digits 1 to 9 select a single argument. */
			argi = buf->buf[iarg] - '1';
			if (argi >= 0 && argi <= 8) {
				if (argi < ctx->argc)
					res = ctx->argv[argi];
				break;
			}
			if (buf->buf[iarg] == '*')
				quote_args = 0;
			else if (buf->buf[iarg] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}

			/* Calculate the length of all arguments together. */
			asz = 0;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[argi]);
			}

			/*
			 * Resize the buffer to fit the replacement.
			 * When shrinking, move the rest of the line
			 * before the reallocation; when growing,
			 * move it afterwards.
			 */
			if (asz != iend - iesc) {
				rsz = buf->sz - iend;
				if (asz < iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
				buf->sz = iesc + asz + rsz;
				buf->buf = mandoc_realloc(buf->buf, buf->sz);
				if (asz > iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
			}

			/* Write the arguments over the escape sequence. */
			dst = buf->buf + iesc;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					*dst++ = ' ';
				if (quote_args)
					*dst++ = '"';
				src = ctx->argv[argi];
				while (*src != '\0')
					*dst++ = *src++;
				if (quote_args)
					*dst++ = '"';
			}
			/* pos is unchanged, so the pasted text is rescanned. */
			continue;
		case 'A':
			/* Yields "1" for a non-empty argument, "0" otherwise. */
			ubuf[0] = iendarg > iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'B':
			/*
			 * Yields "1" if the whole argument can be
			 * evaluated by roff_evalnum(), "0" otherwise.
			 */
			npos = 0;
			ubuf[0] = iendarg > iarg && iend > iendarg &&
			    roff_evalnum(ln, buf->buf + iarg, &npos,
					 NULL, 'u', 0) &&
			    npos == iendarg - iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'V':
			/* Unsupported: rewrite the sequence as ${argument}. */
			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
			    "%.*s", iend - iesc, buf->buf + iesc);
			/* Patch the end first to keep iesc and iarg valid. */
			roff_expand_patch(buf, iendarg, "}", iend);
			roff_expand_patch(buf, iesc, "${", iarg);
			continue;
		case 'g':
			/* Expands to the empty string. */
			break;
		case 'n':
			/* Number register expansion. */
			if (iendarg > iarg)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, buf->buf + iarg,
				    iendarg - iarg, buf->buf[inam + 1]));
			else
				ubuf[0] = '\0';
			res = ubuf;
			break;
		case 'w':
			/*
			 * Count the characters in the argument,
			 * then multiply the count by 24.
			 */
			rsz = 0;
			subtype = ESCAPE_UNDEF;
			while (iarg < iendarg) {
				/* Nothing is counted right after ESCAPE_SKIPCHAR. */
				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
				if (buf->buf[iarg] != '\\') {
					rsz += asz;
					iarg++;
					continue;
				}
				switch ((subtype = roff_escape(buf->buf, 0,
				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
				case ESCAPE_SPECIAL:
				case ESCAPE_NUMBERED:
				case ESCAPE_UNICODE:
				case ESCAPE_OVERSTRIKE:
				case ESCAPE_UNDEF:
					break;
				case ESCAPE_DEVICE:
					asz *= 8;
					break;
				case ESCAPE_EXPAND:
					abort();
				default:
					/* Other escapes are not counted. */
					continue;
				}
				rsz += asz;
			}
			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
			res = ubuf;
			break;
		default:
			break;
		}

		/* Without a result, replace the sequence with nothing. */
		if (res == NULL)
			res = "";

		/* Give up on endless recursion and on excessive growth. */
		if (++expand_count > EXPAND_LIMIT ||
		    buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
			return ROFF_IGN;
		}

		/* pos is unchanged, so the pasted text is rescanned. */
		roff_expand_patch(buf, iesc, res, iend);
	}
	return ROFF_CONT;
}
1595
1596 /*
1597 * Replace the substring from the start position (inclusive)
1598 * to end position (exclusive) with the repl(acement) string.
1599 */
1600 static void
roff_expand_patch(struct buf * buf,int start,const char * repl,int end)1601 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1602 {
1603 char *nbuf;
1604
1605 buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1606 repl, buf->buf + end) + 1;
1607 free(buf->buf);
1608 buf->buf = nbuf;
1609 }
1610
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a pointer to the parsed argument, which is either the original
 * pointer or advanced by one byte in case the argument is quoted.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer to the next argument,
 * or to the NUL byte terminating the argument line.
 *
 * The string returned is a copy made with mandoc_strdup().
 * If the argument contained a double backslash, the copy is
 * additionally passed through roff_expand(), and an empty string
 * is returned if that fails.
 * The position *pos is advanced by the number of bytes consumed.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * newesc: a double backslash was reduced to a single one
	 * pairs: number of bytes dropped so far, which is the
	 *        distance text has to be moved to the left
	 * white: an unescaped blank ended an unquoted argument
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				cp[-pairs] = '\\';
				newesc = 1;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Warn about blanks at the end of the argument line. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/*
	 * Reducing double backslashes may have created new
	 * escape sequences, so expand the copy once more.
	 */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1721
1722
/*
 * Process text streams.
 *
 * If the input line trap is due on this line, append the trap macro
 * call to the buffer, reset *offs to zero, and return ROFF_REPARSE.
 * Otherwise, count down the input line trap and the roffce_lines
 * counter, replace breakable hyphens in the text starting at pos
 * with ASCII_HYPH, and return ROFF_CONT.
 */
static int
roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
{
	size_t		 sz;
	const char	*start;
	char		*p;
	int		 isz;
	enum mandoc_esc	 esc;

	/* Spring the input line trap. */

	if (roffit_lines == 1) {
		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
		free(buf->buf);
		buf->buf = p;
		buf->sz = isz + 1;
		*offs = 0;
		free(roffit_macro);
		roffit_lines = 0;
		return ROFF_REPARSE;
	} else if (roffit_lines > 1)
		--roffit_lines;

	/*
	 * While roffce_node is set, count down non-empty text lines.
	 * When the count runs out, continue as a sibling of roffce_node.
	 */

	if (roffce_node != NULL && buf->buf[pos] != '\0') {
		if (roffce_lines < 1) {
			r->man->last = roffce_node;
			r->man->next = ROFF_NEXT_SIBLING;
			roffce_lines = 0;
			roffce_node = NULL;
		} else
			roffce_lines--;
	}

	/* Convert all breakable hyphens into ASCII_HYPH. */

	start = p = buf->buf + pos;

	while (*p != '\0') {
		/* Jump to the next hyphen or backslash. */
		sz = strcspn(p, "-\\");
		p += sz;

		if (*p == '\0')
			break;

		if (*p == '\\') {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape((const char **)&p, NULL, NULL);
			if (esc == ESCAPE_ERROR)
				break;
			/* Hyphens right after an escape remain as they are. */
			while (*p == '-')
				p++;
			continue;
		} else if (p == start) {
			/* A hyphen at the start of the text is not breakable. */
			p++;
			continue;
		}

		/* Only hyphens between two letters are breakable. */
		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}
	return ROFF_CONT;
}
1791
/*
 * Parse one input line.
 * The line is contained in buf, starting at the offset *offs;
 * ln is the line number and len the length used for the long line
 * warning.  After handling equation delimiters, comments, and
 * escape sequences, the line is passed to the handler of the
 * innermost open roff block, to the eqn or tbl parser, to the text
 * parser, or to the handler of the request or macro it calls.
 * The return value is a combination of ROFF_* values; in particular,
 * ROFF_IGN is returned for lines that need no further processing,
 * and ROFF_CONT is returned for macro lines that no request or
 * user-defined macro handles.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about long text lines, but not inside tables and
	 * equations, not in no-fill mode, not for lines starting
	 * with a blank, dot, backslash, or the control character,
	 * and not if the first 80 bytes contain no blank.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Handle comments and escape sequences. */

	e = roff_parse_comment(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Only keep the bits outside ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* After the first text line, comments are no longer saved. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open: look up and run the request or macro. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
}
1890
1891 /*
1892 * Handle a new request or macro.
1893 * May be called outside any scope or from inside a conditional scope.
1894 */
1895 static int
roff_req_or_macro(ROFF_ARGS)1896 roff_req_or_macro(ROFF_ARGS) {
1897
1898 /* For now, tables ignore most macros and some request. */
1899
1900 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1901 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1902 tok == ROFF_sp)) {
1903 mandoc_msg(MANDOCERR_TBLMACRO,
1904 ln, ppos, "%s", buf->buf + ppos);
1905 if (tok != TOKEN_NONE)
1906 return ROFF_IGN;
1907 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1908 pos++;
1909 while (buf->buf[pos] == ' ')
1910 pos++;
1911 tbl_read(r->tbl, ln, buf->buf, pos);
1912 roff_addtbl(r->man, ln, r->tbl);
1913 return ROFF_IGN;
1914 }
1915
1916 /* For now, let high level macros abort .ce mode. */
1917
1918 if (roffce_node != NULL &&
1919 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1920 tok == ROFF_TH || tok == ROFF_TS)) {
1921 r->man->last = roffce_node;
1922 r->man->next = ROFF_NEXT_SIBLING;
1923 roffce_lines = 0;
1924 roffce_node = NULL;
1925 }
1926
1927 /*
1928 * This is neither a roff request nor a user-defined macro.
1929 * Let the standard macro set parsers handle it.
1930 */
1931
1932 if (tok == TOKEN_NONE)
1933 return ROFF_CONT;
1934
1935 /* Execute a roff request or a user-defined macro. */
1936
1937 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1938 }
1939
1940 /*
1941 * Internal interface function to tell the roff parser that execution
1942 * of the current macro ended. This is required because macro
1943 * definitions usually do not end with a .return request.
1944 */
1945 void
roff_userret(struct roff * r)1946 roff_userret(struct roff *r)
1947 {
1948 struct mctx *ctx;
1949 int i;
1950
1951 assert(r->mstackpos >= 0);
1952 ctx = r->mstack + r->mstackpos;
1953 for (i = 0; i < ctx->argc; i++)
1954 free(ctx->argv[i]);
1955 ctx->argc = 0;
1956 r->mstackpos--;
1957 }
1958
1959 void
roff_endparse(struct roff * r)1960 roff_endparse(struct roff *r)
1961 {
1962 if (r->last != NULL)
1963 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1964 r->last->col, "%s", roff_name[r->last->tok]);
1965
1966 if (r->eqn != NULL) {
1967 mandoc_msg(MANDOCERR_BLK_NOEND,
1968 r->eqn->node->line, r->eqn->node->pos, "EQ");
1969 eqn_parse(r->eqn);
1970 r->eqn = NULL;
1971 }
1972
1973 if (r->tbl != NULL) {
1974 tbl_end(r->tbl, 1);
1975 r->tbl = NULL;
1976 }
1977 }
1978
1979 /*
1980 * Parse the request or macro name at buf[*pos].
1981 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1982 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1983 * As a side effect, set r->current_string to the definition or to NULL.
1984 */
1985 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1986 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1987 {
1988 char *cp;
1989 const char *mac;
1990 size_t maclen;
1991 int deftype;
1992 enum roff_tok t;
1993
1994 cp = buf + *pos;
1995
1996 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1997 return TOKEN_NONE;
1998
1999 mac = cp;
2000 maclen = roff_getname(&cp, ln, ppos);
2001
2002 deftype = ROFFDEF_USER | ROFFDEF_REN;
2003 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2004 switch (deftype) {
2005 case ROFFDEF_USER:
2006 t = ROFF_USERDEF;
2007 break;
2008 case ROFFDEF_REN:
2009 t = ROFF_RENAMED;
2010 break;
2011 default:
2012 t = roffhash_find(r->reqtab, mac, maclen);
2013 break;
2014 }
2015 if (t != TOKEN_NONE)
2016 *pos = cp - buf;
2017 else if (deftype == ROFFDEF_UNDEF) {
2018 /* Using an undefined macro defines it to be empty. */
2019 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2020 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2021 }
2022 return t;
2023 }
2024
2025 /* --- handling of request blocks ----------------------------------------- */
2026
2027 /*
2028 * Close a macro definition block or an "ignore" block.
2029 */
2030 static int
roff_cblock(ROFF_ARGS)2031 roff_cblock(ROFF_ARGS)
2032 {
2033 int rr;
2034
2035 if (r->last == NULL) {
2036 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2037 return ROFF_IGN;
2038 }
2039
2040 switch (r->last->tok) {
2041 case ROFF_am:
2042 case ROFF_ami:
2043 case ROFF_de:
2044 case ROFF_dei:
2045 case ROFF_ig:
2046 break;
2047 case ROFF_am1:
2048 case ROFF_de1:
2049 /* Remapped in roff_block(). */
2050 abort();
2051 default:
2052 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2053 return ROFF_IGN;
2054 }
2055
2056 roffnode_pop(r);
2057 roffnode_cleanscope(r);
2058
2059 /*
2060 * If a conditional block with braces is still open,
2061 * check for "\}" block end markers.
2062 */
2063
2064 if (r->last != NULL && r->last->endspan < 0) {
2065 rr = 1; /* If arguments follow "\}", warn about them. */
2066 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2067 }
2068
2069 if (buf->buf[pos] != '\0')
2070 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2071 ".. %s", buf->buf + pos);
2072
2073 return ROFF_IGN;
2074 }
2075
2076 /*
2077 * Pop all nodes ending at the end of the current input line.
2078 * Return the number of loops ended.
2079 */
2080 static int
roffnode_cleanscope(struct roff * r)2081 roffnode_cleanscope(struct roff *r)
2082 {
2083 int inloop;
2084
2085 inloop = 0;
2086 while (r->last != NULL && r->last->endspan > 0) {
2087 if (--r->last->endspan != 0)
2088 break;
2089 inloop += roffnode_pop(r);
2090 }
2091 return inloop;
2092 }
2093
2094 /*
2095 * Handle the closing "\}" of a conditional block.
2096 * Apart from generating warnings, this only pops nodes.
2097 * Return the number of loops ended.
2098 */
2099 static int
roff_ccond(struct roff * r,int ln,int ppos)2100 roff_ccond(struct roff *r, int ln, int ppos)
2101 {
2102 if (NULL == r->last) {
2103 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2104 return 0;
2105 }
2106
2107 switch (r->last->tok) {
2108 case ROFF_el:
2109 case ROFF_ie:
2110 case ROFF_if:
2111 case ROFF_while:
2112 break;
2113 default:
2114 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2115 return 0;
2116 }
2117
2118 if (r->last->endspan > -1) {
2119 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2120 return 0;
2121 }
2122
2123 return roffnode_pop(r) + roffnode_cleanscope(r);
2124 }
2125
2126 static int
roff_block(ROFF_ARGS)2127 roff_block(ROFF_ARGS)
2128 {
2129 const char *name, *value;
2130 char *call, *cp, *iname, *rname;
2131 size_t csz, namesz, rsz;
2132 int deftype;
2133
2134 /* Ignore groff compatibility mode for now. */
2135
2136 if (tok == ROFF_de1)
2137 tok = ROFF_de;
2138 else if (tok == ROFF_dei1)
2139 tok = ROFF_dei;
2140 else if (tok == ROFF_am1)
2141 tok = ROFF_am;
2142 else if (tok == ROFF_ami1)
2143 tok = ROFF_ami;
2144
2145 /* Parse the macro name argument. */
2146
2147 cp = buf->buf + pos;
2148 if (tok == ROFF_ig) {
2149 iname = NULL;
2150 namesz = 0;
2151 } else {
2152 iname = cp;
2153 namesz = roff_getname(&cp, ln, ppos);
2154 iname[namesz] = '\0';
2155 }
2156
2157 /* Resolve the macro name argument if it is indirect. */
2158
2159 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2160 deftype = ROFFDEF_USER;
2161 name = roff_getstrn(r, iname, namesz, &deftype);
2162 if (name == NULL) {
2163 mandoc_msg(MANDOCERR_STR_UNDEF,
2164 ln, (int)(iname - buf->buf),
2165 "%.*s", (int)namesz, iname);
2166 namesz = 0;
2167 } else
2168 namesz = strlen(name);
2169 } else
2170 name = iname;
2171
2172 if (namesz == 0 && tok != ROFF_ig) {
2173 mandoc_msg(MANDOCERR_REQ_EMPTY,
2174 ln, ppos, "%s", roff_name[tok]);
2175 return ROFF_IGN;
2176 }
2177
2178 roffnode_push(r, tok, name, ln, ppos);
2179
2180 /*
2181 * At the beginning of a `de' macro, clear the existing string
2182 * with the same name, if there is one. New content will be
2183 * appended from roff_block_text() in multiline mode.
2184 */
2185
2186 if (tok == ROFF_de || tok == ROFF_dei) {
2187 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2188 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2189 } else if (tok == ROFF_am || tok == ROFF_ami) {
2190 deftype = ROFFDEF_ANY;
2191 value = roff_getstrn(r, iname, namesz, &deftype);
2192 switch (deftype) { /* Before appending, ... */
2193 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2194 roff_setstrn(&r->strtab, name, namesz,
2195 value, strlen(value), 0);
2196 break;
2197 case ROFFDEF_REN: /* call original standard macro. */
2198 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2199 (int)strlen(value), value);
2200 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2201 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2202 free(call);
2203 break;
2204 case ROFFDEF_STD: /* rename and call standard macro. */
2205 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2206 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2207 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2208 (int)rsz, rname);
2209 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2210 free(call);
2211 free(rname);
2212 break;
2213 default:
2214 break;
2215 }
2216 }
2217
2218 if (*cp == '\0')
2219 return ROFF_IGN;
2220
2221 /* Get the custom end marker. */
2222
2223 iname = cp;
2224 namesz = roff_getname(&cp, ln, ppos);
2225
2226 /* Resolve the end marker if it is indirect. */
2227
2228 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2229 deftype = ROFFDEF_USER;
2230 name = roff_getstrn(r, iname, namesz, &deftype);
2231 if (name == NULL) {
2232 mandoc_msg(MANDOCERR_STR_UNDEF,
2233 ln, (int)(iname - buf->buf),
2234 "%.*s", (int)namesz, iname);
2235 namesz = 0;
2236 } else
2237 namesz = strlen(name);
2238 } else
2239 name = iname;
2240
2241 if (namesz)
2242 r->last->end = mandoc_strndup(name, namesz);
2243
2244 if (*cp != '\0')
2245 mandoc_msg(MANDOCERR_ARG_EXCESS,
2246 ln, pos, ".%s ... %s", roff_name[tok], cp);
2247
2248 return ROFF_IGN;
2249 }
2250
2251 static int
roff_block_sub(ROFF_ARGS)2252 roff_block_sub(ROFF_ARGS)
2253 {
2254 enum roff_tok t;
2255 int i, j;
2256
2257 /*
2258 * If a custom end marker is a user-defined or predefined macro
2259 * or a request, interpret it.
2260 */
2261
2262 if (r->last->end) {
2263 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2264 if (buf->buf[i] != r->last->end[j])
2265 break;
2266
2267 if (r->last->end[j] == '\0' &&
2268 (buf->buf[i] == '\0' ||
2269 buf->buf[i] == ' ' ||
2270 buf->buf[i] == '\t')) {
2271 roffnode_pop(r);
2272 roffnode_cleanscope(r);
2273
2274 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2275 i++;
2276
2277 pos = i;
2278 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2279 TOKEN_NONE)
2280 return ROFF_RERUN;
2281 return ROFF_IGN;
2282 }
2283 }
2284
2285 /* Handle the standard end marker. */
2286
2287 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2288 if (t == ROFF_cblock)
2289 return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2290
2291 /* Not an end marker, so append the line to the block. */
2292
2293 if (tok != ROFF_ig)
2294 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2295 return ROFF_IGN;
2296 }
2297
2298 static int
roff_block_text(ROFF_ARGS)2299 roff_block_text(ROFF_ARGS)
2300 {
2301
2302 if (tok != ROFF_ig)
2303 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2304
2305 return ROFF_IGN;
2306 }
2307
2308 /*
2309 * Check for a closing "\}" and handle it.
2310 * In this function, the final "int *offs" argument is used for
2311 * different purposes than elsewhere:
2312 * Input: *offs == 0: caller wants to discard arguments following \}
2313 * *offs == 1: caller wants to preserve text following \}
2314 * Output: *offs = 0: tell caller to discard input line
2315 * *offs = 1: tell caller to use input line
2316 */
2317 static int
roff_cond_checkend(ROFF_ARGS)2318 roff_cond_checkend(ROFF_ARGS)
2319 {
2320 char *ep;
2321 int endloop, irc, rr;
2322
2323 irc = ROFF_IGN;
2324 rr = r->last->rule;
2325 endloop = tok != ROFF_while ? ROFF_IGN :
2326 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2327 if (roffnode_cleanscope(r))
2328 irc |= endloop;
2329
2330 /*
2331 * If "\}" occurs on a macro line without a preceding macro or
2332 * a text line contains nothing else, drop the line completely.
2333 */
2334
2335 ep = buf->buf + pos;
2336 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2337 rr = 0;
2338
2339 /*
2340 * The closing delimiter "\}" rewinds the conditional scope
2341 * but is otherwise ignored when interpreting the line.
2342 */
2343
2344 while ((ep = strchr(ep, '\\')) != NULL) {
2345 switch (ep[1]) {
2346 case '}':
2347 if (ep[2] == '\0')
2348 ep[0] = '\0';
2349 else if (rr)
2350 ep[1] = '&';
2351 else
2352 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2353 if (roff_ccond(r, ln, ep - buf->buf))
2354 irc |= endloop;
2355 break;
2356 case '\0':
2357 ++ep;
2358 break;
2359 default:
2360 ep += 2;
2361 break;
2362 }
2363 }
2364 *offs = rr;
2365 return irc;
2366 }
2367
2368 /*
2369 * Parse and process a request or macro line in conditional scope.
2370 */
2371 static int
roff_cond_sub(ROFF_ARGS)2372 roff_cond_sub(ROFF_ARGS)
2373 {
2374 struct roffnode *bl;
2375 int irc, rr, spos;
2376 enum roff_tok t;
2377
2378 rr = 0; /* If arguments follow "\}", skip them. */
2379 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2380 spos = pos;
2381 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2382
2383 /*
2384 * Handle requests and macros if the conditional evaluated
2385 * to true or if they are structurally required.
2386 * The .break request is always handled specially.
2387 */
2388
2389 if (t == ROFF_break) {
2390 if (irc & ROFF_LOOPMASK)
2391 irc = ROFF_IGN | ROFF_LOOPEXIT;
2392 else if (rr) {
2393 for (bl = r->last; bl != NULL; bl = bl->parent) {
2394 bl->rule = 0;
2395 if (bl->tok == ROFF_while)
2396 break;
2397 }
2398 }
2399 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2400 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2401 if (irc & ROFF_WHILE)
2402 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2403 }
2404 return irc;
2405 }
2406
2407 /*
2408 * Parse and process a text line in conditional scope.
2409 */
2410 static int
roff_cond_text(ROFF_ARGS)2411 roff_cond_text(ROFF_ARGS)
2412 {
2413 int irc, rr;
2414
2415 rr = 1; /* If arguments follow "\}", preserve them. */
2416 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2417 if (rr)
2418 irc |= ROFF_CONT;
2419 return irc;
2420 }
2421
2422 /* --- handling of numeric and conditional expressions -------------------- */
2423
/*
 * Parse a single signed decimal number, optionally with a
 * fractional part and followed by one optional scaling unit.
 * Stop at the first character that is not part of the number.
 *
 * v:         the string to parse
 * pos:       in: index in v to start at; out: index after the number
 * res:       if not NULL, receives the scaled value, truncated
 *            towards zero and clamped to the range of int
 * unit:      default scaling unit used if none follows the number;
 *            if '\0', an explicit unit is consumed but not applied
 * skipspace: if non-zero, skip white space after the optional sign
 *
 * Return 0 and leave *pos unchanged if nothing but an optional
 * minus sign could be consumed; otherwise return 1.
 */
static int
roff_getnum(const char *v, int *pos, int *res, char unit, int skipspace)
{
	double	 frac, myres;
	int	 n, p;

	p = *pos;
	n = v[p] == '-';
	if (n || v[p] == '+')
		p++;

	if (skipspace)
		while (isspace((unsigned char)v[p]))
			p++;

	/* Accumulate the integer part, then the fractional part. */

	for (myres = 0.0; isdigit((unsigned char)v[p]); p++)
		myres = myres * 10.0 + (v[p] - '0');
	if (v[p] == '.')
		for (frac = 0.1; isdigit((unsigned char)v[++p]); frac *= 0.1)
			myres += frac * (v[p] - '0');

	if (p == *pos + n)
		return 0;

	if (n)
		myres *= -1.0;

	/* Each number may be followed by one optional scaling unit. */

	if (v[p] != '\0' && strchr("ficvPmnpuM", v[p]) != NULL) {
		if (unit != '\0')
			unit = v[p];
		p++;
	}

	/* Convert to basic units. */

	switch (unit) {
	case 'f':
		myres *= 65536.0;
		break;
	case 'i':
		myres *= 240.0;
		break;
	case 'c':
		myres *= 24000.0;
		myres /= 254.0;
		break;
	case 'v':
	case 'P':
		myres *= 40.0;
		break;
	case 'm':
	case 'n':
		myres *= 24.0;
		break;
	case 'p':
		myres *= 40.0;
		myres /= 12.0;
		break;
	case 'u':
		break;
	case 'M':
		myres *= 24.0;
		myres /= 100.0;
		break;
	default:
		break;
	}

	/*
	 * Converting a double that does not fit into an int is
	 * undefined behaviour in C, so saturate explicitly.
	 */

	if (res != NULL) {
		if (myres >= (double)INT_MAX)
			*res = INT_MAX;
		else if (myres <= (double)INT_MIN)
			*res = INT_MIN;
		else
			*res = (int)myres;
	}
	*pos = p;
	return 1;
}
2502
/*
 * Evaluate a string comparison condition of the form
 * <delim>string1<delim>string2<delim>, where the delimiter
 * is the character at the parse point.
 * Return 1 if both strings are identical and the closing
 * delimiter is present, 0 otherwise.
 * Move the parse point past the third delimiter,
 * or to the end of the line if that delimiter is missing.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs;
	char		 delim;
	int		 equal;

	delim = v[*pos];
	lhs = v + *pos + 1;		/* cursor in the first string */
	rhs = strchr(lhs, delim);	/* cursor in the second string */
	equal = 0;

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: skip to the end marker. */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = lhs + strlen(lhs);
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2545
2546 /*
2547 * Evaluate an optionally negated single character, numerical,
2548 * or string condition.
2549 */
2550 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2551 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2552 {
2553 const char *start, *end;
2554 char *cp, *name;
2555 size_t sz;
2556 int deftype, len, number, savepos, istrue, wanttrue;
2557
2558 if ('!' == v[*pos]) {
2559 wanttrue = 0;
2560 (*pos)++;
2561 } else
2562 wanttrue = 1;
2563
2564 switch (v[*pos]) {
2565 case '\0':
2566 return 0;
2567 case 'n':
2568 case 'o':
2569 (*pos)++;
2570 return wanttrue;
2571 case 'e':
2572 case 't':
2573 case 'v':
2574 (*pos)++;
2575 return !wanttrue;
2576 case 'c':
2577 do {
2578 (*pos)++;
2579 } while (v[*pos] == ' ');
2580
2581 /*
2582 * Quirk for groff compatibility:
2583 * The horizontal tab is neither available nor unavailable.
2584 */
2585
2586 if (v[*pos] == '\t') {
2587 (*pos)++;
2588 return 0;
2589 }
2590
2591 /* Printable ASCII characters are available. */
2592
2593 if (v[*pos] != '\\') {
2594 (*pos)++;
2595 return wanttrue;
2596 }
2597
2598 end = v + ++*pos;
2599 switch (mandoc_escape(&end, &start, &len)) {
2600 case ESCAPE_SPECIAL:
2601 istrue = mchars_spec2cp(start, len) != -1;
2602 break;
2603 case ESCAPE_UNICODE:
2604 istrue = 1;
2605 break;
2606 case ESCAPE_NUMBERED:
2607 istrue = mchars_num2char(start, len) != -1;
2608 break;
2609 default:
2610 istrue = !wanttrue;
2611 break;
2612 }
2613 *pos = end - v;
2614 return istrue == wanttrue;
2615 case 'd':
2616 case 'r':
2617 cp = v + *pos + 1;
2618 while (*cp == ' ')
2619 cp++;
2620 name = cp;
2621 sz = roff_getname(&cp, ln, cp - v);
2622 if (sz == 0)
2623 istrue = 0;
2624 else if (v[*pos] == 'r')
2625 istrue = roff_hasregn(r, name, sz);
2626 else {
2627 deftype = ROFFDEF_ANY;
2628 roff_getstrn(r, name, sz, &deftype);
2629 istrue = !!deftype;
2630 }
2631 *pos = (name + sz) - v;
2632 return istrue == wanttrue;
2633 default:
2634 break;
2635 }
2636
2637 savepos = *pos;
2638 if (roff_evalnum(ln, v, pos, &number, 'u', 0))
2639 return (number > 0) == wanttrue;
2640 else if (*pos == savepos)
2641 return roff_evalstrcond(v, pos) == wanttrue;
2642 else
2643 return 0;
2644 }
2645
2646 static int
roff_line_ignore(ROFF_ARGS)2647 roff_line_ignore(ROFF_ARGS)
2648 {
2649
2650 return ROFF_IGN;
2651 }
2652
2653 static int
roff_insec(ROFF_ARGS)2654 roff_insec(ROFF_ARGS)
2655 {
2656
2657 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2658 return ROFF_IGN;
2659 }
2660
2661 static int
roff_unsupp(ROFF_ARGS)2662 roff_unsupp(ROFF_ARGS)
2663 {
2664
2665 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2666 return ROFF_IGN;
2667 }
2668
2669 static int
roff_cond(ROFF_ARGS)2670 roff_cond(ROFF_ARGS)
2671 {
2672 int irc;
2673
2674 roffnode_push(r, tok, NULL, ln, ppos);
2675
2676 /*
2677 * An `.el' has no conditional body: it will consume the value
2678 * of the current rstack entry set in prior `ie' calls or
2679 * defaults to DENY.
2680 *
2681 * If we're not an `el', however, then evaluate the conditional.
2682 */
2683
2684 r->last->rule = tok == ROFF_el ?
2685 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2686 roff_evalcond(r, ln, buf->buf, &pos);
2687
2688 /*
2689 * An if-else will put the NEGATION of the current evaluated
2690 * conditional into the stack of rules.
2691 */
2692
2693 if (tok == ROFF_ie) {
2694 if (r->rstackpos + 1 == r->rstacksz) {
2695 r->rstacksz += 16;
2696 r->rstack = mandoc_reallocarray(r->rstack,
2697 r->rstacksz, sizeof(int));
2698 }
2699 r->rstack[++r->rstackpos] = !r->last->rule;
2700 }
2701
2702 /* If the parent has false as its rule, then so do we. */
2703
2704 if (r->last->parent && !r->last->parent->rule)
2705 r->last->rule = 0;
2706
2707 /*
2708 * Determine scope.
2709 * If there is nothing on the line after the conditional,
2710 * not even whitespace, use next-line scope.
2711 * Except that .while does not support next-line scope.
2712 */
2713
2714 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2715 r->last->endspan = 2;
2716 goto out;
2717 }
2718
2719 while (buf->buf[pos] == ' ')
2720 pos++;
2721
2722 /* An opening brace requests multiline scope. */
2723
2724 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2725 r->last->endspan = -1;
2726 pos += 2;
2727 while (buf->buf[pos] == ' ')
2728 pos++;
2729 goto out;
2730 }
2731
2732 /*
2733 * Anything else following the conditional causes
2734 * single-line scope. Warn if the scope contains
2735 * nothing but trailing whitespace.
2736 */
2737
2738 if (buf->buf[pos] == '\0')
2739 mandoc_msg(MANDOCERR_COND_EMPTY,
2740 ln, ppos, "%s", roff_name[tok]);
2741
2742 r->last->endspan = 1;
2743
2744 out:
2745 *offs = pos;
2746 irc = ROFF_RERUN;
2747 if (tok == ROFF_while)
2748 irc |= ROFF_WHILE;
2749 return irc;
2750 }
2751
2752 static int
roff_ds(ROFF_ARGS)2753 roff_ds(ROFF_ARGS)
2754 {
2755 char *string;
2756 const char *name;
2757 size_t namesz;
2758
2759 /* Ignore groff compatibility mode for now. */
2760
2761 if (tok == ROFF_ds1)
2762 tok = ROFF_ds;
2763 else if (tok == ROFF_as1)
2764 tok = ROFF_as;
2765
2766 /*
2767 * The first word is the name of the string.
2768 * If it is empty or terminated by an escape sequence,
2769 * abort the `ds' request without defining anything.
2770 */
2771
2772 name = string = buf->buf + pos;
2773 if (*name == '\0')
2774 return ROFF_IGN;
2775
2776 namesz = roff_getname(&string, ln, pos);
2777 switch (name[namesz]) {
2778 case '\\':
2779 return ROFF_IGN;
2780 case '\t':
2781 string = buf->buf + pos + namesz;
2782 break;
2783 default:
2784 break;
2785 }
2786
2787 /* Read past the initial double-quote, if any. */
2788 if (*string == '"')
2789 string++;
2790
2791 /* The rest is the value. */
2792 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2793 ROFF_as == tok);
2794 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2795 return ROFF_IGN;
2796 }
2797
/*
 * Parse a single operator, one or two characters long.
 * On success, store a one-character code for the operator in
 * *res, advance the parse point, and return that code.
 * The two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g',
 * ">?" as 'a', and "==" as '='.
 * On failure, return 0 and leave the parse point unchanged;
 * *res then holds the character at the parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 width;
	char		 op;

	cp = v + *pos;
	op = *cp;
	width = 1;
	*res = op;

	if (op == '<') {
		if (cp[1] == '=') {
			op = 'l';
			width = 2;
		} else if (cp[1] == '>') {
			op = '!';
			width = 2;
		} else if (cp[1] == '?') {
			op = 'i';
			width = 2;
		}
	} else if (op == '>') {
		if (cp[1] == '=') {
			op = 'g';
			width = 2;
		} else if (cp[1] == '?') {
			op = 'a';
			width = 2;
		}
	} else if (op == '=') {
		if (cp[1] == '=')
			width = 2;
	} else if (op == '\0' || strchr("+-*/%&:", op) == NULL)
		return 0;

	*res = op;
	*pos += width;
	return *res;
}
2861
/*
 * Evaluate one operand of a numeric expression: either
 * a parenthesized subexpression or a single signed number.
 * Return 1 on success and 0 on failure.
 */
static int
roff_evalpar(int ln, const char *v, int *pos, int *res, char unit,
    int skipspace)
{
	if (v[*pos] != '(')
		return roff_getnum(v, pos, res, unit, skipspace);

	/* White space is always skipped inside parentheses. */

	++*pos;
	if (roff_evalnum(ln, v, pos, res, unit, 1) == 0)
		return 0;

	/*
	 * Omission of the closing parenthesis
	 * is an error in validation mode (res == NULL),
	 * but ignored in evaluation mode.
	 */

	if (v[*pos] != ')')
		return res != NULL;

	++*pos;
	return 1;
}
2891
2892 /*
2893 * Evaluate a complete numeric expression.
2894 * Proceed left to right, there is no concept of precedence.
2895 */
2896 int
roff_evalnum(int ln,const char * v,int * pos,int * res,char unit,int skipspace)2897 roff_evalnum(int ln, const char *v, int *pos, int *res, char unit,
2898 int skipspace)
2899 {
2900 int mypos, operand2;
2901 char operator;
2902
2903 if (NULL == pos) {
2904 mypos = 0;
2905 pos = &mypos;
2906 }
2907
2908 if (skipspace)
2909 while (isspace((unsigned char)v[*pos]))
2910 (*pos)++;
2911
2912 if ( ! roff_evalpar(ln, v, pos, res, unit, skipspace))
2913 return 0;
2914
2915 while (1) {
2916 if (skipspace)
2917 while (isspace((unsigned char)v[*pos]))
2918 (*pos)++;
2919
2920 if ( ! roff_getop(v, pos, &operator))
2921 break;
2922
2923 if (skipspace)
2924 while (isspace((unsigned char)v[*pos]))
2925 (*pos)++;
2926
2927 if ( ! roff_evalpar(ln, v, pos, &operand2, unit, skipspace))
2928 return 0;
2929
2930 if (skipspace)
2931 while (isspace((unsigned char)v[*pos]))
2932 (*pos)++;
2933
2934 if (NULL == res)
2935 continue;
2936
2937 switch (operator) {
2938 case '+':
2939 *res += operand2;
2940 break;
2941 case '-':
2942 *res -= operand2;
2943 break;
2944 case '*':
2945 *res *= operand2;
2946 break;
2947 case '/':
2948 if (operand2 == 0) {
2949 mandoc_msg(MANDOCERR_DIVZERO,
2950 ln, *pos, "%s", v);
2951 *res = 0;
2952 break;
2953 }
2954 *res /= operand2;
2955 break;
2956 case '%':
2957 if (operand2 == 0) {
2958 mandoc_msg(MANDOCERR_DIVZERO,
2959 ln, *pos, "%s", v);
2960 *res = 0;
2961 break;
2962 }
2963 *res %= operand2;
2964 break;
2965 case '<':
2966 *res = *res < operand2;
2967 break;
2968 case '>':
2969 *res = *res > operand2;
2970 break;
2971 case 'l':
2972 *res = *res <= operand2;
2973 break;
2974 case 'g':
2975 *res = *res >= operand2;
2976 break;
2977 case '=':
2978 *res = *res == operand2;
2979 break;
2980 case '!':
2981 *res = *res != operand2;
2982 break;
2983 case '&':
2984 *res = *res && operand2;
2985 break;
2986 case ':':
2987 *res = *res || operand2;
2988 break;
2989 case 'i':
2990 if (operand2 < *res)
2991 *res = operand2;
2992 break;
2993 case 'a':
2994 if (operand2 > *res)
2995 *res = operand2;
2996 break;
2997 default:
2998 abort();
2999 }
3000 }
3001 return 1;
3002 }
3003
3004 /* --- register management ------------------------------------------------ */
3005
/*
 * Set or modify the number register with the NUL-terminated name.
 * Passes INT_MIN as the step argument of roff_setregn().
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3011
3012 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3013 roff_setregn(struct roff *r, const char *name, size_t len,
3014 int val, char sign, int step)
3015 {
3016 struct roffreg *reg;
3017
3018 /* Search for an existing register with the same name. */
3019 reg = r->regtab;
3020
3021 while (reg != NULL && (reg->key.sz != len ||
3022 strncmp(reg->key.p, name, len) != 0))
3023 reg = reg->next;
3024
3025 if (NULL == reg) {
3026 /* Create a new register. */
3027 reg = mandoc_malloc(sizeof(struct roffreg));
3028 reg->key.p = mandoc_strndup(name, len);
3029 reg->key.sz = len;
3030 reg->val = 0;
3031 reg->step = 0;
3032 reg->next = r->regtab;
3033 r->regtab = reg;
3034 }
3035
3036 if ('+' == sign)
3037 reg->val += val;
3038 else if ('-' == sign)
3039 reg->val -= val;
3040 else
3041 reg->val = val;
3042 if (step != INT_MIN)
3043 reg->step = step;
3044 }
3045
3046 /*
3047 * Handle some predefined read-only number registers.
3048 * For now, return -1 if the requested register is not predefined;
3049 * in case a predefined read-only register having the value -1
3050 * were to turn up, another special value would have to be chosen.
3051 */
3052 static int
roff_getregro(const struct roff * r,const char * name)3053 roff_getregro(const struct roff *r, const char *name)
3054 {
3055
3056 switch (*name) {
3057 case '$': /* Number of arguments of the last macro evaluated. */
3058 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3059 case 'A': /* ASCII approximation mode is always off. */
3060 return 0;
3061 case 'g': /* Groff compatibility mode is always on. */
3062 return 1;
3063 case 'H': /* Fixed horizontal resolution. */
3064 return 24;
3065 case 'j': /* Always adjust left margin only. */
3066 return 0;
3067 case 'l': /* Fixed line width for DocBook. */
3068 return 78 * 24;
3069 case 'T': /* Some output device is always defined. */
3070 return 1;
3071 case 'V': /* Fixed vertical resolution. */
3072 return 40;
3073 default:
3074 return -1;
3075 }
3076 }
3077
/*
 * Return the value of the number register with the
 * NUL-terminated name, without auto-incrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
3083
3084 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3085 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3086 {
3087 struct roffreg *reg;
3088 int val;
3089
3090 if ('.' == name[0] && 2 == len) {
3091 val = roff_getregro(r, name + 1);
3092 if (-1 != val)
3093 return val;
3094 }
3095
3096 for (reg = r->regtab; reg; reg = reg->next) {
3097 if (len == reg->key.sz &&
3098 0 == strncmp(name, reg->key.p, len)) {
3099 switch (sign) {
3100 case '+':
3101 reg->val += reg->step;
3102 break;
3103 case '-':
3104 reg->val -= reg->step;
3105 break;
3106 default:
3107 break;
3108 }
3109 return reg->val;
3110 }
3111 }
3112
3113 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3114 return 0;
3115 }
3116
3117 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3118 roff_hasregn(const struct roff *r, const char *name, size_t len)
3119 {
3120 struct roffreg *reg;
3121 int val;
3122
3123 if ('.' == name[0] && 2 == len) {
3124 val = roff_getregro(r, name + 1);
3125 if (-1 != val)
3126 return 1;
3127 }
3128
3129 for (reg = r->regtab; reg; reg = reg->next)
3130 if (len == reg->key.sz &&
3131 0 == strncmp(name, reg->key.p, len))
3132 return 1;
3133
3134 return 0;
3135 }
3136
3137 static void
roff_freereg(struct roffreg * reg)3138 roff_freereg(struct roffreg *reg)
3139 {
3140 struct roffreg *old_reg;
3141
3142 while (NULL != reg) {
3143 free(reg->key.p);
3144 old_reg = reg;
3145 reg = reg->next;
3146 free(old_reg);
3147 }
3148 }
3149
3150 static int
roff_nr(ROFF_ARGS)3151 roff_nr(ROFF_ARGS)
3152 {
3153 char *key, *val, *step;
3154 size_t keysz;
3155 int iv, is, len;
3156 char sign;
3157
3158 key = val = buf->buf + pos;
3159 if (*key == '\0')
3160 return ROFF_IGN;
3161
3162 keysz = roff_getname(&val, ln, pos);
3163 if (key[keysz] == '\\' || key[keysz] == '\t')
3164 return ROFF_IGN;
3165
3166 sign = *val;
3167 if (sign == '+' || sign == '-')
3168 val++;
3169
3170 len = 0;
3171 if (roff_evalnum(ln, val, &len, &iv, 'u', 0) == 0)
3172 return ROFF_IGN;
3173
3174 step = val + len;
3175 while (isspace((unsigned char)*step))
3176 step++;
3177 if (roff_evalnum(ln, step, NULL, &is, '\0', 0) == 0)
3178 is = INT_MIN;
3179
3180 roff_setregn(r, key, keysz, iv, sign, is);
3181 return ROFF_IGN;
3182 }
3183
3184 static int
roff_rr(ROFF_ARGS)3185 roff_rr(ROFF_ARGS)
3186 {
3187 struct roffreg *reg, **prev;
3188 char *name, *cp;
3189 size_t namesz;
3190
3191 name = cp = buf->buf + pos;
3192 if (*name == '\0')
3193 return ROFF_IGN;
3194 namesz = roff_getname(&cp, ln, pos);
3195 name[namesz] = '\0';
3196
3197 prev = &r->regtab;
3198 while (1) {
3199 reg = *prev;
3200 if (reg == NULL || !strcmp(name, reg->key.p))
3201 break;
3202 prev = ®->next;
3203 }
3204 if (reg != NULL) {
3205 *prev = reg->next;
3206 free(reg->key.p);
3207 free(reg);
3208 }
3209 return ROFF_IGN;
3210 }
3211
3212 /* --- handler functions for roff requests -------------------------------- */
3213
3214 static int
roff_rm(ROFF_ARGS)3215 roff_rm(ROFF_ARGS)
3216 {
3217 const char *name;
3218 char *cp;
3219 size_t namesz;
3220
3221 cp = buf->buf + pos;
3222 while (*cp != '\0') {
3223 name = cp;
3224 namesz = roff_getname(&cp, ln, (int)(cp - buf->buf));
3225 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3226 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3227 if (name[namesz] == '\\' || name[namesz] == '\t')
3228 break;
3229 }
3230 return ROFF_IGN;
3231 }
3232
3233 static int
roff_it(ROFF_ARGS)3234 roff_it(ROFF_ARGS)
3235 {
3236 int iv;
3237
3238 /* Parse the number of lines. */
3239
3240 if ( ! roff_evalnum(ln, buf->buf, &pos, &iv, '\0', 0)) {
3241 mandoc_msg(MANDOCERR_IT_NONUM,
3242 ln, ppos, "%s", buf->buf + 1);
3243 return ROFF_IGN;
3244 }
3245
3246 while (isspace((unsigned char)buf->buf[pos]))
3247 pos++;
3248
3249 /*
3250 * Arm the input line trap.
3251 * Special-casing "an-trap" is an ugly workaround to cope
3252 * with DocBook stupidly fiddling with man(7) internals.
3253 */
3254
3255 roffit_lines = iv;
3256 roffit_macro = mandoc_strdup(iv != 1 ||
3257 strcmp(buf->buf + pos, "an-trap") ?
3258 buf->buf + pos : "br");
3259 return ROFF_IGN;
3260 }
3261
3262 static int
roff_Dd(ROFF_ARGS)3263 roff_Dd(ROFF_ARGS)
3264 {
3265 int mask;
3266 enum roff_tok t, te;
3267
3268 switch (tok) {
3269 case ROFF_Dd:
3270 tok = MDOC_Dd;
3271 te = MDOC_MAX;
3272 if (r->format == 0)
3273 r->format = MPARSE_MDOC;
3274 mask = MPARSE_MDOC | MPARSE_QUICK;
3275 break;
3276 case ROFF_TH:
3277 tok = MAN_TH;
3278 te = MAN_MAX;
3279 if (r->format == 0)
3280 r->format = MPARSE_MAN;
3281 mask = MPARSE_QUICK;
3282 break;
3283 default:
3284 abort();
3285 }
3286 if ((r->options & mask) == 0)
3287 for (t = tok; t < te; t++)
3288 roff_setstr(r, roff_name[t], NULL, 0);
3289 return ROFF_CONT;
3290 }
3291
3292 static int
roff_TE(ROFF_ARGS)3293 roff_TE(ROFF_ARGS)
3294 {
3295 r->man->flags &= ~ROFF_NONOFILL;
3296 if (r->tbl == NULL) {
3297 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3298 return ROFF_IGN;
3299 }
3300 if (tbl_end(r->tbl, 0) == 0) {
3301 r->tbl = NULL;
3302 free(buf->buf);
3303 buf->buf = mandoc_strdup(".sp");
3304 buf->sz = 4;
3305 *offs = 0;
3306 return ROFF_REPARSE;
3307 }
3308 r->tbl = NULL;
3309 return ROFF_IGN;
3310 }
3311
3312 static int
roff_T_(ROFF_ARGS)3313 roff_T_(ROFF_ARGS)
3314 {
3315
3316 if (NULL == r->tbl)
3317 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3318 else
3319 tbl_restart(ln, ppos, r->tbl);
3320
3321 return ROFF_IGN;
3322 }
3323
3324 /*
3325 * Handle in-line equation delimiters.
3326 */
3327 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3328 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3329 {
3330 char *cp1, *cp2;
3331 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3332
3333 /*
3334 * Outside equations, look for an opening delimiter.
3335 * If we are inside an equation, we already know it is
3336 * in-line, or this function wouldn't have been called;
3337 * so look for a closing delimiter.
3338 */
3339
3340 cp1 = buf->buf + pos;
3341 cp2 = strchr(cp1, r->eqn == NULL ?
3342 r->last_eqn->odelim : r->last_eqn->cdelim);
3343 if (cp2 == NULL)
3344 return ROFF_CONT;
3345
3346 *cp2++ = '\0';
3347 bef_pr = bef_nl = aft_nl = aft_pr = "";
3348
3349 /* Handle preceding text, protecting whitespace. */
3350
3351 if (*buf->buf != '\0') {
3352 if (r->eqn == NULL)
3353 bef_pr = "\\&";
3354 bef_nl = "\n";
3355 }
3356
3357 /*
3358 * Prepare replacing the delimiter with an equation macro
3359 * and drop leading white space from the equation.
3360 */
3361
3362 if (r->eqn == NULL) {
3363 while (*cp2 == ' ')
3364 cp2++;
3365 mac = ".EQ";
3366 } else
3367 mac = ".EN";
3368
3369 /* Handle following text, protecting whitespace. */
3370
3371 if (*cp2 != '\0') {
3372 aft_nl = "\n";
3373 if (r->eqn != NULL)
3374 aft_pr = "\\&";
3375 }
3376
3377 /* Do the actual replacement. */
3378
3379 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3380 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3381 free(buf->buf);
3382 buf->buf = cp1;
3383
3384 /* Toggle the in-line state of the eqn subsystem. */
3385
3386 r->eqn_inline = r->eqn == NULL;
3387 return ROFF_REPARSE;
3388 }
3389
3390 static int
roff_EQ(ROFF_ARGS)3391 roff_EQ(ROFF_ARGS)
3392 {
3393 struct roff_node *n;
3394
3395 if (r->man->meta.macroset == MACROSET_MAN)
3396 man_breakscope(r->man, ROFF_EQ);
3397 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3398 if (ln > r->man->last->line)
3399 n->flags |= NODE_LINE;
3400 n->eqn = eqn_box_new();
3401 roff_node_append(r->man, n);
3402 r->man->next = ROFF_NEXT_SIBLING;
3403
3404 assert(r->eqn == NULL);
3405 if (r->last_eqn == NULL)
3406 r->last_eqn = eqn_alloc();
3407 else
3408 eqn_reset(r->last_eqn);
3409 r->eqn = r->last_eqn;
3410 r->eqn->node = n;
3411
3412 if (buf->buf[pos] != '\0')
3413 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3414 ".EQ %s", buf->buf + pos);
3415
3416 return ROFF_IGN;
3417 }
3418
3419 static int
roff_EN(ROFF_ARGS)3420 roff_EN(ROFF_ARGS)
3421 {
3422 if (r->eqn != NULL) {
3423 eqn_parse(r->eqn);
3424 r->eqn = NULL;
3425 } else
3426 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3427 if (buf->buf[pos] != '\0')
3428 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3429 "EN %s", buf->buf + pos);
3430 return ROFF_IGN;
3431 }
3432
3433 static int
roff_TS(ROFF_ARGS)3434 roff_TS(ROFF_ARGS)
3435 {
3436 if (r->tbl != NULL) {
3437 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3438 tbl_end(r->tbl, 0);
3439 }
3440 r->man->flags |= ROFF_NONOFILL;
3441 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3442 if (r->last_tbl == NULL)
3443 r->first_tbl = r->tbl;
3444 r->last_tbl = r->tbl;
3445 return ROFF_IGN;
3446 }
3447
3448 static int
roff_noarg(ROFF_ARGS)3449 roff_noarg(ROFF_ARGS)
3450 {
3451 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3452 man_breakscope(r->man, tok);
3453 if (tok == ROFF_brp)
3454 tok = ROFF_br;
3455 roff_elem_alloc(r->man, ln, ppos, tok);
3456 if (buf->buf[pos] != '\0')
3457 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3458 "%s %s", roff_name[tok], buf->buf + pos);
3459 if (tok == ROFF_nf)
3460 r->man->flags |= ROFF_NOFILL;
3461 else if (tok == ROFF_fi)
3462 r->man->flags &= ~ROFF_NOFILL;
3463 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3464 r->man->next = ROFF_NEXT_SIBLING;
3465 return ROFF_IGN;
3466 }
3467
3468 static int
roff_onearg(ROFF_ARGS)3469 roff_onearg(ROFF_ARGS)
3470 {
3471 struct roff_node *n;
3472 char *cp;
3473 int npos;
3474
3475 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3476 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3477 tok == ROFF_ti))
3478 man_breakscope(r->man, tok);
3479
3480 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3481 r->man->last = roffce_node;
3482 r->man->next = ROFF_NEXT_SIBLING;
3483 }
3484
3485 roff_elem_alloc(r->man, ln, ppos, tok);
3486 n = r->man->last;
3487
3488 cp = buf->buf + pos;
3489 if (*cp != '\0') {
3490 while (*cp != '\0' && *cp != ' ')
3491 cp++;
3492 while (*cp == ' ')
3493 *cp++ = '\0';
3494 if (*cp != '\0')
3495 mandoc_msg(MANDOCERR_ARG_EXCESS,
3496 ln, (int)(cp - buf->buf),
3497 "%s ... %s", roff_name[tok], cp);
3498 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3499 }
3500
3501 if (tok == ROFF_ce || tok == ROFF_rj) {
3502 if (r->man->last->type == ROFFT_ELEM) {
3503 roff_word_alloc(r->man, ln, pos, "1");
3504 r->man->last->flags |= NODE_NOSRC;
3505 }
3506 npos = 0;
3507 if (roff_evalnum(ln, r->man->last->string, &npos,
3508 &roffce_lines, '\0', 0) == 0) {
3509 mandoc_msg(MANDOCERR_CE_NONUM,
3510 ln, pos, "ce %s", buf->buf + pos);
3511 roffce_lines = 1;
3512 }
3513 if (roffce_lines < 1) {
3514 r->man->last = r->man->last->parent;
3515 roffce_node = NULL;
3516 roffce_lines = 0;
3517 } else
3518 roffce_node = r->man->last->parent;
3519 } else {
3520 n->flags |= NODE_VALID | NODE_ENDED;
3521 r->man->last = n;
3522 }
3523 n->flags |= NODE_LINE;
3524 r->man->next = ROFF_NEXT_SIBLING;
3525 return ROFF_IGN;
3526 }
3527
3528 static int
roff_manyarg(ROFF_ARGS)3529 roff_manyarg(ROFF_ARGS)
3530 {
3531 struct roff_node *n;
3532 char *sp, *ep;
3533
3534 roff_elem_alloc(r->man, ln, ppos, tok);
3535 n = r->man->last;
3536
3537 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3538 while (*ep != '\0' && *ep != ' ')
3539 ep++;
3540 while (*ep == ' ')
3541 *ep++ = '\0';
3542 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3543 }
3544
3545 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3546 r->man->last = n;
3547 r->man->next = ROFF_NEXT_SIBLING;
3548 return ROFF_IGN;
3549 }
3550
3551 static int
roff_als(ROFF_ARGS)3552 roff_als(ROFF_ARGS)
3553 {
3554 char *oldn, *newn, *end, *value;
3555 size_t oldsz, newsz, valsz;
3556
3557 newn = oldn = buf->buf + pos;
3558 if (*newn == '\0')
3559 return ROFF_IGN;
3560
3561 newsz = roff_getname(&oldn, ln, pos);
3562 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3563 return ROFF_IGN;
3564
3565 end = oldn;
3566 oldsz = roff_getname(&end, ln, oldn - buf->buf);
3567 if (oldsz == 0)
3568 return ROFF_IGN;
3569
3570 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3571 (int)oldsz, oldn);
3572 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3573 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3574 free(value);
3575 return ROFF_IGN;
3576 }
3577
3578 /*
3579 * The .break request only makes sense inside conditionals,
3580 * and that case is already handled in roff_cond_sub().
3581 */
3582 static int
roff_break(ROFF_ARGS)3583 roff_break(ROFF_ARGS)
3584 {
3585 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3586 return ROFF_IGN;
3587 }
3588
3589 static int
roff_cc(ROFF_ARGS)3590 roff_cc(ROFF_ARGS)
3591 {
3592 const char *p;
3593
3594 p = buf->buf + pos;
3595
3596 if (*p == '\0' || (r->control = *p++) == '.')
3597 r->control = '\0';
3598
3599 if (*p != '\0')
3600 mandoc_msg(MANDOCERR_ARG_EXCESS,
3601 ln, p - buf->buf, "cc ... %s", p);
3602
3603 return ROFF_IGN;
3604 }
3605
3606 static int
roff_char(ROFF_ARGS)3607 roff_char(ROFF_ARGS)
3608 {
3609 const char *p, *kp, *vp;
3610 size_t ksz, vsz;
3611 int font;
3612
3613 /* Parse the character to be replaced. */
3614
3615 kp = buf->buf + pos;
3616 p = kp + 1;
3617 if (*kp == '\0' || (*kp == '\\' &&
3618 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3619 (*p != ' ' && *p != '\0')) {
3620 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3621 return ROFF_IGN;
3622 }
3623 ksz = p - kp;
3624 while (*p == ' ')
3625 p++;
3626
3627 /*
3628 * If the replacement string contains a font escape sequence,
3629 * we have to restore the font at the end.
3630 */
3631
3632 vp = p;
3633 vsz = strlen(p);
3634 font = 0;
3635 while (*p != '\0') {
3636 if (*p++ != '\\')
3637 continue;
3638 switch (mandoc_escape(&p, NULL, NULL)) {
3639 case ESCAPE_FONT:
3640 case ESCAPE_FONTROMAN:
3641 case ESCAPE_FONTITALIC:
3642 case ESCAPE_FONTBOLD:
3643 case ESCAPE_FONTBI:
3644 case ESCAPE_FONTCR:
3645 case ESCAPE_FONTCB:
3646 case ESCAPE_FONTCI:
3647 case ESCAPE_FONTPREV:
3648 font++;
3649 break;
3650 default:
3651 break;
3652 }
3653 }
3654 if (font > 1)
3655 mandoc_msg(MANDOCERR_CHAR_FONT,
3656 ln, (int)(vp - buf->buf), "%s", vp);
3657
3658 /*
3659 * Approximate the effect of .char using the .tr tables.
3660 * XXX In groff, .char and .tr interact differently.
3661 */
3662
3663 if (ksz == 1) {
3664 if (r->xtab == NULL)
3665 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3666 assert((unsigned int)*kp < 128);
3667 free(r->xtab[(int)*kp].p);
3668 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3669 "%s%s", vp, font ? "\fP" : "");
3670 } else {
3671 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3672 if (font)
3673 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3674 }
3675 return ROFF_IGN;
3676 }
3677
3678 static int
roff_ec(ROFF_ARGS)3679 roff_ec(ROFF_ARGS)
3680 {
3681 const char *p;
3682
3683 p = buf->buf + pos;
3684 if (*p == '\0')
3685 r->escape = '\\';
3686 else {
3687 r->escape = *p;
3688 if (*++p != '\0')
3689 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3690 (int)(p - buf->buf), "ec ... %s", p);
3691 }
3692 return ROFF_IGN;
3693 }
3694
3695 static int
roff_eo(ROFF_ARGS)3696 roff_eo(ROFF_ARGS)
3697 {
3698 r->escape = '\0';
3699 if (buf->buf[pos] != '\0')
3700 mandoc_msg(MANDOCERR_ARG_SKIP,
3701 ln, pos, "eo %s", buf->buf + pos);
3702 return ROFF_IGN;
3703 }
3704
3705 static int
roff_mc(ROFF_ARGS)3706 roff_mc(ROFF_ARGS)
3707 {
3708 struct roff_node *n;
3709 char *cp;
3710
3711 /* Parse the first argument. */
3712
3713 cp = buf->buf + pos;
3714 if (*cp != '\0')
3715 cp++;
3716 if (buf->buf[pos] == '\\') {
3717 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3718 case ESCAPE_SPECIAL:
3719 case ESCAPE_UNICODE:
3720 case ESCAPE_NUMBERED:
3721 break;
3722 default:
3723 *cp = '\0';
3724 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3725 "mc %s", buf->buf + pos);
3726 buf->buf[pos] = '\0';
3727 break;
3728 }
3729 }
3730
3731 /* Ignore additional arguments. */
3732
3733 while (*cp == ' ')
3734 *cp++ = '\0';
3735 if (*cp != '\0') {
3736 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3737 "mc ... %s", cp);
3738 *cp = '\0';
3739 }
3740
3741 /* Create the .mc node. */
3742
3743 roff_elem_alloc(r->man, ln, ppos, tok);
3744 n = r->man->last;
3745 if (buf->buf[pos] != '\0')
3746 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3747 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3748 r->man->last = n;
3749 r->man->next = ROFF_NEXT_SIBLING;
3750 return ROFF_IGN;
3751 }
3752
3753 static int
roff_nop(ROFF_ARGS)3754 roff_nop(ROFF_ARGS)
3755 {
3756 while (buf->buf[pos] == ' ')
3757 pos++;
3758 *offs = pos;
3759 return ROFF_RERUN;
3760 }
3761
3762 static int
roff_tr(ROFF_ARGS)3763 roff_tr(ROFF_ARGS)
3764 {
3765 const char *p, *first, *second;
3766 size_t fsz, ssz;
3767
3768 p = buf->buf + pos;
3769
3770 if (*p == '\0') {
3771 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3772 return ROFF_IGN;
3773 }
3774
3775 while (*p != '\0') {
3776 fsz = ssz = 1;
3777
3778 first = p++;
3779 if (*first == '\\') {
3780 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3781 return ROFF_IGN;
3782 fsz = (size_t)(p - first);
3783 }
3784
3785 second = p++;
3786 if (*second == '\\') {
3787 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3788 return ROFF_IGN;
3789 ssz = (size_t)(p - second);
3790 } else if (*second == '\0') {
3791 mandoc_msg(MANDOCERR_TR_ODD, ln,
3792 (int)(first - buf->buf), "tr %s", first);
3793 second = " ";
3794 p--;
3795 }
3796
3797 if (fsz > 1) {
3798 roff_setstrn(&r->xmbtab, first, fsz,
3799 second, ssz, 0);
3800 continue;
3801 }
3802
3803 if (r->xtab == NULL)
3804 r->xtab = mandoc_calloc(128,
3805 sizeof(struct roffstr));
3806
3807 free(r->xtab[(int)*first].p);
3808 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3809 r->xtab[(int)*first].sz = ssz;
3810 }
3811
3812 return ROFF_IGN;
3813 }
3814
3815 /*
3816 * Implementation of the .return request.
3817 * There is no need to call roff_userret() from here.
3818 * The read module will call that after rewinding the reader stack
3819 * to the place from where the current macro was called.
3820 */
3821 static int
roff_return(ROFF_ARGS)3822 roff_return(ROFF_ARGS)
3823 {
3824 if (r->mstackpos >= 0)
3825 return ROFF_IGN | ROFF_USERRET;
3826
3827 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3828 return ROFF_IGN;
3829 }
3830
3831 static int
roff_rn(ROFF_ARGS)3832 roff_rn(ROFF_ARGS)
3833 {
3834 const char *value;
3835 char *oldn, *newn, *end;
3836 size_t oldsz, newsz;
3837 int deftype;
3838
3839 oldn = newn = buf->buf + pos;
3840 if (*oldn == '\0')
3841 return ROFF_IGN;
3842
3843 oldsz = roff_getname(&newn, ln, pos);
3844 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3845 return ROFF_IGN;
3846
3847 end = newn;
3848 newsz = roff_getname(&end, ln, newn - buf->buf);
3849 if (newsz == 0)
3850 return ROFF_IGN;
3851
3852 deftype = ROFFDEF_ANY;
3853 value = roff_getstrn(r, oldn, oldsz, &deftype);
3854 switch (deftype) {
3855 case ROFFDEF_USER:
3856 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3857 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3858 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3859 break;
3860 case ROFFDEF_PRE:
3861 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3862 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3863 break;
3864 case ROFFDEF_REN:
3865 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3866 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3867 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3868 break;
3869 case ROFFDEF_STD:
3870 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3871 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3872 break;
3873 default:
3874 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3875 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3876 break;
3877 }
3878 return ROFF_IGN;
3879 }
3880
3881 static int
roff_shift(ROFF_ARGS)3882 roff_shift(ROFF_ARGS)
3883 {
3884 struct mctx *ctx;
3885 int argpos, levels, i;
3886
3887 argpos = pos;
3888 levels = 1;
3889 if (buf->buf[pos] != '\0' &&
3890 roff_evalnum(ln, buf->buf, &pos, &levels, '\0', 0) == 0) {
3891 mandoc_msg(MANDOCERR_CE_NONUM,
3892 ln, pos, "shift %s", buf->buf + pos);
3893 levels = 1;
3894 }
3895 if (r->mstackpos < 0) {
3896 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3897 return ROFF_IGN;
3898 }
3899 ctx = r->mstack + r->mstackpos;
3900 if (levels > ctx->argc) {
3901 mandoc_msg(MANDOCERR_SHIFT,
3902 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3903 levels = ctx->argc;
3904 }
3905 if (levels < 0) {
3906 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3907 levels = 0;
3908 }
3909 if (levels == 0)
3910 return ROFF_IGN;
3911 for (i = 0; i < levels; i++)
3912 free(ctx->argv[i]);
3913 ctx->argc -= levels;
3914 for (i = 0; i < ctx->argc; i++)
3915 ctx->argv[i] = ctx->argv[i + levels];
3916 return ROFF_IGN;
3917 }
3918
3919 static int
roff_so(ROFF_ARGS)3920 roff_so(ROFF_ARGS)
3921 {
3922 char *name, *cp;
3923
3924 name = buf->buf + pos;
3925 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3926
3927 /*
3928 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3929 * opening anything that's not in our cwd or anything beneath
3930 * it. Thus, explicitly disallow traversing up the file-system
3931 * or using absolute paths.
3932 */
3933
3934 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3935 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3936 buf->sz = mandoc_asprintf(&cp,
3937 ".sp\nSee the file %s.\n.sp", name) + 1;
3938 free(buf->buf);
3939 buf->buf = cp;
3940 *offs = 0;
3941 return ROFF_REPARSE;
3942 }
3943
3944 *offs = pos;
3945 return ROFF_SO;
3946 }
3947
3948 /* --- user defined strings and macros ------------------------------------ */
3949
3950 static int
roff_userdef(ROFF_ARGS)3951 roff_userdef(ROFF_ARGS)
3952 {
3953 struct mctx *ctx;
3954 char *arg, *ap, *dst, *src;
3955 size_t sz;
3956
3957 /* If the macro is empty, ignore it altogether. */
3958
3959 if (*r->current_string == '\0')
3960 return ROFF_IGN;
3961
3962 /* Initialize a new macro stack context. */
3963
3964 if (++r->mstackpos == r->mstacksz) {
3965 r->mstack = mandoc_recallocarray(r->mstack,
3966 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3967 r->mstacksz += 8;
3968 }
3969 ctx = r->mstack + r->mstackpos;
3970 ctx->argc = 0;
3971
3972 /*
3973 * Collect pointers to macro argument strings,
3974 * NUL-terminating them and escaping quotes.
3975 */
3976
3977 src = buf->buf + pos;
3978 while (*src != '\0') {
3979 if (ctx->argc == ctx->argsz) {
3980 ctx->argsz += 8;
3981 ctx->argv = mandoc_reallocarray(ctx->argv,
3982 ctx->argsz, sizeof(*ctx->argv));
3983 }
3984 arg = roff_getarg(r, &src, ln, &pos);
3985 sz = 1; /* For the terminating NUL. */
3986 for (ap = arg; *ap != '\0'; ap++)
3987 sz += *ap == '"' ? 4 : 1;
3988 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3989 for (ap = arg; *ap != '\0'; ap++) {
3990 if (*ap == '"') {
3991 memcpy(dst, "\\(dq", 4);
3992 dst += 4;
3993 } else
3994 *dst++ = *ap;
3995 }
3996 *dst = '\0';
3997 free(arg);
3998 }
3999
4000 /* Replace the macro invocation by the macro definition. */
4001
4002 free(buf->buf);
4003 buf->buf = mandoc_strdup(r->current_string);
4004 buf->sz = strlen(buf->buf) + 1;
4005 *offs = 0;
4006
4007 return buf->buf[buf->sz - 2] == '\n' ?
4008 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4009 }
4010
4011 /*
4012 * Calling a high-level macro that was renamed with .rn.
4013 * r->current_string has already been set up by roff_parse().
4014 */
4015 static int
roff_renamed(ROFF_ARGS)4016 roff_renamed(ROFF_ARGS)
4017 {
4018 char *nbuf;
4019
4020 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4021 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4022 free(buf->buf);
4023 buf->buf = nbuf;
4024 *offs = 0;
4025 return ROFF_CONT;
4026 }
4027
4028 /*
4029 * Measure the length in bytes of the roff identifier at *cpp
4030 * and advance the pointer to the next word.
4031 */
4032 static size_t
roff_getname(char ** cpp,int ln,int pos)4033 roff_getname(char **cpp, int ln, int pos)
4034 {
4035 char *name, *cp;
4036 int namesz, inam, iend;
4037
4038 name = *cpp;
4039 if (*name == '\0')
4040 return 0;
4041
4042 /* Advance cp to the byte after the end of the name. */
4043
4044 cp = name;
4045 namesz = 0;
4046 for (;;) {
4047 if (*cp == '\0')
4048 break;
4049 if (*cp == ' ' || *cp == '\t') {
4050 cp++;
4051 break;
4052 }
4053 if (*cp != '\\') {
4054 if (name + namesz < cp) {
4055 name[namesz] = *cp;
4056 *cp = ' ';
4057 }
4058 namesz++;
4059 cp++;
4060 continue;
4061 }
4062 if (cp[1] == '{' || cp[1] == '}')
4063 break;
4064 if (roff_escape(cp, 0, 0, NULL, &inam,
4065 NULL, NULL, &iend) != ESCAPE_UNDEF) {
4066 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4067 "%.*s%.*s", namesz, name, iend, cp);
4068 cp += iend;
4069 break;
4070 }
4071
4072 /*
4073 * In an identifier, \\, \., \G and so on
4074 * are reduced to \, ., G and so on,
4075 * vaguely similar to copy mode.
4076 */
4077
4078 name[namesz++] = cp[inam];
4079 while (iend--) {
4080 if (cp >= name + namesz)
4081 *cp = ' ';
4082 cp++;
4083 }
4084 }
4085
4086 /* Read past spaces. */
4087
4088 while (*cp == ' ')
4089 cp++;
4090
4091 *cpp = cp;
4092 return namesz;
4093 }
4094
4095 /*
4096 * Store *string into the user-defined string called *name.
4097 * To clear an existing entry, call with (*r, *name, NULL, 0).
4098 * append == 0: replace mode
4099 * append == 1: single-line append mode
4100 * append == 2: multiline append mode, append '\n' after each call
4101 */
4102 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4103 roff_setstr(struct roff *r, const char *name, const char *string,
4104 int append)
4105 {
4106 size_t namesz;
4107
4108 namesz = strlen(name);
4109 roff_setstrn(&r->strtab, name, namesz, string,
4110 string ? strlen(string) : 0, append);
4111 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4112 }
4113
4114 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4115 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4116 const char *string, size_t stringsz, int append)
4117 {
4118 struct roffkv *n;
4119 char *c;
4120 int i;
4121 size_t oldch, newch;
4122
4123 /* Search for an existing string with the same name. */
4124 n = *r;
4125
4126 while (n && (namesz != n->key.sz ||
4127 strncmp(n->key.p, name, namesz)))
4128 n = n->next;
4129
4130 if (NULL == n) {
4131 /* Create a new string table entry. */
4132 n = mandoc_malloc(sizeof(struct roffkv));
4133 n->key.p = mandoc_strndup(name, namesz);
4134 n->key.sz = namesz;
4135 n->val.p = NULL;
4136 n->val.sz = 0;
4137 n->next = *r;
4138 *r = n;
4139 } else if (0 == append) {
4140 free(n->val.p);
4141 n->val.p = NULL;
4142 n->val.sz = 0;
4143 }
4144
4145 if (NULL == string)
4146 return;
4147
4148 /*
4149 * One additional byte for the '\n' in multiline mode,
4150 * and one for the terminating '\0'.
4151 */
4152 newch = stringsz + (1 < append ? 2u : 1u);
4153
4154 if (NULL == n->val.p) {
4155 n->val.p = mandoc_malloc(newch);
4156 *n->val.p = '\0';
4157 oldch = 0;
4158 } else {
4159 oldch = n->val.sz;
4160 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4161 }
4162
4163 /* Skip existing content in the destination buffer. */
4164 c = n->val.p + (int)oldch;
4165
4166 /* Append new content to the destination buffer. */
4167 i = 0;
4168 while (i < (int)stringsz) {
4169 /*
4170 * Rudimentary roff copy mode:
4171 * Handle escaped backslashes.
4172 */
4173 if ('\\' == string[i] && '\\' == string[i + 1])
4174 i++;
4175 *c++ = string[i++];
4176 }
4177
4178 /* Append terminating bytes. */
4179 if (1 < append)
4180 *c++ = '\n';
4181
4182 *c = '\0';
4183 n->val.sz = (int)(c - n->val.p);
4184 }
4185
4186 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4187 roff_getstrn(struct roff *r, const char *name, size_t len,
4188 int *deftype)
4189 {
4190 const struct roffkv *n;
4191 int found, i;
4192 enum roff_tok tok;
4193
4194 found = 0;
4195 for (n = r->strtab; n != NULL; n = n->next) {
4196 if (strncmp(name, n->key.p, len) != 0 ||
4197 n->key.p[len] != '\0' || n->val.p == NULL)
4198 continue;
4199 if (*deftype & ROFFDEF_USER) {
4200 *deftype = ROFFDEF_USER;
4201 return n->val.p;
4202 } else {
4203 found = 1;
4204 break;
4205 }
4206 }
4207 for (n = r->rentab; n != NULL; n = n->next) {
4208 if (strncmp(name, n->key.p, len) != 0 ||
4209 n->key.p[len] != '\0' || n->val.p == NULL)
4210 continue;
4211 if (*deftype & ROFFDEF_REN) {
4212 *deftype = ROFFDEF_REN;
4213 return n->val.p;
4214 } else {
4215 found = 1;
4216 break;
4217 }
4218 }
4219 for (i = 0; i < PREDEFS_MAX; i++) {
4220 if (strncmp(name, predefs[i].name, len) != 0 ||
4221 predefs[i].name[len] != '\0')
4222 continue;
4223 if (*deftype & ROFFDEF_PRE) {
4224 *deftype = ROFFDEF_PRE;
4225 return predefs[i].str;
4226 } else {
4227 found = 1;
4228 break;
4229 }
4230 }
4231 if (r->man->meta.macroset != MACROSET_MAN) {
4232 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4233 if (strncmp(name, roff_name[tok], len) != 0 ||
4234 roff_name[tok][len] != '\0')
4235 continue;
4236 if (*deftype & ROFFDEF_STD) {
4237 *deftype = ROFFDEF_STD;
4238 return NULL;
4239 } else {
4240 found = 1;
4241 break;
4242 }
4243 }
4244 }
4245 if (r->man->meta.macroset != MACROSET_MDOC) {
4246 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4247 if (strncmp(name, roff_name[tok], len) != 0 ||
4248 roff_name[tok][len] != '\0')
4249 continue;
4250 if (*deftype & ROFFDEF_STD) {
4251 *deftype = ROFFDEF_STD;
4252 return NULL;
4253 } else {
4254 found = 1;
4255 break;
4256 }
4257 }
4258 }
4259
4260 if (found == 0 && *deftype != ROFFDEF_ANY) {
4261 if (*deftype & ROFFDEF_REN) {
4262 /*
4263 * This might still be a request,
4264 * so do not treat it as undefined yet.
4265 */
4266 *deftype = ROFFDEF_UNDEF;
4267 return NULL;
4268 }
4269
4270 /* Using an undefined string defines it to be empty. */
4271
4272 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4273 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4274 }
4275
4276 *deftype = 0;
4277 return NULL;
4278 }
4279
4280 static void
roff_freestr(struct roffkv * r)4281 roff_freestr(struct roffkv *r)
4282 {
4283 struct roffkv *n, *nn;
4284
4285 for (n = r; n; n = nn) {
4286 free(n->key.p);
4287 free(n->val.p);
4288 nn = n->next;
4289 free(n);
4290 }
4291 }
4292
4293 /* --- accessors and utility functions ------------------------------------ */
4294
4295 /*
4296 * Duplicate an input string, making the appropriate character
4297 * conversations (as stipulated by `tr') along the way.
4298 * Returns a heap-allocated string with all the replacements made.
4299 */
4300 char *
roff_strdup(const struct roff * r,const char * p)4301 roff_strdup(const struct roff *r, const char *p)
4302 {
4303 const struct roffkv *cp;
4304 char *res;
4305 const char *pp;
4306 size_t ssz, sz;
4307 enum mandoc_esc esc;
4308
4309 if (NULL == r->xmbtab && NULL == r->xtab)
4310 return mandoc_strdup(p);
4311 else if ('\0' == *p)
4312 return mandoc_strdup("");
4313
4314 /*
4315 * Step through each character looking for term matches
4316 * (remember that a `tr' can be invoked with an escape, which is
4317 * a glyph but the escape is multi-character).
4318 * We only do this if the character hash has been initialised
4319 * and the string is >0 length.
4320 */
4321
4322 res = NULL;
4323 ssz = 0;
4324
4325 while ('\0' != *p) {
4326 assert((unsigned int)*p < 128);
4327 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4328 sz = r->xtab[(int)*p].sz;
4329 res = mandoc_realloc(res, ssz + sz + 1);
4330 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4331 ssz += sz;
4332 p++;
4333 continue;
4334 } else if ('\\' != *p) {
4335 res = mandoc_realloc(res, ssz + 2);
4336 res[ssz++] = *p++;
4337 continue;
4338 }
4339
4340 /* Search for term matches. */
4341 for (cp = r->xmbtab; cp; cp = cp->next)
4342 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4343 break;
4344
4345 if (NULL != cp) {
4346 /*
4347 * A match has been found.
4348 * Append the match to the array and move
4349 * forward by its keysize.
4350 */
4351 res = mandoc_realloc(res,
4352 ssz + cp->val.sz + 1);
4353 memcpy(res + ssz, cp->val.p, cp->val.sz);
4354 ssz += cp->val.sz;
4355 p += (int)cp->key.sz;
4356 continue;
4357 }
4358
4359 /*
4360 * Handle escapes carefully: we need to copy
4361 * over just the escape itself, or else we might
4362 * do replacements within the escape itself.
4363 * Make sure to pass along the bogus string.
4364 */
4365 pp = p++;
4366 esc = mandoc_escape(&p, NULL, NULL);
4367 if (ESCAPE_ERROR == esc) {
4368 sz = strlen(pp);
4369 res = mandoc_realloc(res, ssz + sz + 1);
4370 memcpy(res + ssz, pp, sz);
4371 break;
4372 }
4373 /*
4374 * We bail out on bad escapes.
4375 * No need to warn: we already did so when
4376 * roff_expand() was called.
4377 */
4378 sz = (int)(p - pp);
4379 res = mandoc_realloc(res, ssz + sz + 1);
4380 memcpy(res + ssz, pp, sz);
4381 ssz += sz;
4382 }
4383
4384 res[(int)ssz] = '\0';
4385 return res;
4386 }
4387
4388 int
roff_getformat(const struct roff * r)4389 roff_getformat(const struct roff *r)
4390 {
4391
4392 return r->format;
4393 }
4394
4395 /*
4396 * Find out whether a line is a macro line or not.
4397 * If it is, adjust the current position and return one; if it isn't,
4398 * return zero and don't change the current position.
4399 * If the control character has been set with `.cc', then let that grain
4400 * precedence.
4401 * This is slightly contrary to groff, where using the non-breaking
4402 * control character when `cc' has been invoked will cause the
4403 * non-breaking macro contents to be printed verbatim.
4404 */
4405 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4406 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4407 {
4408 int pos;
4409
4410 pos = *ppos;
4411
4412 if (r->control != '\0' && cp[pos] == r->control)
4413 pos++;
4414 else if (r->control != '\0')
4415 return 0;
4416 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4417 pos += 2;
4418 else if ('.' == cp[pos] || '\'' == cp[pos])
4419 pos++;
4420 else
4421 return 0;
4422
4423 while (' ' == cp[pos] || '\t' == cp[pos])
4424 pos++;
4425
4426 *ppos = pos;
4427 return 1;
4428 }
4429