1 /* $Id: roff.c,v 1.263 2015/02/21 14:46:58 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33
/* Maximum number of nested if-else conditionals. */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * roff_res() counts every expansion it performs on a line and gives up
 * with MANDOCERR_ROFFLOOP once the count exceeds this limit.
 */
#define	EXPAND_LIMIT	1000
39
/*
 * Tokens for the requests known to this parser.
 *
 * The enumerators double as indices into the roffs[] table:
 * roffhash_find() turns a table position (n - roffs) straight into an
 * enum rofft value, so the order here has to stay in step with the
 * order of the table rows.  ROFF_cblock and ROFF_USERDEF are the last
 * two real entries; ROFF_MAX is the table size and is also what
 * roffhash_find() returns when a name is not found.
 */
enum	rofft {
	ROFF_ab,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	/* MAN_br, MDOC_br */
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_ce,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	/* MAN_ft; ignored in mdoc(7) */
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	/* MAN_ll, MDOC_ll */
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mc,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_po,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rj,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	/* MAN_sp, MDOC_sp */
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_ta,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_ti,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock,
	ROFF_USERDEF,
	ROFF_MAX
};
286
/*
 * An incredibly-simple string buffer: a pointer to the text together
 * with its cached length, so users need not call strlen() again.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
294
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps two such lists: strtab (user-defined strings and
 * macros) and xmbtab (multi-byte `tr' translations).
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
303
/*
 * A single number register as part of a singly-linked list.
 * The list head lives in struct roff (regtab).
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value of the register */
	struct roffreg	*next; /* next in list */
};
312
/*
 * Complete state of one roff parser.
 * roff_alloc() creates it zero-filled, roff_free1() releases everything
 * it owns (tables, equations, the node stack, the conditional stack,
 * registers, strings and translation tables), and roff_free() finally
 * releases the structure itself.
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	const struct mchars *mchars; /* character table */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
};
336
/*
 * One entry on the stack of pending roff instructions.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(); the top of the stack is r->last and each entry
 * points at the one below it through parent.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
347
/*
 * Parameter list shared by every request handler, so that all of them
 * have the signature required by the roffproc function pointer type.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameter list. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
357
/*
 * Description of one request: its name, up to three handlers and a
 * flag word.  The rows of roffs[] are of this type; roffhash_init()
 * links them into per-bucket chains through the next member.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
367
/* One predefined string: a name and the text it is replaced by. */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initialiser including the trailing
 * comma; presumably invoked once per entry by predefs.in, which is
 * included inside the predefs[] initialiser.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
375
/*
 * Forward declarations of the file-local functions.  Handlers declared
 * with ROFF_ARGS are the ones referenced from the roffs[] table.
 */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_brp(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int,
				const char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(const struct roff *,
				const char *, size_t);
static	int		 roff_getregro(const char *name);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofft	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct buf *, int, int *);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
437
/*
 * See roffhash_find().
 * Request names are hashed on their first character, which has to lie
 * in the range ASCII_LO..ASCII_HI; HASHWIDTH is the number of
 * characters in that range and therefore the number of buckets.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

#define	ROFFNUM_SCALE	(1 << 0) /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1) /* Skip whitespace in roff_evalnum(). */

/* Bucket heads; each chain is threaded through roffs[] by roffhash_init(). */
static	struct roffmac	*hash[HASHWIDTH];
448
/*
 * Table of the built-in requests.
 *
 * Each row gives the request name, the handler called for the request
 * itself (proc), the handlers for text and for requests nested inside
 * it (text, sub), the flags and the hash chain pointer.  The chain
 * pointer is NULL here and filled in by roffhash_init(), which is why
 * the table is not const.
 *
 * The rows are looked up by index: roffhash_find() converts a row
 * position into an enum rofft value, so the row order must match the
 * order of the enumerators in enum rofft.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ab", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "af", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "aln", roff_unsupp, NULL, NULL, 0, NULL },
	{ "als", roff_unsupp, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "as", roff_ds, NULL, NULL, 0, NULL },
	{ "as1", roff_ds, NULL, NULL, 0, NULL },
	{ "asciify", roff_unsupp, NULL, NULL, 0, NULL },
	{ "backtrace", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "bleedat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "blm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "box", roff_unsupp, NULL, NULL, 0, NULL },
	{ "boxa", roff_unsupp, NULL, NULL, 0, NULL },
	{ "bp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "BP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "break", roff_unsupp, NULL, NULL, 0, NULL },
	{ "breakchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "brp", roff_brp, NULL, NULL, 0, NULL },
	{ "brpnl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "c2", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "ce", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cf", roff_insec, NULL, NULL, 0, NULL },
	{ "cflags", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "char", roff_unsupp, NULL, NULL, 0, NULL },
	{ "chop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "class", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "close", roff_insec, NULL, NULL, 0, NULL },
	{ "CL", roff_unsupp, NULL, NULL, 0, NULL },
	{ "color", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "composite", roff_unsupp, NULL, NULL, 0, NULL },
	{ "continue", roff_unsupp, NULL, NULL, 0, NULL },
	{ "cp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cropat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "cu", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "da", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dch", roff_unsupp, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "defcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "device", roff_unsupp, NULL, NULL, 0, NULL },
	{ "devicem", roff_unsupp, NULL, NULL, 0, NULL },
	{ "di", roff_unsupp, NULL, NULL, 0, NULL },
	{ "do", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "ds1", roff_ds, NULL, NULL, 0, NULL },
	{ "dwh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "dt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ec", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ecs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "em", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ "eo", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EP", roff_unsupp, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "errprint", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ev", roff_unsupp, NULL, NULL, 0, NULL },
	{ "evc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ex", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fallback", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "feature", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fkern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "flig", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fspecial", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ftr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "fzoom", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "gcolor", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hidechar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hla", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hlm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfa", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hylen", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hym", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hypp", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "hys", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "index", roff_unsupp, NULL, NULL, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "itc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "IX", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kern", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernafter", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "kernpair", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lds", roff_unsupp, NULL, NULL, 0, NULL },
	{ "length", roff_unsupp, NULL, NULL, 0, NULL },
	{ "letadj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lf", roff_insec, NULL, NULL, 0, NULL },
	{ "lg", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "linetabs", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnr", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lnrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lpfx", roff_unsupp, NULL, NULL, 0, NULL },
	{ "ls", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "lsm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "lt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mediasize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "minss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mk", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "mso", roff_insec, NULL, NULL, 0, NULL },
	{ "na", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nhychar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nm", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nop", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "nrf", roff_unsupp, NULL, NULL, 0, NULL },
	{ "nroff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nx", roff_insec, NULL, NULL, 0, NULL },
	{ "open", roff_insec, NULL, NULL, 0, NULL },
	{ "opena", roff_insec, NULL, NULL, 0, NULL },
	{ "os", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "output", roff_unsupp, NULL, NULL, 0, NULL },
	{ "padj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "papersize", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pev", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pi", roff_insec, NULL, NULL, 0, NULL },
	{ "PI", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pl", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pnr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "po", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "psbb", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pshape", roff_unsupp, NULL, NULL, 0, NULL },
	{ "pso", roff_insec, NULL, NULL, 0, NULL },
	{ "ptr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "pvs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rchar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rd", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "return", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rfschar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rhang", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rj", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "rn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rnn", roff_unsupp, NULL, NULL, 0, NULL },
	{ "rr", roff_rr, NULL, NULL, 0, NULL },
	{ "rs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "schar", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sentchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "shift", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sizes", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "special", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ss", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sty", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "substring", roff_unsupp, NULL, NULL, 0, NULL },
	{ "sv", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "sy", roff_insec, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "ta", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tc", roff_unsupp, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "ti", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tkf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tl", roff_unsupp, NULL, NULL, 0, NULL },
	{ "tm", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tm1", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tmc", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "track", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "transchar", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trf", roff_insec, NULL, NULL, 0, NULL },
	{ "trimat", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "trin", roff_unsupp, NULL, NULL, 0, NULL },
	{ "trnt", roff_unsupp, NULL, NULL, 0, NULL },
	{ "troff", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "uf", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ul", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unformat", roff_unsupp, NULL, NULL, 0, NULL },
	{ "unwatch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vpt", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "vs", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "warnscale", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watch", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchlength", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "watchn", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "wh", roff_unsupp, NULL, NULL, 0, NULL },
	{ "while", roff_unsupp, NULL, NULL, 0, NULL },
	{ "write", roff_insec, NULL, NULL, 0, NULL },
	{ "writec", roff_insec, NULL, NULL, 0, NULL },
	{ "writem", roff_insec, NULL, NULL, 0, NULL },
	{ "xflag", roff_line_ignore, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
687
/*
 * NULL-terminated list of macro names with external linkage.
 * NOTE(review): judging by the identifier these are the mdoc(7) macro
 * names treated as reserved; the code consulting the list is not part
 * of this view -- confirm against the callers.
 */
/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em",
	"En", "Eo", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp",
	"Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv",
	"Sc", "Sh", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
710
/*
 * NULL-terminated list of macro names with external linkage.
 * NOTE(review): judging by the identifier these are the man(7) macro
 * names treated as reserved; the code consulting the list is not part
 * of this view -- confirm against the callers.
 */
/* not currently implemented: BT DE DS ME MT PT SY TQ YS */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "DT",
	"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR",
	"LP", "OP", "P", "PD", "PP",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS",
	"TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR",
	NULL
};
720
/*
 * Array of injected predefined strings.
 * The initialisers come from predefs.in, presumably as a sequence of
 * PREDEF() invocations; PREDEFS_MAX fixes the array size, so it has to
 * be at least the number of entries in that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
726
/*
 * Map a request name to the index of its hash bucket: the first
 * character of the name, shifted so that ASCII_LO becomes bucket 0.
 * See roffhash_find().
 *
 * The argument is parenthesised so that any pointer expression, for
 * instance ROFF_HASH(s + 1), indexes the string that was meant.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
729
/*
 * File-wide state; NOTE(review): presumably maintained by the `it'
 * request handler roff_it(), which is outside this view -- confirm.
 */
static	int	 roffit_lines; /* number of lines to delay */
static	char	*roffit_macro; /* nil-terminated macro line */
732
733
734 static void
roffhash_init(void)735 roffhash_init(void)
736 {
737 struct roffmac *n;
738 int buc, i;
739
740 for (i = 0; i < (int)ROFF_USERDEF; i++) {
741 assert(roffs[i].name[0] >= ASCII_LO);
742 assert(roffs[i].name[0] <= ASCII_HI);
743
744 buc = ROFF_HASH(roffs[i].name);
745
746 if (NULL != (n = hash[buc])) {
747 for ( ; n->next; n = n->next)
748 /* Do nothing. */ ;
749 n->next = &roffs[i];
750 } else
751 hash[buc] = &roffs[i];
752 }
753 }
754
755 /*
756 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
757 * the nil-terminated string name could be found.
758 */
759 static enum rofft
roffhash_find(const char * p,size_t s)760 roffhash_find(const char *p, size_t s)
761 {
762 int buc;
763 struct roffmac *n;
764
765 /*
766 * libroff has an extremely simple hashtable, for the time
767 * being, which simply keys on the first character, which must
768 * be printable, then walks a chain. It works well enough until
769 * optimised.
770 */
771
772 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
773 return(ROFF_MAX);
774
775 buc = ROFF_HASH(p);
776
777 if (NULL == (n = hash[buc]))
778 return(ROFF_MAX);
779 for ( ; n; n = n->next)
780 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
781 return((enum rofft)(n - roffs));
782
783 return(ROFF_MAX);
784 }
785
786 /*
787 * Pop the current node off of the stack of roff instructions currently
788 * pending.
789 */
790 static void
roffnode_pop(struct roff * r)791 roffnode_pop(struct roff *r)
792 {
793 struct roffnode *p;
794
795 assert(r->last);
796 p = r->last;
797
798 r->last = r->last->parent;
799 free(p->name);
800 free(p->end);
801 free(p);
802 }
803
804 /*
805 * Push a roff node onto the instruction stack. This must later be
806 * removed with roffnode_pop().
807 */
808 static void
roffnode_push(struct roff * r,enum rofft tok,const char * name,int line,int col)809 roffnode_push(struct roff *r, enum rofft tok, const char *name,
810 int line, int col)
811 {
812 struct roffnode *p;
813
814 p = mandoc_calloc(1, sizeof(struct roffnode));
815 p->tok = tok;
816 if (name)
817 p->name = mandoc_strdup(name);
818 p->parent = r->last;
819 p->line = line;
820 p->col = col;
821 p->rule = p->parent ? p->parent->rule : 0;
822
823 r->last = p;
824 }
825
826 static void
roff_free1(struct roff * r)827 roff_free1(struct roff *r)
828 {
829 struct tbl_node *tbl;
830 struct eqn_node *e;
831 int i;
832
833 while (NULL != (tbl = r->first_tbl)) {
834 r->first_tbl = tbl->next;
835 tbl_free(tbl);
836 }
837 r->first_tbl = r->last_tbl = r->tbl = NULL;
838
839 while (NULL != (e = r->first_eqn)) {
840 r->first_eqn = e->next;
841 eqn_free(e);
842 }
843 r->first_eqn = r->last_eqn = r->eqn = NULL;
844
845 while (r->last)
846 roffnode_pop(r);
847
848 free (r->rstack);
849 r->rstack = NULL;
850 r->rstacksz = 0;
851 r->rstackpos = -1;
852
853 roff_freereg(r->regtab);
854 r->regtab = NULL;
855
856 roff_freestr(r->strtab);
857 roff_freestr(r->xmbtab);
858 r->strtab = r->xmbtab = NULL;
859
860 if (r->xtab)
861 for (i = 0; i < 128; i++)
862 free(r->xtab[i].p);
863 free(r->xtab);
864 r->xtab = NULL;
865 }
866
867 void
roff_reset(struct roff * r)868 roff_reset(struct roff *r)
869 {
870
871 roff_free1(r);
872 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
873 r->control = 0;
874 }
875
/*
 * Destroy a parser: release everything it owns, then the structure
 * itself.  The pointer must not be used afterwards.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);	/* contents first ... */
	free(r);	/* ... then the container */
}
883
884 struct roff *
roff_alloc(struct mparse * parse,const struct mchars * mchars,int options)885 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options)
886 {
887 struct roff *r;
888
889 r = mandoc_calloc(1, sizeof(struct roff));
890 r->parse = parse;
891 r->mchars = mchars;
892 r->options = options;
893 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
894 r->rstackpos = -1;
895
896 roffhash_init();
897
898 return(r);
899 }
900
901 /*
902 * In the current line, expand escape sequences that tend to get
903 * used in numerical expressions and conditional requests.
904 * Also check the syntax of the remaining escape sequences.
905 */
906 static enum rofferr
roff_res(struct roff * r,struct buf * buf,int ln,int pos)907 roff_res(struct roff *r, struct buf *buf, int ln, int pos)
908 {
909 char ubuf[24]; /* buffer to print the number */
910 const char *start; /* start of the string to process */
911 char *stesc; /* start of an escape sequence ('\\') */
912 const char *stnam; /* start of the name, after "[(*" */
913 const char *cp; /* end of the name, e.g. before ']' */
914 const char *res; /* the string to be substituted */
915 char *nbuf; /* new buffer to copy buf->buf to */
916 size_t maxl; /* expected length of the escape name */
917 size_t naml; /* actual length of the escape name */
918 enum mandoc_esc esc; /* type of the escape sequence */
919 int inaml; /* length returned from mandoc_escape() */
920 int expand_count; /* to avoid infinite loops */
921 int npos; /* position in numeric expression */
922 int arg_complete; /* argument not interrupted by eol */
923 char term; /* character terminating the escape */
924
925 expand_count = 0;
926 start = buf->buf + pos;
927 stesc = strchr(start, '\0') - 1;
928 while (stesc-- > start) {
929
930 /* Search backwards for the next backslash. */
931
932 if (*stesc != '\\')
933 continue;
934
935 /* If it is escaped, skip it. */
936
937 for (cp = stesc - 1; cp >= start; cp--)
938 if (*cp != '\\')
939 break;
940
941 if ((stesc - cp) % 2 == 0) {
942 stesc = (char *)cp;
943 continue;
944 }
945
946 /* Decide whether to expand or to check only. */
947
948 term = '\0';
949 cp = stesc + 1;
950 switch (*cp) {
951 case '*':
952 res = NULL;
953 break;
954 case 'B':
955 /* FALLTHROUGH */
956 case 'w':
957 term = cp[1];
958 /* FALLTHROUGH */
959 case 'n':
960 res = ubuf;
961 break;
962 default:
963 esc = mandoc_escape(&cp, &stnam, &inaml);
964 if (esc == ESCAPE_ERROR ||
965 (esc == ESCAPE_SPECIAL &&
966 mchars_spec2cp(r->mchars, stnam, inaml) < 0))
967 mandoc_vmsg(MANDOCERR_ESC_BAD,
968 r->parse, ln, (int)(stesc - buf->buf),
969 "%.*s", (int)(cp - stesc), stesc);
970 continue;
971 }
972
973 if (EXPAND_LIMIT < ++expand_count) {
974 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
975 ln, (int)(stesc - buf->buf), NULL);
976 return(ROFF_IGN);
977 }
978
979 /*
980 * The third character decides the length
981 * of the name of the string or register.
982 * Save a pointer to the name.
983 */
984
985 if (term == '\0') {
986 switch (*++cp) {
987 case '\0':
988 maxl = 0;
989 break;
990 case '(':
991 cp++;
992 maxl = 2;
993 break;
994 case '[':
995 cp++;
996 term = ']';
997 maxl = 0;
998 break;
999 default:
1000 maxl = 1;
1001 break;
1002 }
1003 } else {
1004 cp += 2;
1005 maxl = 0;
1006 }
1007 stnam = cp;
1008
1009 /* Advance to the end of the name. */
1010
1011 naml = 0;
1012 arg_complete = 1;
1013 while (maxl == 0 || naml < maxl) {
1014 if (*cp == '\0') {
1015 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
1016 ln, (int)(stesc - buf->buf), stesc);
1017 arg_complete = 0;
1018 break;
1019 }
1020 if (maxl == 0 && *cp == term) {
1021 cp++;
1022 break;
1023 }
1024 if (*cp++ != '\\' || stesc[1] != 'w') {
1025 naml++;
1026 continue;
1027 }
1028 switch (mandoc_escape(&cp, NULL, NULL)) {
1029 case ESCAPE_SPECIAL:
1030 /* FALLTHROUGH */
1031 case ESCAPE_UNICODE:
1032 /* FALLTHROUGH */
1033 case ESCAPE_NUMBERED:
1034 /* FALLTHROUGH */
1035 case ESCAPE_OVERSTRIKE:
1036 naml++;
1037 break;
1038 default:
1039 break;
1040 }
1041 }
1042
1043 /*
1044 * Retrieve the replacement string; if it is
1045 * undefined, resume searching for escapes.
1046 */
1047
1048 switch (stesc[1]) {
1049 case '*':
1050 if (arg_complete)
1051 res = roff_getstrn(r, stnam, naml);
1052 break;
1053 case 'B':
1054 npos = 0;
1055 ubuf[0] = arg_complete &&
1056 roff_evalnum(r, ln, stnam, &npos,
1057 NULL, ROFFNUM_SCALE) &&
1058 stnam + npos + 1 == cp ? '1' : '0';
1059 ubuf[1] = '\0';
1060 break;
1061 case 'n':
1062 if (arg_complete)
1063 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1064 roff_getregn(r, stnam, naml));
1065 else
1066 ubuf[0] = '\0';
1067 break;
1068 case 'w':
1069 /* use even incomplete args */
1070 (void)snprintf(ubuf, sizeof(ubuf), "%d",
1071 24 * (int)naml);
1072 break;
1073 }
1074
1075 if (res == NULL) {
1076 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1077 r->parse, ln, (int)(stesc - buf->buf),
1078 "%.*s", (int)naml, stnam);
1079 res = "";
1080 } else if (buf->sz + strlen(res) > SHRT_MAX) {
1081 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse,
1082 ln, (int)(stesc - buf->buf), NULL);
1083 return(ROFF_IGN);
1084 }
1085
1086 /* Replace the escape sequence by the string. */
1087
1088 *stesc = '\0';
1089 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1090 buf->buf, res, cp) + 1;
1091
1092 /* Prepare for the next replacement. */
1093
1094 start = nbuf + pos;
1095 stesc = nbuf + (stesc - buf->buf) + strlen(res);
1096 free(buf->buf);
1097 buf->buf = nbuf;
1098 }
1099 return(ROFF_CONT);
1100 }
1101
1102 /*
1103 * Process text streams:
1104 * Convert all breakable hyphens into ASCII_HYPH.
1105 * Decrement and spring input line trap.
1106 */
1107 static enum rofferr
roff_parsetext(struct buf * buf,int pos,int * offs)1108 roff_parsetext(struct buf *buf, int pos, int *offs)
1109 {
1110 size_t sz;
1111 const char *start;
1112 char *p;
1113 int isz;
1114 enum mandoc_esc esc;
1115
1116 start = p = buf->buf + pos;
1117
1118 while (*p != '\0') {
1119 sz = strcspn(p, "-\\");
1120 p += sz;
1121
1122 if (*p == '\0')
1123 break;
1124
1125 if (*p == '\\') {
1126 /* Skip over escapes. */
1127 p++;
1128 esc = mandoc_escape((const char **)&p, NULL, NULL);
1129 if (esc == ESCAPE_ERROR)
1130 break;
1131 continue;
1132 } else if (p == start) {
1133 p++;
1134 continue;
1135 }
1136
1137 if (isalpha((unsigned char)p[-1]) &&
1138 isalpha((unsigned char)p[1]))
1139 *p = ASCII_HYPH;
1140 p++;
1141 }
1142
1143 /* Spring the input line trap. */
1144 if (roffit_lines == 1) {
1145 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1146 free(buf->buf);
1147 buf->buf = p;
1148 buf->sz = isz + 1;
1149 *offs = 0;
1150 free(roffit_macro);
1151 roffit_lines = 0;
1152 return(ROFF_REPARSE);
1153 } else if (roffit_lines > 1)
1154 --roffit_lines;
1155 return(ROFF_CONT);
1156 }
1157
1158 enum rofferr
roff_parseln(struct roff * r,int ln,struct buf * buf,int * offs)1159 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1160 {
1161 enum rofft t;
1162 enum rofferr e;
1163 int pos; /* parse point */
1164 int spos; /* saved parse point for messages */
1165 int ppos; /* original offset in buf->buf */
1166 int ctl; /* macro line (boolean) */
1167
1168 ppos = pos = *offs;
1169
1170 /* Handle in-line equation delimiters. */
1171
1172 if (r->tbl == NULL &&
1173 r->last_eqn != NULL && r->last_eqn->delim &&
1174 (r->eqn == NULL || r->eqn_inline)) {
1175 e = roff_eqndelim(r, buf, pos);
1176 if (e == ROFF_REPARSE)
1177 return(e);
1178 assert(e == ROFF_CONT);
1179 }
1180
1181 /* Expand some escape sequences. */
1182
1183 e = roff_res(r, buf, ln, pos);
1184 if (e == ROFF_IGN)
1185 return(e);
1186 assert(e == ROFF_CONT);
1187
1188 ctl = roff_getcontrol(r, buf->buf, &pos);
1189
1190 /*
1191 * First, if a scope is open and we're not a macro, pass the
1192 * text through the macro's filter.
1193 * Equations process all content themselves.
1194 * Tables process almost all content themselves, but we want
1195 * to warn about macros before passing it there.
1196 */
1197
1198 if (r->last != NULL && ! ctl) {
1199 t = r->last->tok;
1200 assert(roffs[t].text);
1201 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1202 assert(e == ROFF_IGN || e == ROFF_CONT);
1203 if (e != ROFF_CONT)
1204 return(e);
1205 }
1206 if (r->eqn != NULL)
1207 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs));
1208 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0'))
1209 return(tbl_read(r->tbl, ln, buf->buf, ppos));
1210 if ( ! ctl)
1211 return(roff_parsetext(buf, pos, offs));
1212
1213 /* Skip empty request lines. */
1214
1215 if (buf->buf[pos] == '"') {
1216 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse,
1217 ln, pos, NULL);
1218 return(ROFF_IGN);
1219 } else if (buf->buf[pos] == '\0')
1220 return(ROFF_IGN);
1221
1222 /*
1223 * If a scope is open, go to the child handler for that macro,
1224 * as it may want to preprocess before doing anything with it.
1225 * Don't do so if an equation is open.
1226 */
1227
1228 if (r->last) {
1229 t = r->last->tok;
1230 assert(roffs[t].sub);
1231 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs));
1232 }
1233
1234 /* No scope is open. This is a new request or macro. */
1235
1236 spos = pos;
1237 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1238
1239 /* Tables ignore most macros. */
1240
1241 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) {
1242 mandoc_msg(MANDOCERR_TBLMACRO, r->parse,
1243 ln, pos, buf->buf + spos);
1244 if (t == ROFF_TS)
1245 return(ROFF_IGN);
1246 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1247 pos++;
1248 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ')
1249 pos++;
1250 return(tbl_read(r->tbl, ln, buf->buf, pos));
1251 }
1252
1253 /*
1254 * This is neither a roff request nor a user-defined macro.
1255 * Let the standard macro set parsers handle it.
1256 */
1257
1258 if (t == ROFF_MAX)
1259 return(ROFF_CONT);
1260
1261 /* Execute a roff request or a user defined macro. */
1262
1263 assert(roffs[t].proc);
1264 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1265 }
1266
1267 void
roff_endparse(struct roff * r)1268 roff_endparse(struct roff *r)
1269 {
1270
1271 if (r->last)
1272 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1273 r->last->line, r->last->col,
1274 roffs[r->last->tok].name);
1275
1276 if (r->eqn) {
1277 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1278 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ");
1279 eqn_end(&r->eqn);
1280 }
1281
1282 if (r->tbl) {
1283 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse,
1284 r->tbl->line, r->tbl->pos, "TS");
1285 tbl_end(&r->tbl);
1286 }
1287 }
1288
1289 /*
1290 * Parse a roff node's type from the input buffer. This must be in the
1291 * form of ".foo xxx" in the usual way.
1292 */
1293 static enum rofft
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1294 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1295 {
1296 char *cp;
1297 const char *mac;
1298 size_t maclen;
1299 enum rofft t;
1300
1301 cp = buf + *pos;
1302
1303 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1304 return(ROFF_MAX);
1305
1306 mac = cp;
1307 maclen = roff_getname(r, &cp, ln, ppos);
1308
1309 t = (r->current_string = roff_getstrn(r, mac, maclen))
1310 ? ROFF_USERDEF : roffhash_find(mac, maclen);
1311
1312 if (ROFF_MAX != t)
1313 *pos = cp - buf;
1314
1315 return(t);
1316 }
1317
1318 static enum rofferr
roff_cblock(ROFF_ARGS)1319 roff_cblock(ROFF_ARGS)
1320 {
1321
1322 /*
1323 * A block-close `..' should only be invoked as a child of an
1324 * ignore macro, otherwise raise a warning and just ignore it.
1325 */
1326
1327 if (r->last == NULL) {
1328 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1329 ln, ppos, "..");
1330 return(ROFF_IGN);
1331 }
1332
1333 switch (r->last->tok) {
1334 case ROFF_am:
1335 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1336 /* FALLTHROUGH */
1337 case ROFF_ami:
1338 /* FALLTHROUGH */
1339 case ROFF_de:
1340 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1341 /* FALLTHROUGH */
1342 case ROFF_dei:
1343 /* FALLTHROUGH */
1344 case ROFF_ig:
1345 break;
1346 default:
1347 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1348 ln, ppos, "..");
1349 return(ROFF_IGN);
1350 }
1351
1352 if (buf->buf[pos] != '\0')
1353 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
1354 ".. %s", buf->buf + pos);
1355
1356 roffnode_pop(r);
1357 roffnode_cleanscope(r);
1358 return(ROFF_IGN);
1359
1360 }
1361
1362 static void
roffnode_cleanscope(struct roff * r)1363 roffnode_cleanscope(struct roff *r)
1364 {
1365
1366 while (r->last) {
1367 if (--r->last->endspan != 0)
1368 break;
1369 roffnode_pop(r);
1370 }
1371 }
1372
1373 static void
roff_ccond(struct roff * r,int ln,int ppos)1374 roff_ccond(struct roff *r, int ln, int ppos)
1375 {
1376
1377 if (NULL == r->last) {
1378 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1379 ln, ppos, "\\}");
1380 return;
1381 }
1382
1383 switch (r->last->tok) {
1384 case ROFF_el:
1385 /* FALLTHROUGH */
1386 case ROFF_ie:
1387 /* FALLTHROUGH */
1388 case ROFF_if:
1389 break;
1390 default:
1391 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1392 ln, ppos, "\\}");
1393 return;
1394 }
1395
1396 if (r->last->endspan > -1) {
1397 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
1398 ln, ppos, "\\}");
1399 return;
1400 }
1401
1402 roffnode_pop(r);
1403 roffnode_cleanscope(r);
1404 return;
1405 }
1406
1407 static enum rofferr
roff_block(ROFF_ARGS)1408 roff_block(ROFF_ARGS)
1409 {
1410 const char *name;
1411 char *iname, *cp;
1412 size_t namesz;
1413
1414 /* Ignore groff compatibility mode for now. */
1415
1416 if (tok == ROFF_de1)
1417 tok = ROFF_de;
1418 else if (tok == ROFF_dei1)
1419 tok = ROFF_dei;
1420 else if (tok == ROFF_am1)
1421 tok = ROFF_am;
1422 else if (tok == ROFF_ami1)
1423 tok = ROFF_ami;
1424
1425 /* Parse the macro name argument. */
1426
1427 cp = buf->buf + pos;
1428 if (tok == ROFF_ig) {
1429 iname = NULL;
1430 namesz = 0;
1431 } else {
1432 iname = cp;
1433 namesz = roff_getname(r, &cp, ln, ppos);
1434 iname[namesz] = '\0';
1435 }
1436
1437 /* Resolve the macro name argument if it is indirect. */
1438
1439 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1440 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1441 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1442 r->parse, ln, (int)(iname - buf->buf),
1443 "%.*s", (int)namesz, iname);
1444 namesz = 0;
1445 } else
1446 namesz = strlen(name);
1447 } else
1448 name = iname;
1449
1450 if (namesz == 0 && tok != ROFF_ig) {
1451 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse,
1452 ln, ppos, roffs[tok].name);
1453 return(ROFF_IGN);
1454 }
1455
1456 roffnode_push(r, tok, name, ln, ppos);
1457
1458 /*
1459 * At the beginning of a `de' macro, clear the existing string
1460 * with the same name, if there is one. New content will be
1461 * appended from roff_block_text() in multiline mode.
1462 */
1463
1464 if (tok == ROFF_de || tok == ROFF_dei)
1465 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
1466
1467 if (*cp == '\0')
1468 return(ROFF_IGN);
1469
1470 /* Get the custom end marker. */
1471
1472 iname = cp;
1473 namesz = roff_getname(r, &cp, ln, ppos);
1474
1475 /* Resolve the end marker if it is indirect. */
1476
1477 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
1478 if ((name = roff_getstrn(r, iname, namesz)) == NULL) {
1479 mandoc_vmsg(MANDOCERR_STR_UNDEF,
1480 r->parse, ln, (int)(iname - buf->buf),
1481 "%.*s", (int)namesz, iname);
1482 namesz = 0;
1483 } else
1484 namesz = strlen(name);
1485 } else
1486 name = iname;
1487
1488 if (namesz)
1489 r->last->end = mandoc_strndup(name, namesz);
1490
1491 if (*cp != '\0')
1492 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
1493 ln, pos, ".%s ... %s", roffs[tok].name, cp);
1494
1495 return(ROFF_IGN);
1496 }
1497
1498 static enum rofferr
roff_block_sub(ROFF_ARGS)1499 roff_block_sub(ROFF_ARGS)
1500 {
1501 enum rofft t;
1502 int i, j;
1503
1504 /*
1505 * First check whether a custom macro exists at this level. If
1506 * it does, then check against it. This is some of groff's
1507 * stranger behaviours. If we encountered a custom end-scope
1508 * tag and that tag also happens to be a "real" macro, then we
1509 * need to try interpreting it again as a real macro. If it's
1510 * not, then return ignore. Else continue.
1511 */
1512
1513 if (r->last->end) {
1514 for (i = pos, j = 0; r->last->end[j]; j++, i++)
1515 if (buf->buf[i] != r->last->end[j])
1516 break;
1517
1518 if (r->last->end[j] == '\0' &&
1519 (buf->buf[i] == '\0' ||
1520 buf->buf[i] == ' ' ||
1521 buf->buf[i] == '\t')) {
1522 roffnode_pop(r);
1523 roffnode_cleanscope(r);
1524
1525 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
1526 i++;
1527
1528 pos = i;
1529 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
1530 ROFF_MAX)
1531 return(ROFF_RERUN);
1532 return(ROFF_IGN);
1533 }
1534 }
1535
1536 /*
1537 * If we have no custom end-query or lookup failed, then try
1538 * pulling it out of the hashtable.
1539 */
1540
1541 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1542
1543 if (t != ROFF_cblock) {
1544 if (tok != ROFF_ig)
1545 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
1546 return(ROFF_IGN);
1547 }
1548
1549 assert(roffs[t].proc);
1550 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1551 }
1552
1553 static enum rofferr
roff_block_text(ROFF_ARGS)1554 roff_block_text(ROFF_ARGS)
1555 {
1556
1557 if (tok != ROFF_ig)
1558 roff_setstr(r, r->last->name, buf->buf + pos, 2);
1559
1560 return(ROFF_IGN);
1561 }
1562
1563 static enum rofferr
roff_cond_sub(ROFF_ARGS)1564 roff_cond_sub(ROFF_ARGS)
1565 {
1566 enum rofft t;
1567 char *ep;
1568 int rr;
1569
1570 rr = r->last->rule;
1571 roffnode_cleanscope(r);
1572 t = roff_parse(r, buf->buf, &pos, ln, ppos);
1573
1574 /*
1575 * Fully handle known macros when they are structurally
1576 * required or when the conditional evaluated to true.
1577 */
1578
1579 if ((t != ROFF_MAX) &&
1580 (rr || roffs[t].flags & ROFFMAC_STRUCT)) {
1581 assert(roffs[t].proc);
1582 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs));
1583 }
1584
1585 /*
1586 * If `\}' occurs on a macro line without a preceding macro,
1587 * drop the line completely.
1588 */
1589
1590 ep = buf->buf + pos;
1591 if (ep[0] == '\\' && ep[1] == '}')
1592 rr = 0;
1593
1594 /* Always check for the closing delimiter `\}'. */
1595
1596 while ((ep = strchr(ep, '\\')) != NULL) {
1597 if (*(++ep) == '}') {
1598 *ep = '&';
1599 roff_ccond(r, ln, ep - buf->buf - 1);
1600 }
1601 if (*ep != '\0')
1602 ++ep;
1603 }
1604 return(rr ? ROFF_CONT : ROFF_IGN);
1605 }
1606
1607 static enum rofferr
roff_cond_text(ROFF_ARGS)1608 roff_cond_text(ROFF_ARGS)
1609 {
1610 char *ep;
1611 int rr;
1612
1613 rr = r->last->rule;
1614 roffnode_cleanscope(r);
1615
1616 ep = buf->buf + pos;
1617 while ((ep = strchr(ep, '\\')) != NULL) {
1618 if (*(++ep) == '}') {
1619 *ep = '&';
1620 roff_ccond(r, ln, ep - buf->buf - 1);
1621 }
1622 if (*ep != '\0')
1623 ++ep;
1624 }
1625 return(rr ? ROFF_CONT : ROFF_IGN);
1626 }
1627
1628 /*
1629 * Parse a single signed integer number. Stop at the first non-digit.
1630 * If there is at least one digit, return success and advance the
1631 * parse point, else return failure and let the parse point unchanged.
1632 * Ignore overflows, treat them just like the C language.
1633 */
1634 static int
roff_getnum(const char * v,int * pos,int * res,int flags)1635 roff_getnum(const char *v, int *pos, int *res, int flags)
1636 {
1637 int myres, scaled, n, p;
1638
1639 if (NULL == res)
1640 res = &myres;
1641
1642 p = *pos;
1643 n = v[p] == '-';
1644 if (n || v[p] == '+')
1645 p++;
1646
1647 if (flags & ROFFNUM_WHITE)
1648 while (isspace((unsigned char)v[p]))
1649 p++;
1650
1651 for (*res = 0; isdigit((unsigned char)v[p]); p++)
1652 *res = 10 * *res + v[p] - '0';
1653 if (p == *pos + n)
1654 return 0;
1655
1656 if (n)
1657 *res = -*res;
1658
1659 /* Each number may be followed by one optional scaling unit. */
1660
1661 switch (v[p]) {
1662 case 'f':
1663 scaled = *res * 65536;
1664 break;
1665 case 'i':
1666 scaled = *res * 240;
1667 break;
1668 case 'c':
1669 scaled = *res * 240 / 2.54;
1670 break;
1671 case 'v':
1672 /* FALLTROUGH */
1673 case 'P':
1674 scaled = *res * 40;
1675 break;
1676 case 'm':
1677 /* FALLTROUGH */
1678 case 'n':
1679 scaled = *res * 24;
1680 break;
1681 case 'p':
1682 scaled = *res * 10 / 3;
1683 break;
1684 case 'u':
1685 scaled = *res;
1686 break;
1687 case 'M':
1688 scaled = *res * 6 / 25;
1689 break;
1690 default:
1691 scaled = *res;
1692 p--;
1693 break;
1694 }
1695 if (flags & ROFFNUM_SCALE)
1696 *res = scaled;
1697
1698 *pos = p + 1;
1699 return(1);
1700 }
1701
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If there is no delimiter at all because the cursor already is
 * at the end of the line, fail and leave the cursor unchanged.
 *
 * Returns 1 if both strings are equal, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Never look beyond the end of the line. */

	if ('\0' == *s1)
		return(0);

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)	/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = s3 - v;
	return(match);
}
1744
1745 /*
1746 * Evaluate an optionally negated single character, numerical,
1747 * or string condition.
1748 */
1749 static int
roff_evalcond(struct roff * r,int ln,const char * v,int * pos)1750 roff_evalcond(struct roff *r, int ln, const char *v, int *pos)
1751 {
1752 int number, savepos, wanttrue;
1753
1754 if ('!' == v[*pos]) {
1755 wanttrue = 0;
1756 (*pos)++;
1757 } else
1758 wanttrue = 1;
1759
1760 switch (v[*pos]) {
1761 case '\0':
1762 return(0);
1763 case 'n':
1764 /* FALLTHROUGH */
1765 case 'o':
1766 (*pos)++;
1767 return(wanttrue);
1768 case 'c':
1769 /* FALLTHROUGH */
1770 case 'd':
1771 /* FALLTHROUGH */
1772 case 'e':
1773 /* FALLTHROUGH */
1774 case 'r':
1775 /* FALLTHROUGH */
1776 case 't':
1777 /* FALLTHROUGH */
1778 case 'v':
1779 (*pos)++;
1780 return(!wanttrue);
1781 default:
1782 break;
1783 }
1784
1785 savepos = *pos;
1786 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
1787 return((number > 0) == wanttrue);
1788 else if (*pos == savepos)
1789 return(roff_evalstrcond(v, pos) == wanttrue);
1790 else
1791 return (0);
1792 }
1793
1794 static enum rofferr
roff_line_ignore(ROFF_ARGS)1795 roff_line_ignore(ROFF_ARGS)
1796 {
1797
1798 return(ROFF_IGN);
1799 }
1800
1801 static enum rofferr
roff_insec(ROFF_ARGS)1802 roff_insec(ROFF_ARGS)
1803 {
1804
1805 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse,
1806 ln, ppos, roffs[tok].name);
1807 return(ROFF_IGN);
1808 }
1809
1810 static enum rofferr
roff_unsupp(ROFF_ARGS)1811 roff_unsupp(ROFF_ARGS)
1812 {
1813
1814 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse,
1815 ln, ppos, roffs[tok].name);
1816 return(ROFF_IGN);
1817 }
1818
1819 static enum rofferr
roff_cond(ROFF_ARGS)1820 roff_cond(ROFF_ARGS)
1821 {
1822
1823 roffnode_push(r, tok, NULL, ln, ppos);
1824
1825 /*
1826 * An `.el' has no conditional body: it will consume the value
1827 * of the current rstack entry set in prior `ie' calls or
1828 * defaults to DENY.
1829 *
1830 * If we're not an `el', however, then evaluate the conditional.
1831 */
1832
1833 r->last->rule = tok == ROFF_el ?
1834 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
1835 roff_evalcond(r, ln, buf->buf, &pos);
1836
1837 /*
1838 * An if-else will put the NEGATION of the current evaluated
1839 * conditional into the stack of rules.
1840 */
1841
1842 if (tok == ROFF_ie) {
1843 if (r->rstackpos + 1 == r->rstacksz) {
1844 r->rstacksz += 16;
1845 r->rstack = mandoc_reallocarray(r->rstack,
1846 r->rstacksz, sizeof(int));
1847 }
1848 r->rstack[++r->rstackpos] = !r->last->rule;
1849 }
1850
1851 /* If the parent has false as its rule, then so do we. */
1852
1853 if (r->last->parent && !r->last->parent->rule)
1854 r->last->rule = 0;
1855
1856 /*
1857 * Determine scope.
1858 * If there is nothing on the line after the conditional,
1859 * not even whitespace, use next-line scope.
1860 */
1861
1862 if (buf->buf[pos] == '\0') {
1863 r->last->endspan = 2;
1864 goto out;
1865 }
1866
1867 while (buf->buf[pos] == ' ')
1868 pos++;
1869
1870 /* An opening brace requests multiline scope. */
1871
1872 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
1873 r->last->endspan = -1;
1874 pos += 2;
1875 goto out;
1876 }
1877
1878 /*
1879 * Anything else following the conditional causes
1880 * single-line scope. Warn if the scope contains
1881 * nothing but trailing whitespace.
1882 */
1883
1884 if (buf->buf[pos] == '\0')
1885 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse,
1886 ln, ppos, roffs[tok].name);
1887
1888 r->last->endspan = 1;
1889
1890 out:
1891 *offs = pos;
1892 return(ROFF_RERUN);
1893 }
1894
1895 static enum rofferr
roff_ds(ROFF_ARGS)1896 roff_ds(ROFF_ARGS)
1897 {
1898 char *string;
1899 const char *name;
1900 size_t namesz;
1901
1902 /* Ignore groff compatibility mode for now. */
1903
1904 if (tok == ROFF_ds1)
1905 tok = ROFF_ds;
1906 else if (tok == ROFF_as1)
1907 tok = ROFF_as;
1908
1909 /*
1910 * The first word is the name of the string.
1911 * If it is empty or terminated by an escape sequence,
1912 * abort the `ds' request without defining anything.
1913 */
1914
1915 name = string = buf->buf + pos;
1916 if (*name == '\0')
1917 return(ROFF_IGN);
1918
1919 namesz = roff_getname(r, &string, ln, pos);
1920 if (name[namesz] == '\\')
1921 return(ROFF_IGN);
1922
1923 /* Read past the initial double-quote, if any. */
1924 if (*string == '"')
1925 string++;
1926
1927 /* The rest is the value. */
1928 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
1929 ROFF_as == tok);
1930 return(ROFF_IGN);
1931 }
1932
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its code in *res, advance
 * the parse point, and return the (non-zero) code; else return 0
 * and leave the parse point unchanged.  In the latter case, *res
 * holds the character found at the parse point.
 * One-character operators and `==' are coded as their first
 * character; `<=' is coded as 'l', `>=' as 'g', `<>' as '!',
 * `<?' as 'i', and `>?' as 'a'.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	char		 two;	/* code of a two-character operator */

	op = v + *pos;
	*res = op[0];
	two = '\0';

	switch (op[0]) {
	case '+':
		/* FALLTHROUGH */
	case '-':
		/* FALLTHROUGH */
	case '*':
		/* FALLTHROUGH */
	case '/':
		/* FALLTHROUGH */
	case '%':
		/* FALLTHROUGH */
	case '&':
		/* FALLTHROUGH */
	case ':':
		break;
	case '<':
		if (op[1] == '=')
			two = 'l';
		else if (op[1] == '>')
			two = '!';
		else if (op[1] == '?')
			two = 'i';
		break;
	case '>':
		if (op[1] == '=')
			two = 'g';
		else if (op[1] == '?')
			two = 'a';
		break;
	case '=':
		if (op[1] == '=')
			two = '=';
		break;
	default:
		return(0);
	}

	if (two != '\0') {
		*res = two;
		*pos += 2;
	} else
		*pos += 1;

	return(*res);
}
2002
2003 /*
2004 * Evaluate either a parenthesized numeric expression
2005 * or a single signed integer number.
2006 */
2007 static int
roff_evalpar(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2008 roff_evalpar(struct roff *r, int ln,
2009 const char *v, int *pos, int *res, int flags)
2010 {
2011
2012 if ('(' != v[*pos])
2013 return(roff_getnum(v, pos, res, flags));
2014
2015 (*pos)++;
2016 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2017 return(0);
2018
2019 /*
2020 * Omission of the closing parenthesis
2021 * is an error in validation mode,
2022 * but ignored in evaluation mode.
2023 */
2024
2025 if (')' == v[*pos])
2026 (*pos)++;
2027 else if (NULL == res)
2028 return(0);
2029
2030 return(1);
2031 }
2032
2033 /*
2034 * Evaluate a complete numeric expression.
2035 * Proceed left to right, there is no concept of precedence.
2036 */
2037 static int
roff_evalnum(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2038 roff_evalnum(struct roff *r, int ln, const char *v,
2039 int *pos, int *res, int flags)
2040 {
2041 int mypos, operand2;
2042 char operator;
2043
2044 if (NULL == pos) {
2045 mypos = 0;
2046 pos = &mypos;
2047 }
2048
2049 if (flags & ROFFNUM_WHITE)
2050 while (isspace((unsigned char)v[*pos]))
2051 (*pos)++;
2052
2053 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2054 return(0);
2055
2056 while (1) {
2057 if (flags & ROFFNUM_WHITE)
2058 while (isspace((unsigned char)v[*pos]))
2059 (*pos)++;
2060
2061 if ( ! roff_getop(v, pos, &operator))
2062 break;
2063
2064 if (flags & ROFFNUM_WHITE)
2065 while (isspace((unsigned char)v[*pos]))
2066 (*pos)++;
2067
2068 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2069 return(0);
2070
2071 if (flags & ROFFNUM_WHITE)
2072 while (isspace((unsigned char)v[*pos]))
2073 (*pos)++;
2074
2075 if (NULL == res)
2076 continue;
2077
2078 switch (operator) {
2079 case '+':
2080 *res += operand2;
2081 break;
2082 case '-':
2083 *res -= operand2;
2084 break;
2085 case '*':
2086 *res *= operand2;
2087 break;
2088 case '/':
2089 if (operand2 == 0) {
2090 mandoc_msg(MANDOCERR_DIVZERO,
2091 r->parse, ln, *pos, v);
2092 *res = 0;
2093 break;
2094 }
2095 *res /= operand2;
2096 break;
2097 case '%':
2098 if (operand2 == 0) {
2099 mandoc_msg(MANDOCERR_DIVZERO,
2100 r->parse, ln, *pos, v);
2101 *res = 0;
2102 break;
2103 }
2104 *res %= operand2;
2105 break;
2106 case '<':
2107 *res = *res < operand2;
2108 break;
2109 case '>':
2110 *res = *res > operand2;
2111 break;
2112 case 'l':
2113 *res = *res <= operand2;
2114 break;
2115 case 'g':
2116 *res = *res >= operand2;
2117 break;
2118 case '=':
2119 *res = *res == operand2;
2120 break;
2121 case '!':
2122 *res = *res != operand2;
2123 break;
2124 case '&':
2125 *res = *res && operand2;
2126 break;
2127 case ':':
2128 *res = *res || operand2;
2129 break;
2130 case 'i':
2131 if (operand2 < *res)
2132 *res = operand2;
2133 break;
2134 case 'a':
2135 if (operand2 > *res)
2136 *res = operand2;
2137 break;
2138 default:
2139 abort();
2140 }
2141 }
2142 return(1);
2143 }
2144
2145 void
roff_setreg(struct roff * r,const char * name,int val,char sign)2146 roff_setreg(struct roff *r, const char *name, int val, char sign)
2147 {
2148 struct roffreg *reg;
2149
2150 /* Search for an existing register with the same name. */
2151 reg = r->regtab;
2152
2153 while (reg && strcmp(name, reg->key.p))
2154 reg = reg->next;
2155
2156 if (NULL == reg) {
2157 /* Create a new register. */
2158 reg = mandoc_malloc(sizeof(struct roffreg));
2159 reg->key.p = mandoc_strdup(name);
2160 reg->key.sz = strlen(name);
2161 reg->val = 0;
2162 reg->next = r->regtab;
2163 r->regtab = reg;
2164 }
2165
2166 if ('+' == sign)
2167 reg->val += val;
2168 else if ('-' == sign)
2169 reg->val -= val;
2170 else
2171 reg->val = val;
2172 }
2173
/*
 * Handle some predefined read-only number registers.
 * Only the first character of name is inspected.
 * For now, return -1 if the requested register is not predefined;
 * in case a predefined read-only register having the value -1
 * were to turn up, another special value would have to be chosen.
 */
static int
roff_getregro(const char *name)
{
	int	 val;

	switch (*name) {
	case 'A':  /* ASCII approximation mode is always off. */
		/* FALLTHROUGH */
	case 'j':  /* Always adjust left margin only. */
		val = 0;
		break;
	case 'g':  /* Groff compatibility mode is always on. */
		/* FALLTHROUGH */
	case 'T':  /* Some output device is always defined. */
		val = 1;
		break;
	case 'H':  /* Fixed horizontal resolution. */
		val = 24;
		break;
	case 'V':  /* Fixed vertical resolution. */
		val = 40;
		break;
	default:
		val = -1;
		break;
	}
	return(val);
}
2201
2202 int
roff_getreg(const struct roff * r,const char * name)2203 roff_getreg(const struct roff *r, const char *name)
2204 {
2205 struct roffreg *reg;
2206 int val;
2207
2208 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) {
2209 val = roff_getregro(name + 1);
2210 if (-1 != val)
2211 return (val);
2212 }
2213
2214 for (reg = r->regtab; reg; reg = reg->next)
2215 if (0 == strcmp(name, reg->key.p))
2216 return(reg->val);
2217
2218 return(0);
2219 }
2220
2221 static int
roff_getregn(const struct roff * r,const char * name,size_t len)2222 roff_getregn(const struct roff *r, const char *name, size_t len)
2223 {
2224 struct roffreg *reg;
2225 int val;
2226
2227 if ('.' == name[0] && 2 == len) {
2228 val = roff_getregro(name + 1);
2229 if (-1 != val)
2230 return (val);
2231 }
2232
2233 for (reg = r->regtab; reg; reg = reg->next)
2234 if (len == reg->key.sz &&
2235 0 == strncmp(name, reg->key.p, len))
2236 return(reg->val);
2237
2238 return(0);
2239 }
2240
2241 static void
roff_freereg(struct roffreg * reg)2242 roff_freereg(struct roffreg *reg)
2243 {
2244 struct roffreg *old_reg;
2245
2246 while (NULL != reg) {
2247 free(reg->key.p);
2248 old_reg = reg;
2249 reg = reg->next;
2250 free(old_reg);
2251 }
2252 }
2253
2254 static enum rofferr
roff_nr(ROFF_ARGS)2255 roff_nr(ROFF_ARGS)
2256 {
2257 char *key, *val;
2258 size_t keysz;
2259 int iv;
2260 char sign;
2261
2262 key = val = buf->buf + pos;
2263 if (*key == '\0')
2264 return(ROFF_IGN);
2265
2266 keysz = roff_getname(r, &val, ln, pos);
2267 if (key[keysz] == '\\')
2268 return(ROFF_IGN);
2269 key[keysz] = '\0';
2270
2271 sign = *val;
2272 if (sign == '+' || sign == '-')
2273 val++;
2274
2275 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE))
2276 roff_setreg(r, key, iv, sign);
2277
2278 return(ROFF_IGN);
2279 }
2280
2281 static enum rofferr
roff_rr(ROFF_ARGS)2282 roff_rr(ROFF_ARGS)
2283 {
2284 struct roffreg *reg, **prev;
2285 char *name, *cp;
2286 size_t namesz;
2287
2288 name = cp = buf->buf + pos;
2289 if (*name == '\0')
2290 return(ROFF_IGN);
2291 namesz = roff_getname(r, &cp, ln, pos);
2292 name[namesz] = '\0';
2293
2294 prev = &r->regtab;
2295 while (1) {
2296 reg = *prev;
2297 if (reg == NULL || !strcmp(name, reg->key.p))
2298 break;
2299 prev = ®->next;
2300 }
2301 if (reg != NULL) {
2302 *prev = reg->next;
2303 free(reg->key.p);
2304 free(reg);
2305 }
2306 return(ROFF_IGN);
2307 }
2308
2309 static enum rofferr
roff_rm(ROFF_ARGS)2310 roff_rm(ROFF_ARGS)
2311 {
2312 const char *name;
2313 char *cp;
2314 size_t namesz;
2315
2316 cp = buf->buf + pos;
2317 while (*cp != '\0') {
2318 name = cp;
2319 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
2320 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
2321 if (name[namesz] == '\\')
2322 break;
2323 }
2324 return(ROFF_IGN);
2325 }
2326
2327 static enum rofferr
roff_it(ROFF_ARGS)2328 roff_it(ROFF_ARGS)
2329 {
2330 int iv;
2331
2332 /* Parse the number of lines. */
2333
2334 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
2335 mandoc_msg(MANDOCERR_IT_NONUM, r->parse,
2336 ln, ppos, buf->buf + 1);
2337 return(ROFF_IGN);
2338 }
2339
2340 while (isspace((unsigned char)buf->buf[pos]))
2341 pos++;
2342
2343 /*
2344 * Arm the input line trap.
2345 * Special-casing "an-trap" is an ugly workaround to cope
2346 * with DocBook stupidly fiddling with man(7) internals.
2347 */
2348
2349 roffit_lines = iv;
2350 roffit_macro = mandoc_strdup(iv != 1 ||
2351 strcmp(buf->buf + pos, "an-trap") ?
2352 buf->buf + pos : "br");
2353 return(ROFF_IGN);
2354 }
2355
2356 static enum rofferr
roff_Dd(ROFF_ARGS)2357 roff_Dd(ROFF_ARGS)
2358 {
2359 const char *const *cp;
2360
2361 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0)
2362 for (cp = __mdoc_reserved; *cp; cp++)
2363 roff_setstr(r, *cp, NULL, 0);
2364
2365 if (r->format == 0)
2366 r->format = MPARSE_MDOC;
2367
2368 return(ROFF_CONT);
2369 }
2370
2371 static enum rofferr
roff_TH(ROFF_ARGS)2372 roff_TH(ROFF_ARGS)
2373 {
2374 const char *const *cp;
2375
2376 if ((r->options & MPARSE_QUICK) == 0)
2377 for (cp = __man_reserved; *cp; cp++)
2378 roff_setstr(r, *cp, NULL, 0);
2379
2380 if (r->format == 0)
2381 r->format = MPARSE_MAN;
2382
2383 return(ROFF_CONT);
2384 }
2385
2386 static enum rofferr
roff_TE(ROFF_ARGS)2387 roff_TE(ROFF_ARGS)
2388 {
2389
2390 if (NULL == r->tbl)
2391 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2392 ln, ppos, "TE");
2393 else if ( ! tbl_end(&r->tbl)) {
2394 free(buf->buf);
2395 buf->buf = mandoc_strdup(".sp");
2396 buf->sz = 4;
2397 return(ROFF_REPARSE);
2398 }
2399 return(ROFF_IGN);
2400 }
2401
2402 static enum rofferr
roff_T_(ROFF_ARGS)2403 roff_T_(ROFF_ARGS)
2404 {
2405
2406 if (NULL == r->tbl)
2407 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse,
2408 ln, ppos, "T&");
2409 else
2410 tbl_restart(ppos, ln, r->tbl);
2411
2412 return(ROFF_IGN);
2413 }
2414
2415 /*
2416 * Handle in-line equation delimiters.
2417 */
2418 static enum rofferr
roff_eqndelim(struct roff * r,struct buf * buf,int pos)2419 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
2420 {
2421 char *cp1, *cp2;
2422 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
2423
2424 /*
2425 * Outside equations, look for an opening delimiter.
2426 * If we are inside an equation, we already know it is
2427 * in-line, or this function wouldn't have been called;
2428 * so look for a closing delimiter.
2429 */
2430
2431 cp1 = buf->buf + pos;
2432 cp2 = strchr(cp1, r->eqn == NULL ?
2433 r->last_eqn->odelim : r->last_eqn->cdelim);
2434 if (cp2 == NULL)
2435 return(ROFF_CONT);
2436
2437 *cp2++ = '\0';
2438 bef_pr = bef_nl = aft_nl = aft_pr = "";
2439
2440 /* Handle preceding text, protecting whitespace. */
2441
2442 if (*buf->buf != '\0') {
2443 if (r->eqn == NULL)
2444 bef_pr = "\\&";
2445 bef_nl = "\n";
2446 }
2447
2448 /*
2449 * Prepare replacing the delimiter with an equation macro
2450 * and drop leading white space from the equation.
2451 */
2452
2453 if (r->eqn == NULL) {
2454 while (*cp2 == ' ')
2455 cp2++;
2456 mac = ".EQ";
2457 } else
2458 mac = ".EN";
2459
2460 /* Handle following text, protecting whitespace. */
2461
2462 if (*cp2 != '\0') {
2463 aft_nl = "\n";
2464 if (r->eqn != NULL)
2465 aft_pr = "\\&";
2466 }
2467
2468 /* Do the actual replacement. */
2469
2470 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
2471 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
2472 free(buf->buf);
2473 buf->buf = cp1;
2474
2475 /* Toggle the in-line state of the eqn subsystem. */
2476
2477 r->eqn_inline = r->eqn == NULL;
2478 return(ROFF_REPARSE);
2479 }
2480
2481 static enum rofferr
roff_EQ(ROFF_ARGS)2482 roff_EQ(ROFF_ARGS)
2483 {
2484 struct eqn_node *e;
2485
2486 assert(r->eqn == NULL);
2487 e = eqn_alloc(ppos, ln, r->parse);
2488
2489 if (r->last_eqn) {
2490 r->last_eqn->next = e;
2491 e->delim = r->last_eqn->delim;
2492 e->odelim = r->last_eqn->odelim;
2493 e->cdelim = r->last_eqn->cdelim;
2494 } else
2495 r->first_eqn = r->last_eqn = e;
2496
2497 r->eqn = r->last_eqn = e;
2498
2499 if (buf->buf[pos] != '\0')
2500 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos,
2501 ".EQ %s", buf->buf + pos);
2502
2503 return(ROFF_IGN);
2504 }
2505
2506 static enum rofferr
roff_EN(ROFF_ARGS)2507 roff_EN(ROFF_ARGS)
2508 {
2509
2510 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN");
2511 return(ROFF_IGN);
2512 }
2513
2514 static enum rofferr
roff_TS(ROFF_ARGS)2515 roff_TS(ROFF_ARGS)
2516 {
2517 struct tbl_node *tbl;
2518
2519 if (r->tbl) {
2520 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse,
2521 ln, ppos, "TS breaks TS");
2522 tbl_end(&r->tbl);
2523 }
2524
2525 tbl = tbl_alloc(ppos, ln, r->parse);
2526
2527 if (r->last_tbl)
2528 r->last_tbl->next = tbl;
2529 else
2530 r->first_tbl = r->last_tbl = tbl;
2531
2532 r->tbl = r->last_tbl = tbl;
2533 return(ROFF_IGN);
2534 }
2535
2536 static enum rofferr
roff_brp(ROFF_ARGS)2537 roff_brp(ROFF_ARGS)
2538 {
2539
2540 buf->buf[pos - 1] = '\0';
2541 return(ROFF_CONT);
2542 }
2543
2544 static enum rofferr
roff_cc(ROFF_ARGS)2545 roff_cc(ROFF_ARGS)
2546 {
2547 const char *p;
2548
2549 p = buf->buf + pos;
2550
2551 if (*p == '\0' || (r->control = *p++) == '.')
2552 r->control = 0;
2553
2554 if (*p != '\0')
2555 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse,
2556 ln, p - buf->buf, "cc ... %s", p);
2557
2558 return(ROFF_IGN);
2559 }
2560
2561 static enum rofferr
roff_tr(ROFF_ARGS)2562 roff_tr(ROFF_ARGS)
2563 {
2564 const char *p, *first, *second;
2565 size_t fsz, ssz;
2566 enum mandoc_esc esc;
2567
2568 p = buf->buf + pos;
2569
2570 if (*p == '\0') {
2571 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr");
2572 return(ROFF_IGN);
2573 }
2574
2575 while (*p != '\0') {
2576 fsz = ssz = 1;
2577
2578 first = p++;
2579 if (*first == '\\') {
2580 esc = mandoc_escape(&p, NULL, NULL);
2581 if (esc == ESCAPE_ERROR) {
2582 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2583 ln, (int)(p - buf->buf), first);
2584 return(ROFF_IGN);
2585 }
2586 fsz = (size_t)(p - first);
2587 }
2588
2589 second = p++;
2590 if (*second == '\\') {
2591 esc = mandoc_escape(&p, NULL, NULL);
2592 if (esc == ESCAPE_ERROR) {
2593 mandoc_msg(MANDOCERR_ESC_BAD, r->parse,
2594 ln, (int)(p - buf->buf), second);
2595 return(ROFF_IGN);
2596 }
2597 ssz = (size_t)(p - second);
2598 } else if (*second == '\0') {
2599 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse,
2600 ln, first - buf->buf, "tr %s", first);
2601 second = " ";
2602 p--;
2603 }
2604
2605 if (fsz > 1) {
2606 roff_setstrn(&r->xmbtab, first, fsz,
2607 second, ssz, 0);
2608 continue;
2609 }
2610
2611 if (r->xtab == NULL)
2612 r->xtab = mandoc_calloc(128,
2613 sizeof(struct roffstr));
2614
2615 free(r->xtab[(int)*first].p);
2616 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
2617 r->xtab[(int)*first].sz = ssz;
2618 }
2619
2620 return(ROFF_IGN);
2621 }
2622
2623 static enum rofferr
roff_so(ROFF_ARGS)2624 roff_so(ROFF_ARGS)
2625 {
2626 char *name, *cp;
2627
2628 name = buf->buf + pos;
2629 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name);
2630
2631 /*
2632 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
2633 * opening anything that's not in our cwd or anything beneath
2634 * it. Thus, explicitly disallow traversing up the file-system
2635 * or using absolute paths.
2636 */
2637
2638 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
2639 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos,
2640 ".so %s", name);
2641 buf->sz = mandoc_asprintf(&cp,
2642 ".sp\nSee the file %s.\n.sp", name) + 1;
2643 free(buf->buf);
2644 buf->buf = cp;
2645 *offs = 0;
2646 return(ROFF_REPARSE);
2647 }
2648
2649 *offs = pos;
2650 return(ROFF_SO);
2651 }
2652
2653 static enum rofferr
roff_userdef(ROFF_ARGS)2654 roff_userdef(ROFF_ARGS)
2655 {
2656 const char *arg[9], *ap;
2657 char *cp, *n1, *n2;
2658 int i;
2659 size_t asz, rsz;
2660
2661 /*
2662 * Collect pointers to macro argument strings
2663 * and NUL-terminate them.
2664 */
2665
2666 cp = buf->buf + pos;
2667 for (i = 0; i < 9; i++)
2668 arg[i] = *cp == '\0' ? "" :
2669 mandoc_getarg(r->parse, &cp, ln, &pos);
2670
2671 /*
2672 * Expand macro arguments.
2673 */
2674
2675 buf->sz = strlen(r->current_string) + 1;
2676 n1 = cp = mandoc_malloc(buf->sz);
2677 memcpy(n1, r->current_string, buf->sz);
2678 while (*cp != '\0') {
2679
2680 /* Scan ahead for the next argument invocation. */
2681
2682 if (*cp++ != '\\')
2683 continue;
2684 if (*cp++ != '$')
2685 continue;
2686 i = *cp - '1';
2687 if (0 > i || 8 < i)
2688 continue;
2689 cp -= 2;
2690
2691 /*
2692 * Determine the size of the expanded argument,
2693 * taking escaping of quotes into account.
2694 */
2695
2696 asz = 0;
2697 for (ap = arg[i]; *ap != '\0'; ap++) {
2698 asz++;
2699 if (*ap == '"')
2700 asz += 3;
2701 }
2702 if (asz != 3) {
2703
2704 /*
2705 * Determine the size of the rest of the
2706 * unexpanded macro, including the NUL.
2707 */
2708
2709 rsz = buf->sz - (cp - n1) - 3;
2710
2711 /*
2712 * When shrinking, move before
2713 * releasing the storage.
2714 */
2715
2716 if (asz < 3)
2717 memmove(cp + asz, cp + 3, rsz);
2718
2719 /*
2720 * Resize the storage for the macro
2721 * and readjust the parse pointer.
2722 */
2723
2724 buf->sz += asz - 3;
2725 n2 = mandoc_realloc(n1, buf->sz);
2726 cp = n2 + (cp - n1);
2727 n1 = n2;
2728
2729 /*
2730 * When growing, make room
2731 * for the expanded argument.
2732 */
2733
2734 if (asz > 3)
2735 memmove(cp + asz, cp + 3, rsz);
2736 }
2737
2738 /* Copy the expanded argument, escaping quotes. */
2739
2740 n2 = cp;
2741 for (ap = arg[i]; *ap != '\0'; ap++) {
2742 if (*ap == '"') {
2743 memcpy(n2, "\\(dq", 4);
2744 n2 += 4;
2745 } else
2746 *n2++ = *ap;
2747 }
2748 }
2749
2750 /*
2751 * Replace the macro invocation
2752 * by the expanded macro.
2753 */
2754
2755 free(buf->buf);
2756 buf->buf = n1;
2757 *offs = 0;
2758
2759 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
2760 ROFF_REPARSE : ROFF_APPEND);
2761 }
2762
2763 static size_t
roff_getname(struct roff * r,char ** cpp,int ln,int pos)2764 roff_getname(struct roff *r, char **cpp, int ln, int pos)
2765 {
2766 char *name, *cp;
2767 size_t namesz;
2768
2769 name = *cpp;
2770 if ('\0' == *name)
2771 return(0);
2772
2773 /* Read until end of name and terminate it with NUL. */
2774 for (cp = name; 1; cp++) {
2775 if ('\0' == *cp || ' ' == *cp) {
2776 namesz = cp - name;
2777 break;
2778 }
2779 if ('\\' != *cp)
2780 continue;
2781 namesz = cp - name;
2782 if ('{' == cp[1] || '}' == cp[1])
2783 break;
2784 cp++;
2785 if ('\\' == *cp)
2786 continue;
2787 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos,
2788 "%.*s", (int)(cp - name + 1), name);
2789 mandoc_escape((const char **)&cp, NULL, NULL);
2790 break;
2791 }
2792
2793 /* Read past spaces. */
2794 while (' ' == *cp)
2795 cp++;
2796
2797 *cpp = cp;
2798 return(namesz);
2799 }
2800
2801 /*
2802 * Store *string into the user-defined string called *name.
2803 * To clear an existing entry, call with (*r, *name, NULL, 0).
2804 * append == 0: replace mode
2805 * append == 1: single-line append mode
2806 * append == 2: multiline append mode, append '\n' after each call
2807 */
2808 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)2809 roff_setstr(struct roff *r, const char *name, const char *string,
2810 int append)
2811 {
2812
2813 roff_setstrn(&r->strtab, name, strlen(name), string,
2814 string ? strlen(string) : 0, append);
2815 }
2816
2817 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)2818 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
2819 const char *string, size_t stringsz, int append)
2820 {
2821 struct roffkv *n;
2822 char *c;
2823 int i;
2824 size_t oldch, newch;
2825
2826 /* Search for an existing string with the same name. */
2827 n = *r;
2828
2829 while (n && (namesz != n->key.sz ||
2830 strncmp(n->key.p, name, namesz)))
2831 n = n->next;
2832
2833 if (NULL == n) {
2834 /* Create a new string table entry. */
2835 n = mandoc_malloc(sizeof(struct roffkv));
2836 n->key.p = mandoc_strndup(name, namesz);
2837 n->key.sz = namesz;
2838 n->val.p = NULL;
2839 n->val.sz = 0;
2840 n->next = *r;
2841 *r = n;
2842 } else if (0 == append) {
2843 free(n->val.p);
2844 n->val.p = NULL;
2845 n->val.sz = 0;
2846 }
2847
2848 if (NULL == string)
2849 return;
2850
2851 /*
2852 * One additional byte for the '\n' in multiline mode,
2853 * and one for the terminating '\0'.
2854 */
2855 newch = stringsz + (1 < append ? 2u : 1u);
2856
2857 if (NULL == n->val.p) {
2858 n->val.p = mandoc_malloc(newch);
2859 *n->val.p = '\0';
2860 oldch = 0;
2861 } else {
2862 oldch = n->val.sz;
2863 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
2864 }
2865
2866 /* Skip existing content in the destination buffer. */
2867 c = n->val.p + (int)oldch;
2868
2869 /* Append new content to the destination buffer. */
2870 i = 0;
2871 while (i < (int)stringsz) {
2872 /*
2873 * Rudimentary roff copy mode:
2874 * Handle escaped backslashes.
2875 */
2876 if ('\\' == string[i] && '\\' == string[i + 1])
2877 i++;
2878 *c++ = string[i++];
2879 }
2880
2881 /* Append terminating bytes. */
2882 if (1 < append)
2883 *c++ = '\n';
2884
2885 *c = '\0';
2886 n->val.sz = (int)(c - n->val.p);
2887 }
2888
2889 static const char *
roff_getstrn(const struct roff * r,const char * name,size_t len)2890 roff_getstrn(const struct roff *r, const char *name, size_t len)
2891 {
2892 const struct roffkv *n;
2893 int i;
2894
2895 for (n = r->strtab; n; n = n->next)
2896 if (0 == strncmp(name, n->key.p, len) &&
2897 '\0' == n->key.p[(int)len])
2898 return(n->val.p);
2899
2900 for (i = 0; i < PREDEFS_MAX; i++)
2901 if (0 == strncmp(name, predefs[i].name, len) &&
2902 '\0' == predefs[i].name[(int)len])
2903 return(predefs[i].str);
2904
2905 return(NULL);
2906 }
2907
2908 static void
roff_freestr(struct roffkv * r)2909 roff_freestr(struct roffkv *r)
2910 {
2911 struct roffkv *n, *nn;
2912
2913 for (n = r; n; n = nn) {
2914 free(n->key.p);
2915 free(n->val.p);
2916 nn = n->next;
2917 free(n);
2918 }
2919 }
2920
2921 const struct tbl_span *
roff_span(const struct roff * r)2922 roff_span(const struct roff *r)
2923 {
2924
2925 return(r->tbl ? tbl_span(r->tbl) : NULL);
2926 }
2927
2928 const struct eqn *
roff_eqn(const struct roff * r)2929 roff_eqn(const struct roff *r)
2930 {
2931
2932 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
2933 }
2934
2935 /*
2936 * Duplicate an input string, making the appropriate character
2937 * conversations (as stipulated by `tr') along the way.
2938 * Returns a heap-allocated string with all the replacements made.
2939 */
2940 char *
roff_strdup(const struct roff * r,const char * p)2941 roff_strdup(const struct roff *r, const char *p)
2942 {
2943 const struct roffkv *cp;
2944 char *res;
2945 const char *pp;
2946 size_t ssz, sz;
2947 enum mandoc_esc esc;
2948
2949 if (NULL == r->xmbtab && NULL == r->xtab)
2950 return(mandoc_strdup(p));
2951 else if ('\0' == *p)
2952 return(mandoc_strdup(""));
2953
2954 /*
2955 * Step through each character looking for term matches
2956 * (remember that a `tr' can be invoked with an escape, which is
2957 * a glyph but the escape is multi-character).
2958 * We only do this if the character hash has been initialised
2959 * and the string is >0 length.
2960 */
2961
2962 res = NULL;
2963 ssz = 0;
2964
2965 while ('\0' != *p) {
2966 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
2967 sz = r->xtab[(int)*p].sz;
2968 res = mandoc_realloc(res, ssz + sz + 1);
2969 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
2970 ssz += sz;
2971 p++;
2972 continue;
2973 } else if ('\\' != *p) {
2974 res = mandoc_realloc(res, ssz + 2);
2975 res[ssz++] = *p++;
2976 continue;
2977 }
2978
2979 /* Search for term matches. */
2980 for (cp = r->xmbtab; cp; cp = cp->next)
2981 if (0 == strncmp(p, cp->key.p, cp->key.sz))
2982 break;
2983
2984 if (NULL != cp) {
2985 /*
2986 * A match has been found.
2987 * Append the match to the array and move
2988 * forward by its keysize.
2989 */
2990 res = mandoc_realloc(res,
2991 ssz + cp->val.sz + 1);
2992 memcpy(res + ssz, cp->val.p, cp->val.sz);
2993 ssz += cp->val.sz;
2994 p += (int)cp->key.sz;
2995 continue;
2996 }
2997
2998 /*
2999 * Handle escapes carefully: we need to copy
3000 * over just the escape itself, or else we might
3001 * do replacements within the escape itself.
3002 * Make sure to pass along the bogus string.
3003 */
3004 pp = p++;
3005 esc = mandoc_escape(&p, NULL, NULL);
3006 if (ESCAPE_ERROR == esc) {
3007 sz = strlen(pp);
3008 res = mandoc_realloc(res, ssz + sz + 1);
3009 memcpy(res + ssz, pp, sz);
3010 break;
3011 }
3012 /*
3013 * We bail out on bad escapes.
3014 * No need to warn: we already did so when
3015 * roff_res() was called.
3016 */
3017 sz = (int)(p - pp);
3018 res = mandoc_realloc(res, ssz + sz + 1);
3019 memcpy(res + ssz, pp, sz);
3020 ssz += sz;
3021 }
3022
3023 res[(int)ssz] = '\0';
3024 return(res);
3025 }
3026
3027 int
roff_getformat(const struct roff * r)3028 roff_getformat(const struct roff *r)
3029 {
3030
3031 return(r->format);
3032 }
3033
3034 /*
3035 * Find out whether a line is a macro line or not.
3036 * If it is, adjust the current position and return one; if it isn't,
3037 * return zero and don't change the current position.
3038 * If the control character has been set with `.cc', then let that grain
3039 * precedence.
3040 * This is slighly contrary to groff, where using the non-breaking
3041 * control character when `cc' has been invoked will cause the
3042 * non-breaking macro contents to be printed verbatim.
3043 */
3044 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)3045 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
3046 {
3047 int pos;
3048
3049 pos = *ppos;
3050
3051 if (0 != r->control && cp[pos] == r->control)
3052 pos++;
3053 else if (0 != r->control)
3054 return(0);
3055 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
3056 pos += 2;
3057 else if ('.' == cp[pos] || '\'' == cp[pos])
3058 pos++;
3059 else
3060 return(0);
3061
3062 while (' ' == cp[pos] || '\t' == cp[pos])
3063 pos++;
3064
3065 *ppos = pos;
3066 return(1);
3067 }
3068