1 /* $Id: roff.c,v 1.378 2021/08/10 12:55:04 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the roff(7) parser for mandoc(1). 19 */ 20 #include "config.h" 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <limits.h> 27 #include <stddef.h> 28 #include <stdint.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 33 #include "mandoc_aux.h" 34 #include "mandoc_ohash.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mandoc_parse.h" 38 #include "libmandoc.h" 39 #include "roff_int.h" 40 #include "tbl_parse.h" 41 #include "eqn_parse.h" 42 43 /* 44 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 45 * that an escape sequence resulted from copy-in processing and 46 * needs to be checked or interpolated. As it is used nowhere 47 * else, it is defined here rather than in a header file. 48 */ 49 #define ASCII_ESC 27 50 51 /* Maximum number of string expansions per line, to break infinite loops. */ 52 #define EXPAND_LIMIT 1000 53 54 /* Types of definitions of macros and strings. 
*/ 55 #define ROFFDEF_USER (1 << 1) /* User-defined. */ 56 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 57 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 58 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 59 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 60 ROFFDEF_REN | ROFFDEF_STD) 61 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 62 63 /* --- data types --------------------------------------------------------- */ 64 65 /* 66 * An incredibly-simple string buffer. 67 */ 68 struct roffstr { 69 char *p; /* nil-terminated buffer */ 70 size_t sz; /* saved strlen(p) */ 71 }; 72 73 /* 74 * A key-value roffstr pair as part of a singly-linked list. 75 */ 76 struct roffkv { 77 struct roffstr key; 78 struct roffstr val; 79 struct roffkv *next; /* next in list */ 80 }; 81 82 /* 83 * A single number register as part of a singly-linked list. 84 */ 85 struct roffreg { 86 struct roffstr key; 87 int val; 88 int step; 89 struct roffreg *next; 90 }; 91 92 /* 93 * Association of request and macro names with token IDs. 94 */ 95 struct roffreq { 96 enum roff_tok tok; 97 char name[]; 98 }; 99 100 /* 101 * A macro processing context. 102 * More than one is needed when macro calls are nested. 
103 */ 104 struct mctx { 105 char **argv; 106 int argc; 107 int argsz; 108 }; 109 110 struct roff { 111 struct roff_man *man; /* mdoc or man parser */ 112 struct roffnode *last; /* leaf of stack */ 113 struct mctx *mstack; /* stack of macro contexts */ 114 int *rstack; /* stack of inverted `ie' values */ 115 struct ohash *reqtab; /* request lookup table */ 116 struct roffreg *regtab; /* number registers */ 117 struct roffkv *strtab; /* user-defined strings & macros */ 118 struct roffkv *rentab; /* renamed strings & macros */ 119 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 120 struct roffstr *xtab; /* single-byte trans table (`tr') */ 121 const char *current_string; /* value of last called user macro */ 122 struct tbl_node *first_tbl; /* first table parsed */ 123 struct tbl_node *last_tbl; /* last table parsed */ 124 struct tbl_node *tbl; /* current table being parsed */ 125 struct eqn_node *last_eqn; /* equation parser */ 126 struct eqn_node *eqn; /* active equation parser */ 127 int eqn_inline; /* current equation is inline */ 128 int options; /* parse options */ 129 int mstacksz; /* current size of mstack */ 130 int mstackpos; /* position in mstack */ 131 int rstacksz; /* current size limit of rstack */ 132 int rstackpos; /* position in rstack */ 133 int format; /* current file in mdoc or man format */ 134 char control; /* control character */ 135 char escape; /* escape character */ 136 }; 137 138 /* 139 * A macro definition, condition, or ignored block. 140 */ 141 struct roffnode { 142 enum roff_tok tok; /* type of node */ 143 struct roffnode *parent; /* up one in stack */ 144 int line; /* parse line */ 145 int col; /* parse col */ 146 char *name; /* node name, e.g. 
macro name */ 147 char *end; /* custom end macro of the block */ 148 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 149 int rule; /* content is: 1=evaluated 0=skipped */ 150 }; 151 152 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 153 enum roff_tok tok, /* tok of macro */ \ 154 struct buf *buf, /* input buffer */ \ 155 int ln, /* parse line */ \ 156 int ppos, /* original pos in buffer */ \ 157 int pos, /* current pos in buffer */ \ 158 int *offs /* reset offset of buffer data */ 159 160 typedef int (*roffproc)(ROFF_ARGS); 161 162 struct roffmac { 163 roffproc proc; /* process new macro */ 164 roffproc text; /* process as child text of macro */ 165 roffproc sub; /* process as child of macro */ 166 int flags; 167 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 168 }; 169 170 struct predef { 171 const char *name; /* predefined input name */ 172 const char *str; /* replacement symbol */ 173 }; 174 175 #define PREDEF(__name, __str) \ 176 { (__name), (__str) }, 177 178 /* --- function prototypes ------------------------------------------------ */ 179 180 static int roffnode_cleanscope(struct roff *); 181 static int roffnode_pop(struct roff *); 182 static void roffnode_push(struct roff *, enum roff_tok, 183 const char *, int, int); 184 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 185 static int roff_als(ROFF_ARGS); 186 static int roff_block(ROFF_ARGS); 187 static int roff_block_text(ROFF_ARGS); 188 static int roff_block_sub(ROFF_ARGS); 189 static int roff_break(ROFF_ARGS); 190 static int roff_cblock(ROFF_ARGS); 191 static int roff_cc(ROFF_ARGS); 192 static int roff_ccond(struct roff *, int, int); 193 static int roff_char(ROFF_ARGS); 194 static int roff_cond(ROFF_ARGS); 195 static int roff_cond_checkend(ROFF_ARGS); 196 static int roff_cond_text(ROFF_ARGS); 197 static int roff_cond_sub(ROFF_ARGS); 198 static int roff_ds(ROFF_ARGS); 199 static int roff_ec(ROFF_ARGS); 200 static int roff_eo(ROFF_ARGS); 201 static int 
roff_eqndelim(struct roff *, struct buf *, int); 202 static int roff_evalcond(struct roff *, int, char *, int *); 203 static int roff_evalnum(struct roff *, int, 204 const char *, int *, int *, int); 205 static int roff_evalpar(struct roff *, int, 206 const char *, int *, int *, int); 207 static int roff_evalstrcond(const char *, int *); 208 static int roff_expand(struct roff *, struct buf *, 209 int, int, char); 210 static void roff_free1(struct roff *); 211 static void roff_freereg(struct roffreg *); 212 static void roff_freestr(struct roffkv *); 213 static size_t roff_getname(struct roff *, char **, int, int); 214 static int roff_getnum(const char *, int *, int *, int); 215 static int roff_getop(const char *, int *, char *); 216 static int roff_getregn(struct roff *, 217 const char *, size_t, char); 218 static int roff_getregro(const struct roff *, 219 const char *name); 220 static const char *roff_getstrn(struct roff *, 221 const char *, size_t, int *); 222 static int roff_hasregn(const struct roff *, 223 const char *, size_t); 224 static int roff_insec(ROFF_ARGS); 225 static int roff_it(ROFF_ARGS); 226 static int roff_line_ignore(ROFF_ARGS); 227 static void roff_man_alloc1(struct roff_man *); 228 static void roff_man_free1(struct roff_man *); 229 static int roff_manyarg(ROFF_ARGS); 230 static int roff_noarg(ROFF_ARGS); 231 static int roff_nop(ROFF_ARGS); 232 static int roff_nr(ROFF_ARGS); 233 static int roff_onearg(ROFF_ARGS); 234 static enum roff_tok roff_parse(struct roff *, char *, int *, 235 int, int); 236 static int roff_parsetext(struct roff *, struct buf *, 237 int, int *); 238 static int roff_renamed(ROFF_ARGS); 239 static int roff_return(ROFF_ARGS); 240 static int roff_rm(ROFF_ARGS); 241 static int roff_rn(ROFF_ARGS); 242 static int roff_rr(ROFF_ARGS); 243 static void roff_setregn(struct roff *, const char *, 244 size_t, int, char, int); 245 static void roff_setstr(struct roff *, 246 const char *, const char *, int); 247 static void 
roff_setstrn(struct roffkv **, const char *, 248 size_t, const char *, size_t, int); 249 static int roff_shift(ROFF_ARGS); 250 static int roff_so(ROFF_ARGS); 251 static int roff_tr(ROFF_ARGS); 252 static int roff_Dd(ROFF_ARGS); 253 static int roff_TE(ROFF_ARGS); 254 static int roff_TS(ROFF_ARGS); 255 static int roff_EQ(ROFF_ARGS); 256 static int roff_EN(ROFF_ARGS); 257 static int roff_T_(ROFF_ARGS); 258 static int roff_unsupp(ROFF_ARGS); 259 static int roff_userdef(ROFF_ARGS); 260 261 /* --- constant data ------------------------------------------------------ */ 262 263 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 264 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 265 266 const char *__roff_name[MAN_MAX + 1] = { 267 "br", "ce", "fi", "ft", 268 "ll", "mc", "nf", 269 "po", "rj", "sp", 270 "ta", "ti", NULL, 271 "ab", "ad", "af", "aln", 272 "als", "am", "am1", "ami", 273 "ami1", "as", "as1", "asciify", 274 "backtrace", "bd", "bleedat", "blm", 275 "box", "boxa", "bp", "BP", 276 "break", "breakchar", "brnl", "brp", 277 "brpnl", "c2", "cc", 278 "cf", "cflags", "ch", "char", 279 "chop", "class", "close", "CL", 280 "color", "composite", "continue", "cp", 281 "cropat", "cs", "cu", "da", 282 "dch", "Dd", "de", "de1", 283 "defcolor", "dei", "dei1", "device", 284 "devicem", "di", "do", "ds", 285 "ds1", "dwh", "dt", "ec", 286 "ecr", "ecs", "el", "em", 287 "EN", "eo", "EP", "EQ", 288 "errprint", "ev", "evc", "ex", 289 "fallback", "fam", "fc", "fchar", 290 "fcolor", "fdeferlig", "feature", "fkern", 291 "fl", "flig", "fp", "fps", 292 "fschar", "fspacewidth", "fspecial", "ftr", 293 "fzoom", "gcolor", "hc", "hcode", 294 "hidechar", "hla", "hlm", "hpf", 295 "hpfa", "hpfcode", "hw", "hy", 296 "hylang", "hylen", "hym", "hypp", 297 "hys", "ie", "if", "ig", 298 "index", "it", "itc", "IX", 299 "kern", "kernafter", "kernbefore", "kernpair", 300 "lc", "lc_ctype", "lds", "length", 301 "letadj", "lf", "lg", "lhang", 302 "linetabs", "lnr", 
"lnrf", "lpfx", 303 "ls", "lsm", "lt", 304 "mediasize", "minss", "mk", "mso", 305 "na", "ne", "nh", "nhychar", 306 "nm", "nn", "nop", "nr", 307 "nrf", "nroff", "ns", "nx", 308 "open", "opena", "os", "output", 309 "padj", "papersize", "pc", "pev", 310 "pi", "PI", "pl", "pm", 311 "pn", "pnr", "ps", 312 "psbb", "pshape", "pso", "ptr", 313 "pvs", "rchar", "rd", "recursionlimit", 314 "return", "rfschar", "rhang", 315 "rm", "rn", "rnn", "rr", 316 "rs", "rt", "schar", "sentchar", 317 "shc", "shift", "sizes", "so", 318 "spacewidth", "special", "spreadwarn", "ss", 319 "sty", "substring", "sv", "sy", 320 "T&", "tc", "TE", 321 "TH", "tkf", "tl", 322 "tm", "tm1", "tmc", "tr", 323 "track", "transchar", "trf", "trimat", 324 "trin", "trnt", "troff", "TS", 325 "uf", "ul", "unformat", "unwatch", 326 "unwatchn", "vpt", "vs", "warn", 327 "warnscale", "watch", "watchlength", "watchn", 328 "wh", "while", "write", "writec", 329 "writem", "xflag", ".", NULL, 330 NULL, "text", 331 "Dd", "Dt", "Os", "Sh", 332 "Ss", "Pp", "D1", "Dl", 333 "Bd", "Ed", "Bl", "El", 334 "It", "Ad", "An", "Ap", 335 "Ar", "Cd", "Cm", "Dv", 336 "Er", "Ev", "Ex", "Fa", 337 "Fd", "Fl", "Fn", "Ft", 338 "Ic", "In", "Li", "Nd", 339 "Nm", "Op", "Ot", "Pa", 340 "Rv", "St", "Va", "Vt", 341 "Xr", "%A", "%B", "%D", 342 "%I", "%J", "%N", "%O", 343 "%P", "%R", "%T", "%V", 344 "Ac", "Ao", "Aq", "At", 345 "Bc", "Bf", "Bo", "Bq", 346 "Bsx", "Bx", "Db", "Dc", 347 "Do", "Dq", "Ec", "Ef", 348 "Em", "Eo", "Fx", "Ms", 349 "No", "Ns", "Nx", "Ox", 350 "Pc", "Pf", "Po", "Pq", 351 "Qc", "Ql", "Qo", "Qq", 352 "Re", "Rs", "Sc", "So", 353 "Sq", "Sm", "Sx", "Sy", 354 "Tn", "Ux", "Xc", "Xo", 355 "Fo", "Fc", "Oo", "Oc", 356 "Bk", "Ek", "Bt", "Hf", 357 "Fr", "Ud", "Lb", "Lp", 358 "Lk", "Mt", "Brq", "Bro", 359 "Brc", "%C", "Es", "En", 360 "Dx", "%Q", "%U", "Ta", 361 "Tg", NULL, 362 "TH", "SH", "SS", "TP", 363 "TQ", 364 "LP", "PP", "P", "IP", 365 "HP", "SM", "SB", "BI", 366 "IB", "BR", "RB", "R", 367 "B", "I", "IR", "RI", 368 "RE", "RS", "DT", 
"UC", 369 "PD", "AT", "in", 370 "SY", "YS", "OP", 371 "EX", "EE", "UR", 372 "UE", "MT", "ME", NULL 373 }; 374 const char *const *roff_name = __roff_name; 375 376 static struct roffmac roffs[TOKEN_NONE] = { 377 { roff_noarg, NULL, NULL, 0 }, /* br */ 378 { roff_onearg, NULL, NULL, 0 }, /* ce */ 379 { roff_noarg, NULL, NULL, 0 }, /* fi */ 380 { roff_onearg, NULL, NULL, 0 }, /* ft */ 381 { roff_onearg, NULL, NULL, 0 }, /* ll */ 382 { roff_onearg, NULL, NULL, 0 }, /* mc */ 383 { roff_noarg, NULL, NULL, 0 }, /* nf */ 384 { roff_onearg, NULL, NULL, 0 }, /* po */ 385 { roff_onearg, NULL, NULL, 0 }, /* rj */ 386 { roff_onearg, NULL, NULL, 0 }, /* sp */ 387 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 388 { roff_onearg, NULL, NULL, 0 }, /* ti */ 389 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 390 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 391 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 392 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 393 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 394 { roff_als, NULL, NULL, 0 }, /* als */ 395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 396 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 397 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 398 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 399 { roff_ds, NULL, NULL, 0 }, /* as */ 400 { roff_ds, NULL, NULL, 0 }, /* as1 */ 401 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 402 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 403 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 404 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 405 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 406 { roff_unsupp, NULL, NULL, 0 }, /* box */ 407 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 408 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 409 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 410 { roff_break, NULL, NULL, 0 }, /* break */ 411 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 412 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 413 { roff_noarg, NULL, 
NULL, 0 }, /* brp */ 414 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 415 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 416 { roff_cc, NULL, NULL, 0 }, /* cc */ 417 { roff_insec, NULL, NULL, 0 }, /* cf */ 418 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 419 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 420 { roff_char, NULL, NULL, 0 }, /* char */ 421 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 422 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 423 { roff_insec, NULL, NULL, 0 }, /* close */ 424 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 425 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 426 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 427 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 428 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 429 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 430 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 431 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 432 { roff_unsupp, NULL, NULL, 0 }, /* da */ 433 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 434 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 435 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 436 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 437 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 438 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 439 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 440 { roff_unsupp, NULL, NULL, 0 }, /* device */ 441 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 442 { roff_unsupp, NULL, NULL, 0 }, /* di */ 443 { roff_unsupp, NULL, NULL, 0 }, /* do */ 444 { roff_ds, NULL, NULL, 0 }, /* ds */ 445 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 446 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 447 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 448 { roff_ec, NULL, NULL, 0 }, /* ec */ 449 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 450 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 451 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 452 { roff_unsupp, NULL, NULL, 0 }, /* em */ 453 { roff_EN, NULL, NULL, 
0 }, /* EN */ 454 { roff_eo, NULL, NULL, 0 }, /* eo */ 455 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 456 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 457 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 458 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 459 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 460 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 461 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 463 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 464 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 473 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 489 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 490 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 491 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 492 { roff_line_ignore, NULL, NULL, 0 }, /* 
hypp */ 493 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 494 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 495 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 496 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 497 { roff_unsupp, NULL, NULL, 0 }, /* index */ 498 { roff_it, NULL, NULL, 0 }, /* it */ 499 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 500 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 501 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 502 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 503 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 504 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 505 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 506 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 507 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 508 { roff_unsupp, NULL, NULL, 0 }, /* length */ 509 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 510 { roff_insec, NULL, NULL, 0 }, /* lf */ 511 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 512 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 513 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 514 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 515 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 516 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 518 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 520 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 521 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 523 { roff_insec, NULL, NULL, 0 }, /* mso */ 524 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 525 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 526 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 527 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 528 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 529 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 530 { roff_nop, NULL, NULL, 0 }, /* nop */ 531 { roff_nr, NULL, NULL, 0 }, /* nr */ 532 
{ roff_unsupp, NULL, NULL, 0 }, /* nrf */ 533 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 534 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 535 { roff_insec, NULL, NULL, 0 }, /* nx */ 536 { roff_insec, NULL, NULL, 0 }, /* open */ 537 { roff_insec, NULL, NULL, 0 }, /* opena */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 539 { roff_unsupp, NULL, NULL, 0 }, /* output */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 541 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 542 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 543 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 544 { roff_insec, NULL, NULL, 0 }, /* pi */ 545 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 546 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 547 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 548 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 549 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 550 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 551 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 552 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 553 { roff_insec, NULL, NULL, 0 }, /* pso */ 554 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 555 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 556 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 557 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 558 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 559 { roff_return, NULL, NULL, 0 }, /* return */ 560 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 561 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 562 { roff_rm, NULL, NULL, 0 }, /* rm */ 563 { roff_rn, NULL, NULL, 0 }, /* rn */ 564 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 565 { roff_rr, NULL, NULL, 0 }, /* rr */ 566 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 568 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 570 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 571 { roff_shift, NULL, NULL, 0 }, /* shift */ 572 { roff_line_ignore, NULL, NULL, 0 }, 
/* sizes */ 573 { roff_so, NULL, NULL, 0 }, /* so */ 574 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 575 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 576 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 577 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 578 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 579 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 580 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 581 { roff_insec, NULL, NULL, 0 }, /* sy */ 582 { roff_T_, NULL, NULL, 0 }, /* T& */ 583 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 584 { roff_TE, NULL, NULL, 0 }, /* TE */ 585 { roff_Dd, NULL, NULL, 0 }, /* TH */ 586 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 587 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 588 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 589 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 590 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 591 { roff_tr, NULL, NULL, 0 }, /* tr */ 592 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 593 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 594 { roff_insec, NULL, NULL, 0 }, /* trf */ 595 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 596 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 597 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 598 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 599 { roff_TS, NULL, NULL, 0 }, /* TS */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 602 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 607 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 609 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 610 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 611 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 612 { roff_unsupp, NULL, NULL, 0 }, 
/* wh */ 613 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 614 { roff_insec, NULL, NULL, 0 }, /* write */ 615 { roff_insec, NULL, NULL, 0 }, /* writec */ 616 { roff_insec, NULL, NULL, 0 }, /* writem */ 617 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 618 { roff_cblock, NULL, NULL, 0 }, /* . */ 619 { roff_renamed, NULL, NULL, 0 }, 620 { roff_userdef, NULL, NULL, 0 } 621 }; 622 623 /* Array of injected predefined strings. */ 624 #define PREDEFS_MAX 38 625 static const struct predef predefs[PREDEFS_MAX] = { 626 #include "predefs.in" 627 }; 628 629 static int roffce_lines; /* number of input lines to center */ 630 static struct roff_node *roffce_node; /* active request */ 631 static int roffit_lines; /* number of lines to delay */ 632 static char *roffit_macro; /* nil-terminated macro line */ 633 634 635 /* --- request table ------------------------------------------------------ */ 636 637 struct ohash * 638 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 639 { 640 struct ohash *htab; 641 struct roffreq *req; 642 enum roff_tok tok; 643 size_t sz; 644 unsigned int slot; 645 646 htab = mandoc_malloc(sizeof(*htab)); 647 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 648 649 for (tok = mintok; tok < maxtok; tok++) { 650 if (roff_name[tok] == NULL) 651 continue; 652 sz = strlen(roff_name[tok]); 653 req = mandoc_malloc(sizeof(*req) + sz + 1); 654 req->tok = tok; 655 memcpy(req->name, roff_name[tok], sz + 1); 656 slot = ohash_qlookup(htab, req->name); 657 ohash_insert(htab, slot, req); 658 } 659 return htab; 660 } 661 662 void 663 roffhash_free(struct ohash *htab) 664 { 665 struct roffreq *req; 666 unsigned int slot; 667 668 if (htab == NULL) 669 return; 670 for (req = ohash_first(htab, &slot); req != NULL; 671 req = ohash_next(htab, &slot)) 672 free(req); 673 ohash_delete(htab); 674 free(htab); 675 } 676 677 enum roff_tok 678 roffhash_find(struct ohash *htab, const char *name, size_t sz) 679 { 680 struct roffreq *req; 681 
const char *end; 682 683 if (sz) { 684 end = name + sz; 685 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 686 } else 687 req = ohash_find(htab, ohash_qlookup(htab, name)); 688 return req == NULL ? TOKEN_NONE : req->tok; 689 } 690 691 /* --- stack of request blocks -------------------------------------------- */ 692 693 /* 694 * Pop the current node off of the stack of roff instructions currently 695 * pending. Return 1 if it is a loop or 0 otherwise. 696 */ 697 static int 698 roffnode_pop(struct roff *r) 699 { 700 struct roffnode *p; 701 int inloop; 702 703 p = r->last; 704 inloop = p->tok == ROFF_while; 705 r->last = p->parent; 706 free(p->name); 707 free(p->end); 708 free(p); 709 return inloop; 710 } 711 712 /* 713 * Push a roff node onto the instruction stack. This must later be 714 * removed with roffnode_pop(). 715 */ 716 static void 717 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 718 int line, int col) 719 { 720 struct roffnode *p; 721 722 p = mandoc_calloc(1, sizeof(struct roffnode)); 723 p->tok = tok; 724 if (name) 725 p->name = mandoc_strdup(name); 726 p->parent = r->last; 727 p->line = line; 728 p->col = col; 729 p->rule = p->parent ? 
p->parent->rule : 0; 730 731 r->last = p; 732 } 733 734 /* --- roff parser state data management ---------------------------------- */ 735 736 static void 737 roff_free1(struct roff *r) 738 { 739 int i; 740 741 tbl_free(r->first_tbl); 742 r->first_tbl = r->last_tbl = r->tbl = NULL; 743 744 eqn_free(r->last_eqn); 745 r->last_eqn = r->eqn = NULL; 746 747 while (r->mstackpos >= 0) 748 roff_userret(r); 749 750 while (r->last) 751 roffnode_pop(r); 752 753 free (r->rstack); 754 r->rstack = NULL; 755 r->rstacksz = 0; 756 r->rstackpos = -1; 757 758 roff_freereg(r->regtab); 759 r->regtab = NULL; 760 761 roff_freestr(r->strtab); 762 roff_freestr(r->rentab); 763 roff_freestr(r->xmbtab); 764 r->strtab = r->rentab = r->xmbtab = NULL; 765 766 if (r->xtab) 767 for (i = 0; i < 128; i++) 768 free(r->xtab[i].p); 769 free(r->xtab); 770 r->xtab = NULL; 771 } 772 773 void 774 roff_reset(struct roff *r) 775 { 776 roff_free1(r); 777 r->options |= MPARSE_COMMENT; 778 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 779 r->control = '\0'; 780 r->escape = '\\'; 781 roffce_lines = 0; 782 roffce_node = NULL; 783 roffit_lines = 0; 784 roffit_macro = NULL; 785 } 786 787 void 788 roff_free(struct roff *r) 789 { 790 int i; 791 792 roff_free1(r); 793 for (i = 0; i < r->mstacksz; i++) 794 free(r->mstack[i].argv); 795 free(r->mstack); 796 roffhash_free(r->reqtab); 797 free(r); 798 } 799 800 struct roff * 801 roff_alloc(int options) 802 { 803 struct roff *r; 804 805 r = mandoc_calloc(1, sizeof(struct roff)); 806 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 807 r->options = options | MPARSE_COMMENT; 808 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 809 r->mstackpos = -1; 810 r->rstackpos = -1; 811 r->escape = '\\'; 812 return r; 813 } 814 815 /* --- syntax tree state data management ---------------------------------- */ 816 817 static void 818 roff_man_free1(struct roff_man *man) 819 { 820 if (man->meta.first != NULL) 821 roff_node_delete(man, man->meta.first); 822 free(man->meta.msec); 823 
free(man->meta.vol); 824 free(man->meta.os); 825 free(man->meta.arch); 826 free(man->meta.title); 827 free(man->meta.name); 828 free(man->meta.date); 829 free(man->meta.sodest); 830 } 831 832 void 833 roff_state_reset(struct roff_man *man) 834 { 835 man->last = man->meta.first; 836 man->last_es = NULL; 837 man->flags = 0; 838 man->lastsec = man->lastnamed = SEC_NONE; 839 man->next = ROFF_NEXT_CHILD; 840 roff_setreg(man->roff, "nS", 0, '='); 841 } 842 843 static void 844 roff_man_alloc1(struct roff_man *man) 845 { 846 memset(&man->meta, 0, sizeof(man->meta)); 847 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 848 man->meta.first->type = ROFFT_ROOT; 849 man->meta.macroset = MACROSET_NONE; 850 roff_state_reset(man); 851 } 852 853 void 854 roff_man_reset(struct roff_man *man) 855 { 856 roff_man_free1(man); 857 roff_man_alloc1(man); 858 } 859 860 void 861 roff_man_free(struct roff_man *man) 862 { 863 roff_man_free1(man); 864 free(man); 865 } 866 867 struct roff_man * 868 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 869 { 870 struct roff_man *man; 871 872 man = mandoc_calloc(1, sizeof(*man)); 873 man->roff = roff; 874 man->os_s = os_s; 875 man->quick = quick; 876 roff_man_alloc1(man); 877 roff->man = man; 878 return man; 879 } 880 881 /* --- syntax tree handling ----------------------------------------------- */ 882 883 struct roff_node * 884 roff_node_alloc(struct roff_man *man, int line, int pos, 885 enum roff_type type, int tok) 886 { 887 struct roff_node *n; 888 889 n = mandoc_calloc(1, sizeof(*n)); 890 n->line = line; 891 n->pos = pos; 892 n->tok = tok; 893 n->type = type; 894 n->sec = man->lastsec; 895 896 if (man->flags & MDOC_SYNOPSIS) 897 n->flags |= NODE_SYNPRETTY; 898 else 899 n->flags &= ~NODE_SYNPRETTY; 900 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 901 n->flags |= NODE_NOFILL; 902 else 903 n->flags &= ~NODE_NOFILL; 904 if (man->flags & MDOC_NEWLINE) 905 n->flags |= NODE_LINE; 906 man->flags &= 
~MDOC_NEWLINE; 907 908 return n; 909 } 910 911 void 912 roff_node_append(struct roff_man *man, struct roff_node *n) 913 { 914 915 switch (man->next) { 916 case ROFF_NEXT_SIBLING: 917 if (man->last->next != NULL) { 918 n->next = man->last->next; 919 man->last->next->prev = n; 920 } else 921 man->last->parent->last = n; 922 man->last->next = n; 923 n->prev = man->last; 924 n->parent = man->last->parent; 925 break; 926 case ROFF_NEXT_CHILD: 927 if (man->last->child != NULL) { 928 n->next = man->last->child; 929 man->last->child->prev = n; 930 } else 931 man->last->last = n; 932 man->last->child = n; 933 n->parent = man->last; 934 break; 935 default: 936 abort(); 937 } 938 man->last = n; 939 940 switch (n->type) { 941 case ROFFT_HEAD: 942 n->parent->head = n; 943 break; 944 case ROFFT_BODY: 945 if (n->end != ENDBODY_NOT) 946 return; 947 n->parent->body = n; 948 break; 949 case ROFFT_TAIL: 950 n->parent->tail = n; 951 break; 952 default: 953 return; 954 } 955 956 /* 957 * Copy over the normalised-data pointer of our parent. Not 958 * everybody has one, but copying a null pointer is fine. 
959 */ 960 961 n->norm = n->parent->norm; 962 assert(n->parent->type == ROFFT_BLOCK); 963 } 964 965 void 966 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 967 { 968 struct roff_node *n; 969 970 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 971 n->string = roff_strdup(man->roff, word); 972 roff_node_append(man, n); 973 n->flags |= NODE_VALID | NODE_ENDED; 974 man->next = ROFF_NEXT_SIBLING; 975 } 976 977 void 978 roff_word_append(struct roff_man *man, const char *word) 979 { 980 struct roff_node *n; 981 char *addstr, *newstr; 982 983 n = man->last; 984 addstr = roff_strdup(man->roff, word); 985 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 986 free(addstr); 987 free(n->string); 988 n->string = newstr; 989 man->next = ROFF_NEXT_SIBLING; 990 } 991 992 void 993 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 994 { 995 struct roff_node *n; 996 997 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 998 roff_node_append(man, n); 999 man->next = ROFF_NEXT_CHILD; 1000 } 1001 1002 struct roff_node * 1003 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1004 { 1005 struct roff_node *n; 1006 1007 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1008 roff_node_append(man, n); 1009 man->next = ROFF_NEXT_CHILD; 1010 return n; 1011 } 1012 1013 struct roff_node * 1014 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1015 { 1016 struct roff_node *n; 1017 1018 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1019 roff_node_append(man, n); 1020 man->next = ROFF_NEXT_CHILD; 1021 return n; 1022 } 1023 1024 struct roff_node * 1025 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1026 { 1027 struct roff_node *n; 1028 1029 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1030 roff_node_append(man, n); 1031 man->next = ROFF_NEXT_CHILD; 1032 return n; 1033 } 1034 1035 static void 1036 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1037 
{ 1038 struct roff_node *n; 1039 struct tbl_span *span; 1040 1041 if (man->meta.macroset == MACROSET_MAN) 1042 man_breakscope(man, ROFF_TS); 1043 while ((span = tbl_span(tbl)) != NULL) { 1044 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1045 n->span = span; 1046 roff_node_append(man, n); 1047 n->flags |= NODE_VALID | NODE_ENDED; 1048 man->next = ROFF_NEXT_SIBLING; 1049 } 1050 } 1051 1052 void 1053 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1054 { 1055 1056 /* Adjust siblings. */ 1057 1058 if (n->prev) 1059 n->prev->next = n->next; 1060 if (n->next) 1061 n->next->prev = n->prev; 1062 1063 /* Adjust parent. */ 1064 1065 if (n->parent != NULL) { 1066 if (n->parent->child == n) 1067 n->parent->child = n->next; 1068 if (n->parent->last == n) 1069 n->parent->last = n->prev; 1070 } 1071 1072 /* Adjust parse point. */ 1073 1074 if (man == NULL) 1075 return; 1076 if (man->last == n) { 1077 if (n->prev == NULL) { 1078 man->last = n->parent; 1079 man->next = ROFF_NEXT_CHILD; 1080 } else { 1081 man->last = n->prev; 1082 man->next = ROFF_NEXT_SIBLING; 1083 } 1084 } 1085 if (man->meta.first == n) 1086 man->meta.first = NULL; 1087 } 1088 1089 void 1090 roff_node_relink(struct roff_man *man, struct roff_node *n) 1091 { 1092 roff_node_unlink(man, n); 1093 n->prev = n->next = NULL; 1094 roff_node_append(man, n); 1095 } 1096 1097 void 1098 roff_node_free(struct roff_node *n) 1099 { 1100 1101 if (n->args != NULL) 1102 mdoc_argv_free(n->args); 1103 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1104 free(n->norm); 1105 eqn_box_free(n->eqn); 1106 free(n->string); 1107 free(n->tag); 1108 free(n); 1109 } 1110 1111 void 1112 roff_node_delete(struct roff_man *man, struct roff_node *n) 1113 { 1114 1115 while (n->child != NULL) 1116 roff_node_delete(man, n->child); 1117 roff_node_unlink(man, n); 1118 roff_node_free(n); 1119 } 1120 1121 int 1122 roff_node_transparent(struct roff_node *n) 1123 { 1124 if (n == NULL) 1125 return 0; 1126 if (n->type == 
ROFFT_COMMENT || n->flags & NODE_NOPRT) 1127 return 1; 1128 return roff_tok_transparent(n->tok); 1129 } 1130 1131 int 1132 roff_tok_transparent(enum roff_tok tok) 1133 { 1134 switch (tok) { 1135 case ROFF_ft: 1136 case ROFF_ll: 1137 case ROFF_mc: 1138 case ROFF_po: 1139 case ROFF_ta: 1140 case MDOC_Db: 1141 case MDOC_Es: 1142 case MDOC_Sm: 1143 case MDOC_Tg: 1144 case MAN_DT: 1145 case MAN_UC: 1146 case MAN_PD: 1147 case MAN_AT: 1148 return 1; 1149 default: 1150 return 0; 1151 } 1152 } 1153 1154 struct roff_node * 1155 roff_node_child(struct roff_node *n) 1156 { 1157 for (n = n->child; roff_node_transparent(n); n = n->next) 1158 continue; 1159 return n; 1160 } 1161 1162 struct roff_node * 1163 roff_node_prev(struct roff_node *n) 1164 { 1165 do { 1166 n = n->prev; 1167 } while (roff_node_transparent(n)); 1168 return n; 1169 } 1170 1171 struct roff_node * 1172 roff_node_next(struct roff_node *n) 1173 { 1174 do { 1175 n = n->next; 1176 } while (roff_node_transparent(n)); 1177 return n; 1178 } 1179 1180 void 1181 deroff(char **dest, const struct roff_node *n) 1182 { 1183 char *cp; 1184 size_t sz; 1185 1186 if (n->string == NULL) { 1187 for (n = n->child; n != NULL; n = n->next) 1188 deroff(dest, n); 1189 return; 1190 } 1191 1192 /* Skip leading whitespace. */ 1193 1194 for (cp = n->string; *cp != '\0'; cp++) { 1195 if (cp[0] == '\\' && cp[1] != '\0' && 1196 strchr(" %&0^|~", cp[1]) != NULL) 1197 cp++; 1198 else if ( ! isspace((unsigned char)*cp)) 1199 break; 1200 } 1201 1202 /* Skip trailing backslash. */ 1203 1204 sz = strlen(cp); 1205 if (sz > 0 && cp[sz - 1] == '\\') 1206 sz--; 1207 1208 /* Skip trailing whitespace. */ 1209 1210 for (; sz; sz--) 1211 if ( ! isspace((unsigned char)cp[sz-1])) 1212 break; 1213 1214 /* Skip empty strings. 
*/ 1215 1216 if (sz == 0) 1217 return; 1218 1219 if (*dest == NULL) { 1220 *dest = mandoc_strndup(cp, sz); 1221 return; 1222 } 1223 1224 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1225 free(*dest); 1226 *dest = cp; 1227 } 1228 1229 /* --- main functions of the roff parser ---------------------------------- */ 1230 1231 /* 1232 * In the current line, expand escape sequences that produce parsable 1233 * input text. Also check the syntax of the remaining escape sequences, 1234 * which typically produce output glyphs or change formatter state. 1235 */ 1236 static int 1237 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1238 { 1239 struct mctx *ctx; /* current macro call context */ 1240 char ubuf[24]; /* buffer to print the number */ 1241 struct roff_node *n; /* used for header comments */ 1242 const char *start; /* start of the string to process */ 1243 char *stesc; /* start of an escape sequence ('\\') */ 1244 const char *esct; /* type of esccape sequence */ 1245 char *ep; /* end of comment string */ 1246 const char *stnam; /* start of the name, after "[(*" */ 1247 const char *cp; /* end of the name, e.g. 
before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		/*
		 * Only accept the keyword if it is not followed by an
		 * alphanumeric character and a closing '$' follows.
		 * Warn if the same kind of id was already seen.
		 */
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * Here, ep is set to the last byte of the line
		 * and stesc steps back onto the escape character.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments.
		 * Also drop the blanks right before the comment,
		 * except a blank that is preceded by a backslash.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* From here on, the line is scanned backwards, from end to start. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * After the loop, cp points just before the run of
		 * escape characters ending at stesc; an even run
		 * length means that this one is itself escaped.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the very end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: validate only, then move on. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 * A maxl of zero means that the name extends
		 * up to the terminating character.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			/* Only \w looks inside nested escape sequences. */
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* The digits 1 to 9 select one single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Compute the total length of all arguments. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * The escape sequence is treated as three bytes
			 * long.  Unless the replacement has that same
			 * length, resize the buffer and move the rest
			 * of the line: before shrinking, after growing.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Write the arguments over the escape sequence. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/*
			 * Yield '1' only if the argument is complete and
			 * the numeric expression extends all the way
			 * to the terminating character.
			 */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			    NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			/* The result is 24 per character counted above. */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Resume at the end of the inserted text,
		 * such that it gets scanned for escapes, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}

/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a pointer to the parsed argument, which is either the original
 * pointer or advanced by one byte in case the argument is quoted.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer to the next argument,
 * or to the NUL byte terminating the argument line.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/* Mark for roff_expand(), see ASCII_ESC. */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args.
*/ 1707 white = 1; 1708 break; 1709 } 1710 } else if ('"' == cp[0]) { 1711 if ('"' == cp[1]) { 1712 /* Quoted quotes collapse. */ 1713 pairs++; 1714 cp++; 1715 } else { 1716 /* Unquoted quotes end quoted args. */ 1717 quoted = 2; 1718 break; 1719 } 1720 } 1721 } 1722 1723 /* Quoted argument without a closing quote. */ 1724 if (1 == quoted) 1725 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1726 1727 /* NUL-terminate this argument and move to the next one. */ 1728 if (pairs) 1729 cp[-pairs] = '\0'; 1730 if ('\0' != *cp) { 1731 *cp++ = '\0'; 1732 while (' ' == *cp) 1733 cp++; 1734 } 1735 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1736 *cpp = cp; 1737 1738 if ('\0' == *cp && (white || ' ' == cp[-1])) 1739 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1740 1741 start = mandoc_strdup(start); 1742 if (newesc == 0) 1743 return start; 1744 1745 buf.buf = start; 1746 buf.sz = strlen(start) + 1; 1747 buf.next = NULL; 1748 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1749 free(buf.buf); 1750 buf.buf = mandoc_strdup(""); 1751 } 1752 return buf.buf; 1753 } 1754 1755 1756 /* 1757 * Process text streams. 1758 */ 1759 static int 1760 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1761 { 1762 size_t sz; 1763 const char *start; 1764 char *p; 1765 int isz; 1766 enum mandoc_esc esc; 1767 1768 /* Spring the input line trap. */ 1769 1770 if (roffit_lines == 1) { 1771 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1772 free(buf->buf); 1773 buf->buf = p; 1774 buf->sz = isz + 1; 1775 *offs = 0; 1776 free(roffit_macro); 1777 roffit_lines = 0; 1778 return ROFF_REPARSE; 1779 } else if (roffit_lines > 1) 1780 --roffit_lines; 1781 1782 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1783 if (roffce_lines < 1) { 1784 r->man->last = roffce_node; 1785 r->man->next = ROFF_NEXT_SIBLING; 1786 roffce_lines = 0; 1787 roffce_node = NULL; 1788 } else 1789 roffce_lines--; 1790 } 1791 1792 /* Convert all breakable hyphens into ASCII_HYPH. 
*/ 1793 1794 start = p = buf->buf + pos; 1795 1796 while (*p != '\0') { 1797 sz = strcspn(p, "-\\"); 1798 p += sz; 1799 1800 if (*p == '\0') 1801 break; 1802 1803 if (*p == '\\') { 1804 /* Skip over escapes. */ 1805 p++; 1806 esc = mandoc_escape((const char **)&p, NULL, NULL); 1807 if (esc == ESCAPE_ERROR) 1808 break; 1809 while (*p == '-') 1810 p++; 1811 continue; 1812 } else if (p == start) { 1813 p++; 1814 continue; 1815 } 1816 1817 if (isalpha((unsigned char)p[-1]) && 1818 isalpha((unsigned char)p[1])) 1819 *p = ASCII_HYPH; 1820 p++; 1821 } 1822 return ROFF_CONT; 1823 } 1824 1825 int 1826 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len) 1827 { 1828 enum roff_tok t; 1829 int e; 1830 int pos; /* parse point */ 1831 int spos; /* saved parse point for messages */ 1832 int ppos; /* original offset in buf->buf */ 1833 int ctl; /* macro line (boolean) */ 1834 1835 ppos = pos = *offs; 1836 1837 if (len > 80 && r->tbl == NULL && r->eqn == NULL && 1838 (r->man->flags & ROFF_NOFILL) == 0 && 1839 strchr(" .\\", buf->buf[pos]) == NULL && 1840 buf->buf[pos] != r->control && 1841 strcspn(buf->buf, " ") < 80) 1842 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1, 1843 "%.20s...", buf->buf + pos); 1844 1845 /* Handle in-line equation delimiters. */ 1846 1847 if (r->tbl == NULL && 1848 r->last_eqn != NULL && r->last_eqn->delim && 1849 (r->eqn == NULL || r->eqn_inline)) { 1850 e = roff_eqndelim(r, buf, pos); 1851 if (e == ROFF_REPARSE) 1852 return e; 1853 assert(e == ROFF_CONT); 1854 } 1855 1856 /* Expand some escape sequences. */ 1857 1858 e = roff_expand(r, buf, ln, pos, r->escape); 1859 if ((e & ROFF_MASK) == ROFF_IGN) 1860 return e; 1861 assert(e == ROFF_CONT); 1862 1863 ctl = roff_getcontrol(r, buf->buf, &pos); 1864 1865 /* 1866 * First, if a scope is open and we're not a macro, pass the 1867 * text through the macro's filter. 1868 * Equations process all content themselves. 
* Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the flag bits outside ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* An open equation takes every line that does not start with ".EN". */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* An open table takes text lines and empty request lines. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	/* Text ends the phase in which comments are saved in the tree. */
	if ( ! ctl) {
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/*
		 * For an unknown macro, skip its name and the blanks
		 * after it and hand the rest of the line to the table.
		 */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}

/*
 * Internal interface function to tell the roff parser that execution
 * of the current macro ended.  This is required because macro
 * definitions usually do not end with a .return request.
1965 */ 1966 void 1967 roff_userret(struct roff *r) 1968 { 1969 struct mctx *ctx; 1970 int i; 1971 1972 assert(r->mstackpos >= 0); 1973 ctx = r->mstack + r->mstackpos; 1974 for (i = 0; i < ctx->argc; i++) 1975 free(ctx->argv[i]); 1976 ctx->argc = 0; 1977 r->mstackpos--; 1978 } 1979 1980 void 1981 roff_endparse(struct roff *r) 1982 { 1983 if (r->last != NULL) 1984 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1985 r->last->col, "%s", roff_name[r->last->tok]); 1986 1987 if (r->eqn != NULL) { 1988 mandoc_msg(MANDOCERR_BLK_NOEND, 1989 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1990 eqn_parse(r->eqn); 1991 r->eqn = NULL; 1992 } 1993 1994 if (r->tbl != NULL) { 1995 tbl_end(r->tbl, 1); 1996 r->tbl = NULL; 1997 } 1998 } 1999 2000 /* 2001 * Parse a roff node's type from the input buffer. This must be in the 2002 * form of ".foo xxx" in the usual way. 2003 */ 2004 static enum roff_tok 2005 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 2006 { 2007 char *cp; 2008 const char *mac; 2009 size_t maclen; 2010 int deftype; 2011 enum roff_tok t; 2012 2013 cp = buf + *pos; 2014 2015 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 2016 return TOKEN_NONE; 2017 2018 mac = cp; 2019 maclen = roff_getname(r, &cp, ln, ppos); 2020 2021 deftype = ROFFDEF_USER | ROFFDEF_REN; 2022 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 2023 switch (deftype) { 2024 case ROFFDEF_USER: 2025 t = ROFF_USERDEF; 2026 break; 2027 case ROFFDEF_REN: 2028 t = ROFF_RENAMED; 2029 break; 2030 default: 2031 t = roffhash_find(r->reqtab, mac, maclen); 2032 break; 2033 } 2034 if (t != TOKEN_NONE) 2035 *pos = cp - buf; 2036 else if (deftype == ROFFDEF_UNDEF) { 2037 /* Using an undefined macro defines it to be empty. 
*/ 2038 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2039 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2040 } 2041 return t; 2042 } 2043 2044 /* --- handling of request blocks ----------------------------------------- */ 2045 2046 /* 2047 * Close a macro definition block or an "ignore" block. 2048 */ 2049 static int 2050 roff_cblock(ROFF_ARGS) 2051 { 2052 int rr; 2053 2054 if (r->last == NULL) { 2055 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2056 return ROFF_IGN; 2057 } 2058 2059 switch (r->last->tok) { 2060 case ROFF_am: 2061 case ROFF_ami: 2062 case ROFF_de: 2063 case ROFF_dei: 2064 case ROFF_ig: 2065 break; 2066 case ROFF_am1: 2067 case ROFF_de1: 2068 /* Remapped in roff_block(). */ 2069 abort(); 2070 default: 2071 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2072 return ROFF_IGN; 2073 } 2074 2075 roffnode_pop(r); 2076 roffnode_cleanscope(r); 2077 2078 /* 2079 * If a conditional block with braces is still open, 2080 * check for "\}" block end markers. 2081 */ 2082 2083 if (r->last != NULL && r->last->endspan < 0) { 2084 rr = 1; /* If arguments follow "\}", warn about them. */ 2085 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2086 } 2087 2088 if (buf->buf[pos] != '\0') 2089 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2090 ".. %s", buf->buf + pos); 2091 2092 return ROFF_IGN; 2093 } 2094 2095 /* 2096 * Pop all nodes ending at the end of the current input line. 2097 * Return the number of loops ended. 2098 */ 2099 static int 2100 roffnode_cleanscope(struct roff *r) 2101 { 2102 int inloop; 2103 2104 inloop = 0; 2105 while (r->last != NULL && r->last->endspan > 0) { 2106 if (--r->last->endspan != 0) 2107 break; 2108 inloop += roffnode_pop(r); 2109 } 2110 return inloop; 2111 } 2112 2113 /* 2114 * Handle the closing "\}" of a conditional block. 2115 * Apart from generating warnings, this only pops nodes. 2116 * Return the number of loops ended. 
2117 */ 2118 static int 2119 roff_ccond(struct roff *r, int ln, int ppos) 2120 { 2121 if (NULL == r->last) { 2122 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2123 return 0; 2124 } 2125 2126 switch (r->last->tok) { 2127 case ROFF_el: 2128 case ROFF_ie: 2129 case ROFF_if: 2130 case ROFF_while: 2131 break; 2132 default: 2133 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2134 return 0; 2135 } 2136 2137 if (r->last->endspan > -1) { 2138 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2139 return 0; 2140 } 2141 2142 return roffnode_pop(r) + roffnode_cleanscope(r); 2143 } 2144 2145 static int 2146 roff_block(ROFF_ARGS) 2147 { 2148 const char *name, *value; 2149 char *call, *cp, *iname, *rname; 2150 size_t csz, namesz, rsz; 2151 int deftype; 2152 2153 /* Ignore groff compatibility mode for now. */ 2154 2155 if (tok == ROFF_de1) 2156 tok = ROFF_de; 2157 else if (tok == ROFF_dei1) 2158 tok = ROFF_dei; 2159 else if (tok == ROFF_am1) 2160 tok = ROFF_am; 2161 else if (tok == ROFF_ami1) 2162 tok = ROFF_ami; 2163 2164 /* Parse the macro name argument. */ 2165 2166 cp = buf->buf + pos; 2167 if (tok == ROFF_ig) { 2168 iname = NULL; 2169 namesz = 0; 2170 } else { 2171 iname = cp; 2172 namesz = roff_getname(r, &cp, ln, ppos); 2173 iname[namesz] = '\0'; 2174 } 2175 2176 /* Resolve the macro name argument if it is indirect. 
*/ 2177 2178 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2179 deftype = ROFFDEF_USER; 2180 name = roff_getstrn(r, iname, namesz, &deftype); 2181 if (name == NULL) { 2182 mandoc_msg(MANDOCERR_STR_UNDEF, 2183 ln, (int)(iname - buf->buf), 2184 "%.*s", (int)namesz, iname); 2185 namesz = 0; 2186 } else 2187 namesz = strlen(name); 2188 } else 2189 name = iname; 2190 2191 if (namesz == 0 && tok != ROFF_ig) { 2192 mandoc_msg(MANDOCERR_REQ_EMPTY, 2193 ln, ppos, "%s", roff_name[tok]); 2194 return ROFF_IGN; 2195 } 2196 2197 roffnode_push(r, tok, name, ln, ppos); 2198 2199 /* 2200 * At the beginning of a `de' macro, clear the existing string 2201 * with the same name, if there is one. New content will be 2202 * appended from roff_block_text() in multiline mode. 2203 */ 2204 2205 if (tok == ROFF_de || tok == ROFF_dei) { 2206 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2207 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2208 } else if (tok == ROFF_am || tok == ROFF_ami) { 2209 deftype = ROFFDEF_ANY; 2210 value = roff_getstrn(r, iname, namesz, &deftype); 2211 switch (deftype) { /* Before appending, ... */ 2212 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2213 roff_setstrn(&r->strtab, name, namesz, 2214 value, strlen(value), 0); 2215 break; 2216 case ROFFDEF_REN: /* call original standard macro. */ 2217 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2218 (int)strlen(value), value); 2219 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2220 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2221 free(call); 2222 break; 2223 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2224 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2225 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2226 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2227 (int)rsz, rname); 2228 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2229 free(call); 2230 free(rname); 2231 break; 2232 default: 2233 break; 2234 } 2235 } 2236 2237 if (*cp == '\0') 2238 return ROFF_IGN; 2239 2240 /* Get the custom end marker. */ 2241 2242 iname = cp; 2243 namesz = roff_getname(r, &cp, ln, ppos); 2244 2245 /* Resolve the end marker if it is indirect. */ 2246 2247 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2248 deftype = ROFFDEF_USER; 2249 name = roff_getstrn(r, iname, namesz, &deftype); 2250 if (name == NULL) { 2251 mandoc_msg(MANDOCERR_STR_UNDEF, 2252 ln, (int)(iname - buf->buf), 2253 "%.*s", (int)namesz, iname); 2254 namesz = 0; 2255 } else 2256 namesz = strlen(name); 2257 } else 2258 name = iname; 2259 2260 if (namesz) 2261 r->last->end = mandoc_strndup(name, namesz); 2262 2263 if (*cp != '\0') 2264 mandoc_msg(MANDOCERR_ARG_EXCESS, 2265 ln, pos, ".%s ... %s", roff_name[tok], cp); 2266 2267 return ROFF_IGN; 2268 } 2269 2270 static int 2271 roff_block_sub(ROFF_ARGS) 2272 { 2273 enum roff_tok t; 2274 int i, j; 2275 2276 /* 2277 * First check whether a custom macro exists at this level. If 2278 * it does, then check against it. This is some of groff's 2279 * stranger behaviours. If we encountered a custom end-scope 2280 * tag and that tag also happens to be a "real" macro, then we 2281 * need to try interpreting it again as a real macro. If it's 2282 * not, then return ignore. Else continue. 
2283 */ 2284 2285 if (r->last->end) { 2286 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2287 if (buf->buf[i] != r->last->end[j]) 2288 break; 2289 2290 if (r->last->end[j] == '\0' && 2291 (buf->buf[i] == '\0' || 2292 buf->buf[i] == ' ' || 2293 buf->buf[i] == '\t')) { 2294 roffnode_pop(r); 2295 roffnode_cleanscope(r); 2296 2297 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2298 i++; 2299 2300 pos = i; 2301 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2302 TOKEN_NONE) 2303 return ROFF_RERUN; 2304 return ROFF_IGN; 2305 } 2306 } 2307 2308 /* 2309 * If we have no custom end-query or lookup failed, then try 2310 * pulling it out of the hashtable. 2311 */ 2312 2313 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2314 2315 if (t != ROFF_cblock) { 2316 if (tok != ROFF_ig) 2317 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2318 return ROFF_IGN; 2319 } 2320 2321 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2322 } 2323 2324 static int 2325 roff_block_text(ROFF_ARGS) 2326 { 2327 2328 if (tok != ROFF_ig) 2329 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2330 2331 return ROFF_IGN; 2332 } 2333 2334 /* 2335 * Check for a closing "\}" and handle it. 2336 * In this function, the final "int *offs" argument is used for 2337 * different purposes than elsewhere: 2338 * Input: *offs == 0: caller wants to discard arguments following \} 2339 * *offs == 1: caller wants to preserve text following \} 2340 * Output: *offs = 0: tell caller to discard input line 2341 * *offs = 1: tell caller to use input line 2342 */ 2343 static int 2344 roff_cond_checkend(ROFF_ARGS) 2345 { 2346 char *ep; 2347 int endloop, irc, rr; 2348 2349 irc = ROFF_IGN; 2350 rr = r->last->rule; 2351 endloop = tok != ROFF_while ? ROFF_IGN : 2352 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2353 if (roffnode_cleanscope(r)) 2354 irc |= endloop; 2355 2356 /* 2357 * If "\}" occurs on a macro line without a preceding macro or 2358 * a text line contains nothing else, drop the line completely. 
2359 */ 2360 2361 ep = buf->buf + pos; 2362 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0)) 2363 rr = 0; 2364 2365 /* 2366 * The closing delimiter "\}" rewinds the conditional scope 2367 * but is otherwise ignored when interpreting the line. 2368 */ 2369 2370 while ((ep = strchr(ep, '\\')) != NULL) { 2371 switch (ep[1]) { 2372 case '}': 2373 if (ep[2] == '\0') 2374 ep[0] = '\0'; 2375 else if (rr) 2376 ep[1] = '&'; 2377 else 2378 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2379 if (roff_ccond(r, ln, ep - buf->buf)) 2380 irc |= endloop; 2381 break; 2382 case '\0': 2383 ++ep; 2384 break; 2385 default: 2386 ep += 2; 2387 break; 2388 } 2389 } 2390 *offs = rr; 2391 return irc; 2392 } 2393 2394 /* 2395 * Parse and process a request or macro line in conditional scope. 2396 */ 2397 static int 2398 roff_cond_sub(ROFF_ARGS) 2399 { 2400 struct roffnode *bl; 2401 int irc, rr; 2402 enum roff_tok t; 2403 2404 rr = 0; /* If arguments follow "\}", skip them. */ 2405 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2406 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2407 2408 /* For now, let high level macros abort .ce mode. */ 2409 2410 if (roffce_node != NULL && 2411 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 2412 t == ROFF_TH || t == ROFF_TS)) { 2413 r->man->last = roffce_node; 2414 r->man->next = ROFF_NEXT_SIBLING; 2415 roffce_lines = 0; 2416 roffce_node = NULL; 2417 } 2418 2419 /* 2420 * Fully handle known macros when they are structurally 2421 * required or when the conditional evaluated to true. 2422 */ 2423 2424 if (t == ROFF_break) { 2425 if (irc & ROFF_LOOPMASK) 2426 irc = ROFF_IGN | ROFF_LOOPEXIT; 2427 else if (rr) { 2428 for (bl = r->last; bl != NULL; bl = bl->parent) { 2429 bl->rule = 0; 2430 if (bl->tok == ROFF_while) 2431 break; 2432 } 2433 } 2434 } else if (t != TOKEN_NONE && 2435 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2436 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2437 else 2438 irc |= rr ? 
ROFF_CONT : ROFF_IGN; 2439 return irc; 2440 } 2441 2442 /* 2443 * Parse and process a text line in conditional scope. 2444 */ 2445 static int 2446 roff_cond_text(ROFF_ARGS) 2447 { 2448 int irc, rr; 2449 2450 rr = 1; /* If arguments follow "\}", preserve them. */ 2451 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2452 if (rr) 2453 irc |= ROFF_CONT; 2454 return irc; 2455 } 2456 2457 /* --- handling of numeric and conditional expressions -------------------- */ 2458 2459 /* 2460 * Parse a single signed integer number. Stop at the first non-digit. 2461 * If there is at least one digit, return success and advance the 2462 * parse point, else return failure and let the parse point unchanged. 2463 * Ignore overflows, treat them just like the C language. 2464 */ 2465 static int 2466 roff_getnum(const char *v, int *pos, int *res, int flags) 2467 { 2468 int myres, scaled, n, p; 2469 2470 if (NULL == res) 2471 res = &myres; 2472 2473 p = *pos; 2474 n = v[p] == '-'; 2475 if (n || v[p] == '+') 2476 p++; 2477 2478 if (flags & ROFFNUM_WHITE) 2479 while (isspace((unsigned char)v[p])) 2480 p++; 2481 2482 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2483 *res = 10 * *res + v[p] - '0'; 2484 if (p == *pos + n) 2485 return 0; 2486 2487 if (n) 2488 *res = -*res; 2489 2490 /* Each number may be followed by one optional scaling unit. 
*/ 2491 2492 switch (v[p]) { 2493 case 'f': 2494 scaled = *res * 65536; 2495 break; 2496 case 'i': 2497 scaled = *res * 240; 2498 break; 2499 case 'c': 2500 scaled = *res * 240 / 2.54; 2501 break; 2502 case 'v': 2503 case 'P': 2504 scaled = *res * 40; 2505 break; 2506 case 'm': 2507 case 'n': 2508 scaled = *res * 24; 2509 break; 2510 case 'p': 2511 scaled = *res * 10 / 3; 2512 break; 2513 case 'u': 2514 scaled = *res; 2515 break; 2516 case 'M': 2517 scaled = *res * 6 / 25; 2518 break; 2519 default: 2520 scaled = *res; 2521 p--; 2522 break; 2523 } 2524 if (flags & ROFFNUM_SCALE) 2525 *res = scaled; 2526 2527 *pos = p + 1; 2528 return 1; 2529 } 2530 2531 /* 2532 * Evaluate a string comparison condition. 2533 * The first character is the delimiter. 2534 * Succeed if the string up to its second occurrence 2535 * matches the string up to its third occurence. 2536 * Advance the cursor after the third occurrence 2537 * or lacking that, to the end of the line. 2538 */ 2539 static int 2540 roff_evalstrcond(const char *v, int *pos) 2541 { 2542 const char *s1, *s2, *s3; 2543 int match; 2544 2545 match = 0; 2546 s1 = v + *pos; /* initial delimiter */ 2547 s2 = s1 + 1; /* for scanning the first string */ 2548 s3 = strchr(s2, *s1); /* for scanning the second string */ 2549 2550 if (NULL == s3) /* found no middle delimiter */ 2551 goto out; 2552 2553 while ('\0' != *++s3) { 2554 if (*s2 != *s3) { /* mismatch */ 2555 s3 = strchr(s3, *s1); 2556 break; 2557 } 2558 if (*s3 == *s1) { /* found the final delimiter */ 2559 match = 1; 2560 break; 2561 } 2562 s2++; 2563 } 2564 2565 out: 2566 if (NULL == s3) 2567 s3 = strchr(s2, '\0'); 2568 else if (*s3 != '\0') 2569 s3++; 2570 *pos = s3 - v; 2571 return match; 2572 } 2573 2574 /* 2575 * Evaluate an optionally negated single character, numerical, 2576 * or string condition. 
2577 */ 2578 static int 2579 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2580 { 2581 const char *start, *end; 2582 char *cp, *name; 2583 size_t sz; 2584 int deftype, len, number, savepos, istrue, wanttrue; 2585 2586 if ('!' == v[*pos]) { 2587 wanttrue = 0; 2588 (*pos)++; 2589 } else 2590 wanttrue = 1; 2591 2592 switch (v[*pos]) { 2593 case '\0': 2594 return 0; 2595 case 'n': 2596 case 'o': 2597 (*pos)++; 2598 return wanttrue; 2599 case 'e': 2600 case 't': 2601 case 'v': 2602 (*pos)++; 2603 return !wanttrue; 2604 case 'c': 2605 do { 2606 (*pos)++; 2607 } while (v[*pos] == ' '); 2608 2609 /* 2610 * Quirk for groff compatibility: 2611 * The horizontal tab is neither available nor unavailable. 2612 */ 2613 2614 if (v[*pos] == '\t') { 2615 (*pos)++; 2616 return 0; 2617 } 2618 2619 /* Printable ASCII characters are available. */ 2620 2621 if (v[*pos] != '\\') { 2622 (*pos)++; 2623 return wanttrue; 2624 } 2625 2626 end = v + ++*pos; 2627 switch (mandoc_escape(&end, &start, &len)) { 2628 case ESCAPE_SPECIAL: 2629 istrue = mchars_spec2cp(start, len) != -1; 2630 break; 2631 case ESCAPE_UNICODE: 2632 istrue = 1; 2633 break; 2634 case ESCAPE_NUMBERED: 2635 istrue = mchars_num2char(start, len) != -1; 2636 break; 2637 default: 2638 istrue = !wanttrue; 2639 break; 2640 } 2641 *pos = end - v; 2642 return istrue == wanttrue; 2643 case 'd': 2644 case 'r': 2645 cp = v + *pos + 1; 2646 while (*cp == ' ') 2647 cp++; 2648 name = cp; 2649 sz = roff_getname(r, &cp, ln, cp - v); 2650 if (sz == 0) 2651 istrue = 0; 2652 else if (v[*pos] == 'r') 2653 istrue = roff_hasregn(r, name, sz); 2654 else { 2655 deftype = ROFFDEF_ANY; 2656 roff_getstrn(r, name, sz, &deftype); 2657 istrue = !!deftype; 2658 } 2659 *pos = (name + sz) - v; 2660 return istrue == wanttrue; 2661 default: 2662 break; 2663 } 2664 2665 savepos = *pos; 2666 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2667 return (number > 0) == wanttrue; 2668 else if (*pos == savepos) 2669 return roff_evalstrcond(v, pos) 
== wanttrue; 2670 else 2671 return 0; 2672 } 2673 2674 static int 2675 roff_line_ignore(ROFF_ARGS) 2676 { 2677 2678 return ROFF_IGN; 2679 } 2680 2681 static int 2682 roff_insec(ROFF_ARGS) 2683 { 2684 2685 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2686 return ROFF_IGN; 2687 } 2688 2689 static int 2690 roff_unsupp(ROFF_ARGS) 2691 { 2692 2693 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2694 return ROFF_IGN; 2695 } 2696 2697 static int 2698 roff_cond(ROFF_ARGS) 2699 { 2700 int irc; 2701 2702 roffnode_push(r, tok, NULL, ln, ppos); 2703 2704 /* 2705 * An `.el' has no conditional body: it will consume the value 2706 * of the current rstack entry set in prior `ie' calls or 2707 * defaults to DENY. 2708 * 2709 * If we're not an `el', however, then evaluate the conditional. 2710 */ 2711 2712 r->last->rule = tok == ROFF_el ? 2713 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2714 roff_evalcond(r, ln, buf->buf, &pos); 2715 2716 /* 2717 * An if-else will put the NEGATION of the current evaluated 2718 * conditional into the stack of rules. 2719 */ 2720 2721 if (tok == ROFF_ie) { 2722 if (r->rstackpos + 1 == r->rstacksz) { 2723 r->rstacksz += 16; 2724 r->rstack = mandoc_reallocarray(r->rstack, 2725 r->rstacksz, sizeof(int)); 2726 } 2727 r->rstack[++r->rstackpos] = !r->last->rule; 2728 } 2729 2730 /* If the parent has false as its rule, then so do we. */ 2731 2732 if (r->last->parent && !r->last->parent->rule) 2733 r->last->rule = 0; 2734 2735 /* 2736 * Determine scope. 2737 * If there is nothing on the line after the conditional, 2738 * not even whitespace, use next-line scope. 2739 * Except that .while does not support next-line scope. 2740 */ 2741 2742 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2743 r->last->endspan = 2; 2744 goto out; 2745 } 2746 2747 while (buf->buf[pos] == ' ') 2748 pos++; 2749 2750 /* An opening brace requests multiline scope. 
*/ 2751 2752 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2753 r->last->endspan = -1; 2754 pos += 2; 2755 while (buf->buf[pos] == ' ') 2756 pos++; 2757 goto out; 2758 } 2759 2760 /* 2761 * Anything else following the conditional causes 2762 * single-line scope. Warn if the scope contains 2763 * nothing but trailing whitespace. 2764 */ 2765 2766 if (buf->buf[pos] == '\0') 2767 mandoc_msg(MANDOCERR_COND_EMPTY, 2768 ln, ppos, "%s", roff_name[tok]); 2769 2770 r->last->endspan = 1; 2771 2772 out: 2773 *offs = pos; 2774 irc = ROFF_RERUN; 2775 if (tok == ROFF_while) 2776 irc |= ROFF_WHILE; 2777 return irc; 2778 } 2779 2780 static int 2781 roff_ds(ROFF_ARGS) 2782 { 2783 char *string; 2784 const char *name; 2785 size_t namesz; 2786 2787 /* Ignore groff compatibility mode for now. */ 2788 2789 if (tok == ROFF_ds1) 2790 tok = ROFF_ds; 2791 else if (tok == ROFF_as1) 2792 tok = ROFF_as; 2793 2794 /* 2795 * The first word is the name of the string. 2796 * If it is empty or terminated by an escape sequence, 2797 * abort the `ds' request without defining anything. 2798 */ 2799 2800 name = string = buf->buf + pos; 2801 if (*name == '\0') 2802 return ROFF_IGN; 2803 2804 namesz = roff_getname(r, &string, ln, pos); 2805 switch (name[namesz]) { 2806 case '\\': 2807 return ROFF_IGN; 2808 case '\t': 2809 string = buf->buf + pos + namesz; 2810 break; 2811 default: 2812 break; 2813 } 2814 2815 /* Read past the initial double-quote, if any. */ 2816 if (*string == '"') 2817 string++; 2818 2819 /* The rest is the value. */ 2820 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2821 ROFF_as == tok); 2822 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2823 return ROFF_IGN; 2824 } 2825 2826 /* 2827 * Parse a single operator, one or two characters long. 2828 * If the operator is recognized, return success and advance the 2829 * parse point, else return failure and let the parse point unchanged. 
2830 */ 2831 static int 2832 roff_getop(const char *v, int *pos, char *res) 2833 { 2834 2835 *res = v[*pos]; 2836 2837 switch (*res) { 2838 case '+': 2839 case '-': 2840 case '*': 2841 case '/': 2842 case '%': 2843 case '&': 2844 case ':': 2845 break; 2846 case '<': 2847 switch (v[*pos + 1]) { 2848 case '=': 2849 *res = 'l'; 2850 (*pos)++; 2851 break; 2852 case '>': 2853 *res = '!'; 2854 (*pos)++; 2855 break; 2856 case '?': 2857 *res = 'i'; 2858 (*pos)++; 2859 break; 2860 default: 2861 break; 2862 } 2863 break; 2864 case '>': 2865 switch (v[*pos + 1]) { 2866 case '=': 2867 *res = 'g'; 2868 (*pos)++; 2869 break; 2870 case '?': 2871 *res = 'a'; 2872 (*pos)++; 2873 break; 2874 default: 2875 break; 2876 } 2877 break; 2878 case '=': 2879 if ('=' == v[*pos + 1]) 2880 (*pos)++; 2881 break; 2882 default: 2883 return 0; 2884 } 2885 (*pos)++; 2886 2887 return *res; 2888 } 2889 2890 /* 2891 * Evaluate either a parenthesized numeric expression 2892 * or a single signed integer number. 2893 */ 2894 static int 2895 roff_evalpar(struct roff *r, int ln, 2896 const char *v, int *pos, int *res, int flags) 2897 { 2898 2899 if ('(' != v[*pos]) 2900 return roff_getnum(v, pos, res, flags); 2901 2902 (*pos)++; 2903 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2904 return 0; 2905 2906 /* 2907 * Omission of the closing parenthesis 2908 * is an error in validation mode, 2909 * but ignored in evaluation mode. 2910 */ 2911 2912 if (')' == v[*pos]) 2913 (*pos)++; 2914 else if (NULL == res) 2915 return 0; 2916 2917 return 1; 2918 } 2919 2920 /* 2921 * Evaluate a complete numeric expression. 2922 * Proceed left to right, there is no concept of precedence. 
2923 */ 2924 static int 2925 roff_evalnum(struct roff *r, int ln, const char *v, 2926 int *pos, int *res, int flags) 2927 { 2928 int mypos, operand2; 2929 char operator; 2930 2931 if (NULL == pos) { 2932 mypos = 0; 2933 pos = &mypos; 2934 } 2935 2936 if (flags & ROFFNUM_WHITE) 2937 while (isspace((unsigned char)v[*pos])) 2938 (*pos)++; 2939 2940 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2941 return 0; 2942 2943 while (1) { 2944 if (flags & ROFFNUM_WHITE) 2945 while (isspace((unsigned char)v[*pos])) 2946 (*pos)++; 2947 2948 if ( ! roff_getop(v, pos, &operator)) 2949 break; 2950 2951 if (flags & ROFFNUM_WHITE) 2952 while (isspace((unsigned char)v[*pos])) 2953 (*pos)++; 2954 2955 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2956 return 0; 2957 2958 if (flags & ROFFNUM_WHITE) 2959 while (isspace((unsigned char)v[*pos])) 2960 (*pos)++; 2961 2962 if (NULL == res) 2963 continue; 2964 2965 switch (operator) { 2966 case '+': 2967 *res += operand2; 2968 break; 2969 case '-': 2970 *res -= operand2; 2971 break; 2972 case '*': 2973 *res *= operand2; 2974 break; 2975 case '/': 2976 if (operand2 == 0) { 2977 mandoc_msg(MANDOCERR_DIVZERO, 2978 ln, *pos, "%s", v); 2979 *res = 0; 2980 break; 2981 } 2982 *res /= operand2; 2983 break; 2984 case '%': 2985 if (operand2 == 0) { 2986 mandoc_msg(MANDOCERR_DIVZERO, 2987 ln, *pos, "%s", v); 2988 *res = 0; 2989 break; 2990 } 2991 *res %= operand2; 2992 break; 2993 case '<': 2994 *res = *res < operand2; 2995 break; 2996 case '>': 2997 *res = *res > operand2; 2998 break; 2999 case 'l': 3000 *res = *res <= operand2; 3001 break; 3002 case 'g': 3003 *res = *res >= operand2; 3004 break; 3005 case '=': 3006 *res = *res == operand2; 3007 break; 3008 case '!': 3009 *res = *res != operand2; 3010 break; 3011 case '&': 3012 *res = *res && operand2; 3013 break; 3014 case ':': 3015 *res = *res || operand2; 3016 break; 3017 case 'i': 3018 if (operand2 < *res) 3019 *res = operand2; 3020 break; 3021 case 'a': 3022 if (operand2 > *res) 3023 *res 
= operand2; 3024 break; 3025 default: 3026 abort(); 3027 } 3028 } 3029 return 1; 3030 } 3031 3032 /* --- register management ------------------------------------------------ */ 3033 3034 void 3035 roff_setreg(struct roff *r, const char *name, int val, char sign) 3036 { 3037 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3038 } 3039 3040 static void 3041 roff_setregn(struct roff *r, const char *name, size_t len, 3042 int val, char sign, int step) 3043 { 3044 struct roffreg *reg; 3045 3046 /* Search for an existing register with the same name. */ 3047 reg = r->regtab; 3048 3049 while (reg != NULL && (reg->key.sz != len || 3050 strncmp(reg->key.p, name, len) != 0)) 3051 reg = reg->next; 3052 3053 if (NULL == reg) { 3054 /* Create a new register. */ 3055 reg = mandoc_malloc(sizeof(struct roffreg)); 3056 reg->key.p = mandoc_strndup(name, len); 3057 reg->key.sz = len; 3058 reg->val = 0; 3059 reg->step = 0; 3060 reg->next = r->regtab; 3061 r->regtab = reg; 3062 } 3063 3064 if ('+' == sign) 3065 reg->val += val; 3066 else if ('-' == sign) 3067 reg->val -= val; 3068 else 3069 reg->val = val; 3070 if (step != INT_MIN) 3071 reg->step = step; 3072 } 3073 3074 /* 3075 * Handle some predefined read-only number registers. 3076 * For now, return -1 if the requested register is not predefined; 3077 * in case a predefined read-only register having the value -1 3078 * were to turn up, another special value would have to be chosen. 3079 */ 3080 static int 3081 roff_getregro(const struct roff *r, const char *name) 3082 { 3083 3084 switch (*name) { 3085 case '$': /* Number of arguments of the last macro evaluated. */ 3086 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3087 case 'A': /* ASCII approximation mode is always off. */ 3088 return 0; 3089 case 'g': /* Groff compatibility mode is always on. */ 3090 return 1; 3091 case 'H': /* Fixed horizontal resolution. */ 3092 return 24; 3093 case 'j': /* Always adjust left margin only. 
*/ 3094 return 0; 3095 case 'T': /* Some output device is always defined. */ 3096 return 1; 3097 case 'V': /* Fixed vertical resolution. */ 3098 return 40; 3099 default: 3100 return -1; 3101 } 3102 } 3103 3104 int 3105 roff_getreg(struct roff *r, const char *name) 3106 { 3107 return roff_getregn(r, name, strlen(name), '\0'); 3108 } 3109 3110 static int 3111 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3112 { 3113 struct roffreg *reg; 3114 int val; 3115 3116 if ('.' == name[0] && 2 == len) { 3117 val = roff_getregro(r, name + 1); 3118 if (-1 != val) 3119 return val; 3120 } 3121 3122 for (reg = r->regtab; reg; reg = reg->next) { 3123 if (len == reg->key.sz && 3124 0 == strncmp(name, reg->key.p, len)) { 3125 switch (sign) { 3126 case '+': 3127 reg->val += reg->step; 3128 break; 3129 case '-': 3130 reg->val -= reg->step; 3131 break; 3132 default: 3133 break; 3134 } 3135 return reg->val; 3136 } 3137 } 3138 3139 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3140 return 0; 3141 } 3142 3143 static int 3144 roff_hasregn(const struct roff *r, const char *name, size_t len) 3145 { 3146 struct roffreg *reg; 3147 int val; 3148 3149 if ('.' 
== name[0] && 2 == len) { 3150 val = roff_getregro(r, name + 1); 3151 if (-1 != val) 3152 return 1; 3153 } 3154 3155 for (reg = r->regtab; reg; reg = reg->next) 3156 if (len == reg->key.sz && 3157 0 == strncmp(name, reg->key.p, len)) 3158 return 1; 3159 3160 return 0; 3161 } 3162 3163 static void 3164 roff_freereg(struct roffreg *reg) 3165 { 3166 struct roffreg *old_reg; 3167 3168 while (NULL != reg) { 3169 free(reg->key.p); 3170 old_reg = reg; 3171 reg = reg->next; 3172 free(old_reg); 3173 } 3174 } 3175 3176 static int 3177 roff_nr(ROFF_ARGS) 3178 { 3179 char *key, *val, *step; 3180 size_t keysz; 3181 int iv, is, len; 3182 char sign; 3183 3184 key = val = buf->buf + pos; 3185 if (*key == '\0') 3186 return ROFF_IGN; 3187 3188 keysz = roff_getname(r, &val, ln, pos); 3189 if (key[keysz] == '\\' || key[keysz] == '\t') 3190 return ROFF_IGN; 3191 3192 sign = *val; 3193 if (sign == '+' || sign == '-') 3194 val++; 3195 3196 len = 0; 3197 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3198 return ROFF_IGN; 3199 3200 step = val + len; 3201 while (isspace((unsigned char)*step)) 3202 step++; 3203 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3204 is = INT_MIN; 3205 3206 roff_setregn(r, key, keysz, iv, sign, is); 3207 return ROFF_IGN; 3208 } 3209 3210 static int 3211 roff_rr(ROFF_ARGS) 3212 { 3213 struct roffreg *reg, **prev; 3214 char *name, *cp; 3215 size_t namesz; 3216 3217 name = cp = buf->buf + pos; 3218 if (*name == '\0') 3219 return ROFF_IGN; 3220 namesz = roff_getname(r, &cp, ln, pos); 3221 name[namesz] = '\0'; 3222 3223 prev = &r->regtab; 3224 while (1) { 3225 reg = *prev; 3226 if (reg == NULL || !strcmp(name, reg->key.p)) 3227 break; 3228 prev = ®->next; 3229 } 3230 if (reg != NULL) { 3231 *prev = reg->next; 3232 free(reg->key.p); 3233 free(reg); 3234 } 3235 return ROFF_IGN; 3236 } 3237 3238 /* --- handler functions for roff requests -------------------------------- */ 3239 3240 static int 3241 roff_rm(ROFF_ARGS) 3242 { 3243 const char *name; 3244 
char *cp; 3245 size_t namesz; 3246 3247 cp = buf->buf + pos; 3248 while (*cp != '\0') { 3249 name = cp; 3250 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3251 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3252 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3253 if (name[namesz] == '\\' || name[namesz] == '\t') 3254 break; 3255 } 3256 return ROFF_IGN; 3257 } 3258 3259 static int 3260 roff_it(ROFF_ARGS) 3261 { 3262 int iv; 3263 3264 /* Parse the number of lines. */ 3265 3266 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3267 mandoc_msg(MANDOCERR_IT_NONUM, 3268 ln, ppos, "%s", buf->buf + 1); 3269 return ROFF_IGN; 3270 } 3271 3272 while (isspace((unsigned char)buf->buf[pos])) 3273 pos++; 3274 3275 /* 3276 * Arm the input line trap. 3277 * Special-casing "an-trap" is an ugly workaround to cope 3278 * with DocBook stupidly fiddling with man(7) internals. 3279 */ 3280 3281 roffit_lines = iv; 3282 roffit_macro = mandoc_strdup(iv != 1 || 3283 strcmp(buf->buf + pos, "an-trap") ? 
3284 buf->buf + pos : "br"); 3285 return ROFF_IGN; 3286 } 3287 3288 static int 3289 roff_Dd(ROFF_ARGS) 3290 { 3291 int mask; 3292 enum roff_tok t, te; 3293 3294 switch (tok) { 3295 case ROFF_Dd: 3296 tok = MDOC_Dd; 3297 te = MDOC_MAX; 3298 if (r->format == 0) 3299 r->format = MPARSE_MDOC; 3300 mask = MPARSE_MDOC | MPARSE_QUICK; 3301 break; 3302 case ROFF_TH: 3303 tok = MAN_TH; 3304 te = MAN_MAX; 3305 if (r->format == 0) 3306 r->format = MPARSE_MAN; 3307 mask = MPARSE_QUICK; 3308 break; 3309 default: 3310 abort(); 3311 } 3312 if ((r->options & mask) == 0) 3313 for (t = tok; t < te; t++) 3314 roff_setstr(r, roff_name[t], NULL, 0); 3315 return ROFF_CONT; 3316 } 3317 3318 static int 3319 roff_TE(ROFF_ARGS) 3320 { 3321 r->man->flags &= ~ROFF_NONOFILL; 3322 if (r->tbl == NULL) { 3323 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3324 return ROFF_IGN; 3325 } 3326 if (tbl_end(r->tbl, 0) == 0) { 3327 r->tbl = NULL; 3328 free(buf->buf); 3329 buf->buf = mandoc_strdup(".sp"); 3330 buf->sz = 4; 3331 *offs = 0; 3332 return ROFF_REPARSE; 3333 } 3334 r->tbl = NULL; 3335 return ROFF_IGN; 3336 } 3337 3338 static int 3339 roff_T_(ROFF_ARGS) 3340 { 3341 3342 if (NULL == r->tbl) 3343 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3344 else 3345 tbl_restart(ln, ppos, r->tbl); 3346 3347 return ROFF_IGN; 3348 } 3349 3350 /* 3351 * Handle in-line equation delimiters. 3352 */ 3353 static int 3354 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3355 { 3356 char *cp1, *cp2; 3357 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3358 3359 /* 3360 * Outside equations, look for an opening delimiter. 3361 * If we are inside an equation, we already know it is 3362 * in-line, or this function wouldn't have been called; 3363 * so look for a closing delimiter. 3364 */ 3365 3366 cp1 = buf->buf + pos; 3367 cp2 = strchr(cp1, r->eqn == NULL ? 
3368 r->last_eqn->odelim : r->last_eqn->cdelim); 3369 if (cp2 == NULL) 3370 return ROFF_CONT; 3371 3372 *cp2++ = '\0'; 3373 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3374 3375 /* Handle preceding text, protecting whitespace. */ 3376 3377 if (*buf->buf != '\0') { 3378 if (r->eqn == NULL) 3379 bef_pr = "\\&"; 3380 bef_nl = "\n"; 3381 } 3382 3383 /* 3384 * Prepare replacing the delimiter with an equation macro 3385 * and drop leading white space from the equation. 3386 */ 3387 3388 if (r->eqn == NULL) { 3389 while (*cp2 == ' ') 3390 cp2++; 3391 mac = ".EQ"; 3392 } else 3393 mac = ".EN"; 3394 3395 /* Handle following text, protecting whitespace. */ 3396 3397 if (*cp2 != '\0') { 3398 aft_nl = "\n"; 3399 if (r->eqn != NULL) 3400 aft_pr = "\\&"; 3401 } 3402 3403 /* Do the actual replacement. */ 3404 3405 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3406 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3407 free(buf->buf); 3408 buf->buf = cp1; 3409 3410 /* Toggle the in-line state of the eqn subsystem. 
*/ 3411 3412 r->eqn_inline = r->eqn == NULL; 3413 return ROFF_REPARSE; 3414 } 3415 3416 static int 3417 roff_EQ(ROFF_ARGS) 3418 { 3419 struct roff_node *n; 3420 3421 if (r->man->meta.macroset == MACROSET_MAN) 3422 man_breakscope(r->man, ROFF_EQ); 3423 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3424 if (ln > r->man->last->line) 3425 n->flags |= NODE_LINE; 3426 n->eqn = eqn_box_new(); 3427 roff_node_append(r->man, n); 3428 r->man->next = ROFF_NEXT_SIBLING; 3429 3430 assert(r->eqn == NULL); 3431 if (r->last_eqn == NULL) 3432 r->last_eqn = eqn_alloc(); 3433 else 3434 eqn_reset(r->last_eqn); 3435 r->eqn = r->last_eqn; 3436 r->eqn->node = n; 3437 3438 if (buf->buf[pos] != '\0') 3439 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3440 ".EQ %s", buf->buf + pos); 3441 3442 return ROFF_IGN; 3443 } 3444 3445 static int 3446 roff_EN(ROFF_ARGS) 3447 { 3448 if (r->eqn != NULL) { 3449 eqn_parse(r->eqn); 3450 r->eqn = NULL; 3451 } else 3452 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3453 if (buf->buf[pos] != '\0') 3454 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3455 "EN %s", buf->buf + pos); 3456 return ROFF_IGN; 3457 } 3458 3459 static int 3460 roff_TS(ROFF_ARGS) 3461 { 3462 if (r->tbl != NULL) { 3463 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3464 tbl_end(r->tbl, 0); 3465 } 3466 r->man->flags |= ROFF_NONOFILL; 3467 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3468 if (r->last_tbl == NULL) 3469 r->first_tbl = r->tbl; 3470 r->last_tbl = r->tbl; 3471 return ROFF_IGN; 3472 } 3473 3474 static int 3475 roff_noarg(ROFF_ARGS) 3476 { 3477 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3478 man_breakscope(r->man, tok); 3479 if (tok == ROFF_brp) 3480 tok = ROFF_br; 3481 roff_elem_alloc(r->man, ln, ppos, tok); 3482 if (buf->buf[pos] != '\0') 3483 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3484 "%s %s", roff_name[tok], buf->buf + pos); 3485 if (tok == ROFF_nf) 3486 r->man->flags |= ROFF_NOFILL; 3487 else if (tok == ROFF_fi) 3488 r->man->flags &= ~ROFF_NOFILL; 3489 
/*
 * Handle requests taking at most one argument; the code below
 * special-cases ROFF_ce, ROFF_rj, ROFF_sp, and ROFF_ti.
 * An element node is allocated for the request, and the first word
 * of the argument, if any, is attached with roff_word_alloc().
 * Text after the first word is reported as MANDOCERR_ARG_EXCESS.
 * For ce and rj, the argument is evaluated into roffce_lines and
 * roffce_node is updated accordingly.
 * Always returns ROFF_IGN.
 */
static int
roff_onearg(ROFF_ARGS)
{
	struct roff_node	*n;
	char			*cp;
	int			 npos;

	/*
	 * With MAN_BLINE or MAN_ELINE set, these four requests
	 * hand the pending scope to man_breakscope() first.
	 */
	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
	     tok == ROFF_ti))
		man_breakscope(r->man, tok);

	/*
	 * If an earlier ce/rj node is still recorded, continue
	 * the tree as a sibling after that node.
	 */
	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	roff_elem_alloc(r->man, ln, ppos, tok);
	n = r->man->last;	/* Remember the new element node. */

	cp = buf->buf + pos;
	if (*cp != '\0') {
		/*
		 * Terminate the first word in place, overwriting the
		 * blanks after it; cp ends up at any excess text.
		 */
		while (*cp != '\0' && *cp != ' ')
			cp++;
		while (*cp == ' ')
			*cp++ = '\0';
		if (*cp != '\0')
			mandoc_msg(MANDOCERR_ARG_EXCESS,
			    ln, (int)(cp - buf->buf),
			    "%s ... %s", roff_name[tok], cp);
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
	}

	if (tok == ROFF_ce || tok == ROFF_rj) {
		/*
		 * The last node still being the element presumably
		 * means no argument word was added above; supply
		 * "1" and mark that word with NODE_NOSRC.
		 */
		if (r->man->last->type == ROFFT_ELEM) {
			roff_word_alloc(r->man, ln, pos, "1");
			r->man->last->flags |= NODE_NOSRC;
		}
		/* Evaluate the word as the line count; fall back to 1. */
		npos = 0;
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
		    &roffce_lines, 0) == 0) {
			mandoc_msg(MANDOCERR_CE_NONUM,
			    ln, pos, "ce %s", buf->buf + pos);
			roffce_lines = 1;
		}
		if (roffce_lines < 1) {
			/* A count below one clears the recorded state. */
			r->man->last = r->man->last->parent;
			roffce_node = NULL;
			roffce_lines = 0;
		} else
			roffce_node = r->man->last->parent;
	} else {
		/* The other requests are complete at this point. */
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->last = n;
	}
	n->flags |= NODE_LINE;
	r->man->next = ROFF_NEXT_SIBLING;
	return ROFF_IGN;
}
*ep != ' ') 3565 ep++; 3566 while (*ep == ' ') 3567 *ep++ = '\0'; 3568 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3569 } 3570 3571 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3572 r->man->last = n; 3573 r->man->next = ROFF_NEXT_SIBLING; 3574 return ROFF_IGN; 3575 } 3576 3577 static int 3578 roff_als(ROFF_ARGS) 3579 { 3580 char *oldn, *newn, *end, *value; 3581 size_t oldsz, newsz, valsz; 3582 3583 newn = oldn = buf->buf + pos; 3584 if (*newn == '\0') 3585 return ROFF_IGN; 3586 3587 newsz = roff_getname(r, &oldn, ln, pos); 3588 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3589 return ROFF_IGN; 3590 3591 end = oldn; 3592 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3593 if (oldsz == 0) 3594 return ROFF_IGN; 3595 3596 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3597 (int)oldsz, oldn); 3598 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3599 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3600 free(value); 3601 return ROFF_IGN; 3602 } 3603 3604 /* 3605 * The .break request only makes sense inside conditionals, 3606 * and that case is already handled in roff_cond_sub(). 3607 */ 3608 static int 3609 roff_break(ROFF_ARGS) 3610 { 3611 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3612 return ROFF_IGN; 3613 } 3614 3615 static int 3616 roff_cc(ROFF_ARGS) 3617 { 3618 const char *p; 3619 3620 p = buf->buf + pos; 3621 3622 if (*p == '\0' || (r->control = *p++) == '.') 3623 r->control = '\0'; 3624 3625 if (*p != '\0') 3626 mandoc_msg(MANDOCERR_ARG_EXCESS, 3627 ln, p - buf->buf, "cc ... %s", p); 3628 3629 return ROFF_IGN; 3630 } 3631 3632 static int 3633 roff_char(ROFF_ARGS) 3634 { 3635 const char *p, *kp, *vp; 3636 size_t ksz, vsz; 3637 int font; 3638 3639 /* Parse the character to be replaced. 
*/ 3640 3641 kp = buf->buf + pos; 3642 p = kp + 1; 3643 if (*kp == '\0' || (*kp == '\\' && 3644 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3645 (*p != ' ' && *p != '\0')) { 3646 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3647 return ROFF_IGN; 3648 } 3649 ksz = p - kp; 3650 while (*p == ' ') 3651 p++; 3652 3653 /* 3654 * If the replacement string contains a font escape sequence, 3655 * we have to restore the font at the end. 3656 */ 3657 3658 vp = p; 3659 vsz = strlen(p); 3660 font = 0; 3661 while (*p != '\0') { 3662 if (*p++ != '\\') 3663 continue; 3664 switch (mandoc_escape(&p, NULL, NULL)) { 3665 case ESCAPE_FONT: 3666 case ESCAPE_FONTROMAN: 3667 case ESCAPE_FONTITALIC: 3668 case ESCAPE_FONTBOLD: 3669 case ESCAPE_FONTBI: 3670 case ESCAPE_FONTCR: 3671 case ESCAPE_FONTCB: 3672 case ESCAPE_FONTCI: 3673 case ESCAPE_FONTPREV: 3674 font++; 3675 break; 3676 default: 3677 break; 3678 } 3679 } 3680 if (font > 1) 3681 mandoc_msg(MANDOCERR_CHAR_FONT, 3682 ln, (int)(vp - buf->buf), "%s", vp); 3683 3684 /* 3685 * Approximate the effect of .char using the .tr tables. 3686 * XXX In groff, .char and .tr interact differently. 3687 */ 3688 3689 if (ksz == 1) { 3690 if (r->xtab == NULL) 3691 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3692 assert((unsigned int)*kp < 128); 3693 free(r->xtab[(int)*kp].p); 3694 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3695 "%s%s", vp, font ? "\fP" : ""); 3696 } else { 3697 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3698 if (font) 3699 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3700 } 3701 return ROFF_IGN; 3702 } 3703 3704 static int 3705 roff_ec(ROFF_ARGS) 3706 { 3707 const char *p; 3708 3709 p = buf->buf + pos; 3710 if (*p == '\0') 3711 r->escape = '\\'; 3712 else { 3713 r->escape = *p; 3714 if (*++p != '\0') 3715 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3716 (int)(p - buf->buf), "ec ... 
%s", p); 3717 } 3718 return ROFF_IGN; 3719 } 3720 3721 static int 3722 roff_eo(ROFF_ARGS) 3723 { 3724 r->escape = '\0'; 3725 if (buf->buf[pos] != '\0') 3726 mandoc_msg(MANDOCERR_ARG_SKIP, 3727 ln, pos, "eo %s", buf->buf + pos); 3728 return ROFF_IGN; 3729 } 3730 3731 static int 3732 roff_nop(ROFF_ARGS) 3733 { 3734 while (buf->buf[pos] == ' ') 3735 pos++; 3736 *offs = pos; 3737 return ROFF_RERUN; 3738 } 3739 3740 static int 3741 roff_tr(ROFF_ARGS) 3742 { 3743 const char *p, *first, *second; 3744 size_t fsz, ssz; 3745 enum mandoc_esc esc; 3746 3747 p = buf->buf + pos; 3748 3749 if (*p == '\0') { 3750 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3751 return ROFF_IGN; 3752 } 3753 3754 while (*p != '\0') { 3755 fsz = ssz = 1; 3756 3757 first = p++; 3758 if (*first == '\\') { 3759 esc = mandoc_escape(&p, NULL, NULL); 3760 if (esc == ESCAPE_ERROR) { 3761 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3762 (int)(p - buf->buf), "%s", first); 3763 return ROFF_IGN; 3764 } 3765 fsz = (size_t)(p - first); 3766 } 3767 3768 second = p++; 3769 if (*second == '\\') { 3770 esc = mandoc_escape(&p, NULL, NULL); 3771 if (esc == ESCAPE_ERROR) { 3772 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3773 (int)(p - buf->buf), "%s", second); 3774 return ROFF_IGN; 3775 } 3776 ssz = (size_t)(p - second); 3777 } else if (*second == '\0') { 3778 mandoc_msg(MANDOCERR_TR_ODD, ln, 3779 (int)(first - buf->buf), "tr %s", first); 3780 second = " "; 3781 p--; 3782 } 3783 3784 if (fsz > 1) { 3785 roff_setstrn(&r->xmbtab, first, fsz, 3786 second, ssz, 0); 3787 continue; 3788 } 3789 3790 if (r->xtab == NULL) 3791 r->xtab = mandoc_calloc(128, 3792 sizeof(struct roffstr)); 3793 3794 free(r->xtab[(int)*first].p); 3795 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3796 r->xtab[(int)*first].sz = ssz; 3797 } 3798 3799 return ROFF_IGN; 3800 } 3801 3802 /* 3803 * Implementation of the .return request. 3804 * There is no need to call roff_userret() from here. 
3805 * The read module will call that after rewinding the reader stack 3806 * to the place from where the current macro was called. 3807 */ 3808 static int 3809 roff_return(ROFF_ARGS) 3810 { 3811 if (r->mstackpos >= 0) 3812 return ROFF_IGN | ROFF_USERRET; 3813 3814 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3815 return ROFF_IGN; 3816 } 3817 3818 static int 3819 roff_rn(ROFF_ARGS) 3820 { 3821 const char *value; 3822 char *oldn, *newn, *end; 3823 size_t oldsz, newsz; 3824 int deftype; 3825 3826 oldn = newn = buf->buf + pos; 3827 if (*oldn == '\0') 3828 return ROFF_IGN; 3829 3830 oldsz = roff_getname(r, &newn, ln, pos); 3831 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3832 return ROFF_IGN; 3833 3834 end = newn; 3835 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3836 if (newsz == 0) 3837 return ROFF_IGN; 3838 3839 deftype = ROFFDEF_ANY; 3840 value = roff_getstrn(r, oldn, oldsz, &deftype); 3841 switch (deftype) { 3842 case ROFFDEF_USER: 3843 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3844 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3845 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3846 break; 3847 case ROFFDEF_PRE: 3848 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3849 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3850 break; 3851 case ROFFDEF_REN: 3852 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3853 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3854 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3855 break; 3856 case ROFFDEF_STD: 3857 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3858 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3859 break; 3860 default: 3861 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3862 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3863 break; 3864 } 3865 return ROFF_IGN; 3866 } 3867 3868 static int 3869 roff_shift(ROFF_ARGS) 3870 { 3871 struct mctx *ctx; 3872 int levels, i; 3873 3874 levels = 1; 
3875 if (buf->buf[pos] != '\0' && 3876 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3877 mandoc_msg(MANDOCERR_CE_NONUM, 3878 ln, pos, "shift %s", buf->buf + pos); 3879 levels = 1; 3880 } 3881 if (r->mstackpos < 0) { 3882 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3883 return ROFF_IGN; 3884 } 3885 ctx = r->mstack + r->mstackpos; 3886 if (levels > ctx->argc) { 3887 mandoc_msg(MANDOCERR_SHIFT, 3888 ln, pos, "%d, but max is %d", levels, ctx->argc); 3889 levels = ctx->argc; 3890 } 3891 if (levels == 0) 3892 return ROFF_IGN; 3893 for (i = 0; i < levels; i++) 3894 free(ctx->argv[i]); 3895 ctx->argc -= levels; 3896 for (i = 0; i < ctx->argc; i++) 3897 ctx->argv[i] = ctx->argv[i + levels]; 3898 return ROFF_IGN; 3899 } 3900 3901 static int 3902 roff_so(ROFF_ARGS) 3903 { 3904 char *name, *cp; 3905 3906 name = buf->buf + pos; 3907 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3908 3909 /* 3910 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3911 * opening anything that's not in our cwd or anything beneath 3912 * it. Thus, explicitly disallow traversing up the file-system 3913 * or using absolute paths. 3914 */ 3915 3916 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3917 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3918 buf->sz = mandoc_asprintf(&cp, 3919 ".sp\nSee the file %s.\n.sp", name) + 1; 3920 free(buf->buf); 3921 buf->buf = cp; 3922 *offs = 0; 3923 return ROFF_REPARSE; 3924 } 3925 3926 *offs = pos; 3927 return ROFF_SO; 3928 } 3929 3930 /* --- user defined strings and macros ------------------------------------ */ 3931 3932 static int 3933 roff_userdef(ROFF_ARGS) 3934 { 3935 struct mctx *ctx; 3936 char *arg, *ap, *dst, *src; 3937 size_t sz; 3938 3939 /* If the macro is empty, ignore it altogether. */ 3940 3941 if (*r->current_string == '\0') 3942 return ROFF_IGN; 3943 3944 /* Initialize a new macro stack context. 
*/ 3945 3946 if (++r->mstackpos == r->mstacksz) { 3947 r->mstack = mandoc_recallocarray(r->mstack, 3948 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3949 r->mstacksz += 8; 3950 } 3951 ctx = r->mstack + r->mstackpos; 3952 ctx->argsz = 0; 3953 ctx->argc = 0; 3954 ctx->argv = NULL; 3955 3956 /* 3957 * Collect pointers to macro argument strings, 3958 * NUL-terminating them and escaping quotes. 3959 */ 3960 3961 src = buf->buf + pos; 3962 while (*src != '\0') { 3963 if (ctx->argc == ctx->argsz) { 3964 ctx->argsz += 8; 3965 ctx->argv = mandoc_reallocarray(ctx->argv, 3966 ctx->argsz, sizeof(*ctx->argv)); 3967 } 3968 arg = roff_getarg(r, &src, ln, &pos); 3969 sz = 1; /* For the terminating NUL. */ 3970 for (ap = arg; *ap != '\0'; ap++) 3971 sz += *ap == '"' ? 4 : 1; 3972 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3973 for (ap = arg; *ap != '\0'; ap++) { 3974 if (*ap == '"') { 3975 memcpy(dst, "\\(dq", 4); 3976 dst += 4; 3977 } else 3978 *dst++ = *ap; 3979 } 3980 *dst = '\0'; 3981 free(arg); 3982 } 3983 3984 /* Replace the macro invocation by the macro definition. */ 3985 3986 free(buf->buf); 3987 buf->buf = mandoc_strdup(r->current_string); 3988 buf->sz = strlen(buf->buf) + 1; 3989 *offs = 0; 3990 3991 return buf->buf[buf->sz - 2] == '\n' ? 3992 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3993 } 3994 3995 /* 3996 * Calling a high-level macro that was renamed with .rn. 3997 * r->current_string has already been set up by roff_parse(). 3998 */ 3999 static int 4000 roff_renamed(ROFF_ARGS) 4001 { 4002 char *nbuf; 4003 4004 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 4005 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 4006 free(buf->buf); 4007 buf->buf = nbuf; 4008 *offs = 0; 4009 return ROFF_CONT; 4010 } 4011 4012 /* 4013 * Measure the length in bytes of the roff identifier at *cpp 4014 * and advance the pointer to the next word. 
4015 */ 4016 static size_t 4017 roff_getname(struct roff *r, char **cpp, int ln, int pos) 4018 { 4019 char *name, *cp; 4020 size_t namesz; 4021 4022 name = *cpp; 4023 if (*name == '\0') 4024 return 0; 4025 4026 /* Advance cp to the byte after the end of the name. */ 4027 4028 for (cp = name; 1; cp++) { 4029 namesz = cp - name; 4030 if (*cp == '\0') 4031 break; 4032 if (*cp == ' ' || *cp == '\t') { 4033 cp++; 4034 break; 4035 } 4036 if (*cp != '\\') 4037 continue; 4038 if (cp[1] == '{' || cp[1] == '}') 4039 break; 4040 if (*++cp == '\\') 4041 continue; 4042 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4043 "%.*s", (int)(cp - name + 1), name); 4044 mandoc_escape((const char **)&cp, NULL, NULL); 4045 break; 4046 } 4047 4048 /* Read past spaces. */ 4049 4050 while (*cp == ' ') 4051 cp++; 4052 4053 *cpp = cp; 4054 return namesz; 4055 } 4056 4057 /* 4058 * Store *string into the user-defined string called *name. 4059 * To clear an existing entry, call with (*r, *name, NULL, 0). 4060 * append == 0: replace mode 4061 * append == 1: single-line append mode 4062 * append == 2: multiline append mode, append '\n' after each call 4063 */ 4064 static void 4065 roff_setstr(struct roff *r, const char *name, const char *string, 4066 int append) 4067 { 4068 size_t namesz; 4069 4070 namesz = strlen(name); 4071 roff_setstrn(&r->strtab, name, namesz, string, 4072 string ? strlen(string) : 0, append); 4073 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4074 } 4075 4076 static void 4077 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4078 const char *string, size_t stringsz, int append) 4079 { 4080 struct roffkv *n; 4081 char *c; 4082 int i; 4083 size_t oldch, newch; 4084 4085 /* Search for an existing string with the same name. */ 4086 n = *r; 4087 4088 while (n && (namesz != n->key.sz || 4089 strncmp(n->key.p, name, namesz))) 4090 n = n->next; 4091 4092 if (NULL == n) { 4093 /* Create a new string table entry. 
*/ 4094 n = mandoc_malloc(sizeof(struct roffkv)); 4095 n->key.p = mandoc_strndup(name, namesz); 4096 n->key.sz = namesz; 4097 n->val.p = NULL; 4098 n->val.sz = 0; 4099 n->next = *r; 4100 *r = n; 4101 } else if (0 == append) { 4102 free(n->val.p); 4103 n->val.p = NULL; 4104 n->val.sz = 0; 4105 } 4106 4107 if (NULL == string) 4108 return; 4109 4110 /* 4111 * One additional byte for the '\n' in multiline mode, 4112 * and one for the terminating '\0'. 4113 */ 4114 newch = stringsz + (1 < append ? 2u : 1u); 4115 4116 if (NULL == n->val.p) { 4117 n->val.p = mandoc_malloc(newch); 4118 *n->val.p = '\0'; 4119 oldch = 0; 4120 } else { 4121 oldch = n->val.sz; 4122 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4123 } 4124 4125 /* Skip existing content in the destination buffer. */ 4126 c = n->val.p + (int)oldch; 4127 4128 /* Append new content to the destination buffer. */ 4129 i = 0; 4130 while (i < (int)stringsz) { 4131 /* 4132 * Rudimentary roff copy mode: 4133 * Handle escaped backslashes. 4134 */ 4135 if ('\\' == string[i] && '\\' == string[i + 1]) 4136 i++; 4137 *c++ = string[i++]; 4138 } 4139 4140 /* Append terminating bytes. 
*/ 4141 if (1 < append) 4142 *c++ = '\n'; 4143 4144 *c = '\0'; 4145 n->val.sz = (int)(c - n->val.p); 4146 } 4147 4148 static const char * 4149 roff_getstrn(struct roff *r, const char *name, size_t len, 4150 int *deftype) 4151 { 4152 const struct roffkv *n; 4153 int found, i; 4154 enum roff_tok tok; 4155 4156 found = 0; 4157 for (n = r->strtab; n != NULL; n = n->next) { 4158 if (strncmp(name, n->key.p, len) != 0 || 4159 n->key.p[len] != '\0' || n->val.p == NULL) 4160 continue; 4161 if (*deftype & ROFFDEF_USER) { 4162 *deftype = ROFFDEF_USER; 4163 return n->val.p; 4164 } else { 4165 found = 1; 4166 break; 4167 } 4168 } 4169 for (n = r->rentab; n != NULL; n = n->next) { 4170 if (strncmp(name, n->key.p, len) != 0 || 4171 n->key.p[len] != '\0' || n->val.p == NULL) 4172 continue; 4173 if (*deftype & ROFFDEF_REN) { 4174 *deftype = ROFFDEF_REN; 4175 return n->val.p; 4176 } else { 4177 found = 1; 4178 break; 4179 } 4180 } 4181 for (i = 0; i < PREDEFS_MAX; i++) { 4182 if (strncmp(name, predefs[i].name, len) != 0 || 4183 predefs[i].name[len] != '\0') 4184 continue; 4185 if (*deftype & ROFFDEF_PRE) { 4186 *deftype = ROFFDEF_PRE; 4187 return predefs[i].str; 4188 } else { 4189 found = 1; 4190 break; 4191 } 4192 } 4193 if (r->man->meta.macroset != MACROSET_MAN) { 4194 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4195 if (strncmp(name, roff_name[tok], len) != 0 || 4196 roff_name[tok][len] != '\0') 4197 continue; 4198 if (*deftype & ROFFDEF_STD) { 4199 *deftype = ROFFDEF_STD; 4200 return NULL; 4201 } else { 4202 found = 1; 4203 break; 4204 } 4205 } 4206 } 4207 if (r->man->meta.macroset != MACROSET_MDOC) { 4208 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4209 if (strncmp(name, roff_name[tok], len) != 0 || 4210 roff_name[tok][len] != '\0') 4211 continue; 4212 if (*deftype & ROFFDEF_STD) { 4213 *deftype = ROFFDEF_STD; 4214 return NULL; 4215 } else { 4216 found = 1; 4217 break; 4218 } 4219 } 4220 } 4221 4222 if (found == 0 && *deftype != ROFFDEF_ANY) { 4223 if (*deftype & ROFFDEF_REN) { 
4224 /* 4225 * This might still be a request, 4226 * so do not treat it as undefined yet. 4227 */ 4228 *deftype = ROFFDEF_UNDEF; 4229 return NULL; 4230 } 4231 4232 /* Using an undefined string defines it to be empty. */ 4233 4234 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4235 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4236 } 4237 4238 *deftype = 0; 4239 return NULL; 4240 } 4241 4242 static void 4243 roff_freestr(struct roffkv *r) 4244 { 4245 struct roffkv *n, *nn; 4246 4247 for (n = r; n; n = nn) { 4248 free(n->key.p); 4249 free(n->val.p); 4250 nn = n->next; 4251 free(n); 4252 } 4253 } 4254 4255 /* --- accessors and utility functions ------------------------------------ */ 4256 4257 /* 4258 * Duplicate an input string, making the appropriate character 4259 * conversations (as stipulated by `tr') along the way. 4260 * Returns a heap-allocated string with all the replacements made. 4261 */ 4262 char * 4263 roff_strdup(const struct roff *r, const char *p) 4264 { 4265 const struct roffkv *cp; 4266 char *res; 4267 const char *pp; 4268 size_t ssz, sz; 4269 enum mandoc_esc esc; 4270 4271 if (NULL == r->xmbtab && NULL == r->xtab) 4272 return mandoc_strdup(p); 4273 else if ('\0' == *p) 4274 return mandoc_strdup(""); 4275 4276 /* 4277 * Step through each character looking for term matches 4278 * (remember that a `tr' can be invoked with an escape, which is 4279 * a glyph but the escape is multi-character). 4280 * We only do this if the character hash has been initialised 4281 * and the string is >0 length. 
4282 */ 4283 4284 res = NULL; 4285 ssz = 0; 4286 4287 while ('\0' != *p) { 4288 assert((unsigned int)*p < 128); 4289 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4290 sz = r->xtab[(int)*p].sz; 4291 res = mandoc_realloc(res, ssz + sz + 1); 4292 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4293 ssz += sz; 4294 p++; 4295 continue; 4296 } else if ('\\' != *p) { 4297 res = mandoc_realloc(res, ssz + 2); 4298 res[ssz++] = *p++; 4299 continue; 4300 } 4301 4302 /* Search for term matches. */ 4303 for (cp = r->xmbtab; cp; cp = cp->next) 4304 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4305 break; 4306 4307 if (NULL != cp) { 4308 /* 4309 * A match has been found. 4310 * Append the match to the array and move 4311 * forward by its keysize. 4312 */ 4313 res = mandoc_realloc(res, 4314 ssz + cp->val.sz + 1); 4315 memcpy(res + ssz, cp->val.p, cp->val.sz); 4316 ssz += cp->val.sz; 4317 p += (int)cp->key.sz; 4318 continue; 4319 } 4320 4321 /* 4322 * Handle escapes carefully: we need to copy 4323 * over just the escape itself, or else we might 4324 * do replacements within the escape itself. 4325 * Make sure to pass along the bogus string. 4326 */ 4327 pp = p++; 4328 esc = mandoc_escape(&p, NULL, NULL); 4329 if (ESCAPE_ERROR == esc) { 4330 sz = strlen(pp); 4331 res = mandoc_realloc(res, ssz + sz + 1); 4332 memcpy(res + ssz, pp, sz); 4333 break; 4334 } 4335 /* 4336 * We bail out on bad escapes. 4337 * No need to warn: we already did so when 4338 * roff_expand() was called. 4339 */ 4340 sz = (int)(p - pp); 4341 res = mandoc_realloc(res, ssz + sz + 1); 4342 memcpy(res + ssz, pp, sz); 4343 ssz += sz; 4344 } 4345 4346 res[(int)ssz] = '\0'; 4347 return res; 4348 } 4349 4350 int 4351 roff_getformat(const struct roff *r) 4352 { 4353 4354 return r->format; 4355 } 4356 4357 /* 4358 * Find out whether a line is a macro line or not. 4359 * If it is, adjust the current position and return one; if it isn't, 4360 * return zero and don't change the current position. 
4361 * If the control character has been set with `.cc', then let that grain 4362 * precedence. 4363 * This is slighly contrary to groff, where using the non-breaking 4364 * control character when `cc' has been invoked will cause the 4365 * non-breaking macro contents to be printed verbatim. 4366 */ 4367 int 4368 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4369 { 4370 int pos; 4371 4372 pos = *ppos; 4373 4374 if (r->control != '\0' && cp[pos] == r->control) 4375 pos++; 4376 else if (r->control != '\0') 4377 return 0; 4378 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4379 pos += 2; 4380 else if ('.' == cp[pos] || '\'' == cp[pos]) 4381 pos++; 4382 else 4383 return 0; 4384 4385 while (' ' == cp[pos] || '\t' == cp[pos]) 4386 pos++; 4387 4388 *ppos = pos; 4389 return 1; 4390 } 4391