/*	$Id: roff.c,v 1.329 2018/08/01 15:40:17 schwarze Exp $ */
/*
 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mandoc.h"
#include "mandoc_aux.h"
#include "mandoc_ohash.h"
#include "roff.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "libroff.h"

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * These are bit flags; ROFFDEF_ANY is the union of the four "defined"
 * kinds, and ROFFDEF_UNDEF is a separate bit outside that mask.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */

/* --- data types --------------------------------------------------------- */

/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* current value */
	int		 step; /* NOTE(review): presumably the auto-increment
				* step of the register - confirm in
				* roff_setregn() */
	struct roffreg	*next; /* next in list */
};

/*
 * Association of request and macro names with token IDs.
 * The name is stored inline in a flexible array member; entries are
 * allocated with room for the name in roffhash_alloc().
 */
struct	roffreq {
	enum roff_tok	 tok;
	char		 name[];
};

/*
 * State of one roff parser; allocated by roff_alloc(), cleared for reuse
 * by roff_reset() and released by roff_free().
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	int		 argc; /* number of args of the last macro */
	char		 control; /* control character */
	char		 escape; /* escape character */
};

/*
 * One entry on the stack of pending roff request blocks.
 * Entries are pushed by roffnode_push() and popped by roffnode_pop(),
 * which also frees the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};

/* Common parameter list shared by all request handler functions. */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler; see ROFF_ARGS for the parameters. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);

/*
 * Handlers for one request; this is the element type of the roffs[]
 * dispatch table.  Unused handlers are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};

/* One predefined string: input name and its replacement. */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Builds one struct predef initializer, including the trailing comma. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },

/* --- function prototypes ------------------------------------------------ */

static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, struct tbl_node *);
static	enum rofferr	 roff_als(ROFF_ARGS);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_br(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	void		 roff_ccond(struct roff *, int, int);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum rofferr	 roff_ec(ROFF_ARGS);
static	enum rofferr	 roff_eo(ROFF_ARGS);
static	enum rofferr	 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_insec(ROFF_ARGS);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	enum rofferr	 roff_manyarg(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	enum rofferr	 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	enum rofferr	 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	enum rofferr	 roff_renamed(ROFF_ARGS);
static	enum rofferr	 roff_res(struct roff *, struct buf *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	enum rofferr	 roff_rn(ROFF_ARGS);
static	enum rofferr	 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_unsupp(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);

/* --- constant data ------------------------------------------------------ */

#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum().
*/

/*
 * Names of all requests and macros, indexed by token.
 * roffhash_alloc() reads this table through roff_name and skips the
 * NULL slots.  The order must stay in sync with the token enumeration
 * and with the roffs[] table below.
 * NOTE(review): the three groups look like roff requests, mdoc(7)
 * macros and man(7) macros, separated by NULL markers - confirm
 * against enum roff_tok.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"ft",		"ll",
	"mc",		"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	NULL,
	"TH",		"SH",		"SS",		"TP",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"nf",		"fi",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"OP",		"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
/* Read-only view of the name table. */
const	char *const *roff_name = __roff_name;

/*
 * Dispatch table of request handlers, one entry per token below
 * TOKEN_NONE, in the same order as the names above.  Each entry gives
 * the handlers for the request itself (proc), for text inside its
 * scope (text), and for requests inside its scope (sub).
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_br, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_br, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_unsupp, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_unsupp, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_unsupp, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_unsupp, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_unsupp, NULL, NULL, 0 },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	/*
	 * NOTE(review): the last two slots have no name in the table
	 * above; presumably they are the pseudo-tokens for renamed and
	 * user-defined macros - confirm against enum roff_tok.
	 */
	{ roff_renamed, NULL, NULL, 0 },
	{ roff_userdef, NULL, NULL, 0 }
};

/* Array of injected predefined strings.
*/ 587 #define PREDEFS_MAX 38 588 static const struct predef predefs[PREDEFS_MAX] = { 589 #include "predefs.in" 590 }; 591 592 static int roffce_lines; /* number of input lines to center */ 593 static struct roff_node *roffce_node; /* active request */ 594 static int roffit_lines; /* number of lines to delay */ 595 static char *roffit_macro; /* nil-terminated macro line */ 596 597 598 /* --- request table ------------------------------------------------------ */ 599 600 struct ohash * 601 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 602 { 603 struct ohash *htab; 604 struct roffreq *req; 605 enum roff_tok tok; 606 size_t sz; 607 unsigned int slot; 608 609 htab = mandoc_malloc(sizeof(*htab)); 610 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 611 612 for (tok = mintok; tok < maxtok; tok++) { 613 if (roff_name[tok] == NULL) 614 continue; 615 sz = strlen(roff_name[tok]); 616 req = mandoc_malloc(sizeof(*req) + sz + 1); 617 req->tok = tok; 618 memcpy(req->name, roff_name[tok], sz + 1); 619 slot = ohash_qlookup(htab, req->name); 620 ohash_insert(htab, slot, req); 621 } 622 return htab; 623 } 624 625 void 626 roffhash_free(struct ohash *htab) 627 { 628 struct roffreq *req; 629 unsigned int slot; 630 631 if (htab == NULL) 632 return; 633 for (req = ohash_first(htab, &slot); req != NULL; 634 req = ohash_next(htab, &slot)) 635 free(req); 636 ohash_delete(htab); 637 free(htab); 638 } 639 640 enum roff_tok 641 roffhash_find(struct ohash *htab, const char *name, size_t sz) 642 { 643 struct roffreq *req; 644 const char *end; 645 646 if (sz) { 647 end = name + sz; 648 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 649 } else 650 req = ohash_find(htab, ohash_qlookup(htab, name)); 651 return req == NULL ? TOKEN_NONE : req->tok; 652 } 653 654 /* --- stack of request blocks -------------------------------------------- */ 655 656 /* 657 * Pop the current node off of the stack of roff instructions currently 658 * pending. 
659 */ 660 static void 661 roffnode_pop(struct roff *r) 662 { 663 struct roffnode *p; 664 665 assert(r->last); 666 p = r->last; 667 668 r->last = r->last->parent; 669 free(p->name); 670 free(p->end); 671 free(p); 672 } 673 674 /* 675 * Push a roff node onto the instruction stack. This must later be 676 * removed with roffnode_pop(). 677 */ 678 static void 679 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 680 int line, int col) 681 { 682 struct roffnode *p; 683 684 p = mandoc_calloc(1, sizeof(struct roffnode)); 685 p->tok = tok; 686 if (name) 687 p->name = mandoc_strdup(name); 688 p->parent = r->last; 689 p->line = line; 690 p->col = col; 691 p->rule = p->parent ? p->parent->rule : 0; 692 693 r->last = p; 694 } 695 696 /* --- roff parser state data management ---------------------------------- */ 697 698 static void 699 roff_free1(struct roff *r) 700 { 701 struct tbl_node *tbl; 702 int i; 703 704 while (NULL != (tbl = r->first_tbl)) { 705 r->first_tbl = tbl->next; 706 tbl_free(tbl); 707 } 708 r->first_tbl = r->last_tbl = r->tbl = NULL; 709 710 if (r->last_eqn != NULL) 711 eqn_free(r->last_eqn); 712 r->last_eqn = r->eqn = NULL; 713 714 while (r->last) 715 roffnode_pop(r); 716 717 free (r->rstack); 718 r->rstack = NULL; 719 r->rstacksz = 0; 720 r->rstackpos = -1; 721 722 roff_freereg(r->regtab); 723 r->regtab = NULL; 724 725 roff_freestr(r->strtab); 726 roff_freestr(r->rentab); 727 roff_freestr(r->xmbtab); 728 r->strtab = r->rentab = r->xmbtab = NULL; 729 730 if (r->xtab) 731 for (i = 0; i < 128; i++) 732 free(r->xtab[i].p); 733 free(r->xtab); 734 r->xtab = NULL; 735 } 736 737 void 738 roff_reset(struct roff *r) 739 { 740 roff_free1(r); 741 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 742 r->control = '\0'; 743 r->escape = '\\'; 744 roffce_lines = 0; 745 roffce_node = NULL; 746 roffit_lines = 0; 747 roffit_macro = NULL; 748 } 749 750 void 751 roff_free(struct roff *r) 752 { 753 roff_free1(r); 754 roffhash_free(r->reqtab); 755 free(r); 756 
} 757 758 struct roff * 759 roff_alloc(struct mparse *parse, int options) 760 { 761 struct roff *r; 762 763 r = mandoc_calloc(1, sizeof(struct roff)); 764 r->parse = parse; 765 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 766 r->options = options; 767 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 768 r->rstackpos = -1; 769 r->escape = '\\'; 770 return r; 771 } 772 773 /* --- syntax tree state data management ---------------------------------- */ 774 775 static void 776 roff_man_free1(struct roff_man *man) 777 { 778 779 if (man->first != NULL) 780 roff_node_delete(man, man->first); 781 free(man->meta.msec); 782 free(man->meta.vol); 783 free(man->meta.os); 784 free(man->meta.arch); 785 free(man->meta.title); 786 free(man->meta.name); 787 free(man->meta.date); 788 } 789 790 static void 791 roff_man_alloc1(struct roff_man *man) 792 { 793 794 memset(&man->meta, 0, sizeof(man->meta)); 795 man->first = mandoc_calloc(1, sizeof(*man->first)); 796 man->first->type = ROFFT_ROOT; 797 man->last = man->first; 798 man->last_es = NULL; 799 man->flags = 0; 800 man->macroset = MACROSET_NONE; 801 man->lastsec = man->lastnamed = SEC_NONE; 802 man->next = ROFF_NEXT_CHILD; 803 } 804 805 void 806 roff_man_reset(struct roff_man *man) 807 { 808 809 roff_man_free1(man); 810 roff_man_alloc1(man); 811 } 812 813 void 814 roff_man_free(struct roff_man *man) 815 { 816 817 roff_man_free1(man); 818 free(man); 819 } 820 821 struct roff_man * 822 roff_man_alloc(struct roff *roff, struct mparse *parse, 823 const char *os_s, int quick) 824 { 825 struct roff_man *man; 826 827 man = mandoc_calloc(1, sizeof(*man)); 828 man->parse = parse; 829 man->roff = roff; 830 man->os_s = os_s; 831 man->quick = quick; 832 roff_man_alloc1(man); 833 roff->man = man; 834 return man; 835 } 836 837 /* --- syntax tree handling ----------------------------------------------- */ 838 839 struct roff_node * 840 roff_node_alloc(struct roff_man *man, int line, int pos, 841 enum roff_type type, int tok) 842 { 843 struct 
roff_node *n; 844 845 n = mandoc_calloc(1, sizeof(*n)); 846 n->line = line; 847 n->pos = pos; 848 n->tok = tok; 849 n->type = type; 850 n->sec = man->lastsec; 851 852 if (man->flags & MDOC_SYNOPSIS) 853 n->flags |= NODE_SYNPRETTY; 854 else 855 n->flags &= ~NODE_SYNPRETTY; 856 if (man->flags & MDOC_NEWLINE) 857 n->flags |= NODE_LINE; 858 man->flags &= ~MDOC_NEWLINE; 859 860 return n; 861 } 862 863 void 864 roff_node_append(struct roff_man *man, struct roff_node *n) 865 { 866 867 switch (man->next) { 868 case ROFF_NEXT_SIBLING: 869 if (man->last->next != NULL) { 870 n->next = man->last->next; 871 man->last->next->prev = n; 872 } else 873 man->last->parent->last = n; 874 man->last->next = n; 875 n->prev = man->last; 876 n->parent = man->last->parent; 877 break; 878 case ROFF_NEXT_CHILD: 879 if (man->last->child != NULL) { 880 n->next = man->last->child; 881 man->last->child->prev = n; 882 } else 883 man->last->last = n; 884 man->last->child = n; 885 n->parent = man->last; 886 break; 887 default: 888 abort(); 889 } 890 man->last = n; 891 892 switch (n->type) { 893 case ROFFT_HEAD: 894 n->parent->head = n; 895 break; 896 case ROFFT_BODY: 897 if (n->end != ENDBODY_NOT) 898 return; 899 n->parent->body = n; 900 break; 901 case ROFFT_TAIL: 902 n->parent->tail = n; 903 break; 904 default: 905 return; 906 } 907 908 /* 909 * Copy over the normalised-data pointer of our parent. Not 910 * everybody has one, but copying a null pointer is fine. 
911 */ 912 913 n->norm = n->parent->norm; 914 assert(n->parent->type == ROFFT_BLOCK); 915 } 916 917 void 918 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 919 { 920 struct roff_node *n; 921 922 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 923 n->string = roff_strdup(man->roff, word); 924 roff_node_append(man, n); 925 n->flags |= NODE_VALID | NODE_ENDED; 926 man->next = ROFF_NEXT_SIBLING; 927 } 928 929 void 930 roff_word_append(struct roff_man *man, const char *word) 931 { 932 struct roff_node *n; 933 char *addstr, *newstr; 934 935 n = man->last; 936 addstr = roff_strdup(man->roff, word); 937 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 938 free(addstr); 939 free(n->string); 940 n->string = newstr; 941 man->next = ROFF_NEXT_SIBLING; 942 } 943 944 void 945 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 946 { 947 struct roff_node *n; 948 949 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 950 roff_node_append(man, n); 951 man->next = ROFF_NEXT_CHILD; 952 } 953 954 struct roff_node * 955 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 956 { 957 struct roff_node *n; 958 959 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 960 roff_node_append(man, n); 961 man->next = ROFF_NEXT_CHILD; 962 return n; 963 } 964 965 struct roff_node * 966 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 967 { 968 struct roff_node *n; 969 970 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 971 roff_node_append(man, n); 972 man->next = ROFF_NEXT_CHILD; 973 return n; 974 } 975 976 struct roff_node * 977 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 978 { 979 struct roff_node *n; 980 981 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 982 roff_node_append(man, n); 983 man->next = ROFF_NEXT_CHILD; 984 return n; 985 } 986 987 static void 988 roff_addtbl(struct roff_man *man, struct tbl_node *tbl) 989 { 990 struct roff_node *n; 991 const struct 
tbl_span *span; 992 993 if (man->macroset == MACROSET_MAN) 994 man_breakscope(man, ROFF_TS); 995 while ((span = tbl_span(tbl)) != NULL) { 996 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); 997 n->span = span; 998 roff_node_append(man, n); 999 n->flags |= NODE_VALID | NODE_ENDED; 1000 man->next = ROFF_NEXT_SIBLING; 1001 } 1002 } 1003 1004 void 1005 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1006 { 1007 1008 /* Adjust siblings. */ 1009 1010 if (n->prev) 1011 n->prev->next = n->next; 1012 if (n->next) 1013 n->next->prev = n->prev; 1014 1015 /* Adjust parent. */ 1016 1017 if (n->parent != NULL) { 1018 if (n->parent->child == n) 1019 n->parent->child = n->next; 1020 if (n->parent->last == n) 1021 n->parent->last = n->prev; 1022 } 1023 1024 /* Adjust parse point. */ 1025 1026 if (man == NULL) 1027 return; 1028 if (man->last == n) { 1029 if (n->prev == NULL) { 1030 man->last = n->parent; 1031 man->next = ROFF_NEXT_CHILD; 1032 } else { 1033 man->last = n->prev; 1034 man->next = ROFF_NEXT_SIBLING; 1035 } 1036 } 1037 if (man->first == n) 1038 man->first = NULL; 1039 } 1040 1041 void 1042 roff_node_free(struct roff_node *n) 1043 { 1044 1045 if (n->args != NULL) 1046 mdoc_argv_free(n->args); 1047 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1048 free(n->norm); 1049 if (n->eqn != NULL) 1050 eqn_box_free(n->eqn); 1051 free(n->string); 1052 free(n); 1053 } 1054 1055 void 1056 roff_node_delete(struct roff_man *man, struct roff_node *n) 1057 { 1058 1059 while (n->child != NULL) 1060 roff_node_delete(man, n->child); 1061 roff_node_unlink(man, n); 1062 roff_node_free(n); 1063 } 1064 1065 void 1066 deroff(char **dest, const struct roff_node *n) 1067 { 1068 char *cp; 1069 size_t sz; 1070 1071 if (n->type != ROFFT_TEXT) { 1072 for (n = n->child; n != NULL; n = n->next) 1073 deroff(dest, n); 1074 return; 1075 } 1076 1077 /* Skip leading whitespace. 
*/ 1078 1079 for (cp = n->string; *cp != '\0'; cp++) { 1080 if (cp[0] == '\\' && cp[1] != '\0' && 1081 strchr(" %&0^|~", cp[1]) != NULL) 1082 cp++; 1083 else if ( ! isspace((unsigned char)*cp)) 1084 break; 1085 } 1086 1087 /* Skip trailing backslash. */ 1088 1089 sz = strlen(cp); 1090 if (sz > 0 && cp[sz - 1] == '\\') 1091 sz--; 1092 1093 /* Skip trailing whitespace. */ 1094 1095 for (; sz; sz--) 1096 if ( ! isspace((unsigned char)cp[sz-1])) 1097 break; 1098 1099 /* Skip empty strings. */ 1100 1101 if (sz == 0) 1102 return; 1103 1104 if (*dest == NULL) { 1105 *dest = mandoc_strndup(cp, sz); 1106 return; 1107 } 1108 1109 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1110 free(*dest); 1111 *dest = cp; 1112 } 1113 1114 /* --- main functions of the roff parser ---------------------------------- */ 1115 1116 /* 1117 * In the current line, expand escape sequences that tend to get 1118 * used in numerical expressions and conditional requests. 1119 * Also check the syntax of the remaining escape sequences. 1120 */ 1121 static enum rofferr 1122 roff_res(struct roff *r, struct buf *buf, int ln, int pos) 1123 { 1124 char ubuf[24]; /* buffer to print the number */ 1125 struct roff_node *n; /* used for header comments */ 1126 const char *start; /* start of the string to process */ 1127 char *stesc; /* start of an escape sequence ('\\') */ 1128 char *ep; /* end of comment string */ 1129 const char *stnam; /* start of the name, after "[(*" */ 1130 const char *cp; /* end of the name, e.g. 
before ']' */ 1131 const char *res; /* the string to be substituted */ 1132 char *nbuf; /* new buffer to copy buf->buf to */ 1133 size_t maxl; /* expected length of the escape name */ 1134 size_t naml; /* actual length of the escape name */ 1135 enum mandoc_esc esc; /* type of the escape sequence */ 1136 int inaml; /* length returned from mandoc_escape() */ 1137 int expand_count; /* to avoid infinite loops */ 1138 int npos; /* position in numeric expression */ 1139 int arg_complete; /* argument not interrupted by eol */ 1140 int done; /* no more input available */ 1141 int deftype; /* type of definition to paste */ 1142 int rcsid; /* kind of RCS id seen */ 1143 char sign; /* increment number register */ 1144 char term; /* character terminating the escape */ 1145 1146 /* Search forward for comments. */ 1147 1148 done = 0; 1149 start = buf->buf + pos; 1150 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1151 if (stesc[0] != r->escape || stesc[1] == '\0') 1152 continue; 1153 stesc++; 1154 if (*stesc != '"' && *stesc != '#') 1155 continue; 1156 1157 /* Comment found, look for RCS id. */ 1158 1159 rcsid = 0; 1160 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1161 rcsid = 1 << MANDOC_OS_OPENBSD; 1162 cp += 8; 1163 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1164 rcsid = 1 << MANDOC_OS_NETBSD; 1165 cp += 7; 1166 } 1167 if (cp != NULL && 1168 isalnum((unsigned char)*cp) == 0 && 1169 strchr(cp, '$') != NULL) { 1170 if (r->man->meta.rcsids & rcsid) 1171 mandoc_msg(MANDOCERR_RCS_REP, r->parse, 1172 ln, stesc + 1 - buf->buf, stesc + 1); 1173 r->man->meta.rcsids |= rcsid; 1174 } 1175 1176 /* Handle trailing whitespace. */ 1177 1178 ep = strchr(stesc--, '\0') - 1; 1179 if (*ep == '\n') { 1180 done = 1; 1181 ep--; 1182 } 1183 if (*ep == ' ' || *ep == '\t') 1184 mandoc_msg(MANDOCERR_SPACE_EOL, r->parse, 1185 ln, ep - buf->buf, NULL); 1186 1187 /* 1188 * Save comments preceding the title macro 1189 * in the syntax tree. 
1190 */ 1191 1192 if (r->format == 0) { 1193 while (*ep == ' ' || *ep == '\t') 1194 ep--; 1195 ep[1] = '\0'; 1196 n = roff_node_alloc(r->man, 1197 ln, stesc + 1 - buf->buf, 1198 ROFFT_COMMENT, TOKEN_NONE); 1199 n->string = mandoc_strdup(stesc + 2); 1200 roff_node_append(r->man, n); 1201 n->flags |= NODE_VALID | NODE_ENDED; 1202 r->man->next = ROFF_NEXT_SIBLING; 1203 } 1204 1205 /* Discard comments. */ 1206 1207 while (stesc > start && stesc[-1] == ' ') 1208 stesc--; 1209 *stesc = '\0'; 1210 break; 1211 } 1212 if (stesc == start) 1213 return ROFF_CONT; 1214 stesc--; 1215 1216 /* Notice the end of the input. */ 1217 1218 if (*stesc == '\n') { 1219 *stesc-- = '\0'; 1220 done = 1; 1221 } 1222 1223 expand_count = 0; 1224 while (stesc >= start) { 1225 1226 /* Search backwards for the next backslash. */ 1227 1228 if (*stesc != r->escape) { 1229 if (*stesc == '\\') { 1230 *stesc = '\0'; 1231 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1232 buf->buf, stesc + 1) + 1; 1233 start = nbuf + pos; 1234 stesc = nbuf + (stesc - buf->buf); 1235 free(buf->buf); 1236 buf->buf = nbuf; 1237 } 1238 stesc--; 1239 continue; 1240 } 1241 1242 /* If it is escaped, skip it. */ 1243 1244 for (cp = stesc - 1; cp >= start; cp--) 1245 if (*cp != r->escape) 1246 break; 1247 1248 if ((stesc - cp) % 2 == 0) { 1249 while (stesc > cp) 1250 *stesc-- = '\\'; 1251 continue; 1252 } else if (stesc[1] != '\0') { 1253 *stesc = '\\'; 1254 } else { 1255 *stesc-- = '\0'; 1256 if (done) 1257 continue; 1258 else 1259 return ROFF_APPEND; 1260 } 1261 1262 /* Decide whether to expand or to check only. 
*/ 1263 1264 term = '\0'; 1265 cp = stesc + 1; 1266 switch (*cp) { 1267 case '*': 1268 res = NULL; 1269 break; 1270 case 'B': 1271 case 'w': 1272 term = cp[1]; 1273 /* FALLTHROUGH */ 1274 case 'n': 1275 sign = cp[1]; 1276 if (sign == '+' || sign == '-') 1277 cp++; 1278 res = ubuf; 1279 break; 1280 default: 1281 esc = mandoc_escape(&cp, &stnam, &inaml); 1282 if (esc == ESCAPE_ERROR || 1283 (esc == ESCAPE_SPECIAL && 1284 mchars_spec2cp(stnam, inaml) < 0)) 1285 mandoc_vmsg(MANDOCERR_ESC_BAD, 1286 r->parse, ln, (int)(stesc - buf->buf), 1287 "%.*s", (int)(cp - stesc), stesc); 1288 stesc--; 1289 continue; 1290 } 1291 1292 if (EXPAND_LIMIT < ++expand_count) { 1293 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 1294 ln, (int)(stesc - buf->buf), NULL); 1295 return ROFF_IGN; 1296 } 1297 1298 /* 1299 * The third character decides the length 1300 * of the name of the string or register. 1301 * Save a pointer to the name. 1302 */ 1303 1304 if (term == '\0') { 1305 switch (*++cp) { 1306 case '\0': 1307 maxl = 0; 1308 break; 1309 case '(': 1310 cp++; 1311 maxl = 2; 1312 break; 1313 case '[': 1314 cp++; 1315 term = ']'; 1316 maxl = 0; 1317 break; 1318 default: 1319 maxl = 1; 1320 break; 1321 } 1322 } else { 1323 cp += 2; 1324 maxl = 0; 1325 } 1326 stnam = cp; 1327 1328 /* Advance to the end of the name. 
*/ 1329 1330 naml = 0; 1331 arg_complete = 1; 1332 while (maxl == 0 || naml < maxl) { 1333 if (*cp == '\0') { 1334 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 1335 ln, (int)(stesc - buf->buf), stesc); 1336 arg_complete = 0; 1337 break; 1338 } 1339 if (maxl == 0 && *cp == term) { 1340 cp++; 1341 break; 1342 } 1343 if (*cp++ != '\\' || stesc[1] != 'w') { 1344 naml++; 1345 continue; 1346 } 1347 switch (mandoc_escape(&cp, NULL, NULL)) { 1348 case ESCAPE_SPECIAL: 1349 case ESCAPE_UNICODE: 1350 case ESCAPE_NUMBERED: 1351 case ESCAPE_OVERSTRIKE: 1352 naml++; 1353 break; 1354 default: 1355 break; 1356 } 1357 } 1358 1359 /* 1360 * Retrieve the replacement string; if it is 1361 * undefined, resume searching for escapes. 1362 */ 1363 1364 switch (stesc[1]) { 1365 case '*': 1366 if (arg_complete) { 1367 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1368 res = roff_getstrn(r, stnam, naml, &deftype); 1369 } 1370 break; 1371 case 'B': 1372 npos = 0; 1373 ubuf[0] = arg_complete && 1374 roff_evalnum(r, ln, stnam, &npos, 1375 NULL, ROFFNUM_SCALE) && 1376 stnam + npos + 1 == cp ? '1' : '0'; 1377 ubuf[1] = '\0'; 1378 break; 1379 case 'n': 1380 if (arg_complete) 1381 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1382 roff_getregn(r, stnam, naml, sign)); 1383 else 1384 ubuf[0] = '\0'; 1385 break; 1386 case 'w': 1387 /* use even incomplete args */ 1388 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1389 24 * (int)naml); 1390 break; 1391 } 1392 1393 if (res == NULL) { 1394 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1395 r->parse, ln, (int)(stesc - buf->buf), 1396 "%.*s", (int)naml, stnam); 1397 res = ""; 1398 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1399 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 1400 ln, (int)(stesc - buf->buf), NULL); 1401 return ROFF_IGN; 1402 } 1403 1404 /* Replace the escape sequence by the string. */ 1405 1406 *stesc = '\0'; 1407 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1408 buf->buf, res, cp) + 1; 1409 1410 /* Prepare for the next replacement. 
*/ 1411 1412 start = nbuf + pos; 1413 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1414 free(buf->buf); 1415 buf->buf = nbuf; 1416 } 1417 return ROFF_CONT; 1418 } 1419 1420 /* 1421 * Process text streams. 1422 */ 1423 static enum rofferr 1424 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1425 { 1426 size_t sz; 1427 const char *start; 1428 char *p; 1429 int isz; 1430 enum mandoc_esc esc; 1431 1432 /* Spring the input line trap. */ 1433 1434 if (roffit_lines == 1) { 1435 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1436 free(buf->buf); 1437 buf->buf = p; 1438 buf->sz = isz + 1; 1439 *offs = 0; 1440 free(roffit_macro); 1441 roffit_lines = 0; 1442 return ROFF_REPARSE; 1443 } else if (roffit_lines > 1) 1444 --roffit_lines; 1445 1446 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1447 if (roffce_lines < 1) { 1448 r->man->last = roffce_node; 1449 r->man->next = ROFF_NEXT_SIBLING; 1450 roffce_lines = 0; 1451 roffce_node = NULL; 1452 } else 1453 roffce_lines--; 1454 } 1455 1456 /* Convert all breakable hyphens into ASCII_HYPH. */ 1457 1458 start = p = buf->buf + pos; 1459 1460 while (*p != '\0') { 1461 sz = strcspn(p, "-\\"); 1462 p += sz; 1463 1464 if (*p == '\0') 1465 break; 1466 1467 if (*p == '\\') { 1468 /* Skip over escapes. 
*/ 1469 p++; 1470 esc = mandoc_escape((const char **)&p, NULL, NULL); 1471 if (esc == ESCAPE_ERROR) 1472 break; 1473 while (*p == '-') 1474 p++; 1475 continue; 1476 } else if (p == start) { 1477 p++; 1478 continue; 1479 } 1480 1481 if (isalpha((unsigned char)p[-1]) && 1482 isalpha((unsigned char)p[1])) 1483 *p = ASCII_HYPH; 1484 p++; 1485 } 1486 return ROFF_CONT; 1487 } 1488 1489 enum rofferr 1490 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1491 { 1492 enum roff_tok t; 1493 enum rofferr e; 1494 int pos; /* parse point */ 1495 int spos; /* saved parse point for messages */ 1496 int ppos; /* original offset in buf->buf */ 1497 int ctl; /* macro line (boolean) */ 1498 1499 ppos = pos = *offs; 1500 1501 /* Handle in-line equation delimiters. */ 1502 1503 if (r->tbl == NULL && 1504 r->last_eqn != NULL && r->last_eqn->delim && 1505 (r->eqn == NULL || r->eqn_inline)) { 1506 e = roff_eqndelim(r, buf, pos); 1507 if (e == ROFF_REPARSE) 1508 return e; 1509 assert(e == ROFF_CONT); 1510 } 1511 1512 /* Expand some escape sequences. */ 1513 1514 e = roff_res(r, buf, ln, pos); 1515 if (e == ROFF_IGN || e == ROFF_APPEND) 1516 return e; 1517 assert(e == ROFF_CONT); 1518 1519 ctl = roff_getcontrol(r, buf->buf, &pos); 1520 1521 /* 1522 * First, if a scope is open and we're not a macro, pass the 1523 * text through the macro's filter. 1524 * Equations process all content themselves. 1525 * Tables process almost all content themselves, but we want 1526 * to warn about macros before passing it there. 1527 */ 1528 1529 if (r->last != NULL && ! 
ctl) { 1530 t = r->last->tok; 1531 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1532 if (e == ROFF_IGN) 1533 return e; 1534 assert(e == ROFF_CONT); 1535 } 1536 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1537 eqn_read(r->eqn, buf->buf + ppos); 1538 return ROFF_IGN; 1539 } 1540 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1541 tbl_read(r->tbl, ln, buf->buf, ppos); 1542 roff_addtbl(r->man, r->tbl); 1543 return ROFF_IGN; 1544 } 1545 if ( ! ctl) 1546 return roff_parsetext(r, buf, pos, offs); 1547 1548 /* Skip empty request lines. */ 1549 1550 if (buf->buf[pos] == '"') { 1551 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse, 1552 ln, pos, NULL); 1553 return ROFF_IGN; 1554 } else if (buf->buf[pos] == '\0') 1555 return ROFF_IGN; 1556 1557 /* 1558 * If a scope is open, go to the child handler for that macro, 1559 * as it may want to preprocess before doing anything with it. 1560 * Don't do so if an equation is open. 1561 */ 1562 1563 if (r->last) { 1564 t = r->last->tok; 1565 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1566 } 1567 1568 /* No scope is open. This is a new request or macro. */ 1569 1570 spos = pos; 1571 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1572 1573 /* Tables ignore most macros. */ 1574 1575 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1576 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1577 mandoc_msg(MANDOCERR_TBLMACRO, r->parse, 1578 ln, pos, buf->buf + spos); 1579 if (t != TOKEN_NONE) 1580 return ROFF_IGN; 1581 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1582 pos++; 1583 while (buf->buf[pos] == ' ') 1584 pos++; 1585 tbl_read(r->tbl, ln, buf->buf, pos); 1586 roff_addtbl(r->man, r->tbl); 1587 return ROFF_IGN; 1588 } 1589 1590 /* For now, let high level macros abort .ce mode. 
*/ 1591 1592 if (ctl && roffce_node != NULL && 1593 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1594 t == ROFF_TH || t == ROFF_TS)) { 1595 r->man->last = roffce_node; 1596 r->man->next = ROFF_NEXT_SIBLING; 1597 roffce_lines = 0; 1598 roffce_node = NULL; 1599 } 1600 1601 /* 1602 * This is neither a roff request nor a user-defined macro. 1603 * Let the standard macro set parsers handle it. 1604 */ 1605 1606 if (t == TOKEN_NONE) 1607 return ROFF_CONT; 1608 1609 /* Execute a roff request or a user defined macro. */ 1610 1611 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1612 } 1613 1614 void 1615 roff_endparse(struct roff *r) 1616 { 1617 if (r->last != NULL) 1618 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1619 r->last->line, r->last->col, 1620 roff_name[r->last->tok]); 1621 1622 if (r->eqn != NULL) { 1623 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1624 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1625 eqn_parse(r->eqn); 1626 r->eqn = NULL; 1627 } 1628 1629 if (r->tbl != NULL) { 1630 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1631 r->tbl->line, r->tbl->pos, "TS"); 1632 tbl_end(r->tbl); 1633 r->tbl = NULL; 1634 } 1635 } 1636 1637 /* 1638 * Parse a roff node's type from the input buffer. This must be in the 1639 * form of ".foo xxx" in the usual way. 
1640 */ 1641 static enum roff_tok 1642 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1643 { 1644 char *cp; 1645 const char *mac; 1646 size_t maclen; 1647 int deftype; 1648 enum roff_tok t; 1649 1650 cp = buf + *pos; 1651 1652 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1653 return TOKEN_NONE; 1654 1655 mac = cp; 1656 maclen = roff_getname(r, &cp, ln, ppos); 1657 1658 deftype = ROFFDEF_USER | ROFFDEF_REN; 1659 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1660 switch (deftype) { 1661 case ROFFDEF_USER: 1662 t = ROFF_USERDEF; 1663 break; 1664 case ROFFDEF_REN: 1665 t = ROFF_RENAMED; 1666 break; 1667 default: 1668 t = roffhash_find(r->reqtab, mac, maclen); 1669 break; 1670 } 1671 if (t != TOKEN_NONE) 1672 *pos = cp - buf; 1673 else if (deftype == ROFFDEF_UNDEF) { 1674 /* Using an undefined macro defines it to be empty. */ 1675 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 1676 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 1677 } 1678 return t; 1679 } 1680 1681 /* --- handling of request blocks ----------------------------------------- */ 1682 1683 static enum rofferr 1684 roff_cblock(ROFF_ARGS) 1685 { 1686 1687 /* 1688 * A block-close `..' should only be invoked as a child of an 1689 * ignore macro, otherwise raise a warning and just ignore it. 1690 */ 1691 1692 if (r->last == NULL) { 1693 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1694 ln, ppos, ".."); 1695 return ROFF_IGN; 1696 } 1697 1698 switch (r->last->tok) { 1699 case ROFF_am: 1700 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1701 case ROFF_ami: 1702 case ROFF_de: 1703 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1704 case ROFF_dei: 1705 case ROFF_ig: 1706 break; 1707 default: 1708 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1709 ln, ppos, ".."); 1710 return ROFF_IGN; 1711 } 1712 1713 if (buf->buf[pos] != '\0') 1714 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 1715 ".. 
%s", buf->buf + pos); 1716 1717 roffnode_pop(r); 1718 roffnode_cleanscope(r); 1719 return ROFF_IGN; 1720 1721 } 1722 1723 static void 1724 roffnode_cleanscope(struct roff *r) 1725 { 1726 1727 while (r->last) { 1728 if (--r->last->endspan != 0) 1729 break; 1730 roffnode_pop(r); 1731 } 1732 } 1733 1734 static void 1735 roff_ccond(struct roff *r, int ln, int ppos) 1736 { 1737 1738 if (NULL == r->last) { 1739 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1740 ln, ppos, "\\}"); 1741 return; 1742 } 1743 1744 switch (r->last->tok) { 1745 case ROFF_el: 1746 case ROFF_ie: 1747 case ROFF_if: 1748 break; 1749 default: 1750 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1751 ln, ppos, "\\}"); 1752 return; 1753 } 1754 1755 if (r->last->endspan > -1) { 1756 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1757 ln, ppos, "\\}"); 1758 return; 1759 } 1760 1761 roffnode_pop(r); 1762 roffnode_cleanscope(r); 1763 return; 1764 } 1765 1766 static enum rofferr 1767 roff_block(ROFF_ARGS) 1768 { 1769 const char *name, *value; 1770 char *call, *cp, *iname, *rname; 1771 size_t csz, namesz, rsz; 1772 int deftype; 1773 1774 /* Ignore groff compatibility mode for now. */ 1775 1776 if (tok == ROFF_de1) 1777 tok = ROFF_de; 1778 else if (tok == ROFF_dei1) 1779 tok = ROFF_dei; 1780 else if (tok == ROFF_am1) 1781 tok = ROFF_am; 1782 else if (tok == ROFF_ami1) 1783 tok = ROFF_ami; 1784 1785 /* Parse the macro name argument. */ 1786 1787 cp = buf->buf + pos; 1788 if (tok == ROFF_ig) { 1789 iname = NULL; 1790 namesz = 0; 1791 } else { 1792 iname = cp; 1793 namesz = roff_getname(r, &cp, ln, ppos); 1794 iname[namesz] = '\0'; 1795 } 1796 1797 /* Resolve the macro name argument if it is indirect. 
*/ 1798 1799 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1800 deftype = ROFFDEF_USER; 1801 name = roff_getstrn(r, iname, namesz, &deftype); 1802 if (name == NULL) { 1803 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1804 r->parse, ln, (int)(iname - buf->buf), 1805 "%.*s", (int)namesz, iname); 1806 namesz = 0; 1807 } else 1808 namesz = strlen(name); 1809 } else 1810 name = iname; 1811 1812 if (namesz == 0 && tok != ROFF_ig) { 1813 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, 1814 ln, ppos, roff_name[tok]); 1815 return ROFF_IGN; 1816 } 1817 1818 roffnode_push(r, tok, name, ln, ppos); 1819 1820 /* 1821 * At the beginning of a `de' macro, clear the existing string 1822 * with the same name, if there is one. New content will be 1823 * appended from roff_block_text() in multiline mode. 1824 */ 1825 1826 if (tok == ROFF_de || tok == ROFF_dei) { 1827 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 1828 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 1829 } else if (tok == ROFF_am || tok == ROFF_ami) { 1830 deftype = ROFFDEF_ANY; 1831 value = roff_getstrn(r, iname, namesz, &deftype); 1832 switch (deftype) { /* Before appending, ... */ 1833 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 1834 roff_setstrn(&r->strtab, name, namesz, 1835 value, strlen(value), 0); 1836 break; 1837 case ROFFDEF_REN: /* call original standard macro. */ 1838 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 1839 (int)strlen(value), value); 1840 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 1841 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 1842 free(call); 1843 break; 1844 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 1845 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 1846 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 1847 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 1848 (int)rsz, rname); 1849 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 1850 free(call); 1851 free(rname); 1852 break; 1853 default: 1854 break; 1855 } 1856 } 1857 1858 if (*cp == '\0') 1859 return ROFF_IGN; 1860 1861 /* Get the custom end marker. */ 1862 1863 iname = cp; 1864 namesz = roff_getname(r, &cp, ln, ppos); 1865 1866 /* Resolve the end marker if it is indirect. */ 1867 1868 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1869 deftype = ROFFDEF_USER; 1870 name = roff_getstrn(r, iname, namesz, &deftype); 1871 if (name == NULL) { 1872 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1873 r->parse, ln, (int)(iname - buf->buf), 1874 "%.*s", (int)namesz, iname); 1875 namesz = 0; 1876 } else 1877 namesz = strlen(name); 1878 } else 1879 name = iname; 1880 1881 if (namesz) 1882 r->last->end = mandoc_strndup(name, namesz); 1883 1884 if (*cp != '\0') 1885 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 1886 ln, pos, ".%s ... %s", roff_name[tok], cp); 1887 1888 return ROFF_IGN; 1889 } 1890 1891 static enum rofferr 1892 roff_block_sub(ROFF_ARGS) 1893 { 1894 enum roff_tok t; 1895 int i, j; 1896 1897 /* 1898 * First check whether a custom macro exists at this level. If 1899 * it does, then check against it. This is some of groff's 1900 * stranger behaviours. If we encountered a custom end-scope 1901 * tag and that tag also happens to be a "real" macro, then we 1902 * need to try interpreting it again as a real macro. If it's 1903 * not, then return ignore. Else continue. 
1904 */ 1905 1906 if (r->last->end) { 1907 for (i = pos, j = 0; r->last->end[j]; j++, i++) 1908 if (buf->buf[i] != r->last->end[j]) 1909 break; 1910 1911 if (r->last->end[j] == '\0' && 1912 (buf->buf[i] == '\0' || 1913 buf->buf[i] == ' ' || 1914 buf->buf[i] == '\t')) { 1915 roffnode_pop(r); 1916 roffnode_cleanscope(r); 1917 1918 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 1919 i++; 1920 1921 pos = i; 1922 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 1923 TOKEN_NONE) 1924 return ROFF_RERUN; 1925 return ROFF_IGN; 1926 } 1927 } 1928 1929 /* 1930 * If we have no custom end-query or lookup failed, then try 1931 * pulling it out of the hashtable. 1932 */ 1933 1934 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1935 1936 if (t != ROFF_cblock) { 1937 if (tok != ROFF_ig) 1938 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 1939 return ROFF_IGN; 1940 } 1941 1942 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 1943 } 1944 1945 static enum rofferr 1946 roff_block_text(ROFF_ARGS) 1947 { 1948 1949 if (tok != ROFF_ig) 1950 roff_setstr(r, r->last->name, buf->buf + pos, 2); 1951 1952 return ROFF_IGN; 1953 } 1954 1955 static enum rofferr 1956 roff_cond_sub(ROFF_ARGS) 1957 { 1958 enum roff_tok t; 1959 char *ep; 1960 int rr; 1961 1962 rr = r->last->rule; 1963 roffnode_cleanscope(r); 1964 1965 /* 1966 * If `\}' occurs on a macro line without a preceding macro, 1967 * drop the line completely. 1968 */ 1969 1970 ep = buf->buf + pos; 1971 if (ep[0] == '\\' && ep[1] == '}') 1972 rr = 0; 1973 1974 /* Always check for the closing delimiter `\}'. */ 1975 1976 while ((ep = strchr(ep, '\\')) != NULL) { 1977 switch (ep[1]) { 1978 case '}': 1979 memmove(ep, ep + 2, strlen(ep + 2) + 1); 1980 roff_ccond(r, ln, ep - buf->buf); 1981 break; 1982 case '\0': 1983 ++ep; 1984 break; 1985 default: 1986 ep += 2; 1987 break; 1988 } 1989 } 1990 1991 /* 1992 * Fully handle known macros when they are structurally 1993 * required or when the conditional evaluated to true. 
1994 */ 1995 1996 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1997 return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) 1998 ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr 1999 ? ROFF_CONT : ROFF_IGN; 2000 } 2001 2002 static enum rofferr 2003 roff_cond_text(ROFF_ARGS) 2004 { 2005 char *ep; 2006 int rr; 2007 2008 rr = r->last->rule; 2009 roffnode_cleanscope(r); 2010 2011 ep = buf->buf + pos; 2012 while ((ep = strchr(ep, '\\')) != NULL) { 2013 if (*(++ep) == '}') { 2014 *ep = '&'; 2015 roff_ccond(r, ln, ep - buf->buf - 1); 2016 } 2017 if (*ep != '\0') 2018 ++ep; 2019 } 2020 return rr ? ROFF_CONT : ROFF_IGN; 2021 } 2022 2023 /* --- handling of numeric and conditional expressions -------------------- */ 2024 2025 /* 2026 * Parse a single signed integer number. Stop at the first non-digit. 2027 * If there is at least one digit, return success and advance the 2028 * parse point, else return failure and let the parse point unchanged. 2029 * Ignore overflows, treat them just like the C language. 2030 */ 2031 static int 2032 roff_getnum(const char *v, int *pos, int *res, int flags) 2033 { 2034 int myres, scaled, n, p; 2035 2036 if (NULL == res) 2037 res = &myres; 2038 2039 p = *pos; 2040 n = v[p] == '-'; 2041 if (n || v[p] == '+') 2042 p++; 2043 2044 if (flags & ROFFNUM_WHITE) 2045 while (isspace((unsigned char)v[p])) 2046 p++; 2047 2048 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2049 *res = 10 * *res + v[p] - '0'; 2050 if (p == *pos + n) 2051 return 0; 2052 2053 if (n) 2054 *res = -*res; 2055 2056 /* Each number may be followed by one optional scaling unit. 
*/ 2057 2058 switch (v[p]) { 2059 case 'f': 2060 scaled = *res * 65536; 2061 break; 2062 case 'i': 2063 scaled = *res * 240; 2064 break; 2065 case 'c': 2066 scaled = *res * 240 / 2.54; 2067 break; 2068 case 'v': 2069 case 'P': 2070 scaled = *res * 40; 2071 break; 2072 case 'm': 2073 case 'n': 2074 scaled = *res * 24; 2075 break; 2076 case 'p': 2077 scaled = *res * 10 / 3; 2078 break; 2079 case 'u': 2080 scaled = *res; 2081 break; 2082 case 'M': 2083 scaled = *res * 6 / 25; 2084 break; 2085 default: 2086 scaled = *res; 2087 p--; 2088 break; 2089 } 2090 if (flags & ROFFNUM_SCALE) 2091 *res = scaled; 2092 2093 *pos = p + 1; 2094 return 1; 2095 } 2096 2097 /* 2098 * Evaluate a string comparison condition. 2099 * The first character is the delimiter. 2100 * Succeed if the string up to its second occurrence 2101 * matches the string up to its third occurence. 2102 * Advance the cursor after the third occurrence 2103 * or lacking that, to the end of the line. 2104 */ 2105 static int 2106 roff_evalstrcond(const char *v, int *pos) 2107 { 2108 const char *s1, *s2, *s3; 2109 int match; 2110 2111 match = 0; 2112 s1 = v + *pos; /* initial delimiter */ 2113 s2 = s1 + 1; /* for scanning the first string */ 2114 s3 = strchr(s2, *s1); /* for scanning the second string */ 2115 2116 if (NULL == s3) /* found no middle delimiter */ 2117 goto out; 2118 2119 while ('\0' != *++s3) { 2120 if (*s2 != *s3) { /* mismatch */ 2121 s3 = strchr(s3, *s1); 2122 break; 2123 } 2124 if (*s3 == *s1) { /* found the final delimiter */ 2125 match = 1; 2126 break; 2127 } 2128 s2++; 2129 } 2130 2131 out: 2132 if (NULL == s3) 2133 s3 = strchr(s2, '\0'); 2134 else if (*s3 != '\0') 2135 s3++; 2136 *pos = s3 - v; 2137 return match; 2138 } 2139 2140 /* 2141 * Evaluate an optionally negated single character, numerical, 2142 * or string condition. 
2143 */ 2144 static int 2145 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2146 { 2147 char *cp, *name; 2148 size_t sz; 2149 int deftype, number, savepos, istrue, wanttrue; 2150 2151 if ('!' == v[*pos]) { 2152 wanttrue = 0; 2153 (*pos)++; 2154 } else 2155 wanttrue = 1; 2156 2157 switch (v[*pos]) { 2158 case '\0': 2159 return 0; 2160 case 'n': 2161 case 'o': 2162 (*pos)++; 2163 return wanttrue; 2164 case 'c': 2165 case 'e': 2166 case 't': 2167 case 'v': 2168 (*pos)++; 2169 return !wanttrue; 2170 case 'd': 2171 case 'r': 2172 cp = v + *pos + 1; 2173 while (*cp == ' ') 2174 cp++; 2175 name = cp; 2176 sz = roff_getname(r, &cp, ln, cp - v); 2177 if (sz == 0) 2178 istrue = 0; 2179 else if (v[*pos] == 'r') 2180 istrue = roff_hasregn(r, name, sz); 2181 else { 2182 deftype = ROFFDEF_ANY; 2183 roff_getstrn(r, name, sz, &deftype); 2184 istrue = !!deftype; 2185 } 2186 *pos = cp - v; 2187 return istrue == wanttrue; 2188 default: 2189 break; 2190 } 2191 2192 savepos = *pos; 2193 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2194 return (number > 0) == wanttrue; 2195 else if (*pos == savepos) 2196 return roff_evalstrcond(v, pos) == wanttrue; 2197 else 2198 return 0; 2199 } 2200 2201 static enum rofferr 2202 roff_line_ignore(ROFF_ARGS) 2203 { 2204 2205 return ROFF_IGN; 2206 } 2207 2208 static enum rofferr 2209 roff_insec(ROFF_ARGS) 2210 { 2211 2212 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse, 2213 ln, ppos, roff_name[tok]); 2214 return ROFF_IGN; 2215 } 2216 2217 static enum rofferr 2218 roff_unsupp(ROFF_ARGS) 2219 { 2220 2221 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse, 2222 ln, ppos, roff_name[tok]); 2223 return ROFF_IGN; 2224 } 2225 2226 static enum rofferr 2227 roff_cond(ROFF_ARGS) 2228 { 2229 2230 roffnode_push(r, tok, NULL, ln, ppos); 2231 2232 /* 2233 * An `.el' has no conditional body: it will consume the value 2234 * of the current rstack entry set in prior `ie' calls or 2235 * defaults to DENY. 
2236 * 2237 * If we're not an `el', however, then evaluate the conditional. 2238 */ 2239 2240 r->last->rule = tok == ROFF_el ? 2241 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2242 roff_evalcond(r, ln, buf->buf, &pos); 2243 2244 /* 2245 * An if-else will put the NEGATION of the current evaluated 2246 * conditional into the stack of rules. 2247 */ 2248 2249 if (tok == ROFF_ie) { 2250 if (r->rstackpos + 1 == r->rstacksz) { 2251 r->rstacksz += 16; 2252 r->rstack = mandoc_reallocarray(r->rstack, 2253 r->rstacksz, sizeof(int)); 2254 } 2255 r->rstack[++r->rstackpos] = !r->last->rule; 2256 } 2257 2258 /* If the parent has false as its rule, then so do we. */ 2259 2260 if (r->last->parent && !r->last->parent->rule) 2261 r->last->rule = 0; 2262 2263 /* 2264 * Determine scope. 2265 * If there is nothing on the line after the conditional, 2266 * not even whitespace, use next-line scope. 2267 */ 2268 2269 if (buf->buf[pos] == '\0') { 2270 r->last->endspan = 2; 2271 goto out; 2272 } 2273 2274 while (buf->buf[pos] == ' ') 2275 pos++; 2276 2277 /* An opening brace requests multiline scope. */ 2278 2279 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2280 r->last->endspan = -1; 2281 pos += 2; 2282 while (buf->buf[pos] == ' ') 2283 pos++; 2284 goto out; 2285 } 2286 2287 /* 2288 * Anything else following the conditional causes 2289 * single-line scope. Warn if the scope contains 2290 * nothing but trailing whitespace. 2291 */ 2292 2293 if (buf->buf[pos] == '\0') 2294 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse, 2295 ln, ppos, roff_name[tok]); 2296 2297 r->last->endspan = 1; 2298 2299 out: 2300 *offs = pos; 2301 return ROFF_RERUN; 2302 } 2303 2304 static enum rofferr 2305 roff_ds(ROFF_ARGS) 2306 { 2307 char *string; 2308 const char *name; 2309 size_t namesz; 2310 2311 /* Ignore groff compatibility mode for now. 
*/ 2312 2313 if (tok == ROFF_ds1) 2314 tok = ROFF_ds; 2315 else if (tok == ROFF_as1) 2316 tok = ROFF_as; 2317 2318 /* 2319 * The first word is the name of the string. 2320 * If it is empty or terminated by an escape sequence, 2321 * abort the `ds' request without defining anything. 2322 */ 2323 2324 name = string = buf->buf + pos; 2325 if (*name == '\0') 2326 return ROFF_IGN; 2327 2328 namesz = roff_getname(r, &string, ln, pos); 2329 if (name[namesz] == '\\') 2330 return ROFF_IGN; 2331 2332 /* Read past the initial double-quote, if any. */ 2333 if (*string == '"') 2334 string++; 2335 2336 /* The rest is the value. */ 2337 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2338 ROFF_as == tok); 2339 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2340 return ROFF_IGN; 2341 } 2342 2343 /* 2344 * Parse a single operator, one or two characters long. 2345 * If the operator is recognized, return success and advance the 2346 * parse point, else return failure and let the parse point unchanged. 
2347 */ 2348 static int 2349 roff_getop(const char *v, int *pos, char *res) 2350 { 2351 2352 *res = v[*pos]; 2353 2354 switch (*res) { 2355 case '+': 2356 case '-': 2357 case '*': 2358 case '/': 2359 case '%': 2360 case '&': 2361 case ':': 2362 break; 2363 case '<': 2364 switch (v[*pos + 1]) { 2365 case '=': 2366 *res = 'l'; 2367 (*pos)++; 2368 break; 2369 case '>': 2370 *res = '!'; 2371 (*pos)++; 2372 break; 2373 case '?': 2374 *res = 'i'; 2375 (*pos)++; 2376 break; 2377 default: 2378 break; 2379 } 2380 break; 2381 case '>': 2382 switch (v[*pos + 1]) { 2383 case '=': 2384 *res = 'g'; 2385 (*pos)++; 2386 break; 2387 case '?': 2388 *res = 'a'; 2389 (*pos)++; 2390 break; 2391 default: 2392 break; 2393 } 2394 break; 2395 case '=': 2396 if ('=' == v[*pos + 1]) 2397 (*pos)++; 2398 break; 2399 default: 2400 return 0; 2401 } 2402 (*pos)++; 2403 2404 return *res; 2405 } 2406 2407 /* 2408 * Evaluate either a parenthesized numeric expression 2409 * or a single signed integer number. 2410 */ 2411 static int 2412 roff_evalpar(struct roff *r, int ln, 2413 const char *v, int *pos, int *res, int flags) 2414 { 2415 2416 if ('(' != v[*pos]) 2417 return roff_getnum(v, pos, res, flags); 2418 2419 (*pos)++; 2420 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2421 return 0; 2422 2423 /* 2424 * Omission of the closing parenthesis 2425 * is an error in validation mode, 2426 * but ignored in evaluation mode. 2427 */ 2428 2429 if (')' == v[*pos]) 2430 (*pos)++; 2431 else if (NULL == res) 2432 return 0; 2433 2434 return 1; 2435 } 2436 2437 /* 2438 * Evaluate a complete numeric expression. 2439 * Proceed left to right, there is no concept of precedence. 
2440 */ 2441 static int 2442 roff_evalnum(struct roff *r, int ln, const char *v, 2443 int *pos, int *res, int flags) 2444 { 2445 int mypos, operand2; 2446 char operator; 2447 2448 if (NULL == pos) { 2449 mypos = 0; 2450 pos = &mypos; 2451 } 2452 2453 if (flags & ROFFNUM_WHITE) 2454 while (isspace((unsigned char)v[*pos])) 2455 (*pos)++; 2456 2457 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2458 return 0; 2459 2460 while (1) { 2461 if (flags & ROFFNUM_WHITE) 2462 while (isspace((unsigned char)v[*pos])) 2463 (*pos)++; 2464 2465 if ( ! roff_getop(v, pos, &operator)) 2466 break; 2467 2468 if (flags & ROFFNUM_WHITE) 2469 while (isspace((unsigned char)v[*pos])) 2470 (*pos)++; 2471 2472 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2473 return 0; 2474 2475 if (flags & ROFFNUM_WHITE) 2476 while (isspace((unsigned char)v[*pos])) 2477 (*pos)++; 2478 2479 if (NULL == res) 2480 continue; 2481 2482 switch (operator) { 2483 case '+': 2484 *res += operand2; 2485 break; 2486 case '-': 2487 *res -= operand2; 2488 break; 2489 case '*': 2490 *res *= operand2; 2491 break; 2492 case '/': 2493 if (operand2 == 0) { 2494 mandoc_msg(MANDOCERR_DIVZERO, 2495 r->parse, ln, *pos, v); 2496 *res = 0; 2497 break; 2498 } 2499 *res /= operand2; 2500 break; 2501 case '%': 2502 if (operand2 == 0) { 2503 mandoc_msg(MANDOCERR_DIVZERO, 2504 r->parse, ln, *pos, v); 2505 *res = 0; 2506 break; 2507 } 2508 *res %= operand2; 2509 break; 2510 case '<': 2511 *res = *res < operand2; 2512 break; 2513 case '>': 2514 *res = *res > operand2; 2515 break; 2516 case 'l': 2517 *res = *res <= operand2; 2518 break; 2519 case 'g': 2520 *res = *res >= operand2; 2521 break; 2522 case '=': 2523 *res = *res == operand2; 2524 break; 2525 case '!': 2526 *res = *res != operand2; 2527 break; 2528 case '&': 2529 *res = *res && operand2; 2530 break; 2531 case ':': 2532 *res = *res || operand2; 2533 break; 2534 case 'i': 2535 if (operand2 < *res) 2536 *res = operand2; 2537 break; 2538 case 'a': 2539 if (operand2 > *res) 
2540 *res = operand2; 2541 break; 2542 default: 2543 abort(); 2544 } 2545 } 2546 return 1; 2547 } 2548 2549 /* --- register management ------------------------------------------------ */ 2550 2551 void 2552 roff_setreg(struct roff *r, const char *name, int val, char sign) 2553 { 2554 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2555 } 2556 2557 static void 2558 roff_setregn(struct roff *r, const char *name, size_t len, 2559 int val, char sign, int step) 2560 { 2561 struct roffreg *reg; 2562 2563 /* Search for an existing register with the same name. */ 2564 reg = r->regtab; 2565 2566 while (reg != NULL && (reg->key.sz != len || 2567 strncmp(reg->key.p, name, len) != 0)) 2568 reg = reg->next; 2569 2570 if (NULL == reg) { 2571 /* Create a new register. */ 2572 reg = mandoc_malloc(sizeof(struct roffreg)); 2573 reg->key.p = mandoc_strndup(name, len); 2574 reg->key.sz = len; 2575 reg->val = 0; 2576 reg->step = 0; 2577 reg->next = r->regtab; 2578 r->regtab = reg; 2579 } 2580 2581 if ('+' == sign) 2582 reg->val += val; 2583 else if ('-' == sign) 2584 reg->val -= val; 2585 else 2586 reg->val = val; 2587 if (step != INT_MIN) 2588 reg->step = step; 2589 } 2590 2591 /* 2592 * Handle some predefined read-only number registers. 2593 * For now, return -1 if the requested register is not predefined; 2594 * in case a predefined read-only register having the value -1 2595 * were to turn up, another special value would have to be chosen. 2596 */ 2597 static int 2598 roff_getregro(const struct roff *r, const char *name) 2599 { 2600 2601 switch (*name) { 2602 case '$': /* Number of arguments of the last macro evaluated. */ 2603 return r->argc; 2604 case 'A': /* ASCII approximation mode is always off. */ 2605 return 0; 2606 case 'g': /* Groff compatibility mode is always on. */ 2607 return 1; 2608 case 'H': /* Fixed horizontal resolution. */ 2609 return 24; 2610 case 'j': /* Always adjust left margin only. 
*/ 2611 return 0; 2612 case 'T': /* Some output device is always defined. */ 2613 return 1; 2614 case 'V': /* Fixed vertical resolution. */ 2615 return 40; 2616 default: 2617 return -1; 2618 } 2619 } 2620 2621 int 2622 roff_getreg(struct roff *r, const char *name) 2623 { 2624 return roff_getregn(r, name, strlen(name), '\0'); 2625 } 2626 2627 static int 2628 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 2629 { 2630 struct roffreg *reg; 2631 int val; 2632 2633 if ('.' == name[0] && 2 == len) { 2634 val = roff_getregro(r, name + 1); 2635 if (-1 != val) 2636 return val; 2637 } 2638 2639 for (reg = r->regtab; reg; reg = reg->next) { 2640 if (len == reg->key.sz && 2641 0 == strncmp(name, reg->key.p, len)) { 2642 switch (sign) { 2643 case '+': 2644 reg->val += reg->step; 2645 break; 2646 case '-': 2647 reg->val -= reg->step; 2648 break; 2649 default: 2650 break; 2651 } 2652 return reg->val; 2653 } 2654 } 2655 2656 roff_setregn(r, name, len, 0, '\0', INT_MIN); 2657 return 0; 2658 } 2659 2660 static int 2661 roff_hasregn(const struct roff *r, const char *name, size_t len) 2662 { 2663 struct roffreg *reg; 2664 int val; 2665 2666 if ('.' 
== name[0] && 2 == len) { 2667 val = roff_getregro(r, name + 1); 2668 if (-1 != val) 2669 return 1; 2670 } 2671 2672 for (reg = r->regtab; reg; reg = reg->next) 2673 if (len == reg->key.sz && 2674 0 == strncmp(name, reg->key.p, len)) 2675 return 1; 2676 2677 return 0; 2678 } 2679 2680 static void 2681 roff_freereg(struct roffreg *reg) 2682 { 2683 struct roffreg *old_reg; 2684 2685 while (NULL != reg) { 2686 free(reg->key.p); 2687 old_reg = reg; 2688 reg = reg->next; 2689 free(old_reg); 2690 } 2691 } 2692 2693 static enum rofferr 2694 roff_nr(ROFF_ARGS) 2695 { 2696 char *key, *val, *step; 2697 size_t keysz; 2698 int iv, is, len; 2699 char sign; 2700 2701 key = val = buf->buf + pos; 2702 if (*key == '\0') 2703 return ROFF_IGN; 2704 2705 keysz = roff_getname(r, &val, ln, pos); 2706 if (key[keysz] == '\\') 2707 return ROFF_IGN; 2708 2709 sign = *val; 2710 if (sign == '+' || sign == '-') 2711 val++; 2712 2713 len = 0; 2714 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 2715 return ROFF_IGN; 2716 2717 step = val + len; 2718 while (isspace((unsigned char)*step)) 2719 step++; 2720 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 2721 is = INT_MIN; 2722 2723 roff_setregn(r, key, keysz, iv, sign, is); 2724 return ROFF_IGN; 2725 } 2726 2727 static enum rofferr 2728 roff_rr(ROFF_ARGS) 2729 { 2730 struct roffreg *reg, **prev; 2731 char *name, *cp; 2732 size_t namesz; 2733 2734 name = cp = buf->buf + pos; 2735 if (*name == '\0') 2736 return ROFF_IGN; 2737 namesz = roff_getname(r, &cp, ln, pos); 2738 name[namesz] = '\0'; 2739 2740 prev = &r->regtab; 2741 while (1) { 2742 reg = *prev; 2743 if (reg == NULL || !strcmp(name, reg->key.p)) 2744 break; 2745 prev = ®->next; 2746 } 2747 if (reg != NULL) { 2748 *prev = reg->next; 2749 free(reg->key.p); 2750 free(reg); 2751 } 2752 return ROFF_IGN; 2753 } 2754 2755 /* --- handler functions for roff requests -------------------------------- */ 2756 2757 static enum rofferr 2758 roff_rm(ROFF_ARGS) 2759 { 2760 const char *name; 
2761 char *cp; 2762 size_t namesz; 2763 2764 cp = buf->buf + pos; 2765 while (*cp != '\0') { 2766 name = cp; 2767 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 2768 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 2769 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2770 if (name[namesz] == '\\') 2771 break; 2772 } 2773 return ROFF_IGN; 2774 } 2775 2776 static enum rofferr 2777 roff_it(ROFF_ARGS) 2778 { 2779 int iv; 2780 2781 /* Parse the number of lines. */ 2782 2783 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 2784 mandoc_msg(MANDOCERR_IT_NONUM, r->parse, 2785 ln, ppos, buf->buf + 1); 2786 return ROFF_IGN; 2787 } 2788 2789 while (isspace((unsigned char)buf->buf[pos])) 2790 pos++; 2791 2792 /* 2793 * Arm the input line trap. 2794 * Special-casing "an-trap" is an ugly workaround to cope 2795 * with DocBook stupidly fiddling with man(7) internals. 2796 */ 2797 2798 roffit_lines = iv; 2799 roffit_macro = mandoc_strdup(iv != 1 || 2800 strcmp(buf->buf + pos, "an-trap") ? 
2801 buf->buf + pos : "br"); 2802 return ROFF_IGN; 2803 } 2804 2805 static enum rofferr 2806 roff_Dd(ROFF_ARGS) 2807 { 2808 int mask; 2809 enum roff_tok t, te; 2810 2811 switch (tok) { 2812 case ROFF_Dd: 2813 tok = MDOC_Dd; 2814 te = MDOC_MAX; 2815 if (r->format == 0) 2816 r->format = MPARSE_MDOC; 2817 mask = MPARSE_MDOC | MPARSE_QUICK; 2818 break; 2819 case ROFF_TH: 2820 tok = MAN_TH; 2821 te = MAN_MAX; 2822 if (r->format == 0) 2823 r->format = MPARSE_MAN; 2824 mask = MPARSE_QUICK; 2825 break; 2826 default: 2827 abort(); 2828 } 2829 if ((r->options & mask) == 0) 2830 for (t = tok; t < te; t++) 2831 roff_setstr(r, roff_name[t], NULL, 0); 2832 return ROFF_CONT; 2833 } 2834 2835 static enum rofferr 2836 roff_TE(ROFF_ARGS) 2837 { 2838 if (r->tbl == NULL) { 2839 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2840 ln, ppos, "TE"); 2841 return ROFF_IGN; 2842 } 2843 if (tbl_end(r->tbl) == 0) { 2844 r->tbl = NULL; 2845 free(buf->buf); 2846 buf->buf = mandoc_strdup(".sp"); 2847 buf->sz = 4; 2848 *offs = 0; 2849 return ROFF_REPARSE; 2850 } 2851 r->tbl = NULL; 2852 return ROFF_IGN; 2853 } 2854 2855 static enum rofferr 2856 roff_T_(ROFF_ARGS) 2857 { 2858 2859 if (NULL == r->tbl) 2860 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2861 ln, ppos, "T&"); 2862 else 2863 tbl_restart(ln, ppos, r->tbl); 2864 2865 return ROFF_IGN; 2866 } 2867 2868 /* 2869 * Handle in-line equation delimiters. 2870 */ 2871 static enum rofferr 2872 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 2873 { 2874 char *cp1, *cp2; 2875 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 2876 2877 /* 2878 * Outside equations, look for an opening delimiter. 2879 * If we are inside an equation, we already know it is 2880 * in-line, or this function wouldn't have been called; 2881 * so look for a closing delimiter. 2882 */ 2883 2884 cp1 = buf->buf + pos; 2885 cp2 = strchr(cp1, r->eqn == NULL ? 
2886 r->last_eqn->odelim : r->last_eqn->cdelim); 2887 if (cp2 == NULL) 2888 return ROFF_CONT; 2889 2890 *cp2++ = '\0'; 2891 bef_pr = bef_nl = aft_nl = aft_pr = ""; 2892 2893 /* Handle preceding text, protecting whitespace. */ 2894 2895 if (*buf->buf != '\0') { 2896 if (r->eqn == NULL) 2897 bef_pr = "\\&"; 2898 bef_nl = "\n"; 2899 } 2900 2901 /* 2902 * Prepare replacing the delimiter with an equation macro 2903 * and drop leading white space from the equation. 2904 */ 2905 2906 if (r->eqn == NULL) { 2907 while (*cp2 == ' ') 2908 cp2++; 2909 mac = ".EQ"; 2910 } else 2911 mac = ".EN"; 2912 2913 /* Handle following text, protecting whitespace. */ 2914 2915 if (*cp2 != '\0') { 2916 aft_nl = "\n"; 2917 if (r->eqn != NULL) 2918 aft_pr = "\\&"; 2919 } 2920 2921 /* Do the actual replacement. */ 2922 2923 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 2924 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 2925 free(buf->buf); 2926 buf->buf = cp1; 2927 2928 /* Toggle the in-line state of the eqn subsystem. 
*/ 2929 2930 r->eqn_inline = r->eqn == NULL; 2931 return ROFF_REPARSE; 2932 } 2933 2934 static enum rofferr 2935 roff_EQ(ROFF_ARGS) 2936 { 2937 struct roff_node *n; 2938 2939 if (r->man->macroset == MACROSET_MAN) 2940 man_breakscope(r->man, ROFF_EQ); 2941 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 2942 if (ln > r->man->last->line) 2943 n->flags |= NODE_LINE; 2944 n->eqn = mandoc_calloc(1, sizeof(*n->eqn)); 2945 n->eqn->expectargs = UINT_MAX; 2946 roff_node_append(r->man, n); 2947 r->man->next = ROFF_NEXT_SIBLING; 2948 2949 assert(r->eqn == NULL); 2950 if (r->last_eqn == NULL) 2951 r->last_eqn = eqn_alloc(r->parse); 2952 else 2953 eqn_reset(r->last_eqn); 2954 r->eqn = r->last_eqn; 2955 r->eqn->node = n; 2956 2957 if (buf->buf[pos] != '\0') 2958 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 2959 ".EQ %s", buf->buf + pos); 2960 2961 return ROFF_IGN; 2962 } 2963 2964 static enum rofferr 2965 roff_EN(ROFF_ARGS) 2966 { 2967 if (r->eqn != NULL) { 2968 eqn_parse(r->eqn); 2969 r->eqn = NULL; 2970 } else 2971 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); 2972 if (buf->buf[pos] != '\0') 2973 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 2974 "EN %s", buf->buf + pos); 2975 return ROFF_IGN; 2976 } 2977 2978 static enum rofferr 2979 roff_TS(ROFF_ARGS) 2980 { 2981 if (r->tbl != NULL) { 2982 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse, 2983 ln, ppos, "TS breaks TS"); 2984 tbl_end(r->tbl); 2985 } 2986 r->tbl = tbl_alloc(ppos, ln, r->parse); 2987 if (r->last_tbl) 2988 r->last_tbl->next = r->tbl; 2989 else 2990 r->first_tbl = r->tbl; 2991 r->last_tbl = r->tbl; 2992 return ROFF_IGN; 2993 } 2994 2995 static enum rofferr 2996 roff_onearg(ROFF_ARGS) 2997 { 2998 struct roff_node *n; 2999 char *cp; 3000 int npos; 3001 3002 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3003 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3004 tok == ROFF_ti)) 3005 man_breakscope(r->man, tok); 3006 3007 if (roffce_node != NULL && (tok == ROFF_ce || tok == 
ROFF_rj)) { 3008 r->man->last = roffce_node; 3009 r->man->next = ROFF_NEXT_SIBLING; 3010 } 3011 3012 roff_elem_alloc(r->man, ln, ppos, tok); 3013 n = r->man->last; 3014 3015 cp = buf->buf + pos; 3016 if (*cp != '\0') { 3017 while (*cp != '\0' && *cp != ' ') 3018 cp++; 3019 while (*cp == ' ') 3020 *cp++ = '\0'; 3021 if (*cp != '\0') 3022 mandoc_vmsg(MANDOCERR_ARG_EXCESS, 3023 r->parse, ln, cp - buf->buf, 3024 "%s ... %s", roff_name[tok], cp); 3025 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3026 } 3027 3028 if (tok == ROFF_ce || tok == ROFF_rj) { 3029 if (r->man->last->type == ROFFT_ELEM) { 3030 roff_word_alloc(r->man, ln, pos, "1"); 3031 r->man->last->flags |= NODE_NOSRC; 3032 } 3033 npos = 0; 3034 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3035 &roffce_lines, 0) == 0) { 3036 mandoc_vmsg(MANDOCERR_CE_NONUM, 3037 r->parse, ln, pos, "ce %s", buf->buf + pos); 3038 roffce_lines = 1; 3039 } 3040 if (roffce_lines < 1) { 3041 r->man->last = r->man->last->parent; 3042 roffce_node = NULL; 3043 roffce_lines = 0; 3044 } else 3045 roffce_node = r->man->last->parent; 3046 } else { 3047 n->flags |= NODE_VALID | NODE_ENDED; 3048 r->man->last = n; 3049 } 3050 n->flags |= NODE_LINE; 3051 r->man->next = ROFF_NEXT_SIBLING; 3052 return ROFF_IGN; 3053 } 3054 3055 static enum rofferr 3056 roff_manyarg(ROFF_ARGS) 3057 { 3058 struct roff_node *n; 3059 char *sp, *ep; 3060 3061 roff_elem_alloc(r->man, ln, ppos, tok); 3062 n = r->man->last; 3063 3064 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3065 while (*ep != '\0' && *ep != ' ') 3066 ep++; 3067 while (*ep == ' ') 3068 *ep++ = '\0'; 3069 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3070 } 3071 3072 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3073 r->man->last = n; 3074 r->man->next = ROFF_NEXT_SIBLING; 3075 return ROFF_IGN; 3076 } 3077 3078 static enum rofferr 3079 roff_als(ROFF_ARGS) 3080 { 3081 char *oldn, *newn, *end, *value; 3082 size_t oldsz, newsz, valsz; 3083 3084 newn = oldn = buf->buf + pos; 3085 
if (*newn == '\0') 3086 return ROFF_IGN; 3087 3088 newsz = roff_getname(r, &oldn, ln, pos); 3089 if (newn[newsz] == '\\' || *oldn == '\0') 3090 return ROFF_IGN; 3091 3092 end = oldn; 3093 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3094 if (oldsz == 0) 3095 return ROFF_IGN; 3096 3097 valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n", 3098 (int)oldsz, oldn); 3099 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3100 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3101 free(value); 3102 return ROFF_IGN; 3103 } 3104 3105 static enum rofferr 3106 roff_br(ROFF_ARGS) 3107 { 3108 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3109 man_breakscope(r->man, ROFF_br); 3110 roff_elem_alloc(r->man, ln, ppos, ROFF_br); 3111 if (buf->buf[pos] != '\0') 3112 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 3113 "%s %s", roff_name[tok], buf->buf + pos); 3114 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3115 r->man->next = ROFF_NEXT_SIBLING; 3116 return ROFF_IGN; 3117 } 3118 3119 static enum rofferr 3120 roff_cc(ROFF_ARGS) 3121 { 3122 const char *p; 3123 3124 p = buf->buf + pos; 3125 3126 if (*p == '\0' || (r->control = *p++) == '.') 3127 r->control = '\0'; 3128 3129 if (*p != '\0') 3130 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 3131 ln, p - buf->buf, "cc ... %s", p); 3132 3133 return ROFF_IGN; 3134 } 3135 3136 static enum rofferr 3137 roff_ec(ROFF_ARGS) 3138 { 3139 const char *p; 3140 3141 p = buf->buf + pos; 3142 if (*p == '\0') 3143 r->escape = '\\'; 3144 else { 3145 r->escape = *p; 3146 if (*++p != '\0') 3147 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 3148 ln, p - buf->buf, "ec ... 
%s", p); 3149 } 3150 return ROFF_IGN; 3151 } 3152 3153 static enum rofferr 3154 roff_eo(ROFF_ARGS) 3155 { 3156 r->escape = '\0'; 3157 if (buf->buf[pos] != '\0') 3158 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, 3159 ln, pos, "eo %s", buf->buf + pos); 3160 return ROFF_IGN; 3161 } 3162 3163 static enum rofferr 3164 roff_tr(ROFF_ARGS) 3165 { 3166 const char *p, *first, *second; 3167 size_t fsz, ssz; 3168 enum mandoc_esc esc; 3169 3170 p = buf->buf + pos; 3171 3172 if (*p == '\0') { 3173 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr"); 3174 return ROFF_IGN; 3175 } 3176 3177 while (*p != '\0') { 3178 fsz = ssz = 1; 3179 3180 first = p++; 3181 if (*first == '\\') { 3182 esc = mandoc_escape(&p, NULL, NULL); 3183 if (esc == ESCAPE_ERROR) { 3184 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 3185 ln, (int)(p - buf->buf), first); 3186 return ROFF_IGN; 3187 } 3188 fsz = (size_t)(p - first); 3189 } 3190 3191 second = p++; 3192 if (*second == '\\') { 3193 esc = mandoc_escape(&p, NULL, NULL); 3194 if (esc == ESCAPE_ERROR) { 3195 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 3196 ln, (int)(p - buf->buf), second); 3197 return ROFF_IGN; 3198 } 3199 ssz = (size_t)(p - second); 3200 } else if (*second == '\0') { 3201 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse, 3202 ln, first - buf->buf, "tr %s", first); 3203 second = " "; 3204 p--; 3205 } 3206 3207 if (fsz > 1) { 3208 roff_setstrn(&r->xmbtab, first, fsz, 3209 second, ssz, 0); 3210 continue; 3211 } 3212 3213 if (r->xtab == NULL) 3214 r->xtab = mandoc_calloc(128, 3215 sizeof(struct roffstr)); 3216 3217 free(r->xtab[(int)*first].p); 3218 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3219 r->xtab[(int)*first].sz = ssz; 3220 } 3221 3222 return ROFF_IGN; 3223 } 3224 3225 static enum rofferr 3226 roff_rn(ROFF_ARGS) 3227 { 3228 const char *value; 3229 char *oldn, *newn, *end; 3230 size_t oldsz, newsz; 3231 int deftype; 3232 3233 oldn = newn = buf->buf + pos; 3234 if (*oldn == '\0') 3235 return ROFF_IGN; 3236 3237 oldsz = roff_getname(r, &newn, 
ln, pos); 3238 if (oldn[oldsz] == '\\' || *newn == '\0') 3239 return ROFF_IGN; 3240 3241 end = newn; 3242 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3243 if (newsz == 0) 3244 return ROFF_IGN; 3245 3246 deftype = ROFFDEF_ANY; 3247 value = roff_getstrn(r, oldn, oldsz, &deftype); 3248 switch (deftype) { 3249 case ROFFDEF_USER: 3250 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3251 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3252 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3253 break; 3254 case ROFFDEF_PRE: 3255 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3256 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3257 break; 3258 case ROFFDEF_REN: 3259 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3260 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3261 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3262 break; 3263 case ROFFDEF_STD: 3264 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3265 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3266 break; 3267 default: 3268 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3269 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3270 break; 3271 } 3272 return ROFF_IGN; 3273 } 3274 3275 static enum rofferr 3276 roff_so(ROFF_ARGS) 3277 { 3278 char *name, *cp; 3279 3280 name = buf->buf + pos; 3281 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name); 3282 3283 /* 3284 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3285 * opening anything that's not in our cwd or anything beneath 3286 * it. Thus, explicitly disallow traversing up the file-system 3287 * or using absolute paths. 
3288 */ 3289 3290 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3291 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos, 3292 ".so %s", name); 3293 buf->sz = mandoc_asprintf(&cp, 3294 ".sp\nSee the file %s.\n.sp", name) + 1; 3295 free(buf->buf); 3296 buf->buf = cp; 3297 *offs = 0; 3298 return ROFF_REPARSE; 3299 } 3300 3301 *offs = pos; 3302 return ROFF_SO; 3303 } 3304 3305 /* --- user defined strings and macros ------------------------------------ */ 3306 3307 static enum rofferr 3308 roff_userdef(ROFF_ARGS) 3309 { 3310 const char *arg[16], *ap; 3311 char *cp, *n1, *n2; 3312 int expand_count, i, ib, ie; 3313 size_t asz, rsz; 3314 3315 /* 3316 * Collect pointers to macro argument strings 3317 * and NUL-terminate them. 3318 */ 3319 3320 r->argc = 0; 3321 cp = buf->buf + pos; 3322 for (i = 0; i < 16; i++) { 3323 if (*cp == '\0') 3324 arg[i] = ""; 3325 else { 3326 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos); 3327 r->argc = i + 1; 3328 } 3329 } 3330 3331 /* 3332 * Expand macro arguments. 3333 */ 3334 3335 buf->sz = strlen(r->current_string) + 1; 3336 n1 = n2 = cp = mandoc_malloc(buf->sz); 3337 memcpy(n1, r->current_string, buf->sz); 3338 expand_count = 0; 3339 while (*cp != '\0') { 3340 3341 /* Scan ahead for the next argument invocation. */ 3342 3343 if (*cp++ != '\\') 3344 continue; 3345 if (*cp++ != '$') 3346 continue; 3347 if (*cp == '*') { /* \\$* inserts all arguments */ 3348 ib = 0; 3349 ie = r->argc - 1; 3350 } else { /* \\$1 .. \\$9 insert one argument */ 3351 ib = ie = *cp - '1'; 3352 if (ib < 0 || ib > 8) 3353 continue; 3354 } 3355 cp -= 2; 3356 3357 /* 3358 * Prevent infinite recursion. 
3359 */ 3360 3361 if (cp >= n2) 3362 expand_count = 1; 3363 else if (++expand_count > EXPAND_LIMIT) { 3364 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 3365 ln, (int)(cp - n1), NULL); 3366 free(buf->buf); 3367 buf->buf = n1; 3368 *offs = 0; 3369 return ROFF_IGN; 3370 } 3371 3372 /* 3373 * Determine the size of the expanded argument, 3374 * taking escaping of quotes into account. 3375 */ 3376 3377 asz = ie > ib ? ie - ib : 0; /* for blanks */ 3378 for (i = ib; i <= ie; i++) { 3379 for (ap = arg[i]; *ap != '\0'; ap++) { 3380 asz++; 3381 if (*ap == '"') 3382 asz += 3; 3383 } 3384 } 3385 if (asz != 3) { 3386 3387 /* 3388 * Determine the size of the rest of the 3389 * unexpanded macro, including the NUL. 3390 */ 3391 3392 rsz = buf->sz - (cp - n1) - 3; 3393 3394 /* 3395 * When shrinking, move before 3396 * releasing the storage. 3397 */ 3398 3399 if (asz < 3) 3400 memmove(cp + asz, cp + 3, rsz); 3401 3402 /* 3403 * Resize the storage for the macro 3404 * and readjust the parse pointer. 3405 */ 3406 3407 buf->sz += asz - 3; 3408 n2 = mandoc_realloc(n1, buf->sz); 3409 cp = n2 + (cp - n1); 3410 n1 = n2; 3411 3412 /* 3413 * When growing, make room 3414 * for the expanded argument. 3415 */ 3416 3417 if (asz > 3) 3418 memmove(cp + asz, cp + 3, rsz); 3419 } 3420 3421 /* Copy the expanded argument, escaping quotes. */ 3422 3423 n2 = cp; 3424 for (i = ib; i <= ie; i++) { 3425 for (ap = arg[i]; *ap != '\0'; ap++) { 3426 if (*ap == '"') { 3427 memcpy(n2, "\\(dq", 4); 3428 n2 += 4; 3429 } else 3430 *n2++ = *ap; 3431 } 3432 if (i < ie) 3433 *n2++ = ' '; 3434 } 3435 } 3436 3437 /* 3438 * Replace the macro invocation 3439 * by the expanded macro. 3440 */ 3441 3442 free(buf->buf); 3443 buf->buf = n1; 3444 *offs = 0; 3445 3446 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? 3447 ROFF_REPARSE : ROFF_APPEND; 3448 } 3449 3450 /* 3451 * Calling a high-level macro that was renamed with .rn. 3452 * r->current_string has already been set up by roff_parse(). 
3453 */ 3454 static enum rofferr 3455 roff_renamed(ROFF_ARGS) 3456 { 3457 char *nbuf; 3458 3459 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3460 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3461 free(buf->buf); 3462 buf->buf = nbuf; 3463 *offs = 0; 3464 return ROFF_CONT; 3465 } 3466 3467 static size_t 3468 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3469 { 3470 char *name, *cp; 3471 size_t namesz; 3472 3473 name = *cpp; 3474 if ('\0' == *name) 3475 return 0; 3476 3477 /* Read until end of name and terminate it with NUL. */ 3478 for (cp = name; 1; cp++) { 3479 if ('\0' == *cp || ' ' == *cp) { 3480 namesz = cp - name; 3481 break; 3482 } 3483 if ('\\' != *cp) 3484 continue; 3485 namesz = cp - name; 3486 if ('{' == cp[1] || '}' == cp[1]) 3487 break; 3488 cp++; 3489 if ('\\' == *cp) 3490 continue; 3491 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos, 3492 "%.*s", (int)(cp - name + 1), name); 3493 mandoc_escape((const char **)&cp, NULL, NULL); 3494 break; 3495 } 3496 3497 /* Read past spaces. */ 3498 while (' ' == *cp) 3499 cp++; 3500 3501 *cpp = cp; 3502 return namesz; 3503 } 3504 3505 /* 3506 * Store *string into the user-defined string called *name. 3507 * To clear an existing entry, call with (*r, *name, NULL, 0). 3508 * append == 0: replace mode 3509 * append == 1: single-line append mode 3510 * append == 2: multiline append mode, append '\n' after each call 3511 */ 3512 static void 3513 roff_setstr(struct roff *r, const char *name, const char *string, 3514 int append) 3515 { 3516 size_t namesz; 3517 3518 namesz = strlen(name); 3519 roff_setstrn(&r->strtab, name, namesz, string, 3520 string ? 
strlen(string) : 0, append); 3521 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3522 } 3523 3524 static void 3525 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3526 const char *string, size_t stringsz, int append) 3527 { 3528 struct roffkv *n; 3529 char *c; 3530 int i; 3531 size_t oldch, newch; 3532 3533 /* Search for an existing string with the same name. */ 3534 n = *r; 3535 3536 while (n && (namesz != n->key.sz || 3537 strncmp(n->key.p, name, namesz))) 3538 n = n->next; 3539 3540 if (NULL == n) { 3541 /* Create a new string table entry. */ 3542 n = mandoc_malloc(sizeof(struct roffkv)); 3543 n->key.p = mandoc_strndup(name, namesz); 3544 n->key.sz = namesz; 3545 n->val.p = NULL; 3546 n->val.sz = 0; 3547 n->next = *r; 3548 *r = n; 3549 } else if (0 == append) { 3550 free(n->val.p); 3551 n->val.p = NULL; 3552 n->val.sz = 0; 3553 } 3554 3555 if (NULL == string) 3556 return; 3557 3558 /* 3559 * One additional byte for the '\n' in multiline mode, 3560 * and one for the terminating '\0'. 3561 */ 3562 newch = stringsz + (1 < append ? 2u : 1u); 3563 3564 if (NULL == n->val.p) { 3565 n->val.p = mandoc_malloc(newch); 3566 *n->val.p = '\0'; 3567 oldch = 0; 3568 } else { 3569 oldch = n->val.sz; 3570 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 3571 } 3572 3573 /* Skip existing content in the destination buffer. */ 3574 c = n->val.p + (int)oldch; 3575 3576 /* Append new content to the destination buffer. */ 3577 i = 0; 3578 while (i < (int)stringsz) { 3579 /* 3580 * Rudimentary roff copy mode: 3581 * Handle escaped backslashes. 3582 */ 3583 if ('\\' == string[i] && '\\' == string[i + 1]) 3584 i++; 3585 *c++ = string[i++]; 3586 } 3587 3588 /* Append terminating bytes. 
*/ 3589 if (1 < append) 3590 *c++ = '\n'; 3591 3592 *c = '\0'; 3593 n->val.sz = (int)(c - n->val.p); 3594 } 3595 3596 static const char * 3597 roff_getstrn(struct roff *r, const char *name, size_t len, 3598 int *deftype) 3599 { 3600 const struct roffkv *n; 3601 int found, i; 3602 enum roff_tok tok; 3603 3604 found = 0; 3605 for (n = r->strtab; n != NULL; n = n->next) { 3606 if (strncmp(name, n->key.p, len) != 0 || 3607 n->key.p[len] != '\0' || n->val.p == NULL) 3608 continue; 3609 if (*deftype & ROFFDEF_USER) { 3610 *deftype = ROFFDEF_USER; 3611 return n->val.p; 3612 } else { 3613 found = 1; 3614 break; 3615 } 3616 } 3617 for (n = r->rentab; n != NULL; n = n->next) { 3618 if (strncmp(name, n->key.p, len) != 0 || 3619 n->key.p[len] != '\0' || n->val.p == NULL) 3620 continue; 3621 if (*deftype & ROFFDEF_REN) { 3622 *deftype = ROFFDEF_REN; 3623 return n->val.p; 3624 } else { 3625 found = 1; 3626 break; 3627 } 3628 } 3629 for (i = 0; i < PREDEFS_MAX; i++) { 3630 if (strncmp(name, predefs[i].name, len) != 0 || 3631 predefs[i].name[len] != '\0') 3632 continue; 3633 if (*deftype & ROFFDEF_PRE) { 3634 *deftype = ROFFDEF_PRE; 3635 return predefs[i].str; 3636 } else { 3637 found = 1; 3638 break; 3639 } 3640 } 3641 if (r->man->macroset != MACROSET_MAN) { 3642 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 3643 if (strncmp(name, roff_name[tok], len) != 0 || 3644 roff_name[tok][len] != '\0') 3645 continue; 3646 if (*deftype & ROFFDEF_STD) { 3647 *deftype = ROFFDEF_STD; 3648 return NULL; 3649 } else { 3650 found = 1; 3651 break; 3652 } 3653 } 3654 } 3655 if (r->man->macroset != MACROSET_MDOC) { 3656 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 3657 if (strncmp(name, roff_name[tok], len) != 0 || 3658 roff_name[tok][len] != '\0') 3659 continue; 3660 if (*deftype & ROFFDEF_STD) { 3661 *deftype = ROFFDEF_STD; 3662 return NULL; 3663 } else { 3664 found = 1; 3665 break; 3666 } 3667 } 3668 } 3669 3670 if (found == 0 && *deftype != ROFFDEF_ANY) { 3671 if (*deftype & ROFFDEF_REN) { 3672 /* 
3673 * This might still be a request, 3674 * so do not treat it as undefined yet. 3675 */ 3676 *deftype = ROFFDEF_UNDEF; 3677 return NULL; 3678 } 3679 3680 /* Using an undefined string defines it to be empty. */ 3681 3682 roff_setstrn(&r->strtab, name, len, "", 0, 0); 3683 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 3684 } 3685 3686 *deftype = 0; 3687 return NULL; 3688 } 3689 3690 static void 3691 roff_freestr(struct roffkv *r) 3692 { 3693 struct roffkv *n, *nn; 3694 3695 for (n = r; n; n = nn) { 3696 free(n->key.p); 3697 free(n->val.p); 3698 nn = n->next; 3699 free(n); 3700 } 3701 } 3702 3703 /* --- accessors and utility functions ------------------------------------ */ 3704 3705 /* 3706 * Duplicate an input string, making the appropriate character 3707 * conversations (as stipulated by `tr') along the way. 3708 * Returns a heap-allocated string with all the replacements made. 3709 */ 3710 char * 3711 roff_strdup(const struct roff *r, const char *p) 3712 { 3713 const struct roffkv *cp; 3714 char *res; 3715 const char *pp; 3716 size_t ssz, sz; 3717 enum mandoc_esc esc; 3718 3719 if (NULL == r->xmbtab && NULL == r->xtab) 3720 return mandoc_strdup(p); 3721 else if ('\0' == *p) 3722 return mandoc_strdup(""); 3723 3724 /* 3725 * Step through each character looking for term matches 3726 * (remember that a `tr' can be invoked with an escape, which is 3727 * a glyph but the escape is multi-character). 3728 * We only do this if the character hash has been initialised 3729 * and the string is >0 length. 
3730 */ 3731 3732 res = NULL; 3733 ssz = 0; 3734 3735 while ('\0' != *p) { 3736 assert((unsigned int)*p < 128); 3737 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 3738 sz = r->xtab[(int)*p].sz; 3739 res = mandoc_realloc(res, ssz + sz + 1); 3740 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 3741 ssz += sz; 3742 p++; 3743 continue; 3744 } else if ('\\' != *p) { 3745 res = mandoc_realloc(res, ssz + 2); 3746 res[ssz++] = *p++; 3747 continue; 3748 } 3749 3750 /* Search for term matches. */ 3751 for (cp = r->xmbtab; cp; cp = cp->next) 3752 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 3753 break; 3754 3755 if (NULL != cp) { 3756 /* 3757 * A match has been found. 3758 * Append the match to the array and move 3759 * forward by its keysize. 3760 */ 3761 res = mandoc_realloc(res, 3762 ssz + cp->val.sz + 1); 3763 memcpy(res + ssz, cp->val.p, cp->val.sz); 3764 ssz += cp->val.sz; 3765 p += (int)cp->key.sz; 3766 continue; 3767 } 3768 3769 /* 3770 * Handle escapes carefully: we need to copy 3771 * over just the escape itself, or else we might 3772 * do replacements within the escape itself. 3773 * Make sure to pass along the bogus string. 3774 */ 3775 pp = p++; 3776 esc = mandoc_escape(&p, NULL, NULL); 3777 if (ESCAPE_ERROR == esc) { 3778 sz = strlen(pp); 3779 res = mandoc_realloc(res, ssz + sz + 1); 3780 memcpy(res + ssz, pp, sz); 3781 break; 3782 } 3783 /* 3784 * We bail out on bad escapes. 3785 * No need to warn: we already did so when 3786 * roff_res() was called. 3787 */ 3788 sz = (int)(p - pp); 3789 res = mandoc_realloc(res, ssz + sz + 1); 3790 memcpy(res + ssz, pp, sz); 3791 ssz += sz; 3792 } 3793 3794 res[(int)ssz] = '\0'; 3795 return res; 3796 } 3797 3798 int 3799 roff_getformat(const struct roff *r) 3800 { 3801 3802 return r->format; 3803 } 3804 3805 /* 3806 * Find out whether a line is a macro line or not. 3807 * If it is, adjust the current position and return one; if it isn't, 3808 * return zero and don't change the current position. 
3809 * If the control character has been set with `.cc', then let that grain 3810 * precedence. 3811 * This is slighly contrary to groff, where using the non-breaking 3812 * control character when `cc' has been invoked will cause the 3813 * non-breaking macro contents to be printed verbatim. 3814 */ 3815 int 3816 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 3817 { 3818 int pos; 3819 3820 pos = *ppos; 3821 3822 if (r->control != '\0' && cp[pos] == r->control) 3823 pos++; 3824 else if (r->control != '\0') 3825 return 0; 3826 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 3827 pos += 2; 3828 else if ('.' == cp[pos] || '\'' == cp[pos]) 3829 pos++; 3830 else 3831 return 0; 3832 3833 while (' ' == cp[pos] || '\t' == cp[pos]) 3834 pos++; 3835 3836 *ppos = pos; 3837 return 1; 3838 } 3839