1 /* $Id: roff.c,v 1.400 2023/10/24 20:53:12 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the roff(7) parser for mandoc(1). 19 */ 20 #include "config.h" 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <limits.h> 27 #include <stddef.h> 28 #include <stdint.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 33 #include "mandoc_aux.h" 34 #include "mandoc_ohash.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mandoc_parse.h" 38 #include "libmandoc.h" 39 #include "roff_int.h" 40 #include "tbl_parse.h" 41 #include "eqn_parse.h" 42 43 /* Maximum number of string expansions per line, to break infinite loops. */ 44 #define EXPAND_LIMIT 1000 45 46 /* Types of definitions of macros and strings. */ 47 #define ROFFDEF_USER (1 << 1) /* User-defined. */ 48 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 49 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 50 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 51 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 52 ROFFDEF_REN | ROFFDEF_STD) 53 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 54 55 /* --- data types --------------------------------------------------------- */ 56 57 /* 58 * An incredibly-simple string buffer. 59 */ 60 struct roffstr { 61 char *p; /* nil-terminated buffer */ 62 size_t sz; /* saved strlen(p) */ 63 }; 64 65 /* 66 * A key-value roffstr pair as part of a singly-linked list. 67 */ 68 struct roffkv { 69 struct roffstr key; 70 struct roffstr val; 71 struct roffkv *next; /* next in list */ 72 }; 73 74 /* 75 * A single number register as part of a singly-linked list. 76 */ 77 struct roffreg { 78 struct roffstr key; 79 int val; 80 int step; 81 struct roffreg *next; 82 }; 83 84 /* 85 * Association of request and macro names with token IDs. 86 */ 87 struct roffreq { 88 enum roff_tok tok; 89 char name[]; 90 }; 91 92 /* 93 * A macro processing context. 94 * More than one is needed when macro calls are nested. 95 */ 96 struct mctx { 97 char **argv; 98 int argc; 99 int argsz; 100 }; 101 102 struct roff { 103 struct roff_man *man; /* mdoc or man parser */ 104 struct roffnode *last; /* leaf of stack */ 105 struct mctx *mstack; /* stack of macro contexts */ 106 int *rstack; /* stack of inverted `ie' values */ 107 struct ohash *reqtab; /* request lookup table */ 108 struct roffreg *regtab; /* number registers */ 109 struct roffkv *strtab; /* user-defined strings & macros */ 110 struct roffkv *rentab; /* renamed strings & macros */ 111 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 112 struct roffstr *xtab; /* single-byte trans table (`tr') */ 113 const char *current_string; /* value of last called user macro */ 114 struct tbl_node *first_tbl; /* first table parsed */ 115 struct tbl_node *last_tbl; /* last table parsed */ 116 struct tbl_node *tbl; /* current table being parsed */ 117 struct eqn_node *last_eqn; /* equation parser */ 118 struct eqn_node *eqn; /* active equation parser */ 119 int eqn_inline; /* current equation is inline */ 120 int options; /* parse options */ 121 int mstacksz; /* current size of mstack */ 122 int mstackpos; /* position in mstack */ 123 int rstacksz; /* current size limit of rstack */ 124 int rstackpos; /* position in rstack */ 125 int format; /* current file in mdoc or man format */ 126 char control; /* control character */ 127 char escape; /* escape character */ 128 }; 129 130 /* 131 * A macro definition, condition, or ignored block. 132 */ 133 struct roffnode { 134 enum roff_tok tok; /* type of node */ 135 struct roffnode *parent; /* up one in stack */ 136 int line; /* parse line */ 137 int col; /* parse col */ 138 char *name; /* node name, e.g. macro name */ 139 char *end; /* custom end macro of the block */ 140 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 141 int rule; /* content is: 1=evaluated 0=skipped */ 142 }; 143 144 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 145 enum roff_tok tok, /* tok of macro */ \ 146 struct buf *buf, /* input buffer */ \ 147 int ln, /* parse line */ \ 148 int ppos, /* original pos in buffer */ \ 149 int pos, /* current pos in buffer */ \ 150 int *offs /* reset offset of buffer data */ 151 152 typedef int (*roffproc)(ROFF_ARGS); 153 154 struct roffmac { 155 roffproc proc; /* process new macro */ 156 roffproc text; /* process as child text of macro */ 157 roffproc sub; /* process as child of macro */ 158 int flags; 159 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 160 }; 161 162 struct predef { 163 const char *name; /* predefined input name */ 164 const char *str; /* replacement symbol */ 165 }; 166 167 #define PREDEF(__name, __str) \ 168 { (__name), (__str) }, 169 170 /* --- function prototypes ------------------------------------------------ */ 171 172 static int roffnode_cleanscope(struct roff *); 173 static int roffnode_pop(struct roff *); 174 static void roffnode_push(struct roff *, enum roff_tok, 175 const char *, int, int); 176 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 177 static int roff_als(ROFF_ARGS); 178 static int roff_block(ROFF_ARGS); 179 static int roff_block_text(ROFF_ARGS); 180 static int roff_block_sub(ROFF_ARGS); 181 static int roff_break(ROFF_ARGS); 182 static int roff_cblock(ROFF_ARGS); 183 static int roff_cc(ROFF_ARGS); 184 static int roff_ccond(struct roff *, int, int); 185 static int roff_char(ROFF_ARGS); 186 static int roff_cond(ROFF_ARGS); 187 static int roff_cond_checkend(ROFF_ARGS); 188 static int roff_cond_text(ROFF_ARGS); 189 static int roff_cond_sub(ROFF_ARGS); 190 static int roff_ds(ROFF_ARGS); 191 static int roff_ec(ROFF_ARGS); 192 static int roff_eo(ROFF_ARGS); 193 static int roff_eqndelim(struct roff *, struct buf *, int); 194 static int roff_evalcond(struct roff *, int, char *, int *); 195 static int roff_evalnum(struct roff *, int, 196 const char *, int *, int *, int); 197 static int roff_evalpar(struct roff *, int, 198 const char *, int *, int *, int); 199 static int roff_evalstrcond(const char *, int *); 200 static int roff_expand(struct roff *, struct buf *, 201 int, int, char); 202 static void roff_expand_patch(struct buf *, int, 203 const char *, int); 204 static void roff_free1(struct roff *); 205 static void roff_freereg(struct roffreg *); 206 static void roff_freestr(struct roffkv *); 207 static size_t roff_getname(struct roff *, char **, int, int); 208 static int roff_getnum(const char *, int *, int *, int); 209 static int roff_getop(const char *, int *, char *); 210 static int roff_getregn(struct roff *, 211 const char *, size_t, char); 212 static int roff_getregro(const struct roff *, 213 const char *name); 214 static const char *roff_getstrn(struct roff *, 215 const char *, size_t, int *); 216 static int roff_hasregn(const struct roff *, 217 const char *, size_t); 218 static int roff_insec(ROFF_ARGS); 219 static int roff_it(ROFF_ARGS); 220 static int roff_line_ignore(ROFF_ARGS); 221 static void roff_man_alloc1(struct roff_man *); 222 static void roff_man_free1(struct roff_man *); 223 static int roff_manyarg(ROFF_ARGS); 224 static int roff_mc(ROFF_ARGS); 225 static int roff_noarg(ROFF_ARGS); 226 static int roff_nop(ROFF_ARGS); 227 static int roff_nr(ROFF_ARGS); 228 static int roff_onearg(ROFF_ARGS); 229 static enum roff_tok roff_parse(struct roff *, char *, int *, 230 int, int); 231 static int roff_parse_comment(struct roff *, struct buf *, 232 int, int, char); 233 static int roff_parsetext(struct roff *, struct buf *, 234 int, int *); 235 static int roff_renamed(ROFF_ARGS); 236 static int roff_req_or_macro(ROFF_ARGS); 237 static int roff_return(ROFF_ARGS); 238 static int roff_rm(ROFF_ARGS); 239 static int roff_rn(ROFF_ARGS); 240 static int roff_rr(ROFF_ARGS); 241 static void roff_setregn(struct roff *, const char *, 242 size_t, int, char, int); 243 static void roff_setstr(struct roff *, 244 const char *, const char *, int); 245 static void roff_setstrn(struct roffkv **, const char *, 246 size_t, const char *, size_t, int); 247 static int roff_shift(ROFF_ARGS); 248 static int roff_so(ROFF_ARGS); 249 static int roff_tr(ROFF_ARGS); 250 static int roff_Dd(ROFF_ARGS); 251 static int roff_TE(ROFF_ARGS); 252 static int roff_TS(ROFF_ARGS); 253 static int roff_EQ(ROFF_ARGS); 254 static int roff_EN(ROFF_ARGS); 255 static int roff_T_(ROFF_ARGS); 256 static int roff_unsupp(ROFF_ARGS); 257 static int roff_userdef(ROFF_ARGS); 258 259 /* --- constant data ------------------------------------------------------ */ 260 261 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 262 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 263 264 const char *__roff_name[MAN_MAX + 1] = { 265 "br", "ce", "fi", "ft", 266 "ll", "mc", "nf", 267 "po", "rj", "sp", 268 "ta", "ti", NULL, 269 "ab", "ad", "af", "aln", 270 "als", "am", "am1", "ami", 271 "ami1", "as", "as1", "asciify", 272 "backtrace", "bd", "bleedat", "blm", 273 "box", "boxa", "bp", "BP", 274 "break", "breakchar", "brnl", "brp", 275 "brpnl", "c2", "cc", 276 "cf", "cflags", "ch", "char", 277 "chop", "class", "close", "CL", 278 "color", "composite", "continue", "cp", 279 "cropat", "cs", "cu", "da", 280 "dch", "Dd", "de", "de1", 281 "defcolor", "dei", "dei1", "device", 282 "devicem", "di", "do", "ds", 283 "ds1", "dwh", "dt", "ec", 284 "ecr", "ecs", "el", "em", 285 "EN", "eo", "EP", "EQ", 286 "errprint", "ev", "evc", "ex", 287 "fallback", "fam", "fc", "fchar", 288 "fcolor", "fdeferlig", "feature", "fkern", 289 "fl", "flig", "fp", "fps", 290 "fschar", "fspacewidth", "fspecial", "ftr", 291 "fzoom", "gcolor", "hc", "hcode", 292 "hidechar", "hla", "hlm", "hpf", 293 "hpfa", "hpfcode", "hw", "hy", 294 "hylang", "hylen", "hym", "hypp", 295 "hys", "ie", "if", "ig", 296 "index", "it", "itc", "IX", 297 "kern", "kernafter", "kernbefore", "kernpair", 298 "lc", "lc_ctype", "lds", "length", 299 "letadj", "lf", "lg", "lhang", 300 "linetabs", "lnr", "lnrf", "lpfx", 301 "ls", "lsm", "lt", 302 "mediasize", "minss", "mk", "mso", 303 "na", "ne", "nh", "nhychar", 304 "nm", "nn", "nop", "nr", 305 "nrf", "nroff", "ns", "nx", 306 "open", "opena", "os", "output", 307 "padj", "papersize", "pc", "pev", 308 "pi", "PI", "pl", "pm", 309 "pn", "pnr", "ps", 310 "psbb", "pshape", "pso", "ptr", 311 "pvs", "rchar", "rd", "recursionlimit", 312 "return", "rfschar", "rhang", 313 "rm", "rn", "rnn", "rr", 314 "rs", "rt", "schar", "sentchar", 315 "shc", "shift", "sizes", "so", 316 "spacewidth", "special", "spreadwarn", "ss", 317 "sty", "substring", "sv", "sy", 318 "T&", "tc", "TE", 319 "TH", "tkf", "tl", 320 "tm", "tm1", "tmc", "tr", 321 "track", "transchar", "trf", "trimat", 322 "trin", "trnt", "troff", "TS", 323 "uf", "ul", "unformat", "unwatch", 324 "unwatchn", "vpt", "vs", "warn", 325 "warnscale", "watch", "watchlength", "watchn", 326 "wh", "while", "write", "writec", 327 "writem", "xflag", ".", NULL, 328 NULL, "text", 329 "Dd", "Dt", "Os", "Sh", 330 "Ss", "Pp", "D1", "Dl", 331 "Bd", "Ed", "Bl", "El", 332 "It", "Ad", "An", "Ap", 333 "Ar", "Cd", "Cm", "Dv", 334 "Er", "Ev", "Ex", "Fa", 335 "Fd", "Fl", "Fn", "Ft", 336 "Ic", "In", "Li", "Nd", 337 "Nm", "Op", "Ot", "Pa", 338 "Rv", "St", "Va", "Vt", 339 "Xr", "%A", "%B", "%D", 340 "%I", "%J", "%N", "%O", 341 "%P", "%R", "%T", "%V", 342 "Ac", "Ao", "Aq", "At", 343 "Bc", "Bf", "Bo", "Bq", 344 "Bsx", "Bx", "Db", "Dc", 345 "Do", "Dq", "Ec", "Ef", 346 "Em", "Eo", "Fx", "Ms", 347 "No", "Ns", "Nx", "Ox", 348 "Pc", "Pf", "Po", "Pq", 349 "Qc", "Ql", "Qo", "Qq", 350 "Re", "Rs", "Sc", "So", 351 "Sq", "Sm", "Sx", "Sy", 352 "Tn", "Ux", "Xc", "Xo", 353 "Fo", "Fc", "Oo", "Oc", 354 "Bk", "Ek", "Bt", "Hf", 355 "Fr", "Ud", "Lb", "Lp", 356 "Lk", "Mt", "Brq", "Bro", 357 "Brc", "%C", "Es", "En", 358 "Dx", "%Q", "%U", "Ta", 359 "Tg", NULL, 360 "TH", "SH", "SS", "TP", 361 "TQ", 362 "LP", "PP", "P", "IP", 363 "HP", "SM", "SB", "BI", 364 "IB", "BR", "RB", "R", 365 "B", "I", "IR", "RI", 366 "RE", "RS", "DT", "UC", 367 "PD", "AT", "in", 368 "SY", "YS", "OP", 369 "EX", "EE", "UR", 370 "UE", "MT", "ME", "MR", 371 NULL 372 }; 373 const char *const *roff_name = __roff_name; 374 375 static struct roffmac roffs[TOKEN_NONE] = { 376 { roff_noarg, NULL, NULL, 0 }, /* br */ 377 { roff_onearg, NULL, NULL, 0 }, /* ce */ 378 { roff_noarg, NULL, NULL, 0 }, /* fi */ 379 { roff_onearg, NULL, NULL, 0 }, /* ft */ 380 { roff_onearg, NULL, NULL, 0 }, /* ll */ 381 { roff_mc, NULL, NULL, 0 }, /* mc */ 382 { roff_noarg, NULL, NULL, 0 }, /* nf */ 383 { roff_onearg, NULL, NULL, 0 }, /* po */ 384 { roff_onearg, NULL, NULL, 0 }, /* rj */ 385 { roff_onearg, NULL, NULL, 0 }, /* sp */ 386 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 387 { roff_onearg, NULL, NULL, 0 }, /* ti */ 388 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 389 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 390 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 391 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 392 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 393 { roff_als, NULL, NULL, 0 }, /* als */ 394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 396 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 397 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 398 { roff_ds, NULL, NULL, 0 }, /* as */ 399 { roff_ds, NULL, NULL, 0 }, /* as1 */ 400 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 401 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 402 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 403 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 404 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 405 { roff_unsupp, NULL, NULL, 0 }, /* box */ 406 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 407 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 408 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 409 { roff_break, NULL, NULL, 0 }, /* break */ 410 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 411 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 412 { roff_noarg, NULL, NULL, 0 }, /* brp */ 413 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 414 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 415 { roff_cc, NULL, NULL, 0 }, /* cc */ 416 { roff_insec, NULL, NULL, 0 }, /* cf */ 417 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 418 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 419 { roff_char, NULL, NULL, 0 }, /* char */ 420 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 421 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 422 { roff_insec, NULL, NULL, 0 }, /* close */ 423 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 424 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 425 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 426 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 427 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 428 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 429 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 430 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 431 { roff_unsupp, NULL, NULL, 0 }, /* da */ 432 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 433 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 434 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 435 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 436 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 437 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 438 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 439 { roff_unsupp, NULL, NULL, 0 }, /* device */ 440 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 441 { roff_unsupp, NULL, NULL, 0 }, /* di */ 442 { roff_unsupp, NULL, NULL, 0 }, /* do */ 443 { roff_ds, NULL, NULL, 0 }, /* ds */ 444 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 445 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 446 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 447 { roff_ec, NULL, NULL, 0 }, /* ec */ 448 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 449 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 450 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 451 { roff_unsupp, NULL, NULL, 0 }, /* em */ 452 { roff_EN, NULL, NULL, 0 }, /* EN */ 453 { roff_eo, NULL, NULL, 0 }, /* eo */ 454 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 455 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 456 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 457 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 458 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 459 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 460 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 461 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 462 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 463 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 472 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 489 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 490 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 491 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 492 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 493 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 494 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 495 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 496 { roff_unsupp, NULL, NULL, 0 }, /* index */ 497 { roff_it, NULL, NULL, 0 }, /* it */ 498 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 499 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 500 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 501 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 502 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 503 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 504 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 505 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 506 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 507 { roff_unsupp, NULL, NULL, 0 }, /* length */ 508 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 509 { roff_insec, NULL, NULL, 0 }, /* lf */ 510 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 511 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 512 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 513 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 514 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 515 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 516 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 517 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 520 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 521 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 522 { roff_insec, NULL, NULL, 0 }, /* mso */ 523 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 524 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 525 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 526 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 527 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 528 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 529 { roff_nop, NULL, NULL, 0 }, /* nop */ 530 { roff_nr, NULL, NULL, 0 }, /* nr */ 531 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 532 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 533 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 534 { roff_insec, NULL, NULL, 0 }, /* nx */ 535 { roff_insec, NULL, NULL, 0 }, /* open */ 536 { roff_insec, NULL, NULL, 0 }, /* opena */ 537 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 538 { roff_unsupp, NULL, NULL, 0 }, /* output */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 541 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 542 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 543 { roff_insec, NULL, NULL, 0 }, /* pi */ 544 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 546 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 547 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 548 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 549 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 550 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 551 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 552 { roff_insec, NULL, NULL, 0 }, /* pso */ 553 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 554 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 555 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 556 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 557 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 558 { roff_return, NULL, NULL, 0 }, /* return */ 559 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 560 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 561 { roff_rm, NULL, NULL, 0 }, /* rm */ 562 { roff_rn, NULL, NULL, 0 }, /* rn */ 563 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 564 { roff_rr, NULL, NULL, 0 }, /* rr */ 565 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 566 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 567 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 568 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 570 { roff_shift, NULL, NULL, 0 }, /* shift */ 571 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 572 { roff_so, NULL, NULL, 0 }, /* so */ 573 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 574 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 575 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 576 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 577 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 578 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 579 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 580 { roff_insec, NULL, NULL, 0 }, /* sy */ 581 { roff_T_, NULL, NULL, 0 }, /* T& */ 582 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 583 { roff_TE, NULL, NULL, 0 }, /* TE */ 584 { roff_Dd, NULL, NULL, 0 }, /* TH */ 585 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 586 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 587 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 588 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 589 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 590 { roff_tr, NULL, NULL, 0 }, /* tr */ 591 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 592 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 593 { roff_insec, NULL, NULL, 0 }, /* trf */ 594 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 595 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 596 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 597 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 598 { roff_TS, NULL, NULL, 0 }, /* TS */ 599 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 601 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 607 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 609 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 610 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 611 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 612 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 613 { roff_insec, NULL, NULL, 0 }, /* write */ 614 { roff_insec, NULL, NULL, 0 }, /* writec */ 615 { roff_insec, NULL, NULL, 0 }, /* writem */ 616 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 617 { roff_cblock, NULL, NULL, 0 }, /* . */ 618 { roff_renamed, NULL, NULL, 0 }, 619 { roff_userdef, NULL, NULL, 0 } 620 }; 621 622 /* Array of injected predefined strings. */ 623 #define PREDEFS_MAX 38 624 static const struct predef predefs[PREDEFS_MAX] = { 625 #include "predefs.in" 626 }; 627 628 static int roffce_lines; /* number of input lines to center */ 629 static struct roff_node *roffce_node; /* active request */ 630 static int roffit_lines; /* number of lines to delay */ 631 static char *roffit_macro; /* nil-terminated macro line */ 632 633 634 /* --- request table ------------------------------------------------------ */ 635 636 struct ohash * 637 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 638 { 639 struct ohash *htab; 640 struct roffreq *req; 641 enum roff_tok tok; 642 size_t sz; 643 unsigned int slot; 644 645 htab = mandoc_malloc(sizeof(*htab)); 646 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 647 648 for (tok = mintok; tok < maxtok; tok++) { 649 if (roff_name[tok] == NULL) 650 continue; 651 sz = strlen(roff_name[tok]); 652 req = mandoc_malloc(sizeof(*req) + sz + 1); 653 req->tok = tok; 654 memcpy(req->name, roff_name[tok], sz + 1); 655 slot = ohash_qlookup(htab, req->name); 656 ohash_insert(htab, slot, req); 657 } 658 return htab; 659 } 660 661 void 662 roffhash_free(struct ohash *htab) 663 { 664 struct roffreq *req; 665 unsigned int slot; 666 667 if (htab == NULL) 668 return; 669 for (req = ohash_first(htab, &slot); req != NULL; 670 req = ohash_next(htab, &slot)) 671 free(req); 672 ohash_delete(htab); 673 free(htab); 674 } 675 676 enum roff_tok 677 roffhash_find(struct ohash *htab, const char *name, size_t sz) 678 { 679 struct roffreq *req; 680 const char *end; 681 682 if (sz) { 683 end = name + sz; 684 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 685 } else 686 req = ohash_find(htab, ohash_qlookup(htab, name)); 687 return req == NULL ? TOKEN_NONE : req->tok; 688 } 689 690 /* --- stack of request blocks -------------------------------------------- */ 691 692 /* 693 * Pop the current node off of the stack of roff instructions currently 694 * pending. Return 1 if it is a loop or 0 otherwise. 695 */ 696 static int 697 roffnode_pop(struct roff *r) 698 { 699 struct roffnode *p; 700 int inloop; 701 702 p = r->last; 703 inloop = p->tok == ROFF_while; 704 r->last = p->parent; 705 free(p->name); 706 free(p->end); 707 free(p); 708 return inloop; 709 } 710 711 /* 712 * Push a roff node onto the instruction stack. This must later be 713 * removed with roffnode_pop(). 714 */ 715 static void 716 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 717 int line, int col) 718 { 719 struct roffnode *p; 720 721 p = mandoc_calloc(1, sizeof(struct roffnode)); 722 p->tok = tok; 723 if (name) 724 p->name = mandoc_strdup(name); 725 p->parent = r->last; 726 p->line = line; 727 p->col = col; 728 p->rule = p->parent ? p->parent->rule : 0; 729 730 r->last = p; 731 } 732 733 /* --- roff parser state data management ---------------------------------- */ 734 735 static void 736 roff_free1(struct roff *r) 737 { 738 int i; 739 740 tbl_free(r->first_tbl); 741 r->first_tbl = r->last_tbl = r->tbl = NULL; 742 743 eqn_free(r->last_eqn); 744 r->last_eqn = r->eqn = NULL; 745 746 while (r->mstackpos >= 0) 747 roff_userret(r); 748 749 while (r->last) 750 roffnode_pop(r); 751 752 free (r->rstack); 753 r->rstack = NULL; 754 r->rstacksz = 0; 755 r->rstackpos = -1; 756 757 roff_freereg(r->regtab); 758 r->regtab = NULL; 759 760 roff_freestr(r->strtab); 761 roff_freestr(r->rentab); 762 roff_freestr(r->xmbtab); 763 r->strtab = r->rentab = r->xmbtab = NULL; 764 765 if (r->xtab) 766 for (i = 0; i < 128; i++) 767 free(r->xtab[i].p); 768 free(r->xtab); 769 r->xtab = NULL; 770 } 771 772 void 773 roff_reset(struct roff *r) 774 { 775 roff_free1(r); 776 r->options |= MPARSE_COMMENT; 777 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 778 r->control = '\0'; 779 r->escape = '\\'; 780 roffce_lines = 0; 781 roffce_node = NULL; 782 roffit_lines = 0; 783 roffit_macro = NULL; 784 } 785 786 void 787 roff_free(struct roff *r) 788 { 789 int i; 790 791 roff_free1(r); 792 for (i = 0; i < r->mstacksz; i++) 793 free(r->mstack[i].argv); 794 free(r->mstack); 795 roffhash_free(r->reqtab); 796 free(r); 797 } 798 799 struct roff * 800 roff_alloc(int options) 801 { 802 struct roff *r; 803 804 r = mandoc_calloc(1, sizeof(struct roff)); 805 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 806 r->options = options | MPARSE_COMMENT; 807 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 808 r->mstackpos = -1; 809 r->rstackpos = -1; 810 r->escape = '\\'; 811 return r; 812 } 813 814 /* --- syntax tree state data management ---------------------------------- */ 815 816 static void 817 roff_man_free1(struct roff_man *man) 818 { 819 if (man->meta.first != NULL) 820 roff_node_delete(man, man->meta.first); 821 free(man->meta.msec); 822 free(man->meta.vol); 823 free(man->meta.os); 824 free(man->meta.arch); 825 free(man->meta.title); 826 free(man->meta.name); 827 free(man->meta.date); 828 free(man->meta.sodest); 829 } 830 831 void 832 roff_state_reset(struct roff_man *man) 833 { 834 man->last = man->meta.first; 835 man->last_es = NULL; 836 man->flags = 0; 837 man->lastsec = man->lastnamed = SEC_NONE; 838 man->next = ROFF_NEXT_CHILD; 839 roff_setreg(man->roff, "nS", 0, '='); 840 } 841 842 static void 843 roff_man_alloc1(struct roff_man *man) 844 { 845 memset(&man->meta, 0, sizeof(man->meta)); 846 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 847 man->meta.first->type = ROFFT_ROOT; 848 man->meta.macroset = MACROSET_NONE; 849 roff_state_reset(man); 850 } 851 852 void 853 roff_man_reset(struct roff_man *man) 854 { 855 roff_man_free1(man); 856 roff_man_alloc1(man); 857 } 858 859 void 860 roff_man_free(struct roff_man *man) 861 { 862 roff_man_free1(man); 863 free(man->os_r); 864 free(man); 865 } 866 867 struct roff_man * 868 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 869 { 870 struct roff_man *man; 871 872 man = mandoc_calloc(1, sizeof(*man)); 873 man->roff = roff; 874 man->os_s = os_s; 875 man->quick = quick; 876 roff_man_alloc1(man); 877 roff->man = man; 878 return man; 879 } 880 881 /* --- syntax tree handling ----------------------------------------------- */ 882 883 struct roff_node * 884 roff_node_alloc(struct roff_man *man, int line, int pos, 885 enum roff_type type, int tok) 886 { 887 struct roff_node *n; 888 889 n = mandoc_calloc(1, sizeof(*n)); 890 n->line = line; 891 n->pos = pos; 892 n->tok = tok; 893 n->type = type; 894 n->sec = man->lastsec; 895 896 if (man->flags & MDOC_SYNOPSIS) 897 n->flags |= NODE_SYNPRETTY; 898 else 899 n->flags &= ~NODE_SYNPRETTY; 900 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 901 n->flags |= NODE_NOFILL; 902 else 903 n->flags &= ~NODE_NOFILL; 904 if (man->flags & MDOC_NEWLINE) 905 n->flags |= NODE_LINE; 906 man->flags &= ~MDOC_NEWLINE; 907 908 return n; 909 } 910 911 void 912 roff_node_append(struct roff_man *man, struct roff_node *n) 913 { 914 915 switch (man->next) { 916 case ROFF_NEXT_SIBLING: 917 if (man->last->next != NULL) { 918 n->next = man->last->next; 919 man->last->next->prev = n; 920 } else 921 man->last->parent->last = n; 922 man->last->next = n; 923 n->prev = man->last; 924 n->parent = man->last->parent; 925 break; 926 case ROFF_NEXT_CHILD: 927 if (man->last->child != NULL) { 928 n->next = man->last->child; 929 man->last->child->prev = n; 930 } else 931 man->last->last = n; 932 man->last->child = n; 933 n->parent = man->last; 934 break; 935 default: 936 abort(); 937 } 938 man->last = n; 939 940 switch (n->type) { 941 case ROFFT_HEAD: 942 n->parent->head = n; 943 break; 944 case ROFFT_BODY: 945 if (n->end != ENDBODY_NOT) 946 return; 947 n->parent->body = n; 948 break; 949 case ROFFT_TAIL: 950 n->parent->tail = n; 951 break; 952 default: 953 return; 954 } 955 956 /* 957 * Copy over the normalised-data pointer of our parent. Not 958 * everybody has one, but copying a null pointer is fine. 959 */ 960 961 n->norm = n->parent->norm; 962 assert(n->parent->type == ROFFT_BLOCK); 963 } 964 965 void 966 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 967 { 968 struct roff_node *n; 969 970 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 971 n->string = roff_strdup(man->roff, word); 972 roff_node_append(man, n); 973 n->flags |= NODE_VALID | NODE_ENDED; 974 man->next = ROFF_NEXT_SIBLING; 975 } 976 977 void 978 roff_word_append(struct roff_man *man, const char *word) 979 { 980 struct roff_node *n; 981 char *addstr, *newstr; 982 983 n = man->last; 984 addstr = roff_strdup(man->roff, word); 985 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 986 free(addstr); 987 free(n->string); 988 n->string = newstr; 989 man->next = ROFF_NEXT_SIBLING; 990 } 991 992 void 993 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 994 { 995 struct roff_node *n; 996 997 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 998 roff_node_append(man, n); 999 man->next = ROFF_NEXT_CHILD; 1000 } 1001 1002 struct roff_node * 1003 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1004 { 1005 struct roff_node *n; 1006 1007 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1008 roff_node_append(man, n); 1009 man->next = ROFF_NEXT_CHILD; 1010 return n; 1011 } 1012 1013 struct roff_node * 1014 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1015 { 1016 struct roff_node *n; 1017 1018 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1019 roff_node_append(man, n); 1020 man->next = ROFF_NEXT_CHILD; 1021 return n; 1022 } 1023 1024 struct roff_node * 1025 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1026 { 1027 struct roff_node *n; 1028 1029 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1030 roff_node_append(man, n); 1031 man->next = ROFF_NEXT_CHILD; 1032 return n; 1033 } 1034 1035 static void 1036 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1037 { 1038 struct roff_node *n; 1039 struct tbl_span *span; 1040 1041 if (man->meta.macroset == MACROSET_MAN) 1042 man_breakscope(man, ROFF_TS); 1043 while ((span = tbl_span(tbl)) != NULL) { 1044 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1045 n->span = span; 1046 roff_node_append(man, n); 1047 n->flags |= NODE_VALID | NODE_ENDED; 1048 man->next = ROFF_NEXT_SIBLING; 1049 } 1050 } 1051 1052 void 1053 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1054 { 1055 1056 /* Adjust siblings. */ 1057 1058 if (n->prev) 1059 n->prev->next = n->next; 1060 if (n->next) 1061 n->next->prev = n->prev; 1062 1063 /* Adjust parent. */ 1064 1065 if (n->parent != NULL) { 1066 if (n->parent->child == n) 1067 n->parent->child = n->next; 1068 if (n->parent->last == n) 1069 n->parent->last = n->prev; 1070 } 1071 1072 /* Adjust parse point. */ 1073 1074 if (man == NULL) 1075 return; 1076 if (man->last == n) { 1077 if (n->prev == NULL) { 1078 man->last = n->parent; 1079 man->next = ROFF_NEXT_CHILD; 1080 } else { 1081 man->last = n->prev; 1082 man->next = ROFF_NEXT_SIBLING; 1083 } 1084 } 1085 if (man->meta.first == n) 1086 man->meta.first = NULL; 1087 } 1088 1089 void 1090 roff_node_relink(struct roff_man *man, struct roff_node *n) 1091 { 1092 roff_node_unlink(man, n); 1093 n->prev = n->next = NULL; 1094 roff_node_append(man, n); 1095 } 1096 1097 void 1098 roff_node_free(struct roff_node *n) 1099 { 1100 1101 if (n->args != NULL) 1102 mdoc_argv_free(n->args); 1103 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1104 free(n->norm); 1105 eqn_box_free(n->eqn); 1106 free(n->string); 1107 free(n->tag); 1108 free(n); 1109 } 1110 1111 void 1112 roff_node_delete(struct roff_man *man, struct roff_node *n) 1113 { 1114 1115 while (n->child != NULL) 1116 roff_node_delete(man, n->child); 1117 roff_node_unlink(man, n); 1118 roff_node_free(n); 1119 } 1120 1121 int 1122 roff_node_transparent(struct roff_node *n) 1123 { 1124 if (n == NULL) 1125 return 0; 1126 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) 1127 return 1; 1128 return roff_tok_transparent(n->tok); 1129 } 1130 1131 int 1132 roff_tok_transparent(enum roff_tok tok) 1133 { 1134 switch (tok) { 1135 case ROFF_ft: 1136 case ROFF_ll: 1137 case ROFF_mc: 1138 case ROFF_po: 1139 case ROFF_ta: 1140 case MDOC_Db: 1141 case MDOC_Es: 1142 case MDOC_Sm: 1143 case MDOC_Tg: 1144 case MAN_DT: 1145 case MAN_UC: 1146 case MAN_PD: 1147 case MAN_AT: 1148 return 1; 1149 default: 1150 return 0; 1151 } 1152 } 1153 1154 struct roff_node * 1155 roff_node_child(struct roff_node *n) 1156 { 1157 for (n = n->child; roff_node_transparent(n); n = n->next) 1158 continue; 1159 return n; 1160 } 1161 1162 struct roff_node * 1163 roff_node_prev(struct roff_node *n) 1164 { 1165 do { 1166 n = n->prev; 1167 } while (roff_node_transparent(n)); 1168 return n; 1169 } 1170 1171 struct roff_node * 1172 roff_node_next(struct roff_node *n) 1173 { 1174 do { 1175 n = n->next; 1176 } while (roff_node_transparent(n)); 1177 return n; 1178 } 1179 1180 void 1181 deroff(char **dest, const struct roff_node *n) 1182 { 1183 char *cp; 1184 size_t sz; 1185 1186 if (n->string == NULL) { 1187 for (n = n->child; n != NULL; n = n->next) 1188 deroff(dest, n); 1189 return; 1190 } 1191 1192 /* Skip leading whitespace. */ 1193 1194 for (cp = n->string; *cp != '\0'; cp++) { 1195 if (cp[0] == '\\' && cp[1] != '\0' && 1196 strchr(" %&0^|~", cp[1]) != NULL) 1197 cp++; 1198 else if ( ! isspace((unsigned char)*cp)) 1199 break; 1200 } 1201 1202 /* Skip trailing backslash. */ 1203 1204 sz = strlen(cp); 1205 if (sz > 0 && cp[sz - 1] == '\\') 1206 sz--; 1207 1208 /* Skip trailing whitespace. */ 1209 1210 for (; sz; sz--) 1211 if ( ! isspace((unsigned char)cp[sz-1])) 1212 break; 1213 1214 /* Skip empty strings. */ 1215 1216 if (sz == 0) 1217 return; 1218 1219 if (*dest == NULL) { 1220 *dest = mandoc_strndup(cp, sz); 1221 return; 1222 } 1223 1224 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1225 free(*dest); 1226 *dest = cp; 1227 } 1228 1229 /* --- main functions of the roff parser ---------------------------------- */ 1230 1231 /* 1232 * Save comments preceding the title macro, for example in order to 1233 * preserve Copyright and license headers in HTML output, 1234 * provide diagnostics about RCS ids and trailing whitespace in comments, 1235 * then discard comments including preceding whitespace. 1236 * This function also handles input line continuation. 1237 */ 1238 static int 1239 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec) 1240 { 1241 struct roff_node *n; /* used for header comments */ 1242 const char *start; /* start of the string to process */ 1243 const char *cp; /* for RCS id parsing */ 1244 char *stesc; /* start of an escape sequence ('\\') */ 1245 char *ep; /* end of comment string */ 1246 int rcsid; /* kind of RCS id seen */ 1247 1248 for (start = stesc = buf->buf + pos;; stesc++) { 1249 /* 1250 * XXX Ugly hack: Remove the newline character that 1251 * mparse_buf_r() appended to mark the end of input 1252 * if it is not preceded by an escape character. 1253 */ 1254 if (stesc[0] == '\n') { 1255 assert(stesc[1] == '\0'); 1256 stesc[0] = '\0'; 1257 } 1258 1259 /* The line ends without continuation or comment. */ 1260 if (stesc[0] == '\0') 1261 return ROFF_CONT; 1262 1263 /* Unescaped byte: skip it. */ 1264 if (stesc[0] != ec) 1265 continue; 1266 1267 /* 1268 * XXX Ugly hack: Do not attempt to append another line 1269 * if the function mparse_buf_r() appended a newline 1270 * character to indicate the end of input. 1271 */ 1272 if (stesc[1] == '\n') { 1273 assert(stesc[2] == '\0'); 1274 stesc[0] = '\0'; 1275 return ROFF_CONT; 1276 } 1277 1278 /* 1279 * An escape character at the end of an input line 1280 * requests line continuation. 1281 */ 1282 if (stesc[1] == '\0') { 1283 stesc[0] = '\0'; 1284 return ROFF_IGN | ROFF_APPEND; 1285 } 1286 1287 /* Found a comment: process it. */ 1288 if (stesc[1] == '"' || stesc[1] == '#') 1289 break; 1290 1291 /* Escaped escape character: skip them both. */ 1292 if (stesc[1] == ec) 1293 stesc++; 1294 } 1295 1296 /* Look for an RCS id in the comment. */ 1297 1298 rcsid = 0; 1299 if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) { 1300 rcsid = 1 << MANDOC_OS_OPENBSD; 1301 cp += 8; 1302 } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) { 1303 rcsid = 1 << MANDOC_OS_NETBSD; 1304 cp += 7; 1305 } 1306 if (cp != NULL && isalnum((unsigned char)*cp) == 0 && 1307 strchr(cp, '$') != NULL) { 1308 if (r->man->meta.rcsids & rcsid) 1309 mandoc_msg(MANDOCERR_RCS_REP, ln, 1310 (int)(stesc - buf->buf) + 2, "%s", stesc + 1); 1311 r->man->meta.rcsids |= rcsid; 1312 } 1313 1314 /* Warn about trailing whitespace at the end of the comment. */ 1315 1316 ep = strchr(stesc + 2, '\0') - 1; 1317 if (*ep == '\n') 1318 *ep-- = '\0'; 1319 if (*ep == ' ' || *ep == '\t') 1320 mandoc_msg(MANDOCERR_SPACE_EOL, 1321 ln, (int)(ep - buf->buf), NULL); 1322 1323 /* Save comments preceding the title macro in the syntax tree. */ 1324 1325 if (r->options & MPARSE_COMMENT) { 1326 while (*ep == ' ' || *ep == '\t') 1327 ep--; 1328 ep[1] = '\0'; 1329 n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf, 1330 ROFFT_COMMENT, TOKEN_NONE); 1331 n->string = mandoc_strdup(stesc + 2); 1332 roff_node_append(r->man, n); 1333 n->flags |= NODE_VALID | NODE_ENDED; 1334 r->man->next = ROFF_NEXT_SIBLING; 1335 } 1336 1337 /* The comment requests line continuation. */ 1338 1339 if (stesc[1] == '#') { 1340 *stesc = '\0'; 1341 return ROFF_IGN | ROFF_APPEND; 1342 } 1343 1344 /* Discard the comment including preceding whitespace. */ 1345 1346 while (stesc > start && stesc[-1] == ' ' && 1347 (stesc == start + 1 || stesc[-2] != '\\')) 1348 stesc--; 1349 *stesc = '\0'; 1350 return ROFF_CONT; 1351 } 1352 1353 /* 1354 * In the current line, expand escape sequences that produce parsable 1355 * input text. Also check the syntax of the remaining escape sequences, 1356 * which typically produce output glyphs or change formatter state. 1357 */ 1358 static int 1359 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) 1360 { 1361 char ubuf[24]; /* buffer to print a number */ 1362 struct mctx *ctx; /* current macro call context */ 1363 const char *res; /* the string to be pasted */ 1364 const char *src; /* source for copying */ 1365 char *dst; /* destination for copying */ 1366 enum mandoc_esc subtype; /* return value from roff_escape */ 1367 int iesc; /* index of leading escape char */ 1368 int inam; /* index of the escape name */ 1369 int iarg; /* index beginning the argument */ 1370 int iendarg; /* index right after the argument */ 1371 int iend; /* index right after the sequence */ 1372 int isrc, idst; /* to reduce \\ and \. in names */ 1373 int deftype; /* type of definition to paste */ 1374 int argi; /* macro argument index */ 1375 int quote_args; /* true for \\$@, false for \\$* */ 1376 int asz; /* length of the replacement */ 1377 int rsz; /* length of the rest of the string */ 1378 int npos; /* position in numeric expression */ 1379 int expand_count; /* to avoid infinite loops */ 1380 1381 expand_count = 0; 1382 while (buf->buf[pos] != '\0') { 1383 1384 /* 1385 * Skip plain ASCII characters. 1386 * If we have a non-standard escape character, 1387 * escape literal backslashes because all processing in 1388 * subsequent functions uses the standard escaping rules. 1389 */ 1390 1391 if (buf->buf[pos] != ec) { 1392 if (buf->buf[pos] == '\\') { 1393 roff_expand_patch(buf, pos, "\\e", pos + 1); 1394 pos++; 1395 } 1396 pos++; 1397 continue; 1398 } 1399 1400 /* 1401 * Parse escape sequences, 1402 * issue diagnostic messages when appropriate, 1403 * and skip sequences that do not need expansion. 1404 * If we have a non-standard escape character, translate 1405 * it to backslashes and translate backslashes to \e. 1406 */ 1407 1408 if (roff_escape(buf->buf, ln, pos, &iesc, &inam, 1409 &iarg, &iendarg, &iend) != ESCAPE_EXPAND) { 1410 while (pos < iend) { 1411 if (buf->buf[pos] == ec) { 1412 buf->buf[pos] = '\\'; 1413 if (pos + 1 < iend) 1414 pos++; 1415 } else if (buf->buf[pos] == '\\') { 1416 roff_expand_patch(buf, 1417 pos, "\\e", pos + 1); 1418 pos++; 1419 iend++; 1420 } 1421 pos++; 1422 } 1423 continue; 1424 } 1425 1426 /* Reduce \\ and \. in names. */ 1427 1428 if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') { 1429 isrc = idst = iarg; 1430 while (isrc < iendarg) { 1431 if (isrc + 1 < iendarg && 1432 buf->buf[isrc] == '\\' && 1433 (buf->buf[isrc + 1] == '\\' || 1434 buf->buf[isrc + 1] == '.')) 1435 isrc++; 1436 buf->buf[idst++] = buf->buf[isrc++]; 1437 } 1438 iendarg -= isrc - idst; 1439 } 1440 1441 /* Handle expansion. */ 1442 1443 res = NULL; 1444 switch (buf->buf[inam]) { 1445 case '*': 1446 if (iendarg == iarg) 1447 break; 1448 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1449 if ((res = roff_getstrn(r, buf->buf + iarg, 1450 iendarg - iarg, &deftype)) != NULL) 1451 break; 1452 1453 /* 1454 * If not overridden, 1455 * let \*(.T through to the formatters. 1456 */ 1457 1458 if (iendarg - iarg == 2 && 1459 buf->buf[iarg] == '.' && 1460 buf->buf[iarg + 1] == 'T') { 1461 roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0); 1462 pos = iend; 1463 continue; 1464 } 1465 1466 mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc, 1467 "%.*s", iendarg - iarg, buf->buf + iarg); 1468 break; 1469 1470 case '$': 1471 if (r->mstackpos < 0) { 1472 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc, 1473 "%.*s", iend - iesc, buf->buf + iesc); 1474 break; 1475 } 1476 ctx = r->mstack + r->mstackpos; 1477 argi = buf->buf[iarg] - '1'; 1478 if (argi >= 0 && argi <= 8) { 1479 if (argi < ctx->argc) 1480 res = ctx->argv[argi]; 1481 break; 1482 } 1483 if (buf->buf[iarg] == '*') 1484 quote_args = 0; 1485 else if (buf->buf[iarg] == '@') 1486 quote_args = 1; 1487 else { 1488 mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc, 1489 "%.*s", iend - iesc, buf->buf + iesc); 1490 break; 1491 } 1492 asz = 0; 1493 for (argi = 0; argi < ctx->argc; argi++) { 1494 if (argi) 1495 asz++; /* blank */ 1496 if (quote_args) 1497 asz += 2; /* quotes */ 1498 asz += strlen(ctx->argv[argi]); 1499 } 1500 if (asz != iend - iesc) { 1501 rsz = buf->sz - iend; 1502 if (asz < iend - iesc) 1503 memmove(buf->buf + iesc + asz, 1504 buf->buf + iend, rsz); 1505 buf->sz = iesc + asz + rsz; 1506 buf->buf = mandoc_realloc(buf->buf, buf->sz); 1507 if (asz > iend - iesc) 1508 memmove(buf->buf + iesc + asz, 1509 buf->buf + iend, rsz); 1510 } 1511 dst = buf->buf + iesc; 1512 for (argi = 0; argi < ctx->argc; argi++) { 1513 if (argi) 1514 *dst++ = ' '; 1515 if (quote_args) 1516 *dst++ = '"'; 1517 src = ctx->argv[argi]; 1518 while (*src != '\0') 1519 *dst++ = *src++; 1520 if (quote_args) 1521 *dst++ = '"'; 1522 } 1523 continue; 1524 case 'A': 1525 ubuf[0] = iendarg > iarg ? '1' : '0'; 1526 ubuf[1] = '\0'; 1527 res = ubuf; 1528 break; 1529 case 'B': 1530 npos = 0; 1531 ubuf[0] = iendarg > iarg && iend > iendarg && 1532 roff_evalnum(r, ln, buf->buf + iarg, &npos, 1533 NULL, ROFFNUM_SCALE) && 1534 npos == iendarg - iarg ? '1' : '0'; 1535 ubuf[1] = '\0'; 1536 res = ubuf; 1537 break; 1538 case 'V': 1539 mandoc_msg(MANDOCERR_UNSUPP, ln, iesc, 1540 "%.*s", iend - iesc, buf->buf + iesc); 1541 roff_expand_patch(buf, iendarg, "}", iend); 1542 roff_expand_patch(buf, iesc, "${", iarg); 1543 continue; 1544 case 'g': 1545 break; 1546 case 'n': 1547 if (iendarg > iarg) 1548 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1549 roff_getregn(r, buf->buf + iarg, 1550 iendarg - iarg, buf->buf[inam + 1])); 1551 else 1552 ubuf[0] = '\0'; 1553 res = ubuf; 1554 break; 1555 case 'w': 1556 rsz = 0; 1557 subtype = ESCAPE_UNDEF; 1558 while (iarg < iendarg) { 1559 asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1; 1560 if (buf->buf[iarg] != '\\') { 1561 rsz += asz; 1562 iarg++; 1563 continue; 1564 } 1565 switch ((subtype = roff_escape(buf->buf, 0, 1566 iarg, NULL, NULL, NULL, NULL, &iarg))) { 1567 case ESCAPE_SPECIAL: 1568 case ESCAPE_NUMBERED: 1569 case ESCAPE_UNICODE: 1570 case ESCAPE_OVERSTRIKE: 1571 case ESCAPE_UNDEF: 1572 break; 1573 case ESCAPE_DEVICE: 1574 asz *= 8; 1575 break; 1576 case ESCAPE_EXPAND: 1577 abort(); 1578 default: 1579 continue; 1580 } 1581 rsz += asz; 1582 } 1583 (void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24); 1584 res = ubuf; 1585 break; 1586 default: 1587 break; 1588 } 1589 if (res == NULL) 1590 res = ""; 1591 if (++expand_count > EXPAND_LIMIT || 1592 buf->sz + strlen(res) > SHRT_MAX) { 1593 mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL); 1594 return ROFF_IGN; 1595 } 1596 roff_expand_patch(buf, iesc, res, iend); 1597 } 1598 return ROFF_CONT; 1599 } 1600 1601 /* 1602 * Replace the substring from the start position (inclusive) 1603 * to end position (exclusive) with the repl(acement) string. 1604 */ 1605 static void 1606 roff_expand_patch(struct buf *buf, int start, const char *repl, int end) 1607 { 1608 char *nbuf; 1609 1610 buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf, 1611 repl, buf->buf + end) + 1; 1612 free(buf->buf); 1613 buf->buf = nbuf; 1614 } 1615 1616 /* 1617 * Parse a quoted or unquoted roff-style request or macro argument. 1618 * Return a pointer to the parsed argument, which is either the original 1619 * pointer or advanced by one byte in case the argument is quoted. 1620 * NUL-terminate the argument in place. 1621 * Collapse pairs of quotes inside quoted arguments. 1622 * Advance the argument pointer to the next argument, 1623 * or to the NUL byte terminating the argument line. 1624 */ 1625 char * 1626 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1627 { 1628 struct buf buf; 1629 char *cp, *start; 1630 int newesc, pairs, quoted, white; 1631 1632 /* Quoting can only start with a new word. */ 1633 start = *cpp; 1634 quoted = 0; 1635 if ('"' == *start) { 1636 quoted = 1; 1637 start++; 1638 } 1639 1640 newesc = pairs = white = 0; 1641 for (cp = start; '\0' != *cp; cp++) { 1642 1643 /* 1644 * Move the following text left 1645 * after quoted quotes and after "\\" and "\t". 1646 */ 1647 if (pairs) 1648 cp[-pairs] = cp[0]; 1649 1650 if ('\\' == cp[0]) { 1651 /* 1652 * In copy mode, translate double to single 1653 * backslashes and backslash-t to literal tabs. 1654 */ 1655 switch (cp[1]) { 1656 case 'a': 1657 case 't': 1658 cp[-pairs] = '\t'; 1659 pairs++; 1660 cp++; 1661 break; 1662 case '\\': 1663 cp[-pairs] = '\\'; 1664 newesc = 1; 1665 pairs++; 1666 cp++; 1667 break; 1668 case ' ': 1669 /* Skip escaped blanks. */ 1670 if (0 == quoted) 1671 cp++; 1672 break; 1673 default: 1674 break; 1675 } 1676 } else if (0 == quoted) { 1677 if (' ' == cp[0]) { 1678 /* Unescaped blanks end unquoted args. */ 1679 white = 1; 1680 break; 1681 } 1682 } else if ('"' == cp[0]) { 1683 if ('"' == cp[1]) { 1684 /* Quoted quotes collapse. */ 1685 pairs++; 1686 cp++; 1687 } else { 1688 /* Unquoted quotes end quoted args. */ 1689 quoted = 2; 1690 break; 1691 } 1692 } 1693 } 1694 1695 /* Quoted argument without a closing quote. */ 1696 if (1 == quoted) 1697 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1698 1699 /* NUL-terminate this argument and move to the next one. */ 1700 if (pairs) 1701 cp[-pairs] = '\0'; 1702 if ('\0' != *cp) { 1703 *cp++ = '\0'; 1704 while (' ' == *cp) 1705 cp++; 1706 } 1707 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1708 *cpp = cp; 1709 1710 if ('\0' == *cp && (white || ' ' == cp[-1])) 1711 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1712 1713 start = mandoc_strdup(start); 1714 if (newesc == 0) 1715 return start; 1716 1717 buf.buf = start; 1718 buf.sz = strlen(start) + 1; 1719 buf.next = NULL; 1720 if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) { 1721 free(buf.buf); 1722 buf.buf = mandoc_strdup(""); 1723 } 1724 return buf.buf; 1725 } 1726 1727 1728 /* 1729 * Process text streams. 1730 */ 1731 static int 1732 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1733 { 1734 size_t sz; 1735 const char *start; 1736 char *p; 1737 int isz; 1738 enum mandoc_esc esc; 1739 1740 /* Spring the input line trap. */ 1741 1742 if (roffit_lines == 1) { 1743 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1744 free(buf->buf); 1745 buf->buf = p; 1746 buf->sz = isz + 1; 1747 *offs = 0; 1748 free(roffit_macro); 1749 roffit_lines = 0; 1750 return ROFF_REPARSE; 1751 } else if (roffit_lines > 1) 1752 --roffit_lines; 1753 1754 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1755 if (roffce_lines < 1) { 1756 r->man->last = roffce_node; 1757 r->man->next = ROFF_NEXT_SIBLING; 1758 roffce_lines = 0; 1759 roffce_node = NULL; 1760 } else 1761 roffce_lines--; 1762 } 1763 1764 /* Convert all breakable hyphens into ASCII_HYPH. */ 1765 1766 start = p = buf->buf + pos; 1767 1768 while (*p != '\0') { 1769 sz = strcspn(p, "-\\"); 1770 p += sz; 1771 1772 if (*p == '\0') 1773 break; 1774 1775 if (*p == '\\') { 1776 /* Skip over escapes. */ 1777 p++; 1778 esc = mandoc_escape((const char **)&p, NULL, NULL); 1779 if (esc == ESCAPE_ERROR) 1780 break; 1781 while (*p == '-') 1782 p++; 1783 continue; 1784 } else if (p == start) { 1785 p++; 1786 continue; 1787 } 1788 1789 if (isalpha((unsigned char)p[-1]) && 1790 isalpha((unsigned char)p[1])) 1791 *p = ASCII_HYPH; 1792 p++; 1793 } 1794 return ROFF_CONT; 1795 } 1796 1797 int 1798 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len) 1799 { 1800 enum roff_tok t; 1801 int e; 1802 int pos; /* parse point */ 1803 int spos; /* saved parse point for messages */ 1804 int ppos; /* original offset in buf->buf */ 1805 int ctl; /* macro line (boolean) */ 1806 1807 ppos = pos = *offs; 1808 1809 if (len > 80 && r->tbl == NULL && r->eqn == NULL && 1810 (r->man->flags & ROFF_NOFILL) == 0 && 1811 strchr(" .\\", buf->buf[pos]) == NULL && 1812 buf->buf[pos] != r->control && 1813 strcspn(buf->buf, " ") < 80) 1814 mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1, 1815 "%.20s...", buf->buf + pos); 1816 1817 /* Handle in-line equation delimiters. */ 1818 1819 if (r->tbl == NULL && 1820 r->last_eqn != NULL && r->last_eqn->delim && 1821 (r->eqn == NULL || r->eqn_inline)) { 1822 e = roff_eqndelim(r, buf, pos); 1823 if (e == ROFF_REPARSE) 1824 return e; 1825 assert(e == ROFF_CONT); 1826 } 1827 1828 /* Handle comments and escape sequences. */ 1829 1830 e = roff_parse_comment(r, buf, ln, pos, r->escape); 1831 if ((e & ROFF_MASK) == ROFF_IGN) 1832 return e; 1833 assert(e == ROFF_CONT); 1834 1835 e = roff_expand(r, buf, ln, pos, r->escape); 1836 if ((e & ROFF_MASK) == ROFF_IGN) 1837 return e; 1838 assert(e == ROFF_CONT); 1839 1840 ctl = roff_getcontrol(r, buf->buf, &pos); 1841 1842 /* 1843 * First, if a scope is open and we're not a macro, pass the 1844 * text through the macro's filter. 1845 * Equations process all content themselves. 1846 * Tables process almost all content themselves, but we want 1847 * to warn about macros before passing it there. 1848 */ 1849 1850 if (r->last != NULL && ! ctl) { 1851 t = r->last->tok; 1852 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1853 if ((e & ROFF_MASK) == ROFF_IGN) 1854 return e; 1855 e &= ~ROFF_MASK; 1856 } else 1857 e = ROFF_IGN; 1858 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1859 eqn_read(r->eqn, buf->buf + ppos); 1860 return e; 1861 } 1862 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1863 tbl_read(r->tbl, ln, buf->buf, ppos); 1864 roff_addtbl(r->man, ln, r->tbl); 1865 return e; 1866 } 1867 if ( ! ctl) { 1868 r->options &= ~MPARSE_COMMENT; 1869 return roff_parsetext(r, buf, pos, offs) | e; 1870 } 1871 1872 /* Skip empty request lines. */ 1873 1874 if (buf->buf[pos] == '"') { 1875 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1876 return ROFF_IGN; 1877 } else if (buf->buf[pos] == '\0') 1878 return ROFF_IGN; 1879 1880 /* 1881 * If a scope is open, go to the child handler for that macro, 1882 * as it may want to preprocess before doing anything with it. 1883 */ 1884 1885 if (r->last) { 1886 t = r->last->tok; 1887 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1888 } 1889 1890 r->options &= ~MPARSE_COMMENT; 1891 spos = pos; 1892 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1893 return roff_req_or_macro(r, t, buf, ln, spos, pos, offs); 1894 } 1895 1896 /* 1897 * Handle a new request or macro. 1898 * May be called outside any scope or from inside a conditional scope. 1899 */ 1900 static int 1901 roff_req_or_macro(ROFF_ARGS) { 1902 1903 /* For now, tables ignore most macros and some request. */ 1904 1905 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS || 1906 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj || 1907 tok == ROFF_sp)) { 1908 mandoc_msg(MANDOCERR_TBLMACRO, 1909 ln, ppos, "%s", buf->buf + ppos); 1910 if (tok != TOKEN_NONE) 1911 return ROFF_IGN; 1912 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1913 pos++; 1914 while (buf->buf[pos] == ' ') 1915 pos++; 1916 tbl_read(r->tbl, ln, buf->buf, pos); 1917 roff_addtbl(r->man, ln, r->tbl); 1918 return ROFF_IGN; 1919 } 1920 1921 /* For now, let high level macros abort .ce mode. */ 1922 1923 if (roffce_node != NULL && 1924 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ || 1925 tok == ROFF_TH || tok == ROFF_TS)) { 1926 r->man->last = roffce_node; 1927 r->man->next = ROFF_NEXT_SIBLING; 1928 roffce_lines = 0; 1929 roffce_node = NULL; 1930 } 1931 1932 /* 1933 * This is neither a roff request nor a user-defined macro. 1934 * Let the standard macro set parsers handle it. 1935 */ 1936 1937 if (tok == TOKEN_NONE) 1938 return ROFF_CONT; 1939 1940 /* Execute a roff request or a user-defined macro. */ 1941 1942 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs); 1943 } 1944 1945 /* 1946 * Internal interface function to tell the roff parser that execution 1947 * of the current macro ended. This is required because macro 1948 * definitions usually do not end with a .return request. 1949 */ 1950 void 1951 roff_userret(struct roff *r) 1952 { 1953 struct mctx *ctx; 1954 int i; 1955 1956 assert(r->mstackpos >= 0); 1957 ctx = r->mstack + r->mstackpos; 1958 for (i = 0; i < ctx->argc; i++) 1959 free(ctx->argv[i]); 1960 ctx->argc = 0; 1961 r->mstackpos--; 1962 } 1963 1964 void 1965 roff_endparse(struct roff *r) 1966 { 1967 if (r->last != NULL) 1968 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1969 r->last->col, "%s", roff_name[r->last->tok]); 1970 1971 if (r->eqn != NULL) { 1972 mandoc_msg(MANDOCERR_BLK_NOEND, 1973 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1974 eqn_parse(r->eqn); 1975 r->eqn = NULL; 1976 } 1977 1978 if (r->tbl != NULL) { 1979 tbl_end(r->tbl, 1); 1980 r->tbl = NULL; 1981 } 1982 } 1983 1984 /* 1985 * Parse the request or macro name at buf[*pos]. 1986 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value. 1987 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE. 1988 * As a side effect, set r->current_string to the definition or to NULL. 1989 */ 1990 static enum roff_tok 1991 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1992 { 1993 char *cp; 1994 const char *mac; 1995 size_t maclen; 1996 int deftype; 1997 enum roff_tok t; 1998 1999 cp = buf + *pos; 2000 2001 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 2002 return TOKEN_NONE; 2003 2004 mac = cp; 2005 maclen = roff_getname(r, &cp, ln, ppos); 2006 2007 deftype = ROFFDEF_USER | ROFFDEF_REN; 2008 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 2009 switch (deftype) { 2010 case ROFFDEF_USER: 2011 t = ROFF_USERDEF; 2012 break; 2013 case ROFFDEF_REN: 2014 t = ROFF_RENAMED; 2015 break; 2016 default: 2017 t = roffhash_find(r->reqtab, mac, maclen); 2018 break; 2019 } 2020 if (t != TOKEN_NONE) 2021 *pos = cp - buf; 2022 else if (deftype == ROFFDEF_UNDEF) { 2023 /* Using an undefined macro defines it to be empty. */ 2024 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 2025 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 2026 } 2027 return t; 2028 } 2029 2030 /* --- handling of request blocks ----------------------------------------- */ 2031 2032 /* 2033 * Close a macro definition block or an "ignore" block. 2034 */ 2035 static int 2036 roff_cblock(ROFF_ARGS) 2037 { 2038 int rr; 2039 2040 if (r->last == NULL) { 2041 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2042 return ROFF_IGN; 2043 } 2044 2045 switch (r->last->tok) { 2046 case ROFF_am: 2047 case ROFF_ami: 2048 case ROFF_de: 2049 case ROFF_dei: 2050 case ROFF_ig: 2051 break; 2052 case ROFF_am1: 2053 case ROFF_de1: 2054 /* Remapped in roff_block(). */ 2055 abort(); 2056 default: 2057 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 2058 return ROFF_IGN; 2059 } 2060 2061 roffnode_pop(r); 2062 roffnode_cleanscope(r); 2063 2064 /* 2065 * If a conditional block with braces is still open, 2066 * check for "\}" block end markers. 2067 */ 2068 2069 if (r->last != NULL && r->last->endspan < 0) { 2070 rr = 1; /* If arguments follow "\}", warn about them. */ 2071 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2072 } 2073 2074 if (buf->buf[pos] != '\0') 2075 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2076 ".. %s", buf->buf + pos); 2077 2078 return ROFF_IGN; 2079 } 2080 2081 /* 2082 * Pop all nodes ending at the end of the current input line. 2083 * Return the number of loops ended. 2084 */ 2085 static int 2086 roffnode_cleanscope(struct roff *r) 2087 { 2088 int inloop; 2089 2090 inloop = 0; 2091 while (r->last != NULL && r->last->endspan > 0) { 2092 if (--r->last->endspan != 0) 2093 break; 2094 inloop += roffnode_pop(r); 2095 } 2096 return inloop; 2097 } 2098 2099 /* 2100 * Handle the closing "\}" of a conditional block. 2101 * Apart from generating warnings, this only pops nodes. 2102 * Return the number of loops ended. 2103 */ 2104 static int 2105 roff_ccond(struct roff *r, int ln, int ppos) 2106 { 2107 if (NULL == r->last) { 2108 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2109 return 0; 2110 } 2111 2112 switch (r->last->tok) { 2113 case ROFF_el: 2114 case ROFF_ie: 2115 case ROFF_if: 2116 case ROFF_while: 2117 break; 2118 default: 2119 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2120 return 0; 2121 } 2122 2123 if (r->last->endspan > -1) { 2124 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2125 return 0; 2126 } 2127 2128 return roffnode_pop(r) + roffnode_cleanscope(r); 2129 } 2130 2131 static int 2132 roff_block(ROFF_ARGS) 2133 { 2134 const char *name, *value; 2135 char *call, *cp, *iname, *rname; 2136 size_t csz, namesz, rsz; 2137 int deftype; 2138 2139 /* Ignore groff compatibility mode for now. */ 2140 2141 if (tok == ROFF_de1) 2142 tok = ROFF_de; 2143 else if (tok == ROFF_dei1) 2144 tok = ROFF_dei; 2145 else if (tok == ROFF_am1) 2146 tok = ROFF_am; 2147 else if (tok == ROFF_ami1) 2148 tok = ROFF_ami; 2149 2150 /* Parse the macro name argument. */ 2151 2152 cp = buf->buf + pos; 2153 if (tok == ROFF_ig) { 2154 iname = NULL; 2155 namesz = 0; 2156 } else { 2157 iname = cp; 2158 namesz = roff_getname(r, &cp, ln, ppos); 2159 iname[namesz] = '\0'; 2160 } 2161 2162 /* Resolve the macro name argument if it is indirect. */ 2163 2164 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2165 deftype = ROFFDEF_USER; 2166 name = roff_getstrn(r, iname, namesz, &deftype); 2167 if (name == NULL) { 2168 mandoc_msg(MANDOCERR_STR_UNDEF, 2169 ln, (int)(iname - buf->buf), 2170 "%.*s", (int)namesz, iname); 2171 namesz = 0; 2172 } else 2173 namesz = strlen(name); 2174 } else 2175 name = iname; 2176 2177 if (namesz == 0 && tok != ROFF_ig) { 2178 mandoc_msg(MANDOCERR_REQ_EMPTY, 2179 ln, ppos, "%s", roff_name[tok]); 2180 return ROFF_IGN; 2181 } 2182 2183 roffnode_push(r, tok, name, ln, ppos); 2184 2185 /* 2186 * At the beginning of a `de' macro, clear the existing string 2187 * with the same name, if there is one. New content will be 2188 * appended from roff_block_text() in multiline mode. 2189 */ 2190 2191 if (tok == ROFF_de || tok == ROFF_dei) { 2192 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2193 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2194 } else if (tok == ROFF_am || tok == ROFF_ami) { 2195 deftype = ROFFDEF_ANY; 2196 value = roff_getstrn(r, iname, namesz, &deftype); 2197 switch (deftype) { /* Before appending, ... */ 2198 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2199 roff_setstrn(&r->strtab, name, namesz, 2200 value, strlen(value), 0); 2201 break; 2202 case ROFFDEF_REN: /* call original standard macro. */ 2203 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2204 (int)strlen(value), value); 2205 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2206 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2207 free(call); 2208 break; 2209 case ROFFDEF_STD: /* rename and call standard macro. */ 2210 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2211 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2212 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2213 (int)rsz, rname); 2214 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2215 free(call); 2216 free(rname); 2217 break; 2218 default: 2219 break; 2220 } 2221 } 2222 2223 if (*cp == '\0') 2224 return ROFF_IGN; 2225 2226 /* Get the custom end marker. */ 2227 2228 iname = cp; 2229 namesz = roff_getname(r, &cp, ln, ppos); 2230 2231 /* Resolve the end marker if it is indirect. */ 2232 2233 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2234 deftype = ROFFDEF_USER; 2235 name = roff_getstrn(r, iname, namesz, &deftype); 2236 if (name == NULL) { 2237 mandoc_msg(MANDOCERR_STR_UNDEF, 2238 ln, (int)(iname - buf->buf), 2239 "%.*s", (int)namesz, iname); 2240 namesz = 0; 2241 } else 2242 namesz = strlen(name); 2243 } else 2244 name = iname; 2245 2246 if (namesz) 2247 r->last->end = mandoc_strndup(name, namesz); 2248 2249 if (*cp != '\0') 2250 mandoc_msg(MANDOCERR_ARG_EXCESS, 2251 ln, pos, ".%s ... %s", roff_name[tok], cp); 2252 2253 return ROFF_IGN; 2254 } 2255 2256 static int 2257 roff_block_sub(ROFF_ARGS) 2258 { 2259 enum roff_tok t; 2260 int i, j; 2261 2262 /* 2263 * If a custom end marker is a user-defined or predefined macro 2264 * or a request, interpret it. 2265 */ 2266 2267 if (r->last->end) { 2268 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2269 if (buf->buf[i] != r->last->end[j]) 2270 break; 2271 2272 if (r->last->end[j] == '\0' && 2273 (buf->buf[i] == '\0' || 2274 buf->buf[i] == ' ' || 2275 buf->buf[i] == '\t')) { 2276 roffnode_pop(r); 2277 roffnode_cleanscope(r); 2278 2279 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2280 i++; 2281 2282 pos = i; 2283 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2284 TOKEN_NONE) 2285 return ROFF_RERUN; 2286 return ROFF_IGN; 2287 } 2288 } 2289 2290 /* Handle the standard end marker. */ 2291 2292 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2293 if (t == ROFF_cblock) 2294 return roff_cblock(r, t, buf, ln, ppos, pos, offs); 2295 2296 /* Not an end marker, so append the line to the block. */ 2297 2298 if (tok != ROFF_ig) 2299 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2300 return ROFF_IGN; 2301 } 2302 2303 static int 2304 roff_block_text(ROFF_ARGS) 2305 { 2306 2307 if (tok != ROFF_ig) 2308 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2309 2310 return ROFF_IGN; 2311 } 2312 2313 /* 2314 * Check for a closing "\}" and handle it. 2315 * In this function, the final "int *offs" argument is used for 2316 * different purposes than elsewhere: 2317 * Input: *offs == 0: caller wants to discard arguments following \} 2318 * *offs == 1: caller wants to preserve text following \} 2319 * Output: *offs = 0: tell caller to discard input line 2320 * *offs = 1: tell caller to use input line 2321 */ 2322 static int 2323 roff_cond_checkend(ROFF_ARGS) 2324 { 2325 char *ep; 2326 int endloop, irc, rr; 2327 2328 irc = ROFF_IGN; 2329 rr = r->last->rule; 2330 endloop = tok != ROFF_while ? ROFF_IGN : 2331 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2332 if (roffnode_cleanscope(r)) 2333 irc |= endloop; 2334 2335 /* 2336 * If "\}" occurs on a macro line without a preceding macro or 2337 * a text line contains nothing else, drop the line completely. 2338 */ 2339 2340 ep = buf->buf + pos; 2341 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0)) 2342 rr = 0; 2343 2344 /* 2345 * The closing delimiter "\}" rewinds the conditional scope 2346 * but is otherwise ignored when interpreting the line. 2347 */ 2348 2349 while ((ep = strchr(ep, '\\')) != NULL) { 2350 switch (ep[1]) { 2351 case '}': 2352 if (ep[2] == '\0') 2353 ep[0] = '\0'; 2354 else if (rr) 2355 ep[1] = '&'; 2356 else 2357 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2358 if (roff_ccond(r, ln, ep - buf->buf)) 2359 irc |= endloop; 2360 break; 2361 case '\0': 2362 ++ep; 2363 break; 2364 default: 2365 ep += 2; 2366 break; 2367 } 2368 } 2369 *offs = rr; 2370 return irc; 2371 } 2372 2373 /* 2374 * Parse and process a request or macro line in conditional scope. 2375 */ 2376 static int 2377 roff_cond_sub(ROFF_ARGS) 2378 { 2379 struct roffnode *bl; 2380 int irc, rr, spos; 2381 enum roff_tok t; 2382 2383 rr = 0; /* If arguments follow "\}", skip them. */ 2384 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2385 spos = pos; 2386 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2387 2388 /* 2389 * Handle requests and macros if the conditional evaluated 2390 * to true or if they are structurally required. 2391 * The .break request is always handled specially. 2392 */ 2393 2394 if (t == ROFF_break) { 2395 if (irc & ROFF_LOOPMASK) 2396 irc = ROFF_IGN | ROFF_LOOPEXIT; 2397 else if (rr) { 2398 for (bl = r->last; bl != NULL; bl = bl->parent) { 2399 bl->rule = 0; 2400 if (bl->tok == ROFF_while) 2401 break; 2402 } 2403 } 2404 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) { 2405 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs); 2406 if (irc & ROFF_WHILE) 2407 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT); 2408 } 2409 return irc; 2410 } 2411 2412 /* 2413 * Parse and process a text line in conditional scope. 2414 */ 2415 static int 2416 roff_cond_text(ROFF_ARGS) 2417 { 2418 int irc, rr; 2419 2420 rr = 1; /* If arguments follow "\}", preserve them. */ 2421 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr); 2422 if (rr) 2423 irc |= ROFF_CONT; 2424 return irc; 2425 } 2426 2427 /* --- handling of numeric and conditional expressions -------------------- */ 2428 2429 /* 2430 * Parse a single signed integer number. Stop at the first non-digit. 2431 * If there is at least one digit, return success and advance the 2432 * parse point, else return failure and let the parse point unchanged. 2433 * Ignore overflows, treat them just like the C language. 2434 */ 2435 static int 2436 roff_getnum(const char *v, int *pos, int *res, int flags) 2437 { 2438 int myres, scaled, n, p; 2439 2440 if (NULL == res) 2441 res = &myres; 2442 2443 p = *pos; 2444 n = v[p] == '-'; 2445 if (n || v[p] == '+') 2446 p++; 2447 2448 if (flags & ROFFNUM_WHITE) 2449 while (isspace((unsigned char)v[p])) 2450 p++; 2451 2452 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2453 *res = 10 * *res + v[p] - '0'; 2454 if (p == *pos + n) 2455 return 0; 2456 2457 if (n) 2458 *res = -*res; 2459 2460 /* Each number may be followed by one optional scaling unit. */ 2461 2462 switch (v[p]) { 2463 case 'f': 2464 scaled = *res * 65536; 2465 break; 2466 case 'i': 2467 scaled = *res * 240; 2468 break; 2469 case 'c': 2470 scaled = *res * 240 / 2.54; 2471 break; 2472 case 'v': 2473 case 'P': 2474 scaled = *res * 40; 2475 break; 2476 case 'm': 2477 case 'n': 2478 scaled = *res * 24; 2479 break; 2480 case 'p': 2481 scaled = *res * 10 / 3; 2482 break; 2483 case 'u': 2484 scaled = *res; 2485 break; 2486 case 'M': 2487 scaled = *res * 6 / 25; 2488 break; 2489 default: 2490 scaled = *res; 2491 p--; 2492 break; 2493 } 2494 if (flags & ROFFNUM_SCALE) 2495 *res = scaled; 2496 2497 *pos = p + 1; 2498 return 1; 2499 } 2500 2501 /* 2502 * Evaluate a string comparison condition. 2503 * The first character is the delimiter. 2504 * Succeed if the string up to its second occurrence 2505 * matches the string up to its third occurrence. 2506 * Advance the cursor after the third occurrence 2507 * or lacking that, to the end of the line. 2508 */ 2509 static int 2510 roff_evalstrcond(const char *v, int *pos) 2511 { 2512 const char *s1, *s2, *s3; 2513 int match; 2514 2515 match = 0; 2516 s1 = v + *pos; /* initial delimiter */ 2517 s2 = s1 + 1; /* for scanning the first string */ 2518 s3 = strchr(s2, *s1); /* for scanning the second string */ 2519 2520 if (NULL == s3) /* found no middle delimiter */ 2521 goto out; 2522 2523 while ('\0' != *++s3) { 2524 if (*s2 != *s3) { /* mismatch */ 2525 s3 = strchr(s3, *s1); 2526 break; 2527 } 2528 if (*s3 == *s1) { /* found the final delimiter */ 2529 match = 1; 2530 break; 2531 } 2532 s2++; 2533 } 2534 2535 out: 2536 if (NULL == s3) 2537 s3 = strchr(s2, '\0'); 2538 else if (*s3 != '\0') 2539 s3++; 2540 *pos = s3 - v; 2541 return match; 2542 } 2543 2544 /* 2545 * Evaluate an optionally negated single character, numerical, 2546 * or string condition. 2547 */ 2548 static int 2549 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2550 { 2551 const char *start, *end; 2552 char *cp, *name; 2553 size_t sz; 2554 int deftype, len, number, savepos, istrue, wanttrue; 2555 2556 if ('!' == v[*pos]) { 2557 wanttrue = 0; 2558 (*pos)++; 2559 } else 2560 wanttrue = 1; 2561 2562 switch (v[*pos]) { 2563 case '\0': 2564 return 0; 2565 case 'n': 2566 case 'o': 2567 (*pos)++; 2568 return wanttrue; 2569 case 'e': 2570 case 't': 2571 case 'v': 2572 (*pos)++; 2573 return !wanttrue; 2574 case 'c': 2575 do { 2576 (*pos)++; 2577 } while (v[*pos] == ' '); 2578 2579 /* 2580 * Quirk for groff compatibility: 2581 * The horizontal tab is neither available nor unavailable. 2582 */ 2583 2584 if (v[*pos] == '\t') { 2585 (*pos)++; 2586 return 0; 2587 } 2588 2589 /* Printable ASCII characters are available. */ 2590 2591 if (v[*pos] != '\\') { 2592 (*pos)++; 2593 return wanttrue; 2594 } 2595 2596 end = v + ++*pos; 2597 switch (mandoc_escape(&end, &start, &len)) { 2598 case ESCAPE_SPECIAL: 2599 istrue = mchars_spec2cp(start, len) != -1; 2600 break; 2601 case ESCAPE_UNICODE: 2602 istrue = 1; 2603 break; 2604 case ESCAPE_NUMBERED: 2605 istrue = mchars_num2char(start, len) != -1; 2606 break; 2607 default: 2608 istrue = !wanttrue; 2609 break; 2610 } 2611 *pos = end - v; 2612 return istrue == wanttrue; 2613 case 'd': 2614 case 'r': 2615 cp = v + *pos + 1; 2616 while (*cp == ' ') 2617 cp++; 2618 name = cp; 2619 sz = roff_getname(r, &cp, ln, cp - v); 2620 if (sz == 0) 2621 istrue = 0; 2622 else if (v[*pos] == 'r') 2623 istrue = roff_hasregn(r, name, sz); 2624 else { 2625 deftype = ROFFDEF_ANY; 2626 roff_getstrn(r, name, sz, &deftype); 2627 istrue = !!deftype; 2628 } 2629 *pos = (name + sz) - v; 2630 return istrue == wanttrue; 2631 default: 2632 break; 2633 } 2634 2635 savepos = *pos; 2636 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2637 return (number > 0) == wanttrue; 2638 else if (*pos == savepos) 2639 return roff_evalstrcond(v, pos) == wanttrue; 2640 else 2641 return 0; 2642 } 2643 2644 static int 2645 roff_line_ignore(ROFF_ARGS) 2646 { 2647 2648 return ROFF_IGN; 2649 } 2650 2651 static int 2652 roff_insec(ROFF_ARGS) 2653 { 2654 2655 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2656 return ROFF_IGN; 2657 } 2658 2659 static int 2660 roff_unsupp(ROFF_ARGS) 2661 { 2662 2663 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2664 return ROFF_IGN; 2665 } 2666 2667 static int 2668 roff_cond(ROFF_ARGS) 2669 { 2670 int irc; 2671 2672 roffnode_push(r, tok, NULL, ln, ppos); 2673 2674 /* 2675 * An `.el' has no conditional body: it will consume the value 2676 * of the current rstack entry set in prior `ie' calls or 2677 * defaults to DENY. 2678 * 2679 * If we're not an `el', however, then evaluate the conditional. 2680 */ 2681 2682 r->last->rule = tok == ROFF_el ? 2683 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2684 roff_evalcond(r, ln, buf->buf, &pos); 2685 2686 /* 2687 * An if-else will put the NEGATION of the current evaluated 2688 * conditional into the stack of rules. 2689 */ 2690 2691 if (tok == ROFF_ie) { 2692 if (r->rstackpos + 1 == r->rstacksz) { 2693 r->rstacksz += 16; 2694 r->rstack = mandoc_reallocarray(r->rstack, 2695 r->rstacksz, sizeof(int)); 2696 } 2697 r->rstack[++r->rstackpos] = !r->last->rule; 2698 } 2699 2700 /* If the parent has false as its rule, then so do we. */ 2701 2702 if (r->last->parent && !r->last->parent->rule) 2703 r->last->rule = 0; 2704 2705 /* 2706 * Determine scope. 2707 * If there is nothing on the line after the conditional, 2708 * not even whitespace, use next-line scope. 2709 * Except that .while does not support next-line scope. 2710 */ 2711 2712 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2713 r->last->endspan = 2; 2714 goto out; 2715 } 2716 2717 while (buf->buf[pos] == ' ') 2718 pos++; 2719 2720 /* An opening brace requests multiline scope. */ 2721 2722 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2723 r->last->endspan = -1; 2724 pos += 2; 2725 while (buf->buf[pos] == ' ') 2726 pos++; 2727 goto out; 2728 } 2729 2730 /* 2731 * Anything else following the conditional causes 2732 * single-line scope. Warn if the scope contains 2733 * nothing but trailing whitespace. 2734 */ 2735 2736 if (buf->buf[pos] == '\0') 2737 mandoc_msg(MANDOCERR_COND_EMPTY, 2738 ln, ppos, "%s", roff_name[tok]); 2739 2740 r->last->endspan = 1; 2741 2742 out: 2743 *offs = pos; 2744 irc = ROFF_RERUN; 2745 if (tok == ROFF_while) 2746 irc |= ROFF_WHILE; 2747 return irc; 2748 } 2749 2750 static int 2751 roff_ds(ROFF_ARGS) 2752 { 2753 char *string; 2754 const char *name; 2755 size_t namesz; 2756 2757 /* Ignore groff compatibility mode for now. */ 2758 2759 if (tok == ROFF_ds1) 2760 tok = ROFF_ds; 2761 else if (tok == ROFF_as1) 2762 tok = ROFF_as; 2763 2764 /* 2765 * The first word is the name of the string. 2766 * If it is empty or terminated by an escape sequence, 2767 * abort the `ds' request without defining anything. 2768 */ 2769 2770 name = string = buf->buf + pos; 2771 if (*name == '\0') 2772 return ROFF_IGN; 2773 2774 namesz = roff_getname(r, &string, ln, pos); 2775 switch (name[namesz]) { 2776 case '\\': 2777 return ROFF_IGN; 2778 case '\t': 2779 string = buf->buf + pos + namesz; 2780 break; 2781 default: 2782 break; 2783 } 2784 2785 /* Read past the initial double-quote, if any. */ 2786 if (*string == '"') 2787 string++; 2788 2789 /* The rest is the value. */ 2790 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2791 ROFF_as == tok); 2792 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2793 return ROFF_IGN; 2794 } 2795 2796 /* 2797 * Parse a single operator, one or two characters long. 2798 * If the operator is recognized, return success and advance the 2799 * parse point, else return failure and let the parse point unchanged. 2800 */ 2801 static int 2802 roff_getop(const char *v, int *pos, char *res) 2803 { 2804 2805 *res = v[*pos]; 2806 2807 switch (*res) { 2808 case '+': 2809 case '-': 2810 case '*': 2811 case '/': 2812 case '%': 2813 case '&': 2814 case ':': 2815 break; 2816 case '<': 2817 switch (v[*pos + 1]) { 2818 case '=': 2819 *res = 'l'; 2820 (*pos)++; 2821 break; 2822 case '>': 2823 *res = '!'; 2824 (*pos)++; 2825 break; 2826 case '?': 2827 *res = 'i'; 2828 (*pos)++; 2829 break; 2830 default: 2831 break; 2832 } 2833 break; 2834 case '>': 2835 switch (v[*pos + 1]) { 2836 case '=': 2837 *res = 'g'; 2838 (*pos)++; 2839 break; 2840 case '?': 2841 *res = 'a'; 2842 (*pos)++; 2843 break; 2844 default: 2845 break; 2846 } 2847 break; 2848 case '=': 2849 if ('=' == v[*pos + 1]) 2850 (*pos)++; 2851 break; 2852 default: 2853 return 0; 2854 } 2855 (*pos)++; 2856 2857 return *res; 2858 } 2859 2860 /* 2861 * Evaluate either a parenthesized numeric expression 2862 * or a single signed integer number. 2863 */ 2864 static int 2865 roff_evalpar(struct roff *r, int ln, 2866 const char *v, int *pos, int *res, int flags) 2867 { 2868 2869 if ('(' != v[*pos]) 2870 return roff_getnum(v, pos, res, flags); 2871 2872 (*pos)++; 2873 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2874 return 0; 2875 2876 /* 2877 * Omission of the closing parenthesis 2878 * is an error in validation mode, 2879 * but ignored in evaluation mode. 2880 */ 2881 2882 if (')' == v[*pos]) 2883 (*pos)++; 2884 else if (NULL == res) 2885 return 0; 2886 2887 return 1; 2888 } 2889 2890 /* 2891 * Evaluate a complete numeric expression. 2892 * Proceed left to right, there is no concept of precedence. 2893 */ 2894 static int 2895 roff_evalnum(struct roff *r, int ln, const char *v, 2896 int *pos, int *res, int flags) 2897 { 2898 int mypos, operand2; 2899 char operator; 2900 2901 if (NULL == pos) { 2902 mypos = 0; 2903 pos = &mypos; 2904 } 2905 2906 if (flags & ROFFNUM_WHITE) 2907 while (isspace((unsigned char)v[*pos])) 2908 (*pos)++; 2909 2910 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2911 return 0; 2912 2913 while (1) { 2914 if (flags & ROFFNUM_WHITE) 2915 while (isspace((unsigned char)v[*pos])) 2916 (*pos)++; 2917 2918 if ( ! roff_getop(v, pos, &operator)) 2919 break; 2920 2921 if (flags & ROFFNUM_WHITE) 2922 while (isspace((unsigned char)v[*pos])) 2923 (*pos)++; 2924 2925 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2926 return 0; 2927 2928 if (flags & ROFFNUM_WHITE) 2929 while (isspace((unsigned char)v[*pos])) 2930 (*pos)++; 2931 2932 if (NULL == res) 2933 continue; 2934 2935 switch (operator) { 2936 case '+': 2937 *res += operand2; 2938 break; 2939 case '-': 2940 *res -= operand2; 2941 break; 2942 case '*': 2943 *res *= operand2; 2944 break; 2945 case '/': 2946 if (operand2 == 0) { 2947 mandoc_msg(MANDOCERR_DIVZERO, 2948 ln, *pos, "%s", v); 2949 *res = 0; 2950 break; 2951 } 2952 *res /= operand2; 2953 break; 2954 case '%': 2955 if (operand2 == 0) { 2956 mandoc_msg(MANDOCERR_DIVZERO, 2957 ln, *pos, "%s", v); 2958 *res = 0; 2959 break; 2960 } 2961 *res %= operand2; 2962 break; 2963 case '<': 2964 *res = *res < operand2; 2965 break; 2966 case '>': 2967 *res = *res > operand2; 2968 break; 2969 case 'l': 2970 *res = *res <= operand2; 2971 break; 2972 case 'g': 2973 *res = *res >= operand2; 2974 break; 2975 case '=': 2976 *res = *res == operand2; 2977 break; 2978 case '!': 2979 *res = *res != operand2; 2980 break; 2981 case '&': 2982 *res = *res && operand2; 2983 break; 2984 case ':': 2985 *res = *res || operand2; 2986 break; 2987 case 'i': 2988 if (operand2 < *res) 2989 *res = operand2; 2990 break; 2991 case 'a': 2992 if (operand2 > *res) 2993 *res = operand2; 2994 break; 2995 default: 2996 abort(); 2997 } 2998 } 2999 return 1; 3000 } 3001 3002 /* --- register management ------------------------------------------------ */ 3003 3004 void 3005 roff_setreg(struct roff *r, const char *name, int val, char sign) 3006 { 3007 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 3008 } 3009 3010 static void 3011 roff_setregn(struct roff *r, const char *name, size_t len, 3012 int val, char sign, int step) 3013 { 3014 struct roffreg *reg; 3015 3016 /* Search for an existing register with the same name. */ 3017 reg = r->regtab; 3018 3019 while (reg != NULL && (reg->key.sz != len || 3020 strncmp(reg->key.p, name, len) != 0)) 3021 reg = reg->next; 3022 3023 if (NULL == reg) { 3024 /* Create a new register. */ 3025 reg = mandoc_malloc(sizeof(struct roffreg)); 3026 reg->key.p = mandoc_strndup(name, len); 3027 reg->key.sz = len; 3028 reg->val = 0; 3029 reg->step = 0; 3030 reg->next = r->regtab; 3031 r->regtab = reg; 3032 } 3033 3034 if ('+' == sign) 3035 reg->val += val; 3036 else if ('-' == sign) 3037 reg->val -= val; 3038 else 3039 reg->val = val; 3040 if (step != INT_MIN) 3041 reg->step = step; 3042 } 3043 3044 /* 3045 * Handle some predefined read-only number registers. 3046 * For now, return -1 if the requested register is not predefined; 3047 * in case a predefined read-only register having the value -1 3048 * were to turn up, another special value would have to be chosen. 3049 */ 3050 static int 3051 roff_getregro(const struct roff *r, const char *name) 3052 { 3053 3054 switch (*name) { 3055 case '$': /* Number of arguments of the last macro evaluated. */ 3056 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 3057 case 'A': /* ASCII approximation mode is always off. */ 3058 return 0; 3059 case 'g': /* Groff compatibility mode is always on. */ 3060 return 1; 3061 case 'H': /* Fixed horizontal resolution. */ 3062 return 24; 3063 case 'j': /* Always adjust left margin only. */ 3064 return 0; 3065 case 'T': /* Some output device is always defined. */ 3066 return 1; 3067 case 'V': /* Fixed vertical resolution. */ 3068 return 40; 3069 default: 3070 return -1; 3071 } 3072 } 3073 3074 int 3075 roff_getreg(struct roff *r, const char *name) 3076 { 3077 return roff_getregn(r, name, strlen(name), '\0'); 3078 } 3079 3080 static int 3081 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3082 { 3083 struct roffreg *reg; 3084 int val; 3085 3086 if ('.' == name[0] && 2 == len) { 3087 val = roff_getregro(r, name + 1); 3088 if (-1 != val) 3089 return val; 3090 } 3091 3092 for (reg = r->regtab; reg; reg = reg->next) { 3093 if (len == reg->key.sz && 3094 0 == strncmp(name, reg->key.p, len)) { 3095 switch (sign) { 3096 case '+': 3097 reg->val += reg->step; 3098 break; 3099 case '-': 3100 reg->val -= reg->step; 3101 break; 3102 default: 3103 break; 3104 } 3105 return reg->val; 3106 } 3107 } 3108 3109 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3110 return 0; 3111 } 3112 3113 static int 3114 roff_hasregn(const struct roff *r, const char *name, size_t len) 3115 { 3116 struct roffreg *reg; 3117 int val; 3118 3119 if ('.' == name[0] && 2 == len) { 3120 val = roff_getregro(r, name + 1); 3121 if (-1 != val) 3122 return 1; 3123 } 3124 3125 for (reg = r->regtab; reg; reg = reg->next) 3126 if (len == reg->key.sz && 3127 0 == strncmp(name, reg->key.p, len)) 3128 return 1; 3129 3130 return 0; 3131 } 3132 3133 static void 3134 roff_freereg(struct roffreg *reg) 3135 { 3136 struct roffreg *old_reg; 3137 3138 while (NULL != reg) { 3139 free(reg->key.p); 3140 old_reg = reg; 3141 reg = reg->next; 3142 free(old_reg); 3143 } 3144 } 3145 3146 static int 3147 roff_nr(ROFF_ARGS) 3148 { 3149 char *key, *val, *step; 3150 size_t keysz; 3151 int iv, is, len; 3152 char sign; 3153 3154 key = val = buf->buf + pos; 3155 if (*key == '\0') 3156 return ROFF_IGN; 3157 3158 keysz = roff_getname(r, &val, ln, pos); 3159 if (key[keysz] == '\\' || key[keysz] == '\t') 3160 return ROFF_IGN; 3161 3162 sign = *val; 3163 if (sign == '+' || sign == '-') 3164 val++; 3165 3166 len = 0; 3167 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3168 return ROFF_IGN; 3169 3170 step = val + len; 3171 while (isspace((unsigned char)*step)) 3172 step++; 3173 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3174 is = INT_MIN; 3175 3176 roff_setregn(r, key, keysz, iv, sign, is); 3177 return ROFF_IGN; 3178 } 3179 3180 static int 3181 roff_rr(ROFF_ARGS) 3182 { 3183 struct roffreg *reg, **prev; 3184 char *name, *cp; 3185 size_t namesz; 3186 3187 name = cp = buf->buf + pos; 3188 if (*name == '\0') 3189 return ROFF_IGN; 3190 namesz = roff_getname(r, &cp, ln, pos); 3191 name[namesz] = '\0'; 3192 3193 prev = &r->regtab; 3194 while (1) { 3195 reg = *prev; 3196 if (reg == NULL || !strcmp(name, reg->key.p)) 3197 break; 3198 prev = ®->next; 3199 } 3200 if (reg != NULL) { 3201 *prev = reg->next; 3202 free(reg->key.p); 3203 free(reg); 3204 } 3205 return ROFF_IGN; 3206 } 3207 3208 /* --- handler functions for roff requests -------------------------------- */ 3209 3210 static int 3211 roff_rm(ROFF_ARGS) 3212 { 3213 const char *name; 3214 char *cp; 3215 size_t namesz; 3216 3217 cp = buf->buf + pos; 3218 while (*cp != '\0') { 3219 name = cp; 3220 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3221 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3222 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3223 if (name[namesz] == '\\' || name[namesz] == '\t') 3224 break; 3225 } 3226 return ROFF_IGN; 3227 } 3228 3229 static int 3230 roff_it(ROFF_ARGS) 3231 { 3232 int iv; 3233 3234 /* Parse the number of lines. */ 3235 3236 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3237 mandoc_msg(MANDOCERR_IT_NONUM, 3238 ln, ppos, "%s", buf->buf + 1); 3239 return ROFF_IGN; 3240 } 3241 3242 while (isspace((unsigned char)buf->buf[pos])) 3243 pos++; 3244 3245 /* 3246 * Arm the input line trap. 3247 * Special-casing "an-trap" is an ugly workaround to cope 3248 * with DocBook stupidly fiddling with man(7) internals. 3249 */ 3250 3251 roffit_lines = iv; 3252 roffit_macro = mandoc_strdup(iv != 1 || 3253 strcmp(buf->buf + pos, "an-trap") ? 3254 buf->buf + pos : "br"); 3255 return ROFF_IGN; 3256 } 3257 3258 static int 3259 roff_Dd(ROFF_ARGS) 3260 { 3261 int mask; 3262 enum roff_tok t, te; 3263 3264 switch (tok) { 3265 case ROFF_Dd: 3266 tok = MDOC_Dd; 3267 te = MDOC_MAX; 3268 if (r->format == 0) 3269 r->format = MPARSE_MDOC; 3270 mask = MPARSE_MDOC | MPARSE_QUICK; 3271 break; 3272 case ROFF_TH: 3273 tok = MAN_TH; 3274 te = MAN_MAX; 3275 if (r->format == 0) 3276 r->format = MPARSE_MAN; 3277 mask = MPARSE_QUICK; 3278 break; 3279 default: 3280 abort(); 3281 } 3282 if ((r->options & mask) == 0) 3283 for (t = tok; t < te; t++) 3284 roff_setstr(r, roff_name[t], NULL, 0); 3285 return ROFF_CONT; 3286 } 3287 3288 static int 3289 roff_TE(ROFF_ARGS) 3290 { 3291 r->man->flags &= ~ROFF_NONOFILL; 3292 if (r->tbl == NULL) { 3293 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3294 return ROFF_IGN; 3295 } 3296 if (tbl_end(r->tbl, 0) == 0) { 3297 r->tbl = NULL; 3298 free(buf->buf); 3299 buf->buf = mandoc_strdup(".sp"); 3300 buf->sz = 4; 3301 *offs = 0; 3302 return ROFF_REPARSE; 3303 } 3304 r->tbl = NULL; 3305 return ROFF_IGN; 3306 } 3307 3308 static int 3309 roff_T_(ROFF_ARGS) 3310 { 3311 3312 if (NULL == r->tbl) 3313 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3314 else 3315 tbl_restart(ln, ppos, r->tbl); 3316 3317 return ROFF_IGN; 3318 } 3319 3320 /* 3321 * Handle in-line equation delimiters. 3322 */ 3323 static int 3324 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3325 { 3326 char *cp1, *cp2; 3327 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3328 3329 /* 3330 * Outside equations, look for an opening delimiter. 3331 * If we are inside an equation, we already know it is 3332 * in-line, or this function wouldn't have been called; 3333 * so look for a closing delimiter. 3334 */ 3335 3336 cp1 = buf->buf + pos; 3337 cp2 = strchr(cp1, r->eqn == NULL ? 3338 r->last_eqn->odelim : r->last_eqn->cdelim); 3339 if (cp2 == NULL) 3340 return ROFF_CONT; 3341 3342 *cp2++ = '\0'; 3343 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3344 3345 /* Handle preceding text, protecting whitespace. */ 3346 3347 if (*buf->buf != '\0') { 3348 if (r->eqn == NULL) 3349 bef_pr = "\\&"; 3350 bef_nl = "\n"; 3351 } 3352 3353 /* 3354 * Prepare replacing the delimiter with an equation macro 3355 * and drop leading white space from the equation. 3356 */ 3357 3358 if (r->eqn == NULL) { 3359 while (*cp2 == ' ') 3360 cp2++; 3361 mac = ".EQ"; 3362 } else 3363 mac = ".EN"; 3364 3365 /* Handle following text, protecting whitespace. */ 3366 3367 if (*cp2 != '\0') { 3368 aft_nl = "\n"; 3369 if (r->eqn != NULL) 3370 aft_pr = "\\&"; 3371 } 3372 3373 /* Do the actual replacement. */ 3374 3375 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3376 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3377 free(buf->buf); 3378 buf->buf = cp1; 3379 3380 /* Toggle the in-line state of the eqn subsystem. */ 3381 3382 r->eqn_inline = r->eqn == NULL; 3383 return ROFF_REPARSE; 3384 } 3385 3386 static int 3387 roff_EQ(ROFF_ARGS) 3388 { 3389 struct roff_node *n; 3390 3391 if (r->man->meta.macroset == MACROSET_MAN) 3392 man_breakscope(r->man, ROFF_EQ); 3393 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3394 if (ln > r->man->last->line) 3395 n->flags |= NODE_LINE; 3396 n->eqn = eqn_box_new(); 3397 roff_node_append(r->man, n); 3398 r->man->next = ROFF_NEXT_SIBLING; 3399 3400 assert(r->eqn == NULL); 3401 if (r->last_eqn == NULL) 3402 r->last_eqn = eqn_alloc(); 3403 else 3404 eqn_reset(r->last_eqn); 3405 r->eqn = r->last_eqn; 3406 r->eqn->node = n; 3407 3408 if (buf->buf[pos] != '\0') 3409 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3410 ".EQ %s", buf->buf + pos); 3411 3412 return ROFF_IGN; 3413 } 3414 3415 static int 3416 roff_EN(ROFF_ARGS) 3417 { 3418 if (r->eqn != NULL) { 3419 eqn_parse(r->eqn); 3420 r->eqn = NULL; 3421 } else 3422 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3423 if (buf->buf[pos] != '\0') 3424 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3425 "EN %s", buf->buf + pos); 3426 return ROFF_IGN; 3427 } 3428 3429 static int 3430 roff_TS(ROFF_ARGS) 3431 { 3432 if (r->tbl != NULL) { 3433 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3434 tbl_end(r->tbl, 0); 3435 } 3436 r->man->flags |= ROFF_NONOFILL; 3437 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3438 if (r->last_tbl == NULL) 3439 r->first_tbl = r->tbl; 3440 r->last_tbl = r->tbl; 3441 return ROFF_IGN; 3442 } 3443 3444 static int 3445 roff_noarg(ROFF_ARGS) 3446 { 3447 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3448 man_breakscope(r->man, tok); 3449 if (tok == ROFF_brp) 3450 tok = ROFF_br; 3451 roff_elem_alloc(r->man, ln, ppos, tok); 3452 if (buf->buf[pos] != '\0') 3453 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3454 "%s %s", roff_name[tok], buf->buf + pos); 3455 if (tok == ROFF_nf) 3456 r->man->flags |= ROFF_NOFILL; 3457 else if (tok == ROFF_fi) 3458 r->man->flags &= ~ROFF_NOFILL; 3459 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3460 r->man->next = ROFF_NEXT_SIBLING; 3461 return ROFF_IGN; 3462 } 3463 3464 static int 3465 roff_onearg(ROFF_ARGS) 3466 { 3467 struct roff_node *n; 3468 char *cp; 3469 int npos; 3470 3471 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3472 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3473 tok == ROFF_ti)) 3474 man_breakscope(r->man, tok); 3475 3476 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3477 r->man->last = roffce_node; 3478 r->man->next = ROFF_NEXT_SIBLING; 3479 } 3480 3481 roff_elem_alloc(r->man, ln, ppos, tok); 3482 n = r->man->last; 3483 3484 cp = buf->buf + pos; 3485 if (*cp != '\0') { 3486 while (*cp != '\0' && *cp != ' ') 3487 cp++; 3488 while (*cp == ' ') 3489 *cp++ = '\0'; 3490 if (*cp != '\0') 3491 mandoc_msg(MANDOCERR_ARG_EXCESS, 3492 ln, (int)(cp - buf->buf), 3493 "%s ... %s", roff_name[tok], cp); 3494 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3495 } 3496 3497 if (tok == ROFF_ce || tok == ROFF_rj) { 3498 if (r->man->last->type == ROFFT_ELEM) { 3499 roff_word_alloc(r->man, ln, pos, "1"); 3500 r->man->last->flags |= NODE_NOSRC; 3501 } 3502 npos = 0; 3503 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3504 &roffce_lines, 0) == 0) { 3505 mandoc_msg(MANDOCERR_CE_NONUM, 3506 ln, pos, "ce %s", buf->buf + pos); 3507 roffce_lines = 1; 3508 } 3509 if (roffce_lines < 1) { 3510 r->man->last = r->man->last->parent; 3511 roffce_node = NULL; 3512 roffce_lines = 0; 3513 } else 3514 roffce_node = r->man->last->parent; 3515 } else { 3516 n->flags |= NODE_VALID | NODE_ENDED; 3517 r->man->last = n; 3518 } 3519 n->flags |= NODE_LINE; 3520 r->man->next = ROFF_NEXT_SIBLING; 3521 return ROFF_IGN; 3522 } 3523 3524 static int 3525 roff_manyarg(ROFF_ARGS) 3526 { 3527 struct roff_node *n; 3528 char *sp, *ep; 3529 3530 roff_elem_alloc(r->man, ln, ppos, tok); 3531 n = r->man->last; 3532 3533 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3534 while (*ep != '\0' && *ep != ' ') 3535 ep++; 3536 while (*ep == ' ') 3537 *ep++ = '\0'; 3538 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3539 } 3540 3541 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3542 r->man->last = n; 3543 r->man->next = ROFF_NEXT_SIBLING; 3544 return ROFF_IGN; 3545 } 3546 3547 static int 3548 roff_als(ROFF_ARGS) 3549 { 3550 char *oldn, *newn, *end, *value; 3551 size_t oldsz, newsz, valsz; 3552 3553 newn = oldn = buf->buf + pos; 3554 if (*newn == '\0') 3555 return ROFF_IGN; 3556 3557 newsz = roff_getname(r, &oldn, ln, pos); 3558 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3559 return ROFF_IGN; 3560 3561 end = oldn; 3562 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3563 if (oldsz == 0) 3564 return ROFF_IGN; 3565 3566 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3567 (int)oldsz, oldn); 3568 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3569 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3570 free(value); 3571 return ROFF_IGN; 3572 } 3573 3574 /* 3575 * The .break request only makes sense inside conditionals, 3576 * and that case is already handled in roff_cond_sub(). 3577 */ 3578 static int 3579 roff_break(ROFF_ARGS) 3580 { 3581 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3582 return ROFF_IGN; 3583 } 3584 3585 static int 3586 roff_cc(ROFF_ARGS) 3587 { 3588 const char *p; 3589 3590 p = buf->buf + pos; 3591 3592 if (*p == '\0' || (r->control = *p++) == '.') 3593 r->control = '\0'; 3594 3595 if (*p != '\0') 3596 mandoc_msg(MANDOCERR_ARG_EXCESS, 3597 ln, p - buf->buf, "cc ... %s", p); 3598 3599 return ROFF_IGN; 3600 } 3601 3602 static int 3603 roff_char(ROFF_ARGS) 3604 { 3605 const char *p, *kp, *vp; 3606 size_t ksz, vsz; 3607 int font; 3608 3609 /* Parse the character to be replaced. */ 3610 3611 kp = buf->buf + pos; 3612 p = kp + 1; 3613 if (*kp == '\0' || (*kp == '\\' && 3614 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3615 (*p != ' ' && *p != '\0')) { 3616 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3617 return ROFF_IGN; 3618 } 3619 ksz = p - kp; 3620 while (*p == ' ') 3621 p++; 3622 3623 /* 3624 * If the replacement string contains a font escape sequence, 3625 * we have to restore the font at the end. 3626 */ 3627 3628 vp = p; 3629 vsz = strlen(p); 3630 font = 0; 3631 while (*p != '\0') { 3632 if (*p++ != '\\') 3633 continue; 3634 switch (mandoc_escape(&p, NULL, NULL)) { 3635 case ESCAPE_FONT: 3636 case ESCAPE_FONTROMAN: 3637 case ESCAPE_FONTITALIC: 3638 case ESCAPE_FONTBOLD: 3639 case ESCAPE_FONTBI: 3640 case ESCAPE_FONTCR: 3641 case ESCAPE_FONTCB: 3642 case ESCAPE_FONTCI: 3643 case ESCAPE_FONTPREV: 3644 font++; 3645 break; 3646 default: 3647 break; 3648 } 3649 } 3650 if (font > 1) 3651 mandoc_msg(MANDOCERR_CHAR_FONT, 3652 ln, (int)(vp - buf->buf), "%s", vp); 3653 3654 /* 3655 * Approximate the effect of .char using the .tr tables. 3656 * XXX In groff, .char and .tr interact differently. 3657 */ 3658 3659 if (ksz == 1) { 3660 if (r->xtab == NULL) 3661 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3662 assert((unsigned int)*kp < 128); 3663 free(r->xtab[(int)*kp].p); 3664 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3665 "%s%s", vp, font ? "\fP" : ""); 3666 } else { 3667 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3668 if (font) 3669 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3670 } 3671 return ROFF_IGN; 3672 } 3673 3674 static int 3675 roff_ec(ROFF_ARGS) 3676 { 3677 const char *p; 3678 3679 p = buf->buf + pos; 3680 if (*p == '\0') 3681 r->escape = '\\'; 3682 else { 3683 r->escape = *p; 3684 if (*++p != '\0') 3685 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3686 (int)(p - buf->buf), "ec ... %s", p); 3687 } 3688 return ROFF_IGN; 3689 } 3690 3691 static int 3692 roff_eo(ROFF_ARGS) 3693 { 3694 r->escape = '\0'; 3695 if (buf->buf[pos] != '\0') 3696 mandoc_msg(MANDOCERR_ARG_SKIP, 3697 ln, pos, "eo %s", buf->buf + pos); 3698 return ROFF_IGN; 3699 } 3700 3701 static int 3702 roff_mc(ROFF_ARGS) 3703 { 3704 struct roff_node *n; 3705 char *cp; 3706 3707 /* Parse the first argument. */ 3708 3709 cp = buf->buf + pos; 3710 if (*cp != '\0') 3711 cp++; 3712 if (buf->buf[pos] == '\\') { 3713 switch (mandoc_escape((const char **)&cp, NULL, NULL)) { 3714 case ESCAPE_SPECIAL: 3715 case ESCAPE_UNICODE: 3716 case ESCAPE_NUMBERED: 3717 break; 3718 default: 3719 *cp = '\0'; 3720 mandoc_msg(MANDOCERR_MC_ESC, ln, pos, 3721 "mc %s", buf->buf + pos); 3722 buf->buf[pos] = '\0'; 3723 break; 3724 } 3725 } 3726 3727 /* Ignore additional arguments. */ 3728 3729 while (*cp == ' ') 3730 *cp++ = '\0'; 3731 if (*cp != '\0') { 3732 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf), 3733 "mc ... %s", cp); 3734 *cp = '\0'; 3735 } 3736 3737 /* Create the .mc node. */ 3738 3739 roff_elem_alloc(r->man, ln, ppos, tok); 3740 n = r->man->last; 3741 if (buf->buf[pos] != '\0') 3742 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3743 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3744 r->man->last = n; 3745 r->man->next = ROFF_NEXT_SIBLING; 3746 return ROFF_IGN; 3747 } 3748 3749 static int 3750 roff_nop(ROFF_ARGS) 3751 { 3752 while (buf->buf[pos] == ' ') 3753 pos++; 3754 *offs = pos; 3755 return ROFF_RERUN; 3756 } 3757 3758 static int 3759 roff_tr(ROFF_ARGS) 3760 { 3761 const char *p, *first, *second; 3762 size_t fsz, ssz; 3763 3764 p = buf->buf + pos; 3765 3766 if (*p == '\0') { 3767 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3768 return ROFF_IGN; 3769 } 3770 3771 while (*p != '\0') { 3772 fsz = ssz = 1; 3773 3774 first = p++; 3775 if (*first == '\\') { 3776 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) 3777 return ROFF_IGN; 3778 fsz = (size_t)(p - first); 3779 } 3780 3781 second = p++; 3782 if (*second == '\\') { 3783 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR) 3784 return ROFF_IGN; 3785 ssz = (size_t)(p - second); 3786 } else if (*second == '\0') { 3787 mandoc_msg(MANDOCERR_TR_ODD, ln, 3788 (int)(first - buf->buf), "tr %s", first); 3789 second = " "; 3790 p--; 3791 } 3792 3793 if (fsz > 1) { 3794 roff_setstrn(&r->xmbtab, first, fsz, 3795 second, ssz, 0); 3796 continue; 3797 } 3798 3799 if (r->xtab == NULL) 3800 r->xtab = mandoc_calloc(128, 3801 sizeof(struct roffstr)); 3802 3803 free(r->xtab[(int)*first].p); 3804 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3805 r->xtab[(int)*first].sz = ssz; 3806 } 3807 3808 return ROFF_IGN; 3809 } 3810 3811 /* 3812 * Implementation of the .return request. 3813 * There is no need to call roff_userret() from here. 3814 * The read module will call that after rewinding the reader stack 3815 * to the place from where the current macro was called. 3816 */ 3817 static int 3818 roff_return(ROFF_ARGS) 3819 { 3820 if (r->mstackpos >= 0) 3821 return ROFF_IGN | ROFF_USERRET; 3822 3823 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3824 return ROFF_IGN; 3825 } 3826 3827 static int 3828 roff_rn(ROFF_ARGS) 3829 { 3830 const char *value; 3831 char *oldn, *newn, *end; 3832 size_t oldsz, newsz; 3833 int deftype; 3834 3835 oldn = newn = buf->buf + pos; 3836 if (*oldn == '\0') 3837 return ROFF_IGN; 3838 3839 oldsz = roff_getname(r, &newn, ln, pos); 3840 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3841 return ROFF_IGN; 3842 3843 end = newn; 3844 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3845 if (newsz == 0) 3846 return ROFF_IGN; 3847 3848 deftype = ROFFDEF_ANY; 3849 value = roff_getstrn(r, oldn, oldsz, &deftype); 3850 switch (deftype) { 3851 case ROFFDEF_USER: 3852 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3853 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3854 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3855 break; 3856 case ROFFDEF_PRE: 3857 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3858 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3859 break; 3860 case ROFFDEF_REN: 3861 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3862 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3863 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3864 break; 3865 case ROFFDEF_STD: 3866 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3867 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3868 break; 3869 default: 3870 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3871 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3872 break; 3873 } 3874 return ROFF_IGN; 3875 } 3876 3877 static int 3878 roff_shift(ROFF_ARGS) 3879 { 3880 struct mctx *ctx; 3881 int argpos, levels, i; 3882 3883 argpos = pos; 3884 levels = 1; 3885 if (buf->buf[pos] != '\0' && 3886 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3887 mandoc_msg(MANDOCERR_CE_NONUM, 3888 ln, pos, "shift %s", buf->buf + pos); 3889 levels = 1; 3890 } 3891 if (r->mstackpos < 0) { 3892 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3893 return ROFF_IGN; 3894 } 3895 ctx = r->mstack + r->mstackpos; 3896 if (levels > ctx->argc) { 3897 mandoc_msg(MANDOCERR_SHIFT, 3898 ln, argpos, "%d, but max is %d", levels, ctx->argc); 3899 levels = ctx->argc; 3900 } 3901 if (levels < 0) { 3902 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels); 3903 levels = 0; 3904 } 3905 if (levels == 0) 3906 return ROFF_IGN; 3907 for (i = 0; i < levels; i++) 3908 free(ctx->argv[i]); 3909 ctx->argc -= levels; 3910 for (i = 0; i < ctx->argc; i++) 3911 ctx->argv[i] = ctx->argv[i + levels]; 3912 return ROFF_IGN; 3913 } 3914 3915 static int 3916 roff_so(ROFF_ARGS) 3917 { 3918 char *name, *cp; 3919 3920 name = buf->buf + pos; 3921 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3922 3923 /* 3924 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3925 * opening anything that's not in our cwd or anything beneath 3926 * it. Thus, explicitly disallow traversing up the file-system 3927 * or using absolute paths. 3928 */ 3929 3930 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3931 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3932 buf->sz = mandoc_asprintf(&cp, 3933 ".sp\nSee the file %s.\n.sp", name) + 1; 3934 free(buf->buf); 3935 buf->buf = cp; 3936 *offs = 0; 3937 return ROFF_REPARSE; 3938 } 3939 3940 *offs = pos; 3941 return ROFF_SO; 3942 } 3943 3944 /* --- user defined strings and macros ------------------------------------ */ 3945 3946 static int 3947 roff_userdef(ROFF_ARGS) 3948 { 3949 struct mctx *ctx; 3950 char *arg, *ap, *dst, *src; 3951 size_t sz; 3952 3953 /* If the macro is empty, ignore it altogether. */ 3954 3955 if (*r->current_string == '\0') 3956 return ROFF_IGN; 3957 3958 /* Initialize a new macro stack context. */ 3959 3960 if (++r->mstackpos == r->mstacksz) { 3961 r->mstack = mandoc_recallocarray(r->mstack, 3962 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3963 r->mstacksz += 8; 3964 } 3965 ctx = r->mstack + r->mstackpos; 3966 ctx->argc = 0; 3967 3968 /* 3969 * Collect pointers to macro argument strings, 3970 * NUL-terminating them and escaping quotes. 3971 */ 3972 3973 src = buf->buf + pos; 3974 while (*src != '\0') { 3975 if (ctx->argc == ctx->argsz) { 3976 ctx->argsz += 8; 3977 ctx->argv = mandoc_reallocarray(ctx->argv, 3978 ctx->argsz, sizeof(*ctx->argv)); 3979 } 3980 arg = roff_getarg(r, &src, ln, &pos); 3981 sz = 1; /* For the terminating NUL. */ 3982 for (ap = arg; *ap != '\0'; ap++) 3983 sz += *ap == '"' ? 4 : 1; 3984 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3985 for (ap = arg; *ap != '\0'; ap++) { 3986 if (*ap == '"') { 3987 memcpy(dst, "\\(dq", 4); 3988 dst += 4; 3989 } else 3990 *dst++ = *ap; 3991 } 3992 *dst = '\0'; 3993 free(arg); 3994 } 3995 3996 /* Replace the macro invocation by the macro definition. */ 3997 3998 free(buf->buf); 3999 buf->buf = mandoc_strdup(r->current_string); 4000 buf->sz = strlen(buf->buf) + 1; 4001 *offs = 0; 4002 4003 return buf->buf[buf->sz - 2] == '\n' ? 4004 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 4005 } 4006 4007 /* 4008 * Calling a high-level macro that was renamed with .rn. 4009 * r->current_string has already been set up by roff_parse(). 4010 */ 4011 static int 4012 roff_renamed(ROFF_ARGS) 4013 { 4014 char *nbuf; 4015 4016 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 4017 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 4018 free(buf->buf); 4019 buf->buf = nbuf; 4020 *offs = 0; 4021 return ROFF_CONT; 4022 } 4023 4024 /* 4025 * Measure the length in bytes of the roff identifier at *cpp 4026 * and advance the pointer to the next word. 4027 */ 4028 static size_t 4029 roff_getname(struct roff *r, char **cpp, int ln, int pos) 4030 { 4031 char *name, *cp; 4032 int namesz, inam, iend; 4033 4034 name = *cpp; 4035 if (*name == '\0') 4036 return 0; 4037 4038 /* Advance cp to the byte after the end of the name. */ 4039 4040 cp = name; 4041 namesz = 0; 4042 for (;;) { 4043 if (*cp == '\0') 4044 break; 4045 if (*cp == ' ' || *cp == '\t') { 4046 cp++; 4047 break; 4048 } 4049 if (*cp != '\\') { 4050 if (name + namesz < cp) { 4051 name[namesz] = *cp; 4052 *cp = ' '; 4053 } 4054 namesz++; 4055 cp++; 4056 continue; 4057 } 4058 if (cp[1] == '{' || cp[1] == '}') 4059 break; 4060 if (roff_escape(cp, 0, 0, NULL, &inam, 4061 NULL, NULL, &iend) != ESCAPE_UNDEF) { 4062 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 4063 "%.*s%.*s", namesz, name, iend, cp); 4064 cp += iend; 4065 break; 4066 } 4067 4068 /* 4069 * In an identifier, \\, \., \G and so on 4070 * are reduced to \, ., G and so on, 4071 * vaguely similar to copy mode. 4072 */ 4073 4074 name[namesz++] = cp[inam]; 4075 while (iend--) { 4076 if (cp >= name + namesz) 4077 *cp = ' '; 4078 cp++; 4079 } 4080 } 4081 4082 /* Read past spaces. */ 4083 4084 while (*cp == ' ') 4085 cp++; 4086 4087 *cpp = cp; 4088 return namesz; 4089 } 4090 4091 /* 4092 * Store *string into the user-defined string called *name. 4093 * To clear an existing entry, call with (*r, *name, NULL, 0). 4094 * append == 0: replace mode 4095 * append == 1: single-line append mode 4096 * append == 2: multiline append mode, append '\n' after each call 4097 */ 4098 static void 4099 roff_setstr(struct roff *r, const char *name, const char *string, 4100 int append) 4101 { 4102 size_t namesz; 4103 4104 namesz = strlen(name); 4105 roff_setstrn(&r->strtab, name, namesz, string, 4106 string ? strlen(string) : 0, append); 4107 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 4108 } 4109 4110 static void 4111 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 4112 const char *string, size_t stringsz, int append) 4113 { 4114 struct roffkv *n; 4115 char *c; 4116 int i; 4117 size_t oldch, newch; 4118 4119 /* Search for an existing string with the same name. */ 4120 n = *r; 4121 4122 while (n && (namesz != n->key.sz || 4123 strncmp(n->key.p, name, namesz))) 4124 n = n->next; 4125 4126 if (NULL == n) { 4127 /* Create a new string table entry. */ 4128 n = mandoc_malloc(sizeof(struct roffkv)); 4129 n->key.p = mandoc_strndup(name, namesz); 4130 n->key.sz = namesz; 4131 n->val.p = NULL; 4132 n->val.sz = 0; 4133 n->next = *r; 4134 *r = n; 4135 } else if (0 == append) { 4136 free(n->val.p); 4137 n->val.p = NULL; 4138 n->val.sz = 0; 4139 } 4140 4141 if (NULL == string) 4142 return; 4143 4144 /* 4145 * One additional byte for the '\n' in multiline mode, 4146 * and one for the terminating '\0'. 4147 */ 4148 newch = stringsz + (1 < append ? 2u : 1u); 4149 4150 if (NULL == n->val.p) { 4151 n->val.p = mandoc_malloc(newch); 4152 *n->val.p = '\0'; 4153 oldch = 0; 4154 } else { 4155 oldch = n->val.sz; 4156 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4157 } 4158 4159 /* Skip existing content in the destination buffer. */ 4160 c = n->val.p + (int)oldch; 4161 4162 /* Append new content to the destination buffer. */ 4163 i = 0; 4164 while (i < (int)stringsz) { 4165 /* 4166 * Rudimentary roff copy mode: 4167 * Handle escaped backslashes. 4168 */ 4169 if ('\\' == string[i] && '\\' == string[i + 1]) 4170 i++; 4171 *c++ = string[i++]; 4172 } 4173 4174 /* Append terminating bytes. */ 4175 if (1 < append) 4176 *c++ = '\n'; 4177 4178 *c = '\0'; 4179 n->val.sz = (int)(c - n->val.p); 4180 } 4181 4182 static const char * 4183 roff_getstrn(struct roff *r, const char *name, size_t len, 4184 int *deftype) 4185 { 4186 const struct roffkv *n; 4187 int found, i; 4188 enum roff_tok tok; 4189 4190 found = 0; 4191 for (n = r->strtab; n != NULL; n = n->next) { 4192 if (strncmp(name, n->key.p, len) != 0 || 4193 n->key.p[len] != '\0' || n->val.p == NULL) 4194 continue; 4195 if (*deftype & ROFFDEF_USER) { 4196 *deftype = ROFFDEF_USER; 4197 return n->val.p; 4198 } else { 4199 found = 1; 4200 break; 4201 } 4202 } 4203 for (n = r->rentab; n != NULL; n = n->next) { 4204 if (strncmp(name, n->key.p, len) != 0 || 4205 n->key.p[len] != '\0' || n->val.p == NULL) 4206 continue; 4207 if (*deftype & ROFFDEF_REN) { 4208 *deftype = ROFFDEF_REN; 4209 return n->val.p; 4210 } else { 4211 found = 1; 4212 break; 4213 } 4214 } 4215 for (i = 0; i < PREDEFS_MAX; i++) { 4216 if (strncmp(name, predefs[i].name, len) != 0 || 4217 predefs[i].name[len] != '\0') 4218 continue; 4219 if (*deftype & ROFFDEF_PRE) { 4220 *deftype = ROFFDEF_PRE; 4221 return predefs[i].str; 4222 } else { 4223 found = 1; 4224 break; 4225 } 4226 } 4227 if (r->man->meta.macroset != MACROSET_MAN) { 4228 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4229 if (strncmp(name, roff_name[tok], len) != 0 || 4230 roff_name[tok][len] != '\0') 4231 continue; 4232 if (*deftype & ROFFDEF_STD) { 4233 *deftype = ROFFDEF_STD; 4234 return NULL; 4235 } else { 4236 found = 1; 4237 break; 4238 } 4239 } 4240 } 4241 if (r->man->meta.macroset != MACROSET_MDOC) { 4242 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4243 if (strncmp(name, roff_name[tok], len) != 0 || 4244 roff_name[tok][len] != '\0') 4245 continue; 4246 if (*deftype & ROFFDEF_STD) { 4247 *deftype = ROFFDEF_STD; 4248 return NULL; 4249 } else { 4250 found = 1; 4251 break; 4252 } 4253 } 4254 } 4255 4256 if (found == 0 && *deftype != ROFFDEF_ANY) { 4257 if (*deftype & ROFFDEF_REN) { 4258 /* 4259 * This might still be a request, 4260 * so do not treat it as undefined yet. 4261 */ 4262 *deftype = ROFFDEF_UNDEF; 4263 return NULL; 4264 } 4265 4266 /* Using an undefined string defines it to be empty. */ 4267 4268 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4269 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4270 } 4271 4272 *deftype = 0; 4273 return NULL; 4274 } 4275 4276 static void 4277 roff_freestr(struct roffkv *r) 4278 { 4279 struct roffkv *n, *nn; 4280 4281 for (n = r; n; n = nn) { 4282 free(n->key.p); 4283 free(n->val.p); 4284 nn = n->next; 4285 free(n); 4286 } 4287 } 4288 4289 /* --- accessors and utility functions ------------------------------------ */ 4290 4291 /* 4292 * Duplicate an input string, making the appropriate character 4293 * conversations (as stipulated by `tr') along the way. 4294 * Returns a heap-allocated string with all the replacements made. 4295 */ 4296 char * 4297 roff_strdup(const struct roff *r, const char *p) 4298 { 4299 const struct roffkv *cp; 4300 char *res; 4301 const char *pp; 4302 size_t ssz, sz; 4303 enum mandoc_esc esc; 4304 4305 if (NULL == r->xmbtab && NULL == r->xtab) 4306 return mandoc_strdup(p); 4307 else if ('\0' == *p) 4308 return mandoc_strdup(""); 4309 4310 /* 4311 * Step through each character looking for term matches 4312 * (remember that a `tr' can be invoked with an escape, which is 4313 * a glyph but the escape is multi-character). 4314 * We only do this if the character hash has been initialised 4315 * and the string is >0 length. 4316 */ 4317 4318 res = NULL; 4319 ssz = 0; 4320 4321 while ('\0' != *p) { 4322 assert((unsigned int)*p < 128); 4323 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4324 sz = r->xtab[(int)*p].sz; 4325 res = mandoc_realloc(res, ssz + sz + 1); 4326 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4327 ssz += sz; 4328 p++; 4329 continue; 4330 } else if ('\\' != *p) { 4331 res = mandoc_realloc(res, ssz + 2); 4332 res[ssz++] = *p++; 4333 continue; 4334 } 4335 4336 /* Search for term matches. */ 4337 for (cp = r->xmbtab; cp; cp = cp->next) 4338 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4339 break; 4340 4341 if (NULL != cp) { 4342 /* 4343 * A match has been found. 4344 * Append the match to the array and move 4345 * forward by its keysize. 4346 */ 4347 res = mandoc_realloc(res, 4348 ssz + cp->val.sz + 1); 4349 memcpy(res + ssz, cp->val.p, cp->val.sz); 4350 ssz += cp->val.sz; 4351 p += (int)cp->key.sz; 4352 continue; 4353 } 4354 4355 /* 4356 * Handle escapes carefully: we need to copy 4357 * over just the escape itself, or else we might 4358 * do replacements within the escape itself. 4359 * Make sure to pass along the bogus string. 4360 */ 4361 pp = p++; 4362 esc = mandoc_escape(&p, NULL, NULL); 4363 if (ESCAPE_ERROR == esc) { 4364 sz = strlen(pp); 4365 res = mandoc_realloc(res, ssz + sz + 1); 4366 memcpy(res + ssz, pp, sz); 4367 break; 4368 } 4369 /* 4370 * We bail out on bad escapes. 4371 * No need to warn: we already did so when 4372 * roff_expand() was called. 4373 */ 4374 sz = (int)(p - pp); 4375 res = mandoc_realloc(res, ssz + sz + 1); 4376 memcpy(res + ssz, pp, sz); 4377 ssz += sz; 4378 } 4379 4380 res[(int)ssz] = '\0'; 4381 return res; 4382 } 4383 4384 int 4385 roff_getformat(const struct roff *r) 4386 { 4387 4388 return r->format; 4389 } 4390 4391 /* 4392 * Find out whether a line is a macro line or not. 4393 * If it is, adjust the current position and return one; if it isn't, 4394 * return zero and don't change the current position. 4395 * If the control character has been set with `.cc', then let that grain 4396 * precedence. 4397 * This is slightly contrary to groff, where using the non-breaking 4398 * control character when `cc' has been invoked will cause the 4399 * non-breaking macro contents to be printed verbatim. 4400 */ 4401 int 4402 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4403 { 4404 int pos; 4405 4406 pos = *ppos; 4407 4408 if (r->control != '\0' && cp[pos] == r->control) 4409 pos++; 4410 else if (r->control != '\0') 4411 return 0; 4412 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4413 pos += 2; 4414 else if ('.' == cp[pos] || '\'' == cp[pos]) 4415 pos++; 4416 else 4417 return 0; 4418 4419 while (' ' == cp[pos] || '\t' == cp[pos]) 4420 pos++; 4421 4422 *ppos = pos; 4423 return 1; 4424 } 4425