1 /* $Id: roff.c,v 1.366 2019/07/01 22:56:24 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mandoc_parse.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "tbl_parse.h" 39 #include "eqn_parse.h" 40 41 /* 42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 43 * that an escape sequence resulted from copy-in processing and 44 * needs to be checked or interpolated. As it is used nowhere 45 * else, it is defined here rather than in a header file. 46 */ 47 #define ASCII_ESC 27 48 49 /* Maximum number of string expansions per line, to break infinite loops. */ 50 #define EXPAND_LIMIT 1000 51 52 /* Types of definitions of macros and strings. */ 53 #define ROFFDEF_USER (1 << 1) /* User-defined. 
*/ 54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 58 ROFFDEF_REN | ROFFDEF_STD) 59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 60 61 /* --- data types --------------------------------------------------------- */ 62 63 /* 64 * An incredibly-simple string buffer. 65 */ 66 struct roffstr { 67 char *p; /* nil-terminated buffer */ 68 size_t sz; /* saved strlen(p) */ 69 }; 70 71 /* 72 * A key-value roffstr pair as part of a singly-linked list. 73 */ 74 struct roffkv { 75 struct roffstr key; 76 struct roffstr val; 77 struct roffkv *next; /* next in list */ 78 }; 79 80 /* 81 * A single number register as part of a singly-linked list. 82 */ 83 struct roffreg { 84 struct roffstr key; /* key identifying the register */ 85 int val; /* value of the register */ 86 int step; /* NOTE(review): presumably the auto-increment step -- confirm */ 87 struct roffreg *next; /* next in list */ 88 }; 89 90 /* 91 * Association of request and macro names with token IDs. 92 */ 93 struct roffreq { 94 enum roff_tok tok; /* token ID; this is what roffhash_find() returns */ 95 char name[]; /* name string, used as the hash key by roffhash_alloc() */ 96 }; 97 98 /* 99 * A macro processing context. 100 * More than one is needed when macro calls are nested. 
101 */ 102 struct mctx { 103 char **argv; /* argument vector; released per context by roff_free() */ 104 int argc; /* NOTE(review): presumably the number of arguments in use -- confirm */ 105 int argsz; /* NOTE(review): presumably the allocated capacity of argv -- confirm */ 106 }; 107 /* State of one roff parser; created by roff_alloc(), released by roff_free(). */ 108 struct roff { 109 struct roff_man *man; /* mdoc or man parser */ 110 struct roffnode *last; /* leaf of stack */ 111 struct mctx *mstack; /* stack of macro contexts */ 112 int *rstack; /* stack of inverted `ie' values */ 113 struct ohash *reqtab; /* request lookup table */ 114 struct roffreg *regtab; /* number registers */ 115 struct roffkv *strtab; /* user-defined strings & macros */ 116 struct roffkv *rentab; /* renamed strings & macros */ 117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 118 struct roffstr *xtab; /* single-byte trans table (`tr') */ 119 const char *current_string; /* value of last called user macro */ 120 struct tbl_node *first_tbl; /* first table parsed */ 121 struct tbl_node *last_tbl; /* last table parsed */ 122 struct tbl_node *tbl; /* current table being parsed */ 123 struct eqn_node *last_eqn; /* equation parser */ 124 struct eqn_node *eqn; /* active equation parser */ 125 int eqn_inline; /* current equation is inline */ 126 int options; /* parse options */ 127 int mstacksz; /* current size of mstack */ 128 int mstackpos; /* position in mstack */ 129 int rstacksz; /* current size limit of rstack */ 130 int rstackpos; /* position in rstack */ 131 int format; /* current file in mdoc or man format */ 132 char control; /* control character */ 133 char escape; /* escape character */ 134 }; 135 136 /* 137 * A macro definition, condition, or ignored block. 138 */ 139 struct roffnode { 140 enum roff_tok tok; /* type of node */ 141 struct roffnode *parent; /* up one in stack */ 142 int line; /* parse line */ 143 int col; /* parse col */ 144 char *name; /* node name, e.g. 
macro name */ 145 char *end; /* custom end macro of the block */ 146 int endspan; /* scope to: 1=eol 2=next line -1=\} */ 147 int rule; /* content is: 1=evaluated 0=skipped */ 148 }; 149 /* Parameter list shared by every request handler (see roffproc below). */ 150 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 151 enum roff_tok tok, /* tok of macro */ \ 152 struct buf *buf, /* input buffer */ \ 153 int ln, /* parse line */ \ 154 int ppos, /* original pos in buffer */ \ 155 int pos, /* current pos in buffer */ \ 156 int *offs /* reset offset of buffer data */ 157 /* Signature of a request handler; the arguments are those of ROFF_ARGS. */ 158 typedef int (*roffproc)(ROFF_ARGS); 159 /* Handlers and flags for one request; element type of the roffs[] table. */ 160 struct roffmac { 161 roffproc proc; /* process new macro */ 162 roffproc text; /* process as child text of macro */ 163 roffproc sub; /* process as child of macro */ 164 int flags; 165 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 166 }; 167 /* One predefined string; element type of the predefs[] table. 
*/ 168 struct predef { 169 const char *name; /* predefined input name */ 170 const char *str; /* replacement symbol */ 171 }; 172 173 #define PREDEF(__name, __str) \ 174 { (__name), (__str) }, 175 176 /* --- function prototypes ------------------------------------------------ */ 177 178 static int roffnode_cleanscope(struct roff *); 179 static int roffnode_pop(struct roff *); 180 static void roffnode_push(struct roff *, enum roff_tok, 181 const char *, int, int); 182 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 183 static int roff_als(ROFF_ARGS); 184 static int roff_block(ROFF_ARGS); 185 static int roff_block_text(ROFF_ARGS); 186 static int roff_block_sub(ROFF_ARGS); 187 static int roff_break(ROFF_ARGS); 188 static int roff_cblock(ROFF_ARGS); 189 static int roff_cc(ROFF_ARGS); 190 static int roff_ccond(struct roff *, int, int); 191 static int roff_char(ROFF_ARGS); 192 static int roff_cond(ROFF_ARGS); 193 static int roff_cond_text(ROFF_ARGS); 194 static int roff_cond_sub(ROFF_ARGS); 195 static int roff_ds(ROFF_ARGS); 196 static int roff_ec(ROFF_ARGS); 197 static int roff_eo(ROFF_ARGS); 198 static int roff_eqndelim(struct roff *, struct buf *, int); 199 static int 
roff_evalcond(struct roff *r, int, char *, int *); 200 static int roff_evalnum(struct roff *, int, 201 const char *, int *, int *, int); 202 static int roff_evalpar(struct roff *, int, 203 const char *, int *, int *, int); 204 static int roff_evalstrcond(const char *, int *); 205 static int roff_expand(struct roff *, struct buf *, 206 int, int, char); 207 static void roff_free1(struct roff *); 208 static void roff_freereg(struct roffreg *); 209 static void roff_freestr(struct roffkv *); 210 static size_t roff_getname(struct roff *, char **, int, int); 211 static int roff_getnum(const char *, int *, int *, int); 212 static int roff_getop(const char *, int *, char *); 213 static int roff_getregn(struct roff *, 214 const char *, size_t, char); 215 static int roff_getregro(const struct roff *, 216 const char *name); 217 static const char *roff_getstrn(struct roff *, 218 const char *, size_t, int *); 219 static int roff_hasregn(const struct roff *, 220 const char *, size_t); 221 static int roff_insec(ROFF_ARGS); 222 static int roff_it(ROFF_ARGS); 223 static int roff_line_ignore(ROFF_ARGS); 224 static void roff_man_alloc1(struct roff_man *); 225 static void roff_man_free1(struct roff_man *); 226 static int roff_manyarg(ROFF_ARGS); 227 static int roff_noarg(ROFF_ARGS); 228 static int roff_nop(ROFF_ARGS); 229 static int roff_nr(ROFF_ARGS); 230 static int roff_onearg(ROFF_ARGS); 231 static enum roff_tok roff_parse(struct roff *, char *, int *, 232 int, int); 233 static int roff_parsetext(struct roff *, struct buf *, 234 int, int *); 235 static int roff_renamed(ROFF_ARGS); 236 static int roff_return(ROFF_ARGS); 237 static int roff_rm(ROFF_ARGS); 238 static int roff_rn(ROFF_ARGS); 239 static int roff_rr(ROFF_ARGS); 240 static void roff_setregn(struct roff *, const char *, 241 size_t, int, char, int); 242 static void roff_setstr(struct roff *, 243 const char *, const char *, int); 244 static void roff_setstrn(struct roffkv **, const char *, 245 size_t, const char *, size_t, 
int); 246 static int roff_shift(ROFF_ARGS); 247 static int roff_so(ROFF_ARGS); 248 static int roff_tr(ROFF_ARGS); 249 static int roff_Dd(ROFF_ARGS); 250 static int roff_TE(ROFF_ARGS); 251 static int roff_TS(ROFF_ARGS); 252 static int roff_EQ(ROFF_ARGS); 253 static int roff_EN(ROFF_ARGS); 254 static int roff_T_(ROFF_ARGS); 255 static int roff_unsupp(ROFF_ARGS); 256 static int roff_userdef(ROFF_ARGS); 257 258 /* --- constant data ------------------------------------------------------ */ 259 260 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 261 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 262 /* Names of the roff requests and of the mdoc(7) and man(7) macros. roffhash_alloc() reads this table as roff_name[tok], so the position of a string is its token value; NULL slots are skipped there. Do not reorder. */ 263 const char *__roff_name[MAN_MAX + 1] = { 264 "br", "ce", "fi", "ft", 265 "ll", "mc", "nf", 266 "po", "rj", "sp", 267 "ta", "ti", NULL, 268 "ab", "ad", "af", "aln", 269 "als", "am", "am1", "ami", 270 "ami1", "as", "as1", "asciify", 271 "backtrace", "bd", "bleedat", "blm", 272 "box", "boxa", "bp", "BP", 273 "break", "breakchar", "brnl", "brp", 274 "brpnl", "c2", "cc", 275 "cf", "cflags", "ch", "char", 276 "chop", "class", "close", "CL", 277 "color", "composite", "continue", "cp", 278 "cropat", "cs", "cu", "da", 279 "dch", "Dd", "de", "de1", 280 "defcolor", "dei", "dei1", "device", 281 "devicem", "di", "do", "ds", 282 "ds1", "dwh", "dt", "ec", 283 "ecr", "ecs", "el", "em", 284 "EN", "eo", "EP", "EQ", 285 "errprint", "ev", "evc", "ex", 286 "fallback", "fam", "fc", "fchar", 287 "fcolor", "fdeferlig", "feature", "fkern", 288 "fl", "flig", "fp", "fps", 289 "fschar", "fspacewidth", "fspecial", "ftr", 290 "fzoom", "gcolor", "hc", "hcode", 291 "hidechar", "hla", "hlm", "hpf", 292 "hpfa", "hpfcode", "hw", "hy", 293 "hylang", "hylen", "hym", "hypp", 294 "hys", "ie", "if", "ig", 295 "index", "it", "itc", "IX", 296 "kern", "kernafter", "kernbefore", "kernpair", 297 "lc", "lc_ctype", "lds", "length", 298 "letadj", "lf", "lg", "lhang", 299 "linetabs", "lnr", "lnrf", "lpfx", 300 "ls", "lsm", "lt", 301 "mediasize", "minss", "mk", "mso", 302 
"na", "ne", "nh", "nhychar", 303 "nm", "nn", "nop", "nr", 304 "nrf", "nroff", "ns", "nx", 305 "open", "opena", "os", "output", 306 "padj", "papersize", "pc", "pev", 307 "pi", "PI", "pl", "pm", 308 "pn", "pnr", "ps", 309 "psbb", "pshape", "pso", "ptr", 310 "pvs", "rchar", "rd", "recursionlimit", 311 "return", "rfschar", "rhang", 312 "rm", "rn", "rnn", "rr", 313 "rs", "rt", "schar", "sentchar", 314 "shc", "shift", "sizes", "so", 315 "spacewidth", "special", "spreadwarn", "ss", 316 "sty", "substring", "sv", "sy", 317 "T&", "tc", "TE", 318 "TH", "tkf", "tl", 319 "tm", "tm1", "tmc", "tr", 320 "track", "transchar", "trf", "trimat", 321 "trin", "trnt", "troff", "TS", 322 "uf", "ul", "unformat", "unwatch", 323 "unwatchn", "vpt", "vs", "warn", 324 "warnscale", "watch", "watchlength", "watchn", 325 "wh", "while", "write", "writec", 326 "writem", "xflag", ".", NULL, 327 NULL, "text", 328 "Dd", "Dt", "Os", "Sh", 329 "Ss", "Pp", "D1", "Dl", 330 "Bd", "Ed", "Bl", "El", 331 "It", "Ad", "An", "Ap", 332 "Ar", "Cd", "Cm", "Dv", 333 "Er", "Ev", "Ex", "Fa", 334 "Fd", "Fl", "Fn", "Ft", 335 "Ic", "In", "Li", "Nd", 336 "Nm", "Op", "Ot", "Pa", 337 "Rv", "St", "Va", "Vt", 338 "Xr", "%A", "%B", "%D", 339 "%I", "%J", "%N", "%O", 340 "%P", "%R", "%T", "%V", 341 "Ac", "Ao", "Aq", "At", 342 "Bc", "Bf", "Bo", "Bq", 343 "Bsx", "Bx", "Db", "Dc", 344 "Do", "Dq", "Ec", "Ef", 345 "Em", "Eo", "Fx", "Ms", 346 "No", "Ns", "Nx", "Ox", 347 "Pc", "Pf", "Po", "Pq", 348 "Qc", "Ql", "Qo", "Qq", 349 "Re", "Rs", "Sc", "So", 350 "Sq", "Sm", "Sx", "Sy", 351 "Tn", "Ux", "Xc", "Xo", 352 "Fo", "Fc", "Oo", "Oc", 353 "Bk", "Ek", "Bt", "Hf", 354 "Fr", "Ud", "Lb", "Lp", 355 "Lk", "Mt", "Brq", "Bro", 356 "Brc", "%C", "Es", "En", 357 "Dx", "%Q", "%U", "Ta", 358 NULL, 359 "TH", "SH", "SS", "TP", 360 "TQ", 361 "LP", "PP", "P", "IP", 362 "HP", "SM", "SB", "BI", 363 "IB", "BR", "RB", "R", 364 "B", "I", "IR", "RI", 365 "RE", "RS", "DT", "UC", 366 "PD", "AT", "in", 367 "SY", "YS", "OP", 368 "EX", "EE", "UR", 369 "UE", "MT", 
"ME", NULL 370 }; /* Read-only view of the name table above. */ 371 const char *const *roff_name = __roff_name; 372 /* Handlers for the roff requests, one row per request; the trailing comment on each row names the request, and the rows follow the order of the request names at the start of __roff_name. NOTE(review): presumably indexed by enum roff_tok -- confirm against the users of roffs[]. */ 373 static struct roffmac roffs[TOKEN_NONE] = { 374 { roff_noarg, NULL, NULL, 0 }, /* br */ 375 { roff_onearg, NULL, NULL, 0 }, /* ce */ 376 { roff_noarg, NULL, NULL, 0 }, /* fi */ 377 { roff_onearg, NULL, NULL, 0 }, /* ft */ 378 { roff_onearg, NULL, NULL, 0 }, /* ll */ 379 { roff_onearg, NULL, NULL, 0 }, /* mc */ 380 { roff_noarg, NULL, NULL, 0 }, /* nf */ 381 { roff_onearg, NULL, NULL, 0 }, /* po */ 382 { roff_onearg, NULL, NULL, 0 }, /* rj */ 383 { roff_onearg, NULL, NULL, 0 }, /* sp */ 384 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 385 { roff_onearg, NULL, NULL, 0 }, /* ti */ 386 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 387 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 388 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 389 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 390 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 391 { roff_als, NULL, NULL, 0 }, /* als */ 392 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 393 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 394 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 395 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 396 { roff_ds, NULL, NULL, 0 }, /* as */ 397 { roff_ds, NULL, NULL, 0 }, /* as1 */ 398 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 399 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 400 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 401 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 402 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 403 { roff_unsupp, NULL, NULL, 0 }, /* box */ 404 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 405 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 406 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 407 { roff_break, NULL, NULL, 0 }, /* break */ 408 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 409 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 410 { roff_noarg, NULL, NULL, 0 }, /* brp */ 411 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 412 { roff_unsupp, 
NULL, NULL, 0 }, /* c2 */ 413 { roff_cc, NULL, NULL, 0 }, /* cc */ 414 { roff_insec, NULL, NULL, 0 }, /* cf */ 415 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 416 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 417 { roff_char, NULL, NULL, 0 }, /* char */ 418 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 419 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 420 { roff_insec, NULL, NULL, 0 }, /* close */ 421 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 422 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 423 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 424 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 425 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 426 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 427 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 428 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 429 { roff_unsupp, NULL, NULL, 0 }, /* da */ 430 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 431 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 432 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 433 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 434 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 435 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 436 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 437 { roff_unsupp, NULL, NULL, 0 }, /* device */ 438 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 439 { roff_unsupp, NULL, NULL, 0 }, /* di */ 440 { roff_unsupp, NULL, NULL, 0 }, /* do */ 441 { roff_ds, NULL, NULL, 0 }, /* ds */ 442 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 443 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 444 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 445 { roff_ec, NULL, NULL, 0 }, /* ec */ 446 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 447 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 448 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 449 { roff_unsupp, NULL, NULL, 0 }, /* em */ 450 { roff_EN, NULL, NULL, 0 }, /* EN */ 451 { roff_eo, NULL, NULL, 0 }, /* eo */ 452 { roff_unsupp, NULL, NULL, 0 }, 
/* EP */ 453 { roff_EQ, NULL, NULL, 0 }, /* EQ */ 454 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 455 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 456 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 457 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 458 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 459 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 460 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 461 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 466 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 470 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 487 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 488 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 489 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 490 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 491 { roff_cond, roff_cond_text, 
roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 492 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 493 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 494 { roff_unsupp, NULL, NULL, 0 }, /* index */ 495 { roff_it, NULL, NULL, 0 }, /* it */ 496 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 497 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 498 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 499 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 500 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 501 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 502 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 503 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 504 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 505 { roff_unsupp, NULL, NULL, 0 }, /* length */ 506 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 507 { roff_insec, NULL, NULL, 0 }, /* lf */ 508 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 509 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 510 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 511 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 512 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 513 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 514 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 515 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 516 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 520 { roff_insec, NULL, NULL, 0 }, /* mso */ 521 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 522 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 523 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 524 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 525 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 526 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 527 { roff_nop, NULL, NULL, 0 }, /* nop */ 528 { roff_nr, NULL, NULL, 0 }, /* nr */ 529 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 530 { roff_line_ignore, NULL, NULL, 0 }, /* nroff 
*/ 531 { roff_line_ignore, NULL, NULL, 0 }, /* ns */ 532 { roff_insec, NULL, NULL, 0 }, /* nx */ 533 { roff_insec, NULL, NULL, 0 }, /* open */ 534 { roff_insec, NULL, NULL, 0 }, /* opena */ 535 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 536 { roff_unsupp, NULL, NULL, 0 }, /* output */ 537 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 538 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 541 { roff_insec, NULL, NULL, 0 }, /* pi */ 542 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 543 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 544 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 545 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 546 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 547 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 548 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 549 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 550 { roff_insec, NULL, NULL, 0 }, /* pso */ 551 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 552 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 553 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 554 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 555 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 556 { roff_return, NULL, NULL, 0 }, /* return */ 557 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 558 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 559 { roff_rm, NULL, NULL, 0 }, /* rm */ 560 { roff_rn, NULL, NULL, 0 }, /* rn */ 561 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 562 { roff_rr, NULL, NULL, 0 }, /* rr */ 563 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 564 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 565 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 566 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 568 { roff_shift, NULL, NULL, 0 }, /* shift */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 570 { roff_so, NULL, NULL, 0 }, /* so */ 571 { roff_line_ignore, NULL, NULL, 0 }, 
/* spacewidth */ 572 { roff_line_ignore, NULL, NULL, 0 }, /* special */ 573 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 574 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 575 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 576 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 577 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 578 { roff_insec, NULL, NULL, 0 }, /* sy */ 579 { roff_T_, NULL, NULL, 0 }, /* T& */ 580 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 581 { roff_TE, NULL, NULL, 0 }, /* TE */ 582 { roff_Dd, NULL, NULL, 0 }, /* TH */ 583 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 584 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 585 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 586 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 587 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 588 { roff_tr, NULL, NULL, 0 }, /* tr */ 589 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 590 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 591 { roff_insec, NULL, NULL, 0 }, /* trf */ 592 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 593 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 594 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 595 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 596 { roff_TS, NULL, NULL, 0 }, /* TS */ 597 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 598 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 599 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 605 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 606 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 607 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 608 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 609 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 610 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 611 { 
roff_insec, NULL, NULL, 0 }, /* write */ 612 { roff_insec, NULL, NULL, 0 }, /* writec */ 613 { roff_insec, NULL, NULL, 0 }, /* writem */ 614 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 615 { roff_cblock, NULL, NULL, 0 }, /* . */ 616 { roff_renamed, NULL, NULL, 0 }, 617 { roff_userdef, NULL, NULL, 0 } 618 }; 619 620 /* Array of injected predefined strings. */ 621 #define PREDEFS_MAX 38 622 static const struct predef predefs[PREDEFS_MAX] = { 623 #include "predefs.in" 624 }; 625 626 static int roffce_lines; /* number of input lines to center */ 627 static struct roff_node *roffce_node; /* active request */ 628 static int roffit_lines; /* number of lines to delay */ 629 static char *roffit_macro; /* nil-terminated macro line */ 630 631 632 /* --- request table ------------------------------------------------------ */ 633 634 struct ohash * 635 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 636 { 637 struct ohash *htab; 638 struct roffreq *req; 639 enum roff_tok tok; 640 size_t sz; 641 unsigned int slot; 642 643 htab = mandoc_malloc(sizeof(*htab)); 644 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 645 646 for (tok = mintok; tok < maxtok; tok++) { 647 if (roff_name[tok] == NULL) 648 continue; 649 sz = strlen(roff_name[tok]); 650 req = mandoc_malloc(sizeof(*req) + sz + 1); 651 req->tok = tok; 652 memcpy(req->name, roff_name[tok], sz + 1); 653 slot = ohash_qlookup(htab, req->name); 654 ohash_insert(htab, slot, req); 655 } 656 return htab; 657 } 658 659 void 660 roffhash_free(struct ohash *htab) 661 { 662 struct roffreq *req; 663 unsigned int slot; 664 665 if (htab == NULL) 666 return; 667 for (req = ohash_first(htab, &slot); req != NULL; 668 req = ohash_next(htab, &slot)) 669 free(req); 670 ohash_delete(htab); 671 free(htab); 672 } 673 674 enum roff_tok 675 roffhash_find(struct ohash *htab, const char *name, size_t sz) 676 { 677 struct roffreq *req; 678 const char *end; 679 680 if (sz) { 681 end = name + sz; 682 req = ohash_find(htab, 
ohash_qlookupi(htab, name, &end)); 683 } else 684 req = ohash_find(htab, ohash_qlookup(htab, name)); 685 return req == NULL ? TOKEN_NONE : req->tok; 686 } 687 688 /* --- stack of request blocks -------------------------------------------- */ 689 690 /* 691 * Pop the current node off of the stack of roff instructions currently 692 * pending. Return 1 if it is a loop or 0 otherwise. 693 */ 694 static int 695 roffnode_pop(struct roff *r) 696 { 697 struct roffnode *p; 698 int inloop; 699 700 p = r->last; 701 inloop = p->tok == ROFF_while; 702 r->last = p->parent; 703 free(p->name); 704 free(p->end); 705 free(p); 706 return inloop; 707 } 708 709 /* 710 * Push a roff node onto the instruction stack. This must later be 711 * removed with roffnode_pop(). 712 */ 713 static void 714 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 715 int line, int col) 716 { 717 struct roffnode *p; 718 719 p = mandoc_calloc(1, sizeof(struct roffnode)); 720 p->tok = tok; 721 if (name) 722 p->name = mandoc_strdup(name); 723 p->parent = r->last; 724 p->line = line; 725 p->col = col; 726 p->rule = p->parent ? 
p->parent->rule : 0; 727 728 r->last = p; 729 } 730 731 /* --- roff parser state data management ---------------------------------- */ 732 733 static void 734 roff_free1(struct roff *r) 735 { 736 int i; 737 738 tbl_free(r->first_tbl); 739 r->first_tbl = r->last_tbl = r->tbl = NULL; 740 741 eqn_free(r->last_eqn); 742 r->last_eqn = r->eqn = NULL; 743 744 while (r->mstackpos >= 0) 745 roff_userret(r); 746 747 while (r->last) 748 roffnode_pop(r); 749 750 free (r->rstack); 751 r->rstack = NULL; 752 r->rstacksz = 0; 753 r->rstackpos = -1; 754 755 roff_freereg(r->regtab); 756 r->regtab = NULL; 757 758 roff_freestr(r->strtab); 759 roff_freestr(r->rentab); 760 roff_freestr(r->xmbtab); 761 r->strtab = r->rentab = r->xmbtab = NULL; 762 763 if (r->xtab) 764 for (i = 0; i < 128; i++) 765 free(r->xtab[i].p); 766 free(r->xtab); 767 r->xtab = NULL; 768 } 769 770 void 771 roff_reset(struct roff *r) 772 { 773 roff_free1(r); 774 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 775 r->control = '\0'; 776 r->escape = '\\'; 777 roffce_lines = 0; 778 roffce_node = NULL; 779 roffit_lines = 0; 780 roffit_macro = NULL; 781 } 782 783 void 784 roff_free(struct roff *r) 785 { 786 int i; 787 788 roff_free1(r); 789 for (i = 0; i < r->mstacksz; i++) 790 free(r->mstack[i].argv); 791 free(r->mstack); 792 roffhash_free(r->reqtab); 793 free(r); 794 } 795 796 struct roff * 797 roff_alloc(int options) 798 { 799 struct roff *r; 800 801 r = mandoc_calloc(1, sizeof(struct roff)); 802 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 803 r->options = options; 804 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 805 r->mstackpos = -1; 806 r->rstackpos = -1; 807 r->escape = '\\'; 808 return r; 809 } 810 811 /* --- syntax tree state data management ---------------------------------- */ 812 813 static void 814 roff_man_free1(struct roff_man *man) 815 { 816 if (man->meta.first != NULL) 817 roff_node_delete(man, man->meta.first); 818 free(man->meta.msec); 819 free(man->meta.vol); 820 free(man->meta.os); 821 
free(man->meta.arch); 822 free(man->meta.title); 823 free(man->meta.name); 824 free(man->meta.date); 825 free(man->meta.sodest); 826 } 827 828 void 829 roff_state_reset(struct roff_man *man) 830 { 831 man->last = man->meta.first; 832 man->last_es = NULL; 833 man->flags = 0; 834 man->lastsec = man->lastnamed = SEC_NONE; 835 man->next = ROFF_NEXT_CHILD; 836 roff_setreg(man->roff, "nS", 0, '='); 837 } 838 839 static void 840 roff_man_alloc1(struct roff_man *man) 841 { 842 memset(&man->meta, 0, sizeof(man->meta)); 843 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 844 man->meta.first->type = ROFFT_ROOT; 845 man->meta.macroset = MACROSET_NONE; 846 roff_state_reset(man); 847 } 848 849 void 850 roff_man_reset(struct roff_man *man) 851 { 852 roff_man_free1(man); 853 roff_man_alloc1(man); 854 } 855 856 void 857 roff_man_free(struct roff_man *man) 858 { 859 roff_man_free1(man); 860 free(man); 861 } 862 863 struct roff_man * 864 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 865 { 866 struct roff_man *man; 867 868 man = mandoc_calloc(1, sizeof(*man)); 869 man->roff = roff; 870 man->os_s = os_s; 871 man->quick = quick; 872 roff_man_alloc1(man); 873 roff->man = man; 874 return man; 875 } 876 877 /* --- syntax tree handling ----------------------------------------------- */ 878 879 struct roff_node * 880 roff_node_alloc(struct roff_man *man, int line, int pos, 881 enum roff_type type, int tok) 882 { 883 struct roff_node *n; 884 885 n = mandoc_calloc(1, sizeof(*n)); 886 n->line = line; 887 n->pos = pos; 888 n->tok = tok; 889 n->type = type; 890 n->sec = man->lastsec; 891 892 if (man->flags & MDOC_SYNOPSIS) 893 n->flags |= NODE_SYNPRETTY; 894 else 895 n->flags &= ~NODE_SYNPRETTY; 896 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 897 n->flags |= NODE_NOFILL; 898 else 899 n->flags &= ~NODE_NOFILL; 900 if (man->flags & MDOC_NEWLINE) 901 n->flags |= NODE_LINE; 902 man->flags &= ~MDOC_NEWLINE; 903 904 return n; 905 } 906 907 void 908 
roff_node_append(struct roff_man *man, struct roff_node *n) 909 { 910 911 switch (man->next) { 912 case ROFF_NEXT_SIBLING: 913 if (man->last->next != NULL) { 914 n->next = man->last->next; 915 man->last->next->prev = n; 916 } else 917 man->last->parent->last = n; 918 man->last->next = n; 919 n->prev = man->last; 920 n->parent = man->last->parent; 921 break; 922 case ROFF_NEXT_CHILD: 923 if (man->last->child != NULL) { 924 n->next = man->last->child; 925 man->last->child->prev = n; 926 } else 927 man->last->last = n; 928 man->last->child = n; 929 n->parent = man->last; 930 break; 931 default: 932 abort(); 933 } 934 man->last = n; 935 936 switch (n->type) { 937 case ROFFT_HEAD: 938 n->parent->head = n; 939 break; 940 case ROFFT_BODY: 941 if (n->end != ENDBODY_NOT) 942 return; 943 n->parent->body = n; 944 break; 945 case ROFFT_TAIL: 946 n->parent->tail = n; 947 break; 948 default: 949 return; 950 } 951 952 /* 953 * Copy over the normalised-data pointer of our parent. Not 954 * everybody has one, but copying a null pointer is fine. 
955 */ 956 957 n->norm = n->parent->norm; 958 assert(n->parent->type == ROFFT_BLOCK); 959 } 960 961 void 962 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 963 { 964 struct roff_node *n; 965 966 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 967 n->string = roff_strdup(man->roff, word); 968 roff_node_append(man, n); 969 n->flags |= NODE_VALID | NODE_ENDED; 970 man->next = ROFF_NEXT_SIBLING; 971 } 972 973 void 974 roff_word_append(struct roff_man *man, const char *word) 975 { 976 struct roff_node *n; 977 char *addstr, *newstr; 978 979 n = man->last; 980 addstr = roff_strdup(man->roff, word); 981 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 982 free(addstr); 983 free(n->string); 984 n->string = newstr; 985 man->next = ROFF_NEXT_SIBLING; 986 } 987 988 void 989 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 990 { 991 struct roff_node *n; 992 993 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 994 roff_node_append(man, n); 995 man->next = ROFF_NEXT_CHILD; 996 } 997 998 struct roff_node * 999 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1000 { 1001 struct roff_node *n; 1002 1003 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1004 roff_node_append(man, n); 1005 man->next = ROFF_NEXT_CHILD; 1006 return n; 1007 } 1008 1009 struct roff_node * 1010 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1011 { 1012 struct roff_node *n; 1013 1014 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1015 roff_node_append(man, n); 1016 man->next = ROFF_NEXT_CHILD; 1017 return n; 1018 } 1019 1020 struct roff_node * 1021 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1022 { 1023 struct roff_node *n; 1024 1025 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1026 roff_node_append(man, n); 1027 man->next = ROFF_NEXT_CHILD; 1028 return n; 1029 } 1030 1031 static void 1032 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1033 { 
1034 struct roff_node *n; 1035 struct tbl_span *span; 1036 1037 if (man->meta.macroset == MACROSET_MAN) 1038 man_breakscope(man, ROFF_TS); 1039 while ((span = tbl_span(tbl)) != NULL) { 1040 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1041 n->span = span; 1042 roff_node_append(man, n); 1043 n->flags |= NODE_VALID | NODE_ENDED; 1044 man->next = ROFF_NEXT_SIBLING; 1045 } 1046 } 1047 1048 void 1049 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1050 { 1051 1052 /* Adjust siblings. */ 1053 1054 if (n->prev) 1055 n->prev->next = n->next; 1056 if (n->next) 1057 n->next->prev = n->prev; 1058 1059 /* Adjust parent. */ 1060 1061 if (n->parent != NULL) { 1062 if (n->parent->child == n) 1063 n->parent->child = n->next; 1064 if (n->parent->last == n) 1065 n->parent->last = n->prev; 1066 } 1067 1068 /* Adjust parse point. */ 1069 1070 if (man == NULL) 1071 return; 1072 if (man->last == n) { 1073 if (n->prev == NULL) { 1074 man->last = n->parent; 1075 man->next = ROFF_NEXT_CHILD; 1076 } else { 1077 man->last = n->prev; 1078 man->next = ROFF_NEXT_SIBLING; 1079 } 1080 } 1081 if (man->meta.first == n) 1082 man->meta.first = NULL; 1083 } 1084 1085 void 1086 roff_node_relink(struct roff_man *man, struct roff_node *n) 1087 { 1088 roff_node_unlink(man, n); 1089 n->prev = n->next = NULL; 1090 roff_node_append(man, n); 1091 } 1092 1093 void 1094 roff_node_free(struct roff_node *n) 1095 { 1096 1097 if (n->args != NULL) 1098 mdoc_argv_free(n->args); 1099 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1100 free(n->norm); 1101 eqn_box_free(n->eqn); 1102 free(n->string); 1103 free(n); 1104 } 1105 1106 void 1107 roff_node_delete(struct roff_man *man, struct roff_node *n) 1108 { 1109 1110 while (n->child != NULL) 1111 roff_node_delete(man, n->child); 1112 roff_node_unlink(man, n); 1113 roff_node_free(n); 1114 } 1115 1116 void 1117 deroff(char **dest, const struct roff_node *n) 1118 { 1119 char *cp; 1120 size_t sz; 1121 1122 if (n->type != ROFFT_TEXT) { 1123 for 
(n = n->child; n != NULL; n = n->next) 1124 deroff(dest, n); 1125 return; 1126 } 1127 1128 /* Skip leading whitespace. */ 1129 1130 for (cp = n->string; *cp != '\0'; cp++) { 1131 if (cp[0] == '\\' && cp[1] != '\0' && 1132 strchr(" %&0^|~", cp[1]) != NULL) 1133 cp++; 1134 else if ( ! isspace((unsigned char)*cp)) 1135 break; 1136 } 1137 1138 /* Skip trailing backslash. */ 1139 1140 sz = strlen(cp); 1141 if (sz > 0 && cp[sz - 1] == '\\') 1142 sz--; 1143 1144 /* Skip trailing whitespace. */ 1145 1146 for (; sz; sz--) 1147 if ( ! isspace((unsigned char)cp[sz-1])) 1148 break; 1149 1150 /* Skip empty strings. */ 1151 1152 if (sz == 0) 1153 return; 1154 1155 if (*dest == NULL) { 1156 *dest = mandoc_strndup(cp, sz); 1157 return; 1158 } 1159 1160 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1161 free(*dest); 1162 *dest = cp; 1163 } 1164 1165 /* --- main functions of the roff parser ---------------------------------- */ 1166 1167 /* 1168 * In the current line, expand escape sequences that produce parsable 1169 * input text. Also check the syntax of the remaining escape sequences, 1170 * which typically produce output glyphs or change formatter state. 1171 */ 1172 static int 1173 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1174 { 1175 struct mctx *ctx; /* current macro call context */ 1176 char ubuf[24]; /* buffer to print the number */ 1177 struct roff_node *n; /* used for header comments */ 1178 const char *start; /* start of the string to process */ 1179 char *stesc; /* start of an escape sequence ('\\') */ 1180 const char *esct; /* type of esccape sequence */ 1181 char *ep; /* end of comment string */ 1182 const char *stnam; /* start of the name, after "[(*" */ 1183 const char *cp; /* end of the name, e.g. 
before ']' */ 1184 const char *res; /* the string to be substituted */ 1185 char *nbuf; /* new buffer to copy buf->buf to */ 1186 size_t maxl; /* expected length of the escape name */ 1187 size_t naml; /* actual length of the escape name */ 1188 size_t asz; /* length of the replacement */ 1189 size_t rsz; /* length of the rest of the string */ 1190 int inaml; /* length returned from mandoc_escape() */ 1191 int expand_count; /* to avoid infinite loops */ 1192 int npos; /* position in numeric expression */ 1193 int arg_complete; /* argument not interrupted by eol */ 1194 int quote_args; /* true for \\$@, false for \\$* */ 1195 int done; /* no more input available */ 1196 int deftype; /* type of definition to paste */ 1197 int rcsid; /* kind of RCS id seen */ 1198 enum mandocerr err; /* for escape sequence problems */ 1199 char sign; /* increment number register */ 1200 char term; /* character terminating the escape */ 1201 1202 /* Search forward for comments. */ 1203 1204 done = 0; 1205 start = buf->buf + pos; 1206 for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { 1207 if (stesc[0] != newesc || stesc[1] == '\0') 1208 continue; 1209 stesc++; 1210 if (*stesc != '"' && *stesc != '#') 1211 continue; 1212 1213 /* Comment found, look for RCS id. */ 1214 1215 rcsid = 0; 1216 if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { 1217 rcsid = 1 << MANDOC_OS_OPENBSD; 1218 cp += 8; 1219 } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { 1220 rcsid = 1 << MANDOC_OS_NETBSD; 1221 cp += 7; 1222 } 1223 if (cp != NULL && 1224 isalnum((unsigned char)*cp) == 0 && 1225 strchr(cp, '$') != NULL) { 1226 if (r->man->meta.rcsids & rcsid) 1227 mandoc_msg(MANDOCERR_RCS_REP, ln, 1228 (int)(stesc - buf->buf) + 1, 1229 "%s", stesc + 1); 1230 r->man->meta.rcsids |= rcsid; 1231 } 1232 1233 /* Handle trailing whitespace. 
*/ 1234 1235 ep = strchr(stesc--, '\0') - 1; 1236 if (*ep == '\n') { 1237 done = 1; 1238 ep--; 1239 } 1240 if (*ep == ' ' || *ep == '\t') 1241 mandoc_msg(MANDOCERR_SPACE_EOL, 1242 ln, (int)(ep - buf->buf), NULL); 1243 1244 /* 1245 * Save comments preceding the title macro 1246 * in the syntax tree. 1247 */ 1248 1249 if (newesc != ASCII_ESC && r->format == 0) { 1250 while (*ep == ' ' || *ep == '\t') 1251 ep--; 1252 ep[1] = '\0'; 1253 n = roff_node_alloc(r->man, 1254 ln, stesc + 1 - buf->buf, 1255 ROFFT_COMMENT, TOKEN_NONE); 1256 n->string = mandoc_strdup(stesc + 2); 1257 roff_node_append(r->man, n); 1258 n->flags |= NODE_VALID | NODE_ENDED; 1259 r->man->next = ROFF_NEXT_SIBLING; 1260 } 1261 1262 /* Line continuation with comment. */ 1263 1264 if (stesc[1] == '#') { 1265 *stesc = '\0'; 1266 return ROFF_IGN | ROFF_APPEND; 1267 } 1268 1269 /* Discard normal comments. */ 1270 1271 while (stesc > start && stesc[-1] == ' ' && 1272 (stesc == start + 1 || stesc[-2] != '\\')) 1273 stesc--; 1274 *stesc = '\0'; 1275 break; 1276 } 1277 if (stesc == start) 1278 return ROFF_CONT; 1279 stesc--; 1280 1281 /* Notice the end of the input. */ 1282 1283 if (*stesc == '\n') { 1284 *stesc-- = '\0'; 1285 done = 1; 1286 } 1287 1288 expand_count = 0; 1289 while (stesc >= start) { 1290 if (*stesc != newesc) { 1291 1292 /* 1293 * If we have a non-standard escape character, 1294 * escape literal backslashes because all 1295 * processing in subsequent functions uses 1296 * the standard escaping rules. 1297 */ 1298 1299 if (newesc != ASCII_ESC && *stesc == '\\') { 1300 *stesc = '\0'; 1301 buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", 1302 buf->buf, stesc + 1) + 1; 1303 start = nbuf + pos; 1304 stesc = nbuf + (stesc - buf->buf); 1305 free(buf->buf); 1306 buf->buf = nbuf; 1307 } 1308 1309 /* Search backwards for the next escape. */ 1310 1311 stesc--; 1312 continue; 1313 } 1314 1315 /* If it is escaped, skip it. 
*/ 1316 1317 for (cp = stesc - 1; cp >= start; cp--) 1318 if (*cp != r->escape) 1319 break; 1320 1321 if ((stesc - cp) % 2 == 0) { 1322 while (stesc > cp) 1323 *stesc-- = '\\'; 1324 continue; 1325 } else if (stesc[1] != '\0') { 1326 *stesc = '\\'; 1327 } else { 1328 *stesc-- = '\0'; 1329 if (done) 1330 continue; 1331 else 1332 return ROFF_IGN | ROFF_APPEND; 1333 } 1334 1335 /* Decide whether to expand or to check only. */ 1336 1337 term = '\0'; 1338 cp = stesc + 1; 1339 if (*cp == 'E') 1340 cp++; 1341 esct = cp; 1342 switch (*esct) { 1343 case '*': 1344 case '$': 1345 res = NULL; 1346 break; 1347 case 'B': 1348 case 'w': 1349 term = cp[1]; 1350 /* FALLTHROUGH */ 1351 case 'n': 1352 sign = cp[1]; 1353 if (sign == '+' || sign == '-') 1354 cp++; 1355 res = ubuf; 1356 break; 1357 default: 1358 err = MANDOCERR_OK; 1359 switch(mandoc_escape(&cp, &stnam, &inaml)) { 1360 case ESCAPE_SPECIAL: 1361 if (mchars_spec2cp(stnam, inaml) >= 0) 1362 break; 1363 /* FALLTHROUGH */ 1364 case ESCAPE_ERROR: 1365 err = MANDOCERR_ESC_BAD; 1366 break; 1367 case ESCAPE_UNDEF: 1368 err = MANDOCERR_ESC_UNDEF; 1369 break; 1370 case ESCAPE_UNSUPP: 1371 err = MANDOCERR_ESC_UNSUPP; 1372 break; 1373 default: 1374 break; 1375 } 1376 if (err != MANDOCERR_OK) 1377 mandoc_msg(err, ln, (int)(stesc - buf->buf), 1378 "%.*s", (int)(cp - stesc), stesc); 1379 stesc--; 1380 continue; 1381 } 1382 1383 if (EXPAND_LIMIT < ++expand_count) { 1384 mandoc_msg(MANDOCERR_ROFFLOOP, 1385 ln, (int)(stesc - buf->buf), NULL); 1386 return ROFF_IGN; 1387 } 1388 1389 /* 1390 * The third character decides the length 1391 * of the name of the string or register. 1392 * Save a pointer to the name. 
1393 */ 1394 1395 if (term == '\0') { 1396 switch (*++cp) { 1397 case '\0': 1398 maxl = 0; 1399 break; 1400 case '(': 1401 cp++; 1402 maxl = 2; 1403 break; 1404 case '[': 1405 cp++; 1406 term = ']'; 1407 maxl = 0; 1408 break; 1409 default: 1410 maxl = 1; 1411 break; 1412 } 1413 } else { 1414 cp += 2; 1415 maxl = 0; 1416 } 1417 stnam = cp; 1418 1419 /* Advance to the end of the name. */ 1420 1421 naml = 0; 1422 arg_complete = 1; 1423 while (maxl == 0 || naml < maxl) { 1424 if (*cp == '\0') { 1425 mandoc_msg(MANDOCERR_ESC_BAD, ln, 1426 (int)(stesc - buf->buf), "%s", stesc); 1427 arg_complete = 0; 1428 break; 1429 } 1430 if (maxl == 0 && *cp == term) { 1431 cp++; 1432 break; 1433 } 1434 if (*cp++ != '\\' || *esct != 'w') { 1435 naml++; 1436 continue; 1437 } 1438 switch (mandoc_escape(&cp, NULL, NULL)) { 1439 case ESCAPE_SPECIAL: 1440 case ESCAPE_UNICODE: 1441 case ESCAPE_NUMBERED: 1442 case ESCAPE_UNDEF: 1443 case ESCAPE_OVERSTRIKE: 1444 naml++; 1445 break; 1446 default: 1447 break; 1448 } 1449 } 1450 1451 /* 1452 * Retrieve the replacement string; if it is 1453 * undefined, resume searching for escapes. 1454 */ 1455 1456 switch (*esct) { 1457 case '*': 1458 if (arg_complete) { 1459 deftype = ROFFDEF_USER | ROFFDEF_PRE; 1460 res = roff_getstrn(r, stnam, naml, &deftype); 1461 1462 /* 1463 * If not overriden, let \*(.T 1464 * through to the formatters. 1465 */ 1466 1467 if (res == NULL && naml == 2 && 1468 stnam[0] == '.' && stnam[1] == 'T') { 1469 roff_setstrn(&r->strtab, 1470 ".T", 2, NULL, 0, 0); 1471 stesc--; 1472 continue; 1473 } 1474 } 1475 break; 1476 case '$': 1477 if (r->mstackpos < 0) { 1478 mandoc_msg(MANDOCERR_ARG_UNDEF, ln, 1479 (int)(stesc - buf->buf), "%.3s", stesc); 1480 break; 1481 } 1482 ctx = r->mstack + r->mstackpos; 1483 npos = esct[1] - '1'; 1484 if (npos >= 0 && npos <= 8) { 1485 res = npos < ctx->argc ? 
1486 ctx->argv[npos] : ""; 1487 break; 1488 } 1489 if (esct[1] == '*') 1490 quote_args = 0; 1491 else if (esct[1] == '@') 1492 quote_args = 1; 1493 else { 1494 mandoc_msg(MANDOCERR_ARG_NONUM, ln, 1495 (int)(stesc - buf->buf), "%.3s", stesc); 1496 break; 1497 } 1498 asz = 0; 1499 for (npos = 0; npos < ctx->argc; npos++) { 1500 if (npos) 1501 asz++; /* blank */ 1502 if (quote_args) 1503 asz += 2; /* quotes */ 1504 asz += strlen(ctx->argv[npos]); 1505 } 1506 if (asz != 3) { 1507 rsz = buf->sz - (stesc - buf->buf) - 3; 1508 if (asz < 3) 1509 memmove(stesc + asz, stesc + 3, rsz); 1510 buf->sz += asz - 3; 1511 nbuf = mandoc_realloc(buf->buf, buf->sz); 1512 start = nbuf + pos; 1513 stesc = nbuf + (stesc - buf->buf); 1514 buf->buf = nbuf; 1515 if (asz > 3) 1516 memmove(stesc + asz, stesc + 3, rsz); 1517 } 1518 for (npos = 0; npos < ctx->argc; npos++) { 1519 if (npos) 1520 *stesc++ = ' '; 1521 if (quote_args) 1522 *stesc++ = '"'; 1523 cp = ctx->argv[npos]; 1524 while (*cp != '\0') 1525 *stesc++ = *cp++; 1526 if (quote_args) 1527 *stesc++ = '"'; 1528 } 1529 continue; 1530 case 'B': 1531 npos = 0; 1532 ubuf[0] = arg_complete && 1533 roff_evalnum(r, ln, stnam, &npos, 1534 NULL, ROFFNUM_SCALE) && 1535 stnam + npos + 1 == cp ? '1' : '0'; 1536 ubuf[1] = '\0'; 1537 break; 1538 case 'n': 1539 if (arg_complete) 1540 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1541 roff_getregn(r, stnam, naml, sign)); 1542 else 1543 ubuf[0] = '\0'; 1544 break; 1545 case 'w': 1546 /* use even incomplete args */ 1547 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1548 24 * (int)naml); 1549 break; 1550 } 1551 1552 if (res == NULL) { 1553 if (*esct == '*') 1554 mandoc_msg(MANDOCERR_STR_UNDEF, 1555 ln, (int)(stesc - buf->buf), 1556 "%.*s", (int)naml, stnam); 1557 res = ""; 1558 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1559 mandoc_msg(MANDOCERR_ROFFLOOP, 1560 ln, (int)(stesc - buf->buf), NULL); 1561 return ROFF_IGN; 1562 } 1563 1564 /* Replace the escape sequence by the string. 
*/ 1565 1566 *stesc = '\0'; 1567 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1568 buf->buf, res, cp) + 1; 1569 1570 /* Prepare for the next replacement. */ 1571 1572 start = nbuf + pos; 1573 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1574 free(buf->buf); 1575 buf->buf = nbuf; 1576 } 1577 return ROFF_CONT; 1578 } 1579 1580 /* 1581 * Parse a quoted or unquoted roff-style request or macro argument. 1582 * Return a pointer to the parsed argument, which is either the original 1583 * pointer or advanced by one byte in case the argument is quoted. 1584 * NUL-terminate the argument in place. 1585 * Collapse pairs of quotes inside quoted arguments. 1586 * Advance the argument pointer to the next argument, 1587 * or to the NUL byte terminating the argument line. 1588 */ 1589 char * 1590 roff_getarg(struct roff *r, char **cpp, int ln, int *pos) 1591 { 1592 struct buf buf; 1593 char *cp, *start; 1594 int newesc, pairs, quoted, white; 1595 1596 /* Quoting can only start with a new word. */ 1597 start = *cpp; 1598 quoted = 0; 1599 if ('"' == *start) { 1600 quoted = 1; 1601 start++; 1602 } 1603 1604 newesc = pairs = white = 0; 1605 for (cp = start; '\0' != *cp; cp++) { 1606 1607 /* 1608 * Move the following text left 1609 * after quoted quotes and after "\\" and "\t". 1610 */ 1611 if (pairs) 1612 cp[-pairs] = cp[0]; 1613 1614 if ('\\' == cp[0]) { 1615 /* 1616 * In copy mode, translate double to single 1617 * backslashes and backslash-t to literal tabs. 1618 */ 1619 switch (cp[1]) { 1620 case 'a': 1621 case 't': 1622 cp[-pairs] = '\t'; 1623 pairs++; 1624 cp++; 1625 break; 1626 case '\\': 1627 newesc = 1; 1628 cp[-pairs] = ASCII_ESC; 1629 pairs++; 1630 cp++; 1631 break; 1632 case ' ': 1633 /* Skip escaped blanks. */ 1634 if (0 == quoted) 1635 cp++; 1636 break; 1637 default: 1638 break; 1639 } 1640 } else if (0 == quoted) { 1641 if (' ' == cp[0]) { 1642 /* Unescaped blanks end unquoted args. 
*/ 1643 white = 1; 1644 break; 1645 } 1646 } else if ('"' == cp[0]) { 1647 if ('"' == cp[1]) { 1648 /* Quoted quotes collapse. */ 1649 pairs++; 1650 cp++; 1651 } else { 1652 /* Unquoted quotes end quoted args. */ 1653 quoted = 2; 1654 break; 1655 } 1656 } 1657 } 1658 1659 /* Quoted argument without a closing quote. */ 1660 if (1 == quoted) 1661 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL); 1662 1663 /* NUL-terminate this argument and move to the next one. */ 1664 if (pairs) 1665 cp[-pairs] = '\0'; 1666 if ('\0' != *cp) { 1667 *cp++ = '\0'; 1668 while (' ' == *cp) 1669 cp++; 1670 } 1671 *pos += (int)(cp - start) + (quoted ? 1 : 0); 1672 *cpp = cp; 1673 1674 if ('\0' == *cp && (white || ' ' == cp[-1])) 1675 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL); 1676 1677 start = mandoc_strdup(start); 1678 if (newesc == 0) 1679 return start; 1680 1681 buf.buf = start; 1682 buf.sz = strlen(start) + 1; 1683 buf.next = NULL; 1684 if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) { 1685 free(buf.buf); 1686 buf.buf = mandoc_strdup(""); 1687 } 1688 return buf.buf; 1689 } 1690 1691 1692 /* 1693 * Process text streams. 1694 */ 1695 static int 1696 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) 1697 { 1698 size_t sz; 1699 const char *start; 1700 char *p; 1701 int isz; 1702 enum mandoc_esc esc; 1703 1704 /* Spring the input line trap. */ 1705 1706 if (roffit_lines == 1) { 1707 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1708 free(buf->buf); 1709 buf->buf = p; 1710 buf->sz = isz + 1; 1711 *offs = 0; 1712 free(roffit_macro); 1713 roffit_lines = 0; 1714 return ROFF_REPARSE; 1715 } else if (roffit_lines > 1) 1716 --roffit_lines; 1717 1718 if (roffce_node != NULL && buf->buf[pos] != '\0') { 1719 if (roffce_lines < 1) { 1720 r->man->last = roffce_node; 1721 r->man->next = ROFF_NEXT_SIBLING; 1722 roffce_lines = 0; 1723 roffce_node = NULL; 1724 } else 1725 roffce_lines--; 1726 } 1727 1728 /* Convert all breakable hyphens into ASCII_HYPH. 
*/ 1729 1730 start = p = buf->buf + pos; 1731 1732 while (*p != '\0') { 1733 sz = strcspn(p, "-\\"); 1734 p += sz; 1735 1736 if (*p == '\0') 1737 break; 1738 1739 if (*p == '\\') { 1740 /* Skip over escapes. */ 1741 p++; 1742 esc = mandoc_escape((const char **)&p, NULL, NULL); 1743 if (esc == ESCAPE_ERROR) 1744 break; 1745 while (*p == '-') 1746 p++; 1747 continue; 1748 } else if (p == start) { 1749 p++; 1750 continue; 1751 } 1752 1753 if (isalpha((unsigned char)p[-1]) && 1754 isalpha((unsigned char)p[1])) 1755 *p = ASCII_HYPH; 1756 p++; 1757 } 1758 return ROFF_CONT; 1759 } 1760 1761 int 1762 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1763 { 1764 enum roff_tok t; 1765 int e; 1766 int pos; /* parse point */ 1767 int spos; /* saved parse point for messages */ 1768 int ppos; /* original offset in buf->buf */ 1769 int ctl; /* macro line (boolean) */ 1770 1771 ppos = pos = *offs; 1772 1773 /* Handle in-line equation delimiters. */ 1774 1775 if (r->tbl == NULL && 1776 r->last_eqn != NULL && r->last_eqn->delim && 1777 (r->eqn == NULL || r->eqn_inline)) { 1778 e = roff_eqndelim(r, buf, pos); 1779 if (e == ROFF_REPARSE) 1780 return e; 1781 assert(e == ROFF_CONT); 1782 } 1783 1784 /* Expand some escape sequences. */ 1785 1786 e = roff_expand(r, buf, ln, pos, r->escape); 1787 if ((e & ROFF_MASK) == ROFF_IGN) 1788 return e; 1789 assert(e == ROFF_CONT); 1790 1791 ctl = roff_getcontrol(r, buf->buf, &pos); 1792 1793 /* 1794 * First, if a scope is open and we're not a macro, pass the 1795 * text through the macro's filter. 1796 * Equations process all content themselves. 1797 * Tables process almost all content themselves, but we want 1798 * to warn about macros before passing it there. 1799 */ 1800 1801 if (r->last != NULL && ! 
ctl) { 1802 t = r->last->tok; 1803 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1804 if ((e & ROFF_MASK) == ROFF_IGN) 1805 return e; 1806 e &= ~ROFF_MASK; 1807 } else 1808 e = ROFF_IGN; 1809 if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { 1810 eqn_read(r->eqn, buf->buf + ppos); 1811 return e; 1812 } 1813 if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { 1814 tbl_read(r->tbl, ln, buf->buf, ppos); 1815 roff_addtbl(r->man, ln, r->tbl); 1816 return e; 1817 } 1818 if ( ! ctl) 1819 return roff_parsetext(r, buf, pos, offs) | e; 1820 1821 /* Skip empty request lines. */ 1822 1823 if (buf->buf[pos] == '"') { 1824 mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL); 1825 return ROFF_IGN; 1826 } else if (buf->buf[pos] == '\0') 1827 return ROFF_IGN; 1828 1829 /* 1830 * If a scope is open, go to the child handler for that macro, 1831 * as it may want to preprocess before doing anything with it. 1832 * Don't do so if an equation is open. 1833 */ 1834 1835 if (r->last) { 1836 t = r->last->tok; 1837 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1838 } 1839 1840 /* No scope is open. This is a new request or macro. */ 1841 1842 spos = pos; 1843 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1844 1845 /* Tables ignore most macros. */ 1846 1847 if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || 1848 t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { 1849 mandoc_msg(MANDOCERR_TBLMACRO, 1850 ln, pos, "%s", buf->buf + spos); 1851 if (t != TOKEN_NONE) 1852 return ROFF_IGN; 1853 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1854 pos++; 1855 while (buf->buf[pos] == ' ') 1856 pos++; 1857 tbl_read(r->tbl, ln, buf->buf, pos); 1858 roff_addtbl(r->man, ln, r->tbl); 1859 return ROFF_IGN; 1860 } 1861 1862 /* For now, let high level macros abort .ce mode. 
*/ 1863 1864 if (ctl && roffce_node != NULL && 1865 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || 1866 t == ROFF_TH || t == ROFF_TS)) { 1867 r->man->last = roffce_node; 1868 r->man->next = ROFF_NEXT_SIBLING; 1869 roffce_lines = 0; 1870 roffce_node = NULL; 1871 } 1872 1873 /* 1874 * This is neither a roff request nor a user-defined macro. 1875 * Let the standard macro set parsers handle it. 1876 */ 1877 1878 if (t == TOKEN_NONE) 1879 return ROFF_CONT; 1880 1881 /* Execute a roff request or a user defined macro. */ 1882 1883 return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); 1884 } 1885 1886 /* 1887 * Internal interface function to tell the roff parser that execution 1888 * of the current macro ended. This is required because macro 1889 * definitions usually do not end with a .return request. 1890 */ 1891 void 1892 roff_userret(struct roff *r) 1893 { 1894 struct mctx *ctx; 1895 int i; 1896 1897 assert(r->mstackpos >= 0); 1898 ctx = r->mstack + r->mstackpos; 1899 for (i = 0; i < ctx->argc; i++) 1900 free(ctx->argv[i]); 1901 ctx->argc = 0; 1902 r->mstackpos--; 1903 } 1904 1905 void 1906 roff_endparse(struct roff *r) 1907 { 1908 if (r->last != NULL) 1909 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line, 1910 r->last->col, "%s", roff_name[r->last->tok]); 1911 1912 if (r->eqn != NULL) { 1913 mandoc_msg(MANDOCERR_BLK_NOEND, 1914 r->eqn->node->line, r->eqn->node->pos, "EQ"); 1915 eqn_parse(r->eqn); 1916 r->eqn = NULL; 1917 } 1918 1919 if (r->tbl != NULL) { 1920 tbl_end(r->tbl, 1); 1921 r->tbl = NULL; 1922 } 1923 } 1924 1925 /* 1926 * Parse a roff node's type from the input buffer. This must be in the 1927 * form of ".foo xxx" in the usual way. 
1928 */ 1929 static enum roff_tok 1930 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1931 { 1932 char *cp; 1933 const char *mac; 1934 size_t maclen; 1935 int deftype; 1936 enum roff_tok t; 1937 1938 cp = buf + *pos; 1939 1940 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1941 return TOKEN_NONE; 1942 1943 mac = cp; 1944 maclen = roff_getname(r, &cp, ln, ppos); 1945 1946 deftype = ROFFDEF_USER | ROFFDEF_REN; 1947 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1948 switch (deftype) { 1949 case ROFFDEF_USER: 1950 t = ROFF_USERDEF; 1951 break; 1952 case ROFFDEF_REN: 1953 t = ROFF_RENAMED; 1954 break; 1955 default: 1956 t = roffhash_find(r->reqtab, mac, maclen); 1957 break; 1958 } 1959 if (t != TOKEN_NONE) 1960 *pos = cp - buf; 1961 else if (deftype == ROFFDEF_UNDEF) { 1962 /* Using an undefined macro defines it to be empty. */ 1963 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 1964 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 1965 } 1966 return t; 1967 } 1968 1969 /* --- handling of request blocks ----------------------------------------- */ 1970 1971 static int 1972 roff_cblock(ROFF_ARGS) 1973 { 1974 1975 /* 1976 * A block-close `..' should only be invoked as a child of an 1977 * ignore macro, otherwise raise a warning and just ignore it. 1978 */ 1979 1980 if (r->last == NULL) { 1981 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1982 return ROFF_IGN; 1983 } 1984 1985 switch (r->last->tok) { 1986 case ROFF_am: 1987 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1988 case ROFF_ami: 1989 case ROFF_de: 1990 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1991 case ROFF_dei: 1992 case ROFF_ig: 1993 break; 1994 default: 1995 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1996 return ROFF_IGN; 1997 } 1998 1999 if (buf->buf[pos] != '\0') 2000 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 2001 ".. 
%s", buf->buf + pos); 2002 2003 roffnode_pop(r); 2004 roffnode_cleanscope(r); 2005 return ROFF_IGN; 2006 2007 } 2008 2009 /* 2010 * Pop all nodes ending at the end of the current input line. 2011 * Return the number of loops ended. 2012 */ 2013 static int 2014 roffnode_cleanscope(struct roff *r) 2015 { 2016 int inloop; 2017 2018 inloop = 0; 2019 while (r->last != NULL) { 2020 if (--r->last->endspan != 0) 2021 break; 2022 inloop += roffnode_pop(r); 2023 } 2024 return inloop; 2025 } 2026 2027 /* 2028 * Handle the closing \} of a conditional block. 2029 * Apart from generating warnings, this only pops nodes. 2030 * Return the number of loops ended. 2031 */ 2032 static int 2033 roff_ccond(struct roff *r, int ln, int ppos) 2034 { 2035 if (NULL == r->last) { 2036 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2037 return 0; 2038 } 2039 2040 switch (r->last->tok) { 2041 case ROFF_el: 2042 case ROFF_ie: 2043 case ROFF_if: 2044 case ROFF_while: 2045 break; 2046 default: 2047 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2048 return 0; 2049 } 2050 2051 if (r->last->endspan > -1) { 2052 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2053 return 0; 2054 } 2055 2056 return roffnode_pop(r) + roffnode_cleanscope(r); 2057 } 2058 2059 static int 2060 roff_block(ROFF_ARGS) 2061 { 2062 const char *name, *value; 2063 char *call, *cp, *iname, *rname; 2064 size_t csz, namesz, rsz; 2065 int deftype; 2066 2067 /* Ignore groff compatibility mode for now. */ 2068 2069 if (tok == ROFF_de1) 2070 tok = ROFF_de; 2071 else if (tok == ROFF_dei1) 2072 tok = ROFF_dei; 2073 else if (tok == ROFF_am1) 2074 tok = ROFF_am; 2075 else if (tok == ROFF_ami1) 2076 tok = ROFF_ami; 2077 2078 /* Parse the macro name argument. */ 2079 2080 cp = buf->buf + pos; 2081 if (tok == ROFF_ig) { 2082 iname = NULL; 2083 namesz = 0; 2084 } else { 2085 iname = cp; 2086 namesz = roff_getname(r, &cp, ln, ppos); 2087 iname[namesz] = '\0'; 2088 } 2089 2090 /* Resolve the macro name argument if it is indirect. 
*/ 2091 2092 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2093 deftype = ROFFDEF_USER; 2094 name = roff_getstrn(r, iname, namesz, &deftype); 2095 if (name == NULL) { 2096 mandoc_msg(MANDOCERR_STR_UNDEF, 2097 ln, (int)(iname - buf->buf), 2098 "%.*s", (int)namesz, iname); 2099 namesz = 0; 2100 } else 2101 namesz = strlen(name); 2102 } else 2103 name = iname; 2104 2105 if (namesz == 0 && tok != ROFF_ig) { 2106 mandoc_msg(MANDOCERR_REQ_EMPTY, 2107 ln, ppos, "%s", roff_name[tok]); 2108 return ROFF_IGN; 2109 } 2110 2111 roffnode_push(r, tok, name, ln, ppos); 2112 2113 /* 2114 * At the beginning of a `de' macro, clear the existing string 2115 * with the same name, if there is one. New content will be 2116 * appended from roff_block_text() in multiline mode. 2117 */ 2118 2119 if (tok == ROFF_de || tok == ROFF_dei) { 2120 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2121 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2122 } else if (tok == ROFF_am || tok == ROFF_ami) { 2123 deftype = ROFFDEF_ANY; 2124 value = roff_getstrn(r, iname, namesz, &deftype); 2125 switch (deftype) { /* Before appending, ... */ 2126 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2127 roff_setstrn(&r->strtab, name, namesz, 2128 value, strlen(value), 0); 2129 break; 2130 case ROFFDEF_REN: /* call original standard macro. */ 2131 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2132 (int)strlen(value), value); 2133 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2134 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2135 free(call); 2136 break; 2137 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2138 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2139 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2140 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2141 (int)rsz, rname); 2142 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2143 free(call); 2144 free(rname); 2145 break; 2146 default: 2147 break; 2148 } 2149 } 2150 2151 if (*cp == '\0') 2152 return ROFF_IGN; 2153 2154 /* Get the custom end marker. */ 2155 2156 iname = cp; 2157 namesz = roff_getname(r, &cp, ln, ppos); 2158 2159 /* Resolve the end marker if it is indirect. */ 2160 2161 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2162 deftype = ROFFDEF_USER; 2163 name = roff_getstrn(r, iname, namesz, &deftype); 2164 if (name == NULL) { 2165 mandoc_msg(MANDOCERR_STR_UNDEF, 2166 ln, (int)(iname - buf->buf), 2167 "%.*s", (int)namesz, iname); 2168 namesz = 0; 2169 } else 2170 namesz = strlen(name); 2171 } else 2172 name = iname; 2173 2174 if (namesz) 2175 r->last->end = mandoc_strndup(name, namesz); 2176 2177 if (*cp != '\0') 2178 mandoc_msg(MANDOCERR_ARG_EXCESS, 2179 ln, pos, ".%s ... %s", roff_name[tok], cp); 2180 2181 return ROFF_IGN; 2182 } 2183 2184 static int 2185 roff_block_sub(ROFF_ARGS) 2186 { 2187 enum roff_tok t; 2188 int i, j; 2189 2190 /* 2191 * First check whether a custom macro exists at this level. If 2192 * it does, then check against it. This is some of groff's 2193 * stranger behaviours. If we encountered a custom end-scope 2194 * tag and that tag also happens to be a "real" macro, then we 2195 * need to try interpreting it again as a real macro. If it's 2196 * not, then return ignore. Else continue. 
2197 */ 2198 2199 if (r->last->end) { 2200 for (i = pos, j = 0; r->last->end[j]; j++, i++) 2201 if (buf->buf[i] != r->last->end[j]) 2202 break; 2203 2204 if (r->last->end[j] == '\0' && 2205 (buf->buf[i] == '\0' || 2206 buf->buf[i] == ' ' || 2207 buf->buf[i] == '\t')) { 2208 roffnode_pop(r); 2209 roffnode_cleanscope(r); 2210 2211 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 2212 i++; 2213 2214 pos = i; 2215 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 2216 TOKEN_NONE) 2217 return ROFF_RERUN; 2218 return ROFF_IGN; 2219 } 2220 } 2221 2222 /* 2223 * If we have no custom end-query or lookup failed, then try 2224 * pulling it out of the hashtable. 2225 */ 2226 2227 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2228 2229 if (t != ROFF_cblock) { 2230 if (tok != ROFF_ig) 2231 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 2232 return ROFF_IGN; 2233 } 2234 2235 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2236 } 2237 2238 static int 2239 roff_block_text(ROFF_ARGS) 2240 { 2241 2242 if (tok != ROFF_ig) 2243 roff_setstr(r, r->last->name, buf->buf + pos, 2); 2244 2245 return ROFF_IGN; 2246 } 2247 2248 static int 2249 roff_cond_sub(ROFF_ARGS) 2250 { 2251 struct roffnode *bl; 2252 char *ep; 2253 int endloop, irc, rr; 2254 enum roff_tok t; 2255 2256 irc = ROFF_IGN; 2257 rr = r->last->rule; 2258 endloop = tok != ROFF_while ? ROFF_IGN : 2259 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2260 if (roffnode_cleanscope(r)) 2261 irc |= endloop; 2262 2263 /* 2264 * If `\}' occurs on a macro line without a preceding macro, 2265 * drop the line completely. 2266 */ 2267 2268 ep = buf->buf + pos; 2269 if (ep[0] == '\\' && ep[1] == '}') 2270 rr = 0; 2271 2272 /* 2273 * The closing delimiter `\}' rewinds the conditional scope 2274 * but is otherwise ignored when interpreting the line. 
2275 */ 2276 2277 while ((ep = strchr(ep, '\\')) != NULL) { 2278 switch (ep[1]) { 2279 case '}': 2280 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2281 if (roff_ccond(r, ln, ep - buf->buf)) 2282 irc |= endloop; 2283 break; 2284 case '\0': 2285 ++ep; 2286 break; 2287 default: 2288 ep += 2; 2289 break; 2290 } 2291 } 2292 2293 /* 2294 * Fully handle known macros when they are structurally 2295 * required or when the conditional evaluated to true. 2296 */ 2297 2298 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2299 if (t == ROFF_break) { 2300 if (irc & ROFF_LOOPMASK) 2301 irc = ROFF_IGN | ROFF_LOOPEXIT; 2302 else if (rr) { 2303 for (bl = r->last; bl != NULL; bl = bl->parent) { 2304 bl->rule = 0; 2305 if (bl->tok == ROFF_while) 2306 break; 2307 } 2308 } 2309 } else if (t != TOKEN_NONE && 2310 (rr || roffs[t].flags & ROFFMAC_STRUCT)) 2311 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 2312 else 2313 irc |= rr ? ROFF_CONT : ROFF_IGN; 2314 return irc; 2315 } 2316 2317 static int 2318 roff_cond_text(ROFF_ARGS) 2319 { 2320 char *ep; 2321 int endloop, irc, rr; 2322 2323 irc = ROFF_IGN; 2324 rr = r->last->rule; 2325 endloop = tok != ROFF_while ? ROFF_IGN : 2326 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2327 if (roffnode_cleanscope(r)) 2328 irc |= endloop; 2329 2330 /* 2331 * If `\}' occurs on a text line with neither preceding 2332 * nor following characters, drop the line completely. 2333 */ 2334 2335 ep = buf->buf + pos; 2336 if (strcmp(ep, "\\}") == 0) 2337 rr = 0; 2338 2339 /* 2340 * The closing delimiter `\}' rewinds the conditional scope 2341 * but is otherwise ignored when interpreting the line. 
2342 */ 2343 2344 while ((ep = strchr(ep, '\\')) != NULL) { 2345 switch (ep[1]) { 2346 case '}': 2347 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2348 if (roff_ccond(r, ln, ep - buf->buf)) 2349 irc |= endloop; 2350 break; 2351 case '\0': 2352 ++ep; 2353 break; 2354 default: 2355 ep += 2; 2356 break; 2357 } 2358 } 2359 if (rr) 2360 irc |= ROFF_CONT; 2361 return irc; 2362 } 2363 2364 /* --- handling of numeric and conditional expressions -------------------- */ 2365 2366 /* 2367 * Parse a single signed integer number. Stop at the first non-digit. 2368 * If there is at least one digit, return success and advance the 2369 * parse point, else return failure and let the parse point unchanged. 2370 * Ignore overflows, treat them just like the C language. 2371 */ 2372 static int 2373 roff_getnum(const char *v, int *pos, int *res, int flags) 2374 { 2375 int myres, scaled, n, p; 2376 2377 if (NULL == res) 2378 res = &myres; 2379 2380 p = *pos; 2381 n = v[p] == '-'; 2382 if (n || v[p] == '+') 2383 p++; 2384 2385 if (flags & ROFFNUM_WHITE) 2386 while (isspace((unsigned char)v[p])) 2387 p++; 2388 2389 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2390 *res = 10 * *res + v[p] - '0'; 2391 if (p == *pos + n) 2392 return 0; 2393 2394 if (n) 2395 *res = -*res; 2396 2397 /* Each number may be followed by one optional scaling unit. 
*/ 2398 2399 switch (v[p]) { 2400 case 'f': 2401 scaled = *res * 65536; 2402 break; 2403 case 'i': 2404 scaled = *res * 240; 2405 break; 2406 case 'c': 2407 scaled = *res * 240 / 2.54; 2408 break; 2409 case 'v': 2410 case 'P': 2411 scaled = *res * 40; 2412 break; 2413 case 'm': 2414 case 'n': 2415 scaled = *res * 24; 2416 break; 2417 case 'p': 2418 scaled = *res * 10 / 3; 2419 break; 2420 case 'u': 2421 scaled = *res; 2422 break; 2423 case 'M': 2424 scaled = *res * 6 / 25; 2425 break; 2426 default: 2427 scaled = *res; 2428 p--; 2429 break; 2430 } 2431 if (flags & ROFFNUM_SCALE) 2432 *res = scaled; 2433 2434 *pos = p + 1; 2435 return 1; 2436 } 2437 2438 /* 2439 * Evaluate a string comparison condition. 2440 * The first character is the delimiter. 2441 * Succeed if the string up to its second occurrence 2442 * matches the string up to its third occurence. 2443 * Advance the cursor after the third occurrence 2444 * or lacking that, to the end of the line. 2445 */ 2446 static int 2447 roff_evalstrcond(const char *v, int *pos) 2448 { 2449 const char *s1, *s2, *s3; 2450 int match; 2451 2452 match = 0; 2453 s1 = v + *pos; /* initial delimiter */ 2454 s2 = s1 + 1; /* for scanning the first string */ 2455 s3 = strchr(s2, *s1); /* for scanning the second string */ 2456 2457 if (NULL == s3) /* found no middle delimiter */ 2458 goto out; 2459 2460 while ('\0' != *++s3) { 2461 if (*s2 != *s3) { /* mismatch */ 2462 s3 = strchr(s3, *s1); 2463 break; 2464 } 2465 if (*s3 == *s1) { /* found the final delimiter */ 2466 match = 1; 2467 break; 2468 } 2469 s2++; 2470 } 2471 2472 out: 2473 if (NULL == s3) 2474 s3 = strchr(s2, '\0'); 2475 else if (*s3 != '\0') 2476 s3++; 2477 *pos = s3 - v; 2478 return match; 2479 } 2480 2481 /* 2482 * Evaluate an optionally negated single character, numerical, 2483 * or string condition. 
2484 */ 2485 static int 2486 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2487 { 2488 const char *start, *end; 2489 char *cp, *name; 2490 size_t sz; 2491 int deftype, len, number, savepos, istrue, wanttrue; 2492 2493 if ('!' == v[*pos]) { 2494 wanttrue = 0; 2495 (*pos)++; 2496 } else 2497 wanttrue = 1; 2498 2499 switch (v[*pos]) { 2500 case '\0': 2501 return 0; 2502 case 'n': 2503 case 'o': 2504 (*pos)++; 2505 return wanttrue; 2506 case 'e': 2507 case 't': 2508 case 'v': 2509 (*pos)++; 2510 return !wanttrue; 2511 case 'c': 2512 do { 2513 (*pos)++; 2514 } while (v[*pos] == ' '); 2515 2516 /* 2517 * Quirk for groff compatibility: 2518 * The horizontal tab is neither available nor unavailable. 2519 */ 2520 2521 if (v[*pos] == '\t') { 2522 (*pos)++; 2523 return 0; 2524 } 2525 2526 /* Printable ASCII characters are available. */ 2527 2528 if (v[*pos] != '\\') { 2529 (*pos)++; 2530 return wanttrue; 2531 } 2532 2533 end = v + ++*pos; 2534 switch (mandoc_escape(&end, &start, &len)) { 2535 case ESCAPE_SPECIAL: 2536 istrue = mchars_spec2cp(start, len) != -1; 2537 break; 2538 case ESCAPE_UNICODE: 2539 istrue = 1; 2540 break; 2541 case ESCAPE_NUMBERED: 2542 istrue = mchars_num2char(start, len) != -1; 2543 break; 2544 default: 2545 istrue = !wanttrue; 2546 break; 2547 } 2548 *pos = end - v; 2549 return istrue == wanttrue; 2550 case 'd': 2551 case 'r': 2552 cp = v + *pos + 1; 2553 while (*cp == ' ') 2554 cp++; 2555 name = cp; 2556 sz = roff_getname(r, &cp, ln, cp - v); 2557 if (sz == 0) 2558 istrue = 0; 2559 else if (v[*pos] == 'r') 2560 istrue = roff_hasregn(r, name, sz); 2561 else { 2562 deftype = ROFFDEF_ANY; 2563 roff_getstrn(r, name, sz, &deftype); 2564 istrue = !!deftype; 2565 } 2566 *pos = (name + sz) - v; 2567 return istrue == wanttrue; 2568 default: 2569 break; 2570 } 2571 2572 savepos = *pos; 2573 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2574 return (number > 0) == wanttrue; 2575 else if (*pos == savepos) 2576 return roff_evalstrcond(v, pos) 
== wanttrue; 2577 else 2578 return 0; 2579 } 2580 2581 static int 2582 roff_line_ignore(ROFF_ARGS) 2583 { 2584 2585 return ROFF_IGN; 2586 } 2587 2588 static int 2589 roff_insec(ROFF_ARGS) 2590 { 2591 2592 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2593 return ROFF_IGN; 2594 } 2595 2596 static int 2597 roff_unsupp(ROFF_ARGS) 2598 { 2599 2600 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2601 return ROFF_IGN; 2602 } 2603 2604 static int 2605 roff_cond(ROFF_ARGS) 2606 { 2607 int irc; 2608 2609 roffnode_push(r, tok, NULL, ln, ppos); 2610 2611 /* 2612 * An `.el' has no conditional body: it will consume the value 2613 * of the current rstack entry set in prior `ie' calls or 2614 * defaults to DENY. 2615 * 2616 * If we're not an `el', however, then evaluate the conditional. 2617 */ 2618 2619 r->last->rule = tok == ROFF_el ? 2620 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2621 roff_evalcond(r, ln, buf->buf, &pos); 2622 2623 /* 2624 * An if-else will put the NEGATION of the current evaluated 2625 * conditional into the stack of rules. 2626 */ 2627 2628 if (tok == ROFF_ie) { 2629 if (r->rstackpos + 1 == r->rstacksz) { 2630 r->rstacksz += 16; 2631 r->rstack = mandoc_reallocarray(r->rstack, 2632 r->rstacksz, sizeof(int)); 2633 } 2634 r->rstack[++r->rstackpos] = !r->last->rule; 2635 } 2636 2637 /* If the parent has false as its rule, then so do we. */ 2638 2639 if (r->last->parent && !r->last->parent->rule) 2640 r->last->rule = 0; 2641 2642 /* 2643 * Determine scope. 2644 * If there is nothing on the line after the conditional, 2645 * not even whitespace, use next-line scope. 2646 * Except that .while does not support next-line scope. 2647 */ 2648 2649 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2650 r->last->endspan = 2; 2651 goto out; 2652 } 2653 2654 while (buf->buf[pos] == ' ') 2655 pos++; 2656 2657 /* An opening brace requests multiline scope. 
*/ 2658 2659 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2660 r->last->endspan = -1; 2661 pos += 2; 2662 while (buf->buf[pos] == ' ') 2663 pos++; 2664 goto out; 2665 } 2666 2667 /* 2668 * Anything else following the conditional causes 2669 * single-line scope. Warn if the scope contains 2670 * nothing but trailing whitespace. 2671 */ 2672 2673 if (buf->buf[pos] == '\0') 2674 mandoc_msg(MANDOCERR_COND_EMPTY, 2675 ln, ppos, "%s", roff_name[tok]); 2676 2677 r->last->endspan = 1; 2678 2679 out: 2680 *offs = pos; 2681 irc = ROFF_RERUN; 2682 if (tok == ROFF_while) 2683 irc |= ROFF_WHILE; 2684 return irc; 2685 } 2686 2687 static int 2688 roff_ds(ROFF_ARGS) 2689 { 2690 char *string; 2691 const char *name; 2692 size_t namesz; 2693 2694 /* Ignore groff compatibility mode for now. */ 2695 2696 if (tok == ROFF_ds1) 2697 tok = ROFF_ds; 2698 else if (tok == ROFF_as1) 2699 tok = ROFF_as; 2700 2701 /* 2702 * The first word is the name of the string. 2703 * If it is empty or terminated by an escape sequence, 2704 * abort the `ds' request without defining anything. 2705 */ 2706 2707 name = string = buf->buf + pos; 2708 if (*name == '\0') 2709 return ROFF_IGN; 2710 2711 namesz = roff_getname(r, &string, ln, pos); 2712 switch (name[namesz]) { 2713 case '\\': 2714 return ROFF_IGN; 2715 case '\t': 2716 string = buf->buf + pos + namesz; 2717 break; 2718 default: 2719 break; 2720 } 2721 2722 /* Read past the initial double-quote, if any. */ 2723 if (*string == '"') 2724 string++; 2725 2726 /* The rest is the value. */ 2727 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2728 ROFF_as == tok); 2729 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2730 return ROFF_IGN; 2731 } 2732 2733 /* 2734 * Parse a single operator, one or two characters long. 2735 * If the operator is recognized, return success and advance the 2736 * parse point, else return failure and let the parse point unchanged. 
2737 */ 2738 static int 2739 roff_getop(const char *v, int *pos, char *res) 2740 { 2741 2742 *res = v[*pos]; 2743 2744 switch (*res) { 2745 case '+': 2746 case '-': 2747 case '*': 2748 case '/': 2749 case '%': 2750 case '&': 2751 case ':': 2752 break; 2753 case '<': 2754 switch (v[*pos + 1]) { 2755 case '=': 2756 *res = 'l'; 2757 (*pos)++; 2758 break; 2759 case '>': 2760 *res = '!'; 2761 (*pos)++; 2762 break; 2763 case '?': 2764 *res = 'i'; 2765 (*pos)++; 2766 break; 2767 default: 2768 break; 2769 } 2770 break; 2771 case '>': 2772 switch (v[*pos + 1]) { 2773 case '=': 2774 *res = 'g'; 2775 (*pos)++; 2776 break; 2777 case '?': 2778 *res = 'a'; 2779 (*pos)++; 2780 break; 2781 default: 2782 break; 2783 } 2784 break; 2785 case '=': 2786 if ('=' == v[*pos + 1]) 2787 (*pos)++; 2788 break; 2789 default: 2790 return 0; 2791 } 2792 (*pos)++; 2793 2794 return *res; 2795 } 2796 2797 /* 2798 * Evaluate either a parenthesized numeric expression 2799 * or a single signed integer number. 2800 */ 2801 static int 2802 roff_evalpar(struct roff *r, int ln, 2803 const char *v, int *pos, int *res, int flags) 2804 { 2805 2806 if ('(' != v[*pos]) 2807 return roff_getnum(v, pos, res, flags); 2808 2809 (*pos)++; 2810 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2811 return 0; 2812 2813 /* 2814 * Omission of the closing parenthesis 2815 * is an error in validation mode, 2816 * but ignored in evaluation mode. 2817 */ 2818 2819 if (')' == v[*pos]) 2820 (*pos)++; 2821 else if (NULL == res) 2822 return 0; 2823 2824 return 1; 2825 } 2826 2827 /* 2828 * Evaluate a complete numeric expression. 2829 * Proceed left to right, there is no concept of precedence. 
2830 */ 2831 static int 2832 roff_evalnum(struct roff *r, int ln, const char *v, 2833 int *pos, int *res, int flags) 2834 { 2835 int mypos, operand2; 2836 char operator; 2837 2838 if (NULL == pos) { 2839 mypos = 0; 2840 pos = &mypos; 2841 } 2842 2843 if (flags & ROFFNUM_WHITE) 2844 while (isspace((unsigned char)v[*pos])) 2845 (*pos)++; 2846 2847 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2848 return 0; 2849 2850 while (1) { 2851 if (flags & ROFFNUM_WHITE) 2852 while (isspace((unsigned char)v[*pos])) 2853 (*pos)++; 2854 2855 if ( ! roff_getop(v, pos, &operator)) 2856 break; 2857 2858 if (flags & ROFFNUM_WHITE) 2859 while (isspace((unsigned char)v[*pos])) 2860 (*pos)++; 2861 2862 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2863 return 0; 2864 2865 if (flags & ROFFNUM_WHITE) 2866 while (isspace((unsigned char)v[*pos])) 2867 (*pos)++; 2868 2869 if (NULL == res) 2870 continue; 2871 2872 switch (operator) { 2873 case '+': 2874 *res += operand2; 2875 break; 2876 case '-': 2877 *res -= operand2; 2878 break; 2879 case '*': 2880 *res *= operand2; 2881 break; 2882 case '/': 2883 if (operand2 == 0) { 2884 mandoc_msg(MANDOCERR_DIVZERO, 2885 ln, *pos, "%s", v); 2886 *res = 0; 2887 break; 2888 } 2889 *res /= operand2; 2890 break; 2891 case '%': 2892 if (operand2 == 0) { 2893 mandoc_msg(MANDOCERR_DIVZERO, 2894 ln, *pos, "%s", v); 2895 *res = 0; 2896 break; 2897 } 2898 *res %= operand2; 2899 break; 2900 case '<': 2901 *res = *res < operand2; 2902 break; 2903 case '>': 2904 *res = *res > operand2; 2905 break; 2906 case 'l': 2907 *res = *res <= operand2; 2908 break; 2909 case 'g': 2910 *res = *res >= operand2; 2911 break; 2912 case '=': 2913 *res = *res == operand2; 2914 break; 2915 case '!': 2916 *res = *res != operand2; 2917 break; 2918 case '&': 2919 *res = *res && operand2; 2920 break; 2921 case ':': 2922 *res = *res || operand2; 2923 break; 2924 case 'i': 2925 if (operand2 < *res) 2926 *res = operand2; 2927 break; 2928 case 'a': 2929 if (operand2 > *res) 2930 *res 
= operand2; 2931 break; 2932 default: 2933 abort(); 2934 } 2935 } 2936 return 1; 2937 } 2938 2939 /* --- register management ------------------------------------------------ */ 2940 2941 void 2942 roff_setreg(struct roff *r, const char *name, int val, char sign) 2943 { 2944 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2945 } 2946 2947 static void 2948 roff_setregn(struct roff *r, const char *name, size_t len, 2949 int val, char sign, int step) 2950 { 2951 struct roffreg *reg; 2952 2953 /* Search for an existing register with the same name. */ 2954 reg = r->regtab; 2955 2956 while (reg != NULL && (reg->key.sz != len || 2957 strncmp(reg->key.p, name, len) != 0)) 2958 reg = reg->next; 2959 2960 if (NULL == reg) { 2961 /* Create a new register. */ 2962 reg = mandoc_malloc(sizeof(struct roffreg)); 2963 reg->key.p = mandoc_strndup(name, len); 2964 reg->key.sz = len; 2965 reg->val = 0; 2966 reg->step = 0; 2967 reg->next = r->regtab; 2968 r->regtab = reg; 2969 } 2970 2971 if ('+' == sign) 2972 reg->val += val; 2973 else if ('-' == sign) 2974 reg->val -= val; 2975 else 2976 reg->val = val; 2977 if (step != INT_MIN) 2978 reg->step = step; 2979 } 2980 2981 /* 2982 * Handle some predefined read-only number registers. 2983 * For now, return -1 if the requested register is not predefined; 2984 * in case a predefined read-only register having the value -1 2985 * were to turn up, another special value would have to be chosen. 2986 */ 2987 static int 2988 roff_getregro(const struct roff *r, const char *name) 2989 { 2990 2991 switch (*name) { 2992 case '$': /* Number of arguments of the last macro evaluated. */ 2993 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 2994 case 'A': /* ASCII approximation mode is always off. */ 2995 return 0; 2996 case 'g': /* Groff compatibility mode is always on. */ 2997 return 1; 2998 case 'H': /* Fixed horizontal resolution. */ 2999 return 24; 3000 case 'j': /* Always adjust left margin only. 
*/ 3001 return 0; 3002 case 'T': /* Some output device is always defined. */ 3003 return 1; 3004 case 'V': /* Fixed vertical resolution. */ 3005 return 40; 3006 default: 3007 return -1; 3008 } 3009 } 3010 3011 int 3012 roff_getreg(struct roff *r, const char *name) 3013 { 3014 return roff_getregn(r, name, strlen(name), '\0'); 3015 } 3016 3017 static int 3018 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 3019 { 3020 struct roffreg *reg; 3021 int val; 3022 3023 if ('.' == name[0] && 2 == len) { 3024 val = roff_getregro(r, name + 1); 3025 if (-1 != val) 3026 return val; 3027 } 3028 3029 for (reg = r->regtab; reg; reg = reg->next) { 3030 if (len == reg->key.sz && 3031 0 == strncmp(name, reg->key.p, len)) { 3032 switch (sign) { 3033 case '+': 3034 reg->val += reg->step; 3035 break; 3036 case '-': 3037 reg->val -= reg->step; 3038 break; 3039 default: 3040 break; 3041 } 3042 return reg->val; 3043 } 3044 } 3045 3046 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3047 return 0; 3048 } 3049 3050 static int 3051 roff_hasregn(const struct roff *r, const char *name, size_t len) 3052 { 3053 struct roffreg *reg; 3054 int val; 3055 3056 if ('.' 
== name[0] && 2 == len) { 3057 val = roff_getregro(r, name + 1); 3058 if (-1 != val) 3059 return 1; 3060 } 3061 3062 for (reg = r->regtab; reg; reg = reg->next) 3063 if (len == reg->key.sz && 3064 0 == strncmp(name, reg->key.p, len)) 3065 return 1; 3066 3067 return 0; 3068 } 3069 3070 static void 3071 roff_freereg(struct roffreg *reg) 3072 { 3073 struct roffreg *old_reg; 3074 3075 while (NULL != reg) { 3076 free(reg->key.p); 3077 old_reg = reg; 3078 reg = reg->next; 3079 free(old_reg); 3080 } 3081 } 3082 3083 static int 3084 roff_nr(ROFF_ARGS) 3085 { 3086 char *key, *val, *step; 3087 size_t keysz; 3088 int iv, is, len; 3089 char sign; 3090 3091 key = val = buf->buf + pos; 3092 if (*key == '\0') 3093 return ROFF_IGN; 3094 3095 keysz = roff_getname(r, &val, ln, pos); 3096 if (key[keysz] == '\\' || key[keysz] == '\t') 3097 return ROFF_IGN; 3098 3099 sign = *val; 3100 if (sign == '+' || sign == '-') 3101 val++; 3102 3103 len = 0; 3104 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3105 return ROFF_IGN; 3106 3107 step = val + len; 3108 while (isspace((unsigned char)*step)) 3109 step++; 3110 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3111 is = INT_MIN; 3112 3113 roff_setregn(r, key, keysz, iv, sign, is); 3114 return ROFF_IGN; 3115 } 3116 3117 static int 3118 roff_rr(ROFF_ARGS) 3119 { 3120 struct roffreg *reg, **prev; 3121 char *name, *cp; 3122 size_t namesz; 3123 3124 name = cp = buf->buf + pos; 3125 if (*name == '\0') 3126 return ROFF_IGN; 3127 namesz = roff_getname(r, &cp, ln, pos); 3128 name[namesz] = '\0'; 3129 3130 prev = &r->regtab; 3131 while (1) { 3132 reg = *prev; 3133 if (reg == NULL || !strcmp(name, reg->key.p)) 3134 break; 3135 prev = ®->next; 3136 } 3137 if (reg != NULL) { 3138 *prev = reg->next; 3139 free(reg->key.p); 3140 free(reg); 3141 } 3142 return ROFF_IGN; 3143 } 3144 3145 /* --- handler functions for roff requests -------------------------------- */ 3146 3147 static int 3148 roff_rm(ROFF_ARGS) 3149 { 3150 const char *name; 3151 
char *cp; 3152 size_t namesz; 3153 3154 cp = buf->buf + pos; 3155 while (*cp != '\0') { 3156 name = cp; 3157 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3158 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3159 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3160 if (name[namesz] == '\\' || name[namesz] == '\t') 3161 break; 3162 } 3163 return ROFF_IGN; 3164 } 3165 3166 static int 3167 roff_it(ROFF_ARGS) 3168 { 3169 int iv; 3170 3171 /* Parse the number of lines. */ 3172 3173 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3174 mandoc_msg(MANDOCERR_IT_NONUM, 3175 ln, ppos, "%s", buf->buf + 1); 3176 return ROFF_IGN; 3177 } 3178 3179 while (isspace((unsigned char)buf->buf[pos])) 3180 pos++; 3181 3182 /* 3183 * Arm the input line trap. 3184 * Special-casing "an-trap" is an ugly workaround to cope 3185 * with DocBook stupidly fiddling with man(7) internals. 3186 */ 3187 3188 roffit_lines = iv; 3189 roffit_macro = mandoc_strdup(iv != 1 || 3190 strcmp(buf->buf + pos, "an-trap") ? 
3191 buf->buf + pos : "br"); 3192 return ROFF_IGN; 3193 } 3194 3195 static int 3196 roff_Dd(ROFF_ARGS) 3197 { 3198 int mask; 3199 enum roff_tok t, te; 3200 3201 switch (tok) { 3202 case ROFF_Dd: 3203 tok = MDOC_Dd; 3204 te = MDOC_MAX; 3205 if (r->format == 0) 3206 r->format = MPARSE_MDOC; 3207 mask = MPARSE_MDOC | MPARSE_QUICK; 3208 break; 3209 case ROFF_TH: 3210 tok = MAN_TH; 3211 te = MAN_MAX; 3212 if (r->format == 0) 3213 r->format = MPARSE_MAN; 3214 mask = MPARSE_QUICK; 3215 break; 3216 default: 3217 abort(); 3218 } 3219 if ((r->options & mask) == 0) 3220 for (t = tok; t < te; t++) 3221 roff_setstr(r, roff_name[t], NULL, 0); 3222 return ROFF_CONT; 3223 } 3224 3225 static int 3226 roff_TE(ROFF_ARGS) 3227 { 3228 r->man->flags &= ~ROFF_NONOFILL; 3229 if (r->tbl == NULL) { 3230 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3231 return ROFF_IGN; 3232 } 3233 if (tbl_end(r->tbl, 0) == 0) { 3234 r->tbl = NULL; 3235 free(buf->buf); 3236 buf->buf = mandoc_strdup(".sp"); 3237 buf->sz = 4; 3238 *offs = 0; 3239 return ROFF_REPARSE; 3240 } 3241 r->tbl = NULL; 3242 return ROFF_IGN; 3243 } 3244 3245 static int 3246 roff_T_(ROFF_ARGS) 3247 { 3248 3249 if (NULL == r->tbl) 3250 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3251 else 3252 tbl_restart(ln, ppos, r->tbl); 3253 3254 return ROFF_IGN; 3255 } 3256 3257 /* 3258 * Handle in-line equation delimiters. 3259 */ 3260 static int 3261 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3262 { 3263 char *cp1, *cp2; 3264 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3265 3266 /* 3267 * Outside equations, look for an opening delimiter. 3268 * If we are inside an equation, we already know it is 3269 * in-line, or this function wouldn't have been called; 3270 * so look for a closing delimiter. 3271 */ 3272 3273 cp1 = buf->buf + pos; 3274 cp2 = strchr(cp1, r->eqn == NULL ? 
3275 r->last_eqn->odelim : r->last_eqn->cdelim); 3276 if (cp2 == NULL) 3277 return ROFF_CONT; 3278 3279 *cp2++ = '\0'; 3280 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3281 3282 /* Handle preceding text, protecting whitespace. */ 3283 3284 if (*buf->buf != '\0') { 3285 if (r->eqn == NULL) 3286 bef_pr = "\\&"; 3287 bef_nl = "\n"; 3288 } 3289 3290 /* 3291 * Prepare replacing the delimiter with an equation macro 3292 * and drop leading white space from the equation. 3293 */ 3294 3295 if (r->eqn == NULL) { 3296 while (*cp2 == ' ') 3297 cp2++; 3298 mac = ".EQ"; 3299 } else 3300 mac = ".EN"; 3301 3302 /* Handle following text, protecting whitespace. */ 3303 3304 if (*cp2 != '\0') { 3305 aft_nl = "\n"; 3306 if (r->eqn != NULL) 3307 aft_pr = "\\&"; 3308 } 3309 3310 /* Do the actual replacement. */ 3311 3312 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3313 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3314 free(buf->buf); 3315 buf->buf = cp1; 3316 3317 /* Toggle the in-line state of the eqn subsystem. 
*/ 3318 3319 r->eqn_inline = r->eqn == NULL; 3320 return ROFF_REPARSE; 3321 } 3322 3323 static int 3324 roff_EQ(ROFF_ARGS) 3325 { 3326 struct roff_node *n; 3327 3328 if (r->man->meta.macroset == MACROSET_MAN) 3329 man_breakscope(r->man, ROFF_EQ); 3330 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3331 if (ln > r->man->last->line) 3332 n->flags |= NODE_LINE; 3333 n->eqn = eqn_box_new(); 3334 roff_node_append(r->man, n); 3335 r->man->next = ROFF_NEXT_SIBLING; 3336 3337 assert(r->eqn == NULL); 3338 if (r->last_eqn == NULL) 3339 r->last_eqn = eqn_alloc(); 3340 else 3341 eqn_reset(r->last_eqn); 3342 r->eqn = r->last_eqn; 3343 r->eqn->node = n; 3344 3345 if (buf->buf[pos] != '\0') 3346 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3347 ".EQ %s", buf->buf + pos); 3348 3349 return ROFF_IGN; 3350 } 3351 3352 static int 3353 roff_EN(ROFF_ARGS) 3354 { 3355 if (r->eqn != NULL) { 3356 eqn_parse(r->eqn); 3357 r->eqn = NULL; 3358 } else 3359 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3360 if (buf->buf[pos] != '\0') 3361 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3362 "EN %s", buf->buf + pos); 3363 return ROFF_IGN; 3364 } 3365 3366 static int 3367 roff_TS(ROFF_ARGS) 3368 { 3369 if (r->tbl != NULL) { 3370 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3371 tbl_end(r->tbl, 0); 3372 } 3373 r->man->flags |= ROFF_NONOFILL; 3374 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3375 if (r->last_tbl == NULL) 3376 r->first_tbl = r->tbl; 3377 r->last_tbl = r->tbl; 3378 return ROFF_IGN; 3379 } 3380 3381 static int 3382 roff_noarg(ROFF_ARGS) 3383 { 3384 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3385 man_breakscope(r->man, tok); 3386 if (tok == ROFF_brp) 3387 tok = ROFF_br; 3388 roff_elem_alloc(r->man, ln, ppos, tok); 3389 if (buf->buf[pos] != '\0') 3390 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3391 "%s %s", roff_name[tok], buf->buf + pos); 3392 if (tok == ROFF_nf) 3393 r->man->flags |= ROFF_NOFILL; 3394 else if (tok == ROFF_fi) 3395 r->man->flags &= ~ROFF_NOFILL; 3396 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3397 r->man->next = ROFF_NEXT_SIBLING; 3398 return ROFF_IGN; 3399 } 3400 3401 static int 3402 roff_onearg(ROFF_ARGS) 3403 { 3404 struct roff_node *n; 3405 char *cp; 3406 int npos; 3407 3408 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3409 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3410 tok == ROFF_ti)) 3411 man_breakscope(r->man, tok); 3412 3413 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3414 r->man->last = roffce_node; 3415 r->man->next = ROFF_NEXT_SIBLING; 3416 } 3417 3418 roff_elem_alloc(r->man, ln, ppos, tok); 3419 n = r->man->last; 3420 3421 cp = buf->buf + pos; 3422 if (*cp != '\0') { 3423 while (*cp != '\0' && *cp != ' ') 3424 cp++; 3425 while (*cp == ' ') 3426 *cp++ = '\0'; 3427 if (*cp != '\0') 3428 mandoc_msg(MANDOCERR_ARG_EXCESS, 3429 ln, (int)(cp - buf->buf), 3430 "%s ... %s", roff_name[tok], cp); 3431 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3432 } 3433 3434 if (tok == ROFF_ce || tok == ROFF_rj) { 3435 if (r->man->last->type == ROFFT_ELEM) { 3436 roff_word_alloc(r->man, ln, pos, "1"); 3437 r->man->last->flags |= NODE_NOSRC; 3438 } 3439 npos = 0; 3440 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3441 &roffce_lines, 0) == 0) { 3442 mandoc_msg(MANDOCERR_CE_NONUM, 3443 ln, pos, "ce %s", buf->buf + pos); 3444 roffce_lines = 1; 3445 } 3446 if (roffce_lines < 1) { 3447 r->man->last = r->man->last->parent; 3448 roffce_node = NULL; 3449 roffce_lines = 0; 3450 } else 3451 roffce_node = r->man->last->parent; 3452 } else { 3453 n->flags |= NODE_VALID | NODE_ENDED; 3454 r->man->last = n; 3455 } 3456 n->flags |= NODE_LINE; 3457 r->man->next = ROFF_NEXT_SIBLING; 3458 return ROFF_IGN; 3459 } 3460 3461 static int 3462 roff_manyarg(ROFF_ARGS) 3463 { 3464 struct roff_node *n; 3465 char *sp, *ep; 3466 3467 roff_elem_alloc(r->man, ln, ppos, tok); 3468 n = r->man->last; 3469 3470 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3471 while (*ep != '\0' && 
*ep != ' ') 3472 ep++; 3473 while (*ep == ' ') 3474 *ep++ = '\0'; 3475 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3476 } 3477 3478 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3479 r->man->last = n; 3480 r->man->next = ROFF_NEXT_SIBLING; 3481 return ROFF_IGN; 3482 } 3483 3484 static int 3485 roff_als(ROFF_ARGS) 3486 { 3487 char *oldn, *newn, *end, *value; 3488 size_t oldsz, newsz, valsz; 3489 3490 newn = oldn = buf->buf + pos; 3491 if (*newn == '\0') 3492 return ROFF_IGN; 3493 3494 newsz = roff_getname(r, &oldn, ln, pos); 3495 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3496 return ROFF_IGN; 3497 3498 end = oldn; 3499 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3500 if (oldsz == 0) 3501 return ROFF_IGN; 3502 3503 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3504 (int)oldsz, oldn); 3505 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3506 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3507 free(value); 3508 return ROFF_IGN; 3509 } 3510 3511 /* 3512 * The .break request only makes sense inside conditionals, 3513 * and that case is already handled in roff_cond_sub(). 3514 */ 3515 static int 3516 roff_break(ROFF_ARGS) 3517 { 3518 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break"); 3519 return ROFF_IGN; 3520 } 3521 3522 static int 3523 roff_cc(ROFF_ARGS) 3524 { 3525 const char *p; 3526 3527 p = buf->buf + pos; 3528 3529 if (*p == '\0' || (r->control = *p++) == '.') 3530 r->control = '\0'; 3531 3532 if (*p != '\0') 3533 mandoc_msg(MANDOCERR_ARG_EXCESS, 3534 ln, p - buf->buf, "cc ... %s", p); 3535 3536 return ROFF_IGN; 3537 } 3538 3539 static int 3540 roff_char(ROFF_ARGS) 3541 { 3542 const char *p, *kp, *vp; 3543 size_t ksz, vsz; 3544 int font; 3545 3546 /* Parse the character to be replaced. 
*/ 3547 3548 kp = buf->buf + pos; 3549 p = kp + 1; 3550 if (*kp == '\0' || (*kp == '\\' && 3551 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3552 (*p != ' ' && *p != '\0')) { 3553 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3554 return ROFF_IGN; 3555 } 3556 ksz = p - kp; 3557 while (*p == ' ') 3558 p++; 3559 3560 /* 3561 * If the replacement string contains a font escape sequence, 3562 * we have to restore the font at the end. 3563 */ 3564 3565 vp = p; 3566 vsz = strlen(p); 3567 font = 0; 3568 while (*p != '\0') { 3569 if (*p++ != '\\') 3570 continue; 3571 switch (mandoc_escape(&p, NULL, NULL)) { 3572 case ESCAPE_FONT: 3573 case ESCAPE_FONTROMAN: 3574 case ESCAPE_FONTITALIC: 3575 case ESCAPE_FONTBOLD: 3576 case ESCAPE_FONTBI: 3577 case ESCAPE_FONTCW: 3578 case ESCAPE_FONTPREV: 3579 font++; 3580 break; 3581 default: 3582 break; 3583 } 3584 } 3585 if (font > 1) 3586 mandoc_msg(MANDOCERR_CHAR_FONT, 3587 ln, (int)(vp - buf->buf), "%s", vp); 3588 3589 /* 3590 * Approximate the effect of .char using the .tr tables. 3591 * XXX In groff, .char and .tr interact differently. 3592 */ 3593 3594 if (ksz == 1) { 3595 if (r->xtab == NULL) 3596 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3597 assert((unsigned int)*kp < 128); 3598 free(r->xtab[(int)*kp].p); 3599 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3600 "%s%s", vp, font ? "\fP" : ""); 3601 } else { 3602 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3603 if (font) 3604 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3605 } 3606 return ROFF_IGN; 3607 } 3608 3609 static int 3610 roff_ec(ROFF_ARGS) 3611 { 3612 const char *p; 3613 3614 p = buf->buf + pos; 3615 if (*p == '\0') 3616 r->escape = '\\'; 3617 else { 3618 r->escape = *p; 3619 if (*++p != '\0') 3620 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3621 (int)(p - buf->buf), "ec ... 
%s", p); 3622 } 3623 return ROFF_IGN; 3624 } 3625 3626 static int 3627 roff_eo(ROFF_ARGS) 3628 { 3629 r->escape = '\0'; 3630 if (buf->buf[pos] != '\0') 3631 mandoc_msg(MANDOCERR_ARG_SKIP, 3632 ln, pos, "eo %s", buf->buf + pos); 3633 return ROFF_IGN; 3634 } 3635 3636 static int 3637 roff_nop(ROFF_ARGS) 3638 { 3639 while (buf->buf[pos] == ' ') 3640 pos++; 3641 *offs = pos; 3642 return ROFF_RERUN; 3643 } 3644 3645 static int 3646 roff_tr(ROFF_ARGS) 3647 { 3648 const char *p, *first, *second; 3649 size_t fsz, ssz; 3650 enum mandoc_esc esc; 3651 3652 p = buf->buf + pos; 3653 3654 if (*p == '\0') { 3655 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3656 return ROFF_IGN; 3657 } 3658 3659 while (*p != '\0') { 3660 fsz = ssz = 1; 3661 3662 first = p++; 3663 if (*first == '\\') { 3664 esc = mandoc_escape(&p, NULL, NULL); 3665 if (esc == ESCAPE_ERROR) { 3666 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3667 (int)(p - buf->buf), "%s", first); 3668 return ROFF_IGN; 3669 } 3670 fsz = (size_t)(p - first); 3671 } 3672 3673 second = p++; 3674 if (*second == '\\') { 3675 esc = mandoc_escape(&p, NULL, NULL); 3676 if (esc == ESCAPE_ERROR) { 3677 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3678 (int)(p - buf->buf), "%s", second); 3679 return ROFF_IGN; 3680 } 3681 ssz = (size_t)(p - second); 3682 } else if (*second == '\0') { 3683 mandoc_msg(MANDOCERR_TR_ODD, ln, 3684 (int)(first - buf->buf), "tr %s", first); 3685 second = " "; 3686 p--; 3687 } 3688 3689 if (fsz > 1) { 3690 roff_setstrn(&r->xmbtab, first, fsz, 3691 second, ssz, 0); 3692 continue; 3693 } 3694 3695 if (r->xtab == NULL) 3696 r->xtab = mandoc_calloc(128, 3697 sizeof(struct roffstr)); 3698 3699 free(r->xtab[(int)*first].p); 3700 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3701 r->xtab[(int)*first].sz = ssz; 3702 } 3703 3704 return ROFF_IGN; 3705 } 3706 3707 /* 3708 * Implementation of the .return request. 3709 * There is no need to call roff_userret() from here. 
3710 * The read module will call that after rewinding the reader stack 3711 * to the place from where the current macro was called. 3712 */ 3713 static int 3714 roff_return(ROFF_ARGS) 3715 { 3716 if (r->mstackpos >= 0) 3717 return ROFF_IGN | ROFF_USERRET; 3718 3719 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3720 return ROFF_IGN; 3721 } 3722 3723 static int 3724 roff_rn(ROFF_ARGS) 3725 { 3726 const char *value; 3727 char *oldn, *newn, *end; 3728 size_t oldsz, newsz; 3729 int deftype; 3730 3731 oldn = newn = buf->buf + pos; 3732 if (*oldn == '\0') 3733 return ROFF_IGN; 3734 3735 oldsz = roff_getname(r, &newn, ln, pos); 3736 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3737 return ROFF_IGN; 3738 3739 end = newn; 3740 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3741 if (newsz == 0) 3742 return ROFF_IGN; 3743 3744 deftype = ROFFDEF_ANY; 3745 value = roff_getstrn(r, oldn, oldsz, &deftype); 3746 switch (deftype) { 3747 case ROFFDEF_USER: 3748 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3749 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3750 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3751 break; 3752 case ROFFDEF_PRE: 3753 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3754 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3755 break; 3756 case ROFFDEF_REN: 3757 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3758 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3759 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3760 break; 3761 case ROFFDEF_STD: 3762 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3763 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3764 break; 3765 default: 3766 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3767 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3768 break; 3769 } 3770 return ROFF_IGN; 3771 } 3772 3773 static int 3774 roff_shift(ROFF_ARGS) 3775 { 3776 struct mctx *ctx; 3777 int levels, i; 3778 3779 levels = 1; 
3780 if (buf->buf[pos] != '\0' && 3781 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3782 mandoc_msg(MANDOCERR_CE_NONUM, 3783 ln, pos, "shift %s", buf->buf + pos); 3784 levels = 1; 3785 } 3786 if (r->mstackpos < 0) { 3787 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3788 return ROFF_IGN; 3789 } 3790 ctx = r->mstack + r->mstackpos; 3791 if (levels > ctx->argc) { 3792 mandoc_msg(MANDOCERR_SHIFT, 3793 ln, pos, "%d, but max is %d", levels, ctx->argc); 3794 levels = ctx->argc; 3795 } 3796 if (levels == 0) 3797 return ROFF_IGN; 3798 for (i = 0; i < levels; i++) 3799 free(ctx->argv[i]); 3800 ctx->argc -= levels; 3801 for (i = 0; i < ctx->argc; i++) 3802 ctx->argv[i] = ctx->argv[i + levels]; 3803 return ROFF_IGN; 3804 } 3805 3806 static int 3807 roff_so(ROFF_ARGS) 3808 { 3809 char *name, *cp; 3810 3811 name = buf->buf + pos; 3812 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3813 3814 /* 3815 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3816 * opening anything that's not in our cwd or anything beneath 3817 * it. Thus, explicitly disallow traversing up the file-system 3818 * or using absolute paths. 3819 */ 3820 3821 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3822 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3823 buf->sz = mandoc_asprintf(&cp, 3824 ".sp\nSee the file %s.\n.sp", name) + 1; 3825 free(buf->buf); 3826 buf->buf = cp; 3827 *offs = 0; 3828 return ROFF_REPARSE; 3829 } 3830 3831 *offs = pos; 3832 return ROFF_SO; 3833 } 3834 3835 /* --- user defined strings and macros ------------------------------------ */ 3836 3837 static int 3838 roff_userdef(ROFF_ARGS) 3839 { 3840 struct mctx *ctx; 3841 char *arg, *ap, *dst, *src; 3842 size_t sz; 3843 3844 /* If the macro is empty, ignore it altogether. */ 3845 3846 if (*r->current_string == '\0') 3847 return ROFF_IGN; 3848 3849 /* Initialize a new macro stack context. 
*/ 3850 3851 if (++r->mstackpos == r->mstacksz) { 3852 r->mstack = mandoc_recallocarray(r->mstack, 3853 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3854 r->mstacksz += 8; 3855 } 3856 ctx = r->mstack + r->mstackpos; 3857 ctx->argsz = 0; 3858 ctx->argc = 0; 3859 ctx->argv = NULL; 3860 3861 /* 3862 * Collect pointers to macro argument strings, 3863 * NUL-terminating them and escaping quotes. 3864 */ 3865 3866 src = buf->buf + pos; 3867 while (*src != '\0') { 3868 if (ctx->argc == ctx->argsz) { 3869 ctx->argsz += 8; 3870 ctx->argv = mandoc_reallocarray(ctx->argv, 3871 ctx->argsz, sizeof(*ctx->argv)); 3872 } 3873 arg = roff_getarg(r, &src, ln, &pos); 3874 sz = 1; /* For the terminating NUL. */ 3875 for (ap = arg; *ap != '\0'; ap++) 3876 sz += *ap == '"' ? 4 : 1; 3877 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3878 for (ap = arg; *ap != '\0'; ap++) { 3879 if (*ap == '"') { 3880 memcpy(dst, "\\(dq", 4); 3881 dst += 4; 3882 } else 3883 *dst++ = *ap; 3884 } 3885 *dst = '\0'; 3886 free(arg); 3887 } 3888 3889 /* Replace the macro invocation by the macro definition. */ 3890 3891 free(buf->buf); 3892 buf->buf = mandoc_strdup(r->current_string); 3893 buf->sz = strlen(buf->buf) + 1; 3894 *offs = 0; 3895 3896 return buf->buf[buf->sz - 2] == '\n' ? 3897 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3898 } 3899 3900 /* 3901 * Calling a high-level macro that was renamed with .rn. 3902 * r->current_string has already been set up by roff_parse(). 3903 */ 3904 static int 3905 roff_renamed(ROFF_ARGS) 3906 { 3907 char *nbuf; 3908 3909 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3910 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3911 free(buf->buf); 3912 buf->buf = nbuf; 3913 *offs = 0; 3914 return ROFF_CONT; 3915 } 3916 3917 /* 3918 * Measure the length in bytes of the roff identifier at *cpp 3919 * and advance the pointer to the next word. 
3920 */ 3921 static size_t 3922 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3923 { 3924 char *name, *cp; 3925 size_t namesz; 3926 3927 name = *cpp; 3928 if (*name == '\0') 3929 return 0; 3930 3931 /* Advance cp to the byte after the end of the name. */ 3932 3933 for (cp = name; 1; cp++) { 3934 namesz = cp - name; 3935 if (*cp == '\0') 3936 break; 3937 if (*cp == ' ' || *cp == '\t') { 3938 cp++; 3939 break; 3940 } 3941 if (*cp != '\\') 3942 continue; 3943 if (cp[1] == '{' || cp[1] == '}') 3944 break; 3945 if (*++cp == '\\') 3946 continue; 3947 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 3948 "%.*s", (int)(cp - name + 1), name); 3949 mandoc_escape((const char **)&cp, NULL, NULL); 3950 break; 3951 } 3952 3953 /* Read past spaces. */ 3954 3955 while (*cp == ' ') 3956 cp++; 3957 3958 *cpp = cp; 3959 return namesz; 3960 } 3961 3962 /* 3963 * Store *string into the user-defined string called *name. 3964 * To clear an existing entry, call with (*r, *name, NULL, 0). 3965 * append == 0: replace mode 3966 * append == 1: single-line append mode 3967 * append == 2: multiline append mode, append '\n' after each call 3968 */ 3969 static void 3970 roff_setstr(struct roff *r, const char *name, const char *string, 3971 int append) 3972 { 3973 size_t namesz; 3974 3975 namesz = strlen(name); 3976 roff_setstrn(&r->strtab, name, namesz, string, 3977 string ? strlen(string) : 0, append); 3978 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3979 } 3980 3981 static void 3982 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3983 const char *string, size_t stringsz, int append) 3984 { 3985 struct roffkv *n; 3986 char *c; 3987 int i; 3988 size_t oldch, newch; 3989 3990 /* Search for an existing string with the same name. */ 3991 n = *r; 3992 3993 while (n && (namesz != n->key.sz || 3994 strncmp(n->key.p, name, namesz))) 3995 n = n->next; 3996 3997 if (NULL == n) { 3998 /* Create a new string table entry. 
*/ 3999 n = mandoc_malloc(sizeof(struct roffkv)); 4000 n->key.p = mandoc_strndup(name, namesz); 4001 n->key.sz = namesz; 4002 n->val.p = NULL; 4003 n->val.sz = 0; 4004 n->next = *r; 4005 *r = n; 4006 } else if (0 == append) { 4007 free(n->val.p); 4008 n->val.p = NULL; 4009 n->val.sz = 0; 4010 } 4011 4012 if (NULL == string) 4013 return; 4014 4015 /* 4016 * One additional byte for the '\n' in multiline mode, 4017 * and one for the terminating '\0'. 4018 */ 4019 newch = stringsz + (1 < append ? 2u : 1u); 4020 4021 if (NULL == n->val.p) { 4022 n->val.p = mandoc_malloc(newch); 4023 *n->val.p = '\0'; 4024 oldch = 0; 4025 } else { 4026 oldch = n->val.sz; 4027 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 4028 } 4029 4030 /* Skip existing content in the destination buffer. */ 4031 c = n->val.p + (int)oldch; 4032 4033 /* Append new content to the destination buffer. */ 4034 i = 0; 4035 while (i < (int)stringsz) { 4036 /* 4037 * Rudimentary roff copy mode: 4038 * Handle escaped backslashes. 4039 */ 4040 if ('\\' == string[i] && '\\' == string[i + 1]) 4041 i++; 4042 *c++ = string[i++]; 4043 } 4044 4045 /* Append terminating bytes. 
*/ 4046 if (1 < append) 4047 *c++ = '\n'; 4048 4049 *c = '\0'; 4050 n->val.sz = (int)(c - n->val.p); 4051 } 4052 4053 static const char * 4054 roff_getstrn(struct roff *r, const char *name, size_t len, 4055 int *deftype) 4056 { 4057 const struct roffkv *n; 4058 int found, i; 4059 enum roff_tok tok; 4060 4061 found = 0; 4062 for (n = r->strtab; n != NULL; n = n->next) { 4063 if (strncmp(name, n->key.p, len) != 0 || 4064 n->key.p[len] != '\0' || n->val.p == NULL) 4065 continue; 4066 if (*deftype & ROFFDEF_USER) { 4067 *deftype = ROFFDEF_USER; 4068 return n->val.p; 4069 } else { 4070 found = 1; 4071 break; 4072 } 4073 } 4074 for (n = r->rentab; n != NULL; n = n->next) { 4075 if (strncmp(name, n->key.p, len) != 0 || 4076 n->key.p[len] != '\0' || n->val.p == NULL) 4077 continue; 4078 if (*deftype & ROFFDEF_REN) { 4079 *deftype = ROFFDEF_REN; 4080 return n->val.p; 4081 } else { 4082 found = 1; 4083 break; 4084 } 4085 } 4086 for (i = 0; i < PREDEFS_MAX; i++) { 4087 if (strncmp(name, predefs[i].name, len) != 0 || 4088 predefs[i].name[len] != '\0') 4089 continue; 4090 if (*deftype & ROFFDEF_PRE) { 4091 *deftype = ROFFDEF_PRE; 4092 return predefs[i].str; 4093 } else { 4094 found = 1; 4095 break; 4096 } 4097 } 4098 if (r->man->meta.macroset != MACROSET_MAN) { 4099 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4100 if (strncmp(name, roff_name[tok], len) != 0 || 4101 roff_name[tok][len] != '\0') 4102 continue; 4103 if (*deftype & ROFFDEF_STD) { 4104 *deftype = ROFFDEF_STD; 4105 return NULL; 4106 } else { 4107 found = 1; 4108 break; 4109 } 4110 } 4111 } 4112 if (r->man->meta.macroset != MACROSET_MDOC) { 4113 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4114 if (strncmp(name, roff_name[tok], len) != 0 || 4115 roff_name[tok][len] != '\0') 4116 continue; 4117 if (*deftype & ROFFDEF_STD) { 4118 *deftype = ROFFDEF_STD; 4119 return NULL; 4120 } else { 4121 found = 1; 4122 break; 4123 } 4124 } 4125 } 4126 4127 if (found == 0 && *deftype != ROFFDEF_ANY) { 4128 if (*deftype & ROFFDEF_REN) { 
4129 /* 4130 * This might still be a request, 4131 * so do not treat it as undefined yet. 4132 */ 4133 *deftype = ROFFDEF_UNDEF; 4134 return NULL; 4135 } 4136 4137 /* Using an undefined string defines it to be empty. */ 4138 4139 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4140 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4141 } 4142 4143 *deftype = 0; 4144 return NULL; 4145 } 4146 4147 static void 4148 roff_freestr(struct roffkv *r) 4149 { 4150 struct roffkv *n, *nn; 4151 4152 for (n = r; n; n = nn) { 4153 free(n->key.p); 4154 free(n->val.p); 4155 nn = n->next; 4156 free(n); 4157 } 4158 } 4159 4160 /* --- accessors and utility functions ------------------------------------ */ 4161 4162 /* 4163 * Duplicate an input string, making the appropriate character 4164 * conversations (as stipulated by `tr') along the way. 4165 * Returns a heap-allocated string with all the replacements made. 4166 */ 4167 char * 4168 roff_strdup(const struct roff *r, const char *p) 4169 { 4170 const struct roffkv *cp; 4171 char *res; 4172 const char *pp; 4173 size_t ssz, sz; 4174 enum mandoc_esc esc; 4175 4176 if (NULL == r->xmbtab && NULL == r->xtab) 4177 return mandoc_strdup(p); 4178 else if ('\0' == *p) 4179 return mandoc_strdup(""); 4180 4181 /* 4182 * Step through each character looking for term matches 4183 * (remember that a `tr' can be invoked with an escape, which is 4184 * a glyph but the escape is multi-character). 4185 * We only do this if the character hash has been initialised 4186 * and the string is >0 length. 
4187 */ 4188 4189 res = NULL; 4190 ssz = 0; 4191 4192 while ('\0' != *p) { 4193 assert((unsigned int)*p < 128); 4194 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4195 sz = r->xtab[(int)*p].sz; 4196 res = mandoc_realloc(res, ssz + sz + 1); 4197 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4198 ssz += sz; 4199 p++; 4200 continue; 4201 } else if ('\\' != *p) { 4202 res = mandoc_realloc(res, ssz + 2); 4203 res[ssz++] = *p++; 4204 continue; 4205 } 4206 4207 /* Search for term matches. */ 4208 for (cp = r->xmbtab; cp; cp = cp->next) 4209 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4210 break; 4211 4212 if (NULL != cp) { 4213 /* 4214 * A match has been found. 4215 * Append the match to the array and move 4216 * forward by its keysize. 4217 */ 4218 res = mandoc_realloc(res, 4219 ssz + cp->val.sz + 1); 4220 memcpy(res + ssz, cp->val.p, cp->val.sz); 4221 ssz += cp->val.sz; 4222 p += (int)cp->key.sz; 4223 continue; 4224 } 4225 4226 /* 4227 * Handle escapes carefully: we need to copy 4228 * over just the escape itself, or else we might 4229 * do replacements within the escape itself. 4230 * Make sure to pass along the bogus string. 4231 */ 4232 pp = p++; 4233 esc = mandoc_escape(&p, NULL, NULL); 4234 if (ESCAPE_ERROR == esc) { 4235 sz = strlen(pp); 4236 res = mandoc_realloc(res, ssz + sz + 1); 4237 memcpy(res + ssz, pp, sz); 4238 break; 4239 } 4240 /* 4241 * We bail out on bad escapes. 4242 * No need to warn: we already did so when 4243 * roff_expand() was called. 4244 */ 4245 sz = (int)(p - pp); 4246 res = mandoc_realloc(res, ssz + sz + 1); 4247 memcpy(res + ssz, pp, sz); 4248 ssz += sz; 4249 } 4250 4251 res[(int)ssz] = '\0'; 4252 return res; 4253 } 4254 4255 int 4256 roff_getformat(const struct roff *r) 4257 { 4258 4259 return r->format; 4260 } 4261 4262 /* 4263 * Find out whether a line is a macro line or not. 4264 * If it is, adjust the current position and return one; if it isn't, 4265 * return zero and don't change the current position. 
4266 * If the control character has been set with `.cc', then let that grain 4267 * precedence. 4268 * This is slighly contrary to groff, where using the non-breaking 4269 * control character when `cc' has been invoked will cause the 4270 * non-breaking macro contents to be printed verbatim. 4271 */ 4272 int 4273 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4274 { 4275 int pos; 4276 4277 pos = *ppos; 4278 4279 if (r->control != '\0' && cp[pos] == r->control) 4280 pos++; 4281 else if (r->control != '\0') 4282 return 0; 4283 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4284 pos += 2; 4285 else if ('.' == cp[pos] || '\'' == cp[pos]) 4286 pos++; 4287 else 4288 return 0; 4289 4290 while (' ' == cp[pos] || '\t' == cp[pos]) 4291 pos++; 4292 4293 *ppos = pos; 4294 return 1; 4295 } 4296