1 /* $Id: roff.c,v 1.263 2015/02/21 14:46:58 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011, 2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "libmandoc.h" 32 #include "libroff.h" 33 34 /* Maximum number of nested if-else conditionals. */ 35 #define RSTACK_MAX 128 36 37 /* Maximum number of string expansions per line, to break infinite loops. 
*/ 38 #define EXPAND_LIMIT 1000 39 40 enum rofft { 41 ROFF_ab, 42 ROFF_ad, 43 ROFF_af, 44 ROFF_aln, 45 ROFF_als, 46 ROFF_am, 47 ROFF_am1, 48 ROFF_ami, 49 ROFF_ami1, 50 ROFF_as, 51 ROFF_as1, 52 ROFF_asciify, 53 ROFF_backtrace, 54 ROFF_bd, 55 ROFF_bleedat, 56 ROFF_blm, 57 ROFF_box, 58 ROFF_boxa, 59 ROFF_bp, 60 ROFF_BP, 61 /* MAN_br, MDOC_br */ 62 ROFF_break, 63 ROFF_breakchar, 64 ROFF_brnl, 65 ROFF_brp, 66 ROFF_brpnl, 67 ROFF_c2, 68 ROFF_cc, 69 ROFF_ce, 70 ROFF_cf, 71 ROFF_cflags, 72 ROFF_ch, 73 ROFF_char, 74 ROFF_chop, 75 ROFF_class, 76 ROFF_close, 77 ROFF_CL, 78 ROFF_color, 79 ROFF_composite, 80 ROFF_continue, 81 ROFF_cp, 82 ROFF_cropat, 83 ROFF_cs, 84 ROFF_cu, 85 ROFF_da, 86 ROFF_dch, 87 ROFF_Dd, 88 ROFF_de, 89 ROFF_de1, 90 ROFF_defcolor, 91 ROFF_dei, 92 ROFF_dei1, 93 ROFF_device, 94 ROFF_devicem, 95 ROFF_di, 96 ROFF_do, 97 ROFF_ds, 98 ROFF_ds1, 99 ROFF_dwh, 100 ROFF_dt, 101 ROFF_ec, 102 ROFF_ecr, 103 ROFF_ecs, 104 ROFF_el, 105 ROFF_em, 106 ROFF_EN, 107 ROFF_eo, 108 ROFF_EP, 109 ROFF_EQ, 110 ROFF_errprint, 111 ROFF_ev, 112 ROFF_evc, 113 ROFF_ex, 114 ROFF_fallback, 115 ROFF_fam, 116 ROFF_fc, 117 ROFF_fchar, 118 ROFF_fcolor, 119 ROFF_fdeferlig, 120 ROFF_feature, 121 /* MAN_fi; ignored in mdoc(7) */ 122 ROFF_fkern, 123 ROFF_fl, 124 ROFF_flig, 125 ROFF_fp, 126 ROFF_fps, 127 ROFF_fschar, 128 ROFF_fspacewidth, 129 ROFF_fspecial, 130 /* MAN_ft; ignored in mdoc(7) */ 131 ROFF_ftr, 132 ROFF_fzoom, 133 ROFF_gcolor, 134 ROFF_hc, 135 ROFF_hcode, 136 ROFF_hidechar, 137 ROFF_hla, 138 ROFF_hlm, 139 ROFF_hpf, 140 ROFF_hpfa, 141 ROFF_hpfcode, 142 ROFF_hw, 143 ROFF_hy, 144 ROFF_hylang, 145 ROFF_hylen, 146 ROFF_hym, 147 ROFF_hypp, 148 ROFF_hys, 149 ROFF_ie, 150 ROFF_if, 151 ROFF_ig, 152 /* MAN_in; ignored in mdoc(7) */ 153 ROFF_index, 154 ROFF_it, 155 ROFF_itc, 156 ROFF_IX, 157 ROFF_kern, 158 ROFF_kernafter, 159 ROFF_kernbefore, 160 ROFF_kernpair, 161 ROFF_lc, 162 ROFF_lc_ctype, 163 ROFF_lds, 164 ROFF_length, 165 ROFF_letadj, 166 ROFF_lf, 167 ROFF_lg, 168 ROFF_lhang, 169 
ROFF_linetabs, 170 /* MAN_ll, MDOC_ll */ 171 ROFF_lnr, 172 ROFF_lnrf, 173 ROFF_lpfx, 174 ROFF_ls, 175 ROFF_lsm, 176 ROFF_lt, 177 ROFF_mc, 178 ROFF_mediasize, 179 ROFF_minss, 180 ROFF_mk, 181 ROFF_mso, 182 ROFF_na, 183 ROFF_ne, 184 /* MAN_nf; ignored in mdoc(7) */ 185 ROFF_nh, 186 ROFF_nhychar, 187 ROFF_nm, 188 ROFF_nn, 189 ROFF_nop, 190 ROFF_nr, 191 ROFF_nrf, 192 ROFF_nroff, 193 ROFF_ns, 194 ROFF_nx, 195 ROFF_open, 196 ROFF_opena, 197 ROFF_os, 198 ROFF_output, 199 ROFF_padj, 200 ROFF_papersize, 201 ROFF_pc, 202 ROFF_pev, 203 ROFF_pi, 204 ROFF_PI, 205 ROFF_pl, 206 ROFF_pm, 207 ROFF_pn, 208 ROFF_pnr, 209 ROFF_po, 210 ROFF_ps, 211 ROFF_psbb, 212 ROFF_pshape, 213 ROFF_pso, 214 ROFF_ptr, 215 ROFF_pvs, 216 ROFF_rchar, 217 ROFF_rd, 218 ROFF_recursionlimit, 219 ROFF_return, 220 ROFF_rfschar, 221 ROFF_rhang, 222 ROFF_rj, 223 ROFF_rm, 224 ROFF_rn, 225 ROFF_rnn, 226 ROFF_rr, 227 ROFF_rs, 228 ROFF_rt, 229 ROFF_schar, 230 ROFF_sentchar, 231 ROFF_shc, 232 ROFF_shift, 233 ROFF_sizes, 234 ROFF_so, 235 /* MAN_sp, MDOC_sp */ 236 ROFF_spacewidth, 237 ROFF_special, 238 ROFF_spreadwarn, 239 ROFF_ss, 240 ROFF_sty, 241 ROFF_substring, 242 ROFF_sv, 243 ROFF_sy, 244 ROFF_T_, 245 ROFF_ta, 246 ROFF_tc, 247 ROFF_TE, 248 ROFF_TH, 249 ROFF_ti, 250 ROFF_tkf, 251 ROFF_tl, 252 ROFF_tm, 253 ROFF_tm1, 254 ROFF_tmc, 255 ROFF_tr, 256 ROFF_track, 257 ROFF_transchar, 258 ROFF_trf, 259 ROFF_trimat, 260 ROFF_trin, 261 ROFF_trnt, 262 ROFF_troff, 263 ROFF_TS, 264 ROFF_uf, 265 ROFF_ul, 266 ROFF_unformat, 267 ROFF_unwatch, 268 ROFF_unwatchn, 269 ROFF_vpt, 270 ROFF_vs, 271 ROFF_warn, 272 ROFF_warnscale, 273 ROFF_watch, 274 ROFF_watchlength, 275 ROFF_watchn, 276 ROFF_wh, 277 ROFF_while, 278 ROFF_write, 279 ROFF_writec, 280 ROFF_writem, 281 ROFF_xflag, 282 ROFF_cblock, 283 ROFF_USERDEF, 284 ROFF_MAX 285 }; 286 287 /* 288 * An incredibly-simple string buffer. 
289 */ 290 struct roffstr { 291 char *p; /* nil-terminated buffer */ 292 size_t sz; /* saved strlen(p) */ 293 }; 294 295 /* 296 * A key-value roffstr pair as part of a singly-linked list. 297 */ 298 struct roffkv { 299 struct roffstr key; 300 struct roffstr val; 301 struct roffkv *next; /* next in list */ 302 }; 303 304 /* 305 * A single number register as part of a singly-linked list. 306 */ 307 struct roffreg { 308 struct roffstr key; 309 int val; 310 struct roffreg *next; 311 }; 312 313 struct roff { 314 struct mparse *parse; /* parse point */ 315 const struct mchars *mchars; /* character table */ 316 struct roffnode *last; /* leaf of stack */ 317 int *rstack; /* stack of inverted `ie' values */ 318 struct roffreg *regtab; /* number registers */ 319 struct roffkv *strtab; /* user-defined strings & macros */ 320 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 321 struct roffstr *xtab; /* single-byte trans table (`tr') */ 322 const char *current_string; /* value of last called user macro */ 323 struct tbl_node *first_tbl; /* first table parsed */ 324 struct tbl_node *last_tbl; /* last table parsed */ 325 struct tbl_node *tbl; /* current table being parsed */ 326 struct eqn_node *last_eqn; /* last equation parsed */ 327 struct eqn_node *first_eqn; /* first equation parsed */ 328 struct eqn_node *eqn; /* current equation being parsed */ 329 int eqn_inline; /* current equation is inline */ 330 int options; /* parse options */ 331 int rstacksz; /* current size limit of rstack */ 332 int rstackpos; /* position in rstack */ 333 int format; /* current file in mdoc or man format */ 334 char control; /* control character */ 335 }; 336 337 struct roffnode { 338 enum rofft tok; /* type of node */ 339 struct roffnode *parent; /* up one in stack */ 340 int line; /* parse line */ 341 int col; /* parse col */ 342 char *name; /* node name, e.g. 
macro name */ 343 char *end; /* end-rules: custom token */ 344 int endspan; /* end-rules: next-line or infty */ 345 int rule; /* current evaluation rule */ 346 }; 347 348 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 349 enum rofft tok, /* tok of macro */ \ 350 struct buf *buf, /* input buffer */ \ 351 int ln, /* parse line */ \ 352 int ppos, /* original pos in buffer */ \ 353 int pos, /* current pos in buffer */ \ 354 int *offs /* reset offset of buffer data */ 355 356 typedef enum rofferr (*roffproc)(ROFF_ARGS); 357 358 struct roffmac { 359 const char *name; /* macro name */ 360 roffproc proc; /* process new macro */ 361 roffproc text; /* process as child text of macro */ 362 roffproc sub; /* process as child of macro */ 363 int flags; 364 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 365 struct roffmac *next; 366 }; 367 368 struct predef { 369 const char *name; /* predefined input name */ 370 const char *str; /* replacement symbol */ 371 }; 372 373 #define PREDEF(__name, __str) \ 374 { (__name), (__str) }, 375 376 static enum rofft roffhash_find(const char *, size_t); 377 static void roffhash_init(void); 378 static void roffnode_cleanscope(struct roff *); 379 static void roffnode_pop(struct roff *); 380 static void roffnode_push(struct roff *, enum rofft, 381 const char *, int, int); 382 static enum rofferr roff_block(ROFF_ARGS); 383 static enum rofferr roff_block_text(ROFF_ARGS); 384 static enum rofferr roff_block_sub(ROFF_ARGS); 385 static enum rofferr roff_brp(ROFF_ARGS); 386 static enum rofferr roff_cblock(ROFF_ARGS); 387 static enum rofferr roff_cc(ROFF_ARGS); 388 static void roff_ccond(struct roff *, int, int); 389 static enum rofferr roff_cond(ROFF_ARGS); 390 static enum rofferr roff_cond_text(ROFF_ARGS); 391 static enum rofferr roff_cond_sub(ROFF_ARGS); 392 static enum rofferr roff_ds(ROFF_ARGS); 393 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int); 394 static int roff_evalcond(struct roff *r, int, 395 const char *, int 
*); 396 static int roff_evalnum(struct roff *, int, 397 const char *, int *, int *, int); 398 static int roff_evalpar(struct roff *, int, 399 const char *, int *, int *, int); 400 static int roff_evalstrcond(const char *, int *); 401 static void roff_free1(struct roff *); 402 static void roff_freereg(struct roffreg *); 403 static void roff_freestr(struct roffkv *); 404 static size_t roff_getname(struct roff *, char **, int, int); 405 static int roff_getnum(const char *, int *, int *, int); 406 static int roff_getop(const char *, int *, char *); 407 static int roff_getregn(const struct roff *, 408 const char *, size_t); 409 static int roff_getregro(const char *name); 410 static const char *roff_getstrn(const struct roff *, 411 const char *, size_t); 412 static enum rofferr roff_insec(ROFF_ARGS); 413 static enum rofferr roff_it(ROFF_ARGS); 414 static enum rofferr roff_line_ignore(ROFF_ARGS); 415 static enum rofferr roff_nr(ROFF_ARGS); 416 static enum rofft roff_parse(struct roff *, char *, int *, 417 int, int); 418 static enum rofferr roff_parsetext(struct buf *, int, int *); 419 static enum rofferr roff_res(struct roff *, struct buf *, int, int); 420 static enum rofferr roff_rm(ROFF_ARGS); 421 static enum rofferr roff_rr(ROFF_ARGS); 422 static void roff_setstr(struct roff *, 423 const char *, const char *, int); 424 static void roff_setstrn(struct roffkv **, const char *, 425 size_t, const char *, size_t, int); 426 static enum rofferr roff_so(ROFF_ARGS); 427 static enum rofferr roff_tr(ROFF_ARGS); 428 static enum rofferr roff_Dd(ROFF_ARGS); 429 static enum rofferr roff_TH(ROFF_ARGS); 430 static enum rofferr roff_TE(ROFF_ARGS); 431 static enum rofferr roff_TS(ROFF_ARGS); 432 static enum rofferr roff_EQ(ROFF_ARGS); 433 static enum rofferr roff_EN(ROFF_ARGS); 434 static enum rofferr roff_T_(ROFF_ARGS); 435 static enum rofferr roff_unsupp(ROFF_ARGS); 436 static enum rofferr roff_userdef(ROFF_ARGS); 437 438 /* See roffhash_find() */ 439 440 #define ASCII_HI 126 441 
#define ASCII_LO 33 442 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 443 444 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 445 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 446 447 static struct roffmac *hash[HASHWIDTH]; 448 449 static struct roffmac roffs[ROFF_MAX] = { 450 { "ab", roff_unsupp, NULL, NULL, 0, NULL }, 451 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 452 { "af", roff_line_ignore, NULL, NULL, 0, NULL }, 453 { "aln", roff_unsupp, NULL, NULL, 0, NULL }, 454 { "als", roff_unsupp, NULL, NULL, 0, NULL }, 455 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 456 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 457 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 458 { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 459 { "as", roff_ds, NULL, NULL, 0, NULL }, 460 { "as1", roff_ds, NULL, NULL, 0, NULL }, 461 { "asciify", roff_unsupp, NULL, NULL, 0, NULL }, 462 { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL }, 463 { "bd", roff_line_ignore, NULL, NULL, 0, NULL }, 464 { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL }, 465 { "blm", roff_unsupp, NULL, NULL, 0, NULL }, 466 { "box", roff_unsupp, NULL, NULL, 0, NULL }, 467 { "boxa", roff_unsupp, NULL, NULL, 0, NULL }, 468 { "bp", roff_line_ignore, NULL, NULL, 0, NULL }, 469 { "BP", roff_unsupp, NULL, NULL, 0, NULL }, 470 { "break", roff_unsupp, NULL, NULL, 0, NULL }, 471 { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL }, 472 { "brnl", roff_line_ignore, NULL, NULL, 0, NULL }, 473 { "brp", roff_brp, NULL, NULL, 0, NULL }, 474 { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL }, 475 { "c2", roff_unsupp, NULL, NULL, 0, NULL }, 476 { "cc", roff_cc, NULL, NULL, 0, NULL }, 477 { "ce", roff_line_ignore, NULL, NULL, 0, NULL }, 478 { "cf", roff_insec, NULL, NULL, 0, NULL }, 479 { "cflags", roff_line_ignore, NULL, NULL, 0, NULL }, 480 { "ch", roff_line_ignore, NULL, NULL, 0, NULL }, 481 { "char", 
roff_unsupp, NULL, NULL, 0, NULL }, 482 { "chop", roff_unsupp, NULL, NULL, 0, NULL }, 483 { "class", roff_line_ignore, NULL, NULL, 0, NULL }, 484 { "close", roff_insec, NULL, NULL, 0, NULL }, 485 { "CL", roff_unsupp, NULL, NULL, 0, NULL }, 486 { "color", roff_line_ignore, NULL, NULL, 0, NULL }, 487 { "composite", roff_unsupp, NULL, NULL, 0, NULL }, 488 { "continue", roff_unsupp, NULL, NULL, 0, NULL }, 489 { "cp", roff_line_ignore, NULL, NULL, 0, NULL }, 490 { "cropat", roff_line_ignore, NULL, NULL, 0, NULL }, 491 { "cs", roff_line_ignore, NULL, NULL, 0, NULL }, 492 { "cu", roff_line_ignore, NULL, NULL, 0, NULL }, 493 { "da", roff_unsupp, NULL, NULL, 0, NULL }, 494 { "dch", roff_unsupp, NULL, NULL, 0, NULL }, 495 { "Dd", roff_Dd, NULL, NULL, 0, NULL }, 496 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 497 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 498 { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL }, 499 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 500 { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 501 { "device", roff_unsupp, NULL, NULL, 0, NULL }, 502 { "devicem", roff_unsupp, NULL, NULL, 0, NULL }, 503 { "di", roff_unsupp, NULL, NULL, 0, NULL }, 504 { "do", roff_unsupp, NULL, NULL, 0, NULL }, 505 { "ds", roff_ds, NULL, NULL, 0, NULL }, 506 { "ds1", roff_ds, NULL, NULL, 0, NULL }, 507 { "dwh", roff_unsupp, NULL, NULL, 0, NULL }, 508 { "dt", roff_unsupp, NULL, NULL, 0, NULL }, 509 { "ec", roff_unsupp, NULL, NULL, 0, NULL }, 510 { "ecr", roff_unsupp, NULL, NULL, 0, NULL }, 511 { "ecs", roff_unsupp, NULL, NULL, 0, NULL }, 512 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 513 { "em", roff_unsupp, NULL, NULL, 0, NULL }, 514 { "EN", roff_EN, NULL, NULL, 0, NULL }, 515 { "eo", roff_unsupp, NULL, NULL, 0, NULL }, 516 { "EP", roff_unsupp, NULL, NULL, 0, NULL }, 517 { "EQ", roff_EQ, NULL, NULL, 0, NULL }, 518 { "errprint", roff_line_ignore, NULL, NULL, 0, NULL }, 
519 { "ev", roff_unsupp, NULL, NULL, 0, NULL }, 520 { "evc", roff_unsupp, NULL, NULL, 0, NULL }, 521 { "ex", roff_unsupp, NULL, NULL, 0, NULL }, 522 { "fallback", roff_line_ignore, NULL, NULL, 0, NULL }, 523 { "fam", roff_line_ignore, NULL, NULL, 0, NULL }, 524 { "fc", roff_unsupp, NULL, NULL, 0, NULL }, 525 { "fchar", roff_unsupp, NULL, NULL, 0, NULL }, 526 { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL }, 527 { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL }, 528 { "feature", roff_line_ignore, NULL, NULL, 0, NULL }, 529 { "fkern", roff_line_ignore, NULL, NULL, 0, NULL }, 530 { "fl", roff_line_ignore, NULL, NULL, 0, NULL }, 531 { "flig", roff_line_ignore, NULL, NULL, 0, NULL }, 532 { "fp", roff_line_ignore, NULL, NULL, 0, NULL }, 533 { "fps", roff_line_ignore, NULL, NULL, 0, NULL }, 534 { "fschar", roff_unsupp, NULL, NULL, 0, NULL }, 535 { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, 536 { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL }, 537 { "ftr", roff_line_ignore, NULL, NULL, 0, NULL }, 538 { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL }, 539 { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL }, 540 { "hc", roff_line_ignore, NULL, NULL, 0, NULL }, 541 { "hcode", roff_line_ignore, NULL, NULL, 0, NULL }, 542 { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL }, 543 { "hla", roff_line_ignore, NULL, NULL, 0, NULL }, 544 { "hlm", roff_line_ignore, NULL, NULL, 0, NULL }, 545 { "hpf", roff_line_ignore, NULL, NULL, 0, NULL }, 546 { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL }, 547 { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL }, 548 { "hw", roff_line_ignore, NULL, NULL, 0, NULL }, 549 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 550 { "hylang", roff_line_ignore, NULL, NULL, 0, NULL }, 551 { "hylen", roff_line_ignore, NULL, NULL, 0, NULL }, 552 { "hym", roff_line_ignore, NULL, NULL, 0, NULL }, 553 { "hypp", roff_line_ignore, NULL, NULL, 0, NULL }, 554 { "hys", roff_line_ignore, NULL, NULL, 0, NULL }, 555 { "ie", roff_cond, 
roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 556 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 557 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 558 { "index", roff_unsupp, NULL, NULL, 0, NULL }, 559 { "it", roff_it, NULL, NULL, 0, NULL }, 560 { "itc", roff_unsupp, NULL, NULL, 0, NULL }, 561 { "IX", roff_line_ignore, NULL, NULL, 0, NULL }, 562 { "kern", roff_line_ignore, NULL, NULL, 0, NULL }, 563 { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL }, 564 { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL }, 565 { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL }, 566 { "lc", roff_unsupp, NULL, NULL, 0, NULL }, 567 { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL }, 568 { "lds", roff_unsupp, NULL, NULL, 0, NULL }, 569 { "length", roff_unsupp, NULL, NULL, 0, NULL }, 570 { "letadj", roff_line_ignore, NULL, NULL, 0, NULL }, 571 { "lf", roff_insec, NULL, NULL, 0, NULL }, 572 { "lg", roff_line_ignore, NULL, NULL, 0, NULL }, 573 { "lhang", roff_line_ignore, NULL, NULL, 0, NULL }, 574 { "linetabs", roff_unsupp, NULL, NULL, 0, NULL }, 575 { "lnr", roff_unsupp, NULL, NULL, 0, NULL }, 576 { "lnrf", roff_unsupp, NULL, NULL, 0, NULL }, 577 { "lpfx", roff_unsupp, NULL, NULL, 0, NULL }, 578 { "ls", roff_line_ignore, NULL, NULL, 0, NULL }, 579 { "lsm", roff_unsupp, NULL, NULL, 0, NULL }, 580 { "lt", roff_line_ignore, NULL, NULL, 0, NULL }, 581 { "mc", roff_line_ignore, NULL, NULL, 0, NULL }, 582 { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL }, 583 { "minss", roff_line_ignore, NULL, NULL, 0, NULL }, 584 { "mk", roff_line_ignore, NULL, NULL, 0, NULL }, 585 { "mso", roff_insec, NULL, NULL, 0, NULL }, 586 { "na", roff_line_ignore, NULL, NULL, 0, NULL }, 587 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 588 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 589 { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL }, 590 { "nm", roff_unsupp, NULL, NULL, 0, NULL }, 591 { "nn", roff_unsupp, NULL, NULL, 0, NULL }, 592 { 
"nop", roff_unsupp, NULL, NULL, 0, NULL }, 593 { "nr", roff_nr, NULL, NULL, 0, NULL }, 594 { "nrf", roff_unsupp, NULL, NULL, 0, NULL }, 595 { "nroff", roff_line_ignore, NULL, NULL, 0, NULL }, 596 { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, 597 { "nx", roff_insec, NULL, NULL, 0, NULL }, 598 { "open", roff_insec, NULL, NULL, 0, NULL }, 599 { "opena", roff_insec, NULL, NULL, 0, NULL }, 600 { "os", roff_line_ignore, NULL, NULL, 0, NULL }, 601 { "output", roff_unsupp, NULL, NULL, 0, NULL }, 602 { "padj", roff_line_ignore, NULL, NULL, 0, NULL }, 603 { "papersize", roff_line_ignore, NULL, NULL, 0, NULL }, 604 { "pc", roff_line_ignore, NULL, NULL, 0, NULL }, 605 { "pev", roff_line_ignore, NULL, NULL, 0, NULL }, 606 { "pi", roff_insec, NULL, NULL, 0, NULL }, 607 { "PI", roff_unsupp, NULL, NULL, 0, NULL }, 608 { "pl", roff_line_ignore, NULL, NULL, 0, NULL }, 609 { "pm", roff_line_ignore, NULL, NULL, 0, NULL }, 610 { "pn", roff_line_ignore, NULL, NULL, 0, NULL }, 611 { "pnr", roff_line_ignore, NULL, NULL, 0, NULL }, 612 { "po", roff_line_ignore, NULL, NULL, 0, NULL }, 613 { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, 614 { "psbb", roff_unsupp, NULL, NULL, 0, NULL }, 615 { "pshape", roff_unsupp, NULL, NULL, 0, NULL }, 616 { "pso", roff_insec, NULL, NULL, 0, NULL }, 617 { "ptr", roff_line_ignore, NULL, NULL, 0, NULL }, 618 { "pvs", roff_line_ignore, NULL, NULL, 0, NULL }, 619 { "rchar", roff_unsupp, NULL, NULL, 0, NULL }, 620 { "rd", roff_line_ignore, NULL, NULL, 0, NULL }, 621 { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL }, 622 { "return", roff_unsupp, NULL, NULL, 0, NULL }, 623 { "rfschar", roff_unsupp, NULL, NULL, 0, NULL }, 624 { "rhang", roff_line_ignore, NULL, NULL, 0, NULL }, 625 { "rj", roff_line_ignore, NULL, NULL, 0, NULL }, 626 { "rm", roff_rm, NULL, NULL, 0, NULL }, 627 { "rn", roff_unsupp, NULL, NULL, 0, NULL }, 628 { "rnn", roff_unsupp, NULL, NULL, 0, NULL }, 629 { "rr", roff_rr, NULL, NULL, 0, NULL }, 630 { "rs", roff_line_ignore, NULL, NULL, 
0, NULL }, 631 { "rt", roff_line_ignore, NULL, NULL, 0, NULL }, 632 { "schar", roff_unsupp, NULL, NULL, 0, NULL }, 633 { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL }, 634 { "shc", roff_line_ignore, NULL, NULL, 0, NULL }, 635 { "shift", roff_unsupp, NULL, NULL, 0, NULL }, 636 { "sizes", roff_line_ignore, NULL, NULL, 0, NULL }, 637 { "so", roff_so, NULL, NULL, 0, NULL }, 638 { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, 639 { "special", roff_line_ignore, NULL, NULL, 0, NULL }, 640 { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL }, 641 { "ss", roff_line_ignore, NULL, NULL, 0, NULL }, 642 { "sty", roff_line_ignore, NULL, NULL, 0, NULL }, 643 { "substring", roff_unsupp, NULL, NULL, 0, NULL }, 644 { "sv", roff_line_ignore, NULL, NULL, 0, NULL }, 645 { "sy", roff_insec, NULL, NULL, 0, NULL }, 646 { "T&", roff_T_, NULL, NULL, 0, NULL }, 647 { "ta", roff_unsupp, NULL, NULL, 0, NULL }, 648 { "tc", roff_unsupp, NULL, NULL, 0, NULL }, 649 { "TE", roff_TE, NULL, NULL, 0, NULL }, 650 { "TH", roff_TH, NULL, NULL, 0, NULL }, 651 { "ti", roff_unsupp, NULL, NULL, 0, NULL }, 652 { "tkf", roff_line_ignore, NULL, NULL, 0, NULL }, 653 { "tl", roff_unsupp, NULL, NULL, 0, NULL }, 654 { "tm", roff_line_ignore, NULL, NULL, 0, NULL }, 655 { "tm1", roff_line_ignore, NULL, NULL, 0, NULL }, 656 { "tmc", roff_line_ignore, NULL, NULL, 0, NULL }, 657 { "tr", roff_tr, NULL, NULL, 0, NULL }, 658 { "track", roff_line_ignore, NULL, NULL, 0, NULL }, 659 { "transchar", roff_line_ignore, NULL, NULL, 0, NULL }, 660 { "trf", roff_insec, NULL, NULL, 0, NULL }, 661 { "trimat", roff_line_ignore, NULL, NULL, 0, NULL }, 662 { "trin", roff_unsupp, NULL, NULL, 0, NULL }, 663 { "trnt", roff_unsupp, NULL, NULL, 0, NULL }, 664 { "troff", roff_line_ignore, NULL, NULL, 0, NULL }, 665 { "TS", roff_TS, NULL, NULL, 0, NULL }, 666 { "uf", roff_line_ignore, NULL, NULL, 0, NULL }, 667 { "ul", roff_line_ignore, NULL, NULL, 0, NULL }, 668 { "unformat", roff_unsupp, NULL, NULL, 0, NULL }, 669 { 
"unwatch", roff_line_ignore, NULL, NULL, 0, NULL }, 670 { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL }, 671 { "vpt", roff_line_ignore, NULL, NULL, 0, NULL }, 672 { "vs", roff_line_ignore, NULL, NULL, 0, NULL }, 673 { "warn", roff_line_ignore, NULL, NULL, 0, NULL }, 674 { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL }, 675 { "watch", roff_line_ignore, NULL, NULL, 0, NULL }, 676 { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL }, 677 { "watchn", roff_line_ignore, NULL, NULL, 0, NULL }, 678 { "wh", roff_unsupp, NULL, NULL, 0, NULL }, 679 { "while", roff_unsupp, NULL, NULL, 0, NULL }, 680 { "write", roff_insec, NULL, NULL, 0, NULL }, 681 { "writec", roff_insec, NULL, NULL, 0, NULL }, 682 { "writem", roff_insec, NULL, NULL, 0, NULL }, 683 { "xflag", roff_line_ignore, NULL, NULL, 0, NULL }, 684 { ".", roff_cblock, NULL, NULL, 0, NULL }, 685 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 686 }; 687 688 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */ 689 const char *const __mdoc_reserved[] = { 690 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", 691 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", 692 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx", 693 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq", 694 "Dt", "Dv", "Dx", "D1", 695 "Ec", "Ed", "Ef", "Ek", "El", "Em", 696 "En", "Eo", "Er", "Es", "Ev", "Ex", 697 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx", 698 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", 699 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx", 700 "Oc", "Oo", "Op", "Os", "Ot", "Ox", 701 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", 702 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv", 703 "Sc", "Sh", "Sm", "So", "Sq", 704 "Ss", "St", "Sx", "Sy", 705 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr", 706 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", 707 "%P", "%Q", "%R", "%T", "%U", "%V", 708 NULL 709 }; 710 711 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */ 712 const char *const __man_reserved[] = { 713 "AT", "B", "BI", "BR", "DT", 714 
"EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR", 715 "LP", "OP", "P", "PD", "PP", 716 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", 717 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR", 718 NULL 719 }; 720 721 /* Array of injected predefined strings. */ 722 #define PREDEFS_MAX 38 723 static const struct predef predefs[PREDEFS_MAX] = { 724 #include "predefs.in" 725 }; 726 727 /* See roffhash_find() */ 728 #define ROFF_HASH(p) (p[0] - ASCII_LO) 729 730 static int roffit_lines; /* number of lines to delay */ 731 static char *roffit_macro; /* nil-terminated macro line */ 732 733 734 static void 735 roffhash_init(void) 736 { 737 struct roffmac *n; 738 int buc, i; 739 740 for (i = 0; i < (int)ROFF_USERDEF; i++) { 741 assert(roffs[i].name[0] >= ASCII_LO); 742 assert(roffs[i].name[0] <= ASCII_HI); 743 744 buc = ROFF_HASH(roffs[i].name); 745 746 if (NULL != (n = hash[buc])) { 747 for ( ; n->next; n = n->next) 748 /* Do nothing. */ ; 749 n->next = &roffs[i]; 750 } else 751 hash[buc] = &roffs[i]; 752 } 753 } 754 755 /* 756 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 757 * the nil-terminated string name could be found. 758 */ 759 static enum rofft 760 roffhash_find(const char *p, size_t s) 761 { 762 int buc; 763 struct roffmac *n; 764 765 /* 766 * libroff has an extremely simple hashtable, for the time 767 * being, which simply keys on the first character, which must 768 * be printable, then walks a chain. It works well enough until 769 * optimised. 770 */ 771 772 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 773 return(ROFF_MAX); 774 775 buc = ROFF_HASH(p); 776 777 if (NULL == (n = hash[buc])) 778 return(ROFF_MAX); 779 for ( ; n; n = n->next) 780 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 781 return((enum rofft)(n - roffs)); 782 783 return(ROFF_MAX); 784 } 785 786 /* 787 * Pop the current node off of the stack of roff instructions currently 788 * pending. 
789 */ 790 static void 791 roffnode_pop(struct roff *r) 792 { 793 struct roffnode *p; 794 795 assert(r->last); 796 p = r->last; 797 798 r->last = r->last->parent; 799 free(p->name); 800 free(p->end); 801 free(p); 802 } 803 804 /* 805 * Push a roff node onto the instruction stack. This must later be 806 * removed with roffnode_pop(). 807 */ 808 static void 809 roffnode_push(struct roff *r, enum rofft tok, const char *name, 810 int line, int col) 811 { 812 struct roffnode *p; 813 814 p = mandoc_calloc(1, sizeof(struct roffnode)); 815 p->tok = tok; 816 if (name) 817 p->name = mandoc_strdup(name); 818 p->parent = r->last; 819 p->line = line; 820 p->col = col; 821 p->rule = p->parent ? p->parent->rule : 0; 822 823 r->last = p; 824 } 825 826 static void 827 roff_free1(struct roff *r) 828 { 829 struct tbl_node *tbl; 830 struct eqn_node *e; 831 int i; 832 833 while (NULL != (tbl = r->first_tbl)) { 834 r->first_tbl = tbl->next; 835 tbl_free(tbl); 836 } 837 r->first_tbl = r->last_tbl = r->tbl = NULL; 838 839 while (NULL != (e = r->first_eqn)) { 840 r->first_eqn = e->next; 841 eqn_free(e); 842 } 843 r->first_eqn = r->last_eqn = r->eqn = NULL; 844 845 while (r->last) 846 roffnode_pop(r); 847 848 free (r->rstack); 849 r->rstack = NULL; 850 r->rstacksz = 0; 851 r->rstackpos = -1; 852 853 roff_freereg(r->regtab); 854 r->regtab = NULL; 855 856 roff_freestr(r->strtab); 857 roff_freestr(r->xmbtab); 858 r->strtab = r->xmbtab = NULL; 859 860 if (r->xtab) 861 for (i = 0; i < 128; i++) 862 free(r->xtab[i].p); 863 free(r->xtab); 864 r->xtab = NULL; 865 } 866 867 void 868 roff_reset(struct roff *r) 869 { 870 871 roff_free1(r); 872 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 873 r->control = 0; 874 } 875 876 void 877 roff_free(struct roff *r) 878 { 879 880 roff_free1(r); 881 free(r); 882 } 883 884 struct roff * 885 roff_alloc(struct mparse *parse, const struct mchars *mchars, int options) 886 { 887 struct roff *r; 888 889 r = mandoc_calloc(1, sizeof(struct roff)); 890 r->parse = 
parse; 891 r->mchars = mchars; 892 r->options = options; 893 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 894 r->rstackpos = -1; 895 896 roffhash_init(); 897 898 return(r); 899 } 900 901 /* 902 * In the current line, expand escape sequences that tend to get 903 * used in numerical expressions and conditional requests. 904 * Also check the syntax of the remaining escape sequences. 905 */ 906 static enum rofferr 907 roff_res(struct roff *r, struct buf *buf, int ln, int pos) 908 { 909 char ubuf[24]; /* buffer to print the number */ 910 const char *start; /* start of the string to process */ 911 char *stesc; /* start of an escape sequence ('\\') */ 912 const char *stnam; /* start of the name, after "[(*" */ 913 const char *cp; /* end of the name, e.g. before ']' */ 914 const char *res; /* the string to be substituted */ 915 char *nbuf; /* new buffer to copy buf->buf to */ 916 size_t maxl; /* expected length of the escape name */ 917 size_t naml; /* actual length of the escape name */ 918 enum mandoc_esc esc; /* type of the escape sequence */ 919 int inaml; /* length returned from mandoc_escape() */ 920 int expand_count; /* to avoid infinite loops */ 921 int npos; /* position in numeric expression */ 922 int arg_complete; /* argument not interrupted by eol */ 923 char term; /* character terminating the escape */ 924 925 expand_count = 0; 926 start = buf->buf + pos; 927 stesc = strchr(start, '\0') - 1; 928 while (stesc-- > start) { 929 930 /* Search backwards for the next backslash. */ 931 932 if (*stesc != '\\') 933 continue; 934 935 /* If it is escaped, skip it. */ 936 937 for (cp = stesc - 1; cp >= start; cp--) 938 if (*cp != '\\') 939 break; 940 941 if ((stesc - cp) % 2 == 0) { 942 stesc = (char *)cp; 943 continue; 944 } 945 946 /* Decide whether to expand or to check only. 
*/ 947 948 term = '\0'; 949 cp = stesc + 1; 950 switch (*cp) { 951 case '*': 952 res = NULL; 953 break; 954 case 'B': 955 /* FALLTHROUGH */ 956 case 'w': 957 term = cp[1]; 958 /* FALLTHROUGH */ 959 case 'n': 960 res = ubuf; 961 break; 962 default: 963 esc = mandoc_escape(&cp, &stnam, &inaml); 964 if (esc == ESCAPE_ERROR || 965 (esc == ESCAPE_SPECIAL && 966 mchars_spec2cp(r->mchars, stnam, inaml) < 0)) 967 mandoc_vmsg(MANDOCERR_ESC_BAD, 968 r->parse, ln, (int)(stesc - buf->buf), 969 "%.*s", (int)(cp - stesc), stesc); 970 continue; 971 } 972 973 if (EXPAND_LIMIT < ++expand_count) { 974 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 975 ln, (int)(stesc - buf->buf), NULL); 976 return(ROFF_IGN); 977 } 978 979 /* 980 * The third character decides the length 981 * of the name of the string or register. 982 * Save a pointer to the name. 983 */ 984 985 if (term == '\0') { 986 switch (*++cp) { 987 case '\0': 988 maxl = 0; 989 break; 990 case '(': 991 cp++; 992 maxl = 2; 993 break; 994 case '[': 995 cp++; 996 term = ']'; 997 maxl = 0; 998 break; 999 default: 1000 maxl = 1; 1001 break; 1002 } 1003 } else { 1004 cp += 2; 1005 maxl = 0; 1006 } 1007 stnam = cp; 1008 1009 /* Advance to the end of the name. */ 1010 1011 naml = 0; 1012 arg_complete = 1; 1013 while (maxl == 0 || naml < maxl) { 1014 if (*cp == '\0') { 1015 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 1016 ln, (int)(stesc - buf->buf), stesc); 1017 arg_complete = 0; 1018 break; 1019 } 1020 if (maxl == 0 && *cp == term) { 1021 cp++; 1022 break; 1023 } 1024 if (*cp++ != '\\' || stesc[1] != 'w') { 1025 naml++; 1026 continue; 1027 } 1028 switch (mandoc_escape(&cp, NULL, NULL)) { 1029 case ESCAPE_SPECIAL: 1030 /* FALLTHROUGH */ 1031 case ESCAPE_UNICODE: 1032 /* FALLTHROUGH */ 1033 case ESCAPE_NUMBERED: 1034 /* FALLTHROUGH */ 1035 case ESCAPE_OVERSTRIKE: 1036 naml++; 1037 break; 1038 default: 1039 break; 1040 } 1041 } 1042 1043 /* 1044 * Retrieve the replacement string; if it is 1045 * undefined, resume searching for escapes. 
1046 */ 1047 1048 switch (stesc[1]) { 1049 case '*': 1050 if (arg_complete) 1051 res = roff_getstrn(r, stnam, naml); 1052 break; 1053 case 'B': 1054 npos = 0; 1055 ubuf[0] = arg_complete && 1056 roff_evalnum(r, ln, stnam, &npos, 1057 NULL, ROFFNUM_SCALE) && 1058 stnam + npos + 1 == cp ? '1' : '0'; 1059 ubuf[1] = '\0'; 1060 break; 1061 case 'n': 1062 if (arg_complete) 1063 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1064 roff_getregn(r, stnam, naml)); 1065 else 1066 ubuf[0] = '\0'; 1067 break; 1068 case 'w': 1069 /* use even incomplete args */ 1070 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1071 24 * (int)naml); 1072 break; 1073 } 1074 1075 if (res == NULL) { 1076 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1077 r->parse, ln, (int)(stesc - buf->buf), 1078 "%.*s", (int)naml, stnam); 1079 res = ""; 1080 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1081 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 1082 ln, (int)(stesc - buf->buf), NULL); 1083 return(ROFF_IGN); 1084 } 1085 1086 /* Replace the escape sequence by the string. */ 1087 1088 *stesc = '\0'; 1089 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1090 buf->buf, res, cp) + 1; 1091 1092 /* Prepare for the next replacement. */ 1093 1094 start = nbuf + pos; 1095 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1096 free(buf->buf); 1097 buf->buf = nbuf; 1098 } 1099 return(ROFF_CONT); 1100 } 1101 1102 /* 1103 * Process text streams: 1104 * Convert all breakable hyphens into ASCII_HYPH. 1105 * Decrement and spring input line trap. 1106 */ 1107 static enum rofferr 1108 roff_parsetext(struct buf *buf, int pos, int *offs) 1109 { 1110 size_t sz; 1111 const char *start; 1112 char *p; 1113 int isz; 1114 enum mandoc_esc esc; 1115 1116 start = p = buf->buf + pos; 1117 1118 while (*p != '\0') { 1119 sz = strcspn(p, "-\\"); 1120 p += sz; 1121 1122 if (*p == '\0') 1123 break; 1124 1125 if (*p == '\\') { 1126 /* Skip over escapes. 
*/ 1127 p++; 1128 esc = mandoc_escape((const char **)&p, NULL, NULL); 1129 if (esc == ESCAPE_ERROR) 1130 break; 1131 continue; 1132 } else if (p == start) { 1133 p++; 1134 continue; 1135 } 1136 1137 if (isalpha((unsigned char)p[-1]) && 1138 isalpha((unsigned char)p[1])) 1139 *p = ASCII_HYPH; 1140 p++; 1141 } 1142 1143 /* Spring the input line trap. */ 1144 if (roffit_lines == 1) { 1145 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1146 free(buf->buf); 1147 buf->buf = p; 1148 buf->sz = isz + 1; 1149 *offs = 0; 1150 free(roffit_macro); 1151 roffit_lines = 0; 1152 return(ROFF_REPARSE); 1153 } else if (roffit_lines > 1) 1154 --roffit_lines; 1155 return(ROFF_CONT); 1156 } 1157 1158 enum rofferr 1159 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1160 { 1161 enum rofft t; 1162 enum rofferr e; 1163 int pos; /* parse point */ 1164 int spos; /* saved parse point for messages */ 1165 int ppos; /* original offset in buf->buf */ 1166 int ctl; /* macro line (boolean) */ 1167 1168 ppos = pos = *offs; 1169 1170 /* Handle in-line equation delimiters. */ 1171 1172 if (r->tbl == NULL && 1173 r->last_eqn != NULL && r->last_eqn->delim && 1174 (r->eqn == NULL || r->eqn_inline)) { 1175 e = roff_eqndelim(r, buf, pos); 1176 if (e == ROFF_REPARSE) 1177 return(e); 1178 assert(e == ROFF_CONT); 1179 } 1180 1181 /* Expand some escape sequences. */ 1182 1183 e = roff_res(r, buf, ln, pos); 1184 if (e == ROFF_IGN) 1185 return(e); 1186 assert(e == ROFF_CONT); 1187 1188 ctl = roff_getcontrol(r, buf->buf, &pos); 1189 1190 /* 1191 * First, if a scope is open and we're not a macro, pass the 1192 * text through the macro's filter. 1193 * Equations process all content themselves. 1194 * Tables process almost all content themselves, but we want 1195 * to warn about macros before passing it there. 1196 */ 1197 1198 if (r->last != NULL && ! 
ctl) { 1199 t = r->last->tok; 1200 assert(roffs[t].text); 1201 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1202 assert(e == ROFF_IGN || e == ROFF_CONT); 1203 if (e != ROFF_CONT) 1204 return(e); 1205 } 1206 if (r->eqn != NULL) 1207 return(eqn_read(&r->eqn, ln, buf->buf, ppos, offs)); 1208 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0')) 1209 return(tbl_read(r->tbl, ln, buf->buf, ppos)); 1210 if ( ! ctl) 1211 return(roff_parsetext(buf, pos, offs)); 1212 1213 /* Skip empty request lines. */ 1214 1215 if (buf->buf[pos] == '"') { 1216 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse, 1217 ln, pos, NULL); 1218 return(ROFF_IGN); 1219 } else if (buf->buf[pos] == '\0') 1220 return(ROFF_IGN); 1221 1222 /* 1223 * If a scope is open, go to the child handler for that macro, 1224 * as it may want to preprocess before doing anything with it. 1225 * Don't do so if an equation is open. 1226 */ 1227 1228 if (r->last) { 1229 t = r->last->tok; 1230 assert(roffs[t].sub); 1231 return((*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs)); 1232 } 1233 1234 /* No scope is open. This is a new request or macro. */ 1235 1236 spos = pos; 1237 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1238 1239 /* Tables ignore most macros. */ 1240 1241 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) { 1242 mandoc_msg(MANDOCERR_TBLMACRO, r->parse, 1243 ln, pos, buf->buf + spos); 1244 if (t == ROFF_TS) 1245 return(ROFF_IGN); 1246 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1247 pos++; 1248 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ') 1249 pos++; 1250 return(tbl_read(r->tbl, ln, buf->buf, pos)); 1251 } 1252 1253 /* 1254 * This is neither a roff request nor a user-defined macro. 1255 * Let the standard macro set parsers handle it. 1256 */ 1257 1258 if (t == ROFF_MAX) 1259 return(ROFF_CONT); 1260 1261 /* Execute a roff request or a user defined macro. 
*/ 1262 1263 assert(roffs[t].proc); 1264 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs)); 1265 } 1266 1267 void 1268 roff_endparse(struct roff *r) 1269 { 1270 1271 if (r->last) 1272 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1273 r->last->line, r->last->col, 1274 roffs[r->last->tok].name); 1275 1276 if (r->eqn) { 1277 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1278 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ"); 1279 eqn_end(&r->eqn); 1280 } 1281 1282 if (r->tbl) { 1283 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1284 r->tbl->line, r->tbl->pos, "TS"); 1285 tbl_end(&r->tbl); 1286 } 1287 } 1288 1289 /* 1290 * Parse a roff node's type from the input buffer. This must be in the 1291 * form of ".foo xxx" in the usual way. 1292 */ 1293 static enum rofft 1294 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1295 { 1296 char *cp; 1297 const char *mac; 1298 size_t maclen; 1299 enum rofft t; 1300 1301 cp = buf + *pos; 1302 1303 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1304 return(ROFF_MAX); 1305 1306 mac = cp; 1307 maclen = roff_getname(r, &cp, ln, ppos); 1308 1309 t = (r->current_string = roff_getstrn(r, mac, maclen)) 1310 ? ROFF_USERDEF : roffhash_find(mac, maclen); 1311 1312 if (ROFF_MAX != t) 1313 *pos = cp - buf; 1314 1315 return(t); 1316 } 1317 1318 static enum rofferr 1319 roff_cblock(ROFF_ARGS) 1320 { 1321 1322 /* 1323 * A block-close `..' should only be invoked as a child of an 1324 * ignore macro, otherwise raise a warning and just ignore it. 1325 */ 1326 1327 if (r->last == NULL) { 1328 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1329 ln, ppos, ".."); 1330 return(ROFF_IGN); 1331 } 1332 1333 switch (r->last->tok) { 1334 case ROFF_am: 1335 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1336 /* FALLTHROUGH */ 1337 case ROFF_ami: 1338 /* FALLTHROUGH */ 1339 case ROFF_de: 1340 /* ROFF_de1 is remapped to ROFF_de in roff_block(). 
*/ 1341 /* FALLTHROUGH */ 1342 case ROFF_dei: 1343 /* FALLTHROUGH */ 1344 case ROFF_ig: 1345 break; 1346 default: 1347 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1348 ln, ppos, ".."); 1349 return(ROFF_IGN); 1350 } 1351 1352 if (buf->buf[pos] != '\0') 1353 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 1354 ".. %s", buf->buf + pos); 1355 1356 roffnode_pop(r); 1357 roffnode_cleanscope(r); 1358 return(ROFF_IGN); 1359 1360 } 1361 1362 static void 1363 roffnode_cleanscope(struct roff *r) 1364 { 1365 1366 while (r->last) { 1367 if (--r->last->endspan != 0) 1368 break; 1369 roffnode_pop(r); 1370 } 1371 } 1372 1373 static void 1374 roff_ccond(struct roff *r, int ln, int ppos) 1375 { 1376 1377 if (NULL == r->last) { 1378 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1379 ln, ppos, "\\}"); 1380 return; 1381 } 1382 1383 switch (r->last->tok) { 1384 case ROFF_el: 1385 /* FALLTHROUGH */ 1386 case ROFF_ie: 1387 /* FALLTHROUGH */ 1388 case ROFF_if: 1389 break; 1390 default: 1391 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1392 ln, ppos, "\\}"); 1393 return; 1394 } 1395 1396 if (r->last->endspan > -1) { 1397 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1398 ln, ppos, "\\}"); 1399 return; 1400 } 1401 1402 roffnode_pop(r); 1403 roffnode_cleanscope(r); 1404 return; 1405 } 1406 1407 static enum rofferr 1408 roff_block(ROFF_ARGS) 1409 { 1410 const char *name; 1411 char *iname, *cp; 1412 size_t namesz; 1413 1414 /* Ignore groff compatibility mode for now. */ 1415 1416 if (tok == ROFF_de1) 1417 tok = ROFF_de; 1418 else if (tok == ROFF_dei1) 1419 tok = ROFF_dei; 1420 else if (tok == ROFF_am1) 1421 tok = ROFF_am; 1422 else if (tok == ROFF_ami1) 1423 tok = ROFF_ami; 1424 1425 /* Parse the macro name argument. */ 1426 1427 cp = buf->buf + pos; 1428 if (tok == ROFF_ig) { 1429 iname = NULL; 1430 namesz = 0; 1431 } else { 1432 iname = cp; 1433 namesz = roff_getname(r, &cp, ln, ppos); 1434 iname[namesz] = '\0'; 1435 } 1436 1437 /* Resolve the macro name argument if it is indirect. 
*/ 1438 1439 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1440 if ((name = roff_getstrn(r, iname, namesz)) == NULL) { 1441 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1442 r->parse, ln, (int)(iname - buf->buf), 1443 "%.*s", (int)namesz, iname); 1444 namesz = 0; 1445 } else 1446 namesz = strlen(name); 1447 } else 1448 name = iname; 1449 1450 if (namesz == 0 && tok != ROFF_ig) { 1451 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, 1452 ln, ppos, roffs[tok].name); 1453 return(ROFF_IGN); 1454 } 1455 1456 roffnode_push(r, tok, name, ln, ppos); 1457 1458 /* 1459 * At the beginning of a `de' macro, clear the existing string 1460 * with the same name, if there is one. New content will be 1461 * appended from roff_block_text() in multiline mode. 1462 */ 1463 1464 if (tok == ROFF_de || tok == ROFF_dei) 1465 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 1466 1467 if (*cp == '\0') 1468 return(ROFF_IGN); 1469 1470 /* Get the custom end marker. */ 1471 1472 iname = cp; 1473 namesz = roff_getname(r, &cp, ln, ppos); 1474 1475 /* Resolve the end marker if it is indirect. */ 1476 1477 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1478 if ((name = roff_getstrn(r, iname, namesz)) == NULL) { 1479 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1480 r->parse, ln, (int)(iname - buf->buf), 1481 "%.*s", (int)namesz, iname); 1482 namesz = 0; 1483 } else 1484 namesz = strlen(name); 1485 } else 1486 name = iname; 1487 1488 if (namesz) 1489 r->last->end = mandoc_strndup(name, namesz); 1490 1491 if (*cp != '\0') 1492 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 1493 ln, pos, ".%s ... %s", roffs[tok].name, cp); 1494 1495 return(ROFF_IGN); 1496 } 1497 1498 static enum rofferr 1499 roff_block_sub(ROFF_ARGS) 1500 { 1501 enum rofft t; 1502 int i, j; 1503 1504 /* 1505 * First check whether a custom macro exists at this level. If 1506 * it does, then check against it. This is some of groff's 1507 * stranger behaviours. 
If we encountered a custom end-scope 1508 * tag and that tag also happens to be a "real" macro, then we 1509 * need to try interpreting it again as a real macro. If it's 1510 * not, then return ignore. Else continue. 1511 */ 1512 1513 if (r->last->end) { 1514 for (i = pos, j = 0; r->last->end[j]; j++, i++) 1515 if (buf->buf[i] != r->last->end[j]) 1516 break; 1517 1518 if (r->last->end[j] == '\0' && 1519 (buf->buf[i] == '\0' || 1520 buf->buf[i] == ' ' || 1521 buf->buf[i] == '\t')) { 1522 roffnode_pop(r); 1523 roffnode_cleanscope(r); 1524 1525 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 1526 i++; 1527 1528 pos = i; 1529 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 1530 ROFF_MAX) 1531 return(ROFF_RERUN); 1532 return(ROFF_IGN); 1533 } 1534 } 1535 1536 /* 1537 * If we have no custom end-query or lookup failed, then try 1538 * pulling it out of the hashtable. 1539 */ 1540 1541 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1542 1543 if (t != ROFF_cblock) { 1544 if (tok != ROFF_ig) 1545 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 1546 return(ROFF_IGN); 1547 } 1548 1549 assert(roffs[t].proc); 1550 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs)); 1551 } 1552 1553 static enum rofferr 1554 roff_block_text(ROFF_ARGS) 1555 { 1556 1557 if (tok != ROFF_ig) 1558 roff_setstr(r, r->last->name, buf->buf + pos, 2); 1559 1560 return(ROFF_IGN); 1561 } 1562 1563 static enum rofferr 1564 roff_cond_sub(ROFF_ARGS) 1565 { 1566 enum rofft t; 1567 char *ep; 1568 int rr; 1569 1570 rr = r->last->rule; 1571 roffnode_cleanscope(r); 1572 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1573 1574 /* 1575 * Fully handle known macros when they are structurally 1576 * required or when the conditional evaluated to true. 
1577 */ 1578 1579 if ((t != ROFF_MAX) && 1580 (rr || roffs[t].flags & ROFFMAC_STRUCT)) { 1581 assert(roffs[t].proc); 1582 return((*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs)); 1583 } 1584 1585 /* 1586 * If `\}' occurs on a macro line without a preceding macro, 1587 * drop the line completely. 1588 */ 1589 1590 ep = buf->buf + pos; 1591 if (ep[0] == '\\' && ep[1] == '}') 1592 rr = 0; 1593 1594 /* Always check for the closing delimiter `\}'. */ 1595 1596 while ((ep = strchr(ep, '\\')) != NULL) { 1597 if (*(++ep) == '}') { 1598 *ep = '&'; 1599 roff_ccond(r, ln, ep - buf->buf - 1); 1600 } 1601 if (*ep != '\0') 1602 ++ep; 1603 } 1604 return(rr ? ROFF_CONT : ROFF_IGN); 1605 } 1606 1607 static enum rofferr 1608 roff_cond_text(ROFF_ARGS) 1609 { 1610 char *ep; 1611 int rr; 1612 1613 rr = r->last->rule; 1614 roffnode_cleanscope(r); 1615 1616 ep = buf->buf + pos; 1617 while ((ep = strchr(ep, '\\')) != NULL) { 1618 if (*(++ep) == '}') { 1619 *ep = '&'; 1620 roff_ccond(r, ln, ep - buf->buf - 1); 1621 } 1622 if (*ep != '\0') 1623 ++ep; 1624 } 1625 return(rr ? ROFF_CONT : ROFF_IGN); 1626 } 1627 1628 /* 1629 * Parse a single signed integer number. Stop at the first non-digit. 1630 * If there is at least one digit, return success and advance the 1631 * parse point, else return failure and leave the parse point unchanged. 1632 * Ignore overflows, treat them just like the C language. 1633 */ 1634 static int 1635 roff_getnum(const char *v, int *pos, int *res, int flags) 1636 { 1637 int myres, scaled, n, p; 1638 1639 if (NULL == res) 1640 res = &myres; 1641 1642 p = *pos; 1643 n = v[p] == '-'; 1644 if (n || v[p] == '+') 1645 p++; 1646 1647 if (flags & ROFFNUM_WHITE) 1648 while (isspace((unsigned char)v[p])) 1649 p++; 1650 1651 for (*res = 0; isdigit((unsigned char)v[p]); p++) 1652 *res = 10 * *res + v[p] - '0'; 1653 if (p == *pos + n) 1654 return 0; 1655 1656 if (n) 1657 *res = -*res; 1658 1659 /* Each number may be followed by one optional scaling unit. 
*/ 1660 1661 switch (v[p]) { 1662 case 'f': 1663 scaled = *res * 65536; 1664 break; 1665 case 'i': 1666 scaled = *res * 240; 1667 break; 1668 case 'c': 1669 scaled = *res * 240 / 2.54; 1670 break; 1671 case 'v': 1672 /* FALLTHROUGH */ 1673 case 'P': 1674 scaled = *res * 40; 1675 break; 1676 case 'm': 1677 /* FALLTHROUGH */ 1678 case 'n': 1679 scaled = *res * 24; 1680 break; 1681 case 'p': 1682 scaled = *res * 10 / 3; 1683 break; 1684 case 'u': 1685 scaled = *res; 1686 break; 1687 case 'M': 1688 scaled = *res * 6 / 25; 1689 break; 1690 default: 1691 scaled = *res; 1692 p--; 1693 break; 1694 } 1695 if (flags & ROFFNUM_SCALE) 1696 *res = scaled; 1697 1698 *pos = p + 1; 1699 return(1); 1700 } 1701 1702 /* 1703 * Evaluate a string comparison condition. 1704 * The first character is the delimiter. 1705 * Succeed if the string up to its second occurrence 1706 * matches the string up to its third occurrence. 1707 * Advance the cursor after the third occurrence 1708 * or lacking that, to the end of the line. 1709 */ 1710 static int 1711 roff_evalstrcond(const char *v, int *pos) 1712 { 1713 const char *s1, *s2, *s3; 1714 int match; 1715 1716 match = 0; 1717 s1 = v + *pos; /* initial delimiter */ 1718 s2 = s1 + 1; /* for scanning the first string */ 1719 s3 = strchr(s2, *s1); /* for scanning the second string */ 1720 1721 if (NULL == s3) /* found no middle delimiter */ 1722 goto out; 1723 1724 while ('\0' != *++s3) { 1725 if (*s2 != *s3) { /* mismatch */ 1726 s3 = strchr(s3, *s1); 1727 break; 1728 } 1729 if (*s3 == *s1) { /* found the final delimiter */ 1730 match = 1; 1731 break; 1732 } 1733 s2++; 1734 } 1735 1736 out: 1737 if (NULL == s3) 1738 s3 = strchr(s2, '\0'); 1739 else if (*s3 != '\0') 1740 s3++; 1741 *pos = s3 - v; 1742 return(match); 1743 } 1744 1745 /* 1746 * Evaluate an optionally negated single character, numerical, 1747 * or string condition. 
1748 */ 1749 static int 1750 roff_evalcond(struct roff *r, int ln, const char *v, int *pos) 1751 { 1752 int number, savepos, wanttrue; 1753 1754 if ('!' == v[*pos]) { 1755 wanttrue = 0; 1756 (*pos)++; 1757 } else 1758 wanttrue = 1; 1759 1760 switch (v[*pos]) { 1761 case '\0': 1762 return(0); 1763 case 'n': 1764 /* FALLTHROUGH */ 1765 case 'o': 1766 (*pos)++; 1767 return(wanttrue); 1768 case 'c': 1769 /* FALLTHROUGH */ 1770 case 'd': 1771 /* FALLTHROUGH */ 1772 case 'e': 1773 /* FALLTHROUGH */ 1774 case 'r': 1775 /* FALLTHROUGH */ 1776 case 't': 1777 /* FALLTHROUGH */ 1778 case 'v': 1779 (*pos)++; 1780 return(!wanttrue); 1781 default: 1782 break; 1783 } 1784 1785 savepos = *pos; 1786 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 1787 return((number > 0) == wanttrue); 1788 else if (*pos == savepos) 1789 return(roff_evalstrcond(v, pos) == wanttrue); 1790 else 1791 return (0); 1792 } 1793 1794 static enum rofferr 1795 roff_line_ignore(ROFF_ARGS) 1796 { 1797 1798 return(ROFF_IGN); 1799 } 1800 1801 static enum rofferr 1802 roff_insec(ROFF_ARGS) 1803 { 1804 1805 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse, 1806 ln, ppos, roffs[tok].name); 1807 return(ROFF_IGN); 1808 } 1809 1810 static enum rofferr 1811 roff_unsupp(ROFF_ARGS) 1812 { 1813 1814 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse, 1815 ln, ppos, roffs[tok].name); 1816 return(ROFF_IGN); 1817 } 1818 1819 static enum rofferr 1820 roff_cond(ROFF_ARGS) 1821 { 1822 1823 roffnode_push(r, tok, NULL, ln, ppos); 1824 1825 /* 1826 * An `.el' has no conditional body: it will consume the value 1827 * of the current rstack entry set in prior `ie' calls or 1828 * defaults to DENY. 1829 * 1830 * If we're not an `el', however, then evaluate the conditional. 1831 */ 1832 1833 r->last->rule = tok == ROFF_el ? 1834 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 1835 roff_evalcond(r, ln, buf->buf, &pos); 1836 1837 /* 1838 * An if-else will put the NEGATION of the current evaluated 1839 * conditional into the stack of rules. 
1840 */ 1841 1842 if (tok == ROFF_ie) { 1843 if (r->rstackpos + 1 == r->rstacksz) { 1844 r->rstacksz += 16; 1845 r->rstack = mandoc_reallocarray(r->rstack, 1846 r->rstacksz, sizeof(int)); 1847 } 1848 r->rstack[++r->rstackpos] = !r->last->rule; 1849 } 1850 1851 /* If the parent has false as its rule, then so do we. */ 1852 1853 if (r->last->parent && !r->last->parent->rule) 1854 r->last->rule = 0; 1855 1856 /* 1857 * Determine scope. 1858 * If there is nothing on the line after the conditional, 1859 * not even whitespace, use next-line scope. 1860 */ 1861 1862 if (buf->buf[pos] == '\0') { 1863 r->last->endspan = 2; 1864 goto out; 1865 } 1866 1867 while (buf->buf[pos] == ' ') 1868 pos++; 1869 1870 /* An opening brace requests multiline scope. */ 1871 1872 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 1873 r->last->endspan = -1; 1874 pos += 2; 1875 goto out; 1876 } 1877 1878 /* 1879 * Anything else following the conditional causes 1880 * single-line scope. Warn if the scope contains 1881 * nothing but trailing whitespace. 1882 */ 1883 1884 if (buf->buf[pos] == '\0') 1885 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse, 1886 ln, ppos, roffs[tok].name); 1887 1888 r->last->endspan = 1; 1889 1890 out: 1891 *offs = pos; 1892 return(ROFF_RERUN); 1893 } 1894 1895 static enum rofferr 1896 roff_ds(ROFF_ARGS) 1897 { 1898 char *string; 1899 const char *name; 1900 size_t namesz; 1901 1902 /* Ignore groff compatibility mode for now. */ 1903 1904 if (tok == ROFF_ds1) 1905 tok = ROFF_ds; 1906 else if (tok == ROFF_as1) 1907 tok = ROFF_as; 1908 1909 /* 1910 * The first word is the name of the string. 1911 * If it is empty or terminated by an escape sequence, 1912 * abort the `ds' request without defining anything. 1913 */ 1914 1915 name = string = buf->buf + pos; 1916 if (*name == '\0') 1917 return(ROFF_IGN); 1918 1919 namesz = roff_getname(r, &string, ln, pos); 1920 if (name[namesz] == '\\') 1921 return(ROFF_IGN); 1922 1923 /* Read past the initial double-quote, if any. 
*/ 1924 if (*string == '"') 1925 string++; 1926 1927 /* The rest is the value. */ 1928 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 1929 ROFF_as == tok); 1930 return(ROFF_IGN); 1931 } 1932 1933 /* 1934 * Parse a single operator, one or two characters long. 1935 * If the operator is recognized, return success and advance the 1936 * parse point, else return failure and leave the parse point unchanged. 1937 */ 1938 static int 1939 roff_getop(const char *v, int *pos, char *res) 1940 { 1941 1942 *res = v[*pos]; 1943 1944 switch (*res) { 1945 case '+': 1946 /* FALLTHROUGH */ 1947 case '-': 1948 /* FALLTHROUGH */ 1949 case '*': 1950 /* FALLTHROUGH */ 1951 case '/': 1952 /* FALLTHROUGH */ 1953 case '%': 1954 /* FALLTHROUGH */ 1955 case '&': 1956 /* FALLTHROUGH */ 1957 case ':': 1958 break; 1959 case '<': 1960 switch (v[*pos + 1]) { 1961 case '=': 1962 *res = 'l'; 1963 (*pos)++; 1964 break; 1965 case '>': 1966 *res = '!'; 1967 (*pos)++; 1968 break; 1969 case '?': 1970 *res = 'i'; 1971 (*pos)++; 1972 break; 1973 default: 1974 break; 1975 } 1976 break; 1977 case '>': 1978 switch (v[*pos + 1]) { 1979 case '=': 1980 *res = 'g'; 1981 (*pos)++; 1982 break; 1983 case '?': 1984 *res = 'a'; 1985 (*pos)++; 1986 break; 1987 default: 1988 break; 1989 } 1990 break; 1991 case '=': 1992 if ('=' == v[*pos + 1]) 1993 (*pos)++; 1994 break; 1995 default: 1996 return(0); 1997 } 1998 (*pos)++; 1999 2000 return(*res); 2001 } 2002 2003 /* 2004 * Evaluate either a parenthesized numeric expression 2005 * or a single signed integer number. 2006 */ 2007 static int 2008 roff_evalpar(struct roff *r, int ln, 2009 const char *v, int *pos, int *res, int flags) 2010 { 2011 2012 if ('(' != v[*pos]) 2013 return(roff_getnum(v, pos, res, flags)); 2014 2015 (*pos)++; 2016 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2017 return(0); 2018 2019 /* 2020 * Omission of the closing parenthesis 2021 * is an error in validation mode, 2022 * but ignored in evaluation mode. 
2023 */ 2024 2025 if (')' == v[*pos]) 2026 (*pos)++; 2027 else if (NULL == res) 2028 return(0); 2029 2030 return(1); 2031 } 2032 2033 /* 2034 * Evaluate a complete numeric expression. 2035 * Proceed left to right, there is no concept of precedence. 2036 */ 2037 static int 2038 roff_evalnum(struct roff *r, int ln, const char *v, 2039 int *pos, int *res, int flags) 2040 { 2041 int mypos, operand2; 2042 char operator; 2043 2044 if (NULL == pos) { 2045 mypos = 0; 2046 pos = &mypos; 2047 } 2048 2049 if (flags & ROFFNUM_WHITE) 2050 while (isspace((unsigned char)v[*pos])) 2051 (*pos)++; 2052 2053 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2054 return(0); 2055 2056 while (1) { 2057 if (flags & ROFFNUM_WHITE) 2058 while (isspace((unsigned char)v[*pos])) 2059 (*pos)++; 2060 2061 if ( ! roff_getop(v, pos, &operator)) 2062 break; 2063 2064 if (flags & ROFFNUM_WHITE) 2065 while (isspace((unsigned char)v[*pos])) 2066 (*pos)++; 2067 2068 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2069 return(0); 2070 2071 if (flags & ROFFNUM_WHITE) 2072 while (isspace((unsigned char)v[*pos])) 2073 (*pos)++; 2074 2075 if (NULL == res) 2076 continue; 2077 2078 switch (operator) { 2079 case '+': 2080 *res += operand2; 2081 break; 2082 case '-': 2083 *res -= operand2; 2084 break; 2085 case '*': 2086 *res *= operand2; 2087 break; 2088 case '/': 2089 if (operand2 == 0) { 2090 mandoc_msg(MANDOCERR_DIVZERO, 2091 r->parse, ln, *pos, v); 2092 *res = 0; 2093 break; 2094 } 2095 *res /= operand2; 2096 break; 2097 case '%': 2098 if (operand2 == 0) { 2099 mandoc_msg(MANDOCERR_DIVZERO, 2100 r->parse, ln, *pos, v); 2101 *res = 0; 2102 break; 2103 } 2104 *res %= operand2; 2105 break; 2106 case '<': 2107 *res = *res < operand2; 2108 break; 2109 case '>': 2110 *res = *res > operand2; 2111 break; 2112 case 'l': 2113 *res = *res <= operand2; 2114 break; 2115 case 'g': 2116 *res = *res >= operand2; 2117 break; 2118 case '=': 2119 *res = *res == operand2; 2120 break; 2121 case '!': 2122 *res = *res != 
operand2; 2123 break; 2124 case '&': 2125 *res = *res && operand2; 2126 break; 2127 case ':': 2128 *res = *res || operand2; 2129 break; 2130 case 'i': 2131 if (operand2 < *res) 2132 *res = operand2; 2133 break; 2134 case 'a': 2135 if (operand2 > *res) 2136 *res = operand2; 2137 break; 2138 default: 2139 abort(); 2140 } 2141 } 2142 return(1); 2143 } 2144 2145 void 2146 roff_setreg(struct roff *r, const char *name, int val, char sign) 2147 { 2148 struct roffreg *reg; 2149 2150 /* Search for an existing register with the same name. */ 2151 reg = r->regtab; 2152 2153 while (reg && strcmp(name, reg->key.p)) 2154 reg = reg->next; 2155 2156 if (NULL == reg) { 2157 /* Create a new register. */ 2158 reg = mandoc_malloc(sizeof(struct roffreg)); 2159 reg->key.p = mandoc_strdup(name); 2160 reg->key.sz = strlen(name); 2161 reg->val = 0; 2162 reg->next = r->regtab; 2163 r->regtab = reg; 2164 } 2165 2166 if ('+' == sign) 2167 reg->val += val; 2168 else if ('-' == sign) 2169 reg->val -= val; 2170 else 2171 reg->val = val; 2172 } 2173 2174 /* 2175 * Handle some predefined read-only number registers. 2176 * For now, return -1 if the requested register is not predefined; 2177 * in case a predefined read-only register having the value -1 2178 * were to turn up, another special value would have to be chosen. 2179 */ 2180 static int 2181 roff_getregro(const char *name) 2182 { 2183 2184 switch (*name) { 2185 case 'A': /* ASCII approximation mode is always off. */ 2186 return(0); 2187 case 'g': /* Groff compatibility mode is always on. */ 2188 return(1); 2189 case 'H': /* Fixed horizontal resolution. */ 2190 return (24); 2191 case 'j': /* Always adjust left margin only. */ 2192 return(0); 2193 case 'T': /* Some output device is always defined. */ 2194 return(1); 2195 case 'V': /* Fixed vertical resolution. 
*/ 2196 return (40); 2197 default: 2198 return (-1); 2199 } 2200 } 2201 2202 int 2203 roff_getreg(const struct roff *r, const char *name) 2204 { 2205 struct roffreg *reg; 2206 int val; 2207 2208 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) { 2209 val = roff_getregro(name + 1); 2210 if (-1 != val) 2211 return (val); 2212 } 2213 2214 for (reg = r->regtab; reg; reg = reg->next) 2215 if (0 == strcmp(name, reg->key.p)) 2216 return(reg->val); 2217 2218 return(0); 2219 } 2220 2221 static int 2222 roff_getregn(const struct roff *r, const char *name, size_t len) 2223 { 2224 struct roffreg *reg; 2225 int val; 2226 2227 if ('.' == name[0] && 2 == len) { 2228 val = roff_getregro(name + 1); 2229 if (-1 != val) 2230 return (val); 2231 } 2232 2233 for (reg = r->regtab; reg; reg = reg->next) 2234 if (len == reg->key.sz && 2235 0 == strncmp(name, reg->key.p, len)) 2236 return(reg->val); 2237 2238 return(0); 2239 } 2240 2241 static void 2242 roff_freereg(struct roffreg *reg) 2243 { 2244 struct roffreg *old_reg; 2245 2246 while (NULL != reg) { 2247 free(reg->key.p); 2248 old_reg = reg; 2249 reg = reg->next; 2250 free(old_reg); 2251 } 2252 } 2253 2254 static enum rofferr 2255 roff_nr(ROFF_ARGS) 2256 { 2257 char *key, *val; 2258 size_t keysz; 2259 int iv; 2260 char sign; 2261 2262 key = val = buf->buf + pos; 2263 if (*key == '\0') 2264 return(ROFF_IGN); 2265 2266 keysz = roff_getname(r, &val, ln, pos); 2267 if (key[keysz] == '\\') 2268 return(ROFF_IGN); 2269 key[keysz] = '\0'; 2270 2271 sign = *val; 2272 if (sign == '+' || sign == '-') 2273 val++; 2274 2275 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE)) 2276 roff_setreg(r, key, iv, sign); 2277 2278 return(ROFF_IGN); 2279 } 2280 2281 static enum rofferr 2282 roff_rr(ROFF_ARGS) 2283 { 2284 struct roffreg *reg, **prev; 2285 char *name, *cp; 2286 size_t namesz; 2287 2288 name = cp = buf->buf + pos; 2289 if (*name == '\0') 2290 return(ROFF_IGN); 2291 namesz = roff_getname(r, &cp, ln, pos); 2292 name[namesz] = '\0'; 
2293 2294 prev = &r->regtab; 2295 while (1) { 2296 reg = *prev; 2297 if (reg == NULL || !strcmp(name, reg->key.p)) 2298 break; 2299 prev = &reg->next; 2300 } 2301 if (reg != NULL) { 2302 *prev = reg->next; 2303 free(reg->key.p); 2304 free(reg); 2305 } 2306 return(ROFF_IGN); 2307 } 2308 2309 static enum rofferr 2310 roff_rm(ROFF_ARGS) 2311 { 2312 const char *name; 2313 char *cp; 2314 size_t namesz; 2315 2316 cp = buf->buf + pos; 2317 while (*cp != '\0') { 2318 name = cp; 2319 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 2320 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 2321 if (name[namesz] == '\\') 2322 break; 2323 } 2324 return(ROFF_IGN); 2325 } 2326 2327 static enum rofferr 2328 roff_it(ROFF_ARGS) 2329 { 2330 int iv; 2331 2332 /* Parse the number of lines. */ 2333 2334 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 2335 mandoc_msg(MANDOCERR_IT_NONUM, r->parse, 2336 ln, ppos, buf->buf + 1); 2337 return(ROFF_IGN); 2338 } 2339 2340 while (isspace((unsigned char)buf->buf[pos])) 2341 pos++; 2342 2343 /* 2344 * Arm the input line trap. 2345 * Special-casing "an-trap" is an ugly workaround to cope 2346 * with DocBook stupidly fiddling with man(7) internals. 2347 */ 2348 2349 roffit_lines = iv; 2350 roffit_macro = mandoc_strdup(iv != 1 || 2351 strcmp(buf->buf + pos, "an-trap") ? 
2352 buf->buf + pos : "br"); 2353 return(ROFF_IGN); 2354 } 2355 2356 static enum rofferr 2357 roff_Dd(ROFF_ARGS) 2358 { 2359 const char *const *cp; 2360 2361 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0) 2362 for (cp = __mdoc_reserved; *cp; cp++) 2363 roff_setstr(r, *cp, NULL, 0); 2364 2365 if (r->format == 0) 2366 r->format = MPARSE_MDOC; 2367 2368 return(ROFF_CONT); 2369 } 2370 2371 static enum rofferr 2372 roff_TH(ROFF_ARGS) 2373 { 2374 const char *const *cp; 2375 2376 if ((r->options & MPARSE_QUICK) == 0) 2377 for (cp = __man_reserved; *cp; cp++) 2378 roff_setstr(r, *cp, NULL, 0); 2379 2380 if (r->format == 0) 2381 r->format = MPARSE_MAN; 2382 2383 return(ROFF_CONT); 2384 } 2385 2386 static enum rofferr 2387 roff_TE(ROFF_ARGS) 2388 { 2389 2390 if (NULL == r->tbl) 2391 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2392 ln, ppos, "TE"); 2393 else if ( ! tbl_end(&r->tbl)) { 2394 free(buf->buf); 2395 buf->buf = mandoc_strdup(".sp"); 2396 buf->sz = 4; 2397 return(ROFF_REPARSE); 2398 } 2399 return(ROFF_IGN); 2400 } 2401 2402 static enum rofferr 2403 roff_T_(ROFF_ARGS) 2404 { 2405 2406 if (NULL == r->tbl) 2407 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2408 ln, ppos, "T&"); 2409 else 2410 tbl_restart(ppos, ln, r->tbl); 2411 2412 return(ROFF_IGN); 2413 } 2414 2415 /* 2416 * Handle in-line equation delimiters. 2417 */ 2418 static enum rofferr 2419 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 2420 { 2421 char *cp1, *cp2; 2422 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 2423 2424 /* 2425 * Outside equations, look for an opening delimiter. 2426 * If we are inside an equation, we already know it is 2427 * in-line, or this function wouldn't have been called; 2428 * so look for a closing delimiter. 2429 */ 2430 2431 cp1 = buf->buf + pos; 2432 cp2 = strchr(cp1, r->eqn == NULL ? 
2433 r->last_eqn->odelim : r->last_eqn->cdelim); 2434 if (cp2 == NULL) 2435 return(ROFF_CONT); 2436 2437 *cp2++ = '\0'; 2438 bef_pr = bef_nl = aft_nl = aft_pr = ""; 2439 2440 /* Handle preceding text, protecting whitespace. */ 2441 2442 if (*buf->buf != '\0') { 2443 if (r->eqn == NULL) 2444 bef_pr = "\\&"; 2445 bef_nl = "\n"; 2446 } 2447 2448 /* 2449 * Prepare replacing the delimiter with an equation macro 2450 * and drop leading white space from the equation. 2451 */ 2452 2453 if (r->eqn == NULL) { 2454 while (*cp2 == ' ') 2455 cp2++; 2456 mac = ".EQ"; 2457 } else 2458 mac = ".EN"; 2459 2460 /* Handle following text, protecting whitespace. */ 2461 2462 if (*cp2 != '\0') { 2463 aft_nl = "\n"; 2464 if (r->eqn != NULL) 2465 aft_pr = "\\&"; 2466 } 2467 2468 /* Do the actual replacement. */ 2469 2470 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 2471 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 2472 free(buf->buf); 2473 buf->buf = cp1; 2474 2475 /* Toggle the in-line state of the eqn subsystem. 
*/ 2476 2477 r->eqn_inline = r->eqn == NULL; 2478 return(ROFF_REPARSE); 2479 } 2480 2481 static enum rofferr 2482 roff_EQ(ROFF_ARGS) 2483 { 2484 struct eqn_node *e; 2485 2486 assert(r->eqn == NULL); 2487 e = eqn_alloc(ppos, ln, r->parse); 2488 2489 if (r->last_eqn) { 2490 r->last_eqn->next = e; 2491 e->delim = r->last_eqn->delim; 2492 e->odelim = r->last_eqn->odelim; 2493 e->cdelim = r->last_eqn->cdelim; 2494 } else 2495 r->first_eqn = r->last_eqn = e; 2496 2497 r->eqn = r->last_eqn = e; 2498 2499 if (buf->buf[pos] != '\0') 2500 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 2501 ".EQ %s", buf->buf + pos); 2502 2503 return(ROFF_IGN); 2504 } 2505 2506 static enum rofferr 2507 roff_EN(ROFF_ARGS) 2508 { 2509 2510 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); 2511 return(ROFF_IGN); 2512 } 2513 2514 static enum rofferr 2515 roff_TS(ROFF_ARGS) 2516 { 2517 struct tbl_node *tbl; 2518 2519 if (r->tbl) { 2520 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse, 2521 ln, ppos, "TS breaks TS"); 2522 tbl_end(&r->tbl); 2523 } 2524 2525 tbl = tbl_alloc(ppos, ln, r->parse); 2526 2527 if (r->last_tbl) 2528 r->last_tbl->next = tbl; 2529 else 2530 r->first_tbl = r->last_tbl = tbl; 2531 2532 r->tbl = r->last_tbl = tbl; 2533 return(ROFF_IGN); 2534 } 2535 2536 static enum rofferr 2537 roff_brp(ROFF_ARGS) 2538 { 2539 2540 buf->buf[pos - 1] = '\0'; 2541 return(ROFF_CONT); 2542 } 2543 2544 static enum rofferr 2545 roff_cc(ROFF_ARGS) 2546 { 2547 const char *p; 2548 2549 p = buf->buf + pos; 2550 2551 if (*p == '\0' || (r->control = *p++) == '.') 2552 r->control = 0; 2553 2554 if (*p != '\0') 2555 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 2556 ln, p - buf->buf, "cc ... 
%s", p); 2557 2558 return(ROFF_IGN); 2559 } 2560 2561 static enum rofferr 2562 roff_tr(ROFF_ARGS) 2563 { 2564 const char *p, *first, *second; 2565 size_t fsz, ssz; 2566 enum mandoc_esc esc; 2567 2568 p = buf->buf + pos; 2569 2570 if (*p == '\0') { 2571 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr"); 2572 return(ROFF_IGN); 2573 } 2574 2575 while (*p != '\0') { 2576 fsz = ssz = 1; 2577 2578 first = p++; 2579 if (*first == '\\') { 2580 esc = mandoc_escape(&p, NULL, NULL); 2581 if (esc == ESCAPE_ERROR) { 2582 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 2583 ln, (int)(p - buf->buf), first); 2584 return(ROFF_IGN); 2585 } 2586 fsz = (size_t)(p - first); 2587 } 2588 2589 second = p++; 2590 if (*second == '\\') { 2591 esc = mandoc_escape(&p, NULL, NULL); 2592 if (esc == ESCAPE_ERROR) { 2593 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 2594 ln, (int)(p - buf->buf), second); 2595 return(ROFF_IGN); 2596 } 2597 ssz = (size_t)(p - second); 2598 } else if (*second == '\0') { 2599 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse, 2600 ln, first - buf->buf, "tr %s", first); 2601 second = " "; 2602 p--; 2603 } 2604 2605 if (fsz > 1) { 2606 roff_setstrn(&r->xmbtab, first, fsz, 2607 second, ssz, 0); 2608 continue; 2609 } 2610 2611 if (r->xtab == NULL) 2612 r->xtab = mandoc_calloc(128, 2613 sizeof(struct roffstr)); 2614 2615 free(r->xtab[(int)*first].p); 2616 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 2617 r->xtab[(int)*first].sz = ssz; 2618 } 2619 2620 return(ROFF_IGN); 2621 } 2622 2623 static enum rofferr 2624 roff_so(ROFF_ARGS) 2625 { 2626 char *name, *cp; 2627 2628 name = buf->buf + pos; 2629 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name); 2630 2631 /* 2632 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 2633 * opening anything that's not in our cwd or anything beneath 2634 * it. Thus, explicitly disallow traversing up the file-system 2635 * or using absolute paths. 
2636 */ 2637 2638 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 2639 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos, 2640 ".so %s", name); 2641 buf->sz = mandoc_asprintf(&cp, 2642 ".sp\nSee the file %s.\n.sp", name) + 1; 2643 free(buf->buf); 2644 buf->buf = cp; 2645 *offs = 0; 2646 return(ROFF_REPARSE); 2647 } 2648 2649 *offs = pos; 2650 return(ROFF_SO); 2651 } 2652 2653 static enum rofferr 2654 roff_userdef(ROFF_ARGS) 2655 { 2656 const char *arg[9], *ap; 2657 char *cp, *n1, *n2; 2658 int i; 2659 size_t asz, rsz; 2660 2661 /* 2662 * Collect pointers to macro argument strings 2663 * and NUL-terminate them. 2664 */ 2665 2666 cp = buf->buf + pos; 2667 for (i = 0; i < 9; i++) 2668 arg[i] = *cp == '\0' ? "" : 2669 mandoc_getarg(r->parse, &cp, ln, &pos); 2670 2671 /* 2672 * Expand macro arguments. 2673 */ 2674 2675 buf->sz = strlen(r->current_string) + 1; 2676 n1 = cp = mandoc_malloc(buf->sz); 2677 memcpy(n1, r->current_string, buf->sz); 2678 while (*cp != '\0') { 2679 2680 /* Scan ahead for the next argument invocation. */ 2681 2682 if (*cp++ != '\\') 2683 continue; 2684 if (*cp++ != '$') 2685 continue; 2686 i = *cp - '1'; 2687 if (0 > i || 8 < i) 2688 continue; 2689 cp -= 2; 2690 2691 /* 2692 * Determine the size of the expanded argument, 2693 * taking escaping of quotes into account. 2694 */ 2695 2696 asz = 0; 2697 for (ap = arg[i]; *ap != '\0'; ap++) { 2698 asz++; 2699 if (*ap == '"') 2700 asz += 3; 2701 } 2702 if (asz != 3) { 2703 2704 /* 2705 * Determine the size of the rest of the 2706 * unexpanded macro, including the NUL. 2707 */ 2708 2709 rsz = buf->sz - (cp - n1) - 3; 2710 2711 /* 2712 * When shrinking, move before 2713 * releasing the storage. 2714 */ 2715 2716 if (asz < 3) 2717 memmove(cp + asz, cp + 3, rsz); 2718 2719 /* 2720 * Resize the storage for the macro 2721 * and readjust the parse pointer. 
2722 */ 2723 2724 buf->sz += asz - 3; 2725 n2 = mandoc_realloc(n1, buf->sz); 2726 cp = n2 + (cp - n1); 2727 n1 = n2; 2728 2729 /* 2730 * When growing, make room 2731 * for the expanded argument. 2732 */ 2733 2734 if (asz > 3) 2735 memmove(cp + asz, cp + 3, rsz); 2736 } 2737 2738 /* Copy the expanded argument, escaping quotes. */ 2739 2740 n2 = cp; 2741 for (ap = arg[i]; *ap != '\0'; ap++) { 2742 if (*ap == '"') { 2743 memcpy(n2, "\\(dq", 4); 2744 n2 += 4; 2745 } else 2746 *n2++ = *ap; 2747 } 2748 } 2749 2750 /* 2751 * Replace the macro invocation 2752 * by the expanded macro. 2753 */ 2754 2755 free(buf->buf); 2756 buf->buf = n1; 2757 *offs = 0; 2758 2759 return(buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? 2760 ROFF_REPARSE : ROFF_APPEND); 2761 } 2762 2763 static size_t 2764 roff_getname(struct roff *r, char **cpp, int ln, int pos) 2765 { 2766 char *name, *cp; 2767 size_t namesz; 2768 2769 name = *cpp; 2770 if ('\0' == *name) 2771 return(0); 2772 2773 /* Read until end of name and terminate it with NUL. */ 2774 for (cp = name; 1; cp++) { 2775 if ('\0' == *cp || ' ' == *cp) { 2776 namesz = cp - name; 2777 break; 2778 } 2779 if ('\\' != *cp) 2780 continue; 2781 namesz = cp - name; 2782 if ('{' == cp[1] || '}' == cp[1]) 2783 break; 2784 cp++; 2785 if ('\\' == *cp) 2786 continue; 2787 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos, 2788 "%.*s", (int)(cp - name + 1), name); 2789 mandoc_escape((const char **)&cp, NULL, NULL); 2790 break; 2791 } 2792 2793 /* Read past spaces. */ 2794 while (' ' == *cp) 2795 cp++; 2796 2797 *cpp = cp; 2798 return(namesz); 2799 } 2800 2801 /* 2802 * Store *string into the user-defined string called *name. 2803 * To clear an existing entry, call with (*r, *name, NULL, 0). 
2804 * append == 0: replace mode 2805 * append == 1: single-line append mode 2806 * append == 2: multiline append mode, append '\n' after each call 2807 */ 2808 static void 2809 roff_setstr(struct roff *r, const char *name, const char *string, 2810 int append) 2811 { 2812 2813 roff_setstrn(&r->strtab, name, strlen(name), string, 2814 string ? strlen(string) : 0, append); 2815 } 2816 2817 static void 2818 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 2819 const char *string, size_t stringsz, int append) 2820 { 2821 struct roffkv *n; 2822 char *c; 2823 int i; 2824 size_t oldch, newch; 2825 2826 /* Search for an existing string with the same name. */ 2827 n = *r; 2828 2829 while (n && (namesz != n->key.sz || 2830 strncmp(n->key.p, name, namesz))) 2831 n = n->next; 2832 2833 if (NULL == n) { 2834 /* Create a new string table entry. */ 2835 n = mandoc_malloc(sizeof(struct roffkv)); 2836 n->key.p = mandoc_strndup(name, namesz); 2837 n->key.sz = namesz; 2838 n->val.p = NULL; 2839 n->val.sz = 0; 2840 n->next = *r; 2841 *r = n; 2842 } else if (0 == append) { 2843 free(n->val.p); 2844 n->val.p = NULL; 2845 n->val.sz = 0; 2846 } 2847 2848 if (NULL == string) 2849 return; 2850 2851 /* 2852 * One additional byte for the '\n' in multiline mode, 2853 * and one for the terminating '\0'. 2854 */ 2855 newch = stringsz + (1 < append ? 2u : 1u); 2856 2857 if (NULL == n->val.p) { 2858 n->val.p = mandoc_malloc(newch); 2859 *n->val.p = '\0'; 2860 oldch = 0; 2861 } else { 2862 oldch = n->val.sz; 2863 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 2864 } 2865 2866 /* Skip existing content in the destination buffer. */ 2867 c = n->val.p + (int)oldch; 2868 2869 /* Append new content to the destination buffer. */ 2870 i = 0; 2871 while (i < (int)stringsz) { 2872 /* 2873 * Rudimentary roff copy mode: 2874 * Handle escaped backslashes. 
2875 */ 2876 if ('\\' == string[i] && '\\' == string[i + 1]) 2877 i++; 2878 *c++ = string[i++]; 2879 } 2880 2881 /* Append terminating bytes. */ 2882 if (1 < append) 2883 *c++ = '\n'; 2884 2885 *c = '\0'; 2886 n->val.sz = (int)(c - n->val.p); 2887 } 2888 2889 static const char * 2890 roff_getstrn(const struct roff *r, const char *name, size_t len) 2891 { 2892 const struct roffkv *n; 2893 int i; 2894 2895 for (n = r->strtab; n; n = n->next) 2896 if (0 == strncmp(name, n->key.p, len) && 2897 '\0' == n->key.p[(int)len]) 2898 return(n->val.p); 2899 2900 for (i = 0; i < PREDEFS_MAX; i++) 2901 if (0 == strncmp(name, predefs[i].name, len) && 2902 '\0' == predefs[i].name[(int)len]) 2903 return(predefs[i].str); 2904 2905 return(NULL); 2906 } 2907 2908 static void 2909 roff_freestr(struct roffkv *r) 2910 { 2911 struct roffkv *n, *nn; 2912 2913 for (n = r; n; n = nn) { 2914 free(n->key.p); 2915 free(n->val.p); 2916 nn = n->next; 2917 free(n); 2918 } 2919 } 2920 2921 const struct tbl_span * 2922 roff_span(const struct roff *r) 2923 { 2924 2925 return(r->tbl ? tbl_span(r->tbl) : NULL); 2926 } 2927 2928 const struct eqn * 2929 roff_eqn(const struct roff *r) 2930 { 2931 2932 return(r->last_eqn ? &r->last_eqn->eqn : NULL); 2933 } 2934 2935 /* 2936 * Duplicate an input string, making the appropriate character 2937 * conversations (as stipulated by `tr') along the way. 2938 * Returns a heap-allocated string with all the replacements made. 2939 */ 2940 char * 2941 roff_strdup(const struct roff *r, const char *p) 2942 { 2943 const struct roffkv *cp; 2944 char *res; 2945 const char *pp; 2946 size_t ssz, sz; 2947 enum mandoc_esc esc; 2948 2949 if (NULL == r->xmbtab && NULL == r->xtab) 2950 return(mandoc_strdup(p)); 2951 else if ('\0' == *p) 2952 return(mandoc_strdup("")); 2953 2954 /* 2955 * Step through each character looking for term matches 2956 * (remember that a `tr' can be invoked with an escape, which is 2957 * a glyph but the escape is multi-character). 
2958 * We only do this if the character hash has been initialised 2959 * and the string is >0 length. 2960 */ 2961 2962 res = NULL; 2963 ssz = 0; 2964 2965 while ('\0' != *p) { 2966 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { 2967 sz = r->xtab[(int)*p].sz; 2968 res = mandoc_realloc(res, ssz + sz + 1); 2969 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 2970 ssz += sz; 2971 p++; 2972 continue; 2973 } else if ('\\' != *p) { 2974 res = mandoc_realloc(res, ssz + 2); 2975 res[ssz++] = *p++; 2976 continue; 2977 } 2978 2979 /* Search for term matches. */ 2980 for (cp = r->xmbtab; cp; cp = cp->next) 2981 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 2982 break; 2983 2984 if (NULL != cp) { 2985 /* 2986 * A match has been found. 2987 * Append the match to the array and move 2988 * forward by its keysize. 2989 */ 2990 res = mandoc_realloc(res, 2991 ssz + cp->val.sz + 1); 2992 memcpy(res + ssz, cp->val.p, cp->val.sz); 2993 ssz += cp->val.sz; 2994 p += (int)cp->key.sz; 2995 continue; 2996 } 2997 2998 /* 2999 * Handle escapes carefully: we need to copy 3000 * over just the escape itself, or else we might 3001 * do replacements within the escape itself. 3002 * Make sure to pass along the bogus string. 3003 */ 3004 pp = p++; 3005 esc = mandoc_escape(&p, NULL, NULL); 3006 if (ESCAPE_ERROR == esc) { 3007 sz = strlen(pp); 3008 res = mandoc_realloc(res, ssz + sz + 1); 3009 memcpy(res + ssz, pp, sz); 3010 break; 3011 } 3012 /* 3013 * We bail out on bad escapes. 3014 * No need to warn: we already did so when 3015 * roff_res() was called. 3016 */ 3017 sz = (int)(p - pp); 3018 res = mandoc_realloc(res, ssz + sz + 1); 3019 memcpy(res + ssz, pp, sz); 3020 ssz += sz; 3021 } 3022 3023 res[(int)ssz] = '\0'; 3024 return(res); 3025 } 3026 3027 int 3028 roff_getformat(const struct roff *r) 3029 { 3030 3031 return(r->format); 3032 } 3033 3034 /* 3035 * Find out whether a line is a macro line or not. 
3036 * If it is, adjust the current position and return one; if it isn't, 3037 * return zero and don't change the current position. 3038 * If the control character has been set with `.cc', then let that grain 3039 * precedence. 3040 * This is slighly contrary to groff, where using the non-breaking 3041 * control character when `cc' has been invoked will cause the 3042 * non-breaking macro contents to be printed verbatim. 3043 */ 3044 int 3045 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 3046 { 3047 int pos; 3048 3049 pos = *ppos; 3050 3051 if (0 != r->control && cp[pos] == r->control) 3052 pos++; 3053 else if (0 != r->control) 3054 return(0); 3055 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 3056 pos += 2; 3057 else if ('.' == cp[pos] || '\'' == cp[pos]) 3058 pos++; 3059 else 3060 return(0); 3061 3062 while (' ' == cp[pos] || '\t' == cp[pos]) 3063 pos++; 3064 3065 *ppos = pos; 3066 return(1); 3067 } 3068