1 /* $Id: man_macro.c,v 1.98 2015/02/06 11:54:36 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2012, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include "config.h" 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <stdlib.h> 26 #include <string.h> 27 28 #include "man.h" 29 #include "mandoc.h" 30 #include "libmandoc.h" 31 #include "libman.h" 32 33 enum rew { 34 REW_REWIND, 35 REW_NOHALT, 36 REW_HALT 37 }; 38 39 static void blk_close(MACRO_PROT_ARGS); 40 static void blk_exp(MACRO_PROT_ARGS); 41 static void blk_imp(MACRO_PROT_ARGS); 42 static void in_line_eoln(MACRO_PROT_ARGS); 43 static int man_args(struct man *, int, 44 int *, char *, char **); 45 46 static void rew_scope(enum man_type, 47 struct man *, enum mant); 48 static enum rew rew_dohalt(enum mant, enum man_type, 49 const struct man_node *); 50 static enum rew rew_block(enum mant, enum man_type, 51 const struct man_node *); 52 53 const struct man_macro __man_macros[MAN_MAX] = { 54 { in_line_eoln, MAN_NSCOPED }, /* br */ 55 { in_line_eoln, MAN_BSCOPE }, /* TH */ 56 { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */ 57 { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */ 58 { blk_imp, MAN_BSCOPE | MAN_SCOPED | MAN_FSCOPED }, /* TP */ 59 { blk_imp, MAN_BSCOPE }, /* LP */ 60 { blk_imp, MAN_BSCOPE }, /* PP */ 61 { blk_imp, MAN_BSCOPE }, /* P */ 62 { blk_imp, MAN_BSCOPE }, /* IP */ 63 { blk_imp, MAN_BSCOPE }, /* HP */ 64 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* SM */ 65 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* SB */ 66 { in_line_eoln, 0 }, /* BI */ 67 { in_line_eoln, 0 }, /* IB */ 68 { in_line_eoln, 0 }, /* BR */ 69 { in_line_eoln, 0 }, /* RB */ 70 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* R */ 71 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* B */ 72 { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* I */ 73 { in_line_eoln, 0 }, /* IR */ 74 { in_line_eoln, 0 }, /* RI */ 75 { in_line_eoln, MAN_NSCOPED }, /* sp */ 76 { in_line_eoln, MAN_BSCOPE }, /* nf */ 77 { in_line_eoln, MAN_BSCOPE }, /* fi */ 78 { blk_close, MAN_BSCOPE }, /* RE */ 79 { blk_exp, MAN_BSCOPE | MAN_EXPLICIT }, /* RS */ 80 { in_line_eoln, 0 }, /* DT */ 81 { in_line_eoln, 0 }, /* UC */ 82 { in_line_eoln, 0 }, /* PD */ 83 { in_line_eoln, 0 }, /* AT */ 84 { in_line_eoln, 0 }, /* in */ 85 { in_line_eoln, 0 }, /* ft */ 86 { in_line_eoln, 0 }, /* OP */ 87 { in_line_eoln, MAN_BSCOPE }, /* EX */ 88 { in_line_eoln, MAN_BSCOPE }, /* EE */ 89 { blk_exp, MAN_BSCOPE | MAN_EXPLICIT }, /* UR */ 90 { blk_close, MAN_BSCOPE }, /* UE */ 91 { in_line_eoln, 0 }, /* ll */ 92 }; 93 94 const struct man_macro * const man_macros = __man_macros; 95 96 97 void 98 man_unscope(struct man *man, const struct man_node *to) 99 { 100 struct man_node *n; 101 102 to = to->parent; 103 n = man->last; 104 while (n != to) { 105 106 /* Reached the end of the document? */ 107 108 if (to == NULL && ! (n->flags & MAN_VALID)) { 109 if (man->flags & (MAN_BLINE | MAN_ELINE) && 110 man_macros[n->tok].flags & MAN_SCOPED) { 111 mandoc_vmsg(MANDOCERR_BLK_LINE, 112 man->parse, n->line, n->pos, 113 "EOF breaks %s", 114 man_macronames[n->tok]); 115 if (man->flags & MAN_ELINE) 116 man->flags &= ~MAN_ELINE; 117 else { 118 assert(n->type == MAN_HEAD); 119 n = n->parent; 120 man->flags &= ~MAN_BLINE; 121 } 122 man->last = n; 123 n = n->parent; 124 man_node_delete(man, man->last); 125 continue; 126 } 127 if (n->type == MAN_BLOCK && 128 man_macros[n->tok].flags & MAN_EXPLICIT) 129 mandoc_msg(MANDOCERR_BLK_NOEND, 130 man->parse, n->line, n->pos, 131 man_macronames[n->tok]); 132 } 133 134 /* 135 * We might delete the man->last node 136 * in the post-validation phase. 137 * Save a pointer to the parent such that 138 * we know where to continue the iteration. 139 */ 140 141 man->last = n; 142 n = n->parent; 143 man_valid_post(man); 144 } 145 146 /* 147 * If we ended up at the parent of the node we were 148 * supposed to rewind to, that means the target node 149 * got deleted, so add the next node we parse as a child 150 * of the parent instead of as a sibling of the target. 151 */ 152 153 man->next = (man->last == to) ? 154 MAN_NEXT_CHILD : MAN_NEXT_SIBLING; 155 } 156 157 static enum rew 158 rew_block(enum mant ntok, enum man_type type, const struct man_node *n) 159 { 160 161 if (type == MAN_BLOCK && ntok == n->parent->tok && 162 n->parent->type == MAN_BODY) 163 return(REW_REWIND); 164 return(ntok == n->tok ? REW_HALT : REW_NOHALT); 165 } 166 167 /* 168 * There are three scope levels: scoped to the root (all), scoped to the 169 * section (all less sections), and scoped to subsections (all less 170 * sections and subsections). 171 */ 172 static enum rew 173 rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n) 174 { 175 enum rew c; 176 177 /* We cannot progress beyond the root ever. */ 178 if (MAN_ROOT == n->type) 179 return(REW_HALT); 180 181 assert(n->parent); 182 183 /* Normal nodes shouldn't go to the level of the root. */ 184 if (MAN_ROOT == n->parent->type) 185 return(REW_REWIND); 186 187 /* Already-validated nodes should be closed out. */ 188 if (MAN_VALID & n->flags) 189 return(REW_NOHALT); 190 191 /* First: rewind to ourselves. */ 192 if (type == n->type && tok == n->tok) { 193 if (MAN_EXPLICIT & man_macros[n->tok].flags) 194 return(REW_HALT); 195 else 196 return(REW_REWIND); 197 } 198 199 /* 200 * Next follow the implicit scope-smashings as defined by man.7: 201 * section, sub-section, etc. 202 */ 203 204 switch (tok) { 205 case MAN_SH: 206 break; 207 case MAN_SS: 208 /* Rewind to a section, if a block. */ 209 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) 210 return(c); 211 break; 212 case MAN_RS: 213 /* Preserve empty paragraphs before RS. */ 214 if (0 == n->nchild && (MAN_P == n->tok || 215 MAN_PP == n->tok || MAN_LP == n->tok)) 216 return(REW_HALT); 217 /* Rewind to a subsection, if a block. */ 218 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) 219 return(c); 220 /* Rewind to a section, if a block. */ 221 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) 222 return(c); 223 break; 224 default: 225 /* Rewind to an offsetter, if a block. */ 226 if (REW_NOHALT != (c = rew_block(MAN_RS, type, n))) 227 return(c); 228 /* Rewind to a subsection, if a block. */ 229 if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) 230 return(c); 231 /* Rewind to a section, if a block. */ 232 if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) 233 return(c); 234 break; 235 } 236 237 return(REW_NOHALT); 238 } 239 240 /* 241 * Rewinding entails ascending the parse tree until a coherent point, 242 * for example, the `SH' macro will close out any intervening `SS' 243 * scopes. When a scope is closed, it must be validated and actioned. 244 */ 245 static void 246 rew_scope(enum man_type type, struct man *man, enum mant tok) 247 { 248 struct man_node *n; 249 enum rew c; 250 251 for (n = man->last; n; n = n->parent) { 252 /* 253 * Whether we should stop immediately (REW_HALT), stop 254 * and rewind until this point (REW_REWIND), or keep 255 * rewinding (REW_NOHALT). 256 */ 257 c = rew_dohalt(tok, type, n); 258 if (REW_HALT == c) 259 return; 260 if (REW_REWIND == c) 261 break; 262 } 263 264 /* 265 * Rewind until the current point. Warn if we're a roff 266 * instruction that's mowing over explicit scopes. 267 */ 268 269 man_unscope(man, n); 270 } 271 272 273 /* 274 * Close out a generic explicit macro. 275 */ 276 void 277 blk_close(MACRO_PROT_ARGS) 278 { 279 enum mant ntok; 280 const struct man_node *nn; 281 char *p; 282 int nrew, target; 283 284 nrew = 1; 285 switch (tok) { 286 case MAN_RE: 287 ntok = MAN_RS; 288 if ( ! man_args(man, line, pos, buf, &p)) 289 break; 290 for (nn = man->last->parent; nn; nn = nn->parent) 291 if (nn->tok == ntok && nn->type == MAN_BLOCK) 292 nrew++; 293 target = strtol(p, &p, 10); 294 if (*p != '\0') 295 mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, 296 line, p - buf, "RE ... %s", p); 297 if (target == 0) 298 target = 1; 299 nrew -= target; 300 if (nrew < 1) { 301 mandoc_vmsg(MANDOCERR_RE_NOTOPEN, man->parse, 302 line, ppos, "RE %d", target); 303 return; 304 } 305 break; 306 case MAN_UE: 307 ntok = MAN_UR; 308 break; 309 default: 310 abort(); 311 /* NOTREACHED */ 312 } 313 314 for (nn = man->last->parent; nn; nn = nn->parent) 315 if (nn->tok == ntok && nn->type == MAN_BLOCK && ! --nrew) 316 break; 317 318 if (nn == NULL) { 319 mandoc_msg(MANDOCERR_BLK_NOTOPEN, man->parse, 320 line, ppos, man_macronames[tok]); 321 rew_scope(MAN_BLOCK, man, MAN_PP); 322 } else { 323 line = man->last->line; 324 ppos = man->last->pos; 325 ntok = man->last->tok; 326 man_unscope(man, nn); 327 328 /* Move a trailing paragraph behind the block. */ 329 330 if (ntok == MAN_LP || ntok == MAN_PP || ntok == MAN_P) { 331 *pos = strlen(buf); 332 blk_imp(man, ntok, line, ppos, pos, buf); 333 } 334 } 335 } 336 337 void 338 blk_exp(MACRO_PROT_ARGS) 339 { 340 struct man_node *head; 341 char *p; 342 int la; 343 344 rew_scope(MAN_BLOCK, man, tok); 345 man_block_alloc(man, line, ppos, tok); 346 man_head_alloc(man, line, ppos, tok); 347 head = man->last; 348 349 la = *pos; 350 if (man_args(man, line, pos, buf, &p)) 351 man_word_alloc(man, line, la, p); 352 353 if (buf[*pos] != '\0') 354 mandoc_vmsg(MANDOCERR_ARG_EXCESS, 355 man->parse, line, *pos, "%s ... %s", 356 man_macronames[tok], buf + *pos); 357 358 man_unscope(man, head); 359 man_body_alloc(man, line, ppos, tok); 360 } 361 362 /* 363 * Parse an implicit-block macro. These contain a MAN_HEAD and a 364 * MAN_BODY contained within a MAN_BLOCK. Rules for closing out other 365 * scopes, such as `SH' closing out an `SS', are defined in the rew 366 * routines. 367 */ 368 void 369 blk_imp(MACRO_PROT_ARGS) 370 { 371 int la; 372 char *p; 373 struct man_node *n; 374 375 rew_scope(MAN_BODY, man, tok); 376 rew_scope(MAN_BLOCK, man, tok); 377 man_block_alloc(man, line, ppos, tok); 378 man_head_alloc(man, line, ppos, tok); 379 n = man->last; 380 381 /* Add line arguments. */ 382 383 for (;;) { 384 la = *pos; 385 if ( ! man_args(man, line, pos, buf, &p)) 386 break; 387 man_word_alloc(man, line, la, p); 388 } 389 390 /* Close out head and open body (unless MAN_SCOPE). */ 391 392 if (man_macros[tok].flags & MAN_SCOPED) { 393 /* If we're forcing scope (`TP'), keep it open. */ 394 if (man_macros[tok].flags & MAN_FSCOPED) { 395 man->flags |= MAN_BLINE; 396 return; 397 } else if (n == man->last) { 398 man->flags |= MAN_BLINE; 399 return; 400 } 401 } 402 rew_scope(MAN_HEAD, man, tok); 403 man_body_alloc(man, line, ppos, tok); 404 } 405 406 void 407 in_line_eoln(MACRO_PROT_ARGS) 408 { 409 int la; 410 char *p; 411 struct man_node *n; 412 413 man_elem_alloc(man, line, ppos, tok); 414 n = man->last; 415 416 for (;;) { 417 if (buf[*pos] != '\0' && (tok == MAN_br || 418 tok == MAN_fi || tok == MAN_nf)) { 419 mandoc_vmsg(MANDOCERR_ARG_SKIP, 420 man->parse, line, *pos, "%s %s", 421 man_macronames[tok], buf + *pos); 422 break; 423 } 424 if (buf[*pos] != '\0' && man->last != n && 425 (tok == MAN_PD || tok == MAN_ft || tok == MAN_sp)) { 426 mandoc_vmsg(MANDOCERR_ARG_EXCESS, 427 man->parse, line, *pos, "%s ... %s", 428 man_macronames[tok], buf + *pos); 429 break; 430 } 431 la = *pos; 432 if ( ! man_args(man, line, pos, buf, &p)) 433 break; 434 if (man_macros[tok].flags & MAN_JOIN && 435 man->last->type == MAN_TEXT) 436 man_word_append(man, p); 437 else 438 man_word_alloc(man, line, la, p); 439 } 440 441 /* 442 * Append MAN_EOS in case the last snipped argument 443 * ends with a dot, e.g. `.IR syslog (3).' 444 */ 445 446 if (n != man->last && 447 mandoc_eos(man->last->string, strlen(man->last->string))) 448 man->last->flags |= MAN_EOS; 449 450 /* 451 * If no arguments are specified and this is MAN_SCOPED (i.e., 452 * next-line scoped), then set our mode to indicate that we're 453 * waiting for terms to load into our context. 454 */ 455 456 if (n == man->last && man_macros[tok].flags & MAN_SCOPED) { 457 assert( ! (man_macros[tok].flags & MAN_NSCOPED)); 458 man->flags |= MAN_ELINE; 459 return; 460 } 461 462 assert(man->last->type != MAN_ROOT); 463 man->next = MAN_NEXT_SIBLING; 464 465 /* 466 * Rewind our element scope. Note that when TH is pruned, we'll 467 * be back at the root, so make sure that we don't clobber as 468 * its sibling. 469 */ 470 471 for ( ; man->last; man->last = man->last->parent) { 472 if (man->last == n) 473 break; 474 if (man->last->type == MAN_ROOT) 475 break; 476 man_valid_post(man); 477 } 478 479 assert(man->last); 480 481 /* 482 * Same here regarding whether we're back at the root. 483 */ 484 485 if (man->last->type != MAN_ROOT) 486 man_valid_post(man); 487 } 488 489 490 void 491 man_macroend(struct man *man) 492 { 493 494 man_unscope(man, man->first); 495 } 496 497 static int 498 man_args(struct man *man, int line, int *pos, char *buf, char **v) 499 { 500 char *start; 501 502 assert(*pos); 503 *v = start = buf + *pos; 504 assert(' ' != *start); 505 506 if ('\0' == *start) 507 return(0); 508 509 *v = mandoc_getarg(man->parse, v, line, pos); 510 return(1); 511 } 512