1 /* $Id: man.c,v 1.149 2015/01/30 21:28:46 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include "config.h" 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <stdarg.h> 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 30 #include "man.h" 31 #include "mandoc.h" 32 #include "mandoc_aux.h" 33 #include "libman.h" 34 #include "libmandoc.h" 35 36 const char *const __man_macronames[MAN_MAX] = { 37 "br", "TH", "SH", "SS", 38 "TP", "LP", "PP", "P", 39 "IP", "HP", "SM", "SB", 40 "BI", "IB", "BR", "RB", 41 "R", "B", "I", "IR", 42 "RI", "sp", "nf", 43 "fi", "RE", "RS", "DT", 44 "UC", "PD", "AT", "in", 45 "ft", "OP", "EX", "EE", 46 "UR", "UE", "ll" 47 }; 48 49 const char * const *man_macronames = __man_macronames; 50 51 static void man_alloc1(struct man *); 52 static void man_breakscope(struct man *, enum mant); 53 static void man_descope(struct man *, int, int); 54 static void man_free1(struct man *); 55 static struct man_node *man_node_alloc(struct man *, int, int, 56 enum man_type, enum mant); 57 static void man_node_append(struct man *, struct man_node *); 58 static void man_node_free(struct man_node *); 59 static void man_node_unlink(struct man *, 60 struct man_node *); 61 static int man_ptext(struct man *, int, char *, int); 62 static int man_pmacro(struct man *, int, char *, int); 63 64 65 const struct man_node * 66 man_node(const struct man *man) 67 { 68 69 return(man->first); 70 } 71 72 const struct man_meta * 73 man_meta(const struct man *man) 74 { 75 76 return(&man->meta); 77 } 78 79 void 80 man_reset(struct man *man) 81 { 82 83 man_free1(man); 84 man_alloc1(man); 85 } 86 87 void 88 man_free(struct man *man) 89 { 90 91 man_free1(man); 92 free(man); 93 } 94 95 struct man * 96 man_alloc(struct roff *roff, struct mparse *parse, 97 const char *defos, int quick) 98 { 99 struct man *p; 100 101 p = mandoc_calloc(1, sizeof(struct man)); 102 103 man_hash_init(); 104 p->parse = parse; 105 p->defos = defos; 106 p->quick = quick; 107 p->roff = roff; 108 109 man_alloc1(p); 110 return(p); 111 } 112 113 void 114 man_endparse(struct man *man) 115 { 116 117 man_macroend(man); 118 } 119 120 int 121 man_parseln(struct man *man, int ln, char *buf, int offs) 122 { 123 124 if (man->last->type != MAN_EQN || ln > man->last->line) 125 man->flags |= MAN_NEWLINE; 126 127 return (roff_getcontrol(man->roff, buf, &offs) ? 128 man_pmacro(man, ln, buf, offs) : 129 man_ptext(man, ln, buf, offs)); 130 } 131 132 static void 133 man_free1(struct man *man) 134 { 135 136 if (man->first) 137 man_node_delete(man, man->first); 138 free(man->meta.title); 139 free(man->meta.source); 140 free(man->meta.date); 141 free(man->meta.vol); 142 free(man->meta.msec); 143 } 144 145 static void 146 man_alloc1(struct man *man) 147 { 148 149 memset(&man->meta, 0, sizeof(struct man_meta)); 150 man->flags = 0; 151 man->last = mandoc_calloc(1, sizeof(struct man_node)); 152 man->first = man->last; 153 man->last->type = MAN_ROOT; 154 man->last->tok = MAN_MAX; 155 man->next = MAN_NEXT_CHILD; 156 } 157 158 159 static void 160 man_node_append(struct man *man, struct man_node *p) 161 { 162 163 assert(man->last); 164 assert(man->first); 165 assert(p->type != MAN_ROOT); 166 167 switch (man->next) { 168 case MAN_NEXT_SIBLING: 169 man->last->next = p; 170 p->prev = man->last; 171 p->parent = man->last->parent; 172 break; 173 case MAN_NEXT_CHILD: 174 man->last->child = p; 175 p->parent = man->last; 176 break; 177 default: 178 abort(); 179 /* NOTREACHED */ 180 } 181 182 assert(p->parent); 183 p->parent->nchild++; 184 185 switch (p->type) { 186 case MAN_BLOCK: 187 if (p->tok == MAN_SH || p->tok == MAN_SS) 188 man->flags &= ~MAN_LITERAL; 189 break; 190 case MAN_HEAD: 191 assert(p->parent->type == MAN_BLOCK); 192 p->parent->head = p; 193 break; 194 case MAN_BODY: 195 assert(p->parent->type == MAN_BLOCK); 196 p->parent->body = p; 197 break; 198 default: 199 break; 200 } 201 202 man->last = p; 203 204 switch (p->type) { 205 case MAN_TBL: 206 /* FALLTHROUGH */ 207 case MAN_TEXT: 208 man_valid_post(man); 209 break; 210 default: 211 break; 212 } 213 } 214 215 static struct man_node * 216 man_node_alloc(struct man *man, int line, int pos, 217 enum man_type type, enum mant tok) 218 { 219 struct man_node *p; 220 221 p = mandoc_calloc(1, sizeof(struct man_node)); 222 p->line = line; 223 p->pos = pos; 224 p->type = type; 225 p->tok = tok; 226 227 if (man->flags & MAN_NEWLINE) 228 p->flags |= MAN_LINE; 229 man->flags &= ~MAN_NEWLINE; 230 return(p); 231 } 232 233 void 234 man_elem_alloc(struct man *man, int line, int pos, enum mant tok) 235 { 236 struct man_node *p; 237 238 p = man_node_alloc(man, line, pos, MAN_ELEM, tok); 239 man_node_append(man, p); 240 man->next = MAN_NEXT_CHILD; 241 } 242 243 void 244 man_head_alloc(struct man *man, int line, int pos, enum mant tok) 245 { 246 struct man_node *p; 247 248 p = man_node_alloc(man, line, pos, MAN_HEAD, tok); 249 man_node_append(man, p); 250 man->next = MAN_NEXT_CHILD; 251 } 252 253 void 254 man_body_alloc(struct man *man, int line, int pos, enum mant tok) 255 { 256 struct man_node *p; 257 258 p = man_node_alloc(man, line, pos, MAN_BODY, tok); 259 man_node_append(man, p); 260 man->next = MAN_NEXT_CHILD; 261 } 262 263 void 264 man_block_alloc(struct man *man, int line, int pos, enum mant tok) 265 { 266 struct man_node *p; 267 268 p = man_node_alloc(man, line, pos, MAN_BLOCK, tok); 269 man_node_append(man, p); 270 man->next = MAN_NEXT_CHILD; 271 } 272 273 void 274 man_word_alloc(struct man *man, int line, int pos, const char *word) 275 { 276 struct man_node *n; 277 278 n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX); 279 n->string = roff_strdup(man->roff, word); 280 man_node_append(man, n); 281 man->next = MAN_NEXT_SIBLING; 282 } 283 284 void 285 man_word_append(struct man *man, const char *word) 286 { 287 struct man_node *n; 288 char *addstr, *newstr; 289 290 n = man->last; 291 addstr = roff_strdup(man->roff, word); 292 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 293 free(addstr); 294 free(n->string); 295 n->string = newstr; 296 man->next = MAN_NEXT_SIBLING; 297 } 298 299 /* 300 * Free all of the resources held by a node. This does NOT unlink a 301 * node from its context; for that, see man_node_unlink(). 302 */ 303 static void 304 man_node_free(struct man_node *p) 305 { 306 307 free(p->string); 308 free(p); 309 } 310 311 void 312 man_node_delete(struct man *man, struct man_node *p) 313 { 314 315 while (p->child) 316 man_node_delete(man, p->child); 317 318 man_node_unlink(man, p); 319 man_node_free(p); 320 } 321 322 void 323 man_addeqn(struct man *man, const struct eqn *ep) 324 { 325 struct man_node *n; 326 327 n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX); 328 n->eqn = ep; 329 if (ep->ln > man->last->line) 330 n->flags |= MAN_LINE; 331 man_node_append(man, n); 332 man->next = MAN_NEXT_SIBLING; 333 man_descope(man, ep->ln, ep->pos); 334 } 335 336 void 337 man_addspan(struct man *man, const struct tbl_span *sp) 338 { 339 struct man_node *n; 340 341 man_breakscope(man, MAN_MAX); 342 n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX); 343 n->span = sp; 344 man_node_append(man, n); 345 man->next = MAN_NEXT_SIBLING; 346 man_descope(man, sp->line, 0); 347 } 348 349 static void 350 man_descope(struct man *man, int line, int offs) 351 { 352 /* 353 * Co-ordinate what happens with having a next-line scope open: 354 * first close out the element scope (if applicable), then close 355 * out the block scope (also if applicable). 356 */ 357 358 if (man->flags & MAN_ELINE) { 359 man->flags &= ~MAN_ELINE; 360 man_unscope(man, man->last->parent); 361 } 362 if ( ! (man->flags & MAN_BLINE)) 363 return; 364 man->flags &= ~MAN_BLINE; 365 man_unscope(man, man->last->parent); 366 man_body_alloc(man, line, offs, man->last->tok); 367 } 368 369 static int 370 man_ptext(struct man *man, int line, char *buf, int offs) 371 { 372 int i; 373 374 /* Literal free-form text whitespace is preserved. */ 375 376 if (man->flags & MAN_LITERAL) { 377 man_word_alloc(man, line, offs, buf + offs); 378 man_descope(man, line, offs); 379 return(1); 380 } 381 382 for (i = offs; buf[i] == ' '; i++) 383 /* Skip leading whitespace. */ ; 384 385 /* 386 * Blank lines are ignored right after headings 387 * but add a single vertical space elsewhere. 388 */ 389 390 if (buf[i] == '\0') { 391 /* Allocate a blank entry. */ 392 if (man->last->tok != MAN_SH && 393 man->last->tok != MAN_SS) { 394 man_elem_alloc(man, line, offs, MAN_sp); 395 man->next = MAN_NEXT_SIBLING; 396 } 397 return(1); 398 } 399 400 /* 401 * Warn if the last un-escaped character is whitespace. Then 402 * strip away the remaining spaces (tabs stay!). 403 */ 404 405 i = (int)strlen(buf); 406 assert(i); 407 408 if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 409 if (i > 1 && '\\' != buf[i - 2]) 410 mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, 411 line, i - 1, NULL); 412 413 for (--i; i && ' ' == buf[i]; i--) 414 /* Spin back to non-space. */ ; 415 416 /* Jump ahead of escaped whitespace. */ 417 i += '\\' == buf[i] ? 2 : 1; 418 419 buf[i] = '\0'; 420 } 421 man_word_alloc(man, line, offs, buf + offs); 422 423 /* 424 * End-of-sentence check. If the last character is an unescaped 425 * EOS character, then flag the node as being the end of a 426 * sentence. The front-end will know how to interpret this. 427 */ 428 429 assert(i); 430 if (mandoc_eos(buf, (size_t)i)) 431 man->last->flags |= MAN_EOS; 432 433 man_descope(man, line, offs); 434 return(1); 435 } 436 437 static int 438 man_pmacro(struct man *man, int ln, char *buf, int offs) 439 { 440 struct man_node *n; 441 const char *cp; 442 enum mant tok; 443 int i, ppos; 444 int bline; 445 char mac[5]; 446 447 ppos = offs; 448 449 /* 450 * Copy the first word into a nil-terminated buffer. 451 * Stop when a space, tab, escape, or eoln is encountered. 452 */ 453 454 i = 0; 455 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 456 mac[i++] = buf[offs++]; 457 458 mac[i] = '\0'; 459 460 tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; 461 462 if (tok == MAN_MAX) { 463 mandoc_msg(MANDOCERR_MACRO, man->parse, 464 ln, ppos, buf + ppos - 1); 465 return(1); 466 } 467 468 /* Skip a leading escape sequence or tab. */ 469 470 switch (buf[offs]) { 471 case '\\': 472 cp = buf + offs + 1; 473 mandoc_escape(&cp, NULL, NULL); 474 offs = cp - buf; 475 break; 476 case '\t': 477 offs++; 478 break; 479 default: 480 break; 481 } 482 483 /* Jump to the next non-whitespace word. */ 484 485 while (buf[offs] && buf[offs] == ' ') 486 offs++; 487 488 /* 489 * Trailing whitespace. Note that tabs are allowed to be passed 490 * into the parser as "text", so we only warn about spaces here. 491 */ 492 493 if (buf[offs] == '\0' && buf[offs - 1] == ' ') 494 mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, 495 ln, offs - 1, NULL); 496 497 /* 498 * Some macros break next-line scopes; otherwise, remember 499 * whether we are in next-line scope for a block head. 500 */ 501 502 man_breakscope(man, tok); 503 bline = man->flags & MAN_BLINE; 504 505 /* Call to handler... */ 506 507 assert(man_macros[tok].fp); 508 (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf); 509 510 /* In quick mode (for mandocdb), abort after the NAME section. */ 511 512 if (man->quick && tok == MAN_SH) { 513 n = man->last; 514 if (n->type == MAN_BODY && 515 strcmp(n->prev->child->string, "NAME")) 516 return(2); 517 } 518 519 /* 520 * If we are in a next-line scope for a block head, 521 * close it out now and switch to the body, 522 * unless the next-line scope is allowed to continue. 523 */ 524 525 if ( ! bline || man->flags & MAN_ELINE || 526 man_macros[tok].flags & MAN_NSCOPED) 527 return(1); 528 529 assert(man->flags & MAN_BLINE); 530 man->flags &= ~MAN_BLINE; 531 532 man_unscope(man, man->last->parent); 533 man_body_alloc(man, ln, ppos, man->last->tok); 534 return(1); 535 } 536 537 void 538 man_breakscope(struct man *man, enum mant tok) 539 { 540 struct man_node *n; 541 542 /* 543 * An element next line scope is open, 544 * and the new macro is not allowed inside elements. 545 * Delete the element that is being broken. 546 */ 547 548 if (man->flags & MAN_ELINE && (tok == MAN_MAX || 549 ! (man_macros[tok].flags & MAN_NSCOPED))) { 550 n = man->last; 551 assert(n->type != MAN_TEXT); 552 if (man_macros[n->tok].flags & MAN_NSCOPED) 553 n = n->parent; 554 555 mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, 556 n->line, n->pos, "%s breaks %s", 557 tok == MAN_MAX ? "TS" : man_macronames[tok], 558 man_macronames[n->tok]); 559 560 man_node_delete(man, n); 561 man->flags &= ~MAN_ELINE; 562 } 563 564 /* 565 * A block header next line scope is open, 566 * and the new macro is not allowed inside block headers. 567 * Delete the block that is being broken. 568 */ 569 570 if (man->flags & MAN_BLINE && (tok == MAN_MAX || 571 man_macros[tok].flags & MAN_BSCOPE)) { 572 n = man->last; 573 if (n->type == MAN_TEXT) 574 n = n->parent; 575 if ( ! (man_macros[n->tok].flags & MAN_BSCOPE)) 576 n = n->parent; 577 578 assert(n->type == MAN_HEAD); 579 n = n->parent; 580 assert(n->type == MAN_BLOCK); 581 assert(man_macros[n->tok].flags & MAN_SCOPED); 582 583 mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, 584 n->line, n->pos, "%s breaks %s", 585 tok == MAN_MAX ? "TS" : man_macronames[tok], 586 man_macronames[n->tok]); 587 588 man_node_delete(man, n); 589 man->flags &= ~MAN_BLINE; 590 } 591 } 592 593 /* 594 * Unlink a node from its context. If "man" is provided, the last parse 595 * point will also be adjusted accordingly. 596 */ 597 static void 598 man_node_unlink(struct man *man, struct man_node *n) 599 { 600 601 /* Adjust siblings. */ 602 603 if (n->prev) 604 n->prev->next = n->next; 605 if (n->next) 606 n->next->prev = n->prev; 607 608 /* Adjust parent. */ 609 610 if (n->parent) { 611 n->parent->nchild--; 612 if (n->parent->child == n) 613 n->parent->child = n->prev ? n->prev : n->next; 614 } 615 616 /* Adjust parse point, if applicable. */ 617 618 if (man && man->last == n) { 619 /*XXX: this can occur when bailing from validation. */ 620 /*assert(NULL == n->next);*/ 621 if (n->prev) { 622 man->last = n->prev; 623 man->next = MAN_NEXT_SIBLING; 624 } else { 625 man->last = n->parent; 626 man->next = MAN_NEXT_CHILD; 627 } 628 } 629 630 if (man && man->first == n) 631 man->first = NULL; 632 } 633 634 const struct mparse * 635 man_mparse(const struct man *man) 636 { 637 638 assert(man && man->parse); 639 return(man->parse); 640 } 641 642 void 643 man_deroff(char **dest, const struct man_node *n) 644 { 645 char *cp; 646 size_t sz; 647 648 if (n->type != MAN_TEXT) { 649 for (n = n->child; n; n = n->next) 650 man_deroff(dest, n); 651 return; 652 } 653 654 /* Skip leading whitespace and escape sequences. */ 655 656 cp = n->string; 657 while ('\0' != *cp) { 658 if ('\\' == *cp) { 659 cp++; 660 mandoc_escape((const char **)&cp, NULL, NULL); 661 } else if (isspace((unsigned char)*cp)) 662 cp++; 663 else 664 break; 665 } 666 667 /* Skip trailing whitespace. */ 668 669 for (sz = strlen(cp); sz; sz--) 670 if (0 == isspace((unsigned char)cp[sz-1])) 671 break; 672 673 /* Skip empty strings. */ 674 675 if (0 == sz) 676 return; 677 678 if (NULL == *dest) { 679 *dest = mandoc_strndup(cp, sz); 680 return; 681 } 682 683 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 684 free(*dest); 685 *dest = cp; 686 } 687