1 /* $Id: man_html.c,v 1.188 2025/06/26 17:06:34 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2013-2020,2022-2023,2025 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * HTML formatter for man(7) used by mandoc(1). 19 */ 20 #include "config.h" 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "man.h" 34 #include "out.h" 35 #include "html.h" 36 #include "main.h" 37 38 #define MAN_ARGS const struct roff_meta *man, \ 39 struct roff_node *n, \ 40 struct html *h 41 42 struct man_html_act { 43 int (*pre)(MAN_ARGS); 44 int (*post)(MAN_ARGS); 45 }; 46 47 static void print_man_head(const struct roff_meta *, 48 struct html *); 49 static void print_man_nodelist(MAN_ARGS); 50 static void print_man_node(MAN_ARGS); 51 static char list_continues(const struct roff_node *, 52 const struct roff_node *); 53 static int man_B_pre(MAN_ARGS); 54 static int man_IP_pre(MAN_ARGS); 55 static int man_I_pre(MAN_ARGS); 56 static int man_MR_pre(MAN_ARGS); 57 static int man_OP_pre(MAN_ARGS); 58 static int man_PP_pre(MAN_ARGS); 59 static int man_RS_pre(MAN_ARGS); 60 static int man_SH_pre(MAN_ARGS); 61 static int man_SM_pre(MAN_ARGS); 62 static int man_SY_pre(MAN_ARGS); 63 static int man_UR_pre(MAN_ARGS); 64 static int man_alt_pre(MAN_ARGS); 65 static int man_ign_pre(MAN_ARGS); 66 static int man_in_pre(MAN_ARGS); 67 static void man_root_post(const struct roff_meta *, 68 struct html *); 69 static void man_root_pre(const struct roff_meta *, 70 struct html *); 71 72 static const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = { 73 { NULL, NULL }, /* TH */ 74 { man_SH_pre, NULL }, /* SH */ 75 { man_SH_pre, NULL }, /* SS */ 76 { man_IP_pre, NULL }, /* TP */ 77 { man_IP_pre, NULL }, /* TQ */ 78 { man_PP_pre, NULL }, /* LP */ 79 { man_PP_pre, NULL }, /* PP */ 80 { man_PP_pre, NULL }, /* P */ 81 { man_IP_pre, NULL }, /* IP */ 82 { man_PP_pre, NULL }, /* HP */ 83 { man_SM_pre, NULL }, /* SM */ 84 { man_SM_pre, NULL }, /* SB */ 85 { man_alt_pre, NULL }, /* BI */ 86 { man_alt_pre, NULL }, /* IB */ 87 { man_alt_pre, NULL }, /* BR */ 88 { man_alt_pre, NULL }, /* RB */ 89 { NULL, NULL }, /* R */ 90 { man_B_pre, NULL }, /* B */ 91 { man_I_pre, NULL }, /* I */ 92 { man_alt_pre, NULL }, /* IR */ 93 { man_alt_pre, NULL }, /* RI */ 94 { NULL, NULL }, /* RE */ 95 { man_RS_pre, NULL }, /* RS */ 96 { man_ign_pre, NULL }, /* DT */ 97 { man_ign_pre, NULL }, /* UC */ 98 { man_ign_pre, NULL }, /* PD */ 99 { man_ign_pre, NULL }, /* AT */ 100 { man_in_pre, NULL }, /* in */ 101 { man_SY_pre, NULL }, /* SY */ 102 { NULL, NULL }, /* YS */ 103 { man_OP_pre, NULL }, /* OP */ 104 { NULL, NULL }, /* EX */ 105 { NULL, NULL }, /* EE */ 106 { man_UR_pre, NULL }, /* UR */ 107 { NULL, NULL }, /* UE */ 108 { man_UR_pre, NULL }, /* MT */ 109 { NULL, NULL }, /* ME */ 110 { man_MR_pre, NULL }, /* MR */ 111 }; 112 113 114 void 115 html_man(void *arg, const struct roff_meta *man) 116 { 117 struct html *h; 118 struct roff_node *n; 119 struct tag *t; 120 121 h = (struct html *)arg; 122 n = man->first->child; 123 124 if ((h->oflags & HTML_FRAGMENT) == 0) { 125 print_gen_decls(h); 126 print_otag(h, TAG_HTML, ""); 127 t = print_otag(h, TAG_HEAD, ""); 128 print_man_head(man, h); 129 print_tagq(h, t); 130 if (n != NULL && n->type == ROFFT_COMMENT) 131 print_gen_comment(h, n); 132 print_otag(h, TAG_BODY, ""); 133 } 134 135 man_root_pre(man, h); 136 t = print_otag(h, TAG_MAIN, "c", "manual-text"); 137 print_man_nodelist(man, n, h); 138 print_tagq(h, t); 139 man_root_post(man, h); 140 print_tagq(h, NULL); 141 } 142 143 static void 144 print_man_head(const struct roff_meta *man, struct html *h) 145 { 146 char *cp; 147 148 print_gen_head(h); 149 mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec); 150 print_otag(h, TAG_TITLE, ""); 151 print_text(h, cp); 152 free(cp); 153 } 154 155 static void 156 print_man_nodelist(MAN_ARGS) 157 { 158 while (n != NULL) { 159 print_man_node(man, n, h); 160 n = n->next; 161 } 162 } 163 164 static void 165 print_man_node(MAN_ARGS) 166 { 167 struct tag *t; 168 int child; 169 170 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT) 171 return; 172 173 if ((n->flags & NODE_NOFILL) == 0) 174 html_fillmode(h, ROFF_fi); 175 else if (html_fillmode(h, ROFF_nf) == ROFF_nf && 176 n->tok != ROFF_fi && n->flags & NODE_LINE && 177 (n->prev == NULL || n->prev->tok != MAN_YS)) 178 print_endline(h); 179 180 child = 1; 181 switch (n->type) { 182 case ROFFT_TEXT: 183 if (*n->string == '\0') { 184 print_endline(h); 185 return; 186 } 187 if (*n->string == ' ' && n->flags & NODE_LINE && 188 (h->flags & HTML_NONEWLINE) == 0) 189 print_otag(h, TAG_BR, ""); 190 else if (n->flags & NODE_DELIMC) 191 h->flags |= HTML_NOSPACE; 192 t = h->tag; 193 t->refcnt++; 194 print_text(h, n->string); 195 break; 196 case ROFFT_EQN: 197 t = h->tag; 198 t->refcnt++; 199 print_eqn(h, n->eqn); 200 break; 201 case ROFFT_TBL: 202 /* 203 * This will take care of initialising all of the table 204 * state data for the first table, then tearing it down 205 * for the last one. 206 */ 207 print_tbl(h, n->span); 208 return; 209 default: 210 /* 211 * Close out scope of font prior to opening a macro 212 * scope. 213 */ 214 if (h->metac != ESCAPE_FONTROMAN) { 215 h->metal = h->metac; 216 h->metac = ESCAPE_FONTROMAN; 217 } 218 219 /* 220 * Close out the current table, if it's open, and unset 221 * the "meta" table state. This will be reopened on the 222 * next table element. 223 */ 224 if (h->tblt != NULL) 225 print_tblclose(h); 226 t = h->tag; 227 t->refcnt++; 228 if (n->tok < ROFF_MAX) { 229 roff_html_pre(h, n); 230 t->refcnt--; 231 print_stagq(h, t); 232 return; 233 } 234 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 235 if (man_html_acts[n->tok - MAN_TH].pre != NULL) 236 child = (*man_html_acts[n->tok - MAN_TH].pre)(man, 237 n, h); 238 break; 239 } 240 241 if (child && n->child != NULL) 242 print_man_nodelist(man, n->child, h); 243 244 /* This will automatically close out any font scope. */ 245 t->refcnt--; 246 if (n->type == ROFFT_BLOCK && 247 (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) { 248 t = h->tag; 249 while (t->tag != TAG_DL && t->tag != TAG_UL) 250 t = t->next; 251 /* 252 * Close the list if no further item of the same type 253 * follows; otherwise, close the item only. 254 */ 255 if (list_continues(n, roff_node_next(n)) == '\0') { 256 print_tagq(h, t); 257 t = NULL; 258 } 259 } 260 if (t != NULL) 261 print_stagq(h, t); 262 } 263 264 static void 265 man_root_pre(const struct roff_meta *man, struct html *h) 266 { 267 struct tag *t; 268 char *title; 269 270 assert(man->title); 271 assert(man->msec); 272 mandoc_asprintf(&title, "%s(%s)", man->title, man->msec); 273 274 t = print_otag(h, TAG_DIV, "cr?", "head", "doc-pageheader", 275 "aria-label", "Manual header line"); 276 277 print_otag(h, TAG_SPAN, "c", "head-ltitle"); 278 print_text(h, title); 279 print_stagq(h, t); 280 281 print_otag(h, TAG_SPAN, "c", "head-vol"); 282 if (man->vol != NULL) 283 print_text(h, man->vol); 284 print_stagq(h, t); 285 286 print_otag(h, TAG_SPAN, "c", "head-rtitle"); 287 print_text(h, title); 288 print_tagq(h, t); 289 free(title); 290 } 291 292 static void 293 man_root_post(const struct roff_meta *man, struct html *h) 294 { 295 struct tag *t; 296 char *title; 297 298 assert(man->title != NULL); 299 if (man->msec == NULL) 300 title = mandoc_strdup(man->title); 301 else 302 mandoc_asprintf(&title, "%s(%s)", man->title, man->msec); 303 304 t = print_otag(h, TAG_DIV, "cr?", "foot", "doc-pagefooter", 305 "aria-label", "Manual footer line"); 306 307 print_otag(h, TAG_SPAN, "c", "foot-left"); 308 if (man->os != NULL) 309 print_text(h, man->os); 310 print_stagq(h, t); 311 312 print_otag(h, TAG_SPAN, "c", "foot-date"); 313 print_text(h, man->date); 314 print_stagq(h, t); 315 316 print_otag(h, TAG_SPAN, "c", "foot-right"); 317 print_text(h, title); 318 print_tagq(h, t); 319 free(title); 320 } 321 322 static int 323 man_SH_pre(MAN_ARGS) 324 { 325 const char *class; 326 enum htmltag tag; 327 328 if (n->tok == MAN_SH) { 329 tag = TAG_H2; 330 class = "Sh"; 331 } else { 332 tag = TAG_H3; 333 class = "Ss"; 334 } 335 switch (n->type) { 336 case ROFFT_BLOCK: 337 html_close_paragraph(h); 338 print_otag(h, TAG_SECTION, "c", class); 339 break; 340 case ROFFT_HEAD: 341 print_otag_id(h, tag, class, n); 342 break; 343 case ROFFT_BODY: 344 break; 345 default: 346 abort(); 347 } 348 return 1; 349 } 350 351 static int 352 man_alt_pre(MAN_ARGS) 353 { 354 const struct roff_node *nn; 355 struct tag *t; 356 int i; 357 enum htmltag fp; 358 359 for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) { 360 switch (n->tok) { 361 case MAN_BI: 362 fp = i % 2 ? TAG_I : TAG_B; 363 break; 364 case MAN_IB: 365 fp = i % 2 ? TAG_B : TAG_I; 366 break; 367 case MAN_RI: 368 fp = i % 2 ? TAG_I : TAG_MAX; 369 break; 370 case MAN_IR: 371 fp = i % 2 ? TAG_MAX : TAG_I; 372 break; 373 case MAN_BR: 374 fp = i % 2 ? TAG_MAX : TAG_B; 375 break; 376 case MAN_RB: 377 fp = i % 2 ? TAG_B : TAG_MAX; 378 break; 379 default: 380 abort(); 381 } 382 383 if (i) 384 h->flags |= HTML_NOSPACE; 385 386 if (fp != TAG_MAX) 387 t = print_otag(h, fp, ""); 388 389 print_text(h, nn->string); 390 391 if (fp != TAG_MAX) 392 print_tagq(h, t); 393 } 394 return 0; 395 } 396 397 static int 398 man_SM_pre(MAN_ARGS) 399 { 400 print_otag(h, TAG_SMALL, ""); 401 if (n->tok == MAN_SB) 402 print_otag(h, TAG_B, ""); 403 return 1; 404 } 405 406 static int 407 man_PP_pre(MAN_ARGS) 408 { 409 switch (n->type) { 410 case ROFFT_BLOCK: 411 html_close_paragraph(h); 412 break; 413 case ROFFT_HEAD: 414 return 0; 415 case ROFFT_BODY: 416 if (n->child != NULL && 417 (n->child->flags & NODE_NOFILL) == 0) 418 print_otag(h, TAG_P, "c", 419 n->tok == MAN_HP ? "Pp HP" : "Pp"); 420 break; 421 default: 422 abort(); 423 } 424 return 1; 425 } 426 427 static char 428 list_continues(const struct roff_node *n1, const struct roff_node *n2) 429 { 430 const char *s1, *s2; 431 char c1, c2; 432 433 if (n1 == NULL || n1->type != ROFFT_BLOCK || 434 n2 == NULL || n2->type != ROFFT_BLOCK) 435 return '\0'; 436 if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) && 437 (n2->tok == MAN_TP || n2->tok == MAN_TQ)) 438 return ' '; 439 if (n1->tok != MAN_IP || n2->tok != MAN_IP) 440 return '\0'; 441 n1 = n1->head->child; 442 n2 = n2->head->child; 443 s1 = n1 == NULL ? "" : n1->string; 444 s2 = n2 == NULL ? "" : n2->string; 445 c1 = strcmp(s1, "*") == 0 ? '*' : 446 strcmp(s1, "\\-") == 0 ? '-' : 447 strcmp(s1, "\\(bu") == 0 ? 'b' : 448 strcmp(s1, "\\[bu]") == 0 ? 'b' : ' '; 449 c2 = strcmp(s2, "*") == 0 ? '*' : 450 strcmp(s2, "\\-") == 0 ? '-' : 451 strcmp(s2, "\\(bu") == 0 ? 'b' : 452 strcmp(s2, "\\[bu]") == 0 ? 'b' : ' '; 453 return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1; 454 } 455 456 static int 457 man_IP_pre(MAN_ARGS) 458 { 459 struct roff_node *nn; 460 const char *list_class; 461 enum htmltag list_elem, body_elem; 462 char list_type; 463 464 nn = n->type == ROFFT_BLOCK ? n : n->parent; 465 list_type = list_continues(roff_node_prev(nn), nn); 466 if (list_type == '\0') { 467 /* Start a new list. */ 468 list_type = list_continues(nn, roff_node_next(nn)); 469 if (list_type == '\0') 470 list_type = ' '; 471 switch (list_type) { 472 case ' ': 473 list_class = "Bl-tag"; 474 list_elem = TAG_DL; 475 break; 476 case '*': 477 list_class = "Bl-bullet"; 478 list_elem = TAG_UL; 479 break; 480 case '-': 481 list_class = "Bl-dash"; 482 list_elem = TAG_UL; 483 break; 484 default: 485 abort(); 486 } 487 } else { 488 /* Continue a list that was started earlier. */ 489 list_class = NULL; 490 list_elem = TAG_MAX; 491 } 492 body_elem = list_type == ' ' ? TAG_DD : TAG_LI; 493 494 switch (n->type) { 495 case ROFFT_BLOCK: 496 html_close_paragraph(h); 497 if (list_elem != TAG_MAX) 498 print_otag(h, list_elem, "c", list_class); 499 return 1; 500 case ROFFT_HEAD: 501 if (body_elem == TAG_LI) 502 return 0; 503 print_otag_id(h, TAG_DT, NULL, n); 504 break; 505 case ROFFT_BODY: 506 print_otag(h, body_elem, ""); 507 return 1; 508 default: 509 abort(); 510 } 511 switch(n->tok) { 512 case MAN_IP: /* Only print the first header element. */ 513 if (n->child != NULL) 514 print_man_node(man, n->child, h); 515 break; 516 case MAN_TP: /* Only print next-line header elements. */ 517 case MAN_TQ: 518 nn = n->child; 519 while (nn != NULL && (NODE_LINE & nn->flags) == 0) 520 nn = nn->next; 521 while (nn != NULL) { 522 print_man_node(man, nn, h); 523 nn = nn->next; 524 } 525 break; 526 default: 527 abort(); 528 } 529 return 0; 530 } 531 532 static int 533 man_MR_pre(MAN_ARGS) 534 { 535 struct tag *t; 536 const char *name, *section, *suffix; 537 char *label; 538 539 html_setfont(h, ESCAPE_FONTROMAN); 540 name = section = suffix = label = NULL; 541 if (n->child != NULL) { 542 name = n->child->string; 543 if (n->child->next != NULL) { 544 section = n->child->next->string; 545 mandoc_asprintf(&label, 546 "%s, section %s", name, section); 547 if (n->child->next->next != NULL) 548 suffix = n->child->next->next->string; 549 } 550 } 551 552 if (name != NULL && section != NULL && h->base_man1 != NULL) 553 t = print_otag(h, TAG_A, "chM?", "Xr", 554 name, section, "aria-label", label); 555 else 556 t = print_otag(h, TAG_A, "c?", "Xr", "aria-label", label); 557 558 free(label); 559 if (name != NULL) { 560 print_text(h, name); 561 h->flags |= HTML_NOSPACE; 562 } 563 print_text(h, "("); 564 h->flags |= HTML_NOSPACE; 565 if (section != NULL) { 566 print_text(h, section); 567 h->flags |= HTML_NOSPACE; 568 } 569 print_text(h, ")"); 570 print_tagq(h, t); 571 if (suffix != NULL) { 572 h->flags |= HTML_NOSPACE; 573 print_text(h, suffix); 574 } 575 return 0; 576 } 577 578 static int 579 man_OP_pre(MAN_ARGS) 580 { 581 struct tag *tt; 582 583 print_text(h, "["); 584 h->flags |= HTML_NOSPACE; 585 tt = print_otag(h, TAG_SPAN, "c", "Op"); 586 587 if ((n = n->child) != NULL) { 588 print_otag(h, TAG_B, ""); 589 print_text(h, n->string); 590 } 591 592 print_stagq(h, tt); 593 594 if (n != NULL && n->next != NULL) { 595 print_otag(h, TAG_I, ""); 596 print_text(h, n->next->string); 597 } 598 599 print_stagq(h, tt); 600 h->flags |= HTML_NOSPACE; 601 print_text(h, "]"); 602 return 0; 603 } 604 605 static int 606 man_B_pre(MAN_ARGS) 607 { 608 print_otag(h, TAG_B, ""); 609 return 1; 610 } 611 612 static int 613 man_I_pre(MAN_ARGS) 614 { 615 print_otag(h, TAG_I, ""); 616 return 1; 617 } 618 619 static int 620 man_in_pre(MAN_ARGS) 621 { 622 print_otag(h, TAG_BR, ""); 623 return 0; 624 } 625 626 static int 627 man_ign_pre(MAN_ARGS) 628 { 629 return 0; 630 } 631 632 static int 633 man_RS_pre(MAN_ARGS) 634 { 635 switch (n->type) { 636 case ROFFT_BLOCK: 637 html_close_paragraph(h); 638 break; 639 case ROFFT_HEAD: 640 return 0; 641 case ROFFT_BODY: 642 print_otag(h, TAG_DIV, "c", "Bd-indent"); 643 break; 644 default: 645 abort(); 646 } 647 return 1; 648 } 649 650 static int 651 man_SY_pre(MAN_ARGS) 652 { 653 switch (n->type) { 654 case ROFFT_BLOCK: 655 html_close_paragraph(h); 656 print_otag(h, TAG_TABLE, "c", "Nm"); 657 print_otag(h, TAG_TR, ""); 658 break; 659 case ROFFT_HEAD: 660 print_otag(h, TAG_TD, ""); 661 print_otag(h, TAG_CODE, "c", "Nm"); 662 break; 663 case ROFFT_BODY: 664 print_otag(h, TAG_TD, ""); 665 break; 666 default: 667 abort(); 668 } 669 return 1; 670 } 671 672 static int 673 man_UR_pre(MAN_ARGS) 674 { 675 char *cp; 676 677 n = n->child; 678 assert(n->type == ROFFT_HEAD); 679 if (n->child != NULL) { 680 assert(n->child->type == ROFFT_TEXT); 681 if (n->tok == MAN_MT) { 682 mandoc_asprintf(&cp, "mailto:%s", n->child->string); 683 print_otag(h, TAG_A, "ch", "Mt", cp); 684 free(cp); 685 } else 686 print_otag(h, TAG_A, "ch", "Lk", n->child->string); 687 } 688 689 assert(n->next->type == ROFFT_BODY); 690 if (n->next->child != NULL) 691 n = n->next; 692 693 print_man_nodelist(man, n->child, h); 694 return 0; 695 } 696