1 /* $Id: html.c,v 1.192 2016/01/04 12:45:29 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdarg.h> 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "mandoc.h" 32 #include "mandoc_aux.h" 33 #include "out.h" 34 #include "html.h" 35 #include "manconf.h" 36 #include "main.h" 37 38 struct htmldata { 39 const char *name; 40 int flags; 41 #define HTML_CLRLINE (1 << 0) 42 #define HTML_NOSTACK (1 << 1) 43 #define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ 44 }; 45 46 static const struct htmldata htmltags[TAG_MAX] = { 47 {"html", HTML_CLRLINE}, /* TAG_HTML */ 48 {"head", HTML_CLRLINE}, /* TAG_HEAD */ 49 {"body", HTML_CLRLINE}, /* TAG_BODY */ 50 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ 51 {"title", HTML_CLRLINE}, /* TAG_TITLE */ 52 {"div", HTML_CLRLINE}, /* TAG_DIV */ 53 {"h1", 0}, /* TAG_H1 */ 54 {"h2", 0}, /* TAG_H2 */ 55 {"span", 0}, /* TAG_SPAN */ 56 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ 57 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ 58 {"a", 0}, /* TAG_A */ 59 {"table", HTML_CLRLINE}, /* TAG_TABLE */ 60 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */ 61 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ 62 {"tr", HTML_CLRLINE}, /* TAG_TR */ 63 {"td", HTML_CLRLINE}, /* TAG_TD */ 64 {"li", HTML_CLRLINE}, /* TAG_LI */ 65 {"ul", HTML_CLRLINE}, /* TAG_UL */ 66 {"ol", HTML_CLRLINE}, /* TAG_OL */ 67 {"dl", HTML_CLRLINE}, /* TAG_DL */ 68 {"dt", HTML_CLRLINE}, /* TAG_DT */ 69 {"dd", HTML_CLRLINE}, /* TAG_DD */ 70 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */ 71 {"pre", HTML_CLRLINE }, /* TAG_PRE */ 72 {"b", 0 }, /* TAG_B */ 73 {"i", 0 }, /* TAG_I */ 74 {"code", 0 }, /* TAG_CODE */ 75 {"small", 0 }, /* TAG_SMALL */ 76 {"style", HTML_CLRLINE}, /* TAG_STYLE */ 77 {"math", HTML_CLRLINE}, /* TAG_MATH */ 78 {"mrow", 0}, /* TAG_MROW */ 79 {"mi", 0}, /* TAG_MI */ 80 {"mo", 0}, /* TAG_MO */ 81 {"msup", 0}, /* TAG_MSUP */ 82 {"msub", 0}, /* TAG_MSUB */ 83 {"msubsup", 0}, /* TAG_MSUBSUP */ 84 {"mfrac", 0}, /* TAG_MFRAC */ 85 {"msqrt", 0}, /* TAG_MSQRT */ 86 {"mfenced", 0}, /* TAG_MFENCED */ 87 {"mtable", 0}, /* TAG_MTABLE */ 88 {"mtr", 0}, /* TAG_MTR */ 89 {"mtd", 0}, /* TAG_MTD */ 90 {"munderover", 0}, /* TAG_MUNDEROVER */ 91 {"munder", 0}, /* TAG_MUNDER*/ 92 {"mover", 0}, /* TAG_MOVER*/ 93 }; 94 95 static const char *const htmlattrs[ATTR_MAX] = { 96 "name", /* ATTR_NAME */ 97 "rel", /* ATTR_REL */ 98 "href", /* ATTR_HREF */ 99 "type", /* ATTR_TYPE */ 100 "media", /* ATTR_MEDIA */ 101 "class", /* ATTR_CLASS */ 102 "style", /* ATTR_STYLE */ 103 "id", /* ATTR_ID */ 104 "colspan", /* ATTR_COLSPAN */ 105 "charset", /* ATTR_CHARSET */ 106 "open", /* ATTR_OPEN */ 107 "close", /* ATTR_CLOSE */ 108 "mathvariant", /* ATTR_MATHVARIANT */ 109 }; 110 111 static const char *const roffscales[SCALE_MAX] = { 112 "cm", /* SCALE_CM */ 113 "in", /* SCALE_IN */ 114 "pc", /* SCALE_PC */ 115 "pt", /* SCALE_PT */ 116 "em", /* SCALE_EM */ 117 "em", /* SCALE_MM */ 118 "ex", /* SCALE_EN */ 119 "ex", /* SCALE_BU */ 120 "em", /* SCALE_VS */ 121 "ex", /* SCALE_FS */ 122 }; 123 124 static void bufncat(struct html *, const char *, size_t); 125 static void print_ctag(struct html *, struct tag *); 126 static int print_escape(char); 127 static int print_encode(struct html *, const char *, int); 128 static void print_metaf(struct html *, enum mandoc_esc); 129 static void print_attr(struct html *, const char *, const char *); 130 131 132 void * 133 html_alloc(const struct manoutput *outopts) 134 { 135 struct html *h; 136 137 h = mandoc_calloc(1, sizeof(struct html)); 138 139 h->tags.head = NULL; 140 h->style = outopts->style; 141 h->base_man = outopts->man; 142 h->base_includes = outopts->includes; 143 if (outopts->fragment) 144 h->oflags |= HTML_FRAGMENT; 145 146 return h; 147 } 148 149 void 150 html_free(void *p) 151 { 152 struct tag *tag; 153 struct html *h; 154 155 h = (struct html *)p; 156 157 while ((tag = h->tags.head) != NULL) { 158 h->tags.head = tag->next; 159 free(tag); 160 } 161 162 free(h); 163 } 164 165 void 166 print_gen_head(struct html *h) 167 { 168 struct htmlpair tag[4]; 169 struct tag *t; 170 171 tag[0].key = ATTR_CHARSET; 172 tag[0].val = "utf-8"; 173 print_otag(h, TAG_META, 1, tag); 174 175 /* 176 * Print a default style-sheet. 177 */ 178 t = print_otag(h, TAG_STYLE, 0, NULL); 179 print_text(h, "table.head, table.foot { width: 100%; }\n" 180 "td.head-rtitle, td.foot-os { text-align: right; }\n" 181 "td.head-vol { text-align: center; }\n" 182 "table.foot td { width: 50%; }\n" 183 "table.head td { width: 33%; }\n" 184 "div.spacer { margin: 1em 0; }\n"); 185 print_tagq(h, t); 186 187 if (h->style) { 188 tag[0].key = ATTR_REL; 189 tag[0].val = "stylesheet"; 190 tag[1].key = ATTR_HREF; 191 tag[1].val = h->style; 192 tag[2].key = ATTR_TYPE; 193 tag[2].val = "text/css"; 194 tag[3].key = ATTR_MEDIA; 195 tag[3].val = "all"; 196 print_otag(h, TAG_LINK, 4, tag); 197 } 198 } 199 200 static void 201 print_metaf(struct html *h, enum mandoc_esc deco) 202 { 203 enum htmlfont font; 204 205 switch (deco) { 206 case ESCAPE_FONTPREV: 207 font = h->metal; 208 break; 209 case ESCAPE_FONTITALIC: 210 font = HTMLFONT_ITALIC; 211 break; 212 case ESCAPE_FONTBOLD: 213 font = HTMLFONT_BOLD; 214 break; 215 case ESCAPE_FONTBI: 216 font = HTMLFONT_BI; 217 break; 218 case ESCAPE_FONT: 219 case ESCAPE_FONTROMAN: 220 font = HTMLFONT_NONE; 221 break; 222 default: 223 abort(); 224 } 225 226 if (h->metaf) { 227 print_tagq(h, h->metaf); 228 h->metaf = NULL; 229 } 230 231 h->metal = h->metac; 232 h->metac = font; 233 234 switch (font) { 235 case HTMLFONT_ITALIC: 236 h->metaf = print_otag(h, TAG_I, 0, NULL); 237 break; 238 case HTMLFONT_BOLD: 239 h->metaf = print_otag(h, TAG_B, 0, NULL); 240 break; 241 case HTMLFONT_BI: 242 h->metaf = print_otag(h, TAG_B, 0, NULL); 243 print_otag(h, TAG_I, 0, NULL); 244 break; 245 default: 246 break; 247 } 248 } 249 250 int 251 html_strlen(const char *cp) 252 { 253 size_t rsz; 254 int skip, sz; 255 256 /* 257 * Account for escaped sequences within string length 258 * calculations. This follows the logic in term_strlen() as we 259 * must calculate the width of produced strings. 260 * Assume that characters are always width of "1". This is 261 * hacky, but it gets the job done for approximation of widths. 262 */ 263 264 sz = 0; 265 skip = 0; 266 while (1) { 267 rsz = strcspn(cp, "\\"); 268 if (rsz) { 269 cp += rsz; 270 if (skip) { 271 skip = 0; 272 rsz--; 273 } 274 sz += rsz; 275 } 276 if ('\0' == *cp) 277 break; 278 cp++; 279 switch (mandoc_escape(&cp, NULL, NULL)) { 280 case ESCAPE_ERROR: 281 return sz; 282 case ESCAPE_UNICODE: 283 case ESCAPE_NUMBERED: 284 case ESCAPE_SPECIAL: 285 case ESCAPE_OVERSTRIKE: 286 if (skip) 287 skip = 0; 288 else 289 sz++; 290 break; 291 case ESCAPE_SKIPCHAR: 292 skip = 1; 293 break; 294 default: 295 break; 296 } 297 } 298 return sz; 299 } 300 301 static int 302 print_escape(char c) 303 { 304 305 switch (c) { 306 case '<': 307 printf("<"); 308 break; 309 case '>': 310 printf(">"); 311 break; 312 case '&': 313 printf("&"); 314 break; 315 case '"': 316 printf("""); 317 break; 318 case ASCII_NBRSP: 319 printf(" "); 320 break; 321 case ASCII_HYPH: 322 putchar('-'); 323 break; 324 case ASCII_BREAK: 325 break; 326 default: 327 return 0; 328 } 329 return 1; 330 } 331 332 static int 333 print_encode(struct html *h, const char *p, int norecurse) 334 { 335 size_t sz; 336 int c, len, nospace; 337 const char *seq; 338 enum mandoc_esc esc; 339 static const char rejs[9] = { '\\', '<', '>', '&', '"', 340 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 341 342 nospace = 0; 343 344 while ('\0' != *p) { 345 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 346 h->flags &= ~HTML_SKIPCHAR; 347 p++; 348 continue; 349 } 350 351 sz = strcspn(p, rejs); 352 353 fwrite(p, 1, sz, stdout); 354 p += (int)sz; 355 356 if ('\0' == *p) 357 break; 358 359 if (print_escape(*p++)) 360 continue; 361 362 esc = mandoc_escape(&p, &seq, &len); 363 if (ESCAPE_ERROR == esc) 364 break; 365 366 switch (esc) { 367 case ESCAPE_FONT: 368 case ESCAPE_FONTPREV: 369 case ESCAPE_FONTBOLD: 370 case ESCAPE_FONTITALIC: 371 case ESCAPE_FONTBI: 372 case ESCAPE_FONTROMAN: 373 if (0 == norecurse) 374 print_metaf(h, esc); 375 continue; 376 case ESCAPE_SKIPCHAR: 377 h->flags |= HTML_SKIPCHAR; 378 continue; 379 default: 380 break; 381 } 382 383 if (h->flags & HTML_SKIPCHAR) { 384 h->flags &= ~HTML_SKIPCHAR; 385 continue; 386 } 387 388 switch (esc) { 389 case ESCAPE_UNICODE: 390 /* Skip past "u" header. */ 391 c = mchars_num2uc(seq + 1, len - 1); 392 break; 393 case ESCAPE_NUMBERED: 394 c = mchars_num2char(seq, len); 395 if (c < 0) 396 continue; 397 break; 398 case ESCAPE_SPECIAL: 399 c = mchars_spec2cp(seq, len); 400 if (c <= 0) 401 continue; 402 break; 403 case ESCAPE_NOSPACE: 404 if ('\0' == *p) 405 nospace = 1; 406 continue; 407 case ESCAPE_OVERSTRIKE: 408 if (len == 0) 409 continue; 410 c = seq[len - 1]; 411 break; 412 default: 413 continue; 414 } 415 if ((c < 0x20 && c != 0x09) || 416 (c > 0x7E && c < 0xA0)) 417 c = 0xFFFD; 418 if (c > 0x7E) 419 printf("&#%d;", c); 420 else if ( ! print_escape(c)) 421 putchar(c); 422 } 423 424 return nospace; 425 } 426 427 static void 428 print_attr(struct html *h, const char *key, const char *val) 429 { 430 printf(" %s=\"", key); 431 (void)print_encode(h, val, 1); 432 putchar('\"'); 433 } 434 435 struct tag * 436 print_otag(struct html *h, enum htmltag tag, 437 int sz, const struct htmlpair *p) 438 { 439 int i; 440 struct tag *t; 441 442 /* Push this tags onto the stack of open scopes. */ 443 444 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { 445 t = mandoc_malloc(sizeof(struct tag)); 446 t->tag = tag; 447 t->next = h->tags.head; 448 h->tags.head = t; 449 } else 450 t = NULL; 451 452 if ( ! (HTML_NOSPACE & h->flags)) 453 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { 454 /* Manage keeps! */ 455 if ( ! (HTML_KEEP & h->flags)) { 456 if (HTML_PREKEEP & h->flags) 457 h->flags |= HTML_KEEP; 458 putchar(' '); 459 } else 460 printf(" "); 461 } 462 463 if ( ! (h->flags & HTML_NONOSPACE)) 464 h->flags &= ~HTML_NOSPACE; 465 else 466 h->flags |= HTML_NOSPACE; 467 468 /* Print out the tag name and attributes. */ 469 470 printf("<%s", htmltags[tag].name); 471 for (i = 0; i < sz; i++) 472 print_attr(h, htmlattrs[p[i].key], p[i].val); 473 474 /* Accommodate for "well-formed" singleton escaping. */ 475 476 if (HTML_AUTOCLOSE & htmltags[tag].flags) 477 putchar('/'); 478 479 putchar('>'); 480 481 h->flags |= HTML_NOSPACE; 482 483 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags) 484 putchar('\n'); 485 486 return t; 487 } 488 489 static void 490 print_ctag(struct html *h, struct tag *tag) 491 { 492 493 /* 494 * Remember to close out and nullify the current 495 * meta-font and table, if applicable. 496 */ 497 if (tag == h->metaf) 498 h->metaf = NULL; 499 if (tag == h->tblt) 500 h->tblt = NULL; 501 502 printf("</%s>", htmltags[tag->tag].name); 503 if (HTML_CLRLINE & htmltags[tag->tag].flags) { 504 h->flags |= HTML_NOSPACE; 505 putchar('\n'); 506 } 507 508 h->tags.head = tag->next; 509 free(tag); 510 } 511 512 void 513 print_gen_decls(struct html *h) 514 { 515 516 puts("<!DOCTYPE html>"); 517 } 518 519 void 520 print_text(struct html *h, const char *word) 521 { 522 523 if ( ! (HTML_NOSPACE & h->flags)) { 524 /* Manage keeps! */ 525 if ( ! (HTML_KEEP & h->flags)) { 526 if (HTML_PREKEEP & h->flags) 527 h->flags |= HTML_KEEP; 528 putchar(' '); 529 } else 530 printf(" "); 531 } 532 533 assert(NULL == h->metaf); 534 switch (h->metac) { 535 case HTMLFONT_ITALIC: 536 h->metaf = print_otag(h, TAG_I, 0, NULL); 537 break; 538 case HTMLFONT_BOLD: 539 h->metaf = print_otag(h, TAG_B, 0, NULL); 540 break; 541 case HTMLFONT_BI: 542 h->metaf = print_otag(h, TAG_B, 0, NULL); 543 print_otag(h, TAG_I, 0, NULL); 544 break; 545 default: 546 break; 547 } 548 549 assert(word); 550 if ( ! print_encode(h, word, 0)) { 551 if ( ! (h->flags & HTML_NONOSPACE)) 552 h->flags &= ~HTML_NOSPACE; 553 h->flags &= ~HTML_NONEWLINE; 554 } else 555 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 556 557 if (h->metaf) { 558 print_tagq(h, h->metaf); 559 h->metaf = NULL; 560 } 561 562 h->flags &= ~HTML_IGNDELIM; 563 } 564 565 void 566 print_tagq(struct html *h, const struct tag *until) 567 { 568 struct tag *tag; 569 570 while ((tag = h->tags.head) != NULL) { 571 print_ctag(h, tag); 572 if (until && tag == until) 573 return; 574 } 575 } 576 577 void 578 print_stagq(struct html *h, const struct tag *suntil) 579 { 580 struct tag *tag; 581 582 while ((tag = h->tags.head) != NULL) { 583 if (suntil && tag == suntil) 584 return; 585 print_ctag(h, tag); 586 } 587 } 588 589 void 590 print_paragraph(struct html *h) 591 { 592 struct tag *t; 593 struct htmlpair tag; 594 595 PAIR_CLASS_INIT(&tag, "spacer"); 596 t = print_otag(h, TAG_DIV, 1, &tag); 597 print_tagq(h, t); 598 } 599 600 601 void 602 bufinit(struct html *h) 603 { 604 605 h->buf[0] = '\0'; 606 h->buflen = 0; 607 } 608 609 void 610 bufcat_style(struct html *h, const char *key, const char *val) 611 { 612 613 bufcat(h, key); 614 bufcat(h, ":"); 615 bufcat(h, val); 616 bufcat(h, ";"); 617 } 618 619 void 620 bufcat(struct html *h, const char *p) 621 { 622 623 /* 624 * XXX This is broken and not easy to fix. 625 * When using the -Oincludes option, buffmt_includes() 626 * may pass in strings overrunning BUFSIZ, causing a crash. 627 */ 628 629 h->buflen = strlcat(h->buf, p, BUFSIZ); 630 assert(h->buflen < BUFSIZ); 631 } 632 633 void 634 bufcat_fmt(struct html *h, const char *fmt, ...) 635 { 636 va_list ap; 637 638 va_start(ap, fmt); 639 (void)vsnprintf(h->buf + (int)h->buflen, 640 BUFSIZ - h->buflen - 1, fmt, ap); 641 va_end(ap); 642 h->buflen = strlen(h->buf); 643 } 644 645 static void 646 bufncat(struct html *h, const char *p, size_t sz) 647 { 648 649 assert(h->buflen + sz + 1 < BUFSIZ); 650 strncat(h->buf, p, sz); 651 h->buflen += sz; 652 } 653 654 void 655 buffmt_includes(struct html *h, const char *name) 656 { 657 const char *p, *pp; 658 659 pp = h->base_includes; 660 661 bufinit(h); 662 while (NULL != (p = strchr(pp, '%'))) { 663 bufncat(h, pp, (size_t)(p - pp)); 664 switch (*(p + 1)) { 665 case'I': 666 bufcat(h, name); 667 break; 668 default: 669 bufncat(h, p, 2); 670 break; 671 } 672 pp = p + 2; 673 } 674 if (pp) 675 bufcat(h, pp); 676 } 677 678 void 679 buffmt_man(struct html *h, const char *name, const char *sec) 680 { 681 const char *p, *pp; 682 683 pp = h->base_man; 684 685 bufinit(h); 686 while (NULL != (p = strchr(pp, '%'))) { 687 bufncat(h, pp, (size_t)(p - pp)); 688 switch (*(p + 1)) { 689 case 'S': 690 bufcat(h, sec ? sec : "1"); 691 break; 692 case 'N': 693 bufcat_fmt(h, "%s", name); 694 break; 695 default: 696 bufncat(h, p, 2); 697 break; 698 } 699 pp = p + 2; 700 } 701 if (pp) 702 bufcat(h, pp); 703 } 704 705 void 706 bufcat_su(struct html *h, const char *p, const struct roffsu *su) 707 { 708 double v; 709 710 v = su->scale; 711 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0)) 712 v = 1.0; 713 else if (SCALE_BU == su->unit) 714 v /= 24.0; 715 716 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]); 717 } 718 719 void 720 bufcat_id(struct html *h, const char *src) 721 { 722 723 /* Cf. <http://www.w3.org/TR/html5/dom.html#the-id-attribute>. */ 724 725 for (; '\0' != *src; src++) 726 bufncat(h, *src == ' ' ? "_" : src, 1); 727 } 728