1 /* $Id: html.c,v 1.185 2015/01/21 20:33:25 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdarg.h> 25 #include <stdio.h> 26 #include <stdint.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "mandoc.h" 32 #include "mandoc_aux.h" 33 #include "out.h" 34 #include "html.h" 35 #include "main.h" 36 37 struct htmldata { 38 const char *name; 39 int flags; 40 #define HTML_CLRLINE (1 << 0) 41 #define HTML_NOSTACK (1 << 1) 42 #define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ 43 }; 44 45 static const struct htmldata htmltags[TAG_MAX] = { 46 {"html", HTML_CLRLINE}, /* TAG_HTML */ 47 {"head", HTML_CLRLINE}, /* TAG_HEAD */ 48 {"body", HTML_CLRLINE}, /* TAG_BODY */ 49 {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ 50 {"title", HTML_CLRLINE}, /* TAG_TITLE */ 51 {"div", HTML_CLRLINE}, /* TAG_DIV */ 52 {"h1", 0}, /* TAG_H1 */ 53 {"h2", 0}, /* TAG_H2 */ 54 {"span", 0}, /* TAG_SPAN */ 55 {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ 56 {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ 57 {"a", 0}, /* TAG_A */ 58 {"table", HTML_CLRLINE}, /* TAG_TABLE */ 59 {"tbody", HTML_CLRLINE}, /* TAG_TBODY */ 60 {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ 61 {"tr", HTML_CLRLINE}, /* TAG_TR */ 62 {"td", HTML_CLRLINE}, /* TAG_TD */ 63 {"li", HTML_CLRLINE}, /* TAG_LI */ 64 {"ul", HTML_CLRLINE}, /* TAG_UL */ 65 {"ol", HTML_CLRLINE}, /* TAG_OL */ 66 {"dl", HTML_CLRLINE}, /* TAG_DL */ 67 {"dt", HTML_CLRLINE}, /* TAG_DT */ 68 {"dd", HTML_CLRLINE}, /* TAG_DD */ 69 {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */ 70 {"pre", HTML_CLRLINE }, /* TAG_PRE */ 71 {"b", 0 }, /* TAG_B */ 72 {"i", 0 }, /* TAG_I */ 73 {"code", 0 }, /* TAG_CODE */ 74 {"small", 0 }, /* TAG_SMALL */ 75 {"style", HTML_CLRLINE}, /* TAG_STYLE */ 76 {"math", HTML_CLRLINE}, /* TAG_MATH */ 77 {"mrow", 0}, /* TAG_MROW */ 78 {"mi", 0}, /* TAG_MI */ 79 {"mo", 0}, /* TAG_MO */ 80 {"msup", 0}, /* TAG_MSUP */ 81 {"msub", 0}, /* TAG_MSUB */ 82 {"msubsup", 0}, /* TAG_MSUBSUP */ 83 {"mfrac", 0}, /* TAG_MFRAC */ 84 {"msqrt", 0}, /* TAG_MSQRT */ 85 {"mfenced", 0}, /* TAG_MFENCED */ 86 {"mtable", 0}, /* TAG_MTABLE */ 87 {"mtr", 0}, /* TAG_MTR */ 88 {"mtd", 0}, /* TAG_MTD */ 89 {"munderover", 0}, /* TAG_MUNDEROVER */ 90 {"munder", 0}, /* TAG_MUNDER*/ 91 {"mover", 0}, /* TAG_MOVER*/ 92 }; 93 94 static const char *const htmlattrs[ATTR_MAX] = { 95 "name", /* ATTR_NAME */ 96 "rel", /* ATTR_REL */ 97 "href", /* ATTR_HREF */ 98 "type", /* ATTR_TYPE */ 99 "media", /* ATTR_MEDIA */ 100 "class", /* ATTR_CLASS */ 101 "style", /* ATTR_STYLE */ 102 "id", /* ATTR_ID */ 103 "colspan", /* ATTR_COLSPAN */ 104 "charset", /* ATTR_CHARSET */ 105 "open", /* ATTR_OPEN */ 106 "close", /* ATTR_CLOSE */ 107 "mathvariant", /* ATTR_MATHVARIANT */ 108 }; 109 110 static const char *const roffscales[SCALE_MAX] = { 111 "cm", /* SCALE_CM */ 112 "in", /* SCALE_IN */ 113 "pc", /* SCALE_PC */ 114 "pt", /* SCALE_PT */ 115 "em", /* SCALE_EM */ 116 "em", /* SCALE_MM */ 117 "ex", /* SCALE_EN */ 118 "ex", /* SCALE_BU */ 119 "em", /* SCALE_VS */ 120 "ex", /* SCALE_FS */ 121 }; 122 123 static void bufncat(struct html *, const char *, size_t); 124 static void print_ctag(struct html *, struct tag *); 125 static int print_escape(char); 126 static int print_encode(struct html *, const char *, int); 127 static void print_metaf(struct html *, enum mandoc_esc); 128 static void print_attr(struct html *, const char *, const char *); 129 130 131 void * 132 html_alloc(const struct mchars *mchars, char *outopts) 133 { 134 struct html *h; 135 const char *toks[5]; 136 char *v; 137 138 toks[0] = "style"; 139 toks[1] = "man"; 140 toks[2] = "includes"; 141 toks[3] = "fragment"; 142 toks[4] = NULL; 143 144 h = mandoc_calloc(1, sizeof(struct html)); 145 146 h->tags.head = NULL; 147 h->symtab = mchars; 148 149 while (outopts && *outopts) 150 switch (getsubopt(&outopts, UNCONST(toks), &v)) { 151 case 0: 152 h->style = v; 153 break; 154 case 1: 155 h->base_man = v; 156 break; 157 case 2: 158 h->base_includes = v; 159 break; 160 case 3: 161 h->oflags |= HTML_FRAGMENT; 162 break; 163 default: 164 break; 165 } 166 167 return(h); 168 } 169 170 void 171 html_free(void *p) 172 { 173 struct tag *tag; 174 struct html *h; 175 176 h = (struct html *)p; 177 178 while ((tag = h->tags.head) != NULL) { 179 h->tags.head = tag->next; 180 free(tag); 181 } 182 183 free(h); 184 } 185 186 void 187 print_gen_head(struct html *h) 188 { 189 struct htmlpair tag[4]; 190 struct tag *t; 191 192 tag[0].key = ATTR_CHARSET; 193 tag[0].val = "utf-8"; 194 print_otag(h, TAG_META, 1, tag); 195 196 /* 197 * Print a default style-sheet. 198 */ 199 t = print_otag(h, TAG_STYLE, 0, NULL); 200 print_text(h, "table.head, table.foot { width: 100%; }\n" 201 "td.head-rtitle, td.foot-os { text-align: right; }\n" 202 "td.head-vol { text-align: center; }\n" 203 "table.foot td { width: 50%; }\n" 204 "table.head td { width: 33%; }\n" 205 "div.spacer { margin: 1em 0; }\n"); 206 print_tagq(h, t); 207 208 if (h->style) { 209 tag[0].key = ATTR_REL; 210 tag[0].val = "stylesheet"; 211 tag[1].key = ATTR_HREF; 212 tag[1].val = h->style; 213 tag[2].key = ATTR_TYPE; 214 tag[2].val = "text/css"; 215 tag[3].key = ATTR_MEDIA; 216 tag[3].val = "all"; 217 print_otag(h, TAG_LINK, 4, tag); 218 } 219 } 220 221 static void 222 print_metaf(struct html *h, enum mandoc_esc deco) 223 { 224 enum htmlfont font; 225 226 switch (deco) { 227 case ESCAPE_FONTPREV: 228 font = h->metal; 229 break; 230 case ESCAPE_FONTITALIC: 231 font = HTMLFONT_ITALIC; 232 break; 233 case ESCAPE_FONTBOLD: 234 font = HTMLFONT_BOLD; 235 break; 236 case ESCAPE_FONTBI: 237 font = HTMLFONT_BI; 238 break; 239 case ESCAPE_FONT: 240 /* FALLTHROUGH */ 241 case ESCAPE_FONTROMAN: 242 font = HTMLFONT_NONE; 243 break; 244 default: 245 abort(); 246 /* NOTREACHED */ 247 } 248 249 if (h->metaf) { 250 print_tagq(h, h->metaf); 251 h->metaf = NULL; 252 } 253 254 h->metal = h->metac; 255 h->metac = font; 256 257 switch (font) { 258 case HTMLFONT_ITALIC: 259 h->metaf = print_otag(h, TAG_I, 0, NULL); 260 break; 261 case HTMLFONT_BOLD: 262 h->metaf = print_otag(h, TAG_B, 0, NULL); 263 break; 264 case HTMLFONT_BI: 265 h->metaf = print_otag(h, TAG_B, 0, NULL); 266 print_otag(h, TAG_I, 0, NULL); 267 break; 268 default: 269 break; 270 } 271 } 272 273 int 274 html_strlen(const char *cp) 275 { 276 size_t rsz; 277 int skip, sz; 278 279 /* 280 * Account for escaped sequences within string length 281 * calculations. This follows the logic in term_strlen() as we 282 * must calculate the width of produced strings. 283 * Assume that characters are always width of "1". This is 284 * hacky, but it gets the job done for approximation of widths. 285 */ 286 287 sz = 0; 288 skip = 0; 289 while (1) { 290 rsz = strcspn(cp, "\\"); 291 if (rsz) { 292 cp += rsz; 293 if (skip) { 294 skip = 0; 295 rsz--; 296 } 297 sz += rsz; 298 } 299 if ('\0' == *cp) 300 break; 301 cp++; 302 switch (mandoc_escape(&cp, NULL, NULL)) { 303 case ESCAPE_ERROR: 304 return(sz); 305 case ESCAPE_UNICODE: 306 /* FALLTHROUGH */ 307 case ESCAPE_NUMBERED: 308 /* FALLTHROUGH */ 309 case ESCAPE_SPECIAL: 310 /* FALLTHROUGH */ 311 case ESCAPE_OVERSTRIKE: 312 if (skip) 313 skip = 0; 314 else 315 sz++; 316 break; 317 case ESCAPE_SKIPCHAR: 318 skip = 1; 319 break; 320 default: 321 break; 322 } 323 } 324 return(sz); 325 } 326 327 static int 328 print_escape(char c) 329 { 330 331 switch (c) { 332 case '<': 333 printf("<"); 334 break; 335 case '>': 336 printf(">"); 337 break; 338 case '&': 339 printf("&"); 340 break; 341 case '"': 342 printf("""); 343 break; 344 case ASCII_NBRSP: 345 putchar('-'); 346 break; 347 case ASCII_HYPH: 348 putchar('-'); 349 /* FALLTHROUGH */ 350 case ASCII_BREAK: 351 break; 352 default: 353 return(0); 354 } 355 return(1); 356 } 357 358 static int 359 print_encode(struct html *h, const char *p, int norecurse) 360 { 361 size_t sz; 362 int c, len, nospace; 363 const char *seq; 364 enum mandoc_esc esc; 365 static const char rejs[9] = { '\\', '<', '>', '&', '"', 366 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; 367 368 nospace = 0; 369 370 while ('\0' != *p) { 371 if (HTML_SKIPCHAR & h->flags && '\\' != *p) { 372 h->flags &= ~HTML_SKIPCHAR; 373 p++; 374 continue; 375 } 376 377 sz = strcspn(p, rejs); 378 379 fwrite(p, 1, sz, stdout); 380 p += (int)sz; 381 382 if ('\0' == *p) 383 break; 384 385 if (print_escape(*p++)) 386 continue; 387 388 esc = mandoc_escape(&p, &seq, &len); 389 if (ESCAPE_ERROR == esc) 390 break; 391 392 switch (esc) { 393 case ESCAPE_FONT: 394 /* FALLTHROUGH */ 395 case ESCAPE_FONTPREV: 396 /* FALLTHROUGH */ 397 case ESCAPE_FONTBOLD: 398 /* FALLTHROUGH */ 399 case ESCAPE_FONTITALIC: 400 /* FALLTHROUGH */ 401 case ESCAPE_FONTBI: 402 /* FALLTHROUGH */ 403 case ESCAPE_FONTROMAN: 404 if (0 == norecurse) 405 print_metaf(h, esc); 406 continue; 407 case ESCAPE_SKIPCHAR: 408 h->flags |= HTML_SKIPCHAR; 409 continue; 410 default: 411 break; 412 } 413 414 if (h->flags & HTML_SKIPCHAR) { 415 h->flags &= ~HTML_SKIPCHAR; 416 continue; 417 } 418 419 switch (esc) { 420 case ESCAPE_UNICODE: 421 /* Skip past "u" header. */ 422 c = mchars_num2uc(seq + 1, len - 1); 423 break; 424 case ESCAPE_NUMBERED: 425 c = mchars_num2char(seq, len); 426 if (c < 0) 427 continue; 428 break; 429 case ESCAPE_SPECIAL: 430 c = mchars_spec2cp(h->symtab, seq, len); 431 if (c <= 0) 432 continue; 433 break; 434 case ESCAPE_NOSPACE: 435 if ('\0' == *p) 436 nospace = 1; 437 continue; 438 case ESCAPE_OVERSTRIKE: 439 if (len == 0) 440 continue; 441 c = seq[len - 1]; 442 break; 443 default: 444 continue; 445 } 446 if ((c < 0x20 && c != 0x09) || 447 (c > 0x7E && c < 0xA0)) 448 c = 0xFFFD; 449 if (c > 0x7E) 450 printf("&#%d;", c); 451 else if ( ! print_escape(c)) 452 putchar(c); 453 } 454 455 return(nospace); 456 } 457 458 static void 459 print_attr(struct html *h, const char *key, const char *val) 460 { 461 printf(" %s=\"", key); 462 (void)print_encode(h, val, 1); 463 putchar('\"'); 464 } 465 466 struct tag * 467 print_otag(struct html *h, enum htmltag tag, 468 int sz, const struct htmlpair *p) 469 { 470 int i; 471 struct tag *t; 472 473 /* Push this tags onto the stack of open scopes. */ 474 475 if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { 476 t = mandoc_malloc(sizeof(struct tag)); 477 t->tag = tag; 478 t->next = h->tags.head; 479 h->tags.head = t; 480 } else 481 t = NULL; 482 483 if ( ! (HTML_NOSPACE & h->flags)) 484 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { 485 /* Manage keeps! */ 486 if ( ! (HTML_KEEP & h->flags)) { 487 if (HTML_PREKEEP & h->flags) 488 h->flags |= HTML_KEEP; 489 putchar(' '); 490 } else 491 printf(" "); 492 } 493 494 if ( ! (h->flags & HTML_NONOSPACE)) 495 h->flags &= ~HTML_NOSPACE; 496 else 497 h->flags |= HTML_NOSPACE; 498 499 /* Print out the tag name and attributes. */ 500 501 printf("<%s", htmltags[tag].name); 502 for (i = 0; i < sz; i++) 503 print_attr(h, htmlattrs[p[i].key], p[i].val); 504 505 /* Accommodate for "well-formed" singleton escaping. */ 506 507 if (HTML_AUTOCLOSE & htmltags[tag].flags) 508 putchar('/'); 509 510 putchar('>'); 511 512 h->flags |= HTML_NOSPACE; 513 514 if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags) 515 putchar('\n'); 516 517 return(t); 518 } 519 520 static void 521 print_ctag(struct html *h, struct tag *tag) 522 { 523 524 /* 525 * Remember to close out and nullify the current 526 * meta-font and table, if applicable. 527 */ 528 if (tag == h->metaf) 529 h->metaf = NULL; 530 if (tag == h->tblt) 531 h->tblt = NULL; 532 533 printf("</%s>", htmltags[tag->tag].name); 534 if (HTML_CLRLINE & htmltags[tag->tag].flags) { 535 h->flags |= HTML_NOSPACE; 536 putchar('\n'); 537 } 538 539 h->tags.head = tag->next; 540 free(tag); 541 } 542 543 void 544 print_gen_decls(struct html *h) 545 { 546 547 puts("<!DOCTYPE html>"); 548 } 549 550 void 551 print_text(struct html *h, const char *word) 552 { 553 554 if ( ! (HTML_NOSPACE & h->flags)) { 555 /* Manage keeps! */ 556 if ( ! (HTML_KEEP & h->flags)) { 557 if (HTML_PREKEEP & h->flags) 558 h->flags |= HTML_KEEP; 559 putchar(' '); 560 } else 561 printf(" "); 562 } 563 564 assert(NULL == h->metaf); 565 switch (h->metac) { 566 case HTMLFONT_ITALIC: 567 h->metaf = print_otag(h, TAG_I, 0, NULL); 568 break; 569 case HTMLFONT_BOLD: 570 h->metaf = print_otag(h, TAG_B, 0, NULL); 571 break; 572 case HTMLFONT_BI: 573 h->metaf = print_otag(h, TAG_B, 0, NULL); 574 print_otag(h, TAG_I, 0, NULL); 575 break; 576 default: 577 break; 578 } 579 580 assert(word); 581 if ( ! print_encode(h, word, 0)) { 582 if ( ! (h->flags & HTML_NONOSPACE)) 583 h->flags &= ~HTML_NOSPACE; 584 h->flags &= ~HTML_NONEWLINE; 585 } else 586 h->flags |= HTML_NOSPACE | HTML_NONEWLINE; 587 588 if (h->metaf) { 589 print_tagq(h, h->metaf); 590 h->metaf = NULL; 591 } 592 593 h->flags &= ~HTML_IGNDELIM; 594 } 595 596 void 597 print_tagq(struct html *h, const struct tag *until) 598 { 599 struct tag *tag; 600 601 while ((tag = h->tags.head) != NULL) { 602 print_ctag(h, tag); 603 if (until && tag == until) 604 return; 605 } 606 } 607 608 void 609 print_stagq(struct html *h, const struct tag *suntil) 610 { 611 struct tag *tag; 612 613 while ((tag = h->tags.head) != NULL) { 614 if (suntil && tag == suntil) 615 return; 616 print_ctag(h, tag); 617 } 618 } 619 620 void 621 print_paragraph(struct html *h) 622 { 623 struct tag *t; 624 struct htmlpair tag; 625 626 PAIR_CLASS_INIT(&tag, "spacer"); 627 t = print_otag(h, TAG_DIV, 1, &tag); 628 print_tagq(h, t); 629 } 630 631 632 void 633 bufinit(struct html *h) 634 { 635 636 h->buf[0] = '\0'; 637 h->buflen = 0; 638 } 639 640 void 641 bufcat_style(struct html *h, const char *key, const char *val) 642 { 643 644 bufcat(h, key); 645 bufcat(h, ":"); 646 bufcat(h, val); 647 bufcat(h, ";"); 648 } 649 650 void 651 bufcat(struct html *h, const char *p) 652 { 653 654 /* 655 * XXX This is broken and not easy to fix. 656 * When using the -Oincludes option, buffmt_includes() 657 * may pass in strings overrunning BUFSIZ, causing a crash. 658 */ 659 660 h->buflen = strlcat(h->buf, p, BUFSIZ); 661 assert(h->buflen < BUFSIZ); 662 } 663 664 void 665 bufcat_fmt(struct html *h, const char *fmt, ...) 666 { 667 va_list ap; 668 669 va_start(ap, fmt); 670 (void)vsnprintf(h->buf + (int)h->buflen, 671 BUFSIZ - h->buflen - 1, fmt, ap); 672 va_end(ap); 673 h->buflen = strlen(h->buf); 674 } 675 676 static void 677 bufncat(struct html *h, const char *p, size_t sz) 678 { 679 680 assert(h->buflen + sz + 1 < BUFSIZ); 681 strncat(h->buf, p, sz); 682 h->buflen += sz; 683 } 684 685 void 686 buffmt_includes(struct html *h, const char *name) 687 { 688 const char *p, *pp; 689 690 pp = h->base_includes; 691 692 bufinit(h); 693 while (NULL != (p = strchr(pp, '%'))) { 694 bufncat(h, pp, (size_t)(p - pp)); 695 switch (*(p + 1)) { 696 case'I': 697 bufcat(h, name); 698 break; 699 default: 700 bufncat(h, p, 2); 701 break; 702 } 703 pp = p + 2; 704 } 705 if (pp) 706 bufcat(h, pp); 707 } 708 709 void 710 buffmt_man(struct html *h, const char *name, const char *sec) 711 { 712 const char *p, *pp; 713 714 pp = h->base_man; 715 716 bufinit(h); 717 while (NULL != (p = strchr(pp, '%'))) { 718 bufncat(h, pp, (size_t)(p - pp)); 719 switch (*(p + 1)) { 720 case 'S': 721 bufcat(h, sec ? sec : "1"); 722 break; 723 case 'N': 724 bufcat_fmt(h, "%s", name); 725 break; 726 default: 727 bufncat(h, p, 2); 728 break; 729 } 730 pp = p + 2; 731 } 732 if (pp) 733 bufcat(h, pp); 734 } 735 736 void 737 bufcat_su(struct html *h, const char *p, const struct roffsu *su) 738 { 739 double v; 740 741 v = su->scale; 742 if (SCALE_MM == su->unit && 0.0 == (v /= 100.0)) 743 v = 1.0; 744 else if (SCALE_BU == su->unit) 745 v /= 24.0; 746 747 bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]); 748 } 749 750 void 751 bufcat_id(struct html *h, const char *src) 752 { 753 754 /* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */ 755 756 while ('\0' != *src) 757 bufcat_fmt(h, "%.2x", *src++); 758 } 759