/*	$Id: html.c,v 1.185 2015/01/21 20:33:25 schwarze Exp $ */
/*
 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1895c635efSGarrett D'Amore #include "config.h"
1995c635efSGarrett D'Amore
2095c635efSGarrett D'Amore #include <sys/types.h>
2195c635efSGarrett D'Amore
2295c635efSGarrett D'Amore #include <assert.h>
2395c635efSGarrett D'Amore #include <ctype.h>
2495c635efSGarrett D'Amore #include <stdarg.h>
2595c635efSGarrett D'Amore #include <stdio.h>
2695c635efSGarrett D'Amore #include <stdint.h>
2795c635efSGarrett D'Amore #include <stdlib.h>
2895c635efSGarrett D'Amore #include <string.h>
2995c635efSGarrett D'Amore #include <unistd.h>
3095c635efSGarrett D'Amore
3195c635efSGarrett D'Amore #include "mandoc.h"
32*260e9a87SYuri Pankov #include "mandoc_aux.h"
3395c635efSGarrett D'Amore #include "out.h"
3495c635efSGarrett D'Amore #include "html.h"
3595c635efSGarrett D'Amore #include "main.h"
3695c635efSGarrett D'Amore
/*
 * Static description of one HTML element: the name written between
 * the angle brackets plus a set of flag bits consulted by
 * print_otag() and print_ctag() when the element is emitted.
 */
struct	htmldata {
	const char	 *name;		/* element name, e.g. "div" */
	int		  flags;	/* bitwise OR of the bits below */
#define	HTML_CLRLINE	 (1 << 0)	/* a newline follows the tag */
#define	HTML_NOSTACK	 (1 << 1)	/* never pushed on the tag stack */
#define	HTML_AUTOCLOSE	 (1 << 2)	/* Tag has auto-closure. */
};
4495c635efSGarrett D'Amore
4595c635efSGarrett D'Amore static const struct htmldata htmltags[TAG_MAX] = {
4695c635efSGarrett D'Amore {"html", HTML_CLRLINE}, /* TAG_HTML */
4795c635efSGarrett D'Amore {"head", HTML_CLRLINE}, /* TAG_HEAD */
4895c635efSGarrett D'Amore {"body", HTML_CLRLINE}, /* TAG_BODY */
4995c635efSGarrett D'Amore {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
5095c635efSGarrett D'Amore {"title", HTML_CLRLINE}, /* TAG_TITLE */
5195c635efSGarrett D'Amore {"div", HTML_CLRLINE}, /* TAG_DIV */
5295c635efSGarrett D'Amore {"h1", 0}, /* TAG_H1 */
5395c635efSGarrett D'Amore {"h2", 0}, /* TAG_H2 */
5495c635efSGarrett D'Amore {"span", 0}, /* TAG_SPAN */
5595c635efSGarrett D'Amore {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
5695c635efSGarrett D'Amore {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
5795c635efSGarrett D'Amore {"a", 0}, /* TAG_A */
5895c635efSGarrett D'Amore {"table", HTML_CLRLINE}, /* TAG_TABLE */
5995c635efSGarrett D'Amore {"tbody", HTML_CLRLINE}, /* TAG_TBODY */
6095c635efSGarrett D'Amore {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
6195c635efSGarrett D'Amore {"tr", HTML_CLRLINE}, /* TAG_TR */
6295c635efSGarrett D'Amore {"td", HTML_CLRLINE}, /* TAG_TD */
6395c635efSGarrett D'Amore {"li", HTML_CLRLINE}, /* TAG_LI */
6495c635efSGarrett D'Amore {"ul", HTML_CLRLINE}, /* TAG_UL */
6595c635efSGarrett D'Amore {"ol", HTML_CLRLINE}, /* TAG_OL */
6695c635efSGarrett D'Amore {"dl", HTML_CLRLINE}, /* TAG_DL */
6795c635efSGarrett D'Amore {"dt", HTML_CLRLINE}, /* TAG_DT */
6895c635efSGarrett D'Amore {"dd", HTML_CLRLINE}, /* TAG_DD */
6995c635efSGarrett D'Amore {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
7095c635efSGarrett D'Amore {"pre", HTML_CLRLINE }, /* TAG_PRE */
7195c635efSGarrett D'Amore {"b", 0 }, /* TAG_B */
7295c635efSGarrett D'Amore {"i", 0 }, /* TAG_I */
7395c635efSGarrett D'Amore {"code", 0 }, /* TAG_CODE */
7495c635efSGarrett D'Amore {"small", 0 }, /* TAG_SMALL */
75*260e9a87SYuri Pankov {"style", HTML_CLRLINE}, /* TAG_STYLE */
76*260e9a87SYuri Pankov {"math", HTML_CLRLINE}, /* TAG_MATH */
77*260e9a87SYuri Pankov {"mrow", 0}, /* TAG_MROW */
78*260e9a87SYuri Pankov {"mi", 0}, /* TAG_MI */
79*260e9a87SYuri Pankov {"mo", 0}, /* TAG_MO */
80*260e9a87SYuri Pankov {"msup", 0}, /* TAG_MSUP */
81*260e9a87SYuri Pankov {"msub", 0}, /* TAG_MSUB */
82*260e9a87SYuri Pankov {"msubsup", 0}, /* TAG_MSUBSUP */
83*260e9a87SYuri Pankov {"mfrac", 0}, /* TAG_MFRAC */
84*260e9a87SYuri Pankov {"msqrt", 0}, /* TAG_MSQRT */
85*260e9a87SYuri Pankov {"mfenced", 0}, /* TAG_MFENCED */
86*260e9a87SYuri Pankov {"mtable", 0}, /* TAG_MTABLE */
87*260e9a87SYuri Pankov {"mtr", 0}, /* TAG_MTR */
88*260e9a87SYuri Pankov {"mtd", 0}, /* TAG_MTD */
89*260e9a87SYuri Pankov {"munderover", 0}, /* TAG_MUNDEROVER */
90*260e9a87SYuri Pankov {"munder", 0}, /* TAG_MUNDER*/
91*260e9a87SYuri Pankov {"mover", 0}, /* TAG_MOVER*/
9295c635efSGarrett D'Amore };
9395c635efSGarrett D'Amore
9495c635efSGarrett D'Amore static const char *const htmlattrs[ATTR_MAX] = {
9595c635efSGarrett D'Amore "name", /* ATTR_NAME */
9695c635efSGarrett D'Amore "rel", /* ATTR_REL */
9795c635efSGarrett D'Amore "href", /* ATTR_HREF */
9895c635efSGarrett D'Amore "type", /* ATTR_TYPE */
9995c635efSGarrett D'Amore "media", /* ATTR_MEDIA */
10095c635efSGarrett D'Amore "class", /* ATTR_CLASS */
10195c635efSGarrett D'Amore "style", /* ATTR_STYLE */
10295c635efSGarrett D'Amore "id", /* ATTR_ID */
10395c635efSGarrett D'Amore "colspan", /* ATTR_COLSPAN */
104*260e9a87SYuri Pankov "charset", /* ATTR_CHARSET */
105*260e9a87SYuri Pankov "open", /* ATTR_OPEN */
106*260e9a87SYuri Pankov "close", /* ATTR_CLOSE */
107*260e9a87SYuri Pankov "mathvariant", /* ATTR_MATHVARIANT */
10895c635efSGarrett D'Amore };
10995c635efSGarrett D'Amore
11095c635efSGarrett D'Amore static const char *const roffscales[SCALE_MAX] = {
11195c635efSGarrett D'Amore "cm", /* SCALE_CM */
11295c635efSGarrett D'Amore "in", /* SCALE_IN */
11395c635efSGarrett D'Amore "pc", /* SCALE_PC */
11495c635efSGarrett D'Amore "pt", /* SCALE_PT */
11595c635efSGarrett D'Amore "em", /* SCALE_EM */
11695c635efSGarrett D'Amore "em", /* SCALE_MM */
11795c635efSGarrett D'Amore "ex", /* SCALE_EN */
11895c635efSGarrett D'Amore "ex", /* SCALE_BU */
11995c635efSGarrett D'Amore "em", /* SCALE_VS */
12095c635efSGarrett D'Amore "ex", /* SCALE_FS */
12195c635efSGarrett D'Amore };
12295c635efSGarrett D'Amore
/* Helpers private to this file. */
static	void	 bufncat(struct html *, const char *, size_t);
static	void	 print_attr(struct html *, const char *, const char *);
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_encode(struct html *, const char *, int);
static	int	 print_escape(char);
static	void	 print_metaf(struct html *, enum mandoc_esc);
12995c635efSGarrett D'Amore
130*260e9a87SYuri Pankov
131*260e9a87SYuri Pankov void *
html_alloc(const struct mchars * mchars,char * outopts)132*260e9a87SYuri Pankov html_alloc(const struct mchars *mchars, char *outopts)
13395c635efSGarrett D'Amore {
13495c635efSGarrett D'Amore struct html *h;
13595c635efSGarrett D'Amore const char *toks[5];
13695c635efSGarrett D'Amore char *v;
13795c635efSGarrett D'Amore
13895c635efSGarrett D'Amore toks[0] = "style";
13995c635efSGarrett D'Amore toks[1] = "man";
14095c635efSGarrett D'Amore toks[2] = "includes";
14195c635efSGarrett D'Amore toks[3] = "fragment";
14295c635efSGarrett D'Amore toks[4] = NULL;
14395c635efSGarrett D'Amore
14495c635efSGarrett D'Amore h = mandoc_calloc(1, sizeof(struct html));
14595c635efSGarrett D'Amore
14695c635efSGarrett D'Amore h->tags.head = NULL;
147*260e9a87SYuri Pankov h->symtab = mchars;
14895c635efSGarrett D'Amore
14995c635efSGarrett D'Amore while (outopts && *outopts)
15095c635efSGarrett D'Amore switch (getsubopt(&outopts, UNCONST(toks), &v)) {
151*260e9a87SYuri Pankov case 0:
15295c635efSGarrett D'Amore h->style = v;
15395c635efSGarrett D'Amore break;
154*260e9a87SYuri Pankov case 1:
15595c635efSGarrett D'Amore h->base_man = v;
15695c635efSGarrett D'Amore break;
157*260e9a87SYuri Pankov case 2:
15895c635efSGarrett D'Amore h->base_includes = v;
15995c635efSGarrett D'Amore break;
160*260e9a87SYuri Pankov case 3:
16195c635efSGarrett D'Amore h->oflags |= HTML_FRAGMENT;
16295c635efSGarrett D'Amore break;
16395c635efSGarrett D'Amore default:
16495c635efSGarrett D'Amore break;
16595c635efSGarrett D'Amore }
16695c635efSGarrett D'Amore
16795c635efSGarrett D'Amore return(h);
16895c635efSGarrett D'Amore }
16995c635efSGarrett D'Amore
17095c635efSGarrett D'Amore void
html_free(void * p)17195c635efSGarrett D'Amore html_free(void *p)
17295c635efSGarrett D'Amore {
17395c635efSGarrett D'Amore struct tag *tag;
17495c635efSGarrett D'Amore struct html *h;
17595c635efSGarrett D'Amore
17695c635efSGarrett D'Amore h = (struct html *)p;
17795c635efSGarrett D'Amore
17895c635efSGarrett D'Amore while ((tag = h->tags.head) != NULL) {
17995c635efSGarrett D'Amore h->tags.head = tag->next;
18095c635efSGarrett D'Amore free(tag);
18195c635efSGarrett D'Amore }
18295c635efSGarrett D'Amore
18395c635efSGarrett D'Amore free(h);
18495c635efSGarrett D'Amore }
18595c635efSGarrett D'Amore
18695c635efSGarrett D'Amore void
print_gen_head(struct html * h)18795c635efSGarrett D'Amore print_gen_head(struct html *h)
18895c635efSGarrett D'Amore {
18995c635efSGarrett D'Amore struct htmlpair tag[4];
190*260e9a87SYuri Pankov struct tag *t;
19195c635efSGarrett D'Amore
192*260e9a87SYuri Pankov tag[0].key = ATTR_CHARSET;
193*260e9a87SYuri Pankov tag[0].val = "utf-8";
194*260e9a87SYuri Pankov print_otag(h, TAG_META, 1, tag);
19595c635efSGarrett D'Amore
196*260e9a87SYuri Pankov /*
197*260e9a87SYuri Pankov * Print a default style-sheet.
198*260e9a87SYuri Pankov */
199*260e9a87SYuri Pankov t = print_otag(h, TAG_STYLE, 0, NULL);
200*260e9a87SYuri Pankov print_text(h, "table.head, table.foot { width: 100%; }\n"
201*260e9a87SYuri Pankov "td.head-rtitle, td.foot-os { text-align: right; }\n"
202*260e9a87SYuri Pankov "td.head-vol { text-align: center; }\n"
203*260e9a87SYuri Pankov "table.foot td { width: 50%; }\n"
204*260e9a87SYuri Pankov "table.head td { width: 33%; }\n"
205*260e9a87SYuri Pankov "div.spacer { margin: 1em 0; }\n");
206*260e9a87SYuri Pankov print_tagq(h, t);
20795c635efSGarrett D'Amore
20895c635efSGarrett D'Amore if (h->style) {
20995c635efSGarrett D'Amore tag[0].key = ATTR_REL;
21095c635efSGarrett D'Amore tag[0].val = "stylesheet";
21195c635efSGarrett D'Amore tag[1].key = ATTR_HREF;
21295c635efSGarrett D'Amore tag[1].val = h->style;
21395c635efSGarrett D'Amore tag[2].key = ATTR_TYPE;
21495c635efSGarrett D'Amore tag[2].val = "text/css";
21595c635efSGarrett D'Amore tag[3].key = ATTR_MEDIA;
21695c635efSGarrett D'Amore tag[3].val = "all";
21795c635efSGarrett D'Amore print_otag(h, TAG_LINK, 4, tag);
21895c635efSGarrett D'Amore }
21995c635efSGarrett D'Amore }
22095c635efSGarrett D'Amore
22195c635efSGarrett D'Amore static void
print_metaf(struct html * h,enum mandoc_esc deco)22295c635efSGarrett D'Amore print_metaf(struct html *h, enum mandoc_esc deco)
22395c635efSGarrett D'Amore {
22495c635efSGarrett D'Amore enum htmlfont font;
22595c635efSGarrett D'Amore
22695c635efSGarrett D'Amore switch (deco) {
227*260e9a87SYuri Pankov case ESCAPE_FONTPREV:
22895c635efSGarrett D'Amore font = h->metal;
22995c635efSGarrett D'Amore break;
230*260e9a87SYuri Pankov case ESCAPE_FONTITALIC:
23195c635efSGarrett D'Amore font = HTMLFONT_ITALIC;
23295c635efSGarrett D'Amore break;
233*260e9a87SYuri Pankov case ESCAPE_FONTBOLD:
23495c635efSGarrett D'Amore font = HTMLFONT_BOLD;
23595c635efSGarrett D'Amore break;
236*260e9a87SYuri Pankov case ESCAPE_FONTBI:
237698f87a4SGarrett D'Amore font = HTMLFONT_BI;
238698f87a4SGarrett D'Amore break;
239*260e9a87SYuri Pankov case ESCAPE_FONT:
24095c635efSGarrett D'Amore /* FALLTHROUGH */
241*260e9a87SYuri Pankov case ESCAPE_FONTROMAN:
24295c635efSGarrett D'Amore font = HTMLFONT_NONE;
24395c635efSGarrett D'Amore break;
24495c635efSGarrett D'Amore default:
24595c635efSGarrett D'Amore abort();
24695c635efSGarrett D'Amore /* NOTREACHED */
24795c635efSGarrett D'Amore }
24895c635efSGarrett D'Amore
24995c635efSGarrett D'Amore if (h->metaf) {
25095c635efSGarrett D'Amore print_tagq(h, h->metaf);
25195c635efSGarrett D'Amore h->metaf = NULL;
25295c635efSGarrett D'Amore }
25395c635efSGarrett D'Amore
25495c635efSGarrett D'Amore h->metal = h->metac;
25595c635efSGarrett D'Amore h->metac = font;
25695c635efSGarrett D'Amore
257698f87a4SGarrett D'Amore switch (font) {
258*260e9a87SYuri Pankov case HTMLFONT_ITALIC:
259698f87a4SGarrett D'Amore h->metaf = print_otag(h, TAG_I, 0, NULL);
260698f87a4SGarrett D'Amore break;
261*260e9a87SYuri Pankov case HTMLFONT_BOLD:
262698f87a4SGarrett D'Amore h->metaf = print_otag(h, TAG_B, 0, NULL);
263698f87a4SGarrett D'Amore break;
264*260e9a87SYuri Pankov case HTMLFONT_BI:
265698f87a4SGarrett D'Amore h->metaf = print_otag(h, TAG_B, 0, NULL);
26695c635efSGarrett D'Amore print_otag(h, TAG_I, 0, NULL);
267698f87a4SGarrett D'Amore break;
268698f87a4SGarrett D'Amore default:
269698f87a4SGarrett D'Amore break;
270698f87a4SGarrett D'Amore }
27195c635efSGarrett D'Amore }
27295c635efSGarrett D'Amore
27395c635efSGarrett D'Amore int
html_strlen(const char * cp)27495c635efSGarrett D'Amore html_strlen(const char *cp)
27595c635efSGarrett D'Amore {
276698f87a4SGarrett D'Amore size_t rsz;
277698f87a4SGarrett D'Amore int skip, sz;
27895c635efSGarrett D'Amore
27995c635efSGarrett D'Amore /*
28095c635efSGarrett D'Amore * Account for escaped sequences within string length
28195c635efSGarrett D'Amore * calculations. This follows the logic in term_strlen() as we
28295c635efSGarrett D'Amore * must calculate the width of produced strings.
28395c635efSGarrett D'Amore * Assume that characters are always width of "1". This is
28495c635efSGarrett D'Amore * hacky, but it gets the job done for approximation of widths.
28595c635efSGarrett D'Amore */
28695c635efSGarrett D'Amore
28795c635efSGarrett D'Amore sz = 0;
288698f87a4SGarrett D'Amore skip = 0;
289698f87a4SGarrett D'Amore while (1) {
290698f87a4SGarrett D'Amore rsz = strcspn(cp, "\\");
291698f87a4SGarrett D'Amore if (rsz) {
292698f87a4SGarrett D'Amore cp += rsz;
293698f87a4SGarrett D'Amore if (skip) {
294698f87a4SGarrett D'Amore skip = 0;
295698f87a4SGarrett D'Amore rsz--;
296698f87a4SGarrett D'Amore }
297698f87a4SGarrett D'Amore sz += rsz;
298698f87a4SGarrett D'Amore }
299698f87a4SGarrett D'Amore if ('\0' == *cp)
300698f87a4SGarrett D'Amore break;
301698f87a4SGarrett D'Amore cp++;
302698f87a4SGarrett D'Amore switch (mandoc_escape(&cp, NULL, NULL)) {
303*260e9a87SYuri Pankov case ESCAPE_ERROR:
30495c635efSGarrett D'Amore return(sz);
305*260e9a87SYuri Pankov case ESCAPE_UNICODE:
30695c635efSGarrett D'Amore /* FALLTHROUGH */
307*260e9a87SYuri Pankov case ESCAPE_NUMBERED:
30895c635efSGarrett D'Amore /* FALLTHROUGH */
309*260e9a87SYuri Pankov case ESCAPE_SPECIAL:
310*260e9a87SYuri Pankov /* FALLTHROUGH */
311*260e9a87SYuri Pankov case ESCAPE_OVERSTRIKE:
312698f87a4SGarrett D'Amore if (skip)
313698f87a4SGarrett D'Amore skip = 0;
314698f87a4SGarrett D'Amore else
31595c635efSGarrett D'Amore sz++;
31695c635efSGarrett D'Amore break;
317*260e9a87SYuri Pankov case ESCAPE_SKIPCHAR:
318698f87a4SGarrett D'Amore skip = 1;
319698f87a4SGarrett D'Amore break;
32095c635efSGarrett D'Amore default:
32195c635efSGarrett D'Amore break;
32295c635efSGarrett D'Amore }
32395c635efSGarrett D'Amore }
324698f87a4SGarrett D'Amore return(sz);
32595c635efSGarrett D'Amore }
32695c635efSGarrett D'Amore
32795c635efSGarrett D'Amore static int
print_escape(char c)328*260e9a87SYuri Pankov print_escape(char c)
329*260e9a87SYuri Pankov {
330*260e9a87SYuri Pankov
331*260e9a87SYuri Pankov switch (c) {
332*260e9a87SYuri Pankov case '<':
333*260e9a87SYuri Pankov printf("<");
334*260e9a87SYuri Pankov break;
335*260e9a87SYuri Pankov case '>':
336*260e9a87SYuri Pankov printf(">");
337*260e9a87SYuri Pankov break;
338*260e9a87SYuri Pankov case '&':
339*260e9a87SYuri Pankov printf("&");
340*260e9a87SYuri Pankov break;
341*260e9a87SYuri Pankov case '"':
342*260e9a87SYuri Pankov printf(""");
343*260e9a87SYuri Pankov break;
344*260e9a87SYuri Pankov case ASCII_NBRSP:
345*260e9a87SYuri Pankov putchar('-');
346*260e9a87SYuri Pankov break;
347*260e9a87SYuri Pankov case ASCII_HYPH:
348*260e9a87SYuri Pankov putchar('-');
349*260e9a87SYuri Pankov /* FALLTHROUGH */
350*260e9a87SYuri Pankov case ASCII_BREAK:
351*260e9a87SYuri Pankov break;
352*260e9a87SYuri Pankov default:
353*260e9a87SYuri Pankov return(0);
354*260e9a87SYuri Pankov }
355*260e9a87SYuri Pankov return(1);
356*260e9a87SYuri Pankov }
357*260e9a87SYuri Pankov
358*260e9a87SYuri Pankov static int
print_encode(struct html * h,const char * p,int norecurse)35995c635efSGarrett D'Amore print_encode(struct html *h, const char *p, int norecurse)
36095c635efSGarrett D'Amore {
36195c635efSGarrett D'Amore size_t sz;
36295c635efSGarrett D'Amore int c, len, nospace;
36395c635efSGarrett D'Amore const char *seq;
36495c635efSGarrett D'Amore enum mandoc_esc esc;
365*260e9a87SYuri Pankov static const char rejs[9] = { '\\', '<', '>', '&', '"',
366*260e9a87SYuri Pankov ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
36795c635efSGarrett D'Amore
36895c635efSGarrett D'Amore nospace = 0;
36995c635efSGarrett D'Amore
37095c635efSGarrett D'Amore while ('\0' != *p) {
371698f87a4SGarrett D'Amore if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
372698f87a4SGarrett D'Amore h->flags &= ~HTML_SKIPCHAR;
373698f87a4SGarrett D'Amore p++;
374698f87a4SGarrett D'Amore continue;
375698f87a4SGarrett D'Amore }
376698f87a4SGarrett D'Amore
37795c635efSGarrett D'Amore sz = strcspn(p, rejs);
37895c635efSGarrett D'Amore
37995c635efSGarrett D'Amore fwrite(p, 1, sz, stdout);
38095c635efSGarrett D'Amore p += (int)sz;
38195c635efSGarrett D'Amore
38295c635efSGarrett D'Amore if ('\0' == *p)
38395c635efSGarrett D'Amore break;
38495c635efSGarrett D'Amore
385*260e9a87SYuri Pankov if (print_escape(*p++))
38695c635efSGarrett D'Amore continue;
38795c635efSGarrett D'Amore
38895c635efSGarrett D'Amore esc = mandoc_escape(&p, &seq, &len);
38995c635efSGarrett D'Amore if (ESCAPE_ERROR == esc)
39095c635efSGarrett D'Amore break;
39195c635efSGarrett D'Amore
39295c635efSGarrett D'Amore switch (esc) {
393*260e9a87SYuri Pankov case ESCAPE_FONT:
394698f87a4SGarrett D'Amore /* FALLTHROUGH */
395*260e9a87SYuri Pankov case ESCAPE_FONTPREV:
396698f87a4SGarrett D'Amore /* FALLTHROUGH */
397*260e9a87SYuri Pankov case ESCAPE_FONTBOLD:
398698f87a4SGarrett D'Amore /* FALLTHROUGH */
399*260e9a87SYuri Pankov case ESCAPE_FONTITALIC:
400698f87a4SGarrett D'Amore /* FALLTHROUGH */
401*260e9a87SYuri Pankov case ESCAPE_FONTBI:
402698f87a4SGarrett D'Amore /* FALLTHROUGH */
403*260e9a87SYuri Pankov case ESCAPE_FONTROMAN:
404698f87a4SGarrett D'Amore if (0 == norecurse)
405698f87a4SGarrett D'Amore print_metaf(h, esc);
406698f87a4SGarrett D'Amore continue;
407*260e9a87SYuri Pankov case ESCAPE_SKIPCHAR:
408698f87a4SGarrett D'Amore h->flags |= HTML_SKIPCHAR;
409698f87a4SGarrett D'Amore continue;
410698f87a4SGarrett D'Amore default:
411698f87a4SGarrett D'Amore break;
412698f87a4SGarrett D'Amore }
413698f87a4SGarrett D'Amore
414698f87a4SGarrett D'Amore if (h->flags & HTML_SKIPCHAR) {
415698f87a4SGarrett D'Amore h->flags &= ~HTML_SKIPCHAR;
416698f87a4SGarrett D'Amore continue;
417698f87a4SGarrett D'Amore }
418698f87a4SGarrett D'Amore
419698f87a4SGarrett D'Amore switch (esc) {
420*260e9a87SYuri Pankov case ESCAPE_UNICODE:
421*260e9a87SYuri Pankov /* Skip past "u" header. */
42295c635efSGarrett D'Amore c = mchars_num2uc(seq + 1, len - 1);
42395c635efSGarrett D'Amore break;
424*260e9a87SYuri Pankov case ESCAPE_NUMBERED:
42595c635efSGarrett D'Amore c = mchars_num2char(seq, len);
426*260e9a87SYuri Pankov if (c < 0)
427*260e9a87SYuri Pankov continue;
42895c635efSGarrett D'Amore break;
429*260e9a87SYuri Pankov case ESCAPE_SPECIAL:
43095c635efSGarrett D'Amore c = mchars_spec2cp(h->symtab, seq, len);
431*260e9a87SYuri Pankov if (c <= 0)
432*260e9a87SYuri Pankov continue;
43395c635efSGarrett D'Amore break;
434*260e9a87SYuri Pankov case ESCAPE_NOSPACE:
43595c635efSGarrett D'Amore if ('\0' == *p)
43695c635efSGarrett D'Amore nospace = 1;
437*260e9a87SYuri Pankov continue;
438*260e9a87SYuri Pankov case ESCAPE_OVERSTRIKE:
439*260e9a87SYuri Pankov if (len == 0)
440*260e9a87SYuri Pankov continue;
441*260e9a87SYuri Pankov c = seq[len - 1];
44295c635efSGarrett D'Amore break;
44395c635efSGarrett D'Amore default:
444*260e9a87SYuri Pankov continue;
44595c635efSGarrett D'Amore }
446*260e9a87SYuri Pankov if ((c < 0x20 && c != 0x09) ||
447*260e9a87SYuri Pankov (c > 0x7E && c < 0xA0))
448*260e9a87SYuri Pankov c = 0xFFFD;
449*260e9a87SYuri Pankov if (c > 0x7E)
450*260e9a87SYuri Pankov printf("&#%d;", c);
451*260e9a87SYuri Pankov else if ( ! print_escape(c))
452*260e9a87SYuri Pankov putchar(c);
45395c635efSGarrett D'Amore }
45495c635efSGarrett D'Amore
45595c635efSGarrett D'Amore return(nospace);
45695c635efSGarrett D'Amore }
45795c635efSGarrett D'Amore
/*
 * Print one attribute in the form ` key="val"`, with the value passed
 * through print_encode() so that markup characters are escaped.  Font
 * escapes inside the value are consumed without switching fonts.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
46595c635efSGarrett D'Amore
46695c635efSGarrett D'Amore struct tag *
print_otag(struct html * h,enum htmltag tag,int sz,const struct htmlpair * p)46795c635efSGarrett D'Amore print_otag(struct html *h, enum htmltag tag,
46895c635efSGarrett D'Amore int sz, const struct htmlpair *p)
46995c635efSGarrett D'Amore {
47095c635efSGarrett D'Amore int i;
47195c635efSGarrett D'Amore struct tag *t;
47295c635efSGarrett D'Amore
47395c635efSGarrett D'Amore /* Push this tags onto the stack of open scopes. */
47495c635efSGarrett D'Amore
47595c635efSGarrett D'Amore if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
47695c635efSGarrett D'Amore t = mandoc_malloc(sizeof(struct tag));
47795c635efSGarrett D'Amore t->tag = tag;
47895c635efSGarrett D'Amore t->next = h->tags.head;
47995c635efSGarrett D'Amore h->tags.head = t;
48095c635efSGarrett D'Amore } else
48195c635efSGarrett D'Amore t = NULL;
48295c635efSGarrett D'Amore
48395c635efSGarrett D'Amore if ( ! (HTML_NOSPACE & h->flags))
48495c635efSGarrett D'Amore if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
48595c635efSGarrett D'Amore /* Manage keeps! */
48695c635efSGarrett D'Amore if ( ! (HTML_KEEP & h->flags)) {
48795c635efSGarrett D'Amore if (HTML_PREKEEP & h->flags)
48895c635efSGarrett D'Amore h->flags |= HTML_KEEP;
48995c635efSGarrett D'Amore putchar(' ');
49095c635efSGarrett D'Amore } else
49195c635efSGarrett D'Amore printf(" ");
49295c635efSGarrett D'Amore }
49395c635efSGarrett D'Amore
49495c635efSGarrett D'Amore if ( ! (h->flags & HTML_NONOSPACE))
49595c635efSGarrett D'Amore h->flags &= ~HTML_NOSPACE;
49695c635efSGarrett D'Amore else
49795c635efSGarrett D'Amore h->flags |= HTML_NOSPACE;
49895c635efSGarrett D'Amore
49995c635efSGarrett D'Amore /* Print out the tag name and attributes. */
50095c635efSGarrett D'Amore
50195c635efSGarrett D'Amore printf("<%s", htmltags[tag].name);
50295c635efSGarrett D'Amore for (i = 0; i < sz; i++)
50395c635efSGarrett D'Amore print_attr(h, htmlattrs[p[i].key], p[i].val);
50495c635efSGarrett D'Amore
505*260e9a87SYuri Pankov /* Accommodate for "well-formed" singleton escaping. */
50695c635efSGarrett D'Amore
50795c635efSGarrett D'Amore if (HTML_AUTOCLOSE & htmltags[tag].flags)
50895c635efSGarrett D'Amore putchar('/');
50995c635efSGarrett D'Amore
51095c635efSGarrett D'Amore putchar('>');
51195c635efSGarrett D'Amore
51295c635efSGarrett D'Amore h->flags |= HTML_NOSPACE;
51395c635efSGarrett D'Amore
51495c635efSGarrett D'Amore if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
51595c635efSGarrett D'Amore putchar('\n');
51695c635efSGarrett D'Amore
51795c635efSGarrett D'Amore return(t);
51895c635efSGarrett D'Amore }
51995c635efSGarrett D'Amore
52095c635efSGarrett D'Amore static void
print_ctag(struct html * h,struct tag * tag)521*260e9a87SYuri Pankov print_ctag(struct html *h, struct tag *tag)
52295c635efSGarrett D'Amore {
52395c635efSGarrett D'Amore
524*260e9a87SYuri Pankov /*
525*260e9a87SYuri Pankov * Remember to close out and nullify the current
526*260e9a87SYuri Pankov * meta-font and table, if applicable.
527*260e9a87SYuri Pankov */
528*260e9a87SYuri Pankov if (tag == h->metaf)
529*260e9a87SYuri Pankov h->metaf = NULL;
530*260e9a87SYuri Pankov if (tag == h->tblt)
531*260e9a87SYuri Pankov h->tblt = NULL;
532*260e9a87SYuri Pankov
533*260e9a87SYuri Pankov printf("</%s>", htmltags[tag->tag].name);
534*260e9a87SYuri Pankov if (HTML_CLRLINE & htmltags[tag->tag].flags) {
53595c635efSGarrett D'Amore h->flags |= HTML_NOSPACE;
53695c635efSGarrett D'Amore putchar('\n');
53795c635efSGarrett D'Amore }
538*260e9a87SYuri Pankov
539*260e9a87SYuri Pankov h->tags.head = tag->next;
540*260e9a87SYuri Pankov free(tag);
54195c635efSGarrett D'Amore }
54295c635efSGarrett D'Amore
/*
 * Print the document type declaration on a line of its own.
 * The output state h is not consulted.
 */
void
print_gen_decls(struct html *h)
{

	fputs("<!DOCTYPE html>\n", stdout);
}
54995c635efSGarrett D'Amore
55095c635efSGarrett D'Amore void
print_text(struct html * h,const char * word)55195c635efSGarrett D'Amore print_text(struct html *h, const char *word)
55295c635efSGarrett D'Amore {
55395c635efSGarrett D'Amore
55495c635efSGarrett D'Amore if ( ! (HTML_NOSPACE & h->flags)) {
55595c635efSGarrett D'Amore /* Manage keeps! */
55695c635efSGarrett D'Amore if ( ! (HTML_KEEP & h->flags)) {
55795c635efSGarrett D'Amore if (HTML_PREKEEP & h->flags)
55895c635efSGarrett D'Amore h->flags |= HTML_KEEP;
55995c635efSGarrett D'Amore putchar(' ');
56095c635efSGarrett D'Amore } else
56195c635efSGarrett D'Amore printf(" ");
56295c635efSGarrett D'Amore }
56395c635efSGarrett D'Amore
56495c635efSGarrett D'Amore assert(NULL == h->metaf);
565698f87a4SGarrett D'Amore switch (h->metac) {
566*260e9a87SYuri Pankov case HTMLFONT_ITALIC:
567698f87a4SGarrett D'Amore h->metaf = print_otag(h, TAG_I, 0, NULL);
568698f87a4SGarrett D'Amore break;
569*260e9a87SYuri Pankov case HTMLFONT_BOLD:
570698f87a4SGarrett D'Amore h->metaf = print_otag(h, TAG_B, 0, NULL);
571698f87a4SGarrett D'Amore break;
572*260e9a87SYuri Pankov case HTMLFONT_BI:
573698f87a4SGarrett D'Amore h->metaf = print_otag(h, TAG_B, 0, NULL);
57495c635efSGarrett D'Amore print_otag(h, TAG_I, 0, NULL);
575698f87a4SGarrett D'Amore break;
576698f87a4SGarrett D'Amore default:
577698f87a4SGarrett D'Amore break;
578698f87a4SGarrett D'Amore }
57995c635efSGarrett D'Amore
58095c635efSGarrett D'Amore assert(word);
58195c635efSGarrett D'Amore if ( ! print_encode(h, word, 0)) {
58295c635efSGarrett D'Amore if ( ! (h->flags & HTML_NONOSPACE))
58395c635efSGarrett D'Amore h->flags &= ~HTML_NOSPACE;
584*260e9a87SYuri Pankov h->flags &= ~HTML_NONEWLINE;
58595c635efSGarrett D'Amore } else
586*260e9a87SYuri Pankov h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
58795c635efSGarrett D'Amore
58895c635efSGarrett D'Amore if (h->metaf) {
58995c635efSGarrett D'Amore print_tagq(h, h->metaf);
59095c635efSGarrett D'Amore h->metaf = NULL;
59195c635efSGarrett D'Amore }
59295c635efSGarrett D'Amore
59395c635efSGarrett D'Amore h->flags &= ~HTML_IGNDELIM;
59495c635efSGarrett D'Amore }
59595c635efSGarrett D'Amore
59695c635efSGarrett D'Amore void
print_tagq(struct html * h,const struct tag * until)59795c635efSGarrett D'Amore print_tagq(struct html *h, const struct tag *until)
59895c635efSGarrett D'Amore {
59995c635efSGarrett D'Amore struct tag *tag;
60095c635efSGarrett D'Amore
60195c635efSGarrett D'Amore while ((tag = h->tags.head) != NULL) {
602*260e9a87SYuri Pankov print_ctag(h, tag);
60395c635efSGarrett D'Amore if (until && tag == until)
60495c635efSGarrett D'Amore return;
60595c635efSGarrett D'Amore }
60695c635efSGarrett D'Amore }
60795c635efSGarrett D'Amore
60895c635efSGarrett D'Amore void
print_stagq(struct html * h,const struct tag * suntil)60995c635efSGarrett D'Amore print_stagq(struct html *h, const struct tag *suntil)
61095c635efSGarrett D'Amore {
61195c635efSGarrett D'Amore struct tag *tag;
61295c635efSGarrett D'Amore
61395c635efSGarrett D'Amore while ((tag = h->tags.head) != NULL) {
61495c635efSGarrett D'Amore if (suntil && tag == suntil)
61595c635efSGarrett D'Amore return;
616*260e9a87SYuri Pankov print_ctag(h, tag);
61795c635efSGarrett D'Amore }
61895c635efSGarrett D'Amore }
61995c635efSGarrett D'Amore
62095c635efSGarrett D'Amore void
print_paragraph(struct html * h)621*260e9a87SYuri Pankov print_paragraph(struct html *h)
622*260e9a87SYuri Pankov {
623*260e9a87SYuri Pankov struct tag *t;
624*260e9a87SYuri Pankov struct htmlpair tag;
625*260e9a87SYuri Pankov
626*260e9a87SYuri Pankov PAIR_CLASS_INIT(&tag, "spacer");
627*260e9a87SYuri Pankov t = print_otag(h, TAG_DIV, 1, &tag);
628*260e9a87SYuri Pankov print_tagq(h, t);
629*260e9a87SYuri Pankov }
630*260e9a87SYuri Pankov
631*260e9a87SYuri Pankov
632*260e9a87SYuri Pankov void
bufinit(struct html * h)63395c635efSGarrett D'Amore bufinit(struct html *h)
63495c635efSGarrett D'Amore {
63595c635efSGarrett D'Amore
63695c635efSGarrett D'Amore h->buf[0] = '\0';
63795c635efSGarrett D'Amore h->buflen = 0;
63895c635efSGarrett D'Amore }
63995c635efSGarrett D'Amore
/*
 * Append one declaration of the form "key:val;" to the scratch
 * buffer.  Each piece goes through bufcat() and is therefore subject
 * to its overflow assertion.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{

	/* Property name and separator ... */
	bufcat(h, key);
	bufcat(h, ":");

	/* ... then the value and its terminator. */
	bufcat(h, val);
	bufcat(h, ";");
}
64995c635efSGarrett D'Amore
65095c635efSGarrett D'Amore void
bufcat(struct html * h,const char * p)65195c635efSGarrett D'Amore bufcat(struct html *h, const char *p)
65295c635efSGarrett D'Amore {
65395c635efSGarrett D'Amore
654*260e9a87SYuri Pankov /*
655*260e9a87SYuri Pankov * XXX This is broken and not easy to fix.
656*260e9a87SYuri Pankov * When using the -Oincludes option, buffmt_includes()
657*260e9a87SYuri Pankov * may pass in strings overrunning BUFSIZ, causing a crash.
658*260e9a87SYuri Pankov */
659*260e9a87SYuri Pankov
66095c635efSGarrett D'Amore h->buflen = strlcat(h->buf, p, BUFSIZ);
66195c635efSGarrett D'Amore assert(h->buflen < BUFSIZ);
66295c635efSGarrett D'Amore }
66395c635efSGarrett D'Amore
66495c635efSGarrett D'Amore void
bufcat_fmt(struct html * h,const char * fmt,...)66595c635efSGarrett D'Amore bufcat_fmt(struct html *h, const char *fmt, ...)
66695c635efSGarrett D'Amore {
66795c635efSGarrett D'Amore va_list ap;
66895c635efSGarrett D'Amore
66995c635efSGarrett D'Amore va_start(ap, fmt);
67095c635efSGarrett D'Amore (void)vsnprintf(h->buf + (int)h->buflen,
67195c635efSGarrett D'Amore BUFSIZ - h->buflen - 1, fmt, ap);
67295c635efSGarrett D'Amore va_end(ap);
67395c635efSGarrett D'Amore h->buflen = strlen(h->buf);
67495c635efSGarrett D'Amore }
67595c635efSGarrett D'Amore
67695c635efSGarrett D'Amore static void
bufncat(struct html * h,const char * p,size_t sz)67795c635efSGarrett D'Amore bufncat(struct html *h, const char *p, size_t sz)
67895c635efSGarrett D'Amore {
67995c635efSGarrett D'Amore
68095c635efSGarrett D'Amore assert(h->buflen + sz + 1 < BUFSIZ);
68195c635efSGarrett D'Amore strncat(h->buf, p, sz);
68295c635efSGarrett D'Amore h->buflen += sz;
68395c635efSGarrett D'Amore }
68495c635efSGarrett D'Amore
68595c635efSGarrett D'Amore void
buffmt_includes(struct html * h,const char * name)68695c635efSGarrett D'Amore buffmt_includes(struct html *h, const char *name)
68795c635efSGarrett D'Amore {
68895c635efSGarrett D'Amore const char *p, *pp;
68995c635efSGarrett D'Amore
69095c635efSGarrett D'Amore pp = h->base_includes;
69195c635efSGarrett D'Amore
69295c635efSGarrett D'Amore bufinit(h);
69395c635efSGarrett D'Amore while (NULL != (p = strchr(pp, '%'))) {
69495c635efSGarrett D'Amore bufncat(h, pp, (size_t)(p - pp));
69595c635efSGarrett D'Amore switch (*(p + 1)) {
696*260e9a87SYuri Pankov case'I':
69795c635efSGarrett D'Amore bufcat(h, name);
69895c635efSGarrett D'Amore break;
69995c635efSGarrett D'Amore default:
70095c635efSGarrett D'Amore bufncat(h, p, 2);
70195c635efSGarrett D'Amore break;
70295c635efSGarrett D'Amore }
70395c635efSGarrett D'Amore pp = p + 2;
70495c635efSGarrett D'Amore }
70595c635efSGarrett D'Amore if (pp)
70695c635efSGarrett D'Amore bufcat(h, pp);
70795c635efSGarrett D'Amore }
70895c635efSGarrett D'Amore
70995c635efSGarrett D'Amore void
buffmt_man(struct html * h,const char * name,const char * sec)710*260e9a87SYuri Pankov buffmt_man(struct html *h, const char *name, const char *sec)
71195c635efSGarrett D'Amore {
71295c635efSGarrett D'Amore const char *p, *pp;
71395c635efSGarrett D'Amore
71495c635efSGarrett D'Amore pp = h->base_man;
71595c635efSGarrett D'Amore
71695c635efSGarrett D'Amore bufinit(h);
71795c635efSGarrett D'Amore while (NULL != (p = strchr(pp, '%'))) {
71895c635efSGarrett D'Amore bufncat(h, pp, (size_t)(p - pp));
71995c635efSGarrett D'Amore switch (*(p + 1)) {
720*260e9a87SYuri Pankov case 'S':
72195c635efSGarrett D'Amore bufcat(h, sec ? sec : "1");
72295c635efSGarrett D'Amore break;
723*260e9a87SYuri Pankov case 'N':
724*260e9a87SYuri Pankov bufcat_fmt(h, "%s", name);
72595c635efSGarrett D'Amore break;
72695c635efSGarrett D'Amore default:
72795c635efSGarrett D'Amore bufncat(h, p, 2);
72895c635efSGarrett D'Amore break;
72995c635efSGarrett D'Amore }
73095c635efSGarrett D'Amore pp = p + 2;
73195c635efSGarrett D'Amore }
73295c635efSGarrett D'Amore if (pp)
73395c635efSGarrett D'Amore bufcat(h, pp);
73495c635efSGarrett D'Amore }
73595c635efSGarrett D'Amore
73695c635efSGarrett D'Amore void
bufcat_su(struct html * h,const char * p,const struct roffsu * su)73795c635efSGarrett D'Amore bufcat_su(struct html *h, const char *p, const struct roffsu *su)
73895c635efSGarrett D'Amore {
73995c635efSGarrett D'Amore double v;
74095c635efSGarrett D'Amore
74195c635efSGarrett D'Amore v = su->scale;
74295c635efSGarrett D'Amore if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
74395c635efSGarrett D'Amore v = 1.0;
744*260e9a87SYuri Pankov else if (SCALE_BU == su->unit)
745*260e9a87SYuri Pankov v /= 24.0;
74695c635efSGarrett D'Amore
74795c635efSGarrett D'Amore bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
74895c635efSGarrett D'Amore }
74995c635efSGarrett D'Amore
/*
 * Append src to the scratch buffer as an identifier, encoding every
 * byte as two lowercase hexadecimal digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Go through unsigned char: where plain char is signed, a byte
	 * of 0x80 or above would otherwise be sign-extended and printed
	 * as eight hex digits instead of two.
	 */
	while ('\0' != *src)
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src++);
}
759