xref: /freebsd/contrib/mandoc/man_html.c (revision 25ecdc7d52770caf1c9b44b5ec11f468f6b636f3)
1 /*	$Id: man_html.c,v 1.174 2019/04/30 15:53:00 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "man.h"
32 #include "out.h"
33 #include "html.h"
34 #include "main.h"
35 
/*
 * Parameter list shared by the man(7) HTML handlers in this file:
 * the document metadata (man), the node being formatted (n),
 * and the HTML output state (h).
 */
#define	MAN_ARGS \
	const struct roff_meta *man, \
	const struct roff_node *n, \
	struct html *h
39 
/*
 * Handler pair for one man(7) macro; man_html_acts[] holds one
 * entry per macro.
 */
struct man_html_act {
	/*
	 * Called by print_man_node() before the children of a node
	 * are printed.  A return value of zero suppresses the generic
	 * printing of the children.
	 */
	int	(*pre)(MAN_ARGS);
	/* Not invoked by any code visible in this file. */
	int	(*post)(MAN_ARGS);
};
44 
/* Document-level output. */
static	void	 print_man_head(const struct roff_meta *man, struct html *h);
static	void	 man_root_pre(const struct roff_meta *man, struct html *h);
static	void	 man_root_post(const struct roff_meta *man, struct html *h);

/* Generic traversal of the syntax tree. */
static	void	 print_man_nodelist(MAN_ARGS);
static	void	 print_man_node(MAN_ARGS);
static	char	 list_continues(const struct roff_node *n1,
			const struct roff_node *n2);

/* Per-macro pre-handlers referenced from man_html_acts[]. */
static	int	 man_B_pre(MAN_ARGS);
static	int	 man_I_pre(MAN_ARGS);
static	int	 man_IP_pre(MAN_ARGS);
static	int	 man_OP_pre(MAN_ARGS);
static	int	 man_PP_pre(MAN_ARGS);
static	int	 man_RS_pre(MAN_ARGS);
static	int	 man_SH_pre(MAN_ARGS);
static	int	 man_SM_pre(MAN_ARGS);
static	int	 man_SY_pre(MAN_ARGS);
static	int	 man_UR_pre(MAN_ARGS);
static	int	 man_abort_pre(MAN_ARGS);
static	int	 man_alt_pre(MAN_ARGS);
static	int	 man_ign_pre(MAN_ARGS);
static	int	 man_in_pre(MAN_ARGS);
69 
70 static	const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
71 	{ NULL, NULL }, /* TH */
72 	{ man_SH_pre, NULL }, /* SH */
73 	{ man_SH_pre, NULL }, /* SS */
74 	{ man_IP_pre, NULL }, /* TP */
75 	{ man_IP_pre, NULL }, /* TQ */
76 	{ man_abort_pre, NULL }, /* LP */
77 	{ man_PP_pre, NULL }, /* PP */
78 	{ man_abort_pre, NULL }, /* P */
79 	{ man_IP_pre, NULL }, /* IP */
80 	{ man_PP_pre, NULL }, /* HP */
81 	{ man_SM_pre, NULL }, /* SM */
82 	{ man_SM_pre, NULL }, /* SB */
83 	{ man_alt_pre, NULL }, /* BI */
84 	{ man_alt_pre, NULL }, /* IB */
85 	{ man_alt_pre, NULL }, /* BR */
86 	{ man_alt_pre, NULL }, /* RB */
87 	{ NULL, NULL }, /* R */
88 	{ man_B_pre, NULL }, /* B */
89 	{ man_I_pre, NULL }, /* I */
90 	{ man_alt_pre, NULL }, /* IR */
91 	{ man_alt_pre, NULL }, /* RI */
92 	{ NULL, NULL }, /* RE */
93 	{ man_RS_pre, NULL }, /* RS */
94 	{ man_ign_pre, NULL }, /* DT */
95 	{ man_ign_pre, NULL }, /* UC */
96 	{ man_ign_pre, NULL }, /* PD */
97 	{ man_ign_pre, NULL }, /* AT */
98 	{ man_in_pre, NULL }, /* in */
99 	{ man_SY_pre, NULL }, /* SY */
100 	{ NULL, NULL }, /* YS */
101 	{ man_OP_pre, NULL }, /* OP */
102 	{ NULL, NULL }, /* EX */
103 	{ NULL, NULL }, /* EE */
104 	{ man_UR_pre, NULL }, /* UR */
105 	{ NULL, NULL }, /* UE */
106 	{ man_UR_pre, NULL }, /* MT */
107 	{ NULL, NULL }, /* ME */
108 };
109 
110 
111 void
112 html_man(void *arg, const struct roff_meta *man)
113 {
114 	struct html		*h;
115 	struct roff_node	*n;
116 	struct tag		*t;
117 
118 	h = (struct html *)arg;
119 	n = man->first->child;
120 
121 	if ((h->oflags & HTML_FRAGMENT) == 0) {
122 		print_gen_decls(h);
123 		print_otag(h, TAG_HTML, "");
124 		if (n != NULL && n->type == ROFFT_COMMENT)
125 			print_gen_comment(h, n);
126 		t = print_otag(h, TAG_HEAD, "");
127 		print_man_head(man, h);
128 		print_tagq(h, t);
129 		print_otag(h, TAG_BODY, "");
130 	}
131 
132 	man_root_pre(man, h);
133 	t = print_otag(h, TAG_DIV, "c", "manual-text");
134 	print_man_nodelist(man, n, h);
135 	print_tagq(h, t);
136 	man_root_post(man, h);
137 	print_tagq(h, NULL);
138 }
139 
140 static void
141 print_man_head(const struct roff_meta *man, struct html *h)
142 {
143 	char	*cp;
144 
145 	print_gen_head(h);
146 	mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec);
147 	print_otag(h, TAG_TITLE, "");
148 	print_text(h, cp);
149 	free(cp);
150 }
151 
152 static void
153 print_man_nodelist(MAN_ARGS)
154 {
155 	while (n != NULL) {
156 		print_man_node(man, n, h);
157 		n = n->next;
158 	}
159 }
160 
161 static void
162 print_man_node(MAN_ARGS)
163 {
164 	struct tag	*t;
165 	int		 child;
166 
167 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
168 		return;
169 
170 	html_fillmode(h, n->flags & NODE_NOFILL ? ROFF_nf : ROFF_fi);
171 
172 	child = 1;
173 	switch (n->type) {
174 	case ROFFT_TEXT:
175 		if (*n->string == '\0') {
176 			print_endline(h);
177 			return;
178 		}
179 		if (*n->string == ' ' && n->flags & NODE_LINE &&
180 		    (h->flags & HTML_NONEWLINE) == 0)
181 			print_endline(h);
182 		else if (n->flags & NODE_DELIMC)
183 			h->flags |= HTML_NOSPACE;
184 		t = h->tag;
185 		t->refcnt++;
186 		print_text(h, n->string);
187 		break;
188 	case ROFFT_EQN:
189 		t = h->tag;
190 		t->refcnt++;
191 		print_eqn(h, n->eqn);
192 		break;
193 	case ROFFT_TBL:
194 		/*
195 		 * This will take care of initialising all of the table
196 		 * state data for the first table, then tearing it down
197 		 * for the last one.
198 		 */
199 		print_tbl(h, n->span);
200 		return;
201 	default:
202 		/*
203 		 * Close out scope of font prior to opening a macro
204 		 * scope.
205 		 */
206 		if (h->metac != ESCAPE_FONTROMAN) {
207 			h->metal = h->metac;
208 			h->metac = ESCAPE_FONTROMAN;
209 		}
210 
211 		/*
212 		 * Close out the current table, if it's open, and unset
213 		 * the "meta" table state.  This will be reopened on the
214 		 * next table element.
215 		 */
216 		if (h->tblt != NULL)
217 			print_tblclose(h);
218 		t = h->tag;
219 		t->refcnt++;
220 		if (n->tok < ROFF_MAX) {
221 			roff_html_pre(h, n);
222 			t->refcnt--;
223 			print_stagq(h, t);
224 			return;
225 		}
226 		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
227 		if (man_html_acts[n->tok - MAN_TH].pre != NULL)
228 			child = (*man_html_acts[n->tok - MAN_TH].pre)(man,
229 			    n, h);
230 		break;
231 	}
232 
233 	if (child && n->child != NULL)
234 		print_man_nodelist(man, n->child, h);
235 
236 	/* This will automatically close out any font scope. */
237 	t->refcnt--;
238 	if (n->type == ROFFT_BLOCK &&
239 	    (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) {
240 		t = h->tag;
241 		while (t->tag != TAG_DL && t->tag != TAG_UL)
242 			t = t->next;
243 		/*
244 		 * Close the list if no further item of the same type
245 		 * follows; otherwise, close the item only.
246 		 */
247 		if (list_continues(n, n->next) == '\0') {
248 			print_tagq(h, t);
249 			t = NULL;
250 		}
251 	}
252 	if (t != NULL)
253 		print_stagq(h, t);
254 
255 	if (n->flags & NODE_NOFILL && n->tok != MAN_YS &&
256 	    (n->next != NULL && n->next->flags & NODE_LINE)) {
257 		/* In .nf = <pre>, print even empty lines. */
258 		h->col++;
259 		print_endline(h);
260 	}
261 }
262 
263 static void
264 man_root_pre(const struct roff_meta *man, struct html *h)
265 {
266 	struct tag	*t, *tt;
267 	char		*title;
268 
269 	assert(man->title);
270 	assert(man->msec);
271 	mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
272 
273 	t = print_otag(h, TAG_TABLE, "c", "head");
274 	tt = print_otag(h, TAG_TR, "");
275 
276 	print_otag(h, TAG_TD, "c", "head-ltitle");
277 	print_text(h, title);
278 	print_stagq(h, tt);
279 
280 	print_otag(h, TAG_TD, "c", "head-vol");
281 	if (man->vol != NULL)
282 		print_text(h, man->vol);
283 	print_stagq(h, tt);
284 
285 	print_otag(h, TAG_TD, "c", "head-rtitle");
286 	print_text(h, title);
287 	print_tagq(h, t);
288 	free(title);
289 }
290 
291 static void
292 man_root_post(const struct roff_meta *man, struct html *h)
293 {
294 	struct tag	*t, *tt;
295 
296 	t = print_otag(h, TAG_TABLE, "c", "foot");
297 	tt = print_otag(h, TAG_TR, "");
298 
299 	print_otag(h, TAG_TD, "c", "foot-date");
300 	print_text(h, man->date);
301 	print_stagq(h, tt);
302 
303 	print_otag(h, TAG_TD, "c", "foot-os");
304 	if (man->os != NULL)
305 		print_text(h, man->os);
306 	print_tagq(h, t);
307 }
308 
309 static int
310 man_SH_pre(MAN_ARGS)
311 {
312 	const char	*class;
313 	char		*id;
314 	enum htmltag	 tag;
315 
316 	if (n->tok == MAN_SH) {
317 		tag = TAG_H1;
318 		class = "Sh";
319 	} else {
320 		tag = TAG_H2;
321 		class = "Ss";
322 	}
323 	switch (n->type) {
324 	case ROFFT_BLOCK:
325 		html_close_paragraph(h);
326 		print_otag(h, TAG_SECTION, "c", class);
327 		break;
328 	case ROFFT_HEAD:
329 		id = html_make_id(n, 1);
330 		print_otag(h, tag, "ci", class, id);
331 		if (id != NULL)
332 			print_otag(h, TAG_A, "chR", "permalink", id);
333 		break;
334 	case ROFFT_BODY:
335 		break;
336 	default:
337 		abort();
338 	}
339 	return 1;
340 }
341 
342 static int
343 man_alt_pre(MAN_ARGS)
344 {
345 	const struct roff_node	*nn;
346 	struct tag	*t;
347 	int		 i;
348 	enum htmltag	 fp;
349 
350 	for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) {
351 		switch (n->tok) {
352 		case MAN_BI:
353 			fp = i % 2 ? TAG_I : TAG_B;
354 			break;
355 		case MAN_IB:
356 			fp = i % 2 ? TAG_B : TAG_I;
357 			break;
358 		case MAN_RI:
359 			fp = i % 2 ? TAG_I : TAG_MAX;
360 			break;
361 		case MAN_IR:
362 			fp = i % 2 ? TAG_MAX : TAG_I;
363 			break;
364 		case MAN_BR:
365 			fp = i % 2 ? TAG_MAX : TAG_B;
366 			break;
367 		case MAN_RB:
368 			fp = i % 2 ? TAG_B : TAG_MAX;
369 			break;
370 		default:
371 			abort();
372 		}
373 
374 		if (i)
375 			h->flags |= HTML_NOSPACE;
376 
377 		if (fp != TAG_MAX)
378 			t = print_otag(h, fp, "");
379 
380 		print_text(h, nn->string);
381 
382 		if (fp != TAG_MAX)
383 			print_tagq(h, t);
384 	}
385 	return 0;
386 }
387 
388 static int
389 man_SM_pre(MAN_ARGS)
390 {
391 	print_otag(h, TAG_SMALL, "");
392 	if (n->tok == MAN_SB)
393 		print_otag(h, TAG_B, "");
394 	return 1;
395 }
396 
397 static int
398 man_PP_pre(MAN_ARGS)
399 {
400 	switch (n->type) {
401 	case ROFFT_BLOCK:
402 		html_close_paragraph(h);
403 		break;
404 	case ROFFT_HEAD:
405 		return 0;
406 	case ROFFT_BODY:
407 		if (n->child != NULL &&
408 		    (n->child->flags & NODE_NOFILL) == 0)
409 			print_otag(h, TAG_P, "c",
410 			    n->tok == MAN_PP ? "Pp" : "Pp HP");
411 		break;
412 	default:
413 		abort();
414 	}
415 	return 1;
416 }
417 
418 static char
419 list_continues(const struct roff_node *n1, const struct roff_node *n2)
420 {
421 	const char *s1, *s2;
422 	char c1, c2;
423 
424 	if (n1 == NULL || n1->type != ROFFT_BLOCK ||
425 	    n2 == NULL || n2->type != ROFFT_BLOCK)
426 		return '\0';
427 	if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) &&
428 	    (n2->tok == MAN_TP || n2->tok == MAN_TQ))
429 		return ' ';
430 	if (n1->tok != MAN_IP || n2->tok != MAN_IP)
431 		return '\0';
432 	n1 = n1->head->child;
433 	n2 = n2->head->child;
434 	s1 = n1 == NULL ? "" : n1->string;
435 	s2 = n2 == NULL ? "" : n2->string;
436 	c1 = strcmp(s1, "*") == 0 ? '*' :
437 	     strcmp(s1, "\\-") == 0 ? '-' :
438 	     strcmp(s1, "\\(bu") == 0 ? 'b' : ' ';
439 	c2 = strcmp(s2, "*") == 0 ? '*' :
440 	     strcmp(s2, "\\-") == 0 ? '-' :
441 	     strcmp(s2, "\\(bu") == 0 ? 'b' : ' ';
442 	return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
443 }
444 
445 static int
446 man_IP_pre(MAN_ARGS)
447 {
448 	const struct roff_node	*nn;
449 	const char		*list_class;
450 	enum htmltag		 list_elem, body_elem;
451 	char			 list_type;
452 
453 	nn = n->type == ROFFT_BLOCK ? n : n->parent;
454 	if ((list_type = list_continues(nn->prev, nn)) == '\0') {
455 		/* Start a new list. */
456 		if ((list_type = list_continues(nn, nn->next)) == '\0')
457 			list_type = ' ';
458 		switch (list_type) {
459 		case ' ':
460 			list_class = "Bl-tag";
461 			list_elem = TAG_DL;
462 			break;
463 		case '*':
464 			list_class = "Bl-bullet";
465 			list_elem = TAG_UL;
466 			break;
467 		case '-':
468 			list_class = "Bl-dash";
469 			list_elem = TAG_UL;
470 			break;
471 		default:
472 			abort();
473 		}
474 	} else {
475 		/* Continue a list that was started earlier. */
476 		list_class = NULL;
477 		list_elem = TAG_MAX;
478 	}
479 	body_elem = list_type == ' ' ? TAG_DD : TAG_LI;
480 
481 	switch (n->type) {
482 	case ROFFT_BLOCK:
483 		html_close_paragraph(h);
484 		if (list_elem != TAG_MAX)
485 			print_otag(h, list_elem, "c", list_class);
486 		return 1;
487 	case ROFFT_HEAD:
488 		if (body_elem == TAG_LI)
489 			return 0;
490 		print_otag(h, TAG_DT, "");
491 		break;
492 	case ROFFT_BODY:
493 		print_otag(h, body_elem, "");
494 		return 1;
495 	default:
496 		abort();
497 	}
498 
499 	switch(n->tok) {
500 	case MAN_IP:  /* Only print the first header element. */
501 		if (n->child != NULL)
502 			print_man_node(man, n->child, h);
503 		break;
504 	case MAN_TP:  /* Only print next-line header elements. */
505 	case MAN_TQ:
506 		nn = n->child;
507 		while (nn != NULL && (NODE_LINE & nn->flags) == 0)
508 			nn = nn->next;
509 		while (nn != NULL) {
510 			print_man_node(man, nn, h);
511 			nn = nn->next;
512 		}
513 		break;
514 	default:
515 		abort();
516 	}
517 	return 0;
518 }
519 
520 static int
521 man_OP_pre(MAN_ARGS)
522 {
523 	struct tag	*tt;
524 
525 	print_text(h, "[");
526 	h->flags |= HTML_NOSPACE;
527 	tt = print_otag(h, TAG_SPAN, "c", "Op");
528 
529 	if ((n = n->child) != NULL) {
530 		print_otag(h, TAG_B, "");
531 		print_text(h, n->string);
532 	}
533 
534 	print_stagq(h, tt);
535 
536 	if (n != NULL && n->next != NULL) {
537 		print_otag(h, TAG_I, "");
538 		print_text(h, n->next->string);
539 	}
540 
541 	print_stagq(h, tt);
542 	h->flags |= HTML_NOSPACE;
543 	print_text(h, "]");
544 	return 0;
545 }
546 
547 static int
548 man_B_pre(MAN_ARGS)
549 {
550 	print_otag(h, TAG_B, "");
551 	return 1;
552 }
553 
554 static int
555 man_I_pre(MAN_ARGS)
556 {
557 	print_otag(h, TAG_I, "");
558 	return 1;
559 }
560 
561 static int
562 man_in_pre(MAN_ARGS)
563 {
564 	print_otag(h, TAG_BR, "");
565 	return 0;
566 }
567 
568 static int
569 man_ign_pre(MAN_ARGS)
570 {
571 	return 0;
572 }
573 
574 static int
575 man_RS_pre(MAN_ARGS)
576 {
577 	switch (n->type) {
578 	case ROFFT_BLOCK:
579 		html_close_paragraph(h);
580 		break;
581 	case ROFFT_HEAD:
582 		return 0;
583 	case ROFFT_BODY:
584 		print_otag(h, TAG_DIV, "c", "Bd-indent");
585 		break;
586 	default:
587 		abort();
588 	}
589 	return 1;
590 }
591 
592 static int
593 man_SY_pre(MAN_ARGS)
594 {
595 	switch (n->type) {
596 	case ROFFT_BLOCK:
597 		html_close_paragraph(h);
598 		print_otag(h, TAG_TABLE, "c", "Nm");
599 		print_otag(h, TAG_TR, "");
600 		break;
601 	case ROFFT_HEAD:
602 		print_otag(h, TAG_TD, "");
603 		print_otag(h, TAG_CODE, "c", "Nm");
604 		break;
605 	case ROFFT_BODY:
606 		print_otag(h, TAG_TD, "");
607 		break;
608 	default:
609 		abort();
610 	}
611 	return 1;
612 }
613 
614 static int
615 man_UR_pre(MAN_ARGS)
616 {
617 	char *cp;
618 
619 	n = n->child;
620 	assert(n->type == ROFFT_HEAD);
621 	if (n->child != NULL) {
622 		assert(n->child->type == ROFFT_TEXT);
623 		if (n->tok == MAN_MT) {
624 			mandoc_asprintf(&cp, "mailto:%s", n->child->string);
625 			print_otag(h, TAG_A, "ch", "Mt", cp);
626 			free(cp);
627 		} else
628 			print_otag(h, TAG_A, "ch", "Lk", n->child->string);
629 	}
630 
631 	assert(n->next->type == ROFFT_BODY);
632 	if (n->next->child != NULL)
633 		n = n->next;
634 
635 	print_man_nodelist(man, n->child, h);
636 	return 0;
637 }
638 
639 static int
640 man_abort_pre(MAN_ARGS)
641 {
642 	abort();
643 }
644