xref: /freebsd/contrib/mandoc/man_html.c (revision 06410c1b51637e5e1f392d553b5008948af58014)
1 /* $Id: man_html.c,v 1.188 2025/06/26 17:06:34 schwarze Exp $ */
2 /*
3  * Copyright (c) 2013-2020,2022-2023,2025 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * HTML formatter for man(7) used by mandoc(1).
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "out.h"
35 #include "html.h"
36 #include "main.h"
37 
/*
 * Parameter list shared by all man(7) HTML handlers in this file:
 * the document meta data (man), the syntax tree node being
 * formatted (n), and the HTML output state (h).
 */
#define	MAN_ARGS \
	const struct roff_meta *man, struct roff_node *n, struct html *h
41 
/*
 * Handlers for one man(7) macro.
 * print_man_node() calls pre, if set, before descending into the
 * children of a node; a return value of zero suppresses the descent.
 * NOTE(review): no call through post is visible in this file - confirm
 * before relying on it.
 */
struct	man_html_act {
	/* Runs before the children of the node are formatted. */
	int		(*pre)(MAN_ARGS);

	/* Counterpart slot for work after the children. */
	int		(*post)(MAN_ARGS);
};
46 
/* Document frame: <head> content, page header line, page footer line. */
static	void		  print_man_head(const struct roff_meta *man,
				struct html *h);
static	void		  man_root_pre(const struct roff_meta *man,
				struct html *h);
static	void		  man_root_post(const struct roff_meta *man,
				struct html *h);

/* Syntax tree traversal. */
static	void		  print_man_nodelist(MAN_ARGS);
static	void		  print_man_node(MAN_ARGS);

/* Decides whether two adjacent blocks belong to the same HTML list. */
static	char		  list_continues(const struct roff_node *n1,
				const struct roff_node *n2);

/* Per-macro "pre" handlers referenced from man_html_acts[]. */
static	int		  man_B_pre(MAN_ARGS);
static	int		  man_I_pre(MAN_ARGS);
static	int		  man_IP_pre(MAN_ARGS);
static	int		  man_MR_pre(MAN_ARGS);
static	int		  man_OP_pre(MAN_ARGS);
static	int		  man_PP_pre(MAN_ARGS);
static	int		  man_RS_pre(MAN_ARGS);
static	int		  man_SH_pre(MAN_ARGS);
static	int		  man_SM_pre(MAN_ARGS);
static	int		  man_SY_pre(MAN_ARGS);
static	int		  man_UR_pre(MAN_ARGS);
static	int		  man_alt_pre(MAN_ARGS);
static	int		  man_ign_pre(MAN_ARGS);
static	int		  man_in_pre(MAN_ARGS);
71 
72 static	const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
73 	{ NULL, NULL }, /* TH */
74 	{ man_SH_pre, NULL }, /* SH */
75 	{ man_SH_pre, NULL }, /* SS */
76 	{ man_IP_pre, NULL }, /* TP */
77 	{ man_IP_pre, NULL }, /* TQ */
78 	{ man_PP_pre, NULL }, /* LP */
79 	{ man_PP_pre, NULL }, /* PP */
80 	{ man_PP_pre, NULL }, /* P */
81 	{ man_IP_pre, NULL }, /* IP */
82 	{ man_PP_pre, NULL }, /* HP */
83 	{ man_SM_pre, NULL }, /* SM */
84 	{ man_SM_pre, NULL }, /* SB */
85 	{ man_alt_pre, NULL }, /* BI */
86 	{ man_alt_pre, NULL }, /* IB */
87 	{ man_alt_pre, NULL }, /* BR */
88 	{ man_alt_pre, NULL }, /* RB */
89 	{ NULL, NULL }, /* R */
90 	{ man_B_pre, NULL }, /* B */
91 	{ man_I_pre, NULL }, /* I */
92 	{ man_alt_pre, NULL }, /* IR */
93 	{ man_alt_pre, NULL }, /* RI */
94 	{ NULL, NULL }, /* RE */
95 	{ man_RS_pre, NULL }, /* RS */
96 	{ man_ign_pre, NULL }, /* DT */
97 	{ man_ign_pre, NULL }, /* UC */
98 	{ man_ign_pre, NULL }, /* PD */
99 	{ man_ign_pre, NULL }, /* AT */
100 	{ man_in_pre, NULL }, /* in */
101 	{ man_SY_pre, NULL }, /* SY */
102 	{ NULL, NULL }, /* YS */
103 	{ man_OP_pre, NULL }, /* OP */
104 	{ NULL, NULL }, /* EX */
105 	{ NULL, NULL }, /* EE */
106 	{ man_UR_pre, NULL }, /* UR */
107 	{ NULL, NULL }, /* UE */
108 	{ man_UR_pre, NULL }, /* MT */
109 	{ NULL, NULL }, /* ME */
110 	{ man_MR_pre, NULL }, /* MR */
111 };
112 
113 
114 void
115 html_man(void *arg, const struct roff_meta *man)
116 {
117 	struct html		*h;
118 	struct roff_node	*n;
119 	struct tag		*t;
120 
121 	h = (struct html *)arg;
122 	n = man->first->child;
123 
124 	if ((h->oflags & HTML_FRAGMENT) == 0) {
125 		print_gen_decls(h);
126 		print_otag(h, TAG_HTML, "");
127 		t = print_otag(h, TAG_HEAD, "");
128 		print_man_head(man, h);
129 		print_tagq(h, t);
130 		if (n != NULL && n->type == ROFFT_COMMENT)
131 			print_gen_comment(h, n);
132 		print_otag(h, TAG_BODY, "");
133 	}
134 
135 	man_root_pre(man, h);
136 	t = print_otag(h, TAG_MAIN, "c", "manual-text");
137 	print_man_nodelist(man, n, h);
138 	print_tagq(h, t);
139 	man_root_post(man, h);
140 	print_tagq(h, NULL);
141 }
142 
143 static void
144 print_man_head(const struct roff_meta *man, struct html *h)
145 {
146 	char	*cp;
147 
148 	print_gen_head(h);
149 	mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec);
150 	print_otag(h, TAG_TITLE, "");
151 	print_text(h, cp);
152 	free(cp);
153 }
154 
155 static void
156 print_man_nodelist(MAN_ARGS)
157 {
158 	while (n != NULL) {
159 		print_man_node(man, n, h);
160 		n = n->next;
161 	}
162 }
163 
164 static void
165 print_man_node(MAN_ARGS)
166 {
167 	struct tag	*t;
168 	int		 child;
169 
170 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
171 		return;
172 
173 	if ((n->flags & NODE_NOFILL) == 0)
174 		html_fillmode(h, ROFF_fi);
175 	else if (html_fillmode(h, ROFF_nf) == ROFF_nf &&
176 	    n->tok != ROFF_fi && n->flags & NODE_LINE &&
177 	    (n->prev == NULL || n->prev->tok != MAN_YS))
178 		print_endline(h);
179 
180 	child = 1;
181 	switch (n->type) {
182 	case ROFFT_TEXT:
183 		if (*n->string == '\0') {
184 			print_endline(h);
185 			return;
186 		}
187 		if (*n->string == ' ' && n->flags & NODE_LINE &&
188 		    (h->flags & HTML_NONEWLINE) == 0)
189 			print_otag(h, TAG_BR, "");
190 		else if (n->flags & NODE_DELIMC)
191 			h->flags |= HTML_NOSPACE;
192 		t = h->tag;
193 		t->refcnt++;
194 		print_text(h, n->string);
195 		break;
196 	case ROFFT_EQN:
197 		t = h->tag;
198 		t->refcnt++;
199 		print_eqn(h, n->eqn);
200 		break;
201 	case ROFFT_TBL:
202 		/*
203 		 * This will take care of initialising all of the table
204 		 * state data for the first table, then tearing it down
205 		 * for the last one.
206 		 */
207 		print_tbl(h, n->span);
208 		return;
209 	default:
210 		/*
211 		 * Close out scope of font prior to opening a macro
212 		 * scope.
213 		 */
214 		if (h->metac != ESCAPE_FONTROMAN) {
215 			h->metal = h->metac;
216 			h->metac = ESCAPE_FONTROMAN;
217 		}
218 
219 		/*
220 		 * Close out the current table, if it's open, and unset
221 		 * the "meta" table state.  This will be reopened on the
222 		 * next table element.
223 		 */
224 		if (h->tblt != NULL)
225 			print_tblclose(h);
226 		t = h->tag;
227 		t->refcnt++;
228 		if (n->tok < ROFF_MAX) {
229 			roff_html_pre(h, n);
230 			t->refcnt--;
231 			print_stagq(h, t);
232 			return;
233 		}
234 		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
235 		if (man_html_acts[n->tok - MAN_TH].pre != NULL)
236 			child = (*man_html_acts[n->tok - MAN_TH].pre)(man,
237 			    n, h);
238 		break;
239 	}
240 
241 	if (child && n->child != NULL)
242 		print_man_nodelist(man, n->child, h);
243 
244 	/* This will automatically close out any font scope. */
245 	t->refcnt--;
246 	if (n->type == ROFFT_BLOCK &&
247 	    (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) {
248 		t = h->tag;
249 		while (t->tag != TAG_DL && t->tag != TAG_UL)
250 			t = t->next;
251 		/*
252 		 * Close the list if no further item of the same type
253 		 * follows; otherwise, close the item only.
254 		 */
255 		if (list_continues(n, roff_node_next(n)) == '\0') {
256 			print_tagq(h, t);
257 			t = NULL;
258 		}
259 	}
260 	if (t != NULL)
261 		print_stagq(h, t);
262 }
263 
264 static void
265 man_root_pre(const struct roff_meta *man, struct html *h)
266 {
267 	struct tag	*t;
268 	char		*title;
269 
270 	assert(man->title);
271 	assert(man->msec);
272 	mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
273 
274 	t = print_otag(h, TAG_DIV, "cr?", "head", "doc-pageheader",
275 	    "aria-label", "Manual header line");
276 
277 	print_otag(h, TAG_SPAN, "c", "head-ltitle");
278 	print_text(h, title);
279 	print_stagq(h, t);
280 
281 	print_otag(h, TAG_SPAN, "c", "head-vol");
282 	if (man->vol != NULL)
283 		print_text(h, man->vol);
284 	print_stagq(h, t);
285 
286 	print_otag(h, TAG_SPAN, "c", "head-rtitle");
287 	print_text(h, title);
288 	print_tagq(h, t);
289 	free(title);
290 }
291 
292 static void
293 man_root_post(const struct roff_meta *man, struct html *h)
294 {
295 	struct tag	*t;
296 	char		*title;
297 
298 	assert(man->title != NULL);
299 	if (man->msec == NULL)
300 		title = mandoc_strdup(man->title);
301 	else
302 		mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
303 
304 	t = print_otag(h, TAG_DIV, "cr?", "foot", "doc-pagefooter",
305 	    "aria-label", "Manual footer line");
306 
307 	print_otag(h, TAG_SPAN, "c", "foot-left");
308 	if (man->os != NULL)
309 		print_text(h, man->os);
310 	print_stagq(h, t);
311 
312 	print_otag(h, TAG_SPAN, "c", "foot-date");
313 	print_text(h, man->date);
314 	print_stagq(h, t);
315 
316 	print_otag(h, TAG_SPAN, "c", "foot-right");
317 	print_text(h, title);
318 	print_tagq(h, t);
319 	free(title);
320 }
321 
322 static int
323 man_SH_pre(MAN_ARGS)
324 {
325 	const char	*class;
326 	enum htmltag	 tag;
327 
328 	if (n->tok == MAN_SH) {
329 		tag = TAG_H2;
330 		class = "Sh";
331 	} else {
332 		tag = TAG_H3;
333 		class = "Ss";
334 	}
335 	switch (n->type) {
336 	case ROFFT_BLOCK:
337 		html_close_paragraph(h);
338 		print_otag(h, TAG_SECTION, "c", class);
339 		break;
340 	case ROFFT_HEAD:
341 		print_otag_id(h, tag, class, n);
342 		break;
343 	case ROFFT_BODY:
344 		break;
345 	default:
346 		abort();
347 	}
348 	return 1;
349 }
350 
351 static int
352 man_alt_pre(MAN_ARGS)
353 {
354 	const struct roff_node	*nn;
355 	struct tag	*t;
356 	int		 i;
357 	enum htmltag	 fp;
358 
359 	for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) {
360 		switch (n->tok) {
361 		case MAN_BI:
362 			fp = i % 2 ? TAG_I : TAG_B;
363 			break;
364 		case MAN_IB:
365 			fp = i % 2 ? TAG_B : TAG_I;
366 			break;
367 		case MAN_RI:
368 			fp = i % 2 ? TAG_I : TAG_MAX;
369 			break;
370 		case MAN_IR:
371 			fp = i % 2 ? TAG_MAX : TAG_I;
372 			break;
373 		case MAN_BR:
374 			fp = i % 2 ? TAG_MAX : TAG_B;
375 			break;
376 		case MAN_RB:
377 			fp = i % 2 ? TAG_B : TAG_MAX;
378 			break;
379 		default:
380 			abort();
381 		}
382 
383 		if (i)
384 			h->flags |= HTML_NOSPACE;
385 
386 		if (fp != TAG_MAX)
387 			t = print_otag(h, fp, "");
388 
389 		print_text(h, nn->string);
390 
391 		if (fp != TAG_MAX)
392 			print_tagq(h, t);
393 	}
394 	return 0;
395 }
396 
397 static int
398 man_SM_pre(MAN_ARGS)
399 {
400 	print_otag(h, TAG_SMALL, "");
401 	if (n->tok == MAN_SB)
402 		print_otag(h, TAG_B, "");
403 	return 1;
404 }
405 
406 static int
407 man_PP_pre(MAN_ARGS)
408 {
409 	switch (n->type) {
410 	case ROFFT_BLOCK:
411 		html_close_paragraph(h);
412 		break;
413 	case ROFFT_HEAD:
414 		return 0;
415 	case ROFFT_BODY:
416 		if (n->child != NULL &&
417 		    (n->child->flags & NODE_NOFILL) == 0)
418 			print_otag(h, TAG_P, "c",
419 			    n->tok == MAN_HP ? "Pp HP" : "Pp");
420 		break;
421 	default:
422 		abort();
423 	}
424 	return 1;
425 }
426 
427 static char
428 list_continues(const struct roff_node *n1, const struct roff_node *n2)
429 {
430 	const char *s1, *s2;
431 	char c1, c2;
432 
433 	if (n1 == NULL || n1->type != ROFFT_BLOCK ||
434 	    n2 == NULL || n2->type != ROFFT_BLOCK)
435 		return '\0';
436 	if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) &&
437 	    (n2->tok == MAN_TP || n2->tok == MAN_TQ))
438 		return ' ';
439 	if (n1->tok != MAN_IP || n2->tok != MAN_IP)
440 		return '\0';
441 	n1 = n1->head->child;
442 	n2 = n2->head->child;
443 	s1 = n1 == NULL ? "" : n1->string;
444 	s2 = n2 == NULL ? "" : n2->string;
445 	c1 = strcmp(s1, "*") == 0 ? '*' :
446 	     strcmp(s1, "\\-") == 0 ? '-' :
447 	     strcmp(s1, "\\(bu") == 0 ? 'b' :
448 	     strcmp(s1, "\\[bu]") == 0 ? 'b' : ' ';
449 	c2 = strcmp(s2, "*") == 0 ? '*' :
450 	     strcmp(s2, "\\-") == 0 ? '-' :
451 	     strcmp(s2, "\\(bu") == 0 ? 'b' :
452 	     strcmp(s2, "\\[bu]") == 0 ? 'b' : ' ';
453 	return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
454 }
455 
456 static int
457 man_IP_pre(MAN_ARGS)
458 {
459 	struct roff_node	*nn;
460 	const char		*list_class;
461 	enum htmltag		 list_elem, body_elem;
462 	char			 list_type;
463 
464 	nn = n->type == ROFFT_BLOCK ? n : n->parent;
465 	list_type = list_continues(roff_node_prev(nn), nn);
466 	if (list_type == '\0') {
467 		/* Start a new list. */
468 		list_type = list_continues(nn, roff_node_next(nn));
469 		if (list_type == '\0')
470 			list_type = ' ';
471 		switch (list_type) {
472 		case ' ':
473 			list_class = "Bl-tag";
474 			list_elem = TAG_DL;
475 			break;
476 		case '*':
477 			list_class = "Bl-bullet";
478 			list_elem = TAG_UL;
479 			break;
480 		case '-':
481 			list_class = "Bl-dash";
482 			list_elem = TAG_UL;
483 			break;
484 		default:
485 			abort();
486 		}
487 	} else {
488 		/* Continue a list that was started earlier. */
489 		list_class = NULL;
490 		list_elem = TAG_MAX;
491 	}
492 	body_elem = list_type == ' ' ? TAG_DD : TAG_LI;
493 
494 	switch (n->type) {
495 	case ROFFT_BLOCK:
496 		html_close_paragraph(h);
497 		if (list_elem != TAG_MAX)
498 			print_otag(h, list_elem, "c", list_class);
499 		return 1;
500 	case ROFFT_HEAD:
501 		if (body_elem == TAG_LI)
502 			return 0;
503 		print_otag_id(h, TAG_DT, NULL, n);
504 		break;
505 	case ROFFT_BODY:
506 		print_otag(h, body_elem, "");
507 		return 1;
508 	default:
509 		abort();
510 	}
511 	switch(n->tok) {
512 	case MAN_IP:  /* Only print the first header element. */
513 		if (n->child != NULL)
514 			print_man_node(man, n->child, h);
515 		break;
516 	case MAN_TP:  /* Only print next-line header elements. */
517 	case MAN_TQ:
518 		nn = n->child;
519 		while (nn != NULL && (NODE_LINE & nn->flags) == 0)
520 			nn = nn->next;
521 		while (nn != NULL) {
522 			print_man_node(man, nn, h);
523 			nn = nn->next;
524 		}
525 		break;
526 	default:
527 		abort();
528 	}
529 	return 0;
530 }
531 
532 static int
533 man_MR_pre(MAN_ARGS)
534 {
535 	struct tag	*t;
536 	const char	*name, *section, *suffix;
537 	char		*label;
538 
539 	html_setfont(h, ESCAPE_FONTROMAN);
540 	name = section = suffix = label = NULL;
541 	if (n->child != NULL) {
542 		name = n->child->string;
543 		if (n->child->next != NULL) {
544 			section = n->child->next->string;
545 			mandoc_asprintf(&label,
546 			    "%s, section %s", name, section);
547 			if (n->child->next->next != NULL)
548 				suffix = n->child->next->next->string;
549 		}
550 	}
551 
552 	if (name != NULL && section != NULL && h->base_man1 != NULL)
553 		t = print_otag(h, TAG_A, "chM?", "Xr",
554 		    name, section, "aria-label", label);
555 	else
556 		t = print_otag(h, TAG_A, "c?", "Xr", "aria-label", label);
557 
558 	free(label);
559 	if (name != NULL) {
560 		print_text(h, name);
561 		h->flags |= HTML_NOSPACE;
562 	}
563 	print_text(h, "(");
564 	h->flags |= HTML_NOSPACE;
565 	if (section != NULL) {
566 		print_text(h, section);
567 		h->flags |= HTML_NOSPACE;
568 	}
569 	print_text(h, ")");
570 	print_tagq(h, t);
571 	if (suffix != NULL) {
572 		h->flags |= HTML_NOSPACE;
573 		print_text(h, suffix);
574 	}
575 	return 0;
576 }
577 
578 static int
579 man_OP_pre(MAN_ARGS)
580 {
581 	struct tag	*tt;
582 
583 	print_text(h, "[");
584 	h->flags |= HTML_NOSPACE;
585 	tt = print_otag(h, TAG_SPAN, "c", "Op");
586 
587 	if ((n = n->child) != NULL) {
588 		print_otag(h, TAG_B, "");
589 		print_text(h, n->string);
590 	}
591 
592 	print_stagq(h, tt);
593 
594 	if (n != NULL && n->next != NULL) {
595 		print_otag(h, TAG_I, "");
596 		print_text(h, n->next->string);
597 	}
598 
599 	print_stagq(h, tt);
600 	h->flags |= HTML_NOSPACE;
601 	print_text(h, "]");
602 	return 0;
603 }
604 
605 static int
606 man_B_pre(MAN_ARGS)
607 {
608 	print_otag(h, TAG_B, "");
609 	return 1;
610 }
611 
612 static int
613 man_I_pre(MAN_ARGS)
614 {
615 	print_otag(h, TAG_I, "");
616 	return 1;
617 }
618 
619 static int
620 man_in_pre(MAN_ARGS)
621 {
622 	print_otag(h, TAG_BR, "");
623 	return 0;
624 }
625 
626 static int
627 man_ign_pre(MAN_ARGS)
628 {
629 	return 0;
630 }
631 
632 static int
633 man_RS_pre(MAN_ARGS)
634 {
635 	switch (n->type) {
636 	case ROFFT_BLOCK:
637 		html_close_paragraph(h);
638 		break;
639 	case ROFFT_HEAD:
640 		return 0;
641 	case ROFFT_BODY:
642 		print_otag(h, TAG_DIV, "c", "Bd-indent");
643 		break;
644 	default:
645 		abort();
646 	}
647 	return 1;
648 }
649 
650 static int
651 man_SY_pre(MAN_ARGS)
652 {
653 	switch (n->type) {
654 	case ROFFT_BLOCK:
655 		html_close_paragraph(h);
656 		print_otag(h, TAG_TABLE, "c", "Nm");
657 		print_otag(h, TAG_TR, "");
658 		break;
659 	case ROFFT_HEAD:
660 		print_otag(h, TAG_TD, "");
661 		print_otag(h, TAG_CODE, "c", "Nm");
662 		break;
663 	case ROFFT_BODY:
664 		print_otag(h, TAG_TD, "");
665 		break;
666 	default:
667 		abort();
668 	}
669 	return 1;
670 }
671 
672 static int
673 man_UR_pre(MAN_ARGS)
674 {
675 	char *cp;
676 
677 	n = n->child;
678 	assert(n->type == ROFFT_HEAD);
679 	if (n->child != NULL) {
680 		assert(n->child->type == ROFFT_TEXT);
681 		if (n->tok == MAN_MT) {
682 			mandoc_asprintf(&cp, "mailto:%s", n->child->string);
683 			print_otag(h, TAG_A, "ch", "Mt", cp);
684 			free(cp);
685 		} else
686 			print_otag(h, TAG_A, "ch", "Lk", n->child->string);
687 	}
688 
689 	assert(n->next->type == ROFFT_BODY);
690 	if (n->next->child != NULL)
691 		n = n->next;
692 
693 	print_man_nodelist(man, n->child, h);
694 	return 0;
695 }
696