xref: /freebsd/contrib/mandoc/man_html.c (revision 716fd348e01c5f2ba125f878a634a753436c2994)
1 /* $Id: man_html.c,v 1.179 2020/10/16 17:22:43 schwarze Exp $ */
2 /*
3  * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * HTML formatter for man(7) used by mandoc(1).
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "out.h"
35 #include "html.h"
36 #include "main.h"
37 
/*
 * Parameter list shared by the node formatting functions in this file:
 *   man - metadata of the document being formatted (read-only),
 *   n   - the syntax tree node currently being formatted,
 *   h   - the HTML output state.
 * Functions declared with MAN_ARGS refer to these three names directly.
 */
#define	MAN_ARGS	  const struct roff_meta *man, \
			  struct roff_node *n, \
			  struct html *h
41 
/*
 * Per-macro formatting actions; one entry per man(7) macro token.
 *
 * pre:  called by print_man_node() before the children of the node
 *       are formatted.  A zero return value suppresses the automatic
 *       formatting of the children; a NULL pointer means "no action,
 *       format the children".
 * post: not called by any code visible in this file; every entry of
 *       man_html_acts[] leaves it NULL.
 */
struct	man_html_act {
	int		(*pre)(MAN_ARGS);
	int		(*post)(MAN_ARGS);
};
46 
/*
 * Forward declarations of the file-local functions:
 * tree traversal helpers first, then the per-macro "pre" handlers
 * referenced from man_html_acts[], then the page header and footer.
 */
static	void		  print_man_head(const struct roff_meta *,
				struct html *);
static	void		  print_man_nodelist(MAN_ARGS);
static	void		  print_man_node(MAN_ARGS);
static	char		  list_continues(const struct roff_node *,
				const struct roff_node *);
static	int		  man_B_pre(MAN_ARGS);
static	int		  man_IP_pre(MAN_ARGS);
static	int		  man_I_pre(MAN_ARGS);
static	int		  man_OP_pre(MAN_ARGS);
static	int		  man_PP_pre(MAN_ARGS);
static	int		  man_RS_pre(MAN_ARGS);
static	int		  man_SH_pre(MAN_ARGS);
static	int		  man_SM_pre(MAN_ARGS);
static	int		  man_SY_pre(MAN_ARGS);
static	int		  man_UR_pre(MAN_ARGS);
static	int		  man_abort_pre(MAN_ARGS);
static	int		  man_alt_pre(MAN_ARGS);
static	int		  man_ign_pre(MAN_ARGS);
static	int		  man_in_pre(MAN_ARGS);
static	void		  man_root_post(const struct roff_meta *,
				struct html *);
static	void		  man_root_pre(const struct roff_meta *,
				struct html *);
71 
/*
 * Dispatch table of formatting actions.
 * print_man_node() indexes it with (n->tok - MAN_TH), so the entries
 * have to appear in the order of the man(7) macro tokens from MAN_TH
 * up to, but not including, MAN_MAX; the trailing comment on each
 * line names the macro the entry belongs to.
 * An entry with a NULL "pre" pointer gets no special treatment:
 * the children of such a node are simply formatted in order.
 */
static	const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
	{ NULL, NULL }, /* TH */
	{ man_SH_pre, NULL }, /* SH */
	{ man_SH_pre, NULL }, /* SS */
	{ man_IP_pre, NULL }, /* TP */
	{ man_IP_pre, NULL }, /* TQ */
	{ man_abort_pre, NULL }, /* LP */
	{ man_PP_pre, NULL }, /* PP */
	{ man_abort_pre, NULL }, /* P */
	{ man_IP_pre, NULL }, /* IP */
	{ man_PP_pre, NULL }, /* HP */
	{ man_SM_pre, NULL }, /* SM */
	{ man_SM_pre, NULL }, /* SB */
	{ man_alt_pre, NULL }, /* BI */
	{ man_alt_pre, NULL }, /* IB */
	{ man_alt_pre, NULL }, /* BR */
	{ man_alt_pre, NULL }, /* RB */
	{ NULL, NULL }, /* R */
	{ man_B_pre, NULL }, /* B */
	{ man_I_pre, NULL }, /* I */
	{ man_alt_pre, NULL }, /* IR */
	{ man_alt_pre, NULL }, /* RI */
	{ NULL, NULL }, /* RE */
	{ man_RS_pre, NULL }, /* RS */
	{ man_ign_pre, NULL }, /* DT */
	{ man_ign_pre, NULL }, /* UC */
	{ man_ign_pre, NULL }, /* PD */
	{ man_ign_pre, NULL }, /* AT */
	{ man_in_pre, NULL }, /* in */
	{ man_SY_pre, NULL }, /* SY */
	{ NULL, NULL }, /* YS */
	{ man_OP_pre, NULL }, /* OP */
	{ NULL, NULL }, /* EX */
	{ NULL, NULL }, /* EE */
	{ man_UR_pre, NULL }, /* UR */
	{ NULL, NULL }, /* UE */
	{ man_UR_pre, NULL }, /* MT */
	{ NULL, NULL }, /* ME */
};
111 
112 
113 void
114 html_man(void *arg, const struct roff_meta *man)
115 {
116 	struct html		*h;
117 	struct roff_node	*n;
118 	struct tag		*t;
119 
120 	h = (struct html *)arg;
121 	n = man->first->child;
122 
123 	if ((h->oflags & HTML_FRAGMENT) == 0) {
124 		print_gen_decls(h);
125 		print_otag(h, TAG_HTML, "");
126 		if (n != NULL && n->type == ROFFT_COMMENT)
127 			print_gen_comment(h, n);
128 		t = print_otag(h, TAG_HEAD, "");
129 		print_man_head(man, h);
130 		print_tagq(h, t);
131 		print_otag(h, TAG_BODY, "");
132 	}
133 
134 	man_root_pre(man, h);
135 	t = print_otag(h, TAG_DIV, "c", "manual-text");
136 	print_man_nodelist(man, n, h);
137 	print_tagq(h, t);
138 	man_root_post(man, h);
139 	print_tagq(h, NULL);
140 }
141 
142 static void
143 print_man_head(const struct roff_meta *man, struct html *h)
144 {
145 	char	*cp;
146 
147 	print_gen_head(h);
148 	mandoc_asprintf(&cp, "%s(%s)", man->title, man->msec);
149 	print_otag(h, TAG_TITLE, "");
150 	print_text(h, cp);
151 	free(cp);
152 }
153 
154 static void
155 print_man_nodelist(MAN_ARGS)
156 {
157 	while (n != NULL) {
158 		print_man_node(man, n, h);
159 		n = n->next;
160 	}
161 }
162 
163 static void
164 print_man_node(MAN_ARGS)
165 {
166 	struct tag	*t;
167 	int		 child;
168 
169 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
170 		return;
171 
172 	if ((n->flags & NODE_NOFILL) == 0)
173 		html_fillmode(h, ROFF_fi);
174 	else if (html_fillmode(h, ROFF_nf) == ROFF_nf &&
175 	    n->tok != ROFF_fi && n->flags & NODE_LINE &&
176 	    (n->prev == NULL || n->prev->tok != MAN_YS))
177 		print_endline(h);
178 
179 	child = 1;
180 	switch (n->type) {
181 	case ROFFT_TEXT:
182 		if (*n->string == '\0') {
183 			print_endline(h);
184 			return;
185 		}
186 		if (*n->string == ' ' && n->flags & NODE_LINE &&
187 		    (h->flags & HTML_NONEWLINE) == 0)
188 			print_otag(h, TAG_BR, "");
189 		else if (n->flags & NODE_DELIMC)
190 			h->flags |= HTML_NOSPACE;
191 		t = h->tag;
192 		t->refcnt++;
193 		print_text(h, n->string);
194 		break;
195 	case ROFFT_EQN:
196 		t = h->tag;
197 		t->refcnt++;
198 		print_eqn(h, n->eqn);
199 		break;
200 	case ROFFT_TBL:
201 		/*
202 		 * This will take care of initialising all of the table
203 		 * state data for the first table, then tearing it down
204 		 * for the last one.
205 		 */
206 		print_tbl(h, n->span);
207 		return;
208 	default:
209 		/*
210 		 * Close out scope of font prior to opening a macro
211 		 * scope.
212 		 */
213 		if (h->metac != ESCAPE_FONTROMAN) {
214 			h->metal = h->metac;
215 			h->metac = ESCAPE_FONTROMAN;
216 		}
217 
218 		/*
219 		 * Close out the current table, if it's open, and unset
220 		 * the "meta" table state.  This will be reopened on the
221 		 * next table element.
222 		 */
223 		if (h->tblt != NULL)
224 			print_tblclose(h);
225 		t = h->tag;
226 		t->refcnt++;
227 		if (n->tok < ROFF_MAX) {
228 			roff_html_pre(h, n);
229 			t->refcnt--;
230 			print_stagq(h, t);
231 			return;
232 		}
233 		assert(n->tok >= MAN_TH && n->tok < MAN_MAX);
234 		if (man_html_acts[n->tok - MAN_TH].pre != NULL)
235 			child = (*man_html_acts[n->tok - MAN_TH].pre)(man,
236 			    n, h);
237 		break;
238 	}
239 
240 	if (child && n->child != NULL)
241 		print_man_nodelist(man, n->child, h);
242 
243 	/* This will automatically close out any font scope. */
244 	t->refcnt--;
245 	if (n->type == ROFFT_BLOCK &&
246 	    (n->tok == MAN_IP || n->tok == MAN_TP || n->tok == MAN_TQ)) {
247 		t = h->tag;
248 		while (t->tag != TAG_DL && t->tag != TAG_UL)
249 			t = t->next;
250 		/*
251 		 * Close the list if no further item of the same type
252 		 * follows; otherwise, close the item only.
253 		 */
254 		if (list_continues(n, roff_node_next(n)) == '\0') {
255 			print_tagq(h, t);
256 			t = NULL;
257 		}
258 	}
259 	if (t != NULL)
260 		print_stagq(h, t);
261 }
262 
263 static void
264 man_root_pre(const struct roff_meta *man, struct html *h)
265 {
266 	struct tag	*t, *tt;
267 	char		*title;
268 
269 	assert(man->title);
270 	assert(man->msec);
271 	mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
272 
273 	t = print_otag(h, TAG_TABLE, "c", "head");
274 	tt = print_otag(h, TAG_TR, "");
275 
276 	print_otag(h, TAG_TD, "c", "head-ltitle");
277 	print_text(h, title);
278 	print_stagq(h, tt);
279 
280 	print_otag(h, TAG_TD, "c", "head-vol");
281 	if (man->vol != NULL)
282 		print_text(h, man->vol);
283 	print_stagq(h, tt);
284 
285 	print_otag(h, TAG_TD, "c", "head-rtitle");
286 	print_text(h, title);
287 	print_tagq(h, t);
288 	free(title);
289 }
290 
291 static void
292 man_root_post(const struct roff_meta *man, struct html *h)
293 {
294 	struct tag	*t, *tt;
295 
296 	t = print_otag(h, TAG_TABLE, "c", "foot");
297 	tt = print_otag(h, TAG_TR, "");
298 
299 	print_otag(h, TAG_TD, "c", "foot-date");
300 	print_text(h, man->date);
301 	print_stagq(h, tt);
302 
303 	print_otag(h, TAG_TD, "c", "foot-os");
304 	if (man->os != NULL)
305 		print_text(h, man->os);
306 	print_tagq(h, t);
307 }
308 
309 static int
310 man_SH_pre(MAN_ARGS)
311 {
312 	const char	*class;
313 	enum htmltag	 tag;
314 
315 	if (n->tok == MAN_SH) {
316 		tag = TAG_H1;
317 		class = "Sh";
318 	} else {
319 		tag = TAG_H2;
320 		class = "Ss";
321 	}
322 	switch (n->type) {
323 	case ROFFT_BLOCK:
324 		html_close_paragraph(h);
325 		print_otag(h, TAG_SECTION, "c", class);
326 		break;
327 	case ROFFT_HEAD:
328 		print_otag_id(h, tag, class, n);
329 		break;
330 	case ROFFT_BODY:
331 		break;
332 	default:
333 		abort();
334 	}
335 	return 1;
336 }
337 
338 static int
339 man_alt_pre(MAN_ARGS)
340 {
341 	const struct roff_node	*nn;
342 	struct tag	*t;
343 	int		 i;
344 	enum htmltag	 fp;
345 
346 	for (i = 0, nn = n->child; nn != NULL; nn = nn->next, i++) {
347 		switch (n->tok) {
348 		case MAN_BI:
349 			fp = i % 2 ? TAG_I : TAG_B;
350 			break;
351 		case MAN_IB:
352 			fp = i % 2 ? TAG_B : TAG_I;
353 			break;
354 		case MAN_RI:
355 			fp = i % 2 ? TAG_I : TAG_MAX;
356 			break;
357 		case MAN_IR:
358 			fp = i % 2 ? TAG_MAX : TAG_I;
359 			break;
360 		case MAN_BR:
361 			fp = i % 2 ? TAG_MAX : TAG_B;
362 			break;
363 		case MAN_RB:
364 			fp = i % 2 ? TAG_B : TAG_MAX;
365 			break;
366 		default:
367 			abort();
368 		}
369 
370 		if (i)
371 			h->flags |= HTML_NOSPACE;
372 
373 		if (fp != TAG_MAX)
374 			t = print_otag(h, fp, "");
375 
376 		print_text(h, nn->string);
377 
378 		if (fp != TAG_MAX)
379 			print_tagq(h, t);
380 	}
381 	return 0;
382 }
383 
384 static int
385 man_SM_pre(MAN_ARGS)
386 {
387 	print_otag(h, TAG_SMALL, "");
388 	if (n->tok == MAN_SB)
389 		print_otag(h, TAG_B, "");
390 	return 1;
391 }
392 
393 static int
394 man_PP_pre(MAN_ARGS)
395 {
396 	switch (n->type) {
397 	case ROFFT_BLOCK:
398 		html_close_paragraph(h);
399 		break;
400 	case ROFFT_HEAD:
401 		return 0;
402 	case ROFFT_BODY:
403 		if (n->child != NULL &&
404 		    (n->child->flags & NODE_NOFILL) == 0)
405 			print_otag(h, TAG_P, "c",
406 			    n->tok == MAN_PP ? "Pp" : "Pp HP");
407 		break;
408 	default:
409 		abort();
410 	}
411 	return 1;
412 }
413 
414 static char
415 list_continues(const struct roff_node *n1, const struct roff_node *n2)
416 {
417 	const char *s1, *s2;
418 	char c1, c2;
419 
420 	if (n1 == NULL || n1->type != ROFFT_BLOCK ||
421 	    n2 == NULL || n2->type != ROFFT_BLOCK)
422 		return '\0';
423 	if ((n1->tok == MAN_TP || n1->tok == MAN_TQ) &&
424 	    (n2->tok == MAN_TP || n2->tok == MAN_TQ))
425 		return ' ';
426 	if (n1->tok != MAN_IP || n2->tok != MAN_IP)
427 		return '\0';
428 	n1 = n1->head->child;
429 	n2 = n2->head->child;
430 	s1 = n1 == NULL ? "" : n1->string;
431 	s2 = n2 == NULL ? "" : n2->string;
432 	c1 = strcmp(s1, "*") == 0 ? '*' :
433 	     strcmp(s1, "\\-") == 0 ? '-' :
434 	     strcmp(s1, "\\(bu") == 0 ? 'b' : ' ';
435 	c2 = strcmp(s2, "*") == 0 ? '*' :
436 	     strcmp(s2, "\\-") == 0 ? '-' :
437 	     strcmp(s2, "\\(bu") == 0 ? 'b' : ' ';
438 	return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
439 }
440 
441 static int
442 man_IP_pre(MAN_ARGS)
443 {
444 	struct roff_node	*nn;
445 	const char		*list_class;
446 	enum htmltag		 list_elem, body_elem;
447 	char			 list_type;
448 
449 	nn = n->type == ROFFT_BLOCK ? n : n->parent;
450 	list_type = list_continues(roff_node_prev(nn), nn);
451 	if (list_type == '\0') {
452 		/* Start a new list. */
453 		list_type = list_continues(nn, roff_node_next(nn));
454 		if (list_type == '\0')
455 			list_type = ' ';
456 		switch (list_type) {
457 		case ' ':
458 			list_class = "Bl-tag";
459 			list_elem = TAG_DL;
460 			break;
461 		case '*':
462 			list_class = "Bl-bullet";
463 			list_elem = TAG_UL;
464 			break;
465 		case '-':
466 			list_class = "Bl-dash";
467 			list_elem = TAG_UL;
468 			break;
469 		default:
470 			abort();
471 		}
472 	} else {
473 		/* Continue a list that was started earlier. */
474 		list_class = NULL;
475 		list_elem = TAG_MAX;
476 	}
477 	body_elem = list_type == ' ' ? TAG_DD : TAG_LI;
478 
479 	switch (n->type) {
480 	case ROFFT_BLOCK:
481 		html_close_paragraph(h);
482 		if (list_elem != TAG_MAX)
483 			print_otag(h, list_elem, "c", list_class);
484 		return 1;
485 	case ROFFT_HEAD:
486 		if (body_elem == TAG_LI)
487 			return 0;
488 		print_otag_id(h, TAG_DT, NULL, n);
489 		break;
490 	case ROFFT_BODY:
491 		print_otag(h, body_elem, "");
492 		return 1;
493 	default:
494 		abort();
495 	}
496 	switch(n->tok) {
497 	case MAN_IP:  /* Only print the first header element. */
498 		if (n->child != NULL)
499 			print_man_node(man, n->child, h);
500 		break;
501 	case MAN_TP:  /* Only print next-line header elements. */
502 	case MAN_TQ:
503 		nn = n->child;
504 		while (nn != NULL && (NODE_LINE & nn->flags) == 0)
505 			nn = nn->next;
506 		while (nn != NULL) {
507 			print_man_node(man, nn, h);
508 			nn = nn->next;
509 		}
510 		break;
511 	default:
512 		abort();
513 	}
514 	return 0;
515 }
516 
517 static int
518 man_OP_pre(MAN_ARGS)
519 {
520 	struct tag	*tt;
521 
522 	print_text(h, "[");
523 	h->flags |= HTML_NOSPACE;
524 	tt = print_otag(h, TAG_SPAN, "c", "Op");
525 
526 	if ((n = n->child) != NULL) {
527 		print_otag(h, TAG_B, "");
528 		print_text(h, n->string);
529 	}
530 
531 	print_stagq(h, tt);
532 
533 	if (n != NULL && n->next != NULL) {
534 		print_otag(h, TAG_I, "");
535 		print_text(h, n->next->string);
536 	}
537 
538 	print_stagq(h, tt);
539 	h->flags |= HTML_NOSPACE;
540 	print_text(h, "]");
541 	return 0;
542 }
543 
544 static int
545 man_B_pre(MAN_ARGS)
546 {
547 	print_otag(h, TAG_B, "");
548 	return 1;
549 }
550 
551 static int
552 man_I_pre(MAN_ARGS)
553 {
554 	print_otag(h, TAG_I, "");
555 	return 1;
556 }
557 
558 static int
559 man_in_pre(MAN_ARGS)
560 {
561 	print_otag(h, TAG_BR, "");
562 	return 0;
563 }
564 
565 static int
566 man_ign_pre(MAN_ARGS)
567 {
568 	return 0;
569 }
570 
571 static int
572 man_RS_pre(MAN_ARGS)
573 {
574 	switch (n->type) {
575 	case ROFFT_BLOCK:
576 		html_close_paragraph(h);
577 		break;
578 	case ROFFT_HEAD:
579 		return 0;
580 	case ROFFT_BODY:
581 		print_otag(h, TAG_DIV, "c", "Bd-indent");
582 		break;
583 	default:
584 		abort();
585 	}
586 	return 1;
587 }
588 
589 static int
590 man_SY_pre(MAN_ARGS)
591 {
592 	switch (n->type) {
593 	case ROFFT_BLOCK:
594 		html_close_paragraph(h);
595 		print_otag(h, TAG_TABLE, "c", "Nm");
596 		print_otag(h, TAG_TR, "");
597 		break;
598 	case ROFFT_HEAD:
599 		print_otag(h, TAG_TD, "");
600 		print_otag(h, TAG_CODE, "c", "Nm");
601 		break;
602 	case ROFFT_BODY:
603 		print_otag(h, TAG_TD, "");
604 		break;
605 	default:
606 		abort();
607 	}
608 	return 1;
609 }
610 
611 static int
612 man_UR_pre(MAN_ARGS)
613 {
614 	char *cp;
615 
616 	n = n->child;
617 	assert(n->type == ROFFT_HEAD);
618 	if (n->child != NULL) {
619 		assert(n->child->type == ROFFT_TEXT);
620 		if (n->tok == MAN_MT) {
621 			mandoc_asprintf(&cp, "mailto:%s", n->child->string);
622 			print_otag(h, TAG_A, "ch", "Mt", cp);
623 			free(cp);
624 		} else
625 			print_otag(h, TAG_A, "ch", "Lk", n->child->string);
626 	}
627 
628 	assert(n->next->type == ROFFT_BODY);
629 	if (n->next->child != NULL)
630 		n = n->next;
631 
632 	print_man_nodelist(man, n->child, h);
633 	return 0;
634 }
635 
636 static int
637 man_abort_pre(MAN_ARGS)
638 {
639 	abort();
640 }
641