xref: /illumos-gate/usr/src/cmd/mandoc/man.c (revision 260e9a87725c090ba5835b1f9f0b62fa2f96036f)
1 /*	$Id: man.c,v 1.149 2015/01/30 21:28:46 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include "config.h"
20 
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 
30 #include "man.h"
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "libman.h"
34 #include "libmandoc.h"
35 
36 const	char *const __man_macronames[MAN_MAX] = {
37 	"br",		"TH",		"SH",		"SS",
38 	"TP",		"LP",		"PP",		"P",
39 	"IP",		"HP",		"SM",		"SB",
40 	"BI",		"IB",		"BR",		"RB",
41 	"R",		"B",		"I",		"IR",
42 	"RI",		"sp",		"nf",
43 	"fi",		"RE",		"RS",		"DT",
44 	"UC",		"PD",		"AT",		"in",
45 	"ft",		"OP",		"EX",		"EE",
46 	"UR",		"UE",		"ll"
47 	};
48 
49 const	char * const *man_macronames = __man_macronames;
50 
51 static	void		 man_alloc1(struct man *);
52 static	void		 man_breakscope(struct man *, enum mant);
53 static	void		 man_descope(struct man *, int, int);
54 static	void		 man_free1(struct man *);
55 static	struct man_node	*man_node_alloc(struct man *, int, int,
56 				enum man_type, enum mant);
57 static	void		 man_node_append(struct man *, struct man_node *);
58 static	void		 man_node_free(struct man_node *);
59 static	void		 man_node_unlink(struct man *,
60 				struct man_node *);
61 static	int		 man_ptext(struct man *, int, char *, int);
62 static	int		 man_pmacro(struct man *, int, char *, int);
63 
64 
65 const struct man_node *
66 man_node(const struct man *man)
67 {
68 
69 	return(man->first);
70 }
71 
72 const struct man_meta *
73 man_meta(const struct man *man)
74 {
75 
76 	return(&man->meta);
77 }
78 
/*
 * Bring a parser back to its freshly allocated state: release the
 * per-document data first, then set it up again from scratch.
 * The parser structure itself is kept.
 */
void
man_reset(struct man *man)
{
	/* Order matters: tear down before re-initializing. */
	man_free1(man);
	man_alloc1(man);
}
86 
/*
 * Destroy a parser: release the per-document data and then the
 * parser structure itself.  The pointer must not be used afterwards.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* tree and meta strings */
	free(man);		/* the container */
}
94 
95 struct man *
96 man_alloc(struct roff *roff, struct mparse *parse,
97 	const char *defos, int quick)
98 {
99 	struct man	*p;
100 
101 	p = mandoc_calloc(1, sizeof(struct man));
102 
103 	man_hash_init();
104 	p->parse = parse;
105 	p->defos = defos;
106 	p->quick = quick;
107 	p->roff = roff;
108 
109 	man_alloc1(p);
110 	return(p);
111 }
112 
/*
 * Signal the end of the input document; all the work is delegated
 * to man_macroend().
 */
void
man_endparse(struct man *man)
{
	man_macroend(man);
}
119 
120 int
121 man_parseln(struct man *man, int ln, char *buf, int offs)
122 {
123 
124 	if (man->last->type != MAN_EQN || ln > man->last->line)
125 		man->flags |= MAN_NEWLINE;
126 
127 	return (roff_getcontrol(man->roff, buf, &offs) ?
128 	    man_pmacro(man, ln, buf, offs) :
129 	    man_ptext(man, ln, buf, offs));
130 }
131 
132 static void
133 man_free1(struct man *man)
134 {
135 
136 	if (man->first)
137 		man_node_delete(man, man->first);
138 	free(man->meta.title);
139 	free(man->meta.source);
140 	free(man->meta.date);
141 	free(man->meta.vol);
142 	free(man->meta.msec);
143 }
144 
145 static void
146 man_alloc1(struct man *man)
147 {
148 
149 	memset(&man->meta, 0, sizeof(struct man_meta));
150 	man->flags = 0;
151 	man->last = mandoc_calloc(1, sizeof(struct man_node));
152 	man->first = man->last;
153 	man->last->type = MAN_ROOT;
154 	man->last->tok = MAN_MAX;
155 	man->next = MAN_NEXT_CHILD;
156 }
157 
158 
159 static void
160 man_node_append(struct man *man, struct man_node *p)
161 {
162 
163 	assert(man->last);
164 	assert(man->first);
165 	assert(p->type != MAN_ROOT);
166 
167 	switch (man->next) {
168 	case MAN_NEXT_SIBLING:
169 		man->last->next = p;
170 		p->prev = man->last;
171 		p->parent = man->last->parent;
172 		break;
173 	case MAN_NEXT_CHILD:
174 		man->last->child = p;
175 		p->parent = man->last;
176 		break;
177 	default:
178 		abort();
179 		/* NOTREACHED */
180 	}
181 
182 	assert(p->parent);
183 	p->parent->nchild++;
184 
185 	switch (p->type) {
186 	case MAN_BLOCK:
187 		if (p->tok == MAN_SH || p->tok == MAN_SS)
188 			man->flags &= ~MAN_LITERAL;
189 		break;
190 	case MAN_HEAD:
191 		assert(p->parent->type == MAN_BLOCK);
192 		p->parent->head = p;
193 		break;
194 	case MAN_BODY:
195 		assert(p->parent->type == MAN_BLOCK);
196 		p->parent->body = p;
197 		break;
198 	default:
199 		break;
200 	}
201 
202 	man->last = p;
203 
204 	switch (p->type) {
205 	case MAN_TBL:
206 		/* FALLTHROUGH */
207 	case MAN_TEXT:
208 		man_valid_post(man);
209 		break;
210 	default:
211 		break;
212 	}
213 }
214 
215 static struct man_node *
216 man_node_alloc(struct man *man, int line, int pos,
217 		enum man_type type, enum mant tok)
218 {
219 	struct man_node *p;
220 
221 	p = mandoc_calloc(1, sizeof(struct man_node));
222 	p->line = line;
223 	p->pos = pos;
224 	p->type = type;
225 	p->tok = tok;
226 
227 	if (man->flags & MAN_NEWLINE)
228 		p->flags |= MAN_LINE;
229 	man->flags &= ~MAN_NEWLINE;
230 	return(p);
231 }
232 
233 void
234 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
235 {
236 	struct man_node *p;
237 
238 	p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
239 	man_node_append(man, p);
240 	man->next = MAN_NEXT_CHILD;
241 }
242 
243 void
244 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
245 {
246 	struct man_node *p;
247 
248 	p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
249 	man_node_append(man, p);
250 	man->next = MAN_NEXT_CHILD;
251 }
252 
253 void
254 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
255 {
256 	struct man_node *p;
257 
258 	p = man_node_alloc(man, line, pos, MAN_BODY, tok);
259 	man_node_append(man, p);
260 	man->next = MAN_NEXT_CHILD;
261 }
262 
263 void
264 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
265 {
266 	struct man_node *p;
267 
268 	p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
269 	man_node_append(man, p);
270 	man->next = MAN_NEXT_CHILD;
271 }
272 
273 void
274 man_word_alloc(struct man *man, int line, int pos, const char *word)
275 {
276 	struct man_node	*n;
277 
278 	n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
279 	n->string = roff_strdup(man->roff, word);
280 	man_node_append(man, n);
281 	man->next = MAN_NEXT_SIBLING;
282 }
283 
284 void
285 man_word_append(struct man *man, const char *word)
286 {
287 	struct man_node	*n;
288 	char		*addstr, *newstr;
289 
290 	n = man->last;
291 	addstr = roff_strdup(man->roff, word);
292 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
293 	free(addstr);
294 	free(n->string);
295 	n->string = newstr;
296 	man->next = MAN_NEXT_SIBLING;
297 }
298 
299 /*
300  * Free all of the resources held by a node.  This does NOT unlink a
301  * node from its context; for that, see man_node_unlink().
302  */
303 static void
304 man_node_free(struct man_node *p)
305 {
306 
307 	free(p->string);
308 	free(p);
309 }
310 
311 void
312 man_node_delete(struct man *man, struct man_node *p)
313 {
314 
315 	while (p->child)
316 		man_node_delete(man, p->child);
317 
318 	man_node_unlink(man, p);
319 	man_node_free(p);
320 }
321 
322 void
323 man_addeqn(struct man *man, const struct eqn *ep)
324 {
325 	struct man_node	*n;
326 
327 	n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
328 	n->eqn = ep;
329 	if (ep->ln > man->last->line)
330 		n->flags |= MAN_LINE;
331 	man_node_append(man, n);
332 	man->next = MAN_NEXT_SIBLING;
333 	man_descope(man, ep->ln, ep->pos);
334 }
335 
336 void
337 man_addspan(struct man *man, const struct tbl_span *sp)
338 {
339 	struct man_node	*n;
340 
341 	man_breakscope(man, MAN_MAX);
342 	n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
343 	n->span = sp;
344 	man_node_append(man, n);
345 	man->next = MAN_NEXT_SIBLING;
346 	man_descope(man, sp->line, 0);
347 }
348 
349 static void
350 man_descope(struct man *man, int line, int offs)
351 {
352 	/*
353 	 * Co-ordinate what happens with having a next-line scope open:
354 	 * first close out the element scope (if applicable), then close
355 	 * out the block scope (also if applicable).
356 	 */
357 
358 	if (man->flags & MAN_ELINE) {
359 		man->flags &= ~MAN_ELINE;
360 		man_unscope(man, man->last->parent);
361 	}
362 	if ( ! (man->flags & MAN_BLINE))
363 		return;
364 	man->flags &= ~MAN_BLINE;
365 	man_unscope(man, man->last->parent);
366 	man_body_alloc(man, line, offs, man->last->tok);
367 }
368 
369 static int
370 man_ptext(struct man *man, int line, char *buf, int offs)
371 {
372 	int		 i;
373 
374 	/* Literal free-form text whitespace is preserved. */
375 
376 	if (man->flags & MAN_LITERAL) {
377 		man_word_alloc(man, line, offs, buf + offs);
378 		man_descope(man, line, offs);
379 		return(1);
380 	}
381 
382 	for (i = offs; buf[i] == ' '; i++)
383 		/* Skip leading whitespace. */ ;
384 
385 	/*
386 	 * Blank lines are ignored right after headings
387 	 * but add a single vertical space elsewhere.
388 	 */
389 
390 	if (buf[i] == '\0') {
391 		/* Allocate a blank entry. */
392 		if (man->last->tok != MAN_SH &&
393 		    man->last->tok != MAN_SS) {
394 			man_elem_alloc(man, line, offs, MAN_sp);
395 			man->next = MAN_NEXT_SIBLING;
396 		}
397 		return(1);
398 	}
399 
400 	/*
401 	 * Warn if the last un-escaped character is whitespace. Then
402 	 * strip away the remaining spaces (tabs stay!).
403 	 */
404 
405 	i = (int)strlen(buf);
406 	assert(i);
407 
408 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
409 		if (i > 1 && '\\' != buf[i - 2])
410 			mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
411 			    line, i - 1, NULL);
412 
413 		for (--i; i && ' ' == buf[i]; i--)
414 			/* Spin back to non-space. */ ;
415 
416 		/* Jump ahead of escaped whitespace. */
417 		i += '\\' == buf[i] ? 2 : 1;
418 
419 		buf[i] = '\0';
420 	}
421 	man_word_alloc(man, line, offs, buf + offs);
422 
423 	/*
424 	 * End-of-sentence check.  If the last character is an unescaped
425 	 * EOS character, then flag the node as being the end of a
426 	 * sentence.  The front-end will know how to interpret this.
427 	 */
428 
429 	assert(i);
430 	if (mandoc_eos(buf, (size_t)i))
431 		man->last->flags |= MAN_EOS;
432 
433 	man_descope(man, line, offs);
434 	return(1);
435 }
436 
437 static int
438 man_pmacro(struct man *man, int ln, char *buf, int offs)
439 {
440 	struct man_node	*n;
441 	const char	*cp;
442 	enum mant	 tok;
443 	int		 i, ppos;
444 	int		 bline;
445 	char		 mac[5];
446 
447 	ppos = offs;
448 
449 	/*
450 	 * Copy the first word into a nil-terminated buffer.
451 	 * Stop when a space, tab, escape, or eoln is encountered.
452 	 */
453 
454 	i = 0;
455 	while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
456 		mac[i++] = buf[offs++];
457 
458 	mac[i] = '\0';
459 
460 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
461 
462 	if (tok == MAN_MAX) {
463 		mandoc_msg(MANDOCERR_MACRO, man->parse,
464 		    ln, ppos, buf + ppos - 1);
465 		return(1);
466 	}
467 
468 	/* Skip a leading escape sequence or tab. */
469 
470 	switch (buf[offs]) {
471 	case '\\':
472 		cp = buf + offs + 1;
473 		mandoc_escape(&cp, NULL, NULL);
474 		offs = cp - buf;
475 		break;
476 	case '\t':
477 		offs++;
478 		break;
479 	default:
480 		break;
481 	}
482 
483 	/* Jump to the next non-whitespace word. */
484 
485 	while (buf[offs] && buf[offs] == ' ')
486 		offs++;
487 
488 	/*
489 	 * Trailing whitespace.  Note that tabs are allowed to be passed
490 	 * into the parser as "text", so we only warn about spaces here.
491 	 */
492 
493 	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
494 		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
495 		    ln, offs - 1, NULL);
496 
497 	/*
498 	 * Some macros break next-line scopes; otherwise, remember
499 	 * whether we are in next-line scope for a block head.
500 	 */
501 
502 	man_breakscope(man, tok);
503 	bline = man->flags & MAN_BLINE;
504 
505 	/* Call to handler... */
506 
507 	assert(man_macros[tok].fp);
508 	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);
509 
510 	/* In quick mode (for mandocdb), abort after the NAME section. */
511 
512 	if (man->quick && tok == MAN_SH) {
513 		n = man->last;
514 		if (n->type == MAN_BODY &&
515 		    strcmp(n->prev->child->string, "NAME"))
516 			return(2);
517 	}
518 
519 	/*
520 	 * If we are in a next-line scope for a block head,
521 	 * close it out now and switch to the body,
522 	 * unless the next-line scope is allowed to continue.
523 	 */
524 
525 	if ( ! bline || man->flags & MAN_ELINE ||
526 	    man_macros[tok].flags & MAN_NSCOPED)
527 		return(1);
528 
529 	assert(man->flags & MAN_BLINE);
530 	man->flags &= ~MAN_BLINE;
531 
532 	man_unscope(man, man->last->parent);
533 	man_body_alloc(man, ln, ppos, man->last->tok);
534 	return(1);
535 }
536 
537 void
538 man_breakscope(struct man *man, enum mant tok)
539 {
540 	struct man_node	*n;
541 
542 	/*
543 	 * An element next line scope is open,
544 	 * and the new macro is not allowed inside elements.
545 	 * Delete the element that is being broken.
546 	 */
547 
548 	if (man->flags & MAN_ELINE && (tok == MAN_MAX ||
549 	    ! (man_macros[tok].flags & MAN_NSCOPED))) {
550 		n = man->last;
551 		assert(n->type != MAN_TEXT);
552 		if (man_macros[n->tok].flags & MAN_NSCOPED)
553 			n = n->parent;
554 
555 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
556 		    n->line, n->pos, "%s breaks %s",
557 		    tok == MAN_MAX ? "TS" : man_macronames[tok],
558 		    man_macronames[n->tok]);
559 
560 		man_node_delete(man, n);
561 		man->flags &= ~MAN_ELINE;
562 	}
563 
564 	/*
565 	 * A block header next line scope is open,
566 	 * and the new macro is not allowed inside block headers.
567 	 * Delete the block that is being broken.
568 	 */
569 
570 	if (man->flags & MAN_BLINE && (tok == MAN_MAX ||
571 	    man_macros[tok].flags & MAN_BSCOPE)) {
572 		n = man->last;
573 		if (n->type == MAN_TEXT)
574 			n = n->parent;
575 		if ( ! (man_macros[n->tok].flags & MAN_BSCOPE))
576 			n = n->parent;
577 
578 		assert(n->type == MAN_HEAD);
579 		n = n->parent;
580 		assert(n->type == MAN_BLOCK);
581 		assert(man_macros[n->tok].flags & MAN_SCOPED);
582 
583 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
584 		    n->line, n->pos, "%s breaks %s",
585 		    tok == MAN_MAX ? "TS" : man_macronames[tok],
586 		    man_macronames[n->tok]);
587 
588 		man_node_delete(man, n);
589 		man->flags &= ~MAN_BLINE;
590 	}
591 }
592 
593 /*
594  * Unlink a node from its context.  If "man" is provided, the last parse
595  * point will also be adjusted accordingly.
596  */
597 static void
598 man_node_unlink(struct man *man, struct man_node *n)
599 {
600 
601 	/* Adjust siblings. */
602 
603 	if (n->prev)
604 		n->prev->next = n->next;
605 	if (n->next)
606 		n->next->prev = n->prev;
607 
608 	/* Adjust parent. */
609 
610 	if (n->parent) {
611 		n->parent->nchild--;
612 		if (n->parent->child == n)
613 			n->parent->child = n->prev ? n->prev : n->next;
614 	}
615 
616 	/* Adjust parse point, if applicable. */
617 
618 	if (man && man->last == n) {
619 		/*XXX: this can occur when bailing from validation. */
620 		/*assert(NULL == n->next);*/
621 		if (n->prev) {
622 			man->last = n->prev;
623 			man->next = MAN_NEXT_SIBLING;
624 		} else {
625 			man->last = n->parent;
626 			man->next = MAN_NEXT_CHILD;
627 		}
628 	}
629 
630 	if (man && man->first == n)
631 		man->first = NULL;
632 }
633 
634 const struct mparse *
635 man_mparse(const struct man *man)
636 {
637 
638 	assert(man && man->parse);
639 	return(man->parse);
640 }
641 
642 void
643 man_deroff(char **dest, const struct man_node *n)
644 {
645 	char	*cp;
646 	size_t	 sz;
647 
648 	if (n->type != MAN_TEXT) {
649 		for (n = n->child; n; n = n->next)
650 			man_deroff(dest, n);
651 		return;
652 	}
653 
654 	/* Skip leading whitespace and escape sequences. */
655 
656 	cp = n->string;
657 	while ('\0' != *cp) {
658 		if ('\\' == *cp) {
659 			cp++;
660 			mandoc_escape((const char **)&cp, NULL, NULL);
661 		} else if (isspace((unsigned char)*cp))
662 			cp++;
663 		else
664 			break;
665 	}
666 
667 	/* Skip trailing whitespace. */
668 
669 	for (sz = strlen(cp); sz; sz--)
670 		if (0 == isspace((unsigned char)cp[sz-1]))
671 			break;
672 
673 	/* Skip empty strings. */
674 
675 	if (0 == sz)
676 		return;
677 
678 	if (NULL == *dest) {
679 		*dest = mandoc_strndup(cp, sz);
680 		return;
681 	}
682 
683 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
684 	free(*dest);
685 	*dest = cp;
686 }
687