xref: /illumos-gate/usr/src/cmd/mandoc/man.c (revision c160bf3613805cfb4a89a0433ae896d3594f551f)
1 /*	$Id: man.c,v 1.166 2015/10/22 21:54:23 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include "config.h"
20 
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "libmandoc.h"
35 #include "roff_int.h"
36 #include "libman.h"
37 
38 const	char *const __man_macronames[MAN_MAX] = {
39 	"br",		"TH",		"SH",		"SS",
40 	"TP",		"LP",		"PP",		"P",
41 	"IP",		"HP",		"SM",		"SB",
42 	"BI",		"IB",		"BR",		"RB",
43 	"R",		"B",		"I",		"IR",
44 	"RI",		"sp",		"nf",
45 	"fi",		"RE",		"RS",		"DT",
46 	"UC",		"PD",		"AT",		"in",
47 	"ft",		"OP",		"EX",		"EE",
48 	"UR",		"UE",		"ll"
49 	};
50 
51 const	char * const *man_macronames = __man_macronames;
52 
/* Line-level parsing helpers private to this file. */
static	int		 man_pmacro(struct roff_man *man, int ln,
				char *buf, int offs);
static	int		 man_ptext(struct roff_man *man, int line,
				char *buf, int offs);
static	void		 man_descope(struct roff_man *man, int line,
				int offs);
56 
57 
58 int
59 man_parseln(struct roff_man *man, int ln, char *buf, int offs)
60 {
61 
62 	if (man->last->type != ROFFT_EQN || ln > man->last->line)
63 		man->flags |= MAN_NEWLINE;
64 
65 	return roff_getcontrol(man->roff, buf, &offs) ?
66 	    man_pmacro(man, ln, buf, offs) :
67 	    man_ptext(man, ln, buf, offs);
68 }
69 
70 static void
71 man_descope(struct roff_man *man, int line, int offs)
72 {
73 	/*
74 	 * Co-ordinate what happens with having a next-line scope open:
75 	 * first close out the element scope (if applicable), then close
76 	 * out the block scope (also if applicable).
77 	 */
78 
79 	if (man->flags & MAN_ELINE) {
80 		man->flags &= ~MAN_ELINE;
81 		man_unscope(man, man->last->parent);
82 	}
83 	if ( ! (man->flags & MAN_BLINE))
84 		return;
85 	man->flags &= ~MAN_BLINE;
86 	man_unscope(man, man->last->parent);
87 	roff_body_alloc(man, line, offs, man->last->tok);
88 }
89 
90 static int
91 man_ptext(struct roff_man *man, int line, char *buf, int offs)
92 {
93 	int		 i;
94 
95 	/* Literal free-form text whitespace is preserved. */
96 
97 	if (man->flags & MAN_LITERAL) {
98 		roff_word_alloc(man, line, offs, buf + offs);
99 		man_descope(man, line, offs);
100 		return 1;
101 	}
102 
103 	for (i = offs; buf[i] == ' '; i++)
104 		/* Skip leading whitespace. */ ;
105 
106 	/*
107 	 * Blank lines are ignored right after headings
108 	 * but add a single vertical space elsewhere.
109 	 */
110 
111 	if (buf[i] == '\0') {
112 		/* Allocate a blank entry. */
113 		if (man->last->tok != MAN_SH &&
114 		    man->last->tok != MAN_SS) {
115 			roff_elem_alloc(man, line, offs, MAN_sp);
116 			man->next = ROFF_NEXT_SIBLING;
117 		}
118 		return 1;
119 	}
120 
121 	/*
122 	 * Warn if the last un-escaped character is whitespace. Then
123 	 * strip away the remaining spaces (tabs stay!).
124 	 */
125 
126 	i = (int)strlen(buf);
127 	assert(i);
128 
129 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
130 		if (i > 1 && '\\' != buf[i - 2])
131 			mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
132 			    line, i - 1, NULL);
133 
134 		for (--i; i && ' ' == buf[i]; i--)
135 			/* Spin back to non-space. */ ;
136 
137 		/* Jump ahead of escaped whitespace. */
138 		i += '\\' == buf[i] ? 2 : 1;
139 
140 		buf[i] = '\0';
141 	}
142 	roff_word_alloc(man, line, offs, buf + offs);
143 
144 	/*
145 	 * End-of-sentence check.  If the last character is an unescaped
146 	 * EOS character, then flag the node as being the end of a
147 	 * sentence.  The front-end will know how to interpret this.
148 	 */
149 
150 	assert(i);
151 	if (mandoc_eos(buf, (size_t)i))
152 		man->last->flags |= MAN_EOS;
153 
154 	man_descope(man, line, offs);
155 	return 1;
156 }
157 
158 static int
159 man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
160 {
161 	struct roff_node *n;
162 	const char	*cp;
163 	int		 tok;
164 	int		 i, ppos;
165 	int		 bline;
166 	char		 mac[5];
167 
168 	ppos = offs;
169 
170 	/*
171 	 * Copy the first word into a nil-terminated buffer.
172 	 * Stop when a space, tab, escape, or eoln is encountered.
173 	 */
174 
175 	i = 0;
176 	while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
177 		mac[i++] = buf[offs++];
178 
179 	mac[i] = '\0';
180 
181 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : TOKEN_NONE;
182 
183 	if (tok == TOKEN_NONE) {
184 		mandoc_msg(MANDOCERR_MACRO, man->parse,
185 		    ln, ppos, buf + ppos - 1);
186 		return 1;
187 	}
188 
189 	/* Skip a leading escape sequence or tab. */
190 
191 	switch (buf[offs]) {
192 	case '\\':
193 		cp = buf + offs + 1;
194 		mandoc_escape(&cp, NULL, NULL);
195 		offs = cp - buf;
196 		break;
197 	case '\t':
198 		offs++;
199 		break;
200 	default:
201 		break;
202 	}
203 
204 	/* Jump to the next non-whitespace word. */
205 
206 	while (buf[offs] && buf[offs] == ' ')
207 		offs++;
208 
209 	/*
210 	 * Trailing whitespace.  Note that tabs are allowed to be passed
211 	 * into the parser as "text", so we only warn about spaces here.
212 	 */
213 
214 	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
215 		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
216 		    ln, offs - 1, NULL);
217 
218 	/*
219 	 * Some macros break next-line scopes; otherwise, remember
220 	 * whether we are in next-line scope for a block head.
221 	 */
222 
223 	man_breakscope(man, tok);
224 	bline = man->flags & MAN_BLINE;
225 
226 	/* Call to handler... */
227 
228 	assert(man_macros[tok].fp);
229 	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);
230 
231 	/* In quick mode (for mandocdb), abort after the NAME section. */
232 
233 	if (man->quick && tok == MAN_SH) {
234 		n = man->last;
235 		if (n->type == ROFFT_BODY &&
236 		    strcmp(n->prev->child->string, "NAME"))
237 			return 2;
238 	}
239 
240 	/*
241 	 * If we are in a next-line scope for a block head,
242 	 * close it out now and switch to the body,
243 	 * unless the next-line scope is allowed to continue.
244 	 */
245 
246 	if ( ! bline || man->flags & MAN_ELINE ||
247 	    man_macros[tok].flags & MAN_NSCOPED)
248 		return 1;
249 
250 	assert(man->flags & MAN_BLINE);
251 	man->flags &= ~MAN_BLINE;
252 
253 	man_unscope(man, man->last->parent);
254 	roff_body_alloc(man, ln, ppos, man->last->tok);
255 	return 1;
256 }
257 
258 void
259 man_breakscope(struct roff_man *man, int tok)
260 {
261 	struct roff_node *n;
262 
263 	/*
264 	 * An element next line scope is open,
265 	 * and the new macro is not allowed inside elements.
266 	 * Delete the element that is being broken.
267 	 */
268 
269 	if (man->flags & MAN_ELINE && (tok == TOKEN_NONE ||
270 	    ! (man_macros[tok].flags & MAN_NSCOPED))) {
271 		n = man->last;
272 		assert(n->type != ROFFT_TEXT);
273 		if (man_macros[n->tok].flags & MAN_NSCOPED)
274 			n = n->parent;
275 
276 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
277 		    n->line, n->pos, "%s breaks %s",
278 		    tok == TOKEN_NONE ? "TS" : man_macronames[tok],
279 		    man_macronames[n->tok]);
280 
281 		roff_node_delete(man, n);
282 		man->flags &= ~MAN_ELINE;
283 	}
284 
285 	/*
286 	 * Weird special case:
287 	 * Switching fill mode closes section headers.
288 	 */
289 
290 	if (man->flags & MAN_BLINE &&
291 	    (tok == MAN_nf || tok == MAN_fi) &&
292 	    (man->last->tok == MAN_SH || man->last->tok == MAN_SS)) {
293 		n = man->last;
294 		man_unscope(man, n);
295 		roff_body_alloc(man, n->line, n->pos, n->tok);
296 		man->flags &= ~MAN_BLINE;
297 	}
298 
299 	/*
300 	 * A block header next line scope is open,
301 	 * and the new macro is not allowed inside block headers.
302 	 * Delete the block that is being broken.
303 	 */
304 
305 	if (man->flags & MAN_BLINE && (tok == TOKEN_NONE ||
306 	    man_macros[tok].flags & MAN_BSCOPE)) {
307 		n = man->last;
308 		if (n->type == ROFFT_TEXT)
309 			n = n->parent;
310 		if ( ! (man_macros[n->tok].flags & MAN_BSCOPE))
311 			n = n->parent;
312 
313 		assert(n->type == ROFFT_HEAD);
314 		n = n->parent;
315 		assert(n->type == ROFFT_BLOCK);
316 		assert(man_macros[n->tok].flags & MAN_SCOPED);
317 
318 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
319 		    n->line, n->pos, "%s breaks %s",
320 		    tok == TOKEN_NONE ? "TS" : man_macronames[tok],
321 		    man_macronames[n->tok]);
322 
323 		roff_node_delete(man, n);
324 		man->flags &= ~MAN_BLINE;
325 	}
326 }
327 
328 const struct mparse *
329 man_mparse(const struct roff_man *man)
330 {
331 
332 	assert(man && man->parse);
333 	return man->parse;
334 }
335 
336 void
337 man_state(struct roff_man *man, struct roff_node *n)
338 {
339 
340 	switch(n->tok) {
341 	case MAN_nf:
342 	case MAN_EX:
343 		if (man->flags & MAN_LITERAL && ! (n->flags & MAN_VALID))
344 			mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
345 			    n->line, n->pos, "nf");
346 		man->flags |= MAN_LITERAL;
347 		break;
348 	case MAN_fi:
349 	case MAN_EE:
350 		if ( ! (man->flags & MAN_LITERAL) &&
351 		     ! (n->flags & MAN_VALID))
352 			mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
353 			    n->line, n->pos, "fi");
354 		man->flags &= ~MAN_LITERAL;
355 		break;
356 	default:
357 		break;
358 	}
359 	man->last->flags |= MAN_VALID;
360 }
361 
362 void
363 man_validate(struct roff_man *man)
364 {
365 
366 	man->last = man->first;
367 	man_node_validate(man);
368 	man->flags &= ~MAN_LITERAL;
369 }
370