xref: /illumos-gate/usr/src/cmd/mandoc/man.c (revision 2a6e99a0f1f7d22c0396e8b2ce9b9babbd1056cf)
1 /*	$Id: man.c,v 1.176 2017/06/28 12:52:45 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include "config.h"
20 
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <stdarg.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "man.h"
34 #include "libmandoc.h"
35 #include "roff_int.h"
36 #include "libman.h"
37 
38 static	void		 man_descope(struct roff_man *, int, int);
39 static	int		 man_ptext(struct roff_man *, int, char *, int);
40 static	int		 man_pmacro(struct roff_man *, int, char *, int);
41 
42 
43 int
44 man_parseln(struct roff_man *man, int ln, char *buf, int offs)
45 {
46 
47 	if (man->last->type != ROFFT_EQN || ln > man->last->line)
48 		man->flags |= MAN_NEWLINE;
49 
50 	return roff_getcontrol(man->roff, buf, &offs) ?
51 	    man_pmacro(man, ln, buf, offs) :
52 	    man_ptext(man, ln, buf, offs);
53 }
54 
55 static void
56 man_descope(struct roff_man *man, int line, int offs)
57 {
58 	/*
59 	 * Co-ordinate what happens with having a next-line scope open:
60 	 * first close out the element scope (if applicable), then close
61 	 * out the block scope (also if applicable).
62 	 */
63 
64 	if (man->flags & MAN_ELINE) {
65 		man->flags &= ~MAN_ELINE;
66 		man_unscope(man, man->last->parent);
67 	}
68 	if ( ! (man->flags & MAN_BLINE))
69 		return;
70 	man->flags &= ~MAN_BLINE;
71 	man_unscope(man, man->last->parent);
72 	roff_body_alloc(man, line, offs, man->last->tok);
73 }
74 
75 static int
76 man_ptext(struct roff_man *man, int line, char *buf, int offs)
77 {
78 	int		 i;
79 	const char 	*cp, *sp;
80 	char		*ep;
81 
82 	/* Literal free-form text whitespace is preserved. */
83 
84 	if (man->flags & MAN_LITERAL) {
85 		roff_word_alloc(man, line, offs, buf + offs);
86 		man_descope(man, line, offs);
87 		return 1;
88 	}
89 
90 	for (i = offs; buf[i] == ' '; i++)
91 		/* Skip leading whitespace. */ ;
92 
93 	/*
94 	 * Blank lines are ignored in next line scope
95 	 * and right after headings and cancel preceding \c,
96 	 * but add a single vertical space elsewhere.
97 	 */
98 
99 	if (buf[i] == '\0') {
100 		if (man->flags & (MAN_ELINE | MAN_BLINE)) {
101 			mandoc_msg(MANDOCERR_BLK_BLANK, man->parse,
102 			    line, 0, NULL);
103 			return 1;
104 		}
105 		if (man->last->tok == MAN_SH || man->last->tok == MAN_SS)
106 			return 1;
107 		switch (man->last->type) {
108 		case ROFFT_TEXT:
109 			sp = man->last->string;
110 			cp = ep = strchr(sp, '\0') - 2;
111 			if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
112 				break;
113 			while (cp > sp && cp[-1] == '\\')
114 				cp--;
115 			if ((ep - cp) % 2)
116 				break;
117 			*ep = '\0';
118 			return 1;
119 		default:
120 			break;
121 		}
122 		roff_elem_alloc(man, line, offs, ROFF_sp);
123 		man->next = ROFF_NEXT_SIBLING;
124 		return 1;
125 	}
126 
127 	/*
128 	 * Warn if the last un-escaped character is whitespace. Then
129 	 * strip away the remaining spaces (tabs stay!).
130 	 */
131 
132 	i = (int)strlen(buf);
133 	assert(i);
134 
135 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
136 		if (i > 1 && '\\' != buf[i - 2])
137 			mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
138 			    line, i - 1, NULL);
139 
140 		for (--i; i && ' ' == buf[i]; i--)
141 			/* Spin back to non-space. */ ;
142 
143 		/* Jump ahead of escaped whitespace. */
144 		i += '\\' == buf[i] ? 2 : 1;
145 
146 		buf[i] = '\0';
147 	}
148 	roff_word_alloc(man, line, offs, buf + offs);
149 
150 	/*
151 	 * End-of-sentence check.  If the last character is an unescaped
152 	 * EOS character, then flag the node as being the end of a
153 	 * sentence.  The front-end will know how to interpret this.
154 	 */
155 
156 	assert(i);
157 	if (mandoc_eos(buf, (size_t)i))
158 		man->last->flags |= NODE_EOS;
159 
160 	man_descope(man, line, offs);
161 	return 1;
162 }
163 
164 static int
165 man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
166 {
167 	struct roff_node *n;
168 	const char	*cp;
169 	size_t		 sz;
170 	enum roff_tok	 tok;
171 	int		 ppos;
172 	int		 bline;
173 
174 	/* Determine the line macro. */
175 
176 	ppos = offs;
177 	tok = TOKEN_NONE;
178 	for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
179 		offs++;
180 	if (sz > 0 && sz < 4)
181 		tok = roffhash_find(man->manmac, buf + ppos, sz);
182 	if (tok == TOKEN_NONE) {
183 		mandoc_msg(MANDOCERR_MACRO, man->parse,
184 		    ln, ppos, buf + ppos - 1);
185 		return 1;
186 	}
187 
188 	/* Skip a leading escape sequence or tab. */
189 
190 	switch (buf[offs]) {
191 	case '\\':
192 		cp = buf + offs + 1;
193 		mandoc_escape(&cp, NULL, NULL);
194 		offs = cp - buf;
195 		break;
196 	case '\t':
197 		offs++;
198 		break;
199 	default:
200 		break;
201 	}
202 
203 	/* Jump to the next non-whitespace word. */
204 
205 	while (buf[offs] == ' ')
206 		offs++;
207 
208 	/*
209 	 * Trailing whitespace.  Note that tabs are allowed to be passed
210 	 * into the parser as "text", so we only warn about spaces here.
211 	 */
212 
213 	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
214 		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
215 		    ln, offs - 1, NULL);
216 
217 	/*
218 	 * Some macros break next-line scopes; otherwise, remember
219 	 * whether we are in next-line scope for a block head.
220 	 */
221 
222 	man_breakscope(man, tok);
223 	bline = man->flags & MAN_BLINE;
224 
225 	/*
226 	 * If the line in next-line scope ends with \c, keep the
227 	 * next-line scope open for the subsequent input line.
228 	 * That is not at all portable, only groff >= 1.22.4
229 	 * does it, but *if* this weird idiom occurs in a manual
230 	 * page, that's very likely what the author intended.
231 	 */
232 
233 	if (bline) {
234 		cp = strchr(buf + offs, '\0') - 2;
235 		if (cp >= buf && cp[0] == '\\' && cp[1] == 'c')
236 			bline = 0;
237 	}
238 
239 	/* Call to handler... */
240 
241 	assert(man_macros[tok].fp);
242 	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);
243 
244 	/* In quick mode (for mandocdb), abort after the NAME section. */
245 
246 	if (man->quick && tok == MAN_SH) {
247 		n = man->last;
248 		if (n->type == ROFFT_BODY &&
249 		    strcmp(n->prev->child->string, "NAME"))
250 			return 2;
251 	}
252 
253 	/*
254 	 * If we are in a next-line scope for a block head,
255 	 * close it out now and switch to the body,
256 	 * unless the next-line scope is allowed to continue.
257 	 */
258 
259 	if ( ! bline || man->flags & MAN_ELINE ||
260 	    man_macros[tok].flags & MAN_NSCOPED)
261 		return 1;
262 
263 	assert(man->flags & MAN_BLINE);
264 	man->flags &= ~MAN_BLINE;
265 
266 	man_unscope(man, man->last->parent);
267 	roff_body_alloc(man, ln, ppos, man->last->tok);
268 	return 1;
269 }
270 
271 void
272 man_breakscope(struct roff_man *man, int tok)
273 {
274 	struct roff_node *n;
275 
276 	/*
277 	 * An element next line scope is open,
278 	 * and the new macro is not allowed inside elements.
279 	 * Delete the element that is being broken.
280 	 */
281 
282 	if (man->flags & MAN_ELINE && (tok < MAN_TH ||
283 	    ! (man_macros[tok].flags & MAN_NSCOPED))) {
284 		n = man->last;
285 		if (n->type == ROFFT_TEXT)
286 			n = n->parent;
287 		if (n->tok < MAN_TH ||
288 		    man_macros[n->tok].flags & MAN_NSCOPED)
289 			n = n->parent;
290 
291 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
292 		    n->line, n->pos, "%s breaks %s",
293 		    roff_name[tok], roff_name[n->tok]);
294 
295 		roff_node_delete(man, n);
296 		man->flags &= ~MAN_ELINE;
297 	}
298 
299 	/*
300 	 * Weird special case:
301 	 * Switching fill mode closes section headers.
302 	 */
303 
304 	if (man->flags & MAN_BLINE &&
305 	    (tok == MAN_nf || tok == MAN_fi) &&
306 	    (man->last->tok == MAN_SH || man->last->tok == MAN_SS)) {
307 		n = man->last;
308 		man_unscope(man, n);
309 		roff_body_alloc(man, n->line, n->pos, n->tok);
310 		man->flags &= ~MAN_BLINE;
311 	}
312 
313 	/*
314 	 * A block header next line scope is open,
315 	 * and the new macro is not allowed inside block headers.
316 	 * Delete the block that is being broken.
317 	 */
318 
319 	if (man->flags & MAN_BLINE && (tok < MAN_TH ||
320 	    man_macros[tok].flags & MAN_BSCOPE)) {
321 		n = man->last;
322 		if (n->type == ROFFT_TEXT)
323 			n = n->parent;
324 		if (n->tok < MAN_TH ||
325 		    (man_macros[n->tok].flags & MAN_BSCOPE) == 0)
326 			n = n->parent;
327 
328 		assert(n->type == ROFFT_HEAD);
329 		n = n->parent;
330 		assert(n->type == ROFFT_BLOCK);
331 		assert(man_macros[n->tok].flags & MAN_SCOPED);
332 
333 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
334 		    n->line, n->pos, "%s breaks %s",
335 		    roff_name[tok], roff_name[n->tok]);
336 
337 		roff_node_delete(man, n);
338 		man->flags &= ~MAN_BLINE;
339 	}
340 }
341 
342 const struct mparse *
343 man_mparse(const struct roff_man *man)
344 {
345 
346 	assert(man && man->parse);
347 	return man->parse;
348 }
349 
350 void
351 man_state(struct roff_man *man, struct roff_node *n)
352 {
353 
354 	switch(n->tok) {
355 	case MAN_nf:
356 	case MAN_EX:
357 		if (man->flags & MAN_LITERAL && ! (n->flags & NODE_VALID))
358 			mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
359 			    n->line, n->pos, "nf");
360 		man->flags |= MAN_LITERAL;
361 		break;
362 	case MAN_fi:
363 	case MAN_EE:
364 		if ( ! (man->flags & MAN_LITERAL) &&
365 		     ! (n->flags & NODE_VALID))
366 			mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
367 			    n->line, n->pos, "fi");
368 		man->flags &= ~MAN_LITERAL;
369 		break;
370 	default:
371 		break;
372 	}
373 	man->last->flags |= NODE_VALID;
374 }
375 
376 void
377 man_validate(struct roff_man *man)
378 {
379 
380 	man->last = man->first;
381 	man_node_validate(man);
382 	man->flags &= ~MAN_LITERAL;
383 }
384