xref: /freebsd/contrib/mandoc/man_macro.c (revision c1c95add8c80843ba15d784f95c361d795b1f593)
1 /* $Id: man_macro.c,v 1.150 2023/11/13 19:13:01 schwarze Exp $ */
2 /*
3  * Copyright (c) 2012-2015,2017-2020,2022 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5  * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if DEBUG_MEMORY
#include "mandoc_dbg.h"
#endif
#include "mandoc.h"
#include "roff.h"
#include "man.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "libman.h"
38 
/* Macro handlers referenced from the man_macros[] dispatch table. */
static void	blk_exp(MACRO_PROT_ARGS);
static void	blk_imp(MACRO_PROT_ARGS);
static void	blk_close(MACRO_PROT_ARGS);
static void	in_line_eoln(MACRO_PROT_ARGS);

/* File-local helpers used by the handlers. */
static void	rew_scope(struct roff_man *man, enum roff_tok tok);
static int	man_args(struct roff_man *man, int line, int *pos,
		    char *buf, char **v);
46 
47 static const struct man_macro man_macros[MAN_MAX - MAN_TH] = {
48 	{ in_line_eoln, MAN_XSCOPE }, /* TH */
49 	{ blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* SH */
50 	{ blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* SS */
51 	{ blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* TP */
52 	{ blk_imp, MAN_XSCOPE | MAN_BSCOPED }, /* TQ */
53 	{ blk_imp, MAN_XSCOPE }, /* LP */
54 	{ blk_imp, MAN_XSCOPE }, /* PP */
55 	{ blk_imp, MAN_XSCOPE }, /* P */
56 	{ blk_imp, MAN_XSCOPE }, /* IP */
57 	{ blk_imp, MAN_XSCOPE }, /* HP */
58 	{ in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* SM */
59 	{ in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* SB */
60 	{ in_line_eoln, 0 }, /* BI */
61 	{ in_line_eoln, 0 }, /* IB */
62 	{ in_line_eoln, 0 }, /* BR */
63 	{ in_line_eoln, 0 }, /* RB */
64 	{ in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* R */
65 	{ in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* B */
66 	{ in_line_eoln, MAN_NSCOPED | MAN_ESCOPED | MAN_JOIN }, /* I */
67 	{ in_line_eoln, 0 }, /* IR */
68 	{ in_line_eoln, 0 }, /* RI */
69 	{ blk_close, MAN_XSCOPE }, /* RE */
70 	{ blk_exp, MAN_XSCOPE }, /* RS */
71 	{ in_line_eoln, MAN_NSCOPED }, /* DT */
72 	{ in_line_eoln, MAN_NSCOPED }, /* UC */
73 	{ in_line_eoln, MAN_NSCOPED }, /* PD */
74 	{ in_line_eoln, MAN_NSCOPED }, /* AT */
75 	{ in_line_eoln, MAN_NSCOPED }, /* in */
76 	{ blk_imp, MAN_XSCOPE }, /* SY */
77 	{ blk_close, MAN_XSCOPE }, /* YS */
78 	{ in_line_eoln, 0 }, /* OP */
79 	{ in_line_eoln, MAN_XSCOPE }, /* EX */
80 	{ in_line_eoln, MAN_XSCOPE }, /* EE */
81 	{ blk_exp, MAN_XSCOPE }, /* UR */
82 	{ blk_close, MAN_XSCOPE }, /* UE */
83 	{ blk_exp, MAN_XSCOPE }, /* MT */
84 	{ blk_close, MAN_XSCOPE }, /* ME */
85 	{ in_line_eoln, 0 }, /* MR */
86 };
87 
88 
89 const struct man_macro *
man_macro(enum roff_tok tok)90 man_macro(enum roff_tok tok)
91 {
92 	assert(tok >= MAN_TH && tok <= MAN_MAX);
93 	return man_macros + (tok - MAN_TH);
94 }
95 
96 void
man_unscope(struct roff_man * man,const struct roff_node * to)97 man_unscope(struct roff_man *man, const struct roff_node *to)
98 {
99 	struct roff_node *n;
100 
101 	to = to->parent;
102 	n = man->last;
103 	while (n != to) {
104 
105 		/* Reached the end of the document? */
106 
107 		if (to == NULL && ! (n->flags & NODE_VALID)) {
108 			if (man->flags & (MAN_BLINE | MAN_ELINE) &&
109 			    man_macro(n->tok)->flags &
110 			     (MAN_BSCOPED | MAN_NSCOPED)) {
111 				mandoc_msg(MANDOCERR_BLK_LINE,
112 				    n->line, n->pos,
113 				    "EOF breaks %s", roff_name[n->tok]);
114 				if (man->flags & MAN_ELINE) {
115 					if (n->parent->type == ROFFT_ROOT ||
116 					    (man_macro(n->parent->tok)->flags &
117 					    MAN_ESCOPED) == 0)
118 						man->flags &= ~MAN_ELINE;
119 				} else {
120 					assert(n->type == ROFFT_HEAD);
121 					n = n->parent;
122 					man->flags &= ~MAN_BLINE;
123 				}
124 				man->last = n;
125 				n = n->parent;
126 				roff_node_delete(man, man->last);
127 				continue;
128 			}
129 			if (n->type == ROFFT_BLOCK &&
130 			    man_macro(n->tok)->fp == blk_exp)
131 				mandoc_msg(MANDOCERR_BLK_NOEND,
132 				    n->line, n->pos, "%s",
133 				    roff_name[n->tok]);
134 		}
135 
136 		/*
137 		 * We might delete the man->last node
138 		 * in the post-validation phase.
139 		 * Save a pointer to the parent such that
140 		 * we know where to continue the iteration.
141 		 */
142 
143 		man->last = n;
144 		n = n->parent;
145 		man->last->flags |= NODE_VALID;
146 	}
147 
148 	/*
149 	 * If we ended up at the parent of the node we were
150 	 * supposed to rewind to, that means the target node
151 	 * got deleted, so add the next node we parse as a child
152 	 * of the parent instead of as a sibling of the target.
153 	 */
154 
155 	man->next = (man->last == to) ?
156 	    ROFF_NEXT_CHILD : ROFF_NEXT_SIBLING;
157 }
158 
159 /*
160  * Rewinding entails ascending the parse tree until a coherent point,
161  * for example, the `SH' macro will close out any intervening `SS'
162  * scopes.  When a scope is closed, it must be validated and actioned.
163  */
164 static void
rew_scope(struct roff_man * man,enum roff_tok tok)165 rew_scope(struct roff_man *man, enum roff_tok tok)
166 {
167 	struct roff_node *n;
168 
169 	/* Preserve empty paragraphs before RS. */
170 
171 	n = man->last;
172 	if (tok == MAN_RS && n->child == NULL &&
173 	    (n->tok == MAN_P || n->tok == MAN_PP || n->tok == MAN_LP))
174 		return;
175 
176 	for (;;) {
177 		if (n->type == ROFFT_ROOT)
178 			return;
179 		if (n->flags & NODE_VALID) {
180 			n = n->parent;
181 			continue;
182 		}
183 		if (n->type != ROFFT_BLOCK) {
184 			if (n->parent->type == ROFFT_ROOT) {
185 				man_unscope(man, n);
186 				return;
187 			} else {
188 				n = n->parent;
189 				continue;
190 			}
191 		}
192 		if (tok != MAN_SH && (n->tok == MAN_SH ||
193 		    (tok != MAN_SS && (n->tok == MAN_SS ||
194 		     man_macro(n->tok)->fp == blk_exp))))
195 			return;
196 		man_unscope(man, n);
197 		n = man->last;
198 	}
199 }
200 
201 
202 /*
203  * Close out a generic explicit macro.
204  */
205 void
blk_close(MACRO_PROT_ARGS)206 blk_close(MACRO_PROT_ARGS)
207 {
208 	enum roff_tok		 ctok, ntok;
209 	const struct roff_node	*nn;
210 	char			*p, *ep;
211 	int			 cline, cpos, la, nrew, target;
212 
213 	nrew = 1;
214 	switch (tok) {
215 	case MAN_RE:
216 		ntok = MAN_RS;
217 		la = *pos;
218 		if ( ! man_args(man, line, pos, buf, &p))
219 			break;
220 		for (nn = man->last->parent; nn; nn = nn->parent)
221 			if (nn->tok == ntok && nn->type == ROFFT_BLOCK)
222 				nrew++;
223 		target = strtol(p, &ep, 10);
224 		if (*ep != '\0')
225 			mandoc_msg(MANDOCERR_ARG_EXCESS, line,
226 			    la + (buf[la] == '"') + (int)(ep - p),
227 			    "RE ... %s", ep);
228 		free(p);
229 		if (target == 0)
230 			target = 1;
231 		nrew -= target;
232 		if (nrew < 1) {
233 			mandoc_msg(MANDOCERR_RE_NOTOPEN,
234 			    line, ppos, "RE %d", target);
235 			return;
236 		}
237 		break;
238 	case MAN_YS:
239 		ntok = MAN_SY;
240 		break;
241 	case MAN_UE:
242 		ntok = MAN_UR;
243 		break;
244 	case MAN_ME:
245 		ntok = MAN_MT;
246 		break;
247 	default:
248 		abort();
249 	}
250 
251 	for (nn = man->last->parent; nn; nn = nn->parent)
252 		if (nn->tok == ntok && nn->type == ROFFT_BLOCK && ! --nrew)
253 			break;
254 
255 	if (nn == NULL) {
256 		mandoc_msg(MANDOCERR_BLK_NOTOPEN,
257 		    line, ppos, "%s", roff_name[tok]);
258 		rew_scope(man, MAN_PP);
259 		if (tok == MAN_RE) {
260 			roff_elem_alloc(man, line, ppos, ROFF_br);
261 			man->last->flags |= NODE_LINE |
262 			    NODE_VALID | NODE_ENDED;
263 			man->next = ROFF_NEXT_SIBLING;
264 		}
265 		return;
266 	}
267 
268 	cline = man->last->line;
269 	cpos = man->last->pos;
270 	ctok = man->last->tok;
271 	man_unscope(man, nn);
272 
273 	if (tok == MAN_RE && nn->head->aux > 0)
274 		roff_setreg(man->roff, "an-margin", nn->head->aux, '-');
275 
276 	/* Trailing text. */
277 
278 	if (buf[*pos] != '\0') {
279 		roff_word_alloc(man, line, ppos, buf + *pos);
280 		man->last->flags |= NODE_DELIMC;
281 		if (mandoc_eos(man->last->string, strlen(man->last->string)))
282 			man->last->flags |= NODE_EOS;
283 	}
284 
285 	/* Move a trailing paragraph behind the block. */
286 
287 	if (ctok == MAN_LP || ctok == MAN_PP || ctok == MAN_P) {
288 		*pos = strlen(buf);
289 		blk_imp(man, ctok, cline, cpos, pos, buf);
290 	}
291 
292 	/* Synopsis blocks need an explicit end marker for spacing. */
293 
294 	if (tok == MAN_YS && man->last == nn) {
295 		roff_elem_alloc(man, line, ppos, tok);
296 		man_unscope(man, man->last);
297 	}
298 }
299 
300 void
blk_exp(MACRO_PROT_ARGS)301 blk_exp(MACRO_PROT_ARGS)
302 {
303 	struct roff_node *head;
304 	char		*p;
305 	int		 la;
306 
307 	if (tok == MAN_RS) {
308 		rew_scope(man, tok);
309 		man->flags |= ROFF_NONOFILL;
310 	}
311 	roff_block_alloc(man, line, ppos, tok);
312 	head = roff_head_alloc(man, line, ppos, tok);
313 
314 	la = *pos;
315 	if (man_args(man, line, pos, buf, &p)) {
316 		roff_word_alloc(man, line, la, p);
317 		if (tok == MAN_RS) {
318 			if (roff_getreg(man->roff, "an-margin") == 0)
319 				roff_setreg(man->roff, "an-margin",
320 				    5 * 24, '=');
321 			if ((head->aux = strtod(p, NULL) * 24.0) > 0)
322 				roff_setreg(man->roff, "an-margin",
323 				    head->aux, '+');
324 		}
325 		free(p);
326 	}
327 
328 	if (buf[*pos] != '\0')
329 		mandoc_msg(MANDOCERR_ARG_EXCESS, line, *pos,
330 		    "%s ... %s", roff_name[tok], buf + *pos);
331 
332 	man_unscope(man, head);
333 	roff_body_alloc(man, line, ppos, tok);
334 	man->flags &= ~ROFF_NONOFILL;
335 }
336 
337 /*
338  * Parse an implicit-block macro.  These contain a ROFFT_HEAD and a
339  * ROFFT_BODY contained within a ROFFT_BLOCK.  Rules for closing out other
340  * scopes, such as `SH' closing out an `SS', are defined in the rew
341  * routines.
342  */
343 void
blk_imp(MACRO_PROT_ARGS)344 blk_imp(MACRO_PROT_ARGS)
345 {
346 	int		 la;
347 	char		*p;
348 	struct roff_node *n;
349 
350 	rew_scope(man, tok);
351 	man->flags |= ROFF_NONOFILL;
352 	if (tok == MAN_SH || tok == MAN_SS)
353 		man->flags &= ~ROFF_NOFILL;
354 	roff_block_alloc(man, line, ppos, tok);
355 	n = roff_head_alloc(man, line, ppos, tok);
356 
357 	/* Add line arguments. */
358 
359 	for (;;) {
360 		la = *pos;
361 		if ( ! man_args(man, line, pos, buf, &p))
362 			break;
363 		roff_word_alloc(man, line, la, p);
364 		free(p);
365 	}
366 
367 	/*
368 	 * For macros having optional next-line scope,
369 	 * keep the head open if there were no arguments.
370 	 * For `TP' and `TQ', always keep the head open.
371 	 */
372 
373 	if (man_macro(tok)->flags & MAN_BSCOPED &&
374 	    (tok == MAN_TP || tok == MAN_TQ || n == man->last)) {
375 		man->flags |= MAN_BLINE;
376 		return;
377 	}
378 
379 	/* Close out the head and open the body. */
380 
381 	man_unscope(man, n);
382 	roff_body_alloc(man, line, ppos, tok);
383 	man->flags &= ~ROFF_NONOFILL;
384 }
385 
386 void
in_line_eoln(MACRO_PROT_ARGS)387 in_line_eoln(MACRO_PROT_ARGS)
388 {
389 	int		 la;
390 	char		*p;
391 	struct roff_node *n;
392 
393 	roff_elem_alloc(man, line, ppos, tok);
394 	n = man->last;
395 
396 	if (tok == MAN_EX)
397 		man->flags |= ROFF_NOFILL;
398 	else if (tok == MAN_EE)
399 		man->flags &= ~ROFF_NOFILL;
400 
401 #if DEBUG_MEMORY
402 	if (tok == MAN_TH)
403 		mandoc_dbg_name(buf);
404 #endif
405 
406 	for (;;) {
407 		if (buf[*pos] != '\0' && man->last != n && tok == MAN_PD) {
408 			mandoc_msg(MANDOCERR_ARG_EXCESS, line, *pos,
409 			    "%s ... %s", roff_name[tok], buf + *pos);
410 			break;
411 		}
412 		la = *pos;
413 		if ( ! man_args(man, line, pos, buf, &p))
414 			break;
415 		if (man_macro(tok)->flags & MAN_JOIN &&
416 		    man->last->type == ROFFT_TEXT)
417 			roff_word_append(man, p);
418 		else
419 			roff_word_alloc(man, line, la, p);
420 		free(p);
421 	}
422 
423 	/*
424 	 * Append NODE_EOS in case the last snipped argument
425 	 * ends with a dot, e.g. `.IR syslog (3).'
426 	 */
427 
428 	if (n != man->last &&
429 	    mandoc_eos(man->last->string, strlen(man->last->string)))
430 		man->last->flags |= NODE_EOS;
431 
432 	/*
433 	 * If no arguments are specified and this is MAN_ESCOPED (i.e.,
434 	 * next-line scoped), then set our mode to indicate that we're
435 	 * waiting for terms to load into our context.
436 	 */
437 
438 	if (n == man->last && man_macro(tok)->flags & MAN_ESCOPED) {
439 		man->flags |= MAN_ELINE;
440 		return;
441 	}
442 
443 	assert(man->last->type != ROFFT_ROOT);
444 	man->next = ROFF_NEXT_SIBLING;
445 
446 	/* Rewind our element scope. */
447 
448 	for ( ; man->last; man->last = man->last->parent) {
449 		man->last->flags |= NODE_VALID;
450 		if (man->last == n)
451 			break;
452 	}
453 
454 	/* Rewind next-line scoped ancestors, if any. */
455 
456 	if (man_macro(tok)->flags & MAN_ESCOPED)
457 		man_descope(man, line, ppos, NULL);
458 }
459 
460 void
man_endparse(struct roff_man * man)461 man_endparse(struct roff_man *man)
462 {
463 	man_unscope(man, man->meta.first);
464 }
465 
466 static int
man_args(struct roff_man * man,int line,int * pos,char * buf,char ** v)467 man_args(struct roff_man *man, int line, int *pos, char *buf, char **v)
468 {
469 	char	 *start;
470 
471 	assert(*pos);
472 	*v = start = buf + *pos;
473 	assert(' ' != *start);
474 
475 	if ('\0' == *start)
476 		return 0;
477 
478 	*v = roff_getarg(man->roff, v, line, pos);
479 	return 1;
480 }
481