xref: /freebsd/contrib/mandoc/tag.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /* $Id: tag.c,v 1.38 2023/11/24 05:02:18 schwarze Exp $ */
2 /*
3  * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
4  *               Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Functions to tag syntax tree nodes.
19  * For internal use by mandoc(1) validation modules only.
20  */
21 #include "config.h"
22 
23 #include <sys/types.h>
24 
25 #include <assert.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "roff_int.h"
39 #include "tag.h"
40 
/*
 * One hash table entry per tag name.
 * The name itself is stored inline in the trailing flexible array,
 * so entries are allocated with room for the string and its NUL.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* nodes carrying this tag */
	size_t			  maxnodes;	/* slots allocated in nodes */
	size_t			  nnodes;	/* slots used in nodes */
	int			  prio;		/* lower numbers win */
	char			  s[];		/* the tag name (hash key) */
};
48 
49 static void		 tag_move_href(struct roff_man *,
50 				struct roff_node *, const char *);
51 static void		 tag_move_id(struct roff_node *);
52 
53 static struct ohash	 tag_data;
54 
55 
56 /*
57  * Set up the ohash table to collect nodes
58  * where various marked-up terms are documented.
59  */
60 void
61 tag_alloc(void)
62 {
63 	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
64 }
65 
66 void
67 tag_free(void)
68 {
69 	struct tag_entry	*entry;
70 	unsigned int		 slot;
71 
72 	if (tag_data.info.free == NULL)
73 		return;
74 	entry = ohash_first(&tag_data, &slot);
75 	while (entry != NULL) {
76 		free(entry->nodes);
77 		free(entry);
78 		entry = ohash_next(&tag_data, &slot);
79 	}
80 	ohash_delete(&tag_data);
81 	tag_data.info.free = NULL;
82 }
83 
84 /*
85  * Set a node where a term is defined,
86  * unless the term is already defined at a lower priority.
87  */
88 void
89 tag_put(const char *s, int prio, struct roff_node *n)
90 {
91 	struct tag_entry	*entry;
92 	struct roff_node	*nold;
93 	const char		*se, *src;
94 	char			*cpy;
95 	size_t			 len;
96 	unsigned int		 slot;
97 	int			 changed;
98 
99 	assert(prio <= TAG_FALLBACK);
100 
101 	/*
102 	 * If the node is already tagged, the existing tag is
103 	 * explicit and we are now about to add an implicit tag.
104 	 * Don't do that; just skip implicit tagging if the author
105 	 * specified an explicit tag.
106 	 */
107 
108 	if (n->flags & NODE_ID)
109 		return;
110 
111 	/* Determine the implicit tag. */
112 
113 	changed = 1;
114 	if (s == NULL) {
115 		if (n->child == NULL || n->child->type != ROFFT_TEXT)
116 			return;
117 		s = n->child->string;
118 		switch (s[0]) {
119 		case '-':
120 			s++;
121 			break;
122 		case '\\':
123 			switch (s[1]) {
124 			case '&':
125 			case '-':
126 			case 'e':
127 				s += 2;
128 				break;
129 			default:
130 				return;
131 			}
132 			break;
133 		default:
134 			changed = 0;
135 			break;
136 		}
137 	}
138 
139 	/*
140 	 * Translate \- and ASCII_HYPH to plain '-'.
141 	 * Skip whitespace and escapes and whatever follows,
142 	 * and if there is any, downgrade the priority.
143 	 */
144 
145 	cpy = mandoc_malloc(strlen(s) + 1);
146 	for (src = s, len = 0; *src != '\0'; src++, len++) {
147 		switch (*src) {
148 		case '\t':
149 		case ' ':
150 			changed = 1;
151 			break;
152 		case ASCII_HYPH:
153 			cpy[len] = '-';
154 			changed = 1;
155 			continue;
156 		case '\\':
157 			if (src[1] != '-')
158 				break;
159 			src++;
160 			changed = 1;
161 			/* FALLTHROUGH */
162 		default:
163 			cpy[len] = *src;
164 			continue;
165 		}
166 		break;
167 	}
168 	if (len == 0)
169 		goto out;
170 	cpy[len] = '\0';
171 
172 	if (*src != '\0' && prio < TAG_WEAK)
173 		prio = TAG_WEAK;
174 
175 	s = cpy;
176 	se = cpy + len;
177 	slot = ohash_qlookupi(&tag_data, s, &se);
178 	entry = ohash_find(&tag_data, slot);
179 
180 	/* Build a new entry. */
181 
182 	if (entry == NULL) {
183 		entry = mandoc_malloc(sizeof(*entry) + len + 1);
184 		memcpy(entry->s, s, len + 1);
185 		entry->nodes = NULL;
186 		entry->maxnodes = entry->nnodes = 0;
187 		ohash_insert(&tag_data, slot, entry);
188 	}
189 
190 	/*
191 	 * Lower priority numbers take precedence.
192 	 * If a better entry is already present, ignore the new one.
193 	 */
194 
195 	else if (entry->prio < prio)
196 		goto out;
197 
198 	/*
199 	 * If the existing entry is worse, clear it.
200 	 * In addition, a tag with priority TAG_FALLBACK
201 	 * is only used if the tag occurs exactly once.
202 	 */
203 
204 	else if (entry->prio > prio || prio == TAG_FALLBACK) {
205 		while (entry->nnodes > 0) {
206 			nold = entry->nodes[--entry->nnodes];
207 			nold->flags &= ~NODE_ID;
208 			free(nold->tag);
209 			nold->tag = NULL;
210 		}
211 		if (prio == TAG_FALLBACK) {
212 			entry->prio = TAG_DELETE;
213 			goto out;
214 		}
215 	}
216 
217 	/* Remember the new node. */
218 
219 	if (entry->maxnodes == entry->nnodes) {
220 		entry->maxnodes += 4;
221 		entry->nodes = mandoc_reallocarray(entry->nodes,
222 		    entry->maxnodes, sizeof(*entry->nodes));
223 	}
224 	entry->nodes[entry->nnodes++] = n;
225 	entry->prio = prio;
226 	n->flags |= NODE_ID;
227 	if (changed) {
228 		assert(n->tag == NULL);
229 		n->tag = mandoc_strndup(s, len);
230 	}
231 
232  out:
233 	free(cpy);
234 }
235 
236 int
237 tag_exists(const char *tag)
238 {
239 	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
240 }
241 
242 /*
243  * For in-line elements, move the link target
244  * to the enclosing paragraph when appropriate.
245  */
246 static void
247 tag_move_id(struct roff_node *n)
248 {
249 	struct roff_node *np;
250 
251 	np = n;
252 	for (;;) {
253 		if (np->prev != NULL)
254 			np = np->prev;
255 		else if ((np = np->parent) == NULL)
256 			return;
257 		switch (np->tok) {
258 		case MDOC_It:
259 			switch (np->parent->parent->norm->Bl.type) {
260 			case LIST_column:
261 				/* Target the ROFFT_BLOCK = <tr>. */
262 				np = np->parent;
263 				break;
264 			case LIST_diag:
265 			case LIST_hang:
266 			case LIST_inset:
267 			case LIST_ohang:
268 			case LIST_tag:
269 				/* Target the ROFFT_HEAD = <dt>. */
270 				np = np->parent->head;
271 				break;
272 			default:
273 				/* Target the ROFF_BODY = <li>. */
274 				break;
275 			}
276 			/* FALLTHROUGH */
277 		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
278 			if (np->tag == NULL) {
279 				np->tag = mandoc_strdup(n->tag == NULL ?
280 				    n->child->string : n->tag);
281 				np->flags |= NODE_ID;
282 				n->flags &= ~NODE_ID;
283 			}
284 			return;
285 		case MDOC_Sh:
286 		case MDOC_Ss:
287 		case MDOC_Bd:
288 		case MDOC_Bl:
289 		case MDOC_D1:
290 		case MDOC_Dl:
291 		case MDOC_Rs:
292 			/* Do not move past major blocks. */
293 			return;
294 		default:
295 			/*
296 			 * Move past in-line content and partial
297 			 * blocks, for example .It Xo or .It Bq Er.
298 			 */
299 			break;
300 		}
301 	}
302 }
303 
304 /*
305  * When a paragraph is tagged and starts with text,
306  * move the permalink to the first few words.
307  */
308 static void
309 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
310 {
311 	char	*cp;
312 
313 	if (n == NULL || n->type != ROFFT_TEXT ||
314 	    *n->string == '\0' || *n->string == ' ')
315 		return;
316 
317 	cp = n->string;
318 	while (cp != NULL && cp - n->string < 5)
319 		cp = strchr(cp + 1, ' ');
320 
321 	/* If the first text node is longer, split it. */
322 
323 	if (cp != NULL && cp[1] != '\0') {
324 		man->last = n;
325 		man->next = ROFF_NEXT_SIBLING;
326 		roff_word_alloc(man, n->line,
327 		    n->pos + (cp - n->string), cp + 1);
328 		man->last->flags = n->flags & ~NODE_LINE;
329 		*cp = '\0';
330 	}
331 
332 	assert(n->tag == NULL);
333 	n->tag = mandoc_strdup(tag);
334 	n->flags |= NODE_HREF;
335 }
336 
337 /*
338  * When all tags have been set, decide where to put
339  * the associated permalinks, and maybe move some tags
340  * to the beginning of the respective paragraphs.
341  */
342 void
343 tag_postprocess(struct roff_man *man, struct roff_node *n)
344 {
345 	if (n->flags & NODE_ID) {
346 		switch (n->tok) {
347 		case MDOC_Pp:
348 			tag_move_href(man, n->next, n->tag);
349 			break;
350 		case MDOC_Bd:
351 		case MDOC_D1:
352 		case MDOC_Dl:
353 			tag_move_href(man, n->child, n->tag);
354 			break;
355 		case MDOC_Bl:
356 			/* XXX No permalink for now. */
357 			break;
358 		default:
359 			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
360 				tag_move_id(n);
361 			if (n->tok != MDOC_Tg)
362 				n->flags |= NODE_HREF;
363 			else if ((n->flags & NODE_ID) == 0) {
364 				n->flags |= NODE_NOPRT;
365 				free(n->tag);
366 				n->tag = NULL;
367 			}
368 			break;
369 		}
370 	}
371 	for (n = n->child; n != NULL; n = n->next)
372 		tag_postprocess(man, n);
373 }
374