1 /* $Id: tag.c,v 1.38 2023/11/24 05:02:18 schwarze Exp $ */
2 /*
3 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
4 * Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Functions to tag syntax tree nodes.
19 * For internal use by mandoc(1) validation modules only.
20 */
21 #include "config.h"
22
23 #include <sys/types.h>
24
25 #include <assert.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "roff_int.h"
39 #include "tag.h"
40
/*
 * One record in the tag hash table.
 * The tag name lives in the trailing flexible array member,
 * so a record is allocated with room for the name and its NUL.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* nodes carrying this tag */
	size_t			  maxnodes;	/* slots allocated in nodes */
	size_t			  nnodes;	/* slots in use in nodes */
	int			  prio;		/* lower numbers win */
	char			  s[];		/* NUL-terminated tag name */
};
48
49 static void tag_move_href(struct roff_man *,
50 struct roff_node *, const char *);
51 static void tag_move_id(struct roff_node *);
52
53 static struct ohash tag_data;
54
55
56 /*
57 * Set up the ohash table to collect nodes
58 * where various marked-up terms are documented.
59 */
60 void
tag_alloc(void)61 tag_alloc(void)
62 {
63 mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
64 }
65
66 void
tag_free(void)67 tag_free(void)
68 {
69 struct tag_entry *entry;
70 unsigned int slot;
71
72 if (tag_data.info.free == NULL)
73 return;
74 entry = ohash_first(&tag_data, &slot);
75 while (entry != NULL) {
76 free(entry->nodes);
77 free(entry);
78 entry = ohash_next(&tag_data, &slot);
79 }
80 ohash_delete(&tag_data);
81 tag_data.info.free = NULL;
82 }
83
84 /*
85 * Set a node where a term is defined,
86 * unless the term is already defined at a lower priority.
87 */
88 void
tag_put(const char * s,int prio,struct roff_node * n)89 tag_put(const char *s, int prio, struct roff_node *n)
90 {
91 struct tag_entry *entry;
92 struct roff_node *nold;
93 const char *se, *src;
94 char *cpy;
95 size_t len;
96 unsigned int slot;
97 int changed;
98
99 assert(prio <= TAG_FALLBACK);
100
101 /*
102 * If the node is already tagged, the existing tag is
103 * explicit and we are now about to add an implicit tag.
104 * Don't do that; just skip implicit tagging if the author
105 * specified an explicit tag.
106 */
107
108 if (n->flags & NODE_ID)
109 return;
110
111 /* Determine the implicit tag. */
112
113 changed = 1;
114 if (s == NULL) {
115 if (n->child == NULL || n->child->type != ROFFT_TEXT)
116 return;
117 s = n->child->string;
118 switch (s[0]) {
119 case '-':
120 s++;
121 break;
122 case '\\':
123 switch (s[1]) {
124 case '&':
125 case '-':
126 case 'e':
127 s += 2;
128 break;
129 default:
130 return;
131 }
132 break;
133 default:
134 changed = 0;
135 break;
136 }
137 }
138
139 /*
140 * Translate \- and ASCII_HYPH to plain '-'.
141 * Skip whitespace and escapes and whatever follows,
142 * and if there is any, downgrade the priority.
143 */
144
145 cpy = mandoc_malloc(strlen(s) + 1);
146 for (src = s, len = 0; *src != '\0'; src++, len++) {
147 switch (*src) {
148 case '\t':
149 case ' ':
150 changed = 1;
151 break;
152 case ASCII_HYPH:
153 cpy[len] = '-';
154 changed = 1;
155 continue;
156 case '\\':
157 if (src[1] != '-')
158 break;
159 src++;
160 changed = 1;
161 /* FALLTHROUGH */
162 default:
163 cpy[len] = *src;
164 continue;
165 }
166 break;
167 }
168 if (len == 0)
169 goto out;
170 cpy[len] = '\0';
171
172 if (*src != '\0' && prio < TAG_WEAK)
173 prio = TAG_WEAK;
174
175 s = cpy;
176 se = cpy + len;
177 slot = ohash_qlookupi(&tag_data, s, &se);
178 entry = ohash_find(&tag_data, slot);
179
180 /* Build a new entry. */
181
182 if (entry == NULL) {
183 entry = mandoc_malloc(sizeof(*entry) + len + 1);
184 memcpy(entry->s, s, len + 1);
185 entry->nodes = NULL;
186 entry->maxnodes = entry->nnodes = 0;
187 ohash_insert(&tag_data, slot, entry);
188 }
189
190 /*
191 * Lower priority numbers take precedence.
192 * If a better entry is already present, ignore the new one.
193 */
194
195 else if (entry->prio < prio)
196 goto out;
197
198 /*
199 * If the existing entry is worse, clear it.
200 * In addition, a tag with priority TAG_FALLBACK
201 * is only used if the tag occurs exactly once.
202 */
203
204 else if (entry->prio > prio || prio == TAG_FALLBACK) {
205 while (entry->nnodes > 0) {
206 nold = entry->nodes[--entry->nnodes];
207 nold->flags &= ~NODE_ID;
208 free(nold->tag);
209 nold->tag = NULL;
210 }
211 if (prio == TAG_FALLBACK) {
212 entry->prio = TAG_DELETE;
213 goto out;
214 }
215 }
216
217 /* Remember the new node. */
218
219 if (entry->maxnodes == entry->nnodes) {
220 entry->maxnodes += 4;
221 entry->nodes = mandoc_reallocarray(entry->nodes,
222 entry->maxnodes, sizeof(*entry->nodes));
223 }
224 entry->nodes[entry->nnodes++] = n;
225 entry->prio = prio;
226 n->flags |= NODE_ID;
227 if (changed) {
228 assert(n->tag == NULL);
229 n->tag = mandoc_strndup(s, len);
230 }
231
232 out:
233 free(cpy);
234 }
235
236 int
tag_exists(const char * tag)237 tag_exists(const char *tag)
238 {
239 return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
240 }
241
242 /*
243 * For in-line elements, move the link target
244 * to the enclosing paragraph when appropriate.
245 */
246 static void
tag_move_id(struct roff_node * n)247 tag_move_id(struct roff_node *n)
248 {
249 struct roff_node *np;
250
251 np = n;
252 for (;;) {
253 if (np->prev != NULL)
254 np = np->prev;
255 else if ((np = np->parent) == NULL)
256 return;
257 switch (np->tok) {
258 case MDOC_It:
259 switch (np->parent->parent->norm->Bl.type) {
260 case LIST_column:
261 /* Target the ROFFT_BLOCK = <tr>. */
262 np = np->parent;
263 break;
264 case LIST_diag:
265 case LIST_hang:
266 case LIST_inset:
267 case LIST_ohang:
268 case LIST_tag:
269 /* Target the ROFFT_HEAD = <dt>. */
270 np = np->parent->head;
271 break;
272 default:
273 /* Target the ROFF_BODY = <li>. */
274 break;
275 }
276 /* FALLTHROUGH */
277 case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
278 if (np->tag == NULL) {
279 np->tag = mandoc_strdup(n->tag == NULL ?
280 n->child->string : n->tag);
281 np->flags |= NODE_ID;
282 n->flags &= ~NODE_ID;
283 }
284 return;
285 case MDOC_Sh:
286 case MDOC_Ss:
287 case MDOC_Bd:
288 case MDOC_Bl:
289 case MDOC_D1:
290 case MDOC_Dl:
291 case MDOC_Rs:
292 /* Do not move past major blocks. */
293 return;
294 default:
295 /*
296 * Move past in-line content and partial
297 * blocks, for example .It Xo or .It Bq Er.
298 */
299 break;
300 }
301 }
302 }
303
304 /*
305 * When a paragraph is tagged and starts with text,
306 * move the permalink to the first few words.
307 */
308 static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)309 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
310 {
311 char *cp;
312
313 if (n == NULL || n->type != ROFFT_TEXT ||
314 *n->string == '\0' || *n->string == ' ')
315 return;
316
317 cp = n->string;
318 while (cp != NULL && cp - n->string < 5)
319 cp = strchr(cp + 1, ' ');
320
321 /* If the first text node is longer, split it. */
322
323 if (cp != NULL && cp[1] != '\0') {
324 man->last = n;
325 man->next = ROFF_NEXT_SIBLING;
326 roff_word_alloc(man, n->line,
327 n->pos + (cp - n->string), cp + 1);
328 man->last->flags = n->flags & ~NODE_LINE;
329 *cp = '\0';
330 }
331
332 assert(n->tag == NULL);
333 n->tag = mandoc_strdup(tag);
334 n->flags |= NODE_HREF;
335 }
336
337 /*
338 * When all tags have been set, decide where to put
339 * the associated permalinks, and maybe move some tags
340 * to the beginning of the respective paragraphs.
341 */
342 void
tag_postprocess(struct roff_man * man,struct roff_node * n)343 tag_postprocess(struct roff_man *man, struct roff_node *n)
344 {
345 if (n->flags & NODE_ID) {
346 switch (n->tok) {
347 case MDOC_Pp:
348 tag_move_href(man, n->next, n->tag);
349 break;
350 case MDOC_Bd:
351 case MDOC_D1:
352 case MDOC_Dl:
353 tag_move_href(man, n->child, n->tag);
354 break;
355 case MDOC_Bl:
356 /* XXX No permalink for now. */
357 break;
358 default:
359 if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
360 tag_move_id(n);
361 if (n->tok != MDOC_Tg)
362 n->flags |= NODE_HREF;
363 else if ((n->flags & NODE_ID) == 0) {
364 n->flags |= NODE_NOPRT;
365 free(n->tag);
366 n->tag = NULL;
367 }
368 break;
369 }
370 }
371 for (n = n->child; n != NULL; n = n->next)
372 tag_postprocess(man, n);
373 }
374