/* $Id: tag.c,v 1.38 2023/11/24 05:02:18 schwarze Exp $ */
/*
 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
 *               Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Functions to tag syntax tree nodes.
 * For internal use by mandoc(1) validation modules only.
 */
2161d06d6bSBaptiste Daroussin #include "config.h"
2261d06d6bSBaptiste Daroussin
2361d06d6bSBaptiste Daroussin #include <sys/types.h>
2461d06d6bSBaptiste Daroussin
256d38604fSBaptiste Daroussin #include <assert.h>
267295610fSBaptiste Daroussin #include <limits.h>
2761d06d6bSBaptiste Daroussin #include <stddef.h>
2861d06d6bSBaptiste Daroussin #include <stdint.h>
29*c1c95addSBrooks Davis #include <stdio.h>
3061d06d6bSBaptiste Daroussin #include <stdlib.h>
3161d06d6bSBaptiste Daroussin #include <string.h>
3261d06d6bSBaptiste Daroussin
3361d06d6bSBaptiste Daroussin #include "mandoc_aux.h"
3461d06d6bSBaptiste Daroussin #include "mandoc_ohash.h"
35*c1c95addSBrooks Davis #include "mandoc.h"
366d38604fSBaptiste Daroussin #include "roff.h"
376d38604fSBaptiste Daroussin #include "mdoc.h"
386d38604fSBaptiste Daroussin #include "roff_int.h"
3961d06d6bSBaptiste Daroussin #include "tag.h"
4061d06d6bSBaptiste Daroussin
/*
 * One hash table entry per tag string.
 * The entry is allocated with extra room behind the structure
 * to hold the NUL-terminated tag in the flexible member s.
 */
struct tag_entry {
	struct roff_node **nodes;	/* nodes recorded for this tag */
	size_t		 maxnodes;	/* allocated slots in nodes[] */
	size_t		 nnodes;	/* used slots in nodes[] */
	int		 prio;		/* priority; lower numbers win */
	char		 s[];		/* the tag string, i.e. the hash key */
};
4861d06d6bSBaptiste Daroussin
496d38604fSBaptiste Daroussin static void tag_move_href(struct roff_man *,
506d38604fSBaptiste Daroussin struct roff_node *, const char *);
516d38604fSBaptiste Daroussin static void tag_move_id(struct roff_node *);
5261d06d6bSBaptiste Daroussin
5361d06d6bSBaptiste Daroussin static struct ohash tag_data;
5461d06d6bSBaptiste Daroussin
5561d06d6bSBaptiste Daroussin
5661d06d6bSBaptiste Daroussin /*
576d38604fSBaptiste Daroussin * Set up the ohash table to collect nodes
5861d06d6bSBaptiste Daroussin * where various marked-up terms are documented.
5961d06d6bSBaptiste Daroussin */
606d38604fSBaptiste Daroussin void
tag_alloc(void)616d38604fSBaptiste Daroussin tag_alloc(void)
626d38604fSBaptiste Daroussin {
6361d06d6bSBaptiste Daroussin mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
646d38604fSBaptiste Daroussin }
6561d06d6bSBaptiste Daroussin
666d38604fSBaptiste Daroussin void
tag_free(void)676d38604fSBaptiste Daroussin tag_free(void)
686d38604fSBaptiste Daroussin {
696d38604fSBaptiste Daroussin struct tag_entry *entry;
706d38604fSBaptiste Daroussin unsigned int slot;
716d38604fSBaptiste Daroussin
726d38604fSBaptiste Daroussin if (tag_data.info.free == NULL)
736d38604fSBaptiste Daroussin return;
746d38604fSBaptiste Daroussin entry = ohash_first(&tag_data, &slot);
756d38604fSBaptiste Daroussin while (entry != NULL) {
766d38604fSBaptiste Daroussin free(entry->nodes);
776d38604fSBaptiste Daroussin free(entry);
786d38604fSBaptiste Daroussin entry = ohash_next(&tag_data, &slot);
796d38604fSBaptiste Daroussin }
806d38604fSBaptiste Daroussin ohash_delete(&tag_data);
816d38604fSBaptiste Daroussin tag_data.info.free = NULL;
8261d06d6bSBaptiste Daroussin }
8361d06d6bSBaptiste Daroussin
8461d06d6bSBaptiste Daroussin /*
856d38604fSBaptiste Daroussin * Set a node where a term is defined,
86*c1c95addSBrooks Davis * unless the term is already defined at a lower priority.
8761d06d6bSBaptiste Daroussin */
8861d06d6bSBaptiste Daroussin void
tag_put(const char * s,int prio,struct roff_node * n)896d38604fSBaptiste Daroussin tag_put(const char *s, int prio, struct roff_node *n)
9061d06d6bSBaptiste Daroussin {
9161d06d6bSBaptiste Daroussin struct tag_entry *entry;
926d38604fSBaptiste Daroussin struct roff_node *nold;
93*c1c95addSBrooks Davis const char *se, *src;
94*c1c95addSBrooks Davis char *cpy;
9561d06d6bSBaptiste Daroussin size_t len;
9661d06d6bSBaptiste Daroussin unsigned int slot;
97*c1c95addSBrooks Davis int changed;
9861d06d6bSBaptiste Daroussin
996d38604fSBaptiste Daroussin assert(prio <= TAG_FALLBACK);
1007295610fSBaptiste Daroussin
101*c1c95addSBrooks Davis /*
102*c1c95addSBrooks Davis * If the node is already tagged, the existing tag is
103*c1c95addSBrooks Davis * explicit and we are now about to add an implicit tag.
104*c1c95addSBrooks Davis * Don't do that; just skip implicit tagging if the author
105*c1c95addSBrooks Davis * specified an explicit tag.
106*c1c95addSBrooks Davis */
107*c1c95addSBrooks Davis
108*c1c95addSBrooks Davis if (n->flags & NODE_ID)
109*c1c95addSBrooks Davis return;
110*c1c95addSBrooks Davis
111*c1c95addSBrooks Davis /* Determine the implicit tag. */
112*c1c95addSBrooks Davis
113*c1c95addSBrooks Davis changed = 1;
1146d38604fSBaptiste Daroussin if (s == NULL) {
1156d38604fSBaptiste Daroussin if (n->child == NULL || n->child->type != ROFFT_TEXT)
1166d38604fSBaptiste Daroussin return;
1176d38604fSBaptiste Daroussin s = n->child->string;
1186d38604fSBaptiste Daroussin switch (s[0]) {
1196d38604fSBaptiste Daroussin case '-':
1206d38604fSBaptiste Daroussin s++;
1216d38604fSBaptiste Daroussin break;
1226d38604fSBaptiste Daroussin case '\\':
1236d38604fSBaptiste Daroussin switch (s[1]) {
1246d38604fSBaptiste Daroussin case '&':
1256d38604fSBaptiste Daroussin case '-':
1266d38604fSBaptiste Daroussin case 'e':
12761d06d6bSBaptiste Daroussin s += 2;
1286d38604fSBaptiste Daroussin break;
1296d38604fSBaptiste Daroussin default:
130*c1c95addSBrooks Davis return;
1316d38604fSBaptiste Daroussin }
1326d38604fSBaptiste Daroussin break;
1336d38604fSBaptiste Daroussin default:
134*c1c95addSBrooks Davis changed = 0;
1356d38604fSBaptiste Daroussin break;
1366d38604fSBaptiste Daroussin }
1376d38604fSBaptiste Daroussin }
1387295610fSBaptiste Daroussin
1397295610fSBaptiste Daroussin /*
140*c1c95addSBrooks Davis * Translate \- and ASCII_HYPH to plain '-'.
14145a5aec3SBaptiste Daroussin * Skip whitespace and escapes and whatever follows,
1427295610fSBaptiste Daroussin * and if there is any, downgrade the priority.
1437295610fSBaptiste Daroussin */
1447295610fSBaptiste Daroussin
145*c1c95addSBrooks Davis cpy = mandoc_malloc(strlen(s) + 1);
146*c1c95addSBrooks Davis for (src = s, len = 0; *src != '\0'; src++, len++) {
147*c1c95addSBrooks Davis switch (*src) {
148*c1c95addSBrooks Davis case '\t':
149*c1c95addSBrooks Davis case ' ':
150*c1c95addSBrooks Davis changed = 1;
151*c1c95addSBrooks Davis break;
152*c1c95addSBrooks Davis case ASCII_HYPH:
153*c1c95addSBrooks Davis cpy[len] = '-';
154*c1c95addSBrooks Davis changed = 1;
155*c1c95addSBrooks Davis continue;
156*c1c95addSBrooks Davis case '\\':
157*c1c95addSBrooks Davis if (src[1] != '-')
158*c1c95addSBrooks Davis break;
159*c1c95addSBrooks Davis src++;
160*c1c95addSBrooks Davis changed = 1;
161*c1c95addSBrooks Davis /* FALLTHROUGH */
162*c1c95addSBrooks Davis default:
163*c1c95addSBrooks Davis cpy[len] = *src;
164*c1c95addSBrooks Davis continue;
165*c1c95addSBrooks Davis }
166*c1c95addSBrooks Davis break;
167*c1c95addSBrooks Davis }
1687295610fSBaptiste Daroussin if (len == 0)
169*c1c95addSBrooks Davis goto out;
170*c1c95addSBrooks Davis cpy[len] = '\0';
17161d06d6bSBaptiste Daroussin
172*c1c95addSBrooks Davis if (*src != '\0' && prio < TAG_WEAK)
1736d38604fSBaptiste Daroussin prio = TAG_WEAK;
1747295610fSBaptiste Daroussin
175*c1c95addSBrooks Davis s = cpy;
176*c1c95addSBrooks Davis se = cpy + len;
1777295610fSBaptiste Daroussin slot = ohash_qlookupi(&tag_data, s, &se);
17861d06d6bSBaptiste Daroussin entry = ohash_find(&tag_data, slot);
17961d06d6bSBaptiste Daroussin
18061d06d6bSBaptiste Daroussin /* Build a new entry. */
18161d06d6bSBaptiste Daroussin
1826d38604fSBaptiste Daroussin if (entry == NULL) {
1837295610fSBaptiste Daroussin entry = mandoc_malloc(sizeof(*entry) + len + 1);
184*c1c95addSBrooks Davis memcpy(entry->s, s, len + 1);
1856d38604fSBaptiste Daroussin entry->nodes = NULL;
1866d38604fSBaptiste Daroussin entry->maxnodes = entry->nnodes = 0;
18761d06d6bSBaptiste Daroussin ohash_insert(&tag_data, slot, entry);
1886d38604fSBaptiste Daroussin }
18961d06d6bSBaptiste Daroussin
1907295610fSBaptiste Daroussin /*
1916d38604fSBaptiste Daroussin * Lower priority numbers take precedence.
1926d38604fSBaptiste Daroussin * If a better entry is already present, ignore the new one.
1937295610fSBaptiste Daroussin */
19461d06d6bSBaptiste Daroussin
1956d38604fSBaptiste Daroussin else if (entry->prio < prio)
196*c1c95addSBrooks Davis goto out;
19761d06d6bSBaptiste Daroussin
1986d38604fSBaptiste Daroussin /*
1996d38604fSBaptiste Daroussin * If the existing entry is worse, clear it.
2006d38604fSBaptiste Daroussin * In addition, a tag with priority TAG_FALLBACK
2016d38604fSBaptiste Daroussin * is only used if the tag occurs exactly once.
2026d38604fSBaptiste Daroussin */
20361d06d6bSBaptiste Daroussin
2046d38604fSBaptiste Daroussin else if (entry->prio > prio || prio == TAG_FALLBACK) {
2056d38604fSBaptiste Daroussin while (entry->nnodes > 0) {
2066d38604fSBaptiste Daroussin nold = entry->nodes[--entry->nnodes];
2076d38604fSBaptiste Daroussin nold->flags &= ~NODE_ID;
2086d38604fSBaptiste Daroussin free(nold->tag);
2096d38604fSBaptiste Daroussin nold->tag = NULL;
2106d38604fSBaptiste Daroussin }
2116d38604fSBaptiste Daroussin if (prio == TAG_FALLBACK) {
2126d38604fSBaptiste Daroussin entry->prio = TAG_DELETE;
213*c1c95addSBrooks Davis goto out;
2146d38604fSBaptiste Daroussin }
21561d06d6bSBaptiste Daroussin }
21661d06d6bSBaptiste Daroussin
2176d38604fSBaptiste Daroussin /* Remember the new node. */
21861d06d6bSBaptiste Daroussin
2196d38604fSBaptiste Daroussin if (entry->maxnodes == entry->nnodes) {
2206d38604fSBaptiste Daroussin entry->maxnodes += 4;
2216d38604fSBaptiste Daroussin entry->nodes = mandoc_reallocarray(entry->nodes,
2226d38604fSBaptiste Daroussin entry->maxnodes, sizeof(*entry->nodes));
22361d06d6bSBaptiste Daroussin }
2246d38604fSBaptiste Daroussin entry->nodes[entry->nnodes++] = n;
22561d06d6bSBaptiste Daroussin entry->prio = prio;
2266d38604fSBaptiste Daroussin n->flags |= NODE_ID;
227*c1c95addSBrooks Davis if (changed) {
2286d38604fSBaptiste Daroussin assert(n->tag == NULL);
2296d38604fSBaptiste Daroussin n->tag = mandoc_strndup(s, len);
2306d38604fSBaptiste Daroussin }
231*c1c95addSBrooks Davis
232*c1c95addSBrooks Davis out:
233*c1c95addSBrooks Davis free(cpy);
2346d38604fSBaptiste Daroussin }
2356d38604fSBaptiste Daroussin
2366d38604fSBaptiste Daroussin int
tag_exists(const char * tag)2376d38604fSBaptiste Daroussin tag_exists(const char *tag)
2386d38604fSBaptiste Daroussin {
2396d38604fSBaptiste Daroussin return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
24061d06d6bSBaptiste Daroussin }
24161d06d6bSBaptiste Daroussin
24261d06d6bSBaptiste Daroussin /*
2436d38604fSBaptiste Daroussin * For in-line elements, move the link target
2446d38604fSBaptiste Daroussin * to the enclosing paragraph when appropriate.
2456d38604fSBaptiste Daroussin */
2466d38604fSBaptiste Daroussin static void
tag_move_id(struct roff_node * n)2476d38604fSBaptiste Daroussin tag_move_id(struct roff_node *n)
2486d38604fSBaptiste Daroussin {
2496d38604fSBaptiste Daroussin struct roff_node *np;
2506d38604fSBaptiste Daroussin
2516d38604fSBaptiste Daroussin np = n;
2526d38604fSBaptiste Daroussin for (;;) {
2536d38604fSBaptiste Daroussin if (np->prev != NULL)
2546d38604fSBaptiste Daroussin np = np->prev;
2556d38604fSBaptiste Daroussin else if ((np = np->parent) == NULL)
2566d38604fSBaptiste Daroussin return;
2576d38604fSBaptiste Daroussin switch (np->tok) {
2586d38604fSBaptiste Daroussin case MDOC_It:
2596d38604fSBaptiste Daroussin switch (np->parent->parent->norm->Bl.type) {
2606d38604fSBaptiste Daroussin case LIST_column:
2616d38604fSBaptiste Daroussin /* Target the ROFFT_BLOCK = <tr>. */
2626d38604fSBaptiste Daroussin np = np->parent;
2636d38604fSBaptiste Daroussin break;
2646d38604fSBaptiste Daroussin case LIST_diag:
2656d38604fSBaptiste Daroussin case LIST_hang:
2666d38604fSBaptiste Daroussin case LIST_inset:
2676d38604fSBaptiste Daroussin case LIST_ohang:
2686d38604fSBaptiste Daroussin case LIST_tag:
2696d38604fSBaptiste Daroussin /* Target the ROFFT_HEAD = <dt>. */
2706d38604fSBaptiste Daroussin np = np->parent->head;
2716d38604fSBaptiste Daroussin break;
2726d38604fSBaptiste Daroussin default:
2736d38604fSBaptiste Daroussin /* Target the ROFF_BODY = <li>. */
2746d38604fSBaptiste Daroussin break;
2756d38604fSBaptiste Daroussin }
2766d38604fSBaptiste Daroussin /* FALLTHROUGH */
2776d38604fSBaptiste Daroussin case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */
2786d38604fSBaptiste Daroussin if (np->tag == NULL) {
2796d38604fSBaptiste Daroussin np->tag = mandoc_strdup(n->tag == NULL ?
2806d38604fSBaptiste Daroussin n->child->string : n->tag);
2816d38604fSBaptiste Daroussin np->flags |= NODE_ID;
2826d38604fSBaptiste Daroussin n->flags &= ~NODE_ID;
2836d38604fSBaptiste Daroussin }
2846d38604fSBaptiste Daroussin return;
2856d38604fSBaptiste Daroussin case MDOC_Sh:
2866d38604fSBaptiste Daroussin case MDOC_Ss:
2876d38604fSBaptiste Daroussin case MDOC_Bd:
2886d38604fSBaptiste Daroussin case MDOC_Bl:
2896d38604fSBaptiste Daroussin case MDOC_D1:
2906d38604fSBaptiste Daroussin case MDOC_Dl:
2916d38604fSBaptiste Daroussin case MDOC_Rs:
2926d38604fSBaptiste Daroussin /* Do not move past major blocks. */
2936d38604fSBaptiste Daroussin return;
2946d38604fSBaptiste Daroussin default:
2956d38604fSBaptiste Daroussin /*
2966d38604fSBaptiste Daroussin * Move past in-line content and partial
2976d38604fSBaptiste Daroussin * blocks, for example .It Xo or .It Bq Er.
2986d38604fSBaptiste Daroussin */
2996d38604fSBaptiste Daroussin break;
3006d38604fSBaptiste Daroussin }
3016d38604fSBaptiste Daroussin }
3026d38604fSBaptiste Daroussin }
3036d38604fSBaptiste Daroussin
3046d38604fSBaptiste Daroussin /*
3056d38604fSBaptiste Daroussin * When a paragraph is tagged and starts with text,
3066d38604fSBaptiste Daroussin * move the permalink to the first few words.
3076d38604fSBaptiste Daroussin */
3086d38604fSBaptiste Daroussin static void
tag_move_href(struct roff_man * man,struct roff_node * n,const char * tag)3096d38604fSBaptiste Daroussin tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
3106d38604fSBaptiste Daroussin {
3116d38604fSBaptiste Daroussin char *cp;
3126d38604fSBaptiste Daroussin
3136d38604fSBaptiste Daroussin if (n == NULL || n->type != ROFFT_TEXT ||
3146d38604fSBaptiste Daroussin *n->string == '\0' || *n->string == ' ')
3156d38604fSBaptiste Daroussin return;
3166d38604fSBaptiste Daroussin
3176d38604fSBaptiste Daroussin cp = n->string;
3186d38604fSBaptiste Daroussin while (cp != NULL && cp - n->string < 5)
3196d38604fSBaptiste Daroussin cp = strchr(cp + 1, ' ');
3206d38604fSBaptiste Daroussin
3216d38604fSBaptiste Daroussin /* If the first text node is longer, split it. */
3226d38604fSBaptiste Daroussin
3236d38604fSBaptiste Daroussin if (cp != NULL && cp[1] != '\0') {
3246d38604fSBaptiste Daroussin man->last = n;
3256d38604fSBaptiste Daroussin man->next = ROFF_NEXT_SIBLING;
3266d38604fSBaptiste Daroussin roff_word_alloc(man, n->line,
3276d38604fSBaptiste Daroussin n->pos + (cp - n->string), cp + 1);
3286d38604fSBaptiste Daroussin man->last->flags = n->flags & ~NODE_LINE;
3296d38604fSBaptiste Daroussin *cp = '\0';
3306d38604fSBaptiste Daroussin }
3316d38604fSBaptiste Daroussin
3326d38604fSBaptiste Daroussin assert(n->tag == NULL);
3336d38604fSBaptiste Daroussin n->tag = mandoc_strdup(tag);
3346d38604fSBaptiste Daroussin n->flags |= NODE_HREF;
3356d38604fSBaptiste Daroussin }
3366d38604fSBaptiste Daroussin
3376d38604fSBaptiste Daroussin /*
3386d38604fSBaptiste Daroussin * When all tags have been set, decide where to put
3396d38604fSBaptiste Daroussin * the associated permalinks, and maybe move some tags
3406d38604fSBaptiste Daroussin * to the beginning of the respective paragraphs.
34161d06d6bSBaptiste Daroussin */
34261d06d6bSBaptiste Daroussin void
tag_postprocess(struct roff_man * man,struct roff_node * n)3436d38604fSBaptiste Daroussin tag_postprocess(struct roff_man *man, struct roff_node *n)
34461d06d6bSBaptiste Daroussin {
3456d38604fSBaptiste Daroussin if (n->flags & NODE_ID) {
3466d38604fSBaptiste Daroussin switch (n->tok) {
3476d38604fSBaptiste Daroussin case MDOC_Pp:
3486d38604fSBaptiste Daroussin tag_move_href(man, n->next, n->tag);
3496d38604fSBaptiste Daroussin break;
3506d38604fSBaptiste Daroussin case MDOC_Bd:
3516d38604fSBaptiste Daroussin case MDOC_D1:
3526d38604fSBaptiste Daroussin case MDOC_Dl:
3536d38604fSBaptiste Daroussin tag_move_href(man, n->child, n->tag);
3546d38604fSBaptiste Daroussin break;
3556d38604fSBaptiste Daroussin case MDOC_Bl:
3566d38604fSBaptiste Daroussin /* XXX No permalink for now. */
3576d38604fSBaptiste Daroussin break;
3586d38604fSBaptiste Daroussin default:
3596d38604fSBaptiste Daroussin if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
3606d38604fSBaptiste Daroussin tag_move_id(n);
3616d38604fSBaptiste Daroussin if (n->tok != MDOC_Tg)
3626d38604fSBaptiste Daroussin n->flags |= NODE_HREF;
3636d38604fSBaptiste Daroussin else if ((n->flags & NODE_ID) == 0) {
3646d38604fSBaptiste Daroussin n->flags |= NODE_NOPRT;
3656d38604fSBaptiste Daroussin free(n->tag);
3666d38604fSBaptiste Daroussin n->tag = NULL;
3677295610fSBaptiste Daroussin }
3686d38604fSBaptiste Daroussin break;
36945a5aec3SBaptiste Daroussin }
37045a5aec3SBaptiste Daroussin }
3716d38604fSBaptiste Daroussin for (n = n->child; n != NULL; n = n->next)
3726d38604fSBaptiste Daroussin tag_postprocess(man, n);
37361d06d6bSBaptiste Daroussin }
374