1 /* $Id: tag.c,v 1.38 2023/11/24 05:02:18 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023 4 * Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Functions to tag syntax tree nodes. 19 * For internal use by mandoc(1) validation modules only. 20 */ 21 #include "config.h" 22 23 #include <sys/types.h> 24 25 #include <assert.h> 26 #include <limits.h> 27 #include <stddef.h> 28 #include <stdint.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 33 #include "mandoc_aux.h" 34 #include "mandoc_ohash.h" 35 #include "mandoc.h" 36 #include "roff.h" 37 #include "mdoc.h" 38 #include "roff_int.h" 39 #include "tag.h" 40 41 struct tag_entry { 42 struct roff_node **nodes; 43 size_t maxnodes; 44 size_t nnodes; 45 int prio; 46 char s[]; 47 }; 48 49 static void tag_move_href(struct roff_man *, 50 struct roff_node *, const char *); 51 static void tag_move_id(struct roff_node *); 52 53 static struct ohash tag_data; 54 55 56 /* 57 * Set up the ohash table to collect nodes 58 * where various marked-up terms are documented. 59 */ 60 void 61 tag_alloc(void) 62 { 63 mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); 64 } 65 66 void 67 tag_free(void) 68 { 69 struct tag_entry *entry; 70 unsigned int slot; 71 72 if (tag_data.info.free == NULL) 73 return; 74 entry = ohash_first(&tag_data, &slot); 75 while (entry != NULL) { 76 free(entry->nodes); 77 free(entry); 78 entry = ohash_next(&tag_data, &slot); 79 } 80 ohash_delete(&tag_data); 81 tag_data.info.free = NULL; 82 } 83 84 /* 85 * Set a node where a term is defined, 86 * unless the term is already defined at a lower priority. 87 */ 88 void 89 tag_put(const char *s, int prio, struct roff_node *n) 90 { 91 struct tag_entry *entry; 92 struct roff_node *nold; 93 const char *se, *src; 94 char *cpy; 95 size_t len; 96 unsigned int slot; 97 int changed; 98 99 assert(prio <= TAG_FALLBACK); 100 101 /* 102 * If the node is already tagged, the existing tag is 103 * explicit and we are now about to add an implicit tag. 104 * Don't do that; just skip implicit tagging if the author 105 * specified an explicit tag. 106 */ 107 108 if (n->flags & NODE_ID) 109 return; 110 111 /* Determine the implicit tag. */ 112 113 changed = 1; 114 if (s == NULL) { 115 if (n->child == NULL || n->child->type != ROFFT_TEXT) 116 return; 117 s = n->child->string; 118 switch (s[0]) { 119 case '-': 120 s++; 121 break; 122 case '\\': 123 switch (s[1]) { 124 case '&': 125 case '-': 126 case 'e': 127 s += 2; 128 break; 129 default: 130 return; 131 } 132 break; 133 default: 134 changed = 0; 135 break; 136 } 137 } 138 139 /* 140 * Translate \- and ASCII_HYPH to plain '-'. 141 * Skip whitespace and escapes and whatever follows, 142 * and if there is any, downgrade the priority. 143 */ 144 145 cpy = mandoc_malloc(strlen(s) + 1); 146 for (src = s, len = 0; *src != '\0'; src++, len++) { 147 switch (*src) { 148 case '\t': 149 case ' ': 150 changed = 1; 151 break; 152 case ASCII_HYPH: 153 cpy[len] = '-'; 154 changed = 1; 155 continue; 156 case '\\': 157 if (src[1] != '-') 158 break; 159 src++; 160 changed = 1; 161 /* FALLTHROUGH */ 162 default: 163 cpy[len] = *src; 164 continue; 165 } 166 break; 167 } 168 if (len == 0) 169 goto out; 170 cpy[len] = '\0'; 171 172 if (*src != '\0' && prio < TAG_WEAK) 173 prio = TAG_WEAK; 174 175 s = cpy; 176 se = cpy + len; 177 slot = ohash_qlookupi(&tag_data, s, &se); 178 entry = ohash_find(&tag_data, slot); 179 180 /* Build a new entry. */ 181 182 if (entry == NULL) { 183 entry = mandoc_malloc(sizeof(*entry) + len + 1); 184 memcpy(entry->s, s, len + 1); 185 entry->nodes = NULL; 186 entry->maxnodes = entry->nnodes = 0; 187 ohash_insert(&tag_data, slot, entry); 188 } 189 190 /* 191 * Lower priority numbers take precedence. 192 * If a better entry is already present, ignore the new one. 193 */ 194 195 else if (entry->prio < prio) 196 goto out; 197 198 /* 199 * If the existing entry is worse, clear it. 200 * In addition, a tag with priority TAG_FALLBACK 201 * is only used if the tag occurs exactly once. 202 */ 203 204 else if (entry->prio > prio || prio == TAG_FALLBACK) { 205 while (entry->nnodes > 0) { 206 nold = entry->nodes[--entry->nnodes]; 207 nold->flags &= ~NODE_ID; 208 free(nold->tag); 209 nold->tag = NULL; 210 } 211 if (prio == TAG_FALLBACK) { 212 entry->prio = TAG_DELETE; 213 goto out; 214 } 215 } 216 217 /* Remember the new node. */ 218 219 if (entry->maxnodes == entry->nnodes) { 220 entry->maxnodes += 4; 221 entry->nodes = mandoc_reallocarray(entry->nodes, 222 entry->maxnodes, sizeof(*entry->nodes)); 223 } 224 entry->nodes[entry->nnodes++] = n; 225 entry->prio = prio; 226 n->flags |= NODE_ID; 227 if (changed) { 228 assert(n->tag == NULL); 229 n->tag = mandoc_strndup(s, len); 230 } 231 232 out: 233 free(cpy); 234 } 235 236 int 237 tag_exists(const char *tag) 238 { 239 return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; 240 } 241 242 /* 243 * For in-line elements, move the link target 244 * to the enclosing paragraph when appropriate. 245 */ 246 static void 247 tag_move_id(struct roff_node *n) 248 { 249 struct roff_node *np; 250 251 np = n; 252 for (;;) { 253 if (np->prev != NULL) 254 np = np->prev; 255 else if ((np = np->parent) == NULL) 256 return; 257 switch (np->tok) { 258 case MDOC_It: 259 switch (np->parent->parent->norm->Bl.type) { 260 case LIST_column: 261 /* Target the ROFFT_BLOCK = <tr>. */ 262 np = np->parent; 263 break; 264 case LIST_diag: 265 case LIST_hang: 266 case LIST_inset: 267 case LIST_ohang: 268 case LIST_tag: 269 /* Target the ROFFT_HEAD = <dt>. */ 270 np = np->parent->head; 271 break; 272 default: 273 /* Target the ROFF_BODY = <li>. */ 274 break; 275 } 276 /* FALLTHROUGH */ 277 case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ 278 if (np->tag == NULL) { 279 np->tag = mandoc_strdup(n->tag == NULL ? 280 n->child->string : n->tag); 281 np->flags |= NODE_ID; 282 n->flags &= ~NODE_ID; 283 } 284 return; 285 case MDOC_Sh: 286 case MDOC_Ss: 287 case MDOC_Bd: 288 case MDOC_Bl: 289 case MDOC_D1: 290 case MDOC_Dl: 291 case MDOC_Rs: 292 /* Do not move past major blocks. */ 293 return; 294 default: 295 /* 296 * Move past in-line content and partial 297 * blocks, for example .It Xo or .It Bq Er. 298 */ 299 break; 300 } 301 } 302 } 303 304 /* 305 * When a paragraph is tagged and starts with text, 306 * move the permalink to the first few words. 307 */ 308 static void 309 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) 310 { 311 char *cp; 312 313 if (n == NULL || n->type != ROFFT_TEXT || 314 *n->string == '\0' || *n->string == ' ') 315 return; 316 317 cp = n->string; 318 while (cp != NULL && cp - n->string < 5) 319 cp = strchr(cp + 1, ' '); 320 321 /* If the first text node is longer, split it. */ 322 323 if (cp != NULL && cp[1] != '\0') { 324 man->last = n; 325 man->next = ROFF_NEXT_SIBLING; 326 roff_word_alloc(man, n->line, 327 n->pos + (cp - n->string), cp + 1); 328 man->last->flags = n->flags & ~NODE_LINE; 329 *cp = '\0'; 330 } 331 332 assert(n->tag == NULL); 333 n->tag = mandoc_strdup(tag); 334 n->flags |= NODE_HREF; 335 } 336 337 /* 338 * When all tags have been set, decide where to put 339 * the associated permalinks, and maybe move some tags 340 * to the beginning of the respective paragraphs. 341 */ 342 void 343 tag_postprocess(struct roff_man *man, struct roff_node *n) 344 { 345 if (n->flags & NODE_ID) { 346 switch (n->tok) { 347 case MDOC_Pp: 348 tag_move_href(man, n->next, n->tag); 349 break; 350 case MDOC_Bd: 351 case MDOC_D1: 352 case MDOC_Dl: 353 tag_move_href(man, n->child, n->tag); 354 break; 355 case MDOC_Bl: 356 /* XXX No permalink for now. */ 357 break; 358 default: 359 if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) 360 tag_move_id(n); 361 if (n->tok != MDOC_Tg) 362 n->flags |= NODE_HREF; 363 else if ((n->flags & NODE_ID) == 0) { 364 n->flags |= NODE_NOPRT; 365 free(n->tag); 366 n->tag = NULL; 367 } 368 break; 369 } 370 } 371 for (n = n->child; n != NULL; n = n->next) 372 tag_postprocess(man, n); 373 } 374