1 /* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 * 17 * Functions to tag syntax tree nodes. 18 * For internal use by mandoc(1) validation modules only. 19 */ 20 #include "config.h" 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <limits.h> 26 #include <stddef.h> 27 #include <stdint.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "roff.h" 34 #include "mdoc.h" 35 #include "roff_int.h" 36 #include "tag.h" 37 38 struct tag_entry { 39 struct roff_node **nodes; 40 size_t maxnodes; 41 size_t nnodes; 42 int prio; 43 char s[]; 44 }; 45 46 static void tag_move_href(struct roff_man *, 47 struct roff_node *, const char *); 48 static void tag_move_id(struct roff_node *); 49 50 static struct ohash tag_data; 51 52 53 /* 54 * Set up the ohash table to collect nodes 55 * where various marked-up terms are documented. 56 */ 57 void 58 tag_alloc(void) 59 { 60 mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); 61 } 62 63 void 64 tag_free(void) 65 { 66 struct tag_entry *entry; 67 unsigned int slot; 68 69 if (tag_data.info.free == NULL) 70 return; 71 entry = ohash_first(&tag_data, &slot); 72 while (entry != NULL) { 73 free(entry->nodes); 74 free(entry); 75 entry = ohash_next(&tag_data, &slot); 76 } 77 ohash_delete(&tag_data); 78 tag_data.info.free = NULL; 79 } 80 81 /* 82 * Set a node where a term is defined, 83 * unless it is already defined at a lower priority. 84 */ 85 void 86 tag_put(const char *s, int prio, struct roff_node *n) 87 { 88 struct tag_entry *entry; 89 struct roff_node *nold; 90 const char *se; 91 size_t len; 92 unsigned int slot; 93 94 assert(prio <= TAG_FALLBACK); 95 96 if (s == NULL) { 97 if (n->child == NULL || n->child->type != ROFFT_TEXT) 98 return; 99 s = n->child->string; 100 switch (s[0]) { 101 case '-': 102 s++; 103 break; 104 case '\\': 105 switch (s[1]) { 106 case '&': 107 case '-': 108 case 'e': 109 s += 2; 110 break; 111 default: 112 break; 113 } 114 break; 115 default: 116 break; 117 } 118 } 119 120 /* 121 * Skip whitespace and escapes and whatever follows, 122 * and if there is any, downgrade the priority. 123 */ 124 125 len = strcspn(s, " \t\\"); 126 if (len == 0) 127 return; 128 129 se = s + len; 130 if (*se != '\0' && prio < TAG_WEAK) 131 prio = TAG_WEAK; 132 133 slot = ohash_qlookupi(&tag_data, s, &se); 134 entry = ohash_find(&tag_data, slot); 135 136 /* Build a new entry. */ 137 138 if (entry == NULL) { 139 entry = mandoc_malloc(sizeof(*entry) + len + 1); 140 memcpy(entry->s, s, len); 141 entry->s[len] = '\0'; 142 entry->nodes = NULL; 143 entry->maxnodes = entry->nnodes = 0; 144 ohash_insert(&tag_data, slot, entry); 145 } 146 147 /* 148 * Lower priority numbers take precedence. 149 * If a better entry is already present, ignore the new one. 150 */ 151 152 else if (entry->prio < prio) 153 return; 154 155 /* 156 * If the existing entry is worse, clear it. 157 * In addition, a tag with priority TAG_FALLBACK 158 * is only used if the tag occurs exactly once. 159 */ 160 161 else if (entry->prio > prio || prio == TAG_FALLBACK) { 162 while (entry->nnodes > 0) { 163 nold = entry->nodes[--entry->nnodes]; 164 nold->flags &= ~NODE_ID; 165 free(nold->tag); 166 nold->tag = NULL; 167 } 168 if (prio == TAG_FALLBACK) { 169 entry->prio = TAG_DELETE; 170 return; 171 } 172 } 173 174 /* Remember the new node. */ 175 176 if (entry->maxnodes == entry->nnodes) { 177 entry->maxnodes += 4; 178 entry->nodes = mandoc_reallocarray(entry->nodes, 179 entry->maxnodes, sizeof(*entry->nodes)); 180 } 181 entry->nodes[entry->nnodes++] = n; 182 entry->prio = prio; 183 n->flags |= NODE_ID; 184 if (n->child == NULL || n->child->string != s || *se != '\0') { 185 assert(n->tag == NULL); 186 n->tag = mandoc_strndup(s, len); 187 } 188 } 189 190 int 191 tag_exists(const char *tag) 192 { 193 return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; 194 } 195 196 /* 197 * For in-line elements, move the link target 198 * to the enclosing paragraph when appropriate. 199 */ 200 static void 201 tag_move_id(struct roff_node *n) 202 { 203 struct roff_node *np; 204 205 np = n; 206 for (;;) { 207 if (np->prev != NULL) 208 np = np->prev; 209 else if ((np = np->parent) == NULL) 210 return; 211 switch (np->tok) { 212 case MDOC_It: 213 switch (np->parent->parent->norm->Bl.type) { 214 case LIST_column: 215 /* Target the ROFFT_BLOCK = <tr>. */ 216 np = np->parent; 217 break; 218 case LIST_diag: 219 case LIST_hang: 220 case LIST_inset: 221 case LIST_ohang: 222 case LIST_tag: 223 /* Target the ROFFT_HEAD = <dt>. */ 224 np = np->parent->head; 225 break; 226 default: 227 /* Target the ROFF_BODY = <li>. */ 228 break; 229 } 230 /* FALLTHROUGH */ 231 case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ 232 if (np->tag == NULL) { 233 np->tag = mandoc_strdup(n->tag == NULL ? 234 n->child->string : n->tag); 235 np->flags |= NODE_ID; 236 n->flags &= ~NODE_ID; 237 } 238 return; 239 case MDOC_Sh: 240 case MDOC_Ss: 241 case MDOC_Bd: 242 case MDOC_Bl: 243 case MDOC_D1: 244 case MDOC_Dl: 245 case MDOC_Rs: 246 /* Do not move past major blocks. */ 247 return; 248 default: 249 /* 250 * Move past in-line content and partial 251 * blocks, for example .It Xo or .It Bq Er. 252 */ 253 break; 254 } 255 } 256 } 257 258 /* 259 * When a paragraph is tagged and starts with text, 260 * move the permalink to the first few words. 261 */ 262 static void 263 tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) 264 { 265 char *cp; 266 267 if (n == NULL || n->type != ROFFT_TEXT || 268 *n->string == '\0' || *n->string == ' ') 269 return; 270 271 cp = n->string; 272 while (cp != NULL && cp - n->string < 5) 273 cp = strchr(cp + 1, ' '); 274 275 /* If the first text node is longer, split it. */ 276 277 if (cp != NULL && cp[1] != '\0') { 278 man->last = n; 279 man->next = ROFF_NEXT_SIBLING; 280 roff_word_alloc(man, n->line, 281 n->pos + (cp - n->string), cp + 1); 282 man->last->flags = n->flags & ~NODE_LINE; 283 *cp = '\0'; 284 } 285 286 assert(n->tag == NULL); 287 n->tag = mandoc_strdup(tag); 288 n->flags |= NODE_HREF; 289 } 290 291 /* 292 * When all tags have been set, decide where to put 293 * the associated permalinks, and maybe move some tags 294 * to the beginning of the respective paragraphs. 295 */ 296 void 297 tag_postprocess(struct roff_man *man, struct roff_node *n) 298 { 299 if (n->flags & NODE_ID) { 300 switch (n->tok) { 301 case MDOC_Pp: 302 tag_move_href(man, n->next, n->tag); 303 break; 304 case MDOC_Bd: 305 case MDOC_D1: 306 case MDOC_Dl: 307 tag_move_href(man, n->child, n->tag); 308 break; 309 case MDOC_Bl: 310 /* XXX No permalink for now. */ 311 break; 312 default: 313 if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) 314 tag_move_id(n); 315 if (n->tok != MDOC_Tg) 316 n->flags |= NODE_HREF; 317 else if ((n->flags & NODE_ID) == 0) { 318 n->flags |= NODE_NOPRT; 319 free(n->tag); 320 n->tag = NULL; 321 } 322 break; 323 } 324 } 325 for (n = n->child; n != NULL; n = n->next) 326 tag_postprocess(man, n); 327 } 328