/* $Id: dba.c,v 1.11 2025/09/24 13:13:30 schwarze Exp $ */
/*
 * Copyright (c) 2016, 2017, 2025 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Allocation-based version of the mandoc database, for read-write access.
 * The interface is defined in "dba.h".
19 */ 20 #include "config.h" 21 22 #include <sys/types.h> 23 #if HAVE_ENDIAN 24 #include <endian.h> 25 #elif HAVE_SYS_ENDIAN 26 #include <sys/endian.h> 27 #elif HAVE_NTOHL 28 #include <arpa/inet.h> 29 #endif 30 #include <errno.h> 31 #include <stddef.h> 32 #include <stdint.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <unistd.h> 36 37 #include "mandoc_aux.h" 38 #include "mandoc_ohash.h" 39 #include "mansearch.h" 40 #include "dba_write.h" 41 #include "dba_array.h" 42 #include "dba.h" 43 44 struct macro_entry { 45 struct dba_array *pages; 46 char value[]; 47 }; 48 49 static void *prepend(const char *, char); 50 static void dba_pages_write(struct dba_array *); 51 static int compare_names(const void *, const void *); 52 static int compare_strings(const void *, const void *); 53 54 static struct macro_entry 55 *get_macro_entry(struct ohash *, const char *, int32_t); 56 static void dba_macros_write(struct dba_array *); 57 static void dba_macro_write(struct ohash *); 58 static int compare_entries(const void *, const void *); 59 60 61 /*** top-level functions **********************************************/ 62 63 struct dba * 64 dba_new(int32_t npages) 65 { 66 struct dba *dba; 67 struct ohash *macro; 68 int32_t im; 69 70 dba = mandoc_malloc(sizeof(*dba)); 71 dba->pages = dba_array_new(npages, DBA_GROW); 72 dba->macros = dba_array_new(MACRO_MAX, 0); 73 for (im = 0; im < MACRO_MAX; im++) { 74 macro = mandoc_malloc(sizeof(*macro)); 75 mandoc_ohash_init(macro, 4, 76 offsetof(struct macro_entry, value)); 77 dba_array_set(dba->macros, im, macro); 78 } 79 return dba; 80 } 81 82 void 83 dba_free(struct dba *dba) 84 { 85 struct dba_array *page; 86 struct ohash *macro; 87 struct macro_entry *entry; 88 unsigned int slot; 89 90 dba_array_FOREACH(dba->macros, macro) { 91 for (entry = ohash_first(macro, &slot); entry != NULL; 92 entry = ohash_next(macro, &slot)) { 93 dba_array_free(entry->pages); 94 free(entry); 95 } 96 ohash_delete(macro); 97 free(macro); 98 } 99 
dba_array_free(dba->macros); 100 101 dba_array_undel(dba->pages); 102 dba_array_FOREACH(dba->pages, page) { 103 dba_array_free(dba_array_get(page, DBP_NAME)); 104 dba_array_free(dba_array_get(page, DBP_SECT)); 105 dba_array_free(dba_array_get(page, DBP_ARCH)); 106 free(dba_array_get(page, DBP_DESC)); 107 dba_array_free(dba_array_get(page, DBP_FILE)); 108 dba_array_free(page); 109 } 110 dba_array_free(dba->pages); 111 112 free(dba); 113 } 114 115 /* 116 * Write the complete mandoc database to disk; the format is: 117 * - One integer each for magic and version. 118 * - One pointer each to the macros table and to the final magic. 119 * - The pages table. 120 * - The macros table. 121 * - And at the very end, the magic integer again. 122 */ 123 int 124 dba_write(const char *fname, struct dba *dba) 125 { 126 int save_errno; 127 int32_t pos_end, pos_macros, pos_macros_ptr; 128 129 if (dba_open(fname) == -1) 130 return -1; 131 dba_int_write(MANDOCDB_MAGIC); 132 dba_int_write(MANDOCDB_VERSION); 133 pos_macros_ptr = dba_skip(1, 2); 134 dba_pages_write(dba->pages); 135 pos_macros = dba_tell(); 136 dba_macros_write(dba->macros); 137 pos_end = dba_tell(); 138 dba_int_write(MANDOCDB_MAGIC); 139 dba_seek(pos_macros_ptr); 140 dba_int_write(pos_macros); 141 dba_int_write(pos_end); 142 if (dba_close() == -1) { 143 save_errno = errno; 144 unlink(fname); 145 errno = save_errno; 146 return -1; 147 } 148 return 0; 149 } 150 151 152 /*** functions for handling pages *************************************/ 153 154 /* 155 * Create a new page and append it to the pages table. 
156 */ 157 struct dba_array * 158 dba_page_new(struct dba_array *pages, const char *arch, 159 const char *desc, const char *file, enum form form) 160 { 161 struct dba_array *page, *entry; 162 163 page = dba_array_new(DBP_MAX, 0); 164 entry = dba_array_new(1, DBA_STR | DBA_GROW); 165 dba_array_add(page, entry); 166 entry = dba_array_new(1, DBA_STR | DBA_GROW); 167 dba_array_add(page, entry); 168 if (arch != NULL && *arch != '\0') { 169 entry = dba_array_new(1, DBA_STR | DBA_GROW); 170 dba_array_add(entry, (void *)arch); 171 } else 172 entry = NULL; 173 dba_array_add(page, entry); 174 dba_array_add(page, mandoc_strdup(desc)); 175 entry = dba_array_new(1, DBA_STR | DBA_GROW); 176 dba_array_add(entry, prepend(file, form)); 177 dba_array_add(page, entry); 178 dba_array_add(pages, page); 179 return page; 180 } 181 182 /* 183 * Add a section, architecture, or file name to an existing page. 184 * Passing the NULL pointer for the architecture makes the page MI. 185 * In that case, any earlier or later architectures are ignored. 186 */ 187 void 188 dba_page_add(struct dba_array *page, int32_t ie, const char *str) 189 { 190 struct dba_array *entries; 191 char *entry; 192 193 entries = dba_array_get(page, ie); 194 if (ie == DBP_ARCH) { 195 if (entries == NULL) 196 return; 197 if (str == NULL || *str == '\0') { 198 dba_array_free(entries); 199 dba_array_set(page, DBP_ARCH, NULL); 200 return; 201 } 202 } 203 if (*str == '\0') 204 return; 205 dba_array_FOREACH(entries, entry) { 206 if (ie == DBP_FILE && *entry < ' ') 207 entry++; 208 if (strcmp(entry, str) == 0) 209 return; 210 } 211 dba_array_add(entries, (void *)str); 212 } 213 214 /* 215 * Add an additional name to an existing page. 
216 */ 217 void 218 dba_page_alias(struct dba_array *page, const char *name, uint64_t mask) 219 { 220 struct dba_array *entries; 221 char *entry; 222 char maskbyte; 223 224 if (*name == '\0') 225 return; 226 maskbyte = mask & NAME_MASK; 227 entries = dba_array_get(page, DBP_NAME); 228 dba_array_FOREACH(entries, entry) { 229 if (strcmp(entry + 1, name) == 0) { 230 *entry |= maskbyte; 231 return; 232 } 233 } 234 dba_array_add(entries, prepend(name, maskbyte)); 235 } 236 237 /* 238 * Return a pointer to a temporary copy of instr with inbyte prepended. 239 */ 240 static void * 241 prepend(const char *instr, char inbyte) 242 { 243 static char *outstr = NULL; 244 static size_t outlen = 0; 245 size_t newlen; 246 247 newlen = strlen(instr) + 1; 248 if (newlen > outlen) { 249 outstr = mandoc_realloc(outstr, newlen + 1); 250 outlen = newlen; 251 } 252 *outstr = inbyte; 253 memcpy(outstr + 1, instr, newlen); 254 return outstr; 255 } 256 257 /* 258 * Write the pages table to disk; the format is: 259 * - One integer containing the number of pages. 260 * - For each page, five pointers to the names, sections, 261 * architectures, description, and file names of the page. 262 * MI pages write 0 instead of the architecture pointer. 263 * - One list each for names, sections, architectures, descriptions and 264 * file names. The description for each page ends with a NUL byte. 265 * For all the other lists, each string ends with a NUL byte, 266 * and the last string for a page ends with two NUL bytes. 267 * - To assure alignment of following integers, 268 * the end is padded with NUL bytes up to a multiple of four bytes. 
269 */ 270 static void 271 dba_pages_write(struct dba_array *pages) 272 { 273 struct dba_array *page, *entry; 274 int32_t pos_pages, pos_end; 275 276 pos_pages = dba_array_writelen(pages, 5); 277 dba_array_FOREACH(pages, page) { 278 dba_array_setpos(page, DBP_NAME, dba_tell()); 279 entry = dba_array_get(page, DBP_NAME); 280 dba_array_sort(entry, compare_names); 281 dba_array_writelst(entry); 282 } 283 dba_array_FOREACH(pages, page) { 284 dba_array_setpos(page, DBP_SECT, dba_tell()); 285 entry = dba_array_get(page, DBP_SECT); 286 dba_array_sort(entry, compare_strings); 287 dba_array_writelst(entry); 288 } 289 dba_array_FOREACH(pages, page) { 290 if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) { 291 dba_array_setpos(page, DBP_ARCH, dba_tell()); 292 dba_array_sort(entry, compare_strings); 293 dba_array_writelst(entry); 294 } else 295 dba_array_setpos(page, DBP_ARCH, 0); 296 } 297 dba_array_FOREACH(pages, page) { 298 dba_array_setpos(page, DBP_DESC, dba_tell()); 299 dba_str_write(dba_array_get(page, DBP_DESC)); 300 } 301 dba_array_FOREACH(pages, page) { 302 dba_array_setpos(page, DBP_FILE, dba_tell()); 303 dba_array_writelst(dba_array_get(page, DBP_FILE)); 304 } 305 pos_end = dba_align(); 306 dba_seek(pos_pages); 307 dba_array_FOREACH(pages, page) 308 dba_array_writepos(page); 309 dba_seek(pos_end); 310 } 311 312 static int 313 compare_names(const void *vp1, const void *vp2) 314 { 315 const char *cp1, *cp2; 316 int diff; 317 318 cp1 = *(const char * const *)vp1; 319 cp2 = *(const char * const *)vp2; 320 return (diff = *cp2 - *cp1) ? diff : 321 (diff = strcasecmp(cp1 + 1, cp2 + 1)) ? 
diff : 322 strcmp(cp1 + 1, cp2 + 1); 323 } 324 325 static int 326 compare_strings(const void *vp1, const void *vp2) 327 { 328 const char *cp1, *cp2; 329 330 cp1 = *(const char * const *)vp1; 331 cp2 = *(const char * const *)vp2; 332 return strcmp(cp1, cp2); 333 } 334 335 /*** functions for handling macros ************************************/ 336 337 /* 338 * In the hash table for a single macro, look up an entry by 339 * the macro value or add an empty one if it doesn't exist yet. 340 */ 341 static struct macro_entry * 342 get_macro_entry(struct ohash *macro, const char *value, int32_t np) 343 { 344 struct macro_entry *entry; 345 size_t len; 346 unsigned int slot; 347 348 slot = ohash_qlookup(macro, value); 349 if ((entry = ohash_find(macro, slot)) == NULL) { 350 len = strlen(value) + 1; 351 entry = mandoc_malloc(sizeof(*entry) + len); 352 memcpy(&entry->value, value, len); 353 entry->pages = dba_array_new(np, DBA_GROW); 354 ohash_insert(macro, slot, entry); 355 } 356 return entry; 357 } 358 359 /* 360 * In addition to get_macro_entry(), add multiple page references, 361 * converting them from the on-disk format (byte offsets in the file) 362 * to page pointers in memory. 363 */ 364 void 365 dba_macro_new(struct dba *dba, int32_t im, const char *value, 366 const int32_t *pp) 367 { 368 struct macro_entry *entry; 369 const int32_t *ip; 370 int32_t np; 371 372 np = 0; 373 for (ip = pp; *ip; ip++) 374 np++; 375 376 entry = get_macro_entry(dba_array_get(dba->macros, im), value, np); 377 for (ip = pp; *ip; ip++) 378 dba_array_add(entry->pages, dba_array_get(dba->pages, 379 be32toh(*ip) / 5 / sizeof(*ip) - 1)); 380 } 381 382 /* 383 * In addition to get_macro_entry(), add one page reference, 384 * directly taking the in-memory page pointer as an argument. 
385 */ 386 void 387 dba_macro_add(struct dba_array *macros, int32_t im, const char *value, 388 struct dba_array *page) 389 { 390 struct macro_entry *entry; 391 392 if (*value == '\0') 393 return; 394 entry = get_macro_entry(dba_array_get(macros, im), value, 1); 395 dba_array_add(entry->pages, page); 396 } 397 398 /* 399 * Write the macros table to disk; the format is: 400 * - The number of macro tables (actually, MACRO_MAX). 401 * - That number of pointers to the individual macro tables. 402 * - The individual macro tables. 403 */ 404 static void 405 dba_macros_write(struct dba_array *macros) 406 { 407 struct ohash *macro; 408 int32_t im, pos_macros, pos_end; 409 410 pos_macros = dba_array_writelen(macros, 1); 411 im = 0; 412 dba_array_FOREACH(macros, macro) { 413 dba_array_setpos(macros, im++, dba_tell()); 414 dba_macro_write(macro); 415 } 416 pos_end = dba_tell(); 417 dba_seek(pos_macros); 418 dba_array_writepos(macros); 419 dba_seek(pos_end); 420 } 421 422 /* 423 * Write one individual macro table to disk; the format is: 424 * - The number of entries in the table. 425 * - For each entry, two pointers, the first one to the value 426 * and the second one to the list of pages. 427 * - A list of values, each ending in a NUL byte. 428 * - To assure alignment of following integers, 429 * padding with NUL bytes up to a multiple of four bytes. 430 * - A list of pointers to pages, each list ending in a 0 integer. 431 */ 432 static void 433 dba_macro_write(struct ohash *macro) 434 { 435 struct macro_entry **entries, *entry; 436 struct dba_array *page; 437 int32_t *kpos, *dpos; 438 unsigned int ie, ne, slot; 439 int use; 440 int32_t addr, pos_macro, pos_end; 441 442 /* Temporary storage for filtering and sorting. 
*/ 443 444 ne = ohash_entries(macro); 445 entries = mandoc_reallocarray(NULL, ne, sizeof(*entries)); 446 kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos)); 447 dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos)); 448 449 /* Build a list of non-empty entries and sort it. */ 450 451 ne = 0; 452 for (entry = ohash_first(macro, &slot); entry != NULL; 453 entry = ohash_next(macro, &slot)) { 454 use = 0; 455 dba_array_FOREACH(entry->pages, page) 456 if (dba_array_getpos(page)) 457 use = 1; 458 if (use) 459 entries[ne++] = entry; 460 } 461 qsort(entries, ne, sizeof(*entries), compare_entries); 462 463 /* Number of entries, and space for the pointer pairs. */ 464 465 dba_int_write(ne); 466 pos_macro = dba_skip(2, ne); 467 468 /* String table. */ 469 470 for (ie = 0; ie < ne; ie++) { 471 kpos[ie] = dba_tell(); 472 dba_str_write(entries[ie]->value); 473 } 474 dba_align(); 475 476 /* Pages table. */ 477 478 for (ie = 0; ie < ne; ie++) { 479 dpos[ie] = dba_tell(); 480 dba_array_FOREACH(entries[ie]->pages, page) 481 if ((addr = dba_array_getpos(page))) 482 dba_int_write(addr); 483 dba_int_write(0); 484 } 485 pos_end = dba_tell(); 486 487 /* Fill in the pointer pairs. */ 488 489 dba_seek(pos_macro); 490 for (ie = 0; ie < ne; ie++) { 491 dba_int_write(kpos[ie]); 492 dba_int_write(dpos[ie]); 493 } 494 dba_seek(pos_end); 495 496 free(entries); 497 free(kpos); 498 free(dpos); 499 } 500 501 static int 502 compare_entries(const void *vp1, const void *vp2) 503 { 504 const struct macro_entry *ep1, *ep2; 505 506 ep1 = *(const struct macro_entry * const *)vp1; 507 ep2 = *(const struct macro_entry * const *)vp2; 508 return strcmp(ep1->value, ep2->value); 509 } 510