1 /* $Id: dba.c,v 1.10 2017/02/17 14:43:54 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 * 17 * Allocation-based version of the mandoc database, for read-write access. 18 * The interface is defined in "dba.h". 19 */ 20 #include "config.h" 21 22 #include <sys/types.h> 23 #if HAVE_ENDIAN 24 #include <endian.h> 25 #elif HAVE_SYS_ENDIAN 26 #include <sys/endian.h> 27 #elif HAVE_NTOHL 28 #include <arpa/inet.h> 29 #endif 30 #include <errno.h> 31 #include <stddef.h> 32 #include <stdint.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <unistd.h> 36 37 #include "mandoc_aux.h" 38 #include "mandoc_ohash.h" 39 #include "mansearch.h" 40 #include "dba_write.h" 41 #include "dba_array.h" 42 #include "dba.h" 43 44 struct macro_entry { 45 struct dba_array *pages; 46 char value[]; 47 }; 48 49 static void *prepend(const char *, char); 50 static void dba_pages_write(struct dba_array *); 51 static int compare_names(const void *, const void *); 52 static int compare_strings(const void *, const void *); 53 54 static struct macro_entry 55 *get_macro_entry(struct ohash *, const char *, int32_t); 56 static void dba_macros_write(struct dba_array *); 57 static void dba_macro_write(struct ohash *); 58 static int compare_entries(const void *, const void *); 59 60 61 /*** top-level functions **********************************************/ 62 63 struct dba * 64 dba_new(int32_t npages) 65 { 66 struct dba *dba; 67 struct ohash *macro; 68 int32_t im; 69 70 dba = mandoc_malloc(sizeof(*dba)); 71 dba->pages = dba_array_new(npages, DBA_GROW); 72 dba->macros = dba_array_new(MACRO_MAX, 0); 73 for (im = 0; im < MACRO_MAX; im++) { 74 macro = mandoc_malloc(sizeof(*macro)); 75 mandoc_ohash_init(macro, 4, 76 offsetof(struct macro_entry, value)); 77 dba_array_set(dba->macros, im, macro); 78 } 79 return dba; 80 } 81 82 void 83 dba_free(struct dba *dba) 84 { 85 struct dba_array *page; 86 struct ohash *macro; 87 struct macro_entry *entry; 88 unsigned int slot; 89 90 dba_array_FOREACH(dba->macros, macro) { 91 for (entry = ohash_first(macro, &slot); entry != NULL; 92 entry = ohash_next(macro, &slot)) { 93 dba_array_free(entry->pages); 94 free(entry); 95 } 96 ohash_delete(macro); 97 free(macro); 98 } 99 dba_array_free(dba->macros); 100 101 dba_array_undel(dba->pages); 102 dba_array_FOREACH(dba->pages, page) { 103 dba_array_free(dba_array_get(page, DBP_NAME)); 104 dba_array_free(dba_array_get(page, DBP_SECT)); 105 dba_array_free(dba_array_get(page, DBP_ARCH)); 106 free(dba_array_get(page, DBP_DESC)); 107 dba_array_free(dba_array_get(page, DBP_FILE)); 108 dba_array_free(page); 109 } 110 dba_array_free(dba->pages); 111 112 free(dba); 113 } 114 115 /* 116 * Write the complete mandoc database to disk; the format is: 117 * - One integer each for magic and version. 118 * - One pointer each to the macros table and to the final magic. 119 * - The pages table. 120 * - The macros table. 121 * - And at the very end, the magic integer again. 122 */ 123 int 124 dba_write(const char *fname, struct dba *dba) 125 { 126 int save_errno; 127 int32_t pos_end, pos_macros, pos_macros_ptr; 128 129 if (dba_open(fname) == -1) 130 return -1; 131 dba_int_write(MANDOCDB_MAGIC); 132 dba_int_write(MANDOCDB_VERSION); 133 pos_macros_ptr = dba_skip(1, 2); 134 dba_pages_write(dba->pages); 135 pos_macros = dba_tell(); 136 dba_macros_write(dba->macros); 137 pos_end = dba_tell(); 138 dba_int_write(MANDOCDB_MAGIC); 139 dba_seek(pos_macros_ptr); 140 dba_int_write(pos_macros); 141 dba_int_write(pos_end); 142 if (dba_close() == -1) { 143 save_errno = errno; 144 unlink(fname); 145 errno = save_errno; 146 return -1; 147 } 148 return 0; 149 } 150 151 152 /*** functions for handling pages *************************************/ 153 154 /* 155 * Create a new page and append it to the pages table. 156 */ 157 struct dba_array * 158 dba_page_new(struct dba_array *pages, const char *arch, 159 const char *desc, const char *file, enum form form) 160 { 161 struct dba_array *page, *entry; 162 163 page = dba_array_new(DBP_MAX, 0); 164 entry = dba_array_new(1, DBA_STR | DBA_GROW); 165 dba_array_add(page, entry); 166 entry = dba_array_new(1, DBA_STR | DBA_GROW); 167 dba_array_add(page, entry); 168 if (arch != NULL && *arch != '\0') { 169 entry = dba_array_new(1, DBA_STR | DBA_GROW); 170 dba_array_add(entry, (void *)arch); 171 } else 172 entry = NULL; 173 dba_array_add(page, entry); 174 dba_array_add(page, mandoc_strdup(desc)); 175 entry = dba_array_new(1, DBA_STR | DBA_GROW); 176 dba_array_add(entry, prepend(file, form)); 177 dba_array_add(page, entry); 178 dba_array_add(pages, page); 179 return page; 180 } 181 182 /* 183 * Add a section, architecture, or file name to an existing page. 184 * Passing the NULL pointer for the architecture makes the page MI. 185 * In that case, any earlier or later architectures are ignored. 186 */ 187 void 188 dba_page_add(struct dba_array *page, int32_t ie, const char *str) 189 { 190 struct dba_array *entries; 191 char *entry; 192 193 entries = dba_array_get(page, ie); 194 if (ie == DBP_ARCH) { 195 if (entries == NULL) 196 return; 197 if (str == NULL || *str == '\0') { 198 dba_array_free(entries); 199 dba_array_set(page, DBP_ARCH, NULL); 200 return; 201 } 202 } 203 if (*str == '\0') 204 return; 205 dba_array_FOREACH(entries, entry) { 206 if (ie == DBP_FILE && *entry < ' ') 207 entry++; 208 if (strcmp(entry, str) == 0) 209 return; 210 } 211 dba_array_add(entries, (void *)str); 212 } 213 214 /* 215 * Add an additional name to an existing page. 216 */ 217 void 218 dba_page_alias(struct dba_array *page, const char *name, uint64_t mask) 219 { 220 struct dba_array *entries; 221 char *entry; 222 char maskbyte; 223 224 if (*name == '\0') 225 return; 226 maskbyte = mask & NAME_MASK; 227 entries = dba_array_get(page, DBP_NAME); 228 dba_array_FOREACH(entries, entry) { 229 if (strcmp(entry + 1, name) == 0) { 230 *entry |= maskbyte; 231 return; 232 } 233 } 234 dba_array_add(entries, prepend(name, maskbyte)); 235 } 236 237 /* 238 * Return a pointer to a temporary copy of instr with inbyte prepended. 239 */ 240 static void * 241 prepend(const char *instr, char inbyte) 242 { 243 static char *outstr = NULL; 244 static size_t outlen = 0; 245 size_t newlen; 246 247 newlen = strlen(instr) + 1; 248 if (newlen > outlen) { 249 outstr = mandoc_realloc(outstr, newlen + 1); 250 outlen = newlen; 251 } 252 *outstr = inbyte; 253 memcpy(outstr + 1, instr, newlen); 254 return outstr; 255 } 256 257 /* 258 * Write the pages table to disk; the format is: 259 * - One integer containing the number of pages. 260 * - For each page, five pointers to the names, sections, 261 * architectures, description, and file names of the page. 262 * MI pages write 0 instead of the architecture pointer. 263 * - One list each for names, sections, architectures, descriptions and 264 * file names. The description for each page ends with a NUL byte. 265 * For all the other lists, each string ends with a NUL byte, 266 * and the last string for a page ends with two NUL bytes. 267 * - To assure alignment of following integers, 268 * the end is padded with NUL bytes up to a multiple of four bytes. 269 */ 270 static void 271 dba_pages_write(struct dba_array *pages) 272 { 273 struct dba_array *page, *entry; 274 int32_t pos_pages, pos_end; 275 276 pos_pages = dba_array_writelen(pages, 5); 277 dba_array_FOREACH(pages, page) { 278 dba_array_setpos(page, DBP_NAME, dba_tell()); 279 entry = dba_array_get(page, DBP_NAME); 280 dba_array_sort(entry, compare_names); 281 dba_array_writelst(entry); 282 } 283 dba_array_FOREACH(pages, page) { 284 dba_array_setpos(page, DBP_SECT, dba_tell()); 285 entry = dba_array_get(page, DBP_SECT); 286 dba_array_sort(entry, compare_strings); 287 dba_array_writelst(entry); 288 } 289 dba_array_FOREACH(pages, page) { 290 if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) { 291 dba_array_setpos(page, DBP_ARCH, dba_tell()); 292 dba_array_sort(entry, compare_strings); 293 dba_array_writelst(entry); 294 } else 295 dba_array_setpos(page, DBP_ARCH, 0); 296 } 297 dba_array_FOREACH(pages, page) { 298 dba_array_setpos(page, DBP_DESC, dba_tell()); 299 dba_str_write(dba_array_get(page, DBP_DESC)); 300 } 301 dba_array_FOREACH(pages, page) { 302 dba_array_setpos(page, DBP_FILE, dba_tell()); 303 dba_array_writelst(dba_array_get(page, DBP_FILE)); 304 } 305 pos_end = dba_align(); 306 dba_seek(pos_pages); 307 dba_array_FOREACH(pages, page) 308 dba_array_writepos(page); 309 dba_seek(pos_end); 310 } 311 312 static int 313 compare_names(const void *vp1, const void *vp2) 314 { 315 const char *cp1, *cp2; 316 int diff; 317 318 cp1 = *(const char * const *)vp1; 319 cp2 = *(const char * const *)vp2; 320 return (diff = *cp2 - *cp1) ? diff : 321 strcasecmp(cp1 + 1, cp2 + 1); 322 } 323 324 static int 325 compare_strings(const void *vp1, const void *vp2) 326 { 327 const char *cp1, *cp2; 328 329 cp1 = *(const char * const *)vp1; 330 cp2 = *(const char * const *)vp2; 331 return strcmp(cp1, cp2); 332 } 333 334 /*** functions for handling macros ************************************/ 335 336 /* 337 * In the hash table for a single macro, look up an entry by 338 * the macro value or add an empty one if it doesn't exist yet. 339 */ 340 static struct macro_entry * 341 get_macro_entry(struct ohash *macro, const char *value, int32_t np) 342 { 343 struct macro_entry *entry; 344 size_t len; 345 unsigned int slot; 346 347 slot = ohash_qlookup(macro, value); 348 if ((entry = ohash_find(macro, slot)) == NULL) { 349 len = strlen(value) + 1; 350 entry = mandoc_malloc(sizeof(*entry) + len); 351 memcpy(&entry->value, value, len); 352 entry->pages = dba_array_new(np, DBA_GROW); 353 ohash_insert(macro, slot, entry); 354 } 355 return entry; 356 } 357 358 /* 359 * In addition to get_macro_entry(), add multiple page references, 360 * converting them from the on-disk format (byte offsets in the file) 361 * to page pointers in memory. 362 */ 363 void 364 dba_macro_new(struct dba *dba, int32_t im, const char *value, 365 const int32_t *pp) 366 { 367 struct macro_entry *entry; 368 const int32_t *ip; 369 int32_t np; 370 371 np = 0; 372 for (ip = pp; *ip; ip++) 373 np++; 374 375 entry = get_macro_entry(dba_array_get(dba->macros, im), value, np); 376 for (ip = pp; *ip; ip++) 377 dba_array_add(entry->pages, dba_array_get(dba->pages, 378 be32toh(*ip) / 5 / sizeof(*ip) - 1)); 379 } 380 381 /* 382 * In addition to get_macro_entry(), add one page reference, 383 * directly taking the in-memory page pointer as an argument. 384 */ 385 void 386 dba_macro_add(struct dba_array *macros, int32_t im, const char *value, 387 struct dba_array *page) 388 { 389 struct macro_entry *entry; 390 391 if (*value == '\0') 392 return; 393 entry = get_macro_entry(dba_array_get(macros, im), value, 1); 394 dba_array_add(entry->pages, page); 395 } 396 397 /* 398 * Write the macros table to disk; the format is: 399 * - The number of macro tables (actually, MACRO_MAX). 400 * - That number of pointers to the individual macro tables. 401 * - The individual macro tables. 402 */ 403 static void 404 dba_macros_write(struct dba_array *macros) 405 { 406 struct ohash *macro; 407 int32_t im, pos_macros, pos_end; 408 409 pos_macros = dba_array_writelen(macros, 1); 410 im = 0; 411 dba_array_FOREACH(macros, macro) { 412 dba_array_setpos(macros, im++, dba_tell()); 413 dba_macro_write(macro); 414 } 415 pos_end = dba_tell(); 416 dba_seek(pos_macros); 417 dba_array_writepos(macros); 418 dba_seek(pos_end); 419 } 420 421 /* 422 * Write one individual macro table to disk; the format is: 423 * - The number of entries in the table. 424 * - For each entry, two pointers, the first one to the value 425 * and the second one to the list of pages. 426 * - A list of values, each ending in a NUL byte. 427 * - To assure alignment of following integers, 428 * padding with NUL bytes up to a multiple of four bytes. 429 * - A list of pointers to pages, each list ending in a 0 integer. 430 */ 431 static void 432 dba_macro_write(struct ohash *macro) 433 { 434 struct macro_entry **entries, *entry; 435 struct dba_array *page; 436 int32_t *kpos, *dpos; 437 unsigned int ie, ne, slot; 438 int use; 439 int32_t addr, pos_macro, pos_end; 440 441 /* Temporary storage for filtering and sorting. */ 442 443 ne = ohash_entries(macro); 444 entries = mandoc_reallocarray(NULL, ne, sizeof(*entries)); 445 kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos)); 446 dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos)); 447 448 /* Build a list of non-empty entries and sort it. */ 449 450 ne = 0; 451 for (entry = ohash_first(macro, &slot); entry != NULL; 452 entry = ohash_next(macro, &slot)) { 453 use = 0; 454 dba_array_FOREACH(entry->pages, page) 455 if (dba_array_getpos(page)) 456 use = 1; 457 if (use) 458 entries[ne++] = entry; 459 } 460 qsort(entries, ne, sizeof(*entries), compare_entries); 461 462 /* Number of entries, and space for the pointer pairs. */ 463 464 dba_int_write(ne); 465 pos_macro = dba_skip(2, ne); 466 467 /* String table. */ 468 469 for (ie = 0; ie < ne; ie++) { 470 kpos[ie] = dba_tell(); 471 dba_str_write(entries[ie]->value); 472 } 473 dba_align(); 474 475 /* Pages table. */ 476 477 for (ie = 0; ie < ne; ie++) { 478 dpos[ie] = dba_tell(); 479 dba_array_FOREACH(entries[ie]->pages, page) 480 if ((addr = dba_array_getpos(page))) 481 dba_int_write(addr); 482 dba_int_write(0); 483 } 484 pos_end = dba_tell(); 485 486 /* Fill in the pointer pairs. */ 487 488 dba_seek(pos_macro); 489 for (ie = 0; ie < ne; ie++) { 490 dba_int_write(kpos[ie]); 491 dba_int_write(dpos[ie]); 492 } 493 dba_seek(pos_end); 494 495 free(entries); 496 free(kpos); 497 free(dpos); 498 } 499 500 static int 501 compare_entries(const void *vp1, const void *vp2) 502 { 503 const struct macro_entry *ep1, *ep2; 504 505 ep1 = *(const struct macro_entry * const *)vp1; 506 ep2 = *(const struct macro_entry * const *)vp2; 507 return strcmp(ep1->value, ep2->value); 508 } 509