1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)hash.c 8.12 (Berkeley) 11/7/95"; 39 #endif /* LIBC_SCCS and not lint */ 40 41 #include <sys/param.h> 42 #include <sys/stat.h> 43 44 #include <errno.h> 45 #include <fcntl.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 #ifdef DEBUG 51 #include <assert.h> 52 #endif 53 54 #include "db-int.h" 55 #include "hash.h" 56 #include "page.h" 57 #include "extern.h" 58 59 static int32_t flush_meta __P((HTAB *)); 60 static int32_t hash_access __P((HTAB *, ACTION, const DBT *, DBT *)); 61 static int32_t hash_close __P((DB *)); 62 static int32_t hash_delete __P((const DB *, const DBT *, u_int32_t)); 63 static int32_t hash_fd __P((const DB *)); 64 static int32_t hash_get __P((const DB *, const DBT *, DBT *, u_int32_t)); 65 static int32_t hash_put __P((const DB *, DBT *, const DBT *, u_int32_t)); 66 static int32_t hash_seq __P((const DB *, DBT *, DBT *, u_int32_t)); 67 static int32_t hash_sync __P((const DB *, u_int32_t)); 68 static int32_t hdestroy __P((HTAB *)); 69 static int32_t cursor_get __P((const DB *, CURSOR *, DBT *, DBT *, \ 70 u_int32_t)); 71 static int32_t cursor_delete __P((const DB *, CURSOR *, u_int32_t)); 72 static HTAB *init_hash __P((HTAB *, const char *, const HASHINFO *)); 73 static int32_t init_htab __P((HTAB *, int32_t)); 74 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN 75 static void swap_header __P((HTAB *)); 76 static void swap_header_copy __P((HASHHDR *, HASHHDR *)); 77 #endif 78 static u_int32_t hget_header __P((HTAB *, u_int32_t)); 79 static void hput_header __P((HTAB *)); 80 81 #define RETURN_ERROR(ERR, LOC) { save_errno = ERR; goto LOC; } 82 83 /* Return values */ 84 #define SUCCESS (0) 85 #define ERROR (-1) 86 #define ABNORMAL (1) 87 88 #ifdef HASH_STATISTICS 89 u_int32_t hash_accesses, hash_collisions, hash_expansions, hash_overflows, 90 hash_bigpages; 91 #endif 92 93 /************************** INTERFACE ROUTINES 
***************************/
/* OPEN/CLOSE */

/*
 * __kdb2_hash_open --
 *	Open (or create) a hash database file and build the DB access-method
 *	handle around it.
 *
 *	file:	pathname of the database (must be non-NULL; O_WRONLY alone
 *		is rejected because the header must always be readable).
 *	flags:	open(2) flags; also recorded in hashp->flags so the access
 *		routines can enforce read-only mode.
 *	mode:	open(2) creation mode.
 *	info:	optional tuning parameters (bucket size, fill factor, hash
 *		function, byte order, cache size, element-count hint).
 *	dflags:	unused in this routine -- NOTE(review): presumably DB-layer
 *		open flags; confirm against the db(3) dispatch code.
 *
 * Returns:
 *	a malloc'd DB handle on success; NULL with errno set on failure.
 */
extern DB *
__kdb2_hash_open(const char *file, int flags, int mode, const HASHINFO *info,
    int dflags)
{
	struct stat statbuf;
	DB *dbp;
	DBT mpool_key;
	HTAB *hashp;
	int32_t bpages, csize, new_table, save_errno;

	if (!file || (flags & O_ACCMODE) == O_WRONLY) {
		errno = EINVAL;
		return (NULL);
	}
	if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB))))
		return (NULL);
	hashp->fp = -1;
	/*
	 * Even if user wants write only, we need to be able to read
	 * the actual file, so we need to open it read/write.  But, the
	 * field in the hashp structure needs to be accurate so that
	 * we can check accesses.
	 */
	hashp->flags = flags;
	hashp->save_file = hashp->flags & O_RDWR;

	/*
	 * A brand-new table is created when the caller truncates or the
	 * file does not exist yet.  (The !file test below is dead code:
	 * a NULL file was already rejected above.)
	 */
	new_table = 0;
	if (!file || (flags & O_TRUNC) ||
	    (stat(file, &statbuf) && (errno == ENOENT))) {
		if (errno == ENOENT)
			errno = 0;	/* In case someone looks at errno. */
		new_table = 1;
	}
	if (file) {
		if ((hashp->fp = open(file, flags|O_BINARY, mode)) == -1)
			RETURN_ERROR(errno, error0);
		/* Set close-on-exec on the database descriptor. */
		(void)fcntl(hashp->fp, F_SETFD, 1);
	}

	/* Process arguments to set up hash table header. */
	if (new_table) {
		if (!(hashp = init_hash(hashp, file, info)))
			RETURN_ERROR(errno, error1);
	} else {
		/* Table already exists */
		if (info && info->hash)
			hashp->hash = info->hash;
		else
			hashp->hash = __default_hash;

		/* copy metadata from page into header */
		if (hget_header(hashp,
		    (info && info->bsize ? info->bsize : DEF_BUCKET_SIZE)) !=
		    sizeof(HASHHDR))
			RETURN_ERROR(EFTYPE, error1);

		/* Verify file type, versions and hash function */
		if (hashp->hdr.magic != HASHMAGIC)
			RETURN_ERROR(EFTYPE, error1);
#define	OLDHASHVERSION	1
		if (hashp->hdr.version != HASHVERSION &&
		    hashp->hdr.version != OLDHASHVERSION)
			RETURN_ERROR(EFTYPE, error1);
		/*
		 * The stored h_charkey must match what the (possibly
		 * user-supplied) hash function produces now, or lookups
		 * would land in the wrong buckets.
		 */
		if (hashp->hash(CHARKEY, sizeof(CHARKEY))
		    != hashp->hdr.h_charkey)
			RETURN_ERROR(EFTYPE, error1);
		/*
		 * Figure out how many segments we need.  Max_Bucket is the
		 * maximum bucket number, so the number of buckets is
		 * max_bucket + 1.
		 */

		/* Read in bitmaps */
		bpages = (hashp->hdr.spares[hashp->hdr.ovfl_point] +
		    (hashp->hdr.bsize << BYTE_SHIFT) - 1) >>
		    (hashp->hdr.bshift + BYTE_SHIFT);

		hashp->nmaps = bpages;
		(void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
	}

	/* start up mpool */
	mpool_key.data = (u_int8_t *)file;
	mpool_key.size = strlen(file);

	if (info && info->cachesize)
		csize = info->cachesize / hashp->hdr.bsize;
	else
		csize = DEF_CACHESIZE / hashp->hdr.bsize;
	hashp->mp = mpool_open(&mpool_key, hashp->fp, hashp->hdr.bsize, csize);

	if (!hashp->mp)
		RETURN_ERROR(errno, error1);
	mpool_filter(hashp->mp, __pgin_routine, __pgout_routine, hashp);

	/*
	 * For a new table, set up the bitmaps.
	 */
	if (new_table &&
	    init_htab(hashp, info && info->nelem ? info->nelem : 1))
		goto error2;

	/* initialize the cursor queue */
	TAILQ_INIT(&hashp->curs_queue);
	hashp->seq_cursor = NULL;


	/* get a chunk of memory for our split buffer */
	hashp->split_buf = (PAGE16 *)malloc(hashp->hdr.bsize);
	if (!hashp->split_buf)
		goto error2;

	hashp->new_file = new_table;

	if (!(dbp = (DB *)malloc(sizeof(DB))))
		goto error2;

	/* Wire up the access-method dispatch table. */
	dbp->internal = hashp;
	dbp->close = hash_close;
	dbp->del = hash_delete;
	dbp->fd = hash_fd;
	dbp->get = hash_get;
	dbp->put = hash_put;
	dbp->seq = hash_seq;
	dbp->sync = hash_sync;
	dbp->type = DB_HASH;

#ifdef DEBUG
	(void)fprintf(stderr,
"%s\n%s%lx\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
	    "init_htab:",
	    "TABLE POINTER ", (void *)hashp,
	    "BUCKET SIZE ", hashp->hdr.bsize,
	    "BUCKET SHIFT ", hashp->hdr.bshift,
	    "FILL FACTOR ", hashp->hdr.ffactor,
	    "MAX BUCKET ", hashp->hdr.max_bucket,
	    "OVFL POINT ", hashp->hdr.ovfl_point,
	    "LAST FREED ", hashp->hdr.last_freed,
	    "HIGH MASK ", hashp->hdr.high_mask,
	    "LOW MASK ", hashp->hdr.low_mask,
	    "NKEYS ", hashp->hdr.nkeys);
#endif
#ifdef HASH_STATISTICS
	hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
	hash_bigpages = 0;
#endif
	return (dbp);

error2:
	save_errno = errno;
	hdestroy(hashp);
	errno = save_errno;
	return (NULL);

error1:
	if (hashp != NULL)
		(void)close(hashp->fp);

error0:
	free(hashp);
	errno = save_errno;
	return (NULL);
}

/*
 * hash_close --
 *	DB close entry point: flush and tear down the whole table via
 *	hdestroy(), then free the DB handle itself.
 *
 * Returns SUCCESS, or ERROR (from hdestroy, or a NULL handle).
 */
static int32_t
hash_close(DB *dbp)
{
	HTAB *hashp;
	int32_t retval;

	if (!dbp)
		return (ERROR);

	hashp = (HTAB *)dbp->internal;
	retval = hdestroy(hashp);
	free(dbp);
	return (retval);
}

/*
 * hash_fd --
 *	DB fd entry point: expose the underlying file descriptor, or fail
 *	with ENOENT when the table has no backing file (fp == -1).
 */
static int32_t
hash_fd(const DB *dbp)
{
	HTAB *hashp;

	if (!dbp)
		return (ERROR);

	hashp = (HTAB *)dbp->internal;
	if (hashp->fp == -1) {
		errno = ENOENT;
		return (-1);
	}
	return (hashp->fp);
}

/************************** LOCAL CREATION ROUTINES **********************/

/*
 * init_hash --
 *	Fill in default header fields for a brand-new table, size the
 *	bucket to the filesystem block size when a file exists, then apply
 *	any user overrides from `info'.
 *
 * Returns hashp on success, NULL (with errno set for EINVAL cases) on error.
 */
static HTAB *
init_hash(HTAB *hashp, const char *file, const HASHINFO *info)
{
	struct stat statbuf;

	hashp->hdr.nkeys = 0;
	hashp->hdr.lorder = DB_BYTE_ORDER;
	hashp->hdr.bsize = DEF_BUCKET_SIZE;
	hashp->hdr.bshift = DEF_BUCKET_SHIFT;
	hashp->hdr.ffactor = DEF_FFACTOR;
	hashp->hash = __default_hash;
	memset(hashp->hdr.spares, 0, sizeof(hashp->hdr.spares));
	memset(hashp->hdr.bitmaps, 0, sizeof(hashp->hdr.bitmaps));

	/* Fix bucket size to be optimal for file system */
	if (file != NULL) {
		if (stat(file, &statbuf))
			return (NULL);
		hashp->hdr.bsize = statbuf.st_blksize;
		if (hashp->hdr.bsize > MAX_BSIZE)
			hashp->hdr.bsize = MAX_BSIZE;
		hashp->hdr.bshift = __log2(hashp->hdr.bsize);
	}
	if (info) {
		if (info->bsize) {
			/* Round pagesize up to power of 2 */
			hashp->hdr.bshift = __log2(info->bsize);
			hashp->hdr.bsize = 1 << hashp->hdr.bshift;
			if (hashp->hdr.bsize > MAX_BSIZE) {
				errno = EINVAL;
				return (NULL);
			}
		}
		if (info->ffactor)
			hashp->hdr.ffactor = info->ffactor;
		if (info->hash)
			hashp->hash = info->hash;
		if (info->lorder) {
			if ((info->lorder != DB_BIG_ENDIAN) &&
			    (info->lorder != DB_LITTLE_ENDIAN)) {
				errno = EINVAL;
				return (NULL);
			}
			hashp->hdr.lorder = info->lorder;
		}
	}
	return (hashp);
}

/*
 * init_htab --
 *	Size a brand-new table for `nelem' elements (rounded up to the next
 *	power-of-two bucket count) and create its first bitmap page.
 *
 * Returns 0 on No Error
 */
static int32_t
init_htab(HTAB *hashp, int32_t nelem)
{
	int32_t l2, nbuckets;

	/*
	 * Divide number of elements by the fill factor and determine a
	 * desired number of buckets.  Allocate space for the next greater
	 * power of two number of buckets.
	 */
	nelem = (nelem - 1) / hashp->hdr.ffactor + 1;

	l2 = __log2(MAX(nelem, 2));
	nbuckets = 1 << l2;

	hashp->hdr.spares[l2] = l2 + 1;
	hashp->hdr.spares[l2 + 1] = l2 + 1;
	hashp->hdr.ovfl_point = l2;
	hashp->hdr.last_freed = 2;

	hashp->hdr.max_bucket = hashp->hdr.low_mask = nbuckets - 1;
	hashp->hdr.high_mask = (nbuckets << 1) - 1;

	/*
	 * The number of header pages is the size of the header divided by
	 * the amount of freespace on header pages (the page size - the
	 * size of 1 integer where the length of the header info on that
	 * page is stored) plus another page if it didn't divide evenly.
	 */
	hashp->hdr.hdrpages =
	    (sizeof(HASHHDR) / (hashp->hdr.bsize - HEADER_OVERHEAD)) +
	    (((sizeof(HASHHDR) % (hashp->hdr.bsize - HEADER_OVERHEAD)) == 0)
	    ? 0 : 1);

	/* Create pages for these buckets */
	/*
	for (i = 0; i <= hashp->hdr.max_bucket; i++) {
		if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0)
			return (-1);
	}
	*/

	/* First bitmap page is at: splitpoint l2 page offset 1 */
	if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0))
		return (-1);

	return (0);
}

/*
 * Functions to get/put hash header.  We access the file directly.
 */

/*
 * hget_header --
 *	Read the on-disk header into hashp->hdr, byteswapping it on
 *	little-endian hosts.  NOTE(review): the page_size argument is
 *	unused here; the read is always sizeof(HASHHDR) bytes.
 *
 * Returns sizeof(HASHHDR) on success, 0 on a short read or read error.
 */
static u_int32_t
hget_header(HTAB *hashp, u_int32_t page_size)
{
	u_int32_t num_copied;
	u_int8_t *hdr_dest;

	num_copied = 0;

	hdr_dest = (u_int8_t *)&hashp->hdr;

	/*
	 * XXX
	 * This should not be printing to stderr on a "normal" error case.
	 */
	lseek(hashp->fp, 0, SEEK_SET);
	num_copied = read(hashp->fp, hdr_dest, sizeof(HASHHDR));
	if (num_copied != sizeof(HASHHDR)) {
		fprintf(stderr, "hash: could not retrieve header");
		return (0);
	}
#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
	swap_header(hashp);
#endif
	return (num_copied);
}

/*
 * hput_header --
 *	Write hashp->hdr back to the start of the file, byteswapping a
 *	local copy first on little-endian hosts so the in-memory header
 *	is left untouched.  A short write is only reported to stderr.
 */
static void
hput_header(HTAB *hashp)
{
	HASHHDR *whdrp;
#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
	HASHHDR whdr;
#endif
	u_int32_t num_copied;

	num_copied = 0;

	whdrp = &hashp->hdr;
#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
	whdrp = &whdr;
	swap_header_copy(&hashp->hdr, whdrp);
#endif

	lseek(hashp->fp, 0, SEEK_SET);
	num_copied = write(hashp->fp, whdrp, sizeof(HASHHDR));
	if (num_copied != sizeof(HASHHDR))
		(void)fprintf(stderr, "hash: could not write hash header");
	return;
}

/********************** DESTROY/CLOSE ROUTINES ************************/

/*
 * Flushes any changes to the file if necessary and destroys the hashp
 * structure, freeing all allocated space.
451 */ 452 static int32_t 453 hdestroy(HTAB *hashp) 454 { 455 int32_t save_errno; 456 457 save_errno = 0; 458 459 #ifdef HASH_STATISTICS 460 { int i; 461 (void)fprintf(stderr, "hdestroy: accesses %ld collisions %ld\n", 462 hash_accesses, hash_collisions); 463 (void)fprintf(stderr, 464 "hdestroy: expansions %ld\n", hash_expansions); 465 (void)fprintf(stderr, 466 "hdestroy: overflows %ld\n", hash_overflows); 467 (void)fprintf(stderr, 468 "hdestroy: big key/data pages %ld\n", hash_bigpages); 469 (void)fprintf(stderr, 470 "keys %ld maxp %d\n", hashp->hdr.nkeys, hashp->hdr.max_bucket); 471 472 for (i = 0; i < NCACHED; i++) 473 (void)fprintf(stderr, 474 "spares[%d] = %d\n", i, hashp->hdr.spares[i]); 475 } 476 #endif 477 478 if (flush_meta(hashp) && !save_errno) 479 save_errno = errno; 480 481 /* Free the split page */ 482 if (hashp->split_buf) 483 free(hashp->split_buf); 484 485 /* Free the big key and big data returns */ 486 if (hashp->bigkey_buf) 487 free(hashp->bigkey_buf); 488 if (hashp->bigdata_buf) 489 free(hashp->bigdata_buf); 490 491 /* XXX This should really iterate over the cursor queue, but 492 it's not clear how to do that, and the only cursor a hash 493 table ever creates is the one used by hash_seq(). Passing 494 NULL as the first arg is also a kludge, but I know that 495 it's never used, so I do it. The intent is to plug the 496 memory leak. Correctness can come later. */ 497 498 if (hashp->seq_cursor) 499 hashp->seq_cursor->delete(NULL, hashp->seq_cursor, 0); 500 501 /* shut down mpool */ 502 mpool_sync(hashp->mp); 503 mpool_close(hashp->mp); 504 505 if (hashp->fp != -1) 506 (void)close(hashp->fp); 507 508 /* 509 * *** This may cause problems if hashp->fname is set in any case 510 * other than the case that we are generating a temporary file name. 511 * Note that the new version of mpool should support temporary 512 * files within mpool itself. 
513 */ 514 if (hashp->fname && !hashp->save_file) { 515 #ifdef DEBUG 516 fprintf(stderr, "Unlinking file %s.\n", hashp->fname); 517 #endif 518 /* we need to chmod the file to allow it to be deleted... */ 519 chmod(hashp->fname, 0700); 520 unlink(hashp->fname); 521 } 522 free(hashp); 523 524 if (save_errno) { 525 errno = save_errno; 526 return (ERROR); 527 } 528 return (SUCCESS); 529 } 530 531 /* 532 * Write modified pages to disk 533 * 534 * Returns: 535 * 0 == OK 536 * -1 ERROR 537 */ 538 static int32_t 539 hash_sync(const DB *dbp, u_int32_t flags) 540 { 541 HTAB *hashp; 542 543 hashp = (HTAB *)dbp->internal; 544 545 /* 546 * XXX 547 * Check success/failure conditions. 548 */ 549 return (flush_meta(hashp) || mpool_sync(hashp->mp)); 550 } 551 552 /* 553 * Returns: 554 * 0 == OK 555 * -1 indicates that errno should be set 556 */ 557 static int32_t 558 flush_meta(HTAB *hashp) 559 { 560 int32_t i; 561 562 if (!hashp->save_file) 563 return (0); 564 hashp->hdr.magic = HASHMAGIC; 565 hashp->hdr.version = HASHVERSION; 566 hashp->hdr.h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY)); 567 568 /* write out metadata */ 569 hput_header(hashp); 570 571 for (i = 0; i < NCACHED; i++) 572 if (hashp->mapp[i]) { 573 if (__put_page(hashp, 574 (PAGE16 *)hashp->mapp[i], A_BITMAP, 1)) 575 return (-1); 576 hashp->mapp[i] = NULL; 577 } 578 return (0); 579 } 580 581 /*******************************SEARCH ROUTINES *****************************/ 582 /* 583 * All the access routines return 584 * 585 * Returns: 586 * 0 on SUCCESS 587 * 1 to indicate an external ERROR (i.e. key not found, etc) 588 * -1 to indicate an internal ERROR (i.e. out of memory, etc) 589 */ 590 591 /* *** make sure this is true! 
 */

/*
 * hash_get --
 *	DB get entry point.  No flags are defined for get, so any non-zero
 *	flag is rejected with EINVAL.  Delegates to hash_access(HASH_GET).
 */
static int32_t
hash_get(const DB *dbp, const DBT *key, DBT *data, u_int32_t flag)
{
	HTAB *hashp;

	hashp = (HTAB *)dbp->internal;
	if (flag) {
		hashp->local_errno = errno = EINVAL;
		return (ERROR);
	}
	return (hash_access(hashp, HASH_GET, key, data));
}

/*
 * hash_put --
 *	DB put entry point.  Only R_NOOVERWRITE is accepted as a flag,
 *	and writes to a read-only table fail with EPERM.  Delegates to
 *	hash_access with HASH_PUTNEW (no-overwrite) or HASH_PUT (replace).
 *	NOTE(review): the cast drops const from `data'; hash_access is
 *	not expected to modify it on the put path -- confirm.
 */
static int32_t
hash_put(const DB *dbp, DBT *key, const DBT *data, u_int32_t flag)
{
	HTAB *hashp;

	hashp = (HTAB *)dbp->internal;
	if (flag && flag != R_NOOVERWRITE) {
		hashp->local_errno = errno = EINVAL;
		return (ERROR);
	}
	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
		hashp->local_errno = errno = EPERM;
		return (ERROR);
	}
	return (hash_access(hashp, flag == R_NOOVERWRITE ?
	    HASH_PUTNEW : HASH_PUT, key, (DBT *)data));
}

/*
 * hash_delete --
 *	DB del entry point.  No flags are defined, and deletes from a
 *	read-only table fail with EPERM.  Delegates to
 *	hash_access(HASH_DELETE).
 */
static int32_t
hash_delete(const DB *dbp, const DBT *key, u_int32_t flag)
{
	HTAB *hashp;

	if (flag) {
		hashp = (HTAB *)dbp->internal;
		hashp->local_errno = errno = EINVAL;
		return (ERROR);
	}
	hashp = (HTAB *)dbp->internal;
	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
		hashp->local_errno = errno = EPERM;
		return (ERROR);
	}

	return (hash_access(hashp, HASH_DELETE, key, NULL));
}

/*
 * hash_access --
 *	Core lookup/insert/replace/delete engine shared by the entry
 *	points above.  Hashes the key to a bucket, walks the bucket's
 *	page chain looking for the key (tracking, via item_info, where a
 *	new pair of this size could be placed), then performs `action'.
 *
 * Assume that hashp has been set in wrapper routine.
 *
 * Returns SUCCESS (0), ABNORMAL (1, e.g. key not found / duplicate on
 * PUTNEW / end of chain error), or ERROR (-1) on internal failure.
 */
static int32_t
hash_access(HTAB *hashp, ACTION action, const DBT *key, DBT *val)
{
	DBT page_key, page_val;
	CURSOR cursor;
	ITEM_INFO item_info;
	u_int32_t bucket;
	u_int32_t num_items;

#ifdef HASH_STATISTICS
	hash_accesses++;
#endif

	num_items = 0;

	/*
	 * Set up item_info so that we're looking for space to add an item
	 * as we cycle through the pages looking for the key.
	 */
	if (action == HASH_PUT || action == HASH_PUTNEW) {
		if (ISBIG(key->size + val->size, hashp))
			item_info.seek_size = PAIR_OVERHEAD;
		else
			item_info.seek_size = key->size + val->size;
	} else
		item_info.seek_size = 0;
	item_info.seek_found_page = 0;

	bucket = __call_hash(hashp, (int8_t *)key->data, key->size);

	cursor.pagep = NULL;
	__get_item_reset(hashp, &cursor);

	/* Scan the bucket's page chain for a matching key. */
	cursor.bucket = bucket;
	while (1) {
		__get_item_next(hashp, &cursor, &page_key, &page_val, &item_info);
		if (item_info.status == ITEM_ERROR)
			return (ABNORMAL);
		if (item_info.status == ITEM_NO_MORE)
			break;
		num_items++;
		if (item_info.key_off == BIGPAIR) {
			/*
			 * !!!
			 * 0 is a valid index.
			 */
			if (__find_bigpair(hashp, &cursor, (int8_t *)key->data,
			    key->size) > 0)
				goto found;
		} else if (key->size == page_key.size &&
		    !memcmp(key->data, page_key.data, key->size))
			goto found;
	}
#ifdef HASH_STATISTICS
	hash_collisions++;
#endif
	__get_item_done(hashp, &cursor);

	/*
	 * At this point, item_info will list either the last page in
	 * the chain, or the last page in the chain plus a pgno for where
	 * to find the first page in the chain with space for the
	 * item we wish to add.
	 */

	/* Not found */
	switch (action) {
	case HASH_PUT:
	case HASH_PUTNEW:
		if (__addel(hashp, &item_info, key, val, num_items, 0))
			return (ERROR);
		break;
	case HASH_GET:
	case HASH_DELETE:
	default:
		/* Lookup/delete of a missing key is an external error. */
		return (ABNORMAL);
	}

	if (item_info.caused_expand)
		__expand_table(hashp);
	return (SUCCESS);

found:	__get_item_done(hashp, &cursor);

	switch (action) {
	case HASH_PUTNEW:
		/* Key already present and overwrite was forbidden. */
		/* mpool_put(hashp->mp, pagep, 0); */
		return (ABNORMAL);
	case HASH_GET:
		if (item_info.key_off == BIGPAIR) {
			if (__big_return(hashp, &item_info, val, 0))
				return (ERROR);
		} else {
			val->data = page_val.data;
			val->size = page_val.size;
		}
		/* *** data may not be available! */
		break;
	case HASH_PUT:
		/* Replace: delete the old pair, then add the new one. */
		if (__delpair(hashp, &cursor, &item_info) ||
		    __addel(hashp, &item_info, key, val, UNKNOWN, 0))
			return (ERROR);
		__get_item_done(hashp, &cursor);
		if (item_info.caused_expand)
			__expand_table(hashp);
		break;
	case HASH_DELETE:
		if (__delpair(hashp, &cursor, &item_info))
			return (ERROR);
		break;
	default:
		abort();
	}
	return (SUCCESS);
}

/* ****************** CURSORS ********************************** */

/*
 * __cursor_creat --
 *	Allocate and initialize a new cursor for `dbp' and append it to
 *	the table's cursor queue.  Returns NULL if either allocation
 *	fails (the first allocation is released on the second's failure).
 */
CURSOR *
__cursor_creat(const DB *dbp)
{
	CURSOR *new_curs;
	HTAB *hashp;

	new_curs = (CURSOR *)malloc(sizeof(struct cursor_t));
	if (!new_curs)
		return NULL;
	new_curs->internal =
	    (struct item_info *)malloc(sizeof(struct item_info));
	if (!new_curs->internal) {
		free(new_curs);
		return NULL;
	}
	new_curs->get = cursor_get;
	new_curs->delete = cursor_delete;

	new_curs->bucket = 0;
	new_curs->pgno = INVALID_PGNO;
	new_curs->ndx = 0;
	new_curs->pgndx = 0;
	new_curs->pagep = NULL;

	/* place onto queue of cursors */
	hashp = (HTAB *)dbp->internal;
	TAILQ_INSERT_TAIL(&hashp->curs_queue, new_curs, queue);

	return new_curs;
}

static int32_t 794 cursor_get(const DB *dbp, CURSOR *cursorp, DBT *key, DBT *val, u_int32_t flags) 795 { 796 HTAB *hashp; 797 ITEM_INFO item_info; 798 799 hashp = (HTAB *)dbp->internal; 800 801 if (flags && flags != R_FIRST && flags != R_NEXT) { 802 hashp->local_errno = errno = EINVAL; 803 return (ERROR); 804 } 805 #ifdef HASH_STATISTICS 806 hash_accesses++; 807 #endif 808 809 item_info.seek_size = 0; 810 811 if (flags == R_FIRST) 812 __get_item_first(hashp, cursorp, key, val, &item_info); 813 else 814 __get_item_next(hashp, cursorp, key, val, &item_info); 815 816 /* 817 * This needs to be changed around. As is, get_item_next advances 818 * the pointers on the page but this function actually advances 819 * bucket pointers. This works, since the only other place we 820 * use get_item_next is in hash_access which only deals with one 821 * bucket at a time. However, there is the problem that certain other 822 * functions (such as find_bigpair and delpair) depend on the 823 * pgndx member of the cursor. Right now, they are using pngdx - 1 824 * since indices refer to the __next__ item that is to be fetched 825 * from the page. This is ugly, as you may have noticed, whoever 826 * you are. The best solution would be to depend on item_infos to 827 * deal with _current_ information, and have the cursors only 828 * deal with _next_ information. In that scheme, get_item_next 829 * would also advance buckets. Version 3... 830 */ 831 832 833 /* 834 * Must always enter this loop to do error handling and 835 * check for big key/data pair. 
836 */ 837 while (1) { 838 if (item_info.status == ITEM_OK) { 839 if (item_info.key_off == BIGPAIR && 840 __big_keydata(hashp, cursorp->pagep, key, val, 841 item_info.pgndx)) 842 return (ABNORMAL); 843 844 break; 845 } else if (item_info.status != ITEM_NO_MORE) 846 return (ABNORMAL); 847 848 __put_page(hashp, cursorp->pagep, A_RAW, 0); 849 cursorp->ndx = cursorp->pgndx = 0; 850 cursorp->bucket++; 851 cursorp->pgno = INVALID_PGNO; 852 cursorp->pagep = NULL; 853 if (cursorp->bucket > hashp->hdr.max_bucket) 854 return (ABNORMAL); 855 __get_item_next(hashp, cursorp, key, val, &item_info); 856 } 857 858 __get_item_done(hashp, cursorp); 859 return (0); 860 } 861 862 static int32_t 863 cursor_delete(const DB *dbp, CURSOR *cursor, u_int32_t flags) 864 { 865 /* XXX this is empirically determined, so it might not be completely 866 correct, but it seems to work. At the very least it fixes 867 a memory leak */ 868 869 free(cursor->internal); 870 free(cursor); 871 872 return (0); 873 } 874 875 static int32_t 876 hash_seq(const DB *dbp, DBT *key, DBT *val, u_int32_t flag) 877 { 878 HTAB *hashp; 879 880 /* 881 * Seq just uses the default cursor to go sequecing through the 882 * database. Note that the default cursor is the first in the list. 
883 */ 884 885 hashp = (HTAB *)dbp->internal; 886 if (!hashp->seq_cursor) 887 hashp->seq_cursor = __cursor_creat(dbp); 888 889 return (hashp->seq_cursor->get(dbp, hashp->seq_cursor, key, val, flag)); 890 } 891 892 /********************************* UTILITIES ************************/ 893 894 /* 895 * Returns: 896 * 0 ==> OK 897 * -1 ==> Error 898 */ 899 int32_t 900 __expand_table(HTAB *hashp) 901 { 902 u_int32_t old_bucket, new_bucket; 903 int32_t spare_ndx; 904 905 #ifdef HASH_STATISTICS 906 hash_expansions++; 907 #endif 908 new_bucket = ++hashp->hdr.max_bucket; 909 old_bucket = (hashp->hdr.max_bucket & hashp->hdr.low_mask); 910 911 /* Get a page for this new bucket */ 912 if (__new_page(hashp, new_bucket, A_BUCKET) != 0) 913 return (-1); 914 915 /* 916 * If the split point is increasing (hdr.max_bucket's log base 2 917 * increases), we need to copy the current contents of the spare 918 * split bucket to the next bucket. 919 */ 920 spare_ndx = __log2(hashp->hdr.max_bucket + 1); 921 if (spare_ndx > hashp->hdr.ovfl_point) { 922 hashp->hdr.spares[spare_ndx] = hashp->hdr.spares[hashp->hdr.ovfl_point]; 923 hashp->hdr.ovfl_point = spare_ndx; 924 } 925 if (new_bucket > hashp->hdr.high_mask) { 926 /* Starting a new doubling */ 927 hashp->hdr.low_mask = hashp->hdr.high_mask; 928 hashp->hdr.high_mask = new_bucket | hashp->hdr.low_mask; 929 } 930 if (BUCKET_TO_PAGE(new_bucket) > MAX_PAGES(hashp)) { 931 fprintf(stderr, "hash: Cannot allocate new bucket. Pages exhausted.\n"); 932 return (-1); 933 } 934 /* Relocate records to the new bucket */ 935 return (__split_page(hashp, old_bucket, new_bucket)); 936 } 937 938 u_int32_t 939 __call_hash(HTAB *hashp, int8_t *k, int32_t len) 940 { 941 u_int32_t n, bucket; 942 943 n = hashp->hash(k, len); 944 bucket = n & hashp->hdr.high_mask; 945 if (bucket > hashp->hdr.max_bucket) 946 bucket = bucket & hashp->hdr.low_mask; 947 return (bucket); 948 } 949 950 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN 951 /* 952 * Hashp->hdr needs to be byteswapped. 
953 */ 954 static void 955 swap_header_copy(HASHHDR *srcp, HASHHDR *destp) 956 { 957 int32_t i; 958 959 P_32_COPY(srcp->magic, destp->magic); 960 P_32_COPY(srcp->version, destp->version); 961 P_32_COPY(srcp->lorder, destp->lorder); 962 P_32_COPY(srcp->bsize, destp->bsize); 963 P_32_COPY(srcp->bshift, destp->bshift); 964 P_32_COPY(srcp->ovfl_point, destp->ovfl_point); 965 P_32_COPY(srcp->last_freed, destp->last_freed); 966 P_32_COPY(srcp->max_bucket, destp->max_bucket); 967 P_32_COPY(srcp->high_mask, destp->high_mask); 968 P_32_COPY(srcp->low_mask, destp->low_mask); 969 P_32_COPY(srcp->ffactor, destp->ffactor); 970 P_32_COPY(srcp->nkeys, destp->nkeys); 971 P_32_COPY(srcp->hdrpages, destp->hdrpages); 972 P_32_COPY(srcp->h_charkey, destp->h_charkey); 973 for (i = 0; i < NCACHED; i++) { 974 P_32_COPY(srcp->spares[i], destp->spares[i]); 975 P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]); 976 } 977 } 978 979 static void 980 swap_header(HTAB *hashp) 981 { 982 HASHHDR *hdrp; 983 int32_t i; 984 985 hdrp = &hashp->hdr; 986 987 M_32_SWAP(hdrp->magic); 988 M_32_SWAP(hdrp->version); 989 M_32_SWAP(hdrp->lorder); 990 M_32_SWAP(hdrp->bsize); 991 M_32_SWAP(hdrp->bshift); 992 M_32_SWAP(hdrp->ovfl_point); 993 M_32_SWAP(hdrp->last_freed); 994 M_32_SWAP(hdrp->max_bucket); 995 M_32_SWAP(hdrp->high_mask); 996 M_32_SWAP(hdrp->low_mask); 997 M_32_SWAP(hdrp->ffactor); 998 M_32_SWAP(hdrp->nkeys); 999 M_32_SWAP(hdrp->hdrpages); 1000 M_32_SWAP(hdrp->h_charkey); 1001 for (i = 0; i < NCACHED; i++) { 1002 M_32_SWAP(hdrp->spares[i]); 1003 M_16_SWAP(hdrp->bitmaps[i]); 1004 } 1005 } 1006 #endif /* DB_BYTE_ORDER == DB_LITTLE_ENDIAN */ 1007