1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #if defined(LIBC_SCCS) && !defined(lint) 34 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 35 #endif /* LIBC_SCCS and not lint */ 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 /* 40 * PACKAGE: hash 41 * DESCRIPTION: 42 * Big key/data handling for the hashing package. 43 * 44 * ROUTINES: 45 * External 46 * __big_keydata 47 * __big_split 48 * __big_insert 49 * __big_return 50 * __big_delete 51 * __find_last_page 52 * Internal 53 * collect_key 54 * collect_data 55 */ 56 57 #include <sys/param.h> 58 59 #include <errno.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 64 #ifdef DEBUG 65 #include <assert.h> 66 #endif 67 68 #include <db.h> 69 #include "hash.h" 70 #include "page.h" 71 #include "extern.h" 72 73 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); 74 static int collect_data(HTAB *, BUFHEAD *, int, int); 75 76 /* 77 * Big_insert 78 * 79 * You need to do an insert and the key/data pair is too big 80 * 81 * Returns: 82 * 0 ==> OK 83 *-1 ==> ERROR 84 */ 85 extern int 86 __big_insert(hashp, bufp, key, val) 87 HTAB *hashp; 88 BUFHEAD *bufp; 89 const DBT *key, *val; 90 { 91 u_int16_t *p; 92 int key_size, n, val_size; 93 u_int16_t space, move_bytes, off; 94 char *cp, *key_data, *val_data; 95 96 cp = bufp->page; /* Character pointer of p. */ 97 p = (u_int16_t *)cp; 98 99 key_data = (char *)key->data; 100 key_size = key->size; 101 val_data = (char *)val->data; 102 val_size = val->size; 103 104 /* First move the Key */ 105 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 106 space = FREESPACE(p) - BIGOVERHEAD) { 107 move_bytes = MIN(space, key_size); 108 off = OFFSET(p) - move_bytes; 109 memmove(cp + off, key_data, move_bytes); 110 key_size -= move_bytes; 111 key_data += move_bytes; 112 n = p[0]; 113 p[++n] = off; 114 p[0] = ++n; 115 FREESPACE(p) = off - PAGE_META(n); 116 OFFSET(p) = off; 117 p[n] = PARTIAL_KEY; 118 bufp = __add_ovflpage(hashp, bufp); 119 if (!bufp) 120 return (-1); 121 n = p[0]; 122 if (!key_size) { 123 if (FREESPACE(p)) { 124 move_bytes = MIN(FREESPACE(p), val_size); 125 off = OFFSET(p) - move_bytes; 126 p[n] = off; 127 memmove(cp + off, val_data, move_bytes); 128 val_data += move_bytes; 129 val_size -= move_bytes; 130 p[n - 2] = FULL_KEY_DATA; 131 FREESPACE(p) = FREESPACE(p) - move_bytes; 132 OFFSET(p) = off; 133 } else 134 p[n - 2] = FULL_KEY; 135 } 136 p = (u_int16_t *)bufp->page; 137 cp = bufp->page; 138 bufp->flags |= BUF_MOD; 139 } 140 141 /* Now move the data */ 142 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 143 space = FREESPACE(p) - BIGOVERHEAD) { 144 move_bytes = MIN(space, val_size); 145 /* 146 * Here's the hack to make sure that if the data ends on the 147 * same page as the key ends, FREESPACE is at least one. 148 */ 149 if (space == val_size && val_size == val->size) 150 move_bytes--; 151 off = OFFSET(p) - move_bytes; 152 memmove(cp + off, val_data, move_bytes); 153 val_size -= move_bytes; 154 val_data += move_bytes; 155 n = p[0]; 156 p[++n] = off; 157 p[0] = ++n; 158 FREESPACE(p) = off - PAGE_META(n); 159 OFFSET(p) = off; 160 if (val_size) { 161 p[n] = FULL_KEY; 162 bufp = __add_ovflpage(hashp, bufp); 163 if (!bufp) 164 return (-1); 165 cp = bufp->page; 166 p = (u_int16_t *)cp; 167 } else 168 p[n] = FULL_KEY_DATA; 169 bufp->flags |= BUF_MOD; 170 } 171 return (0); 172 } 173 174 /* 175 * Called when bufp's page contains a partial key (index should be 1) 176 * 177 * All pages in the big key/data pair except bufp are freed. We cannot 178 * free bufp because the page pointing to it is lost and we can't get rid 179 * of its pointer. 180 * 181 * Returns: 182 * 0 => OK 183 *-1 => ERROR 184 */ 185 extern int 186 __big_delete(hashp, bufp) 187 HTAB *hashp; 188 BUFHEAD *bufp; 189 { 190 BUFHEAD *last_bfp, *rbufp; 191 u_int16_t *bp, pageno; 192 int key_done, n; 193 194 rbufp = bufp; 195 last_bfp = NULL; 196 bp = (u_int16_t *)bufp->page; 197 pageno = 0; 198 key_done = 0; 199 200 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 201 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 202 key_done = 1; 203 204 /* 205 * If there is freespace left on a FULL_KEY_DATA page, then 206 * the data is short and fits entirely on this page, and this 207 * is the last page. 208 */ 209 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 210 break; 211 pageno = bp[bp[0] - 1]; 212 rbufp->flags |= BUF_MOD; 213 rbufp = __get_buf(hashp, pageno, rbufp, 0); 214 if (last_bfp) 215 __free_ovflpage(hashp, last_bfp); 216 last_bfp = rbufp; 217 if (!rbufp) 218 return (-1); /* Error. */ 219 bp = (u_int16_t *)rbufp->page; 220 } 221 222 /* 223 * If we get here then rbufp points to the last page of the big 224 * key/data pair. Bufp points to the first one -- it should now be 225 * empty pointing to the next page after this pair. Can't free it 226 * because we don't have the page pointing to it. 227 */ 228 229 /* This is information from the last page of the pair. */ 230 n = bp[0]; 231 pageno = bp[n - 1]; 232 233 /* Now, bp is the first page of the pair. */ 234 bp = (u_int16_t *)bufp->page; 235 if (n > 2) { 236 /* There is an overflow page. */ 237 bp[1] = pageno; 238 bp[2] = OVFLPAGE; 239 bufp->ovfl = rbufp->ovfl; 240 } else 241 /* This is the last page. */ 242 bufp->ovfl = NULL; 243 n -= 2; 244 bp[0] = n; 245 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 246 OFFSET(bp) = hashp->BSIZE - 1; 247 248 bufp->flags |= BUF_MOD; 249 if (rbufp) 250 __free_ovflpage(hashp, rbufp); 251 if (last_bfp != rbufp) 252 __free_ovflpage(hashp, last_bfp); 253 254 hashp->NKEYS--; 255 return (0); 256 } 257 /* 258 * Returns: 259 * 0 = key not found 260 * -1 = get next overflow page 261 * -2 means key not found and this is big key/data 262 * -3 error 263 */ 264 extern int 265 __find_bigpair(hashp, bufp, ndx, key, size) 266 HTAB *hashp; 267 BUFHEAD *bufp; 268 int ndx; 269 char *key; 270 int size; 271 { 272 u_int16_t *bp; 273 char *p; 274 int ksize; 275 u_int16_t bytes; 276 char *kkey; 277 278 bp = (u_int16_t *)bufp->page; 279 p = bufp->page; 280 ksize = size; 281 kkey = key; 282 283 for (bytes = hashp->BSIZE - bp[ndx]; 284 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 285 bytes = hashp->BSIZE - bp[ndx]) { 286 if (memcmp(p + bp[ndx], kkey, bytes)) 287 return (-2); 288 kkey += bytes; 289 ksize -= bytes; 290 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 291 if (!bufp) 292 return (-3); 293 p = bufp->page; 294 bp = (u_int16_t *)p; 295 ndx = 1; 296 } 297 298 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 299 #ifdef HASH_STATISTICS 300 ++hash_collisions; 301 #endif 302 return (-2); 303 } else 304 return (ndx); 305 } 306 307 /* 308 * Given the buffer pointer of the first overflow page of a big pair, 309 * find the end of the big pair 310 * 311 * This will set bpp to the buffer header of the last page of the big pair. 312 * It will return the pageno of the overflow page following the last page 313 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 314 * bucket) 315 */ 316 extern u_int16_t 317 __find_last_page(hashp, bpp) 318 HTAB *hashp; 319 BUFHEAD **bpp; 320 { 321 BUFHEAD *bufp; 322 u_int16_t *bp, pageno; 323 int n; 324 325 bufp = *bpp; 326 bp = (u_int16_t *)bufp->page; 327 for (;;) { 328 n = bp[0]; 329 330 /* 331 * This is the last page if: the tag is FULL_KEY_DATA and 332 * either only 2 entries OVFLPAGE marker is explicit there 333 * is freespace on the page. 334 */ 335 if (bp[2] == FULL_KEY_DATA && 336 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 337 break; 338 339 pageno = bp[n - 1]; 340 bufp = __get_buf(hashp, pageno, bufp, 0); 341 if (!bufp) 342 return (0); /* Need to indicate an error! */ 343 bp = (u_int16_t *)bufp->page; 344 } 345 346 *bpp = bufp; 347 if (bp[0] > 2) 348 return (bp[3]); 349 else 350 return (0); 351 } 352 353 /* 354 * Return the data for the key/data pair that begins on this page at this 355 * index (index should always be 1). 356 */ 357 extern int 358 __big_return(hashp, bufp, ndx, val, set_current) 359 HTAB *hashp; 360 BUFHEAD *bufp; 361 int ndx; 362 DBT *val; 363 int set_current; 364 { 365 BUFHEAD *save_p; 366 u_int16_t *bp, len, off, save_addr; 367 char *tp; 368 369 bp = (u_int16_t *)bufp->page; 370 while (bp[ndx + 1] == PARTIAL_KEY) { 371 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 372 if (!bufp) 373 return (-1); 374 bp = (u_int16_t *)bufp->page; 375 ndx = 1; 376 } 377 378 if (bp[ndx + 1] == FULL_KEY) { 379 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 380 if (!bufp) 381 return (-1); 382 bp = (u_int16_t *)bufp->page; 383 save_p = bufp; 384 save_addr = save_p->addr; 385 off = bp[1]; 386 len = 0; 387 } else 388 if (!FREESPACE(bp)) { 389 /* 390 * This is a hack. We can't distinguish between 391 * FULL_KEY_DATA that contains complete data or 392 * incomplete data, so we require that if the data 393 * is complete, there is at least 1 byte of free 394 * space left. 395 */ 396 off = bp[bp[0]]; 397 len = bp[1] - off; 398 save_p = bufp; 399 save_addr = bufp->addr; 400 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 401 if (!bufp) 402 return (-1); 403 bp = (u_int16_t *)bufp->page; 404 } else { 405 /* The data is all on one page. */ 406 tp = (char *)bp; 407 off = bp[bp[0]]; 408 val->data = (u_char *)tp + off; 409 val->size = bp[1] - off; 410 if (set_current) { 411 if (bp[0] == 2) { /* No more buckets in 412 * chain */ 413 hashp->cpage = NULL; 414 hashp->cbucket++; 415 hashp->cndx = 1; 416 } else { 417 hashp->cpage = __get_buf(hashp, 418 bp[bp[0] - 1], bufp, 0); 419 if (!hashp->cpage) 420 return (-1); 421 hashp->cndx = 1; 422 if (!((u_int16_t *) 423 hashp->cpage->page)[0]) { 424 hashp->cbucket++; 425 hashp->cpage = NULL; 426 } 427 } 428 } 429 return (0); 430 } 431 432 val->size = collect_data(hashp, bufp, (int)len, set_current); 433 if (val->size == -1) 434 return (-1); 435 if (save_p->addr != save_addr) { 436 /* We are pretty short on buffers. */ 437 errno = EINVAL; /* OUT OF BUFFERS */ 438 return (-1); 439 } 440 memmove(hashp->tmp_buf, (save_p->page) + off, len); 441 val->data = (u_char *)hashp->tmp_buf; 442 return (0); 443 } 444 /* 445 * Count how big the total datasize is by recursing through the pages. Then 446 * allocate a buffer and copy the data as you recurse up. 447 */ 448 static int 449 collect_data(hashp, bufp, len, set) 450 HTAB *hashp; 451 BUFHEAD *bufp; 452 int len, set; 453 { 454 u_int16_t *bp; 455 char *p; 456 BUFHEAD *xbp; 457 u_int16_t save_addr; 458 int mylen, totlen; 459 460 p = bufp->page; 461 bp = (u_int16_t *)p; 462 mylen = hashp->BSIZE - bp[1]; 463 save_addr = bufp->addr; 464 465 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 466 totlen = len + mylen; 467 if (hashp->tmp_buf) 468 free(hashp->tmp_buf); 469 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 470 return (-1); 471 if (set) { 472 hashp->cndx = 1; 473 if (bp[0] == 2) { /* No more buckets in chain */ 474 hashp->cpage = NULL; 475 hashp->cbucket++; 476 } else { 477 hashp->cpage = 478 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 479 if (!hashp->cpage) 480 return (-1); 481 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 482 hashp->cbucket++; 483 hashp->cpage = NULL; 484 } 485 } 486 } 487 } else { 488 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 489 if (!xbp || ((totlen = 490 collect_data(hashp, xbp, len + mylen, set)) < 1)) 491 return (-1); 492 } 493 if (bufp->addr != save_addr) { 494 errno = EINVAL; /* Out of buffers. */ 495 return (-1); 496 } 497 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 498 return (totlen); 499 } 500 501 /* 502 * Fill in the key and data for this big pair. 503 */ 504 extern int 505 __big_keydata(hashp, bufp, key, val, set) 506 HTAB *hashp; 507 BUFHEAD *bufp; 508 DBT *key, *val; 509 int set; 510 { 511 key->size = collect_key(hashp, bufp, 0, val, set); 512 if (key->size == -1) 513 return (-1); 514 key->data = (u_char *)hashp->tmp_key; 515 return (0); 516 } 517 518 /* 519 * Count how big the total key size is by recursing through the pages. Then 520 * collect the data, allocate a buffer and copy the key as you recurse up. 521 */ 522 static int 523 collect_key(hashp, bufp, len, val, set) 524 HTAB *hashp; 525 BUFHEAD *bufp; 526 int len; 527 DBT *val; 528 int set; 529 { 530 BUFHEAD *xbp; 531 char *p; 532 int mylen, totlen; 533 u_int16_t *bp, save_addr; 534 535 p = bufp->page; 536 bp = (u_int16_t *)p; 537 mylen = hashp->BSIZE - bp[1]; 538 539 save_addr = bufp->addr; 540 totlen = len + mylen; 541 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 542 if (hashp->tmp_key != NULL) 543 free(hashp->tmp_key); 544 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 545 return (-1); 546 if (__big_return(hashp, bufp, 1, val, set)) 547 return (-1); 548 } else { 549 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 550 if (!xbp || ((totlen = 551 collect_key(hashp, xbp, totlen, val, set)) < 1)) 552 return (-1); 553 } 554 if (bufp->addr != save_addr) { 555 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 556 return (-1); 557 } 558 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 559 return (totlen); 560 } 561 562 /* 563 * Returns: 564 * 0 => OK 565 * -1 => error 566 */ 567 extern int 568 __big_split(hashp, op, np, big_keyp, addr, obucket, ret) 569 HTAB *hashp; 570 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ 571 BUFHEAD *np; /* Pointer to new bucket page */ 572 /* Pointer to first page containing the big key/data */ 573 BUFHEAD *big_keyp; 574 int addr; /* Address of big_keyp */ 575 u_int32_t obucket;/* Old Bucket */ 576 SPLIT_RETURN *ret; 577 { 578 BUFHEAD *tmpp; 579 u_int16_t *tp; 580 BUFHEAD *bp; 581 DBT key, val; 582 u_int32_t change; 583 u_int16_t free_space, n, off; 584 585 bp = big_keyp; 586 587 /* Now figure out where the big key/data goes */ 588 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 589 return (-1); 590 change = (__call_hash(hashp, key.data, key.size) != obucket); 591 592 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 593 if (!(ret->nextp = 594 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 595 return (-1);; 596 } else 597 ret->nextp = NULL; 598 599 /* Now make one of np/op point to the big key/data pair */ 600 #ifdef DEBUG 601 assert(np->ovfl == NULL); 602 #endif 603 if (change) 604 tmpp = np; 605 else 606 tmpp = op; 607 608 tmpp->flags |= BUF_MOD; 609 #ifdef DEBUG1 610 (void)fprintf(stderr, 611 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 612 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 613 #endif 614 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 615 tp = (u_int16_t *)tmpp->page; 616 #ifdef DEBUG 617 assert(FREESPACE(tp) >= OVFLSIZE); 618 #endif 619 n = tp[0]; 620 off = OFFSET(tp); 621 free_space = FREESPACE(tp); 622 tp[++n] = (u_int16_t)addr; 623 tp[++n] = OVFLPAGE; 624 tp[0] = n; 625 OFFSET(tp) = off; 626 FREESPACE(tp) = free_space - OVFLSIZE; 627 628 /* 629 * Finally, set the new and old return values. BIG_KEYP contains a 630 * pointer to the last page of the big key_data pair. Make sure that 631 * big_keyp has no following page (2 elements) or create an empty 632 * following page. 633 */ 634 635 ret->newp = np; 636 ret->oldp = op; 637 638 tp = (u_int16_t *)big_keyp->page; 639 big_keyp->flags |= BUF_MOD; 640 if (tp[0] > 2) { 641 /* 642 * There may be either one or two offsets on this page. If 643 * there is one, then the overflow page is linked on normally 644 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 645 * the second offset and needs to get stuffed in after the 646 * next overflow page is added. 647 */ 648 n = tp[4]; 649 free_space = FREESPACE(tp); 650 off = OFFSET(tp); 651 tp[0] -= 2; 652 FREESPACE(tp) = free_space + OVFLSIZE; 653 OFFSET(tp) = off; 654 tmpp = __add_ovflpage(hashp, big_keyp); 655 if (!tmpp) 656 return (-1); 657 tp[4] = n; 658 } else 659 tmpp = big_keyp; 660 661 if (change) 662 ret->newp = tmpp; 663 else 664 ret->oldp = tmpp; 665 return (0); 666 } 667