1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 39 #endif /* LIBC_SCCS and not lint */ 40 41 /* 42 * PACKAGE: hash 43 * DESCRIPTION: 44 * Big key/data handling for the hashing package. 45 * 46 * ROUTINES: 47 * External 48 * __big_keydata 49 * __big_split 50 * __big_insert 51 * __big_return 52 * __big_delete 53 * __find_last_page 54 * Internal 55 * collect_key 56 * collect_data 57 */ 58 59 #include <sys/param.h> 60 61 #include <errno.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 66 #ifdef DEBUG 67 #include <assert.h> 68 #endif 69 70 #include <db.h> 71 #include "hash.h" 72 #include "page.h" 73 #include "extern.h" 74 75 static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int)); 76 static int collect_data __P((HTAB *, BUFHEAD *, int, int)); 77 78 /* 79 * Big_insert 80 * 81 * You need to do an insert and the key/data pair is too big 82 * 83 * Returns: 84 * 0 ==> OK 85 *-1 ==> ERROR 86 */ 87 extern int 88 __big_insert(hashp, bufp, key, val) 89 HTAB *hashp; 90 BUFHEAD *bufp; 91 const DBT *key, *val; 92 { 93 register u_int16_t *p; 94 int key_size, n, val_size; 95 u_int16_t space, move_bytes, off; 96 char *cp, *key_data, *val_data; 97 98 cp = bufp->page; /* Character pointer of p. */ 99 p = (u_int16_t *)cp; 100 101 key_data = (char *)key->data; 102 key_size = key->size; 103 val_data = (char *)val->data; 104 val_size = val->size; 105 106 /* First move the Key */ 107 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 108 space = FREESPACE(p) - BIGOVERHEAD) { 109 move_bytes = MIN(space, key_size); 110 off = OFFSET(p) - move_bytes; 111 memmove(cp + off, key_data, move_bytes); 112 key_size -= move_bytes; 113 key_data += move_bytes; 114 n = p[0]; 115 p[++n] = off; 116 p[0] = ++n; 117 FREESPACE(p) = off - PAGE_META(n); 118 OFFSET(p) = off; 119 p[n] = PARTIAL_KEY; 120 bufp = __add_ovflpage(hashp, bufp); 121 if (!bufp) 122 return (-1); 123 n = p[0]; 124 if (!key_size) 125 if (FREESPACE(p)) { 126 move_bytes = MIN(FREESPACE(p), val_size); 127 off = OFFSET(p) - move_bytes; 128 p[n] = off; 129 memmove(cp + off, val_data, move_bytes); 130 val_data += move_bytes; 131 val_size -= move_bytes; 132 p[n - 2] = FULL_KEY_DATA; 133 FREESPACE(p) = FREESPACE(p) - move_bytes; 134 OFFSET(p) = off; 135 } else 136 p[n - 2] = FULL_KEY; 137 p = (u_int16_t *)bufp->page; 138 cp = bufp->page; 139 bufp->flags |= BUF_MOD; 140 } 141 142 /* Now move the data */ 143 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 144 space = FREESPACE(p) - BIGOVERHEAD) { 145 move_bytes = MIN(space, val_size); 146 /* 147 * Here's the hack to make sure that if the data ends on the 148 * same page as the key ends, FREESPACE is at least one. 149 */ 150 if (space == val_size && val_size == val->size) 151 move_bytes--; 152 off = OFFSET(p) - move_bytes; 153 memmove(cp + off, val_data, move_bytes); 154 val_size -= move_bytes; 155 val_data += move_bytes; 156 n = p[0]; 157 p[++n] = off; 158 p[0] = ++n; 159 FREESPACE(p) = off - PAGE_META(n); 160 OFFSET(p) = off; 161 if (val_size) { 162 p[n] = FULL_KEY; 163 bufp = __add_ovflpage(hashp, bufp); 164 if (!bufp) 165 return (-1); 166 cp = bufp->page; 167 p = (u_int16_t *)cp; 168 } else 169 p[n] = FULL_KEY_DATA; 170 bufp->flags |= BUF_MOD; 171 } 172 return (0); 173 } 174 175 /* 176 * Called when bufp's page contains a partial key (index should be 1) 177 * 178 * All pages in the big key/data pair except bufp are freed. We cannot 179 * free bufp because the page pointing to it is lost and we can't get rid 180 * of its pointer. 181 * 182 * Returns: 183 * 0 => OK 184 *-1 => ERROR 185 */ 186 extern int 187 __big_delete(hashp, bufp) 188 HTAB *hashp; 189 BUFHEAD *bufp; 190 { 191 register BUFHEAD *last_bfp, *rbufp; 192 u_int16_t *bp, pageno; 193 int key_done, n; 194 195 rbufp = bufp; 196 last_bfp = NULL; 197 bp = (u_int16_t *)bufp->page; 198 pageno = 0; 199 key_done = 0; 200 201 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 202 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 203 key_done = 1; 204 205 /* 206 * If there is freespace left on a FULL_KEY_DATA page, then 207 * the data is short and fits entirely on this page, and this 208 * is the last page. 209 */ 210 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 211 break; 212 pageno = bp[bp[0] - 1]; 213 rbufp->flags |= BUF_MOD; 214 rbufp = __get_buf(hashp, pageno, rbufp, 0); 215 if (last_bfp) 216 __free_ovflpage(hashp, last_bfp); 217 last_bfp = rbufp; 218 if (!rbufp) 219 return (-1); /* Error. */ 220 bp = (u_int16_t *)rbufp->page; 221 } 222 223 /* 224 * If we get here then rbufp points to the last page of the big 225 * key/data pair. Bufp points to the first one -- it should now be 226 * empty pointing to the next page after this pair. Can't free it 227 * because we don't have the page pointing to it. 228 */ 229 230 /* This is information from the last page of the pair. */ 231 n = bp[0]; 232 pageno = bp[n - 1]; 233 234 /* Now, bp is the first page of the pair. */ 235 bp = (u_int16_t *)bufp->page; 236 if (n > 2) { 237 /* There is an overflow page. */ 238 bp[1] = pageno; 239 bp[2] = OVFLPAGE; 240 bufp->ovfl = rbufp->ovfl; 241 } else 242 /* This is the last page. */ 243 bufp->ovfl = NULL; 244 n -= 2; 245 bp[0] = n; 246 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 247 OFFSET(bp) = hashp->BSIZE - 1; 248 249 bufp->flags |= BUF_MOD; 250 if (rbufp) 251 __free_ovflpage(hashp, rbufp); 252 if (last_bfp != rbufp) 253 __free_ovflpage(hashp, last_bfp); 254 255 hashp->NKEYS--; 256 return (0); 257 } 258 /* 259 * Returns: 260 * 0 = key not found 261 * -1 = get next overflow page 262 * -2 means key not found and this is big key/data 263 * -3 error 264 */ 265 extern int 266 __find_bigpair(hashp, bufp, ndx, key, size) 267 HTAB *hashp; 268 BUFHEAD *bufp; 269 int ndx; 270 char *key; 271 int size; 272 { 273 register u_int16_t *bp; 274 register char *p; 275 int ksize; 276 u_int16_t bytes; 277 char *kkey; 278 279 bp = (u_int16_t *)bufp->page; 280 p = bufp->page; 281 ksize = size; 282 kkey = key; 283 284 for (bytes = hashp->BSIZE - bp[ndx]; 285 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 286 bytes = hashp->BSIZE - bp[ndx]) { 287 if (memcmp(p + bp[ndx], kkey, bytes)) 288 return (-2); 289 kkey += bytes; 290 ksize -= bytes; 291 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 292 if (!bufp) 293 return (-3); 294 p = bufp->page; 295 bp = (u_int16_t *)p; 296 ndx = 1; 297 } 298 299 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 300 #ifdef HASH_STATISTICS 301 ++hash_collisions; 302 #endif 303 return (-2); 304 } else 305 return (ndx); 306 } 307 308 /* 309 * Given the buffer pointer of the first overflow page of a big pair, 310 * find the end of the big pair 311 * 312 * This will set bpp to the buffer header of the last page of the big pair. 313 * It will return the pageno of the overflow page following the last page 314 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 315 * bucket) 316 */ 317 extern u_int16_t 318 __find_last_page(hashp, bpp) 319 HTAB *hashp; 320 BUFHEAD **bpp; 321 { 322 BUFHEAD *bufp; 323 u_int16_t *bp, pageno; 324 int n; 325 326 bufp = *bpp; 327 bp = (u_int16_t *)bufp->page; 328 for (;;) { 329 n = bp[0]; 330 331 /* 332 * This is the last page if: the tag is FULL_KEY_DATA and 333 * either only 2 entries OVFLPAGE marker is explicit there 334 * is freespace on the page. 335 */ 336 if (bp[2] == FULL_KEY_DATA && 337 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 338 break; 339 340 pageno = bp[n - 1]; 341 bufp = __get_buf(hashp, pageno, bufp, 0); 342 if (!bufp) 343 return (0); /* Need to indicate an error! */ 344 bp = (u_int16_t *)bufp->page; 345 } 346 347 *bpp = bufp; 348 if (bp[0] > 2) 349 return (bp[3]); 350 else 351 return (0); 352 } 353 354 /* 355 * Return the data for the key/data pair that begins on this page at this 356 * index (index should always be 1). 357 */ 358 extern int 359 __big_return(hashp, bufp, ndx, val, set_current) 360 HTAB *hashp; 361 BUFHEAD *bufp; 362 int ndx; 363 DBT *val; 364 int set_current; 365 { 366 BUFHEAD *save_p; 367 u_int16_t *bp, len, off, save_addr; 368 char *tp; 369 370 bp = (u_int16_t *)bufp->page; 371 while (bp[ndx + 1] == PARTIAL_KEY) { 372 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 373 if (!bufp) 374 return (-1); 375 bp = (u_int16_t *)bufp->page; 376 ndx = 1; 377 } 378 379 if (bp[ndx + 1] == FULL_KEY) { 380 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 381 if (!bufp) 382 return (-1); 383 bp = (u_int16_t *)bufp->page; 384 save_p = bufp; 385 save_addr = save_p->addr; 386 off = bp[1]; 387 len = 0; 388 } else 389 if (!FREESPACE(bp)) { 390 /* 391 * This is a hack. We can't distinguish between 392 * FULL_KEY_DATA that contains complete data or 393 * incomplete data, so we require that if the data 394 * is complete, there is at least 1 byte of free 395 * space left. 396 */ 397 off = bp[bp[0]]; 398 len = bp[1] - off; 399 save_p = bufp; 400 save_addr = bufp->addr; 401 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 402 if (!bufp) 403 return (-1); 404 bp = (u_int16_t *)bufp->page; 405 } else { 406 /* The data is all on one page. */ 407 tp = (char *)bp; 408 off = bp[bp[0]]; 409 val->data = (u_char *)tp + off; 410 val->size = bp[1] - off; 411 if (set_current) { 412 if (bp[0] == 2) { /* No more buckets in 413 * chain */ 414 hashp->cpage = NULL; 415 hashp->cbucket++; 416 hashp->cndx = 1; 417 } else { 418 hashp->cpage = __get_buf(hashp, 419 bp[bp[0] - 1], bufp, 0); 420 if (!hashp->cpage) 421 return (-1); 422 hashp->cndx = 1; 423 if (!((u_int16_t *) 424 hashp->cpage->page)[0]) { 425 hashp->cbucket++; 426 hashp->cpage = NULL; 427 } 428 } 429 } 430 return (0); 431 } 432 433 val->size = collect_data(hashp, bufp, (int)len, set_current); 434 if (val->size == -1) 435 return (-1); 436 if (save_p->addr != save_addr) { 437 /* We are pretty short on buffers. */ 438 errno = EINVAL; /* OUT OF BUFFERS */ 439 return (-1); 440 } 441 memmove(hashp->tmp_buf, (save_p->page) + off, len); 442 val->data = (u_char *)hashp->tmp_buf; 443 return (0); 444 } 445 /* 446 * Count how big the total datasize is by recursing through the pages. Then 447 * allocate a buffer and copy the data as you recurse up. 448 */ 449 static int 450 collect_data(hashp, bufp, len, set) 451 HTAB *hashp; 452 BUFHEAD *bufp; 453 int len, set; 454 { 455 register u_int16_t *bp; 456 register char *p; 457 BUFHEAD *xbp; 458 u_int16_t save_addr; 459 int mylen, totlen; 460 461 p = bufp->page; 462 bp = (u_int16_t *)p; 463 mylen = hashp->BSIZE - bp[1]; 464 save_addr = bufp->addr; 465 466 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 467 totlen = len + mylen; 468 if (hashp->tmp_buf) 469 free(hashp->tmp_buf); 470 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 471 return (-1); 472 if (set) { 473 hashp->cndx = 1; 474 if (bp[0] == 2) { /* No more buckets in chain */ 475 hashp->cpage = NULL; 476 hashp->cbucket++; 477 } else { 478 hashp->cpage = 479 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 480 if (!hashp->cpage) 481 return (-1); 482 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 483 hashp->cbucket++; 484 hashp->cpage = NULL; 485 } 486 } 487 } 488 } else { 489 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 490 if (!xbp || ((totlen = 491 collect_data(hashp, xbp, len + mylen, set)) < 1)) 492 return (-1); 493 } 494 if (bufp->addr != save_addr) { 495 errno = EINVAL; /* Out of buffers. */ 496 return (-1); 497 } 498 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 499 return (totlen); 500 } 501 502 /* 503 * Fill in the key and data for this big pair. 504 */ 505 extern int 506 __big_keydata(hashp, bufp, key, val, set) 507 HTAB *hashp; 508 BUFHEAD *bufp; 509 DBT *key, *val; 510 int set; 511 { 512 key->size = collect_key(hashp, bufp, 0, val, set); 513 if (key->size == -1) 514 return (-1); 515 key->data = (u_char *)hashp->tmp_key; 516 return (0); 517 } 518 519 /* 520 * Count how big the total key size is by recursing through the pages. Then 521 * collect the data, allocate a buffer and copy the key as you recurse up. 522 */ 523 static int 524 collect_key(hashp, bufp, len, val, set) 525 HTAB *hashp; 526 BUFHEAD *bufp; 527 int len; 528 DBT *val; 529 int set; 530 { 531 BUFHEAD *xbp; 532 char *p; 533 int mylen, totlen; 534 u_int16_t *bp, save_addr; 535 536 p = bufp->page; 537 bp = (u_int16_t *)p; 538 mylen = hashp->BSIZE - bp[1]; 539 540 save_addr = bufp->addr; 541 totlen = len + mylen; 542 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 543 if (hashp->tmp_key != NULL) 544 free(hashp->tmp_key); 545 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 546 return (-1); 547 if (__big_return(hashp, bufp, 1, val, set)) 548 return (-1); 549 } else { 550 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 551 if (!xbp || ((totlen = 552 collect_key(hashp, xbp, totlen, val, set)) < 1)) 553 return (-1); 554 } 555 if (bufp->addr != save_addr) { 556 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 557 return (-1); 558 } 559 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 560 return (totlen); 561 } 562 563 /* 564 * Returns: 565 * 0 => OK 566 * -1 => error 567 */ 568 extern int 569 __big_split(hashp, op, np, big_keyp, addr, obucket, ret) 570 HTAB *hashp; 571 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ 572 BUFHEAD *np; /* Pointer to new bucket page */ 573 /* Pointer to first page containing the big key/data */ 574 BUFHEAD *big_keyp; 575 int addr; /* Address of big_keyp */ 576 u_int32_t obucket;/* Old Bucket */ 577 SPLIT_RETURN *ret; 578 { 579 register BUFHEAD *tmpp; 580 register u_int16_t *tp; 581 BUFHEAD *bp; 582 DBT key, val; 583 u_int32_t change; 584 u_int16_t free_space, n, off; 585 586 bp = big_keyp; 587 588 /* Now figure out where the big key/data goes */ 589 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 590 return (-1); 591 change = (__call_hash(hashp, key.data, key.size) != obucket); 592 593 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 594 if (!(ret->nextp = 595 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 596 return (-1);; 597 } else 598 ret->nextp = NULL; 599 600 /* Now make one of np/op point to the big key/data pair */ 601 #ifdef DEBUG 602 assert(np->ovfl == NULL); 603 #endif 604 if (change) 605 tmpp = np; 606 else 607 tmpp = op; 608 609 tmpp->flags |= BUF_MOD; 610 #ifdef DEBUG1 611 (void)fprintf(stderr, 612 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 613 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 614 #endif 615 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 616 tp = (u_int16_t *)tmpp->page; 617 #ifdef DEBUG 618 assert(FREESPACE(tp) >= OVFLSIZE); 619 #endif 620 n = tp[0]; 621 off = OFFSET(tp); 622 free_space = FREESPACE(tp); 623 tp[++n] = (u_int16_t)addr; 624 tp[++n] = OVFLPAGE; 625 tp[0] = n; 626 OFFSET(tp) = off; 627 FREESPACE(tp) = free_space - OVFLSIZE; 628 629 /* 630 * Finally, set the new and old return values. BIG_KEYP contains a 631 * pointer to the last page of the big key_data pair. Make sure that 632 * big_keyp has no following page (2 elements) or create an empty 633 * following page. 634 */ 635 636 ret->newp = np; 637 ret->oldp = op; 638 639 tp = (u_int16_t *)big_keyp->page; 640 big_keyp->flags |= BUF_MOD; 641 if (tp[0] > 2) { 642 /* 643 * There may be either one or two offsets on this page. If 644 * there is one, then the overflow page is linked on normally 645 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 646 * the second offset and needs to get stuffed in after the 647 * next overflow page is added. 648 */ 649 n = tp[4]; 650 free_space = FREESPACE(tp); 651 off = OFFSET(tp); 652 tp[0] -= 2; 653 FREESPACE(tp) = free_space + OVFLSIZE; 654 OFFSET(tp) = off; 655 tmpp = __add_ovflpage(hashp, big_keyp); 656 if (!tmpp) 657 return (-1); 658 tp[4] = n; 659 } else 660 tmpp = big_keyp; 661 662 if (change) 663 ret->newp = tmpp; 664 else 665 ret->oldp = tmpp; 666 return (0); 667 } 668