1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #if defined(LIBC_SCCS) && !defined(lint) 34 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 35 #endif /* LIBC_SCCS and not lint */ 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 /* 40 * PACKAGE: hash 41 * DESCRIPTION: 42 * Big key/data handling for the hashing package. 43 * 44 * ROUTINES: 45 * External 46 * __big_keydata 47 * __big_split 48 * __big_insert 49 * __big_return 50 * __big_delete 51 * __find_last_page 52 * Internal 53 * collect_key 54 * collect_data 55 */ 56 57 #include <sys/param.h> 58 59 #include <errno.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 64 #ifdef DEBUG 65 #include <assert.h> 66 #endif 67 68 #include <db.h> 69 #include "hash.h" 70 #include "page.h" 71 #include "extern.h" 72 73 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); 74 static int collect_data(HTAB *, BUFHEAD *, int, int); 75 76 /* 77 * Big_insert 78 * 79 * You need to do an insert and the key/data pair is too big 80 * 81 * Returns: 82 * 0 ==> OK 83 *-1 ==> ERROR 84 */ 85 int 86 __big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 87 { 88 u_int16_t *p; 89 int key_size, n; 90 unsigned int val_size; 91 u_int16_t space, move_bytes, off; 92 char *cp, *key_data, *val_data; 93 94 cp = bufp->page; /* Character pointer of p. */ 95 p = (u_int16_t *)cp; 96 97 key_data = (char *)key->data; 98 key_size = key->size; 99 val_data = (char *)val->data; 100 val_size = val->size; 101 102 /* First move the Key */ 103 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 104 space = FREESPACE(p) - BIGOVERHEAD) { 105 move_bytes = MIN(space, key_size); 106 off = OFFSET(p) - move_bytes; 107 memmove(cp + off, key_data, move_bytes); 108 key_size -= move_bytes; 109 key_data += move_bytes; 110 n = p[0]; 111 p[++n] = off; 112 p[0] = ++n; 113 FREESPACE(p) = off - PAGE_META(n); 114 OFFSET(p) = off; 115 p[n] = PARTIAL_KEY; 116 bufp = __add_ovflpage(hashp, bufp); 117 if (!bufp) 118 return (-1); 119 n = p[0]; 120 if (!key_size) { 121 space = FREESPACE(p); 122 if (space) { 123 move_bytes = MIN(space, val_size); 124 /* 125 * If the data would fit exactly in the 126 * remaining space, we must overflow it to the 127 * next page; otherwise the invariant that the 128 * data must end on a page with FREESPACE 129 * non-zero would fail. 130 */ 131 if (space == val_size && val_size == val->size) 132 goto toolarge; 133 off = OFFSET(p) - move_bytes; 134 memmove(cp + off, val_data, move_bytes); 135 val_data += move_bytes; 136 val_size -= move_bytes; 137 p[n] = off; 138 p[n - 2] = FULL_KEY_DATA; 139 FREESPACE(p) = FREESPACE(p) - move_bytes; 140 OFFSET(p) = off; 141 } else { 142 toolarge: 143 p[n - 2] = FULL_KEY; 144 } 145 } 146 p = (u_int16_t *)bufp->page; 147 cp = bufp->page; 148 bufp->flags |= BUF_MOD; 149 } 150 151 /* Now move the data */ 152 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 153 space = FREESPACE(p) - BIGOVERHEAD) { 154 move_bytes = MIN(space, val_size); 155 /* 156 * Here's the hack to make sure that if the data ends on the 157 * same page as the key ends, FREESPACE is at least one. 158 */ 159 if (space == val_size && val_size == val->size) 160 move_bytes--; 161 off = OFFSET(p) - move_bytes; 162 memmove(cp + off, val_data, move_bytes); 163 val_size -= move_bytes; 164 val_data += move_bytes; 165 n = p[0]; 166 p[++n] = off; 167 p[0] = ++n; 168 FREESPACE(p) = off - PAGE_META(n); 169 OFFSET(p) = off; 170 if (val_size) { 171 p[n] = FULL_KEY; 172 bufp = __add_ovflpage(hashp, bufp); 173 if (!bufp) 174 return (-1); 175 cp = bufp->page; 176 p = (u_int16_t *)cp; 177 } else 178 p[n] = FULL_KEY_DATA; 179 bufp->flags |= BUF_MOD; 180 } 181 return (0); 182 } 183 184 /* 185 * Called when bufp's page contains a partial key (index should be 1) 186 * 187 * All pages in the big key/data pair except bufp are freed. We cannot 188 * free bufp because the page pointing to it is lost and we can't get rid 189 * of its pointer. 190 * 191 * Returns: 192 * 0 => OK 193 *-1 => ERROR 194 */ 195 int 196 __big_delete(HTAB *hashp, BUFHEAD *bufp) 197 { 198 BUFHEAD *last_bfp, *rbufp; 199 u_int16_t *bp, pageno; 200 int key_done, n; 201 202 rbufp = bufp; 203 last_bfp = NULL; 204 bp = (u_int16_t *)bufp->page; 205 pageno = 0; 206 key_done = 0; 207 208 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 209 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 210 key_done = 1; 211 212 /* 213 * If there is freespace left on a FULL_KEY_DATA page, then 214 * the data is short and fits entirely on this page, and this 215 * is the last page. 216 */ 217 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 218 break; 219 pageno = bp[bp[0] - 1]; 220 rbufp->flags |= BUF_MOD; 221 rbufp = __get_buf(hashp, pageno, rbufp, 0); 222 if (last_bfp) 223 __free_ovflpage(hashp, last_bfp); 224 last_bfp = rbufp; 225 if (!rbufp) 226 return (-1); /* Error. */ 227 bp = (u_int16_t *)rbufp->page; 228 } 229 230 /* 231 * If we get here then rbufp points to the last page of the big 232 * key/data pair. Bufp points to the first one -- it should now be 233 * empty pointing to the next page after this pair. Can't free it 234 * because we don't have the page pointing to it. 235 */ 236 237 /* This is information from the last page of the pair. */ 238 n = bp[0]; 239 pageno = bp[n - 1]; 240 241 /* Now, bp is the first page of the pair. */ 242 bp = (u_int16_t *)bufp->page; 243 if (n > 2) { 244 /* There is an overflow page. */ 245 bp[1] = pageno; 246 bp[2] = OVFLPAGE; 247 bufp->ovfl = rbufp->ovfl; 248 } else 249 /* This is the last page. */ 250 bufp->ovfl = NULL; 251 n -= 2; 252 bp[0] = n; 253 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 254 OFFSET(bp) = hashp->BSIZE; 255 256 bufp->flags |= BUF_MOD; 257 if (rbufp) 258 __free_ovflpage(hashp, rbufp); 259 if (last_bfp && last_bfp != rbufp) 260 __free_ovflpage(hashp, last_bfp); 261 262 hashp->NKEYS--; 263 return (0); 264 } 265 /* 266 * Returns: 267 * 0 = key not found 268 * -1 = get next overflow page 269 * -2 means key not found and this is big key/data 270 * -3 error 271 */ 272 int 273 __find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size) 274 { 275 u_int16_t *bp; 276 char *p; 277 int ksize; 278 u_int16_t bytes; 279 char *kkey; 280 281 bp = (u_int16_t *)bufp->page; 282 p = bufp->page; 283 ksize = size; 284 kkey = key; 285 286 for (bytes = hashp->BSIZE - bp[ndx]; 287 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 288 bytes = hashp->BSIZE - bp[ndx]) { 289 if (memcmp(p + bp[ndx], kkey, bytes)) 290 return (-2); 291 kkey += bytes; 292 ksize -= bytes; 293 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 294 if (!bufp) 295 return (-3); 296 p = bufp->page; 297 bp = (u_int16_t *)p; 298 ndx = 1; 299 } 300 301 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 302 #ifdef HASH_STATISTICS 303 ++hash_collisions; 304 #endif 305 return (-2); 306 } else 307 return (ndx); 308 } 309 310 /* 311 * Given the buffer pointer of the first overflow page of a big pair, 312 * find the end of the big pair 313 * 314 * This will set bpp to the buffer header of the last page of the big pair. 315 * It will return the pageno of the overflow page following the last page 316 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 317 * bucket) 318 */ 319 u_int16_t 320 __find_last_page(HTAB *hashp, BUFHEAD **bpp) 321 { 322 BUFHEAD *bufp; 323 u_int16_t *bp, pageno; 324 int n; 325 326 bufp = *bpp; 327 bp = (u_int16_t *)bufp->page; 328 for (;;) { 329 n = bp[0]; 330 331 /* 332 * This is the last page if: the tag is FULL_KEY_DATA and 333 * either only 2 entries OVFLPAGE marker is explicit there 334 * is freespace on the page. 335 */ 336 if (bp[2] == FULL_KEY_DATA && 337 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 338 break; 339 340 pageno = bp[n - 1]; 341 bufp = __get_buf(hashp, pageno, bufp, 0); 342 if (!bufp) 343 return (0); /* Need to indicate an error! */ 344 bp = (u_int16_t *)bufp->page; 345 } 346 347 *bpp = bufp; 348 if (bp[0] > 2) 349 return (bp[3]); 350 else 351 return (0); 352 } 353 354 /* 355 * Return the data for the key/data pair that begins on this page at this 356 * index (index should always be 1). 357 */ 358 int 359 __big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current) 360 { 361 BUFHEAD *save_p; 362 u_int16_t *bp, len, off, save_addr; 363 char *tp; 364 365 bp = (u_int16_t *)bufp->page; 366 while (bp[ndx + 1] == PARTIAL_KEY) { 367 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 368 if (!bufp) 369 return (-1); 370 bp = (u_int16_t *)bufp->page; 371 ndx = 1; 372 } 373 374 if (bp[ndx + 1] == FULL_KEY) { 375 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 376 if (!bufp) 377 return (-1); 378 bp = (u_int16_t *)bufp->page; 379 save_p = bufp; 380 save_addr = save_p->addr; 381 off = bp[1]; 382 len = 0; 383 } else 384 if (!FREESPACE(bp)) { 385 /* 386 * This is a hack. We can't distinguish between 387 * FULL_KEY_DATA that contains complete data or 388 * incomplete data, so we require that if the data 389 * is complete, there is at least 1 byte of free 390 * space left. 391 */ 392 off = bp[bp[0]]; 393 len = bp[1] - off; 394 save_p = bufp; 395 save_addr = bufp->addr; 396 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 397 if (!bufp) 398 return (-1); 399 bp = (u_int16_t *)bufp->page; 400 } else { 401 /* The data is all on one page. */ 402 tp = (char *)bp; 403 off = bp[bp[0]]; 404 val->data = (u_char *)tp + off; 405 val->size = bp[1] - off; 406 if (set_current) { 407 if (bp[0] == 2) { /* No more buckets in 408 * chain */ 409 hashp->cpage = NULL; 410 hashp->cbucket++; 411 hashp->cndx = 1; 412 } else { 413 hashp->cpage = __get_buf(hashp, 414 bp[bp[0] - 1], bufp, 0); 415 if (!hashp->cpage) 416 return (-1); 417 hashp->cndx = 1; 418 if (!((u_int16_t *) 419 hashp->cpage->page)[0]) { 420 hashp->cbucket++; 421 hashp->cpage = NULL; 422 } 423 } 424 } 425 return (0); 426 } 427 428 val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current); 429 if (val->size == (size_t)-1) 430 return (-1); 431 if (save_p->addr != save_addr) { 432 /* We are pretty short on buffers. */ 433 errno = EINVAL; /* OUT OF BUFFERS */ 434 return (-1); 435 } 436 memmove(hashp->tmp_buf, (save_p->page) + off, len); 437 val->data = (u_char *)hashp->tmp_buf; 438 return (0); 439 } 440 /* 441 * Count how big the total datasize is by recursing through the pages. Then 442 * allocate a buffer and copy the data as you recurse up. 443 */ 444 static int 445 collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set) 446 { 447 u_int16_t *bp; 448 char *p; 449 BUFHEAD *xbp; 450 u_int16_t save_addr; 451 int mylen, totlen; 452 453 p = bufp->page; 454 bp = (u_int16_t *)p; 455 mylen = hashp->BSIZE - bp[1]; 456 save_addr = bufp->addr; 457 458 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 459 totlen = len + mylen; 460 if (hashp->tmp_buf) 461 free(hashp->tmp_buf); 462 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 463 return (-1); 464 if (set) { 465 hashp->cndx = 1; 466 if (bp[0] == 2) { /* No more buckets in chain */ 467 hashp->cpage = NULL; 468 hashp->cbucket++; 469 } else { 470 hashp->cpage = 471 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 472 if (!hashp->cpage) 473 return (-1); 474 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 475 hashp->cbucket++; 476 hashp->cpage = NULL; 477 } 478 } 479 } 480 } else { 481 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 482 if (!xbp || ((totlen = 483 collect_data(hashp, xbp, len + mylen, set)) < 1)) 484 return (-1); 485 } 486 if (bufp->addr != save_addr) { 487 errno = EINVAL; /* Out of buffers. */ 488 return (-1); 489 } 490 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 491 return (totlen); 492 } 493 494 /* 495 * Fill in the key and data for this big pair. 496 */ 497 int 498 __big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set) 499 { 500 key->size = (size_t)collect_key(hashp, bufp, 0, val, set); 501 if (key->size == (size_t)-1) 502 return (-1); 503 key->data = (u_char *)hashp->tmp_key; 504 return (0); 505 } 506 507 /* 508 * Count how big the total key size is by recursing through the pages. Then 509 * collect the data, allocate a buffer and copy the key as you recurse up. 510 */ 511 static int 512 collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set) 513 { 514 BUFHEAD *xbp; 515 char *p; 516 int mylen, totlen; 517 u_int16_t *bp, save_addr; 518 519 p = bufp->page; 520 bp = (u_int16_t *)p; 521 mylen = hashp->BSIZE - bp[1]; 522 523 save_addr = bufp->addr; 524 totlen = len + mylen; 525 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 526 if (hashp->tmp_key != NULL) 527 free(hashp->tmp_key); 528 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 529 return (-1); 530 if (__big_return(hashp, bufp, 1, val, set)) 531 return (-1); 532 } else { 533 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 534 if (!xbp || ((totlen = 535 collect_key(hashp, xbp, totlen, val, set)) < 1)) 536 return (-1); 537 } 538 if (bufp->addr != save_addr) { 539 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 540 return (-1); 541 } 542 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 543 return (totlen); 544 } 545 546 /* 547 * Returns: 548 * 0 => OK 549 * -1 => error 550 */ 551 int 552 __big_split(HTAB *hashp, 553 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */ 554 BUFHEAD *np, /* Pointer to new bucket page */ 555 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */ 556 int addr, /* Address of big_keyp */ 557 u_int32_t obucket, /* Old Bucket */ 558 SPLIT_RETURN *ret) 559 { 560 BUFHEAD *bp, *tmpp; 561 DBT key, val; 562 u_int32_t change; 563 u_int16_t free_space, n, off, *tp; 564 565 bp = big_keyp; 566 567 /* Now figure out where the big key/data goes */ 568 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 569 return (-1); 570 change = (__call_hash(hashp, key.data, key.size) != obucket); 571 572 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 573 if (!(ret->nextp = 574 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 575 return (-1); 576 } else 577 ret->nextp = NULL; 578 579 /* Now make one of np/op point to the big key/data pair */ 580 #ifdef DEBUG 581 assert(np->ovfl == NULL); 582 #endif 583 if (change) 584 tmpp = np; 585 else 586 tmpp = op; 587 588 tmpp->flags |= BUF_MOD; 589 #ifdef DEBUG1 590 (void)fprintf(stderr, 591 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 592 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 593 #endif 594 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 595 tp = (u_int16_t *)tmpp->page; 596 #ifdef DEBUG 597 assert(FREESPACE(tp) >= OVFLSIZE); 598 #endif 599 n = tp[0]; 600 off = OFFSET(tp); 601 free_space = FREESPACE(tp); 602 tp[++n] = (u_int16_t)addr; 603 tp[++n] = OVFLPAGE; 604 tp[0] = n; 605 OFFSET(tp) = off; 606 FREESPACE(tp) = free_space - OVFLSIZE; 607 608 /* 609 * Finally, set the new and old return values. BIG_KEYP contains a 610 * pointer to the last page of the big key_data pair. Make sure that 611 * big_keyp has no following page (2 elements) or create an empty 612 * following page. 613 */ 614 615 ret->newp = np; 616 ret->oldp = op; 617 618 tp = (u_int16_t *)big_keyp->page; 619 big_keyp->flags |= BUF_MOD; 620 if (tp[0] > 2) { 621 /* 622 * There may be either one or two offsets on this page. If 623 * there is one, then the overflow page is linked on normally 624 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 625 * the second offset and needs to get stuffed in after the 626 * next overflow page is added. 627 */ 628 n = tp[4]; 629 free_space = FREESPACE(tp); 630 off = OFFSET(tp); 631 tp[0] -= 2; 632 FREESPACE(tp) = free_space + OVFLSIZE; 633 OFFSET(tp) = off; 634 tmpp = __add_ovflpage(hashp, big_keyp); 635 if (!tmpp) 636 return (-1); 637 tp[4] = n; 638 } else 639 tmpp = big_keyp; 640 641 if (change) 642 ret->newp = tmpp; 643 else 644 ret->oldp = tmpp; 645 return (0); 646 } 647