1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Margo Seltzer. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #if defined(LIBC_SCCS) && !defined(lint) 36 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 37 #endif /* LIBC_SCCS and not lint */ 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 /* 42 * PACKAGE: hash 43 * DESCRIPTION: 44 * Big key/data handling for the hashing package. 45 * 46 * ROUTINES: 47 * External 48 * __big_keydata 49 * __big_split 50 * __big_insert 51 * __big_return 52 * __big_delete 53 * __find_last_page 54 * Internal 55 * collect_key 56 * collect_data 57 */ 58 59 #include <sys/param.h> 60 61 #include <errno.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 66 #ifdef DEBUG 67 #include <assert.h> 68 #endif 69 70 #include <db.h> 71 #include "hash.h" 72 #include "page.h" 73 #include "extern.h" 74 75 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); 76 static int collect_data(HTAB *, BUFHEAD *, int, int); 77 78 /* 79 * Big_insert 80 * 81 * You need to do an insert and the key/data pair is too big 82 * 83 * Returns: 84 * 0 ==> OK 85 *-1 ==> ERROR 86 */ 87 int 88 __big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 89 { 90 u_int16_t *p; 91 int key_size, n; 92 unsigned int val_size; 93 u_int16_t space, move_bytes, off; 94 char *cp, *key_data, *val_data; 95 96 cp = bufp->page; /* Character pointer of p. */ 97 p = (u_int16_t *)cp; 98 99 key_data = (char *)key->data; 100 key_size = key->size; 101 val_data = (char *)val->data; 102 val_size = val->size; 103 104 /* First move the Key */ 105 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 106 space = FREESPACE(p) - BIGOVERHEAD) { 107 move_bytes = MIN(space, key_size); 108 off = OFFSET(p) - move_bytes; 109 memmove(cp + off, key_data, move_bytes); 110 key_size -= move_bytes; 111 key_data += move_bytes; 112 n = p[0]; 113 p[++n] = off; 114 p[0] = ++n; 115 FREESPACE(p) = off - PAGE_META(n); 116 OFFSET(p) = off; 117 p[n] = PARTIAL_KEY; 118 bufp = __add_ovflpage(hashp, bufp); 119 if (!bufp) 120 return (-1); 121 n = p[0]; 122 if (!key_size) { 123 space = FREESPACE(p); 124 if (space) { 125 move_bytes = MIN(space, val_size); 126 /* 127 * If the data would fit exactly in the 128 * remaining space, we must overflow it to the 129 * next page; otherwise the invariant that the 130 * data must end on a page with FREESPACE 131 * non-zero would fail. 132 */ 133 if (space == val_size && val_size == val->size) 134 goto toolarge; 135 off = OFFSET(p) - move_bytes; 136 memmove(cp + off, val_data, move_bytes); 137 val_data += move_bytes; 138 val_size -= move_bytes; 139 p[n] = off; 140 p[n - 2] = FULL_KEY_DATA; 141 FREESPACE(p) = FREESPACE(p) - move_bytes; 142 OFFSET(p) = off; 143 } else { 144 toolarge: 145 p[n - 2] = FULL_KEY; 146 } 147 } 148 p = (u_int16_t *)bufp->page; 149 cp = bufp->page; 150 bufp->flags |= BUF_MOD; 151 } 152 153 /* Now move the data */ 154 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 155 space = FREESPACE(p) - BIGOVERHEAD) { 156 move_bytes = MIN(space, val_size); 157 /* 158 * Here's the hack to make sure that if the data ends on the 159 * same page as the key ends, FREESPACE is at least one. 160 */ 161 if (space == val_size && val_size == val->size) 162 move_bytes--; 163 off = OFFSET(p) - move_bytes; 164 memmove(cp + off, val_data, move_bytes); 165 val_size -= move_bytes; 166 val_data += move_bytes; 167 n = p[0]; 168 p[++n] = off; 169 p[0] = ++n; 170 FREESPACE(p) = off - PAGE_META(n); 171 OFFSET(p) = off; 172 if (val_size) { 173 p[n] = FULL_KEY; 174 bufp = __add_ovflpage(hashp, bufp); 175 if (!bufp) 176 return (-1); 177 cp = bufp->page; 178 p = (u_int16_t *)cp; 179 } else 180 p[n] = FULL_KEY_DATA; 181 bufp->flags |= BUF_MOD; 182 } 183 return (0); 184 } 185 186 /* 187 * Called when bufp's page contains a partial key (index should be 1) 188 * 189 * All pages in the big key/data pair except bufp are freed. We cannot 190 * free bufp because the page pointing to it is lost and we can't get rid 191 * of its pointer. 192 * 193 * Returns: 194 * 0 => OK 195 *-1 => ERROR 196 */ 197 int 198 __big_delete(HTAB *hashp, BUFHEAD *bufp) 199 { 200 BUFHEAD *last_bfp, *rbufp; 201 u_int16_t *bp, pageno; 202 int key_done, n; 203 204 rbufp = bufp; 205 last_bfp = NULL; 206 bp = (u_int16_t *)bufp->page; 207 pageno = 0; 208 key_done = 0; 209 210 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 211 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 212 key_done = 1; 213 214 /* 215 * If there is freespace left on a FULL_KEY_DATA page, then 216 * the data is short and fits entirely on this page, and this 217 * is the last page. 218 */ 219 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 220 break; 221 pageno = bp[bp[0] - 1]; 222 rbufp->flags |= BUF_MOD; 223 rbufp = __get_buf(hashp, pageno, rbufp, 0); 224 if (last_bfp) 225 __free_ovflpage(hashp, last_bfp); 226 last_bfp = rbufp; 227 if (!rbufp) 228 return (-1); /* Error. */ 229 bp = (u_int16_t *)rbufp->page; 230 } 231 232 /* 233 * If we get here then rbufp points to the last page of the big 234 * key/data pair. Bufp points to the first one -- it should now be 235 * empty pointing to the next page after this pair. Can't free it 236 * because we don't have the page pointing to it. 237 */ 238 239 /* This is information from the last page of the pair. */ 240 n = bp[0]; 241 pageno = bp[n - 1]; 242 243 /* Now, bp is the first page of the pair. */ 244 bp = (u_int16_t *)bufp->page; 245 if (n > 2) { 246 /* There is an overflow page. */ 247 bp[1] = pageno; 248 bp[2] = OVFLPAGE; 249 bufp->ovfl = rbufp->ovfl; 250 } else 251 /* This is the last page. */ 252 bufp->ovfl = NULL; 253 n -= 2; 254 bp[0] = n; 255 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 256 OFFSET(bp) = hashp->BSIZE; 257 258 bufp->flags |= BUF_MOD; 259 if (rbufp) 260 __free_ovflpage(hashp, rbufp); 261 if (last_bfp && last_bfp != rbufp) 262 __free_ovflpage(hashp, last_bfp); 263 264 hashp->NKEYS--; 265 return (0); 266 } 267 /* 268 * Returns: 269 * 0 = key not found 270 * -1 = get next overflow page 271 * -2 means key not found and this is big key/data 272 * -3 error 273 */ 274 int 275 __find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size) 276 { 277 u_int16_t *bp; 278 char *p; 279 int ksize; 280 u_int16_t bytes; 281 char *kkey; 282 283 bp = (u_int16_t *)bufp->page; 284 p = bufp->page; 285 ksize = size; 286 kkey = key; 287 288 for (bytes = hashp->BSIZE - bp[ndx]; 289 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 290 bytes = hashp->BSIZE - bp[ndx]) { 291 if (memcmp(p + bp[ndx], kkey, bytes)) 292 return (-2); 293 kkey += bytes; 294 ksize -= bytes; 295 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 296 if (!bufp) 297 return (-3); 298 p = bufp->page; 299 bp = (u_int16_t *)p; 300 ndx = 1; 301 } 302 303 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 304 #ifdef HASH_STATISTICS 305 ++hash_collisions; 306 #endif 307 return (-2); 308 } else 309 return (ndx); 310 } 311 312 /* 313 * Given the buffer pointer of the first overflow page of a big pair, 314 * find the end of the big pair 315 * 316 * This will set bpp to the buffer header of the last page of the big pair. 317 * It will return the pageno of the overflow page following the last page 318 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 319 * bucket) 320 */ 321 u_int16_t 322 __find_last_page(HTAB *hashp, BUFHEAD **bpp) 323 { 324 BUFHEAD *bufp; 325 u_int16_t *bp, pageno; 326 int n; 327 328 bufp = *bpp; 329 bp = (u_int16_t *)bufp->page; 330 for (;;) { 331 n = bp[0]; 332 333 /* 334 * This is the last page if: the tag is FULL_KEY_DATA and 335 * either only 2 entries OVFLPAGE marker is explicit there 336 * is freespace on the page. 337 */ 338 if (bp[2] == FULL_KEY_DATA && 339 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 340 break; 341 342 pageno = bp[n - 1]; 343 bufp = __get_buf(hashp, pageno, bufp, 0); 344 if (!bufp) 345 return (0); /* Need to indicate an error! */ 346 bp = (u_int16_t *)bufp->page; 347 } 348 349 *bpp = bufp; 350 if (bp[0] > 2) 351 return (bp[3]); 352 else 353 return (0); 354 } 355 356 /* 357 * Return the data for the key/data pair that begins on this page at this 358 * index (index should always be 1). 359 */ 360 int 361 __big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current) 362 { 363 BUFHEAD *save_p; 364 u_int16_t *bp, len, off, save_addr; 365 char *tp; 366 367 bp = (u_int16_t *)bufp->page; 368 while (bp[ndx + 1] == PARTIAL_KEY) { 369 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 370 if (!bufp) 371 return (-1); 372 bp = (u_int16_t *)bufp->page; 373 ndx = 1; 374 } 375 376 if (bp[ndx + 1] == FULL_KEY) { 377 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 378 if (!bufp) 379 return (-1); 380 bp = (u_int16_t *)bufp->page; 381 save_p = bufp; 382 save_addr = save_p->addr; 383 off = bp[1]; 384 len = 0; 385 } else 386 if (!FREESPACE(bp)) { 387 /* 388 * This is a hack. We can't distinguish between 389 * FULL_KEY_DATA that contains complete data or 390 * incomplete data, so we require that if the data 391 * is complete, there is at least 1 byte of free 392 * space left. 393 */ 394 off = bp[bp[0]]; 395 len = bp[1] - off; 396 save_p = bufp; 397 save_addr = bufp->addr; 398 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 399 if (!bufp) 400 return (-1); 401 bp = (u_int16_t *)bufp->page; 402 } else { 403 /* The data is all on one page. */ 404 tp = (char *)bp; 405 off = bp[bp[0]]; 406 val->data = (u_char *)tp + off; 407 val->size = bp[1] - off; 408 if (set_current) { 409 if (bp[0] == 2) { /* No more buckets in 410 * chain */ 411 hashp->cpage = NULL; 412 hashp->cbucket++; 413 hashp->cndx = 1; 414 } else { 415 hashp->cpage = __get_buf(hashp, 416 bp[bp[0] - 1], bufp, 0); 417 if (!hashp->cpage) 418 return (-1); 419 hashp->cndx = 1; 420 if (!((u_int16_t *) 421 hashp->cpage->page)[0]) { 422 hashp->cbucket++; 423 hashp->cpage = NULL; 424 } 425 } 426 } 427 return (0); 428 } 429 430 val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current); 431 if (val->size == (size_t)-1) 432 return (-1); 433 if (save_p->addr != save_addr) { 434 /* We are pretty short on buffers. */ 435 errno = EINVAL; /* OUT OF BUFFERS */ 436 return (-1); 437 } 438 memmove(hashp->tmp_buf, (save_p->page) + off, len); 439 val->data = (u_char *)hashp->tmp_buf; 440 return (0); 441 } 442 /* 443 * Count how big the total datasize is by recursing through the pages. Then 444 * allocate a buffer and copy the data as you recurse up. 445 */ 446 static int 447 collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set) 448 { 449 u_int16_t *bp; 450 char *p; 451 BUFHEAD *xbp; 452 u_int16_t save_addr; 453 int mylen, totlen; 454 455 p = bufp->page; 456 bp = (u_int16_t *)p; 457 mylen = hashp->BSIZE - bp[1]; 458 save_addr = bufp->addr; 459 460 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 461 totlen = len + mylen; 462 if (hashp->tmp_buf) 463 free(hashp->tmp_buf); 464 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 465 return (-1); 466 if (set) { 467 hashp->cndx = 1; 468 if (bp[0] == 2) { /* No more buckets in chain */ 469 hashp->cpage = NULL; 470 hashp->cbucket++; 471 } else { 472 hashp->cpage = 473 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 474 if (!hashp->cpage) 475 return (-1); 476 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 477 hashp->cbucket++; 478 hashp->cpage = NULL; 479 } 480 } 481 } 482 } else { 483 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 484 if (!xbp || ((totlen = 485 collect_data(hashp, xbp, len + mylen, set)) < 1)) 486 return (-1); 487 } 488 if (bufp->addr != save_addr) { 489 errno = EINVAL; /* Out of buffers. */ 490 return (-1); 491 } 492 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 493 return (totlen); 494 } 495 496 /* 497 * Fill in the key and data for this big pair. 498 */ 499 int 500 __big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set) 501 { 502 key->size = (size_t)collect_key(hashp, bufp, 0, val, set); 503 if (key->size == (size_t)-1) 504 return (-1); 505 key->data = (u_char *)hashp->tmp_key; 506 return (0); 507 } 508 509 /* 510 * Count how big the total key size is by recursing through the pages. Then 511 * collect the data, allocate a buffer and copy the key as you recurse up. 512 */ 513 static int 514 collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set) 515 { 516 BUFHEAD *xbp; 517 char *p; 518 int mylen, totlen; 519 u_int16_t *bp, save_addr; 520 521 p = bufp->page; 522 bp = (u_int16_t *)p; 523 mylen = hashp->BSIZE - bp[1]; 524 525 save_addr = bufp->addr; 526 totlen = len + mylen; 527 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 528 if (hashp->tmp_key != NULL) 529 free(hashp->tmp_key); 530 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 531 return (-1); 532 if (__big_return(hashp, bufp, 1, val, set)) 533 return (-1); 534 } else { 535 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 536 if (!xbp || ((totlen = 537 collect_key(hashp, xbp, totlen, val, set)) < 1)) 538 return (-1); 539 } 540 if (bufp->addr != save_addr) { 541 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 542 return (-1); 543 } 544 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 545 return (totlen); 546 } 547 548 /* 549 * Returns: 550 * 0 => OK 551 * -1 => error 552 */ 553 int 554 __big_split(HTAB *hashp, 555 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */ 556 BUFHEAD *np, /* Pointer to new bucket page */ 557 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */ 558 int addr, /* Address of big_keyp */ 559 u_int32_t obucket, /* Old Bucket */ 560 SPLIT_RETURN *ret) 561 { 562 BUFHEAD *bp, *tmpp; 563 DBT key, val; 564 u_int32_t change; 565 u_int16_t free_space, n, off, *tp; 566 567 bp = big_keyp; 568 569 /* Now figure out where the big key/data goes */ 570 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 571 return (-1); 572 change = (__call_hash(hashp, key.data, key.size) != obucket); 573 574 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 575 if (!(ret->nextp = 576 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 577 return (-1); 578 } else 579 ret->nextp = NULL; 580 581 /* Now make one of np/op point to the big key/data pair */ 582 #ifdef DEBUG 583 assert(np->ovfl == NULL); 584 #endif 585 if (change) 586 tmpp = np; 587 else 588 tmpp = op; 589 590 tmpp->flags |= BUF_MOD; 591 #ifdef DEBUG1 592 (void)fprintf(stderr, 593 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 594 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 595 #endif 596 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 597 tp = (u_int16_t *)tmpp->page; 598 #ifdef DEBUG 599 assert(FREESPACE(tp) >= OVFLSIZE); 600 #endif 601 n = tp[0]; 602 off = OFFSET(tp); 603 free_space = FREESPACE(tp); 604 tp[++n] = (u_int16_t)addr; 605 tp[++n] = OVFLPAGE; 606 tp[0] = n; 607 OFFSET(tp) = off; 608 FREESPACE(tp) = free_space - OVFLSIZE; 609 610 /* 611 * Finally, set the new and old return values. BIG_KEYP contains a 612 * pointer to the last page of the big key_data pair. Make sure that 613 * big_keyp has no following page (2 elements) or create an empty 614 * following page. 615 */ 616 617 ret->newp = np; 618 ret->oldp = op; 619 620 tp = (u_int16_t *)big_keyp->page; 621 big_keyp->flags |= BUF_MOD; 622 if (tp[0] > 2) { 623 /* 624 * There may be either one or two offsets on this page. If 625 * there is one, then the overflow page is linked on normally 626 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 627 * the second offset and needs to get stuffed in after the 628 * next overflow page is added. 629 */ 630 n = tp[4]; 631 free_space = FREESPACE(tp); 632 off = OFFSET(tp); 633 tp[0] -= 2; 634 FREESPACE(tp) = free_space + OVFLSIZE; 635 OFFSET(tp) = off; 636 tmpp = __add_ovflpage(hashp, big_keyp); 637 if (!tmpp) 638 return (-1); 639 tp[4] = n; 640 } else 641 tmpp = big_keyp; 642 643 if (change) 644 ret->newp = tmpp; 645 else 646 ret->oldp = tmpp; 647 return (0); 648 } 649