1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Margo Seltzer. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #if defined(LIBC_SCCS) && !defined(lint) 36 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 37 #endif /* LIBC_SCCS and not lint */ 38 /* 39 * PACKAGE: hash 40 * DESCRIPTION: 41 * Big key/data handling for the hashing package. 42 * 43 * ROUTINES: 44 * External 45 * __big_keydata 46 * __big_split 47 * __big_insert 48 * __big_return 49 * __big_delete 50 * __find_last_page 51 * Internal 52 * collect_key 53 * collect_data 54 */ 55 56 #include <sys/param.h> 57 58 #include <errno.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <string.h> 62 63 #ifdef DEBUG 64 #include <assert.h> 65 #endif 66 67 #include <db.h> 68 #include "hash.h" 69 #include "page.h" 70 #include "extern.h" 71 72 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); 73 static int collect_data(HTAB *, BUFHEAD *, int, int); 74 75 /* 76 * Big_insert 77 * 78 * You need to do an insert and the key/data pair is too big 79 * 80 * Returns: 81 * 0 ==> OK 82 *-1 ==> ERROR 83 */ 84 int 85 __big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 86 { 87 u_int16_t *p; 88 int key_size, n; 89 unsigned int val_size; 90 u_int16_t space, move_bytes, off; 91 char *cp, *key_data, *val_data; 92 93 cp = bufp->page; /* Character pointer of p. */ 94 p = (u_int16_t *)cp; 95 96 key_data = (char *)key->data; 97 key_size = key->size; 98 val_data = (char *)val->data; 99 val_size = val->size; 100 101 /* First move the Key */ 102 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 103 space = FREESPACE(p) - BIGOVERHEAD) { 104 move_bytes = MIN(space, key_size); 105 off = OFFSET(p) - move_bytes; 106 memmove(cp + off, key_data, move_bytes); 107 key_size -= move_bytes; 108 key_data += move_bytes; 109 n = p[0]; 110 p[++n] = off; 111 p[0] = ++n; 112 FREESPACE(p) = off - PAGE_META(n); 113 OFFSET(p) = off; 114 p[n] = PARTIAL_KEY; 115 bufp = __add_ovflpage(hashp, bufp); 116 if (!bufp) 117 return (-1); 118 n = p[0]; 119 if (!key_size) { 120 space = FREESPACE(p); 121 if (space) { 122 move_bytes = MIN(space, val_size); 123 /* 124 * If the data would fit exactly in the 125 * remaining space, we must overflow it to the 126 * next page; otherwise the invariant that the 127 * data must end on a page with FREESPACE 128 * non-zero would fail. 129 */ 130 if (space == val_size && val_size == val->size) 131 goto toolarge; 132 off = OFFSET(p) - move_bytes; 133 memmove(cp + off, val_data, move_bytes); 134 val_data += move_bytes; 135 val_size -= move_bytes; 136 p[n] = off; 137 p[n - 2] = FULL_KEY_DATA; 138 FREESPACE(p) = FREESPACE(p) - move_bytes; 139 OFFSET(p) = off; 140 } else { 141 toolarge: 142 p[n - 2] = FULL_KEY; 143 } 144 } 145 p = (u_int16_t *)bufp->page; 146 cp = bufp->page; 147 bufp->flags |= BUF_MOD; 148 } 149 150 /* Now move the data */ 151 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 152 space = FREESPACE(p) - BIGOVERHEAD) { 153 move_bytes = MIN(space, val_size); 154 /* 155 * Here's the hack to make sure that if the data ends on the 156 * same page as the key ends, FREESPACE is at least one. 157 */ 158 if (space == val_size && val_size == val->size) 159 move_bytes--; 160 off = OFFSET(p) - move_bytes; 161 memmove(cp + off, val_data, move_bytes); 162 val_size -= move_bytes; 163 val_data += move_bytes; 164 n = p[0]; 165 p[++n] = off; 166 p[0] = ++n; 167 FREESPACE(p) = off - PAGE_META(n); 168 OFFSET(p) = off; 169 if (val_size) { 170 p[n] = FULL_KEY; 171 bufp = __add_ovflpage(hashp, bufp); 172 if (!bufp) 173 return (-1); 174 cp = bufp->page; 175 p = (u_int16_t *)cp; 176 } else 177 p[n] = FULL_KEY_DATA; 178 bufp->flags |= BUF_MOD; 179 } 180 return (0); 181 } 182 183 /* 184 * Called when bufp's page contains a partial key (index should be 1) 185 * 186 * All pages in the big key/data pair except bufp are freed. We cannot 187 * free bufp because the page pointing to it is lost and we can't get rid 188 * of its pointer. 189 * 190 * Returns: 191 * 0 => OK 192 *-1 => ERROR 193 */ 194 int 195 __big_delete(HTAB *hashp, BUFHEAD *bufp) 196 { 197 BUFHEAD *last_bfp, *rbufp; 198 u_int16_t *bp, pageno; 199 int key_done, n; 200 201 rbufp = bufp; 202 last_bfp = NULL; 203 bp = (u_int16_t *)bufp->page; 204 pageno = 0; 205 key_done = 0; 206 207 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 208 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 209 key_done = 1; 210 211 /* 212 * If there is freespace left on a FULL_KEY_DATA page, then 213 * the data is short and fits entirely on this page, and this 214 * is the last page. 215 */ 216 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 217 break; 218 pageno = bp[bp[0] - 1]; 219 rbufp->flags |= BUF_MOD; 220 rbufp = __get_buf(hashp, pageno, rbufp, 0); 221 if (last_bfp) 222 __free_ovflpage(hashp, last_bfp); 223 last_bfp = rbufp; 224 if (!rbufp) 225 return (-1); /* Error. */ 226 bp = (u_int16_t *)rbufp->page; 227 } 228 229 /* 230 * If we get here then rbufp points to the last page of the big 231 * key/data pair. Bufp points to the first one -- it should now be 232 * empty pointing to the next page after this pair. Can't free it 233 * because we don't have the page pointing to it. 234 */ 235 236 /* This is information from the last page of the pair. */ 237 n = bp[0]; 238 pageno = bp[n - 1]; 239 240 /* Now, bp is the first page of the pair. */ 241 bp = (u_int16_t *)bufp->page; 242 if (n > 2) { 243 /* There is an overflow page. */ 244 bp[1] = pageno; 245 bp[2] = OVFLPAGE; 246 bufp->ovfl = rbufp->ovfl; 247 } else 248 /* This is the last page. */ 249 bufp->ovfl = NULL; 250 n -= 2; 251 bp[0] = n; 252 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 253 OFFSET(bp) = hashp->BSIZE; 254 255 bufp->flags |= BUF_MOD; 256 if (rbufp) 257 __free_ovflpage(hashp, rbufp); 258 if (last_bfp && last_bfp != rbufp) 259 __free_ovflpage(hashp, last_bfp); 260 261 hashp->NKEYS--; 262 return (0); 263 } 264 /* 265 * Returns: 266 * 0 = key not found 267 * -1 = get next overflow page 268 * -2 means key not found and this is big key/data 269 * -3 error 270 */ 271 int 272 __find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size) 273 { 274 u_int16_t *bp; 275 char *p; 276 int ksize; 277 u_int16_t bytes; 278 char *kkey; 279 280 bp = (u_int16_t *)bufp->page; 281 p = bufp->page; 282 ksize = size; 283 kkey = key; 284 285 for (bytes = hashp->BSIZE - bp[ndx]; 286 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 287 bytes = hashp->BSIZE - bp[ndx]) { 288 if (memcmp(p + bp[ndx], kkey, bytes)) 289 return (-2); 290 kkey += bytes; 291 ksize -= bytes; 292 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); 293 if (!bufp) 294 return (-3); 295 p = bufp->page; 296 bp = (u_int16_t *)p; 297 ndx = 1; 298 } 299 300 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 301 #ifdef HASH_STATISTICS 302 ++hash_collisions; 303 #endif 304 return (-2); 305 } else 306 return (ndx); 307 } 308 309 /* 310 * Given the buffer pointer of the first overflow page of a big pair, 311 * find the end of the big pair 312 * 313 * This will set bpp to the buffer header of the last page of the big pair. 314 * It will return the pageno of the overflow page following the last page 315 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 316 * bucket) 317 */ 318 u_int16_t 319 __find_last_page(HTAB *hashp, BUFHEAD **bpp) 320 { 321 BUFHEAD *bufp; 322 u_int16_t *bp, pageno; 323 int n; 324 325 bufp = *bpp; 326 bp = (u_int16_t *)bufp->page; 327 for (;;) { 328 n = bp[0]; 329 330 /* 331 * This is the last page if: the tag is FULL_KEY_DATA and 332 * either only 2 entries OVFLPAGE marker is explicit there 333 * is freespace on the page. 334 */ 335 if (bp[2] == FULL_KEY_DATA && 336 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 337 break; 338 339 pageno = bp[n - 1]; 340 bufp = __get_buf(hashp, pageno, bufp, 0); 341 if (!bufp) 342 return (0); /* Need to indicate an error! */ 343 bp = (u_int16_t *)bufp->page; 344 } 345 346 *bpp = bufp; 347 if (bp[0] > 2) 348 return (bp[3]); 349 else 350 return (0); 351 } 352 353 /* 354 * Return the data for the key/data pair that begins on this page at this 355 * index (index should always be 1). 356 */ 357 int 358 __big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current) 359 { 360 BUFHEAD *save_p; 361 u_int16_t *bp, len, off, save_addr; 362 char *tp; 363 364 bp = (u_int16_t *)bufp->page; 365 while (bp[ndx + 1] == PARTIAL_KEY) { 366 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 367 if (!bufp) 368 return (-1); 369 bp = (u_int16_t *)bufp->page; 370 ndx = 1; 371 } 372 373 if (bp[ndx + 1] == FULL_KEY) { 374 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 375 if (!bufp) 376 return (-1); 377 bp = (u_int16_t *)bufp->page; 378 save_p = bufp; 379 save_addr = save_p->addr; 380 off = bp[1]; 381 len = 0; 382 } else 383 if (!FREESPACE(bp)) { 384 /* 385 * This is a hack. We can't distinguish between 386 * FULL_KEY_DATA that contains complete data or 387 * incomplete data, so we require that if the data 388 * is complete, there is at least 1 byte of free 389 * space left. 390 */ 391 off = bp[bp[0]]; 392 len = bp[1] - off; 393 save_p = bufp; 394 save_addr = bufp->addr; 395 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 396 if (!bufp) 397 return (-1); 398 bp = (u_int16_t *)bufp->page; 399 } else { 400 /* The data is all on one page. */ 401 tp = (char *)bp; 402 off = bp[bp[0]]; 403 val->data = (u_char *)tp + off; 404 val->size = bp[1] - off; 405 if (set_current) { 406 if (bp[0] == 2) { /* No more buckets in 407 * chain */ 408 hashp->cpage = NULL; 409 hashp->cbucket++; 410 hashp->cndx = 1; 411 } else { 412 hashp->cpage = __get_buf(hashp, 413 bp[bp[0] - 1], bufp, 0); 414 if (!hashp->cpage) 415 return (-1); 416 hashp->cndx = 1; 417 if (!((u_int16_t *) 418 hashp->cpage->page)[0]) { 419 hashp->cbucket++; 420 hashp->cpage = NULL; 421 } 422 } 423 } 424 return (0); 425 } 426 427 val->size = (size_t)collect_data(hashp, bufp, (int)len, set_current); 428 if (val->size == (size_t)-1) 429 return (-1); 430 if (save_p->addr != save_addr) { 431 /* We are pretty short on buffers. */ 432 errno = EINVAL; /* OUT OF BUFFERS */ 433 return (-1); 434 } 435 memmove(hashp->tmp_buf, (save_p->page) + off, len); 436 val->data = (u_char *)hashp->tmp_buf; 437 return (0); 438 } 439 /* 440 * Count how big the total datasize is by recursing through the pages. Then 441 * allocate a buffer and copy the data as you recurse up. 442 */ 443 static int 444 collect_data(HTAB *hashp, BUFHEAD *bufp, int len, int set) 445 { 446 u_int16_t *bp; 447 char *p; 448 BUFHEAD *xbp; 449 u_int16_t save_addr; 450 int mylen, totlen; 451 452 p = bufp->page; 453 bp = (u_int16_t *)p; 454 mylen = hashp->BSIZE - bp[1]; 455 save_addr = bufp->addr; 456 457 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 458 totlen = len + mylen; 459 if (hashp->tmp_buf) 460 free(hashp->tmp_buf); 461 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) 462 return (-1); 463 if (set) { 464 hashp->cndx = 1; 465 if (bp[0] == 2) { /* No more buckets in chain */ 466 hashp->cpage = NULL; 467 hashp->cbucket++; 468 } else { 469 hashp->cpage = 470 __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 471 if (!hashp->cpage) 472 return (-1); 473 else if (!((u_int16_t *)hashp->cpage->page)[0]) { 474 hashp->cbucket++; 475 hashp->cpage = NULL; 476 } 477 } 478 } 479 } else { 480 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 481 if (!xbp || ((totlen = 482 collect_data(hashp, xbp, len + mylen, set)) < 1)) 483 return (-1); 484 } 485 if (bufp->addr != save_addr) { 486 errno = EINVAL; /* Out of buffers. */ 487 return (-1); 488 } 489 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); 490 return (totlen); 491 } 492 493 /* 494 * Fill in the key and data for this big pair. 495 */ 496 int 497 __big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set) 498 { 499 key->size = (size_t)collect_key(hashp, bufp, 0, val, set); 500 if (key->size == (size_t)-1) 501 return (-1); 502 key->data = (u_char *)hashp->tmp_key; 503 return (0); 504 } 505 506 /* 507 * Count how big the total key size is by recursing through the pages. Then 508 * collect the data, allocate a buffer and copy the key as you recurse up. 509 */ 510 static int 511 collect_key(HTAB *hashp, BUFHEAD *bufp, int len, DBT *val, int set) 512 { 513 BUFHEAD *xbp; 514 char *p; 515 int mylen, totlen; 516 u_int16_t *bp, save_addr; 517 518 p = bufp->page; 519 bp = (u_int16_t *)p; 520 mylen = hashp->BSIZE - bp[1]; 521 522 save_addr = bufp->addr; 523 totlen = len + mylen; 524 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 525 if (hashp->tmp_key != NULL) 526 free(hashp->tmp_key); 527 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) 528 return (-1); 529 if (__big_return(hashp, bufp, 1, val, set)) 530 return (-1); 531 } else { 532 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); 533 if (!xbp || ((totlen = 534 collect_key(hashp, xbp, totlen, val, set)) < 1)) 535 return (-1); 536 } 537 if (bufp->addr != save_addr) { 538 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 539 return (-1); 540 } 541 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); 542 return (totlen); 543 } 544 545 /* 546 * Returns: 547 * 0 => OK 548 * -1 => error 549 */ 550 int 551 __big_split(HTAB *hashp, 552 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */ 553 BUFHEAD *np, /* Pointer to new bucket page */ 554 BUFHEAD *big_keyp, /* Pointer to first page containing the big key/data */ 555 int addr, /* Address of big_keyp */ 556 u_int32_t obucket, /* Old Bucket */ 557 SPLIT_RETURN *ret) 558 { 559 BUFHEAD *bp, *tmpp; 560 DBT key, val; 561 u_int32_t change; 562 u_int16_t free_space, n, off, *tp; 563 564 bp = big_keyp; 565 566 /* Now figure out where the big key/data goes */ 567 if (__big_keydata(hashp, big_keyp, &key, &val, 0)) 568 return (-1); 569 change = (__call_hash(hashp, key.data, key.size) != obucket); 570 571 if ( (ret->next_addr = __find_last_page(hashp, &big_keyp)) ) { 572 if (!(ret->nextp = 573 __get_buf(hashp, ret->next_addr, big_keyp, 0))) 574 return (-1); 575 } else 576 ret->nextp = NULL; 577 578 /* Now make one of np/op point to the big key/data pair */ 579 #ifdef DEBUG 580 assert(np->ovfl == NULL); 581 #endif 582 if (change) 583 tmpp = np; 584 else 585 tmpp = op; 586 587 tmpp->flags |= BUF_MOD; 588 #ifdef DEBUG1 589 (void)fprintf(stderr, 590 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 591 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 592 #endif 593 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 594 tp = (u_int16_t *)tmpp->page; 595 #ifdef DEBUG 596 assert(FREESPACE(tp) >= OVFLSIZE); 597 #endif 598 n = tp[0]; 599 off = OFFSET(tp); 600 free_space = FREESPACE(tp); 601 tp[++n] = (u_int16_t)addr; 602 tp[++n] = OVFLPAGE; 603 tp[0] = n; 604 OFFSET(tp) = off; 605 FREESPACE(tp) = free_space - OVFLSIZE; 606 607 /* 608 * Finally, set the new and old return values. BIG_KEYP contains a 609 * pointer to the last page of the big key_data pair. Make sure that 610 * big_keyp has no following page (2 elements) or create an empty 611 * following page. 612 */ 613 614 ret->newp = np; 615 ret->oldp = op; 616 617 tp = (u_int16_t *)big_keyp->page; 618 big_keyp->flags |= BUF_MOD; 619 if (tp[0] > 2) { 620 /* 621 * There may be either one or two offsets on this page. If 622 * there is one, then the overflow page is linked on normally 623 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 624 * the second offset and needs to get stuffed in after the 625 * next overflow page is added. 626 */ 627 n = tp[4]; 628 free_space = FREESPACE(tp); 629 off = OFFSET(tp); 630 tp[0] -= 2; 631 FREESPACE(tp) = free_space + OVFLSIZE; 632 OFFSET(tp) = off; 633 tmpp = __add_ovflpage(hashp, big_keyp); 634 if (!tmpp) 635 return (-1); 636 tp[4] = n; 637 } else 638 tmpp = big_keyp; 639 640 if (change) 641 ret->newp = tmpp; 642 else 643 ret->oldp = tmpp; 644 return (0); 645 } 646