1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 */ 7 /* 8 * Copyright (c) 1990, 1993, 1994 9 * Margo Seltzer. All rights reserved. 10 */ 11 /* 12 * Copyright (c) 1990, 1993, 1994 13 * The Regents of the University of California. All rights reserved. 14 * 15 * This code is derived from software contributed to Berkeley by 16 * Margo Seltzer. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 3. All advertising materials mentioning features or use of this software 27 * must display the following acknowledgement: 28 * This product includes software developed by the University of 29 * California, Berkeley and its contributors. 30 * 4. Neither the name of the University nor the names of its contributors 31 * may be used to endorse or promote products derived from this software 32 * without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 44 * SUCH DAMAGE. 45 */ 46 47 #include "config.h" 48 49 #ifndef lint 50 static const char sccsid[] = "@(#)hash_page.c 10.55 (Sleepycat) 1/3/99"; 51 #endif /* not lint */ 52 53 /* 54 * PACKAGE: hashing 55 * 56 * DESCRIPTION: 57 * Page manipulation for hashing package. 58 * 59 * ROUTINES: 60 * 61 * External 62 * __get_page 63 * __add_ovflpage 64 * __overflow_page 65 * Internal 66 * open_temp 67 */ 68 69 #ifndef NO_SYSTEM_INCLUDES 70 #include <sys/types.h> 71 72 #include <errno.h> 73 #include <string.h> 74 #endif 75 76 #include "db_int.h" 77 #include "db_page.h" 78 #include "hash.h" 79 80 static int __ham_lock_bucket __P((DBC *, db_lockmode_t)); 81 82 #ifdef DEBUG_SLOW 83 static void __account_page(DB *, db_pgno_t, int); 84 #endif 85 86 /* 87 * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t)); 88 */ 89 int 90 __ham_item(dbc, mode) 91 DBC *dbc; 92 db_lockmode_t mode; 93 { 94 DB *dbp; 95 HASH_CURSOR *hcp; 96 db_pgno_t next_pgno; 97 int ret; 98 99 dbp = dbc->dbp; 100 hcp = (HASH_CURSOR *)dbc->internal; 101 102 if (F_ISSET(hcp, H_DELETED)) 103 return (EINVAL); 104 F_CLR(hcp, H_OK | H_NOMORE); 105 106 /* Check if we need to get a page for this cursor. */ 107 if ((ret = __ham_get_cpage(dbc, mode)) != 0) 108 return (ret); 109 110 /* Check if we are looking for space in which to insert an item. */ 111 if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID 112 && hcp->seek_size < P_FREESPACE(hcp->pagep)) 113 hcp->seek_found_page = hcp->pgno; 114 115 /* Check if we need to go on to the next page. */ 116 if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno == PGNO_INVALID) 117 /* 118 * ISDUP is set, and offset is at the beginning of the datum. 119 * We need to grab the length of the datum, then set the datum 120 * pointer to be the beginning of the datum. 121 */ 122 memcpy(&hcp->dup_len, 123 HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)) + 124 hcp->dup_off, sizeof(db_indx_t)); 125 else if (F_ISSET(hcp, H_ISDUP)) { 126 /* Make sure we're not about to run off the page. */ 127 if (hcp->dpagep == NULL && (ret = __ham_get_page(dbp, 128 hcp->dpgno, &hcp->dpagep)) != 0) 129 return (ret); 130 131 if (hcp->dndx >= NUM_ENT(hcp->dpagep)) { 132 if (NEXT_PGNO(hcp->dpagep) == PGNO_INVALID) { 133 if (F_ISSET(hcp, H_DUPONLY)) { 134 F_CLR(hcp, H_OK); 135 F_SET(hcp, H_NOMORE); 136 return (0); 137 } 138 if ((ret = __ham_put_page(dbp, 139 hcp->dpagep, 0)) != 0) 140 return (ret); 141 F_CLR(hcp, H_ISDUP); 142 hcp->dpagep = NULL; 143 hcp->dpgno = PGNO_INVALID; 144 hcp->dndx = NDX_INVALID; 145 hcp->bndx++; 146 } else if ((ret = __ham_next_cpage(dbc, 147 NEXT_PGNO(hcp->dpagep), 0, H_ISDUP)) != 0) 148 return (ret); 149 } 150 } 151 152 if (hcp->bndx >= (db_indx_t)H_NUMPAIRS(hcp->pagep)) { 153 /* Fetch next page. */ 154 if (NEXT_PGNO(hcp->pagep) == PGNO_INVALID) { 155 F_SET(hcp, H_NOMORE); 156 if (hcp->dpagep != NULL && 157 (ret = __ham_put_page(dbp, hcp->dpagep, 0)) != 0) 158 return (ret); 159 hcp->dpgno = PGNO_INVALID; 160 return (DB_NOTFOUND); 161 } 162 next_pgno = NEXT_PGNO(hcp->pagep); 163 hcp->bndx = 0; 164 if ((ret = __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0) 165 return (ret); 166 } 167 168 F_SET(hcp, H_OK); 169 return (0); 170 } 171 172 /* 173 * PUBLIC: int __ham_item_reset __P((DBC *)); 174 */ 175 int 176 __ham_item_reset(dbc) 177 DBC *dbc; 178 { 179 HASH_CURSOR *hcp; 180 DB *dbp; 181 int ret; 182 183 ret = 0; 184 dbp = dbc->dbp; 185 hcp = (HASH_CURSOR *)dbc->internal; 186 if (hcp->pagep != NULL) 187 ret = __ham_put_page(dbp, hcp->pagep, 0); 188 if (ret == 0 && hcp->dpagep != NULL) 189 ret = __ham_put_page(dbp, hcp->dpagep, 0); 190 191 __ham_item_init(hcp); 192 return (ret); 193 } 194 195 /* 196 * PUBLIC: void __ham_item_init __P((HASH_CURSOR *)); 197 */ 198 void 199 __ham_item_init(hcp) 200 HASH_CURSOR *hcp; 201 { 202 /* 203 * If this cursor still holds any locks, we must 204 * release them if we are not running with transactions. 205 */ 206 if (hcp->lock && hcp->dbc->txn == NULL) 207 (void)lock_put(hcp->dbc->dbp->dbenv->lk_info, hcp->lock); 208 209 /* 210 * The following fields must *not* be initialized here 211 * because they may have meaning across inits. 212 * hlock, hdr, split_buf, stats 213 */ 214 hcp->bucket = BUCKET_INVALID; 215 hcp->lbucket = BUCKET_INVALID; 216 hcp->lock = 0; 217 hcp->pagep = NULL; 218 hcp->pgno = PGNO_INVALID; 219 hcp->bndx = NDX_INVALID; 220 hcp->dpagep = NULL; 221 hcp->dpgno = PGNO_INVALID; 222 hcp->dndx = NDX_INVALID; 223 hcp->dup_off = 0; 224 hcp->dup_len = 0; 225 hcp->dup_tlen = 0; 226 hcp->seek_size = 0; 227 hcp->seek_found_page = PGNO_INVALID; 228 hcp->flags = 0; 229 } 230 231 /* 232 * PUBLIC: int __ham_item_done __P((DBC *, int)); 233 */ 234 int 235 __ham_item_done(dbc, dirty) 236 DBC *dbc; 237 int dirty; 238 { 239 DB *dbp; 240 HASH_CURSOR *hcp; 241 int ret, t_ret; 242 243 dbp = dbc->dbp; 244 hcp = (HASH_CURSOR *)dbc->internal; 245 t_ret = ret = 0; 246 247 if (hcp->pagep) 248 ret = __ham_put_page(dbp, hcp->pagep, 249 dirty && hcp->dpagep == NULL); 250 hcp->pagep = NULL; 251 252 if (hcp->dpagep) 253 t_ret = __ham_put_page(dbp, hcp->dpagep, dirty); 254 hcp->dpagep = NULL; 255 256 if (ret == 0 && t_ret != 0) 257 ret = t_ret; 258 259 /* 260 * We don't throw out the page number since we might want to 261 * continue getting on this page. 262 */ 263 return (ret != 0 ? ret : t_ret); 264 } 265 266 /* 267 * Returns the last item in a bucket. 268 * 269 * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t)); 270 */ 271 int 272 __ham_item_last(dbc, mode) 273 DBC *dbc; 274 db_lockmode_t mode; 275 { 276 HASH_CURSOR *hcp; 277 int ret; 278 279 hcp = (HASH_CURSOR *)dbc->internal; 280 if ((ret = __ham_item_reset(dbc)) != 0) 281 return (ret); 282 283 hcp->bucket = hcp->hdr->max_bucket; 284 F_SET(hcp, H_OK); 285 return (__ham_item_prev(dbc, mode)); 286 } 287 288 /* 289 * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t)); 290 */ 291 int 292 __ham_item_first(dbc, mode) 293 DBC *dbc; 294 db_lockmode_t mode; 295 { 296 HASH_CURSOR *hcp; 297 int ret; 298 299 hcp = (HASH_CURSOR *)dbc->internal; 300 if ((ret = __ham_item_reset(dbc)) != 0) 301 return (ret); 302 F_SET(hcp, H_OK); 303 hcp->bucket = 0; 304 return (__ham_item_next(dbc, mode)); 305 } 306 307 /* 308 * __ham_item_prev -- 309 * Returns a pointer to key/data pair on a page. In the case of 310 * bigkeys, just returns the page number and index of the bigkey 311 * pointer pair. 312 * 313 * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t)); 314 */ 315 int 316 __ham_item_prev(dbc, mode) 317 DBC *dbc; 318 db_lockmode_t mode; 319 { 320 DB *dbp; 321 HASH_CURSOR *hcp; 322 db_pgno_t next_pgno; 323 int ret; 324 325 dbp = dbc->dbp; 326 hcp = (HASH_CURSOR *)dbc->internal; 327 /* 328 * There are N cases for backing up in a hash file. 329 * Case 1: In the middle of a page, no duplicates, just dec the index. 330 * Case 2: In the middle of a duplicate set, back up one. 331 * Case 3: At the beginning of a duplicate set, get out of set and 332 * back up to next key. 333 * Case 4: At the beginning of a page; go to previous page. 334 * Case 5: At the beginning of a bucket; go to prev bucket. 335 */ 336 F_CLR(hcp, H_OK | H_NOMORE | H_DELETED); 337 338 /* 339 * First handle the duplicates. Either you'll get the key here 340 * or you'll exit the duplicate set and drop into the code below 341 * to handle backing up through keys. 342 */ 343 if (F_ISSET(hcp, H_ISDUP)) { 344 if (hcp->dpgno == PGNO_INVALID) { 345 /* Duplicates are on-page. */ 346 if (hcp->dup_off != 0) 347 if ((ret = __ham_get_cpage(dbc, mode)) != 0) 348 return (ret); 349 else { 350 HASH_CURSOR *h; 351 h = hcp; 352 memcpy(&h->dup_len, HKEYDATA_DATA( 353 H_PAIRDATA(h->pagep, h->bndx)) 354 + h->dup_off - sizeof(db_indx_t), 355 sizeof(db_indx_t)); 356 hcp->dup_off -= 357 DUP_SIZE(hcp->dup_len); 358 hcp->dndx--; 359 return (__ham_item(dbc, mode)); 360 } 361 } else if (hcp->dndx > 0) { /* Duplicates are off-page. */ 362 hcp->dndx--; 363 return (__ham_item(dbc, mode)); 364 } else if ((ret = __ham_get_cpage(dbc, mode)) != 0) 365 return (ret); 366 else if (PREV_PGNO(hcp->dpagep) == PGNO_INVALID) { 367 if (F_ISSET(hcp, H_DUPONLY)) { 368 F_CLR(hcp, H_OK); 369 F_SET(hcp, H_NOMORE); 370 return (0); 371 } else { 372 F_CLR(hcp, H_ISDUP); /* End of dups */ 373 hcp->dpgno = PGNO_INVALID; 374 if (hcp->dpagep != NULL) 375 (void)__ham_put_page(dbp, 376 hcp->dpagep, 0); 377 hcp->dpagep = NULL; 378 } 379 } else if ((ret = __ham_next_cpage(dbc, 380 PREV_PGNO(hcp->dpagep), 0, H_ISDUP)) != 0) 381 return (ret); 382 else { 383 hcp->dndx = NUM_ENT(hcp->pagep) - 1; 384 return (__ham_item(dbc, mode)); 385 } 386 } 387 388 /* 389 * If we get here, we are not in a duplicate set, and just need 390 * to back up the cursor. There are still three cases: 391 * midpage, beginning of page, beginning of bucket. 392 */ 393 394 if (F_ISSET(hcp, H_DUPONLY)) { 395 F_CLR(hcp, H_OK); 396 F_SET(hcp, H_NOMORE); 397 return (0); 398 } 399 400 if (hcp->bndx == 0) { /* Beginning of page. */ 401 if ((ret = __ham_get_cpage(dbc, mode)) != 0) 402 return (ret); 403 hcp->pgno = PREV_PGNO(hcp->pagep); 404 if (hcp->pgno == PGNO_INVALID) { 405 /* Beginning of bucket. */ 406 F_SET(hcp, H_NOMORE); 407 return (DB_NOTFOUND); 408 } else if ((ret = 409 __ham_next_cpage(dbc, hcp->pgno, 0, 0)) != 0) 410 return (ret); 411 else 412 hcp->bndx = H_NUMPAIRS(hcp->pagep); 413 } 414 415 /* 416 * Either we've got the cursor set up to be decremented, or we 417 * have to find the end of a bucket. 418 */ 419 if (hcp->bndx == NDX_INVALID) { 420 if (hcp->pagep == NULL) 421 next_pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); 422 else 423 goto got_page; 424 425 do { 426 if ((ret = __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0) 427 return (ret); 428 got_page: next_pgno = NEXT_PGNO(hcp->pagep); 429 hcp->bndx = H_NUMPAIRS(hcp->pagep); 430 } while (next_pgno != PGNO_INVALID); 431 432 if (hcp->bndx == 0) { 433 /* Bucket was empty. */ 434 F_SET(hcp, H_NOMORE); 435 return (DB_NOTFOUND); 436 } 437 } 438 439 hcp->bndx--; 440 441 return (__ham_item(dbc, mode)); 442 } 443 444 /* 445 * Sets the cursor to the next key/data pair on a page. 446 * 447 * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t)); 448 */ 449 int 450 __ham_item_next(dbc, mode) 451 DBC *dbc; 452 db_lockmode_t mode; 453 { 454 HASH_CURSOR *hcp; 455 456 hcp = (HASH_CURSOR *)dbc->internal; 457 /* 458 * Deleted on-page duplicates are a weird case. If we delete the last 459 * one, then our cursor is at the very end of a duplicate set and 460 * we actually need to go on to the next key. 461 */ 462 if (F_ISSET(hcp, H_DELETED)) { 463 if (hcp->bndx != NDX_INVALID && 464 F_ISSET(hcp, H_ISDUP) && 465 hcp->dpgno == PGNO_INVALID && 466 hcp->dup_tlen == hcp->dup_off) { 467 if (F_ISSET(hcp, H_DUPONLY)) { 468 F_CLR(hcp, H_OK); 469 F_SET(hcp, H_NOMORE); 470 return (0); 471 } else { 472 F_CLR(hcp, H_ISDUP); 473 hcp->dpgno = PGNO_INVALID; 474 hcp->bndx++; 475 } 476 } else if (!F_ISSET(hcp, H_ISDUP) && 477 F_ISSET(hcp, H_DUPONLY)) { 478 F_CLR(hcp, H_OK); 479 F_SET(hcp, H_NOMORE); 480 return (0); 481 } 482 F_CLR(hcp, H_DELETED); 483 } else if (hcp->bndx == NDX_INVALID) { 484 hcp->bndx = 0; 485 hcp->dpgno = PGNO_INVALID; 486 F_CLR(hcp, H_ISDUP); 487 } else if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) 488 hcp->dndx++; 489 else if (F_ISSET(hcp, H_ISDUP)) { 490 if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >= 491 hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) { 492 F_CLR(hcp, H_OK); 493 F_SET(hcp, H_NOMORE); 494 return (0); 495 } 496 hcp->dndx++; 497 hcp->dup_off += DUP_SIZE(hcp->dup_len); 498 if (hcp->dup_off >= hcp->dup_tlen) { 499 F_CLR(hcp, H_ISDUP); 500 hcp->dpgno = PGNO_INVALID; 501 hcp->bndx++; 502 } 503 } else if (F_ISSET(hcp, H_DUPONLY)) { 504 F_CLR(hcp, H_OK); 505 F_SET(hcp, H_NOMORE); 506 return (0); 507 } else 508 hcp->bndx++; 509 510 return (__ham_item(dbc, mode)); 511 } 512 513 /* 514 * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int)); 515 * 516 * This is a little bit sleazy in that we're overloading the meaning 517 * of the H_OFFPAGE type here. When we recover deletes, we have the 518 * entire entry instead of having only the DBT, so we'll pass type 519 * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing 520 * an H_KEYDATA around it. 521 */ 522 void 523 __ham_putitem(p, dbt, type) 524 PAGE *p; 525 const DBT *dbt; 526 int type; 527 { 528 u_int16_t n, off; 529 530 n = NUM_ENT(p); 531 532 /* Put the item element on the page. */ 533 if (type == H_OFFPAGE) { 534 off = HOFFSET(p) - dbt->size; 535 HOFFSET(p) = p->inp[n] = off; 536 memcpy(P_ENTRY(p, n), dbt->data, dbt->size); 537 } else { 538 off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size); 539 HOFFSET(p) = p->inp[n] = off; 540 PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type); 541 } 542 543 /* Adjust page info. */ 544 NUM_ENT(p) += 1; 545 } 546 547 /* 548 * PUBLIC: void __ham_reputpair 549 * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *)); 550 * 551 * This is a special case to restore a key/data pair to its original 552 * location during recovery. We are guaranteed that the pair fits 553 * on the page and is not the last pair on the page (because if it's 554 * the last pair, the normal insert works). 555 */ 556 void 557 __ham_reputpair(p, psize, ndx, key, data) 558 PAGE *p; 559 u_int32_t psize, ndx; 560 const DBT *key, *data; 561 { 562 db_indx_t i, movebytes, newbytes; 563 u_int8_t *from; 564 565 /* First shuffle the existing items up on the page. */ 566 movebytes = 567 (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - HOFFSET(p); 568 newbytes = key->size + data->size; 569 from = (u_int8_t *)p + HOFFSET(p); 570 memmove(from - newbytes, from, movebytes); 571 572 /* 573 * Adjust the indices and move them up 2 spaces. Note that we 574 * have to check the exit condition inside the loop just in case 575 * we are dealing with index 0 (db_indx_t's are unsigned). 576 */ 577 for (i = NUM_ENT(p) - 1; ; i-- ) { 578 p->inp[i + 2] = p->inp[i] - newbytes; 579 if (i == H_KEYINDEX(ndx)) 580 break; 581 } 582 583 /* Put the key and data on the page. */ 584 p->inp[H_KEYINDEX(ndx)] = 585 (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - key->size; 586 p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size; 587 memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size); 588 memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size); 589 590 /* Adjust page info. */ 591 HOFFSET(p) -= newbytes; 592 NUM_ENT(p) += 2; 593 } 594 595 596 /* 597 * PUBLIC: int __ham_del_pair __P((DBC *, int)); 598 */ 599 int 600 __ham_del_pair(dbc, reclaim_page) 601 DBC *dbc; 602 int reclaim_page; 603 { 604 DB *dbp; 605 HASH_CURSOR *hcp; 606 DBT data_dbt, key_dbt; 607 DB_ENV *dbenv; 608 DB_LSN new_lsn, *n_lsn, tmp_lsn; 609 PAGE *p; 610 db_indx_t ndx; 611 db_pgno_t chg_pgno, pgno; 612 int ret, tret; 613 614 dbp = dbc->dbp; 615 hcp = (HASH_CURSOR *)dbc->internal; 616 617 dbenv = dbp->dbenv; 618 ndx = hcp->bndx; 619 if (hcp->pagep == NULL && 620 (ret = __ham_get_page(dbp, hcp->pgno, &hcp->pagep)) != 0) 621 return (ret); 622 623 p = hcp->pagep; 624 625 /* 626 * We optimize for the normal case which is when neither the key nor 627 * the data are large. In this case, we write a single log record 628 * and do the delete. If either is large, we'll call __big_delete 629 * to remove the big item and then update the page to remove the 630 * entry referring to the big item. 631 */ 632 ret = 0; 633 if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) { 634 memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))), 635 sizeof(db_pgno_t)); 636 ret = __db_doff(dbc, pgno, __ham_del_page); 637 } 638 639 if (ret == 0) 640 switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) { 641 case H_OFFPAGE: 642 memcpy(&pgno, 643 HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))), 644 sizeof(db_pgno_t)); 645 ret = __db_doff(dbc, pgno, __ham_del_page); 646 break; 647 case H_OFFDUP: 648 memcpy(&pgno, 649 HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))), 650 sizeof(db_pgno_t)); 651 ret = __db_ddup(dbc, pgno, __ham_del_page); 652 F_CLR(hcp, H_ISDUP); 653 break; 654 case H_DUPLICATE: 655 /* 656 * If we delete a pair that is/was a duplicate, then 657 * we had better clear the flag so that we update the 658 * cursor appropriately. 659 */ 660 F_CLR(hcp, H_ISDUP); 661 break; 662 } 663 664 if (ret) 665 return (ret); 666 667 /* Now log the delete off this page. */ 668 if (DB_LOGGING(dbc)) { 669 key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx)); 670 key_dbt.size = 671 LEN_HITEM(p, hcp->hdr->pagesize, H_KEYINDEX(ndx)); 672 data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx)); 673 data_dbt.size = 674 LEN_HITEM(p, hcp->hdr->pagesize, H_DATAINDEX(ndx)); 675 676 if ((ret = __ham_insdel_log(dbenv->lg_info, 677 dbc->txn, &new_lsn, 0, DELPAIR, 678 dbp->log_fileid, PGNO(p), (u_int32_t)ndx, 679 &LSN(p), &key_dbt, &data_dbt)) != 0) 680 return (ret); 681 682 /* Move lsn onto page. */ 683 LSN(p) = new_lsn; 684 } 685 686 __ham_dpair(dbp, p, ndx); 687 688 /* 689 * If we are locking, we will not maintain this, because it is 690 * a hot spot. 691 * XXX perhaps we can retain incremental numbers and apply them 692 * later. 693 */ 694 if (!F_ISSET(dbp, DB_AM_LOCKING)) 695 --hcp->hdr->nelem; 696 697 /* 698 * If we need to reclaim the page, then check if the page is empty. 699 * There are two cases. If it's empty and it's not the first page 700 * in the bucket (i.e., the bucket page) then we can simply remove 701 * it. If it is the first chain in the bucket, then we need to copy 702 * the second page into it and remove the second page. 703 */ 704 if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID && 705 NEXT_PGNO(p) != PGNO_INVALID) { 706 PAGE *n_pagep, *nn_pagep; 707 db_pgno_t tmp_pgno; 708 709 /* 710 * First page in chain is empty and we know that there 711 * are more pages in the chain. 712 */ 713 if ((ret = 714 __ham_get_page(dbp, NEXT_PGNO(p), &n_pagep)) != 0) 715 return (ret); 716 717 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) { 718 if ((ret = 719 __ham_get_page(dbp, NEXT_PGNO(n_pagep), 720 &nn_pagep)) != 0) { 721 (void) __ham_put_page(dbp, n_pagep, 0); 722 return (ret); 723 } 724 } 725 726 if (DB_LOGGING(dbc)) { 727 key_dbt.data = n_pagep; 728 key_dbt.size = hcp->hdr->pagesize; 729 if ((ret = __ham_copypage_log(dbenv->lg_info, 730 dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(p), 731 &LSN(p), PGNO(n_pagep), &LSN(n_pagep), 732 NEXT_PGNO(n_pagep), 733 NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL : 734 &LSN(nn_pagep), &key_dbt)) != 0) 735 return (ret); 736 737 /* Move lsn onto page. */ 738 LSN(p) = new_lsn; /* Structure assignment. */ 739 LSN(n_pagep) = new_lsn; 740 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) 741 LSN(nn_pagep) = new_lsn; 742 } 743 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) { 744 PREV_PGNO(nn_pagep) = PGNO(p); 745 (void)__ham_put_page(dbp, nn_pagep, 1); 746 } 747 748 tmp_pgno = PGNO(p); 749 tmp_lsn = LSN(p); 750 memcpy(p, n_pagep, hcp->hdr->pagesize); 751 PGNO(p) = tmp_pgno; 752 LSN(p) = tmp_lsn; 753 PREV_PGNO(p) = PGNO_INVALID; 754 755 /* 756 * Cursor is advanced to the beginning of the next page. 757 */ 758 hcp->bndx = 0; 759 hcp->pgno = PGNO(p); 760 F_SET(hcp, H_DELETED); 761 chg_pgno = PGNO(p); 762 if ((ret = __ham_dirty_page(dbp, p)) != 0 || 763 (ret = __ham_del_page(dbc, n_pagep)) != 0) 764 return (ret); 765 } else if (reclaim_page && 766 NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) { 767 PAGE *n_pagep, *p_pagep; 768 769 if ((ret = 770 __ham_get_page(dbp, PREV_PGNO(p), &p_pagep)) != 0) 771 return (ret); 772 773 if (NEXT_PGNO(p) != PGNO_INVALID) { 774 if ((ret = __ham_get_page(dbp, 775 NEXT_PGNO(p), &n_pagep)) != 0) { 776 (void)__ham_put_page(dbp, p_pagep, 0); 777 return (ret); 778 } 779 n_lsn = &LSN(n_pagep); 780 } else { 781 n_pagep = NULL; 782 n_lsn = NULL; 783 } 784 785 NEXT_PGNO(p_pagep) = NEXT_PGNO(p); 786 if (n_pagep != NULL) 787 PREV_PGNO(n_pagep) = PGNO(p_pagep); 788 789 if (DB_LOGGING(dbc)) { 790 if ((ret = __ham_newpage_log(dbenv->lg_info, 791 dbc->txn, &new_lsn, 0, DELOVFL, 792 dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep), 793 PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0) 794 return (ret); 795 796 /* Move lsn onto page. */ 797 LSN(p_pagep) = new_lsn; /* Structure assignment. */ 798 if (n_pagep) 799 LSN(n_pagep) = new_lsn; 800 LSN(p) = new_lsn; 801 } 802 hcp->pgno = NEXT_PGNO(p); 803 hcp->bndx = 0; 804 /* 805 * Since we are about to delete the cursor page and we have 806 * just moved the cursor, we need to make sure that the 807 * old page pointer isn't left hanging around in the cursor. 808 */ 809 hcp->pagep = NULL; 810 chg_pgno = PGNO(p); 811 ret = __ham_del_page(dbc, p); 812 if ((tret = __ham_put_page(dbp, p_pagep, 1)) != 0 && 813 ret == 0) 814 ret = tret; 815 if (n_pagep != NULL && 816 (tret = __ham_put_page(dbp, n_pagep, 1)) != 0 && 817 ret == 0) 818 ret = tret; 819 if (ret != 0) 820 return (ret); 821 } else { 822 /* 823 * Mark item deleted so that we don't try to return it, and 824 * so that we update the cursor correctly on the next call 825 * to next. 826 */ 827 F_SET(hcp, H_DELETED); 828 chg_pgno = hcp->pgno; 829 ret = __ham_dirty_page(dbp, p); 830 } 831 __ham_c_update(hcp, chg_pgno, 0, 0, 0); 832 833 /* 834 * Since we just deleted a pair from the master page, anything 835 * in hcp->dpgno should be cleared. 836 */ 837 hcp->dpgno = PGNO_INVALID; 838 839 F_CLR(hcp, H_OK); 840 return (ret); 841 } 842 843 /* 844 * __ham_replpair -- 845 * Given the key data indicated by the cursor, replace part/all of it 846 * according to the fields in the dbt. 847 * 848 * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t)); 849 */ 850 int 851 __ham_replpair(dbc, dbt, make_dup) 852 DBC *dbc; 853 DBT *dbt; 854 u_int32_t make_dup; 855 { 856 DB *dbp; 857 HASH_CURSOR *hcp; 858 DBT old_dbt, tdata, tmp; 859 DB_LSN new_lsn; 860 int32_t change; /* XXX: Possible overflow. */ 861 u_int32_t len; 862 int is_big, ret, type; 863 u_int8_t *beg, *dest, *end, *hk, *src; 864 865 /* 866 * Big item replacements are handled in generic code. 867 * Items that fit on the current page fall into 4 classes. 868 * 1. On-page element, same size 869 * 2. On-page element, new is bigger (fits) 870 * 3. On-page element, new is bigger (does not fit) 871 * 4. On-page element, old is bigger 872 * Numbers 1, 2, and 4 are essentially the same (and should 873 * be the common case). We handle case 3 as a delete and 874 * add. 875 */ 876 dbp = dbc->dbp; 877 hcp = (HASH_CURSOR *)dbc->internal; 878 879 /* 880 * We need to compute the number of bytes that we are adding or 881 * removing from the entry. Normally, we can simply substract 882 * the number of bytes we are replacing (dbt->dlen) from the 883 * number of bytes we are inserting (dbt->size). However, if 884 * we are doing a partial put off the end of a record, then this 885 * formula doesn't work, because we are essentially adding 886 * new bytes. 887 */ 888 change = dbt->size - dbt->dlen; 889 890 hk = H_PAIRDATA(hcp->pagep, hcp->bndx); 891 is_big = HPAGE_PTYPE(hk) == H_OFFPAGE; 892 893 if (is_big) 894 memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); 895 else 896 len = LEN_HKEYDATA(hcp->pagep, 897 dbp->pgsize, H_DATAINDEX(hcp->bndx)); 898 899 if (dbt->doff + dbt->dlen > len) 900 change += dbt->doff + dbt->dlen - len; 901 902 903 if (change > (int32_t)P_FREESPACE(hcp->pagep) || is_big) { 904 /* 905 * Case 3 -- two subcases. 906 * A. This is not really a partial operation, but an overwrite. 907 * Simple del and add works. 908 * B. This is a partial and we need to construct the data that 909 * we are really inserting (yuck). 910 * In both cases, we need to grab the key off the page (in 911 * some cases we could do this outside of this routine; for 912 * cleanliness we do it here. If you happen to be on a big 913 * key, this could be a performance hit). 914 */ 915 tmp.flags = 0; 916 F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL); 917 if ((ret = 918 __db_ret(dbp, hcp->pagep, H_KEYINDEX(hcp->bndx), 919 &tmp, &dbc->rkey.data, &dbc->rkey.size)) != 0) 920 return (ret); 921 922 if (dbt->doff == 0 && dbt->dlen == len) { 923 ret = __ham_del_pair(dbc, 0); 924 if (ret == 0) 925 ret = __ham_add_el(dbc, &tmp, dbt, H_KEYDATA); 926 } else { /* Case B */ 927 type = HPAGE_PTYPE(hk) != H_OFFPAGE ? 928 HPAGE_PTYPE(hk) : H_KEYDATA; 929 tdata.flags = 0; 930 F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL); 931 932 if ((ret = __db_ret(dbp, hcp->pagep, 933 H_DATAINDEX(hcp->bndx), &tdata, &dbc->rdata.data, 934 &dbc->rdata.size)) != 0) 935 goto err; 936 937 /* Now we can delete the item. */ 938 if ((ret = __ham_del_pair(dbc, 0)) != 0) { 939 __os_free(tdata.data, tdata.size); 940 goto err; 941 } 942 943 /* Now shift old data around to make room for new. */ 944 if (change > 0) { 945 if ((ret = __os_realloc(&tdata.data, 946 tdata.size + change)) != 0) 947 return (ret); 948 memset((u_int8_t *)tdata.data + tdata.size, 949 0, change); 950 } 951 end = (u_int8_t *)tdata.data + tdata.size; 952 953 src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen; 954 if (src < end && tdata.size > dbt->doff + dbt->dlen) { 955 len = tdata.size - dbt->doff - dbt->dlen; 956 dest = src + change; 957 memmove(dest, src, len); 958 } 959 memcpy((u_int8_t *)tdata.data + dbt->doff, 960 dbt->data, dbt->size); 961 tdata.size += change; 962 963 /* Now add the pair. */ 964 ret = __ham_add_el(dbc, &tmp, &tdata, type); 965 __os_free(tdata.data, tdata.size); 966 } 967 err: __os_free(tmp.data, tmp.size); 968 return (ret); 969 } 970 971 /* 972 * Set up pointer into existing data. Do it before the log 973 * message so we can use it inside of the log setup. 974 */ 975 beg = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)); 976 beg += dbt->doff; 977 978 /* 979 * If we are going to have to move bytes at all, figure out 980 * all the parameters here. Then log the call before moving 981 * anything around. 982 */ 983 if (DB_LOGGING(dbc)) { 984 old_dbt.data = beg; 985 old_dbt.size = dbt->dlen; 986 if ((ret = __ham_replace_log(dbp->dbenv->lg_info, 987 dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(hcp->pagep), 988 (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep), 989 (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0) 990 return (ret); 991 992 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */ 993 } 994 995 __ham_onpage_replace(hcp->pagep, dbp->pgsize, 996 (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt); 997 998 return (0); 999 } 1000 1001 /* 1002 * Replace data on a page with new data, possibly growing or shrinking what's 1003 * there. This is called on two different occasions. On one (from replpair) 1004 * we are interested in changing only the data. On the other (from recovery) 1005 * we are replacing the entire data (header and all) with a new element. In 1006 * the latter case, the off argument is negative. 1007 * pagep: the page that we're changing 1008 * ndx: page index of the element that is growing/shrinking. 1009 * off: Offset at which we are beginning the replacement. 1010 * change: the number of bytes (+ or -) that the element is growing/shrinking. 1011 * dbt: the new data that gets written at beg. 1012 * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t, 1013 * PUBLIC: int32_t, DBT *)); 1014 */ 1015 void 1016 __ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt) 1017 PAGE *pagep; 1018 size_t pgsize; 1019 u_int32_t ndx; 1020 int32_t off; 1021 int32_t change; 1022 DBT *dbt; 1023 { 1024 db_indx_t i; 1025 int32_t len; 1026 u_int8_t *src, *dest; 1027 int zero_me; 1028 1029 if (change != 0) { 1030 zero_me = 0; 1031 src = (u_int8_t *)(pagep) + HOFFSET(pagep); 1032 if (off < 0) 1033 len = pagep->inp[ndx] - HOFFSET(pagep); 1034 else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) { 1035 len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + 1036 LEN_HKEYDATA(pagep, pgsize, ndx) - src; 1037 zero_me = 1; 1038 } else 1039 len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src; 1040 dest = src - change; 1041 memmove(dest, src, len); 1042 if (zero_me) 1043 memset(dest + len, 0, change); 1044 1045 /* Now update the indices. */ 1046 for (i = ndx; i < NUM_ENT(pagep); i++) 1047 pagep->inp[i] -= change; 1048 HOFFSET(pagep) -= change; 1049 } 1050 if (off >= 0) 1051 memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off, 1052 dbt->data, dbt->size); 1053 else 1054 memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size); 1055 } 1056 1057 /* 1058 * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t)); 1059 */ 1060 int 1061 __ham_split_page(dbc, obucket, nbucket) 1062 DBC *dbc; 1063 u_int32_t obucket, nbucket; 1064 { 1065 DB *dbp; 1066 HASH_CURSOR *hcp; 1067 DBT key, page_dbt; 1068 DB_ENV *dbenv; 1069 DB_LSN new_lsn; 1070 PAGE **pp, *old_pagep, *temp_pagep, *new_pagep; 1071 db_indx_t n; 1072 db_pgno_t bucket_pgno, next_pgno; 1073 u_int32_t big_len, len; 1074 int ret, tret; 1075 void *big_buf; 1076 1077 dbp = dbc->dbp; 1078 hcp = (HASH_CURSOR *)dbc->internal; 1079 dbenv = dbp->dbenv; 1080 temp_pagep = old_pagep = new_pagep = NULL; 1081 1082 bucket_pgno = BUCKET_TO_PAGE(hcp, obucket); 1083 if ((ret = __ham_get_page(dbp, bucket_pgno, &old_pagep)) != 0) 1084 return (ret); 1085 if ((ret = __ham_new_page(dbp, BUCKET_TO_PAGE(hcp, nbucket), P_HASH, 1086 &new_pagep)) != 0) 1087 goto err; 1088 1089 temp_pagep = hcp->split_buf; 1090 memcpy(temp_pagep, old_pagep, hcp->hdr->pagesize); 1091 1092 if (DB_LOGGING(dbc)) { 1093 page_dbt.size = hcp->hdr->pagesize; 1094 page_dbt.data = old_pagep; 1095 if ((ret = __ham_splitdata_log(dbenv->lg_info, 1096 dbc->txn, &new_lsn, 0, dbp->log_fileid, SPLITOLD, 1097 PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0) 1098 goto err; 1099 } 1100 1101 P_INIT(old_pagep, hcp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID, 1102 PGNO_INVALID, 0, P_HASH); 1103 1104 if (DB_LOGGING(dbc)) 1105 LSN(old_pagep) = new_lsn; /* Structure assignment. */ 1106 1107 big_len = 0; 1108 big_buf = NULL; 1109 key.flags = 0; 1110 while (temp_pagep != NULL) { 1111 for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) { 1112 if ((ret = 1113 __db_ret(dbp, temp_pagep, H_KEYINDEX(n), 1114 &key, &big_buf, &big_len)) != 0) 1115 goto err; 1116 1117 if (__ham_call_hash(hcp, key.data, key.size) 1118 == obucket) 1119 pp = &old_pagep; 1120 else 1121 pp = &new_pagep; 1122 1123 /* 1124 * Figure out how many bytes we need on the new 1125 * page to store the key/data pair. 1126 */ 1127 1128 len = LEN_HITEM(temp_pagep, hcp->hdr->pagesize, 1129 H_DATAINDEX(n)) + 1130 LEN_HITEM(temp_pagep, hcp->hdr->pagesize, 1131 H_KEYINDEX(n)) + 1132 2 * sizeof(db_indx_t); 1133 1134 if (P_FREESPACE(*pp) < len) { 1135 if (DB_LOGGING(dbc)) { 1136 page_dbt.size = hcp->hdr->pagesize; 1137 page_dbt.data = *pp; 1138 if ((ret = __ham_splitdata_log( 1139 dbenv->lg_info, dbc->txn, 1140 &new_lsn, 0, dbp->log_fileid, 1141 SPLITNEW, PGNO(*pp), &page_dbt, 1142 &LSN(*pp))) != 0) 1143 goto err; 1144 LSN(*pp) = new_lsn; 1145 } 1146 if ((ret = 1147 __ham_add_ovflpage(dbc, *pp, 1, pp)) != 0) 1148 goto err; 1149 } 1150 __ham_copy_item(dbp->pgsize, 1151 temp_pagep, H_KEYINDEX(n), *pp); 1152 __ham_copy_item(dbp->pgsize, 1153 temp_pagep, H_DATAINDEX(n), *pp); 1154 } 1155 next_pgno = NEXT_PGNO(temp_pagep); 1156 1157 /* Clear temp_page; if it's a link overflow page, free it. */ 1158 if (PGNO(temp_pagep) != bucket_pgno && (ret = 1159 __ham_del_page(dbc, temp_pagep)) != 0) 1160 goto err; 1161 1162 if (next_pgno == PGNO_INVALID) 1163 temp_pagep = NULL; 1164 else if ((ret = 1165 __ham_get_page(dbp, next_pgno, &temp_pagep)) != 0) 1166 goto err; 1167 1168 if (temp_pagep != NULL && DB_LOGGING(dbc)) { 1169 page_dbt.size = hcp->hdr->pagesize; 1170 page_dbt.data = temp_pagep; 1171 if ((ret = __ham_splitdata_log(dbenv->lg_info, 1172 dbc->txn, &new_lsn, 0, dbp->log_fileid, 1173 SPLITOLD, PGNO(temp_pagep), 1174 &page_dbt, &LSN(temp_pagep))) != 0) 1175 goto err; 1176 LSN(temp_pagep) = new_lsn; 1177 } 1178 } 1179 if (big_buf != NULL) 1180 __os_free(big_buf, big_len); 1181 1182 /* 1183 * If the original bucket spanned multiple pages, then we've got 1184 * a pointer to a page that used to be on the bucket chain. It 1185 * should be deleted. 1186 */ 1187 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno && 1188 (ret = __ham_del_page(dbc, temp_pagep)) != 0) 1189 goto err; 1190 1191 /* 1192 * Write new buckets out. 1193 */ 1194 if (DB_LOGGING(dbc)) { 1195 page_dbt.size = hcp->hdr->pagesize; 1196 page_dbt.data = old_pagep; 1197 if ((ret = __ham_splitdata_log(dbenv->lg_info, 1198 dbc->txn, &new_lsn, 0, dbp->log_fileid, 1199 SPLITNEW, PGNO(old_pagep), 1200 &page_dbt, &LSN(old_pagep))) != 0) 1201 goto err; 1202 LSN(old_pagep) = new_lsn; 1203 1204 page_dbt.data = new_pagep; 1205 if ((ret = __ham_splitdata_log(dbenv->lg_info, 1206 dbc->txn, &new_lsn, 0, dbp->log_fileid, 1207 SPLITNEW, PGNO(new_pagep), &page_dbt, &LSN(new_pagep))) != 0) 1208 goto err; 1209 LSN(new_pagep) = new_lsn; 1210 } 1211 ret = __ham_put_page(dbp, old_pagep, 1); 1212 if ((tret = __ham_put_page(dbp, new_pagep, 1)) != 0 && 1213 ret == 0) 1214 ret = tret; 1215 1216 if (0) { 1217 err: if (old_pagep != NULL) 1218 (void)__ham_put_page(dbp, old_pagep, 1); 1219 if (new_pagep != NULL) 1220 (void)__ham_put_page(dbp, new_pagep, 1); 1221 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno) 1222 (void)__ham_put_page(dbp, temp_pagep, 1); 1223 } 1224 return (ret); 1225 } 1226 1227 /* 1228 * Add the given pair to the page. The page in question may already be 1229 * held (i.e. it was already gotten). If it is, then the page is passed 1230 * in via the pagep parameter. On return, pagep will contain the page 1231 * to which we just added something. This allows us to link overflow 1232 * pages and return the new page having correctly put the last page. 1233 * 1234 * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int)); 1235 */ 1236 int 1237 __ham_add_el(dbc, key, val, type) 1238 DBC *dbc; 1239 const DBT *key, *val; 1240 int type; 1241 { 1242 DB *dbp; 1243 HASH_CURSOR *hcp; 1244 const DBT *pkey, *pdata; 1245 DBT key_dbt, data_dbt; 1246 DB_LSN new_lsn; 1247 HOFFPAGE doff, koff; 1248 db_pgno_t next_pgno; 1249 u_int32_t data_size, key_size, pairsize, rectype; 1250 int do_expand, is_keybig, is_databig, ret; 1251 int key_type, data_type; 1252 1253 dbp = dbc->dbp; 1254 hcp = (HASH_CURSOR *)dbc->internal; 1255 do_expand = 0; 1256 1257 if (hcp->pagep == NULL && (ret = __ham_get_page(dbp, 1258 hcp->seek_found_page != PGNO_INVALID ? hcp->seek_found_page : 1259 hcp->pgno, &hcp->pagep)) != 0) 1260 return (ret); 1261 1262 key_size = HKEYDATA_PSIZE(key->size); 1263 data_size = HKEYDATA_PSIZE(val->size); 1264 is_keybig = ISBIG(hcp, key->size); 1265 is_databig = ISBIG(hcp, val->size); 1266 if (is_keybig) 1267 key_size = HOFFPAGE_PSIZE; 1268 if (is_databig) 1269 data_size = HOFFPAGE_PSIZE; 1270 1271 pairsize = key_size + data_size; 1272 1273 /* Advance to first page in chain with room for item. */ 1274 while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) != 1275 PGNO_INVALID) { 1276 /* 1277 * This may not be the end of the chain, but the pair may fit 1278 * anyway. Check if it's a bigpair that fits or a regular 1279 * pair that fits. 1280 */ 1281 if (P_FREESPACE(hcp->pagep) >= pairsize) 1282 break; 1283 next_pgno = NEXT_PGNO(hcp->pagep); 1284 if ((ret = 1285 __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0) 1286 return (ret); 1287 } 1288 1289 /* 1290 * Check if we need to allocate a new page. 1291 */ 1292 if (P_FREESPACE(hcp->pagep) < pairsize) { 1293 do_expand = 1; 1294 if ((ret = __ham_add_ovflpage(dbc, 1295 hcp->pagep, 1, &hcp->pagep)) != 0) 1296 return (ret); 1297 hcp->pgno = PGNO(hcp->pagep); 1298 } 1299 1300 /* 1301 * Update cursor. 1302 */ 1303 hcp->bndx = H_NUMPAIRS(hcp->pagep); 1304 F_CLR(hcp, H_DELETED); 1305 if (is_keybig) { 1306 koff.type = H_OFFPAGE; 1307 UMRW(koff.unused[0]); 1308 UMRW(koff.unused[1]); 1309 UMRW(koff.unused[2]); 1310 if ((ret = __db_poff(dbc, 1311 key, &koff.pgno, __ham_overflow_page)) != 0) 1312 return (ret); 1313 koff.tlen = key->size; 1314 key_dbt.data = &koff; 1315 key_dbt.size = sizeof(koff); 1316 pkey = &key_dbt; 1317 key_type = H_OFFPAGE; 1318 } else { 1319 pkey = key; 1320 key_type = H_KEYDATA; 1321 } 1322 1323 if (is_databig) { 1324 doff.type = H_OFFPAGE; 1325 UMRW(doff.unused[0]); 1326 UMRW(doff.unused[1]); 1327 UMRW(doff.unused[2]); 1328 if ((ret = __db_poff(dbc, 1329 val, &doff.pgno, __ham_overflow_page)) != 0) 1330 return (ret); 1331 doff.tlen = val->size; 1332 data_dbt.data = &doff; 1333 data_dbt.size = sizeof(doff); 1334 pdata = &data_dbt; 1335 data_type = H_OFFPAGE; 1336 } else { 1337 pdata = val; 1338 data_type = type; 1339 } 1340 1341 if (DB_LOGGING(dbc)) { 1342 rectype = PUTPAIR; 1343 if (is_databig) 1344 rectype |= PAIR_DATAMASK; 1345 if (is_keybig) 1346 rectype |= PAIR_KEYMASK; 1347 1348 if ((ret = __ham_insdel_log(dbp->dbenv->lg_info, 1349 dbc->txn, &new_lsn, 0, rectype, 1350 dbp->log_fileid, PGNO(hcp->pagep), 1351 (u_int32_t)H_NUMPAIRS(hcp->pagep), 1352 &LSN(hcp->pagep), pkey, pdata)) != 0) 1353 return (ret); 1354 1355 /* Move lsn onto page. */ 1356 LSN(hcp->pagep) = new_lsn; /* Structure assignment. */ 1357 } 1358 1359 __ham_putitem(hcp->pagep, pkey, key_type); 1360 __ham_putitem(hcp->pagep, pdata, data_type); 1361 1362 /* 1363 * For splits, we are going to update item_info's page number 1364 * field, so that we can easily return to the same page the 1365 * next time we come in here. For other operations, this shouldn't 1366 * matter, since odds are this is the last thing that happens before 1367 * we return to the user program. 1368 */ 1369 hcp->pgno = PGNO(hcp->pagep); 1370 1371 /* 1372 * XXX Maybe keep incremental numbers here 1373 */ 1374 if (!F_ISSET(dbp, DB_AM_LOCKING)) 1375 hcp->hdr->nelem++; 1376 1377 if (do_expand || (hcp->hdr->ffactor != 0 && 1378 (u_int32_t)H_NUMPAIRS(hcp->pagep) > hcp->hdr->ffactor)) 1379 F_SET(hcp, H_EXPAND); 1380 return (0); 1381 } 1382 1383 1384 /* 1385 * Special __putitem call used in splitting -- copies one entry to 1386 * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, 1387 * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we 1388 * do not need to do any logging here. 1389 * 1390 * PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *)); 1391 */ 1392 void 1393 __ham_copy_item(pgsize, src_page, src_ndx, dest_page) 1394 size_t pgsize; 1395 PAGE *src_page; 1396 u_int32_t src_ndx; 1397 PAGE *dest_page; 1398 { 1399 u_int32_t len; 1400 void *src, *dest; 1401 1402 /* 1403 * Copy the key and data entries onto this new page. 1404 */ 1405 src = P_ENTRY(src_page, src_ndx); 1406 1407 /* Set up space on dest. */ 1408 len = LEN_HITEM(src_page, pgsize, src_ndx); 1409 HOFFSET(dest_page) -= len; 1410 dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page); 1411 dest = P_ENTRY(dest_page, NUM_ENT(dest_page)); 1412 NUM_ENT(dest_page)++; 1413 1414 memcpy(dest, src, len); 1415 } 1416 1417 /* 1418 * 1419 * Returns: 1420 * pointer on success 1421 * NULL on error 1422 * 1423 * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **)); 1424 */ 1425 int 1426 __ham_add_ovflpage(dbc, pagep, release, pp) 1427 DBC *dbc; 1428 PAGE *pagep; 1429 int release; 1430 PAGE **pp; 1431 { 1432 DB *dbp; 1433 HASH_CURSOR *hcp; 1434 DB_LSN new_lsn; 1435 PAGE *new_pagep; 1436 int ret; 1437 1438 dbp = dbc->dbp; 1439 hcp = (HASH_CURSOR *)dbc->internal; 1440 1441 if ((ret = __ham_overflow_page(dbc, P_HASH, &new_pagep)) != 0) 1442 return (ret); 1443 1444 if (DB_LOGGING(dbc)) { 1445 if ((ret = __ham_newpage_log(dbp->dbenv->lg_info, 1446 dbc->txn, &new_lsn, 0, PUTOVFL, 1447 dbp->log_fileid, PGNO(pagep), &LSN(pagep), 1448 PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) 1449 return (ret); 1450 1451 /* Move lsn onto page. */ 1452 LSN(pagep) = LSN(new_pagep) = new_lsn; 1453 } 1454 NEXT_PGNO(pagep) = PGNO(new_pagep); 1455 PREV_PGNO(new_pagep) = PGNO(pagep); 1456 1457 if (release) 1458 ret = __ham_put_page(dbp, pagep, 1); 1459 1460 hcp->stats.hash_overflows++; 1461 *pp = new_pagep; 1462 return (ret); 1463 } 1464 1465 1466 /* 1467 * PUBLIC: int __ham_new_page __P((DB *, u_int32_t, u_int32_t, PAGE **)); 1468 */ 1469 int 1470 __ham_new_page(dbp, addr, type, pp) 1471 DB *dbp; 1472 u_int32_t addr, type; 1473 PAGE **pp; 1474 { 1475 PAGE *pagep; 1476 int ret; 1477 1478 if ((ret = memp_fget(dbp->mpf, 1479 &addr, DB_MPOOL_CREATE, &pagep)) != 0) 1480 return (ret); 1481 1482 /* This should not be necessary because page-in should do it. */ 1483 P_INIT(pagep, dbp->pgsize, addr, PGNO_INVALID, PGNO_INVALID, 0, type); 1484 1485 *pp = pagep; 1486 return (0); 1487 } 1488 1489 /* 1490 * PUBLIC: int __ham_del_page __P((DBC *, PAGE *)); 1491 */ 1492 int 1493 __ham_del_page(dbc, pagep) 1494 DBC *dbc; 1495 PAGE *pagep; 1496 { 1497 DB *dbp; 1498 HASH_CURSOR *hcp; 1499 DB_LSN new_lsn; 1500 int ret; 1501 1502 dbp = dbc->dbp; 1503 hcp = (HASH_CURSOR *)dbc->internal; 1504 ret = 0; 1505 DIRTY_META(dbp, hcp, ret); 1506 if (ret != 0) { 1507 if (ret != EAGAIN) 1508 __db_err(dbp->dbenv, 1509 "free_ovflpage: unable to lock meta data page %s\n", 1510 strerror(ret)); 1511 /* 1512 * If we are going to return an error, then we should free 1513 * the page, so it doesn't stay pinned forever. 1514 */ 1515 (void)__ham_put_page(dbp, pagep, 0); 1516 return (ret); 1517 } 1518 1519 if (DB_LOGGING(dbc)) { 1520 if ((ret = __ham_newpgno_log(dbp->dbenv->lg_info, 1521 dbc->txn, &new_lsn, 0, DELPGNO, 1522 dbp->log_fileid, PGNO(pagep), hcp->hdr->last_freed, 1523 (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID, 1524 &LSN(pagep), &hcp->hdr->lsn)) != 0) 1525 return (ret); 1526 1527 hcp->hdr->lsn = new_lsn; 1528 LSN(pagep) = new_lsn; 1529 } 1530 1531 #ifdef DIAGNOSTIC 1532 { 1533 db_pgno_t __pgno; 1534 DB_LSN __lsn; 1535 __pgno = pagep->pgno; 1536 __lsn = pagep->lsn; 1537 memset(pagep, 0xdb, dbp->pgsize); 1538 pagep->pgno = __pgno; 1539 pagep->lsn = __lsn; 1540 } 1541 #endif 1542 TYPE(pagep) = P_INVALID; 1543 NEXT_PGNO(pagep) = hcp->hdr->last_freed; 1544 hcp->hdr->last_freed = PGNO(pagep); 1545 1546 return (__ham_put_page(dbp, pagep, 1)); 1547 } 1548 1549 1550 /* 1551 * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t)); 1552 */ 1553 int 1554 __ham_put_page(dbp, pagep, is_dirty) 1555 DB *dbp; 1556 PAGE *pagep; 1557 int32_t is_dirty; 1558 { 1559 #ifdef DEBUG_SLOW 1560 __account_page(dbp, ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1); 1561 #endif 1562 return (memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0))); 1563 } 1564 1565 /* 1566 * __ham_dirty_page -- 1567 * Mark a page dirty. 1568 * 1569 * PUBLIC: int __ham_dirty_page __P((DB *, PAGE *)); 1570 */ 1571 int 1572 __ham_dirty_page(dbp, pagep) 1573 DB *dbp; 1574 PAGE *pagep; 1575 { 1576 return (memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY)); 1577 } 1578 1579 /* 1580 * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **)); 1581 */ 1582 int 1583 __ham_get_page(dbp, addr, pagep) 1584 DB *dbp; 1585 db_pgno_t addr; 1586 PAGE **pagep; 1587 { 1588 int ret; 1589 1590 ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep); 1591 #ifdef DEBUG_SLOW 1592 if (*pagep != NULL) 1593 __account_page(dbp, addr, 1); 1594 #endif 1595 return (ret); 1596 } 1597 1598 /* 1599 * PUBLIC: int __ham_overflow_page 1600 * PUBLIC: __P((DBC *, u_int32_t, PAGE **)); 1601 */ 1602 int 1603 __ham_overflow_page(dbc, type, pp) 1604 DBC *dbc; 1605 u_int32_t type; 1606 PAGE **pp; 1607 { 1608 DB *dbp; 1609 HASH_CURSOR *hcp; 1610 DB_LSN *lsnp, new_lsn; 1611 PAGE *p; 1612 db_pgno_t new_addr, next_free, newalloc_flag; 1613 u_int32_t offset, splitnum; 1614 int ret; 1615 1616 ret = 0; 1617 dbp = dbc->dbp; 1618 hcp = (HASH_CURSOR *)dbc->internal; 1619 DIRTY_META(dbp, hcp, ret); 1620 if (ret != 0) 1621 return (ret); 1622 1623 /* 1624 * This routine is split up into two parts. First we have 1625 * to figure out the address of the new page that we are 1626 * allocating. Then we have to log the allocation. Only 1627 * after the log do we get to complete allocation of the 1628 * new page. 1629 */ 1630 new_addr = hcp->hdr->last_freed; 1631 if (new_addr != PGNO_INVALID) { 1632 if ((ret = __ham_get_page(dbp, new_addr, &p)) != 0) 1633 return (ret); 1634 next_free = NEXT_PGNO(p); 1635 lsnp = &LSN(p); 1636 newalloc_flag = 0; 1637 } else { 1638 splitnum = hcp->hdr->ovfl_point; 1639 hcp->hdr->spares[splitnum]++; 1640 offset = hcp->hdr->spares[splitnum] - 1641 (splitnum ? hcp->hdr->spares[splitnum - 1] : 0); 1642 new_addr = PGNO_OF(hcp, hcp->hdr->ovfl_point, offset); 1643 if (new_addr > MAX_PAGES(hcp)) { 1644 __db_err(dbp->dbenv, "hash: out of file pages"); 1645 hcp->hdr->spares[splitnum]--; 1646 return (ENOMEM); 1647 } 1648 next_free = PGNO_INVALID; 1649 p = NULL; 1650 lsnp = NULL; 1651 newalloc_flag = 1; 1652 } 1653 1654 if (DB_LOGGING(dbc)) { 1655 if ((ret = __ham_newpgno_log(dbp->dbenv->lg_info, 1656 dbc->txn, &new_lsn, 0, ALLOCPGNO, 1657 dbp->log_fileid, new_addr, next_free, 1658 0, newalloc_flag, type, lsnp, &hcp->hdr->lsn)) != 0) 1659 return (ret); 1660 1661 hcp->hdr->lsn = new_lsn; 1662 if (lsnp != NULL) 1663 *lsnp = new_lsn; 1664 } 1665 1666 if (p != NULL) { 1667 /* We just took something off the free list, initialize it. */ 1668 hcp->hdr->last_freed = next_free; 1669 P_INIT(p, hcp->hdr->pagesize, PGNO(p), PGNO_INVALID, 1670 PGNO_INVALID, 0, (u_int8_t)type); 1671 } else { 1672 /* Get the new page. */ 1673 if ((ret = __ham_new_page(dbp, new_addr, type, &p)) != 0) 1674 return (ret); 1675 } 1676 if (DB_LOGGING(dbc)) 1677 LSN(p) = new_lsn; 1678 1679 *pp = p; 1680 return (0); 1681 } 1682 1683 #ifdef DEBUG 1684 /* 1685 * PUBLIC: #ifdef DEBUG 1686 * PUBLIC: db_pgno_t __bucket_to_page __P((HASH_CURSOR *, db_pgno_t)); 1687 * PUBLIC: #endif 1688 */ 1689 db_pgno_t 1690 __bucket_to_page(hcp, n) 1691 HASH_CURSOR *hcp; 1692 db_pgno_t n; 1693 { 1694 int ret_val; 1695 1696 ret_val = n + 1; 1697 if (n != 0) 1698 ret_val += hcp->hdr->spares[__db_log2(n + 1) - 1]; 1699 return (ret_val); 1700 } 1701 #endif 1702 1703 /* 1704 * Create a bunch of overflow pages at the current split point. 1705 * PUBLIC: void __ham_init_ovflpages __P((DBC *)); 1706 */ 1707 void 1708 __ham_init_ovflpages(dbc) 1709 DBC *dbc; 1710 { 1711 DB *dbp; 1712 HASH_CURSOR *hcp; 1713 DB_LSN new_lsn; 1714 PAGE *p; 1715 db_pgno_t last_pgno, new_pgno; 1716 u_int32_t i, curpages, numpages; 1717 1718 dbp = dbc->dbp; 1719 hcp = (HASH_CURSOR *)dbc->internal; 1720 1721 curpages = hcp->hdr->spares[hcp->hdr->ovfl_point] - 1722 hcp->hdr->spares[hcp->hdr->ovfl_point - 1]; 1723 numpages = hcp->hdr->ovfl_point + 1 - curpages; 1724 1725 last_pgno = hcp->hdr->last_freed; 1726 new_pgno = PGNO_OF(hcp, hcp->hdr->ovfl_point, curpages + 1); 1727 if (DB_LOGGING(dbc)) { 1728 (void)__ham_ovfl_log(dbp->dbenv->lg_info, 1729 dbc->txn, &new_lsn, 0, dbp->log_fileid, new_pgno, 1730 numpages, last_pgno, hcp->hdr->ovfl_point, &hcp->hdr->lsn); 1731 hcp->hdr->lsn = new_lsn; 1732 } else 1733 ZERO_LSN(new_lsn); 1734 1735 hcp->hdr->spares[hcp->hdr->ovfl_point] += numpages; 1736 for (i = numpages; i > 0; i--) { 1737 if (__ham_new_page(dbp, 1738 PGNO_OF(hcp, hcp->hdr->ovfl_point, curpages + i), 1739 P_INVALID, &p) != 0) 1740 break; 1741 LSN(p) = new_lsn; 1742 NEXT_PGNO(p) = last_pgno; 1743 last_pgno = PGNO(p); 1744 (void)__ham_put_page(dbp, p, 1); 1745 } 1746 hcp->hdr->last_freed = last_pgno; 1747 } 1748 1749 /* 1750 * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t)); 1751 */ 1752 int 1753 __ham_get_cpage(dbc, mode) 1754 DBC *dbc; 1755 db_lockmode_t mode; 1756 { 1757 DB *dbp; 1758 HASH_CURSOR *hcp; 1759 int ret; 1760 1761 dbp = dbc->dbp; 1762 hcp = (HASH_CURSOR *)dbc->internal; 1763 1764 /* 1765 * There are three cases with respect to buckets and locks. If there 1766 * is no lock held, then if we are locking, we should get the lock. 1767 * If there is a lock held and it's for the current bucket, we don't 1768 * need to do anything. If there is a lock, but it's for a different 1769 * bucket, then we need to release and get. 1770 */ 1771 if (F_ISSET(dbp, DB_AM_LOCKING)) { 1772 if (hcp->lock != 0 && hcp->lbucket != hcp->bucket) { 1773 /* 1774 * If this is the original lock, don't release it, 1775 * because we may need to restore it upon exit. 1776 */ 1777 if (dbc->txn == NULL && 1778 !F_ISSET(hcp, H_ORIGINAL) && (ret = 1779 lock_put(dbp->dbenv->lk_info, hcp->lock)) != 0) 1780 return (ret); 1781 F_CLR(hcp, H_ORIGINAL); 1782 hcp->lock = 0; 1783 } 1784 if (hcp->lock == 0 && (ret = __ham_lock_bucket(dbc, mode)) != 0) 1785 return (ret); 1786 hcp->lbucket = hcp->bucket; 1787 } 1788 1789 if (hcp->pagep == NULL) { 1790 if (hcp->pgno == PGNO_INVALID) { 1791 hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); 1792 hcp->bndx = 0; 1793 } 1794 1795 if ((ret = 1796 __ham_get_page(dbp, hcp->pgno, &hcp->pagep)) != 0) 1797 return (ret); 1798 } 1799 1800 if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL) 1801 if ((ret = 1802 __ham_get_page(dbp, hcp->dpgno, &hcp->dpagep)) != 0) 1803 return (ret); 1804 return (0); 1805 } 1806 1807 /* 1808 * Get a new page at the cursor, putting the last page if necessary. 1809 * If the flag is set to H_ISDUP, then we are talking about the 1810 * duplicate page, not the main page. 1811 * 1812 * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int, u_int32_t)); 1813 */ 1814 int 1815 __ham_next_cpage(dbc, pgno, dirty, flags) 1816 DBC *dbc; 1817 db_pgno_t pgno; 1818 int dirty; 1819 u_int32_t flags; 1820 { 1821 DB *dbp; 1822 HASH_CURSOR *hcp; 1823 PAGE *p; 1824 int ret; 1825 1826 dbp = dbc->dbp; 1827 hcp = (HASH_CURSOR *)dbc->internal; 1828 if (LF_ISSET(H_ISDUP) && hcp->dpagep != NULL && 1829 (ret = __ham_put_page(dbp, hcp->dpagep, dirty)) != 0) 1830 return (ret); 1831 else if (!LF_ISSET(H_ISDUP) && hcp->pagep != NULL && 1832 (ret = __ham_put_page(dbp, hcp->pagep, dirty)) != 0) 1833 return (ret); 1834 1835 if ((ret = __ham_get_page(dbp, pgno, &p)) != 0) 1836 return (ret); 1837 1838 if (LF_ISSET(H_ISDUP)) { 1839 hcp->dpagep = p; 1840 hcp->dpgno = pgno; 1841 hcp->dndx = 0; 1842 } else { 1843 hcp->pagep = p; 1844 hcp->pgno = pgno; 1845 hcp->bndx = 0; 1846 } 1847 1848 return (0); 1849 } 1850 1851 /* 1852 * __ham_lock_bucket -- 1853 * Get the lock on a particular bucket. 1854 */ 1855 static int 1856 __ham_lock_bucket(dbc, mode) 1857 DBC *dbc; 1858 db_lockmode_t mode; 1859 { 1860 HASH_CURSOR *hcp; 1861 int ret; 1862 1863 hcp = (HASH_CURSOR *)dbc->internal; 1864 dbc->lock.pgno = (db_pgno_t)(hcp->bucket); 1865 if (dbc->txn == NULL) 1866 ret = lock_get(dbc->dbp->dbenv->lk_info, dbc->locker, 0, 1867 &dbc->lock_dbt, mode, &hcp->lock); 1868 else 1869 ret = lock_tget(dbc->dbp->dbenv->lk_info, dbc->txn, 0, 1870 &dbc->lock_dbt, mode, &hcp->lock); 1871 1872 return (ret < 0 ? EAGAIN : ret); 1873 } 1874 1875 /* 1876 * __ham_dpair -- 1877 * Delete a pair on a page, paying no attention to what the pair 1878 * represents. The caller is responsible for freeing up duplicates 1879 * or offpage entries that might be referenced by this pair. 1880 * 1881 * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t)); 1882 */ 1883 void 1884 __ham_dpair(dbp, p, pndx) 1885 DB *dbp; 1886 PAGE *p; 1887 u_int32_t pndx; 1888 { 1889 db_indx_t delta, n; 1890 u_int8_t *dest, *src; 1891 1892 /* 1893 * Compute "delta", the amount we have to shift all of the 1894 * offsets. To find the delta, we just need to calculate 1895 * the size of the pair of elements we are removing. 1896 */ 1897 delta = H_PAIRSIZE(p, dbp->pgsize, pndx); 1898 1899 /* 1900 * The hard case: we want to remove something other than 1901 * the last item on the page. We need to shift data and 1902 * offsets down. 1903 */ 1904 if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) { 1905 /* 1906 * Move the data: src is the first occupied byte on 1907 * the page. (Length is delta.) 1908 */ 1909 src = (u_int8_t *)p + HOFFSET(p); 1910 1911 /* 1912 * Destination is delta bytes beyond src. This might 1913 * be an overlapping copy, so we have to use memmove. 1914 */ 1915 dest = src + delta; 1916 memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p)); 1917 } 1918 1919 /* Adjust the offsets. */ 1920 for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) { 1921 p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta; 1922 p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta; 1923 } 1924 1925 /* Adjust page metadata. */ 1926 HOFFSET(p) = HOFFSET(p) + delta; 1927 NUM_ENT(p) = NUM_ENT(p) - 2; 1928 } 1929 1930