1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 */ 7 /* 8 * Copyright (c) 1990, 1993, 1994, 1995, 1996 9 * Keith Bostic. All rights reserved. 10 */ 11 /* 12 * Copyright (c) 1990, 1993, 1994, 1995 13 * The Regents of the University of California. All rights reserved. 14 * 15 * This code is derived from software contributed to Berkeley by 16 * Mike Olson. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 3. All advertising materials mentioning features or use of this software 27 * must display the following acknowledgement: 28 * This product includes software developed by the University of 29 * California, Berkeley and its contributors. 30 * 4. Neither the name of the University nor the names of its contributors 31 * may be used to endorse or promote products derived from this software 32 * without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 44 * SUCH DAMAGE. 45 */ 46 47 #include "config.h" 48 49 #ifndef lint 50 static const char sccsid[] = "@(#)bt_put.c 10.54 (Sleepycat) 12/6/98"; 51 #endif /* not lint */ 52 53 #ifndef NO_SYSTEM_INCLUDES 54 #include <sys/types.h> 55 56 #include <errno.h> 57 #include <string.h> 58 #endif 59 60 #include "db_int.h" 61 #include "db_page.h" 62 #include "btree.h" 63 64 static int __bam_fixed __P((DBC *, DBT *)); 65 static int __bam_ndup __P((DBC *, PAGE *, u_int32_t)); 66 static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *)); 67 static int __bam_partial __P((DBC *, 68 DBT *, PAGE *, u_int32_t, u_int32_t, u_int32_t)); 69 static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t)); 70 71 /* 72 * __bam_iitem -- 73 * Insert an item into the tree. 74 * 75 * PUBLIC: int __bam_iitem __P((DBC *, 76 * PUBLIC: PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t)); 77 */ 78 int 79 __bam_iitem(dbc, hp, indxp, key, data, op, flags) 80 DBC *dbc; 81 PAGE **hp; 82 db_indx_t *indxp; 83 DBT *key, *data; 84 u_int32_t op, flags; 85 { 86 BTREE *t; 87 BKEYDATA *bk; 88 DB *dbp; 89 DBT tdbt; 90 PAGE *h; 91 db_indx_t indx, nbytes; 92 u_int32_t data_size, have_bytes, need_bytes, needed; 93 int bigkey, bigdata, dupadjust, replace, ret; 94 95 COMPQUIET(bk, NULL); 96 97 dbp = dbc->dbp; 98 t = dbp->internal; 99 h = *hp; 100 indx = *indxp; 101 dupadjust = replace = 0; 102 103 /* 104 * If it's a page of duplicates, call the common code to do the work. 105 * 106 * !!! 107 * Here's where the hp and indxp are important. The duplicate code 108 * may decide to rework/rearrange the pages and indices we're using, 109 * so the caller must understand that the page stack may change. 110 */ 111 if (TYPE(h) == P_DUPLICATE) { 112 /* Adjust the index for the new item if it's a DB_AFTER op. */ 113 if (op == DB_AFTER) 114 ++*indxp; 115 116 /* Remove the current item if it's a DB_CURRENT op. */ 117 if (op == DB_CURRENT) { 118 bk = GET_BKEYDATA(*hp, *indxp); 119 switch (B_TYPE(bk->type)) { 120 case B_KEYDATA: 121 nbytes = BKEYDATA_SIZE(bk->len); 122 break; 123 case B_OVERFLOW: 124 nbytes = BOVERFLOW_SIZE; 125 break; 126 default: 127 return (__db_pgfmt(dbp, h->pgno)); 128 } 129 if ((ret = __db_ditem(dbc, *hp, *indxp, nbytes)) != 0) 130 return (ret); 131 } 132 133 /* Put the new/replacement item onto the page. */ 134 if ((ret = __db_dput(dbc, data, hp, indxp, __bam_new)) != 0) 135 return (ret); 136 137 goto done; 138 } 139 140 /* Handle fixed-length records: build the real record. */ 141 if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->recno->re_len) { 142 tdbt = *data; 143 if ((ret = __bam_fixed(dbc, &tdbt)) != 0) 144 return (ret); 145 data = &tdbt; 146 } 147 148 /* 149 * Figure out how much space the data will take, including if it's a 150 * partial record. If either of the key or data items won't fit on 151 * a page, we'll have to store them on overflow pages. 152 */ 153 bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize; 154 data_size = F_ISSET(data, DB_DBT_PARTIAL) ? 155 __bam_partsize(data, h, indx) : data->size; 156 bigdata = data_size > t->bt_ovflsize; 157 158 needed = 0; 159 if (LF_ISSET(BI_NEWKEY)) { 160 /* If BI_NEWKEY is set we're adding a new key and data pair. */ 161 if (bigkey) 162 needed += BOVERFLOW_PSIZE; 163 else 164 needed += BKEYDATA_PSIZE(key->size); 165 if (bigdata) 166 needed += BOVERFLOW_PSIZE; 167 else 168 needed += BKEYDATA_PSIZE(data_size); 169 } else { 170 /* 171 * We're either overwriting the data item of a key/data pair 172 * or we're adding the data item only, i.e. a new duplicate. 173 */ 174 if (op == DB_CURRENT) { 175 bk = GET_BKEYDATA(h, 176 indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); 177 if (B_TYPE(bk->type) == B_KEYDATA) 178 have_bytes = BKEYDATA_PSIZE(bk->len); 179 else 180 have_bytes = BOVERFLOW_PSIZE; 181 need_bytes = 0; 182 } else { 183 have_bytes = 0; 184 need_bytes = sizeof(db_indx_t); 185 } 186 if (bigdata) 187 need_bytes += BOVERFLOW_PSIZE; 188 else 189 need_bytes += BKEYDATA_PSIZE(data_size); 190 191 if (have_bytes < need_bytes) 192 needed += need_bytes - have_bytes; 193 } 194 195 /* 196 * If there's not enough room, or the user has put a ceiling on the 197 * number of keys permitted in the page, split the page. 198 * 199 * XXX 200 * The t->bt_maxkey test here may be insufficient -- do we have to 201 * check in the btree split code, so we don't undo it there!?!? 202 */ 203 if (P_FREESPACE(h) < needed || 204 (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) 205 return (DB_NEEDSPLIT); 206 207 /* Handle partial puts: build the real record. */ 208 if (F_ISSET(data, DB_DBT_PARTIAL)) { 209 tdbt = *data; 210 if ((ret = __bam_partial(dbc, 211 &tdbt, h, indx, data_size, flags)) != 0) 212 return (ret); 213 data = &tdbt; 214 } 215 216 /* 217 * The code breaks it up into six cases: 218 * 219 * 1. Append a new key/data pair. 220 * 2. Insert a new key/data pair. 221 * 3. Append a new data item (a new duplicate). 222 * 4. Insert a new data item (a new duplicate). 223 * 5. Overflow item: delete and re-add the data item. 224 * 6. Replace the data item. 225 */ 226 if (LF_ISSET(BI_NEWKEY)) { 227 switch (op) { 228 case DB_AFTER: /* 1. Append a new key/data pair. */ 229 indx += 2; 230 *indxp += 2; 231 break; 232 case DB_BEFORE: /* 2. Insert a new key/data pair. */ 233 break; 234 default: 235 return (EINVAL); 236 } 237 238 /* Add the key. */ 239 if (bigkey) { 240 if ((ret = __bam_ovput(dbc, h, indx, key)) != 0) 241 return (ret); 242 } else 243 if ((ret = __db_pitem(dbc, h, indx, 244 BKEYDATA_SIZE(key->size), NULL, key)) != 0) 245 return (ret); 246 ++indx; 247 } else { 248 switch (op) { 249 case DB_AFTER: /* 3. Append a new data item. */ 250 if (TYPE(h) == P_LBTREE) { 251 /* 252 * Adjust the cursor and copy in the key for 253 * the duplicate. 254 */ 255 if ((ret = __bam_adjindx(dbc, 256 h, indx + P_INDX, indx, 1)) != 0) 257 return (ret); 258 259 indx += 3; 260 dupadjust = 1; 261 262 *indxp += 2; 263 } else { 264 ++indx; 265 __bam_ca_di(dbp, h->pgno, indx, 1); 266 267 *indxp += 1; 268 } 269 break; 270 case DB_BEFORE: /* 4. Insert a new data item. */ 271 if (TYPE(h) == P_LBTREE) { 272 /* 273 * Adjust the cursor and copy in the key for 274 * the duplicate. 275 */ 276 if ((ret = 277 __bam_adjindx(dbc, h, indx, indx, 1)) != 0) 278 return (ret); 279 280 ++indx; 281 dupadjust = 1; 282 } else 283 __bam_ca_di(dbp, h->pgno, indx, 1); 284 break; 285 case DB_CURRENT: 286 if (TYPE(h) == P_LBTREE) 287 ++indx; 288 289 /* 290 * 5. Delete/re-add the data item. 291 * 292 * If we're dealing with offpage items, we have to 293 * delete and then re-add the item. 294 */ 295 if (bigdata || B_TYPE(bk->type) != B_KEYDATA) { 296 if ((ret = __bam_ditem(dbc, h, indx)) != 0) 297 return (ret); 298 break; 299 } 300 301 /* 6. Replace the data item. */ 302 replace = 1; 303 break; 304 default: 305 return (EINVAL); 306 } 307 } 308 309 /* Add the data. */ 310 if (bigdata) { 311 if ((ret = __bam_ovput(dbc, h, indx, data)) != 0) 312 return (ret); 313 } else { 314 BKEYDATA __bk; 315 DBT __hdr; 316 317 if (LF_ISSET(BI_DELETED)) { 318 B_TSET(__bk.type, B_KEYDATA, 1); 319 __bk.len = data->size; 320 __hdr.data = &__bk; 321 __hdr.size = SSZA(BKEYDATA, data); 322 ret = __db_pitem(dbc, h, indx, 323 BKEYDATA_SIZE(data->size), &__hdr, data); 324 } else if (replace) 325 ret = __bam_ritem(dbc, h, indx, data); 326 else 327 ret = __db_pitem(dbc, h, indx, 328 BKEYDATA_SIZE(data->size), NULL, data); 329 if (ret != 0) 330 return (ret); 331 } 332 333 if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) 334 return (ret); 335 336 /* 337 * If the page is at least 50% full, and we added a duplicate, see if 338 * that set of duplicates takes up at least 25% of the space. If it 339 * does, move it off onto its own page. 340 */ 341 if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) { 342 --indx; 343 if ((ret = __bam_ndup(dbc, h, indx)) != 0) 344 return (ret); 345 } 346 347 /* 348 * If we've changed the record count, update the tree. Record counts 349 * need to be updated in recno databases and in btree databases where 350 * we are supporting records. In both cases, adjust the count if the 351 * operation wasn't performed on the current record or when the caller 352 * overrides and wants the adjustment made regardless. 353 */ 354 done: if (LF_ISSET(BI_DOINCR) || 355 (op != DB_CURRENT && 356 (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO))) 357 if ((ret = __bam_adjust(dbc, 1)) != 0) 358 return (ret); 359 360 /* If we've modified a recno file, set the flag */ 361 if (t->recno != NULL) 362 F_SET(t->recno, RECNO_MODIFIED); 363 364 return (ret); 365 } 366 367 /* 368 * __bam_partsize -- 369 * Figure out how much space a partial data item is in total. 370 */ 371 static u_int32_t 372 __bam_partsize(data, h, indx) 373 DBT *data; 374 PAGE *h; 375 u_int32_t indx; 376 { 377 BKEYDATA *bk; 378 u_int32_t nbytes; 379 380 /* 381 * Figure out how much total space we'll need. If the record doesn't 382 * already exist, it's simply the data we're provided. 383 */ 384 if (indx >= NUM_ENT(h)) 385 return (data->doff + data->size); 386 387 /* 388 * Otherwise, it's the data provided plus any already existing data 389 * that we're not replacing. 390 */ 391 bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); 392 nbytes = 393 B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len; 394 395 /* 396 * There are really two cases here: 397 * 398 * Case 1: We are replacing some bytes that do not exist (i.e., they 399 * are past the end of the record). In this case the number of bytes 400 * we are replacing is irrelevant and all we care about is how many 401 * bytes we are going to add from offset. So, the new record length 402 * is going to be the size of the new bytes (size) plus wherever those 403 * new bytes begin (doff). 404 * 405 * Case 2: All the bytes we are replacing exist. Therefore, the new 406 * size is the oldsize (nbytes) minus the bytes we are replacing (dlen) 407 * plus the bytes we are adding (size). 408 */ 409 if (nbytes < data->doff + data->dlen) /* Case 1 */ 410 return (data->doff + data->size); 411 412 return (nbytes + data->size - data->dlen); /* Case 2 */ 413 } 414 415 /* 416 * OVPUT -- 417 * Copy an overflow item onto a page. 418 */ 419 #undef OVPUT 420 #define OVPUT(h, indx, bo) do { \ 421 DBT __hdr; \ 422 memset(&__hdr, 0, sizeof(__hdr)); \ 423 __hdr.data = &bo; \ 424 __hdr.size = BOVERFLOW_SIZE; \ 425 if ((ret = __db_pitem(dbc, \ 426 h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \ 427 return (ret); \ 428 } while (0) 429 430 /* 431 * __bam_ovput -- 432 * Build an overflow item and put it on the page. 433 */ 434 static int 435 __bam_ovput(dbc, h, indx, item) 436 DBC *dbc; 437 PAGE *h; 438 u_int32_t indx; 439 DBT *item; 440 { 441 BOVERFLOW bo; 442 int ret; 443 444 UMRW(bo.unused1); 445 B_TSET(bo.type, B_OVERFLOW, 0); 446 UMRW(bo.unused2); 447 if ((ret = __db_poff(dbc, item, &bo.pgno, __bam_new)) != 0) 448 return (ret); 449 bo.tlen = item->size; 450 451 OVPUT(h, indx, bo); 452 453 return (0); 454 } 455 456 /* 457 * __bam_ritem -- 458 * Replace an item on a page. 459 * 460 * PUBLIC: int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *)); 461 */ 462 int 463 __bam_ritem(dbc, h, indx, data) 464 DBC *dbc; 465 PAGE *h; 466 u_int32_t indx; 467 DBT *data; 468 { 469 BKEYDATA *bk; 470 DB *dbp; 471 DBT orig, repl; 472 db_indx_t cnt, lo, ln, min, off, prefix, suffix; 473 int32_t nbytes; 474 int ret; 475 u_int8_t *p, *t; 476 477 dbp = dbc->dbp; 478 479 /* 480 * Replace a single item onto a page. The logic figuring out where 481 * to insert and whether it fits is handled in the caller. All we do 482 * here is manage the page shuffling. 483 */ 484 bk = GET_BKEYDATA(h, indx); 485 486 /* Log the change. */ 487 if (DB_LOGGING(dbc)) { 488 /* 489 * We might as well check to see if the two data items share 490 * a common prefix and suffix -- it can save us a lot of log 491 * message if they're large. 492 */ 493 min = data->size < bk->len ? data->size : bk->len; 494 for (prefix = 0, 495 p = bk->data, t = data->data; 496 prefix < min && *p == *t; ++prefix, ++p, ++t) 497 ; 498 499 min -= prefix; 500 for (suffix = 0, 501 p = (u_int8_t *)bk->data + bk->len - 1, 502 t = (u_int8_t *)data->data + data->size - 1; 503 suffix < min && *p == *t; ++suffix, --p, --t) 504 ; 505 506 /* We only log the parts of the keys that have changed. */ 507 orig.data = (u_int8_t *)bk->data + prefix; 508 orig.size = bk->len - (prefix + suffix); 509 repl.data = (u_int8_t *)data->data + prefix; 510 repl.size = data->size - (prefix + suffix); 511 if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbc->txn, 512 &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h), 513 (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type), 514 &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0) 515 return (ret); 516 } 517 518 /* 519 * Set references to the first in-use byte on the page and the 520 * first byte of the item being replaced. 521 */ 522 p = (u_int8_t *)h + HOFFSET(h); 523 t = (u_int8_t *)bk; 524 525 /* 526 * If the entry is growing in size, shift the beginning of the data 527 * part of the page down. If the entry is shrinking in size, shift 528 * the beginning of the data part of the page up. Use memmove(3), 529 * the regions overlap. 530 */ 531 lo = BKEYDATA_SIZE(bk->len); 532 ln = BKEYDATA_SIZE(data->size); 533 if (lo != ln) { 534 nbytes = lo - ln; /* Signed difference. */ 535 if (p == t) /* First index is fast. */ 536 h->inp[indx] += nbytes; 537 else { /* Else, shift the page. */ 538 memmove(p + nbytes, p, t - p); 539 540 /* Adjust the indices' offsets. */ 541 off = h->inp[indx]; 542 for (cnt = 0; cnt < NUM_ENT(h); ++cnt) 543 if (h->inp[cnt] <= off) 544 h->inp[cnt] += nbytes; 545 } 546 547 /* Clean up the page and adjust the item's reference. */ 548 HOFFSET(h) += nbytes; 549 t += nbytes; 550 } 551 552 /* Copy the new item onto the page. */ 553 bk = (BKEYDATA *)t; 554 B_TSET(bk->type, B_KEYDATA, 0); 555 bk->len = data->size; 556 memcpy(bk->data, data->data, data->size); 557 558 return (0); 559 } 560 561 /* 562 * __bam_ndup -- 563 * Check to see if the duplicate set at indx should have its own page. 564 * If it should, create it. 565 */ 566 static int 567 __bam_ndup(dbc, h, indx) 568 DBC *dbc; 569 PAGE *h; 570 u_int32_t indx; 571 { 572 BKEYDATA *bk; 573 BOVERFLOW bo; 574 DB *dbp; 575 DBT hdr; 576 PAGE *cp; 577 db_indx_t cnt, cpindx, first, sz; 578 int ret; 579 580 dbp = dbc->dbp; 581 582 while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) 583 indx -= P_INDX; 584 for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) { 585 if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx]) 586 break; 587 bk = GET_BKEYDATA(h, indx); 588 sz += B_TYPE(bk->type) == B_KEYDATA ? 589 BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; 590 bk = GET_BKEYDATA(h, indx + O_INDX); 591 sz += B_TYPE(bk->type) == B_KEYDATA ? 592 BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; 593 } 594 595 /* 596 * If this set of duplicates is using more than 25% of the page, move 597 * them off. The choice of 25% is a WAG, but it has to be small enough 598 * that we can always split regardless of the presence of duplicates. 599 */ 600 if (sz < dbp->pgsize / 4) 601 return (0); 602 603 /* Get a new page. */ 604 if ((ret = __bam_new(dbc, P_DUPLICATE, &cp)) != 0) 605 return (ret); 606 607 /* 608 * Move this set of duplicates off the page. First points to the first 609 * key of the first duplicate key/data pair, cnt is the number of pairs 610 * we're dealing with. 611 */ 612 memset(&hdr, 0, sizeof(hdr)); 613 for (indx = first + O_INDX, cpindx = 0;; ++cpindx) { 614 /* Copy the entry to the new page. */ 615 bk = GET_BKEYDATA(h, indx); 616 hdr.data = bk; 617 hdr.size = B_TYPE(bk->type) == B_KEYDATA ? 618 BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE; 619 if ((ret = 620 __db_pitem(dbc, cp, cpindx, hdr.size, &hdr, NULL)) != 0) 621 goto err; 622 623 /* 624 * Move cursors referencing the old entry to the new entry. 625 * Done after the page put because __db_pitem() adjusts 626 * cursors on the new page, and before the delete because 627 * __db_ditem adjusts cursors on the old page. 628 */ 629 __bam_ca_dup(dbp, 630 PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx); 631 632 /* Delete the data item. */ 633 if ((ret = __db_ditem(dbc, h, indx, hdr.size)) != 0) 634 goto err; 635 636 /* Delete all but the first reference to the key. */ 637 if (--cnt == 0) 638 break; 639 if ((ret = __bam_adjindx(dbc, h, indx, first, 0)) != 0) 640 goto err; 641 } 642 643 /* Put in a new data item that points to the duplicates page. */ 644 UMRW(bo.unused1); 645 B_TSET(bo.type, B_DUPLICATE, 0); 646 UMRW(bo.unused2); 647 bo.pgno = cp->pgno; 648 bo.tlen = 0; 649 650 OVPUT(h, indx, bo); 651 652 return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY)); 653 654 err: (void)__bam_free(dbc, cp); 655 return (ret); 656 } 657 658 /* 659 * __bam_fixed -- 660 * Build the real record for a fixed length put. 661 */ 662 static int 663 __bam_fixed(dbc, dbt) 664 DBC *dbc; 665 DBT *dbt; 666 { 667 DB *dbp; 668 RECNO *rp; 669 int ret; 670 671 dbp = dbc->dbp; 672 rp = ((BTREE *)dbp->internal)->recno; 673 674 /* 675 * If database contains fixed-length records, and the record is long, 676 * return EINVAL. 677 */ 678 if (dbt->size > rp->re_len) 679 return (EINVAL); 680 681 /* 682 * The caller checked to see if it was just right, so we know it's 683 * short. Pad it out. We use the record data return memory, it's 684 * only a short-term use. 685 */ 686 if (dbc->rdata.ulen < rp->re_len) { 687 if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) { 688 dbc->rdata.ulen = 0; 689 dbc->rdata.data = NULL; 690 return (ret); 691 } 692 dbc->rdata.ulen = rp->re_len; 693 } 694 memcpy(dbc->rdata.data, dbt->data, dbt->size); 695 memset((u_int8_t *)dbc->rdata.data + dbt->size, 696 rp->re_pad, rp->re_len - dbt->size); 697 698 /* 699 * Clean up our flags and other information just in case, and 700 * change the caller's DBT to reference our created record. 701 */ 702 dbc->rdata.size = rp->re_len; 703 dbc->rdata.dlen = 0; 704 dbc->rdata.doff = 0; 705 dbc->rdata.flags = 0; 706 *dbt = dbc->rdata; 707 708 return (0); 709 } 710 711 /* 712 * __bam_partial -- 713 * Build the real record for a partial put. 714 */ 715 static int 716 __bam_partial(dbc, dbt, h, indx, nbytes, flags) 717 DBC *dbc; 718 DBT *dbt; 719 PAGE *h; 720 u_int32_t indx, nbytes, flags; 721 { 722 BKEYDATA *bk, tbk; 723 BOVERFLOW *bo; 724 DB *dbp; 725 DBT copy; 726 u_int32_t len, tlen; 727 u_int8_t *p; 728 int ret; 729 730 COMPQUIET(bo, NULL); 731 732 dbp = dbc->dbp; 733 734 /* We use the record data return memory, it's only a short-term use. */ 735 if (dbc->rdata.ulen < nbytes) { 736 if ((ret = __os_realloc(&dbc->rdata.data, nbytes)) != 0) { 737 dbc->rdata.ulen = 0; 738 dbc->rdata.data = NULL; 739 return (ret); 740 } 741 dbc->rdata.ulen = nbytes; 742 } 743 744 /* 745 * We use nul bytes for any part of the record that isn't specified; 746 * get it over with. 747 */ 748 memset(dbc->rdata.data, 0, nbytes); 749 750 /* 751 * In the next clauses, we need to do three things: a) set p to point 752 * to the place at which to copy the user's data, b) set tlen to the 753 * total length of the record, not including the bytes contributed by 754 * the user, and c) copy any valid data from an existing record. 755 */ 756 if (LF_ISSET(BI_NEWKEY)) { 757 tlen = dbt->doff; 758 p = (u_int8_t *)dbc->rdata.data + dbt->doff; 759 goto ucopy; 760 } 761 762 /* Find the current record. */ 763 if (indx < NUM_ENT(h)) { 764 bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); 765 bo = (BOVERFLOW *)bk; 766 } else { 767 bk = &tbk; 768 B_TSET(bk->type, B_KEYDATA, 0); 769 bk->len = 0; 770 } 771 if (B_TYPE(bk->type) == B_OVERFLOW) { 772 /* 773 * In the case of an overflow record, we shift things around 774 * in the current record rather than allocate a separate copy. 775 */ 776 memset(©, 0, sizeof(copy)); 777 if ((ret = __db_goff(dbp, ©, bo->tlen, 778 bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) 779 return (ret); 780 781 /* Skip any leading data from the original record. */ 782 tlen = dbt->doff; 783 p = (u_int8_t *)dbc->rdata.data + dbt->doff; 784 785 /* 786 * Copy in any trailing data from the original record. 787 * 788 * If the original record was larger than the original offset 789 * plus the bytes being deleted, there is trailing data in the 790 * original record we need to preserve. If we aren't deleting 791 * the same number of bytes as we're inserting, copy it up or 792 * down, into place. 793 * 794 * Use memmove(), the regions may overlap. 795 */ 796 if (bo->tlen > dbt->doff + dbt->dlen) { 797 len = bo->tlen - (dbt->doff + dbt->dlen); 798 if (dbt->dlen != dbt->size) 799 memmove(p + dbt->size, p + dbt->dlen, len); 800 tlen += len; 801 } 802 } else { 803 /* Copy in any leading data from the original record. */ 804 memcpy(dbc->rdata.data, 805 bk->data, dbt->doff > bk->len ? bk->len : dbt->doff); 806 tlen = dbt->doff; 807 p = (u_int8_t *)dbc->rdata.data + dbt->doff; 808 809 /* Copy in any trailing data from the original record. */ 810 len = dbt->doff + dbt->dlen; 811 if (bk->len > len) { 812 memcpy(p + dbt->size, bk->data + len, bk->len - len); 813 tlen += bk->len - len; 814 } 815 } 816 817 ucopy: /* 818 * Copy in the application provided data -- p and tlen must have been 819 * initialized above. 820 */ 821 memcpy(p, dbt->data, dbt->size); 822 tlen += dbt->size; 823 824 /* Set the DBT to reference our new record. */ 825 dbc->rdata.size = tlen; 826 dbc->rdata.dlen = 0; 827 dbc->rdata.doff = 0; 828 dbc->rdata.flags = 0; 829 *dbt = dbc->rdata; 830 return (0); 831 } 832