/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995, 1996
 *	Keith Bostic. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Mike Olson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
45*7c478bd9Sstevel@tonic-gate */ 46*7c478bd9Sstevel@tonic-gate 47*7c478bd9Sstevel@tonic-gate #include "config.h" 48*7c478bd9Sstevel@tonic-gate 49*7c478bd9Sstevel@tonic-gate #ifndef lint 50*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_put.c 10.54 (Sleepycat) 12/6/98"; 51*7c478bd9Sstevel@tonic-gate #endif /* not lint */ 52*7c478bd9Sstevel@tonic-gate 53*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 54*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 55*7c478bd9Sstevel@tonic-gate 56*7c478bd9Sstevel@tonic-gate #include <errno.h> 57*7c478bd9Sstevel@tonic-gate #include <string.h> 58*7c478bd9Sstevel@tonic-gate #endif 59*7c478bd9Sstevel@tonic-gate 60*7c478bd9Sstevel@tonic-gate #include "db_int.h" 61*7c478bd9Sstevel@tonic-gate #include "db_page.h" 62*7c478bd9Sstevel@tonic-gate #include "btree.h" 63*7c478bd9Sstevel@tonic-gate 64*7c478bd9Sstevel@tonic-gate static int __bam_fixed __P((DBC *, DBT *)); 65*7c478bd9Sstevel@tonic-gate static int __bam_ndup __P((DBC *, PAGE *, u_int32_t)); 66*7c478bd9Sstevel@tonic-gate static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *)); 67*7c478bd9Sstevel@tonic-gate static int __bam_partial __P((DBC *, 68*7c478bd9Sstevel@tonic-gate DBT *, PAGE *, u_int32_t, u_int32_t, u_int32_t)); 69*7c478bd9Sstevel@tonic-gate static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t)); 70*7c478bd9Sstevel@tonic-gate 71*7c478bd9Sstevel@tonic-gate /* 72*7c478bd9Sstevel@tonic-gate * __bam_iitem -- 73*7c478bd9Sstevel@tonic-gate * Insert an item into the tree. 
74*7c478bd9Sstevel@tonic-gate * 75*7c478bd9Sstevel@tonic-gate * PUBLIC: int __bam_iitem __P((DBC *, 76*7c478bd9Sstevel@tonic-gate * PUBLIC: PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t)); 77*7c478bd9Sstevel@tonic-gate */ 78*7c478bd9Sstevel@tonic-gate int 79*7c478bd9Sstevel@tonic-gate __bam_iitem(dbc, hp, indxp, key, data, op, flags) 80*7c478bd9Sstevel@tonic-gate DBC *dbc; 81*7c478bd9Sstevel@tonic-gate PAGE **hp; 82*7c478bd9Sstevel@tonic-gate db_indx_t *indxp; 83*7c478bd9Sstevel@tonic-gate DBT *key, *data; 84*7c478bd9Sstevel@tonic-gate u_int32_t op, flags; 85*7c478bd9Sstevel@tonic-gate { 86*7c478bd9Sstevel@tonic-gate BTREE *t; 87*7c478bd9Sstevel@tonic-gate BKEYDATA *bk; 88*7c478bd9Sstevel@tonic-gate DB *dbp; 89*7c478bd9Sstevel@tonic-gate DBT tdbt; 90*7c478bd9Sstevel@tonic-gate PAGE *h; 91*7c478bd9Sstevel@tonic-gate db_indx_t indx, nbytes; 92*7c478bd9Sstevel@tonic-gate u_int32_t data_size, have_bytes, need_bytes, needed; 93*7c478bd9Sstevel@tonic-gate int bigkey, bigdata, dupadjust, replace, ret; 94*7c478bd9Sstevel@tonic-gate 95*7c478bd9Sstevel@tonic-gate COMPQUIET(bk, NULL); 96*7c478bd9Sstevel@tonic-gate 97*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 98*7c478bd9Sstevel@tonic-gate t = dbp->internal; 99*7c478bd9Sstevel@tonic-gate h = *hp; 100*7c478bd9Sstevel@tonic-gate indx = *indxp; 101*7c478bd9Sstevel@tonic-gate dupadjust = replace = 0; 102*7c478bd9Sstevel@tonic-gate 103*7c478bd9Sstevel@tonic-gate /* 104*7c478bd9Sstevel@tonic-gate * If it's a page of duplicates, call the common code to do the work. 105*7c478bd9Sstevel@tonic-gate * 106*7c478bd9Sstevel@tonic-gate * !!! 107*7c478bd9Sstevel@tonic-gate * Here's where the hp and indxp are important. The duplicate code 108*7c478bd9Sstevel@tonic-gate * may decide to rework/rearrange the pages and indices we're using, 109*7c478bd9Sstevel@tonic-gate * so the caller must understand that the page stack may change. 
110*7c478bd9Sstevel@tonic-gate */ 111*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_DUPLICATE) { 112*7c478bd9Sstevel@tonic-gate /* Adjust the index for the new item if it's a DB_AFTER op. */ 113*7c478bd9Sstevel@tonic-gate if (op == DB_AFTER) 114*7c478bd9Sstevel@tonic-gate ++*indxp; 115*7c478bd9Sstevel@tonic-gate 116*7c478bd9Sstevel@tonic-gate /* Remove the current item if it's a DB_CURRENT op. */ 117*7c478bd9Sstevel@tonic-gate if (op == DB_CURRENT) { 118*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(*hp, *indxp); 119*7c478bd9Sstevel@tonic-gate switch (B_TYPE(bk->type)) { 120*7c478bd9Sstevel@tonic-gate case B_KEYDATA: 121*7c478bd9Sstevel@tonic-gate nbytes = BKEYDATA_SIZE(bk->len); 122*7c478bd9Sstevel@tonic-gate break; 123*7c478bd9Sstevel@tonic-gate case B_OVERFLOW: 124*7c478bd9Sstevel@tonic-gate nbytes = BOVERFLOW_SIZE; 125*7c478bd9Sstevel@tonic-gate break; 126*7c478bd9Sstevel@tonic-gate default: 127*7c478bd9Sstevel@tonic-gate return (__db_pgfmt(dbp, h->pgno)); 128*7c478bd9Sstevel@tonic-gate } 129*7c478bd9Sstevel@tonic-gate if ((ret = __db_ditem(dbc, *hp, *indxp, nbytes)) != 0) 130*7c478bd9Sstevel@tonic-gate return (ret); 131*7c478bd9Sstevel@tonic-gate } 132*7c478bd9Sstevel@tonic-gate 133*7c478bd9Sstevel@tonic-gate /* Put the new/replacement item onto the page. */ 134*7c478bd9Sstevel@tonic-gate if ((ret = __db_dput(dbc, data, hp, indxp, __bam_new)) != 0) 135*7c478bd9Sstevel@tonic-gate return (ret); 136*7c478bd9Sstevel@tonic-gate 137*7c478bd9Sstevel@tonic-gate goto done; 138*7c478bd9Sstevel@tonic-gate } 139*7c478bd9Sstevel@tonic-gate 140*7c478bd9Sstevel@tonic-gate /* Handle fixed-length records: build the real record. 
*/ 141*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->recno->re_len) { 142*7c478bd9Sstevel@tonic-gate tdbt = *data; 143*7c478bd9Sstevel@tonic-gate if ((ret = __bam_fixed(dbc, &tdbt)) != 0) 144*7c478bd9Sstevel@tonic-gate return (ret); 145*7c478bd9Sstevel@tonic-gate data = &tdbt; 146*7c478bd9Sstevel@tonic-gate } 147*7c478bd9Sstevel@tonic-gate 148*7c478bd9Sstevel@tonic-gate /* 149*7c478bd9Sstevel@tonic-gate * Figure out how much space the data will take, including if it's a 150*7c478bd9Sstevel@tonic-gate * partial record. If either of the key or data items won't fit on 151*7c478bd9Sstevel@tonic-gate * a page, we'll have to store them on overflow pages. 152*7c478bd9Sstevel@tonic-gate */ 153*7c478bd9Sstevel@tonic-gate bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize; 154*7c478bd9Sstevel@tonic-gate data_size = F_ISSET(data, DB_DBT_PARTIAL) ? 155*7c478bd9Sstevel@tonic-gate __bam_partsize(data, h, indx) : data->size; 156*7c478bd9Sstevel@tonic-gate bigdata = data_size > t->bt_ovflsize; 157*7c478bd9Sstevel@tonic-gate 158*7c478bd9Sstevel@tonic-gate needed = 0; 159*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_NEWKEY)) { 160*7c478bd9Sstevel@tonic-gate /* If BI_NEWKEY is set we're adding a new key and data pair. */ 161*7c478bd9Sstevel@tonic-gate if (bigkey) 162*7c478bd9Sstevel@tonic-gate needed += BOVERFLOW_PSIZE; 163*7c478bd9Sstevel@tonic-gate else 164*7c478bd9Sstevel@tonic-gate needed += BKEYDATA_PSIZE(key->size); 165*7c478bd9Sstevel@tonic-gate if (bigdata) 166*7c478bd9Sstevel@tonic-gate needed += BOVERFLOW_PSIZE; 167*7c478bd9Sstevel@tonic-gate else 168*7c478bd9Sstevel@tonic-gate needed += BKEYDATA_PSIZE(data_size); 169*7c478bd9Sstevel@tonic-gate } else { 170*7c478bd9Sstevel@tonic-gate /* 171*7c478bd9Sstevel@tonic-gate * We're either overwriting the data item of a key/data pair 172*7c478bd9Sstevel@tonic-gate * or we're adding the data item only, i.e. a new duplicate. 
173*7c478bd9Sstevel@tonic-gate */ 174*7c478bd9Sstevel@tonic-gate if (op == DB_CURRENT) { 175*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, 176*7c478bd9Sstevel@tonic-gate indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); 177*7c478bd9Sstevel@tonic-gate if (B_TYPE(bk->type) == B_KEYDATA) 178*7c478bd9Sstevel@tonic-gate have_bytes = BKEYDATA_PSIZE(bk->len); 179*7c478bd9Sstevel@tonic-gate else 180*7c478bd9Sstevel@tonic-gate have_bytes = BOVERFLOW_PSIZE; 181*7c478bd9Sstevel@tonic-gate need_bytes = 0; 182*7c478bd9Sstevel@tonic-gate } else { 183*7c478bd9Sstevel@tonic-gate have_bytes = 0; 184*7c478bd9Sstevel@tonic-gate need_bytes = sizeof(db_indx_t); 185*7c478bd9Sstevel@tonic-gate } 186*7c478bd9Sstevel@tonic-gate if (bigdata) 187*7c478bd9Sstevel@tonic-gate need_bytes += BOVERFLOW_PSIZE; 188*7c478bd9Sstevel@tonic-gate else 189*7c478bd9Sstevel@tonic-gate need_bytes += BKEYDATA_PSIZE(data_size); 190*7c478bd9Sstevel@tonic-gate 191*7c478bd9Sstevel@tonic-gate if (have_bytes < need_bytes) 192*7c478bd9Sstevel@tonic-gate needed += need_bytes - have_bytes; 193*7c478bd9Sstevel@tonic-gate } 194*7c478bd9Sstevel@tonic-gate 195*7c478bd9Sstevel@tonic-gate /* 196*7c478bd9Sstevel@tonic-gate * If there's not enough room, or the user has put a ceiling on the 197*7c478bd9Sstevel@tonic-gate * number of keys permitted in the page, split the page. 198*7c478bd9Sstevel@tonic-gate * 199*7c478bd9Sstevel@tonic-gate * XXX 200*7c478bd9Sstevel@tonic-gate * The t->bt_maxkey test here may be insufficient -- do we have to 201*7c478bd9Sstevel@tonic-gate * check in the btree split code, so we don't undo it there!?!? 202*7c478bd9Sstevel@tonic-gate */ 203*7c478bd9Sstevel@tonic-gate if (P_FREESPACE(h) < needed || 204*7c478bd9Sstevel@tonic-gate (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) 205*7c478bd9Sstevel@tonic-gate return (DB_NEEDSPLIT); 206*7c478bd9Sstevel@tonic-gate 207*7c478bd9Sstevel@tonic-gate /* Handle partial puts: build the real record. 
*/ 208*7c478bd9Sstevel@tonic-gate if (F_ISSET(data, DB_DBT_PARTIAL)) { 209*7c478bd9Sstevel@tonic-gate tdbt = *data; 210*7c478bd9Sstevel@tonic-gate if ((ret = __bam_partial(dbc, 211*7c478bd9Sstevel@tonic-gate &tdbt, h, indx, data_size, flags)) != 0) 212*7c478bd9Sstevel@tonic-gate return (ret); 213*7c478bd9Sstevel@tonic-gate data = &tdbt; 214*7c478bd9Sstevel@tonic-gate } 215*7c478bd9Sstevel@tonic-gate 216*7c478bd9Sstevel@tonic-gate /* 217*7c478bd9Sstevel@tonic-gate * The code breaks it up into six cases: 218*7c478bd9Sstevel@tonic-gate * 219*7c478bd9Sstevel@tonic-gate * 1. Append a new key/data pair. 220*7c478bd9Sstevel@tonic-gate * 2. Insert a new key/data pair. 221*7c478bd9Sstevel@tonic-gate * 3. Append a new data item (a new duplicate). 222*7c478bd9Sstevel@tonic-gate * 4. Insert a new data item (a new duplicate). 223*7c478bd9Sstevel@tonic-gate * 5. Overflow item: delete and re-add the data item. 224*7c478bd9Sstevel@tonic-gate * 6. Replace the data item. 225*7c478bd9Sstevel@tonic-gate */ 226*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_NEWKEY)) { 227*7c478bd9Sstevel@tonic-gate switch (op) { 228*7c478bd9Sstevel@tonic-gate case DB_AFTER: /* 1. Append a new key/data pair. */ 229*7c478bd9Sstevel@tonic-gate indx += 2; 230*7c478bd9Sstevel@tonic-gate *indxp += 2; 231*7c478bd9Sstevel@tonic-gate break; 232*7c478bd9Sstevel@tonic-gate case DB_BEFORE: /* 2. Insert a new key/data pair. */ 233*7c478bd9Sstevel@tonic-gate break; 234*7c478bd9Sstevel@tonic-gate default: 235*7c478bd9Sstevel@tonic-gate return (EINVAL); 236*7c478bd9Sstevel@tonic-gate } 237*7c478bd9Sstevel@tonic-gate 238*7c478bd9Sstevel@tonic-gate /* Add the key. 
*/ 239*7c478bd9Sstevel@tonic-gate if (bigkey) { 240*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ovput(dbc, h, indx, key)) != 0) 241*7c478bd9Sstevel@tonic-gate return (ret); 242*7c478bd9Sstevel@tonic-gate } else 243*7c478bd9Sstevel@tonic-gate if ((ret = __db_pitem(dbc, h, indx, 244*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(key->size), NULL, key)) != 0) 245*7c478bd9Sstevel@tonic-gate return (ret); 246*7c478bd9Sstevel@tonic-gate ++indx; 247*7c478bd9Sstevel@tonic-gate } else { 248*7c478bd9Sstevel@tonic-gate switch (op) { 249*7c478bd9Sstevel@tonic-gate case DB_AFTER: /* 3. Append a new data item. */ 250*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_LBTREE) { 251*7c478bd9Sstevel@tonic-gate /* 252*7c478bd9Sstevel@tonic-gate * Adjust the cursor and copy in the key for 253*7c478bd9Sstevel@tonic-gate * the duplicate. 254*7c478bd9Sstevel@tonic-gate */ 255*7c478bd9Sstevel@tonic-gate if ((ret = __bam_adjindx(dbc, 256*7c478bd9Sstevel@tonic-gate h, indx + P_INDX, indx, 1)) != 0) 257*7c478bd9Sstevel@tonic-gate return (ret); 258*7c478bd9Sstevel@tonic-gate 259*7c478bd9Sstevel@tonic-gate indx += 3; 260*7c478bd9Sstevel@tonic-gate dupadjust = 1; 261*7c478bd9Sstevel@tonic-gate 262*7c478bd9Sstevel@tonic-gate *indxp += 2; 263*7c478bd9Sstevel@tonic-gate } else { 264*7c478bd9Sstevel@tonic-gate ++indx; 265*7c478bd9Sstevel@tonic-gate __bam_ca_di(dbp, h->pgno, indx, 1); 266*7c478bd9Sstevel@tonic-gate 267*7c478bd9Sstevel@tonic-gate *indxp += 1; 268*7c478bd9Sstevel@tonic-gate } 269*7c478bd9Sstevel@tonic-gate break; 270*7c478bd9Sstevel@tonic-gate case DB_BEFORE: /* 4. Insert a new data item. */ 271*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_LBTREE) { 272*7c478bd9Sstevel@tonic-gate /* 273*7c478bd9Sstevel@tonic-gate * Adjust the cursor and copy in the key for 274*7c478bd9Sstevel@tonic-gate * the duplicate. 
275*7c478bd9Sstevel@tonic-gate */ 276*7c478bd9Sstevel@tonic-gate if ((ret = 277*7c478bd9Sstevel@tonic-gate __bam_adjindx(dbc, h, indx, indx, 1)) != 0) 278*7c478bd9Sstevel@tonic-gate return (ret); 279*7c478bd9Sstevel@tonic-gate 280*7c478bd9Sstevel@tonic-gate ++indx; 281*7c478bd9Sstevel@tonic-gate dupadjust = 1; 282*7c478bd9Sstevel@tonic-gate } else 283*7c478bd9Sstevel@tonic-gate __bam_ca_di(dbp, h->pgno, indx, 1); 284*7c478bd9Sstevel@tonic-gate break; 285*7c478bd9Sstevel@tonic-gate case DB_CURRENT: 286*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_LBTREE) 287*7c478bd9Sstevel@tonic-gate ++indx; 288*7c478bd9Sstevel@tonic-gate 289*7c478bd9Sstevel@tonic-gate /* 290*7c478bd9Sstevel@tonic-gate * 5. Delete/re-add the data item. 291*7c478bd9Sstevel@tonic-gate * 292*7c478bd9Sstevel@tonic-gate * If we're dealing with offpage items, we have to 293*7c478bd9Sstevel@tonic-gate * delete and then re-add the item. 294*7c478bd9Sstevel@tonic-gate */ 295*7c478bd9Sstevel@tonic-gate if (bigdata || B_TYPE(bk->type) != B_KEYDATA) { 296*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0) 297*7c478bd9Sstevel@tonic-gate return (ret); 298*7c478bd9Sstevel@tonic-gate break; 299*7c478bd9Sstevel@tonic-gate } 300*7c478bd9Sstevel@tonic-gate 301*7c478bd9Sstevel@tonic-gate /* 6. Replace the data item. */ 302*7c478bd9Sstevel@tonic-gate replace = 1; 303*7c478bd9Sstevel@tonic-gate break; 304*7c478bd9Sstevel@tonic-gate default: 305*7c478bd9Sstevel@tonic-gate return (EINVAL); 306*7c478bd9Sstevel@tonic-gate } 307*7c478bd9Sstevel@tonic-gate } 308*7c478bd9Sstevel@tonic-gate 309*7c478bd9Sstevel@tonic-gate /* Add the data. 
*/ 310*7c478bd9Sstevel@tonic-gate if (bigdata) { 311*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ovput(dbc, h, indx, data)) != 0) 312*7c478bd9Sstevel@tonic-gate return (ret); 313*7c478bd9Sstevel@tonic-gate } else { 314*7c478bd9Sstevel@tonic-gate BKEYDATA __bk; 315*7c478bd9Sstevel@tonic-gate DBT __hdr; 316*7c478bd9Sstevel@tonic-gate 317*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_DELETED)) { 318*7c478bd9Sstevel@tonic-gate B_TSET(__bk.type, B_KEYDATA, 1); 319*7c478bd9Sstevel@tonic-gate __bk.len = data->size; 320*7c478bd9Sstevel@tonic-gate __hdr.data = &__bk; 321*7c478bd9Sstevel@tonic-gate __hdr.size = SSZA(BKEYDATA, data); 322*7c478bd9Sstevel@tonic-gate ret = __db_pitem(dbc, h, indx, 323*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(data->size), &__hdr, data); 324*7c478bd9Sstevel@tonic-gate } else if (replace) 325*7c478bd9Sstevel@tonic-gate ret = __bam_ritem(dbc, h, indx, data); 326*7c478bd9Sstevel@tonic-gate else 327*7c478bd9Sstevel@tonic-gate ret = __db_pitem(dbc, h, indx, 328*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(data->size), NULL, data); 329*7c478bd9Sstevel@tonic-gate if (ret != 0) 330*7c478bd9Sstevel@tonic-gate return (ret); 331*7c478bd9Sstevel@tonic-gate } 332*7c478bd9Sstevel@tonic-gate 333*7c478bd9Sstevel@tonic-gate if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) 334*7c478bd9Sstevel@tonic-gate return (ret); 335*7c478bd9Sstevel@tonic-gate 336*7c478bd9Sstevel@tonic-gate /* 337*7c478bd9Sstevel@tonic-gate * If the page is at least 50% full, and we added a duplicate, see if 338*7c478bd9Sstevel@tonic-gate * that set of duplicates takes up at least 25% of the space. If it 339*7c478bd9Sstevel@tonic-gate * does, move it off onto its own page. 
340*7c478bd9Sstevel@tonic-gate */ 341*7c478bd9Sstevel@tonic-gate if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) { 342*7c478bd9Sstevel@tonic-gate --indx; 343*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ndup(dbc, h, indx)) != 0) 344*7c478bd9Sstevel@tonic-gate return (ret); 345*7c478bd9Sstevel@tonic-gate } 346*7c478bd9Sstevel@tonic-gate 347*7c478bd9Sstevel@tonic-gate /* 348*7c478bd9Sstevel@tonic-gate * If we've changed the record count, update the tree. Record counts 349*7c478bd9Sstevel@tonic-gate * need to be updated in recno databases and in btree databases where 350*7c478bd9Sstevel@tonic-gate * we are supporting records. In both cases, adjust the count if the 351*7c478bd9Sstevel@tonic-gate * operation wasn't performed on the current record or when the caller 352*7c478bd9Sstevel@tonic-gate * overrides and wants the adjustment made regardless. 353*7c478bd9Sstevel@tonic-gate */ 354*7c478bd9Sstevel@tonic-gate done: if (LF_ISSET(BI_DOINCR) || 355*7c478bd9Sstevel@tonic-gate (op != DB_CURRENT && 356*7c478bd9Sstevel@tonic-gate (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO))) 357*7c478bd9Sstevel@tonic-gate if ((ret = __bam_adjust(dbc, 1)) != 0) 358*7c478bd9Sstevel@tonic-gate return (ret); 359*7c478bd9Sstevel@tonic-gate 360*7c478bd9Sstevel@tonic-gate /* If we've modified a recno file, set the flag */ 361*7c478bd9Sstevel@tonic-gate if (t->recno != NULL) 362*7c478bd9Sstevel@tonic-gate F_SET(t->recno, RECNO_MODIFIED); 363*7c478bd9Sstevel@tonic-gate 364*7c478bd9Sstevel@tonic-gate return (ret); 365*7c478bd9Sstevel@tonic-gate } 366*7c478bd9Sstevel@tonic-gate 367*7c478bd9Sstevel@tonic-gate /* 368*7c478bd9Sstevel@tonic-gate * __bam_partsize -- 369*7c478bd9Sstevel@tonic-gate * Figure out how much space a partial data item is in total. 
370*7c478bd9Sstevel@tonic-gate */ 371*7c478bd9Sstevel@tonic-gate static u_int32_t 372*7c478bd9Sstevel@tonic-gate __bam_partsize(data, h, indx) 373*7c478bd9Sstevel@tonic-gate DBT *data; 374*7c478bd9Sstevel@tonic-gate PAGE *h; 375*7c478bd9Sstevel@tonic-gate u_int32_t indx; 376*7c478bd9Sstevel@tonic-gate { 377*7c478bd9Sstevel@tonic-gate BKEYDATA *bk; 378*7c478bd9Sstevel@tonic-gate u_int32_t nbytes; 379*7c478bd9Sstevel@tonic-gate 380*7c478bd9Sstevel@tonic-gate /* 381*7c478bd9Sstevel@tonic-gate * Figure out how much total space we'll need. If the record doesn't 382*7c478bd9Sstevel@tonic-gate * already exist, it's simply the data we're provided. 383*7c478bd9Sstevel@tonic-gate */ 384*7c478bd9Sstevel@tonic-gate if (indx >= NUM_ENT(h)) 385*7c478bd9Sstevel@tonic-gate return (data->doff + data->size); 386*7c478bd9Sstevel@tonic-gate 387*7c478bd9Sstevel@tonic-gate /* 388*7c478bd9Sstevel@tonic-gate * Otherwise, it's the data provided plus any already existing data 389*7c478bd9Sstevel@tonic-gate * that we're not replacing. 390*7c478bd9Sstevel@tonic-gate */ 391*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); 392*7c478bd9Sstevel@tonic-gate nbytes = 393*7c478bd9Sstevel@tonic-gate B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len; 394*7c478bd9Sstevel@tonic-gate 395*7c478bd9Sstevel@tonic-gate /* 396*7c478bd9Sstevel@tonic-gate * There are really two cases here: 397*7c478bd9Sstevel@tonic-gate * 398*7c478bd9Sstevel@tonic-gate * Case 1: We are replacing some bytes that do not exist (i.e., they 399*7c478bd9Sstevel@tonic-gate * are past the end of the record). In this case the number of bytes 400*7c478bd9Sstevel@tonic-gate * we are replacing is irrelevant and all we care about is how many 401*7c478bd9Sstevel@tonic-gate * bytes we are going to add from offset. 
So, the new record length 402*7c478bd9Sstevel@tonic-gate * is going to be the size of the new bytes (size) plus wherever those 403*7c478bd9Sstevel@tonic-gate * new bytes begin (doff). 404*7c478bd9Sstevel@tonic-gate * 405*7c478bd9Sstevel@tonic-gate * Case 2: All the bytes we are replacing exist. Therefore, the new 406*7c478bd9Sstevel@tonic-gate * size is the oldsize (nbytes) minus the bytes we are replacing (dlen) 407*7c478bd9Sstevel@tonic-gate * plus the bytes we are adding (size). 408*7c478bd9Sstevel@tonic-gate */ 409*7c478bd9Sstevel@tonic-gate if (nbytes < data->doff + data->dlen) /* Case 1 */ 410*7c478bd9Sstevel@tonic-gate return (data->doff + data->size); 411*7c478bd9Sstevel@tonic-gate 412*7c478bd9Sstevel@tonic-gate return (nbytes + data->size - data->dlen); /* Case 2 */ 413*7c478bd9Sstevel@tonic-gate } 414*7c478bd9Sstevel@tonic-gate 415*7c478bd9Sstevel@tonic-gate /* 416*7c478bd9Sstevel@tonic-gate * OVPUT -- 417*7c478bd9Sstevel@tonic-gate * Copy an overflow item onto a page. 418*7c478bd9Sstevel@tonic-gate */ 419*7c478bd9Sstevel@tonic-gate #undef OVPUT 420*7c478bd9Sstevel@tonic-gate #define OVPUT(h, indx, bo) do { \ 421*7c478bd9Sstevel@tonic-gate DBT __hdr; \ 422*7c478bd9Sstevel@tonic-gate memset(&__hdr, 0, sizeof(__hdr)); \ 423*7c478bd9Sstevel@tonic-gate __hdr.data = &bo; \ 424*7c478bd9Sstevel@tonic-gate __hdr.size = BOVERFLOW_SIZE; \ 425*7c478bd9Sstevel@tonic-gate if ((ret = __db_pitem(dbc, \ 426*7c478bd9Sstevel@tonic-gate h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0) \ 427*7c478bd9Sstevel@tonic-gate return (ret); \ 428*7c478bd9Sstevel@tonic-gate } while (0) 429*7c478bd9Sstevel@tonic-gate 430*7c478bd9Sstevel@tonic-gate /* 431*7c478bd9Sstevel@tonic-gate * __bam_ovput -- 432*7c478bd9Sstevel@tonic-gate * Build an overflow item and put it on the page. 
433*7c478bd9Sstevel@tonic-gate */ 434*7c478bd9Sstevel@tonic-gate static int 435*7c478bd9Sstevel@tonic-gate __bam_ovput(dbc, h, indx, item) 436*7c478bd9Sstevel@tonic-gate DBC *dbc; 437*7c478bd9Sstevel@tonic-gate PAGE *h; 438*7c478bd9Sstevel@tonic-gate u_int32_t indx; 439*7c478bd9Sstevel@tonic-gate DBT *item; 440*7c478bd9Sstevel@tonic-gate { 441*7c478bd9Sstevel@tonic-gate BOVERFLOW bo; 442*7c478bd9Sstevel@tonic-gate int ret; 443*7c478bd9Sstevel@tonic-gate 444*7c478bd9Sstevel@tonic-gate UMRW(bo.unused1); 445*7c478bd9Sstevel@tonic-gate B_TSET(bo.type, B_OVERFLOW, 0); 446*7c478bd9Sstevel@tonic-gate UMRW(bo.unused2); 447*7c478bd9Sstevel@tonic-gate if ((ret = __db_poff(dbc, item, &bo.pgno, __bam_new)) != 0) 448*7c478bd9Sstevel@tonic-gate return (ret); 449*7c478bd9Sstevel@tonic-gate bo.tlen = item->size; 450*7c478bd9Sstevel@tonic-gate 451*7c478bd9Sstevel@tonic-gate OVPUT(h, indx, bo); 452*7c478bd9Sstevel@tonic-gate 453*7c478bd9Sstevel@tonic-gate return (0); 454*7c478bd9Sstevel@tonic-gate } 455*7c478bd9Sstevel@tonic-gate 456*7c478bd9Sstevel@tonic-gate /* 457*7c478bd9Sstevel@tonic-gate * __bam_ritem -- 458*7c478bd9Sstevel@tonic-gate * Replace an item on a page. 
459*7c478bd9Sstevel@tonic-gate * 460*7c478bd9Sstevel@tonic-gate * PUBLIC: int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *)); 461*7c478bd9Sstevel@tonic-gate */ 462*7c478bd9Sstevel@tonic-gate int 463*7c478bd9Sstevel@tonic-gate __bam_ritem(dbc, h, indx, data) 464*7c478bd9Sstevel@tonic-gate DBC *dbc; 465*7c478bd9Sstevel@tonic-gate PAGE *h; 466*7c478bd9Sstevel@tonic-gate u_int32_t indx; 467*7c478bd9Sstevel@tonic-gate DBT *data; 468*7c478bd9Sstevel@tonic-gate { 469*7c478bd9Sstevel@tonic-gate BKEYDATA *bk; 470*7c478bd9Sstevel@tonic-gate DB *dbp; 471*7c478bd9Sstevel@tonic-gate DBT orig, repl; 472*7c478bd9Sstevel@tonic-gate db_indx_t cnt, lo, ln, min, off, prefix, suffix; 473*7c478bd9Sstevel@tonic-gate int32_t nbytes; 474*7c478bd9Sstevel@tonic-gate int ret; 475*7c478bd9Sstevel@tonic-gate u_int8_t *p, *t; 476*7c478bd9Sstevel@tonic-gate 477*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 478*7c478bd9Sstevel@tonic-gate 479*7c478bd9Sstevel@tonic-gate /* 480*7c478bd9Sstevel@tonic-gate * Replace a single item onto a page. The logic figuring out where 481*7c478bd9Sstevel@tonic-gate * to insert and whether it fits is handled in the caller. All we do 482*7c478bd9Sstevel@tonic-gate * here is manage the page shuffling. 483*7c478bd9Sstevel@tonic-gate */ 484*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx); 485*7c478bd9Sstevel@tonic-gate 486*7c478bd9Sstevel@tonic-gate /* Log the change. */ 487*7c478bd9Sstevel@tonic-gate if (DB_LOGGING(dbc)) { 488*7c478bd9Sstevel@tonic-gate /* 489*7c478bd9Sstevel@tonic-gate * We might as well check to see if the two data items share 490*7c478bd9Sstevel@tonic-gate * a common prefix and suffix -- it can save us a lot of log 491*7c478bd9Sstevel@tonic-gate * message if they're large. 492*7c478bd9Sstevel@tonic-gate */ 493*7c478bd9Sstevel@tonic-gate min = data->size < bk->len ? 
data->size : bk->len; 494*7c478bd9Sstevel@tonic-gate for (prefix = 0, 495*7c478bd9Sstevel@tonic-gate p = bk->data, t = data->data; 496*7c478bd9Sstevel@tonic-gate prefix < min && *p == *t; ++prefix, ++p, ++t) 497*7c478bd9Sstevel@tonic-gate ; 498*7c478bd9Sstevel@tonic-gate 499*7c478bd9Sstevel@tonic-gate min -= prefix; 500*7c478bd9Sstevel@tonic-gate for (suffix = 0, 501*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)bk->data + bk->len - 1, 502*7c478bd9Sstevel@tonic-gate t = (u_int8_t *)data->data + data->size - 1; 503*7c478bd9Sstevel@tonic-gate suffix < min && *p == *t; ++suffix, --p, --t) 504*7c478bd9Sstevel@tonic-gate ; 505*7c478bd9Sstevel@tonic-gate 506*7c478bd9Sstevel@tonic-gate /* We only log the parts of the keys that have changed. */ 507*7c478bd9Sstevel@tonic-gate orig.data = (u_int8_t *)bk->data + prefix; 508*7c478bd9Sstevel@tonic-gate orig.size = bk->len - (prefix + suffix); 509*7c478bd9Sstevel@tonic-gate repl.data = (u_int8_t *)data->data + prefix; 510*7c478bd9Sstevel@tonic-gate repl.size = data->size - (prefix + suffix); 511*7c478bd9Sstevel@tonic-gate if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbc->txn, 512*7c478bd9Sstevel@tonic-gate &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h), 513*7c478bd9Sstevel@tonic-gate (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type), 514*7c478bd9Sstevel@tonic-gate &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0) 515*7c478bd9Sstevel@tonic-gate return (ret); 516*7c478bd9Sstevel@tonic-gate } 517*7c478bd9Sstevel@tonic-gate 518*7c478bd9Sstevel@tonic-gate /* 519*7c478bd9Sstevel@tonic-gate * Set references to the first in-use byte on the page and the 520*7c478bd9Sstevel@tonic-gate * first byte of the item being replaced. 
521*7c478bd9Sstevel@tonic-gate */ 522*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)h + HOFFSET(h); 523*7c478bd9Sstevel@tonic-gate t = (u_int8_t *)bk; 524*7c478bd9Sstevel@tonic-gate 525*7c478bd9Sstevel@tonic-gate /* 526*7c478bd9Sstevel@tonic-gate * If the entry is growing in size, shift the beginning of the data 527*7c478bd9Sstevel@tonic-gate * part of the page down. If the entry is shrinking in size, shift 528*7c478bd9Sstevel@tonic-gate * the beginning of the data part of the page up. Use memmove(3), 529*7c478bd9Sstevel@tonic-gate * the regions overlap. 530*7c478bd9Sstevel@tonic-gate */ 531*7c478bd9Sstevel@tonic-gate lo = BKEYDATA_SIZE(bk->len); 532*7c478bd9Sstevel@tonic-gate ln = BKEYDATA_SIZE(data->size); 533*7c478bd9Sstevel@tonic-gate if (lo != ln) { 534*7c478bd9Sstevel@tonic-gate nbytes = lo - ln; /* Signed difference. */ 535*7c478bd9Sstevel@tonic-gate if (p == t) /* First index is fast. */ 536*7c478bd9Sstevel@tonic-gate h->inp[indx] += nbytes; 537*7c478bd9Sstevel@tonic-gate else { /* Else, shift the page. */ 538*7c478bd9Sstevel@tonic-gate memmove(p + nbytes, p, t - p); 539*7c478bd9Sstevel@tonic-gate 540*7c478bd9Sstevel@tonic-gate /* Adjust the indices' offsets. */ 541*7c478bd9Sstevel@tonic-gate off = h->inp[indx]; 542*7c478bd9Sstevel@tonic-gate for (cnt = 0; cnt < NUM_ENT(h); ++cnt) 543*7c478bd9Sstevel@tonic-gate if (h->inp[cnt] <= off) 544*7c478bd9Sstevel@tonic-gate h->inp[cnt] += nbytes; 545*7c478bd9Sstevel@tonic-gate } 546*7c478bd9Sstevel@tonic-gate 547*7c478bd9Sstevel@tonic-gate /* Clean up the page and adjust the item's reference. */ 548*7c478bd9Sstevel@tonic-gate HOFFSET(h) += nbytes; 549*7c478bd9Sstevel@tonic-gate t += nbytes; 550*7c478bd9Sstevel@tonic-gate } 551*7c478bd9Sstevel@tonic-gate 552*7c478bd9Sstevel@tonic-gate /* Copy the new item onto the page. 
*/ 553*7c478bd9Sstevel@tonic-gate bk = (BKEYDATA *)t; 554*7c478bd9Sstevel@tonic-gate B_TSET(bk->type, B_KEYDATA, 0); 555*7c478bd9Sstevel@tonic-gate bk->len = data->size; 556*7c478bd9Sstevel@tonic-gate memcpy(bk->data, data->data, data->size); 557*7c478bd9Sstevel@tonic-gate 558*7c478bd9Sstevel@tonic-gate return (0); 559*7c478bd9Sstevel@tonic-gate } 560*7c478bd9Sstevel@tonic-gate 561*7c478bd9Sstevel@tonic-gate /* 562*7c478bd9Sstevel@tonic-gate * __bam_ndup -- 563*7c478bd9Sstevel@tonic-gate * Check to see if the duplicate set at indx should have its own page. 564*7c478bd9Sstevel@tonic-gate * If it should, create it. 565*7c478bd9Sstevel@tonic-gate */ 566*7c478bd9Sstevel@tonic-gate static int 567*7c478bd9Sstevel@tonic-gate __bam_ndup(dbc, h, indx) 568*7c478bd9Sstevel@tonic-gate DBC *dbc; 569*7c478bd9Sstevel@tonic-gate PAGE *h; 570*7c478bd9Sstevel@tonic-gate u_int32_t indx; 571*7c478bd9Sstevel@tonic-gate { 572*7c478bd9Sstevel@tonic-gate BKEYDATA *bk; 573*7c478bd9Sstevel@tonic-gate BOVERFLOW bo; 574*7c478bd9Sstevel@tonic-gate DB *dbp; 575*7c478bd9Sstevel@tonic-gate DBT hdr; 576*7c478bd9Sstevel@tonic-gate PAGE *cp; 577*7c478bd9Sstevel@tonic-gate db_indx_t cnt, cpindx, first, sz; 578*7c478bd9Sstevel@tonic-gate int ret; 579*7c478bd9Sstevel@tonic-gate 580*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 581*7c478bd9Sstevel@tonic-gate 582*7c478bd9Sstevel@tonic-gate while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX]) 583*7c478bd9Sstevel@tonic-gate indx -= P_INDX; 584*7c478bd9Sstevel@tonic-gate for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) { 585*7c478bd9Sstevel@tonic-gate if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx]) 586*7c478bd9Sstevel@tonic-gate break; 587*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx); 588*7c478bd9Sstevel@tonic-gate sz += B_TYPE(bk->type) == B_KEYDATA ? 
589*7c478bd9Sstevel@tonic-gate BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; 590*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx + O_INDX); 591*7c478bd9Sstevel@tonic-gate sz += B_TYPE(bk->type) == B_KEYDATA ? 592*7c478bd9Sstevel@tonic-gate BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE; 593*7c478bd9Sstevel@tonic-gate } 594*7c478bd9Sstevel@tonic-gate 595*7c478bd9Sstevel@tonic-gate /* 596*7c478bd9Sstevel@tonic-gate * If this set of duplicates is using more than 25% of the page, move 597*7c478bd9Sstevel@tonic-gate * them off. The choice of 25% is a WAG, but it has to be small enough 598*7c478bd9Sstevel@tonic-gate * that we can always split regardless of the presence of duplicates. 599*7c478bd9Sstevel@tonic-gate */ 600*7c478bd9Sstevel@tonic-gate if (sz < dbp->pgsize / 4) 601*7c478bd9Sstevel@tonic-gate return (0); 602*7c478bd9Sstevel@tonic-gate 603*7c478bd9Sstevel@tonic-gate /* Get a new page. */ 604*7c478bd9Sstevel@tonic-gate if ((ret = __bam_new(dbc, P_DUPLICATE, &cp)) != 0) 605*7c478bd9Sstevel@tonic-gate return (ret); 606*7c478bd9Sstevel@tonic-gate 607*7c478bd9Sstevel@tonic-gate /* 608*7c478bd9Sstevel@tonic-gate * Move this set of duplicates off the page. First points to the first 609*7c478bd9Sstevel@tonic-gate * key of the first duplicate key/data pair, cnt is the number of pairs 610*7c478bd9Sstevel@tonic-gate * we're dealing with. 611*7c478bd9Sstevel@tonic-gate */ 612*7c478bd9Sstevel@tonic-gate memset(&hdr, 0, sizeof(hdr)); 613*7c478bd9Sstevel@tonic-gate for (indx = first + O_INDX, cpindx = 0;; ++cpindx) { 614*7c478bd9Sstevel@tonic-gate /* Copy the entry to the new page. */ 615*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx); 616*7c478bd9Sstevel@tonic-gate hdr.data = bk; 617*7c478bd9Sstevel@tonic-gate hdr.size = B_TYPE(bk->type) == B_KEYDATA ? 
618*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE; 619*7c478bd9Sstevel@tonic-gate if ((ret = 620*7c478bd9Sstevel@tonic-gate __db_pitem(dbc, cp, cpindx, hdr.size, &hdr, NULL)) != 0) 621*7c478bd9Sstevel@tonic-gate goto err; 622*7c478bd9Sstevel@tonic-gate 623*7c478bd9Sstevel@tonic-gate /* 624*7c478bd9Sstevel@tonic-gate * Move cursors referencing the old entry to the new entry. 625*7c478bd9Sstevel@tonic-gate * Done after the page put because __db_pitem() adjusts 626*7c478bd9Sstevel@tonic-gate * cursors on the new page, and before the delete because 627*7c478bd9Sstevel@tonic-gate * __db_ditem adjusts cursors on the old page. 628*7c478bd9Sstevel@tonic-gate */ 629*7c478bd9Sstevel@tonic-gate __bam_ca_dup(dbp, 630*7c478bd9Sstevel@tonic-gate PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx); 631*7c478bd9Sstevel@tonic-gate 632*7c478bd9Sstevel@tonic-gate /* Delete the data item. */ 633*7c478bd9Sstevel@tonic-gate if ((ret = __db_ditem(dbc, h, indx, hdr.size)) != 0) 634*7c478bd9Sstevel@tonic-gate goto err; 635*7c478bd9Sstevel@tonic-gate 636*7c478bd9Sstevel@tonic-gate /* Delete all but the first reference to the key. */ 637*7c478bd9Sstevel@tonic-gate if (--cnt == 0) 638*7c478bd9Sstevel@tonic-gate break; 639*7c478bd9Sstevel@tonic-gate if ((ret = __bam_adjindx(dbc, h, indx, first, 0)) != 0) 640*7c478bd9Sstevel@tonic-gate goto err; 641*7c478bd9Sstevel@tonic-gate } 642*7c478bd9Sstevel@tonic-gate 643*7c478bd9Sstevel@tonic-gate /* Put in a new data item that points to the duplicates page. 
*/ 644*7c478bd9Sstevel@tonic-gate UMRW(bo.unused1); 645*7c478bd9Sstevel@tonic-gate B_TSET(bo.type, B_DUPLICATE, 0); 646*7c478bd9Sstevel@tonic-gate UMRW(bo.unused2); 647*7c478bd9Sstevel@tonic-gate bo.pgno = cp->pgno; 648*7c478bd9Sstevel@tonic-gate bo.tlen = 0; 649*7c478bd9Sstevel@tonic-gate 650*7c478bd9Sstevel@tonic-gate OVPUT(h, indx, bo); 651*7c478bd9Sstevel@tonic-gate 652*7c478bd9Sstevel@tonic-gate return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY)); 653*7c478bd9Sstevel@tonic-gate 654*7c478bd9Sstevel@tonic-gate err: (void)__bam_free(dbc, cp); 655*7c478bd9Sstevel@tonic-gate return (ret); 656*7c478bd9Sstevel@tonic-gate } 657*7c478bd9Sstevel@tonic-gate 658*7c478bd9Sstevel@tonic-gate /* 659*7c478bd9Sstevel@tonic-gate * __bam_fixed -- 660*7c478bd9Sstevel@tonic-gate * Build the real record for a fixed length put. 661*7c478bd9Sstevel@tonic-gate */ 662*7c478bd9Sstevel@tonic-gate static int 663*7c478bd9Sstevel@tonic-gate __bam_fixed(dbc, dbt) 664*7c478bd9Sstevel@tonic-gate DBC *dbc; 665*7c478bd9Sstevel@tonic-gate DBT *dbt; 666*7c478bd9Sstevel@tonic-gate { 667*7c478bd9Sstevel@tonic-gate DB *dbp; 668*7c478bd9Sstevel@tonic-gate RECNO *rp; 669*7c478bd9Sstevel@tonic-gate int ret; 670*7c478bd9Sstevel@tonic-gate 671*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 672*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)dbp->internal)->recno; 673*7c478bd9Sstevel@tonic-gate 674*7c478bd9Sstevel@tonic-gate /* 675*7c478bd9Sstevel@tonic-gate * If database contains fixed-length records, and the record is long, 676*7c478bd9Sstevel@tonic-gate * return EINVAL. 677*7c478bd9Sstevel@tonic-gate */ 678*7c478bd9Sstevel@tonic-gate if (dbt->size > rp->re_len) 679*7c478bd9Sstevel@tonic-gate return (EINVAL); 680*7c478bd9Sstevel@tonic-gate 681*7c478bd9Sstevel@tonic-gate /* 682*7c478bd9Sstevel@tonic-gate * The caller checked to see if it was just right, so we know it's 683*7c478bd9Sstevel@tonic-gate * short. Pad it out. We use the record data return memory, it's 684*7c478bd9Sstevel@tonic-gate * only a short-term use. 
685*7c478bd9Sstevel@tonic-gate */ 686*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < rp->re_len) { 687*7c478bd9Sstevel@tonic-gate if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) { 688*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0; 689*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL; 690*7c478bd9Sstevel@tonic-gate return (ret); 691*7c478bd9Sstevel@tonic-gate } 692*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = rp->re_len; 693*7c478bd9Sstevel@tonic-gate } 694*7c478bd9Sstevel@tonic-gate memcpy(dbc->rdata.data, dbt->data, dbt->size); 695*7c478bd9Sstevel@tonic-gate memset((u_int8_t *)dbc->rdata.data + dbt->size, 696*7c478bd9Sstevel@tonic-gate rp->re_pad, rp->re_len - dbt->size); 697*7c478bd9Sstevel@tonic-gate 698*7c478bd9Sstevel@tonic-gate /* 699*7c478bd9Sstevel@tonic-gate * Clean up our flags and other information just in case, and 700*7c478bd9Sstevel@tonic-gate * change the caller's DBT to reference our created record. 701*7c478bd9Sstevel@tonic-gate */ 702*7c478bd9Sstevel@tonic-gate dbc->rdata.size = rp->re_len; 703*7c478bd9Sstevel@tonic-gate dbc->rdata.dlen = 0; 704*7c478bd9Sstevel@tonic-gate dbc->rdata.doff = 0; 705*7c478bd9Sstevel@tonic-gate dbc->rdata.flags = 0; 706*7c478bd9Sstevel@tonic-gate *dbt = dbc->rdata; 707*7c478bd9Sstevel@tonic-gate 708*7c478bd9Sstevel@tonic-gate return (0); 709*7c478bd9Sstevel@tonic-gate } 710*7c478bd9Sstevel@tonic-gate 711*7c478bd9Sstevel@tonic-gate /* 712*7c478bd9Sstevel@tonic-gate * __bam_partial -- 713*7c478bd9Sstevel@tonic-gate * Build the real record for a partial put. 
714*7c478bd9Sstevel@tonic-gate */ 715*7c478bd9Sstevel@tonic-gate static int 716*7c478bd9Sstevel@tonic-gate __bam_partial(dbc, dbt, h, indx, nbytes, flags) 717*7c478bd9Sstevel@tonic-gate DBC *dbc; 718*7c478bd9Sstevel@tonic-gate DBT *dbt; 719*7c478bd9Sstevel@tonic-gate PAGE *h; 720*7c478bd9Sstevel@tonic-gate u_int32_t indx, nbytes, flags; 721*7c478bd9Sstevel@tonic-gate { 722*7c478bd9Sstevel@tonic-gate BKEYDATA *bk, tbk; 723*7c478bd9Sstevel@tonic-gate BOVERFLOW *bo; 724*7c478bd9Sstevel@tonic-gate DB *dbp; 725*7c478bd9Sstevel@tonic-gate DBT copy; 726*7c478bd9Sstevel@tonic-gate u_int32_t len, tlen; 727*7c478bd9Sstevel@tonic-gate u_int8_t *p; 728*7c478bd9Sstevel@tonic-gate int ret; 729*7c478bd9Sstevel@tonic-gate 730*7c478bd9Sstevel@tonic-gate COMPQUIET(bo, NULL); 731*7c478bd9Sstevel@tonic-gate 732*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 733*7c478bd9Sstevel@tonic-gate 734*7c478bd9Sstevel@tonic-gate /* We use the record data return memory, it's only a short-term use. */ 735*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < nbytes) { 736*7c478bd9Sstevel@tonic-gate if ((ret = __os_realloc(&dbc->rdata.data, nbytes)) != 0) { 737*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0; 738*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL; 739*7c478bd9Sstevel@tonic-gate return (ret); 740*7c478bd9Sstevel@tonic-gate } 741*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = nbytes; 742*7c478bd9Sstevel@tonic-gate } 743*7c478bd9Sstevel@tonic-gate 744*7c478bd9Sstevel@tonic-gate /* 745*7c478bd9Sstevel@tonic-gate * We use nul bytes for any part of the record that isn't specified; 746*7c478bd9Sstevel@tonic-gate * get it over with. 
747*7c478bd9Sstevel@tonic-gate */ 748*7c478bd9Sstevel@tonic-gate memset(dbc->rdata.data, 0, nbytes); 749*7c478bd9Sstevel@tonic-gate 750*7c478bd9Sstevel@tonic-gate /* 751*7c478bd9Sstevel@tonic-gate * In the next clauses, we need to do three things: a) set p to point 752*7c478bd9Sstevel@tonic-gate * to the place at which to copy the user's data, b) set tlen to the 753*7c478bd9Sstevel@tonic-gate * total length of the record, not including the bytes contributed by 754*7c478bd9Sstevel@tonic-gate * the user, and c) copy any valid data from an existing record. 755*7c478bd9Sstevel@tonic-gate */ 756*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_NEWKEY)) { 757*7c478bd9Sstevel@tonic-gate tlen = dbt->doff; 758*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)dbc->rdata.data + dbt->doff; 759*7c478bd9Sstevel@tonic-gate goto ucopy; 760*7c478bd9Sstevel@tonic-gate } 761*7c478bd9Sstevel@tonic-gate 762*7c478bd9Sstevel@tonic-gate /* Find the current record. */ 763*7c478bd9Sstevel@tonic-gate if (indx < NUM_ENT(h)) { 764*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0)); 765*7c478bd9Sstevel@tonic-gate bo = (BOVERFLOW *)bk; 766*7c478bd9Sstevel@tonic-gate } else { 767*7c478bd9Sstevel@tonic-gate bk = &tbk; 768*7c478bd9Sstevel@tonic-gate B_TSET(bk->type, B_KEYDATA, 0); 769*7c478bd9Sstevel@tonic-gate bk->len = 0; 770*7c478bd9Sstevel@tonic-gate } 771*7c478bd9Sstevel@tonic-gate if (B_TYPE(bk->type) == B_OVERFLOW) { 772*7c478bd9Sstevel@tonic-gate /* 773*7c478bd9Sstevel@tonic-gate * In the case of an overflow record, we shift things around 774*7c478bd9Sstevel@tonic-gate * in the current record rather than allocate a separate copy. 
775*7c478bd9Sstevel@tonic-gate */ 776*7c478bd9Sstevel@tonic-gate memset(©, 0, sizeof(copy)); 777*7c478bd9Sstevel@tonic-gate if ((ret = __db_goff(dbp, ©, bo->tlen, 778*7c478bd9Sstevel@tonic-gate bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) 779*7c478bd9Sstevel@tonic-gate return (ret); 780*7c478bd9Sstevel@tonic-gate 781*7c478bd9Sstevel@tonic-gate /* Skip any leading data from the original record. */ 782*7c478bd9Sstevel@tonic-gate tlen = dbt->doff; 783*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)dbc->rdata.data + dbt->doff; 784*7c478bd9Sstevel@tonic-gate 785*7c478bd9Sstevel@tonic-gate /* 786*7c478bd9Sstevel@tonic-gate * Copy in any trailing data from the original record. 787*7c478bd9Sstevel@tonic-gate * 788*7c478bd9Sstevel@tonic-gate * If the original record was larger than the original offset 789*7c478bd9Sstevel@tonic-gate * plus the bytes being deleted, there is trailing data in the 790*7c478bd9Sstevel@tonic-gate * original record we need to preserve. If we aren't deleting 791*7c478bd9Sstevel@tonic-gate * the same number of bytes as we're inserting, copy it up or 792*7c478bd9Sstevel@tonic-gate * down, into place. 793*7c478bd9Sstevel@tonic-gate * 794*7c478bd9Sstevel@tonic-gate * Use memmove(), the regions may overlap. 795*7c478bd9Sstevel@tonic-gate */ 796*7c478bd9Sstevel@tonic-gate if (bo->tlen > dbt->doff + dbt->dlen) { 797*7c478bd9Sstevel@tonic-gate len = bo->tlen - (dbt->doff + dbt->dlen); 798*7c478bd9Sstevel@tonic-gate if (dbt->dlen != dbt->size) 799*7c478bd9Sstevel@tonic-gate memmove(p + dbt->size, p + dbt->dlen, len); 800*7c478bd9Sstevel@tonic-gate tlen += len; 801*7c478bd9Sstevel@tonic-gate } 802*7c478bd9Sstevel@tonic-gate } else { 803*7c478bd9Sstevel@tonic-gate /* Copy in any leading data from the original record. */ 804*7c478bd9Sstevel@tonic-gate memcpy(dbc->rdata.data, 805*7c478bd9Sstevel@tonic-gate bk->data, dbt->doff > bk->len ? 
bk->len : dbt->doff); 806*7c478bd9Sstevel@tonic-gate tlen = dbt->doff; 807*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)dbc->rdata.data + dbt->doff; 808*7c478bd9Sstevel@tonic-gate 809*7c478bd9Sstevel@tonic-gate /* Copy in any trailing data from the original record. */ 810*7c478bd9Sstevel@tonic-gate len = dbt->doff + dbt->dlen; 811*7c478bd9Sstevel@tonic-gate if (bk->len > len) { 812*7c478bd9Sstevel@tonic-gate memcpy(p + dbt->size, bk->data + len, bk->len - len); 813*7c478bd9Sstevel@tonic-gate tlen += bk->len - len; 814*7c478bd9Sstevel@tonic-gate } 815*7c478bd9Sstevel@tonic-gate } 816*7c478bd9Sstevel@tonic-gate 817*7c478bd9Sstevel@tonic-gate ucopy: /* 818*7c478bd9Sstevel@tonic-gate * Copy in the application provided data -- p and tlen must have been 819*7c478bd9Sstevel@tonic-gate * initialized above. 820*7c478bd9Sstevel@tonic-gate */ 821*7c478bd9Sstevel@tonic-gate memcpy(p, dbt->data, dbt->size); 822*7c478bd9Sstevel@tonic-gate tlen += dbt->size; 823*7c478bd9Sstevel@tonic-gate 824*7c478bd9Sstevel@tonic-gate /* Set the DBT to reference our new record. */ 825*7c478bd9Sstevel@tonic-gate dbc->rdata.size = tlen; 826*7c478bd9Sstevel@tonic-gate dbc->rdata.dlen = 0; 827*7c478bd9Sstevel@tonic-gate dbc->rdata.doff = 0; 828*7c478bd9Sstevel@tonic-gate dbc->rdata.flags = 0; 829*7c478bd9Sstevel@tonic-gate *dbt = dbc->rdata; 830*7c478bd9Sstevel@tonic-gate return (0); 831*7c478bd9Sstevel@tonic-gate } 832