/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995, 1996
 *	Keith Bostic. All rights reserved.
 */
/*
 * Copyright (c) 1990, 1993, 1994, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Mike Olson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
46*7c478bd9Sstevel@tonic-gate
47*7c478bd9Sstevel@tonic-gate #include "config.h"
48*7c478bd9Sstevel@tonic-gate
49*7c478bd9Sstevel@tonic-gate #ifndef lint
50*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_put.c 10.54 (Sleepycat) 12/6/98";
51*7c478bd9Sstevel@tonic-gate #endif /* not lint */
52*7c478bd9Sstevel@tonic-gate
53*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
54*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
55*7c478bd9Sstevel@tonic-gate
56*7c478bd9Sstevel@tonic-gate #include <errno.h>
57*7c478bd9Sstevel@tonic-gate #include <string.h>
58*7c478bd9Sstevel@tonic-gate #endif
59*7c478bd9Sstevel@tonic-gate
60*7c478bd9Sstevel@tonic-gate #include "db_int.h"
61*7c478bd9Sstevel@tonic-gate #include "db_page.h"
62*7c478bd9Sstevel@tonic-gate #include "btree.h"
63*7c478bd9Sstevel@tonic-gate
64*7c478bd9Sstevel@tonic-gate static int __bam_fixed __P((DBC *, DBT *));
65*7c478bd9Sstevel@tonic-gate static int __bam_ndup __P((DBC *, PAGE *, u_int32_t));
66*7c478bd9Sstevel@tonic-gate static int __bam_ovput __P((DBC *, PAGE *, u_int32_t, DBT *));
67*7c478bd9Sstevel@tonic-gate static int __bam_partial __P((DBC *,
68*7c478bd9Sstevel@tonic-gate DBT *, PAGE *, u_int32_t, u_int32_t, u_int32_t));
69*7c478bd9Sstevel@tonic-gate static u_int32_t __bam_partsize __P((DBT *, PAGE *, u_int32_t));
70*7c478bd9Sstevel@tonic-gate
71*7c478bd9Sstevel@tonic-gate /*
72*7c478bd9Sstevel@tonic-gate * __bam_iitem --
73*7c478bd9Sstevel@tonic-gate * Insert an item into the tree.
74*7c478bd9Sstevel@tonic-gate *
75*7c478bd9Sstevel@tonic-gate * PUBLIC: int __bam_iitem __P((DBC *,
76*7c478bd9Sstevel@tonic-gate * PUBLIC: PAGE **, db_indx_t *, DBT *, DBT *, u_int32_t, u_int32_t));
77*7c478bd9Sstevel@tonic-gate */
78*7c478bd9Sstevel@tonic-gate int
__bam_iitem(dbc,hp,indxp,key,data,op,flags)79*7c478bd9Sstevel@tonic-gate __bam_iitem(dbc, hp, indxp, key, data, op, flags)
80*7c478bd9Sstevel@tonic-gate DBC *dbc;
81*7c478bd9Sstevel@tonic-gate PAGE **hp;
82*7c478bd9Sstevel@tonic-gate db_indx_t *indxp;
83*7c478bd9Sstevel@tonic-gate DBT *key, *data;
84*7c478bd9Sstevel@tonic-gate u_int32_t op, flags;
85*7c478bd9Sstevel@tonic-gate {
86*7c478bd9Sstevel@tonic-gate BTREE *t;
87*7c478bd9Sstevel@tonic-gate BKEYDATA *bk;
88*7c478bd9Sstevel@tonic-gate DB *dbp;
89*7c478bd9Sstevel@tonic-gate DBT tdbt;
90*7c478bd9Sstevel@tonic-gate PAGE *h;
91*7c478bd9Sstevel@tonic-gate db_indx_t indx, nbytes;
92*7c478bd9Sstevel@tonic-gate u_int32_t data_size, have_bytes, need_bytes, needed;
93*7c478bd9Sstevel@tonic-gate int bigkey, bigdata, dupadjust, replace, ret;
94*7c478bd9Sstevel@tonic-gate
95*7c478bd9Sstevel@tonic-gate COMPQUIET(bk, NULL);
96*7c478bd9Sstevel@tonic-gate
97*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
98*7c478bd9Sstevel@tonic-gate t = dbp->internal;
99*7c478bd9Sstevel@tonic-gate h = *hp;
100*7c478bd9Sstevel@tonic-gate indx = *indxp;
101*7c478bd9Sstevel@tonic-gate dupadjust = replace = 0;
102*7c478bd9Sstevel@tonic-gate
103*7c478bd9Sstevel@tonic-gate /*
104*7c478bd9Sstevel@tonic-gate * If it's a page of duplicates, call the common code to do the work.
105*7c478bd9Sstevel@tonic-gate *
106*7c478bd9Sstevel@tonic-gate * !!!
107*7c478bd9Sstevel@tonic-gate * Here's where the hp and indxp are important. The duplicate code
108*7c478bd9Sstevel@tonic-gate * may decide to rework/rearrange the pages and indices we're using,
109*7c478bd9Sstevel@tonic-gate * so the caller must understand that the page stack may change.
110*7c478bd9Sstevel@tonic-gate */
111*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_DUPLICATE) {
112*7c478bd9Sstevel@tonic-gate /* Adjust the index for the new item if it's a DB_AFTER op. */
113*7c478bd9Sstevel@tonic-gate if (op == DB_AFTER)
114*7c478bd9Sstevel@tonic-gate ++*indxp;
115*7c478bd9Sstevel@tonic-gate
116*7c478bd9Sstevel@tonic-gate /* Remove the current item if it's a DB_CURRENT op. */
117*7c478bd9Sstevel@tonic-gate if (op == DB_CURRENT) {
118*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(*hp, *indxp);
119*7c478bd9Sstevel@tonic-gate switch (B_TYPE(bk->type)) {
120*7c478bd9Sstevel@tonic-gate case B_KEYDATA:
121*7c478bd9Sstevel@tonic-gate nbytes = BKEYDATA_SIZE(bk->len);
122*7c478bd9Sstevel@tonic-gate break;
123*7c478bd9Sstevel@tonic-gate case B_OVERFLOW:
124*7c478bd9Sstevel@tonic-gate nbytes = BOVERFLOW_SIZE;
125*7c478bd9Sstevel@tonic-gate break;
126*7c478bd9Sstevel@tonic-gate default:
127*7c478bd9Sstevel@tonic-gate return (__db_pgfmt(dbp, h->pgno));
128*7c478bd9Sstevel@tonic-gate }
129*7c478bd9Sstevel@tonic-gate if ((ret = __db_ditem(dbc, *hp, *indxp, nbytes)) != 0)
130*7c478bd9Sstevel@tonic-gate return (ret);
131*7c478bd9Sstevel@tonic-gate }
132*7c478bd9Sstevel@tonic-gate
133*7c478bd9Sstevel@tonic-gate /* Put the new/replacement item onto the page. */
134*7c478bd9Sstevel@tonic-gate if ((ret = __db_dput(dbc, data, hp, indxp, __bam_new)) != 0)
135*7c478bd9Sstevel@tonic-gate return (ret);
136*7c478bd9Sstevel@tonic-gate
137*7c478bd9Sstevel@tonic-gate goto done;
138*7c478bd9Sstevel@tonic-gate }
139*7c478bd9Sstevel@tonic-gate
140*7c478bd9Sstevel@tonic-gate /* Handle fixed-length records: build the real record. */
141*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->recno->re_len) {
142*7c478bd9Sstevel@tonic-gate tdbt = *data;
143*7c478bd9Sstevel@tonic-gate if ((ret = __bam_fixed(dbc, &tdbt)) != 0)
144*7c478bd9Sstevel@tonic-gate return (ret);
145*7c478bd9Sstevel@tonic-gate data = &tdbt;
146*7c478bd9Sstevel@tonic-gate }
147*7c478bd9Sstevel@tonic-gate
148*7c478bd9Sstevel@tonic-gate /*
149*7c478bd9Sstevel@tonic-gate * Figure out how much space the data will take, including if it's a
150*7c478bd9Sstevel@tonic-gate * partial record. If either of the key or data items won't fit on
151*7c478bd9Sstevel@tonic-gate * a page, we'll have to store them on overflow pages.
152*7c478bd9Sstevel@tonic-gate */
153*7c478bd9Sstevel@tonic-gate bigkey = LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize;
154*7c478bd9Sstevel@tonic-gate data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
155*7c478bd9Sstevel@tonic-gate __bam_partsize(data, h, indx) : data->size;
156*7c478bd9Sstevel@tonic-gate bigdata = data_size > t->bt_ovflsize;
157*7c478bd9Sstevel@tonic-gate
158*7c478bd9Sstevel@tonic-gate needed = 0;
159*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_NEWKEY)) {
160*7c478bd9Sstevel@tonic-gate /* If BI_NEWKEY is set we're adding a new key and data pair. */
161*7c478bd9Sstevel@tonic-gate if (bigkey)
162*7c478bd9Sstevel@tonic-gate needed += BOVERFLOW_PSIZE;
163*7c478bd9Sstevel@tonic-gate else
164*7c478bd9Sstevel@tonic-gate needed += BKEYDATA_PSIZE(key->size);
165*7c478bd9Sstevel@tonic-gate if (bigdata)
166*7c478bd9Sstevel@tonic-gate needed += BOVERFLOW_PSIZE;
167*7c478bd9Sstevel@tonic-gate else
168*7c478bd9Sstevel@tonic-gate needed += BKEYDATA_PSIZE(data_size);
169*7c478bd9Sstevel@tonic-gate } else {
170*7c478bd9Sstevel@tonic-gate /*
171*7c478bd9Sstevel@tonic-gate * We're either overwriting the data item of a key/data pair
172*7c478bd9Sstevel@tonic-gate * or we're adding the data item only, i.e. a new duplicate.
173*7c478bd9Sstevel@tonic-gate */
174*7c478bd9Sstevel@tonic-gate if (op == DB_CURRENT) {
175*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h,
176*7c478bd9Sstevel@tonic-gate indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
177*7c478bd9Sstevel@tonic-gate if (B_TYPE(bk->type) == B_KEYDATA)
178*7c478bd9Sstevel@tonic-gate have_bytes = BKEYDATA_PSIZE(bk->len);
179*7c478bd9Sstevel@tonic-gate else
180*7c478bd9Sstevel@tonic-gate have_bytes = BOVERFLOW_PSIZE;
181*7c478bd9Sstevel@tonic-gate need_bytes = 0;
182*7c478bd9Sstevel@tonic-gate } else {
183*7c478bd9Sstevel@tonic-gate have_bytes = 0;
184*7c478bd9Sstevel@tonic-gate need_bytes = sizeof(db_indx_t);
185*7c478bd9Sstevel@tonic-gate }
186*7c478bd9Sstevel@tonic-gate if (bigdata)
187*7c478bd9Sstevel@tonic-gate need_bytes += BOVERFLOW_PSIZE;
188*7c478bd9Sstevel@tonic-gate else
189*7c478bd9Sstevel@tonic-gate need_bytes += BKEYDATA_PSIZE(data_size);
190*7c478bd9Sstevel@tonic-gate
191*7c478bd9Sstevel@tonic-gate if (have_bytes < need_bytes)
192*7c478bd9Sstevel@tonic-gate needed += need_bytes - have_bytes;
193*7c478bd9Sstevel@tonic-gate }
194*7c478bd9Sstevel@tonic-gate
195*7c478bd9Sstevel@tonic-gate /*
196*7c478bd9Sstevel@tonic-gate * If there's not enough room, or the user has put a ceiling on the
197*7c478bd9Sstevel@tonic-gate * number of keys permitted in the page, split the page.
198*7c478bd9Sstevel@tonic-gate *
199*7c478bd9Sstevel@tonic-gate * XXX
200*7c478bd9Sstevel@tonic-gate * The t->bt_maxkey test here may be insufficient -- do we have to
201*7c478bd9Sstevel@tonic-gate * check in the btree split code, so we don't undo it there!?!?
202*7c478bd9Sstevel@tonic-gate */
203*7c478bd9Sstevel@tonic-gate if (P_FREESPACE(h) < needed ||
204*7c478bd9Sstevel@tonic-gate (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
205*7c478bd9Sstevel@tonic-gate return (DB_NEEDSPLIT);
206*7c478bd9Sstevel@tonic-gate
207*7c478bd9Sstevel@tonic-gate /* Handle partial puts: build the real record. */
208*7c478bd9Sstevel@tonic-gate if (F_ISSET(data, DB_DBT_PARTIAL)) {
209*7c478bd9Sstevel@tonic-gate tdbt = *data;
210*7c478bd9Sstevel@tonic-gate if ((ret = __bam_partial(dbc,
211*7c478bd9Sstevel@tonic-gate &tdbt, h, indx, data_size, flags)) != 0)
212*7c478bd9Sstevel@tonic-gate return (ret);
213*7c478bd9Sstevel@tonic-gate data = &tdbt;
214*7c478bd9Sstevel@tonic-gate }
215*7c478bd9Sstevel@tonic-gate
216*7c478bd9Sstevel@tonic-gate /*
217*7c478bd9Sstevel@tonic-gate * The code breaks it up into six cases:
218*7c478bd9Sstevel@tonic-gate *
219*7c478bd9Sstevel@tonic-gate * 1. Append a new key/data pair.
220*7c478bd9Sstevel@tonic-gate * 2. Insert a new key/data pair.
221*7c478bd9Sstevel@tonic-gate * 3. Append a new data item (a new duplicate).
222*7c478bd9Sstevel@tonic-gate * 4. Insert a new data item (a new duplicate).
223*7c478bd9Sstevel@tonic-gate * 5. Overflow item: delete and re-add the data item.
224*7c478bd9Sstevel@tonic-gate * 6. Replace the data item.
225*7c478bd9Sstevel@tonic-gate */
226*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_NEWKEY)) {
227*7c478bd9Sstevel@tonic-gate switch (op) {
228*7c478bd9Sstevel@tonic-gate case DB_AFTER: /* 1. Append a new key/data pair. */
229*7c478bd9Sstevel@tonic-gate indx += 2;
230*7c478bd9Sstevel@tonic-gate *indxp += 2;
231*7c478bd9Sstevel@tonic-gate break;
232*7c478bd9Sstevel@tonic-gate case DB_BEFORE: /* 2. Insert a new key/data pair. */
233*7c478bd9Sstevel@tonic-gate break;
234*7c478bd9Sstevel@tonic-gate default:
235*7c478bd9Sstevel@tonic-gate return (EINVAL);
236*7c478bd9Sstevel@tonic-gate }
237*7c478bd9Sstevel@tonic-gate
238*7c478bd9Sstevel@tonic-gate /* Add the key. */
239*7c478bd9Sstevel@tonic-gate if (bigkey) {
240*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ovput(dbc, h, indx, key)) != 0)
241*7c478bd9Sstevel@tonic-gate return (ret);
242*7c478bd9Sstevel@tonic-gate } else
243*7c478bd9Sstevel@tonic-gate if ((ret = __db_pitem(dbc, h, indx,
244*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(key->size), NULL, key)) != 0)
245*7c478bd9Sstevel@tonic-gate return (ret);
246*7c478bd9Sstevel@tonic-gate ++indx;
247*7c478bd9Sstevel@tonic-gate } else {
248*7c478bd9Sstevel@tonic-gate switch (op) {
249*7c478bd9Sstevel@tonic-gate case DB_AFTER: /* 3. Append a new data item. */
250*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_LBTREE) {
251*7c478bd9Sstevel@tonic-gate /*
252*7c478bd9Sstevel@tonic-gate * Adjust the cursor and copy in the key for
253*7c478bd9Sstevel@tonic-gate * the duplicate.
254*7c478bd9Sstevel@tonic-gate */
255*7c478bd9Sstevel@tonic-gate if ((ret = __bam_adjindx(dbc,
256*7c478bd9Sstevel@tonic-gate h, indx + P_INDX, indx, 1)) != 0)
257*7c478bd9Sstevel@tonic-gate return (ret);
258*7c478bd9Sstevel@tonic-gate
259*7c478bd9Sstevel@tonic-gate indx += 3;
260*7c478bd9Sstevel@tonic-gate dupadjust = 1;
261*7c478bd9Sstevel@tonic-gate
262*7c478bd9Sstevel@tonic-gate *indxp += 2;
263*7c478bd9Sstevel@tonic-gate } else {
264*7c478bd9Sstevel@tonic-gate ++indx;
265*7c478bd9Sstevel@tonic-gate __bam_ca_di(dbp, h->pgno, indx, 1);
266*7c478bd9Sstevel@tonic-gate
267*7c478bd9Sstevel@tonic-gate *indxp += 1;
268*7c478bd9Sstevel@tonic-gate }
269*7c478bd9Sstevel@tonic-gate break;
270*7c478bd9Sstevel@tonic-gate case DB_BEFORE: /* 4. Insert a new data item. */
271*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_LBTREE) {
272*7c478bd9Sstevel@tonic-gate /*
273*7c478bd9Sstevel@tonic-gate * Adjust the cursor and copy in the key for
274*7c478bd9Sstevel@tonic-gate * the duplicate.
275*7c478bd9Sstevel@tonic-gate */
276*7c478bd9Sstevel@tonic-gate if ((ret =
277*7c478bd9Sstevel@tonic-gate __bam_adjindx(dbc, h, indx, indx, 1)) != 0)
278*7c478bd9Sstevel@tonic-gate return (ret);
279*7c478bd9Sstevel@tonic-gate
280*7c478bd9Sstevel@tonic-gate ++indx;
281*7c478bd9Sstevel@tonic-gate dupadjust = 1;
282*7c478bd9Sstevel@tonic-gate } else
283*7c478bd9Sstevel@tonic-gate __bam_ca_di(dbp, h->pgno, indx, 1);
284*7c478bd9Sstevel@tonic-gate break;
285*7c478bd9Sstevel@tonic-gate case DB_CURRENT:
286*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_LBTREE)
287*7c478bd9Sstevel@tonic-gate ++indx;
288*7c478bd9Sstevel@tonic-gate
289*7c478bd9Sstevel@tonic-gate /*
290*7c478bd9Sstevel@tonic-gate * 5. Delete/re-add the data item.
291*7c478bd9Sstevel@tonic-gate *
292*7c478bd9Sstevel@tonic-gate * If we're dealing with offpage items, we have to
293*7c478bd9Sstevel@tonic-gate * delete and then re-add the item.
294*7c478bd9Sstevel@tonic-gate */
295*7c478bd9Sstevel@tonic-gate if (bigdata || B_TYPE(bk->type) != B_KEYDATA) {
296*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0)
297*7c478bd9Sstevel@tonic-gate return (ret);
298*7c478bd9Sstevel@tonic-gate break;
299*7c478bd9Sstevel@tonic-gate }
300*7c478bd9Sstevel@tonic-gate
301*7c478bd9Sstevel@tonic-gate /* 6. Replace the data item. */
302*7c478bd9Sstevel@tonic-gate replace = 1;
303*7c478bd9Sstevel@tonic-gate break;
304*7c478bd9Sstevel@tonic-gate default:
305*7c478bd9Sstevel@tonic-gate return (EINVAL);
306*7c478bd9Sstevel@tonic-gate }
307*7c478bd9Sstevel@tonic-gate }
308*7c478bd9Sstevel@tonic-gate
309*7c478bd9Sstevel@tonic-gate /* Add the data. */
310*7c478bd9Sstevel@tonic-gate if (bigdata) {
311*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ovput(dbc, h, indx, data)) != 0)
312*7c478bd9Sstevel@tonic-gate return (ret);
313*7c478bd9Sstevel@tonic-gate } else {
314*7c478bd9Sstevel@tonic-gate BKEYDATA __bk;
315*7c478bd9Sstevel@tonic-gate DBT __hdr;
316*7c478bd9Sstevel@tonic-gate
317*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_DELETED)) {
318*7c478bd9Sstevel@tonic-gate B_TSET(__bk.type, B_KEYDATA, 1);
319*7c478bd9Sstevel@tonic-gate __bk.len = data->size;
320*7c478bd9Sstevel@tonic-gate __hdr.data = &__bk;
321*7c478bd9Sstevel@tonic-gate __hdr.size = SSZA(BKEYDATA, data);
322*7c478bd9Sstevel@tonic-gate ret = __db_pitem(dbc, h, indx,
323*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(data->size), &__hdr, data);
324*7c478bd9Sstevel@tonic-gate } else if (replace)
325*7c478bd9Sstevel@tonic-gate ret = __bam_ritem(dbc, h, indx, data);
326*7c478bd9Sstevel@tonic-gate else
327*7c478bd9Sstevel@tonic-gate ret = __db_pitem(dbc, h, indx,
328*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(data->size), NULL, data);
329*7c478bd9Sstevel@tonic-gate if (ret != 0)
330*7c478bd9Sstevel@tonic-gate return (ret);
331*7c478bd9Sstevel@tonic-gate }
332*7c478bd9Sstevel@tonic-gate
333*7c478bd9Sstevel@tonic-gate if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
334*7c478bd9Sstevel@tonic-gate return (ret);
335*7c478bd9Sstevel@tonic-gate
336*7c478bd9Sstevel@tonic-gate /*
337*7c478bd9Sstevel@tonic-gate * If the page is at least 50% full, and we added a duplicate, see if
338*7c478bd9Sstevel@tonic-gate * that set of duplicates takes up at least 25% of the space. If it
339*7c478bd9Sstevel@tonic-gate * does, move it off onto its own page.
340*7c478bd9Sstevel@tonic-gate */
341*7c478bd9Sstevel@tonic-gate if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
342*7c478bd9Sstevel@tonic-gate --indx;
343*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ndup(dbc, h, indx)) != 0)
344*7c478bd9Sstevel@tonic-gate return (ret);
345*7c478bd9Sstevel@tonic-gate }
346*7c478bd9Sstevel@tonic-gate
347*7c478bd9Sstevel@tonic-gate /*
348*7c478bd9Sstevel@tonic-gate * If we've changed the record count, update the tree. Record counts
349*7c478bd9Sstevel@tonic-gate * need to be updated in recno databases and in btree databases where
350*7c478bd9Sstevel@tonic-gate * we are supporting records. In both cases, adjust the count if the
351*7c478bd9Sstevel@tonic-gate * operation wasn't performed on the current record or when the caller
352*7c478bd9Sstevel@tonic-gate * overrides and wants the adjustment made regardless.
353*7c478bd9Sstevel@tonic-gate */
354*7c478bd9Sstevel@tonic-gate done: if (LF_ISSET(BI_DOINCR) ||
355*7c478bd9Sstevel@tonic-gate (op != DB_CURRENT &&
356*7c478bd9Sstevel@tonic-gate (F_ISSET(dbp, DB_BT_RECNUM) || dbp->type == DB_RECNO)))
357*7c478bd9Sstevel@tonic-gate if ((ret = __bam_adjust(dbc, 1)) != 0)
358*7c478bd9Sstevel@tonic-gate return (ret);
359*7c478bd9Sstevel@tonic-gate
360*7c478bd9Sstevel@tonic-gate /* If we've modified a recno file, set the flag */
361*7c478bd9Sstevel@tonic-gate if (t->recno != NULL)
362*7c478bd9Sstevel@tonic-gate F_SET(t->recno, RECNO_MODIFIED);
363*7c478bd9Sstevel@tonic-gate
364*7c478bd9Sstevel@tonic-gate return (ret);
365*7c478bd9Sstevel@tonic-gate }
366*7c478bd9Sstevel@tonic-gate
367*7c478bd9Sstevel@tonic-gate /*
368*7c478bd9Sstevel@tonic-gate * __bam_partsize --
369*7c478bd9Sstevel@tonic-gate * Figure out how much space a partial data item is in total.
370*7c478bd9Sstevel@tonic-gate */
371*7c478bd9Sstevel@tonic-gate static u_int32_t
__bam_partsize(data,h,indx)372*7c478bd9Sstevel@tonic-gate __bam_partsize(data, h, indx)
373*7c478bd9Sstevel@tonic-gate DBT *data;
374*7c478bd9Sstevel@tonic-gate PAGE *h;
375*7c478bd9Sstevel@tonic-gate u_int32_t indx;
376*7c478bd9Sstevel@tonic-gate {
377*7c478bd9Sstevel@tonic-gate BKEYDATA *bk;
378*7c478bd9Sstevel@tonic-gate u_int32_t nbytes;
379*7c478bd9Sstevel@tonic-gate
380*7c478bd9Sstevel@tonic-gate /*
381*7c478bd9Sstevel@tonic-gate * Figure out how much total space we'll need. If the record doesn't
382*7c478bd9Sstevel@tonic-gate * already exist, it's simply the data we're provided.
383*7c478bd9Sstevel@tonic-gate */
384*7c478bd9Sstevel@tonic-gate if (indx >= NUM_ENT(h))
385*7c478bd9Sstevel@tonic-gate return (data->doff + data->size);
386*7c478bd9Sstevel@tonic-gate
387*7c478bd9Sstevel@tonic-gate /*
388*7c478bd9Sstevel@tonic-gate * Otherwise, it's the data provided plus any already existing data
389*7c478bd9Sstevel@tonic-gate * that we're not replacing.
390*7c478bd9Sstevel@tonic-gate */
391*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
392*7c478bd9Sstevel@tonic-gate nbytes =
393*7c478bd9Sstevel@tonic-gate B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
394*7c478bd9Sstevel@tonic-gate
395*7c478bd9Sstevel@tonic-gate /*
396*7c478bd9Sstevel@tonic-gate * There are really two cases here:
397*7c478bd9Sstevel@tonic-gate *
398*7c478bd9Sstevel@tonic-gate * Case 1: We are replacing some bytes that do not exist (i.e., they
399*7c478bd9Sstevel@tonic-gate * are past the end of the record). In this case the number of bytes
400*7c478bd9Sstevel@tonic-gate * we are replacing is irrelevant and all we care about is how many
401*7c478bd9Sstevel@tonic-gate * bytes we are going to add from offset. So, the new record length
402*7c478bd9Sstevel@tonic-gate * is going to be the size of the new bytes (size) plus wherever those
403*7c478bd9Sstevel@tonic-gate * new bytes begin (doff).
404*7c478bd9Sstevel@tonic-gate *
405*7c478bd9Sstevel@tonic-gate * Case 2: All the bytes we are replacing exist. Therefore, the new
406*7c478bd9Sstevel@tonic-gate * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
407*7c478bd9Sstevel@tonic-gate * plus the bytes we are adding (size).
408*7c478bd9Sstevel@tonic-gate */
409*7c478bd9Sstevel@tonic-gate if (nbytes < data->doff + data->dlen) /* Case 1 */
410*7c478bd9Sstevel@tonic-gate return (data->doff + data->size);
411*7c478bd9Sstevel@tonic-gate
412*7c478bd9Sstevel@tonic-gate return (nbytes + data->size - data->dlen); /* Case 2 */
413*7c478bd9Sstevel@tonic-gate }
414*7c478bd9Sstevel@tonic-gate
/*
 * OVPUT --
 *	Copy an overflow item onto a page.
 *
 * h is the page, indx the index at which to add the item and bo the
 * BOVERFLOW structure to copy; bo must be an lvalue, since its address
 * is taken.
 *
 * !!!
 * The macro relies on variables named "dbc" and "ret" being in scope at
 * the point of use, and it returns from the enclosing function with the
 * error code if __db_pitem fails.
 */
#undef	OVPUT
#define	OVPUT(h, indx, bo) do {						\
	DBT __hdr;							\
	memset(&__hdr, 0, sizeof(__hdr));				\
	__hdr.data = &(bo);						\
	__hdr.size = BOVERFLOW_SIZE;					\
	if ((ret = __db_pitem(dbc,					\
	    (h), (indx), BOVERFLOW_SIZE, &__hdr, NULL)) != 0)		\
		return (ret);						\
} while (0)
429*7c478bd9Sstevel@tonic-gate
430*7c478bd9Sstevel@tonic-gate /*
431*7c478bd9Sstevel@tonic-gate * __bam_ovput --
432*7c478bd9Sstevel@tonic-gate * Build an overflow item and put it on the page.
433*7c478bd9Sstevel@tonic-gate */
434*7c478bd9Sstevel@tonic-gate static int
__bam_ovput(dbc,h,indx,item)435*7c478bd9Sstevel@tonic-gate __bam_ovput(dbc, h, indx, item)
436*7c478bd9Sstevel@tonic-gate DBC *dbc;
437*7c478bd9Sstevel@tonic-gate PAGE *h;
438*7c478bd9Sstevel@tonic-gate u_int32_t indx;
439*7c478bd9Sstevel@tonic-gate DBT *item;
440*7c478bd9Sstevel@tonic-gate {
441*7c478bd9Sstevel@tonic-gate BOVERFLOW bo;
442*7c478bd9Sstevel@tonic-gate int ret;
443*7c478bd9Sstevel@tonic-gate
444*7c478bd9Sstevel@tonic-gate UMRW(bo.unused1);
445*7c478bd9Sstevel@tonic-gate B_TSET(bo.type, B_OVERFLOW, 0);
446*7c478bd9Sstevel@tonic-gate UMRW(bo.unused2);
447*7c478bd9Sstevel@tonic-gate if ((ret = __db_poff(dbc, item, &bo.pgno, __bam_new)) != 0)
448*7c478bd9Sstevel@tonic-gate return (ret);
449*7c478bd9Sstevel@tonic-gate bo.tlen = item->size;
450*7c478bd9Sstevel@tonic-gate
451*7c478bd9Sstevel@tonic-gate OVPUT(h, indx, bo);
452*7c478bd9Sstevel@tonic-gate
453*7c478bd9Sstevel@tonic-gate return (0);
454*7c478bd9Sstevel@tonic-gate }
455*7c478bd9Sstevel@tonic-gate
456*7c478bd9Sstevel@tonic-gate /*
457*7c478bd9Sstevel@tonic-gate * __bam_ritem --
458*7c478bd9Sstevel@tonic-gate * Replace an item on a page.
459*7c478bd9Sstevel@tonic-gate *
460*7c478bd9Sstevel@tonic-gate * PUBLIC: int __bam_ritem __P((DBC *, PAGE *, u_int32_t, DBT *));
461*7c478bd9Sstevel@tonic-gate */
462*7c478bd9Sstevel@tonic-gate int
__bam_ritem(dbc,h,indx,data)463*7c478bd9Sstevel@tonic-gate __bam_ritem(dbc, h, indx, data)
464*7c478bd9Sstevel@tonic-gate DBC *dbc;
465*7c478bd9Sstevel@tonic-gate PAGE *h;
466*7c478bd9Sstevel@tonic-gate u_int32_t indx;
467*7c478bd9Sstevel@tonic-gate DBT *data;
468*7c478bd9Sstevel@tonic-gate {
469*7c478bd9Sstevel@tonic-gate BKEYDATA *bk;
470*7c478bd9Sstevel@tonic-gate DB *dbp;
471*7c478bd9Sstevel@tonic-gate DBT orig, repl;
472*7c478bd9Sstevel@tonic-gate db_indx_t cnt, lo, ln, min, off, prefix, suffix;
473*7c478bd9Sstevel@tonic-gate int32_t nbytes;
474*7c478bd9Sstevel@tonic-gate int ret;
475*7c478bd9Sstevel@tonic-gate u_int8_t *p, *t;
476*7c478bd9Sstevel@tonic-gate
477*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
478*7c478bd9Sstevel@tonic-gate
479*7c478bd9Sstevel@tonic-gate /*
480*7c478bd9Sstevel@tonic-gate * Replace a single item onto a page. The logic figuring out where
481*7c478bd9Sstevel@tonic-gate * to insert and whether it fits is handled in the caller. All we do
482*7c478bd9Sstevel@tonic-gate * here is manage the page shuffling.
483*7c478bd9Sstevel@tonic-gate */
484*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx);
485*7c478bd9Sstevel@tonic-gate
486*7c478bd9Sstevel@tonic-gate /* Log the change. */
487*7c478bd9Sstevel@tonic-gate if (DB_LOGGING(dbc)) {
488*7c478bd9Sstevel@tonic-gate /*
489*7c478bd9Sstevel@tonic-gate * We might as well check to see if the two data items share
490*7c478bd9Sstevel@tonic-gate * a common prefix and suffix -- it can save us a lot of log
491*7c478bd9Sstevel@tonic-gate * message if they're large.
492*7c478bd9Sstevel@tonic-gate */
493*7c478bd9Sstevel@tonic-gate min = data->size < bk->len ? data->size : bk->len;
494*7c478bd9Sstevel@tonic-gate for (prefix = 0,
495*7c478bd9Sstevel@tonic-gate p = bk->data, t = data->data;
496*7c478bd9Sstevel@tonic-gate prefix < min && *p == *t; ++prefix, ++p, ++t)
497*7c478bd9Sstevel@tonic-gate ;
498*7c478bd9Sstevel@tonic-gate
499*7c478bd9Sstevel@tonic-gate min -= prefix;
500*7c478bd9Sstevel@tonic-gate for (suffix = 0,
501*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)bk->data + bk->len - 1,
502*7c478bd9Sstevel@tonic-gate t = (u_int8_t *)data->data + data->size - 1;
503*7c478bd9Sstevel@tonic-gate suffix < min && *p == *t; ++suffix, --p, --t)
504*7c478bd9Sstevel@tonic-gate ;
505*7c478bd9Sstevel@tonic-gate
506*7c478bd9Sstevel@tonic-gate /* We only log the parts of the keys that have changed. */
507*7c478bd9Sstevel@tonic-gate orig.data = (u_int8_t *)bk->data + prefix;
508*7c478bd9Sstevel@tonic-gate orig.size = bk->len - (prefix + suffix);
509*7c478bd9Sstevel@tonic-gate repl.data = (u_int8_t *)data->data + prefix;
510*7c478bd9Sstevel@tonic-gate repl.size = data->size - (prefix + suffix);
511*7c478bd9Sstevel@tonic-gate if ((ret = __bam_repl_log(dbp->dbenv->lg_info, dbc->txn,
512*7c478bd9Sstevel@tonic-gate &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
513*7c478bd9Sstevel@tonic-gate (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
514*7c478bd9Sstevel@tonic-gate &orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
515*7c478bd9Sstevel@tonic-gate return (ret);
516*7c478bd9Sstevel@tonic-gate }
517*7c478bd9Sstevel@tonic-gate
518*7c478bd9Sstevel@tonic-gate /*
519*7c478bd9Sstevel@tonic-gate * Set references to the first in-use byte on the page and the
520*7c478bd9Sstevel@tonic-gate * first byte of the item being replaced.
521*7c478bd9Sstevel@tonic-gate */
522*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)h + HOFFSET(h);
523*7c478bd9Sstevel@tonic-gate t = (u_int8_t *)bk;
524*7c478bd9Sstevel@tonic-gate
525*7c478bd9Sstevel@tonic-gate /*
526*7c478bd9Sstevel@tonic-gate * If the entry is growing in size, shift the beginning of the data
527*7c478bd9Sstevel@tonic-gate * part of the page down. If the entry is shrinking in size, shift
528*7c478bd9Sstevel@tonic-gate * the beginning of the data part of the page up. Use memmove(3),
529*7c478bd9Sstevel@tonic-gate * the regions overlap.
530*7c478bd9Sstevel@tonic-gate */
531*7c478bd9Sstevel@tonic-gate lo = BKEYDATA_SIZE(bk->len);
532*7c478bd9Sstevel@tonic-gate ln = BKEYDATA_SIZE(data->size);
533*7c478bd9Sstevel@tonic-gate if (lo != ln) {
534*7c478bd9Sstevel@tonic-gate nbytes = lo - ln; /* Signed difference. */
535*7c478bd9Sstevel@tonic-gate if (p == t) /* First index is fast. */
536*7c478bd9Sstevel@tonic-gate h->inp[indx] += nbytes;
537*7c478bd9Sstevel@tonic-gate else { /* Else, shift the page. */
538*7c478bd9Sstevel@tonic-gate memmove(p + nbytes, p, t - p);
539*7c478bd9Sstevel@tonic-gate
540*7c478bd9Sstevel@tonic-gate /* Adjust the indices' offsets. */
541*7c478bd9Sstevel@tonic-gate off = h->inp[indx];
542*7c478bd9Sstevel@tonic-gate for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
543*7c478bd9Sstevel@tonic-gate if (h->inp[cnt] <= off)
544*7c478bd9Sstevel@tonic-gate h->inp[cnt] += nbytes;
545*7c478bd9Sstevel@tonic-gate }
546*7c478bd9Sstevel@tonic-gate
547*7c478bd9Sstevel@tonic-gate /* Clean up the page and adjust the item's reference. */
548*7c478bd9Sstevel@tonic-gate HOFFSET(h) += nbytes;
549*7c478bd9Sstevel@tonic-gate t += nbytes;
550*7c478bd9Sstevel@tonic-gate }
551*7c478bd9Sstevel@tonic-gate
552*7c478bd9Sstevel@tonic-gate /* Copy the new item onto the page. */
553*7c478bd9Sstevel@tonic-gate bk = (BKEYDATA *)t;
554*7c478bd9Sstevel@tonic-gate B_TSET(bk->type, B_KEYDATA, 0);
555*7c478bd9Sstevel@tonic-gate bk->len = data->size;
556*7c478bd9Sstevel@tonic-gate memcpy(bk->data, data->data, data->size);
557*7c478bd9Sstevel@tonic-gate
558*7c478bd9Sstevel@tonic-gate return (0);
559*7c478bd9Sstevel@tonic-gate }
560*7c478bd9Sstevel@tonic-gate
561*7c478bd9Sstevel@tonic-gate /*
562*7c478bd9Sstevel@tonic-gate * __bam_ndup --
563*7c478bd9Sstevel@tonic-gate * Check to see if the duplicate set at indx should have its own page.
564*7c478bd9Sstevel@tonic-gate * If it should, create it.
565*7c478bd9Sstevel@tonic-gate */
566*7c478bd9Sstevel@tonic-gate static int
__bam_ndup(dbc,h,indx)567*7c478bd9Sstevel@tonic-gate __bam_ndup(dbc, h, indx)
568*7c478bd9Sstevel@tonic-gate DBC *dbc;
569*7c478bd9Sstevel@tonic-gate PAGE *h;
570*7c478bd9Sstevel@tonic-gate u_int32_t indx;
571*7c478bd9Sstevel@tonic-gate {
572*7c478bd9Sstevel@tonic-gate BKEYDATA *bk;
573*7c478bd9Sstevel@tonic-gate BOVERFLOW bo;
574*7c478bd9Sstevel@tonic-gate DB *dbp;
575*7c478bd9Sstevel@tonic-gate DBT hdr;
576*7c478bd9Sstevel@tonic-gate PAGE *cp;
577*7c478bd9Sstevel@tonic-gate db_indx_t cnt, cpindx, first, sz;
578*7c478bd9Sstevel@tonic-gate int ret;
579*7c478bd9Sstevel@tonic-gate
580*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
581*7c478bd9Sstevel@tonic-gate
582*7c478bd9Sstevel@tonic-gate while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
583*7c478bd9Sstevel@tonic-gate indx -= P_INDX;
584*7c478bd9Sstevel@tonic-gate for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) {
585*7c478bd9Sstevel@tonic-gate if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx])
586*7c478bd9Sstevel@tonic-gate break;
587*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx);
588*7c478bd9Sstevel@tonic-gate sz += B_TYPE(bk->type) == B_KEYDATA ?
589*7c478bd9Sstevel@tonic-gate BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
590*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx + O_INDX);
591*7c478bd9Sstevel@tonic-gate sz += B_TYPE(bk->type) == B_KEYDATA ?
592*7c478bd9Sstevel@tonic-gate BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
593*7c478bd9Sstevel@tonic-gate }
594*7c478bd9Sstevel@tonic-gate
595*7c478bd9Sstevel@tonic-gate /*
596*7c478bd9Sstevel@tonic-gate * If this set of duplicates is using more than 25% of the page, move
597*7c478bd9Sstevel@tonic-gate * them off. The choice of 25% is a WAG, but it has to be small enough
598*7c478bd9Sstevel@tonic-gate * that we can always split regardless of the presence of duplicates.
599*7c478bd9Sstevel@tonic-gate */
600*7c478bd9Sstevel@tonic-gate if (sz < dbp->pgsize / 4)
601*7c478bd9Sstevel@tonic-gate return (0);
602*7c478bd9Sstevel@tonic-gate
603*7c478bd9Sstevel@tonic-gate /* Get a new page. */
604*7c478bd9Sstevel@tonic-gate if ((ret = __bam_new(dbc, P_DUPLICATE, &cp)) != 0)
605*7c478bd9Sstevel@tonic-gate return (ret);
606*7c478bd9Sstevel@tonic-gate
607*7c478bd9Sstevel@tonic-gate /*
608*7c478bd9Sstevel@tonic-gate * Move this set of duplicates off the page. First points to the first
609*7c478bd9Sstevel@tonic-gate * key of the first duplicate key/data pair, cnt is the number of pairs
610*7c478bd9Sstevel@tonic-gate * we're dealing with.
611*7c478bd9Sstevel@tonic-gate */
612*7c478bd9Sstevel@tonic-gate memset(&hdr, 0, sizeof(hdr));
613*7c478bd9Sstevel@tonic-gate for (indx = first + O_INDX, cpindx = 0;; ++cpindx) {
614*7c478bd9Sstevel@tonic-gate /* Copy the entry to the new page. */
615*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx);
616*7c478bd9Sstevel@tonic-gate hdr.data = bk;
617*7c478bd9Sstevel@tonic-gate hdr.size = B_TYPE(bk->type) == B_KEYDATA ?
618*7c478bd9Sstevel@tonic-gate BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
619*7c478bd9Sstevel@tonic-gate if ((ret =
620*7c478bd9Sstevel@tonic-gate __db_pitem(dbc, cp, cpindx, hdr.size, &hdr, NULL)) != 0)
621*7c478bd9Sstevel@tonic-gate goto err;
622*7c478bd9Sstevel@tonic-gate
623*7c478bd9Sstevel@tonic-gate /*
624*7c478bd9Sstevel@tonic-gate * Move cursors referencing the old entry to the new entry.
625*7c478bd9Sstevel@tonic-gate * Done after the page put because __db_pitem() adjusts
626*7c478bd9Sstevel@tonic-gate * cursors on the new page, and before the delete because
627*7c478bd9Sstevel@tonic-gate * __db_ditem adjusts cursors on the old page.
628*7c478bd9Sstevel@tonic-gate */
629*7c478bd9Sstevel@tonic-gate __bam_ca_dup(dbp,
630*7c478bd9Sstevel@tonic-gate PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx);
631*7c478bd9Sstevel@tonic-gate
632*7c478bd9Sstevel@tonic-gate /* Delete the data item. */
633*7c478bd9Sstevel@tonic-gate if ((ret = __db_ditem(dbc, h, indx, hdr.size)) != 0)
634*7c478bd9Sstevel@tonic-gate goto err;
635*7c478bd9Sstevel@tonic-gate
636*7c478bd9Sstevel@tonic-gate /* Delete all but the first reference to the key. */
637*7c478bd9Sstevel@tonic-gate if (--cnt == 0)
638*7c478bd9Sstevel@tonic-gate break;
639*7c478bd9Sstevel@tonic-gate if ((ret = __bam_adjindx(dbc, h, indx, first, 0)) != 0)
640*7c478bd9Sstevel@tonic-gate goto err;
641*7c478bd9Sstevel@tonic-gate }
642*7c478bd9Sstevel@tonic-gate
643*7c478bd9Sstevel@tonic-gate /* Put in a new data item that points to the duplicates page. */
644*7c478bd9Sstevel@tonic-gate UMRW(bo.unused1);
645*7c478bd9Sstevel@tonic-gate B_TSET(bo.type, B_DUPLICATE, 0);
646*7c478bd9Sstevel@tonic-gate UMRW(bo.unused2);
647*7c478bd9Sstevel@tonic-gate bo.pgno = cp->pgno;
648*7c478bd9Sstevel@tonic-gate bo.tlen = 0;
649*7c478bd9Sstevel@tonic-gate
650*7c478bd9Sstevel@tonic-gate OVPUT(h, indx, bo);
651*7c478bd9Sstevel@tonic-gate
652*7c478bd9Sstevel@tonic-gate return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY));
653*7c478bd9Sstevel@tonic-gate
654*7c478bd9Sstevel@tonic-gate err: (void)__bam_free(dbc, cp);
655*7c478bd9Sstevel@tonic-gate return (ret);
656*7c478bd9Sstevel@tonic-gate }
657*7c478bd9Sstevel@tonic-gate
658*7c478bd9Sstevel@tonic-gate /*
659*7c478bd9Sstevel@tonic-gate * __bam_fixed --
660*7c478bd9Sstevel@tonic-gate * Build the real record for a fixed length put.
661*7c478bd9Sstevel@tonic-gate */
662*7c478bd9Sstevel@tonic-gate static int
__bam_fixed(dbc,dbt)663*7c478bd9Sstevel@tonic-gate __bam_fixed(dbc, dbt)
664*7c478bd9Sstevel@tonic-gate DBC *dbc;
665*7c478bd9Sstevel@tonic-gate DBT *dbt;
666*7c478bd9Sstevel@tonic-gate {
667*7c478bd9Sstevel@tonic-gate DB *dbp;
668*7c478bd9Sstevel@tonic-gate RECNO *rp;
669*7c478bd9Sstevel@tonic-gate int ret;
670*7c478bd9Sstevel@tonic-gate
671*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
672*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)dbp->internal)->recno;
673*7c478bd9Sstevel@tonic-gate
674*7c478bd9Sstevel@tonic-gate /*
675*7c478bd9Sstevel@tonic-gate * If database contains fixed-length records, and the record is long,
676*7c478bd9Sstevel@tonic-gate * return EINVAL.
677*7c478bd9Sstevel@tonic-gate */
678*7c478bd9Sstevel@tonic-gate if (dbt->size > rp->re_len)
679*7c478bd9Sstevel@tonic-gate return (EINVAL);
680*7c478bd9Sstevel@tonic-gate
681*7c478bd9Sstevel@tonic-gate /*
682*7c478bd9Sstevel@tonic-gate * The caller checked to see if it was just right, so we know it's
683*7c478bd9Sstevel@tonic-gate * short. Pad it out. We use the record data return memory, it's
684*7c478bd9Sstevel@tonic-gate * only a short-term use.
685*7c478bd9Sstevel@tonic-gate */
686*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < rp->re_len) {
687*7c478bd9Sstevel@tonic-gate if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) {
688*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0;
689*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL;
690*7c478bd9Sstevel@tonic-gate return (ret);
691*7c478bd9Sstevel@tonic-gate }
692*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = rp->re_len;
693*7c478bd9Sstevel@tonic-gate }
694*7c478bd9Sstevel@tonic-gate memcpy(dbc->rdata.data, dbt->data, dbt->size);
695*7c478bd9Sstevel@tonic-gate memset((u_int8_t *)dbc->rdata.data + dbt->size,
696*7c478bd9Sstevel@tonic-gate rp->re_pad, rp->re_len - dbt->size);
697*7c478bd9Sstevel@tonic-gate
698*7c478bd9Sstevel@tonic-gate /*
699*7c478bd9Sstevel@tonic-gate * Clean up our flags and other information just in case, and
700*7c478bd9Sstevel@tonic-gate * change the caller's DBT to reference our created record.
701*7c478bd9Sstevel@tonic-gate */
702*7c478bd9Sstevel@tonic-gate dbc->rdata.size = rp->re_len;
703*7c478bd9Sstevel@tonic-gate dbc->rdata.dlen = 0;
704*7c478bd9Sstevel@tonic-gate dbc->rdata.doff = 0;
705*7c478bd9Sstevel@tonic-gate dbc->rdata.flags = 0;
706*7c478bd9Sstevel@tonic-gate *dbt = dbc->rdata;
707*7c478bd9Sstevel@tonic-gate
708*7c478bd9Sstevel@tonic-gate return (0);
709*7c478bd9Sstevel@tonic-gate }
710*7c478bd9Sstevel@tonic-gate
711*7c478bd9Sstevel@tonic-gate /*
712*7c478bd9Sstevel@tonic-gate * __bam_partial --
713*7c478bd9Sstevel@tonic-gate * Build the real record for a partial put.
714*7c478bd9Sstevel@tonic-gate */
715*7c478bd9Sstevel@tonic-gate static int
__bam_partial(dbc,dbt,h,indx,nbytes,flags)716*7c478bd9Sstevel@tonic-gate __bam_partial(dbc, dbt, h, indx, nbytes, flags)
717*7c478bd9Sstevel@tonic-gate DBC *dbc;
718*7c478bd9Sstevel@tonic-gate DBT *dbt;
719*7c478bd9Sstevel@tonic-gate PAGE *h;
720*7c478bd9Sstevel@tonic-gate u_int32_t indx, nbytes, flags;
721*7c478bd9Sstevel@tonic-gate {
722*7c478bd9Sstevel@tonic-gate BKEYDATA *bk, tbk;
723*7c478bd9Sstevel@tonic-gate BOVERFLOW *bo;
724*7c478bd9Sstevel@tonic-gate DB *dbp;
725*7c478bd9Sstevel@tonic-gate DBT copy;
726*7c478bd9Sstevel@tonic-gate u_int32_t len, tlen;
727*7c478bd9Sstevel@tonic-gate u_int8_t *p;
728*7c478bd9Sstevel@tonic-gate int ret;
729*7c478bd9Sstevel@tonic-gate
730*7c478bd9Sstevel@tonic-gate COMPQUIET(bo, NULL);
731*7c478bd9Sstevel@tonic-gate
732*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
733*7c478bd9Sstevel@tonic-gate
734*7c478bd9Sstevel@tonic-gate /* We use the record data return memory, it's only a short-term use. */
735*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < nbytes) {
736*7c478bd9Sstevel@tonic-gate if ((ret = __os_realloc(&dbc->rdata.data, nbytes)) != 0) {
737*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0;
738*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL;
739*7c478bd9Sstevel@tonic-gate return (ret);
740*7c478bd9Sstevel@tonic-gate }
741*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = nbytes;
742*7c478bd9Sstevel@tonic-gate }
743*7c478bd9Sstevel@tonic-gate
744*7c478bd9Sstevel@tonic-gate /*
745*7c478bd9Sstevel@tonic-gate * We use nul bytes for any part of the record that isn't specified;
746*7c478bd9Sstevel@tonic-gate * get it over with.
747*7c478bd9Sstevel@tonic-gate */
748*7c478bd9Sstevel@tonic-gate memset(dbc->rdata.data, 0, nbytes);
749*7c478bd9Sstevel@tonic-gate
750*7c478bd9Sstevel@tonic-gate /*
751*7c478bd9Sstevel@tonic-gate * In the next clauses, we need to do three things: a) set p to point
752*7c478bd9Sstevel@tonic-gate * to the place at which to copy the user's data, b) set tlen to the
753*7c478bd9Sstevel@tonic-gate * total length of the record, not including the bytes contributed by
754*7c478bd9Sstevel@tonic-gate * the user, and c) copy any valid data from an existing record.
755*7c478bd9Sstevel@tonic-gate */
756*7c478bd9Sstevel@tonic-gate if (LF_ISSET(BI_NEWKEY)) {
757*7c478bd9Sstevel@tonic-gate tlen = dbt->doff;
758*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)dbc->rdata.data + dbt->doff;
759*7c478bd9Sstevel@tonic-gate goto ucopy;
760*7c478bd9Sstevel@tonic-gate }
761*7c478bd9Sstevel@tonic-gate
762*7c478bd9Sstevel@tonic-gate /* Find the current record. */
763*7c478bd9Sstevel@tonic-gate if (indx < NUM_ENT(h)) {
764*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
765*7c478bd9Sstevel@tonic-gate bo = (BOVERFLOW *)bk;
766*7c478bd9Sstevel@tonic-gate } else {
767*7c478bd9Sstevel@tonic-gate bk = &tbk;
768*7c478bd9Sstevel@tonic-gate B_TSET(bk->type, B_KEYDATA, 0);
769*7c478bd9Sstevel@tonic-gate bk->len = 0;
770*7c478bd9Sstevel@tonic-gate }
771*7c478bd9Sstevel@tonic-gate if (B_TYPE(bk->type) == B_OVERFLOW) {
772*7c478bd9Sstevel@tonic-gate /*
773*7c478bd9Sstevel@tonic-gate * In the case of an overflow record, we shift things around
774*7c478bd9Sstevel@tonic-gate * in the current record rather than allocate a separate copy.
775*7c478bd9Sstevel@tonic-gate */
776*7c478bd9Sstevel@tonic-gate memset(©, 0, sizeof(copy));
777*7c478bd9Sstevel@tonic-gate if ((ret = __db_goff(dbp, ©, bo->tlen,
778*7c478bd9Sstevel@tonic-gate bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
779*7c478bd9Sstevel@tonic-gate return (ret);
780*7c478bd9Sstevel@tonic-gate
781*7c478bd9Sstevel@tonic-gate /* Skip any leading data from the original record. */
782*7c478bd9Sstevel@tonic-gate tlen = dbt->doff;
783*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)dbc->rdata.data + dbt->doff;
784*7c478bd9Sstevel@tonic-gate
785*7c478bd9Sstevel@tonic-gate /*
786*7c478bd9Sstevel@tonic-gate * Copy in any trailing data from the original record.
787*7c478bd9Sstevel@tonic-gate *
788*7c478bd9Sstevel@tonic-gate * If the original record was larger than the original offset
789*7c478bd9Sstevel@tonic-gate * plus the bytes being deleted, there is trailing data in the
790*7c478bd9Sstevel@tonic-gate * original record we need to preserve. If we aren't deleting
791*7c478bd9Sstevel@tonic-gate * the same number of bytes as we're inserting, copy it up or
792*7c478bd9Sstevel@tonic-gate * down, into place.
793*7c478bd9Sstevel@tonic-gate *
794*7c478bd9Sstevel@tonic-gate * Use memmove(), the regions may overlap.
795*7c478bd9Sstevel@tonic-gate */
796*7c478bd9Sstevel@tonic-gate if (bo->tlen > dbt->doff + dbt->dlen) {
797*7c478bd9Sstevel@tonic-gate len = bo->tlen - (dbt->doff + dbt->dlen);
798*7c478bd9Sstevel@tonic-gate if (dbt->dlen != dbt->size)
799*7c478bd9Sstevel@tonic-gate memmove(p + dbt->size, p + dbt->dlen, len);
800*7c478bd9Sstevel@tonic-gate tlen += len;
801*7c478bd9Sstevel@tonic-gate }
802*7c478bd9Sstevel@tonic-gate } else {
803*7c478bd9Sstevel@tonic-gate /* Copy in any leading data from the original record. */
804*7c478bd9Sstevel@tonic-gate memcpy(dbc->rdata.data,
805*7c478bd9Sstevel@tonic-gate bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
806*7c478bd9Sstevel@tonic-gate tlen = dbt->doff;
807*7c478bd9Sstevel@tonic-gate p = (u_int8_t *)dbc->rdata.data + dbt->doff;
808*7c478bd9Sstevel@tonic-gate
809*7c478bd9Sstevel@tonic-gate /* Copy in any trailing data from the original record. */
810*7c478bd9Sstevel@tonic-gate len = dbt->doff + dbt->dlen;
811*7c478bd9Sstevel@tonic-gate if (bk->len > len) {
812*7c478bd9Sstevel@tonic-gate memcpy(p + dbt->size, bk->data + len, bk->len - len);
813*7c478bd9Sstevel@tonic-gate tlen += bk->len - len;
814*7c478bd9Sstevel@tonic-gate }
815*7c478bd9Sstevel@tonic-gate }
816*7c478bd9Sstevel@tonic-gate
817*7c478bd9Sstevel@tonic-gate ucopy: /*
818*7c478bd9Sstevel@tonic-gate * Copy in the application provided data -- p and tlen must have been
819*7c478bd9Sstevel@tonic-gate * initialized above.
820*7c478bd9Sstevel@tonic-gate */
821*7c478bd9Sstevel@tonic-gate memcpy(p, dbt->data, dbt->size);
822*7c478bd9Sstevel@tonic-gate tlen += dbt->size;
823*7c478bd9Sstevel@tonic-gate
824*7c478bd9Sstevel@tonic-gate /* Set the DBT to reference our new record. */
825*7c478bd9Sstevel@tonic-gate dbc->rdata.size = tlen;
826*7c478bd9Sstevel@tonic-gate dbc->rdata.dlen = 0;
827*7c478bd9Sstevel@tonic-gate dbc->rdata.doff = 0;
828*7c478bd9Sstevel@tonic-gate dbc->rdata.flags = 0;
829*7c478bd9Sstevel@tonic-gate *dbt = dbc->rdata;
830*7c478bd9Sstevel@tonic-gate return (0);
831*7c478bd9Sstevel@tonic-gate }
832