1*54925bf6Swillf /*-
2*54925bf6Swillf * Copyright (c) 1990, 1993, 1994
3*54925bf6Swillf * The Regents of the University of California. All rights reserved.
4*54925bf6Swillf *
5*54925bf6Swillf * This code is derived from software contributed to Berkeley by
6*54925bf6Swillf * Mike Olson.
7*54925bf6Swillf *
8*54925bf6Swillf * Redistribution and use in source and binary forms, with or without
9*54925bf6Swillf * modification, are permitted provided that the following conditions
10*54925bf6Swillf * are met:
11*54925bf6Swillf * 1. Redistributions of source code must retain the above copyright
12*54925bf6Swillf * notice, this list of conditions and the following disclaimer.
13*54925bf6Swillf * 2. Redistributions in binary form must reproduce the above copyright
14*54925bf6Swillf * notice, this list of conditions and the following disclaimer in the
15*54925bf6Swillf * documentation and/or other materials provided with the distribution.
16*54925bf6Swillf * 3. All advertising materials mentioning features or use of this software
17*54925bf6Swillf * must display the following acknowledgement:
18*54925bf6Swillf * This product includes software developed by the University of
19*54925bf6Swillf * California, Berkeley and its contributors.
20*54925bf6Swillf * 4. Neither the name of the University nor the names of its contributors
21*54925bf6Swillf * may be used to endorse or promote products derived from this software
22*54925bf6Swillf * without specific prior written permission.
23*54925bf6Swillf *
24*54925bf6Swillf * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25*54925bf6Swillf * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26*54925bf6Swillf * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27*54925bf6Swillf * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28*54925bf6Swillf * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29*54925bf6Swillf * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30*54925bf6Swillf * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31*54925bf6Swillf * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32*54925bf6Swillf * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33*54925bf6Swillf * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34*54925bf6Swillf * SUCH DAMAGE.
35*54925bf6Swillf */
36*54925bf6Swillf
37*54925bf6Swillf #if defined(LIBC_SCCS) && !defined(lint)
38*54925bf6Swillf static char sccsid[] = "@(#)bt_put.c 8.8 (Berkeley) 7/26/94";
39*54925bf6Swillf #endif /* LIBC_SCCS and not lint */
40*54925bf6Swillf
41*54925bf6Swillf #include <sys/types.h>
42*54925bf6Swillf
43*54925bf6Swillf #include <errno.h>
44*54925bf6Swillf #include <stdio.h>
45*54925bf6Swillf #include <stdlib.h>
46*54925bf6Swillf #include <string.h>
47*54925bf6Swillf
48*54925bf6Swillf #include "db-int.h"
49*54925bf6Swillf #include "btree.h"
50*54925bf6Swillf
51*54925bf6Swillf static EPG *bt_fast __P((BTREE *, const DBT *, const DBT *, int *));
52*54925bf6Swillf
53*54925bf6Swillf /*
54*54925bf6Swillf * __BT_PUT -- Add a btree item to the tree.
55*54925bf6Swillf *
56*54925bf6Swillf * Parameters:
57*54925bf6Swillf * dbp: pointer to access method
58*54925bf6Swillf * key: key
59*54925bf6Swillf * data: data
60*54925bf6Swillf * flag: R_NOOVERWRITE
61*54925bf6Swillf *
62*54925bf6Swillf * Returns:
63*54925bf6Swillf * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
64*54925bf6Swillf * tree and R_NOOVERWRITE specified.
65*54925bf6Swillf */
66*54925bf6Swillf int
__bt_put(dbp,key,data,flags)67*54925bf6Swillf __bt_put(dbp, key, data, flags)
68*54925bf6Swillf const DB *dbp;
69*54925bf6Swillf DBT *key;
70*54925bf6Swillf const DBT *data;
71*54925bf6Swillf u_int flags;
72*54925bf6Swillf {
73*54925bf6Swillf BTREE *t;
74*54925bf6Swillf DBT tkey, tdata;
75*54925bf6Swillf EPG *e = 0;
76*54925bf6Swillf PAGE *h;
77*54925bf6Swillf indx_t idx, nxtindex;
78*54925bf6Swillf db_pgno_t pg;
79*54925bf6Swillf u_int32_t nbytes;
80*54925bf6Swillf int dflags, exact, status;
81*54925bf6Swillf char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];
82*54925bf6Swillf
83*54925bf6Swillf t = dbp->internal;
84*54925bf6Swillf
85*54925bf6Swillf /* Toss any page pinned across calls. */
86*54925bf6Swillf if (t->bt_pinned != NULL) {
87*54925bf6Swillf mpool_put(t->bt_mp, t->bt_pinned, 0);
88*54925bf6Swillf t->bt_pinned = NULL;
89*54925bf6Swillf }
90*54925bf6Swillf
91*54925bf6Swillf /* Check for change to a read-only tree. */
92*54925bf6Swillf if (F_ISSET(t, B_RDONLY)) {
93*54925bf6Swillf errno = EPERM;
94*54925bf6Swillf return (RET_ERROR);
95*54925bf6Swillf }
96*54925bf6Swillf
97*54925bf6Swillf switch (flags) {
98*54925bf6Swillf case 0:
99*54925bf6Swillf case R_NOOVERWRITE:
100*54925bf6Swillf break;
101*54925bf6Swillf case R_CURSOR:
102*54925bf6Swillf /*
103*54925bf6Swillf * If flags is R_CURSOR, put the cursor. Must already
104*54925bf6Swillf * have started a scan and not have already deleted it.
105*54925bf6Swillf */
106*54925bf6Swillf if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
107*54925bf6Swillf !F_ISSET(&t->bt_cursor,
108*54925bf6Swillf CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
109*54925bf6Swillf break;
110*54925bf6Swillf /* FALLTHROUGH */
111*54925bf6Swillf default:
112*54925bf6Swillf errno = EINVAL;
113*54925bf6Swillf return (RET_ERROR);
114*54925bf6Swillf }
115*54925bf6Swillf
116*54925bf6Swillf /*
117*54925bf6Swillf * If the key/data pair won't fit on a page, store it on overflow
118*54925bf6Swillf * pages. Only put the key on the overflow page if the pair are
119*54925bf6Swillf * still too big after moving the data to an overflow page.
120*54925bf6Swillf *
121*54925bf6Swillf * XXX
122*54925bf6Swillf * If the insert fails later on, the overflow pages aren't recovered.
123*54925bf6Swillf */
124*54925bf6Swillf dflags = 0;
125*54925bf6Swillf if (key->size + data->size > t->bt_ovflsize) {
126*54925bf6Swillf if (key->size > t->bt_ovflsize) {
127*54925bf6Swillf u_int32_t yuck_this_is_gross_code;
128*54925bf6Swillf storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
129*54925bf6Swillf return (RET_ERROR);
130*54925bf6Swillf tkey.data = kb;
131*54925bf6Swillf tkey.size = NOVFLSIZE;
132*54925bf6Swillf memmove(kb, &pg, sizeof(db_pgno_t));
133*54925bf6Swillf yuck_this_is_gross_code = key->size;
134*54925bf6Swillf if (yuck_this_is_gross_code != key->size)
135*54925bf6Swillf abort ();
136*54925bf6Swillf memmove(kb + sizeof(db_pgno_t),
137*54925bf6Swillf &yuck_this_is_gross_code, sizeof(u_int32_t));
138*54925bf6Swillf dflags |= P_BIGKEY;
139*54925bf6Swillf key = &tkey;
140*54925bf6Swillf }
141*54925bf6Swillf if (key->size + data->size > t->bt_ovflsize) {
142*54925bf6Swillf u_int32_t yuck_this_is_gross_code = data->size;
143*54925bf6Swillf if (__ovfl_put(t, data, &pg) == RET_ERROR)
144*54925bf6Swillf return (RET_ERROR);
145*54925bf6Swillf tdata.data = db;
146*54925bf6Swillf tdata.size = NOVFLSIZE;
147*54925bf6Swillf memmove(db, &pg, sizeof(db_pgno_t));
148*54925bf6Swillf if (yuck_this_is_gross_code != data->size)
149*54925bf6Swillf abort ();
150*54925bf6Swillf memmove(db + sizeof(db_pgno_t),
151*54925bf6Swillf &yuck_this_is_gross_code, sizeof(u_int32_t));
152*54925bf6Swillf dflags |= P_BIGDATA;
153*54925bf6Swillf data = &tdata;
154*54925bf6Swillf }
155*54925bf6Swillf if (key->size + data->size > t->bt_ovflsize)
156*54925bf6Swillf goto storekey;
157*54925bf6Swillf }
158*54925bf6Swillf
159*54925bf6Swillf /* Replace the cursor. */
160*54925bf6Swillf if (flags == R_CURSOR) {
161*54925bf6Swillf if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL)
162*54925bf6Swillf return (RET_ERROR);
163*54925bf6Swillf idx = t->bt_cursor.pg.index;
164*54925bf6Swillf goto delete;
165*54925bf6Swillf }
166*54925bf6Swillf
167*54925bf6Swillf /*
168*54925bf6Swillf * Find the key to delete, or, the location at which to insert.
169*54925bf6Swillf * Bt_fast and __bt_search both pin the returned page.
170*54925bf6Swillf */
171*54925bf6Swillf if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
172*54925bf6Swillf if ((e = __bt_search(t, key, &exact)) == NULL)
173*54925bf6Swillf return (RET_ERROR);
174*54925bf6Swillf h = e->page;
175*54925bf6Swillf idx = e->index;
176*54925bf6Swillf
177*54925bf6Swillf /*
178*54925bf6Swillf * Add the key/data pair to the tree. If an identical key is already
179*54925bf6Swillf * in the tree, and R_NOOVERWRITE is set, an error is returned. If
180*54925bf6Swillf * R_NOOVERWRITE is not set, the key is either added (if duplicates are
181*54925bf6Swillf * permitted) or an error is returned.
182*54925bf6Swillf */
183*54925bf6Swillf switch (flags) {
184*54925bf6Swillf case R_NOOVERWRITE:
185*54925bf6Swillf if (!exact)
186*54925bf6Swillf break;
187*54925bf6Swillf mpool_put(t->bt_mp, h, 0);
188*54925bf6Swillf return (RET_SPECIAL);
189*54925bf6Swillf default:
190*54925bf6Swillf if (!exact || !F_ISSET(t, B_NODUPS))
191*54925bf6Swillf break;
192*54925bf6Swillf /*
193*54925bf6Swillf * !!!
194*54925bf6Swillf * Note, the delete may empty the page, so we need to put a
195*54925bf6Swillf * new entry into the page immediately.
196*54925bf6Swillf */
197*54925bf6Swillf delete: if (__bt_dleaf(t, key, h, idx) == RET_ERROR) {
198*54925bf6Swillf mpool_put(t->bt_mp, h, 0);
199*54925bf6Swillf return (RET_ERROR);
200*54925bf6Swillf }
201*54925bf6Swillf break;
202*54925bf6Swillf }
203*54925bf6Swillf
204*54925bf6Swillf /*
205*54925bf6Swillf * If not enough room, or the user has put a ceiling on the number of
206*54925bf6Swillf * keys permitted in the page, split the page. The split code will
207*54925bf6Swillf * insert the key and data and unpin the current page. If inserting
208*54925bf6Swillf * into the offset array, shift the pointers up.
209*54925bf6Swillf */
210*54925bf6Swillf nbytes = NBLEAFDBT(key->size, data->size);
211*54925bf6Swillf if (h->upper - h->lower < nbytes + sizeof(indx_t)) {
212*54925bf6Swillf if ((status = __bt_split(t, h, key,
213*54925bf6Swillf data, dflags, nbytes, idx)) != RET_SUCCESS)
214*54925bf6Swillf return (status);
215*54925bf6Swillf goto success;
216*54925bf6Swillf }
217*54925bf6Swillf
218*54925bf6Swillf if (idx < (nxtindex = NEXTINDEX(h)))
219*54925bf6Swillf memmove(h->linp + idx + 1, h->linp + idx,
220*54925bf6Swillf (nxtindex - idx) * sizeof(indx_t));
221*54925bf6Swillf h->lower += sizeof(indx_t);
222*54925bf6Swillf
223*54925bf6Swillf h->linp[idx] = h->upper -= nbytes;
224*54925bf6Swillf dest = (char *)h + h->upper;
225*54925bf6Swillf WR_BLEAF(dest, key, data, dflags);
226*54925bf6Swillf
227*54925bf6Swillf /* If the cursor is on this page, adjust it as necessary. */
228*54925bf6Swillf if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
229*54925bf6Swillf !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
230*54925bf6Swillf t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= idx)
231*54925bf6Swillf ++t->bt_cursor.pg.index;
232*54925bf6Swillf
233*54925bf6Swillf if (t->bt_order == NOT) {
234*54925bf6Swillf if (h->nextpg == P_INVALID) {
235*54925bf6Swillf if (idx == NEXTINDEX(h) - 1) {
236*54925bf6Swillf t->bt_order = FORWARD;
237*54925bf6Swillf t->bt_last.index = idx;
238*54925bf6Swillf t->bt_last.pgno = h->pgno;
239*54925bf6Swillf }
240*54925bf6Swillf } else if (h->prevpg == P_INVALID) {
241*54925bf6Swillf if (idx == 0) {
242*54925bf6Swillf t->bt_order = BACK;
243*54925bf6Swillf t->bt_last.index = 0;
244*54925bf6Swillf t->bt_last.pgno = h->pgno;
245*54925bf6Swillf }
246*54925bf6Swillf }
247*54925bf6Swillf }
248*54925bf6Swillf
249*54925bf6Swillf mpool_put(t->bt_mp, h, MPOOL_DIRTY);
250*54925bf6Swillf
251*54925bf6Swillf success:
252*54925bf6Swillf if (flags == R_SETCURSOR)
253*54925bf6Swillf __bt_setcur(t, e->page->pgno, e->index);
254*54925bf6Swillf
255*54925bf6Swillf F_SET(t, B_MODIFIED);
256*54925bf6Swillf return (RET_SUCCESS);
257*54925bf6Swillf }
258*54925bf6Swillf
259*54925bf6Swillf #ifdef STATISTICS
260*54925bf6Swillf u_long bt_cache_hit, bt_cache_miss;
261*54925bf6Swillf #endif
262*54925bf6Swillf
263*54925bf6Swillf /*
264*54925bf6Swillf * BT_FAST -- Do a quick check for sorted data.
265*54925bf6Swillf *
266*54925bf6Swillf * Parameters:
267*54925bf6Swillf * t: tree
268*54925bf6Swillf * key: key to insert
269*54925bf6Swillf *
270*54925bf6Swillf * Returns:
271*54925bf6Swillf * EPG for new record or NULL if not found.
272*54925bf6Swillf */
273*54925bf6Swillf static EPG *
bt_fast(t,key,data,exactp)274*54925bf6Swillf bt_fast(t, key, data, exactp)
275*54925bf6Swillf BTREE *t;
276*54925bf6Swillf const DBT *key, *data;
277*54925bf6Swillf int *exactp;
278*54925bf6Swillf {
279*54925bf6Swillf PAGE *h;
280*54925bf6Swillf u_int32_t nbytes;
281*54925bf6Swillf int cmp;
282*54925bf6Swillf
283*54925bf6Swillf if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) {
284*54925bf6Swillf t->bt_order = NOT;
285*54925bf6Swillf return (NULL);
286*54925bf6Swillf }
287*54925bf6Swillf t->bt_cur.page = h;
288*54925bf6Swillf t->bt_cur.index = t->bt_last.index;
289*54925bf6Swillf
290*54925bf6Swillf /*
291*54925bf6Swillf * If won't fit in this page or have too many keys in this page,
292*54925bf6Swillf * have to search to get split stack.
293*54925bf6Swillf */
294*54925bf6Swillf nbytes = NBLEAFDBT(key->size, data->size);
295*54925bf6Swillf if (h->upper - h->lower < nbytes + sizeof(indx_t))
296*54925bf6Swillf goto miss;
297*54925bf6Swillf
298*54925bf6Swillf if (t->bt_order == FORWARD) {
299*54925bf6Swillf if (t->bt_cur.page->nextpg != P_INVALID)
300*54925bf6Swillf goto miss;
301*54925bf6Swillf if (t->bt_cur.index != NEXTINDEX(h) - 1)
302*54925bf6Swillf goto miss;
303*54925bf6Swillf if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0)
304*54925bf6Swillf goto miss;
305*54925bf6Swillf t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index;
306*54925bf6Swillf } else {
307*54925bf6Swillf if (t->bt_cur.page->prevpg != P_INVALID)
308*54925bf6Swillf goto miss;
309*54925bf6Swillf if (t->bt_cur.index != 0)
310*54925bf6Swillf goto miss;
311*54925bf6Swillf if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0)
312*54925bf6Swillf goto miss;
313*54925bf6Swillf t->bt_last.index = 0;
314*54925bf6Swillf }
315*54925bf6Swillf *exactp = cmp == 0;
316*54925bf6Swillf #ifdef STATISTICS
317*54925bf6Swillf ++bt_cache_hit;
318*54925bf6Swillf #endif
319*54925bf6Swillf return (&t->bt_cur);
320*54925bf6Swillf
321*54925bf6Swillf miss:
322*54925bf6Swillf #ifdef STATISTICS
323*54925bf6Swillf ++bt_cache_miss;
324*54925bf6Swillf #endif
325*54925bf6Swillf t->bt_order = NOT;
326*54925bf6Swillf mpool_put(t->bt_mp, h, 0);
327*54925bf6Swillf return (NULL);
328*54925bf6Swillf }
329