1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate *
4*7c478bd9Sstevel@tonic-gate * Copyright (c) 1997, 1998
5*7c478bd9Sstevel@tonic-gate * Sleepycat Software. All rights reserved.
6*7c478bd9Sstevel@tonic-gate */
7*7c478bd9Sstevel@tonic-gate
8*7c478bd9Sstevel@tonic-gate #include "config.h"
9*7c478bd9Sstevel@tonic-gate
10*7c478bd9Sstevel@tonic-gate #ifndef lint
11*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_recno.c 10.53 (Sleepycat) 12/11/98";
12*7c478bd9Sstevel@tonic-gate #endif /* not lint */
13*7c478bd9Sstevel@tonic-gate
14*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
15*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
16*7c478bd9Sstevel@tonic-gate
17*7c478bd9Sstevel@tonic-gate #include <errno.h>
18*7c478bd9Sstevel@tonic-gate #include <limits.h>
19*7c478bd9Sstevel@tonic-gate #include <string.h>
20*7c478bd9Sstevel@tonic-gate #endif
21*7c478bd9Sstevel@tonic-gate
22*7c478bd9Sstevel@tonic-gate #include "db_int.h"
23*7c478bd9Sstevel@tonic-gate #include "db_page.h"
24*7c478bd9Sstevel@tonic-gate #include "btree.h"
25*7c478bd9Sstevel@tonic-gate #include "db_ext.h"
26*7c478bd9Sstevel@tonic-gate #include "shqueue.h"
27*7c478bd9Sstevel@tonic-gate #include "db_shash.h"
28*7c478bd9Sstevel@tonic-gate #include "lock.h"
29*7c478bd9Sstevel@tonic-gate #include "lock_ext.h"
30*7c478bd9Sstevel@tonic-gate
31*7c478bd9Sstevel@tonic-gate static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
32*7c478bd9Sstevel@tonic-gate static int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
33*7c478bd9Sstevel@tonic-gate static int __ram_fmap __P((DBC *, db_recno_t));
34*7c478bd9Sstevel@tonic-gate static int __ram_i_delete __P((DBC *));
35*7c478bd9Sstevel@tonic-gate static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
36*7c478bd9Sstevel@tonic-gate static int __ram_source __P((DB *, RECNO *, const char *));
37*7c478bd9Sstevel@tonic-gate static int __ram_sync __P((DB *, u_int32_t));
38*7c478bd9Sstevel@tonic-gate static int __ram_update __P((DBC *, db_recno_t, int));
39*7c478bd9Sstevel@tonic-gate static int __ram_vmap __P((DBC *, db_recno_t));
40*7c478bd9Sstevel@tonic-gate static int __ram_writeback __P((DBC *));
41*7c478bd9Sstevel@tonic-gate
/*
 * In recno, there are two meanings to the on-page "deleted" flag.  If we're
 * re-numbering records, it means the record was implicitly created.  We skip
 * over implicitly created records if doing a cursor "next" or "prev", and
 * return DB_KEYEMPTY if they're explicitly requested.  If not re-numbering
 * records, it means that the record was implicitly created, or was deleted.
 * We skip over implicitly created or deleted records if doing a cursor "next"
 * or "prev", and return DB_KEYEMPTY if they're explicitly requested.
 *
 * If we're re-numbering records, then we have to detect in the cursor that
 * a record was deleted, and adjust the cursor as necessary on the next get.
 * If we're not re-numbering records, then we can detect that a record has
 * been deleted by looking at the actual on-page record, so we completely
 * ignore the cursor's delete flag.  This is different from the B+tree code,
 * which also maintains whether the cursor references a deleted record in
 * the cursor, and doesn't always check the on-page value.
 */
/*
 * CD_SET, CD_CLR, CD_ISSET --
 *	Set, clear and test the C_DELETED flag on a cursor.  All three only
 *	act when DB_RE_RENUMBER is set on the DB handle: without it, CD_SET
 *	and CD_CLR leave the cursor untouched and CD_ISSET is always false.
 */
#define	CD_SET(dbp, cp) {						\
	if (F_ISSET((dbp), DB_RE_RENUMBER))				\
		F_SET((cp), C_DELETED);					\
}
#define	CD_CLR(dbp, cp) {						\
	if (F_ISSET((dbp), DB_RE_RENUMBER))				\
		F_CLR((cp), C_DELETED);					\
}
#define	CD_ISSET(dbp, cp)						\
	(F_ISSET((dbp), DB_RE_RENUMBER) && F_ISSET((cp), C_DELETED))
70*7c478bd9Sstevel@tonic-gate /*
71*7c478bd9Sstevel@tonic-gate * __ram_open --
72*7c478bd9Sstevel@tonic-gate * Recno open function.
73*7c478bd9Sstevel@tonic-gate *
74*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_open __P((DB *, DB_INFO *));
75*7c478bd9Sstevel@tonic-gate */
76*7c478bd9Sstevel@tonic-gate int
__ram_open(dbp,dbinfo)77*7c478bd9Sstevel@tonic-gate __ram_open(dbp, dbinfo)
78*7c478bd9Sstevel@tonic-gate DB *dbp;
79*7c478bd9Sstevel@tonic-gate DB_INFO *dbinfo;
80*7c478bd9Sstevel@tonic-gate {
81*7c478bd9Sstevel@tonic-gate BTREE *t;
82*7c478bd9Sstevel@tonic-gate DBC *dbc;
83*7c478bd9Sstevel@tonic-gate RECNO *rp;
84*7c478bd9Sstevel@tonic-gate int ret, t_ret;
85*7c478bd9Sstevel@tonic-gate
86*7c478bd9Sstevel@tonic-gate /* Allocate and initialize the private btree structure. */
87*7c478bd9Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0)
88*7c478bd9Sstevel@tonic-gate return (ret);
89*7c478bd9Sstevel@tonic-gate dbp->internal = t;
90*7c478bd9Sstevel@tonic-gate __bam_setovflsize(dbp);
91*7c478bd9Sstevel@tonic-gate
92*7c478bd9Sstevel@tonic-gate /* Allocate and initialize the private recno structure. */
93*7c478bd9Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(*rp), &rp)) != 0)
94*7c478bd9Sstevel@tonic-gate return (ret);
95*7c478bd9Sstevel@tonic-gate /* Link in the private recno structure. */
96*7c478bd9Sstevel@tonic-gate t->recno = rp;
97*7c478bd9Sstevel@tonic-gate
98*7c478bd9Sstevel@tonic-gate /*
99*7c478bd9Sstevel@tonic-gate * Intention is to make sure all of the user's selections are okay
100*7c478bd9Sstevel@tonic-gate * here and then use them without checking.
101*7c478bd9Sstevel@tonic-gate */
102*7c478bd9Sstevel@tonic-gate if (dbinfo == NULL) {
103*7c478bd9Sstevel@tonic-gate rp->re_delim = '\n';
104*7c478bd9Sstevel@tonic-gate rp->re_pad = ' ';
105*7c478bd9Sstevel@tonic-gate rp->re_fd = -1;
106*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF);
107*7c478bd9Sstevel@tonic-gate } else {
108*7c478bd9Sstevel@tonic-gate /*
109*7c478bd9Sstevel@tonic-gate * If the user specified a source tree, open it and map it in.
110*7c478bd9Sstevel@tonic-gate *
111*7c478bd9Sstevel@tonic-gate * !!!
112*7c478bd9Sstevel@tonic-gate * We don't complain if the user specified transactions or
113*7c478bd9Sstevel@tonic-gate * threads. It's possible to make it work, but you'd better
114*7c478bd9Sstevel@tonic-gate * know what you're doing!
115*7c478bd9Sstevel@tonic-gate */
116*7c478bd9Sstevel@tonic-gate if (dbinfo->re_source == NULL) {
117*7c478bd9Sstevel@tonic-gate rp->re_fd = -1;
118*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF);
119*7c478bd9Sstevel@tonic-gate } else {
120*7c478bd9Sstevel@tonic-gate if ((ret =
121*7c478bd9Sstevel@tonic-gate __ram_source(dbp, rp, dbinfo->re_source)) != 0)
122*7c478bd9Sstevel@tonic-gate goto err;
123*7c478bd9Sstevel@tonic-gate }
124*7c478bd9Sstevel@tonic-gate
125*7c478bd9Sstevel@tonic-gate /* Copy delimiter, length and padding values. */
126*7c478bd9Sstevel@tonic-gate rp->re_delim =
127*7c478bd9Sstevel@tonic-gate F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n';
128*7c478bd9Sstevel@tonic-gate rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? dbinfo->re_pad : ' ';
129*7c478bd9Sstevel@tonic-gate
130*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
131*7c478bd9Sstevel@tonic-gate if ((rp->re_len = dbinfo->re_len) == 0) {
132*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv,
133*7c478bd9Sstevel@tonic-gate "record length must be greater than 0");
134*7c478bd9Sstevel@tonic-gate ret = EINVAL;
135*7c478bd9Sstevel@tonic-gate goto err;
136*7c478bd9Sstevel@tonic-gate }
137*7c478bd9Sstevel@tonic-gate } else
138*7c478bd9Sstevel@tonic-gate rp->re_len = 0;
139*7c478bd9Sstevel@tonic-gate }
140*7c478bd9Sstevel@tonic-gate
141*7c478bd9Sstevel@tonic-gate /* Initialize the remaining fields/methods of the DB. */
142*7c478bd9Sstevel@tonic-gate dbp->am_close = __ram_close;
143*7c478bd9Sstevel@tonic-gate dbp->del = __ram_delete;
144*7c478bd9Sstevel@tonic-gate dbp->put = __ram_put;
145*7c478bd9Sstevel@tonic-gate dbp->stat = __bam_stat;
146*7c478bd9Sstevel@tonic-gate dbp->sync = __ram_sync;
147*7c478bd9Sstevel@tonic-gate
148*7c478bd9Sstevel@tonic-gate /* Start up the tree. */
149*7c478bd9Sstevel@tonic-gate if ((ret = __bam_read_root(dbp)) != 0)
150*7c478bd9Sstevel@tonic-gate goto err;
151*7c478bd9Sstevel@tonic-gate
152*7c478bd9Sstevel@tonic-gate /* Set the overflow page size. */
153*7c478bd9Sstevel@tonic-gate __bam_setovflsize(dbp);
154*7c478bd9Sstevel@tonic-gate
155*7c478bd9Sstevel@tonic-gate /* If we're snapshotting an underlying source file, do it now. */
156*7c478bd9Sstevel@tonic-gate if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT)) {
157*7c478bd9Sstevel@tonic-gate /* Allocate a cursor. */
158*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
159*7c478bd9Sstevel@tonic-gate goto err;
160*7c478bd9Sstevel@tonic-gate
161*7c478bd9Sstevel@tonic-gate /* Do the snapshot. */
162*7c478bd9Sstevel@tonic-gate if ((ret = __ram_update(dbc,
163*7c478bd9Sstevel@tonic-gate DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND)
164*7c478bd9Sstevel@tonic-gate ret = 0;
165*7c478bd9Sstevel@tonic-gate
166*7c478bd9Sstevel@tonic-gate /* Discard the cursor. */
167*7c478bd9Sstevel@tonic-gate if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
168*7c478bd9Sstevel@tonic-gate ret = t_ret;
169*7c478bd9Sstevel@tonic-gate
170*7c478bd9Sstevel@tonic-gate if (ret != 0)
171*7c478bd9Sstevel@tonic-gate goto err;
172*7c478bd9Sstevel@tonic-gate }
173*7c478bd9Sstevel@tonic-gate
174*7c478bd9Sstevel@tonic-gate return (0);
175*7c478bd9Sstevel@tonic-gate
176*7c478bd9Sstevel@tonic-gate err: /* If we mmap'd a source file, discard it. */
177*7c478bd9Sstevel@tonic-gate if (rp->re_smap != NULL)
178*7c478bd9Sstevel@tonic-gate (void)__db_unmapfile(rp->re_smap, rp->re_msize);
179*7c478bd9Sstevel@tonic-gate
180*7c478bd9Sstevel@tonic-gate /* If we opened a source file, discard it. */
181*7c478bd9Sstevel@tonic-gate if (rp->re_fd != -1)
182*7c478bd9Sstevel@tonic-gate (void)__os_close(rp->re_fd);
183*7c478bd9Sstevel@tonic-gate if (rp->re_source != NULL)
184*7c478bd9Sstevel@tonic-gate __os_freestr(rp->re_source);
185*7c478bd9Sstevel@tonic-gate
186*7c478bd9Sstevel@tonic-gate __os_free(rp, sizeof(*rp));
187*7c478bd9Sstevel@tonic-gate
188*7c478bd9Sstevel@tonic-gate return (ret);
189*7c478bd9Sstevel@tonic-gate }
190*7c478bd9Sstevel@tonic-gate
191*7c478bd9Sstevel@tonic-gate /*
192*7c478bd9Sstevel@tonic-gate * __ram_delete --
193*7c478bd9Sstevel@tonic-gate * Recno db->del function.
194*7c478bd9Sstevel@tonic-gate */
195*7c478bd9Sstevel@tonic-gate static int
__ram_delete(dbp,txn,key,flags)196*7c478bd9Sstevel@tonic-gate __ram_delete(dbp, txn, key, flags)
197*7c478bd9Sstevel@tonic-gate DB *dbp;
198*7c478bd9Sstevel@tonic-gate DB_TXN *txn;
199*7c478bd9Sstevel@tonic-gate DBT *key;
200*7c478bd9Sstevel@tonic-gate u_int32_t flags;
201*7c478bd9Sstevel@tonic-gate {
202*7c478bd9Sstevel@tonic-gate CURSOR *cp;
203*7c478bd9Sstevel@tonic-gate DBC *dbc;
204*7c478bd9Sstevel@tonic-gate db_recno_t recno;
205*7c478bd9Sstevel@tonic-gate int ret, t_ret;
206*7c478bd9Sstevel@tonic-gate
207*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp);
208*7c478bd9Sstevel@tonic-gate
209*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */
210*7c478bd9Sstevel@tonic-gate if ((ret = __db_delchk(dbp,
211*7c478bd9Sstevel@tonic-gate key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
212*7c478bd9Sstevel@tonic-gate return (ret);
213*7c478bd9Sstevel@tonic-gate
214*7c478bd9Sstevel@tonic-gate /* Acquire a cursor. */
215*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
216*7c478bd9Sstevel@tonic-gate return (ret);
217*7c478bd9Sstevel@tonic-gate
218*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags);
219*7c478bd9Sstevel@tonic-gate
220*7c478bd9Sstevel@tonic-gate /* Check the user's record number and fill in as necessary. */
221*7c478bd9Sstevel@tonic-gate if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
222*7c478bd9Sstevel@tonic-gate goto err;
223*7c478bd9Sstevel@tonic-gate
224*7c478bd9Sstevel@tonic-gate /* Do the delete. */
225*7c478bd9Sstevel@tonic-gate cp = dbc->internal;
226*7c478bd9Sstevel@tonic-gate cp->recno = recno;
227*7c478bd9Sstevel@tonic-gate ret = __ram_i_delete(dbc);
228*7c478bd9Sstevel@tonic-gate
229*7c478bd9Sstevel@tonic-gate /* Release the cursor. */
230*7c478bd9Sstevel@tonic-gate err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
231*7c478bd9Sstevel@tonic-gate ret = t_ret;
232*7c478bd9Sstevel@tonic-gate
233*7c478bd9Sstevel@tonic-gate return (ret);
234*7c478bd9Sstevel@tonic-gate }
235*7c478bd9Sstevel@tonic-gate
236*7c478bd9Sstevel@tonic-gate /*
237*7c478bd9Sstevel@tonic-gate * __ram_i_delete --
238*7c478bd9Sstevel@tonic-gate * Internal version of recno delete, called by __ram_delete and
239*7c478bd9Sstevel@tonic-gate * __ram_c_del.
240*7c478bd9Sstevel@tonic-gate */
241*7c478bd9Sstevel@tonic-gate static int
__ram_i_delete(dbc)242*7c478bd9Sstevel@tonic-gate __ram_i_delete(dbc)
243*7c478bd9Sstevel@tonic-gate DBC *dbc;
244*7c478bd9Sstevel@tonic-gate {
245*7c478bd9Sstevel@tonic-gate BKEYDATA bk;
246*7c478bd9Sstevel@tonic-gate BTREE *t;
247*7c478bd9Sstevel@tonic-gate CURSOR *cp;
248*7c478bd9Sstevel@tonic-gate DB *dbp;
249*7c478bd9Sstevel@tonic-gate DBT hdr, data;
250*7c478bd9Sstevel@tonic-gate PAGE *h;
251*7c478bd9Sstevel@tonic-gate db_indx_t indx;
252*7c478bd9Sstevel@tonic-gate int exact, ret, stack;
253*7c478bd9Sstevel@tonic-gate
254*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
255*7c478bd9Sstevel@tonic-gate cp = dbc->internal;
256*7c478bd9Sstevel@tonic-gate t = dbp->internal;
257*7c478bd9Sstevel@tonic-gate stack = 0;
258*7c478bd9Sstevel@tonic-gate
259*7c478bd9Sstevel@tonic-gate /*
260*7c478bd9Sstevel@tonic-gate * If this is CDB and this isn't a write cursor, then it's an error.
261*7c478bd9Sstevel@tonic-gate * If it is a write cursor, but we don't yet hold the write lock, then
262*7c478bd9Sstevel@tonic-gate * we need to upgrade to the write lock.
263*7c478bd9Sstevel@tonic-gate */
264*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB)) {
265*7c478bd9Sstevel@tonic-gate /* Make sure it's a valid update cursor. */
266*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
267*7c478bd9Sstevel@tonic-gate return (EINVAL);
268*7c478bd9Sstevel@tonic-gate
269*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbc, DBC_RMW) &&
270*7c478bd9Sstevel@tonic-gate (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
271*7c478bd9Sstevel@tonic-gate DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
272*7c478bd9Sstevel@tonic-gate &dbc->mylock)) != 0)
273*7c478bd9Sstevel@tonic-gate return (EAGAIN);
274*7c478bd9Sstevel@tonic-gate }
275*7c478bd9Sstevel@tonic-gate
276*7c478bd9Sstevel@tonic-gate /* Search the tree for the key; delete only deletes exact matches. */
277*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, &cp->recno, S_DELETE, 1, &exact)) != 0)
278*7c478bd9Sstevel@tonic-gate goto err;
279*7c478bd9Sstevel@tonic-gate if (!exact) {
280*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND;
281*7c478bd9Sstevel@tonic-gate goto err;
282*7c478bd9Sstevel@tonic-gate }
283*7c478bd9Sstevel@tonic-gate stack = 1;
284*7c478bd9Sstevel@tonic-gate
285*7c478bd9Sstevel@tonic-gate h = cp->csp->page;
286*7c478bd9Sstevel@tonic-gate indx = cp->csp->indx;
287*7c478bd9Sstevel@tonic-gate
288*7c478bd9Sstevel@tonic-gate /*
289*7c478bd9Sstevel@tonic-gate * If re-numbering records, the on-page deleted flag can only mean
290*7c478bd9Sstevel@tonic-gate * that this record was implicitly created. Applications aren't
291*7c478bd9Sstevel@tonic-gate * permitted to delete records they never created, return an error.
292*7c478bd9Sstevel@tonic-gate *
293*7c478bd9Sstevel@tonic-gate * If not re-numbering records, the on-page deleted flag means that
294*7c478bd9Sstevel@tonic-gate * this record was implicitly created, or, was deleted at some time.
295*7c478bd9Sstevel@tonic-gate * The former is an error because applications aren't permitted to
296*7c478bd9Sstevel@tonic-gate * delete records they never created, the latter is an error because
297*7c478bd9Sstevel@tonic-gate * if the record was "deleted", we could never have found it.
298*7c478bd9Sstevel@tonic-gate */
299*7c478bd9Sstevel@tonic-gate if (B_DISSET(GET_BKEYDATA(h, indx)->type)) {
300*7c478bd9Sstevel@tonic-gate ret = DB_KEYEMPTY;
301*7c478bd9Sstevel@tonic-gate goto err;
302*7c478bd9Sstevel@tonic-gate }
303*7c478bd9Sstevel@tonic-gate
304*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_RENUMBER)) {
305*7c478bd9Sstevel@tonic-gate /* Delete the item, adjust the counts, adjust the cursors. */
306*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0)
307*7c478bd9Sstevel@tonic-gate goto err;
308*7c478bd9Sstevel@tonic-gate __bam_adjust(dbc, -1);
309*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, cp->recno, CA_DELETE);
310*7c478bd9Sstevel@tonic-gate
311*7c478bd9Sstevel@tonic-gate /*
312*7c478bd9Sstevel@tonic-gate * If the page is empty, delete it. The whole tree is locked
313*7c478bd9Sstevel@tonic-gate * so there are no preparations to make.
314*7c478bd9Sstevel@tonic-gate */
315*7c478bd9Sstevel@tonic-gate if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
316*7c478bd9Sstevel@tonic-gate stack = 0;
317*7c478bd9Sstevel@tonic-gate ret = __bam_dpages(dbc);
318*7c478bd9Sstevel@tonic-gate }
319*7c478bd9Sstevel@tonic-gate } else {
320*7c478bd9Sstevel@tonic-gate /* Use a delete/put pair to replace the record with a marker. */
321*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0)
322*7c478bd9Sstevel@tonic-gate goto err;
323*7c478bd9Sstevel@tonic-gate
324*7c478bd9Sstevel@tonic-gate B_TSET(bk.type, B_KEYDATA, 1);
325*7c478bd9Sstevel@tonic-gate bk.len = 0;
326*7c478bd9Sstevel@tonic-gate memset(&hdr, 0, sizeof(hdr));
327*7c478bd9Sstevel@tonic-gate hdr.data = &bk;
328*7c478bd9Sstevel@tonic-gate hdr.size = SSZA(BKEYDATA, data);
329*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data));
330*7c478bd9Sstevel@tonic-gate data.data = (char *)"";
331*7c478bd9Sstevel@tonic-gate data.size = 0;
332*7c478bd9Sstevel@tonic-gate if ((ret = __db_pitem(dbc,
333*7c478bd9Sstevel@tonic-gate h, indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0)
334*7c478bd9Sstevel@tonic-gate goto err;
335*7c478bd9Sstevel@tonic-gate }
336*7c478bd9Sstevel@tonic-gate F_SET(t->recno, RECNO_MODIFIED);
337*7c478bd9Sstevel@tonic-gate
338*7c478bd9Sstevel@tonic-gate err: if (stack)
339*7c478bd9Sstevel@tonic-gate __bam_stkrel(dbc, 0);
340*7c478bd9Sstevel@tonic-gate
341*7c478bd9Sstevel@tonic-gate /* If we upgraded the CDB lock upon entry; downgrade it now. */
342*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
343*7c478bd9Sstevel@tonic-gate (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
344*7c478bd9Sstevel@tonic-gate DB_LOCK_IWRITE, 0);
345*7c478bd9Sstevel@tonic-gate return (ret);
346*7c478bd9Sstevel@tonic-gate }
347*7c478bd9Sstevel@tonic-gate
348*7c478bd9Sstevel@tonic-gate /*
349*7c478bd9Sstevel@tonic-gate * __ram_put --
350*7c478bd9Sstevel@tonic-gate * Recno db->put function.
351*7c478bd9Sstevel@tonic-gate */
352*7c478bd9Sstevel@tonic-gate static int
__ram_put(dbp,txn,key,data,flags)353*7c478bd9Sstevel@tonic-gate __ram_put(dbp, txn, key, data, flags)
354*7c478bd9Sstevel@tonic-gate DB *dbp;
355*7c478bd9Sstevel@tonic-gate DB_TXN *txn;
356*7c478bd9Sstevel@tonic-gate DBT *key, *data;
357*7c478bd9Sstevel@tonic-gate u_int32_t flags;
358*7c478bd9Sstevel@tonic-gate {
359*7c478bd9Sstevel@tonic-gate DBC *dbc;
360*7c478bd9Sstevel@tonic-gate db_recno_t recno;
361*7c478bd9Sstevel@tonic-gate int ret, t_ret;
362*7c478bd9Sstevel@tonic-gate
363*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp);
364*7c478bd9Sstevel@tonic-gate
365*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */
366*7c478bd9Sstevel@tonic-gate if ((ret = __db_putchk(dbp,
367*7c478bd9Sstevel@tonic-gate key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0)
368*7c478bd9Sstevel@tonic-gate return (ret);
369*7c478bd9Sstevel@tonic-gate
370*7c478bd9Sstevel@tonic-gate /* Allocate a cursor. */
371*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
372*7c478bd9Sstevel@tonic-gate return (ret);
373*7c478bd9Sstevel@tonic-gate
374*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags);
375*7c478bd9Sstevel@tonic-gate
376*7c478bd9Sstevel@tonic-gate /*
377*7c478bd9Sstevel@tonic-gate * If we're appending to the tree, make sure we've read in all of
378*7c478bd9Sstevel@tonic-gate * the backing source file. Otherwise, check the user's record
379*7c478bd9Sstevel@tonic-gate * number and fill in as necessary.
380*7c478bd9Sstevel@tonic-gate */
381*7c478bd9Sstevel@tonic-gate ret = flags == DB_APPEND ?
382*7c478bd9Sstevel@tonic-gate __ram_update(dbc, DB_MAX_RECORDS, 0) :
383*7c478bd9Sstevel@tonic-gate __ram_getno(dbc, key, &recno, 1);
384*7c478bd9Sstevel@tonic-gate
385*7c478bd9Sstevel@tonic-gate /* Add the record. */
386*7c478bd9Sstevel@tonic-gate if (ret == 0)
387*7c478bd9Sstevel@tonic-gate ret = __ram_add(dbc, &recno, data, flags, 0);
388*7c478bd9Sstevel@tonic-gate
389*7c478bd9Sstevel@tonic-gate /* Discard the cursor. */
390*7c478bd9Sstevel@tonic-gate if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
391*7c478bd9Sstevel@tonic-gate ret = t_ret;
392*7c478bd9Sstevel@tonic-gate
393*7c478bd9Sstevel@tonic-gate /* Return the record number if we're appending to the tree. */
394*7c478bd9Sstevel@tonic-gate if (ret == 0 && flags == DB_APPEND)
395*7c478bd9Sstevel@tonic-gate *(db_recno_t *)key->data = recno;
396*7c478bd9Sstevel@tonic-gate
397*7c478bd9Sstevel@tonic-gate return (ret);
398*7c478bd9Sstevel@tonic-gate }
399*7c478bd9Sstevel@tonic-gate
400*7c478bd9Sstevel@tonic-gate /*
401*7c478bd9Sstevel@tonic-gate * __ram_sync --
402*7c478bd9Sstevel@tonic-gate * Recno db->sync function.
403*7c478bd9Sstevel@tonic-gate */
404*7c478bd9Sstevel@tonic-gate static int
__ram_sync(dbp,flags)405*7c478bd9Sstevel@tonic-gate __ram_sync(dbp, flags)
406*7c478bd9Sstevel@tonic-gate DB *dbp;
407*7c478bd9Sstevel@tonic-gate u_int32_t flags;
408*7c478bd9Sstevel@tonic-gate {
409*7c478bd9Sstevel@tonic-gate DBC *dbc;
410*7c478bd9Sstevel@tonic-gate int ret, t_ret;
411*7c478bd9Sstevel@tonic-gate
412*7c478bd9Sstevel@tonic-gate /*
413*7c478bd9Sstevel@tonic-gate * Sync the underlying btree.
414*7c478bd9Sstevel@tonic-gate *
415*7c478bd9Sstevel@tonic-gate * !!!
416*7c478bd9Sstevel@tonic-gate * We don't need to do a panic check or flags check, the "real"
417*7c478bd9Sstevel@tonic-gate * sync function does all that for us.
418*7c478bd9Sstevel@tonic-gate */
419*7c478bd9Sstevel@tonic-gate if ((ret = __db_sync(dbp, flags)) != 0)
420*7c478bd9Sstevel@tonic-gate return (ret);
421*7c478bd9Sstevel@tonic-gate
422*7c478bd9Sstevel@tonic-gate /* Allocate a cursor. */
423*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
424*7c478bd9Sstevel@tonic-gate return (ret);
425*7c478bd9Sstevel@tonic-gate
426*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, NULL, "ram_sync", NULL, NULL, flags);
427*7c478bd9Sstevel@tonic-gate
428*7c478bd9Sstevel@tonic-gate /* Copy back the backing source file. */
429*7c478bd9Sstevel@tonic-gate ret = __ram_writeback(dbc);
430*7c478bd9Sstevel@tonic-gate
431*7c478bd9Sstevel@tonic-gate /* Discard the cursor. */
432*7c478bd9Sstevel@tonic-gate if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
433*7c478bd9Sstevel@tonic-gate ret = t_ret;
434*7c478bd9Sstevel@tonic-gate
435*7c478bd9Sstevel@tonic-gate return (ret);
436*7c478bd9Sstevel@tonic-gate }
437*7c478bd9Sstevel@tonic-gate
438*7c478bd9Sstevel@tonic-gate /*
439*7c478bd9Sstevel@tonic-gate * __ram_close --
440*7c478bd9Sstevel@tonic-gate * Recno db->close function.
441*7c478bd9Sstevel@tonic-gate *
442*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_close __P((DB *));
443*7c478bd9Sstevel@tonic-gate */
444*7c478bd9Sstevel@tonic-gate int
__ram_close(dbp)445*7c478bd9Sstevel@tonic-gate __ram_close(dbp)
446*7c478bd9Sstevel@tonic-gate DB *dbp;
447*7c478bd9Sstevel@tonic-gate {
448*7c478bd9Sstevel@tonic-gate RECNO *rp;
449*7c478bd9Sstevel@tonic-gate
450*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)dbp->internal)->recno;
451*7c478bd9Sstevel@tonic-gate
452*7c478bd9Sstevel@tonic-gate /* Close any underlying mmap region. */
453*7c478bd9Sstevel@tonic-gate if (rp->re_smap != NULL)
454*7c478bd9Sstevel@tonic-gate (void)__db_unmapfile(rp->re_smap, rp->re_msize);
455*7c478bd9Sstevel@tonic-gate
456*7c478bd9Sstevel@tonic-gate /* Close any backing source file descriptor. */
457*7c478bd9Sstevel@tonic-gate if (rp->re_fd != -1)
458*7c478bd9Sstevel@tonic-gate (void)__os_close(rp->re_fd);
459*7c478bd9Sstevel@tonic-gate
460*7c478bd9Sstevel@tonic-gate /* Free any backing source file name. */
461*7c478bd9Sstevel@tonic-gate if (rp->re_source != NULL)
462*7c478bd9Sstevel@tonic-gate __os_freestr(rp->re_source);
463*7c478bd9Sstevel@tonic-gate
464*7c478bd9Sstevel@tonic-gate /* Free allocated memory. */
465*7c478bd9Sstevel@tonic-gate __os_free(rp, sizeof(RECNO));
466*7c478bd9Sstevel@tonic-gate ((BTREE *)dbp->internal)->recno = NULL;
467*7c478bd9Sstevel@tonic-gate
468*7c478bd9Sstevel@tonic-gate /* Close the underlying btree. */
469*7c478bd9Sstevel@tonic-gate return (__bam_close(dbp));
470*7c478bd9Sstevel@tonic-gate }
471*7c478bd9Sstevel@tonic-gate
472*7c478bd9Sstevel@tonic-gate /*
473*7c478bd9Sstevel@tonic-gate * __ram_c_del --
474*7c478bd9Sstevel@tonic-gate * Recno cursor->c_del function.
475*7c478bd9Sstevel@tonic-gate *
476*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_c_del __P((DBC *, u_int32_t));
477*7c478bd9Sstevel@tonic-gate */
478*7c478bd9Sstevel@tonic-gate int
__ram_c_del(dbc,flags)479*7c478bd9Sstevel@tonic-gate __ram_c_del(dbc, flags)
480*7c478bd9Sstevel@tonic-gate DBC *dbc;
481*7c478bd9Sstevel@tonic-gate u_int32_t flags;
482*7c478bd9Sstevel@tonic-gate {
483*7c478bd9Sstevel@tonic-gate CURSOR *cp;
484*7c478bd9Sstevel@tonic-gate DB *dbp;
485*7c478bd9Sstevel@tonic-gate int ret;
486*7c478bd9Sstevel@tonic-gate
487*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
488*7c478bd9Sstevel@tonic-gate cp = dbc->internal;
489*7c478bd9Sstevel@tonic-gate
490*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp);
491*7c478bd9Sstevel@tonic-gate
492*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */
493*7c478bd9Sstevel@tonic-gate if ((ret = __db_cdelchk(dbp, flags,
494*7c478bd9Sstevel@tonic-gate F_ISSET(dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
495*7c478bd9Sstevel@tonic-gate return (ret);
496*7c478bd9Sstevel@tonic-gate
497*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, dbc->txn, "ram_c_del", NULL, NULL, flags);
498*7c478bd9Sstevel@tonic-gate
499*7c478bd9Sstevel@tonic-gate /*
500*7c478bd9Sstevel@tonic-gate * If we are running CDB, this had better be either a write
501*7c478bd9Sstevel@tonic-gate * cursor or an immediate writer.
502*7c478bd9Sstevel@tonic-gate */
503*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB))
504*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
505*7c478bd9Sstevel@tonic-gate return (EINVAL);
506*7c478bd9Sstevel@tonic-gate
507*7c478bd9Sstevel@tonic-gate /*
508*7c478bd9Sstevel@tonic-gate * The semantics of cursors during delete are as follows: if record
509*7c478bd9Sstevel@tonic-gate * numbers are mutable (DB_RE_RENUMBER is set), deleting a record
510*7c478bd9Sstevel@tonic-gate * causes the cursor to automatically point to the record immediately
511*7c478bd9Sstevel@tonic-gate * following. In this case it is possible to use a single cursor for
512*7c478bd9Sstevel@tonic-gate * repeated delete operations, without intervening operations.
513*7c478bd9Sstevel@tonic-gate *
514*7c478bd9Sstevel@tonic-gate * If record numbers are not mutable, then records are replaced with
515*7c478bd9Sstevel@tonic-gate * a marker containing a delete flag. If the record referenced by
516*7c478bd9Sstevel@tonic-gate * this cursor has already been deleted, we will detect that as part
517*7c478bd9Sstevel@tonic-gate * of the delete operation, and fail.
518*7c478bd9Sstevel@tonic-gate */
519*7c478bd9Sstevel@tonic-gate return (__ram_i_delete(dbc));
520*7c478bd9Sstevel@tonic-gate }
521*7c478bd9Sstevel@tonic-gate
522*7c478bd9Sstevel@tonic-gate /*
523*7c478bd9Sstevel@tonic-gate * __ram_c_get --
524*7c478bd9Sstevel@tonic-gate * Recno cursor->c_get function.
525*7c478bd9Sstevel@tonic-gate *
526*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
527*7c478bd9Sstevel@tonic-gate */
528*7c478bd9Sstevel@tonic-gate int
__ram_c_get(dbc,key,data,flags)529*7c478bd9Sstevel@tonic-gate __ram_c_get(dbc, key, data, flags)
530*7c478bd9Sstevel@tonic-gate DBC *dbc;
531*7c478bd9Sstevel@tonic-gate DBT *key, *data;
532*7c478bd9Sstevel@tonic-gate u_int32_t flags;
533*7c478bd9Sstevel@tonic-gate {
534*7c478bd9Sstevel@tonic-gate CURSOR *cp, copy;
535*7c478bd9Sstevel@tonic-gate DB *dbp;
536*7c478bd9Sstevel@tonic-gate PAGE *h;
537*7c478bd9Sstevel@tonic-gate db_indx_t indx;
538*7c478bd9Sstevel@tonic-gate int exact, ret, stack, tmp_rmw;
539*7c478bd9Sstevel@tonic-gate
540*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
541*7c478bd9Sstevel@tonic-gate cp = dbc->internal;
542*7c478bd9Sstevel@tonic-gate
543*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp);
544*7c478bd9Sstevel@tonic-gate
545*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */
546*7c478bd9Sstevel@tonic-gate if ((ret = __db_cgetchk(dbc->dbp,
547*7c478bd9Sstevel@tonic-gate key, data, flags, cp->recno != RECNO_OOB)) != 0)
548*7c478bd9Sstevel@tonic-gate return (ret);
549*7c478bd9Sstevel@tonic-gate
550*7c478bd9Sstevel@tonic-gate /* Clear OR'd in additional bits so we can check for flag equality. */
551*7c478bd9Sstevel@tonic-gate tmp_rmw = 0;
552*7c478bd9Sstevel@tonic-gate if (LF_ISSET(DB_RMW)) {
553*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbp, DB_AM_CDB)) {
554*7c478bd9Sstevel@tonic-gate tmp_rmw = 1;
555*7c478bd9Sstevel@tonic-gate F_SET(dbc, DBC_RMW);
556*7c478bd9Sstevel@tonic-gate }
557*7c478bd9Sstevel@tonic-gate LF_CLR(DB_RMW);
558*7c478bd9Sstevel@tonic-gate }
559*7c478bd9Sstevel@tonic-gate
560*7c478bd9Sstevel@tonic-gate DEBUG_LREAD(dbc, dbc->txn, "ram_c_get",
561*7c478bd9Sstevel@tonic-gate flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
562*7c478bd9Sstevel@tonic-gate
563*7c478bd9Sstevel@tonic-gate /* Initialize the cursor for a new retrieval. */
564*7c478bd9Sstevel@tonic-gate copy = *cp;
565*7c478bd9Sstevel@tonic-gate
566*7c478bd9Sstevel@tonic-gate retry: /* Update the record number. */
567*7c478bd9Sstevel@tonic-gate stack = 0;
568*7c478bd9Sstevel@tonic-gate switch (flags) {
569*7c478bd9Sstevel@tonic-gate case DB_CURRENT:
570*7c478bd9Sstevel@tonic-gate /*
571*7c478bd9Sstevel@tonic-gate * If record numbers are mutable: if we just deleted a record,
572*7c478bd9Sstevel@tonic-gate * there is no action necessary, we return the record following
573*7c478bd9Sstevel@tonic-gate * the deleted item by virtue of renumbering the tree.
574*7c478bd9Sstevel@tonic-gate */
575*7c478bd9Sstevel@tonic-gate break;
576*7c478bd9Sstevel@tonic-gate case DB_NEXT:
577*7c478bd9Sstevel@tonic-gate /*
578*7c478bd9Sstevel@tonic-gate * If record numbers are mutable: if we just deleted a record,
579*7c478bd9Sstevel@tonic-gate * we have to avoid incrementing the record number so that we
580*7c478bd9Sstevel@tonic-gate * return the right record by virtue of renumbering the tree.
581*7c478bd9Sstevel@tonic-gate */
582*7c478bd9Sstevel@tonic-gate if (CD_ISSET(dbp, cp))
583*7c478bd9Sstevel@tonic-gate break;
584*7c478bd9Sstevel@tonic-gate
585*7c478bd9Sstevel@tonic-gate if (cp->recno != RECNO_OOB) {
586*7c478bd9Sstevel@tonic-gate ++cp->recno;
587*7c478bd9Sstevel@tonic-gate break;
588*7c478bd9Sstevel@tonic-gate }
589*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */
590*7c478bd9Sstevel@tonic-gate case DB_FIRST:
591*7c478bd9Sstevel@tonic-gate flags = DB_NEXT;
592*7c478bd9Sstevel@tonic-gate cp->recno = 1;
593*7c478bd9Sstevel@tonic-gate break;
594*7c478bd9Sstevel@tonic-gate case DB_PREV:
595*7c478bd9Sstevel@tonic-gate if (cp->recno != RECNO_OOB) {
596*7c478bd9Sstevel@tonic-gate if (cp->recno == 1) {
597*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND;
598*7c478bd9Sstevel@tonic-gate goto err;
599*7c478bd9Sstevel@tonic-gate }
600*7c478bd9Sstevel@tonic-gate --cp->recno;
601*7c478bd9Sstevel@tonic-gate break;
602*7c478bd9Sstevel@tonic-gate }
603*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */
604*7c478bd9Sstevel@tonic-gate case DB_LAST:
605*7c478bd9Sstevel@tonic-gate flags = DB_PREV;
606*7c478bd9Sstevel@tonic-gate if (((ret = __ram_update(dbc,
607*7c478bd9Sstevel@tonic-gate DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND)
608*7c478bd9Sstevel@tonic-gate goto err;
609*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0)
610*7c478bd9Sstevel@tonic-gate goto err;
611*7c478bd9Sstevel@tonic-gate if (cp->recno == 0) {
612*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND;
613*7c478bd9Sstevel@tonic-gate goto err;
614*7c478bd9Sstevel@tonic-gate }
615*7c478bd9Sstevel@tonic-gate break;
616*7c478bd9Sstevel@tonic-gate case DB_SET:
617*7c478bd9Sstevel@tonic-gate case DB_SET_RANGE:
618*7c478bd9Sstevel@tonic-gate if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0)
619*7c478bd9Sstevel@tonic-gate goto err;
620*7c478bd9Sstevel@tonic-gate break;
621*7c478bd9Sstevel@tonic-gate }
622*7c478bd9Sstevel@tonic-gate
623*7c478bd9Sstevel@tonic-gate /* Return the key if the user didn't give us one. */
624*7c478bd9Sstevel@tonic-gate if (flags != DB_SET && flags != DB_SET_RANGE &&
625*7c478bd9Sstevel@tonic-gate (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
626*7c478bd9Sstevel@tonic-gate &dbc->rkey.data, &dbc->rkey.ulen, dbp->db_malloc)) != 0)
627*7c478bd9Sstevel@tonic-gate goto err;
628*7c478bd9Sstevel@tonic-gate
629*7c478bd9Sstevel@tonic-gate /* Search the tree for the record. */
630*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, &cp->recno,
631*7c478bd9Sstevel@tonic-gate F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, 1, &exact)) != 0)
632*7c478bd9Sstevel@tonic-gate goto err;
633*7c478bd9Sstevel@tonic-gate stack = 1;
634*7c478bd9Sstevel@tonic-gate if (!exact) {
635*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND;
636*7c478bd9Sstevel@tonic-gate goto err;
637*7c478bd9Sstevel@tonic-gate }
638*7c478bd9Sstevel@tonic-gate h = cp->csp->page;
639*7c478bd9Sstevel@tonic-gate indx = cp->csp->indx;
640*7c478bd9Sstevel@tonic-gate
641*7c478bd9Sstevel@tonic-gate /*
642*7c478bd9Sstevel@tonic-gate * If re-numbering records, the on-page deleted flag means this record
643*7c478bd9Sstevel@tonic-gate * was implicitly created. If not re-numbering records, the on-page
644*7c478bd9Sstevel@tonic-gate * deleted flag means this record was implicitly created, or, it was
645*7c478bd9Sstevel@tonic-gate * deleted at some time. Regardless, we skip such records if doing
646*7c478bd9Sstevel@tonic-gate * cursor next/prev operations, and fail if the application requested
647*7c478bd9Sstevel@tonic-gate * them explicitly.
648*7c478bd9Sstevel@tonic-gate */
649*7c478bd9Sstevel@tonic-gate if (B_DISSET(GET_BKEYDATA(h, indx)->type)) {
650*7c478bd9Sstevel@tonic-gate if (flags == DB_NEXT || flags == DB_PREV) {
651*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0);
652*7c478bd9Sstevel@tonic-gate goto retry;
653*7c478bd9Sstevel@tonic-gate }
654*7c478bd9Sstevel@tonic-gate ret = DB_KEYEMPTY;
655*7c478bd9Sstevel@tonic-gate goto err;
656*7c478bd9Sstevel@tonic-gate }
657*7c478bd9Sstevel@tonic-gate
658*7c478bd9Sstevel@tonic-gate /* Return the data item. */
659*7c478bd9Sstevel@tonic-gate if ((ret = __db_ret(dbp,
660*7c478bd9Sstevel@tonic-gate h, indx, data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
661*7c478bd9Sstevel@tonic-gate goto err;
662*7c478bd9Sstevel@tonic-gate
663*7c478bd9Sstevel@tonic-gate /* The cursor was reset, no further delete adjustment is necessary. */
664*7c478bd9Sstevel@tonic-gate CD_CLR(dbp, cp);
665*7c478bd9Sstevel@tonic-gate
666*7c478bd9Sstevel@tonic-gate err: if (stack)
667*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0);
668*7c478bd9Sstevel@tonic-gate
669*7c478bd9Sstevel@tonic-gate /* Release temporary lock upgrade. */
670*7c478bd9Sstevel@tonic-gate if (tmp_rmw)
671*7c478bd9Sstevel@tonic-gate F_CLR(dbc, DBC_RMW);
672*7c478bd9Sstevel@tonic-gate
673*7c478bd9Sstevel@tonic-gate if (ret != 0)
674*7c478bd9Sstevel@tonic-gate *cp = copy;
675*7c478bd9Sstevel@tonic-gate
676*7c478bd9Sstevel@tonic-gate return (ret);
677*7c478bd9Sstevel@tonic-gate }
678*7c478bd9Sstevel@tonic-gate
679*7c478bd9Sstevel@tonic-gate /*
680*7c478bd9Sstevel@tonic-gate * __ram_c_put --
681*7c478bd9Sstevel@tonic-gate * Recno cursor->c_put function.
682*7c478bd9Sstevel@tonic-gate *
683*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
684*7c478bd9Sstevel@tonic-gate */
685*7c478bd9Sstevel@tonic-gate int
__ram_c_put(dbc,key,data,flags)686*7c478bd9Sstevel@tonic-gate __ram_c_put(dbc, key, data, flags)
687*7c478bd9Sstevel@tonic-gate DBC *dbc;
688*7c478bd9Sstevel@tonic-gate DBT *key, *data;
689*7c478bd9Sstevel@tonic-gate u_int32_t flags;
690*7c478bd9Sstevel@tonic-gate {
691*7c478bd9Sstevel@tonic-gate CURSOR *cp, copy;
692*7c478bd9Sstevel@tonic-gate DB *dbp;
693*7c478bd9Sstevel@tonic-gate int exact, ret;
694*7c478bd9Sstevel@tonic-gate void *arg;
695*7c478bd9Sstevel@tonic-gate
696*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
697*7c478bd9Sstevel@tonic-gate cp = dbc->internal;
698*7c478bd9Sstevel@tonic-gate
699*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp);
700*7c478bd9Sstevel@tonic-gate
701*7c478bd9Sstevel@tonic-gate if ((ret = __db_cputchk(dbc->dbp, key, data, flags,
702*7c478bd9Sstevel@tonic-gate F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
703*7c478bd9Sstevel@tonic-gate return (ret);
704*7c478bd9Sstevel@tonic-gate
705*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, dbc->txn, "ram_c_put", NULL, data, flags);
706*7c478bd9Sstevel@tonic-gate
707*7c478bd9Sstevel@tonic-gate /*
708*7c478bd9Sstevel@tonic-gate * If we are running CDB, this had better be either a write
709*7c478bd9Sstevel@tonic-gate * cursor or an immediate writer. If it's a regular writer,
710*7c478bd9Sstevel@tonic-gate * that means we have an IWRITE lock and we need to upgrade
711*7c478bd9Sstevel@tonic-gate * it to a write lock.
712*7c478bd9Sstevel@tonic-gate */
713*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB)) {
714*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
715*7c478bd9Sstevel@tonic-gate return (EINVAL);
716*7c478bd9Sstevel@tonic-gate
717*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbc, DBC_RMW) &&
718*7c478bd9Sstevel@tonic-gate (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
719*7c478bd9Sstevel@tonic-gate DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
720*7c478bd9Sstevel@tonic-gate &dbc->mylock)) != 0)
721*7c478bd9Sstevel@tonic-gate return (EAGAIN);
722*7c478bd9Sstevel@tonic-gate }
723*7c478bd9Sstevel@tonic-gate
724*7c478bd9Sstevel@tonic-gate /* Initialize the cursor for a new retrieval. */
725*7c478bd9Sstevel@tonic-gate copy = *cp;
726*7c478bd9Sstevel@tonic-gate
727*7c478bd9Sstevel@tonic-gate /*
728*7c478bd9Sstevel@tonic-gate * To split, we need a valid key for the page. Since it's a cursor,
729*7c478bd9Sstevel@tonic-gate * we have to build one.
730*7c478bd9Sstevel@tonic-gate *
731*7c478bd9Sstevel@tonic-gate * The split code discards all short-term locks and stack pages.
732*7c478bd9Sstevel@tonic-gate */
733*7c478bd9Sstevel@tonic-gate if (0) {
734*7c478bd9Sstevel@tonic-gate split: arg = &cp->recno;
735*7c478bd9Sstevel@tonic-gate if ((ret = __bam_split(dbc, arg)) != 0)
736*7c478bd9Sstevel@tonic-gate goto err;
737*7c478bd9Sstevel@tonic-gate }
738*7c478bd9Sstevel@tonic-gate
739*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
740*7c478bd9Sstevel@tonic-gate goto err;
741*7c478bd9Sstevel@tonic-gate if (!exact) {
742*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND;
743*7c478bd9Sstevel@tonic-gate goto err;
744*7c478bd9Sstevel@tonic-gate }
745*7c478bd9Sstevel@tonic-gate if ((ret = __bam_iitem(dbc, &cp->csp->page,
746*7c478bd9Sstevel@tonic-gate &cp->csp->indx, key, data, flags, 0)) == DB_NEEDSPLIT) {
747*7c478bd9Sstevel@tonic-gate if ((ret = __bam_stkrel(dbc, 0)) != 0)
748*7c478bd9Sstevel@tonic-gate goto err;
749*7c478bd9Sstevel@tonic-gate goto split;
750*7c478bd9Sstevel@tonic-gate }
751*7c478bd9Sstevel@tonic-gate if ((ret = __bam_stkrel(dbc, 0)) != 0)
752*7c478bd9Sstevel@tonic-gate goto err;
753*7c478bd9Sstevel@tonic-gate
754*7c478bd9Sstevel@tonic-gate switch (flags) {
755*7c478bd9Sstevel@tonic-gate case DB_AFTER:
756*7c478bd9Sstevel@tonic-gate /* Adjust the cursors. */
757*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, cp->recno, CA_IAFTER);
758*7c478bd9Sstevel@tonic-gate
759*7c478bd9Sstevel@tonic-gate /* Set this cursor to reference the new record. */
760*7c478bd9Sstevel@tonic-gate cp->recno = copy.recno + 1;
761*7c478bd9Sstevel@tonic-gate break;
762*7c478bd9Sstevel@tonic-gate case DB_BEFORE:
763*7c478bd9Sstevel@tonic-gate /* Adjust the cursors. */
764*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, cp->recno, CA_IBEFORE);
765*7c478bd9Sstevel@tonic-gate
766*7c478bd9Sstevel@tonic-gate /* Set this cursor to reference the new record. */
767*7c478bd9Sstevel@tonic-gate cp->recno = copy.recno;
768*7c478bd9Sstevel@tonic-gate break;
769*7c478bd9Sstevel@tonic-gate }
770*7c478bd9Sstevel@tonic-gate
771*7c478bd9Sstevel@tonic-gate /* The cursor was reset, no further delete adjustment is necessary. */
772*7c478bd9Sstevel@tonic-gate CD_CLR(dbp, cp);
773*7c478bd9Sstevel@tonic-gate
774*7c478bd9Sstevel@tonic-gate err: if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
775*7c478bd9Sstevel@tonic-gate (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
776*7c478bd9Sstevel@tonic-gate DB_LOCK_IWRITE, 0);
777*7c478bd9Sstevel@tonic-gate
778*7c478bd9Sstevel@tonic-gate if (ret != 0)
779*7c478bd9Sstevel@tonic-gate *cp = copy;
780*7c478bd9Sstevel@tonic-gate
781*7c478bd9Sstevel@tonic-gate return (ret);
782*7c478bd9Sstevel@tonic-gate }
783*7c478bd9Sstevel@tonic-gate
784*7c478bd9Sstevel@tonic-gate /*
785*7c478bd9Sstevel@tonic-gate * __ram_ca --
786*7c478bd9Sstevel@tonic-gate * Adjust cursors.
787*7c478bd9Sstevel@tonic-gate *
788*7c478bd9Sstevel@tonic-gate * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
789*7c478bd9Sstevel@tonic-gate */
790*7c478bd9Sstevel@tonic-gate void
__ram_ca(dbp,recno,op)791*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, recno, op)
792*7c478bd9Sstevel@tonic-gate DB *dbp;
793*7c478bd9Sstevel@tonic-gate db_recno_t recno;
794*7c478bd9Sstevel@tonic-gate ca_recno_arg op;
795*7c478bd9Sstevel@tonic-gate {
796*7c478bd9Sstevel@tonic-gate CURSOR *cp;
797*7c478bd9Sstevel@tonic-gate DBC *dbc;
798*7c478bd9Sstevel@tonic-gate
799*7c478bd9Sstevel@tonic-gate /*
800*7c478bd9Sstevel@tonic-gate * Adjust the cursors. See the comment in __bam_ca_delete().
801*7c478bd9Sstevel@tonic-gate */
802*7c478bd9Sstevel@tonic-gate DB_THREAD_LOCK(dbp);
803*7c478bd9Sstevel@tonic-gate for (dbc = TAILQ_FIRST(&dbp->active_queue);
804*7c478bd9Sstevel@tonic-gate dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
805*7c478bd9Sstevel@tonic-gate cp = dbc->internal;
806*7c478bd9Sstevel@tonic-gate switch (op) {
807*7c478bd9Sstevel@tonic-gate case CA_DELETE:
808*7c478bd9Sstevel@tonic-gate if (recno > cp->recno)
809*7c478bd9Sstevel@tonic-gate --cp->recno;
810*7c478bd9Sstevel@tonic-gate if (recno == cp->recno)
811*7c478bd9Sstevel@tonic-gate CD_SET(dbp, cp);
812*7c478bd9Sstevel@tonic-gate break;
813*7c478bd9Sstevel@tonic-gate case CA_IAFTER:
814*7c478bd9Sstevel@tonic-gate if (recno > cp->recno)
815*7c478bd9Sstevel@tonic-gate ++cp->recno;
816*7c478bd9Sstevel@tonic-gate break;
817*7c478bd9Sstevel@tonic-gate case CA_IBEFORE:
818*7c478bd9Sstevel@tonic-gate if (recno >= cp->recno)
819*7c478bd9Sstevel@tonic-gate ++cp->recno;
820*7c478bd9Sstevel@tonic-gate break;
821*7c478bd9Sstevel@tonic-gate }
822*7c478bd9Sstevel@tonic-gate }
823*7c478bd9Sstevel@tonic-gate DB_THREAD_UNLOCK(dbp);
824*7c478bd9Sstevel@tonic-gate }
825*7c478bd9Sstevel@tonic-gate
826*7c478bd9Sstevel@tonic-gate /*
827*7c478bd9Sstevel@tonic-gate * __ram_getno --
828*7c478bd9Sstevel@tonic-gate * Check the user's record number, and make sure we've seen it.
829*7c478bd9Sstevel@tonic-gate *
830*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
831*7c478bd9Sstevel@tonic-gate */
832*7c478bd9Sstevel@tonic-gate int
__ram_getno(dbc,key,rep,can_create)833*7c478bd9Sstevel@tonic-gate __ram_getno(dbc, key, rep, can_create)
834*7c478bd9Sstevel@tonic-gate DBC *dbc;
835*7c478bd9Sstevel@tonic-gate const DBT *key;
836*7c478bd9Sstevel@tonic-gate db_recno_t *rep;
837*7c478bd9Sstevel@tonic-gate int can_create;
838*7c478bd9Sstevel@tonic-gate {
839*7c478bd9Sstevel@tonic-gate DB *dbp;
840*7c478bd9Sstevel@tonic-gate db_recno_t recno;
841*7c478bd9Sstevel@tonic-gate
842*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
843*7c478bd9Sstevel@tonic-gate
844*7c478bd9Sstevel@tonic-gate /* Check the user's record number. */
845*7c478bd9Sstevel@tonic-gate if ((recno = *(db_recno_t *)key->data) == 0) {
846*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "illegal record number of 0");
847*7c478bd9Sstevel@tonic-gate return (EINVAL);
848*7c478bd9Sstevel@tonic-gate }
849*7c478bd9Sstevel@tonic-gate if (rep != NULL)
850*7c478bd9Sstevel@tonic-gate *rep = recno;
851*7c478bd9Sstevel@tonic-gate
852*7c478bd9Sstevel@tonic-gate /*
853*7c478bd9Sstevel@tonic-gate * Btree can neither create records nor read them in. Recno can
854*7c478bd9Sstevel@tonic-gate * do both, see if we can find the record.
855*7c478bd9Sstevel@tonic-gate */
856*7c478bd9Sstevel@tonic-gate return (dbp->type == DB_RECNO ?
857*7c478bd9Sstevel@tonic-gate __ram_update(dbc, recno, can_create) : 0);
858*7c478bd9Sstevel@tonic-gate }
859*7c478bd9Sstevel@tonic-gate
860*7c478bd9Sstevel@tonic-gate /*
861*7c478bd9Sstevel@tonic-gate * __ram_update --
862*7c478bd9Sstevel@tonic-gate * Ensure the tree has records up to and including the specified one.
863*7c478bd9Sstevel@tonic-gate */
864*7c478bd9Sstevel@tonic-gate static int
__ram_update(dbc,recno,can_create)865*7c478bd9Sstevel@tonic-gate __ram_update(dbc, recno, can_create)
866*7c478bd9Sstevel@tonic-gate DBC *dbc;
867*7c478bd9Sstevel@tonic-gate db_recno_t recno;
868*7c478bd9Sstevel@tonic-gate int can_create;
869*7c478bd9Sstevel@tonic-gate {
870*7c478bd9Sstevel@tonic-gate BTREE *t;
871*7c478bd9Sstevel@tonic-gate DB *dbp;
872*7c478bd9Sstevel@tonic-gate RECNO *rp;
873*7c478bd9Sstevel@tonic-gate db_recno_t nrecs;
874*7c478bd9Sstevel@tonic-gate int ret;
875*7c478bd9Sstevel@tonic-gate
876*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
877*7c478bd9Sstevel@tonic-gate t = dbp->internal;
878*7c478bd9Sstevel@tonic-gate rp = t->recno;
879*7c478bd9Sstevel@tonic-gate
880*7c478bd9Sstevel@tonic-gate /*
881*7c478bd9Sstevel@tonic-gate * If we can't create records and we've read the entire backing input
882*7c478bd9Sstevel@tonic-gate * file, we're done.
883*7c478bd9Sstevel@tonic-gate */
884*7c478bd9Sstevel@tonic-gate if (!can_create && F_ISSET(rp, RECNO_EOF))
885*7c478bd9Sstevel@tonic-gate return (0);
886*7c478bd9Sstevel@tonic-gate
887*7c478bd9Sstevel@tonic-gate /*
888*7c478bd9Sstevel@tonic-gate * If we haven't seen this record yet, try to get it from the original
889*7c478bd9Sstevel@tonic-gate * file.
890*7c478bd9Sstevel@tonic-gate */
891*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
892*7c478bd9Sstevel@tonic-gate return (ret);
893*7c478bd9Sstevel@tonic-gate if (!F_ISSET(rp, RECNO_EOF) && recno > nrecs) {
894*7c478bd9Sstevel@tonic-gate if ((ret = rp->re_irec(dbc, recno)) != 0)
895*7c478bd9Sstevel@tonic-gate return (ret);
896*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
897*7c478bd9Sstevel@tonic-gate return (ret);
898*7c478bd9Sstevel@tonic-gate }
899*7c478bd9Sstevel@tonic-gate
900*7c478bd9Sstevel@tonic-gate /*
901*7c478bd9Sstevel@tonic-gate * If we can create records, create empty ones up to the requested
902*7c478bd9Sstevel@tonic-gate * record.
903*7c478bd9Sstevel@tonic-gate */
904*7c478bd9Sstevel@tonic-gate if (!can_create || recno <= nrecs + 1)
905*7c478bd9Sstevel@tonic-gate return (0);
906*7c478bd9Sstevel@tonic-gate
907*7c478bd9Sstevel@tonic-gate dbc->rdata.dlen = 0;
908*7c478bd9Sstevel@tonic-gate dbc->rdata.doff = 0;
909*7c478bd9Sstevel@tonic-gate dbc->rdata.flags = 0;
910*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
911*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < rp->re_len) {
912*7c478bd9Sstevel@tonic-gate if ((ret =
913*7c478bd9Sstevel@tonic-gate __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) {
914*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0;
915*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL;
916*7c478bd9Sstevel@tonic-gate return (ret);
917*7c478bd9Sstevel@tonic-gate }
918*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = rp->re_len;
919*7c478bd9Sstevel@tonic-gate }
920*7c478bd9Sstevel@tonic-gate dbc->rdata.size = rp->re_len;
921*7c478bd9Sstevel@tonic-gate memset(dbc->rdata.data, rp->re_pad, rp->re_len);
922*7c478bd9Sstevel@tonic-gate } else
923*7c478bd9Sstevel@tonic-gate dbc->rdata.size = 0;
924*7c478bd9Sstevel@tonic-gate
925*7c478bd9Sstevel@tonic-gate while (recno > ++nrecs)
926*7c478bd9Sstevel@tonic-gate if ((ret = __ram_add(dbc,
927*7c478bd9Sstevel@tonic-gate &nrecs, &dbc->rdata, 0, BI_DELETED)) != 0)
928*7c478bd9Sstevel@tonic-gate return (ret);
929*7c478bd9Sstevel@tonic-gate return (0);
930*7c478bd9Sstevel@tonic-gate }
931*7c478bd9Sstevel@tonic-gate
932*7c478bd9Sstevel@tonic-gate /*
933*7c478bd9Sstevel@tonic-gate * __ram_source --
934*7c478bd9Sstevel@tonic-gate * Load information about the backing file.
935*7c478bd9Sstevel@tonic-gate */
936*7c478bd9Sstevel@tonic-gate static int
__ram_source(dbp,rp,fname)937*7c478bd9Sstevel@tonic-gate __ram_source(dbp, rp, fname)
938*7c478bd9Sstevel@tonic-gate DB *dbp;
939*7c478bd9Sstevel@tonic-gate RECNO *rp;
940*7c478bd9Sstevel@tonic-gate const char *fname;
941*7c478bd9Sstevel@tonic-gate {
942*7c478bd9Sstevel@tonic-gate size_t size;
943*7c478bd9Sstevel@tonic-gate u_int32_t bytes, mbytes, oflags;
944*7c478bd9Sstevel@tonic-gate int ret;
945*7c478bd9Sstevel@tonic-gate
946*7c478bd9Sstevel@tonic-gate /*
947*7c478bd9Sstevel@tonic-gate * !!!
948*7c478bd9Sstevel@tonic-gate * The caller has full responsibility for cleaning up on error --
949*7c478bd9Sstevel@tonic-gate * (it has to anyway, in case it fails after this routine succeeds).
950*7c478bd9Sstevel@tonic-gate */
951*7c478bd9Sstevel@tonic-gate if ((ret = __db_appname(dbp->dbenv,
952*7c478bd9Sstevel@tonic-gate DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0)
953*7c478bd9Sstevel@tonic-gate return (ret);
954*7c478bd9Sstevel@tonic-gate
955*7c478bd9Sstevel@tonic-gate oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
956*7c478bd9Sstevel@tonic-gate if ((ret =
957*7c478bd9Sstevel@tonic-gate __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
958*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
959*7c478bd9Sstevel@tonic-gate return (ret);
960*7c478bd9Sstevel@tonic-gate }
961*7c478bd9Sstevel@tonic-gate
962*7c478bd9Sstevel@tonic-gate /*
963*7c478bd9Sstevel@tonic-gate * XXX
964*7c478bd9Sstevel@tonic-gate * We'd like to test to see if the file is too big to mmap. Since we
965*7c478bd9Sstevel@tonic-gate * don't know what size or type off_t's or size_t's are, or the largest
966*7c478bd9Sstevel@tonic-gate * unsigned integral type is, or what random insanity the local C
967*7c478bd9Sstevel@tonic-gate * compiler will perpetrate, doing the comparison in a portable way is
968*7c478bd9Sstevel@tonic-gate * flatly impossible. Hope that mmap fails if the file is too large.
969*7c478bd9Sstevel@tonic-gate */
970*7c478bd9Sstevel@tonic-gate if ((ret = __os_ioinfo(rp->re_source,
971*7c478bd9Sstevel@tonic-gate rp->re_fd, &mbytes, &bytes, NULL)) != 0) {
972*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
973*7c478bd9Sstevel@tonic-gate return (ret);
974*7c478bd9Sstevel@tonic-gate }
975*7c478bd9Sstevel@tonic-gate if (mbytes == 0 && bytes == 0) {
976*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF);
977*7c478bd9Sstevel@tonic-gate return (0);
978*7c478bd9Sstevel@tonic-gate }
979*7c478bd9Sstevel@tonic-gate
980*7c478bd9Sstevel@tonic-gate size = mbytes * MEGABYTE + bytes;
981*7c478bd9Sstevel@tonic-gate if ((ret = __db_mapfile(rp->re_source,
982*7c478bd9Sstevel@tonic-gate rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0)
983*7c478bd9Sstevel@tonic-gate return (ret);
984*7c478bd9Sstevel@tonic-gate rp->re_cmap = rp->re_smap;
985*7c478bd9Sstevel@tonic-gate rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size);
986*7c478bd9Sstevel@tonic-gate rp->re_irec = F_ISSET(dbp, DB_RE_FIXEDLEN) ? __ram_fmap : __ram_vmap;
987*7c478bd9Sstevel@tonic-gate return (0);
988*7c478bd9Sstevel@tonic-gate }
989*7c478bd9Sstevel@tonic-gate
990*7c478bd9Sstevel@tonic-gate /*
991*7c478bd9Sstevel@tonic-gate * __ram_writeback --
992*7c478bd9Sstevel@tonic-gate * Rewrite the backing file.
993*7c478bd9Sstevel@tonic-gate */
994*7c478bd9Sstevel@tonic-gate static int
__ram_writeback(dbc)995*7c478bd9Sstevel@tonic-gate __ram_writeback(dbc)
996*7c478bd9Sstevel@tonic-gate DBC *dbc;
997*7c478bd9Sstevel@tonic-gate {
998*7c478bd9Sstevel@tonic-gate DB *dbp;
999*7c478bd9Sstevel@tonic-gate DBT key, data;
1000*7c478bd9Sstevel@tonic-gate RECNO *rp;
1001*7c478bd9Sstevel@tonic-gate db_recno_t keyno;
1002*7c478bd9Sstevel@tonic-gate ssize_t nw;
1003*7c478bd9Sstevel@tonic-gate int fd, ret, t_ret;
1004*7c478bd9Sstevel@tonic-gate u_int8_t delim, *pad;
1005*7c478bd9Sstevel@tonic-gate
1006*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
1007*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)dbp->internal)->recno;
1008*7c478bd9Sstevel@tonic-gate
1009*7c478bd9Sstevel@tonic-gate /* If the file wasn't modified, we're done. */
1010*7c478bd9Sstevel@tonic-gate if (!F_ISSET(rp, RECNO_MODIFIED))
1011*7c478bd9Sstevel@tonic-gate return (0);
1012*7c478bd9Sstevel@tonic-gate
1013*7c478bd9Sstevel@tonic-gate /* If there's no backing source file, we're done. */
1014*7c478bd9Sstevel@tonic-gate if (rp->re_source == NULL) {
1015*7c478bd9Sstevel@tonic-gate F_CLR(rp, RECNO_MODIFIED);
1016*7c478bd9Sstevel@tonic-gate return (0);
1017*7c478bd9Sstevel@tonic-gate }
1018*7c478bd9Sstevel@tonic-gate
1019*7c478bd9Sstevel@tonic-gate /*
1020*7c478bd9Sstevel@tonic-gate * Read any remaining records into the tree.
1021*7c478bd9Sstevel@tonic-gate *
1022*7c478bd9Sstevel@tonic-gate * !!!
1023*7c478bd9Sstevel@tonic-gate * This is why we can't support transactions when applications specify
1024*7c478bd9Sstevel@tonic-gate * backing (re_source) files. At this point we have to read in the
1025*7c478bd9Sstevel@tonic-gate * rest of the records from the file so that we can write all of the
1026*7c478bd9Sstevel@tonic-gate * records back out again, which could modify a page for which we'd
1027*7c478bd9Sstevel@tonic-gate * have to log changes and which we don't have locked. This could be
1028*7c478bd9Sstevel@tonic-gate * partially fixed by taking a snapshot of the entire file during the
1029*7c478bd9Sstevel@tonic-gate * db_open(), or, since db_open() isn't transaction protected, as part
1030*7c478bd9Sstevel@tonic-gate * of the first DB operation. But, if a checkpoint occurs then, the
1031*7c478bd9Sstevel@tonic-gate * part of the log holding the copy of the file could be discarded, and
1032*7c478bd9Sstevel@tonic-gate * that would make it impossible to recover in the face of disaster.
1033*7c478bd9Sstevel@tonic-gate * This could all probably be fixed, but it would require transaction
1034*7c478bd9Sstevel@tonic-gate * protecting the backing source file, i.e. mpool would have to know
1035*7c478bd9Sstevel@tonic-gate * about it, and we don't want to go there.
1036*7c478bd9Sstevel@tonic-gate */
1037*7c478bd9Sstevel@tonic-gate if ((ret =
1038*7c478bd9Sstevel@tonic-gate __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND)
1039*7c478bd9Sstevel@tonic-gate return (ret);
1040*7c478bd9Sstevel@tonic-gate
1041*7c478bd9Sstevel@tonic-gate /*
1042*7c478bd9Sstevel@tonic-gate * !!!
1043*7c478bd9Sstevel@tonic-gate * Close any underlying mmap region. This is required for Windows NT
1044*7c478bd9Sstevel@tonic-gate * (4.0, Service Pack 2) -- if the file is still mapped, the following
1045*7c478bd9Sstevel@tonic-gate * open will fail.
1046*7c478bd9Sstevel@tonic-gate */
1047*7c478bd9Sstevel@tonic-gate if (rp->re_smap != NULL) {
1048*7c478bd9Sstevel@tonic-gate (void)__db_unmapfile(rp->re_smap, rp->re_msize);
1049*7c478bd9Sstevel@tonic-gate rp->re_smap = NULL;
1050*7c478bd9Sstevel@tonic-gate }
1051*7c478bd9Sstevel@tonic-gate
1052*7c478bd9Sstevel@tonic-gate /* Get rid of any backing file descriptor, just on GP's. */
1053*7c478bd9Sstevel@tonic-gate if (rp->re_fd != -1) {
1054*7c478bd9Sstevel@tonic-gate (void)__os_close(rp->re_fd);
1055*7c478bd9Sstevel@tonic-gate rp->re_fd = -1;
1056*7c478bd9Sstevel@tonic-gate }
1057*7c478bd9Sstevel@tonic-gate
1058*7c478bd9Sstevel@tonic-gate /* Open the file, truncating it. */
1059*7c478bd9Sstevel@tonic-gate if ((ret = __db_open(rp->re_source,
1060*7c478bd9Sstevel@tonic-gate DB_SEQUENTIAL | DB_TRUNCATE,
1061*7c478bd9Sstevel@tonic-gate DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
1062*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
1063*7c478bd9Sstevel@tonic-gate return (ret);
1064*7c478bd9Sstevel@tonic-gate }
1065*7c478bd9Sstevel@tonic-gate
1066*7c478bd9Sstevel@tonic-gate /*
1067*7c478bd9Sstevel@tonic-gate * We step through the records, writing each one out. Use the record
1068*7c478bd9Sstevel@tonic-gate * number and the dbp->get() function, instead of a cursor, so we find
1069*7c478bd9Sstevel@tonic-gate * and write out "deleted" or non-existent records.
1070*7c478bd9Sstevel@tonic-gate */
1071*7c478bd9Sstevel@tonic-gate memset(&key, 0, sizeof(key));
1072*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data));
1073*7c478bd9Sstevel@tonic-gate key.size = sizeof(db_recno_t);
1074*7c478bd9Sstevel@tonic-gate key.data = &keyno;
1075*7c478bd9Sstevel@tonic-gate
1076*7c478bd9Sstevel@tonic-gate /*
1077*7c478bd9Sstevel@tonic-gate * We'll need the delimiter if we're doing variable-length records,
1078*7c478bd9Sstevel@tonic-gate * and the pad character if we're doing fixed-length records.
1079*7c478bd9Sstevel@tonic-gate */
1080*7c478bd9Sstevel@tonic-gate delim = rp->re_delim;
1081*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1082*7c478bd9Sstevel@tonic-gate if ((ret = __os_malloc(rp->re_len, NULL, &pad)) != 0)
1083*7c478bd9Sstevel@tonic-gate goto err;
1084*7c478bd9Sstevel@tonic-gate memset(pad, rp->re_pad, rp->re_len);
1085*7c478bd9Sstevel@tonic-gate } else
1086*7c478bd9Sstevel@tonic-gate COMPQUIET(pad, NULL);
1087*7c478bd9Sstevel@tonic-gate for (keyno = 1;; ++keyno) {
1088*7c478bd9Sstevel@tonic-gate switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
1089*7c478bd9Sstevel@tonic-gate case 0:
1090*7c478bd9Sstevel@tonic-gate if ((ret =
1091*7c478bd9Sstevel@tonic-gate __os_write(fd, data.data, data.size, &nw)) != 0)
1092*7c478bd9Sstevel@tonic-gate goto err;
1093*7c478bd9Sstevel@tonic-gate if (nw != (ssize_t)data.size) {
1094*7c478bd9Sstevel@tonic-gate ret = EIO;
1095*7c478bd9Sstevel@tonic-gate goto err;
1096*7c478bd9Sstevel@tonic-gate }
1097*7c478bd9Sstevel@tonic-gate break;
1098*7c478bd9Sstevel@tonic-gate case DB_KEYEMPTY:
1099*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1100*7c478bd9Sstevel@tonic-gate if ((ret =
1101*7c478bd9Sstevel@tonic-gate __os_write(fd, pad, rp->re_len, &nw)) != 0)
1102*7c478bd9Sstevel@tonic-gate goto err;
1103*7c478bd9Sstevel@tonic-gate if (nw != (ssize_t)rp->re_len) {
1104*7c478bd9Sstevel@tonic-gate ret = EIO;
1105*7c478bd9Sstevel@tonic-gate goto err;
1106*7c478bd9Sstevel@tonic-gate }
1107*7c478bd9Sstevel@tonic-gate }
1108*7c478bd9Sstevel@tonic-gate break;
1109*7c478bd9Sstevel@tonic-gate case DB_NOTFOUND:
1110*7c478bd9Sstevel@tonic-gate ret = 0;
1111*7c478bd9Sstevel@tonic-gate goto done;
1112*7c478bd9Sstevel@tonic-gate }
1113*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) {
1114*7c478bd9Sstevel@tonic-gate if ((ret = __os_write(fd, &delim, 1, &nw)) != 0)
1115*7c478bd9Sstevel@tonic-gate goto err;
1116*7c478bd9Sstevel@tonic-gate if (nw != 1) {
1117*7c478bd9Sstevel@tonic-gate ret = EIO;
1118*7c478bd9Sstevel@tonic-gate goto err;
1119*7c478bd9Sstevel@tonic-gate }
1120*7c478bd9Sstevel@tonic-gate }
1121*7c478bd9Sstevel@tonic-gate }
1122*7c478bd9Sstevel@tonic-gate
1123*7c478bd9Sstevel@tonic-gate err:
1124*7c478bd9Sstevel@tonic-gate done: /* Close the file descriptor. */
1125*7c478bd9Sstevel@tonic-gate if ((t_ret = __os_close(fd)) != 0 || ret == 0)
1126*7c478bd9Sstevel@tonic-gate ret = t_ret;
1127*7c478bd9Sstevel@tonic-gate
1128*7c478bd9Sstevel@tonic-gate if (ret == 0)
1129*7c478bd9Sstevel@tonic-gate F_CLR(rp, RECNO_MODIFIED);
1130*7c478bd9Sstevel@tonic-gate return (ret);
1131*7c478bd9Sstevel@tonic-gate }
1132*7c478bd9Sstevel@tonic-gate
1133*7c478bd9Sstevel@tonic-gate /*
1134*7c478bd9Sstevel@tonic-gate * __ram_fmap --
1135*7c478bd9Sstevel@tonic-gate * Get fixed length records from a file.
1136*7c478bd9Sstevel@tonic-gate */
1137*7c478bd9Sstevel@tonic-gate static int
__ram_fmap(dbc,top)1138*7c478bd9Sstevel@tonic-gate __ram_fmap(dbc, top)
1139*7c478bd9Sstevel@tonic-gate DBC *dbc;
1140*7c478bd9Sstevel@tonic-gate db_recno_t top;
1141*7c478bd9Sstevel@tonic-gate {
1142*7c478bd9Sstevel@tonic-gate DB *dbp;
1143*7c478bd9Sstevel@tonic-gate DBT data;
1144*7c478bd9Sstevel@tonic-gate RECNO *rp;
1145*7c478bd9Sstevel@tonic-gate db_recno_t recno;
1146*7c478bd9Sstevel@tonic-gate u_int32_t len;
1147*7c478bd9Sstevel@tonic-gate u_int8_t *sp, *ep, *p;
1148*7c478bd9Sstevel@tonic-gate int ret;
1149*7c478bd9Sstevel@tonic-gate
1150*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &recno)) != 0)
1151*7c478bd9Sstevel@tonic-gate return (ret);
1152*7c478bd9Sstevel@tonic-gate
1153*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
1154*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)(dbp->internal))->recno;
1155*7c478bd9Sstevel@tonic-gate
1156*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < rp->re_len) {
1157*7c478bd9Sstevel@tonic-gate if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) {
1158*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0;
1159*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL;
1160*7c478bd9Sstevel@tonic-gate return (ret);
1161*7c478bd9Sstevel@tonic-gate }
1162*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = rp->re_len;
1163*7c478bd9Sstevel@tonic-gate }
1164*7c478bd9Sstevel@tonic-gate
1165*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data));
1166*7c478bd9Sstevel@tonic-gate data.data = dbc->rdata.data;
1167*7c478bd9Sstevel@tonic-gate data.size = rp->re_len;
1168*7c478bd9Sstevel@tonic-gate
1169*7c478bd9Sstevel@tonic-gate sp = (u_int8_t *)rp->re_cmap;
1170*7c478bd9Sstevel@tonic-gate ep = (u_int8_t *)rp->re_emap;
1171*7c478bd9Sstevel@tonic-gate while (recno < top) {
1172*7c478bd9Sstevel@tonic-gate if (sp >= ep) {
1173*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF);
1174*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND);
1175*7c478bd9Sstevel@tonic-gate }
1176*7c478bd9Sstevel@tonic-gate len = rp->re_len;
1177*7c478bd9Sstevel@tonic-gate for (p = dbc->rdata.data;
1178*7c478bd9Sstevel@tonic-gate sp < ep && len > 0; *p++ = *sp++, --len)
1179*7c478bd9Sstevel@tonic-gate ;
1180*7c478bd9Sstevel@tonic-gate
1181*7c478bd9Sstevel@tonic-gate /*
1182*7c478bd9Sstevel@tonic-gate * Another process may have read this record from the input
1183*7c478bd9Sstevel@tonic-gate * file and stored it into the database already, in which
1184*7c478bd9Sstevel@tonic-gate * case we don't need to repeat that operation. We detect
1185*7c478bd9Sstevel@tonic-gate * this by checking if the last record we've read is greater
1186*7c478bd9Sstevel@tonic-gate * or equal to the number of records in the database.
1187*7c478bd9Sstevel@tonic-gate *
1188*7c478bd9Sstevel@tonic-gate * XXX
1189*7c478bd9Sstevel@tonic-gate * We should just do a seek, since the records are fixed
1190*7c478bd9Sstevel@tonic-gate * length.
1191*7c478bd9Sstevel@tonic-gate */
1192*7c478bd9Sstevel@tonic-gate if (rp->re_last >= recno) {
1193*7c478bd9Sstevel@tonic-gate if (len != 0)
1194*7c478bd9Sstevel@tonic-gate memset(p, rp->re_pad, len);
1195*7c478bd9Sstevel@tonic-gate
1196*7c478bd9Sstevel@tonic-gate ++recno;
1197*7c478bd9Sstevel@tonic-gate if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0)
1198*7c478bd9Sstevel@tonic-gate return (ret);
1199*7c478bd9Sstevel@tonic-gate }
1200*7c478bd9Sstevel@tonic-gate ++rp->re_last;
1201*7c478bd9Sstevel@tonic-gate }
1202*7c478bd9Sstevel@tonic-gate rp->re_cmap = sp;
1203*7c478bd9Sstevel@tonic-gate return (0);
1204*7c478bd9Sstevel@tonic-gate }
1205*7c478bd9Sstevel@tonic-gate
1206*7c478bd9Sstevel@tonic-gate /*
1207*7c478bd9Sstevel@tonic-gate * __ram_vmap --
1208*7c478bd9Sstevel@tonic-gate * Get variable length records from a file.
1209*7c478bd9Sstevel@tonic-gate */
1210*7c478bd9Sstevel@tonic-gate static int
__ram_vmap(dbc,top)1211*7c478bd9Sstevel@tonic-gate __ram_vmap(dbc, top)
1212*7c478bd9Sstevel@tonic-gate DBC *dbc;
1213*7c478bd9Sstevel@tonic-gate db_recno_t top;
1214*7c478bd9Sstevel@tonic-gate {
1215*7c478bd9Sstevel@tonic-gate DBT data;
1216*7c478bd9Sstevel@tonic-gate RECNO *rp;
1217*7c478bd9Sstevel@tonic-gate db_recno_t recno;
1218*7c478bd9Sstevel@tonic-gate u_int8_t *sp, *ep;
1219*7c478bd9Sstevel@tonic-gate int delim, ret;
1220*7c478bd9Sstevel@tonic-gate
1221*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)(dbc->dbp->internal))->recno;
1222*7c478bd9Sstevel@tonic-gate
1223*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &recno)) != 0)
1224*7c478bd9Sstevel@tonic-gate return (ret);
1225*7c478bd9Sstevel@tonic-gate
1226*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data));
1227*7c478bd9Sstevel@tonic-gate
1228*7c478bd9Sstevel@tonic-gate delim = rp->re_delim;
1229*7c478bd9Sstevel@tonic-gate
1230*7c478bd9Sstevel@tonic-gate sp = (u_int8_t *)rp->re_cmap;
1231*7c478bd9Sstevel@tonic-gate ep = (u_int8_t *)rp->re_emap;
1232*7c478bd9Sstevel@tonic-gate while (recno < top) {
1233*7c478bd9Sstevel@tonic-gate if (sp >= ep) {
1234*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF);
1235*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND);
1236*7c478bd9Sstevel@tonic-gate }
1237*7c478bd9Sstevel@tonic-gate for (data.data = sp; sp < ep && *sp != delim; ++sp)
1238*7c478bd9Sstevel@tonic-gate ;
1239*7c478bd9Sstevel@tonic-gate
1240*7c478bd9Sstevel@tonic-gate /*
1241*7c478bd9Sstevel@tonic-gate * Another process may have read this record from the input
1242*7c478bd9Sstevel@tonic-gate * file and stored it into the database already, in which
1243*7c478bd9Sstevel@tonic-gate * case we don't need to repeat that operation. We detect
1244*7c478bd9Sstevel@tonic-gate * this by checking if the last record we've read is greater
1245*7c478bd9Sstevel@tonic-gate * or equal to the number of records in the database.
1246*7c478bd9Sstevel@tonic-gate */
1247*7c478bd9Sstevel@tonic-gate if (rp->re_last >= recno) {
1248*7c478bd9Sstevel@tonic-gate data.size = sp - (u_int8_t *)data.data;
1249*7c478bd9Sstevel@tonic-gate ++recno;
1250*7c478bd9Sstevel@tonic-gate if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0)
1251*7c478bd9Sstevel@tonic-gate return (ret);
1252*7c478bd9Sstevel@tonic-gate }
1253*7c478bd9Sstevel@tonic-gate ++rp->re_last;
1254*7c478bd9Sstevel@tonic-gate ++sp;
1255*7c478bd9Sstevel@tonic-gate }
1256*7c478bd9Sstevel@tonic-gate rp->re_cmap = sp;
1257*7c478bd9Sstevel@tonic-gate return (0);
1258*7c478bd9Sstevel@tonic-gate }
1259*7c478bd9Sstevel@tonic-gate
1260*7c478bd9Sstevel@tonic-gate /*
1261*7c478bd9Sstevel@tonic-gate * __ram_add --
1262*7c478bd9Sstevel@tonic-gate * Add records into the tree.
1263*7c478bd9Sstevel@tonic-gate */
1264*7c478bd9Sstevel@tonic-gate static int
__ram_add(dbc,recnop,data,flags,bi_flags)1265*7c478bd9Sstevel@tonic-gate __ram_add(dbc, recnop, data, flags, bi_flags)
1266*7c478bd9Sstevel@tonic-gate DBC *dbc;
1267*7c478bd9Sstevel@tonic-gate db_recno_t *recnop;
1268*7c478bd9Sstevel@tonic-gate DBT *data;
1269*7c478bd9Sstevel@tonic-gate u_int32_t flags, bi_flags;
1270*7c478bd9Sstevel@tonic-gate {
1271*7c478bd9Sstevel@tonic-gate BKEYDATA *bk;
1272*7c478bd9Sstevel@tonic-gate CURSOR *cp;
1273*7c478bd9Sstevel@tonic-gate DB *dbp;
1274*7c478bd9Sstevel@tonic-gate PAGE *h;
1275*7c478bd9Sstevel@tonic-gate db_indx_t indx;
1276*7c478bd9Sstevel@tonic-gate int exact, isdeleted, ret, stack;
1277*7c478bd9Sstevel@tonic-gate
1278*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp;
1279*7c478bd9Sstevel@tonic-gate cp = dbc->internal;
1280*7c478bd9Sstevel@tonic-gate
1281*7c478bd9Sstevel@tonic-gate retry: /* Find the slot for insertion. */
1282*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, recnop,
1283*7c478bd9Sstevel@tonic-gate S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0)
1284*7c478bd9Sstevel@tonic-gate return (ret);
1285*7c478bd9Sstevel@tonic-gate h = cp->csp->page;
1286*7c478bd9Sstevel@tonic-gate indx = cp->csp->indx;
1287*7c478bd9Sstevel@tonic-gate stack = 1;
1288*7c478bd9Sstevel@tonic-gate
1289*7c478bd9Sstevel@tonic-gate /*
1290*7c478bd9Sstevel@tonic-gate * If re-numbering records, the on-page deleted flag means this record
1291*7c478bd9Sstevel@tonic-gate * was implicitly created. If not re-numbering records, the on-page
1292*7c478bd9Sstevel@tonic-gate * deleted flag means this record was implicitly created, or, it was
1293*7c478bd9Sstevel@tonic-gate * deleted at some time.
1294*7c478bd9Sstevel@tonic-gate *
1295*7c478bd9Sstevel@tonic-gate * If DB_NOOVERWRITE is set and the item already exists in the tree,
1296*7c478bd9Sstevel@tonic-gate * return an error unless the item was either marked for deletion or
1297*7c478bd9Sstevel@tonic-gate * only implicitly created.
1298*7c478bd9Sstevel@tonic-gate */
1299*7c478bd9Sstevel@tonic-gate isdeleted = 0;
1300*7c478bd9Sstevel@tonic-gate if (exact) {
1301*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx);
1302*7c478bd9Sstevel@tonic-gate if (B_DISSET(bk->type))
1303*7c478bd9Sstevel@tonic-gate isdeleted = 1;
1304*7c478bd9Sstevel@tonic-gate else
1305*7c478bd9Sstevel@tonic-gate if (flags == DB_NOOVERWRITE) {
1306*7c478bd9Sstevel@tonic-gate ret = DB_KEYEXIST;
1307*7c478bd9Sstevel@tonic-gate goto err;
1308*7c478bd9Sstevel@tonic-gate }
1309*7c478bd9Sstevel@tonic-gate }
1310*7c478bd9Sstevel@tonic-gate
1311*7c478bd9Sstevel@tonic-gate /*
1312*7c478bd9Sstevel@tonic-gate * Select the arguments for __bam_iitem() and do the insert. If the
1313*7c478bd9Sstevel@tonic-gate * key is an exact match, or we're replacing the data item with a
1314*7c478bd9Sstevel@tonic-gate * new data item, replace the current item. If the key isn't an exact
1315*7c478bd9Sstevel@tonic-gate * match, we're inserting a new key/data pair, before the search
1316*7c478bd9Sstevel@tonic-gate * location.
1317*7c478bd9Sstevel@tonic-gate */
1318*7c478bd9Sstevel@tonic-gate switch (ret = __bam_iitem(dbc,
1319*7c478bd9Sstevel@tonic-gate &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) {
1320*7c478bd9Sstevel@tonic-gate case 0:
1321*7c478bd9Sstevel@tonic-gate /*
1322*7c478bd9Sstevel@tonic-gate * Don't adjust anything.
1323*7c478bd9Sstevel@tonic-gate *
1324*7c478bd9Sstevel@tonic-gate * If we inserted a record, no cursors need adjusting because
1325*7c478bd9Sstevel@tonic-gate * the only new record it's possible to insert is at the very
1326*7c478bd9Sstevel@tonic-gate * end of the tree. The necessary adjustments to the internal
1327*7c478bd9Sstevel@tonic-gate * page counts were made by __bam_iitem().
1328*7c478bd9Sstevel@tonic-gate *
1329*7c478bd9Sstevel@tonic-gate * If we overwrote a record, no cursors need adjusting because
1330*7c478bd9Sstevel@tonic-gate * future DBcursor->get calls will simply return the underlying
1331*7c478bd9Sstevel@tonic-gate * record (there's no adjustment made for the DB_CURRENT flag
1332*7c478bd9Sstevel@tonic-gate * when a cursor get operation immediately follows a cursor
1333*7c478bd9Sstevel@tonic-gate * delete operation, and the normal adjustment for the DB_NEXT
1334*7c478bd9Sstevel@tonic-gate * flag is still correct).
1335*7c478bd9Sstevel@tonic-gate */
1336*7c478bd9Sstevel@tonic-gate break;
1337*7c478bd9Sstevel@tonic-gate case DB_NEEDSPLIT:
1338*7c478bd9Sstevel@tonic-gate /* Discard the stack of pages and split the page. */
1339*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0);
1340*7c478bd9Sstevel@tonic-gate stack = 0;
1341*7c478bd9Sstevel@tonic-gate
1342*7c478bd9Sstevel@tonic-gate if ((ret = __bam_split(dbc, recnop)) != 0)
1343*7c478bd9Sstevel@tonic-gate goto err;
1344*7c478bd9Sstevel@tonic-gate
1345*7c478bd9Sstevel@tonic-gate goto retry;
1346*7c478bd9Sstevel@tonic-gate /* NOTREACHED */
1347*7c478bd9Sstevel@tonic-gate default:
1348*7c478bd9Sstevel@tonic-gate goto err;
1349*7c478bd9Sstevel@tonic-gate }
1350*7c478bd9Sstevel@tonic-gate
1351*7c478bd9Sstevel@tonic-gate
1352*7c478bd9Sstevel@tonic-gate err: if (stack)
1353*7c478bd9Sstevel@tonic-gate __bam_stkrel(dbc, 0);
1354*7c478bd9Sstevel@tonic-gate
1355*7c478bd9Sstevel@tonic-gate return (ret);
1356*7c478bd9Sstevel@tonic-gate }
1357