1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
10 */
11 /*
12 * Copyright (c) 1990, 1993
13 * The Regents of the University of California. All rights reserved.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 */
43
44 #include "config.h"
45
/*
 * Source-control identification string for this file ("@(#)" prefix, file
 * name, revision and date).  It is compiled out when `lint' is defined.
 */
#ifndef lint
static const char sccsid[] = "@(#)bt_rsearch.c 10.21 (Sleepycat) 12/2/98";
#endif /* not lint */
49
50 #ifndef NO_SYSTEM_INCLUDES
51 #include <sys/types.h>
52 #endif
53
54 #include "db_int.h"
55 #include "db_page.h"
56 #include "btree.h"
57
58 /*
59 * __bam_rsearch --
60 * Search a btree for a record number.
61 *
62 * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
63 */
64 int
__bam_rsearch(dbc,recnop,flags,stop,exactp)65 __bam_rsearch(dbc, recnop, flags, stop, exactp)
66 DBC *dbc;
67 db_recno_t *recnop;
68 u_int32_t flags;
69 int stop, *exactp;
70 {
71 BINTERNAL *bi;
72 CURSOR *cp;
73 DB *dbp;
74 DB_LOCK lock;
75 PAGE *h;
76 RINTERNAL *ri;
77 db_indx_t indx, top;
78 db_pgno_t pg;
79 db_recno_t i, recno, total;
80 int ret, stack;
81
82 dbp = dbc->dbp;
83 cp = dbc->internal;
84
85 BT_STK_CLR(cp);
86
87 /*
88 * There are several ways we search a btree tree. The flags argument
89 * specifies if we're acquiring read or write locks and if we are
90 * locking pairs of pages. In addition, if we're adding or deleting
91 * an item, we have to lock the entire tree, regardless. See btree.h
92 * for more details.
93 *
94 * If write-locking pages, we need to know whether or not to acquire a
95 * write lock on a page before getting it. This depends on how deep it
96 * is in tree, which we don't know until we acquire the root page. So,
97 * if we need to lock the root page we may have to upgrade it later,
98 * because we won't get the correct lock initially.
99 *
100 * Retrieve the root page.
101 */
102 pg = PGNO_ROOT;
103 stack = LF_ISSET(S_STACK);
104 if ((ret = __bam_lget(dbc,
105 0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
106 return (ret);
107 if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
108 (void)__BT_LPUT(dbc, lock);
109 return (ret);
110 }
111
112 /*
113 * Decide if we need to save this page; if we do, write lock it.
114 * We deliberately don't lock-couple on this call. If the tree
115 * is tiny, i.e., one page, and two threads are busily updating
116 * the root page, we're almost guaranteed deadlocks galore, as
117 * each one gets a read lock and then blocks the other's attempt
118 * for a write lock.
119 */
120 if (!stack &&
121 ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
122 (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
123 (void)memp_fput(dbp->mpf, h, 0);
124 (void)__BT_LPUT(dbc, lock);
125 if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
126 return (ret);
127 if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
128 (void)__BT_LPUT(dbc, lock);
129 return (ret);
130 }
131 stack = 1;
132 }
133
134 /*
135 * If appending to the tree, set the record number now -- we have the
136 * root page locked.
137 *
138 * Delete only deletes exact matches, read only returns exact matches.
139 * Note, this is different from __bam_search(), which returns non-exact
140 * matches for read.
141 *
142 * The record may not exist. We can only return the correct location
143 * for the record immediately after the last record in the tree, so do
144 * a fast check now.
145 */
146 total = RE_NREC(h);
147 if (LF_ISSET(S_APPEND)) {
148 *exactp = 0;
149 *recnop = recno = total + 1;
150 } else {
151 recno = *recnop;
152 if (recno <= total)
153 *exactp = 1;
154 else {
155 *exactp = 0;
156 if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) {
157 (void)memp_fput(dbp->mpf, h, 0);
158 (void)__BT_LPUT(dbc, lock);
159 return (DB_NOTFOUND);
160 }
161 }
162 }
163
164 /*
165 * !!!
166 * Record numbers in the tree are 0-based, but the recno is
167 * 1-based. All of the calculations below have to take this
168 * into account.
169 */
170 for (total = 0;;) {
171 switch (TYPE(h)) {
172 case P_LBTREE:
173 recno -= total;
174
175 /*
176 * There may be logically deleted records on the page,
177 * walk the page correcting for them. The record may
178 * not exist if there are enough deleted records in the
179 * page.
180 */
181 if (recno <= (db_recno_t)NUM_ENT(h) / P_INDX)
182 for (i = recno - 1;; --i) {
183 if (B_DISSET(GET_BKEYDATA(h,
184 i * P_INDX + O_INDX)->type))
185 ++recno;
186 if (i == 0)
187 break;
188 }
189 if (recno > (db_recno_t)NUM_ENT(h) / P_INDX) {
190 *exactp = 0;
191 if (!LF_ISSET(S_PAST_EOF) || recno >
192 (db_recno_t)(NUM_ENT(h) / P_INDX + 1)) {
193 ret = DB_NOTFOUND;
194 goto err;
195 }
196
197 }
198
199 /* Correct from 1-based to 0-based for a page offset. */
200 --recno;
201 BT_STK_ENTER(cp, h, recno * P_INDX, lock, ret);
202 return (ret);
203 case P_IBTREE:
204 for (indx = 0, top = NUM_ENT(h);;) {
205 bi = GET_BINTERNAL(h, indx);
206 if (++indx == top || total + bi->nrecs >= recno)
207 break;
208 total += bi->nrecs;
209 }
210 pg = bi->pgno;
211 break;
212 case P_LRECNO:
213 recno -= total;
214
215 /* Correct from 1-based to 0-based for a page offset. */
216 --recno;
217 BT_STK_ENTER(cp, h, recno, lock, ret);
218 return (ret);
219 case P_IRECNO:
220 for (indx = 0, top = NUM_ENT(h);;) {
221 ri = GET_RINTERNAL(h, indx);
222 if (++indx == top || total + ri->nrecs >= recno)
223 break;
224 total += ri->nrecs;
225 }
226 pg = ri->pgno;
227 break;
228 default:
229 return (__db_pgfmt(dbp, h->pgno));
230 }
231 --indx;
232
233 if (stack) {
234 /* Return if this is the lowest page wanted. */
235 if (LF_ISSET(S_PARENT) && stop == h->level) {
236 BT_STK_ENTER(cp, h, indx, lock, ret);
237 return (ret);
238 }
239 BT_STK_PUSH(cp, h, indx, lock, ret);
240 if (ret != 0)
241 goto err;
242
243 if ((ret =
244 __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
245 goto err;
246 } else {
247 /*
248 * Decide if we want to return a pointer to the next
249 * page in the stack. If we do, write lock it and
250 * never unlock it.
251 */
252 if ((LF_ISSET(S_PARENT) &&
253 (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
254 (h->level - 1) == LEAFLEVEL)
255 stack = 1;
256
257 (void)memp_fput(dbp->mpf, h, 0);
258
259 if ((ret =
260 __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ?
261 DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
262 goto err;
263 }
264
265 if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
266 goto err;
267 }
268 /* NOTREACHED */
269
270 err: BT_STK_POP(cp);
271 __bam_stkrel(dbc, 0);
272 return (ret);
273 }
274
275 /*
276 * __bam_adjust --
277 * Adjust the tree after adding or deleting a record.
278 *
279 * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
280 */
281 int
__bam_adjust(dbc,adjust)282 __bam_adjust(dbc, adjust)
283 DBC *dbc;
284 int32_t adjust;
285 {
286 CURSOR *cp;
287 DB *dbp;
288 EPG *epg;
289 PAGE *h;
290 int ret;
291
292 dbp = dbc->dbp;
293 cp = dbc->internal;
294
295 /* Update the record counts for the tree. */
296 for (epg = cp->sp; epg <= cp->csp; ++epg) {
297 h = epg->page;
298 if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
299 if (DB_LOGGING(dbc) &&
300 (ret = __bam_cadjust_log(dbp->dbenv->lg_info,
301 dbc->txn, &LSN(h), 0, dbp->log_fileid,
302 PGNO(h), &LSN(h), (u_int32_t)epg->indx,
303 adjust, 1)) != 0)
304 return (ret);
305
306 if (TYPE(h) == P_IBTREE)
307 GET_BINTERNAL(h, epg->indx)->nrecs += adjust;
308 else
309 GET_RINTERNAL(h, epg->indx)->nrecs += adjust;
310
311 if (PGNO(h) == PGNO_ROOT)
312 RE_NREC_ADJ(h, adjust);
313
314 if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
315 return (ret);
316 }
317 }
318 return (0);
319 }
320
321 /*
322 * __bam_nrecs --
323 * Return the number of records in the tree.
324 *
325 * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
326 */
327 int
__bam_nrecs(dbc,rep)328 __bam_nrecs(dbc, rep)
329 DBC *dbc;
330 db_recno_t *rep;
331 {
332 DB *dbp;
333 DB_LOCK lock;
334 PAGE *h;
335 db_pgno_t pgno;
336 int ret;
337
338 dbp = dbc->dbp;
339
340 pgno = PGNO_ROOT;
341 if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &lock)) != 0)
342 return (ret);
343 if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
344 return (ret);
345
346 *rep = RE_NREC(h);
347
348 (void)memp_fput(dbp->mpf, h, 0);
349 (void)__BT_TLPUT(dbc, lock);
350
351 return (0);
352 }
353
354 /*
355 * __bam_total --
356 * Return the number of records below a page.
357 *
358 * PUBLIC: db_recno_t __bam_total __P((PAGE *));
359 */
360 db_recno_t
__bam_total(h)361 __bam_total(h)
362 PAGE *h;
363 {
364 db_recno_t nrecs;
365 db_indx_t indx, top;
366
367 nrecs = 0;
368 top = NUM_ENT(h);
369
370 switch (TYPE(h)) {
371 case P_LBTREE:
372 /* Check for logically deleted records. */
373 for (indx = 0; indx < top; indx += P_INDX)
374 if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
375 ++nrecs;
376 break;
377 case P_IBTREE:
378 for (indx = 0; indx < top; indx += O_INDX)
379 nrecs += GET_BINTERNAL(h, indx)->nrecs;
380 break;
381 case P_LRECNO:
382 nrecs = NUM_ENT(h);
383 break;
384 case P_IRECNO:
385 for (indx = 0; indx < top; indx += O_INDX)
386 nrecs += GET_RINTERNAL(h, indx)->nrecs;
387 break;
388 }
389
390 return (nrecs);
391 }
392