1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #if defined(LIBC_SCCS) && !defined(lint) 36 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 37 #endif /* LIBC_SCCS and not lint */ 38 #include <sys/types.h> 39 40 #include <errno.h> 41 #include <stddef.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 45 #include <db.h> 46 #include "btree.h" 47 48 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 49 static int __bt_seqadv(BTREE *, EPG *, int); 50 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 51 52 /* 53 * Sequential scan support. 54 * 55 * The tree can be scanned sequentially, starting from either end of the 56 * tree or from any specific key. A scan request before any scanning is 57 * done is initialized as starting from the least node. 58 */ 59 60 /* 61 * __bt_seq -- 62 * Btree sequential scan interface. 63 * 64 * Parameters: 65 * dbp: pointer to access method 66 * key: key for positioning and return value 67 * data: data return value 68 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 69 * 70 * Returns: 71 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 72 */ 73 int 74 __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) 75 { 76 BTREE *t; 77 EPG e; 78 int status; 79 80 t = dbp->internal; 81 82 /* Toss any page pinned across calls. */ 83 if (t->bt_pinned != NULL) { 84 mpool_put(t->bt_mp, t->bt_pinned, 0); 85 t->bt_pinned = NULL; 86 } 87 88 /* 89 * If scan unitialized as yet, or starting at a specific record, set 90 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 91 * the page the cursor references if they're successful. 92 */ 93 switch (flags) { 94 case R_NEXT: 95 case R_PREV: 96 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 97 status = __bt_seqadv(t, &e, flags); 98 break; 99 } 100 /* FALLTHROUGH */ 101 case R_FIRST: 102 case R_LAST: 103 case R_CURSOR: 104 status = __bt_seqset(t, &e, key, flags); 105 break; 106 default: 107 errno = EINVAL; 108 return (RET_ERROR); 109 } 110 111 if (status == RET_SUCCESS) { 112 __bt_setcur(t, e.page->pgno, e.index); 113 114 status = 115 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 116 117 /* 118 * If the user is doing concurrent access, we copied the 119 * key/data, toss the page. 120 */ 121 if (F_ISSET(t, B_DB_LOCK)) 122 mpool_put(t->bt_mp, e.page, 0); 123 else 124 t->bt_pinned = e.page; 125 } 126 return (status); 127 } 128 129 /* 130 * __bt_seqset -- 131 * Set the sequential scan to a specific key. 132 * 133 * Parameters: 134 * t: tree 135 * ep: storage for returned key 136 * key: key for initial scan position 137 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 138 * 139 * Side effects: 140 * Pins the page the cursor references. 141 * 142 * Returns: 143 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 144 */ 145 static int 146 __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 147 { 148 PAGE *h; 149 pgno_t pg; 150 int exact; 151 152 /* 153 * Find the first, last or specific key in the tree and point the 154 * cursor at it. The cursor may not be moved until a new key has 155 * been found. 156 */ 157 switch (flags) { 158 case R_CURSOR: /* Keyed scan. */ 159 /* 160 * Find the first instance of the key or the smallest key 161 * which is greater than or equal to the specified key. 162 */ 163 if (key->data == NULL || key->size == 0) { 164 errno = EINVAL; 165 return (RET_ERROR); 166 } 167 return (__bt_first(t, key, ep, &exact)); 168 case R_FIRST: /* First record. */ 169 case R_NEXT: 170 /* Walk down the left-hand side of the tree. */ 171 for (pg = P_ROOT;;) { 172 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 173 return (RET_ERROR); 174 175 /* Check for an empty tree. */ 176 if (NEXTINDEX(h) == 0) { 177 mpool_put(t->bt_mp, h, 0); 178 return (RET_SPECIAL); 179 } 180 181 if (h->flags & (P_BLEAF | P_RLEAF)) 182 break; 183 pg = GETBINTERNAL(h, 0)->pgno; 184 mpool_put(t->bt_mp, h, 0); 185 } 186 ep->page = h; 187 ep->index = 0; 188 break; 189 case R_LAST: /* Last record. */ 190 case R_PREV: 191 /* Walk down the right-hand side of the tree. */ 192 for (pg = P_ROOT;;) { 193 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 194 return (RET_ERROR); 195 196 /* Check for an empty tree. */ 197 if (NEXTINDEX(h) == 0) { 198 mpool_put(t->bt_mp, h, 0); 199 return (RET_SPECIAL); 200 } 201 202 if (h->flags & (P_BLEAF | P_RLEAF)) 203 break; 204 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 205 mpool_put(t->bt_mp, h, 0); 206 } 207 208 ep->page = h; 209 ep->index = NEXTINDEX(h) - 1; 210 break; 211 } 212 return (RET_SUCCESS); 213 } 214 215 /* 216 * __bt_seqadvance -- 217 * Advance the sequential scan. 218 * 219 * Parameters: 220 * t: tree 221 * flags: R_NEXT, R_PREV 222 * 223 * Side effects: 224 * Pins the page the new key/data record is on. 225 * 226 * Returns: 227 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 228 */ 229 static int 230 __bt_seqadv(BTREE *t, EPG *ep, int flags) 231 { 232 CURSOR *c; 233 PAGE *h; 234 indx_t idx; 235 pgno_t pg; 236 int exact; 237 238 /* 239 * There are a couple of states that we can be in. The cursor has 240 * been initialized by the time we get here, but that's all we know. 241 */ 242 c = &t->bt_cursor; 243 244 /* 245 * The cursor was deleted where there weren't any duplicate records, 246 * so the key was saved. Find out where that key would go in the 247 * current tree. It doesn't matter if the returned key is an exact 248 * match or not -- if it's an exact match, the record was added after 249 * the delete so we can just return it. If not, as long as there's 250 * a record there, return it. 251 */ 252 if (F_ISSET(c, CURS_ACQUIRE)) 253 return (__bt_first(t, &c->key, ep, &exact)); 254 255 /* Get the page referenced by the cursor. */ 256 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 257 return (RET_ERROR); 258 259 /* 260 * Find the next/previous record in the tree and point the cursor at 261 * it. The cursor may not be moved until a new key has been found. 262 */ 263 switch (flags) { 264 case R_NEXT: /* Next record. */ 265 /* 266 * The cursor was deleted in duplicate records, and moved 267 * forward to a record that has yet to be returned. Clear 268 * that flag, and return the record. 269 */ 270 if (F_ISSET(c, CURS_AFTER)) 271 goto usecurrent; 272 idx = c->pg.index; 273 if (++idx == NEXTINDEX(h)) { 274 pg = h->nextpg; 275 mpool_put(t->bt_mp, h, 0); 276 if (pg == P_INVALID) 277 return (RET_SPECIAL); 278 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 279 return (RET_ERROR); 280 idx = 0; 281 } 282 break; 283 case R_PREV: /* Previous record. */ 284 /* 285 * The cursor was deleted in duplicate records, and moved 286 * backward to a record that has yet to be returned. Clear 287 * that flag, and return the record. 288 */ 289 if (F_ISSET(c, CURS_BEFORE)) { 290 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 291 ep->page = h; 292 ep->index = c->pg.index; 293 return (RET_SUCCESS); 294 } 295 idx = c->pg.index; 296 if (idx == 0) { 297 pg = h->prevpg; 298 mpool_put(t->bt_mp, h, 0); 299 if (pg == P_INVALID) 300 return (RET_SPECIAL); 301 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 302 return (RET_ERROR); 303 idx = NEXTINDEX(h) - 1; 304 } else 305 --idx; 306 break; 307 } 308 309 ep->page = h; 310 ep->index = idx; 311 return (RET_SUCCESS); 312 } 313 314 /* 315 * __bt_first -- 316 * Find the first entry. 317 * 318 * Parameters: 319 * t: the tree 320 * key: the key 321 * erval: return EPG 322 * exactp: pointer to exact match flag 323 * 324 * Returns: 325 * The first entry in the tree greater than or equal to key, 326 * or RET_SPECIAL if no such key exists. 327 */ 328 static int 329 __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 330 { 331 PAGE *h; 332 EPG *ep, save; 333 pgno_t pg; 334 335 /* 336 * Find any matching record; __bt_search pins the page. 337 * 338 * If it's an exact match and duplicates are possible, walk backwards 339 * in the tree until we find the first one. Otherwise, make sure it's 340 * a valid key (__bt_search may return an index just past the end of a 341 * page) and return it. 342 */ 343 if ((ep = __bt_search(t, key, exactp)) == NULL) 344 return (0); 345 if (*exactp) { 346 if (F_ISSET(t, B_NODUPS)) { 347 *erval = *ep; 348 return (RET_SUCCESS); 349 } 350 351 /* 352 * Walk backwards, as long as the entry matches and there are 353 * keys left in the tree. Save a copy of each match in case 354 * we go too far. 355 */ 356 save = *ep; 357 h = ep->page; 358 do { 359 if (save.page->pgno != ep->page->pgno) { 360 mpool_put(t->bt_mp, save.page, 0); 361 save = *ep; 362 } else 363 save.index = ep->index; 364 365 /* 366 * Don't unpin the page the last (or original) match 367 * was on, but make sure it's unpinned if an error 368 * occurs. 369 */ 370 if (ep->index == 0) { 371 if (h->prevpg == P_INVALID) 372 break; 373 if (h->pgno != save.page->pgno) 374 mpool_put(t->bt_mp, h, 0); 375 if ((h = mpool_get(t->bt_mp, 376 h->prevpg, 0)) == NULL) { 377 if (h->pgno == save.page->pgno) 378 mpool_put(t->bt_mp, 379 save.page, 0); 380 return (RET_ERROR); 381 } 382 ep->page = h; 383 ep->index = NEXTINDEX(h); 384 } 385 --ep->index; 386 } while (__bt_cmp(t, key, ep) == 0); 387 388 /* 389 * Reach here with the last page that was looked at pinned, 390 * which may or may not be the same as the last (or original) 391 * match page. If it's not useful, release it. 392 */ 393 if (h->pgno != save.page->pgno) 394 mpool_put(t->bt_mp, h, 0); 395 396 *erval = save; 397 return (RET_SUCCESS); 398 } 399 400 /* If at the end of a page, find the next entry. */ 401 if (ep->index == NEXTINDEX(ep->page)) { 402 h = ep->page; 403 pg = h->nextpg; 404 mpool_put(t->bt_mp, h, 0); 405 if (pg == P_INVALID) 406 return (RET_SPECIAL); 407 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 408 return (RET_ERROR); 409 ep->index = 0; 410 ep->page = h; 411 } 412 *erval = *ep; 413 return (RET_SUCCESS); 414 } 415 416 /* 417 * __bt_setcur -- 418 * Set the cursor to an entry in the tree. 419 * 420 * Parameters: 421 * t: the tree 422 * pgno: page number 423 * idx: page index 424 */ 425 void 426 __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) 427 { 428 /* Lose any already deleted key. */ 429 if (t->bt_cursor.key.data != NULL) { 430 free(t->bt_cursor.key.data); 431 t->bt_cursor.key.size = 0; 432 t->bt_cursor.key.data = NULL; 433 } 434 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 435 436 /* Update the cursor. */ 437 t->bt_cursor.pg.pgno = pgno; 438 t->bt_cursor.pg.index = idx; 439 F_SET(&t->bt_cursor, CURS_INIT); 440 } 441