1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #if defined(LIBC_SCCS) && !defined(lint) 36 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 37 #endif /* LIBC_SCCS and not lint */ 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include <sys/types.h> 42 43 #include <errno.h> 44 #include <stddef.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 48 #include <db.h> 49 #include "btree.h" 50 51 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 52 static int __bt_seqadv(BTREE *, EPG *, int); 53 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 54 55 /* 56 * Sequential scan support. 57 * 58 * The tree can be scanned sequentially, starting from either end of the 59 * tree or from any specific key. A scan request before any scanning is 60 * done is initialized as starting from the least node. 61 */ 62 63 /* 64 * __bt_seq -- 65 * Btree sequential scan interface. 66 * 67 * Parameters: 68 * dbp: pointer to access method 69 * key: key for positioning and return value 70 * data: data return value 71 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 72 * 73 * Returns: 74 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 75 */ 76 int 77 __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) 78 { 79 BTREE *t; 80 EPG e; 81 int status; 82 83 t = dbp->internal; 84 85 /* Toss any page pinned across calls. */ 86 if (t->bt_pinned != NULL) { 87 mpool_put(t->bt_mp, t->bt_pinned, 0); 88 t->bt_pinned = NULL; 89 } 90 91 /* 92 * If scan unitialized as yet, or starting at a specific record, set 93 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 94 * the page the cursor references if they're successful. 95 */ 96 switch (flags) { 97 case R_NEXT: 98 case R_PREV: 99 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 100 status = __bt_seqadv(t, &e, flags); 101 break; 102 } 103 /* FALLTHROUGH */ 104 case R_FIRST: 105 case R_LAST: 106 case R_CURSOR: 107 status = __bt_seqset(t, &e, key, flags); 108 break; 109 default: 110 errno = EINVAL; 111 return (RET_ERROR); 112 } 113 114 if (status == RET_SUCCESS) { 115 __bt_setcur(t, e.page->pgno, e.index); 116 117 status = 118 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 119 120 /* 121 * If the user is doing concurrent access, we copied the 122 * key/data, toss the page. 123 */ 124 if (F_ISSET(t, B_DB_LOCK)) 125 mpool_put(t->bt_mp, e.page, 0); 126 else 127 t->bt_pinned = e.page; 128 } 129 return (status); 130 } 131 132 /* 133 * __bt_seqset -- 134 * Set the sequential scan to a specific key. 135 * 136 * Parameters: 137 * t: tree 138 * ep: storage for returned key 139 * key: key for initial scan position 140 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 141 * 142 * Side effects: 143 * Pins the page the cursor references. 144 * 145 * Returns: 146 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 147 */ 148 static int 149 __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 150 { 151 PAGE *h; 152 pgno_t pg; 153 int exact; 154 155 /* 156 * Find the first, last or specific key in the tree and point the 157 * cursor at it. The cursor may not be moved until a new key has 158 * been found. 159 */ 160 switch (flags) { 161 case R_CURSOR: /* Keyed scan. */ 162 /* 163 * Find the first instance of the key or the smallest key 164 * which is greater than or equal to the specified key. 165 */ 166 if (key->data == NULL || key->size == 0) { 167 errno = EINVAL; 168 return (RET_ERROR); 169 } 170 return (__bt_first(t, key, ep, &exact)); 171 case R_FIRST: /* First record. */ 172 case R_NEXT: 173 /* Walk down the left-hand side of the tree. */ 174 for (pg = P_ROOT;;) { 175 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 176 return (RET_ERROR); 177 178 /* Check for an empty tree. */ 179 if (NEXTINDEX(h) == 0) { 180 mpool_put(t->bt_mp, h, 0); 181 return (RET_SPECIAL); 182 } 183 184 if (h->flags & (P_BLEAF | P_RLEAF)) 185 break; 186 pg = GETBINTERNAL(h, 0)->pgno; 187 mpool_put(t->bt_mp, h, 0); 188 } 189 ep->page = h; 190 ep->index = 0; 191 break; 192 case R_LAST: /* Last record. */ 193 case R_PREV: 194 /* Walk down the right-hand side of the tree. */ 195 for (pg = P_ROOT;;) { 196 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 197 return (RET_ERROR); 198 199 /* Check for an empty tree. */ 200 if (NEXTINDEX(h) == 0) { 201 mpool_put(t->bt_mp, h, 0); 202 return (RET_SPECIAL); 203 } 204 205 if (h->flags & (P_BLEAF | P_RLEAF)) 206 break; 207 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 208 mpool_put(t->bt_mp, h, 0); 209 } 210 211 ep->page = h; 212 ep->index = NEXTINDEX(h) - 1; 213 break; 214 } 215 return (RET_SUCCESS); 216 } 217 218 /* 219 * __bt_seqadvance -- 220 * Advance the sequential scan. 221 * 222 * Parameters: 223 * t: tree 224 * flags: R_NEXT, R_PREV 225 * 226 * Side effects: 227 * Pins the page the new key/data record is on. 228 * 229 * Returns: 230 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 231 */ 232 static int 233 __bt_seqadv(BTREE *t, EPG *ep, int flags) 234 { 235 CURSOR *c; 236 PAGE *h; 237 indx_t idx; 238 pgno_t pg; 239 int exact; 240 241 /* 242 * There are a couple of states that we can be in. The cursor has 243 * been initialized by the time we get here, but that's all we know. 244 */ 245 c = &t->bt_cursor; 246 247 /* 248 * The cursor was deleted where there weren't any duplicate records, 249 * so the key was saved. Find out where that key would go in the 250 * current tree. It doesn't matter if the returned key is an exact 251 * match or not -- if it's an exact match, the record was added after 252 * the delete so we can just return it. If not, as long as there's 253 * a record there, return it. 254 */ 255 if (F_ISSET(c, CURS_ACQUIRE)) 256 return (__bt_first(t, &c->key, ep, &exact)); 257 258 /* Get the page referenced by the cursor. */ 259 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 260 return (RET_ERROR); 261 262 /* 263 * Find the next/previous record in the tree and point the cursor at 264 * it. The cursor may not be moved until a new key has been found. 265 */ 266 switch (flags) { 267 case R_NEXT: /* Next record. */ 268 /* 269 * The cursor was deleted in duplicate records, and moved 270 * forward to a record that has yet to be returned. Clear 271 * that flag, and return the record. 272 */ 273 if (F_ISSET(c, CURS_AFTER)) 274 goto usecurrent; 275 idx = c->pg.index; 276 if (++idx == NEXTINDEX(h)) { 277 pg = h->nextpg; 278 mpool_put(t->bt_mp, h, 0); 279 if (pg == P_INVALID) 280 return (RET_SPECIAL); 281 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 282 return (RET_ERROR); 283 idx = 0; 284 } 285 break; 286 case R_PREV: /* Previous record. */ 287 /* 288 * The cursor was deleted in duplicate records, and moved 289 * backward to a record that has yet to be returned. Clear 290 * that flag, and return the record. 291 */ 292 if (F_ISSET(c, CURS_BEFORE)) { 293 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 294 ep->page = h; 295 ep->index = c->pg.index; 296 return (RET_SUCCESS); 297 } 298 idx = c->pg.index; 299 if (idx == 0) { 300 pg = h->prevpg; 301 mpool_put(t->bt_mp, h, 0); 302 if (pg == P_INVALID) 303 return (RET_SPECIAL); 304 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 305 return (RET_ERROR); 306 idx = NEXTINDEX(h) - 1; 307 } else 308 --idx; 309 break; 310 } 311 312 ep->page = h; 313 ep->index = idx; 314 return (RET_SUCCESS); 315 } 316 317 /* 318 * __bt_first -- 319 * Find the first entry. 320 * 321 * Parameters: 322 * t: the tree 323 * key: the key 324 * erval: return EPG 325 * exactp: pointer to exact match flag 326 * 327 * Returns: 328 * The first entry in the tree greater than or equal to key, 329 * or RET_SPECIAL if no such key exists. 330 */ 331 static int 332 __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 333 { 334 PAGE *h; 335 EPG *ep, save; 336 pgno_t pg; 337 338 /* 339 * Find any matching record; __bt_search pins the page. 340 * 341 * If it's an exact match and duplicates are possible, walk backwards 342 * in the tree until we find the first one. Otherwise, make sure it's 343 * a valid key (__bt_search may return an index just past the end of a 344 * page) and return it. 345 */ 346 if ((ep = __bt_search(t, key, exactp)) == NULL) 347 return (0); 348 if (*exactp) { 349 if (F_ISSET(t, B_NODUPS)) { 350 *erval = *ep; 351 return (RET_SUCCESS); 352 } 353 354 /* 355 * Walk backwards, as long as the entry matches and there are 356 * keys left in the tree. Save a copy of each match in case 357 * we go too far. 358 */ 359 save = *ep; 360 h = ep->page; 361 do { 362 if (save.page->pgno != ep->page->pgno) { 363 mpool_put(t->bt_mp, save.page, 0); 364 save = *ep; 365 } else 366 save.index = ep->index; 367 368 /* 369 * Don't unpin the page the last (or original) match 370 * was on, but make sure it's unpinned if an error 371 * occurs. 372 */ 373 if (ep->index == 0) { 374 if (h->prevpg == P_INVALID) 375 break; 376 if (h->pgno != save.page->pgno) 377 mpool_put(t->bt_mp, h, 0); 378 if ((h = mpool_get(t->bt_mp, 379 h->prevpg, 0)) == NULL) { 380 if (h->pgno == save.page->pgno) 381 mpool_put(t->bt_mp, 382 save.page, 0); 383 return (RET_ERROR); 384 } 385 ep->page = h; 386 ep->index = NEXTINDEX(h); 387 } 388 --ep->index; 389 } while (__bt_cmp(t, key, ep) == 0); 390 391 /* 392 * Reach here with the last page that was looked at pinned, 393 * which may or may not be the same as the last (or original) 394 * match page. If it's not useful, release it. 395 */ 396 if (h->pgno != save.page->pgno) 397 mpool_put(t->bt_mp, h, 0); 398 399 *erval = save; 400 return (RET_SUCCESS); 401 } 402 403 /* If at the end of a page, find the next entry. */ 404 if (ep->index == NEXTINDEX(ep->page)) { 405 h = ep->page; 406 pg = h->nextpg; 407 mpool_put(t->bt_mp, h, 0); 408 if (pg == P_INVALID) 409 return (RET_SPECIAL); 410 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 411 return (RET_ERROR); 412 ep->index = 0; 413 ep->page = h; 414 } 415 *erval = *ep; 416 return (RET_SUCCESS); 417 } 418 419 /* 420 * __bt_setcur -- 421 * Set the cursor to an entry in the tree. 422 * 423 * Parameters: 424 * t: the tree 425 * pgno: page number 426 * idx: page index 427 */ 428 void 429 __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) 430 { 431 /* Lose any already deleted key. */ 432 if (t->bt_cursor.key.data != NULL) { 433 free(t->bt_cursor.key.data); 434 t->bt_cursor.key.size = 0; 435 t->bt_cursor.key.data = NULL; 436 } 437 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 438 439 /* Update the cursor. */ 440 t->bt_cursor.pg.pgno = pgno; 441 t->bt_cursor.pg.index = idx; 442 F_SET(&t->bt_cursor, CURS_INIT); 443 } 444