1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Mike Olson. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/types.h> 36 37 #include <errno.h> 38 #include <stddef.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 42 #include <db.h> 43 #include "btree.h" 44 45 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 46 static int __bt_seqadv(BTREE *, EPG *, int); 47 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 48 49 /* 50 * Sequential scan support. 51 * 52 * The tree can be scanned sequentially, starting from either end of the 53 * tree or from any specific key. A scan request before any scanning is 54 * done is initialized as starting from the least node. 55 */ 56 57 /* 58 * __bt_seq -- 59 * Btree sequential scan interface. 60 * 61 * Parameters: 62 * dbp: pointer to access method 63 * key: key for positioning and return value 64 * data: data return value 65 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 66 * 67 * Returns: 68 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 69 */ 70 int 71 __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) 72 { 73 BTREE *t; 74 EPG e; 75 int status; 76 77 t = dbp->internal; 78 79 /* Toss any page pinned across calls. */ 80 if (t->bt_pinned != NULL) { 81 mpool_put(t->bt_mp, t->bt_pinned, 0); 82 t->bt_pinned = NULL; 83 } 84 85 /* 86 * If scan uninitialized as yet, or starting at a specific record, set 87 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 88 * the page the cursor references if they're successful. 89 */ 90 switch (flags) { 91 case R_NEXT: 92 case R_PREV: 93 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 94 status = __bt_seqadv(t, &e, flags); 95 break; 96 } 97 /* FALLTHROUGH */ 98 case R_FIRST: 99 case R_LAST: 100 case R_CURSOR: 101 status = __bt_seqset(t, &e, key, flags); 102 break; 103 default: 104 errno = EINVAL; 105 return (RET_ERROR); 106 } 107 108 if (status == RET_SUCCESS) { 109 __bt_setcur(t, e.page->pgno, e.index); 110 111 status = 112 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 113 114 /* 115 * If the user is doing concurrent access, we copied the 116 * key/data, toss the page. 117 */ 118 if (F_ISSET(t, B_DB_LOCK)) 119 mpool_put(t->bt_mp, e.page, 0); 120 else 121 t->bt_pinned = e.page; 122 } 123 return (status); 124 } 125 126 /* 127 * __bt_seqset -- 128 * Set the sequential scan to a specific key. 129 * 130 * Parameters: 131 * t: tree 132 * ep: storage for returned key 133 * key: key for initial scan position 134 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 135 * 136 * Side effects: 137 * Pins the page the cursor references. 138 * 139 * Returns: 140 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 141 */ 142 static int 143 __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 144 { 145 PAGE *h; 146 pgno_t pg; 147 int exact; 148 149 /* 150 * Find the first, last or specific key in the tree and point the 151 * cursor at it. The cursor may not be moved until a new key has 152 * been found. 153 */ 154 switch (flags) { 155 case R_CURSOR: /* Keyed scan. */ 156 /* 157 * Find the first instance of the key or the smallest key 158 * which is greater than or equal to the specified key. 159 */ 160 if (key->data == NULL || key->size == 0) { 161 errno = EINVAL; 162 return (RET_ERROR); 163 } 164 return (__bt_first(t, key, ep, &exact)); 165 case R_FIRST: /* First record. */ 166 case R_NEXT: 167 /* Walk down the left-hand side of the tree. */ 168 for (pg = P_ROOT;;) { 169 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 170 return (RET_ERROR); 171 172 /* Check for an empty tree. */ 173 if (NEXTINDEX(h) == 0) { 174 mpool_put(t->bt_mp, h, 0); 175 return (RET_SPECIAL); 176 } 177 178 if (h->flags & (P_BLEAF | P_RLEAF)) 179 break; 180 pg = GETBINTERNAL(h, 0)->pgno; 181 mpool_put(t->bt_mp, h, 0); 182 } 183 ep->page = h; 184 ep->index = 0; 185 break; 186 case R_LAST: /* Last record. */ 187 case R_PREV: 188 /* Walk down the right-hand side of the tree. */ 189 for (pg = P_ROOT;;) { 190 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 191 return (RET_ERROR); 192 193 /* Check for an empty tree. */ 194 if (NEXTINDEX(h) == 0) { 195 mpool_put(t->bt_mp, h, 0); 196 return (RET_SPECIAL); 197 } 198 199 if (h->flags & (P_BLEAF | P_RLEAF)) 200 break; 201 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 202 mpool_put(t->bt_mp, h, 0); 203 } 204 205 ep->page = h; 206 ep->index = NEXTINDEX(h) - 1; 207 break; 208 } 209 return (RET_SUCCESS); 210 } 211 212 /* 213 * __bt_seqadvance -- 214 * Advance the sequential scan. 215 * 216 * Parameters: 217 * t: tree 218 * flags: R_NEXT, R_PREV 219 * 220 * Side effects: 221 * Pins the page the new key/data record is on. 222 * 223 * Returns: 224 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 225 */ 226 static int 227 __bt_seqadv(BTREE *t, EPG *ep, int flags) 228 { 229 CURSOR *c; 230 PAGE *h; 231 indx_t idx; 232 pgno_t pg; 233 int exact; 234 235 /* 236 * There are a couple of states that we can be in. The cursor has 237 * been initialized by the time we get here, but that's all we know. 238 */ 239 c = &t->bt_cursor; 240 241 /* 242 * The cursor was deleted where there weren't any duplicate records, 243 * so the key was saved. Find out where that key would go in the 244 * current tree. It doesn't matter if the returned key is an exact 245 * match or not -- if it's an exact match, the record was added after 246 * the delete so we can just return it. If not, as long as there's 247 * a record there, return it. 248 */ 249 if (F_ISSET(c, CURS_ACQUIRE)) 250 return (__bt_first(t, &c->key, ep, &exact)); 251 252 /* Get the page referenced by the cursor. */ 253 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 254 return (RET_ERROR); 255 256 /* 257 * Find the next/previous record in the tree and point the cursor at 258 * it. The cursor may not be moved until a new key has been found. 259 */ 260 switch (flags) { 261 case R_NEXT: /* Next record. */ 262 /* 263 * The cursor was deleted in duplicate records, and moved 264 * forward to a record that has yet to be returned. Clear 265 * that flag, and return the record. 266 */ 267 if (F_ISSET(c, CURS_AFTER)) 268 goto usecurrent; 269 idx = c->pg.index; 270 if (++idx == NEXTINDEX(h)) { 271 pg = h->nextpg; 272 mpool_put(t->bt_mp, h, 0); 273 if (pg == P_INVALID) 274 return (RET_SPECIAL); 275 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 276 return (RET_ERROR); 277 idx = 0; 278 } 279 break; 280 case R_PREV: /* Previous record. */ 281 /* 282 * The cursor was deleted in duplicate records, and moved 283 * backward to a record that has yet to be returned. Clear 284 * that flag, and return the record. 285 */ 286 if (F_ISSET(c, CURS_BEFORE)) { 287 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 288 ep->page = h; 289 ep->index = c->pg.index; 290 return (RET_SUCCESS); 291 } 292 idx = c->pg.index; 293 if (idx == 0) { 294 pg = h->prevpg; 295 mpool_put(t->bt_mp, h, 0); 296 if (pg == P_INVALID) 297 return (RET_SPECIAL); 298 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 299 return (RET_ERROR); 300 idx = NEXTINDEX(h) - 1; 301 } else 302 --idx; 303 break; 304 } 305 306 ep->page = h; 307 ep->index = idx; 308 return (RET_SUCCESS); 309 } 310 311 /* 312 * __bt_first -- 313 * Find the first entry. 314 * 315 * Parameters: 316 * t: the tree 317 * key: the key 318 * erval: return EPG 319 * exactp: pointer to exact match flag 320 * 321 * Returns: 322 * The first entry in the tree greater than or equal to key, 323 * or RET_SPECIAL if no such key exists. 324 */ 325 static int 326 __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 327 { 328 PAGE *h; 329 EPG *ep, save; 330 pgno_t pg; 331 332 /* 333 * Find any matching record; __bt_search pins the page. 334 * 335 * If it's an exact match and duplicates are possible, walk backwards 336 * in the tree until we find the first one. Otherwise, make sure it's 337 * a valid key (__bt_search may return an index just past the end of a 338 * page) and return it. 339 */ 340 if ((ep = __bt_search(t, key, exactp)) == NULL) 341 return (0); 342 if (*exactp) { 343 if (F_ISSET(t, B_NODUPS)) { 344 *erval = *ep; 345 return (RET_SUCCESS); 346 } 347 348 /* 349 * Walk backwards, as long as the entry matches and there are 350 * keys left in the tree. Save a copy of each match in case 351 * we go too far. 352 */ 353 save = *ep; 354 h = ep->page; 355 do { 356 if (save.page->pgno != ep->page->pgno) { 357 mpool_put(t->bt_mp, save.page, 0); 358 save = *ep; 359 } else 360 save.index = ep->index; 361 362 /* 363 * Don't unpin the page the last (or original) match 364 * was on, but make sure it's unpinned if an error 365 * occurs. 366 */ 367 if (ep->index == 0) { 368 if (h->prevpg == P_INVALID) 369 break; 370 if (h->pgno != save.page->pgno) 371 mpool_put(t->bt_mp, h, 0); 372 if ((h = mpool_get(t->bt_mp, 373 h->prevpg, 0)) == NULL) { 374 if (h->pgno == save.page->pgno) 375 mpool_put(t->bt_mp, 376 save.page, 0); 377 return (RET_ERROR); 378 } 379 ep->page = h; 380 ep->index = NEXTINDEX(h); 381 } 382 --ep->index; 383 } while (__bt_cmp(t, key, ep) == 0); 384 385 /* 386 * Reach here with the last page that was looked at pinned, 387 * which may or may not be the same as the last (or original) 388 * match page. If it's not useful, release it. 389 */ 390 if (h->pgno != save.page->pgno) 391 mpool_put(t->bt_mp, h, 0); 392 393 *erval = save; 394 return (RET_SUCCESS); 395 } 396 397 /* If at the end of a page, find the next entry. */ 398 if (ep->index == NEXTINDEX(ep->page)) { 399 h = ep->page; 400 pg = h->nextpg; 401 mpool_put(t->bt_mp, h, 0); 402 if (pg == P_INVALID) 403 return (RET_SPECIAL); 404 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 405 return (RET_ERROR); 406 ep->index = 0; 407 ep->page = h; 408 } 409 *erval = *ep; 410 return (RET_SUCCESS); 411 } 412 413 /* 414 * __bt_setcur -- 415 * Set the cursor to an entry in the tree. 416 * 417 * Parameters: 418 * t: the tree 419 * pgno: page number 420 * idx: page index 421 */ 422 void 423 __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) 424 { 425 /* Lose any already deleted key. */ 426 if (t->bt_cursor.key.data != NULL) { 427 free(t->bt_cursor.key.data); 428 t->bt_cursor.key.size = 0; 429 t->bt_cursor.key.data = NULL; 430 } 431 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 432 433 /* Update the cursor. */ 434 t->bt_cursor.pg.pgno = pgno; 435 t->bt_cursor.pg.index = idx; 436 F_SET(&t->bt_cursor, CURS_INIT); 437 } 438