1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #if defined(LIBC_SCCS) && !defined(lint) 34 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 35 #endif /* LIBC_SCCS and not lint */ 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/types.h> 40 41 #include <errno.h> 42 #include <stddef.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 46 #include <db.h> 47 #include "btree.h" 48 49 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 50 static int __bt_seqadv(BTREE *, EPG *, int); 51 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 52 53 /* 54 * Sequential scan support. 55 * 56 * The tree can be scanned sequentially, starting from either end of the 57 * tree or from any specific key. A scan request before any scanning is 58 * done is initialized as starting from the least node. 59 */ 60 61 /* 62 * __bt_seq -- 63 * Btree sequential scan interface. 64 * 65 * Parameters: 66 * dbp: pointer to access method 67 * key: key for positioning and return value 68 * data: data return value 69 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 70 * 71 * Returns: 72 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 73 */ 74 int 75 __bt_seq(dbp, key, data, flags) 76 const DB *dbp; 77 DBT *key, *data; 78 u_int flags; 79 { 80 BTREE *t; 81 EPG e; 82 int status; 83 84 t = dbp->internal; 85 86 /* Toss any page pinned across calls. */ 87 if (t->bt_pinned != NULL) { 88 mpool_put(t->bt_mp, t->bt_pinned, 0); 89 t->bt_pinned = NULL; 90 } 91 92 /* 93 * If scan unitialized as yet, or starting at a specific record, set 94 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 95 * the page the cursor references if they're successful. 96 */ 97 switch (flags) { 98 case R_NEXT: 99 case R_PREV: 100 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 101 status = __bt_seqadv(t, &e, flags); 102 break; 103 } 104 /* FALLTHROUGH */ 105 case R_FIRST: 106 case R_LAST: 107 case R_CURSOR: 108 status = __bt_seqset(t, &e, key, flags); 109 break; 110 default: 111 errno = EINVAL; 112 return (RET_ERROR); 113 } 114 115 if (status == RET_SUCCESS) { 116 __bt_setcur(t, e.page->pgno, e.index); 117 118 status = 119 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 120 121 /* 122 * If the user is doing concurrent access, we copied the 123 * key/data, toss the page. 124 */ 125 if (F_ISSET(t, B_DB_LOCK)) 126 mpool_put(t->bt_mp, e.page, 0); 127 else 128 t->bt_pinned = e.page; 129 } 130 return (status); 131 } 132 133 /* 134 * __bt_seqset -- 135 * Set the sequential scan to a specific key. 136 * 137 * Parameters: 138 * t: tree 139 * ep: storage for returned key 140 * key: key for initial scan position 141 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 142 * 143 * Side effects: 144 * Pins the page the cursor references. 145 * 146 * Returns: 147 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 148 */ 149 static int 150 __bt_seqset(t, ep, key, flags) 151 BTREE *t; 152 EPG *ep; 153 DBT *key; 154 int flags; 155 { 156 PAGE *h; 157 pgno_t pg; 158 int exact; 159 160 /* 161 * Find the first, last or specific key in the tree and point the 162 * cursor at it. The cursor may not be moved until a new key has 163 * been found. 164 */ 165 switch (flags) { 166 case R_CURSOR: /* Keyed scan. */ 167 /* 168 * Find the first instance of the key or the smallest key 169 * which is greater than or equal to the specified key. 170 */ 171 if (key->data == NULL || key->size == 0) { 172 errno = EINVAL; 173 return (RET_ERROR); 174 } 175 return (__bt_first(t, key, ep, &exact)); 176 case R_FIRST: /* First record. */ 177 case R_NEXT: 178 /* Walk down the left-hand side of the tree. */ 179 for (pg = P_ROOT;;) { 180 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 181 return (RET_ERROR); 182 183 /* Check for an empty tree. */ 184 if (NEXTINDEX(h) == 0) { 185 mpool_put(t->bt_mp, h, 0); 186 return (RET_SPECIAL); 187 } 188 189 if (h->flags & (P_BLEAF | P_RLEAF)) 190 break; 191 pg = GETBINTERNAL(h, 0)->pgno; 192 mpool_put(t->bt_mp, h, 0); 193 } 194 ep->page = h; 195 ep->index = 0; 196 break; 197 case R_LAST: /* Last record. */ 198 case R_PREV: 199 /* Walk down the right-hand side of the tree. */ 200 for (pg = P_ROOT;;) { 201 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 202 return (RET_ERROR); 203 204 /* Check for an empty tree. */ 205 if (NEXTINDEX(h) == 0) { 206 mpool_put(t->bt_mp, h, 0); 207 return (RET_SPECIAL); 208 } 209 210 if (h->flags & (P_BLEAF | P_RLEAF)) 211 break; 212 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 213 mpool_put(t->bt_mp, h, 0); 214 } 215 216 ep->page = h; 217 ep->index = NEXTINDEX(h) - 1; 218 break; 219 } 220 return (RET_SUCCESS); 221 } 222 223 /* 224 * __bt_seqadvance -- 225 * Advance the sequential scan. 226 * 227 * Parameters: 228 * t: tree 229 * flags: R_NEXT, R_PREV 230 * 231 * Side effects: 232 * Pins the page the new key/data record is on. 233 * 234 * Returns: 235 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 236 */ 237 static int 238 __bt_seqadv(t, ep, flags) 239 BTREE *t; 240 EPG *ep; 241 int flags; 242 { 243 CURSOR *c; 244 PAGE *h; 245 indx_t index; 246 pgno_t pg; 247 int exact; 248 249 /* 250 * There are a couple of states that we can be in. The cursor has 251 * been initialized by the time we get here, but that's all we know. 252 */ 253 c = &t->bt_cursor; 254 255 /* 256 * The cursor was deleted where there weren't any duplicate records, 257 * so the key was saved. Find out where that key would go in the 258 * current tree. It doesn't matter if the returned key is an exact 259 * match or not -- if it's an exact match, the record was added after 260 * the delete so we can just return it. If not, as long as there's 261 * a record there, return it. 262 */ 263 if (F_ISSET(c, CURS_ACQUIRE)) 264 return (__bt_first(t, &c->key, ep, &exact)); 265 266 /* Get the page referenced by the cursor. */ 267 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 268 return (RET_ERROR); 269 270 /* 271 * Find the next/previous record in the tree and point the cursor at 272 * it. The cursor may not be moved until a new key has been found. 273 */ 274 switch (flags) { 275 case R_NEXT: /* Next record. */ 276 /* 277 * The cursor was deleted in duplicate records, and moved 278 * forward to a record that has yet to be returned. Clear 279 * that flag, and return the record. 280 */ 281 if (F_ISSET(c, CURS_AFTER)) 282 goto usecurrent; 283 index = c->pg.index; 284 if (++index == NEXTINDEX(h)) { 285 pg = h->nextpg; 286 mpool_put(t->bt_mp, h, 0); 287 if (pg == P_INVALID) 288 return (RET_SPECIAL); 289 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 290 return (RET_ERROR); 291 index = 0; 292 } 293 break; 294 case R_PREV: /* Previous record. */ 295 /* 296 * The cursor was deleted in duplicate records, and moved 297 * backward to a record that has yet to be returned. Clear 298 * that flag, and return the record. 299 */ 300 if (F_ISSET(c, CURS_BEFORE)) { 301 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 302 ep->page = h; 303 ep->index = c->pg.index; 304 return (RET_SUCCESS); 305 } 306 index = c->pg.index; 307 if (index == 0) { 308 pg = h->prevpg; 309 mpool_put(t->bt_mp, h, 0); 310 if (pg == P_INVALID) 311 return (RET_SPECIAL); 312 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 313 return (RET_ERROR); 314 index = NEXTINDEX(h) - 1; 315 } else 316 --index; 317 break; 318 } 319 320 ep->page = h; 321 ep->index = index; 322 return (RET_SUCCESS); 323 } 324 325 /* 326 * __bt_first -- 327 * Find the first entry. 328 * 329 * Parameters: 330 * t: the tree 331 * key: the key 332 * erval: return EPG 333 * exactp: pointer to exact match flag 334 * 335 * Returns: 336 * The first entry in the tree greater than or equal to key, 337 * or RET_SPECIAL if no such key exists. 338 */ 339 static int 340 __bt_first(t, key, erval, exactp) 341 BTREE *t; 342 const DBT *key; 343 EPG *erval; 344 int *exactp; 345 { 346 PAGE *h; 347 EPG *ep, save; 348 pgno_t pg; 349 350 /* 351 * Find any matching record; __bt_search pins the page. 352 * 353 * If it's an exact match and duplicates are possible, walk backwards 354 * in the tree until we find the first one. Otherwise, make sure it's 355 * a valid key (__bt_search may return an index just past the end of a 356 * page) and return it. 357 */ 358 if ((ep = __bt_search(t, key, exactp)) == NULL) 359 return (0); 360 if (*exactp) { 361 if (F_ISSET(t, B_NODUPS)) { 362 *erval = *ep; 363 return (RET_SUCCESS); 364 } 365 366 /* 367 * Walk backwards, as long as the entry matches and there are 368 * keys left in the tree. Save a copy of each match in case 369 * we go too far. 370 */ 371 save = *ep; 372 h = ep->page; 373 do { 374 if (save.page->pgno != ep->page->pgno) { 375 mpool_put(t->bt_mp, save.page, 0); 376 save = *ep; 377 } else 378 save.index = ep->index; 379 380 /* 381 * Don't unpin the page the last (or original) match 382 * was on, but make sure it's unpinned if an error 383 * occurs. 384 */ 385 if (ep->index == 0) { 386 if (h->prevpg == P_INVALID) 387 break; 388 if (h->pgno != save.page->pgno) 389 mpool_put(t->bt_mp, h, 0); 390 if ((h = mpool_get(t->bt_mp, 391 h->prevpg, 0)) == NULL) { 392 if (h->pgno == save.page->pgno) 393 mpool_put(t->bt_mp, 394 save.page, 0); 395 return (RET_ERROR); 396 } 397 ep->page = h; 398 ep->index = NEXTINDEX(h); 399 } 400 --ep->index; 401 } while (__bt_cmp(t, key, ep) == 0); 402 403 /* 404 * Reach here with the last page that was looked at pinned, 405 * which may or may not be the same as the last (or original) 406 * match page. If it's not useful, release it. 407 */ 408 if (h->pgno != save.page->pgno) 409 mpool_put(t->bt_mp, h, 0); 410 411 *erval = save; 412 return (RET_SUCCESS); 413 } 414 415 /* If at the end of a page, find the next entry. */ 416 if (ep->index == NEXTINDEX(ep->page)) { 417 h = ep->page; 418 pg = h->nextpg; 419 mpool_put(t->bt_mp, h, 0); 420 if (pg == P_INVALID) 421 return (RET_SPECIAL); 422 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 423 return (RET_ERROR); 424 ep->index = 0; 425 ep->page = h; 426 } 427 *erval = *ep; 428 return (RET_SUCCESS); 429 } 430 431 /* 432 * __bt_setcur -- 433 * Set the cursor to an entry in the tree. 434 * 435 * Parameters: 436 * t: the tree 437 * pgno: page number 438 * index: page index 439 */ 440 void 441 __bt_setcur(t, pgno, index) 442 BTREE *t; 443 pgno_t pgno; 444 u_int index; 445 { 446 /* Lose any already deleted key. */ 447 if (t->bt_cursor.key.data != NULL) { 448 free(t->bt_cursor.key.data); 449 t->bt_cursor.key.size = 0; 450 t->bt_cursor.key.data = NULL; 451 } 452 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 453 454 /* Update the cursor. */ 455 t->bt_cursor.pg.pgno = pgno; 456 t->bt_cursor.pg.index = index; 457 F_SET(&t->bt_cursor, CURS_INIT); 458 } 459