1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 39 #endif /* LIBC_SCCS and not lint */ 40 #include <sys/cdefs.h> 41 __FBSDID("$FreeBSD$"); 42 43 #include <sys/types.h> 44 45 #include <errno.h> 46 #include <stddef.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 50 #include <db.h> 51 #include "btree.h" 52 53 static int __bt_first(BTREE *, const DBT *, EPG *, int *); 54 static int __bt_seqadv(BTREE *, EPG *, int); 55 static int __bt_seqset(BTREE *, EPG *, DBT *, int); 56 57 /* 58 * Sequential scan support. 59 * 60 * The tree can be scanned sequentially, starting from either end of the 61 * tree or from any specific key. A scan request before any scanning is 62 * done is initialized as starting from the least node. 63 */ 64 65 /* 66 * __bt_seq -- 67 * Btree sequential scan interface. 68 * 69 * Parameters: 70 * dbp: pointer to access method 71 * key: key for positioning and return value 72 * data: data return value 73 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 74 * 75 * Returns: 76 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 77 */ 78 int 79 __bt_seq(dbp, key, data, flags) 80 const DB *dbp; 81 DBT *key, *data; 82 u_int flags; 83 { 84 BTREE *t; 85 EPG e; 86 int status; 87 88 t = dbp->internal; 89 90 /* Toss any page pinned across calls. */ 91 if (t->bt_pinned != NULL) { 92 mpool_put(t->bt_mp, t->bt_pinned, 0); 93 t->bt_pinned = NULL; 94 } 95 96 /* 97 * If scan unitialized as yet, or starting at a specific record, set 98 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 99 * the page the cursor references if they're successful. 100 */ 101 switch (flags) { 102 case R_NEXT: 103 case R_PREV: 104 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 105 status = __bt_seqadv(t, &e, flags); 106 break; 107 } 108 /* FALLTHROUGH */ 109 case R_FIRST: 110 case R_LAST: 111 case R_CURSOR: 112 status = __bt_seqset(t, &e, key, flags); 113 break; 114 default: 115 errno = EINVAL; 116 return (RET_ERROR); 117 } 118 119 if (status == RET_SUCCESS) { 120 __bt_setcur(t, e.page->pgno, e.index); 121 122 status = 123 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 124 125 /* 126 * If the user is doing concurrent access, we copied the 127 * key/data, toss the page. 128 */ 129 if (F_ISSET(t, B_DB_LOCK)) 130 mpool_put(t->bt_mp, e.page, 0); 131 else 132 t->bt_pinned = e.page; 133 } 134 return (status); 135 } 136 137 /* 138 * __bt_seqset -- 139 * Set the sequential scan to a specific key. 140 * 141 * Parameters: 142 * t: tree 143 * ep: storage for returned key 144 * key: key for initial scan position 145 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 146 * 147 * Side effects: 148 * Pins the page the cursor references. 149 * 150 * Returns: 151 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 152 */ 153 static int 154 __bt_seqset(t, ep, key, flags) 155 BTREE *t; 156 EPG *ep; 157 DBT *key; 158 int flags; 159 { 160 PAGE *h; 161 pgno_t pg; 162 int exact; 163 164 /* 165 * Find the first, last or specific key in the tree and point the 166 * cursor at it. The cursor may not be moved until a new key has 167 * been found. 168 */ 169 switch (flags) { 170 case R_CURSOR: /* Keyed scan. */ 171 /* 172 * Find the first instance of the key or the smallest key 173 * which is greater than or equal to the specified key. 174 */ 175 if (key->data == NULL || key->size == 0) { 176 errno = EINVAL; 177 return (RET_ERROR); 178 } 179 return (__bt_first(t, key, ep, &exact)); 180 case R_FIRST: /* First record. */ 181 case R_NEXT: 182 /* Walk down the left-hand side of the tree. */ 183 for (pg = P_ROOT;;) { 184 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 185 return (RET_ERROR); 186 187 /* Check for an empty tree. */ 188 if (NEXTINDEX(h) == 0) { 189 mpool_put(t->bt_mp, h, 0); 190 return (RET_SPECIAL); 191 } 192 193 if (h->flags & (P_BLEAF | P_RLEAF)) 194 break; 195 pg = GETBINTERNAL(h, 0)->pgno; 196 mpool_put(t->bt_mp, h, 0); 197 } 198 ep->page = h; 199 ep->index = 0; 200 break; 201 case R_LAST: /* Last record. */ 202 case R_PREV: 203 /* Walk down the right-hand side of the tree. */ 204 for (pg = P_ROOT;;) { 205 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 206 return (RET_ERROR); 207 208 /* Check for an empty tree. */ 209 if (NEXTINDEX(h) == 0) { 210 mpool_put(t->bt_mp, h, 0); 211 return (RET_SPECIAL); 212 } 213 214 if (h->flags & (P_BLEAF | P_RLEAF)) 215 break; 216 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 217 mpool_put(t->bt_mp, h, 0); 218 } 219 220 ep->page = h; 221 ep->index = NEXTINDEX(h) - 1; 222 break; 223 } 224 return (RET_SUCCESS); 225 } 226 227 /* 228 * __bt_seqadvance -- 229 * Advance the sequential scan. 230 * 231 * Parameters: 232 * t: tree 233 * flags: R_NEXT, R_PREV 234 * 235 * Side effects: 236 * Pins the page the new key/data record is on. 237 * 238 * Returns: 239 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 240 */ 241 static int 242 __bt_seqadv(t, ep, flags) 243 BTREE *t; 244 EPG *ep; 245 int flags; 246 { 247 CURSOR *c; 248 PAGE *h; 249 indx_t index; 250 pgno_t pg; 251 int exact; 252 253 /* 254 * There are a couple of states that we can be in. The cursor has 255 * been initialized by the time we get here, but that's all we know. 256 */ 257 c = &t->bt_cursor; 258 259 /* 260 * The cursor was deleted where there weren't any duplicate records, 261 * so the key was saved. Find out where that key would go in the 262 * current tree. It doesn't matter if the returned key is an exact 263 * match or not -- if it's an exact match, the record was added after 264 * the delete so we can just return it. If not, as long as there's 265 * a record there, return it. 266 */ 267 if (F_ISSET(c, CURS_ACQUIRE)) 268 return (__bt_first(t, &c->key, ep, &exact)); 269 270 /* Get the page referenced by the cursor. */ 271 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 272 return (RET_ERROR); 273 274 /* 275 * Find the next/previous record in the tree and point the cursor at 276 * it. The cursor may not be moved until a new key has been found. 277 */ 278 switch (flags) { 279 case R_NEXT: /* Next record. */ 280 /* 281 * The cursor was deleted in duplicate records, and moved 282 * forward to a record that has yet to be returned. Clear 283 * that flag, and return the record. 284 */ 285 if (F_ISSET(c, CURS_AFTER)) 286 goto usecurrent; 287 index = c->pg.index; 288 if (++index == NEXTINDEX(h)) { 289 pg = h->nextpg; 290 mpool_put(t->bt_mp, h, 0); 291 if (pg == P_INVALID) 292 return (RET_SPECIAL); 293 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 294 return (RET_ERROR); 295 index = 0; 296 } 297 break; 298 case R_PREV: /* Previous record. */ 299 /* 300 * The cursor was deleted in duplicate records, and moved 301 * backward to a record that has yet to be returned. Clear 302 * that flag, and return the record. 303 */ 304 if (F_ISSET(c, CURS_BEFORE)) { 305 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 306 ep->page = h; 307 ep->index = c->pg.index; 308 return (RET_SUCCESS); 309 } 310 index = c->pg.index; 311 if (index == 0) { 312 pg = h->prevpg; 313 mpool_put(t->bt_mp, h, 0); 314 if (pg == P_INVALID) 315 return (RET_SPECIAL); 316 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 317 return (RET_ERROR); 318 index = NEXTINDEX(h) - 1; 319 } else 320 --index; 321 break; 322 } 323 324 ep->page = h; 325 ep->index = index; 326 return (RET_SUCCESS); 327 } 328 329 /* 330 * __bt_first -- 331 * Find the first entry. 332 * 333 * Parameters: 334 * t: the tree 335 * key: the key 336 * erval: return EPG 337 * exactp: pointer to exact match flag 338 * 339 * Returns: 340 * The first entry in the tree greater than or equal to key, 341 * or RET_SPECIAL if no such key exists. 342 */ 343 static int 344 __bt_first(t, key, erval, exactp) 345 BTREE *t; 346 const DBT *key; 347 EPG *erval; 348 int *exactp; 349 { 350 PAGE *h; 351 EPG *ep, save; 352 pgno_t pg; 353 354 /* 355 * Find any matching record; __bt_search pins the page. 356 * 357 * If it's an exact match and duplicates are possible, walk backwards 358 * in the tree until we find the first one. Otherwise, make sure it's 359 * a valid key (__bt_search may return an index just past the end of a 360 * page) and return it. 361 */ 362 if ((ep = __bt_search(t, key, exactp)) == NULL) 363 return (0); 364 if (*exactp) { 365 if (F_ISSET(t, B_NODUPS)) { 366 *erval = *ep; 367 return (RET_SUCCESS); 368 } 369 370 /* 371 * Walk backwards, as long as the entry matches and there are 372 * keys left in the tree. Save a copy of each match in case 373 * we go too far. 374 */ 375 save = *ep; 376 h = ep->page; 377 do { 378 if (save.page->pgno != ep->page->pgno) { 379 mpool_put(t->bt_mp, save.page, 0); 380 save = *ep; 381 } else 382 save.index = ep->index; 383 384 /* 385 * Don't unpin the page the last (or original) match 386 * was on, but make sure it's unpinned if an error 387 * occurs. 388 */ 389 if (ep->index == 0) { 390 if (h->prevpg == P_INVALID) 391 break; 392 if (h->pgno != save.page->pgno) 393 mpool_put(t->bt_mp, h, 0); 394 if ((h = mpool_get(t->bt_mp, 395 h->prevpg, 0)) == NULL) { 396 if (h->pgno == save.page->pgno) 397 mpool_put(t->bt_mp, 398 save.page, 0); 399 return (RET_ERROR); 400 } 401 ep->page = h; 402 ep->index = NEXTINDEX(h); 403 } 404 --ep->index; 405 } while (__bt_cmp(t, key, ep) == 0); 406 407 /* 408 * Reach here with the last page that was looked at pinned, 409 * which may or may not be the same as the last (or original) 410 * match page. If it's not useful, release it. 411 */ 412 if (h->pgno != save.page->pgno) 413 mpool_put(t->bt_mp, h, 0); 414 415 *erval = save; 416 return (RET_SUCCESS); 417 } 418 419 /* If at the end of a page, find the next entry. */ 420 if (ep->index == NEXTINDEX(ep->page)) { 421 h = ep->page; 422 pg = h->nextpg; 423 mpool_put(t->bt_mp, h, 0); 424 if (pg == P_INVALID) 425 return (RET_SPECIAL); 426 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 427 return (RET_ERROR); 428 ep->index = 0; 429 ep->page = h; 430 } 431 *erval = *ep; 432 return (RET_SUCCESS); 433 } 434 435 /* 436 * __bt_setcur -- 437 * Set the cursor to an entry in the tree. 438 * 439 * Parameters: 440 * t: the tree 441 * pgno: page number 442 * index: page index 443 */ 444 void 445 __bt_setcur(t, pgno, index) 446 BTREE *t; 447 pgno_t pgno; 448 u_int index; 449 { 450 /* Lose any already deleted key. */ 451 if (t->bt_cursor.key.data != NULL) { 452 free(t->bt_cursor.key.data); 453 t->bt_cursor.key.size = 0; 454 t->bt_cursor.key.data = NULL; 455 } 456 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 457 458 /* Update the cursor. */ 459 t->bt_cursor.pg.pgno = pgno; 460 t->bt_cursor.pg.index = index; 461 F_SET(&t->bt_cursor, CURS_INIT); 462 } 463