1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94"; 39 #endif /* LIBC_SCCS and not lint */ 40 41 #include <sys/types.h> 42 43 #include <errno.h> 44 #include <stddef.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 48 #include <db.h> 49 #include "btree.h" 50 51 static int __bt_first __P((BTREE *, const DBT *, EPG *, int *)); 52 static int __bt_seqadv __P((BTREE *, EPG *, int)); 53 static int __bt_seqset __P((BTREE *, EPG *, DBT *, int)); 54 55 /* 56 * Sequential scan support. 57 * 58 * The tree can be scanned sequentially, starting from either end of the 59 * tree or from any specific key. A scan request before any scanning is 60 * done is initialized as starting from the least node. 61 */ 62 63 /* 64 * __bt_seq -- 65 * Btree sequential scan interface. 66 * 67 * Parameters: 68 * dbp: pointer to access method 69 * key: key for positioning and return value 70 * data: data return value 71 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 72 * 73 * Returns: 74 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 75 */ 76 int 77 __bt_seq(dbp, key, data, flags) 78 const DB *dbp; 79 DBT *key, *data; 80 u_int flags; 81 { 82 BTREE *t; 83 EPG e; 84 int status; 85 86 t = dbp->internal; 87 88 /* Toss any page pinned across calls. */ 89 if (t->bt_pinned != NULL) { 90 mpool_put(t->bt_mp, t->bt_pinned, 0); 91 t->bt_pinned = NULL; 92 } 93 94 /* 95 * If scan unitialized as yet, or starting at a specific record, set 96 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 97 * the page the cursor references if they're successful. 98 */ 99 switch (flags) { 100 case R_NEXT: 101 case R_PREV: 102 if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 103 status = __bt_seqadv(t, &e, flags); 104 break; 105 } 106 /* FALLTHROUGH */ 107 case R_FIRST: 108 case R_LAST: 109 case R_CURSOR: 110 status = __bt_seqset(t, &e, key, flags); 111 break; 112 default: 113 errno = EINVAL; 114 return (RET_ERROR); 115 } 116 117 if (status == RET_SUCCESS) { 118 __bt_setcur(t, e.page->pgno, e.index); 119 120 status = 121 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 122 123 /* 124 * If the user is doing concurrent access, we copied the 125 * key/data, toss the page. 126 */ 127 if (F_ISSET(t, B_DB_LOCK)) 128 mpool_put(t->bt_mp, e.page, 0); 129 else 130 t->bt_pinned = e.page; 131 } 132 return (status); 133 } 134 135 /* 136 * __bt_seqset -- 137 * Set the sequential scan to a specific key. 138 * 139 * Parameters: 140 * t: tree 141 * ep: storage for returned key 142 * key: key for initial scan position 143 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 144 * 145 * Side effects: 146 * Pins the page the cursor references. 147 * 148 * Returns: 149 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 150 */ 151 static int 152 __bt_seqset(t, ep, key, flags) 153 BTREE *t; 154 EPG *ep; 155 DBT *key; 156 int flags; 157 { 158 PAGE *h; 159 pgno_t pg; 160 int exact; 161 162 /* 163 * Find the first, last or specific key in the tree and point the 164 * cursor at it. The cursor may not be moved until a new key has 165 * been found. 166 */ 167 switch (flags) { 168 case R_CURSOR: /* Keyed scan. */ 169 /* 170 * Find the first instance of the key or the smallest key 171 * which is greater than or equal to the specified key. 172 */ 173 if (key->data == NULL || key->size == 0) { 174 errno = EINVAL; 175 return (RET_ERROR); 176 } 177 return (__bt_first(t, key, ep, &exact)); 178 case R_FIRST: /* First record. */ 179 case R_NEXT: 180 /* Walk down the left-hand side of the tree. */ 181 for (pg = P_ROOT;;) { 182 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 183 return (RET_ERROR); 184 185 /* Check for an empty tree. */ 186 if (NEXTINDEX(h) == 0) { 187 mpool_put(t->bt_mp, h, 0); 188 return (RET_SPECIAL); 189 } 190 191 if (h->flags & (P_BLEAF | P_RLEAF)) 192 break; 193 pg = GETBINTERNAL(h, 0)->pgno; 194 mpool_put(t->bt_mp, h, 0); 195 } 196 ep->page = h; 197 ep->index = 0; 198 break; 199 case R_LAST: /* Last record. */ 200 case R_PREV: 201 /* Walk down the right-hand side of the tree. */ 202 for (pg = P_ROOT;;) { 203 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 204 return (RET_ERROR); 205 206 /* Check for an empty tree. */ 207 if (NEXTINDEX(h) == 0) { 208 mpool_put(t->bt_mp, h, 0); 209 return (RET_SPECIAL); 210 } 211 212 if (h->flags & (P_BLEAF | P_RLEAF)) 213 break; 214 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 215 mpool_put(t->bt_mp, h, 0); 216 } 217 218 ep->page = h; 219 ep->index = NEXTINDEX(h) - 1; 220 break; 221 } 222 return (RET_SUCCESS); 223 } 224 225 /* 226 * __bt_seqadvance -- 227 * Advance the sequential scan. 228 * 229 * Parameters: 230 * t: tree 231 * flags: R_NEXT, R_PREV 232 * 233 * Side effects: 234 * Pins the page the new key/data record is on. 235 * 236 * Returns: 237 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 238 */ 239 static int 240 __bt_seqadv(t, ep, flags) 241 BTREE *t; 242 EPG *ep; 243 int flags; 244 { 245 CURSOR *c; 246 PAGE *h; 247 indx_t index; 248 pgno_t pg; 249 int exact; 250 251 /* 252 * There are a couple of states that we can be in. The cursor has 253 * been initialized by the time we get here, but that's all we know. 254 */ 255 c = &t->bt_cursor; 256 257 /* 258 * The cursor was deleted where there weren't any duplicate records, 259 * so the key was saved. Find out where that key would go in the 260 * current tree. It doesn't matter if the returned key is an exact 261 * match or not -- if it's an exact match, the record was added after 262 * the delete so we can just return it. If not, as long as there's 263 * a record there, return it. 264 */ 265 if (F_ISSET(c, CURS_ACQUIRE)) 266 return (__bt_first(t, &c->key, ep, &exact)); 267 268 /* Get the page referenced by the cursor. */ 269 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 270 return (RET_ERROR); 271 272 /* 273 * Find the next/previous record in the tree and point the cursor at 274 * it. The cursor may not be moved until a new key has been found. 275 */ 276 switch (flags) { 277 case R_NEXT: /* Next record. */ 278 /* 279 * The cursor was deleted in duplicate records, and moved 280 * forward to a record that has yet to be returned. Clear 281 * that flag, and return the record. 282 */ 283 if (F_ISSET(c, CURS_AFTER)) 284 goto usecurrent; 285 index = c->pg.index; 286 if (++index == NEXTINDEX(h)) { 287 pg = h->nextpg; 288 mpool_put(t->bt_mp, h, 0); 289 if (pg == P_INVALID) 290 return (RET_SPECIAL); 291 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 292 return (RET_ERROR); 293 index = 0; 294 } 295 break; 296 case R_PREV: /* Previous record. */ 297 /* 298 * The cursor was deleted in duplicate records, and moved 299 * backward to a record that has yet to be returned. Clear 300 * that flag, and return the record. 301 */ 302 if (F_ISSET(c, CURS_BEFORE)) { 303 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 304 ep->page = h; 305 ep->index = c->pg.index; 306 return (RET_SUCCESS); 307 } 308 index = c->pg.index; 309 if (index == 0) { 310 pg = h->prevpg; 311 mpool_put(t->bt_mp, h, 0); 312 if (pg == P_INVALID) 313 return (RET_SPECIAL); 314 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 315 return (RET_ERROR); 316 index = NEXTINDEX(h) - 1; 317 } else 318 --index; 319 break; 320 } 321 322 ep->page = h; 323 ep->index = index; 324 return (RET_SUCCESS); 325 } 326 327 /* 328 * __bt_first -- 329 * Find the first entry. 330 * 331 * Parameters: 332 * t: the tree 333 * key: the key 334 * erval: return EPG 335 * exactp: pointer to exact match flag 336 * 337 * Returns: 338 * The first entry in the tree greater than or equal to key, 339 * or RET_SPECIAL if no such key exists. 340 */ 341 static int 342 __bt_first(t, key, erval, exactp) 343 BTREE *t; 344 const DBT *key; 345 EPG *erval; 346 int *exactp; 347 { 348 PAGE *h; 349 EPG *ep, save; 350 pgno_t pg; 351 352 /* 353 * Find any matching record; __bt_search pins the page. 354 * 355 * If it's an exact match and duplicates are possible, walk backwards 356 * in the tree until we find the first one. Otherwise, make sure it's 357 * a valid key (__bt_search may return an index just past the end of a 358 * page) and return it. 359 */ 360 if ((ep = __bt_search(t, key, exactp)) == NULL) 361 return (0); 362 if (*exactp) { 363 if (F_ISSET(t, B_NODUPS)) { 364 *erval = *ep; 365 return (RET_SUCCESS); 366 } 367 368 /* 369 * Walk backwards, as long as the entry matches and there are 370 * keys left in the tree. Save a copy of each match in case 371 * we go too far. 372 */ 373 save = *ep; 374 h = ep->page; 375 do { 376 if (save.page->pgno != ep->page->pgno) { 377 mpool_put(t->bt_mp, save.page, 0); 378 save = *ep; 379 } else 380 save.index = ep->index; 381 382 /* 383 * Don't unpin the page the last (or original) match 384 * was on, but make sure it's unpinned if an error 385 * occurs. 386 */ 387 if (ep->index == 0) { 388 if (h->prevpg == P_INVALID) 389 break; 390 if (h->pgno != save.page->pgno) 391 mpool_put(t->bt_mp, h, 0); 392 if ((h = mpool_get(t->bt_mp, 393 h->prevpg, 0)) == NULL) { 394 if (h->pgno == save.page->pgno) 395 mpool_put(t->bt_mp, 396 save.page, 0); 397 return (RET_ERROR); 398 } 399 ep->page = h; 400 ep->index = NEXTINDEX(h); 401 } 402 --ep->index; 403 } while (__bt_cmp(t, key, ep) == 0); 404 405 /* 406 * Reach here with the last page that was looked at pinned, 407 * which may or may not be the same as the last (or original) 408 * match page. If it's not useful, release it. 409 */ 410 if (h->pgno != save.page->pgno) 411 mpool_put(t->bt_mp, h, 0); 412 413 *erval = save; 414 return (RET_SUCCESS); 415 } 416 417 /* If at the end of a page, find the next entry. */ 418 if (ep->index == NEXTINDEX(ep->page)) { 419 h = ep->page; 420 pg = h->nextpg; 421 mpool_put(t->bt_mp, h, 0); 422 if (pg == P_INVALID) 423 return (RET_SPECIAL); 424 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 425 return (RET_ERROR); 426 ep->index = 0; 427 ep->page = h; 428 } 429 *erval = *ep; 430 return (RET_SUCCESS); 431 } 432 433 /* 434 * __bt_setcur -- 435 * Set the cursor to an entry in the tree. 436 * 437 * Parameters: 438 * t: the tree 439 * pgno: page number 440 * index: page index 441 */ 442 void 443 __bt_setcur(t, pgno, index) 444 BTREE *t; 445 pgno_t pgno; 446 u_int index; 447 { 448 /* Lose any already deleted key. */ 449 if (t->bt_cursor.key.data != NULL) { 450 free(t->bt_cursor.key.data); 451 t->bt_cursor.key.size = 0; 452 t->bt_cursor.key.data = NULL; 453 } 454 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 455 456 /* Update the cursor. */ 457 t->bt_cursor.pg.pgno = pgno; 458 t->bt_cursor.pg.index = index; 459 F_SET(&t->bt_cursor, CURS_INIT); 460 } 461