1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93"; 39 #endif /* LIBC_SCCS and not lint */ 40 41 #include <sys/types.h> 42 43 #include <errno.h> 44 #include <stddef.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 48 #include <db.h> 49 #include "btree.h" 50 51 static int bt_seqadv __P((BTREE *, EPG *, int)); 52 static int bt_seqset __P((BTREE *, EPG *, DBT *, int)); 53 54 /* 55 * Sequential scan support. 56 * 57 * The tree can be scanned sequentially, starting from either end of the tree 58 * or from any specific key. A scan request before any scanning is done is 59 * initialized as starting from the least node. 60 * 61 * Each tree has an EPGNO which has the current position of the cursor. The 62 * cursor has to survive deletions/insertions in the tree without losing its 63 * position. This is done by noting deletions without doing them, and then 64 * doing them when the cursor moves (or the tree is closed). 65 */ 66 67 /* 68 * __BT_SEQ -- Btree sequential scan interface. 69 * 70 * Parameters: 71 * dbp: pointer to access method 72 * key: key for positioning and return value 73 * data: data return value 74 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. 75 * 76 * Returns: 77 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 78 */ 79 int 80 __bt_seq(dbp, key, data, flags) 81 const DB *dbp; 82 DBT *key, *data; 83 u_int flags; 84 { 85 BTREE *t; 86 EPG e; 87 int status; 88 89 t = dbp->internal; 90 91 /* Toss any page pinned across calls. */ 92 if (t->bt_pinned != NULL) { 93 mpool_put(t->bt_mp, t->bt_pinned, 0); 94 t->bt_pinned = NULL; 95 } 96 97 /* 98 * If scan unitialized as yet, or starting at a specific record, set 99 * the scan to a specific key. Both bt_seqset and bt_seqadv pin the 100 * page the cursor references if they're successful. 101 */ 102 switch(flags) { 103 case R_NEXT: 104 case R_PREV: 105 if (ISSET(t, B_SEQINIT)) { 106 status = bt_seqadv(t, &e, flags); 107 break; 108 } 109 /* FALLTHROUGH */ 110 case R_CURSOR: 111 case R_FIRST: 112 case R_LAST: 113 status = bt_seqset(t, &e, key, flags); 114 break; 115 default: 116 errno = EINVAL; 117 return (RET_ERROR); 118 } 119 120 if (status == RET_SUCCESS) { 121 status = __bt_ret(t, &e, key, data); 122 123 /* Update the actual cursor. */ 124 t->bt_bcursor.pgno = e.page->pgno; 125 t->bt_bcursor.index = e.index; 126 127 /* 128 * If the user is doing concurrent access, we copied the 129 * key/data, toss the page. 130 */ 131 if (ISSET(t, B_DB_LOCK)) 132 mpool_put(t->bt_mp, e.page, 0); 133 else 134 t->bt_pinned = e.page; 135 SET(t, B_SEQINIT); 136 } 137 return (status); 138 } 139 140 /* 141 * BT_SEQSET -- Set the sequential scan to a specific key. 142 * 143 * Parameters: 144 * t: tree 145 * ep: storage for returned key 146 * key: key for initial scan position 147 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV 148 * 149 * Side effects: 150 * Pins the page the cursor references. 151 * 152 * Returns: 153 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 154 */ 155 static int 156 bt_seqset(t, ep, key, flags) 157 BTREE *t; 158 EPG *ep; 159 DBT *key; 160 int flags; 161 { 162 EPG *e; 163 PAGE *h; 164 pgno_t pg; 165 int exact; 166 167 /* 168 * Delete any already deleted record that we've been saving because 169 * the cursor pointed to it. Since going to a specific key, should 170 * delete any logically deleted records so they aren't found. 171 */ 172 if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) 173 return (RET_ERROR); 174 175 /* 176 * Find the first, last or specific key in the tree and point the cursor 177 * at it. The cursor may not be moved until a new key has been found. 178 */ 179 switch(flags) { 180 case R_CURSOR: /* Keyed scan. */ 181 /* 182 * Find the first instance of the key or the smallest key which 183 * is greater than or equal to the specified key. If run out 184 * of keys, return RET_SPECIAL. 185 */ 186 if (key->data == NULL || key->size == 0) { 187 errno = EINVAL; 188 return (RET_ERROR); 189 } 190 e = __bt_first(t, key, &exact); /* Returns pinned page. */ 191 if (e == NULL) 192 return (RET_ERROR); 193 /* 194 * If at the end of a page, skip any empty pages and find the 195 * next entry. 196 */ 197 if (e->index == NEXTINDEX(e->page)) { 198 h = e->page; 199 do { 200 pg = h->nextpg; 201 mpool_put(t->bt_mp, h, 0); 202 if (pg == P_INVALID) 203 return (RET_SPECIAL); 204 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 205 return (RET_ERROR); 206 } while (NEXTINDEX(h) == 0); 207 e->index = 0; 208 e->page = h; 209 } 210 *ep = *e; 211 break; 212 case R_FIRST: /* First record. */ 213 case R_NEXT: 214 /* Walk down the left-hand side of the tree. */ 215 for (pg = P_ROOT;;) { 216 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 217 return (RET_ERROR); 218 if (h->flags & (P_BLEAF | P_RLEAF)) 219 break; 220 pg = GETBINTERNAL(h, 0)->pgno; 221 mpool_put(t->bt_mp, h, 0); 222 } 223 224 /* Skip any empty pages. */ 225 while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) { 226 pg = h->nextpg; 227 mpool_put(t->bt_mp, h, 0); 228 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 229 return (RET_ERROR); 230 } 231 232 if (NEXTINDEX(h) == 0) { 233 mpool_put(t->bt_mp, h, 0); 234 return (RET_SPECIAL); 235 } 236 237 ep->page = h; 238 ep->index = 0; 239 break; 240 case R_LAST: /* Last record. */ 241 case R_PREV: 242 /* Walk down the right-hand side of the tree. */ 243 for (pg = P_ROOT;;) { 244 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 245 return (RET_ERROR); 246 if (h->flags & (P_BLEAF | P_RLEAF)) 247 break; 248 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 249 mpool_put(t->bt_mp, h, 0); 250 } 251 252 /* Skip any empty pages. */ 253 while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) { 254 pg = h->prevpg; 255 mpool_put(t->bt_mp, h, 0); 256 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 257 return (RET_ERROR); 258 } 259 260 if (NEXTINDEX(h) == 0) { 261 mpool_put(t->bt_mp, h, 0); 262 return (RET_SPECIAL); 263 } 264 265 ep->page = h; 266 ep->index = NEXTINDEX(h) - 1; 267 break; 268 } 269 return (RET_SUCCESS); 270 } 271 272 /* 273 * BT_SEQADVANCE -- Advance the sequential scan. 274 * 275 * Parameters: 276 * t: tree 277 * flags: R_NEXT, R_PREV 278 * 279 * Side effects: 280 * Pins the page the new key/data record is on. 281 * 282 * Returns: 283 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 284 */ 285 static int 286 bt_seqadv(t, e, flags) 287 BTREE *t; 288 EPG *e; 289 int flags; 290 { 291 EPGNO *c, delc; 292 PAGE *h; 293 indx_t index; 294 pgno_t pg; 295 296 /* Save the current cursor if going to delete it. */ 297 c = &t->bt_bcursor; 298 if (ISSET(t, B_DELCRSR)) 299 delc = *c; 300 301 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) 302 return (RET_ERROR); 303 304 /* 305 * Find the next/previous record in the tree and point the cursor at it. 306 * The cursor may not be moved until a new key has been found. 307 */ 308 index = c->index; 309 switch(flags) { 310 case R_NEXT: /* Next record. */ 311 if (++index == NEXTINDEX(h)) { 312 do { 313 pg = h->nextpg; 314 mpool_put(t->bt_mp, h, 0); 315 if (pg == P_INVALID) 316 return (RET_SPECIAL); 317 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 318 return (RET_ERROR); 319 } while (NEXTINDEX(h) == 0); 320 index = 0; 321 } 322 break; 323 case R_PREV: /* Previous record. */ 324 if (index-- == 0) { 325 do { 326 pg = h->prevpg; 327 mpool_put(t->bt_mp, h, 0); 328 if (pg == P_INVALID) 329 return (RET_SPECIAL); 330 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 331 return (RET_ERROR); 332 } while (NEXTINDEX(h) == 0); 333 index = NEXTINDEX(h) - 1; 334 } 335 break; 336 } 337 338 e->page = h; 339 e->index = index; 340 341 /* 342 * Delete any already deleted record that we've been saving because the 343 * cursor pointed to it. This could cause the new index to be shifted 344 * down by one if the record we're deleting is on the same page and has 345 * a larger index. 346 */ 347 if (ISSET(t, B_DELCRSR)) { 348 CLR(t, B_DELCRSR); /* Don't try twice. */ 349 if (c->pgno == delc.pgno && c->index > delc.index) 350 --c->index; 351 if (__bt_crsrdel(t, &delc)) 352 return (RET_ERROR); 353 } 354 return (RET_SUCCESS); 355 } 356 357 /* 358 * __BT_CRSRDEL -- Delete the record referenced by the cursor. 359 * 360 * Parameters: 361 * t: tree 362 * 363 * Returns: 364 * RET_ERROR, RET_SUCCESS 365 */ 366 int 367 __bt_crsrdel(t, c) 368 BTREE *t; 369 EPGNO *c; 370 { 371 PAGE *h; 372 int status; 373 374 CLR(t, B_DELCRSR); /* Don't try twice. */ 375 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) 376 return (RET_ERROR); 377 status = __bt_dleaf(t, h, c->index); 378 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 379 return (status); 380 } 381