1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #if defined(LIBC_SCCS) && !defined(lint) 38 static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94"; 39 #endif /* LIBC_SCCS and not lint */ 40 41 #include <sys/types.h> 42 43 #include <errno.h> 44 #include <stdio.h> 45 #include <string.h> 46 47 #include <db.h> 48 #include "btree.h" 49 50 static int bt_bdelete __P((BTREE *, const DBT *)); 51 52 /* 53 * __BT_DELETE -- Delete the item(s) referenced by a key. 54 * 55 * Parameters: 56 * dbp: pointer to access method 57 * key: key to delete 58 * flags: R_CURSOR if deleting what the cursor references 59 * 60 * Returns: 61 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 62 */ 63 int 64 __bt_delete(dbp, key, flags) 65 const DB *dbp; 66 const DBT *key; 67 u_int flags; 68 { 69 BTREE *t; 70 int status; 71 72 t = dbp->internal; 73 74 /* Toss any page pinned across calls. */ 75 if (t->bt_pinned != NULL) { 76 mpool_put(t->bt_mp, t->bt_pinned, 0); 77 t->bt_pinned = NULL; 78 } 79 80 if (ISSET(t, B_RDONLY)) { 81 errno = EPERM; 82 return (RET_ERROR); 83 } 84 85 switch(flags) { 86 case 0: 87 status = bt_bdelete(t, key); 88 break; 89 case R_CURSOR: 90 /* 91 * If flags is R_CURSOR, delete the cursor; must already have 92 * started a scan and not have already deleted the record. For 93 * the delete cursor bit to have been set requires that the 94 * scan be initialized, so no reason to check. 95 */ 96 if (!ISSET(t, B_SEQINIT)) 97 goto einval; 98 status = ISSET(t, B_DELCRSR) ? 99 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); 100 break; 101 default: 102 einval: errno = EINVAL; 103 return (RET_ERROR); 104 } 105 if (status == RET_SUCCESS) 106 SET(t, B_MODIFIED); 107 return (status); 108 } 109 110 /* 111 * BT_BDELETE -- Delete all key/data pairs matching the specified key. 112 * 113 * Parameters: 114 * tree: tree 115 * key: key to delete 116 * 117 * Returns: 118 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. 119 */ 120 static int 121 bt_bdelete(t, key) 122 BTREE *t; 123 const DBT *key; 124 { 125 EPG *e, save; 126 PAGE *h; 127 pgno_t cpgno, pg; 128 indx_t cindex; 129 int deleted, dirty1, dirty2, exact; 130 131 /* Find any matching record; __bt_search pins the page. */ 132 if ((e = __bt_search(t, key, &exact)) == NULL) 133 return (RET_ERROR); 134 if (!exact) { 135 mpool_put(t->bt_mp, e->page, 0); 136 return (RET_SPECIAL); 137 } 138 139 /* 140 * Delete forward, then delete backward, from the found key. The 141 * ordering is so that the deletions don't mess up the page refs. 142 * The first loop deletes the key from the original page, the second 143 * unpins the original page. In the first loop, dirty1 is set if 144 * the original page is modified, and dirty2 is set if any subsequent 145 * pages are modified. In the second loop, dirty1 starts off set if 146 * the original page has been modified, and is set if any subsequent 147 * pages are modified. 148 * 149 * If find the key referenced by the cursor, don't delete it, just 150 * flag it for future deletion. The cursor page number is P_INVALID 151 * unless the sequential scan is initialized, so no reason to check. 152 * A special case is when the already deleted cursor record was the 153 * only record found. If so, then the delete opertion fails as no 154 * records were deleted. 155 * 156 * Cycle in place in the current page until the current record doesn't 157 * match the key or the page is empty. If the latter, walk forward, 158 * skipping empty pages and repeating until a record doesn't match 159 * the key or the end of the tree is reached. 160 */ 161 cpgno = t->bt_bcursor.pgno; 162 cindex = t->bt_bcursor.index; 163 save = *e; 164 dirty1 = 0; 165 for (h = e->page, deleted = 0;;) { 166 dirty2 = 0; 167 do { 168 if (h->pgno == cpgno && e->index == cindex) { 169 if (!ISSET(t, B_DELCRSR)) { 170 SET(t, B_DELCRSR); 171 deleted = 1; 172 } 173 ++e->index; 174 } else { 175 if (__bt_dleaf(t, h, e->index)) { 176 if (h->pgno != save.page->pgno) 177 mpool_put(t->bt_mp, h, dirty2); 178 mpool_put(t->bt_mp, save.page, dirty1); 179 return (RET_ERROR); 180 } 181 if (h->pgno == save.page->pgno) 182 dirty1 = MPOOL_DIRTY; 183 else 184 dirty2 = MPOOL_DIRTY; 185 deleted = 1; 186 } 187 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); 188 189 /* 190 * Quit if didn't find a match, no next page, or first key on 191 * the next page doesn't match. Don't unpin the original page 192 * unless an error occurs. 193 */ 194 if (e->index < NEXTINDEX(h)) 195 break; 196 for (;;) { 197 if ((pg = h->nextpg) == P_INVALID) 198 goto done1; 199 if (h->pgno != save.page->pgno) 200 mpool_put(t->bt_mp, h, dirty2); 201 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { 202 mpool_put(t->bt_mp, save.page, dirty1); 203 return (RET_ERROR); 204 } 205 if (NEXTINDEX(h) != 0) { 206 e->page = h; 207 e->index = 0; 208 break; 209 } 210 } 211 212 if (__bt_cmp(t, key, e) != 0) 213 break; 214 } 215 216 /* 217 * Reach here with the original page and the last page referenced 218 * pinned (they may be the same). Release it if not the original. 219 */ 220 done1: if (h->pgno != save.page->pgno) 221 mpool_put(t->bt_mp, h, dirty2); 222 223 /* 224 * Walk backwards from the record previous to the record returned by 225 * __bt_search, skipping empty pages, until a record doesn't match 226 * the key or reach the beginning of the tree. 227 */ 228 *e = save; 229 for (;;) { 230 if (e->index) 231 --e->index; 232 for (h = e->page; e->index; --e->index) { 233 if (__bt_cmp(t, key, e) != 0) 234 goto done2; 235 if (h->pgno == cpgno && e->index == cindex) { 236 if (!ISSET(t, B_DELCRSR)) { 237 SET(t, B_DELCRSR); 238 deleted = 1; 239 } 240 } else { 241 if (__bt_dleaf(t, h, e->index) == RET_ERROR) { 242 mpool_put(t->bt_mp, h, dirty1); 243 return (RET_ERROR); 244 } 245 if (h->pgno == save.page->pgno) 246 dirty1 = MPOOL_DIRTY; 247 deleted = 1; 248 } 249 } 250 251 if ((pg = h->prevpg) == P_INVALID) 252 goto done2; 253 mpool_put(t->bt_mp, h, dirty1); 254 dirty1 = 0; 255 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) 256 return (RET_ERROR); 257 e->index = NEXTINDEX(e->page); 258 } 259 260 /* 261 * Reach here with the last page that was looked at pinned. Release 262 * it. 263 */ 264 done2: mpool_put(t->bt_mp, h, dirty1); 265 return (deleted ? RET_SUCCESS : RET_SPECIAL); 266 } 267 268 /* 269 * __BT_DLEAF -- Delete a single record from a leaf page. 270 * 271 * Parameters: 272 * t: tree 273 * index: index on current page to delete 274 * 275 * Returns: 276 * RET_SUCCESS, RET_ERROR. 277 */ 278 int 279 __bt_dleaf(t, h, index) 280 BTREE *t; 281 PAGE *h; 282 indx_t index; 283 { 284 register BLEAF *bl; 285 register indx_t cnt, *ip, offset; 286 register size_t nbytes; 287 char *from; 288 void *to; 289 290 /* 291 * Delete a record from a btree leaf page. Internal records are never 292 * deleted from internal pages, regardless of the records that caused 293 * them to be added being deleted. Pages made empty by deletion are 294 * not reclaimed. They are, however, made available for reuse. 295 * 296 * Pack the remaining entries at the end of the page, shift the indices 297 * down, overwriting the deleted record and its index. If the record 298 * uses overflow pages, make them available for reuse. 299 */ 300 to = bl = GETBLEAF(h, index); 301 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) 302 return (RET_ERROR); 303 if (bl->flags & P_BIGDATA && 304 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) 305 return (RET_ERROR); 306 nbytes = NBLEAF(bl); 307 308 /* 309 * Compress the key/data pairs. Compress and adjust the [BR]LEAF 310 * offsets. Reset the headers. 311 */ 312 from = (char *)h + h->upper; 313 memmove(from + nbytes, from, (char *)to - from); 314 h->upper += nbytes; 315 316 offset = h->linp[index]; 317 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) 318 if (ip[0] < offset) 319 ip[0] += nbytes; 320 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) 321 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; 322 h->lower -= sizeof(indx_t); 323 return (RET_SUCCESS); 324 } 325