1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * btree.c - NILFS B-tree. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/string.h> 12 #include <linux/errno.h> 13 #include <linux/pagevec.h> 14 #include "nilfs.h" 15 #include "page.h" 16 #include "btnode.h" 17 #include "btree.h" 18 #include "alloc.h" 19 #include "dat.h" 20 21 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 22 23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 24 { 25 struct nilfs_btree_path *path; 26 int level = NILFS_BTREE_LEVEL_DATA; 27 28 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 29 if (path == NULL) 30 goto out; 31 32 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 33 path[level].bp_bh = NULL; 34 path[level].bp_sib_bh = NULL; 35 path[level].bp_index = 0; 36 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 37 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 38 path[level].bp_op = NULL; 39 } 40 41 out: 42 return path; 43 } 44 45 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 46 { 47 int level = NILFS_BTREE_LEVEL_DATA; 48 49 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 50 brelse(path[level].bp_bh); 51 52 kmem_cache_free(nilfs_btree_path_cache, path); 53 } 54 55 /* 56 * B-tree node operations 57 */ 58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 59 __u64 ptr, struct buffer_head **bhp) 60 { 61 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 62 struct buffer_head *bh; 63 64 bh = nilfs_btnode_create_block(btnc, ptr); 65 if (!bh) 66 return -ENOMEM; 67 68 set_buffer_nilfs_volatile(bh); 69 *bhp = bh; 70 return 0; 71 } 72 73 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 74 { 75 return node->bn_flags; 76 } 77 78 static void 79 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 80 { 81 node->bn_flags = flags; 82 } 83 84 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 85 { 86 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 87 } 88 89 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 90 { 91 return node->bn_level; 92 } 93 94 static void 95 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 96 { 97 node->bn_level = level; 98 } 99 100 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 101 { 102 return le16_to_cpu(node->bn_nchildren); 103 } 104 105 static void 106 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 107 { 108 node->bn_nchildren = cpu_to_le16(nchildren); 109 } 110 111 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 112 { 113 return i_blocksize(btree->b_inode); 114 } 115 116 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 117 { 118 return btree->b_nchildren_per_block; 119 } 120 121 static __le64 * 122 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 123 { 124 return (__le64 *)((char *)(node + 1) + 125 (nilfs_btree_node_root(node) ? 126 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 127 } 128 129 static __le64 * 130 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 131 { 132 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 133 } 134 135 static __u64 136 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 137 { 138 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 139 } 140 141 static void 142 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 143 { 144 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 145 } 146 147 static __u64 148 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 149 int ncmax) 150 { 151 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 152 } 153 154 static void 155 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 156 int ncmax) 157 { 158 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 159 } 160 161 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 162 int level, int nchildren, int ncmax, 163 const __u64 *keys, const __u64 *ptrs) 164 { 165 __le64 *dkeys; 166 __le64 *dptrs; 167 int i; 168 169 nilfs_btree_node_set_flags(node, flags); 170 nilfs_btree_node_set_level(node, level); 171 nilfs_btree_node_set_nchildren(node, nchildren); 172 173 dkeys = nilfs_btree_node_dkeys(node); 174 dptrs = nilfs_btree_node_dptrs(node, ncmax); 175 for (i = 0; i < nchildren; i++) { 176 dkeys[i] = cpu_to_le64(keys[i]); 177 dptrs[i] = cpu_to_le64(ptrs[i]); 178 } 179 } 180 181 /* Assume the buffer heads corresponding to left and right are locked. */ 182 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 183 struct nilfs_btree_node *right, 184 int n, int lncmax, int rncmax) 185 { 186 __le64 *ldkeys, *rdkeys; 187 __le64 *ldptrs, *rdptrs; 188 int lnchildren, rnchildren; 189 190 ldkeys = nilfs_btree_node_dkeys(left); 191 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 192 lnchildren = nilfs_btree_node_get_nchildren(left); 193 194 rdkeys = nilfs_btree_node_dkeys(right); 195 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 196 rnchildren = nilfs_btree_node_get_nchildren(right); 197 198 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 199 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 200 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 201 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 202 203 lnchildren += n; 204 rnchildren -= n; 205 nilfs_btree_node_set_nchildren(left, lnchildren); 206 nilfs_btree_node_set_nchildren(right, rnchildren); 207 } 208 209 /* Assume that the buffer heads corresponding to left and right are locked. */ 210 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 211 struct nilfs_btree_node *right, 212 int n, int lncmax, int rncmax) 213 { 214 __le64 *ldkeys, *rdkeys; 215 __le64 *ldptrs, *rdptrs; 216 int lnchildren, rnchildren; 217 218 ldkeys = nilfs_btree_node_dkeys(left); 219 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 220 lnchildren = nilfs_btree_node_get_nchildren(left); 221 222 rdkeys = nilfs_btree_node_dkeys(right); 223 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 224 rnchildren = nilfs_btree_node_get_nchildren(right); 225 226 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 227 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 228 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 229 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 230 231 lnchildren -= n; 232 rnchildren += n; 233 nilfs_btree_node_set_nchildren(left, lnchildren); 234 nilfs_btree_node_set_nchildren(right, rnchildren); 235 } 236 237 /* Assume that the buffer head corresponding to node is locked. */ 238 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 239 __u64 key, __u64 ptr, int ncmax) 240 { 241 __le64 *dkeys; 242 __le64 *dptrs; 243 int nchildren; 244 245 dkeys = nilfs_btree_node_dkeys(node); 246 dptrs = nilfs_btree_node_dptrs(node, ncmax); 247 nchildren = nilfs_btree_node_get_nchildren(node); 248 if (index < nchildren) { 249 memmove(dkeys + index + 1, dkeys + index, 250 (nchildren - index) * sizeof(*dkeys)); 251 memmove(dptrs + index + 1, dptrs + index, 252 (nchildren - index) * sizeof(*dptrs)); 253 } 254 dkeys[index] = cpu_to_le64(key); 255 dptrs[index] = cpu_to_le64(ptr); 256 nchildren++; 257 nilfs_btree_node_set_nchildren(node, nchildren); 258 } 259 260 /* Assume that the buffer head corresponding to node is locked. */ 261 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 262 __u64 *keyp, __u64 *ptrp, int ncmax) 263 { 264 __u64 key; 265 __u64 ptr; 266 __le64 *dkeys; 267 __le64 *dptrs; 268 int nchildren; 269 270 dkeys = nilfs_btree_node_dkeys(node); 271 dptrs = nilfs_btree_node_dptrs(node, ncmax); 272 key = le64_to_cpu(dkeys[index]); 273 ptr = le64_to_cpu(dptrs[index]); 274 nchildren = nilfs_btree_node_get_nchildren(node); 275 if (keyp != NULL) 276 *keyp = key; 277 if (ptrp != NULL) 278 *ptrp = ptr; 279 280 if (index < nchildren - 1) { 281 memmove(dkeys + index, dkeys + index + 1, 282 (nchildren - index - 1) * sizeof(*dkeys)); 283 memmove(dptrs + index, dptrs + index + 1, 284 (nchildren - index - 1) * sizeof(*dptrs)); 285 } 286 nchildren--; 287 nilfs_btree_node_set_nchildren(node, nchildren); 288 } 289 290 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 291 __u64 key, int *indexp) 292 { 293 __u64 nkey; 294 int index, low, high, s; 295 296 /* binary search */ 297 low = 0; 298 high = nilfs_btree_node_get_nchildren(node) - 1; 299 index = 0; 300 s = 0; 301 while (low <= high) { 302 index = (low + high) / 2; 303 nkey = nilfs_btree_node_get_key(node, index); 304 if (nkey == key) { 305 s = 0; 306 goto out; 307 } else if (nkey < key) { 308 low = index + 1; 309 s = -1; 310 } else { 311 high = index - 1; 312 s = 1; 313 } 314 } 315 316 /* adjust index */ 317 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 318 if (s > 0 && index > 0) 319 index--; 320 } else if (s < 0) 321 index++; 322 323 out: 324 *indexp = index; 325 326 return s == 0; 327 } 328 329 /** 330 * nilfs_btree_node_broken - verify consistency of btree node 331 * @node: btree node block to be examined 332 * @size: node size (in bytes) 333 * @inode: host inode of btree 334 * @blocknr: block number 335 * 336 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 337 */ 338 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 339 size_t size, struct inode *inode, 340 sector_t blocknr) 341 { 342 int level, flags, nchildren; 343 int ret = 0; 344 345 level = nilfs_btree_node_get_level(node); 346 flags = nilfs_btree_node_get_flags(node); 347 nchildren = nilfs_btree_node_get_nchildren(node); 348 349 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 350 level >= NILFS_BTREE_LEVEL_MAX || 351 (flags & NILFS_BTREE_NODE_ROOT) || 352 nchildren < 0 || 353 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 354 nilfs_crit(inode->i_sb, 355 "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", 356 inode->i_ino, (unsigned long long)blocknr, level, 357 flags, nchildren); 358 ret = 1; 359 } 360 return ret; 361 } 362 363 /** 364 * nilfs_btree_root_broken - verify consistency of btree root node 365 * @node: btree root node to be examined 366 * @inode: host inode of btree 367 * 368 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 369 */ 370 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 371 struct inode *inode) 372 { 373 int level, flags, nchildren; 374 int ret = 0; 375 376 level = nilfs_btree_node_get_level(node); 377 flags = nilfs_btree_node_get_flags(node); 378 nchildren = nilfs_btree_node_get_nchildren(node); 379 380 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 381 level >= NILFS_BTREE_LEVEL_MAX || 382 nchildren < 0 || 383 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 384 nilfs_crit(inode->i_sb, 385 "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", 386 inode->i_ino, level, flags, nchildren); 387 ret = 1; 388 } 389 return ret; 390 } 391 392 int nilfs_btree_broken_node_block(struct buffer_head *bh) 393 { 394 struct inode *inode; 395 int ret; 396 397 if (buffer_nilfs_checked(bh)) 398 return 0; 399 400 inode = bh->b_page->mapping->host; 401 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 402 bh->b_size, inode, bh->b_blocknr); 403 if (likely(!ret)) 404 set_buffer_nilfs_checked(bh); 405 return ret; 406 } 407 408 static struct nilfs_btree_node * 409 nilfs_btree_get_root(const struct nilfs_bmap *btree) 410 { 411 return (struct nilfs_btree_node *)btree->b_u.u_data; 412 } 413 414 static struct nilfs_btree_node * 415 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 416 { 417 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 418 } 419 420 static struct nilfs_btree_node * 421 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 422 { 423 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 424 } 425 426 static int nilfs_btree_height(const struct nilfs_bmap *btree) 427 { 428 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 429 } 430 431 static struct nilfs_btree_node * 432 nilfs_btree_get_node(const struct nilfs_bmap *btree, 433 const struct nilfs_btree_path *path, 434 int level, int *ncmaxp) 435 { 436 struct nilfs_btree_node *node; 437 438 if (level == nilfs_btree_height(btree) - 1) { 439 node = nilfs_btree_get_root(btree); 440 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 441 } else { 442 node = nilfs_btree_get_nonroot_node(path, level); 443 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 444 } 445 return node; 446 } 447 448 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, 449 struct nilfs_btree_node *node, int level) 450 { 451 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 452 dump_stack(); 453 nilfs_crit(btree->b_inode->i_sb, 454 "btree level mismatch (ino=%lu): %d != %d", 455 btree->b_inode->i_ino, 456 nilfs_btree_node_get_level(node), level); 457 return 1; 458 } 459 return 0; 460 } 461 462 struct nilfs_btree_readahead_info { 463 struct nilfs_btree_node *node; /* parent node */ 464 int max_ra_blocks; /* max nof blocks to read ahead */ 465 int index; /* current index on the parent node */ 466 int ncmax; /* nof children in the parent node */ 467 }; 468 469 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 470 struct buffer_head **bhp, 471 const struct nilfs_btree_readahead_info *ra) 472 { 473 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 474 struct buffer_head *bh, *ra_bh; 475 sector_t submit_ptr = 0; 476 int ret; 477 478 ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, 479 &submit_ptr); 480 if (ret) { 481 if (ret != -EEXIST) 482 return ret; 483 goto out_check; 484 } 485 486 if (ra) { 487 int i, n; 488 __u64 ptr2; 489 490 /* read ahead sibling nodes */ 491 for (n = ra->max_ra_blocks, i = ra->index + 1; 492 n > 0 && i < ra->ncmax; n--, i++) { 493 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 494 495 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, 496 REQ_OP_READ, REQ_RAHEAD, 497 &ra_bh, &submit_ptr); 498 if (likely(!ret || ret == -EEXIST)) 499 brelse(ra_bh); 500 else if (ret != -EBUSY) 501 break; 502 if (!buffer_locked(bh)) 503 goto out_no_wait; 504 } 505 } 506 507 wait_on_buffer(bh); 508 509 out_no_wait: 510 if (!buffer_uptodate(bh)) { 511 nilfs_err(btree->b_inode->i_sb, 512 "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", 513 btree->b_inode->i_ino, (unsigned long long)ptr); 514 brelse(bh); 515 return -EIO; 516 } 517 518 out_check: 519 if (nilfs_btree_broken_node_block(bh)) { 520 clear_buffer_uptodate(bh); 521 brelse(bh); 522 return -EINVAL; 523 } 524 525 *bhp = bh; 526 return 0; 527 } 528 529 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 530 struct buffer_head **bhp) 531 { 532 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 533 } 534 535 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 536 struct nilfs_btree_path *path, 537 __u64 key, __u64 *ptrp, int minlevel, 538 int readahead) 539 { 540 struct nilfs_btree_node *node; 541 struct nilfs_btree_readahead_info p, *ra; 542 __u64 ptr; 543 int level, index, found, ncmax, ret; 544 545 node = nilfs_btree_get_root(btree); 546 level = nilfs_btree_node_get_level(node); 547 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 548 return -ENOENT; 549 550 found = nilfs_btree_node_lookup(node, key, &index); 551 ptr = nilfs_btree_node_get_ptr(node, index, 552 NILFS_BTREE_ROOT_NCHILDREN_MAX); 553 path[level].bp_bh = NULL; 554 path[level].bp_index = index; 555 556 ncmax = nilfs_btree_nchildren_per_block(btree); 557 558 while (--level >= minlevel) { 559 ra = NULL; 560 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 561 p.node = nilfs_btree_get_node(btree, path, level + 1, 562 &p.ncmax); 563 p.index = index; 564 p.max_ra_blocks = 7; 565 ra = &p; 566 } 567 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 568 ra); 569 if (ret < 0) 570 return ret; 571 572 node = nilfs_btree_get_nonroot_node(path, level); 573 if (nilfs_btree_bad_node(btree, node, level)) 574 return -EINVAL; 575 if (!found) 576 found = nilfs_btree_node_lookup(node, key, &index); 577 else 578 index = 0; 579 if (index < ncmax) { 580 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 581 } else { 582 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 583 /* insert */ 584 ptr = NILFS_BMAP_INVALID_PTR; 585 } 586 path[level].bp_index = index; 587 } 588 if (!found) 589 return -ENOENT; 590 591 if (ptrp != NULL) 592 *ptrp = ptr; 593 594 return 0; 595 } 596 597 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 598 struct nilfs_btree_path *path, 599 __u64 *keyp, __u64 *ptrp) 600 { 601 struct nilfs_btree_node *node; 602 __u64 ptr; 603 int index, level, ncmax, ret; 604 605 node = nilfs_btree_get_root(btree); 606 index = nilfs_btree_node_get_nchildren(node) - 1; 607 if (index < 0) 608 return -ENOENT; 609 level = nilfs_btree_node_get_level(node); 610 ptr = nilfs_btree_node_get_ptr(node, index, 611 NILFS_BTREE_ROOT_NCHILDREN_MAX); 612 path[level].bp_bh = NULL; 613 path[level].bp_index = index; 614 ncmax = nilfs_btree_nchildren_per_block(btree); 615 616 for (level--; level > 0; level--) { 617 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 618 if (ret < 0) 619 return ret; 620 node = nilfs_btree_get_nonroot_node(path, level); 621 if (nilfs_btree_bad_node(btree, node, level)) 622 return -EINVAL; 623 index = nilfs_btree_node_get_nchildren(node) - 1; 624 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 625 path[level].bp_index = index; 626 } 627 628 if (keyp != NULL) 629 *keyp = nilfs_btree_node_get_key(node, index); 630 if (ptrp != NULL) 631 *ptrp = ptr; 632 633 return 0; 634 } 635 636 /** 637 * nilfs_btree_get_next_key - get next valid key from btree path array 638 * @btree: bmap struct of btree 639 * @path: array of nilfs_btree_path struct 640 * @minlevel: start level 641 * @nextkey: place to store the next valid key 642 * 643 * Return Value: If a next key was found, 0 is returned. Otherwise, 644 * -ENOENT is returned. 645 */ 646 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 647 const struct nilfs_btree_path *path, 648 int minlevel, __u64 *nextkey) 649 { 650 struct nilfs_btree_node *node; 651 int maxlevel = nilfs_btree_height(btree) - 1; 652 int index, next_adj, level; 653 654 /* Next index is already set to bp_index for leaf nodes. */ 655 next_adj = 0; 656 for (level = minlevel; level <= maxlevel; level++) { 657 if (level == maxlevel) 658 node = nilfs_btree_get_root(btree); 659 else 660 node = nilfs_btree_get_nonroot_node(path, level); 661 662 index = path[level].bp_index + next_adj; 663 if (index < nilfs_btree_node_get_nchildren(node)) { 664 /* Next key is in this node */ 665 *nextkey = nilfs_btree_node_get_key(node, index); 666 return 0; 667 } 668 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 669 next_adj = 1; 670 } 671 return -ENOENT; 672 } 673 674 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 675 __u64 key, int level, __u64 *ptrp) 676 { 677 struct nilfs_btree_path *path; 678 int ret; 679 680 path = nilfs_btree_alloc_path(); 681 if (path == NULL) 682 return -ENOMEM; 683 684 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 685 686 nilfs_btree_free_path(path); 687 688 return ret; 689 } 690 691 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 692 __u64 key, __u64 *ptrp, 693 unsigned int maxblocks) 694 { 695 struct nilfs_btree_path *path; 696 struct nilfs_btree_node *node; 697 struct inode *dat = NULL; 698 __u64 ptr, ptr2; 699 sector_t blocknr; 700 int level = NILFS_BTREE_LEVEL_NODE_MIN; 701 int ret, cnt, index, maxlevel, ncmax; 702 struct nilfs_btree_readahead_info p; 703 704 path = nilfs_btree_alloc_path(); 705 if (path == NULL) 706 return -ENOMEM; 707 708 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 709 if (ret < 0) 710 goto out; 711 712 if (NILFS_BMAP_USE_VBN(btree)) { 713 dat = nilfs_bmap_get_dat(btree); 714 ret = nilfs_dat_translate(dat, ptr, &blocknr); 715 if (ret < 0) 716 goto out; 717 ptr = blocknr; 718 } 719 cnt = 1; 720 if (cnt == maxblocks) 721 goto end; 722 723 maxlevel = nilfs_btree_height(btree) - 1; 724 node = nilfs_btree_get_node(btree, path, level, &ncmax); 725 index = path[level].bp_index + 1; 726 for (;;) { 727 while (index < nilfs_btree_node_get_nchildren(node)) { 728 if (nilfs_btree_node_get_key(node, index) != 729 key + cnt) 730 goto end; 731 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 732 if (dat) { 733 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 734 if (ret < 0) 735 goto out; 736 ptr2 = blocknr; 737 } 738 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 739 goto end; 740 index++; 741 } 742 if (level == maxlevel) 743 break; 744 745 /* look-up right sibling node */ 746 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 747 p.index = path[level + 1].bp_index + 1; 748 p.max_ra_blocks = 7; 749 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 750 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 751 break; 752 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 753 path[level + 1].bp_index = p.index; 754 755 brelse(path[level].bp_bh); 756 path[level].bp_bh = NULL; 757 758 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 759 &p); 760 if (ret < 0) 761 goto out; 762 node = nilfs_btree_get_nonroot_node(path, level); 763 ncmax = nilfs_btree_nchildren_per_block(btree); 764 index = 0; 765 path[level].bp_index = index; 766 } 767 end: 768 *ptrp = ptr; 769 ret = cnt; 770 out: 771 nilfs_btree_free_path(path); 772 return ret; 773 } 774 775 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 776 struct nilfs_btree_path *path, 777 int level, __u64 key) 778 { 779 if (level < nilfs_btree_height(btree) - 1) { 780 do { 781 nilfs_btree_node_set_key( 782 nilfs_btree_get_nonroot_node(path, level), 783 path[level].bp_index, key); 784 if (!buffer_dirty(path[level].bp_bh)) 785 mark_buffer_dirty(path[level].bp_bh); 786 } while ((path[level].bp_index == 0) && 787 (++level < nilfs_btree_height(btree) - 1)); 788 } 789 790 /* root */ 791 if (level == nilfs_btree_height(btree) - 1) { 792 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 793 path[level].bp_index, key); 794 } 795 } 796 797 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 798 struct nilfs_btree_path *path, 799 int level, __u64 *keyp, __u64 *ptrp) 800 { 801 struct nilfs_btree_node *node; 802 int ncblk; 803 804 if (level < nilfs_btree_height(btree) - 1) { 805 node = nilfs_btree_get_nonroot_node(path, level); 806 ncblk = nilfs_btree_nchildren_per_block(btree); 807 nilfs_btree_node_insert(node, path[level].bp_index, 808 *keyp, *ptrp, ncblk); 809 if (!buffer_dirty(path[level].bp_bh)) 810 mark_buffer_dirty(path[level].bp_bh); 811 812 if (path[level].bp_index == 0) 813 nilfs_btree_promote_key(btree, path, level + 1, 814 nilfs_btree_node_get_key(node, 815 0)); 816 } else { 817 node = nilfs_btree_get_root(btree); 818 nilfs_btree_node_insert(node, path[level].bp_index, 819 *keyp, *ptrp, 820 NILFS_BTREE_ROOT_NCHILDREN_MAX); 821 } 822 } 823 824 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 825 struct nilfs_btree_path *path, 826 int level, __u64 *keyp, __u64 *ptrp) 827 { 828 struct nilfs_btree_node *node, *left; 829 int nchildren, lnchildren, n, move, ncblk; 830 831 node = nilfs_btree_get_nonroot_node(path, level); 832 left = nilfs_btree_get_sib_node(path, level); 833 nchildren = nilfs_btree_node_get_nchildren(node); 834 lnchildren = nilfs_btree_node_get_nchildren(left); 835 ncblk = nilfs_btree_nchildren_per_block(btree); 836 move = 0; 837 838 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 839 if (n > path[level].bp_index) { 840 /* move insert point */ 841 n--; 842 move = 1; 843 } 844 845 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 846 847 if (!buffer_dirty(path[level].bp_bh)) 848 mark_buffer_dirty(path[level].bp_bh); 849 if (!buffer_dirty(path[level].bp_sib_bh)) 850 mark_buffer_dirty(path[level].bp_sib_bh); 851 852 nilfs_btree_promote_key(btree, path, level + 1, 853 nilfs_btree_node_get_key(node, 0)); 854 855 if (move) { 856 brelse(path[level].bp_bh); 857 path[level].bp_bh = path[level].bp_sib_bh; 858 path[level].bp_sib_bh = NULL; 859 path[level].bp_index += lnchildren; 860 path[level + 1].bp_index--; 861 } else { 862 brelse(path[level].bp_sib_bh); 863 path[level].bp_sib_bh = NULL; 864 path[level].bp_index -= n; 865 } 866 867 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 868 } 869 870 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 871 struct nilfs_btree_path *path, 872 int level, __u64 *keyp, __u64 *ptrp) 873 { 874 struct nilfs_btree_node *node, *right; 875 int nchildren, rnchildren, n, move, ncblk; 876 877 node = nilfs_btree_get_nonroot_node(path, level); 878 right = nilfs_btree_get_sib_node(path, level); 879 nchildren = nilfs_btree_node_get_nchildren(node); 880 rnchildren = nilfs_btree_node_get_nchildren(right); 881 ncblk = nilfs_btree_nchildren_per_block(btree); 882 move = 0; 883 884 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 885 if (n > nchildren - path[level].bp_index) { 886 /* move insert point */ 887 n--; 888 move = 1; 889 } 890 891 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 892 893 if (!buffer_dirty(path[level].bp_bh)) 894 mark_buffer_dirty(path[level].bp_bh); 895 if (!buffer_dirty(path[level].bp_sib_bh)) 896 mark_buffer_dirty(path[level].bp_sib_bh); 897 898 path[level + 1].bp_index++; 899 nilfs_btree_promote_key(btree, path, level + 1, 900 nilfs_btree_node_get_key(right, 0)); 901 path[level + 1].bp_index--; 902 903 if (move) { 904 brelse(path[level].bp_bh); 905 path[level].bp_bh = path[level].bp_sib_bh; 906 path[level].bp_sib_bh = NULL; 907 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 908 path[level + 1].bp_index++; 909 } else { 910 brelse(path[level].bp_sib_bh); 911 path[level].bp_sib_bh = NULL; 912 } 913 914 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 915 } 916 917 static void nilfs_btree_split(struct nilfs_bmap *btree, 918 struct nilfs_btree_path *path, 919 int level, __u64 *keyp, __u64 *ptrp) 920 { 921 struct nilfs_btree_node *node, *right; 922 int nchildren, n, move, ncblk; 923 924 node = nilfs_btree_get_nonroot_node(path, level); 925 right = nilfs_btree_get_sib_node(path, level); 926 nchildren = nilfs_btree_node_get_nchildren(node); 927 ncblk = nilfs_btree_nchildren_per_block(btree); 928 move = 0; 929 930 n = (nchildren + 1) / 2; 931 if (n > nchildren - path[level].bp_index) { 932 n--; 933 move = 1; 934 } 935 936 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 937 938 if (!buffer_dirty(path[level].bp_bh)) 939 mark_buffer_dirty(path[level].bp_bh); 940 if (!buffer_dirty(path[level].bp_sib_bh)) 941 mark_buffer_dirty(path[level].bp_sib_bh); 942 943 if (move) { 944 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 945 nilfs_btree_node_insert(right, path[level].bp_index, 946 *keyp, *ptrp, ncblk); 947 948 *keyp = nilfs_btree_node_get_key(right, 0); 949 *ptrp = path[level].bp_newreq.bpr_ptr; 950 951 brelse(path[level].bp_bh); 952 path[level].bp_bh = path[level].bp_sib_bh; 953 path[level].bp_sib_bh = NULL; 954 } else { 955 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 956 957 *keyp = nilfs_btree_node_get_key(right, 0); 958 *ptrp = path[level].bp_newreq.bpr_ptr; 959 960 brelse(path[level].bp_sib_bh); 961 path[level].bp_sib_bh = NULL; 962 } 963 964 path[level + 1].bp_index++; 965 } 966 967 static void nilfs_btree_grow(struct nilfs_bmap *btree, 968 struct nilfs_btree_path *path, 969 int level, __u64 *keyp, __u64 *ptrp) 970 { 971 struct nilfs_btree_node *root, *child; 972 int n, ncblk; 973 974 root = nilfs_btree_get_root(btree); 975 child = nilfs_btree_get_sib_node(path, level); 976 ncblk = nilfs_btree_nchildren_per_block(btree); 977 978 n = nilfs_btree_node_get_nchildren(root); 979 980 nilfs_btree_node_move_right(root, child, n, 981 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 982 nilfs_btree_node_set_level(root, level + 1); 983 984 if (!buffer_dirty(path[level].bp_sib_bh)) 985 mark_buffer_dirty(path[level].bp_sib_bh); 986 987 path[level].bp_bh = path[level].bp_sib_bh; 988 path[level].bp_sib_bh = NULL; 989 990 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 991 992 *keyp = nilfs_btree_node_get_key(child, 0); 993 *ptrp = path[level].bp_newreq.bpr_ptr; 994 } 995 996 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 997 const struct nilfs_btree_path *path) 998 { 999 struct nilfs_btree_node *node; 1000 int level, ncmax; 1001 1002 if (path == NULL) 1003 return NILFS_BMAP_INVALID_PTR; 1004 1005 /* left sibling */ 1006 level = NILFS_BTREE_LEVEL_NODE_MIN; 1007 if (path[level].bp_index > 0) { 1008 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1009 return nilfs_btree_node_get_ptr(node, 1010 path[level].bp_index - 1, 1011 ncmax); 1012 } 1013 1014 /* parent */ 1015 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1016 if (level <= nilfs_btree_height(btree) - 1) { 1017 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1018 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1019 ncmax); 1020 } 1021 1022 return NILFS_BMAP_INVALID_PTR; 1023 } 1024 1025 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1026 const struct nilfs_btree_path *path, 1027 __u64 key) 1028 { 1029 __u64 ptr; 1030 1031 ptr = nilfs_bmap_find_target_seq(btree, key); 1032 if (ptr != NILFS_BMAP_INVALID_PTR) 1033 /* sequential access */ 1034 return ptr; 1035 1036 ptr = nilfs_btree_find_near(btree, path); 1037 if (ptr != NILFS_BMAP_INVALID_PTR) 1038 /* near */ 1039 return ptr; 1040 1041 /* block group */ 1042 return nilfs_bmap_find_target_in_group(btree); 1043 } 1044 1045 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1046 struct nilfs_btree_path *path, 1047 int *levelp, __u64 key, __u64 ptr, 1048 struct nilfs_bmap_stats *stats) 1049 { 1050 struct buffer_head *bh; 1051 struct nilfs_btree_node *node, *parent, *sib; 1052 __u64 sibptr; 1053 int pindex, level, ncmax, ncblk, ret; 1054 struct inode *dat = NULL; 1055 1056 stats->bs_nblocks = 0; 1057 level = NILFS_BTREE_LEVEL_DATA; 1058 1059 /* allocate a new ptr for data block */ 1060 if (NILFS_BMAP_USE_VBN(btree)) { 1061 path[level].bp_newreq.bpr_ptr = 1062 nilfs_btree_find_target_v(btree, path, key); 1063 dat = nilfs_bmap_get_dat(btree); 1064 } 1065 1066 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1067 if (ret < 0) 1068 goto err_out_data; 1069 1070 ncblk = nilfs_btree_nchildren_per_block(btree); 1071 1072 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1073 level < nilfs_btree_height(btree) - 1; 1074 level++) { 1075 node = nilfs_btree_get_nonroot_node(path, level); 1076 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1077 path[level].bp_op = nilfs_btree_do_insert; 1078 stats->bs_nblocks++; 1079 goto out; 1080 } 1081 1082 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1083 pindex = path[level + 1].bp_index; 1084 1085 /* left sibling */ 1086 if (pindex > 0) { 1087 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1088 ncmax); 1089 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1090 if (ret < 0) 1091 goto err_out_child_node; 1092 sib = (struct nilfs_btree_node *)bh->b_data; 1093 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1094 path[level].bp_sib_bh = bh; 1095 path[level].bp_op = nilfs_btree_carry_left; 1096 stats->bs_nblocks++; 1097 goto out; 1098 } else { 1099 brelse(bh); 1100 } 1101 } 1102 1103 /* right sibling */ 1104 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1105 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1106 ncmax); 1107 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1108 if (ret < 0) 1109 goto err_out_child_node; 1110 sib = (struct nilfs_btree_node *)bh->b_data; 1111 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1112 path[level].bp_sib_bh = bh; 1113 path[level].bp_op = nilfs_btree_carry_right; 1114 stats->bs_nblocks++; 1115 goto out; 1116 } else { 1117 brelse(bh); 1118 } 1119 } 1120 1121 /* split */ 1122 path[level].bp_newreq.bpr_ptr = 1123 path[level - 1].bp_newreq.bpr_ptr + 1; 1124 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1125 &path[level].bp_newreq, dat); 1126 if (ret < 0) 1127 goto err_out_child_node; 1128 ret = nilfs_btree_get_new_block(btree, 1129 path[level].bp_newreq.bpr_ptr, 1130 &bh); 1131 if (ret < 0) 1132 goto err_out_curr_node; 1133 1134 stats->bs_nblocks++; 1135 1136 sib = (struct nilfs_btree_node *)bh->b_data; 1137 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1138 path[level].bp_sib_bh = bh; 1139 path[level].bp_op = nilfs_btree_split; 1140 } 1141 1142 /* root */ 1143 node = nilfs_btree_get_root(btree); 1144 if (nilfs_btree_node_get_nchildren(node) < 1145 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1146 path[level].bp_op = nilfs_btree_do_insert; 1147 stats->bs_nblocks++; 1148 goto out; 1149 } 1150 1151 /* grow */ 1152 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1153 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1154 if (ret < 0) 1155 goto err_out_child_node; 1156 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1157 &bh); 1158 if (ret < 0) 1159 goto err_out_curr_node; 1160 1161 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1162 0, level, 0, ncblk, NULL, NULL); 1163 path[level].bp_sib_bh = bh; 1164 path[level].bp_op = nilfs_btree_grow; 1165 1166 level++; 1167 path[level].bp_op = nilfs_btree_do_insert; 1168 1169 /* a newly-created node block and a data block are added */ 1170 stats->bs_nblocks += 2; 1171 1172 /* success */ 1173 out: 1174 *levelp = level; 1175 return ret; 1176 1177 /* error */ 1178 err_out_curr_node: 1179 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1180 err_out_child_node: 1181 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1182 nilfs_btnode_delete(path[level].bp_sib_bh); 1183 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1184 1185 } 1186 1187 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1188 err_out_data: 1189 *levelp = level; 1190 stats->bs_nblocks = 0; 1191 return ret; 1192 } 1193 1194 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1195 struct nilfs_btree_path *path, 1196 int maxlevel, __u64 key, __u64 ptr) 1197 { 1198 struct inode *dat = NULL; 1199 int level; 1200 1201 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1202 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1203 if (NILFS_BMAP_USE_VBN(btree)) { 1204 nilfs_bmap_set_target_v(btree, key, ptr); 1205 dat = nilfs_bmap_get_dat(btree); 1206 } 1207 1208 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1209 nilfs_bmap_commit_alloc_ptr(btree, 1210 &path[level - 1].bp_newreq, dat); 1211 path[level].bp_op(btree, path, level, &key, &ptr); 1212 } 1213 1214 if (!nilfs_bmap_dirty(btree)) 1215 nilfs_bmap_set_dirty(btree); 1216 } 1217 1218 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1219 { 1220 struct nilfs_btree_path *path; 1221 struct nilfs_bmap_stats stats; 1222 int level, ret; 1223 1224 path = nilfs_btree_alloc_path(); 1225 if (path == NULL) 1226 return -ENOMEM; 1227 1228 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1229 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1230 if (ret != -ENOENT) { 1231 if (ret == 0) 1232 ret = -EEXIST; 1233 goto out; 1234 } 1235 1236 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1237 if (ret < 0) 1238 goto out; 1239 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1240 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1241 1242 out: 1243 nilfs_btree_free_path(path); 1244 return ret; 1245 } 1246 1247 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1248 struct nilfs_btree_path *path, 1249 int level, __u64 *keyp, __u64 *ptrp) 1250 { 1251 struct nilfs_btree_node *node; 1252 int ncblk; 1253 1254 if (level < nilfs_btree_height(btree) - 1) { 1255 node = nilfs_btree_get_nonroot_node(path, level); 1256 ncblk = nilfs_btree_nchildren_per_block(btree); 1257 nilfs_btree_node_delete(node, path[level].bp_index, 1258 keyp, ptrp, ncblk); 1259 if (!buffer_dirty(path[level].bp_bh)) 1260 mark_buffer_dirty(path[level].bp_bh); 1261 if (path[level].bp_index == 0) 1262 nilfs_btree_promote_key(btree, path, level + 1, 1263 nilfs_btree_node_get_key(node, 0)); 1264 } else { 1265 node = nilfs_btree_get_root(btree); 1266 nilfs_btree_node_delete(node, path[level].bp_index, 1267 keyp, ptrp, 1268 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1269 } 1270 } 1271 1272 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1273 struct nilfs_btree_path *path, 1274 int level, __u64 *keyp, __u64 *ptrp) 1275 { 1276 struct nilfs_btree_node *node, *left; 1277 int nchildren, lnchildren, n, ncblk; 1278 1279 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1280 1281 node = nilfs_btree_get_nonroot_node(path, level); 1282 left = nilfs_btree_get_sib_node(path, level); 1283 nchildren = nilfs_btree_node_get_nchildren(node); 1284 lnchildren = nilfs_btree_node_get_nchildren(left); 1285 ncblk = nilfs_btree_nchildren_per_block(btree); 1286 1287 n = (nchildren + lnchildren) / 2 - nchildren; 1288 1289 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1290 1291 if (!buffer_dirty(path[level].bp_bh)) 1292 mark_buffer_dirty(path[level].bp_bh); 1293 if (!buffer_dirty(path[level].bp_sib_bh)) 1294 mark_buffer_dirty(path[level].bp_sib_bh); 1295 1296 nilfs_btree_promote_key(btree, path, level + 1, 1297 nilfs_btree_node_get_key(node, 0)); 1298 1299 brelse(path[level].bp_sib_bh); 1300 path[level].bp_sib_bh = NULL; 1301 path[level].bp_index += n; 1302 } 1303 1304 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1305 struct nilfs_btree_path *path, 1306 int level, __u64 *keyp, __u64 *ptrp) 1307 { 1308 struct nilfs_btree_node *node, *right; 1309 int nchildren, rnchildren, n, ncblk; 1310 1311 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1312 1313 node = nilfs_btree_get_nonroot_node(path, level); 1314 right = nilfs_btree_get_sib_node(path, level); 1315 nchildren = nilfs_btree_node_get_nchildren(node); 1316 rnchildren = nilfs_btree_node_get_nchildren(right); 1317 ncblk = nilfs_btree_nchildren_per_block(btree); 1318 1319 n = (nchildren + rnchildren) / 2 - nchildren; 1320 1321 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1322 1323 if (!buffer_dirty(path[level].bp_bh)) 1324 mark_buffer_dirty(path[level].bp_bh); 1325 if (!buffer_dirty(path[level].bp_sib_bh)) 1326 mark_buffer_dirty(path[level].bp_sib_bh); 1327 1328 path[level + 1].bp_index++; 1329 nilfs_btree_promote_key(btree, path, level + 1, 1330 nilfs_btree_node_get_key(right, 0)); 1331 path[level + 1].bp_index--; 1332 1333 brelse(path[level].bp_sib_bh); 1334 path[level].bp_sib_bh = NULL; 1335 } 1336 1337 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1338 struct nilfs_btree_path *path, 1339 int level, __u64 *keyp, __u64 *ptrp) 1340 { 1341 struct nilfs_btree_node *node, *left; 1342 int n, ncblk; 1343 1344 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1345 1346 node = nilfs_btree_get_nonroot_node(path, level); 1347 left = nilfs_btree_get_sib_node(path, level); 1348 ncblk = nilfs_btree_nchildren_per_block(btree); 1349 1350 n = nilfs_btree_node_get_nchildren(node); 1351 1352 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1353 1354 if (!buffer_dirty(path[level].bp_sib_bh)) 1355 mark_buffer_dirty(path[level].bp_sib_bh); 1356 1357 nilfs_btnode_delete(path[level].bp_bh); 1358 path[level].bp_bh = path[level].bp_sib_bh; 1359 path[level].bp_sib_bh = NULL; 1360 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1361 } 1362 1363 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1364 struct nilfs_btree_path *path, 1365 int level, __u64 *keyp, __u64 *ptrp) 1366 { 1367 struct nilfs_btree_node *node, *right; 1368 int n, ncblk; 1369 1370 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1371 1372 node = nilfs_btree_get_nonroot_node(path, level); 1373 right = nilfs_btree_get_sib_node(path, level); 1374 ncblk = nilfs_btree_nchildren_per_block(btree); 1375 1376 n = nilfs_btree_node_get_nchildren(right); 1377 1378 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1379 1380 if (!buffer_dirty(path[level].bp_bh)) 1381 mark_buffer_dirty(path[level].bp_bh); 1382 1383 nilfs_btnode_delete(path[level].bp_sib_bh); 1384 path[level].bp_sib_bh = NULL; 1385 path[level + 1].bp_index++; 1386 } 1387 1388 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1389 struct nilfs_btree_path *path, 1390 int level, __u64 *keyp, __u64 *ptrp) 1391 { 1392 struct nilfs_btree_node *root, *child; 1393 int n, ncblk; 1394 1395 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1396 1397 root = nilfs_btree_get_root(btree); 1398 child = nilfs_btree_get_nonroot_node(path, level); 1399 ncblk = nilfs_btree_nchildren_per_block(btree); 1400 1401 nilfs_btree_node_delete(root, 0, NULL, NULL, 1402 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1403 nilfs_btree_node_set_level(root, level); 1404 n = nilfs_btree_node_get_nchildren(child); 1405 nilfs_btree_node_move_left(root, child, n, 1406 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1407 1408 nilfs_btnode_delete(path[level].bp_bh); 1409 path[level].bp_bh = NULL; 1410 } 1411 1412 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1413 struct nilfs_btree_path *path, 1414 int level, __u64 *keyp, __u64 *ptrp) 1415 { 1416 } 1417 1418 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1419 struct nilfs_btree_path *path, 1420 int *levelp, 1421 struct nilfs_bmap_stats *stats, 1422 struct inode *dat) 1423 { 1424 struct buffer_head *bh; 1425 struct nilfs_btree_node *node, *parent, *sib; 1426 __u64 sibptr; 1427 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1428 1429 ret = 0; 1430 stats->bs_nblocks = 0; 1431 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1432 ncblk = nilfs_btree_nchildren_per_block(btree); 1433 1434 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1435 level < nilfs_btree_height(btree) - 1; 1436 level++) { 1437 node = nilfs_btree_get_nonroot_node(path, level); 1438 path[level].bp_oldreq.bpr_ptr = 1439 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1440 ret = nilfs_bmap_prepare_end_ptr(btree, 1441 &path[level].bp_oldreq, dat); 1442 if (ret < 0) 1443 goto err_out_child_node; 1444 1445 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1446 path[level].bp_op = nilfs_btree_do_delete; 1447 stats->bs_nblocks++; 1448 goto out; 1449 } 1450 1451 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1452 pindex = path[level + 1].bp_index; 1453 dindex = pindex; 1454 1455 if (pindex > 0) { 1456 /* left sibling */ 1457 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1458 ncmax); 1459 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1460 if (ret < 0) 1461 goto err_out_curr_node; 1462 sib = (struct nilfs_btree_node *)bh->b_data; 1463 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1464 path[level].bp_sib_bh = bh; 1465 path[level].bp_op = nilfs_btree_borrow_left; 1466 stats->bs_nblocks++; 1467 goto out; 1468 } else { 1469 path[level].bp_sib_bh = bh; 1470 path[level].bp_op = nilfs_btree_concat_left; 1471 stats->bs_nblocks++; 1472 /* continue; */ 1473 } 1474 } else if (pindex < 1475 nilfs_btree_node_get_nchildren(parent) - 1) { 1476 /* right sibling */ 1477 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1478 ncmax); 1479 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1480 if (ret < 0) 1481 goto err_out_curr_node; 1482 sib = (struct nilfs_btree_node *)bh->b_data; 1483 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1484 path[level].bp_sib_bh = bh; 1485 path[level].bp_op = nilfs_btree_borrow_right; 1486 stats->bs_nblocks++; 1487 goto out; 1488 } else { 1489 path[level].bp_sib_bh = bh; 1490 path[level].bp_op = nilfs_btree_concat_right; 1491 stats->bs_nblocks++; 1492 /* 1493 * When merging right sibling node 1494 * into the current node, pointer to 1495 * the right sibling node must be 1496 * terminated instead. The adjustment 1497 * below is required for that. 1498 */ 1499 dindex = pindex + 1; 1500 /* continue; */ 1501 } 1502 } else { 1503 /* no siblings */ 1504 /* the only child of the root node */ 1505 WARN_ON(level != nilfs_btree_height(btree) - 2); 1506 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1507 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1508 path[level].bp_op = nilfs_btree_shrink; 1509 stats->bs_nblocks += 2; 1510 level++; 1511 path[level].bp_op = nilfs_btree_nop; 1512 goto shrink_root_child; 1513 } else { 1514 path[level].bp_op = nilfs_btree_do_delete; 1515 stats->bs_nblocks++; 1516 goto out; 1517 } 1518 } 1519 } 1520 1521 /* child of the root node is deleted */ 1522 path[level].bp_op = nilfs_btree_do_delete; 1523 stats->bs_nblocks++; 1524 1525 shrink_root_child: 1526 node = nilfs_btree_get_root(btree); 1527 path[level].bp_oldreq.bpr_ptr = 1528 nilfs_btree_node_get_ptr(node, dindex, 1529 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1530 1531 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1532 if (ret < 0) 1533 goto err_out_child_node; 1534 1535 /* success */ 1536 out: 1537 *levelp = level; 1538 return ret; 1539 1540 /* error */ 1541 err_out_curr_node: 1542 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1543 err_out_child_node: 1544 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1545 brelse(path[level].bp_sib_bh); 1546 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1547 } 1548 *levelp = level; 1549 stats->bs_nblocks = 0; 1550 return ret; 1551 } 1552 1553 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1554 struct nilfs_btree_path *path, 1555 int maxlevel, struct inode *dat) 1556 { 1557 int level; 1558 1559 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1560 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1561 path[level].bp_op(btree, path, level, NULL, NULL); 1562 } 1563 1564 if (!nilfs_bmap_dirty(btree)) 1565 nilfs_bmap_set_dirty(btree); 1566 } 1567 1568 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1569 1570 { 1571 struct nilfs_btree_path *path; 1572 struct nilfs_bmap_stats stats; 1573 struct inode *dat; 1574 int level, ret; 1575 1576 path = nilfs_btree_alloc_path(); 1577 if (path == NULL) 1578 return -ENOMEM; 1579 1580 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1581 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1582 if (ret < 0) 1583 goto out; 1584 1585 1586 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1587 1588 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1589 if (ret < 0) 1590 goto out; 1591 nilfs_btree_commit_delete(btree, path, level, dat); 1592 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1593 1594 out: 1595 nilfs_btree_free_path(path); 1596 return ret; 1597 } 1598 1599 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1600 __u64 *keyp) 1601 { 1602 struct nilfs_btree_path *path; 1603 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1604 int ret; 1605 1606 path = nilfs_btree_alloc_path(); 1607 if (!path) 1608 return -ENOMEM; 1609 1610 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1611 if (!ret) 1612 *keyp = start; 1613 else if (ret == -ENOENT) 1614 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1615 1616 nilfs_btree_free_path(path); 1617 return ret; 1618 } 1619 1620 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1621 { 1622 struct nilfs_btree_path *path; 1623 int ret; 1624 1625 path = nilfs_btree_alloc_path(); 1626 if (path == NULL) 1627 return -ENOMEM; 1628 1629 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1630 1631 nilfs_btree_free_path(path); 1632 1633 return ret; 1634 } 1635 1636 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1637 { 1638 struct buffer_head *bh; 1639 struct nilfs_btree_node *root, *node; 1640 __u64 maxkey, nextmaxkey; 1641 __u64 ptr; 1642 int nchildren, ret; 1643 1644 root = nilfs_btree_get_root(btree); 1645 switch (nilfs_btree_height(btree)) { 1646 case 2: 1647 bh = NULL; 1648 node = root; 1649 break; 1650 case 3: 1651 nchildren = nilfs_btree_node_get_nchildren(root); 1652 if (nchildren > 1) 1653 return 0; 1654 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1655 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1656 ret = nilfs_btree_get_block(btree, ptr, &bh); 1657 if (ret < 0) 1658 return ret; 1659 node = (struct nilfs_btree_node *)bh->b_data; 1660 break; 1661 default: 1662 return 0; 1663 } 1664 1665 nchildren = nilfs_btree_node_get_nchildren(node); 1666 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1667 nextmaxkey = (nchildren > 1) ? 1668 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1669 if (bh != NULL) 1670 brelse(bh); 1671 1672 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1673 } 1674 1675 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1676 __u64 *keys, __u64 *ptrs, int nitems) 1677 { 1678 struct buffer_head *bh; 1679 struct nilfs_btree_node *node, *root; 1680 __le64 *dkeys; 1681 __le64 *dptrs; 1682 __u64 ptr; 1683 int nchildren, ncmax, i, ret; 1684 1685 root = nilfs_btree_get_root(btree); 1686 switch (nilfs_btree_height(btree)) { 1687 case 2: 1688 bh = NULL; 1689 node = root; 1690 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1691 break; 1692 case 3: 1693 nchildren = nilfs_btree_node_get_nchildren(root); 1694 WARN_ON(nchildren > 1); 1695 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1696 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1697 ret = nilfs_btree_get_block(btree, ptr, &bh); 1698 if (ret < 0) 1699 return ret; 1700 node = (struct nilfs_btree_node *)bh->b_data; 1701 ncmax = nilfs_btree_nchildren_per_block(btree); 1702 break; 1703 default: 1704 node = NULL; 1705 return -EINVAL; 1706 } 1707 1708 nchildren = nilfs_btree_node_get_nchildren(node); 1709 if (nchildren < nitems) 1710 nitems = nchildren; 1711 dkeys = nilfs_btree_node_dkeys(node); 1712 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1713 for (i = 0; i < nitems; i++) { 1714 keys[i] = le64_to_cpu(dkeys[i]); 1715 ptrs[i] = le64_to_cpu(dptrs[i]); 1716 } 1717 1718 if (bh != NULL) 1719 brelse(bh); 1720 1721 return nitems; 1722 } 1723 1724 static int 1725 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1726 union nilfs_bmap_ptr_req *dreq, 1727 union nilfs_bmap_ptr_req *nreq, 1728 struct buffer_head **bhp, 1729 struct nilfs_bmap_stats *stats) 1730 { 1731 struct buffer_head *bh; 1732 struct inode *dat = NULL; 1733 int ret; 1734 1735 stats->bs_nblocks = 0; 1736 1737 /* for data */ 1738 /* cannot find near ptr */ 1739 if (NILFS_BMAP_USE_VBN(btree)) { 1740 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1741 dat = nilfs_bmap_get_dat(btree); 1742 } 1743 1744 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1745 if (ret < 0) 1746 return ret; 1747 1748 *bhp = NULL; 1749 stats->bs_nblocks++; 1750 if (nreq != NULL) { 1751 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1752 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1753 if (ret < 0) 1754 goto err_out_dreq; 1755 1756 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1757 if (ret < 0) 1758 goto err_out_nreq; 1759 1760 *bhp = bh; 1761 stats->bs_nblocks++; 1762 } 1763 1764 /* success */ 1765 return 0; 1766 1767 /* error */ 1768 err_out_nreq: 1769 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1770 err_out_dreq: 1771 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1772 stats->bs_nblocks = 0; 1773 return ret; 1774 1775 } 1776 1777 static void 1778 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1779 __u64 key, __u64 ptr, 1780 const __u64 *keys, const __u64 *ptrs, 1781 int n, 1782 union nilfs_bmap_ptr_req *dreq, 1783 union nilfs_bmap_ptr_req *nreq, 1784 struct buffer_head *bh) 1785 { 1786 struct nilfs_btree_node *node; 1787 struct inode *dat; 1788 __u64 tmpptr; 1789 int ncblk; 1790 1791 /* free resources */ 1792 if (btree->b_ops->bop_clear != NULL) 1793 btree->b_ops->bop_clear(btree); 1794 1795 /* ptr must be a pointer to a buffer head. */ 1796 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1797 1798 /* convert and insert */ 1799 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1800 __nilfs_btree_init(btree); 1801 if (nreq != NULL) { 1802 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1803 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1804 1805 /* create child node at level 1 */ 1806 node = (struct nilfs_btree_node *)bh->b_data; 1807 ncblk = nilfs_btree_nchildren_per_block(btree); 1808 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1809 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1810 if (!buffer_dirty(bh)) 1811 mark_buffer_dirty(bh); 1812 if (!nilfs_bmap_dirty(btree)) 1813 nilfs_bmap_set_dirty(btree); 1814 1815 brelse(bh); 1816 1817 /* create root node at level 2 */ 1818 node = nilfs_btree_get_root(btree); 1819 tmpptr = nreq->bpr_ptr; 1820 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1821 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1822 &keys[0], &tmpptr); 1823 } else { 1824 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1825 1826 /* create root node at level 1 */ 1827 node = nilfs_btree_get_root(btree); 1828 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1829 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1830 keys, ptrs); 1831 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1832 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1833 if (!nilfs_bmap_dirty(btree)) 1834 nilfs_bmap_set_dirty(btree); 1835 } 1836 1837 if (NILFS_BMAP_USE_VBN(btree)) 1838 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1839 } 1840 1841 /** 1842 * nilfs_btree_convert_and_insert - 1843 * @bmap: 1844 * @key: 1845 * @ptr: 1846 * @keys: 1847 * @ptrs: 1848 * @n: 1849 */ 1850 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1851 __u64 key, __u64 ptr, 1852 const __u64 *keys, const __u64 *ptrs, int n) 1853 { 1854 struct buffer_head *bh = NULL; 1855 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1856 struct nilfs_bmap_stats stats; 1857 int ret; 1858 1859 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1860 di = &dreq; 1861 ni = NULL; 1862 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1863 nilfs_btree_node_size(btree))) { 1864 di = &dreq; 1865 ni = &nreq; 1866 } else { 1867 di = NULL; 1868 ni = NULL; 1869 BUG(); 1870 } 1871 1872 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1873 &stats); 1874 if (ret < 0) 1875 return ret; 1876 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1877 di, ni, bh); 1878 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1879 return 0; 1880 } 1881 1882 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1883 struct nilfs_btree_path *path, 1884 int level, 1885 struct buffer_head *bh) 1886 { 1887 while ((++level < nilfs_btree_height(btree) - 1) && 1888 !buffer_dirty(path[level].bp_bh)) 1889 mark_buffer_dirty(path[level].bp_bh); 1890 1891 return 0; 1892 } 1893 1894 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1895 struct nilfs_btree_path *path, 1896 int level, struct inode *dat) 1897 { 1898 struct nilfs_btree_node *parent; 1899 int ncmax, ret; 1900 1901 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1902 path[level].bp_oldreq.bpr_ptr = 1903 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1904 ncmax); 1905 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1906 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1907 &path[level].bp_newreq.bpr_req); 1908 if (ret < 0) 1909 return ret; 1910 1911 if (buffer_nilfs_node(path[level].bp_bh)) { 1912 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1913 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1914 path[level].bp_ctxt.bh = path[level].bp_bh; 1915 ret = nilfs_btnode_prepare_change_key( 1916 &NILFS_BMAP_I(btree)->i_btnode_cache, 1917 &path[level].bp_ctxt); 1918 if (ret < 0) { 1919 nilfs_dat_abort_update(dat, 1920 &path[level].bp_oldreq.bpr_req, 1921 &path[level].bp_newreq.bpr_req); 1922 return ret; 1923 } 1924 } 1925 1926 return 0; 1927 } 1928 1929 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1930 struct nilfs_btree_path *path, 1931 int level, struct inode *dat) 1932 { 1933 struct nilfs_btree_node *parent; 1934 int ncmax; 1935 1936 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1937 &path[level].bp_newreq.bpr_req, 1938 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1939 1940 if (buffer_nilfs_node(path[level].bp_bh)) { 1941 nilfs_btnode_commit_change_key( 1942 &NILFS_BMAP_I(btree)->i_btnode_cache, 1943 &path[level].bp_ctxt); 1944 path[level].bp_bh = path[level].bp_ctxt.bh; 1945 } 1946 set_buffer_nilfs_volatile(path[level].bp_bh); 1947 1948 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1949 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1950 path[level].bp_newreq.bpr_ptr, ncmax); 1951 } 1952 1953 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1954 struct nilfs_btree_path *path, 1955 int level, struct inode *dat) 1956 { 1957 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1958 &path[level].bp_newreq.bpr_req); 1959 if (buffer_nilfs_node(path[level].bp_bh)) 1960 nilfs_btnode_abort_change_key( 1961 &NILFS_BMAP_I(btree)->i_btnode_cache, 1962 &path[level].bp_ctxt); 1963 } 1964 1965 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1966 struct nilfs_btree_path *path, 1967 int minlevel, int *maxlevelp, 1968 struct inode *dat) 1969 { 1970 int level, ret; 1971 1972 level = minlevel; 1973 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1974 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1975 if (ret < 0) 1976 return ret; 1977 } 1978 while ((++level < nilfs_btree_height(btree) - 1) && 1979 !buffer_dirty(path[level].bp_bh)) { 1980 1981 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1982 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1983 if (ret < 0) 1984 goto out; 1985 } 1986 1987 /* success */ 1988 *maxlevelp = level - 1; 1989 return 0; 1990 1991 /* error */ 1992 out: 1993 while (--level > minlevel) 1994 nilfs_btree_abort_update_v(btree, path, level, dat); 1995 if (!buffer_nilfs_volatile(path[level].bp_bh)) 1996 nilfs_btree_abort_update_v(btree, path, level, dat); 1997 return ret; 1998 } 1999 2000 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2001 struct nilfs_btree_path *path, 2002 int minlevel, int maxlevel, 2003 struct buffer_head *bh, 2004 struct inode *dat) 2005 { 2006 int level; 2007 2008 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2009 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2010 2011 for (level = minlevel + 1; level <= maxlevel; level++) 2012 nilfs_btree_commit_update_v(btree, path, level, dat); 2013 } 2014 2015 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2016 struct nilfs_btree_path *path, 2017 int level, struct buffer_head *bh) 2018 { 2019 int maxlevel = 0, ret; 2020 struct nilfs_btree_node *parent; 2021 struct inode *dat = nilfs_bmap_get_dat(btree); 2022 __u64 ptr; 2023 int ncmax; 2024 2025 get_bh(bh); 2026 path[level].bp_bh = bh; 2027 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2028 dat); 2029 if (ret < 0) 2030 goto out; 2031 2032 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2033 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2034 ptr = nilfs_btree_node_get_ptr(parent, 2035 path[level + 1].bp_index, 2036 ncmax); 2037 ret = nilfs_dat_mark_dirty(dat, ptr); 2038 if (ret < 0) 2039 goto out; 2040 } 2041 2042 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2043 2044 out: 2045 brelse(path[level].bp_bh); 2046 path[level].bp_bh = NULL; 2047 return ret; 2048 } 2049 2050 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2051 struct buffer_head *bh) 2052 { 2053 struct nilfs_btree_path *path; 2054 struct nilfs_btree_node *node; 2055 __u64 key; 2056 int level, ret; 2057 2058 WARN_ON(!buffer_dirty(bh)); 2059 2060 path = nilfs_btree_alloc_path(); 2061 if (path == NULL) 2062 return -ENOMEM; 2063 2064 if (buffer_nilfs_node(bh)) { 2065 node = (struct nilfs_btree_node *)bh->b_data; 2066 key = nilfs_btree_node_get_key(node, 0); 2067 level = nilfs_btree_node_get_level(node); 2068 } else { 2069 key = nilfs_bmap_data_get_key(btree, bh); 2070 level = NILFS_BTREE_LEVEL_DATA; 2071 } 2072 2073 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2074 if (ret < 0) { 2075 if (unlikely(ret == -ENOENT)) 2076 nilfs_crit(btree->b_inode->i_sb, 2077 "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", 2078 btree->b_inode->i_ino, 2079 (unsigned long long)key, level); 2080 goto out; 2081 } 2082 2083 ret = NILFS_BMAP_USE_VBN(btree) ? 2084 nilfs_btree_propagate_v(btree, path, level, bh) : 2085 nilfs_btree_propagate_p(btree, path, level, bh); 2086 2087 out: 2088 nilfs_btree_free_path(path); 2089 2090 return ret; 2091 } 2092 2093 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2094 struct buffer_head *bh) 2095 { 2096 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2097 } 2098 2099 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2100 struct list_head *lists, 2101 struct buffer_head *bh) 2102 { 2103 struct list_head *head; 2104 struct buffer_head *cbh; 2105 struct nilfs_btree_node *node, *cnode; 2106 __u64 key, ckey; 2107 int level; 2108 2109 get_bh(bh); 2110 node = (struct nilfs_btree_node *)bh->b_data; 2111 key = nilfs_btree_node_get_key(node, 0); 2112 level = nilfs_btree_node_get_level(node); 2113 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2114 level >= NILFS_BTREE_LEVEL_MAX) { 2115 dump_stack(); 2116 nilfs_warn(btree->b_inode->i_sb, 2117 "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", 2118 level, (unsigned long long)key, 2119 btree->b_inode->i_ino, 2120 (unsigned long long)bh->b_blocknr); 2121 return; 2122 } 2123 2124 list_for_each(head, &lists[level]) { 2125 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2126 cnode = (struct nilfs_btree_node *)cbh->b_data; 2127 ckey = nilfs_btree_node_get_key(cnode, 0); 2128 if (key < ckey) 2129 break; 2130 } 2131 list_add_tail(&bh->b_assoc_buffers, head); 2132 } 2133 2134 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2135 struct list_head *listp) 2136 { 2137 struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; 2138 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2139 struct pagevec pvec; 2140 struct buffer_head *bh, *head; 2141 pgoff_t index = 0; 2142 int level, i; 2143 2144 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2145 level < NILFS_BTREE_LEVEL_MAX; 2146 level++) 2147 INIT_LIST_HEAD(&lists[level]); 2148 2149 pagevec_init(&pvec); 2150 2151 while (pagevec_lookup_tag(&pvec, btcache, &index, 2152 PAGECACHE_TAG_DIRTY)) { 2153 for (i = 0; i < pagevec_count(&pvec); i++) { 2154 bh = head = page_buffers(pvec.pages[i]); 2155 do { 2156 if (buffer_dirty(bh)) 2157 nilfs_btree_add_dirty_buffer(btree, 2158 lists, bh); 2159 } while ((bh = bh->b_this_page) != head); 2160 } 2161 pagevec_release(&pvec); 2162 cond_resched(); 2163 } 2164 2165 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2166 level < NILFS_BTREE_LEVEL_MAX; 2167 level++) 2168 list_splice_tail(&lists[level], listp); 2169 } 2170 2171 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2172 struct nilfs_btree_path *path, 2173 int level, 2174 struct buffer_head **bh, 2175 sector_t blocknr, 2176 union nilfs_binfo *binfo) 2177 { 2178 struct nilfs_btree_node *parent; 2179 __u64 key; 2180 __u64 ptr; 2181 int ncmax, ret; 2182 2183 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2184 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2185 ncmax); 2186 if (buffer_nilfs_node(*bh)) { 2187 path[level].bp_ctxt.oldkey = ptr; 2188 path[level].bp_ctxt.newkey = blocknr; 2189 path[level].bp_ctxt.bh = *bh; 2190 ret = nilfs_btnode_prepare_change_key( 2191 &NILFS_BMAP_I(btree)->i_btnode_cache, 2192 &path[level].bp_ctxt); 2193 if (ret < 0) 2194 return ret; 2195 nilfs_btnode_commit_change_key( 2196 &NILFS_BMAP_I(btree)->i_btnode_cache, 2197 &path[level].bp_ctxt); 2198 *bh = path[level].bp_ctxt.bh; 2199 } 2200 2201 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2202 ncmax); 2203 2204 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2205 /* on-disk format */ 2206 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2207 binfo->bi_dat.bi_level = level; 2208 2209 return 0; 2210 } 2211 2212 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2213 struct nilfs_btree_path *path, 2214 int level, 2215 struct buffer_head **bh, 2216 sector_t blocknr, 2217 union nilfs_binfo *binfo) 2218 { 2219 struct nilfs_btree_node *parent; 2220 struct inode *dat = nilfs_bmap_get_dat(btree); 2221 __u64 key; 2222 __u64 ptr; 2223 union nilfs_bmap_ptr_req req; 2224 int ncmax, ret; 2225 2226 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2227 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2228 ncmax); 2229 req.bpr_ptr = ptr; 2230 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2231 if (ret < 0) 2232 return ret; 2233 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 2234 2235 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2236 /* on-disk format */ 2237 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); 2238 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2239 2240 return 0; 2241 } 2242 2243 static int nilfs_btree_assign(struct nilfs_bmap *btree, 2244 struct buffer_head **bh, 2245 sector_t blocknr, 2246 union nilfs_binfo *binfo) 2247 { 2248 struct nilfs_btree_path *path; 2249 struct nilfs_btree_node *node; 2250 __u64 key; 2251 int level, ret; 2252 2253 path = nilfs_btree_alloc_path(); 2254 if (path == NULL) 2255 return -ENOMEM; 2256 2257 if (buffer_nilfs_node(*bh)) { 2258 node = (struct nilfs_btree_node *)(*bh)->b_data; 2259 key = nilfs_btree_node_get_key(node, 0); 2260 level = nilfs_btree_node_get_level(node); 2261 } else { 2262 key = nilfs_bmap_data_get_key(btree, *bh); 2263 level = NILFS_BTREE_LEVEL_DATA; 2264 } 2265 2266 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2267 if (ret < 0) { 2268 WARN_ON(ret == -ENOENT); 2269 goto out; 2270 } 2271 2272 ret = NILFS_BMAP_USE_VBN(btree) ? 2273 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2274 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2275 2276 out: 2277 nilfs_btree_free_path(path); 2278 2279 return ret; 2280 } 2281 2282 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, 2283 struct buffer_head **bh, 2284 sector_t blocknr, 2285 union nilfs_binfo *binfo) 2286 { 2287 struct nilfs_btree_node *node; 2288 __u64 key; 2289 int ret; 2290 2291 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, 2292 blocknr); 2293 if (ret < 0) 2294 return ret; 2295 2296 if (buffer_nilfs_node(*bh)) { 2297 node = (struct nilfs_btree_node *)(*bh)->b_data; 2298 key = nilfs_btree_node_get_key(node, 0); 2299 } else 2300 key = nilfs_bmap_data_get_key(btree, *bh); 2301 2302 /* on-disk format */ 2303 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2304 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2305 2306 return 0; 2307 } 2308 2309 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) 2310 { 2311 struct buffer_head *bh; 2312 struct nilfs_btree_path *path; 2313 __u64 ptr; 2314 int ret; 2315 2316 path = nilfs_btree_alloc_path(); 2317 if (path == NULL) 2318 return -ENOMEM; 2319 2320 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); 2321 if (ret < 0) { 2322 WARN_ON(ret == -ENOENT); 2323 goto out; 2324 } 2325 ret = nilfs_btree_get_block(btree, ptr, &bh); 2326 if (ret < 0) { 2327 WARN_ON(ret == -ENOENT); 2328 goto out; 2329 } 2330 2331 if (!buffer_dirty(bh)) 2332 mark_buffer_dirty(bh); 2333 brelse(bh); 2334 if (!nilfs_bmap_dirty(btree)) 2335 nilfs_bmap_set_dirty(btree); 2336 2337 out: 2338 nilfs_btree_free_path(path); 2339 return ret; 2340 } 2341 2342 static const struct nilfs_bmap_operations nilfs_btree_ops = { 2343 .bop_lookup = nilfs_btree_lookup, 2344 .bop_lookup_contig = nilfs_btree_lookup_contig, 2345 .bop_insert = nilfs_btree_insert, 2346 .bop_delete = nilfs_btree_delete, 2347 .bop_clear = NULL, 2348 2349 .bop_propagate = nilfs_btree_propagate, 2350 2351 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2352 2353 .bop_assign = nilfs_btree_assign, 2354 .bop_mark = nilfs_btree_mark, 2355 2356 .bop_seek_key = nilfs_btree_seek_key, 2357 .bop_last_key = nilfs_btree_last_key, 2358 2359 .bop_check_insert = NULL, 2360 .bop_check_delete = nilfs_btree_check_delete, 2361 .bop_gather_data = nilfs_btree_gather_data, 2362 }; 2363 2364 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2365 .bop_lookup = NULL, 2366 .bop_lookup_contig = NULL, 2367 .bop_insert = NULL, 2368 .bop_delete = NULL, 2369 .bop_clear = NULL, 2370 2371 .bop_propagate = nilfs_btree_propagate_gc, 2372 2373 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2374 2375 .bop_assign = nilfs_btree_assign_gc, 2376 .bop_mark = NULL, 2377 2378 .bop_seek_key = NULL, 2379 .bop_last_key = NULL, 2380 2381 .bop_check_insert = NULL, 2382 .bop_check_delete = NULL, 2383 .bop_gather_data = NULL, 2384 }; 2385 2386 static void __nilfs_btree_init(struct nilfs_bmap *bmap) 2387 { 2388 bmap->b_ops = &nilfs_btree_ops; 2389 bmap->b_nchildren_per_block = 2390 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2391 } 2392 2393 int nilfs_btree_init(struct nilfs_bmap *bmap) 2394 { 2395 int ret = 0; 2396 2397 __nilfs_btree_init(bmap); 2398 2399 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) 2400 ret = -EIO; 2401 return ret; 2402 } 2403 2404 void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2405 { 2406 bmap->b_ops = &nilfs_btree_ops_gc; 2407 bmap->b_nchildren_per_block = 2408 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2409 } 2410