1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * NILFS B-tree. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/string.h> 12 #include <linux/errno.h> 13 #include <linux/pagevec.h> 14 #include "nilfs.h" 15 #include "page.h" 16 #include "btnode.h" 17 #include "btree.h" 18 #include "alloc.h" 19 #include "dat.h" 20 21 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 22 23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 24 { 25 struct nilfs_btree_path *path; 26 int level = NILFS_BTREE_LEVEL_DATA; 27 28 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 29 if (path == NULL) 30 goto out; 31 32 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 33 path[level].bp_bh = NULL; 34 path[level].bp_sib_bh = NULL; 35 path[level].bp_index = 0; 36 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 37 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 38 path[level].bp_op = NULL; 39 } 40 41 out: 42 return path; 43 } 44 45 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 46 { 47 int level = NILFS_BTREE_LEVEL_DATA; 48 49 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 50 brelse(path[level].bp_bh); 51 52 kmem_cache_free(nilfs_btree_path_cache, path); 53 } 54 55 /* 56 * B-tree node operations 57 */ 58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 59 __u64 ptr, struct buffer_head **bhp) 60 { 61 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 62 struct address_space *btnc = btnc_inode->i_mapping; 63 struct buffer_head *bh; 64 65 bh = nilfs_btnode_create_block(btnc, ptr); 66 if (!bh) 67 return -ENOMEM; 68 69 set_buffer_nilfs_volatile(bh); 70 *bhp = bh; 71 return 0; 72 } 73 74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 75 { 76 return node->bn_flags; 77 } 78 79 static void 80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 81 { 82 node->bn_flags = flags; 83 } 84 85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 86 { 87 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 88 } 89 90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 91 { 92 return node->bn_level; 93 } 94 95 static void 96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 97 { 98 node->bn_level = level; 99 } 100 101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 102 { 103 return le16_to_cpu(node->bn_nchildren); 104 } 105 106 static void 107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 108 { 109 node->bn_nchildren = cpu_to_le16(nchildren); 110 } 111 112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 113 { 114 return i_blocksize(btree->b_inode); 115 } 116 117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 118 { 119 return btree->b_nchildren_per_block; 120 } 121 122 static __le64 * 123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 124 { 125 return (__le64 *)((char *)(node + 1) + 126 (nilfs_btree_node_root(node) ? 127 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 128 } 129 130 static __le64 * 131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 132 { 133 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 134 } 135 136 static __u64 137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 138 { 139 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 140 } 141 142 static void 143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 144 { 145 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 146 } 147 148 static __u64 149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 150 int ncmax) 151 { 152 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 153 } 154 155 static void 156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 157 int ncmax) 158 { 159 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 160 } 161 162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 163 int level, int nchildren, int ncmax, 164 const __u64 *keys, const __u64 *ptrs) 165 { 166 __le64 *dkeys; 167 __le64 *dptrs; 168 int i; 169 170 nilfs_btree_node_set_flags(node, flags); 171 nilfs_btree_node_set_level(node, level); 172 nilfs_btree_node_set_nchildren(node, nchildren); 173 174 dkeys = nilfs_btree_node_dkeys(node); 175 dptrs = nilfs_btree_node_dptrs(node, ncmax); 176 for (i = 0; i < nchildren; i++) { 177 dkeys[i] = cpu_to_le64(keys[i]); 178 dptrs[i] = cpu_to_le64(ptrs[i]); 179 } 180 } 181 182 /* Assume the buffer heads corresponding to left and right are locked. */ 183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 184 struct nilfs_btree_node *right, 185 int n, int lncmax, int rncmax) 186 { 187 __le64 *ldkeys, *rdkeys; 188 __le64 *ldptrs, *rdptrs; 189 int lnchildren, rnchildren; 190 191 ldkeys = nilfs_btree_node_dkeys(left); 192 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 193 lnchildren = nilfs_btree_node_get_nchildren(left); 194 195 rdkeys = nilfs_btree_node_dkeys(right); 196 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 197 rnchildren = nilfs_btree_node_get_nchildren(right); 198 199 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 200 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 201 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 202 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 203 204 lnchildren += n; 205 rnchildren -= n; 206 nilfs_btree_node_set_nchildren(left, lnchildren); 207 nilfs_btree_node_set_nchildren(right, rnchildren); 208 } 209 210 /* Assume that the buffer heads corresponding to left and right are locked. */ 211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 212 struct nilfs_btree_node *right, 213 int n, int lncmax, int rncmax) 214 { 215 __le64 *ldkeys, *rdkeys; 216 __le64 *ldptrs, *rdptrs; 217 int lnchildren, rnchildren; 218 219 ldkeys = nilfs_btree_node_dkeys(left); 220 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 221 lnchildren = nilfs_btree_node_get_nchildren(left); 222 223 rdkeys = nilfs_btree_node_dkeys(right); 224 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 225 rnchildren = nilfs_btree_node_get_nchildren(right); 226 227 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 228 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 229 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 230 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 231 232 lnchildren -= n; 233 rnchildren += n; 234 nilfs_btree_node_set_nchildren(left, lnchildren); 235 nilfs_btree_node_set_nchildren(right, rnchildren); 236 } 237 238 /* Assume that the buffer head corresponding to node is locked. */ 239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 240 __u64 key, __u64 ptr, int ncmax) 241 { 242 __le64 *dkeys; 243 __le64 *dptrs; 244 int nchildren; 245 246 dkeys = nilfs_btree_node_dkeys(node); 247 dptrs = nilfs_btree_node_dptrs(node, ncmax); 248 nchildren = nilfs_btree_node_get_nchildren(node); 249 if (index < nchildren) { 250 memmove(dkeys + index + 1, dkeys + index, 251 (nchildren - index) * sizeof(*dkeys)); 252 memmove(dptrs + index + 1, dptrs + index, 253 (nchildren - index) * sizeof(*dptrs)); 254 } 255 dkeys[index] = cpu_to_le64(key); 256 dptrs[index] = cpu_to_le64(ptr); 257 nchildren++; 258 nilfs_btree_node_set_nchildren(node, nchildren); 259 } 260 261 /* Assume that the buffer head corresponding to node is locked. */ 262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 263 __u64 *keyp, __u64 *ptrp, int ncmax) 264 { 265 __u64 key; 266 __u64 ptr; 267 __le64 *dkeys; 268 __le64 *dptrs; 269 int nchildren; 270 271 dkeys = nilfs_btree_node_dkeys(node); 272 dptrs = nilfs_btree_node_dptrs(node, ncmax); 273 key = le64_to_cpu(dkeys[index]); 274 ptr = le64_to_cpu(dptrs[index]); 275 nchildren = nilfs_btree_node_get_nchildren(node); 276 if (keyp != NULL) 277 *keyp = key; 278 if (ptrp != NULL) 279 *ptrp = ptr; 280 281 if (index < nchildren - 1) { 282 memmove(dkeys + index, dkeys + index + 1, 283 (nchildren - index - 1) * sizeof(*dkeys)); 284 memmove(dptrs + index, dptrs + index + 1, 285 (nchildren - index - 1) * sizeof(*dptrs)); 286 } 287 nchildren--; 288 nilfs_btree_node_set_nchildren(node, nchildren); 289 } 290 291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 292 __u64 key, int *indexp) 293 { 294 __u64 nkey; 295 int index, low, high, s; 296 297 /* binary search */ 298 low = 0; 299 high = nilfs_btree_node_get_nchildren(node) - 1; 300 index = 0; 301 s = 0; 302 while (low <= high) { 303 index = (low + high) / 2; 304 nkey = nilfs_btree_node_get_key(node, index); 305 if (nkey == key) { 306 s = 0; 307 goto out; 308 } else if (nkey < key) { 309 low = index + 1; 310 s = -1; 311 } else { 312 high = index - 1; 313 s = 1; 314 } 315 } 316 317 /* adjust index */ 318 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 319 if (s > 0 && index > 0) 320 index--; 321 } else if (s < 0) 322 index++; 323 324 out: 325 *indexp = index; 326 327 return s == 0; 328 } 329 330 /** 331 * nilfs_btree_node_broken - verify consistency of btree node 332 * @node: btree node block to be examined 333 * @size: node size (in bytes) 334 * @inode: host inode of btree 335 * @blocknr: block number 336 * 337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 338 */ 339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 340 size_t size, struct inode *inode, 341 sector_t blocknr) 342 { 343 int level, flags, nchildren; 344 int ret = 0; 345 346 level = nilfs_btree_node_get_level(node); 347 flags = nilfs_btree_node_get_flags(node); 348 nchildren = nilfs_btree_node_get_nchildren(node); 349 350 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 351 level >= NILFS_BTREE_LEVEL_MAX || 352 (flags & NILFS_BTREE_NODE_ROOT) || 353 nchildren < 0 || 354 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 355 nilfs_crit(inode->i_sb, 356 "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", 357 inode->i_ino, (unsigned long long)blocknr, level, 358 flags, nchildren); 359 ret = 1; 360 } 361 return ret; 362 } 363 364 /** 365 * nilfs_btree_root_broken - verify consistency of btree root node 366 * @node: btree root node to be examined 367 * @inode: host inode of btree 368 * 369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 370 */ 371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 372 struct inode *inode) 373 { 374 int level, flags, nchildren; 375 int ret = 0; 376 377 level = nilfs_btree_node_get_level(node); 378 flags = nilfs_btree_node_get_flags(node); 379 nchildren = nilfs_btree_node_get_nchildren(node); 380 381 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 382 level >= NILFS_BTREE_LEVEL_MAX || 383 nchildren < 0 || 384 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 385 nilfs_crit(inode->i_sb, 386 "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", 387 inode->i_ino, level, flags, nchildren); 388 ret = 1; 389 } 390 return ret; 391 } 392 393 int nilfs_btree_broken_node_block(struct buffer_head *bh) 394 { 395 struct inode *inode; 396 int ret; 397 398 if (buffer_nilfs_checked(bh)) 399 return 0; 400 401 inode = bh->b_folio->mapping->host; 402 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 403 bh->b_size, inode, bh->b_blocknr); 404 if (likely(!ret)) 405 set_buffer_nilfs_checked(bh); 406 return ret; 407 } 408 409 static struct nilfs_btree_node * 410 nilfs_btree_get_root(const struct nilfs_bmap *btree) 411 { 412 return (struct nilfs_btree_node *)btree->b_u.u_data; 413 } 414 415 static struct nilfs_btree_node * 416 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 417 { 418 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 419 } 420 421 static struct nilfs_btree_node * 422 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 423 { 424 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 425 } 426 427 static int nilfs_btree_height(const struct nilfs_bmap *btree) 428 { 429 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 430 } 431 432 static struct nilfs_btree_node * 433 nilfs_btree_get_node(const struct nilfs_bmap *btree, 434 const struct nilfs_btree_path *path, 435 int level, int *ncmaxp) 436 { 437 struct nilfs_btree_node *node; 438 439 if (level == nilfs_btree_height(btree) - 1) { 440 node = nilfs_btree_get_root(btree); 441 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 442 } else { 443 node = nilfs_btree_get_nonroot_node(path, level); 444 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 445 } 446 return node; 447 } 448 449 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, 450 struct nilfs_btree_node *node, int level) 451 { 452 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 453 dump_stack(); 454 nilfs_crit(btree->b_inode->i_sb, 455 "btree level mismatch (ino=%lu): %d != %d", 456 btree->b_inode->i_ino, 457 nilfs_btree_node_get_level(node), level); 458 return 1; 459 } 460 return 0; 461 } 462 463 struct nilfs_btree_readahead_info { 464 struct nilfs_btree_node *node; /* parent node */ 465 int max_ra_blocks; /* max nof blocks to read ahead */ 466 int index; /* current index on the parent node */ 467 int ncmax; /* nof children in the parent node */ 468 }; 469 470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 471 struct buffer_head **bhp, 472 const struct nilfs_btree_readahead_info *ra) 473 { 474 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 475 struct address_space *btnc = btnc_inode->i_mapping; 476 struct buffer_head *bh, *ra_bh; 477 sector_t submit_ptr = 0; 478 int ret; 479 480 ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, 481 &submit_ptr); 482 if (ret) { 483 if (likely(ret == -EEXIST)) 484 goto out_check; 485 if (ret == -ENOENT) { 486 /* 487 * Block address translation failed due to invalid 488 * value of 'ptr'. In this case, return internal code 489 * -EINVAL (broken bmap) to notify bmap layer of fatal 490 * metadata corruption. 491 */ 492 ret = -EINVAL; 493 } 494 return ret; 495 } 496 497 if (ra) { 498 int i, n; 499 __u64 ptr2; 500 501 /* read ahead sibling nodes */ 502 for (n = ra->max_ra_blocks, i = ra->index + 1; 503 n > 0 && i < ra->ncmax; n--, i++) { 504 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 505 506 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, 507 REQ_OP_READ | REQ_RAHEAD, 508 &ra_bh, &submit_ptr); 509 if (likely(!ret || ret == -EEXIST)) 510 brelse(ra_bh); 511 else if (ret != -EBUSY) 512 break; 513 if (!buffer_locked(bh)) 514 goto out_no_wait; 515 } 516 } 517 518 wait_on_buffer(bh); 519 520 out_no_wait: 521 if (!buffer_uptodate(bh)) { 522 nilfs_err(btree->b_inode->i_sb, 523 "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", 524 btree->b_inode->i_ino, (unsigned long long)ptr); 525 brelse(bh); 526 return -EIO; 527 } 528 529 out_check: 530 if (nilfs_btree_broken_node_block(bh)) { 531 clear_buffer_uptodate(bh); 532 brelse(bh); 533 return -EINVAL; 534 } 535 536 *bhp = bh; 537 return 0; 538 } 539 540 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 541 struct buffer_head **bhp) 542 { 543 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 544 } 545 546 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 547 struct nilfs_btree_path *path, 548 __u64 key, __u64 *ptrp, int minlevel, 549 int readahead) 550 { 551 struct nilfs_btree_node *node; 552 struct nilfs_btree_readahead_info p, *ra; 553 __u64 ptr; 554 int level, index, found, ncmax, ret; 555 556 node = nilfs_btree_get_root(btree); 557 level = nilfs_btree_node_get_level(node); 558 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 559 return -ENOENT; 560 561 found = nilfs_btree_node_lookup(node, key, &index); 562 ptr = nilfs_btree_node_get_ptr(node, index, 563 NILFS_BTREE_ROOT_NCHILDREN_MAX); 564 path[level].bp_bh = NULL; 565 path[level].bp_index = index; 566 567 ncmax = nilfs_btree_nchildren_per_block(btree); 568 569 while (--level >= minlevel) { 570 ra = NULL; 571 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 572 p.node = nilfs_btree_get_node(btree, path, level + 1, 573 &p.ncmax); 574 p.index = index; 575 p.max_ra_blocks = 7; 576 ra = &p; 577 } 578 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 579 ra); 580 if (ret < 0) 581 return ret; 582 583 node = nilfs_btree_get_nonroot_node(path, level); 584 if (nilfs_btree_bad_node(btree, node, level)) 585 return -EINVAL; 586 if (!found) 587 found = nilfs_btree_node_lookup(node, key, &index); 588 else 589 index = 0; 590 if (index < ncmax) { 591 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 592 } else { 593 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 594 /* insert */ 595 ptr = NILFS_BMAP_INVALID_PTR; 596 } 597 path[level].bp_index = index; 598 } 599 if (!found) 600 return -ENOENT; 601 602 if (ptrp != NULL) 603 *ptrp = ptr; 604 605 return 0; 606 } 607 608 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 609 struct nilfs_btree_path *path, 610 __u64 *keyp, __u64 *ptrp) 611 { 612 struct nilfs_btree_node *node; 613 __u64 ptr; 614 int index, level, ncmax, ret; 615 616 node = nilfs_btree_get_root(btree); 617 index = nilfs_btree_node_get_nchildren(node) - 1; 618 if (index < 0) 619 return -ENOENT; 620 level = nilfs_btree_node_get_level(node); 621 ptr = nilfs_btree_node_get_ptr(node, index, 622 NILFS_BTREE_ROOT_NCHILDREN_MAX); 623 path[level].bp_bh = NULL; 624 path[level].bp_index = index; 625 ncmax = nilfs_btree_nchildren_per_block(btree); 626 627 for (level--; level > 0; level--) { 628 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 629 if (ret < 0) 630 return ret; 631 node = nilfs_btree_get_nonroot_node(path, level); 632 if (nilfs_btree_bad_node(btree, node, level)) 633 return -EINVAL; 634 index = nilfs_btree_node_get_nchildren(node) - 1; 635 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 636 path[level].bp_index = index; 637 } 638 639 if (keyp != NULL) 640 *keyp = nilfs_btree_node_get_key(node, index); 641 if (ptrp != NULL) 642 *ptrp = ptr; 643 644 return 0; 645 } 646 647 /** 648 * nilfs_btree_get_next_key - get next valid key from btree path array 649 * @btree: bmap struct of btree 650 * @path: array of nilfs_btree_path struct 651 * @minlevel: start level 652 * @nextkey: place to store the next valid key 653 * 654 * Return Value: If a next key was found, 0 is returned. Otherwise, 655 * -ENOENT is returned. 656 */ 657 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 658 const struct nilfs_btree_path *path, 659 int minlevel, __u64 *nextkey) 660 { 661 struct nilfs_btree_node *node; 662 int maxlevel = nilfs_btree_height(btree) - 1; 663 int index, next_adj, level; 664 665 /* Next index is already set to bp_index for leaf nodes. */ 666 next_adj = 0; 667 for (level = minlevel; level <= maxlevel; level++) { 668 if (level == maxlevel) 669 node = nilfs_btree_get_root(btree); 670 else 671 node = nilfs_btree_get_nonroot_node(path, level); 672 673 index = path[level].bp_index + next_adj; 674 if (index < nilfs_btree_node_get_nchildren(node)) { 675 /* Next key is in this node */ 676 *nextkey = nilfs_btree_node_get_key(node, index); 677 return 0; 678 } 679 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 680 next_adj = 1; 681 } 682 return -ENOENT; 683 } 684 685 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 686 __u64 key, int level, __u64 *ptrp) 687 { 688 struct nilfs_btree_path *path; 689 int ret; 690 691 path = nilfs_btree_alloc_path(); 692 if (path == NULL) 693 return -ENOMEM; 694 695 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 696 697 nilfs_btree_free_path(path); 698 699 return ret; 700 } 701 702 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 703 __u64 key, __u64 *ptrp, 704 unsigned int maxblocks) 705 { 706 struct nilfs_btree_path *path; 707 struct nilfs_btree_node *node; 708 struct inode *dat = NULL; 709 __u64 ptr, ptr2; 710 sector_t blocknr; 711 int level = NILFS_BTREE_LEVEL_NODE_MIN; 712 int ret, cnt, index, maxlevel, ncmax; 713 struct nilfs_btree_readahead_info p; 714 715 path = nilfs_btree_alloc_path(); 716 if (path == NULL) 717 return -ENOMEM; 718 719 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 720 if (ret < 0) 721 goto out; 722 723 if (NILFS_BMAP_USE_VBN(btree)) { 724 dat = nilfs_bmap_get_dat(btree); 725 ret = nilfs_dat_translate(dat, ptr, &blocknr); 726 if (ret < 0) 727 goto out; 728 ptr = blocknr; 729 } 730 cnt = 1; 731 if (cnt == maxblocks) 732 goto end; 733 734 maxlevel = nilfs_btree_height(btree) - 1; 735 node = nilfs_btree_get_node(btree, path, level, &ncmax); 736 index = path[level].bp_index + 1; 737 for (;;) { 738 while (index < nilfs_btree_node_get_nchildren(node)) { 739 if (nilfs_btree_node_get_key(node, index) != 740 key + cnt) 741 goto end; 742 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 743 if (dat) { 744 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 745 if (ret < 0) 746 goto out; 747 ptr2 = blocknr; 748 } 749 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 750 goto end; 751 index++; 752 } 753 if (level == maxlevel) 754 break; 755 756 /* look-up right sibling node */ 757 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 758 p.index = path[level + 1].bp_index + 1; 759 p.max_ra_blocks = 7; 760 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 761 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 762 break; 763 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 764 path[level + 1].bp_index = p.index; 765 766 brelse(path[level].bp_bh); 767 path[level].bp_bh = NULL; 768 769 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 770 &p); 771 if (ret < 0) 772 goto out; 773 node = nilfs_btree_get_nonroot_node(path, level); 774 ncmax = nilfs_btree_nchildren_per_block(btree); 775 index = 0; 776 path[level].bp_index = index; 777 } 778 end: 779 *ptrp = ptr; 780 ret = cnt; 781 out: 782 nilfs_btree_free_path(path); 783 return ret; 784 } 785 786 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 787 struct nilfs_btree_path *path, 788 int level, __u64 key) 789 { 790 if (level < nilfs_btree_height(btree) - 1) { 791 do { 792 nilfs_btree_node_set_key( 793 nilfs_btree_get_nonroot_node(path, level), 794 path[level].bp_index, key); 795 if (!buffer_dirty(path[level].bp_bh)) 796 mark_buffer_dirty(path[level].bp_bh); 797 } while ((path[level].bp_index == 0) && 798 (++level < nilfs_btree_height(btree) - 1)); 799 } 800 801 /* root */ 802 if (level == nilfs_btree_height(btree) - 1) { 803 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 804 path[level].bp_index, key); 805 } 806 } 807 808 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 809 struct nilfs_btree_path *path, 810 int level, __u64 *keyp, __u64 *ptrp) 811 { 812 struct nilfs_btree_node *node; 813 int ncblk; 814 815 if (level < nilfs_btree_height(btree) - 1) { 816 node = nilfs_btree_get_nonroot_node(path, level); 817 ncblk = nilfs_btree_nchildren_per_block(btree); 818 nilfs_btree_node_insert(node, path[level].bp_index, 819 *keyp, *ptrp, ncblk); 820 if (!buffer_dirty(path[level].bp_bh)) 821 mark_buffer_dirty(path[level].bp_bh); 822 823 if (path[level].bp_index == 0) 824 nilfs_btree_promote_key(btree, path, level + 1, 825 nilfs_btree_node_get_key(node, 826 0)); 827 } else { 828 node = nilfs_btree_get_root(btree); 829 nilfs_btree_node_insert(node, path[level].bp_index, 830 *keyp, *ptrp, 831 NILFS_BTREE_ROOT_NCHILDREN_MAX); 832 } 833 } 834 835 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 836 struct nilfs_btree_path *path, 837 int level, __u64 *keyp, __u64 *ptrp) 838 { 839 struct nilfs_btree_node *node, *left; 840 int nchildren, lnchildren, n, move, ncblk; 841 842 node = nilfs_btree_get_nonroot_node(path, level); 843 left = nilfs_btree_get_sib_node(path, level); 844 nchildren = nilfs_btree_node_get_nchildren(node); 845 lnchildren = nilfs_btree_node_get_nchildren(left); 846 ncblk = nilfs_btree_nchildren_per_block(btree); 847 move = 0; 848 849 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 850 if (n > path[level].bp_index) { 851 /* move insert point */ 852 n--; 853 move = 1; 854 } 855 856 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 857 858 if (!buffer_dirty(path[level].bp_bh)) 859 mark_buffer_dirty(path[level].bp_bh); 860 if (!buffer_dirty(path[level].bp_sib_bh)) 861 mark_buffer_dirty(path[level].bp_sib_bh); 862 863 nilfs_btree_promote_key(btree, path, level + 1, 864 nilfs_btree_node_get_key(node, 0)); 865 866 if (move) { 867 brelse(path[level].bp_bh); 868 path[level].bp_bh = path[level].bp_sib_bh; 869 path[level].bp_sib_bh = NULL; 870 path[level].bp_index += lnchildren; 871 path[level + 1].bp_index--; 872 } else { 873 brelse(path[level].bp_sib_bh); 874 path[level].bp_sib_bh = NULL; 875 path[level].bp_index -= n; 876 } 877 878 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 879 } 880 881 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 882 struct nilfs_btree_path *path, 883 int level, __u64 *keyp, __u64 *ptrp) 884 { 885 struct nilfs_btree_node *node, *right; 886 int nchildren, rnchildren, n, move, ncblk; 887 888 node = nilfs_btree_get_nonroot_node(path, level); 889 right = nilfs_btree_get_sib_node(path, level); 890 nchildren = nilfs_btree_node_get_nchildren(node); 891 rnchildren = nilfs_btree_node_get_nchildren(right); 892 ncblk = nilfs_btree_nchildren_per_block(btree); 893 move = 0; 894 895 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 896 if (n > nchildren - path[level].bp_index) { 897 /* move insert point */ 898 n--; 899 move = 1; 900 } 901 902 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 903 904 if (!buffer_dirty(path[level].bp_bh)) 905 mark_buffer_dirty(path[level].bp_bh); 906 if (!buffer_dirty(path[level].bp_sib_bh)) 907 mark_buffer_dirty(path[level].bp_sib_bh); 908 909 path[level + 1].bp_index++; 910 nilfs_btree_promote_key(btree, path, level + 1, 911 nilfs_btree_node_get_key(right, 0)); 912 path[level + 1].bp_index--; 913 914 if (move) { 915 brelse(path[level].bp_bh); 916 path[level].bp_bh = path[level].bp_sib_bh; 917 path[level].bp_sib_bh = NULL; 918 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 919 path[level + 1].bp_index++; 920 } else { 921 brelse(path[level].bp_sib_bh); 922 path[level].bp_sib_bh = NULL; 923 } 924 925 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 926 } 927 928 static void nilfs_btree_split(struct nilfs_bmap *btree, 929 struct nilfs_btree_path *path, 930 int level, __u64 *keyp, __u64 *ptrp) 931 { 932 struct nilfs_btree_node *node, *right; 933 int nchildren, n, move, ncblk; 934 935 node = nilfs_btree_get_nonroot_node(path, level); 936 right = nilfs_btree_get_sib_node(path, level); 937 nchildren = nilfs_btree_node_get_nchildren(node); 938 ncblk = nilfs_btree_nchildren_per_block(btree); 939 move = 0; 940 941 n = (nchildren + 1) / 2; 942 if (n > nchildren - path[level].bp_index) { 943 n--; 944 move = 1; 945 } 946 947 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 948 949 if (!buffer_dirty(path[level].bp_bh)) 950 mark_buffer_dirty(path[level].bp_bh); 951 if (!buffer_dirty(path[level].bp_sib_bh)) 952 mark_buffer_dirty(path[level].bp_sib_bh); 953 954 if (move) { 955 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 956 nilfs_btree_node_insert(right, path[level].bp_index, 957 *keyp, *ptrp, ncblk); 958 959 *keyp = nilfs_btree_node_get_key(right, 0); 960 *ptrp = path[level].bp_newreq.bpr_ptr; 961 962 brelse(path[level].bp_bh); 963 path[level].bp_bh = path[level].bp_sib_bh; 964 path[level].bp_sib_bh = NULL; 965 } else { 966 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 967 968 *keyp = nilfs_btree_node_get_key(right, 0); 969 *ptrp = path[level].bp_newreq.bpr_ptr; 970 971 brelse(path[level].bp_sib_bh); 972 path[level].bp_sib_bh = NULL; 973 } 974 975 path[level + 1].bp_index++; 976 } 977 978 static void nilfs_btree_grow(struct nilfs_bmap *btree, 979 struct nilfs_btree_path *path, 980 int level, __u64 *keyp, __u64 *ptrp) 981 { 982 struct nilfs_btree_node *root, *child; 983 int n, ncblk; 984 985 root = nilfs_btree_get_root(btree); 986 child = nilfs_btree_get_sib_node(path, level); 987 ncblk = nilfs_btree_nchildren_per_block(btree); 988 989 n = nilfs_btree_node_get_nchildren(root); 990 991 nilfs_btree_node_move_right(root, child, n, 992 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 993 nilfs_btree_node_set_level(root, level + 1); 994 995 if (!buffer_dirty(path[level].bp_sib_bh)) 996 mark_buffer_dirty(path[level].bp_sib_bh); 997 998 path[level].bp_bh = path[level].bp_sib_bh; 999 path[level].bp_sib_bh = NULL; 1000 1001 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 1002 1003 *keyp = nilfs_btree_node_get_key(child, 0); 1004 *ptrp = path[level].bp_newreq.bpr_ptr; 1005 } 1006 1007 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 1008 const struct nilfs_btree_path *path) 1009 { 1010 struct nilfs_btree_node *node; 1011 int level, ncmax; 1012 1013 if (path == NULL) 1014 return NILFS_BMAP_INVALID_PTR; 1015 1016 /* left sibling */ 1017 level = NILFS_BTREE_LEVEL_NODE_MIN; 1018 if (path[level].bp_index > 0) { 1019 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1020 return nilfs_btree_node_get_ptr(node, 1021 path[level].bp_index - 1, 1022 ncmax); 1023 } 1024 1025 /* parent */ 1026 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1027 if (level <= nilfs_btree_height(btree) - 1) { 1028 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1029 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1030 ncmax); 1031 } 1032 1033 return NILFS_BMAP_INVALID_PTR; 1034 } 1035 1036 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1037 const struct nilfs_btree_path *path, 1038 __u64 key) 1039 { 1040 __u64 ptr; 1041 1042 ptr = nilfs_bmap_find_target_seq(btree, key); 1043 if (ptr != NILFS_BMAP_INVALID_PTR) 1044 /* sequential access */ 1045 return ptr; 1046 1047 ptr = nilfs_btree_find_near(btree, path); 1048 if (ptr != NILFS_BMAP_INVALID_PTR) 1049 /* near */ 1050 return ptr; 1051 1052 /* block group */ 1053 return nilfs_bmap_find_target_in_group(btree); 1054 } 1055 1056 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1057 struct nilfs_btree_path *path, 1058 int *levelp, __u64 key, __u64 ptr, 1059 struct nilfs_bmap_stats *stats) 1060 { 1061 struct buffer_head *bh; 1062 struct nilfs_btree_node *node, *parent, *sib; 1063 __u64 sibptr; 1064 int pindex, level, ncmax, ncblk, ret; 1065 struct inode *dat = NULL; 1066 1067 stats->bs_nblocks = 0; 1068 level = NILFS_BTREE_LEVEL_DATA; 1069 1070 /* allocate a new ptr for data block */ 1071 if (NILFS_BMAP_USE_VBN(btree)) { 1072 path[level].bp_newreq.bpr_ptr = 1073 nilfs_btree_find_target_v(btree, path, key); 1074 dat = nilfs_bmap_get_dat(btree); 1075 } 1076 1077 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1078 if (ret < 0) 1079 goto err_out_data; 1080 1081 ncblk = nilfs_btree_nchildren_per_block(btree); 1082 1083 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1084 level < nilfs_btree_height(btree) - 1; 1085 level++) { 1086 node = nilfs_btree_get_nonroot_node(path, level); 1087 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1088 path[level].bp_op = nilfs_btree_do_insert; 1089 stats->bs_nblocks++; 1090 goto out; 1091 } 1092 1093 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1094 pindex = path[level + 1].bp_index; 1095 1096 /* left sibling */ 1097 if (pindex > 0) { 1098 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1099 ncmax); 1100 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1101 if (ret < 0) 1102 goto err_out_child_node; 1103 sib = (struct nilfs_btree_node *)bh->b_data; 1104 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1105 path[level].bp_sib_bh = bh; 1106 path[level].bp_op = nilfs_btree_carry_left; 1107 stats->bs_nblocks++; 1108 goto out; 1109 } else { 1110 brelse(bh); 1111 } 1112 } 1113 1114 /* right sibling */ 1115 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1116 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1117 ncmax); 1118 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1119 if (ret < 0) 1120 goto err_out_child_node; 1121 sib = (struct nilfs_btree_node *)bh->b_data; 1122 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1123 path[level].bp_sib_bh = bh; 1124 path[level].bp_op = nilfs_btree_carry_right; 1125 stats->bs_nblocks++; 1126 goto out; 1127 } else { 1128 brelse(bh); 1129 } 1130 } 1131 1132 /* split */ 1133 path[level].bp_newreq.bpr_ptr = 1134 path[level - 1].bp_newreq.bpr_ptr + 1; 1135 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1136 &path[level].bp_newreq, dat); 1137 if (ret < 0) 1138 goto err_out_child_node; 1139 ret = nilfs_btree_get_new_block(btree, 1140 path[level].bp_newreq.bpr_ptr, 1141 &bh); 1142 if (ret < 0) 1143 goto err_out_curr_node; 1144 1145 stats->bs_nblocks++; 1146 1147 sib = (struct nilfs_btree_node *)bh->b_data; 1148 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1149 path[level].bp_sib_bh = bh; 1150 path[level].bp_op = nilfs_btree_split; 1151 } 1152 1153 /* root */ 1154 node = nilfs_btree_get_root(btree); 1155 if (nilfs_btree_node_get_nchildren(node) < 1156 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1157 path[level].bp_op = nilfs_btree_do_insert; 1158 stats->bs_nblocks++; 1159 goto out; 1160 } 1161 1162 /* grow */ 1163 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1164 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1165 if (ret < 0) 1166 goto err_out_child_node; 1167 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1168 &bh); 1169 if (ret < 0) 1170 goto err_out_curr_node; 1171 1172 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1173 0, level, 0, ncblk, NULL, NULL); 1174 path[level].bp_sib_bh = bh; 1175 path[level].bp_op = nilfs_btree_grow; 1176 1177 level++; 1178 path[level].bp_op = nilfs_btree_do_insert; 1179 1180 /* a newly-created node block and a data block are added */ 1181 stats->bs_nblocks += 2; 1182 1183 /* success */ 1184 out: 1185 *levelp = level; 1186 return ret; 1187 1188 /* error */ 1189 err_out_curr_node: 1190 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1191 err_out_child_node: 1192 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1193 nilfs_btnode_delete(path[level].bp_sib_bh); 1194 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1195 1196 } 1197 1198 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1199 err_out_data: 1200 *levelp = level; 1201 stats->bs_nblocks = 0; 1202 return ret; 1203 } 1204 1205 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1206 struct nilfs_btree_path *path, 1207 int maxlevel, __u64 key, __u64 ptr) 1208 { 1209 struct inode *dat = NULL; 1210 int level; 1211 1212 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1213 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1214 if (NILFS_BMAP_USE_VBN(btree)) { 1215 nilfs_bmap_set_target_v(btree, key, ptr); 1216 dat = nilfs_bmap_get_dat(btree); 1217 } 1218 1219 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1220 nilfs_bmap_commit_alloc_ptr(btree, 1221 &path[level - 1].bp_newreq, dat); 1222 path[level].bp_op(btree, path, level, &key, &ptr); 1223 } 1224 1225 if (!nilfs_bmap_dirty(btree)) 1226 nilfs_bmap_set_dirty(btree); 1227 } 1228 1229 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1230 { 1231 struct nilfs_btree_path *path; 1232 struct nilfs_bmap_stats stats; 1233 int level, ret; 1234 1235 path = nilfs_btree_alloc_path(); 1236 if (path == NULL) 1237 return -ENOMEM; 1238 1239 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1240 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1241 if (ret != -ENOENT) { 1242 if (ret == 0) 1243 ret = -EEXIST; 1244 goto out; 1245 } 1246 1247 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1248 if (ret < 0) 1249 goto out; 1250 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1251 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1252 1253 out: 1254 nilfs_btree_free_path(path); 1255 return ret; 1256 } 1257 1258 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1259 struct nilfs_btree_path *path, 1260 int level, __u64 *keyp, __u64 *ptrp) 1261 { 1262 struct nilfs_btree_node *node; 1263 int ncblk; 1264 1265 if (level < nilfs_btree_height(btree) - 1) { 1266 node = nilfs_btree_get_nonroot_node(path, level); 1267 ncblk = nilfs_btree_nchildren_per_block(btree); 1268 nilfs_btree_node_delete(node, path[level].bp_index, 1269 keyp, ptrp, ncblk); 1270 if (!buffer_dirty(path[level].bp_bh)) 1271 mark_buffer_dirty(path[level].bp_bh); 1272 if (path[level].bp_index == 0) 1273 nilfs_btree_promote_key(btree, path, level + 1, 1274 nilfs_btree_node_get_key(node, 0)); 1275 } else { 1276 node = nilfs_btree_get_root(btree); 1277 nilfs_btree_node_delete(node, path[level].bp_index, 1278 keyp, ptrp, 1279 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1280 } 1281 } 1282 1283 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1284 struct nilfs_btree_path *path, 1285 int level, __u64 *keyp, __u64 *ptrp) 1286 { 1287 struct nilfs_btree_node *node, *left; 1288 int nchildren, lnchildren, n, ncblk; 1289 1290 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1291 1292 node = nilfs_btree_get_nonroot_node(path, level); 1293 left = nilfs_btree_get_sib_node(path, level); 1294 nchildren = nilfs_btree_node_get_nchildren(node); 1295 lnchildren = nilfs_btree_node_get_nchildren(left); 1296 ncblk = nilfs_btree_nchildren_per_block(btree); 1297 1298 n = (nchildren + lnchildren) / 2 - nchildren; 1299 1300 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1301 1302 if (!buffer_dirty(path[level].bp_bh)) 1303 mark_buffer_dirty(path[level].bp_bh); 1304 if (!buffer_dirty(path[level].bp_sib_bh)) 1305 mark_buffer_dirty(path[level].bp_sib_bh); 1306 1307 nilfs_btree_promote_key(btree, path, level + 1, 1308 nilfs_btree_node_get_key(node, 0)); 1309 1310 brelse(path[level].bp_sib_bh); 1311 path[level].bp_sib_bh = NULL; 1312 path[level].bp_index += n; 1313 } 1314 1315 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1316 struct nilfs_btree_path *path, 1317 int level, __u64 *keyp, __u64 *ptrp) 1318 { 1319 struct nilfs_btree_node *node, *right; 1320 int nchildren, rnchildren, n, ncblk; 1321 1322 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1323 1324 node = nilfs_btree_get_nonroot_node(path, level); 1325 right = nilfs_btree_get_sib_node(path, level); 1326 nchildren = nilfs_btree_node_get_nchildren(node); 1327 rnchildren = nilfs_btree_node_get_nchildren(right); 1328 ncblk = nilfs_btree_nchildren_per_block(btree); 1329 1330 n = (nchildren + rnchildren) / 2 - nchildren; 1331 1332 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1333 1334 if (!buffer_dirty(path[level].bp_bh)) 1335 mark_buffer_dirty(path[level].bp_bh); 1336 if (!buffer_dirty(path[level].bp_sib_bh)) 1337 mark_buffer_dirty(path[level].bp_sib_bh); 1338 1339 path[level + 1].bp_index++; 1340 nilfs_btree_promote_key(btree, path, level + 1, 1341 nilfs_btree_node_get_key(right, 0)); 1342 path[level + 1].bp_index--; 1343 1344 brelse(path[level].bp_sib_bh); 1345 path[level].bp_sib_bh = NULL; 1346 } 1347 1348 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1349 struct nilfs_btree_path *path, 1350 int level, __u64 *keyp, __u64 *ptrp) 1351 { 1352 struct nilfs_btree_node *node, *left; 1353 int n, ncblk; 1354 1355 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1356 1357 node = nilfs_btree_get_nonroot_node(path, level); 1358 left = nilfs_btree_get_sib_node(path, level); 1359 ncblk = nilfs_btree_nchildren_per_block(btree); 1360 1361 n = nilfs_btree_node_get_nchildren(node); 1362 1363 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1364 1365 if (!buffer_dirty(path[level].bp_sib_bh)) 1366 mark_buffer_dirty(path[level].bp_sib_bh); 1367 1368 nilfs_btnode_delete(path[level].bp_bh); 1369 path[level].bp_bh = path[level].bp_sib_bh; 1370 path[level].bp_sib_bh = NULL; 1371 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1372 } 1373 1374 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1375 struct nilfs_btree_path *path, 1376 int level, __u64 *keyp, __u64 *ptrp) 1377 { 1378 struct nilfs_btree_node *node, *right; 1379 int n, ncblk; 1380 1381 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1382 1383 node = nilfs_btree_get_nonroot_node(path, level); 1384 right = nilfs_btree_get_sib_node(path, level); 1385 ncblk = nilfs_btree_nchildren_per_block(btree); 1386 1387 n = nilfs_btree_node_get_nchildren(right); 1388 1389 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1390 1391 if (!buffer_dirty(path[level].bp_bh)) 1392 mark_buffer_dirty(path[level].bp_bh); 1393 1394 nilfs_btnode_delete(path[level].bp_sib_bh); 1395 path[level].bp_sib_bh = NULL; 1396 path[level + 1].bp_index++; 1397 } 1398 1399 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1400 struct nilfs_btree_path *path, 1401 int level, __u64 *keyp, __u64 *ptrp) 1402 { 1403 struct nilfs_btree_node *root, *child; 1404 int n, ncblk; 1405 1406 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1407 1408 root = nilfs_btree_get_root(btree); 1409 child = nilfs_btree_get_nonroot_node(path, level); 1410 ncblk = nilfs_btree_nchildren_per_block(btree); 1411 1412 nilfs_btree_node_delete(root, 0, NULL, NULL, 1413 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1414 nilfs_btree_node_set_level(root, level); 1415 n = nilfs_btree_node_get_nchildren(child); 1416 nilfs_btree_node_move_left(root, child, n, 1417 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1418 1419 nilfs_btnode_delete(path[level].bp_bh); 1420 path[level].bp_bh = NULL; 1421 } 1422 1423 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1424 struct nilfs_btree_path *path, 1425 int level, __u64 *keyp, __u64 *ptrp) 1426 { 1427 } 1428 1429 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1430 struct nilfs_btree_path *path, 1431 int *levelp, 1432 struct nilfs_bmap_stats *stats, 1433 struct inode *dat) 1434 { 1435 struct buffer_head *bh; 1436 struct nilfs_btree_node *node, *parent, *sib; 1437 __u64 sibptr; 1438 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1439 1440 ret = 0; 1441 stats->bs_nblocks = 0; 1442 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1443 ncblk = nilfs_btree_nchildren_per_block(btree); 1444 1445 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1446 level < nilfs_btree_height(btree) - 1; 1447 level++) { 1448 node = nilfs_btree_get_nonroot_node(path, level); 1449 path[level].bp_oldreq.bpr_ptr = 1450 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1451 ret = nilfs_bmap_prepare_end_ptr(btree, 1452 &path[level].bp_oldreq, dat); 1453 if (ret < 0) 1454 goto err_out_child_node; 1455 1456 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1457 path[level].bp_op = nilfs_btree_do_delete; 1458 stats->bs_nblocks++; 1459 goto out; 1460 } 1461 1462 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1463 pindex = path[level + 1].bp_index; 1464 dindex = pindex; 1465 1466 if (pindex > 0) { 1467 /* left sibling */ 1468 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1469 ncmax); 1470 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1471 if (ret < 0) 1472 goto err_out_curr_node; 1473 sib = (struct nilfs_btree_node *)bh->b_data; 1474 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1475 path[level].bp_sib_bh = bh; 1476 path[level].bp_op = nilfs_btree_borrow_left; 1477 stats->bs_nblocks++; 1478 goto out; 1479 } else { 1480 path[level].bp_sib_bh = bh; 1481 path[level].bp_op = nilfs_btree_concat_left; 1482 stats->bs_nblocks++; 1483 /* continue; */ 1484 } 1485 } else if (pindex < 1486 nilfs_btree_node_get_nchildren(parent) - 1) { 1487 /* right sibling */ 1488 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1489 ncmax); 1490 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1491 if (ret < 0) 1492 goto err_out_curr_node; 1493 sib = (struct nilfs_btree_node *)bh->b_data; 1494 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1495 path[level].bp_sib_bh = bh; 1496 path[level].bp_op = nilfs_btree_borrow_right; 1497 stats->bs_nblocks++; 1498 goto out; 1499 } else { 1500 path[level].bp_sib_bh = bh; 1501 path[level].bp_op = nilfs_btree_concat_right; 1502 stats->bs_nblocks++; 1503 /* 1504 * When merging right sibling node 1505 * into the current node, pointer to 1506 * the right sibling node must be 1507 * terminated instead. The adjustment 1508 * below is required for that. 1509 */ 1510 dindex = pindex + 1; 1511 /* continue; */ 1512 } 1513 } else { 1514 /* no siblings */ 1515 /* the only child of the root node */ 1516 WARN_ON(level != nilfs_btree_height(btree) - 2); 1517 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1518 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1519 path[level].bp_op = nilfs_btree_shrink; 1520 stats->bs_nblocks += 2; 1521 level++; 1522 path[level].bp_op = nilfs_btree_nop; 1523 goto shrink_root_child; 1524 } else { 1525 path[level].bp_op = nilfs_btree_do_delete; 1526 stats->bs_nblocks++; 1527 goto out; 1528 } 1529 } 1530 } 1531 1532 /* child of the root node is deleted */ 1533 path[level].bp_op = nilfs_btree_do_delete; 1534 stats->bs_nblocks++; 1535 1536 shrink_root_child: 1537 node = nilfs_btree_get_root(btree); 1538 path[level].bp_oldreq.bpr_ptr = 1539 nilfs_btree_node_get_ptr(node, dindex, 1540 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1541 1542 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1543 if (ret < 0) 1544 goto err_out_child_node; 1545 1546 /* success */ 1547 out: 1548 *levelp = level; 1549 return ret; 1550 1551 /* error */ 1552 err_out_curr_node: 1553 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1554 err_out_child_node: 1555 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1556 brelse(path[level].bp_sib_bh); 1557 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1558 } 1559 *levelp = level; 1560 stats->bs_nblocks = 0; 1561 return ret; 1562 } 1563 1564 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1565 struct nilfs_btree_path *path, 1566 int maxlevel, struct inode *dat) 1567 { 1568 int level; 1569 1570 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1571 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1572 path[level].bp_op(btree, path, level, NULL, NULL); 1573 } 1574 1575 if (!nilfs_bmap_dirty(btree)) 1576 nilfs_bmap_set_dirty(btree); 1577 } 1578 1579 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1580 1581 { 1582 struct nilfs_btree_path *path; 1583 struct nilfs_bmap_stats stats; 1584 struct inode *dat; 1585 int level, ret; 1586 1587 path = nilfs_btree_alloc_path(); 1588 if (path == NULL) 1589 return -ENOMEM; 1590 1591 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1592 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1593 if (ret < 0) 1594 goto out; 1595 1596 1597 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1598 1599 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1600 if (ret < 0) 1601 goto out; 1602 nilfs_btree_commit_delete(btree, path, level, dat); 1603 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1604 1605 out: 1606 nilfs_btree_free_path(path); 1607 return ret; 1608 } 1609 1610 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1611 __u64 *keyp) 1612 { 1613 struct nilfs_btree_path *path; 1614 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1615 int ret; 1616 1617 path = nilfs_btree_alloc_path(); 1618 if (!path) 1619 return -ENOMEM; 1620 1621 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1622 if (!ret) 1623 *keyp = start; 1624 else if (ret == -ENOENT) 1625 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1626 1627 nilfs_btree_free_path(path); 1628 return ret; 1629 } 1630 1631 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1632 { 1633 struct nilfs_btree_path *path; 1634 int ret; 1635 1636 path = nilfs_btree_alloc_path(); 1637 if (path == NULL) 1638 return -ENOMEM; 1639 1640 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1641 1642 nilfs_btree_free_path(path); 1643 1644 return ret; 1645 } 1646 1647 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1648 { 1649 struct buffer_head *bh; 1650 struct nilfs_btree_node *root, *node; 1651 __u64 maxkey, nextmaxkey; 1652 __u64 ptr; 1653 int nchildren, ret; 1654 1655 root = nilfs_btree_get_root(btree); 1656 switch (nilfs_btree_height(btree)) { 1657 case 2: 1658 bh = NULL; 1659 node = root; 1660 break; 1661 case 3: 1662 nchildren = nilfs_btree_node_get_nchildren(root); 1663 if (nchildren > 1) 1664 return 0; 1665 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1666 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1667 ret = nilfs_btree_get_block(btree, ptr, &bh); 1668 if (ret < 0) 1669 return ret; 1670 node = (struct nilfs_btree_node *)bh->b_data; 1671 break; 1672 default: 1673 return 0; 1674 } 1675 1676 nchildren = nilfs_btree_node_get_nchildren(node); 1677 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1678 nextmaxkey = (nchildren > 1) ? 1679 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1680 brelse(bh); 1681 1682 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1683 } 1684 1685 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1686 __u64 *keys, __u64 *ptrs, int nitems) 1687 { 1688 struct buffer_head *bh; 1689 struct nilfs_btree_node *node, *root; 1690 __le64 *dkeys; 1691 __le64 *dptrs; 1692 __u64 ptr; 1693 int nchildren, ncmax, i, ret; 1694 1695 root = nilfs_btree_get_root(btree); 1696 switch (nilfs_btree_height(btree)) { 1697 case 2: 1698 bh = NULL; 1699 node = root; 1700 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1701 break; 1702 case 3: 1703 nchildren = nilfs_btree_node_get_nchildren(root); 1704 WARN_ON(nchildren > 1); 1705 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1706 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1707 ret = nilfs_btree_get_block(btree, ptr, &bh); 1708 if (ret < 0) 1709 return ret; 1710 node = (struct nilfs_btree_node *)bh->b_data; 1711 ncmax = nilfs_btree_nchildren_per_block(btree); 1712 break; 1713 default: 1714 node = NULL; 1715 return -EINVAL; 1716 } 1717 1718 nchildren = nilfs_btree_node_get_nchildren(node); 1719 if (nchildren < nitems) 1720 nitems = nchildren; 1721 dkeys = nilfs_btree_node_dkeys(node); 1722 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1723 for (i = 0; i < nitems; i++) { 1724 keys[i] = le64_to_cpu(dkeys[i]); 1725 ptrs[i] = le64_to_cpu(dptrs[i]); 1726 } 1727 1728 brelse(bh); 1729 1730 return nitems; 1731 } 1732 1733 static int 1734 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1735 union nilfs_bmap_ptr_req *dreq, 1736 union nilfs_bmap_ptr_req *nreq, 1737 struct buffer_head **bhp, 1738 struct nilfs_bmap_stats *stats) 1739 { 1740 struct buffer_head *bh; 1741 struct inode *dat = NULL; 1742 int ret; 1743 1744 stats->bs_nblocks = 0; 1745 1746 /* for data */ 1747 /* cannot find near ptr */ 1748 if (NILFS_BMAP_USE_VBN(btree)) { 1749 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1750 dat = nilfs_bmap_get_dat(btree); 1751 } 1752 1753 ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); 1754 if (ret < 0) 1755 return ret; 1756 1757 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1758 if (ret < 0) 1759 return ret; 1760 1761 *bhp = NULL; 1762 stats->bs_nblocks++; 1763 if (nreq != NULL) { 1764 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1765 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1766 if (ret < 0) 1767 goto err_out_dreq; 1768 1769 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1770 if (ret < 0) 1771 goto err_out_nreq; 1772 1773 *bhp = bh; 1774 stats->bs_nblocks++; 1775 } 1776 1777 /* success */ 1778 return 0; 1779 1780 /* error */ 1781 err_out_nreq: 1782 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1783 err_out_dreq: 1784 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1785 stats->bs_nblocks = 0; 1786 return ret; 1787 1788 } 1789 1790 static void 1791 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1792 __u64 key, __u64 ptr, 1793 const __u64 *keys, const __u64 *ptrs, 1794 int n, 1795 union nilfs_bmap_ptr_req *dreq, 1796 union nilfs_bmap_ptr_req *nreq, 1797 struct buffer_head *bh) 1798 { 1799 struct nilfs_btree_node *node; 1800 struct inode *dat; 1801 __u64 tmpptr; 1802 int ncblk; 1803 1804 /* free resources */ 1805 if (btree->b_ops->bop_clear != NULL) 1806 btree->b_ops->bop_clear(btree); 1807 1808 /* ptr must be a pointer to a buffer head. */ 1809 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1810 1811 /* convert and insert */ 1812 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1813 __nilfs_btree_init(btree); 1814 if (nreq != NULL) { 1815 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1816 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1817 1818 /* create child node at level 1 */ 1819 node = (struct nilfs_btree_node *)bh->b_data; 1820 ncblk = nilfs_btree_nchildren_per_block(btree); 1821 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1822 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1823 if (!buffer_dirty(bh)) 1824 mark_buffer_dirty(bh); 1825 if (!nilfs_bmap_dirty(btree)) 1826 nilfs_bmap_set_dirty(btree); 1827 1828 brelse(bh); 1829 1830 /* create root node at level 2 */ 1831 node = nilfs_btree_get_root(btree); 1832 tmpptr = nreq->bpr_ptr; 1833 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1834 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1835 &keys[0], &tmpptr); 1836 } else { 1837 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1838 1839 /* create root node at level 1 */ 1840 node = nilfs_btree_get_root(btree); 1841 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1842 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1843 keys, ptrs); 1844 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1845 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1846 if (!nilfs_bmap_dirty(btree)) 1847 nilfs_bmap_set_dirty(btree); 1848 } 1849 1850 if (NILFS_BMAP_USE_VBN(btree)) 1851 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1852 } 1853 1854 /** 1855 * nilfs_btree_convert_and_insert - 1856 * @bmap: 1857 * @key: 1858 * @ptr: 1859 * @keys: 1860 * @ptrs: 1861 * @n: 1862 */ 1863 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1864 __u64 key, __u64 ptr, 1865 const __u64 *keys, const __u64 *ptrs, int n) 1866 { 1867 struct buffer_head *bh = NULL; 1868 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1869 struct nilfs_bmap_stats stats; 1870 int ret; 1871 1872 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1873 di = &dreq; 1874 ni = NULL; 1875 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1876 nilfs_btree_node_size(btree))) { 1877 di = &dreq; 1878 ni = &nreq; 1879 } else { 1880 di = NULL; 1881 ni = NULL; 1882 BUG(); 1883 } 1884 1885 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1886 &stats); 1887 if (ret < 0) 1888 return ret; 1889 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1890 di, ni, bh); 1891 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1892 return 0; 1893 } 1894 1895 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1896 struct nilfs_btree_path *path, 1897 int level, 1898 struct buffer_head *bh) 1899 { 1900 while ((++level < nilfs_btree_height(btree) - 1) && 1901 !buffer_dirty(path[level].bp_bh)) 1902 mark_buffer_dirty(path[level].bp_bh); 1903 1904 return 0; 1905 } 1906 1907 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1908 struct nilfs_btree_path *path, 1909 int level, struct inode *dat) 1910 { 1911 struct nilfs_btree_node *parent; 1912 int ncmax, ret; 1913 1914 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1915 path[level].bp_oldreq.bpr_ptr = 1916 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1917 ncmax); 1918 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1919 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1920 &path[level].bp_newreq.bpr_req); 1921 if (ret < 0) 1922 return ret; 1923 1924 if (buffer_nilfs_node(path[level].bp_bh)) { 1925 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1926 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1927 path[level].bp_ctxt.bh = path[level].bp_bh; 1928 ret = nilfs_btnode_prepare_change_key( 1929 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1930 &path[level].bp_ctxt); 1931 if (ret < 0) { 1932 nilfs_dat_abort_update(dat, 1933 &path[level].bp_oldreq.bpr_req, 1934 &path[level].bp_newreq.bpr_req); 1935 return ret; 1936 } 1937 } 1938 1939 return 0; 1940 } 1941 1942 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1943 struct nilfs_btree_path *path, 1944 int level, struct inode *dat) 1945 { 1946 struct nilfs_btree_node *parent; 1947 int ncmax; 1948 1949 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1950 &path[level].bp_newreq.bpr_req, 1951 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1952 1953 if (buffer_nilfs_node(path[level].bp_bh)) { 1954 nilfs_btnode_commit_change_key( 1955 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1956 &path[level].bp_ctxt); 1957 path[level].bp_bh = path[level].bp_ctxt.bh; 1958 } 1959 set_buffer_nilfs_volatile(path[level].bp_bh); 1960 1961 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1962 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1963 path[level].bp_newreq.bpr_ptr, ncmax); 1964 } 1965 1966 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1967 struct nilfs_btree_path *path, 1968 int level, struct inode *dat) 1969 { 1970 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1971 &path[level].bp_newreq.bpr_req); 1972 if (buffer_nilfs_node(path[level].bp_bh)) 1973 nilfs_btnode_abort_change_key( 1974 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1975 &path[level].bp_ctxt); 1976 } 1977 1978 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1979 struct nilfs_btree_path *path, 1980 int minlevel, int *maxlevelp, 1981 struct inode *dat) 1982 { 1983 int level, ret; 1984 1985 level = minlevel; 1986 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1987 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1988 if (ret < 0) 1989 return ret; 1990 } 1991 while ((++level < nilfs_btree_height(btree) - 1) && 1992 !buffer_dirty(path[level].bp_bh)) { 1993 1994 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1995 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1996 if (ret < 0) 1997 goto out; 1998 } 1999 2000 /* success */ 2001 *maxlevelp = level - 1; 2002 return 0; 2003 2004 /* error */ 2005 out: 2006 while (--level > minlevel) 2007 nilfs_btree_abort_update_v(btree, path, level, dat); 2008 if (!buffer_nilfs_volatile(path[level].bp_bh)) 2009 nilfs_btree_abort_update_v(btree, path, level, dat); 2010 return ret; 2011 } 2012 2013 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2014 struct nilfs_btree_path *path, 2015 int minlevel, int maxlevel, 2016 struct buffer_head *bh, 2017 struct inode *dat) 2018 { 2019 int level; 2020 2021 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2022 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2023 2024 for (level = minlevel + 1; level <= maxlevel; level++) 2025 nilfs_btree_commit_update_v(btree, path, level, dat); 2026 } 2027 2028 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2029 struct nilfs_btree_path *path, 2030 int level, struct buffer_head *bh) 2031 { 2032 int maxlevel = 0, ret; 2033 struct nilfs_btree_node *parent; 2034 struct inode *dat = nilfs_bmap_get_dat(btree); 2035 __u64 ptr; 2036 int ncmax; 2037 2038 get_bh(bh); 2039 path[level].bp_bh = bh; 2040 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2041 dat); 2042 if (ret < 0) 2043 goto out; 2044 2045 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2046 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2047 ptr = nilfs_btree_node_get_ptr(parent, 2048 path[level + 1].bp_index, 2049 ncmax); 2050 ret = nilfs_dat_mark_dirty(dat, ptr); 2051 if (ret < 0) 2052 goto out; 2053 } 2054 2055 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2056 2057 out: 2058 brelse(path[level].bp_bh); 2059 path[level].bp_bh = NULL; 2060 return ret; 2061 } 2062 2063 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2064 struct buffer_head *bh) 2065 { 2066 struct nilfs_btree_path *path; 2067 struct nilfs_btree_node *node; 2068 __u64 key; 2069 int level, ret; 2070 2071 WARN_ON(!buffer_dirty(bh)); 2072 2073 path = nilfs_btree_alloc_path(); 2074 if (path == NULL) 2075 return -ENOMEM; 2076 2077 if (buffer_nilfs_node(bh)) { 2078 node = (struct nilfs_btree_node *)bh->b_data; 2079 key = nilfs_btree_node_get_key(node, 0); 2080 level = nilfs_btree_node_get_level(node); 2081 } else { 2082 key = nilfs_bmap_data_get_key(btree, bh); 2083 level = NILFS_BTREE_LEVEL_DATA; 2084 } 2085 2086 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2087 if (ret < 0) { 2088 if (unlikely(ret == -ENOENT)) 2089 nilfs_crit(btree->b_inode->i_sb, 2090 "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", 2091 btree->b_inode->i_ino, 2092 (unsigned long long)key, level); 2093 goto out; 2094 } 2095 2096 ret = NILFS_BMAP_USE_VBN(btree) ? 2097 nilfs_btree_propagate_v(btree, path, level, bh) : 2098 nilfs_btree_propagate_p(btree, path, level, bh); 2099 2100 out: 2101 nilfs_btree_free_path(path); 2102 2103 return ret; 2104 } 2105 2106 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2107 struct buffer_head *bh) 2108 { 2109 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2110 } 2111 2112 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2113 struct list_head *lists, 2114 struct buffer_head *bh) 2115 { 2116 struct list_head *head; 2117 struct buffer_head *cbh; 2118 struct nilfs_btree_node *node, *cnode; 2119 __u64 key, ckey; 2120 int level; 2121 2122 get_bh(bh); 2123 node = (struct nilfs_btree_node *)bh->b_data; 2124 key = nilfs_btree_node_get_key(node, 0); 2125 level = nilfs_btree_node_get_level(node); 2126 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2127 level >= NILFS_BTREE_LEVEL_MAX) { 2128 dump_stack(); 2129 nilfs_warn(btree->b_inode->i_sb, 2130 "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", 2131 level, (unsigned long long)key, 2132 btree->b_inode->i_ino, 2133 (unsigned long long)bh->b_blocknr); 2134 return; 2135 } 2136 2137 list_for_each(head, &lists[level]) { 2138 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2139 cnode = (struct nilfs_btree_node *)cbh->b_data; 2140 ckey = nilfs_btree_node_get_key(cnode, 0); 2141 if (key < ckey) 2142 break; 2143 } 2144 list_add_tail(&bh->b_assoc_buffers, head); 2145 } 2146 2147 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2148 struct list_head *listp) 2149 { 2150 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 2151 struct address_space *btcache = btnc_inode->i_mapping; 2152 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2153 struct folio_batch fbatch; 2154 struct buffer_head *bh, *head; 2155 pgoff_t index = 0; 2156 int level, i; 2157 2158 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2159 level < NILFS_BTREE_LEVEL_MAX; 2160 level++) 2161 INIT_LIST_HEAD(&lists[level]); 2162 2163 folio_batch_init(&fbatch); 2164 2165 while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, 2166 PAGECACHE_TAG_DIRTY, &fbatch)) { 2167 for (i = 0; i < folio_batch_count(&fbatch); i++) { 2168 bh = head = folio_buffers(fbatch.folios[i]); 2169 do { 2170 if (buffer_dirty(bh)) 2171 nilfs_btree_add_dirty_buffer(btree, 2172 lists, bh); 2173 } while ((bh = bh->b_this_page) != head); 2174 } 2175 folio_batch_release(&fbatch); 2176 cond_resched(); 2177 } 2178 2179 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2180 level < NILFS_BTREE_LEVEL_MAX; 2181 level++) 2182 list_splice_tail(&lists[level], listp); 2183 } 2184 2185 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2186 struct nilfs_btree_path *path, 2187 int level, 2188 struct buffer_head **bh, 2189 sector_t blocknr, 2190 union nilfs_binfo *binfo) 2191 { 2192 struct nilfs_btree_node *parent; 2193 __u64 key; 2194 __u64 ptr; 2195 int ncmax, ret; 2196 2197 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2198 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2199 ncmax); 2200 if (buffer_nilfs_node(*bh)) { 2201 path[level].bp_ctxt.oldkey = ptr; 2202 path[level].bp_ctxt.newkey = blocknr; 2203 path[level].bp_ctxt.bh = *bh; 2204 ret = nilfs_btnode_prepare_change_key( 2205 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2206 &path[level].bp_ctxt); 2207 if (ret < 0) 2208 return ret; 2209 nilfs_btnode_commit_change_key( 2210 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2211 &path[level].bp_ctxt); 2212 *bh = path[level].bp_ctxt.bh; 2213 } 2214 2215 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2216 ncmax); 2217 2218 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2219 /* on-disk format */ 2220 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2221 binfo->bi_dat.bi_level = level; 2222 memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); 2223 2224 return 0; 2225 } 2226 2227 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2228 struct nilfs_btree_path *path, 2229 int level, 2230 struct buffer_head **bh, 2231 sector_t blocknr, 2232 union nilfs_binfo *binfo) 2233 { 2234 struct nilfs_btree_node *parent; 2235 struct inode *dat = nilfs_bmap_get_dat(btree); 2236 __u64 key; 2237 __u64 ptr; 2238 union nilfs_bmap_ptr_req req; 2239 int ncmax, ret; 2240 2241 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2242 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2243 ncmax); 2244 req.bpr_ptr = ptr; 2245 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2246 if (ret < 0) 2247 return ret; 2248 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 2249 2250 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2251 /* on-disk format */ 2252 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); 2253 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2254 2255 return 0; 2256 } 2257 2258 static int nilfs_btree_assign(struct nilfs_bmap *btree, 2259 struct buffer_head **bh, 2260 sector_t blocknr, 2261 union nilfs_binfo *binfo) 2262 { 2263 struct nilfs_btree_path *path; 2264 struct nilfs_btree_node *node; 2265 __u64 key; 2266 int level, ret; 2267 2268 path = nilfs_btree_alloc_path(); 2269 if (path == NULL) 2270 return -ENOMEM; 2271 2272 if (buffer_nilfs_node(*bh)) { 2273 node = (struct nilfs_btree_node *)(*bh)->b_data; 2274 key = nilfs_btree_node_get_key(node, 0); 2275 level = nilfs_btree_node_get_level(node); 2276 } else { 2277 key = nilfs_bmap_data_get_key(btree, *bh); 2278 level = NILFS_BTREE_LEVEL_DATA; 2279 } 2280 2281 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2282 if (ret < 0) { 2283 WARN_ON(ret == -ENOENT); 2284 goto out; 2285 } 2286 2287 ret = NILFS_BMAP_USE_VBN(btree) ? 2288 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2289 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2290 2291 out: 2292 nilfs_btree_free_path(path); 2293 2294 return ret; 2295 } 2296 2297 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, 2298 struct buffer_head **bh, 2299 sector_t blocknr, 2300 union nilfs_binfo *binfo) 2301 { 2302 struct nilfs_btree_node *node; 2303 __u64 key; 2304 int ret; 2305 2306 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, 2307 blocknr); 2308 if (ret < 0) 2309 return ret; 2310 2311 if (buffer_nilfs_node(*bh)) { 2312 node = (struct nilfs_btree_node *)(*bh)->b_data; 2313 key = nilfs_btree_node_get_key(node, 0); 2314 } else 2315 key = nilfs_bmap_data_get_key(btree, *bh); 2316 2317 /* on-disk format */ 2318 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2319 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2320 2321 return 0; 2322 } 2323 2324 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) 2325 { 2326 struct buffer_head *bh; 2327 struct nilfs_btree_path *path; 2328 __u64 ptr; 2329 int ret; 2330 2331 path = nilfs_btree_alloc_path(); 2332 if (path == NULL) 2333 return -ENOMEM; 2334 2335 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); 2336 if (ret < 0) { 2337 WARN_ON(ret == -ENOENT); 2338 goto out; 2339 } 2340 ret = nilfs_btree_get_block(btree, ptr, &bh); 2341 if (ret < 0) { 2342 WARN_ON(ret == -ENOENT); 2343 goto out; 2344 } 2345 2346 if (!buffer_dirty(bh)) 2347 mark_buffer_dirty(bh); 2348 brelse(bh); 2349 if (!nilfs_bmap_dirty(btree)) 2350 nilfs_bmap_set_dirty(btree); 2351 2352 out: 2353 nilfs_btree_free_path(path); 2354 return ret; 2355 } 2356 2357 static const struct nilfs_bmap_operations nilfs_btree_ops = { 2358 .bop_lookup = nilfs_btree_lookup, 2359 .bop_lookup_contig = nilfs_btree_lookup_contig, 2360 .bop_insert = nilfs_btree_insert, 2361 .bop_delete = nilfs_btree_delete, 2362 .bop_clear = NULL, 2363 2364 .bop_propagate = nilfs_btree_propagate, 2365 2366 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2367 2368 .bop_assign = nilfs_btree_assign, 2369 .bop_mark = nilfs_btree_mark, 2370 2371 .bop_seek_key = nilfs_btree_seek_key, 2372 .bop_last_key = nilfs_btree_last_key, 2373 2374 .bop_check_insert = NULL, 2375 .bop_check_delete = nilfs_btree_check_delete, 2376 .bop_gather_data = nilfs_btree_gather_data, 2377 }; 2378 2379 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2380 .bop_lookup = NULL, 2381 .bop_lookup_contig = NULL, 2382 .bop_insert = NULL, 2383 .bop_delete = NULL, 2384 .bop_clear = NULL, 2385 2386 .bop_propagate = nilfs_btree_propagate_gc, 2387 2388 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2389 2390 .bop_assign = nilfs_btree_assign_gc, 2391 .bop_mark = NULL, 2392 2393 .bop_seek_key = NULL, 2394 .bop_last_key = NULL, 2395 2396 .bop_check_insert = NULL, 2397 .bop_check_delete = NULL, 2398 .bop_gather_data = NULL, 2399 }; 2400 2401 static void __nilfs_btree_init(struct nilfs_bmap *bmap) 2402 { 2403 bmap->b_ops = &nilfs_btree_ops; 2404 bmap->b_nchildren_per_block = 2405 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2406 } 2407 2408 int nilfs_btree_init(struct nilfs_bmap *bmap) 2409 { 2410 int ret = 0; 2411 2412 __nilfs_btree_init(bmap); 2413 2414 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) 2415 ret = -EIO; 2416 else 2417 ret = nilfs_attach_btree_node_cache( 2418 &NILFS_BMAP_I(bmap)->vfs_inode); 2419 2420 return ret; 2421 } 2422 2423 void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2424 { 2425 bmap->b_ops = &nilfs_btree_ops_gc; 2426 bmap->b_nchildren_per_block = 2427 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2428 } 2429