1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * NILFS B-tree. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/string.h> 12 #include <linux/errno.h> 13 #include <linux/pagevec.h> 14 #include "nilfs.h" 15 #include "page.h" 16 #include "btnode.h" 17 #include "btree.h" 18 #include "alloc.h" 19 #include "dat.h" 20 21 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 22 23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 24 { 25 struct nilfs_btree_path *path; 26 int level = NILFS_BTREE_LEVEL_DATA; 27 28 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 29 if (path == NULL) 30 goto out; 31 32 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 33 path[level].bp_bh = NULL; 34 path[level].bp_sib_bh = NULL; 35 path[level].bp_index = 0; 36 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 37 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 38 path[level].bp_op = NULL; 39 } 40 41 out: 42 return path; 43 } 44 45 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 46 { 47 int level = NILFS_BTREE_LEVEL_DATA; 48 49 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 50 brelse(path[level].bp_bh); 51 52 kmem_cache_free(nilfs_btree_path_cache, path); 53 } 54 55 /* 56 * B-tree node operations 57 */ 58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 59 __u64 ptr, struct buffer_head **bhp) 60 { 61 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 62 struct address_space *btnc = btnc_inode->i_mapping; 63 struct buffer_head *bh; 64 65 bh = nilfs_btnode_create_block(btnc, ptr); 66 if (IS_ERR(bh)) 67 return PTR_ERR(bh); 68 69 set_buffer_nilfs_volatile(bh); 70 *bhp = bh; 71 return 0; 72 } 73 74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 75 { 76 return node->bn_flags; 77 } 78 79 static void 80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 81 { 82 node->bn_flags = flags; 83 } 84 85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 86 { 87 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 88 } 89 90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 91 { 92 return node->bn_level; 93 } 94 95 static void 96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 97 { 98 node->bn_level = level; 99 } 100 101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 102 { 103 return le16_to_cpu(node->bn_nchildren); 104 } 105 106 static void 107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 108 { 109 node->bn_nchildren = cpu_to_le16(nchildren); 110 } 111 112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 113 { 114 return i_blocksize(btree->b_inode); 115 } 116 117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 118 { 119 return btree->b_nchildren_per_block; 120 } 121 122 static __le64 * 123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 124 { 125 return (__le64 *)((char *)(node + 1) + 126 (nilfs_btree_node_root(node) ? 127 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 128 } 129 130 static __le64 * 131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 132 { 133 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 134 } 135 136 static __u64 137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 138 { 139 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 140 } 141 142 static void 143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 144 { 145 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 146 } 147 148 static __u64 149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 150 int ncmax) 151 { 152 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 153 } 154 155 static void 156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 157 int ncmax) 158 { 159 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 160 } 161 162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 163 int level, int nchildren, int ncmax, 164 const __u64 *keys, const __u64 *ptrs) 165 { 166 __le64 *dkeys; 167 __le64 *dptrs; 168 int i; 169 170 nilfs_btree_node_set_flags(node, flags); 171 nilfs_btree_node_set_level(node, level); 172 nilfs_btree_node_set_nchildren(node, nchildren); 173 174 dkeys = nilfs_btree_node_dkeys(node); 175 dptrs = nilfs_btree_node_dptrs(node, ncmax); 176 for (i = 0; i < nchildren; i++) { 177 dkeys[i] = cpu_to_le64(keys[i]); 178 dptrs[i] = cpu_to_le64(ptrs[i]); 179 } 180 } 181 182 /* Assume the buffer heads corresponding to left and right are locked. */ 183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 184 struct nilfs_btree_node *right, 185 int n, int lncmax, int rncmax) 186 { 187 __le64 *ldkeys, *rdkeys; 188 __le64 *ldptrs, *rdptrs; 189 int lnchildren, rnchildren; 190 191 ldkeys = nilfs_btree_node_dkeys(left); 192 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 193 lnchildren = nilfs_btree_node_get_nchildren(left); 194 195 rdkeys = nilfs_btree_node_dkeys(right); 196 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 197 rnchildren = nilfs_btree_node_get_nchildren(right); 198 199 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 200 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 201 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 202 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 203 204 lnchildren += n; 205 rnchildren -= n; 206 nilfs_btree_node_set_nchildren(left, lnchildren); 207 nilfs_btree_node_set_nchildren(right, rnchildren); 208 } 209 210 /* Assume that the buffer heads corresponding to left and right are locked. */ 211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 212 struct nilfs_btree_node *right, 213 int n, int lncmax, int rncmax) 214 { 215 __le64 *ldkeys, *rdkeys; 216 __le64 *ldptrs, *rdptrs; 217 int lnchildren, rnchildren; 218 219 ldkeys = nilfs_btree_node_dkeys(left); 220 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 221 lnchildren = nilfs_btree_node_get_nchildren(left); 222 223 rdkeys = nilfs_btree_node_dkeys(right); 224 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 225 rnchildren = nilfs_btree_node_get_nchildren(right); 226 227 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 228 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 229 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 230 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 231 232 lnchildren -= n; 233 rnchildren += n; 234 nilfs_btree_node_set_nchildren(left, lnchildren); 235 nilfs_btree_node_set_nchildren(right, rnchildren); 236 } 237 238 /* Assume that the buffer head corresponding to node is locked. */ 239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 240 __u64 key, __u64 ptr, int ncmax) 241 { 242 __le64 *dkeys; 243 __le64 *dptrs; 244 int nchildren; 245 246 dkeys = nilfs_btree_node_dkeys(node); 247 dptrs = nilfs_btree_node_dptrs(node, ncmax); 248 nchildren = nilfs_btree_node_get_nchildren(node); 249 if (index < nchildren) { 250 memmove(dkeys + index + 1, dkeys + index, 251 (nchildren - index) * sizeof(*dkeys)); 252 memmove(dptrs + index + 1, dptrs + index, 253 (nchildren - index) * sizeof(*dptrs)); 254 } 255 dkeys[index] = cpu_to_le64(key); 256 dptrs[index] = cpu_to_le64(ptr); 257 nchildren++; 258 nilfs_btree_node_set_nchildren(node, nchildren); 259 } 260 261 /* Assume that the buffer head corresponding to node is locked. */ 262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 263 __u64 *keyp, __u64 *ptrp, int ncmax) 264 { 265 __u64 key; 266 __u64 ptr; 267 __le64 *dkeys; 268 __le64 *dptrs; 269 int nchildren; 270 271 dkeys = nilfs_btree_node_dkeys(node); 272 dptrs = nilfs_btree_node_dptrs(node, ncmax); 273 key = le64_to_cpu(dkeys[index]); 274 ptr = le64_to_cpu(dptrs[index]); 275 nchildren = nilfs_btree_node_get_nchildren(node); 276 if (keyp != NULL) 277 *keyp = key; 278 if (ptrp != NULL) 279 *ptrp = ptr; 280 281 if (index < nchildren - 1) { 282 memmove(dkeys + index, dkeys + index + 1, 283 (nchildren - index - 1) * sizeof(*dkeys)); 284 memmove(dptrs + index, dptrs + index + 1, 285 (nchildren - index - 1) * sizeof(*dptrs)); 286 } 287 nchildren--; 288 nilfs_btree_node_set_nchildren(node, nchildren); 289 } 290 291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 292 __u64 key, int *indexp) 293 { 294 __u64 nkey; 295 int index, low, high, s; 296 297 /* binary search */ 298 low = 0; 299 high = nilfs_btree_node_get_nchildren(node) - 1; 300 index = 0; 301 s = 0; 302 while (low <= high) { 303 index = (low + high) / 2; 304 nkey = nilfs_btree_node_get_key(node, index); 305 if (nkey == key) { 306 s = 0; 307 goto out; 308 } else if (nkey < key) { 309 low = index + 1; 310 s = -1; 311 } else { 312 high = index - 1; 313 s = 1; 314 } 315 } 316 317 /* adjust index */ 318 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 319 if (s > 0 && index > 0) 320 index--; 321 } else if (s < 0) 322 index++; 323 324 out: 325 *indexp = index; 326 327 return s == 0; 328 } 329 330 /** 331 * nilfs_btree_node_broken - verify consistency of btree node 332 * @node: btree node block to be examined 333 * @size: node size (in bytes) 334 * @inode: host inode of btree 335 * @blocknr: block number 336 * 337 * Return: 0 if normal, 1 if the node is broken. 338 */ 339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 340 size_t size, struct inode *inode, 341 sector_t blocknr) 342 { 343 int level, flags, nchildren; 344 int ret = 0; 345 346 level = nilfs_btree_node_get_level(node); 347 flags = nilfs_btree_node_get_flags(node); 348 nchildren = nilfs_btree_node_get_nchildren(node); 349 350 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 351 level >= NILFS_BTREE_LEVEL_MAX || 352 (flags & NILFS_BTREE_NODE_ROOT) || 353 nchildren <= 0 || 354 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 355 nilfs_crit(inode->i_sb, 356 "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", 357 inode->i_ino, (unsigned long long)blocknr, level, 358 flags, nchildren); 359 ret = 1; 360 } 361 return ret; 362 } 363 364 /** 365 * nilfs_btree_root_broken - verify consistency of btree root node 366 * @node: btree root node to be examined 367 * @inode: host inode of btree 368 * 369 * Return: 0 if normal, 1 if the root node is broken. 370 */ 371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 372 struct inode *inode) 373 { 374 int level, flags, nchildren; 375 int ret = 0; 376 377 level = nilfs_btree_node_get_level(node); 378 flags = nilfs_btree_node_get_flags(node); 379 nchildren = nilfs_btree_node_get_nchildren(node); 380 381 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 382 level >= NILFS_BTREE_LEVEL_MAX || 383 nchildren < 0 || 384 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || 385 (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { 386 nilfs_crit(inode->i_sb, 387 "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", 388 inode->i_ino, level, flags, nchildren); 389 ret = 1; 390 } 391 return ret; 392 } 393 394 int nilfs_btree_broken_node_block(struct buffer_head *bh) 395 { 396 struct inode *inode; 397 int ret; 398 399 if (buffer_nilfs_checked(bh)) 400 return 0; 401 402 inode = bh->b_folio->mapping->host; 403 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 404 bh->b_size, inode, bh->b_blocknr); 405 if (likely(!ret)) 406 set_buffer_nilfs_checked(bh); 407 return ret; 408 } 409 410 static struct nilfs_btree_node * 411 nilfs_btree_get_root(const struct nilfs_bmap *btree) 412 { 413 return (struct nilfs_btree_node *)btree->b_u.u_data; 414 } 415 416 static struct nilfs_btree_node * 417 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 418 { 419 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 420 } 421 422 static struct nilfs_btree_node * 423 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 424 { 425 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 426 } 427 428 static int nilfs_btree_height(const struct nilfs_bmap *btree) 429 { 430 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 431 } 432 433 static struct nilfs_btree_node * 434 nilfs_btree_get_node(const struct nilfs_bmap *btree, 435 const struct nilfs_btree_path *path, 436 int level, int *ncmaxp) 437 { 438 struct nilfs_btree_node *node; 439 440 if (level == nilfs_btree_height(btree) - 1) { 441 node = nilfs_btree_get_root(btree); 442 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 443 } else { 444 node = nilfs_btree_get_nonroot_node(path, level); 445 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 446 } 447 return node; 448 } 449 450 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, 451 struct nilfs_btree_node *node, int level) 452 { 453 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 454 dump_stack(); 455 nilfs_crit(btree->b_inode->i_sb, 456 "btree level mismatch (ino=%lu): %d != %d", 457 btree->b_inode->i_ino, 458 nilfs_btree_node_get_level(node), level); 459 return 1; 460 } 461 return 0; 462 } 463 464 struct nilfs_btree_readahead_info { 465 struct nilfs_btree_node *node; /* parent node */ 466 int max_ra_blocks; /* max nof blocks to read ahead */ 467 int index; /* current index on the parent node */ 468 int ncmax; /* nof children in the parent node */ 469 }; 470 471 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 472 struct buffer_head **bhp, 473 const struct nilfs_btree_readahead_info *ra) 474 { 475 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 476 struct address_space *btnc = btnc_inode->i_mapping; 477 struct buffer_head *bh, *ra_bh; 478 sector_t submit_ptr = 0; 479 int ret; 480 481 ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, 482 &submit_ptr); 483 if (ret) { 484 if (likely(ret == -EEXIST)) 485 goto out_check; 486 if (ret == -ENOENT) { 487 /* 488 * Block address translation failed due to invalid 489 * value of 'ptr'. In this case, return internal code 490 * -EINVAL (broken bmap) to notify bmap layer of fatal 491 * metadata corruption. 492 */ 493 ret = -EINVAL; 494 } 495 return ret; 496 } 497 498 if (ra) { 499 int i, n; 500 __u64 ptr2; 501 502 /* read ahead sibling nodes */ 503 for (n = ra->max_ra_blocks, i = ra->index + 1; 504 n > 0 && i < ra->ncmax; n--, i++) { 505 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 506 507 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, 508 REQ_OP_READ | REQ_RAHEAD, 509 &ra_bh, &submit_ptr); 510 if (likely(!ret || ret == -EEXIST)) 511 brelse(ra_bh); 512 else if (ret != -EBUSY) 513 break; 514 if (!buffer_locked(bh)) 515 goto out_no_wait; 516 } 517 } 518 519 wait_on_buffer(bh); 520 521 out_no_wait: 522 if (!buffer_uptodate(bh)) { 523 nilfs_err(btree->b_inode->i_sb, 524 "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", 525 btree->b_inode->i_ino, (unsigned long long)ptr); 526 brelse(bh); 527 return -EIO; 528 } 529 530 out_check: 531 if (nilfs_btree_broken_node_block(bh)) { 532 clear_buffer_uptodate(bh); 533 brelse(bh); 534 return -EINVAL; 535 } 536 537 *bhp = bh; 538 return 0; 539 } 540 541 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 542 struct buffer_head **bhp) 543 { 544 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 545 } 546 547 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 548 struct nilfs_btree_path *path, 549 __u64 key, __u64 *ptrp, int minlevel, 550 int readahead) 551 { 552 struct nilfs_btree_node *node; 553 struct nilfs_btree_readahead_info p, *ra; 554 __u64 ptr; 555 int level, index, found, ncmax, ret; 556 557 node = nilfs_btree_get_root(btree); 558 level = nilfs_btree_node_get_level(node); 559 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 560 return -ENOENT; 561 562 found = nilfs_btree_node_lookup(node, key, &index); 563 ptr = nilfs_btree_node_get_ptr(node, index, 564 NILFS_BTREE_ROOT_NCHILDREN_MAX); 565 path[level].bp_bh = NULL; 566 path[level].bp_index = index; 567 568 ncmax = nilfs_btree_nchildren_per_block(btree); 569 570 while (--level >= minlevel) { 571 ra = NULL; 572 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 573 p.node = nilfs_btree_get_node(btree, path, level + 1, 574 &p.ncmax); 575 p.index = index; 576 p.max_ra_blocks = 7; 577 ra = &p; 578 } 579 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 580 ra); 581 if (ret < 0) 582 return ret; 583 584 node = nilfs_btree_get_nonroot_node(path, level); 585 if (nilfs_btree_bad_node(btree, node, level)) 586 return -EINVAL; 587 if (!found) 588 found = nilfs_btree_node_lookup(node, key, &index); 589 else 590 index = 0; 591 if (index < ncmax) { 592 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 593 } else { 594 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 595 /* insert */ 596 ptr = NILFS_BMAP_INVALID_PTR; 597 } 598 path[level].bp_index = index; 599 } 600 if (!found) 601 return -ENOENT; 602 603 if (ptrp != NULL) 604 *ptrp = ptr; 605 606 return 0; 607 } 608 609 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 610 struct nilfs_btree_path *path, 611 __u64 *keyp, __u64 *ptrp) 612 { 613 struct nilfs_btree_node *node; 614 __u64 ptr; 615 int index, level, ncmax, ret; 616 617 node = nilfs_btree_get_root(btree); 618 index = nilfs_btree_node_get_nchildren(node) - 1; 619 if (index < 0) 620 return -ENOENT; 621 level = nilfs_btree_node_get_level(node); 622 ptr = nilfs_btree_node_get_ptr(node, index, 623 NILFS_BTREE_ROOT_NCHILDREN_MAX); 624 path[level].bp_bh = NULL; 625 path[level].bp_index = index; 626 ncmax = nilfs_btree_nchildren_per_block(btree); 627 628 for (level--; level > 0; level--) { 629 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 630 if (ret < 0) 631 return ret; 632 node = nilfs_btree_get_nonroot_node(path, level); 633 if (nilfs_btree_bad_node(btree, node, level)) 634 return -EINVAL; 635 index = nilfs_btree_node_get_nchildren(node) - 1; 636 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 637 path[level].bp_index = index; 638 } 639 640 if (keyp != NULL) 641 *keyp = nilfs_btree_node_get_key(node, index); 642 if (ptrp != NULL) 643 *ptrp = ptr; 644 645 return 0; 646 } 647 648 /** 649 * nilfs_btree_get_next_key - get next valid key from btree path array 650 * @btree: bmap struct of btree 651 * @path: array of nilfs_btree_path struct 652 * @minlevel: start level 653 * @nextkey: place to store the next valid key 654 * 655 * Return: 0 if the next key was found, %-ENOENT if not found. 656 */ 657 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 658 const struct nilfs_btree_path *path, 659 int minlevel, __u64 *nextkey) 660 { 661 struct nilfs_btree_node *node; 662 int maxlevel = nilfs_btree_height(btree) - 1; 663 int index, next_adj, level; 664 665 /* Next index is already set to bp_index for leaf nodes. */ 666 next_adj = 0; 667 for (level = minlevel; level <= maxlevel; level++) { 668 if (level == maxlevel) 669 node = nilfs_btree_get_root(btree); 670 else 671 node = nilfs_btree_get_nonroot_node(path, level); 672 673 index = path[level].bp_index + next_adj; 674 if (index < nilfs_btree_node_get_nchildren(node)) { 675 /* Next key is in this node */ 676 *nextkey = nilfs_btree_node_get_key(node, index); 677 return 0; 678 } 679 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 680 next_adj = 1; 681 } 682 return -ENOENT; 683 } 684 685 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 686 __u64 key, int level, __u64 *ptrp) 687 { 688 struct nilfs_btree_path *path; 689 int ret; 690 691 path = nilfs_btree_alloc_path(); 692 if (path == NULL) 693 return -ENOMEM; 694 695 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 696 697 nilfs_btree_free_path(path); 698 699 return ret; 700 } 701 702 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 703 __u64 key, __u64 *ptrp, 704 unsigned int maxblocks) 705 { 706 struct nilfs_btree_path *path; 707 struct nilfs_btree_node *node; 708 struct inode *dat = NULL; 709 __u64 ptr, ptr2; 710 sector_t blocknr; 711 int level = NILFS_BTREE_LEVEL_NODE_MIN; 712 int ret, cnt, index, maxlevel, ncmax; 713 struct nilfs_btree_readahead_info p; 714 715 path = nilfs_btree_alloc_path(); 716 if (path == NULL) 717 return -ENOMEM; 718 719 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 720 if (ret < 0) 721 goto out; 722 723 if (NILFS_BMAP_USE_VBN(btree)) { 724 dat = nilfs_bmap_get_dat(btree); 725 ret = nilfs_dat_translate(dat, ptr, &blocknr); 726 if (ret < 0) 727 goto dat_error; 728 ptr = blocknr; 729 } 730 cnt = 1; 731 if (cnt == maxblocks) 732 goto end; 733 734 maxlevel = nilfs_btree_height(btree) - 1; 735 node = nilfs_btree_get_node(btree, path, level, &ncmax); 736 index = path[level].bp_index + 1; 737 for (;;) { 738 while (index < nilfs_btree_node_get_nchildren(node)) { 739 if (nilfs_btree_node_get_key(node, index) != 740 key + cnt) 741 goto end; 742 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 743 if (dat) { 744 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 745 if (ret < 0) 746 goto dat_error; 747 ptr2 = blocknr; 748 } 749 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 750 goto end; 751 index++; 752 } 753 if (level == maxlevel) 754 break; 755 756 /* look-up right sibling node */ 757 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 758 p.index = path[level + 1].bp_index + 1; 759 p.max_ra_blocks = 7; 760 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 761 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 762 break; 763 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 764 path[level + 1].bp_index = p.index; 765 766 brelse(path[level].bp_bh); 767 path[level].bp_bh = NULL; 768 769 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 770 &p); 771 if (ret < 0) 772 goto out; 773 node = nilfs_btree_get_nonroot_node(path, level); 774 ncmax = nilfs_btree_nchildren_per_block(btree); 775 index = 0; 776 path[level].bp_index = index; 777 } 778 end: 779 *ptrp = ptr; 780 ret = cnt; 781 out: 782 nilfs_btree_free_path(path); 783 return ret; 784 785 dat_error: 786 if (ret == -ENOENT) 787 ret = -EINVAL; /* Notify bmap layer of metadata corruption */ 788 goto out; 789 } 790 791 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 792 struct nilfs_btree_path *path, 793 int level, __u64 key) 794 { 795 if (level < nilfs_btree_height(btree) - 1) { 796 do { 797 nilfs_btree_node_set_key( 798 nilfs_btree_get_nonroot_node(path, level), 799 path[level].bp_index, key); 800 if (!buffer_dirty(path[level].bp_bh)) 801 mark_buffer_dirty(path[level].bp_bh); 802 } while ((path[level].bp_index == 0) && 803 (++level < nilfs_btree_height(btree) - 1)); 804 } 805 806 /* root */ 807 if (level == nilfs_btree_height(btree) - 1) { 808 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 809 path[level].bp_index, key); 810 } 811 } 812 813 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 814 struct nilfs_btree_path *path, 815 int level, __u64 *keyp, __u64 *ptrp) 816 { 817 struct nilfs_btree_node *node; 818 int ncblk; 819 820 if (level < nilfs_btree_height(btree) - 1) { 821 node = nilfs_btree_get_nonroot_node(path, level); 822 ncblk = nilfs_btree_nchildren_per_block(btree); 823 nilfs_btree_node_insert(node, path[level].bp_index, 824 *keyp, *ptrp, ncblk); 825 if (!buffer_dirty(path[level].bp_bh)) 826 mark_buffer_dirty(path[level].bp_bh); 827 828 if (path[level].bp_index == 0) 829 nilfs_btree_promote_key(btree, path, level + 1, 830 nilfs_btree_node_get_key(node, 831 0)); 832 } else { 833 node = nilfs_btree_get_root(btree); 834 nilfs_btree_node_insert(node, path[level].bp_index, 835 *keyp, *ptrp, 836 NILFS_BTREE_ROOT_NCHILDREN_MAX); 837 } 838 } 839 840 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 841 struct nilfs_btree_path *path, 842 int level, __u64 *keyp, __u64 *ptrp) 843 { 844 struct nilfs_btree_node *node, *left; 845 int nchildren, lnchildren, n, move, ncblk; 846 847 node = nilfs_btree_get_nonroot_node(path, level); 848 left = nilfs_btree_get_sib_node(path, level); 849 nchildren = nilfs_btree_node_get_nchildren(node); 850 lnchildren = nilfs_btree_node_get_nchildren(left); 851 ncblk = nilfs_btree_nchildren_per_block(btree); 852 move = 0; 853 854 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 855 if (n > path[level].bp_index) { 856 /* move insert point */ 857 n--; 858 move = 1; 859 } 860 861 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 862 863 if (!buffer_dirty(path[level].bp_bh)) 864 mark_buffer_dirty(path[level].bp_bh); 865 if (!buffer_dirty(path[level].bp_sib_bh)) 866 mark_buffer_dirty(path[level].bp_sib_bh); 867 868 nilfs_btree_promote_key(btree, path, level + 1, 869 nilfs_btree_node_get_key(node, 0)); 870 871 if (move) { 872 brelse(path[level].bp_bh); 873 path[level].bp_bh = path[level].bp_sib_bh; 874 path[level].bp_sib_bh = NULL; 875 path[level].bp_index += lnchildren; 876 path[level + 1].bp_index--; 877 } else { 878 brelse(path[level].bp_sib_bh); 879 path[level].bp_sib_bh = NULL; 880 path[level].bp_index -= n; 881 } 882 883 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 884 } 885 886 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 887 struct nilfs_btree_path *path, 888 int level, __u64 *keyp, __u64 *ptrp) 889 { 890 struct nilfs_btree_node *node, *right; 891 int nchildren, rnchildren, n, move, ncblk; 892 893 node = nilfs_btree_get_nonroot_node(path, level); 894 right = nilfs_btree_get_sib_node(path, level); 895 nchildren = nilfs_btree_node_get_nchildren(node); 896 rnchildren = nilfs_btree_node_get_nchildren(right); 897 ncblk = nilfs_btree_nchildren_per_block(btree); 898 move = 0; 899 900 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 901 if (n > nchildren - path[level].bp_index) { 902 /* move insert point */ 903 n--; 904 move = 1; 905 } 906 907 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 908 909 if (!buffer_dirty(path[level].bp_bh)) 910 mark_buffer_dirty(path[level].bp_bh); 911 if (!buffer_dirty(path[level].bp_sib_bh)) 912 mark_buffer_dirty(path[level].bp_sib_bh); 913 914 path[level + 1].bp_index++; 915 nilfs_btree_promote_key(btree, path, level + 1, 916 nilfs_btree_node_get_key(right, 0)); 917 path[level + 1].bp_index--; 918 919 if (move) { 920 brelse(path[level].bp_bh); 921 path[level].bp_bh = path[level].bp_sib_bh; 922 path[level].bp_sib_bh = NULL; 923 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 924 path[level + 1].bp_index++; 925 } else { 926 brelse(path[level].bp_sib_bh); 927 path[level].bp_sib_bh = NULL; 928 } 929 930 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 931 } 932 933 static void nilfs_btree_split(struct nilfs_bmap *btree, 934 struct nilfs_btree_path *path, 935 int level, __u64 *keyp, __u64 *ptrp) 936 { 937 struct nilfs_btree_node *node, *right; 938 int nchildren, n, move, ncblk; 939 940 node = nilfs_btree_get_nonroot_node(path, level); 941 right = nilfs_btree_get_sib_node(path, level); 942 nchildren = nilfs_btree_node_get_nchildren(node); 943 ncblk = nilfs_btree_nchildren_per_block(btree); 944 move = 0; 945 946 n = (nchildren + 1) / 2; 947 if (n > nchildren - path[level].bp_index) { 948 n--; 949 move = 1; 950 } 951 952 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 953 954 if (!buffer_dirty(path[level].bp_bh)) 955 mark_buffer_dirty(path[level].bp_bh); 956 if (!buffer_dirty(path[level].bp_sib_bh)) 957 mark_buffer_dirty(path[level].bp_sib_bh); 958 959 if (move) { 960 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 961 nilfs_btree_node_insert(right, path[level].bp_index, 962 *keyp, *ptrp, ncblk); 963 964 *keyp = nilfs_btree_node_get_key(right, 0); 965 *ptrp = path[level].bp_newreq.bpr_ptr; 966 967 brelse(path[level].bp_bh); 968 path[level].bp_bh = path[level].bp_sib_bh; 969 path[level].bp_sib_bh = NULL; 970 } else { 971 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 972 973 *keyp = nilfs_btree_node_get_key(right, 0); 974 *ptrp = path[level].bp_newreq.bpr_ptr; 975 976 brelse(path[level].bp_sib_bh); 977 path[level].bp_sib_bh = NULL; 978 } 979 980 path[level + 1].bp_index++; 981 } 982 983 static void nilfs_btree_grow(struct nilfs_bmap *btree, 984 struct nilfs_btree_path *path, 985 int level, __u64 *keyp, __u64 *ptrp) 986 { 987 struct nilfs_btree_node *root, *child; 988 int n, ncblk; 989 990 root = nilfs_btree_get_root(btree); 991 child = nilfs_btree_get_sib_node(path, level); 992 ncblk = nilfs_btree_nchildren_per_block(btree); 993 994 n = nilfs_btree_node_get_nchildren(root); 995 996 nilfs_btree_node_move_right(root, child, n, 997 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 998 nilfs_btree_node_set_level(root, level + 1); 999 1000 if (!buffer_dirty(path[level].bp_sib_bh)) 1001 mark_buffer_dirty(path[level].bp_sib_bh); 1002 1003 path[level].bp_bh = path[level].bp_sib_bh; 1004 path[level].bp_sib_bh = NULL; 1005 1006 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 1007 1008 *keyp = nilfs_btree_node_get_key(child, 0); 1009 *ptrp = path[level].bp_newreq.bpr_ptr; 1010 } 1011 1012 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 1013 const struct nilfs_btree_path *path) 1014 { 1015 struct nilfs_btree_node *node; 1016 int level, ncmax; 1017 1018 if (path == NULL) 1019 return NILFS_BMAP_INVALID_PTR; 1020 1021 /* left sibling */ 1022 level = NILFS_BTREE_LEVEL_NODE_MIN; 1023 if (path[level].bp_index > 0) { 1024 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1025 return nilfs_btree_node_get_ptr(node, 1026 path[level].bp_index - 1, 1027 ncmax); 1028 } 1029 1030 /* parent */ 1031 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1032 if (level <= nilfs_btree_height(btree) - 1) { 1033 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1034 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1035 ncmax); 1036 } 1037 1038 return NILFS_BMAP_INVALID_PTR; 1039 } 1040 1041 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1042 const struct nilfs_btree_path *path, 1043 __u64 key) 1044 { 1045 __u64 ptr; 1046 1047 ptr = nilfs_bmap_find_target_seq(btree, key); 1048 if (ptr != NILFS_BMAP_INVALID_PTR) 1049 /* sequential access */ 1050 return ptr; 1051 1052 ptr = nilfs_btree_find_near(btree, path); 1053 if (ptr != NILFS_BMAP_INVALID_PTR) 1054 /* near */ 1055 return ptr; 1056 1057 /* block group */ 1058 return nilfs_bmap_find_target_in_group(btree); 1059 } 1060 1061 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1062 struct nilfs_btree_path *path, 1063 int *levelp, __u64 key, __u64 ptr, 1064 struct nilfs_bmap_stats *stats) 1065 { 1066 struct buffer_head *bh; 1067 struct nilfs_btree_node *node, *parent, *sib; 1068 __u64 sibptr; 1069 int pindex, level, ncmax, ncblk, ret; 1070 struct inode *dat = NULL; 1071 1072 stats->bs_nblocks = 0; 1073 level = NILFS_BTREE_LEVEL_DATA; 1074 1075 /* allocate a new ptr for data block */ 1076 if (NILFS_BMAP_USE_VBN(btree)) { 1077 path[level].bp_newreq.bpr_ptr = 1078 nilfs_btree_find_target_v(btree, path, key); 1079 dat = nilfs_bmap_get_dat(btree); 1080 } 1081 1082 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1083 if (ret < 0) 1084 goto err_out_data; 1085 1086 ncblk = nilfs_btree_nchildren_per_block(btree); 1087 1088 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1089 level < nilfs_btree_height(btree) - 1; 1090 level++) { 1091 node = nilfs_btree_get_nonroot_node(path, level); 1092 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1093 path[level].bp_op = nilfs_btree_do_insert; 1094 stats->bs_nblocks++; 1095 goto out; 1096 } 1097 1098 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1099 pindex = path[level + 1].bp_index; 1100 1101 /* left sibling */ 1102 if (pindex > 0) { 1103 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1104 ncmax); 1105 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1106 if (ret < 0) 1107 goto err_out_child_node; 1108 sib = (struct nilfs_btree_node *)bh->b_data; 1109 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1110 path[level].bp_sib_bh = bh; 1111 path[level].bp_op = nilfs_btree_carry_left; 1112 stats->bs_nblocks++; 1113 goto out; 1114 } else { 1115 brelse(bh); 1116 } 1117 } 1118 1119 /* right sibling */ 1120 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1121 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1122 ncmax); 1123 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1124 if (ret < 0) 1125 goto err_out_child_node; 1126 sib = (struct nilfs_btree_node *)bh->b_data; 1127 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1128 path[level].bp_sib_bh = bh; 1129 path[level].bp_op = nilfs_btree_carry_right; 1130 stats->bs_nblocks++; 1131 goto out; 1132 } else { 1133 brelse(bh); 1134 } 1135 } 1136 1137 /* split */ 1138 path[level].bp_newreq.bpr_ptr = 1139 path[level - 1].bp_newreq.bpr_ptr + 1; 1140 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1141 &path[level].bp_newreq, dat); 1142 if (ret < 0) 1143 goto err_out_child_node; 1144 ret = nilfs_btree_get_new_block(btree, 1145 path[level].bp_newreq.bpr_ptr, 1146 &bh); 1147 if (ret < 0) 1148 goto err_out_curr_node; 1149 1150 stats->bs_nblocks++; 1151 1152 sib = (struct nilfs_btree_node *)bh->b_data; 1153 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1154 path[level].bp_sib_bh = bh; 1155 path[level].bp_op = nilfs_btree_split; 1156 } 1157 1158 /* root */ 1159 node = nilfs_btree_get_root(btree); 1160 if (nilfs_btree_node_get_nchildren(node) < 1161 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1162 path[level].bp_op = nilfs_btree_do_insert; 1163 stats->bs_nblocks++; 1164 goto out; 1165 } 1166 1167 /* grow */ 1168 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1169 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1170 if (ret < 0) 1171 goto err_out_child_node; 1172 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1173 &bh); 1174 if (ret < 0) 1175 goto err_out_curr_node; 1176 1177 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1178 0, level, 0, ncblk, NULL, NULL); 1179 path[level].bp_sib_bh = bh; 1180 path[level].bp_op = nilfs_btree_grow; 1181 1182 level++; 1183 path[level].bp_op = nilfs_btree_do_insert; 1184 1185 /* a newly-created node block and a data block are added */ 1186 stats->bs_nblocks += 2; 1187 1188 /* success */ 1189 out: 1190 *levelp = level; 1191 return ret; 1192 1193 /* error */ 1194 err_out_curr_node: 1195 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1196 err_out_child_node: 1197 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1198 nilfs_btnode_delete(path[level].bp_sib_bh); 1199 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1200 1201 } 1202 1203 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1204 err_out_data: 1205 *levelp = level; 1206 stats->bs_nblocks = 0; 1207 return ret; 1208 } 1209 1210 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1211 struct nilfs_btree_path *path, 1212 int maxlevel, __u64 key, __u64 ptr) 1213 { 1214 struct inode *dat = NULL; 1215 int level; 1216 1217 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1218 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1219 if (NILFS_BMAP_USE_VBN(btree)) { 1220 nilfs_bmap_set_target_v(btree, key, ptr); 1221 dat = nilfs_bmap_get_dat(btree); 1222 } 1223 1224 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1225 nilfs_bmap_commit_alloc_ptr(btree, 1226 &path[level - 1].bp_newreq, dat); 1227 path[level].bp_op(btree, path, level, &key, &ptr); 1228 } 1229 1230 if (!nilfs_bmap_dirty(btree)) 1231 nilfs_bmap_set_dirty(btree); 1232 } 1233 1234 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1235 { 1236 struct nilfs_btree_path *path; 1237 struct nilfs_bmap_stats stats; 1238 int level, ret; 1239 1240 path = nilfs_btree_alloc_path(); 1241 if (path == NULL) 1242 return -ENOMEM; 1243 1244 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1245 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1246 if (ret != -ENOENT) { 1247 if (ret == 0) 1248 ret = -EEXIST; 1249 goto out; 1250 } 1251 1252 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1253 if (ret < 0) 1254 goto out; 1255 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1256 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1257 1258 out: 1259 nilfs_btree_free_path(path); 1260 return ret; 1261 } 1262 1263 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1264 struct nilfs_btree_path *path, 1265 int level, __u64 *keyp, __u64 *ptrp) 1266 { 1267 struct nilfs_btree_node *node; 1268 int ncblk; 1269 1270 if (level < nilfs_btree_height(btree) - 1) { 1271 node = nilfs_btree_get_nonroot_node(path, level); 1272 ncblk = nilfs_btree_nchildren_per_block(btree); 1273 nilfs_btree_node_delete(node, path[level].bp_index, 1274 keyp, ptrp, ncblk); 1275 if (!buffer_dirty(path[level].bp_bh)) 1276 mark_buffer_dirty(path[level].bp_bh); 1277 if (path[level].bp_index == 0) 1278 nilfs_btree_promote_key(btree, path, level + 1, 1279 nilfs_btree_node_get_key(node, 0)); 1280 } else { 1281 node = nilfs_btree_get_root(btree); 1282 nilfs_btree_node_delete(node, path[level].bp_index, 1283 keyp, ptrp, 1284 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1285 } 1286 } 1287 1288 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1289 struct nilfs_btree_path *path, 1290 int level, __u64 *keyp, __u64 *ptrp) 1291 { 1292 struct nilfs_btree_node *node, *left; 1293 int nchildren, lnchildren, n, ncblk; 1294 1295 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1296 1297 node = nilfs_btree_get_nonroot_node(path, level); 1298 left = nilfs_btree_get_sib_node(path, level); 1299 nchildren = nilfs_btree_node_get_nchildren(node); 1300 lnchildren = nilfs_btree_node_get_nchildren(left); 1301 ncblk = nilfs_btree_nchildren_per_block(btree); 1302 1303 n = (nchildren + lnchildren) / 2 - nchildren; 1304 1305 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1306 1307 if (!buffer_dirty(path[level].bp_bh)) 1308 mark_buffer_dirty(path[level].bp_bh); 1309 if (!buffer_dirty(path[level].bp_sib_bh)) 1310 mark_buffer_dirty(path[level].bp_sib_bh); 1311 1312 nilfs_btree_promote_key(btree, path, level + 1, 1313 nilfs_btree_node_get_key(node, 0)); 1314 1315 brelse(path[level].bp_sib_bh); 1316 path[level].bp_sib_bh = NULL; 1317 path[level].bp_index += n; 1318 } 1319 1320 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1321 struct nilfs_btree_path *path, 1322 int level, __u64 *keyp, __u64 *ptrp) 1323 { 1324 struct nilfs_btree_node *node, *right; 1325 int nchildren, rnchildren, n, ncblk; 1326 1327 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1328 1329 node = nilfs_btree_get_nonroot_node(path, level); 1330 right = nilfs_btree_get_sib_node(path, level); 1331 nchildren = nilfs_btree_node_get_nchildren(node); 1332 rnchildren = nilfs_btree_node_get_nchildren(right); 1333 ncblk = nilfs_btree_nchildren_per_block(btree); 1334 1335 n = (nchildren + rnchildren) / 2 - nchildren; 1336 1337 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1338 1339 if (!buffer_dirty(path[level].bp_bh)) 1340 mark_buffer_dirty(path[level].bp_bh); 1341 if (!buffer_dirty(path[level].bp_sib_bh)) 1342 mark_buffer_dirty(path[level].bp_sib_bh); 1343 1344 path[level + 1].bp_index++; 1345 nilfs_btree_promote_key(btree, path, level + 1, 1346 nilfs_btree_node_get_key(right, 0)); 1347 path[level + 1].bp_index--; 1348 1349 brelse(path[level].bp_sib_bh); 1350 path[level].bp_sib_bh = NULL; 1351 } 1352 1353 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1354 struct nilfs_btree_path *path, 1355 int level, __u64 *keyp, __u64 *ptrp) 1356 { 1357 struct nilfs_btree_node *node, *left; 1358 int n, ncblk; 1359 1360 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1361 1362 node = nilfs_btree_get_nonroot_node(path, level); 1363 left = nilfs_btree_get_sib_node(path, level); 1364 ncblk = nilfs_btree_nchildren_per_block(btree); 1365 1366 n = nilfs_btree_node_get_nchildren(node); 1367 1368 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1369 1370 if (!buffer_dirty(path[level].bp_sib_bh)) 1371 mark_buffer_dirty(path[level].bp_sib_bh); 1372 1373 nilfs_btnode_delete(path[level].bp_bh); 1374 path[level].bp_bh = path[level].bp_sib_bh; 1375 path[level].bp_sib_bh = NULL; 1376 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1377 } 1378 1379 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1380 struct nilfs_btree_path *path, 1381 int level, __u64 *keyp, __u64 *ptrp) 1382 { 1383 struct nilfs_btree_node *node, *right; 1384 int n, ncblk; 1385 1386 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1387 1388 node = nilfs_btree_get_nonroot_node(path, level); 1389 right = nilfs_btree_get_sib_node(path, level); 1390 ncblk = nilfs_btree_nchildren_per_block(btree); 1391 1392 n = nilfs_btree_node_get_nchildren(right); 1393 1394 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1395 1396 if (!buffer_dirty(path[level].bp_bh)) 1397 mark_buffer_dirty(path[level].bp_bh); 1398 1399 nilfs_btnode_delete(path[level].bp_sib_bh); 1400 path[level].bp_sib_bh = NULL; 1401 path[level + 1].bp_index++; 1402 } 1403 1404 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1405 struct nilfs_btree_path *path, 1406 int level, __u64 *keyp, __u64 *ptrp) 1407 { 1408 struct nilfs_btree_node *root, *child; 1409 int n, ncblk; 1410 1411 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1412 1413 root = nilfs_btree_get_root(btree); 1414 child = nilfs_btree_get_nonroot_node(path, level); 1415 ncblk = nilfs_btree_nchildren_per_block(btree); 1416 1417 nilfs_btree_node_delete(root, 0, NULL, NULL, 1418 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1419 nilfs_btree_node_set_level(root, level); 1420 n = nilfs_btree_node_get_nchildren(child); 1421 nilfs_btree_node_move_left(root, child, n, 1422 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1423 1424 nilfs_btnode_delete(path[level].bp_bh); 1425 path[level].bp_bh = NULL; 1426 } 1427 1428 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1429 struct nilfs_btree_path *path, 1430 int level, __u64 *keyp, __u64 *ptrp) 1431 { 1432 } 1433 1434 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1435 struct nilfs_btree_path *path, 1436 int *levelp, 1437 struct nilfs_bmap_stats *stats, 1438 struct inode *dat) 1439 { 1440 struct buffer_head *bh; 1441 struct nilfs_btree_node *node, *parent, *sib; 1442 __u64 sibptr; 1443 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1444 1445 ret = 0; 1446 stats->bs_nblocks = 0; 1447 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1448 ncblk = nilfs_btree_nchildren_per_block(btree); 1449 1450 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1451 level < nilfs_btree_height(btree) - 1; 1452 level++) { 1453 node = nilfs_btree_get_nonroot_node(path, level); 1454 path[level].bp_oldreq.bpr_ptr = 1455 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1456 ret = nilfs_bmap_prepare_end_ptr(btree, 1457 &path[level].bp_oldreq, dat); 1458 if (ret < 0) 1459 goto err_out_child_node; 1460 1461 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1462 path[level].bp_op = nilfs_btree_do_delete; 1463 stats->bs_nblocks++; 1464 goto out; 1465 } 1466 1467 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1468 pindex = path[level + 1].bp_index; 1469 dindex = pindex; 1470 1471 if (pindex > 0) { 1472 /* left sibling */ 1473 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1474 ncmax); 1475 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1476 if (ret < 0) 1477 goto err_out_curr_node; 1478 sib = (struct nilfs_btree_node *)bh->b_data; 1479 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1480 path[level].bp_sib_bh = bh; 1481 path[level].bp_op = nilfs_btree_borrow_left; 1482 stats->bs_nblocks++; 1483 goto out; 1484 } else { 1485 path[level].bp_sib_bh = bh; 1486 path[level].bp_op = nilfs_btree_concat_left; 1487 stats->bs_nblocks++; 1488 /* continue; */ 1489 } 1490 } else if (pindex < 1491 nilfs_btree_node_get_nchildren(parent) - 1) { 1492 /* right sibling */ 1493 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1494 ncmax); 1495 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1496 if (ret < 0) 1497 goto err_out_curr_node; 1498 sib = (struct nilfs_btree_node *)bh->b_data; 1499 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1500 path[level].bp_sib_bh = bh; 1501 path[level].bp_op = nilfs_btree_borrow_right; 1502 stats->bs_nblocks++; 1503 goto out; 1504 } else { 1505 path[level].bp_sib_bh = bh; 1506 path[level].bp_op = nilfs_btree_concat_right; 1507 stats->bs_nblocks++; 1508 /* 1509 * When merging right sibling node 1510 * into the current node, pointer to 1511 * the right sibling node must be 1512 * terminated instead. The adjustment 1513 * below is required for that. 1514 */ 1515 dindex = pindex + 1; 1516 /* continue; */ 1517 } 1518 } else { 1519 /* no siblings */ 1520 /* the only child of the root node */ 1521 WARN_ON(level != nilfs_btree_height(btree) - 2); 1522 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1523 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1524 path[level].bp_op = nilfs_btree_shrink; 1525 stats->bs_nblocks += 2; 1526 level++; 1527 path[level].bp_op = nilfs_btree_nop; 1528 goto shrink_root_child; 1529 } else { 1530 path[level].bp_op = nilfs_btree_do_delete; 1531 stats->bs_nblocks++; 1532 goto out; 1533 } 1534 } 1535 } 1536 1537 /* child of the root node is deleted */ 1538 path[level].bp_op = nilfs_btree_do_delete; 1539 stats->bs_nblocks++; 1540 1541 shrink_root_child: 1542 node = nilfs_btree_get_root(btree); 1543 path[level].bp_oldreq.bpr_ptr = 1544 nilfs_btree_node_get_ptr(node, dindex, 1545 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1546 1547 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1548 if (ret < 0) 1549 goto err_out_child_node; 1550 1551 /* success */ 1552 out: 1553 *levelp = level; 1554 return ret; 1555 1556 /* error */ 1557 err_out_curr_node: 1558 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1559 err_out_child_node: 1560 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1561 brelse(path[level].bp_sib_bh); 1562 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1563 } 1564 *levelp = level; 1565 stats->bs_nblocks = 0; 1566 return ret; 1567 } 1568 1569 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1570 struct nilfs_btree_path *path, 1571 int maxlevel, struct inode *dat) 1572 { 1573 int level; 1574 1575 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1576 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1577 path[level].bp_op(btree, path, level, NULL, NULL); 1578 } 1579 1580 if (!nilfs_bmap_dirty(btree)) 1581 nilfs_bmap_set_dirty(btree); 1582 } 1583 1584 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1585 1586 { 1587 struct nilfs_btree_path *path; 1588 struct nilfs_bmap_stats stats; 1589 struct inode *dat; 1590 int level, ret; 1591 1592 path = nilfs_btree_alloc_path(); 1593 if (path == NULL) 1594 return -ENOMEM; 1595 1596 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1597 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1598 if (ret < 0) 1599 goto out; 1600 1601 1602 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1603 1604 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1605 if (ret < 0) 1606 goto out; 1607 nilfs_btree_commit_delete(btree, path, level, dat); 1608 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1609 1610 out: 1611 nilfs_btree_free_path(path); 1612 return ret; 1613 } 1614 1615 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1616 __u64 *keyp) 1617 { 1618 struct nilfs_btree_path *path; 1619 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1620 int ret; 1621 1622 path = nilfs_btree_alloc_path(); 1623 if (!path) 1624 return -ENOMEM; 1625 1626 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1627 if (!ret) 1628 *keyp = start; 1629 else if (ret == -ENOENT) 1630 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1631 1632 nilfs_btree_free_path(path); 1633 return ret; 1634 } 1635 1636 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1637 { 1638 struct nilfs_btree_path *path; 1639 int ret; 1640 1641 path = nilfs_btree_alloc_path(); 1642 if (path == NULL) 1643 return -ENOMEM; 1644 1645 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1646 1647 nilfs_btree_free_path(path); 1648 1649 return ret; 1650 } 1651 1652 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1653 { 1654 struct buffer_head *bh; 1655 struct nilfs_btree_node *root, *node; 1656 __u64 maxkey, nextmaxkey; 1657 __u64 ptr; 1658 int nchildren, ret; 1659 1660 root = nilfs_btree_get_root(btree); 1661 nchildren = nilfs_btree_node_get_nchildren(root); 1662 if (unlikely(nchildren == 0)) 1663 return 0; 1664 1665 switch (nilfs_btree_height(btree)) { 1666 case 2: 1667 bh = NULL; 1668 node = root; 1669 break; 1670 case 3: 1671 if (nchildren > 1) 1672 return 0; 1673 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1674 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1675 ret = nilfs_btree_get_block(btree, ptr, &bh); 1676 if (ret < 0) 1677 return ret; 1678 node = (struct nilfs_btree_node *)bh->b_data; 1679 nchildren = nilfs_btree_node_get_nchildren(node); 1680 break; 1681 default: 1682 return 0; 1683 } 1684 1685 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1686 nextmaxkey = (nchildren > 1) ? 1687 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1688 brelse(bh); 1689 1690 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1691 } 1692 1693 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1694 __u64 *keys, __u64 *ptrs, int nitems) 1695 { 1696 struct buffer_head *bh; 1697 struct nilfs_btree_node *node, *root; 1698 __le64 *dkeys; 1699 __le64 *dptrs; 1700 __u64 ptr; 1701 int nchildren, ncmax, i, ret; 1702 1703 root = nilfs_btree_get_root(btree); 1704 switch (nilfs_btree_height(btree)) { 1705 case 2: 1706 bh = NULL; 1707 node = root; 1708 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1709 break; 1710 case 3: 1711 nchildren = nilfs_btree_node_get_nchildren(root); 1712 WARN_ON(nchildren > 1); 1713 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1714 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1715 ret = nilfs_btree_get_block(btree, ptr, &bh); 1716 if (ret < 0) 1717 return ret; 1718 node = (struct nilfs_btree_node *)bh->b_data; 1719 ncmax = nilfs_btree_nchildren_per_block(btree); 1720 break; 1721 default: 1722 node = NULL; 1723 return -EINVAL; 1724 } 1725 1726 nchildren = nilfs_btree_node_get_nchildren(node); 1727 if (nchildren < nitems) 1728 nitems = nchildren; 1729 dkeys = nilfs_btree_node_dkeys(node); 1730 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1731 for (i = 0; i < nitems; i++) { 1732 keys[i] = le64_to_cpu(dkeys[i]); 1733 ptrs[i] = le64_to_cpu(dptrs[i]); 1734 } 1735 1736 brelse(bh); 1737 1738 return nitems; 1739 } 1740 1741 static int 1742 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1743 union nilfs_bmap_ptr_req *dreq, 1744 union nilfs_bmap_ptr_req *nreq, 1745 struct buffer_head **bhp, 1746 struct nilfs_bmap_stats *stats) 1747 { 1748 struct buffer_head *bh; 1749 struct inode *dat = NULL; 1750 int ret; 1751 1752 stats->bs_nblocks = 0; 1753 1754 /* for data */ 1755 /* cannot find near ptr */ 1756 if (NILFS_BMAP_USE_VBN(btree)) { 1757 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1758 dat = nilfs_bmap_get_dat(btree); 1759 } 1760 1761 ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); 1762 if (ret < 0) 1763 return ret; 1764 1765 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1766 if (ret < 0) 1767 return ret; 1768 1769 *bhp = NULL; 1770 stats->bs_nblocks++; 1771 if (nreq != NULL) { 1772 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1773 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1774 if (ret < 0) 1775 goto err_out_dreq; 1776 1777 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1778 if (ret < 0) 1779 goto err_out_nreq; 1780 1781 *bhp = bh; 1782 stats->bs_nblocks++; 1783 } 1784 1785 /* success */ 1786 return 0; 1787 1788 /* error */ 1789 err_out_nreq: 1790 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1791 err_out_dreq: 1792 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1793 stats->bs_nblocks = 0; 1794 return ret; 1795 1796 } 1797 1798 static void 1799 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1800 __u64 key, __u64 ptr, 1801 const __u64 *keys, const __u64 *ptrs, 1802 int n, 1803 union nilfs_bmap_ptr_req *dreq, 1804 union nilfs_bmap_ptr_req *nreq, 1805 struct buffer_head *bh) 1806 { 1807 struct nilfs_btree_node *node; 1808 struct inode *dat; 1809 __u64 tmpptr; 1810 int ncblk; 1811 1812 /* free resources */ 1813 if (btree->b_ops->bop_clear != NULL) 1814 btree->b_ops->bop_clear(btree); 1815 1816 /* ptr must be a pointer to a buffer head. */ 1817 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1818 1819 /* convert and insert */ 1820 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1821 __nilfs_btree_init(btree); 1822 if (nreq != NULL) { 1823 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1824 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1825 1826 /* create child node at level 1 */ 1827 node = (struct nilfs_btree_node *)bh->b_data; 1828 ncblk = nilfs_btree_nchildren_per_block(btree); 1829 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1830 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1831 if (!buffer_dirty(bh)) 1832 mark_buffer_dirty(bh); 1833 if (!nilfs_bmap_dirty(btree)) 1834 nilfs_bmap_set_dirty(btree); 1835 1836 brelse(bh); 1837 1838 /* create root node at level 2 */ 1839 node = nilfs_btree_get_root(btree); 1840 tmpptr = nreq->bpr_ptr; 1841 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1842 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1843 &keys[0], &tmpptr); 1844 } else { 1845 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1846 1847 /* create root node at level 1 */ 1848 node = nilfs_btree_get_root(btree); 1849 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1850 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1851 keys, ptrs); 1852 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1853 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1854 if (!nilfs_bmap_dirty(btree)) 1855 nilfs_bmap_set_dirty(btree); 1856 } 1857 1858 if (NILFS_BMAP_USE_VBN(btree)) 1859 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1860 } 1861 1862 /** 1863 * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree 1864 * @btree: NILFS B-tree structure 1865 * @key: Key of the new entry to be inserted 1866 * @ptr: Pointer (block number) associated with the key to be inserted 1867 * @keys: Array of keys to be inserted in addition to @key 1868 * @ptrs: Array of pointers associated with @keys 1869 * @n: Number of keys and pointers in @keys and @ptrs 1870 * 1871 * This function is used to insert a new entry specified by @key and @ptr, 1872 * along with additional entries specified by @keys and @ptrs arrays, into a 1873 * NILFS B-tree. 1874 * It prepares the necessary changes by allocating the required blocks and any 1875 * necessary intermediate nodes. It converts configurations from other forms of 1876 * block mapping (the one that currently exists is direct mapping) to a B-tree. 1877 * 1878 * Return: 0 on success or a negative error code on failure. 1879 */ 1880 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1881 __u64 key, __u64 ptr, 1882 const __u64 *keys, const __u64 *ptrs, int n) 1883 { 1884 struct buffer_head *bh = NULL; 1885 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1886 struct nilfs_bmap_stats stats; 1887 int ret; 1888 1889 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1890 di = &dreq; 1891 ni = NULL; 1892 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1893 nilfs_btree_node_size(btree))) { 1894 di = &dreq; 1895 ni = &nreq; 1896 } else { 1897 di = NULL; 1898 ni = NULL; 1899 BUG(); 1900 } 1901 1902 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1903 &stats); 1904 if (ret < 0) 1905 return ret; 1906 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1907 di, ni, bh); 1908 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1909 return 0; 1910 } 1911 1912 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1913 struct nilfs_btree_path *path, 1914 int level, 1915 struct buffer_head *bh) 1916 { 1917 while ((++level < nilfs_btree_height(btree) - 1) && 1918 !buffer_dirty(path[level].bp_bh)) 1919 mark_buffer_dirty(path[level].bp_bh); 1920 1921 return 0; 1922 } 1923 1924 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1925 struct nilfs_btree_path *path, 1926 int level, struct inode *dat) 1927 { 1928 struct nilfs_btree_node *parent; 1929 int ncmax, ret; 1930 1931 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1932 path[level].bp_oldreq.bpr_ptr = 1933 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1934 ncmax); 1935 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1936 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1937 &path[level].bp_newreq.bpr_req); 1938 if (ret < 0) 1939 return ret; 1940 1941 if (buffer_nilfs_node(path[level].bp_bh)) { 1942 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1943 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1944 path[level].bp_ctxt.bh = path[level].bp_bh; 1945 ret = nilfs_btnode_prepare_change_key( 1946 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1947 &path[level].bp_ctxt); 1948 if (ret < 0) { 1949 nilfs_dat_abort_update(dat, 1950 &path[level].bp_oldreq.bpr_req, 1951 &path[level].bp_newreq.bpr_req); 1952 return ret; 1953 } 1954 } 1955 1956 return 0; 1957 } 1958 1959 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1960 struct nilfs_btree_path *path, 1961 int level, struct inode *dat) 1962 { 1963 struct nilfs_btree_node *parent; 1964 int ncmax; 1965 1966 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1967 &path[level].bp_newreq.bpr_req, 1968 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1969 1970 if (buffer_nilfs_node(path[level].bp_bh)) { 1971 nilfs_btnode_commit_change_key( 1972 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1973 &path[level].bp_ctxt); 1974 path[level].bp_bh = path[level].bp_ctxt.bh; 1975 } 1976 set_buffer_nilfs_volatile(path[level].bp_bh); 1977 1978 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1979 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1980 path[level].bp_newreq.bpr_ptr, ncmax); 1981 } 1982 1983 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1984 struct nilfs_btree_path *path, 1985 int level, struct inode *dat) 1986 { 1987 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1988 &path[level].bp_newreq.bpr_req); 1989 if (buffer_nilfs_node(path[level].bp_bh)) 1990 nilfs_btnode_abort_change_key( 1991 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1992 &path[level].bp_ctxt); 1993 } 1994 1995 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1996 struct nilfs_btree_path *path, 1997 int minlevel, int *maxlevelp, 1998 struct inode *dat) 1999 { 2000 int level, ret; 2001 2002 level = minlevel; 2003 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 2004 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 2005 if (ret < 0) 2006 return ret; 2007 } 2008 while ((++level < nilfs_btree_height(btree) - 1) && 2009 !buffer_dirty(path[level].bp_bh)) { 2010 2011 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 2012 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 2013 if (ret < 0) 2014 goto out; 2015 } 2016 2017 /* success */ 2018 *maxlevelp = level - 1; 2019 return 0; 2020 2021 /* error */ 2022 out: 2023 while (--level > minlevel) 2024 nilfs_btree_abort_update_v(btree, path, level, dat); 2025 if (!buffer_nilfs_volatile(path[level].bp_bh)) 2026 nilfs_btree_abort_update_v(btree, path, level, dat); 2027 return ret; 2028 } 2029 2030 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2031 struct nilfs_btree_path *path, 2032 int minlevel, int maxlevel, 2033 struct buffer_head *bh, 2034 struct inode *dat) 2035 { 2036 int level; 2037 2038 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2039 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2040 2041 for (level = minlevel + 1; level <= maxlevel; level++) 2042 nilfs_btree_commit_update_v(btree, path, level, dat); 2043 } 2044 2045 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2046 struct nilfs_btree_path *path, 2047 int level, struct buffer_head *bh) 2048 { 2049 int maxlevel = 0, ret; 2050 struct nilfs_btree_node *parent; 2051 struct inode *dat = nilfs_bmap_get_dat(btree); 2052 __u64 ptr; 2053 int ncmax; 2054 2055 get_bh(bh); 2056 path[level].bp_bh = bh; 2057 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2058 dat); 2059 if (ret < 0) 2060 goto out; 2061 2062 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2063 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2064 ptr = nilfs_btree_node_get_ptr(parent, 2065 path[level + 1].bp_index, 2066 ncmax); 2067 ret = nilfs_dat_mark_dirty(dat, ptr); 2068 if (ret < 0) 2069 goto out; 2070 } 2071 2072 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2073 2074 out: 2075 brelse(path[level].bp_bh); 2076 path[level].bp_bh = NULL; 2077 return ret; 2078 } 2079 2080 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2081 struct buffer_head *bh) 2082 { 2083 struct nilfs_btree_path *path; 2084 struct nilfs_btree_node *node; 2085 __u64 key; 2086 int level, ret; 2087 2088 WARN_ON(!buffer_dirty(bh)); 2089 2090 path = nilfs_btree_alloc_path(); 2091 if (path == NULL) 2092 return -ENOMEM; 2093 2094 if (buffer_nilfs_node(bh)) { 2095 node = (struct nilfs_btree_node *)bh->b_data; 2096 key = nilfs_btree_node_get_key(node, 0); 2097 level = nilfs_btree_node_get_level(node); 2098 } else { 2099 key = nilfs_bmap_data_get_key(btree, bh); 2100 level = NILFS_BTREE_LEVEL_DATA; 2101 } 2102 2103 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2104 if (ret < 0) { 2105 if (unlikely(ret == -ENOENT)) 2106 nilfs_crit(btree->b_inode->i_sb, 2107 "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", 2108 btree->b_inode->i_ino, 2109 (unsigned long long)key, level); 2110 goto out; 2111 } 2112 2113 ret = NILFS_BMAP_USE_VBN(btree) ? 2114 nilfs_btree_propagate_v(btree, path, level, bh) : 2115 nilfs_btree_propagate_p(btree, path, level, bh); 2116 2117 out: 2118 nilfs_btree_free_path(path); 2119 2120 return ret; 2121 } 2122 2123 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2124 struct buffer_head *bh) 2125 { 2126 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2127 } 2128 2129 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2130 struct list_head *lists, 2131 struct buffer_head *bh) 2132 { 2133 struct list_head *head; 2134 struct buffer_head *cbh; 2135 struct nilfs_btree_node *node, *cnode; 2136 __u64 key, ckey; 2137 int level; 2138 2139 get_bh(bh); 2140 node = (struct nilfs_btree_node *)bh->b_data; 2141 key = nilfs_btree_node_get_key(node, 0); 2142 level = nilfs_btree_node_get_level(node); 2143 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2144 level >= NILFS_BTREE_LEVEL_MAX) { 2145 dump_stack(); 2146 nilfs_warn(btree->b_inode->i_sb, 2147 "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", 2148 level, (unsigned long long)key, 2149 btree->b_inode->i_ino, 2150 (unsigned long long)bh->b_blocknr); 2151 return; 2152 } 2153 2154 list_for_each(head, &lists[level]) { 2155 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2156 cnode = (struct nilfs_btree_node *)cbh->b_data; 2157 ckey = nilfs_btree_node_get_key(cnode, 0); 2158 if (key < ckey) 2159 break; 2160 } 2161 list_add_tail(&bh->b_assoc_buffers, head); 2162 } 2163 2164 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2165 struct list_head *listp) 2166 { 2167 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 2168 struct address_space *btcache = btnc_inode->i_mapping; 2169 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2170 struct folio_batch fbatch; 2171 struct buffer_head *bh, *head; 2172 pgoff_t index = 0; 2173 int level, i; 2174 2175 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2176 level < NILFS_BTREE_LEVEL_MAX; 2177 level++) 2178 INIT_LIST_HEAD(&lists[level]); 2179 2180 folio_batch_init(&fbatch); 2181 2182 while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, 2183 PAGECACHE_TAG_DIRTY, &fbatch)) { 2184 for (i = 0; i < folio_batch_count(&fbatch); i++) { 2185 bh = head = folio_buffers(fbatch.folios[i]); 2186 do { 2187 if (buffer_dirty(bh)) 2188 nilfs_btree_add_dirty_buffer(btree, 2189 lists, bh); 2190 } while ((bh = bh->b_this_page) != head); 2191 } 2192 folio_batch_release(&fbatch); 2193 cond_resched(); 2194 } 2195 2196 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2197 level < NILFS_BTREE_LEVEL_MAX; 2198 level++) 2199 list_splice_tail(&lists[level], listp); 2200 } 2201 2202 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2203 struct nilfs_btree_path *path, 2204 int level, 2205 struct buffer_head **bh, 2206 sector_t blocknr, 2207 union nilfs_binfo *binfo) 2208 { 2209 struct nilfs_btree_node *parent; 2210 __u64 key; 2211 __u64 ptr; 2212 int ncmax, ret; 2213 2214 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2215 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2216 ncmax); 2217 if (buffer_nilfs_node(*bh)) { 2218 path[level].bp_ctxt.oldkey = ptr; 2219 path[level].bp_ctxt.newkey = blocknr; 2220 path[level].bp_ctxt.bh = *bh; 2221 ret = nilfs_btnode_prepare_change_key( 2222 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2223 &path[level].bp_ctxt); 2224 if (ret < 0) 2225 return ret; 2226 nilfs_btnode_commit_change_key( 2227 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2228 &path[level].bp_ctxt); 2229 *bh = path[level].bp_ctxt.bh; 2230 } 2231 2232 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2233 ncmax); 2234 2235 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2236 /* on-disk format */ 2237 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2238 binfo->bi_dat.bi_level = level; 2239 memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); 2240 2241 return 0; 2242 } 2243 2244 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2245 struct nilfs_btree_path *path, 2246 int level, 2247 struct buffer_head **bh, 2248 sector_t blocknr, 2249 union nilfs_binfo *binfo) 2250 { 2251 struct nilfs_btree_node *parent; 2252 struct inode *dat = nilfs_bmap_get_dat(btree); 2253 __u64 key; 2254 __u64 ptr; 2255 union nilfs_bmap_ptr_req req; 2256 int ncmax, ret; 2257 2258 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2259 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2260 ncmax); 2261 req.bpr_ptr = ptr; 2262 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2263 if (ret < 0) 2264 return ret; 2265 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 2266 2267 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2268 /* on-disk format */ 2269 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); 2270 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2271 2272 return 0; 2273 } 2274 2275 static int nilfs_btree_assign(struct nilfs_bmap *btree, 2276 struct buffer_head **bh, 2277 sector_t blocknr, 2278 union nilfs_binfo *binfo) 2279 { 2280 struct nilfs_btree_path *path; 2281 struct nilfs_btree_node *node; 2282 __u64 key; 2283 int level, ret; 2284 2285 path = nilfs_btree_alloc_path(); 2286 if (path == NULL) 2287 return -ENOMEM; 2288 2289 if (buffer_nilfs_node(*bh)) { 2290 node = (struct nilfs_btree_node *)(*bh)->b_data; 2291 key = nilfs_btree_node_get_key(node, 0); 2292 level = nilfs_btree_node_get_level(node); 2293 } else { 2294 key = nilfs_bmap_data_get_key(btree, *bh); 2295 level = NILFS_BTREE_LEVEL_DATA; 2296 } 2297 2298 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2299 if (ret < 0) { 2300 WARN_ON(ret == -ENOENT); 2301 goto out; 2302 } 2303 2304 ret = NILFS_BMAP_USE_VBN(btree) ? 2305 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2306 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2307 2308 out: 2309 nilfs_btree_free_path(path); 2310 2311 return ret; 2312 } 2313 2314 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, 2315 struct buffer_head **bh, 2316 sector_t blocknr, 2317 union nilfs_binfo *binfo) 2318 { 2319 struct nilfs_btree_node *node; 2320 __u64 key; 2321 int ret; 2322 2323 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, 2324 blocknr); 2325 if (ret < 0) 2326 return ret; 2327 2328 if (buffer_nilfs_node(*bh)) { 2329 node = (struct nilfs_btree_node *)(*bh)->b_data; 2330 key = nilfs_btree_node_get_key(node, 0); 2331 } else 2332 key = nilfs_bmap_data_get_key(btree, *bh); 2333 2334 /* on-disk format */ 2335 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2336 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2337 2338 return 0; 2339 } 2340 2341 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) 2342 { 2343 struct buffer_head *bh; 2344 struct nilfs_btree_path *path; 2345 __u64 ptr; 2346 int ret; 2347 2348 path = nilfs_btree_alloc_path(); 2349 if (path == NULL) 2350 return -ENOMEM; 2351 2352 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); 2353 if (ret < 0) { 2354 WARN_ON(ret == -ENOENT); 2355 goto out; 2356 } 2357 ret = nilfs_btree_get_block(btree, ptr, &bh); 2358 if (ret < 0) { 2359 WARN_ON(ret == -ENOENT); 2360 goto out; 2361 } 2362 2363 if (!buffer_dirty(bh)) 2364 mark_buffer_dirty(bh); 2365 brelse(bh); 2366 if (!nilfs_bmap_dirty(btree)) 2367 nilfs_bmap_set_dirty(btree); 2368 2369 out: 2370 nilfs_btree_free_path(path); 2371 return ret; 2372 } 2373 2374 static const struct nilfs_bmap_operations nilfs_btree_ops = { 2375 .bop_lookup = nilfs_btree_lookup, 2376 .bop_lookup_contig = nilfs_btree_lookup_contig, 2377 .bop_insert = nilfs_btree_insert, 2378 .bop_delete = nilfs_btree_delete, 2379 .bop_clear = NULL, 2380 2381 .bop_propagate = nilfs_btree_propagate, 2382 2383 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2384 2385 .bop_assign = nilfs_btree_assign, 2386 .bop_mark = nilfs_btree_mark, 2387 2388 .bop_seek_key = nilfs_btree_seek_key, 2389 .bop_last_key = nilfs_btree_last_key, 2390 2391 .bop_check_insert = NULL, 2392 .bop_check_delete = nilfs_btree_check_delete, 2393 .bop_gather_data = nilfs_btree_gather_data, 2394 }; 2395 2396 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2397 .bop_lookup = NULL, 2398 .bop_lookup_contig = NULL, 2399 .bop_insert = NULL, 2400 .bop_delete = NULL, 2401 .bop_clear = NULL, 2402 2403 .bop_propagate = nilfs_btree_propagate_gc, 2404 2405 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2406 2407 .bop_assign = nilfs_btree_assign_gc, 2408 .bop_mark = NULL, 2409 2410 .bop_seek_key = NULL, 2411 .bop_last_key = NULL, 2412 2413 .bop_check_insert = NULL, 2414 .bop_check_delete = NULL, 2415 .bop_gather_data = NULL, 2416 }; 2417 2418 static void __nilfs_btree_init(struct nilfs_bmap *bmap) 2419 { 2420 bmap->b_ops = &nilfs_btree_ops; 2421 bmap->b_nchildren_per_block = 2422 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2423 } 2424 2425 int nilfs_btree_init(struct nilfs_bmap *bmap) 2426 { 2427 int ret = 0; 2428 2429 __nilfs_btree_init(bmap); 2430 2431 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) 2432 ret = -EIO; 2433 else 2434 ret = nilfs_attach_btree_node_cache( 2435 &NILFS_BMAP_I(bmap)->vfs_inode); 2436 2437 return ret; 2438 } 2439 2440 void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2441 { 2442 bmap->b_ops = &nilfs_btree_ops_gc; 2443 bmap->b_nchildren_per_block = 2444 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2445 } 2446