// SPDX-License-Identifier: GPL-2.0+
/*
 * NILFS B-tree.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Koji Sato.
 */

#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/pagevec.h>
#include "nilfs.h"
#include "page.h"
#include "btnode.h"
#include "btree.h"
#include "alloc.h"
#include "dat.h"

static void __nilfs_btree_init(struct nilfs_bmap *bmap);

/*
 * Allocate a lookup path array (one entry per possible tree level) from
 * the dedicated slab cache and reset every entry to an "unused" state.
 * Returns NULL on allocation failure.
 */
static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
{
	struct nilfs_btree_path *path;
	int level = NILFS_BTREE_LEVEL_DATA;

	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
	if (path == NULL)
		goto out;

	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
		path[level].bp_bh = NULL;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index = 0;
		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_op = NULL;
	}

 out:
	return path;
}

/*
 * Release the buffer-head references held by each level of @path and
 * return the array to the slab cache.  Note: sibling buffer heads
 * (bp_sib_bh) are expected to have been consumed or released already.
 */
static void nilfs_btree_free_path(struct nilfs_btree_path *path)
{
	int level = NILFS_BTREE_LEVEL_DATA;

	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
		brelse(path[level].bp_bh);

	kmem_cache_free(nilfs_btree_path_cache, path);
}

/*
 * B-tree node operations
 */

/*
 * Create a fresh node block at virtual block number @ptr in the btree
 * node cache of the bmap's associated inode, mark it volatile (not yet
 * assigned a fixed disk location), and return it in *@bhp.
 */
static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
				     __u64 ptr, struct buffer_head **bhp)
{
	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
	struct address_space *btnc = btnc_inode->i_mapping;
	struct buffer_head *bh;

	bh = nilfs_btnode_create_block(btnc, ptr);
	if (!bh)
		return -ENOMEM;

	set_buffer_nilfs_volatile(bh);
	*bhp = bh;
	return 0;
}

static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
{
	return node->bn_flags;
}

static void
nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
{
	node->bn_flags = flags;
}

/* Nonzero if @node is the (in-inode) root node of the btree. */
static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
{
	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
}

static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
{
	return node->bn_level;
}

static void
nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
{
	node->bn_level = level;
}

static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
{
	return le16_to_cpu(node->bn_nchildren);
}

static void
nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
{
	node->bn_nchildren = cpu_to_le16(nchildren);
}

/* Size in bytes of a non-root node block (= the inode's block size). */
static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
{
	return i_blocksize(btree->b_inode);
}

static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
{
	return btree->b_nchildren_per_block;
}

/*
 * Address of the key array, which starts right after the node header;
 * non-root (block) nodes carry extra pad bytes between header and keys.
 */
static __le64 *
nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
{
	return (__le64 *)((char *)(node + 1) +
			  (nilfs_btree_node_root(node) ?
			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
}

/*
 * Address of the pointer array; it follows the key array, which is
 * dimensioned for @ncmax entries regardless of the current fill level.
 */
static __le64 *
nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
{
	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
}

static __u64
nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
{
	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
}

static void
nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
{
	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
}

static __u64
nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
			 int ncmax)
{
	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
}

static void
nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
			 int ncmax)
{
	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
}

/*
 * Initialize a node header and populate its first @nchildren key/ptr
 * slots from the @keys/@ptrs arrays (both may be NULL when nchildren
 * is 0, as when creating an empty sibling during a split).
 */
static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
				  int level, int nchildren, int ncmax,
				  const __u64 *keys, const __u64 *ptrs)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int i;

	nilfs_btree_node_set_flags(node, flags);
	nilfs_btree_node_set_level(node, level);
	nilfs_btree_node_set_nchildren(node, nchildren);

	dkeys = nilfs_btree_node_dkeys(node);
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
	for (i = 0; i < nchildren; i++) {
		dkeys[i] = cpu_to_le64(keys[i]);
		dptrs[i] = cpu_to_le64(ptrs[i]);
	}
}

/* Assume the buffer heads corresponding to left and right are locked. */
static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
				       struct nilfs_btree_node *right,
				       int n, int lncmax, int rncmax)
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

	ldkeys = nilfs_btree_node_dkeys(left);
	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
	lnchildren = nilfs_btree_node_get_nchildren(left);

	rdkeys = nilfs_btree_node_dkeys(right);
	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
	rnchildren = nilfs_btree_node_get_nchildren(right);

	/* Append the first n entries of right to left, then close the gap. */
	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));

	lnchildren += n;
	rnchildren -= n;
	nilfs_btree_node_set_nchildren(left, lnchildren);
	nilfs_btree_node_set_nchildren(right, rnchildren);
}

/* Assume that the buffer heads corresponding to left and right are locked. */
static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
					struct nilfs_btree_node *right,
					int n, int lncmax, int rncmax)
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

	ldkeys = nilfs_btree_node_dkeys(left);
	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
	lnchildren = nilfs_btree_node_get_nchildren(left);

	rdkeys = nilfs_btree_node_dkeys(right);
	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
	rnchildren = nilfs_btree_node_get_nchildren(right);

	/* Open a gap of n at the front of right, then fill it from left. */
	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));

	lnchildren -= n;
	rnchildren += n;
	nilfs_btree_node_set_nchildren(left, lnchildren);
	nilfs_btree_node_set_nchildren(right, rnchildren);
}

/* Assume that the buffer head corresponding to node is locked. */
static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
				    __u64 key, __u64 ptr, int ncmax)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

	dkeys = nilfs_btree_node_dkeys(node);
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
	nchildren = nilfs_btree_node_get_nchildren(node);
	if (index < nchildren) {
		memmove(dkeys + index + 1, dkeys + index,
			(nchildren - index) * sizeof(*dkeys));
		memmove(dptrs + index + 1, dptrs + index,
			(nchildren - index) * sizeof(*dptrs));
	}
	dkeys[index] = cpu_to_le64(key);
	dptrs[index] = cpu_to_le64(ptr);
	nchildren++;
	nilfs_btree_node_set_nchildren(node, nchildren);
}

/* Assume that the buffer head corresponding to node is locked. */
static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
				    __u64 *keyp, __u64 *ptrp, int ncmax)
{
	__u64 key;
	__u64 ptr;
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

	dkeys = nilfs_btree_node_dkeys(node);
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
	key = le64_to_cpu(dkeys[index]);
	ptr = le64_to_cpu(dptrs[index]);
	nchildren = nilfs_btree_node_get_nchildren(node);
	/* Optionally report the deleted entry back to the caller. */
	if (keyp != NULL)
		*keyp = key;
	if (ptrp != NULL)
		*ptrp = ptr;

	if (index < nchildren - 1) {
		memmove(dkeys + index, dkeys + index + 1,
			(nchildren - index - 1) * sizeof(*dkeys));
		memmove(dptrs + index, dptrs + index + 1,
			(nchildren - index - 1) * sizeof(*dptrs));
	}
	nchildren--;
	nilfs_btree_node_set_nchildren(node, nchildren);
}

/*
 * Binary-search @node for @key.  Returns nonzero on an exact match.
 * *indexp receives the matching slot, or — after the "adjust index"
 * step — the child slot to descend into (interior nodes) or the slot
 * where @key would be inserted (leaf-level nodes).  s records which
 * side of @key the last probed entry was on (0 exact, -1 below, 1 above).
 */
static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
				   __u64 key, int *indexp)
{
	__u64 nkey;
	int index, low, high, s;

	/* binary search */
	low = 0;
	high = nilfs_btree_node_get_nchildren(node) - 1;
	index = 0;
	s = 0;
	while (low <= high) {
		index = (low + high) / 2;
		nkey = nilfs_btree_node_get_key(node, index);
		if (nkey == key) {
			s = 0;
			goto out;
		} else if (nkey < key) {
			low = index + 1;
			s = -1;
		} else {
			high = index - 1;
			s = 1;
		}
	}

	/* adjust index */
	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
		if (s > 0 && index > 0)
			index--;
	} else if (s < 0)
		index++;

 out:
	*indexp = index;

	return s == 0;
}

/**
 * nilfs_btree_node_broken - verify consistency of btree node
 * @node: btree node block to be examined
 * @size: node size (in bytes)
 * @inode: host inode of btree
 * @blocknr: block number
 *
 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 */
static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
				   size_t size, struct inode *inode,
				   sector_t blocknr)
{
	int level, flags, nchildren;
	int ret = 0;

	level = nilfs_btree_node_get_level(node);
	flags = nilfs_btree_node_get_flags(node);
	nchildren = nilfs_btree_node_get_nchildren(node);

	/* A non-root block must never carry the ROOT flag. */
	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
		     level >= NILFS_BTREE_LEVEL_MAX ||
		     (flags & NILFS_BTREE_NODE_ROOT) ||
		     nchildren < 0 ||
		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
		nilfs_crit(inode->i_sb,
			   "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
			   inode->i_ino, (unsigned long long)blocknr, level,
			   flags, nchildren);
		ret = 1;
	}
	return ret;
}

/**
 * nilfs_btree_root_broken - verify consistency of btree root node
 * @node: btree root node to be examined
 * @inode: host inode of btree
 *
 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
 */
static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
				   struct inode *inode)
{
	int level, flags, nchildren;
	int ret = 0;

	level = nilfs_btree_node_get_level(node);
	flags = nilfs_btree_node_get_flags(node);
	nchildren = nilfs_btree_node_get_nchildren(node);

	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
		     level >= NILFS_BTREE_LEVEL_MAX ||
		     nchildren < 0 ||
		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
		nilfs_crit(inode->i_sb,
			   "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
			   inode->i_ino, level, flags, nchildren);
		ret = 1;
	}
	return ret;
}

/*
 * Validate the node stored in @bh once per buffer: the nilfs_checked
 * flag caches a successful check so repeat reads skip the verification.
 */
int nilfs_btree_broken_node_block(struct buffer_head *bh)
{
	struct inode *inode;
	int ret;

	if (buffer_nilfs_checked(bh))
		return 0;

	inode = bh->b_page->mapping->host;
	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
				      bh->b_size, inode, bh->b_blocknr);
	if (likely(!ret))
		set_buffer_nilfs_checked(bh);
	return ret;
}

/* The root node lives inline in the bmap union, not in a disk block. */
static struct nilfs_btree_node *
nilfs_btree_get_root(const struct nilfs_bmap *btree)
{
	return (struct nilfs_btree_node *)btree->b_u.u_data;
}

static struct nilfs_btree_node *
nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
{
	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
}

static struct nilfs_btree_node *
nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
{
	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
}

static int nilfs_btree_height(const struct nilfs_bmap *btree)
{
	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
}

/*
 * Return the node at @level of @path — the inline root at the topmost
 * level, otherwise the cached block node — and its capacity in *ncmaxp.
 */
static struct nilfs_btree_node *
nilfs_btree_get_node(const struct nilfs_bmap *btree,
		     const struct nilfs_btree_path *path,
		     int level, int *ncmaxp)
{
	struct nilfs_btree_node *node;

	if (level == nilfs_btree_height(btree) - 1) {
		node = nilfs_btree_get_root(btree);
		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
	} else {
		node = nilfs_btree_get_nonroot_node(path, level);
		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
	}
	return node;
}

/*
 * Cross-check that a node read from disk sits at the tree level the
 * traversal expects; a mismatch indicates metadata corruption.
 */
static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
				struct nilfs_btree_node *node, int level)
{
	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
		dump_stack();
		nilfs_crit(btree->b_inode->i_sb,
			   "btree level mismatch (ino=%lu): %d != %d",
			   btree->b_inode->i_ino,
			   nilfs_btree_node_get_level(node), level);
		return 1;
	}
	return 0;
}

struct nilfs_btree_readahead_info {
	struct nilfs_btree_node *node;	/* parent node */
	int max_ra_blocks;		/* max nof blocks to read ahead */
	int index;			/* current index on the parent node */
	int ncmax;			/* nof children in the parent node */
};

/*
 * Read the node block at @ptr into *@bhp, optionally issuing read-ahead
 * for sibling child blocks described by @ra.  Returns -EIO on read
 * failure and -EINVAL when the block fails the consistency check.
 */
static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
				   struct buffer_head **bhp,
				   const struct nilfs_btree_readahead_info *ra)
{
	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
	struct address_space *btnc = btnc_inode->i_mapping;
	struct buffer_head *bh, *ra_bh;
	sector_t submit_ptr = 0;
	int ret;

	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
					&submit_ptr);
	if (ret) {
		if (ret != -EEXIST)
			return ret;
		/* -EEXIST: already uptodate in the cache; just validate. */
		goto out_check;
	}

	if (ra) {
		int i, n;
		__u64 ptr2;

		/* read ahead sibling nodes */
		for (n = ra->max_ra_blocks, i = ra->index + 1;
		     n > 0 && i < ra->ncmax; n--, i++) {
			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);

			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
							REQ_OP_READ, REQ_RAHEAD,
							&ra_bh, &submit_ptr);
			if (likely(!ret || ret == -EEXIST))
				brelse(ra_bh);
			else if (ret != -EBUSY)
				break;
			/* Stop early if the primary read already finished. */
			if (!buffer_locked(bh))
				goto out_no_wait;
		}
	}

	wait_on_buffer(bh);

 out_no_wait:
	if (!buffer_uptodate(bh)) {
		nilfs_err(btree->b_inode->i_sb,
			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
			  btree->b_inode->i_ino, (unsigned long long)ptr);
		brelse(bh);
		return -EIO;
	}

 out_check:
	if (nilfs_btree_broken_node_block(bh)) {
		clear_buffer_uptodate(bh);
		brelse(bh);
		return -EINVAL;
	}

	*bhp = bh;
	return 0;
}

static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
				 struct buffer_head **bhp)
{
	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
}

/*
 * Walk the tree from the root down to @minlevel looking up @key,
 * recording the visited node and index at each level in @path.
 * With @readahead set, sibling leaf blocks are prefetched while
 * descending to the bottom level.  Returns -ENOENT when the key is
 * absent (the path is still filled in, for use by insert).
 */
static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
				 struct nilfs_btree_path *path,
				 __u64 key, __u64 *ptrp, int minlevel,
				 int readahead)
{
	struct nilfs_btree_node *node;
	struct nilfs_btree_readahead_info p, *ra;
	__u64 ptr;
	int level, index, found, ncmax, ret;

	node = nilfs_btree_get_root(btree);
	level = nilfs_btree_node_get_level(node);
	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
		return -ENOENT;

	found = nilfs_btree_node_lookup(node, key, &index);
	ptr = nilfs_btree_node_get_ptr(node, index,
				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
	path[level].bp_bh = NULL;
	path[level].bp_index = index;

	ncmax = nilfs_btree_nchildren_per_block(btree);

	while (--level >= minlevel) {
		ra = NULL;
		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
			p.node = nilfs_btree_get_node(btree, path, level + 1,
						      &p.ncmax);
			p.index = index;
			p.max_ra_blocks = 7;
			ra = &p;
		}
		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
					      ra);
		if (ret < 0)
			return ret;

		node = nilfs_btree_get_nonroot_node(path, level);
		if (nilfs_btree_bad_node(btree, node, level))
			return -EINVAL;
		if (!found)
			found = nilfs_btree_node_lookup(node, key, &index);
		else
			index = 0;
		if (index < ncmax) {
			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
		} else {
			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
			/* insert */
			ptr = NILFS_BMAP_INVALID_PTR;
		}
		path[level].bp_index = index;
	}
	if (!found)
		return -ENOENT;

	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}

/*
 * Descend along the rightmost edge of the tree, filling @path, and
 * return the last (largest) key and its pointer.
 */
static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
				      struct nilfs_btree_path *path,
				      __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
	__u64 ptr;
	int index, level, ncmax, ret;

	node = nilfs_btree_get_root(btree);
	index = nilfs_btree_node_get_nchildren(node) - 1;
	if (index < 0)
		return -ENOENT;
	level = nilfs_btree_node_get_level(node);
	ptr = nilfs_btree_node_get_ptr(node, index,
				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
	path[level].bp_bh = NULL;
	path[level].bp_index = index;
	ncmax = nilfs_btree_nchildren_per_block(btree);

	for (level--; level > 0; level--) {
		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
		if (ret < 0)
			return ret;
		node = nilfs_btree_get_nonroot_node(path, level);
		if (nilfs_btree_bad_node(btree, node, level))
			return -EINVAL;
		index = nilfs_btree_node_get_nchildren(node) - 1;
		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
		path[level].bp_index = index;
	}

	if (keyp != NULL)
		*keyp = nilfs_btree_node_get_key(node, index);
	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}

/**
 * nilfs_btree_get_next_key - get next valid key from btree path array
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @minlevel: start level
 * @nextkey: place to store the next valid key
 *
 * Return Value: If a next key was found, 0 is returned. Otherwise,
 * -ENOENT is returned.
 */
static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
				    const struct nilfs_btree_path *path,
				    int minlevel, __u64 *nextkey)
{
	struct nilfs_btree_node *node;
	int maxlevel = nilfs_btree_height(btree) - 1;
	int index, next_adj, level;

	/* Next index is already set to bp_index for leaf nodes. */
	next_adj = 0;
	for (level = minlevel; level <= maxlevel; level++) {
		if (level == maxlevel)
			node = nilfs_btree_get_root(btree);
		else
			node = nilfs_btree_get_nonroot_node(path, level);

		index = path[level].bp_index + next_adj;
		if (index < nilfs_btree_node_get_nchildren(node)) {
			/* Next key is in this node */
			*nextkey = nilfs_btree_node_get_key(node, index);
			return 0;
		}
		/* For non-leaf nodes, next index is stored at bp_index + 1. */
		next_adj = 1;
	}
	return -ENOENT;
}

/* bmap ->blookup operation: single-key lookup. */
static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
			      __u64 key, int level, __u64 *ptrp)
{
	struct nilfs_btree_path *path;
	int ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);

	nilfs_btree_free_path(path);

	return ret;
}

/*
 * Look up @key and extend the hit to a run of up to @maxblocks keys
 * whose pointers (translated through the DAT when virtual block
 * numbers are in use) are consecutive on disk.  Returns the run length
 * (>= 1) with *ptrp set to the first block, or a negative error code.
 */
static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
				     __u64 key, __u64 *ptrp,
				     unsigned int maxblocks)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	struct inode *dat = NULL;
	__u64 ptr, ptr2;
	sector_t blocknr;
	int level = NILFS_BTREE_LEVEL_NODE_MIN;
	int ret, cnt, index, maxlevel, ncmax;
	struct nilfs_btree_readahead_info p;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
	if (ret < 0)
		goto out;

	if (NILFS_BMAP_USE_VBN(btree)) {
		dat = nilfs_bmap_get_dat(btree);
		ret = nilfs_dat_translate(dat, ptr, &blocknr);
		if (ret < 0)
			goto out;
		ptr = blocknr;
	}
	cnt = 1;
	if (cnt == maxblocks)
		goto end;

	maxlevel = nilfs_btree_height(btree) - 1;
	node = nilfs_btree_get_node(btree, path, level, &ncmax);
	index = path[level].bp_index + 1;
	for (;;) {
		/* Scan the current leaf for contiguous key/block pairs. */
		while (index < nilfs_btree_node_get_nchildren(node)) {
			if (nilfs_btree_node_get_key(node, index) !=
			    key + cnt)
				goto end;
			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
			if (dat) {
				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
				if (ret < 0)
					goto out;
				ptr2 = blocknr;
			}
			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
				goto end;
			index++;
		}
		if (level == maxlevel)
			break;

		/* look-up right sibling node */
		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
		p.index = path[level + 1].bp_index + 1;
		p.max_ra_blocks = 7;
		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
			break;
		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
		path[level + 1].bp_index = p.index;

		brelse(path[level].bp_bh);
		path[level].bp_bh = NULL;

		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
					      &p);
		if (ret < 0)
			goto out;
		node = nilfs_btree_get_nonroot_node(path, level);
		ncmax = nilfs_btree_nchildren_per_block(btree);
		index = 0;
		path[level].bp_index = index;
	}
 end:
	*ptrp = ptr;
	ret = cnt;
 out:
	nilfs_btree_free_path(path);
	return ret;
}

/*
 * Propagate a changed first key upward: rewrite the separator key at
 * @level and keep climbing while the updated entry is the leftmost one,
 * finally fixing the root if the walk reaches it.
 */
static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 key)
{
	if (level < nilfs_btree_height(btree) - 1) {
		do {
			nilfs_btree_node_set_key(
				nilfs_btree_get_nonroot_node(path, level),
				path[level].bp_index, key);
			if (!buffer_dirty(path[level].bp_bh))
				mark_buffer_dirty(path[level].bp_bh);
		} while ((path[level].bp_index == 0) &&
			 (++level < nilfs_btree_height(btree) - 1));
	}

	/* root */
	if (level == nilfs_btree_height(btree) - 1) {
		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
					 path[level].bp_index, key);
	}
}

/*
 * Plain insertion into the node at @level (known to have room);
 * promotes the new first key when the entry lands in slot 0.
 */
static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
	int ncblk;

	if (level < nilfs_btree_height(btree) - 1) {
		node = nilfs_btree_get_nonroot_node(path, level);
		ncblk = nilfs_btree_nchildren_per_block(btree);
		nilfs_btree_node_insert(node, path[level].bp_index,
					*keyp, *ptrp, ncblk);
		if (!buffer_dirty(path[level].bp_bh))
			mark_buffer_dirty(path[level].bp_bh);

		if (path[level].bp_index == 0)
			nilfs_btree_promote_key(btree, path, level + 1,
						nilfs_btree_node_get_key(node,
									 0));
	} else {
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_insert(node, path[level].bp_index,
					*keyp, *ptrp,
					NILFS_BTREE_ROOT_NCHILDREN_MAX);
	}
}

/*
 * Rebalance with the left sibling before inserting: shift entries left
 * so both nodes end up roughly half full, then insert into whichever
 * node now holds the insertion point.
 */
static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
				   struct nilfs_btree_path *path,
				   int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int nchildren, lnchildren, n, move, ncblk;

	node = nilfs_btree_get_nonroot_node(path, level);
	left = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
	lnchildren = nilfs_btree_node_get_nchildren(left);
	ncblk = nilfs_btree_nchildren_per_block(btree);
	move = 0;

	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
	if (n > path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);

	if (!buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		mark_buffer_dirty(path[level].bp_sib_bh);

	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(node, 0));

	if (move) {
		/* The insert point migrated into the left sibling. */
		brelse(path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index += lnchildren;
		path[level + 1].bp_index--;
	} else {
		brelse(path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
		path[level].bp_index -= n;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}

/*
 * Rebalance with the right sibling before inserting (mirror image of
 * nilfs_btree_carry_left).
 */
static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int nchildren, rnchildren, n, move, ncblk;

	node = nilfs_btree_get_nonroot_node(path, level);
	right = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
	rnchildren = nilfs_btree_node_get_nchildren(right);
	ncblk = nilfs_btree_nchildren_per_block(btree);
	move = 0;

	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
	if (n > nchildren - path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);

	if (!buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		mark_buffer_dirty(path[level].bp_sib_bh);

	/* Temporarily point at the right sibling to promote its new key. */
	path[level + 1].bp_index++;
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(right, 0));
	path[level + 1].bp_index--;

	if (move) {
		brelse(path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
		path[level + 1].bp_index++;
	} else {
		brelse(path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}

/*
 * Split a full node: move the upper half into the freshly allocated
 * right sibling, insert the new entry on the proper side, and hand the
 * sibling's first key / new block pointer up to the parent level via
 * *keyp / *ptrp.
 */
static void nilfs_btree_split(struct nilfs_bmap *btree,
			      struct nilfs_btree_path *path,
			      int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int nchildren, n, move, ncblk;

	node = nilfs_btree_get_nonroot_node(path, level);
	right = nilfs_btree_get_sib_node(path, level);
	nchildren = nilfs_btree_node_get_nchildren(node);
	ncblk = nilfs_btree_nchildren_per_block(btree);
	move = 0;

	n = (nchildren + 1) / 2;
	if (n > nchildren - path[level].bp_index) {
		n--;
		move = 1;
	}

	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);

	if (!buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		mark_buffer_dirty(path[level].bp_sib_bh);

	if (move) {
		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
		nilfs_btree_node_insert(right, path[level].bp_index,
					*keyp, *ptrp, ncblk);

		*keyp = nilfs_btree_node_get_key(right, 0);
		*ptrp = path[level].bp_newreq.bpr_ptr;

		brelse(path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
	} else {
		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);

		*keyp = nilfs_btree_node_get_key(right, 0);
		*ptrp = path[level].bp_newreq.bpr_ptr;

		brelse(path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
	}

	path[level + 1].bp_index++;
}

/*
 * Grow the tree by one level: dump the root's entries into a new child
 * block, raise the root's level, and hand the child's first key / new
 * block pointer up for insertion into the (now one-entry) root.
 */
static void nilfs_btree_grow(struct nilfs_bmap *btree,
			     struct nilfs_btree_path *path,
			     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *root, *child;
	int n, ncblk;

	root = nilfs_btree_get_root(btree);
	child = nilfs_btree_get_sib_node(path, level);
	ncblk = nilfs_btree_nchildren_per_block(btree);

	n = nilfs_btree_node_get_nchildren(root);

	nilfs_btree_node_move_right(root, child, n,
				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
	nilfs_btree_node_set_level(root, level + 1);

	if (!buffer_dirty(path[level].bp_sib_bh))
		mark_buffer_dirty(path[level].bp_sib_bh);

	path[level].bp_bh = path[level].bp_sib_bh;
	path[level].bp_sib_bh = NULL;

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);

	*keyp = nilfs_btree_node_get_key(child, 0);
	*ptrp = path[level].bp_newreq.bpr_ptr;
}

/*
 * Pick a disk-locality hint for a new allocation: prefer the block left
 * of the insertion point, then the parent node's block.
 */
static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
				   const struct nilfs_btree_path *path)
{
	struct nilfs_btree_node *node;
	int level, ncmax;

	if (path == NULL)
		return NILFS_BMAP_INVALID_PTR;

	/* left sibling */
	level = NILFS_BTREE_LEVEL_NODE_MIN;
	if (path[level].bp_index > 0) {
		node = nilfs_btree_get_node(btree, path, level, &ncmax);
		return nilfs_btree_node_get_ptr(node,
						path[level].bp_index - 1,
						ncmax);
	}

	/* parent */
	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
	if (level <= nilfs_btree_height(btree) - 1) {
		node = nilfs_btree_get_node(btree, path, level, &ncmax);
		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
						ncmax);
	}

	return NILFS_BMAP_INVALID_PTR;
}

/*
 * Choose a target virtual block number for a new data block, trying
 * sequential-access, nearby-block, and block-group heuristics in turn.
 */
static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
				       const struct nilfs_btree_path *path,
				       __u64 key)
{
	__u64 ptr;

	ptr = nilfs_bmap_find_target_seq(btree, key);
	if (ptr != NILFS_BMAP_INVALID_PTR)
		/* sequential access */
		return ptr;

	ptr = nilfs_btree_find_near(btree, path);
	if (ptr != NILFS_BMAP_INVALID_PTR)
		/* near */
		return ptr;

	/* block group */
	return nilfs_bmap_find_target_in_group(btree);
}

/*
 * Plan an insertion: walk up from the leaf level choosing, for each
 * level, the cheapest operation that makes room (plain insert, carry to
 * a sibling, split, or grow the tree), pre-allocating pointers and
 * sibling blocks as needed.  The chosen operation for each level is
 * recorded in path[level].bp_op; *levelp receives the topmost affected
 * level and stats->bs_nblocks the number of blocks the insert will add.
 * On failure all pre-allocations are rolled back in reverse order.
 */
static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
				      struct nilfs_btree_path *path,
				      int *levelp, __u64 key, __u64 ptr,
				      struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *parent, *sib;
	__u64 sibptr;
	int pindex, level, ncmax, ncblk, ret;
	struct inode *dat = NULL;

	stats->bs_nblocks = 0;
	level = NILFS_BTREE_LEVEL_DATA;

	/* allocate a new ptr for data block */
	if (NILFS_BMAP_USE_VBN(btree)) {
		path[level].bp_newreq.bpr_ptr =
			nilfs_btree_find_target_v(btree, path, key);
		dat = nilfs_bmap_get_dat(btree);
	}

	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
	if (ret < 0)
		goto err_out_data;

	ncblk = nilfs_btree_nchildren_per_block(btree);

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < nilfs_btree_height(btree) - 1;
	     level++) {
		node = nilfs_btree_get_nonroot_node(path, level);
		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
			path[level].bp_op = nilfs_btree_do_insert;
			stats->bs_nblocks++;
			goto out;
		}

		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
		pindex = path[level + 1].bp_index;

		/* left sibling */
		if (pindex > 0) {
			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
							  ncmax);
			ret = nilfs_btree_get_block(btree, sibptr, &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_left;
				stats->bs_nblocks++;
				goto out;
			} else {
				brelse(bh);
			}
		}

		/* right sibling */
		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
							  ncmax);
			ret = nilfs_btree_get_block(btree, sibptr, &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_right;
				stats->bs_nblocks++;
				goto out;
			} else {
				brelse(bh);
			}
		}

		/* split */
		path[level].bp_newreq.bpr_ptr =
			path[level - 1].bp_newreq.bpr_ptr + 1;
		ret = nilfs_bmap_prepare_alloc_ptr(btree,
						   &path[level].bp_newreq, dat);
		if (ret < 0)
			goto err_out_child_node;
		ret = nilfs_btree_get_new_block(btree,
						path[level].bp_newreq.bpr_ptr,
						&bh);
		if (ret < 0)
			goto err_out_curr_node;

		stats->bs_nblocks++;

		sib = (struct nilfs_btree_node *)bh->b_data;
		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
		path[level].bp_sib_bh = bh;
		path[level].bp_op = nilfs_btree_split;
	}

	/* root */
	node = nilfs_btree_get_root(btree);
	if (nilfs_btree_node_get_nchildren(node) <
	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
		path[level].bp_op = nilfs_btree_do_insert;
		stats->bs_nblocks++;
		goto out;
	}

	/* grow */
	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
	if (ret < 0)
		goto err_out_child_node;
	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
					&bh);
	if (ret < 0)
		goto err_out_curr_node;

	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
			      0, level, 0, ncblk, NULL, NULL);
	path[level].bp_sib_bh = bh;
	path[level].bp_op = nilfs_btree_grow;

	level++;
	path[level].bp_op = nilfs_btree_do_insert;

	/* a newly-created node block and a data block are added */
	stats->bs_nblocks += 2;

	/* success */
 out:
	*levelp = level;
	return ret;

	/* error */
 err_out_curr_node:
	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
 err_out_child_node:
	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
		nilfs_btnode_delete(path[level].bp_sib_bh);
		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);

	}

	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
 err_out_data:
	*levelp = level;
	stats->bs_nblocks = 0;
	return ret;
}

/*
 * Execute a planned insertion: commit each level's pre-allocated
 * pointer and invoke the operation chosen by prepare_insert.  @ptr
 * arrives carrying the data buffer head (cast to an integer by the
 * caller), which is marked volatile before being replaced by the newly
 * allocated block pointer.
 */
static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
				      struct nilfs_btree_path *path,
				      int maxlevel, __u64 key, __u64 ptr)
{
	struct inode *dat = NULL;
	int level;

	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
	if (NILFS_BMAP_USE_VBN(btree)) {
		nilfs_bmap_set_target_v(btree, key, ptr);
		dat = nilfs_bmap_get_dat(btree);
	}

	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
		nilfs_bmap_commit_alloc_ptr(btree,
					    &path[level - 1].bp_newreq, dat);
		path[level].bp_op(btree, path, level, &key, &ptr);
	}

	if (!nilfs_bmap_dirty(btree))
		nilfs_bmap_set_dirty(btree);
}

/*
 * bmap ->binsert operation: insert (@key, @ptr) into the btree.
 * Returns -EEXIST if the key is already mapped.
 */
static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
{
	struct nilfs_btree_path *path;
	struct nilfs_bmap_stats stats;
	int level, ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
	if (ret != -ENOENT) {
		if (ret == 0)
			ret = -EEXIST;
		goto out;
	}

	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
	if (ret < 0)
		goto out;
	nilfs_btree_commit_insert(btree, path, level, key, ptr);
	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);

 out:
	nilfs_btree_free_path(path);
	return ret;
}

static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
	int ncblk;

	if (level < nilfs_btree_height(btree) - 1) {
		node =
nilfs_btree_get_nonroot_node(path, level); 1258 ncblk = nilfs_btree_nchildren_per_block(btree); 1259 nilfs_btree_node_delete(node, path[level].bp_index, 1260 keyp, ptrp, ncblk); 1261 if (!buffer_dirty(path[level].bp_bh)) 1262 mark_buffer_dirty(path[level].bp_bh); 1263 if (path[level].bp_index == 0) 1264 nilfs_btree_promote_key(btree, path, level + 1, 1265 nilfs_btree_node_get_key(node, 0)); 1266 } else { 1267 node = nilfs_btree_get_root(btree); 1268 nilfs_btree_node_delete(node, path[level].bp_index, 1269 keyp, ptrp, 1270 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1271 } 1272 } 1273 1274 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1275 struct nilfs_btree_path *path, 1276 int level, __u64 *keyp, __u64 *ptrp) 1277 { 1278 struct nilfs_btree_node *node, *left; 1279 int nchildren, lnchildren, n, ncblk; 1280 1281 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1282 1283 node = nilfs_btree_get_nonroot_node(path, level); 1284 left = nilfs_btree_get_sib_node(path, level); 1285 nchildren = nilfs_btree_node_get_nchildren(node); 1286 lnchildren = nilfs_btree_node_get_nchildren(left); 1287 ncblk = nilfs_btree_nchildren_per_block(btree); 1288 1289 n = (nchildren + lnchildren) / 2 - nchildren; 1290 1291 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1292 1293 if (!buffer_dirty(path[level].bp_bh)) 1294 mark_buffer_dirty(path[level].bp_bh); 1295 if (!buffer_dirty(path[level].bp_sib_bh)) 1296 mark_buffer_dirty(path[level].bp_sib_bh); 1297 1298 nilfs_btree_promote_key(btree, path, level + 1, 1299 nilfs_btree_node_get_key(node, 0)); 1300 1301 brelse(path[level].bp_sib_bh); 1302 path[level].bp_sib_bh = NULL; 1303 path[level].bp_index += n; 1304 } 1305 1306 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1307 struct nilfs_btree_path *path, 1308 int level, __u64 *keyp, __u64 *ptrp) 1309 { 1310 struct nilfs_btree_node *node, *right; 1311 int nchildren, rnchildren, n, ncblk; 1312 1313 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1314 1315 
node = nilfs_btree_get_nonroot_node(path, level); 1316 right = nilfs_btree_get_sib_node(path, level); 1317 nchildren = nilfs_btree_node_get_nchildren(node); 1318 rnchildren = nilfs_btree_node_get_nchildren(right); 1319 ncblk = nilfs_btree_nchildren_per_block(btree); 1320 1321 n = (nchildren + rnchildren) / 2 - nchildren; 1322 1323 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1324 1325 if (!buffer_dirty(path[level].bp_bh)) 1326 mark_buffer_dirty(path[level].bp_bh); 1327 if (!buffer_dirty(path[level].bp_sib_bh)) 1328 mark_buffer_dirty(path[level].bp_sib_bh); 1329 1330 path[level + 1].bp_index++; 1331 nilfs_btree_promote_key(btree, path, level + 1, 1332 nilfs_btree_node_get_key(right, 0)); 1333 path[level + 1].bp_index--; 1334 1335 brelse(path[level].bp_sib_bh); 1336 path[level].bp_sib_bh = NULL; 1337 } 1338 1339 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1340 struct nilfs_btree_path *path, 1341 int level, __u64 *keyp, __u64 *ptrp) 1342 { 1343 struct nilfs_btree_node *node, *left; 1344 int n, ncblk; 1345 1346 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1347 1348 node = nilfs_btree_get_nonroot_node(path, level); 1349 left = nilfs_btree_get_sib_node(path, level); 1350 ncblk = nilfs_btree_nchildren_per_block(btree); 1351 1352 n = nilfs_btree_node_get_nchildren(node); 1353 1354 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1355 1356 if (!buffer_dirty(path[level].bp_sib_bh)) 1357 mark_buffer_dirty(path[level].bp_sib_bh); 1358 1359 nilfs_btnode_delete(path[level].bp_bh); 1360 path[level].bp_bh = path[level].bp_sib_bh; 1361 path[level].bp_sib_bh = NULL; 1362 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1363 } 1364 1365 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1366 struct nilfs_btree_path *path, 1367 int level, __u64 *keyp, __u64 *ptrp) 1368 { 1369 struct nilfs_btree_node *node, *right; 1370 int n, ncblk; 1371 1372 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1373 1374 node 
= nilfs_btree_get_nonroot_node(path, level); 1375 right = nilfs_btree_get_sib_node(path, level); 1376 ncblk = nilfs_btree_nchildren_per_block(btree); 1377 1378 n = nilfs_btree_node_get_nchildren(right); 1379 1380 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1381 1382 if (!buffer_dirty(path[level].bp_bh)) 1383 mark_buffer_dirty(path[level].bp_bh); 1384 1385 nilfs_btnode_delete(path[level].bp_sib_bh); 1386 path[level].bp_sib_bh = NULL; 1387 path[level + 1].bp_index++; 1388 } 1389 1390 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1391 struct nilfs_btree_path *path, 1392 int level, __u64 *keyp, __u64 *ptrp) 1393 { 1394 struct nilfs_btree_node *root, *child; 1395 int n, ncblk; 1396 1397 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1398 1399 root = nilfs_btree_get_root(btree); 1400 child = nilfs_btree_get_nonroot_node(path, level); 1401 ncblk = nilfs_btree_nchildren_per_block(btree); 1402 1403 nilfs_btree_node_delete(root, 0, NULL, NULL, 1404 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1405 nilfs_btree_node_set_level(root, level); 1406 n = nilfs_btree_node_get_nchildren(child); 1407 nilfs_btree_node_move_left(root, child, n, 1408 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1409 1410 nilfs_btnode_delete(path[level].bp_bh); 1411 path[level].bp_bh = NULL; 1412 } 1413 1414 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1415 struct nilfs_btree_path *path, 1416 int level, __u64 *keyp, __u64 *ptrp) 1417 { 1418 } 1419 1420 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1421 struct nilfs_btree_path *path, 1422 int *levelp, 1423 struct nilfs_bmap_stats *stats, 1424 struct inode *dat) 1425 { 1426 struct buffer_head *bh; 1427 struct nilfs_btree_node *node, *parent, *sib; 1428 __u64 sibptr; 1429 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1430 1431 ret = 0; 1432 stats->bs_nblocks = 0; 1433 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1434 ncblk = nilfs_btree_nchildren_per_block(btree); 1435 1436 for (level 
= NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1437 level < nilfs_btree_height(btree) - 1; 1438 level++) { 1439 node = nilfs_btree_get_nonroot_node(path, level); 1440 path[level].bp_oldreq.bpr_ptr = 1441 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1442 ret = nilfs_bmap_prepare_end_ptr(btree, 1443 &path[level].bp_oldreq, dat); 1444 if (ret < 0) 1445 goto err_out_child_node; 1446 1447 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1448 path[level].bp_op = nilfs_btree_do_delete; 1449 stats->bs_nblocks++; 1450 goto out; 1451 } 1452 1453 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1454 pindex = path[level + 1].bp_index; 1455 dindex = pindex; 1456 1457 if (pindex > 0) { 1458 /* left sibling */ 1459 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1460 ncmax); 1461 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1462 if (ret < 0) 1463 goto err_out_curr_node; 1464 sib = (struct nilfs_btree_node *)bh->b_data; 1465 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1466 path[level].bp_sib_bh = bh; 1467 path[level].bp_op = nilfs_btree_borrow_left; 1468 stats->bs_nblocks++; 1469 goto out; 1470 } else { 1471 path[level].bp_sib_bh = bh; 1472 path[level].bp_op = nilfs_btree_concat_left; 1473 stats->bs_nblocks++; 1474 /* continue; */ 1475 } 1476 } else if (pindex < 1477 nilfs_btree_node_get_nchildren(parent) - 1) { 1478 /* right sibling */ 1479 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1480 ncmax); 1481 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1482 if (ret < 0) 1483 goto err_out_curr_node; 1484 sib = (struct nilfs_btree_node *)bh->b_data; 1485 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1486 path[level].bp_sib_bh = bh; 1487 path[level].bp_op = nilfs_btree_borrow_right; 1488 stats->bs_nblocks++; 1489 goto out; 1490 } else { 1491 path[level].bp_sib_bh = bh; 1492 path[level].bp_op = nilfs_btree_concat_right; 1493 stats->bs_nblocks++; 1494 /* 1495 * When merging right sibling node 1496 * into the current node, 
pointer to 1497 * the right sibling node must be 1498 * terminated instead. The adjustment 1499 * below is required for that. 1500 */ 1501 dindex = pindex + 1; 1502 /* continue; */ 1503 } 1504 } else { 1505 /* no siblings */ 1506 /* the only child of the root node */ 1507 WARN_ON(level != nilfs_btree_height(btree) - 2); 1508 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1509 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1510 path[level].bp_op = nilfs_btree_shrink; 1511 stats->bs_nblocks += 2; 1512 level++; 1513 path[level].bp_op = nilfs_btree_nop; 1514 goto shrink_root_child; 1515 } else { 1516 path[level].bp_op = nilfs_btree_do_delete; 1517 stats->bs_nblocks++; 1518 goto out; 1519 } 1520 } 1521 } 1522 1523 /* child of the root node is deleted */ 1524 path[level].bp_op = nilfs_btree_do_delete; 1525 stats->bs_nblocks++; 1526 1527 shrink_root_child: 1528 node = nilfs_btree_get_root(btree); 1529 path[level].bp_oldreq.bpr_ptr = 1530 nilfs_btree_node_get_ptr(node, dindex, 1531 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1532 1533 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1534 if (ret < 0) 1535 goto err_out_child_node; 1536 1537 /* success */ 1538 out: 1539 *levelp = level; 1540 return ret; 1541 1542 /* error */ 1543 err_out_curr_node: 1544 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1545 err_out_child_node: 1546 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1547 brelse(path[level].bp_sib_bh); 1548 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1549 } 1550 *levelp = level; 1551 stats->bs_nblocks = 0; 1552 return ret; 1553 } 1554 1555 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1556 struct nilfs_btree_path *path, 1557 int maxlevel, struct inode *dat) 1558 { 1559 int level; 1560 1561 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1562 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1563 path[level].bp_op(btree, path, level, NULL, NULL); 1564 } 1565 1566 if 
(!nilfs_bmap_dirty(btree)) 1567 nilfs_bmap_set_dirty(btree); 1568 } 1569 1570 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1571 1572 { 1573 struct nilfs_btree_path *path; 1574 struct nilfs_bmap_stats stats; 1575 struct inode *dat; 1576 int level, ret; 1577 1578 path = nilfs_btree_alloc_path(); 1579 if (path == NULL) 1580 return -ENOMEM; 1581 1582 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1583 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1584 if (ret < 0) 1585 goto out; 1586 1587 1588 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1589 1590 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1591 if (ret < 0) 1592 goto out; 1593 nilfs_btree_commit_delete(btree, path, level, dat); 1594 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1595 1596 out: 1597 nilfs_btree_free_path(path); 1598 return ret; 1599 } 1600 1601 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1602 __u64 *keyp) 1603 { 1604 struct nilfs_btree_path *path; 1605 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1606 int ret; 1607 1608 path = nilfs_btree_alloc_path(); 1609 if (!path) 1610 return -ENOMEM; 1611 1612 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1613 if (!ret) 1614 *keyp = start; 1615 else if (ret == -ENOENT) 1616 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1617 1618 nilfs_btree_free_path(path); 1619 return ret; 1620 } 1621 1622 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1623 { 1624 struct nilfs_btree_path *path; 1625 int ret; 1626 1627 path = nilfs_btree_alloc_path(); 1628 if (path == NULL) 1629 return -ENOMEM; 1630 1631 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1632 1633 nilfs_btree_free_path(path); 1634 1635 return ret; 1636 } 1637 1638 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1639 { 1640 struct buffer_head *bh; 1641 struct nilfs_btree_node *root, *node; 1642 __u64 maxkey, 
nextmaxkey; 1643 __u64 ptr; 1644 int nchildren, ret; 1645 1646 root = nilfs_btree_get_root(btree); 1647 switch (nilfs_btree_height(btree)) { 1648 case 2: 1649 bh = NULL; 1650 node = root; 1651 break; 1652 case 3: 1653 nchildren = nilfs_btree_node_get_nchildren(root); 1654 if (nchildren > 1) 1655 return 0; 1656 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1657 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1658 ret = nilfs_btree_get_block(btree, ptr, &bh); 1659 if (ret < 0) 1660 return ret; 1661 node = (struct nilfs_btree_node *)bh->b_data; 1662 break; 1663 default: 1664 return 0; 1665 } 1666 1667 nchildren = nilfs_btree_node_get_nchildren(node); 1668 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1669 nextmaxkey = (nchildren > 1) ? 1670 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1671 if (bh != NULL) 1672 brelse(bh); 1673 1674 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1675 } 1676 1677 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1678 __u64 *keys, __u64 *ptrs, int nitems) 1679 { 1680 struct buffer_head *bh; 1681 struct nilfs_btree_node *node, *root; 1682 __le64 *dkeys; 1683 __le64 *dptrs; 1684 __u64 ptr; 1685 int nchildren, ncmax, i, ret; 1686 1687 root = nilfs_btree_get_root(btree); 1688 switch (nilfs_btree_height(btree)) { 1689 case 2: 1690 bh = NULL; 1691 node = root; 1692 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1693 break; 1694 case 3: 1695 nchildren = nilfs_btree_node_get_nchildren(root); 1696 WARN_ON(nchildren > 1); 1697 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1698 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1699 ret = nilfs_btree_get_block(btree, ptr, &bh); 1700 if (ret < 0) 1701 return ret; 1702 node = (struct nilfs_btree_node *)bh->b_data; 1703 ncmax = nilfs_btree_nchildren_per_block(btree); 1704 break; 1705 default: 1706 node = NULL; 1707 return -EINVAL; 1708 } 1709 1710 nchildren = nilfs_btree_node_get_nchildren(node); 1711 if (nchildren < nitems) 1712 nitems = nchildren; 1713 dkeys = 
nilfs_btree_node_dkeys(node); 1714 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1715 for (i = 0; i < nitems; i++) { 1716 keys[i] = le64_to_cpu(dkeys[i]); 1717 ptrs[i] = le64_to_cpu(dptrs[i]); 1718 } 1719 1720 if (bh != NULL) 1721 brelse(bh); 1722 1723 return nitems; 1724 } 1725 1726 static int 1727 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1728 union nilfs_bmap_ptr_req *dreq, 1729 union nilfs_bmap_ptr_req *nreq, 1730 struct buffer_head **bhp, 1731 struct nilfs_bmap_stats *stats) 1732 { 1733 struct buffer_head *bh; 1734 struct inode *dat = NULL; 1735 int ret; 1736 1737 stats->bs_nblocks = 0; 1738 1739 /* for data */ 1740 /* cannot find near ptr */ 1741 if (NILFS_BMAP_USE_VBN(btree)) { 1742 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1743 dat = nilfs_bmap_get_dat(btree); 1744 } 1745 1746 ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); 1747 if (ret < 0) 1748 return ret; 1749 1750 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1751 if (ret < 0) 1752 return ret; 1753 1754 *bhp = NULL; 1755 stats->bs_nblocks++; 1756 if (nreq != NULL) { 1757 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1758 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1759 if (ret < 0) 1760 goto err_out_dreq; 1761 1762 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1763 if (ret < 0) 1764 goto err_out_nreq; 1765 1766 *bhp = bh; 1767 stats->bs_nblocks++; 1768 } 1769 1770 /* success */ 1771 return 0; 1772 1773 /* error */ 1774 err_out_nreq: 1775 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1776 err_out_dreq: 1777 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1778 stats->bs_nblocks = 0; 1779 return ret; 1780 1781 } 1782 1783 static void 1784 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1785 __u64 key, __u64 ptr, 1786 const __u64 *keys, const __u64 *ptrs, 1787 int n, 1788 union nilfs_bmap_ptr_req *dreq, 1789 union nilfs_bmap_ptr_req *nreq, 1790 struct buffer_head *bh) 1791 { 1792 struct 
nilfs_btree_node *node; 1793 struct inode *dat; 1794 __u64 tmpptr; 1795 int ncblk; 1796 1797 /* free resources */ 1798 if (btree->b_ops->bop_clear != NULL) 1799 btree->b_ops->bop_clear(btree); 1800 1801 /* ptr must be a pointer to a buffer head. */ 1802 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1803 1804 /* convert and insert */ 1805 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1806 __nilfs_btree_init(btree); 1807 if (nreq != NULL) { 1808 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1809 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1810 1811 /* create child node at level 1 */ 1812 node = (struct nilfs_btree_node *)bh->b_data; 1813 ncblk = nilfs_btree_nchildren_per_block(btree); 1814 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1815 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1816 if (!buffer_dirty(bh)) 1817 mark_buffer_dirty(bh); 1818 if (!nilfs_bmap_dirty(btree)) 1819 nilfs_bmap_set_dirty(btree); 1820 1821 brelse(bh); 1822 1823 /* create root node at level 2 */ 1824 node = nilfs_btree_get_root(btree); 1825 tmpptr = nreq->bpr_ptr; 1826 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1827 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1828 &keys[0], &tmpptr); 1829 } else { 1830 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1831 1832 /* create root node at level 1 */ 1833 node = nilfs_btree_get_root(btree); 1834 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1835 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1836 keys, ptrs); 1837 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1838 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1839 if (!nilfs_bmap_dirty(btree)) 1840 nilfs_bmap_set_dirty(btree); 1841 } 1842 1843 if (NILFS_BMAP_USE_VBN(btree)) 1844 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1845 } 1846 1847 /** 1848 * nilfs_btree_convert_and_insert - 1849 * @bmap: 1850 * @key: 1851 * @ptr: 1852 * @keys: 1853 * @ptrs: 1854 * @n: 1855 */ 1856 int nilfs_btree_convert_and_insert(struct nilfs_bmap 
*btree, 1857 __u64 key, __u64 ptr, 1858 const __u64 *keys, const __u64 *ptrs, int n) 1859 { 1860 struct buffer_head *bh = NULL; 1861 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1862 struct nilfs_bmap_stats stats; 1863 int ret; 1864 1865 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1866 di = &dreq; 1867 ni = NULL; 1868 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1869 nilfs_btree_node_size(btree))) { 1870 di = &dreq; 1871 ni = &nreq; 1872 } else { 1873 di = NULL; 1874 ni = NULL; 1875 BUG(); 1876 } 1877 1878 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1879 &stats); 1880 if (ret < 0) 1881 return ret; 1882 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1883 di, ni, bh); 1884 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1885 return 0; 1886 } 1887 1888 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1889 struct nilfs_btree_path *path, 1890 int level, 1891 struct buffer_head *bh) 1892 { 1893 while ((++level < nilfs_btree_height(btree) - 1) && 1894 !buffer_dirty(path[level].bp_bh)) 1895 mark_buffer_dirty(path[level].bp_bh); 1896 1897 return 0; 1898 } 1899 1900 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1901 struct nilfs_btree_path *path, 1902 int level, struct inode *dat) 1903 { 1904 struct nilfs_btree_node *parent; 1905 int ncmax, ret; 1906 1907 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1908 path[level].bp_oldreq.bpr_ptr = 1909 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1910 ncmax); 1911 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1912 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1913 &path[level].bp_newreq.bpr_req); 1914 if (ret < 0) 1915 return ret; 1916 1917 if (buffer_nilfs_node(path[level].bp_bh)) { 1918 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1919 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1920 path[level].bp_ctxt.bh = path[level].bp_bh; 
1921 ret = nilfs_btnode_prepare_change_key( 1922 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1923 &path[level].bp_ctxt); 1924 if (ret < 0) { 1925 nilfs_dat_abort_update(dat, 1926 &path[level].bp_oldreq.bpr_req, 1927 &path[level].bp_newreq.bpr_req); 1928 return ret; 1929 } 1930 } 1931 1932 return 0; 1933 } 1934 1935 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1936 struct nilfs_btree_path *path, 1937 int level, struct inode *dat) 1938 { 1939 struct nilfs_btree_node *parent; 1940 int ncmax; 1941 1942 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1943 &path[level].bp_newreq.bpr_req, 1944 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1945 1946 if (buffer_nilfs_node(path[level].bp_bh)) { 1947 nilfs_btnode_commit_change_key( 1948 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1949 &path[level].bp_ctxt); 1950 path[level].bp_bh = path[level].bp_ctxt.bh; 1951 } 1952 set_buffer_nilfs_volatile(path[level].bp_bh); 1953 1954 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1955 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1956 path[level].bp_newreq.bpr_ptr, ncmax); 1957 } 1958 1959 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1960 struct nilfs_btree_path *path, 1961 int level, struct inode *dat) 1962 { 1963 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1964 &path[level].bp_newreq.bpr_req); 1965 if (buffer_nilfs_node(path[level].bp_bh)) 1966 nilfs_btnode_abort_change_key( 1967 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1968 &path[level].bp_ctxt); 1969 } 1970 1971 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1972 struct nilfs_btree_path *path, 1973 int minlevel, int *maxlevelp, 1974 struct inode *dat) 1975 { 1976 int level, ret; 1977 1978 level = minlevel; 1979 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1980 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1981 if (ret < 0) 1982 return ret; 1983 } 1984 while ((++level < 
nilfs_btree_height(btree) - 1) && 1985 !buffer_dirty(path[level].bp_bh)) { 1986 1987 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1988 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1989 if (ret < 0) 1990 goto out; 1991 } 1992 1993 /* success */ 1994 *maxlevelp = level - 1; 1995 return 0; 1996 1997 /* error */ 1998 out: 1999 while (--level > minlevel) 2000 nilfs_btree_abort_update_v(btree, path, level, dat); 2001 if (!buffer_nilfs_volatile(path[level].bp_bh)) 2002 nilfs_btree_abort_update_v(btree, path, level, dat); 2003 return ret; 2004 } 2005 2006 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2007 struct nilfs_btree_path *path, 2008 int minlevel, int maxlevel, 2009 struct buffer_head *bh, 2010 struct inode *dat) 2011 { 2012 int level; 2013 2014 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2015 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2016 2017 for (level = minlevel + 1; level <= maxlevel; level++) 2018 nilfs_btree_commit_update_v(btree, path, level, dat); 2019 } 2020 2021 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2022 struct nilfs_btree_path *path, 2023 int level, struct buffer_head *bh) 2024 { 2025 int maxlevel = 0, ret; 2026 struct nilfs_btree_node *parent; 2027 struct inode *dat = nilfs_bmap_get_dat(btree); 2028 __u64 ptr; 2029 int ncmax; 2030 2031 get_bh(bh); 2032 path[level].bp_bh = bh; 2033 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2034 dat); 2035 if (ret < 0) 2036 goto out; 2037 2038 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2039 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2040 ptr = nilfs_btree_node_get_ptr(parent, 2041 path[level + 1].bp_index, 2042 ncmax); 2043 ret = nilfs_dat_mark_dirty(dat, ptr); 2044 if (ret < 0) 2045 goto out; 2046 } 2047 2048 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2049 2050 out: 2051 brelse(path[level].bp_bh); 2052 path[level].bp_bh = NULL; 2053 return ret; 2054 } 
2055 2056 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2057 struct buffer_head *bh) 2058 { 2059 struct nilfs_btree_path *path; 2060 struct nilfs_btree_node *node; 2061 __u64 key; 2062 int level, ret; 2063 2064 WARN_ON(!buffer_dirty(bh)); 2065 2066 path = nilfs_btree_alloc_path(); 2067 if (path == NULL) 2068 return -ENOMEM; 2069 2070 if (buffer_nilfs_node(bh)) { 2071 node = (struct nilfs_btree_node *)bh->b_data; 2072 key = nilfs_btree_node_get_key(node, 0); 2073 level = nilfs_btree_node_get_level(node); 2074 } else { 2075 key = nilfs_bmap_data_get_key(btree, bh); 2076 level = NILFS_BTREE_LEVEL_DATA; 2077 } 2078 2079 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2080 if (ret < 0) { 2081 if (unlikely(ret == -ENOENT)) 2082 nilfs_crit(btree->b_inode->i_sb, 2083 "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", 2084 btree->b_inode->i_ino, 2085 (unsigned long long)key, level); 2086 goto out; 2087 } 2088 2089 ret = NILFS_BMAP_USE_VBN(btree) ? 
2090 nilfs_btree_propagate_v(btree, path, level, bh) : 2091 nilfs_btree_propagate_p(btree, path, level, bh); 2092 2093 out: 2094 nilfs_btree_free_path(path); 2095 2096 return ret; 2097 } 2098 2099 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2100 struct buffer_head *bh) 2101 { 2102 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2103 } 2104 2105 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2106 struct list_head *lists, 2107 struct buffer_head *bh) 2108 { 2109 struct list_head *head; 2110 struct buffer_head *cbh; 2111 struct nilfs_btree_node *node, *cnode; 2112 __u64 key, ckey; 2113 int level; 2114 2115 get_bh(bh); 2116 node = (struct nilfs_btree_node *)bh->b_data; 2117 key = nilfs_btree_node_get_key(node, 0); 2118 level = nilfs_btree_node_get_level(node); 2119 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2120 level >= NILFS_BTREE_LEVEL_MAX) { 2121 dump_stack(); 2122 nilfs_warn(btree->b_inode->i_sb, 2123 "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", 2124 level, (unsigned long long)key, 2125 btree->b_inode->i_ino, 2126 (unsigned long long)bh->b_blocknr); 2127 return; 2128 } 2129 2130 list_for_each(head, &lists[level]) { 2131 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2132 cnode = (struct nilfs_btree_node *)cbh->b_data; 2133 ckey = nilfs_btree_node_get_key(cnode, 0); 2134 if (key < ckey) 2135 break; 2136 } 2137 list_add_tail(&bh->b_assoc_buffers, head); 2138 } 2139 2140 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2141 struct list_head *listp) 2142 { 2143 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 2144 struct address_space *btcache = btnc_inode->i_mapping; 2145 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2146 struct pagevec pvec; 2147 struct buffer_head *bh, *head; 2148 pgoff_t index = 0; 2149 int level, i; 2150 2151 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2152 level < NILFS_BTREE_LEVEL_MAX; 2153 level++) 2154 
INIT_LIST_HEAD(&lists[level]); 2155 2156 pagevec_init(&pvec); 2157 2158 while (pagevec_lookup_tag(&pvec, btcache, &index, 2159 PAGECACHE_TAG_DIRTY)) { 2160 for (i = 0; i < pagevec_count(&pvec); i++) { 2161 bh = head = page_buffers(pvec.pages[i]); 2162 do { 2163 if (buffer_dirty(bh)) 2164 nilfs_btree_add_dirty_buffer(btree, 2165 lists, bh); 2166 } while ((bh = bh->b_this_page) != head); 2167 } 2168 pagevec_release(&pvec); 2169 cond_resched(); 2170 } 2171 2172 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2173 level < NILFS_BTREE_LEVEL_MAX; 2174 level++) 2175 list_splice_tail(&lists[level], listp); 2176 } 2177 2178 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2179 struct nilfs_btree_path *path, 2180 int level, 2181 struct buffer_head **bh, 2182 sector_t blocknr, 2183 union nilfs_binfo *binfo) 2184 { 2185 struct nilfs_btree_node *parent; 2186 __u64 key; 2187 __u64 ptr; 2188 int ncmax, ret; 2189 2190 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2191 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2192 ncmax); 2193 if (buffer_nilfs_node(*bh)) { 2194 path[level].bp_ctxt.oldkey = ptr; 2195 path[level].bp_ctxt.newkey = blocknr; 2196 path[level].bp_ctxt.bh = *bh; 2197 ret = nilfs_btnode_prepare_change_key( 2198 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2199 &path[level].bp_ctxt); 2200 if (ret < 0) 2201 return ret; 2202 nilfs_btnode_commit_change_key( 2203 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2204 &path[level].bp_ctxt); 2205 *bh = path[level].bp_ctxt.bh; 2206 } 2207 2208 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2209 ncmax); 2210 2211 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2212 /* on-disk format */ 2213 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2214 binfo->bi_dat.bi_level = level; 2215 2216 return 0; 2217 } 2218 2219 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2220 struct nilfs_btree_path *path, 2221 int level, 2222 struct buffer_head **bh, 2223 
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	struct inode *dat = nilfs_bmap_get_dat(btree);
	__u64 key;
	__u64 ptr;
	union nilfs_bmap_ptr_req req;
	int ncmax, ret;

	/* read the block's virtual block number from its parent node */
	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
				       ncmax);
	/* record in the DAT that @ptr now lives at block @blocknr */
	req.bpr_ptr = ptr;
	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
	if (ret < 0)
		return ret;
	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);

	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}

/*
 * nilfs_btree_assign - assign the disk block address @blocknr to the
 *	block of *@bh, dispatching to the virtual (DAT) or physical
 *	variant depending on the bmap type.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int nilfs_btree_assign(struct nilfs_bmap *btree,
			      struct buffer_head **bh,
			      sector_t blocknr,
			      union nilfs_binfo *binfo)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	if (buffer_nilfs_node(*bh)) {
		/* btree node block: key and level come from the node itself */
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
		level = nilfs_btree_node_get_level(node);
	} else {
		/* data block */
		key = nilfs_bmap_data_get_key(btree, *bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	/* build the path down to the parent of the target block */
	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	ret = NILFS_BMAP_USE_VBN(btree) ?
		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);

 out:
	nilfs_btree_free_path(path);

	return ret;
}

/*
 * nilfs_btree_assign_gc - assign variant used during GC: move the
 *	block's virtual block number to the new address @blocknr via the
 *	DAT and fill in the on-disk binfo.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *node;
	__u64 key;
	int ret;

	/* b_blocknr holds the virtual block number of a GC'd block */
	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
			     blocknr);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
	} else
		key = nilfs_bmap_data_get_key(btree, *bh);

	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}

/*
 * nilfs_btree_mark - mark the btree block holding @key at @level dirty
 *	and flag the bmap itself dirty.
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
{
	struct buffer_head *bh;
	struct nilfs_btree_path *path;
	__u64 ptr;
	int ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	/* look up the pointer to the target block from one level above */
	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}
	ret = nilfs_btree_get_block(btree, ptr, &bh);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	if (!buffer_dirty(bh))
		mark_buffer_dirty(bh);
	brelse(bh);
	if (!nilfs_bmap_dirty(btree))
		nilfs_bmap_set_dirty(btree);

 out:
	nilfs_btree_free_path(path);
	return ret;
}

/* bmap operation set for a regular (non-GC) btree */
static const struct nilfs_bmap_operations nilfs_btree_ops = {
	.bop_lookup = nilfs_btree_lookup,
	.bop_lookup_contig = nilfs_btree_lookup_contig,
	.bop_insert = nilfs_btree_insert,
	.bop_delete = nilfs_btree_delete,
	.bop_clear = NULL,

	.bop_propagate =
	nilfs_btree_propagate,

	.bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,

	.bop_assign = nilfs_btree_assign,
	.bop_mark = nilfs_btree_mark,

	.bop_seek_key = nilfs_btree_seek_key,
	.bop_last_key = nilfs_btree_last_key,

	.bop_check_insert = NULL,
	.bop_check_delete = nilfs_btree_check_delete,
	.bop_gather_data = nilfs_btree_gather_data,
};

/*
 * Reduced bmap operation set used for GC: only propagate, dirty-buffer
 * lookup, and assign are provided; all other operations are disabled.
 */
static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
	.bop_lookup = NULL,
	.bop_lookup_contig = NULL,
	.bop_insert = NULL,
	.bop_delete = NULL,
	.bop_clear = NULL,

	.bop_propagate = nilfs_btree_propagate_gc,

	.bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,

	.bop_assign = nilfs_btree_assign_gc,
	.bop_mark = NULL,

	.bop_seek_key = NULL,
	.bop_last_key = NULL,

	.bop_check_insert = NULL,
	.bop_check_delete = NULL,
	.bop_gather_data = NULL,
};

/* install the btree operations and cache the per-node fanout */
static void __nilfs_btree_init(struct nilfs_bmap *bmap)
{
	bmap->b_ops = &nilfs_btree_ops;
	bmap->b_nchildren_per_block =
		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
}

/*
 * nilfs_btree_init - initialize @bmap as a btree
 *
 * Returns 0 on success, -EIO if the root node is broken, or another
 * negative error code if attaching the btree node cache fails.
 */
int nilfs_btree_init(struct nilfs_bmap *bmap)
{
	int ret = 0;

	__nilfs_btree_init(bmap);

	/* reject a corrupted root node before attaching the node cache */
	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
		ret = -EIO;
	else
		ret = nilfs_attach_btree_node_cache(
			&NILFS_BMAP_I(bmap)->vfs_inode);

	return ret;
}

/* GC variant of btree initialization: installs the reduced operation set */
void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
{
	bmap->b_ops = &nilfs_btree_ops_gc;
	bmap->b_nchildren_per_block =
		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
}