/*
 * Copyright (C) 2011 Fujitsu. All rights reserved.
 * Written by Miao Xie <miaox@cn.fujitsu.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/slab.h>
#include "delayed-inode.h"
#include "disk-io.h"
#include "transaction.h"
#include "ctree.h"

#define BTRFS_DELAYED_WRITEBACK		512
#define BTRFS_DELAYED_BACKGROUND	128
#define BTRFS_DELAYED_BATCH		16

static struct kmem_cache *delayed_node_cache;

int __init btrfs_delayed_inode_init(void)
{
	delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
					sizeof(struct btrfs_delayed_node),
					0,
					SLAB_MEM_SPREAD,
					NULL);
	if (!delayed_node_cache)
		return -ENOMEM;
	return 0;
}

void btrfs_delayed_inode_exit(void)
{
	kmem_cache_destroy(delayed_node_cache);
}

static inline void btrfs_init_delayed_node(
				struct btrfs_delayed_node *delayed_node,
				struct btrfs_root *root, u64 inode_id)
{
	delayed_node->root = root;
	delayed_node->inode_id = inode_id;
	refcount_set(&delayed_node->refs, 0);
	delayed_node->ins_root = RB_ROOT;
	delayed_node->del_root = RB_ROOT;
	mutex_init(&delayed_node->mutex);
	INIT_LIST_HEAD(&delayed_node->n_list);
	INIT_LIST_HEAD(&delayed_node->p_list);
}

static inline int btrfs_is_continuous_delayed_item(
					struct btrfs_delayed_item *item1,
					struct btrfs_delayed_item *item2)
{
	if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
	    item1->key.objectid == item2->key.objectid &&
	    item1->key.type == item2->key.type &&
	    item1->key.offset + 1 == item2->key.offset)
		return 1;
	return 0;
}

static struct btrfs_delayed_node *btrfs_get_delayed_node(
		struct btrfs_inode *btrfs_inode)
{
	struct btrfs_root *root = btrfs_inode->root;
	u64 ino = btrfs_ino(btrfs_inode);
	struct btrfs_delayed_node *node;

	node = READ_ONCE(btrfs_inode->delayed_node);
	if (node) {
		refcount_inc(&node->refs);
		return node;
	}

	spin_lock(&root->inode_lock);
	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);

	if (node) {
		if (btrfs_inode->delayed_node) {
			refcount_inc(&node->refs);	/* can be accessed */
			BUG_ON(btrfs_inode->delayed_node != node);
			spin_unlock(&root->inode_lock);
			return node;
		}

		/*
		 * It's possible that we're racing into the middle of removing
		 * this node from the radix tree.  In this case, the refcount
		 * was zero and it should never go back to one.  Just return
		 * NULL like it was never in the radix at all; our release
		 * function is in the process of removing it.
		 *
		 * Some implementations of refcount_inc refuse to bump the
		 * refcount once it has hit zero.  If we don't do this dance
		 * here, refcount_inc() may decide to just WARN_ONCE() instead
		 * of actually bumping the refcount.
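		 *
		 * refcount_inc_not_zero() below handles this safely: it only
		 * takes a reference while the count is still non-zero, so a
		 * node that is already being freed is never resurrected here.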
		 *
		 * If this node is properly in the radix, we want to bump the
		 * refcount twice, once for the inode and once for this get
		 * operation.
		 */
		if (refcount_inc_not_zero(&node->refs)) {
			refcount_inc(&node->refs);
			btrfs_inode->delayed_node = node;
		} else {
			node = NULL;
		}

		spin_unlock(&root->inode_lock);
		return node;
	}
	spin_unlock(&root->inode_lock);

	return NULL;
}

/* Will return either the node or PTR_ERR(-ENOMEM) */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
		struct btrfs_inode *btrfs_inode)
{
	struct btrfs_delayed_node *node;
	struct btrfs_root *root = btrfs_inode->root;
	u64 ino = btrfs_ino(btrfs_inode);
	int ret;

again:
	node = btrfs_get_delayed_node(btrfs_inode);
	if (node)
		return node;

	node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
	if (!node)
		return ERR_PTR(-ENOMEM);
	btrfs_init_delayed_node(node, root, ino);

	/* cached in the btrfs inode and can be accessed */
	refcount_set(&node->refs, 2);

	ret = radix_tree_preload(GFP_NOFS);
	if (ret) {
		kmem_cache_free(delayed_node_cache, node);
		return ERR_PTR(ret);
	}

	spin_lock(&root->inode_lock);
	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
	if (ret == -EEXIST) {
		spin_unlock(&root->inode_lock);
		kmem_cache_free(delayed_node_cache, node);
		radix_tree_preload_end();
		goto again;
	}
	btrfs_inode->delayed_node = node;
	spin_unlock(&root->inode_lock);
	radix_tree_preload_end();

	return node;
}

/*
 * Call it when holding delayed_node->mutex
 *
 * If mod = 1, add this node into the prepared list.
 */
static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
				     struct btrfs_delayed_node *node,
				     int mod)
{
	spin_lock(&root->lock);
	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		if (!list_empty(&node->p_list))
			list_move_tail(&node->p_list, &root->prepare_list);
		else if (mod)
			list_add_tail(&node->p_list, &root->prepare_list);
	} else {
		list_add_tail(&node->n_list, &root->node_list);
		list_add_tail(&node->p_list, &root->prepare_list);
		refcount_inc(&node->refs);	/* inserted into list */
		root->nodes++;
		set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
	}
	spin_unlock(&root->lock);
}

/* Call it when holding delayed_node->mutex */
static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
				       struct btrfs_delayed_node *node)
{
	spin_lock(&root->lock);
	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		root->nodes--;
		refcount_dec(&node->refs);	/* not in the list */
		list_del_init(&node->n_list);
		if (!list_empty(&node->p_list))
			list_del_init(&node->p_list);
		clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
	}
	spin_unlock(&root->lock);
}

static struct btrfs_delayed_node *btrfs_first_delayed_node(
			struct btrfs_delayed_root *delayed_root)
{
	struct list_head *p;
	struct btrfs_delayed_node *node = NULL;

	spin_lock(&delayed_root->lock);
	if (list_empty(&delayed_root->node_list))
		goto out;

	p = delayed_root->node_list.next;
	node = list_entry(p, struct btrfs_delayed_node, n_list);
	refcount_inc(&node->refs);
out:
	spin_unlock(&delayed_root->lock);

	return node;
}

static struct btrfs_delayed_node *btrfs_next_delayed_node(
						struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_root *delayed_root;
	struct list_head *p;
	struct btrfs_delayed_node *next = NULL;

	delayed_root = node->root->fs_info->delayed_root;
	spin_lock(&delayed_root->lock);
	if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		/* not in the list */
		if (list_empty(&delayed_root->node_list))
			goto out;
		p = delayed_root->node_list.next;
	} else if (list_is_last(&node->n_list, &delayed_root->node_list))
		goto out;
	else
		p = node->n_list.next;

	next = list_entry(p, struct btrfs_delayed_node, n_list);
	refcount_inc(&next->refs);
out:
	spin_unlock(&delayed_root->lock);

	return next;
}

static void __btrfs_release_delayed_node(
				struct btrfs_delayed_node *delayed_node,
				int mod)
{
	struct btrfs_delayed_root *delayed_root;

	if (!delayed_node)
		return;

	delayed_root = delayed_node->root->fs_info->delayed_root;

	mutex_lock(&delayed_node->mutex);
	if (delayed_node->count)
		btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
	else
		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
	mutex_unlock(&delayed_node->mutex);

	if (refcount_dec_and_test(&delayed_node->refs)) {
		struct btrfs_root *root = delayed_node->root;

		spin_lock(&root->inode_lock);
		/*
		 * Once our refcount goes to zero, nobody is allowed to bump it
		 * back up.  We can delete it now.
		 */
		ASSERT(refcount_read(&delayed_node->refs) == 0);
		radix_tree_delete(&root->delayed_nodes_tree,
				  delayed_node->inode_id);
		spin_unlock(&root->inode_lock);
		kmem_cache_free(delayed_node_cache, delayed_node);
	}
}

static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 0);
}

static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
					struct btrfs_delayed_root *delayed_root)
{
	struct list_head *p;
	struct btrfs_delayed_node *node = NULL;

	spin_lock(&delayed_root->lock);
	if (list_empty(&delayed_root->prepare_list))
		goto out;

	p = delayed_root->prepare_list.next;
	list_del_init(p);
	node = list_entry(p, struct btrfs_delayed_node, p_list);
	refcount_inc(&node->refs);
out:
	spin_unlock(&delayed_root->lock);

	return node;
}

static inline void btrfs_release_prepared_delayed_node(
					struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 1);
}

static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
{
	struct btrfs_delayed_item *item;
	item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
	if (item) {
		item->data_len = data_len;
		item->ins_or_del = 0;
		item->bytes_reserved = 0;
		item->delayed_node = NULL;
		refcount_set(&item->refs, 1);
	}
	return item;
}

/*
 * __btrfs_lookup_delayed_item - look up the delayed item by key
 * @root: the rb-tree to search (the ins_root or del_root of a delayed node)
 * @key: the key to look up
 * @prev: used to store the prev item if the right item isn't found
 * @next: used to store the next item if the right item isn't found
 *
 * Note: if the exact item is not found, the previous and the next items
 * around @key are returned through @prev and @next.
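 *
 * Return: the matching delayed item, or NULL when there is no exact match
 * (in which case @prev and @next are filled in as described above).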
 */
static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
				struct rb_root *root,
				struct btrfs_key *key,
				struct btrfs_delayed_item **prev,
				struct btrfs_delayed_item **next)
{
	struct rb_node *node, *prev_node = NULL;
	struct btrfs_delayed_item *delayed_item = NULL;
	int ret = 0;

	node = root->rb_node;

	while (node) {
		delayed_item = rb_entry(node, struct btrfs_delayed_item,
					rb_node);
		prev_node = node;
		ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
		if (ret < 0)
			node = node->rb_right;
		else if (ret > 0)
			node = node->rb_left;
		else
			return delayed_item;
	}

	if (prev) {
		if (!prev_node)
			*prev = NULL;
		else if (ret < 0)
			*prev = delayed_item;
		else if ((node = rb_prev(prev_node)) != NULL) {
			*prev = rb_entry(node, struct btrfs_delayed_item,
					 rb_node);
		} else
			*prev = NULL;
	}

	if (next) {
		if (!prev_node)
			*next = NULL;
		else if (ret > 0)
			*next = delayed_item;
		else if ((node = rb_next(prev_node)) != NULL) {
			*next = rb_entry(node, struct btrfs_delayed_item,
					 rb_node);
		} else
			*next = NULL;
	}
	return NULL;
}

static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
					struct btrfs_delayed_node *delayed_node,
					struct btrfs_key *key)
{
	return __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
					   NULL, NULL);
}

static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
				    struct btrfs_delayed_item *ins,
				    int action)
{
	struct rb_node **p, *node;
	struct rb_node *parent_node = NULL;
	struct rb_root *root;
	struct btrfs_delayed_item *item;
	int cmp;

	if (action == BTRFS_DELAYED_INSERTION_ITEM)
		root = &delayed_node->ins_root;
	else if (action == BTRFS_DELAYED_DELETION_ITEM)
		root = &delayed_node->del_root;
	else
		BUG();
	p = &root->rb_node;
	node = &ins->rb_node;

	while (*p) {
		parent_node = *p;
		item = rb_entry(parent_node, struct btrfs_delayed_item,
				rb_node);

		cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
		if (cmp < 0)
			p = &(*p)->rb_right;
		else if (cmp > 0)
			p = &(*p)->rb_left;
		else
			return -EEXIST;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	ins->delayed_node = delayed_node;
	ins->ins_or_del = action;

	if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
	    action == BTRFS_DELAYED_INSERTION_ITEM &&
	    ins->key.offset >= delayed_node->index_cnt)
		delayed_node->index_cnt = ins->key.offset + 1;

	delayed_node->count++;
	atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
	return 0;
}

static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
					      struct btrfs_delayed_item *item)
{
	return __btrfs_add_delayed_item(node, item,
					BTRFS_DELAYED_INSERTION_ITEM);
}

static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
					     struct btrfs_delayed_item *item)
{
	return __btrfs_add_delayed_item(node, item,
					BTRFS_DELAYED_DELETION_ITEM);
}

static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
	int seq = atomic_inc_return(&delayed_root->items_seq);

	/*
	 * atomic_dec_return implies a barrier for waitqueue_active
	 */
	if ((atomic_dec_return(&delayed_root->items) <
	    BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
	    waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);
}

static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
	struct rb_root *root;
	struct btrfs_delayed_root *delayed_root;

	delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;

	BUG_ON(!delayed_root);
	BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
	       delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);

	if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
		root = &delayed_item->delayed_node->ins_root;
	else
		root = &delayed_item->delayed_node->del_root;

	rb_erase(&delayed_item->rb_node, root);
	delayed_item->delayed_node->count--;

	finish_one_item(delayed_root);
}

static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
{
	if (item) {
		__btrfs_remove_delayed_item(item);
		if (refcount_dec_and_test(&item->refs))
			kfree(item);
	}
}

static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
					struct btrfs_delayed_node *delayed_node)
{
	struct rb_node *p;
	struct btrfs_delayed_item *item = NULL;

	p = rb_first(&delayed_node->ins_root);
	if (p)
		item = rb_entry(p, struct btrfs_delayed_item, rb_node);

	return item;
}

static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
					struct btrfs_delayed_node *delayed_node)
{
	struct rb_node *p;
	struct btrfs_delayed_item *item = NULL;

	p = rb_first(&delayed_node->del_root);
	if (p)
		item = rb_entry(p, struct btrfs_delayed_item, rb_node);

	return item;
}

static struct btrfs_delayed_item *__btrfs_next_delayed_item(
						struct btrfs_delayed_item *item)
{
	struct rb_node *p;
	struct btrfs_delayed_item *next = NULL;

	p = rb_next(&item->rb_node);
	if (p)
		next = rb_entry(p, struct btrfs_delayed_item, rb_node);

	return next;
}

static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
					       struct btrfs_fs_info *fs_info,
					       struct btrfs_delayed_item *item)
{
	struct btrfs_block_rsv *src_rsv;
	struct btrfs_block_rsv *dst_rsv;
	u64 num_bytes;
	int ret;

	if (!trans->bytes_reserved)
		return 0;

	src_rsv = trans->block_rsv;
	dst_rsv = &fs_info->delayed_block_rsv;

	num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
	if (!ret) {
		trace_btrfs_space_reservation(fs_info, "delayed_item",
					      item->key.objectid,
					      num_bytes, 1);
		item->bytes_reserved = num_bytes;
	}

	return ret;
}

static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
						struct btrfs_delayed_item *item)
{
	struct btrfs_block_rsv *rsv;

	if (!item->bytes_reserved)
		return;

	rsv = &fs_info->delayed_block_rsv;
	trace_btrfs_space_reservation(fs_info, "delayed_item",
				      item->key.objectid, item->bytes_reserved,
				      0);
	btrfs_block_rsv_release(fs_info, rsv,
				item->bytes_reserved);
}

static int btrfs_delayed_inode_reserve_metadata(
					struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_inode *inode,
					struct btrfs_delayed_node *node)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *src_rsv;
	struct btrfs_block_rsv *dst_rsv;
	u64 num_bytes;
	int ret;

	src_rsv = trans->block_rsv;
	dst_rsv = &fs_info->delayed_block_rsv;

	num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);

	/*
	 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
	 * which doesn't reserve space, for speed.  This is a problem since we
	 * still need to reserve space for this update, so try to reserve the
	 * space.
	 *
	 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
	 * we always reserve enough to update the inode item.
	 */
	if (!src_rsv || (!trans->bytes_reserved &&
			 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
		ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
					  BTRFS_RESERVE_NO_FLUSH);
		/*
		 * Since we're under a transaction reserve_metadata_bytes could
		 * try to commit the transaction which will make it return
		 * EAGAIN to make us stop the transaction we have, so return
		 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
		 */
		if (ret == -EAGAIN)
			ret = -ENOSPC;
		if (!ret) {
			node->bytes_reserved = num_bytes;
			trace_btrfs_space_reservation(fs_info,
						      "delayed_inode",
						      btrfs_ino(inode),
						      num_bytes, 1);
		}
		return ret;
	}

	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
	if (!ret) {
		trace_btrfs_space_reservation(fs_info, "delayed_inode",
					      btrfs_ino(inode), num_bytes, 1);
		node->bytes_reserved = num_bytes;
	}

	return ret;
}

static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
						 struct btrfs_delayed_node *node)
{
	struct btrfs_block_rsv *rsv;

	if (!node->bytes_reserved)
		return;

	rsv = &fs_info->delayed_block_rsv;
	trace_btrfs_space_reservation(fs_info, "delayed_inode",
				      node->inode_id, node->bytes_reserved, 0);
	btrfs_block_rsv_release(fs_info, rsv,
				node->bytes_reserved);
	node->bytes_reserved = 0;
}

/*
 * This helper will insert some continuous items into the same leaf according
 * to the free space of the leaf.
 */
static int btrfs_batch_insert_items(struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct btrfs_delayed_item *item)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_delayed_item *curr, *next;
	int free_space;
	int total_data_size = 0, total_size = 0;
	struct extent_buffer *leaf;
	char *data_ptr;
	struct btrfs_key *keys;
	u32 *data_size;
	struct list_head head;
	int slot;
	int nitems;
	int i;
	int ret = 0;

	BUG_ON(!path->nodes[0]);

	leaf = path->nodes[0];
	free_space = btrfs_leaf_free_space(fs_info, leaf);
	INIT_LIST_HEAD(&head);

	next = item;
	nitems = 0;

	/*
	 * count the number of the continuous items that we can insert in batch
	 */
	while (total_size + next->data_len + sizeof(struct btrfs_item) <=
	       free_space) {
		total_data_size += next->data_len;
		total_size += next->data_len + sizeof(struct btrfs_item);
		list_add_tail(&next->tree_list, &head);
		nitems++;

		curr = next;
		next = __btrfs_next_delayed_item(curr);
		if (!next)
			break;

		if (!btrfs_is_continuous_delayed_item(curr, next))
			break;
	}

	if (!nitems) {
		ret = 0;
		goto out;
	}

	/*
	 * we need to allocate some memory, but it might cause the task
	 * to sleep, so we set all locked nodes in the path to blocking locks
	 * first.
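	 *
	 * Both allocations below use GFP_NOFS because we are in the middle of
	 * modifying the tree and must not recurse back into the filesystem
	 * through memory reclaim.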
	 */
	btrfs_set_path_blocking(path);

	keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
	if (!keys) {
		ret = -ENOMEM;
		goto out;
	}

	data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
	if (!data_size) {
		ret = -ENOMEM;
		goto error;
	}

	/* get keys of all the delayed items */
	i = 0;
	list_for_each_entry(next, &head, tree_list) {
		keys[i] = next->key;
		data_size[i] = next->data_len;
		i++;
	}

	/* reset all the locked nodes in the path to spinning locks. */
	btrfs_clear_path_blocking(path, NULL, 0);

	/* insert the keys of the items */
	setup_items_for_insert(root, path, keys, data_size,
			       total_data_size, total_size, nitems);

	/* insert the dir index items */
	slot = path->slots[0];
	list_for_each_entry_safe(curr, next, &head, tree_list) {
		data_ptr = btrfs_item_ptr(leaf, slot, char);
		write_extent_buffer(leaf, &curr->data,
				    (unsigned long)data_ptr,
				    curr->data_len);
		slot++;

		btrfs_delayed_item_release_metadata(fs_info, curr);

		list_del(&curr->tree_list);
		btrfs_release_delayed_item(curr);
	}

error:
	kfree(data_size);
	kfree(keys);
out:
	return ret;
}

/*
 * This helper handles simple insertions that don't need to extend an existing
 * item with new data, such as directory index items and inode items.
 */
static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct btrfs_path *path,
				     struct btrfs_delayed_item *delayed_item)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf;
	char *ptr;
	int ret;

	ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
				      delayed_item->data_len);
	if (ret < 0 && ret != -EEXIST)
		return ret;

	leaf = path->nodes[0];

	ptr = btrfs_item_ptr(leaf, path->slots[0], char);

	write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
			    delayed_item->data_len);
	btrfs_mark_buffer_dirty(leaf);

	btrfs_delayed_item_release_metadata(fs_info, delayed_item);
	return 0;
}

/*
 * we insert an item first, then if there are some continuous items, we try
 * to insert those items into the same leaf.
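 *
 * The delayed node's mutex is taken around each pass, so the insertion tree
 * cannot change while a batch is being built and written out.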
 */
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
				      struct btrfs_path *path,
				      struct btrfs_root *root,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_item *curr, *prev;
	int ret = 0;

do_again:
	mutex_lock(&node->mutex);
	curr = __btrfs_first_delayed_insertion_item(node);
	if (!curr)
		goto insert_end;

	ret = btrfs_insert_delayed_item(trans, root, path, curr);
	if (ret < 0) {
		btrfs_release_path(path);
		goto insert_end;
	}

	prev = curr;
	curr = __btrfs_next_delayed_item(prev);
	if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
		/* insert the continuous items into the same leaf */
		path->slots[0]++;
		btrfs_batch_insert_items(root, path, curr);
	}
	btrfs_release_delayed_item(prev);
	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	goto do_again;

insert_end:
	mutex_unlock(&node->mutex);
	return ret;
}

static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct btrfs_delayed_item *item)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_delayed_item *curr, *next;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct list_head head;
	int nitems, i, last_item;
	int ret = 0;

	BUG_ON(!path->nodes[0]);

	leaf = path->nodes[0];

	i = path->slots[0];
	last_item = btrfs_header_nritems(leaf) - 1;
	if (i > last_item)
		return -ENOENT;	/* FIXME: Is errno suitable? */

	next = item;
	INIT_LIST_HEAD(&head);
	btrfs_item_key_to_cpu(leaf, &key, i);
	nitems = 0;
	/*
	 * count the number of the dir index items that we can delete in batch
	 */
	while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
		list_add_tail(&next->tree_list, &head);
		nitems++;

		curr = next;
		next = __btrfs_next_delayed_item(curr);
		if (!next)
			break;

		if (!btrfs_is_continuous_delayed_item(curr, next))
			break;

		i++;
		if (i > last_item)
			break;
		btrfs_item_key_to_cpu(leaf, &key, i);
	}

	if (!nitems)
		return 0;

	ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
	if (ret)
		goto out;

	list_for_each_entry_safe(curr, next, &head, tree_list) {
		btrfs_delayed_item_release_metadata(fs_info, curr);
		list_del(&curr->tree_list);
		btrfs_release_delayed_item(curr);
	}

out:
	return ret;
}

static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
				      struct btrfs_path *path,
				      struct btrfs_root *root,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_item *curr, *prev;
	int ret = 0;

do_again:
	mutex_lock(&node->mutex);
	curr = __btrfs_first_delayed_deletion_item(node);
	if (!curr)
		goto delete_fail;

	ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
	if (ret < 0)
		goto delete_fail;
	else if (ret > 0) {
		/*
		 * we can't find the item that this delayed item points to,
		 * so it is stale; just drop it.
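		 *
		 * btrfs_search_slot() returned > 0, meaning there is nothing
		 * in the tree for this key; release the delayed item and move
		 * on to the next one.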
		 */
		prev = curr;
		curr = __btrfs_next_delayed_item(prev);
		btrfs_release_delayed_item(prev);
		ret = 0;
		btrfs_release_path(path);
		if (curr) {
			mutex_unlock(&node->mutex);
			goto do_again;
		} else
			goto delete_fail;
	}

	btrfs_batch_delete_items(trans, root, path, curr);
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	goto do_again;

delete_fail:
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	return ret;
}

static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_delayed_root *delayed_root;

	if (delayed_node &&
	    test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		BUG_ON(!delayed_node->root);
		clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
		delayed_node->count--;

		delayed_root = delayed_node->root->fs_info->delayed_root;
		finish_one_item(delayed_root);
	}
}

static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_delayed_root *delayed_root;

	ASSERT(delayed_node->root);
	clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
	delayed_node->count--;

	delayed_root = delayed_node->root->fs_info->delayed_root;
	finish_one_item(delayed_root);
}

static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_delayed_node *node)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int mod;
	int ret;

	key.objectid = node->inode_id;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
		mod = -1;
	else
		mod = 1;

	ret = btrfs_lookup_inode(trans, root, path, &key, mod);
	if (ret > 0) {
		btrfs_release_path(path);
		return -ENOENT;
	} else if (ret < 0) {
		return ret;
	}

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
			    sizeof(struct btrfs_inode_item));
	btrfs_mark_buffer_dirty(leaf);

	if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
		goto no_iref;

	path->slots[0]++;
	if (path->slots[0] >= btrfs_header_nritems(leaf))
		goto search;
again:
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != node->inode_id)
		goto out;

	if (key.type != BTRFS_INODE_REF_KEY &&
	    key.type != BTRFS_INODE_EXTREF_KEY)
		goto out;

	/*
	 * Delayed iref deletion is only used for inodes with a single link,
	 * so there is only one iref; the case of several irefs packed into
	 * the same item cannot happen here.
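	 *
	 * At this point path->slots[0] points at that INODE_REF/EXTREF item,
	 * so it can be deleted directly.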
	 */
	btrfs_del_item(trans, root, path);
out:
	btrfs_release_delayed_iref(node);
no_iref:
	btrfs_release_path(path);
err_out:
	btrfs_delayed_inode_release_metadata(fs_info, node);
	btrfs_release_delayed_inode(node);

	return ret;

search:
	btrfs_release_path(path);

	key.type = BTRFS_INODE_EXTREF_KEY;
	key.offset = -1;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto err_out;
	ASSERT(ret);

	ret = 0;
	leaf = path->nodes[0];
	path->slots[0]--;
	goto again;
}

static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     struct btrfs_path *path,
					     struct btrfs_delayed_node *node)
{
	int ret;

	mutex_lock(&node->mutex);
	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
		mutex_unlock(&node->mutex);
		return 0;
	}

	ret = __btrfs_update_delayed_inode(trans, root, path, node);
	mutex_unlock(&node->mutex);
	return ret;
}

static inline int
__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
				   struct btrfs_path *path,
				   struct btrfs_delayed_node *node)
{
	int ret;

	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
	if (ret)
		return ret;

	ret = btrfs_delete_delayed_items(trans, path, node->root, node);
	if (ret)
		return ret;

	ret = btrfs_update_delayed_inode(trans, node->root, path, node);
	return ret;
}

/*
 * Called when committing the transaction.
 * Returns 0 on success.
 * Returns < 0 on error and returns with an aborted transaction with any
 * outstanding delayed items cleaned up.
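 *
 * If @nr is greater than zero, it limits the number of delayed nodes that are
 * processed; any other value means all of them are run.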
 */
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_delayed_node *curr_node, *prev_node;
	struct btrfs_path *path;
	struct btrfs_block_rsv *block_rsv;
	int ret = 0;
	bool count = (nr > 0);

	if (trans->aborted)
		return -EIO;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &fs_info->delayed_block_rsv;

	delayed_root = fs_info->delayed_root;

	curr_node = btrfs_first_delayed_node(delayed_root);
	while (curr_node && (!count || (count && nr--))) {
		ret = __btrfs_commit_inode_delayed_items(trans, path,
							 curr_node);
		if (ret) {
			btrfs_release_delayed_node(curr_node);
			curr_node = NULL;
			btrfs_abort_transaction(trans, ret);
			break;
		}

		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		btrfs_release_delayed_node(prev_node);
	}

	if (curr_node)
		btrfs_release_delayed_node(curr_node);
	btrfs_free_path(path);
	trans->block_rsv = block_rsv;

	return ret;
}

int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info)
{
	return __btrfs_run_delayed_items(trans, fs_info, -1);
}

int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
			       struct btrfs_fs_info *fs_info, int nr)
{
	return __btrfs_run_delayed_items(trans, fs_info, nr);
}

int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
				     struct btrfs_inode *inode)
{
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
	struct btrfs_path *path;
	struct btrfs_block_rsv *block_rsv;
	int ret;

	if (!delayed_node)
		return 0;

	mutex_lock(&delayed_node->mutex);
	if (!delayed_node->count) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return 0;
	}
	mutex_unlock(&delayed_node->mutex);

	path = btrfs_alloc_path();
	if (!path) {
		btrfs_release_delayed_node(delayed_node);
		return -ENOMEM;
	}
	path->leave_spinning = 1;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;

	ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);

	btrfs_release_delayed_node(delayed_node);
	btrfs_free_path(path);
	trans->block_rsv = block_rsv;

	return ret;
}

int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
	struct btrfs_path *path;
	struct btrfs_block_rsv *block_rsv;
	int ret;

	if (!delayed_node)
		return 0;

	mutex_lock(&delayed_node->mutex);
	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return 0;
	}
	mutex_unlock(&delayed_node->mutex);

	trans = btrfs_join_transaction(delayed_node->root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto trans_out;
	}
	path->leave_spinning = 1;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &fs_info->delayed_block_rsv;

	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
		ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
						   path, delayed_node);
	else
		ret = 0;
	mutex_unlock(&delayed_node->mutex);

	btrfs_free_path(path);
	trans->block_rsv = block_rsv;
trans_out:
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
out:
	btrfs_release_delayed_node(delayed_node);

	return ret;
}

void btrfs_remove_delayed_node(struct btrfs_inode *inode)
{
	struct btrfs_delayed_node *delayed_node;

	delayed_node = READ_ONCE(inode->delayed_node);
	if (!delayed_node)
		return;

	inode->delayed_node = NULL;
	btrfs_release_delayed_node(delayed_node);
}

struct btrfs_async_delayed_work {
	struct btrfs_delayed_root *delayed_root;
	int nr;
	struct btrfs_work work;
};

static void btrfs_async_run_delayed_root(struct btrfs_work *work)
{
	struct btrfs_async_delayed_work *async_work;
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_delayed_node *delayed_node = NULL;
	struct btrfs_root *root;
	struct btrfs_block_rsv *block_rsv;
	int total_done = 0;

	async_work = container_of(work, struct btrfs_async_delayed_work, work);
	delayed_root = async_work->delayed_root;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

again:
	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
		goto free_path;

	delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
	if (!delayed_node)
		goto free_path;

	path->leave_spinning = 1;
	root = delayed_node->root;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		goto release_path;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->delayed_block_rsv;

	__btrfs_commit_inode_delayed_items(trans, path, delayed_node);

	trans->block_rsv = block_rsv;
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty_nodelay(root->fs_info);

release_path:
	btrfs_release_path(path);
	total_done++;

	btrfs_release_prepared_delayed_node(delayed_node);
	if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
	    total_done < async_work->nr)
		goto again;

free_path:
	btrfs_free_path(path);
out:
	wake_up(&delayed_root->wait);
	kfree(async_work);
}


static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
				     struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_async_delayed_work *async_work;

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
	    btrfs_workqueue_normal_congested(fs_info->delayed_workers))
		return 0;

	async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
	if (!async_work)
		return -ENOMEM;

	async_work->delayed_root = delayed_root;
	btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
			btrfs_async_run_delayed_root, NULL, NULL);
	async_work->nr = nr;

	btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
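	/* async_work is freed by btrfs_async_run_delayed_root() once it has run */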
	return 0;
}

void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
{
	WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
}

static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
{
	int val = atomic_read(&delayed_root->items_seq);

	if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
		return 1;

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
		return 1;

	return 0;
}

void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
{
	struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
		return;

	if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
		int seq;
		int ret;

		seq = atomic_read(&delayed_root->items_seq);

		ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
		if (ret)
			return;

		wait_event_interruptible(delayed_root->wait,
					 could_end_wait(delayed_root, seq));
		return;
	}

	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}

/* Will return 0 or -ENOMEM */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   const char *name, int name_len,
				   struct btrfs_inode *dir,
				   struct btrfs_disk_key *disk_key, u8 type,
				   u64 index)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *delayed_item;
	struct btrfs_dir_item *dir_item;
	int ret;

	delayed_node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
	if (!delayed_item) {
		ret = -ENOMEM;
		goto release_node;
	}

	delayed_item->key.objectid = btrfs_ino(dir);
	delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
	delayed_item->key.offset = index;

	dir_item = (struct btrfs_dir_item *)delayed_item->data;
	dir_item->location = *disk_key;
	btrfs_set_stack_dir_transid(dir_item, trans->transid);
	btrfs_set_stack_dir_data_len(dir_item, 0);
	btrfs_set_stack_dir_name_len(dir_item, name_len);
	btrfs_set_stack_dir_type(dir_item, type);
	memcpy((char *)(dir_item + 1), name, name_len);

	ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
	/*
	 * we reserved enough space when we started a new transaction, so
	 * reserving metadata here can not fail
	 */
	BUG_ON(ret);

	mutex_lock(&delayed_node->mutex);
	ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		btrfs_err(fs_info,
			  "err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
			  name_len, name, delayed_node->root->objectid,
			  delayed_node->inode_id, ret);
		BUG();
	}
	mutex_unlock(&delayed_node->mutex);

release_node:
	btrfs_release_delayed_node(delayed_node);
	return ret;
}

static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
					       struct btrfs_delayed_node *node,
					       struct btrfs_key *key)
{
	struct btrfs_delayed_item *item;

	mutex_lock(&node->mutex);
	item = __btrfs_lookup_delayed_insertion_item(node, key);
	if (!item) {
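		/*
		 * No pending insertion for this index, so the caller has to
		 * queue a delayed deletion item instead.
		 */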
		mutex_unlock(&node->mutex);
		return 1;
	}

	btrfs_delayed_item_release_metadata(fs_info, item);
	btrfs_release_delayed_item(item);
	mutex_unlock(&node->mutex);
	return 0;
}

int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_inode *dir, u64 index)
{
	struct btrfs_delayed_node *node;
	struct btrfs_delayed_item *item;
	struct btrfs_key item_key;
	int ret;

	node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(node))
		return PTR_ERR(node);

	item_key.objectid = btrfs_ino(dir);
	item_key.type = BTRFS_DIR_INDEX_KEY;
	item_key.offset = index;

	ret = btrfs_delete_delayed_insertion_item(fs_info, node, &item_key);
	if (!ret)
		goto end;

	item = btrfs_alloc_delayed_item(0);
	if (!item) {
		ret = -ENOMEM;
		goto end;
	}

	item->key = item_key;

	ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
	/*
	 * we reserved enough space when we started a new transaction, so
	 * reserving metadata here can not fail.
	 */
	BUG_ON(ret);

	mutex_lock(&node->mutex);
	ret = __btrfs_add_delayed_deletion_item(node, item);
	if (unlikely(ret)) {
		btrfs_err(fs_info,
			  "err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
			  index, node->root->objectid, node->inode_id, ret);
		BUG();
	}
	mutex_unlock(&node->mutex);
end:
	btrfs_release_delayed_node(node);
	return ret;
}

int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
{
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);

	if (!delayed_node)
		return -ENOENT;

	/*
	 * Since we hold i_mutex of this directory, it is impossible that a new
	 * directory index is added into the delayed node and index_cnt is
	 * updated now.  So we needn't lock the delayed node.
	 */
	if (!delayed_node->index_cnt) {
		btrfs_release_delayed_node(delayed_node);
		return -EINVAL;
	}

	inode->index_cnt = delayed_node->index_cnt;
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

bool btrfs_readdir_get_delayed_items(struct inode *inode,
				     struct list_head *ins_list,
				     struct list_head *del_list)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *item;

	delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
	if (!delayed_node)
		return false;

	/*
	 * We can only do one readdir with delayed items at a time because of
	 * item->readdir_list.
	 */
	inode_unlock_shared(inode);
	inode_lock(inode);

	mutex_lock(&delayed_node->mutex);
	item = __btrfs_first_delayed_insertion_item(delayed_node);
	while (item) {
		refcount_inc(&item->refs);
		list_add_tail(&item->readdir_list, ins_list);
		item = __btrfs_next_delayed_item(item);
	}

	item = __btrfs_first_delayed_deletion_item(delayed_node);
	while (item) {
		refcount_inc(&item->refs);
		list_add_tail(&item->readdir_list, del_list);
		item = __btrfs_next_delayed_item(item);
	}
	mutex_unlock(&delayed_node->mutex);
	/*
	 * This delayed node is still cached in the btrfs inode, so refs
	 * must be > 1 now, and we needn't check whether it is going to be
	 * freed or not.
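	 *
	 * (That is also why a plain refcount_dec() below, rather than
	 * refcount_dec_and_test(), is safe here.)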
	 *
	 * Besides that, this function is used for reading a directory; no
	 * delayed items are inserted or deleted during that time, so we also
	 * needn't requeue or dequeue this delayed node.
	 */
	refcount_dec(&delayed_node->refs);

	return true;
}

void btrfs_readdir_put_delayed_items(struct inode *inode,
				     struct list_head *ins_list,
				     struct list_head *del_list)
{
	struct btrfs_delayed_item *curr, *next;

	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
		list_del(&curr->readdir_list);
		if (refcount_dec_and_test(&curr->refs))
			kfree(curr);
	}

	list_for_each_entry_safe(curr, next, del_list, readdir_list) {
		list_del(&curr->readdir_list);
		if (refcount_dec_and_test(&curr->refs))
			kfree(curr);
	}

	/*
	 * The VFS is going to do up_read(), so we need to downgrade back to a
	 * read lock.
	 */
	downgrade_write(&inode->i_rwsem);
}

int btrfs_should_delete_dir_index(struct list_head *del_list,
				  u64 index)
{
	struct btrfs_delayed_item *curr;
	int ret = 0;

	list_for_each_entry(curr, del_list, readdir_list) {
		if (curr->key.offset > index)
			break;
		if (curr->key.offset == index) {
			ret = 1;
			break;
		}
	}
	return ret;
}

/*
 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
 *
 */
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
				    struct list_head *ins_list)
{
	struct btrfs_dir_item *di;
	struct btrfs_delayed_item *curr, *next;
	struct btrfs_key location;
	char *name;
	int name_len;
	int over = 0;
	unsigned char d_type;

	if (list_empty(ins_list))
		return 0;

	/*
	 * The data of a delayed item never changes, so there is no need to
	 * lock the items.  And since we hold i_mutex of the directory, nobody
	 * can delete any directory index now.
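	 *
	 * ctx->pos is advanced past every entry we emit, so a later call
	 * resumes after the last delayed index that was returned.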
	 */
	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
		list_del(&curr->readdir_list);

		if (curr->key.offset < ctx->pos) {
			if (refcount_dec_and_test(&curr->refs))
				kfree(curr);
			continue;
		}

		ctx->pos = curr->key.offset;

		di = (struct btrfs_dir_item *)curr->data;
		name = (char *)(di + 1);
		name_len = btrfs_stack_dir_name_len(di);

		d_type = btrfs_filetype_table[di->type];
		btrfs_disk_key_to_cpu(&location, &di->location);

		over = !dir_emit(ctx, name, name_len,
				 location.objectid, d_type);

		if (refcount_dec_and_test(&curr->refs))
			kfree(curr);

		if (over)
			return 1;
		ctx->pos++;
	}
	return 0;
}

static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
				  struct btrfs_inode_item *inode_item,
				  struct inode *inode)
{
	btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
	btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
	btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
	btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
	btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
	btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
	btrfs_set_stack_inode_generation(inode_item,
					 BTRFS_I(inode)->generation);
	btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
	btrfs_set_stack_inode_transid(inode_item, trans->transid);
	btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
	btrfs_set_stack_inode_block_group(inode_item, 0);

	btrfs_set_stack_timespec_sec(&inode_item->atime,
				     inode->i_atime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->atime,
				      inode->i_atime.tv_nsec);

	btrfs_set_stack_timespec_sec(&inode_item->mtime,
				     inode->i_mtime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->mtime,
				      inode->i_mtime.tv_nsec);

	btrfs_set_stack_timespec_sec(&inode_item->ctime,
				     inode->i_ctime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->ctime,
				      inode->i_ctime.tv_nsec);

	btrfs_set_stack_timespec_sec(&inode_item->otime,
				     BTRFS_I(inode)->i_otime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->otime,
				      BTRFS_I(inode)->i_otime.tv_nsec);
}

int btrfs_fill_inode(struct inode *inode, u32 *rdev)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_inode_item *inode_item;

	delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
	if (!delayed_node)
		return -ENOENT;

	mutex_lock(&delayed_node->mutex);
	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return -ENOENT;
	}

	inode_item = &delayed_node->inode_item;

	i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
	i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
	btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
	inode->i_mode = btrfs_stack_inode_mode(inode_item);
	set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
	inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
	BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
	BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);

	inode->i_version = btrfs_stack_inode_sequence(inode_item);
	inode->i_rdev = 0;
	*rdev = btrfs_stack_inode_rdev(inode_item);
	BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);

	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);

	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);

	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);

	BTRFS_I(inode)->i_otime.tv_sec =
		btrfs_stack_timespec_sec(&inode_item->otime);
	BTRFS_I(inode)->i_otime.tv_nsec =
		btrfs_stack_timespec_nsec(&inode_item->otime);

	inode->i_generation = BTRFS_I(inode)->generation;
	BTRFS_I(inode)->index_cnt = (u64)-1;

	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node;
	int ret = 0;

	delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
		goto release_node;
	}

	ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
						   delayed_node);
	if (ret)
		goto release_node;

	fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
	set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
	delayed_node->count++;
	atomic_inc(&root->fs_info->delayed_root->items);
release_node:
	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return ret;
}

int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_delayed_node *delayed_node;

	/*
	 * we don't do delayed inode updates during log recovery because it
	 * leads to enospc problems.  This means we also can't do
	 * delayed inode refs.
	 */
	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		return -EAGAIN;

	delayed_node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	/*
	 * We don't reserve space for inode ref deletion because:
	 * - We ONLY do async inode ref deletion for the inode which has only
	 *   one link (i_nlink == 1), which means there is only one inode ref.
	 *   And in most cases, the inode ref and the inode item are in the
	 *   same leaf, and we will deal with them at the same time.
	 *   Since we are sure we will reserve the space for the inode item,
	 *   it is unnecessary to reserve space for inode ref deletion.
	 * - If the inode ref and the inode item are not in the same leaf,
	 *   we also needn't worry about the enospc problem, because we reserve
	 *   much more space for the inode update than it needs.
	 * - At the worst, we can steal some space from the global reservation.
	 *   It is very rare.
	 */
	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
		goto release_node;

	set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
	delayed_node->count++;
	atomic_inc(&fs_info->delayed_root->items);
release_node:
	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_root *root = delayed_node->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_delayed_item *curr_item, *prev_item;

	mutex_lock(&delayed_node->mutex);
	curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
	while (curr_item) {
		btrfs_delayed_item_release_metadata(fs_info, curr_item);
		prev_item = curr_item;
		curr_item = __btrfs_next_delayed_item(prev_item);
		btrfs_release_delayed_item(prev_item);
	}

	curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
	while (curr_item) {
		btrfs_delayed_item_release_metadata(fs_info, curr_item);
		prev_item = curr_item;
		curr_item = __btrfs_next_delayed_item(prev_item);
		btrfs_release_delayed_item(prev_item);
	}

	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
		btrfs_release_delayed_iref(delayed_node);

	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
		btrfs_release_delayed_inode(delayed_node);
	}
	mutex_unlock(&delayed_node->mutex);
}

void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
	struct btrfs_delayed_node *delayed_node;

	delayed_node = btrfs_get_delayed_node(inode);
	if (!delayed_node)
		return;

	__btrfs_kill_delayed_node(delayed_node);
	btrfs_release_delayed_node(delayed_node);
}

void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
	u64 inode_id = 0;
	struct btrfs_delayed_node *delayed_nodes[8];
	int i, n;

	while (1) {
		spin_lock(&root->inode_lock);
		n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
					   (void **)delayed_nodes, inode_id,
					   ARRAY_SIZE(delayed_nodes));
		if (!n) {
			spin_unlock(&root->inode_lock);
			break;
		}

		inode_id = delayed_nodes[n - 1]->inode_id + 1;

		for (i = 0; i < n; i++)
			refcount_inc(&delayed_nodes[i]->refs);
		spin_unlock(&root->inode_lock);

		for (i = 0; i < n; i++) {
			__btrfs_kill_delayed_node(delayed_nodes[i]);
			btrfs_release_delayed_node(delayed_nodes[i]);
		}
	}
}

void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
{
	struct btrfs_delayed_node *curr_node, *prev_node;

	curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
	while (curr_node) {
		__btrfs_kill_delayed_node(curr_node);

		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		btrfs_release_delayed_node(prev_node);
	}
}