/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"

/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;

	/*
	 * state
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 old_refcnt;
	u64 new_refcnt;
};

/*
 * glue structure to represent the relations between qgroups.
94 */ 95 struct btrfs_qgroup_list { 96 struct list_head next_group; 97 struct list_head next_member; 98 struct btrfs_qgroup *group; 99 struct btrfs_qgroup *member; 100 }; 101 102 #define ptr_to_u64(x) ((u64)(uintptr_t)x) 103 #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x) 104 105 static int 106 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 107 int init_flags); 108 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 109 110 /* must be called with qgroup_ioctl_lock held */ 111 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 112 u64 qgroupid) 113 { 114 struct rb_node *n = fs_info->qgroup_tree.rb_node; 115 struct btrfs_qgroup *qgroup; 116 117 while (n) { 118 qgroup = rb_entry(n, struct btrfs_qgroup, node); 119 if (qgroup->qgroupid < qgroupid) 120 n = n->rb_left; 121 else if (qgroup->qgroupid > qgroupid) 122 n = n->rb_right; 123 else 124 return qgroup; 125 } 126 return NULL; 127 } 128 129 /* must be called with qgroup_lock held */ 130 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 131 u64 qgroupid) 132 { 133 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 134 struct rb_node *parent = NULL; 135 struct btrfs_qgroup *qgroup; 136 137 while (*p) { 138 parent = *p; 139 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 140 141 if (qgroup->qgroupid < qgroupid) 142 p = &(*p)->rb_left; 143 else if (qgroup->qgroupid > qgroupid) 144 p = &(*p)->rb_right; 145 else 146 return qgroup; 147 } 148 149 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 150 if (!qgroup) 151 return ERR_PTR(-ENOMEM); 152 153 qgroup->qgroupid = qgroupid; 154 INIT_LIST_HEAD(&qgroup->groups); 155 INIT_LIST_HEAD(&qgroup->members); 156 INIT_LIST_HEAD(&qgroup->dirty); 157 158 rb_link_node(&qgroup->node, parent, p); 159 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 160 161 return qgroup; 162 } 163 164 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 165 { 166 struct btrfs_qgroup_list *list; 167 168 list_del(&qgroup->dirty); 169 while (!list_empty(&qgroup->groups)) { 170 list = list_first_entry(&qgroup->groups, 171 struct btrfs_qgroup_list, next_group); 172 list_del(&list->next_group); 173 list_del(&list->next_member); 174 kfree(list); 175 } 176 177 while (!list_empty(&qgroup->members)) { 178 list = list_first_entry(&qgroup->members, 179 struct btrfs_qgroup_list, next_member); 180 list_del(&list->next_group); 181 list_del(&list->next_member); 182 kfree(list); 183 } 184 kfree(qgroup); 185 } 186 187 /* must be called with qgroup_lock held */ 188 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 189 { 190 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 191 192 if (!qgroup) 193 return -ENOENT; 194 195 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 196 __del_qgroup_rb(qgroup); 197 return 0; 198 } 199 200 /* must be called with qgroup_lock held */ 201 static int add_relation_rb(struct btrfs_fs_info *fs_info, 202 u64 memberid, u64 parentid) 203 { 204 struct btrfs_qgroup *member; 205 struct btrfs_qgroup *parent; 206 struct btrfs_qgroup_list *list; 207 208 member = find_qgroup_rb(fs_info, memberid); 209 parent = find_qgroup_rb(fs_info, parentid); 210 if (!member || !parent) 211 return -ENOENT; 212 213 list = kzalloc(sizeof(*list), GFP_ATOMIC); 214 if (!list) 215 return -ENOMEM; 216 217 list->group = parent; 218 list->member = member; 219 list_add_tail(&list->next_group, &member->groups); 220 list_add_tail(&list->next_member, &parent->members); 221 222 return 0; 223 } 224 225 /* must 
 be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
			return 0;
		}
	}
	return -ENOENT;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qgroup;

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup)
		return -EINVAL;
	if (qgroup->rfer != rfer || qgroup->excl != excl)
		return -EINVAL;
	return 0;
}
#endif
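/*
 * For illustration only (not part of the kernel build): the relation helpers
 * above maintain a many-to-many graph with a single "glue" allocation per
 * edge, linked into both the member's ->groups list and the parent's
 * ->members list.  The userspace sketch below uses made-up minimal types
 * (plain singly linked pointers instead of list_head) to show that pattern
 * in isolation.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct node { struct edge *groups, *members; const char *name; };
struct edge {
	struct node *group;		/* parent qgroup of this edge */
	struct node *member;		/* child qgroup of this edge */
	struct edge *next_group;	/* next edge in member->groups */
	struct edge *next_member;	/* next edge in group->members */
};

/* add one edge, analogous to add_relation_rb() */
static int add_edge(struct node *member, struct node *group)
{
	struct edge *e = calloc(1, sizeof(*e));

	if (!e)
		return -1;
	e->group = group;
	e->member = member;
	e->next_group = member->groups;		/* link into member's parent list */
	member->groups = e;
	e->next_member = group->members;	/* link into parent's member list */
	group->members = e;
	return 0;
}

int main(void)
{
	struct node qg0 = { .name = "0/5" }, qg1 = { .name = "1/100" };
	struct edge *e;

	add_edge(&qg0, &qg1);
	for (e = qg0.groups; e; e = e->next_group)
		printf("%s is a member of %s\n", e->member->name, e->group->name);
	return 0;
}
#endif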
/*
 * The full config is read in one go, only called from open_ctree()
 * It doesn't use any locking, as at this point we're still single-threaded
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!fs_info->quota_enabled)
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				btrfs_err(fs_info,
					  "old qgroup version, quota disabled");
				goto out;
			}
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				btrfs_err(fs_info,
					  "qgroup generation mismatch, "
					  "marked as inconsistent");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			btrfs_err(fs_info, "inconsistent qgroup config");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			btrfs_warn(fs_info,
				   "orphan qgroup relation 0x%llx->0x%llx",
				   found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
		fs_info->quota_enabled = 0;
		fs_info->pending_quota_state = 0;
	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		   ret >= 0) {
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	}
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	return ret < 0 ? ret : 0;
}

/*
 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
 * first two are in single-threaded paths. And for the third one, we have set
 * quota_root to be null with qgroup_lock held before, so it is safe to clean
 * up the in-memory structures without qgroup_lock held.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);
		__del_qgroup_rb(qgroup);
	}
	/*
	 * we call btrfs_free_qgroup_config() when umounting
	 * filesystem and disabling quota, so we set qgroup_ulist
	 * to be null here to avoid double free.
472 */ 473 ulist_free(fs_info->qgroup_ulist); 474 fs_info->qgroup_ulist = NULL; 475 } 476 477 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 478 struct btrfs_root *quota_root, 479 u64 src, u64 dst) 480 { 481 int ret; 482 struct btrfs_path *path; 483 struct btrfs_key key; 484 485 path = btrfs_alloc_path(); 486 if (!path) 487 return -ENOMEM; 488 489 key.objectid = src; 490 key.type = BTRFS_QGROUP_RELATION_KEY; 491 key.offset = dst; 492 493 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 494 495 btrfs_mark_buffer_dirty(path->nodes[0]); 496 497 btrfs_free_path(path); 498 return ret; 499 } 500 501 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 502 struct btrfs_root *quota_root, 503 u64 src, u64 dst) 504 { 505 int ret; 506 struct btrfs_path *path; 507 struct btrfs_key key; 508 509 path = btrfs_alloc_path(); 510 if (!path) 511 return -ENOMEM; 512 513 key.objectid = src; 514 key.type = BTRFS_QGROUP_RELATION_KEY; 515 key.offset = dst; 516 517 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 518 if (ret < 0) 519 goto out; 520 521 if (ret > 0) { 522 ret = -ENOENT; 523 goto out; 524 } 525 526 ret = btrfs_del_item(trans, quota_root, path); 527 out: 528 btrfs_free_path(path); 529 return ret; 530 } 531 532 static int add_qgroup_item(struct btrfs_trans_handle *trans, 533 struct btrfs_root *quota_root, u64 qgroupid) 534 { 535 int ret; 536 struct btrfs_path *path; 537 struct btrfs_qgroup_info_item *qgroup_info; 538 struct btrfs_qgroup_limit_item *qgroup_limit; 539 struct extent_buffer *leaf; 540 struct btrfs_key key; 541 542 if (btrfs_test_is_dummy_root(quota_root)) 543 return 0; 544 545 path = btrfs_alloc_path(); 546 if (!path) 547 return -ENOMEM; 548 549 key.objectid = 0; 550 key.type = BTRFS_QGROUP_INFO_KEY; 551 key.offset = qgroupid; 552 553 /* 554 * Avoid a transaction abort by catching -EEXIST here. In that 555 * case, we proceed by re-initializing the existing structure 556 * on disk. 
557 */ 558 559 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 560 sizeof(*qgroup_info)); 561 if (ret && ret != -EEXIST) 562 goto out; 563 564 leaf = path->nodes[0]; 565 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 566 struct btrfs_qgroup_info_item); 567 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 568 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 569 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 570 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 571 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 572 573 btrfs_mark_buffer_dirty(leaf); 574 575 btrfs_release_path(path); 576 577 key.type = BTRFS_QGROUP_LIMIT_KEY; 578 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 579 sizeof(*qgroup_limit)); 580 if (ret && ret != -EEXIST) 581 goto out; 582 583 leaf = path->nodes[0]; 584 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 585 struct btrfs_qgroup_limit_item); 586 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 587 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 588 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 589 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 590 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 591 592 btrfs_mark_buffer_dirty(leaf); 593 594 ret = 0; 595 out: 596 btrfs_free_path(path); 597 return ret; 598 } 599 600 static int del_qgroup_item(struct btrfs_trans_handle *trans, 601 struct btrfs_root *quota_root, u64 qgroupid) 602 { 603 int ret; 604 struct btrfs_path *path; 605 struct btrfs_key key; 606 607 path = btrfs_alloc_path(); 608 if (!path) 609 return -ENOMEM; 610 611 key.objectid = 0; 612 key.type = BTRFS_QGROUP_INFO_KEY; 613 key.offset = qgroupid; 614 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 615 if (ret < 0) 616 goto out; 617 618 if (ret > 0) { 619 ret = -ENOENT; 620 goto out; 621 } 622 623 ret = btrfs_del_item(trans, quota_root, path); 624 if (ret) 625 goto out; 626 627 btrfs_release_path(path); 628 629 key.type = BTRFS_QGROUP_LIMIT_KEY; 630 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 631 if (ret < 0) 632 goto out; 633 634 if (ret > 0) { 635 ret = -ENOENT; 636 goto out; 637 } 638 639 ret = btrfs_del_item(trans, quota_root, path); 640 641 out: 642 btrfs_free_path(path); 643 return ret; 644 } 645 646 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 647 struct btrfs_root *root, u64 qgroupid, 648 u64 flags, u64 max_rfer, u64 max_excl, 649 u64 rsv_rfer, u64 rsv_excl) 650 { 651 struct btrfs_path *path; 652 struct btrfs_key key; 653 struct extent_buffer *l; 654 struct btrfs_qgroup_limit_item *qgroup_limit; 655 int ret; 656 int slot; 657 658 key.objectid = 0; 659 key.type = BTRFS_QGROUP_LIMIT_KEY; 660 key.offset = qgroupid; 661 662 path = btrfs_alloc_path(); 663 if (!path) 664 return -ENOMEM; 665 666 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 667 if (ret > 0) 668 ret = -ENOENT; 669 670 if (ret) 671 goto out; 672 673 l = path->nodes[0]; 674 slot = path->slots[0]; 675 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 676 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); 677 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); 678 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); 679 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); 680 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); 681 682 btrfs_mark_buffer_dirty(l); 683 684 out: 685 btrfs_free_path(path); 686 return ret; 687 } 688 689 static int 
update_qgroup_info_item(struct btrfs_trans_handle *trans, 690 struct btrfs_root *root, 691 struct btrfs_qgroup *qgroup) 692 { 693 struct btrfs_path *path; 694 struct btrfs_key key; 695 struct extent_buffer *l; 696 struct btrfs_qgroup_info_item *qgroup_info; 697 int ret; 698 int slot; 699 700 if (btrfs_test_is_dummy_root(root)) 701 return 0; 702 703 key.objectid = 0; 704 key.type = BTRFS_QGROUP_INFO_KEY; 705 key.offset = qgroup->qgroupid; 706 707 path = btrfs_alloc_path(); 708 if (!path) 709 return -ENOMEM; 710 711 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 712 if (ret > 0) 713 ret = -ENOENT; 714 715 if (ret) 716 goto out; 717 718 l = path->nodes[0]; 719 slot = path->slots[0]; 720 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 721 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 722 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 723 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 724 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 725 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 726 727 btrfs_mark_buffer_dirty(l); 728 729 out: 730 btrfs_free_path(path); 731 return ret; 732 } 733 734 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 735 struct btrfs_fs_info *fs_info, 736 struct btrfs_root *root) 737 { 738 struct btrfs_path *path; 739 struct btrfs_key key; 740 struct extent_buffer *l; 741 struct btrfs_qgroup_status_item *ptr; 742 int ret; 743 int slot; 744 745 key.objectid = 0; 746 key.type = BTRFS_QGROUP_STATUS_KEY; 747 key.offset = 0; 748 749 path = btrfs_alloc_path(); 750 if (!path) 751 return -ENOMEM; 752 753 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 754 if (ret > 0) 755 ret = -ENOENT; 756 757 if (ret) 758 goto out; 759 760 l = path->nodes[0]; 761 slot = path->slots[0]; 762 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 763 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 764 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 765 btrfs_set_qgroup_status_rescan(l, ptr, 766 fs_info->qgroup_rescan_progress.objectid); 767 768 btrfs_mark_buffer_dirty(l); 769 770 out: 771 btrfs_free_path(path); 772 return ret; 773 } 774 775 /* 776 * called with qgroup_lock held 777 */ 778 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 779 struct btrfs_root *root) 780 { 781 struct btrfs_path *path; 782 struct btrfs_key key; 783 struct extent_buffer *leaf = NULL; 784 int ret; 785 int nr = 0; 786 787 path = btrfs_alloc_path(); 788 if (!path) 789 return -ENOMEM; 790 791 path->leave_spinning = 1; 792 793 key.objectid = 0; 794 key.offset = 0; 795 key.type = 0; 796 797 while (1) { 798 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 799 if (ret < 0) 800 goto out; 801 leaf = path->nodes[0]; 802 nr = btrfs_header_nritems(leaf); 803 if (!nr) 804 break; 805 /* 806 * delete the leaf one by one 807 * since the whole tree is going 808 * to be deleted. 
809 */ 810 path->slots[0] = 0; 811 ret = btrfs_del_items(trans, root, path, 0, nr); 812 if (ret) 813 goto out; 814 815 btrfs_release_path(path); 816 } 817 ret = 0; 818 out: 819 root->fs_info->pending_quota_state = 0; 820 btrfs_free_path(path); 821 return ret; 822 } 823 824 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 825 struct btrfs_fs_info *fs_info) 826 { 827 struct btrfs_root *quota_root; 828 struct btrfs_root *tree_root = fs_info->tree_root; 829 struct btrfs_path *path = NULL; 830 struct btrfs_qgroup_status_item *ptr; 831 struct extent_buffer *leaf; 832 struct btrfs_key key; 833 struct btrfs_key found_key; 834 struct btrfs_qgroup *qgroup = NULL; 835 int ret = 0; 836 int slot; 837 838 mutex_lock(&fs_info->qgroup_ioctl_lock); 839 if (fs_info->quota_root) { 840 fs_info->pending_quota_state = 1; 841 goto out; 842 } 843 844 fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); 845 if (!fs_info->qgroup_ulist) { 846 ret = -ENOMEM; 847 goto out; 848 } 849 850 /* 851 * initially create the quota tree 852 */ 853 quota_root = btrfs_create_tree(trans, fs_info, 854 BTRFS_QUOTA_TREE_OBJECTID); 855 if (IS_ERR(quota_root)) { 856 ret = PTR_ERR(quota_root); 857 goto out; 858 } 859 860 path = btrfs_alloc_path(); 861 if (!path) { 862 ret = -ENOMEM; 863 goto out_free_root; 864 } 865 866 key.objectid = 0; 867 key.type = BTRFS_QGROUP_STATUS_KEY; 868 key.offset = 0; 869 870 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 871 sizeof(*ptr)); 872 if (ret) 873 goto out_free_path; 874 875 leaf = path->nodes[0]; 876 ptr = btrfs_item_ptr(leaf, path->slots[0], 877 struct btrfs_qgroup_status_item); 878 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 879 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 880 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 881 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 882 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 883 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 884 885 btrfs_mark_buffer_dirty(leaf); 886 887 key.objectid = 0; 888 key.type = BTRFS_ROOT_REF_KEY; 889 key.offset = 0; 890 891 btrfs_release_path(path); 892 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 893 if (ret > 0) 894 goto out_add_root; 895 if (ret < 0) 896 goto out_free_path; 897 898 899 while (1) { 900 slot = path->slots[0]; 901 leaf = path->nodes[0]; 902 btrfs_item_key_to_cpu(leaf, &found_key, slot); 903 904 if (found_key.type == BTRFS_ROOT_REF_KEY) { 905 ret = add_qgroup_item(trans, quota_root, 906 found_key.offset); 907 if (ret) 908 goto out_free_path; 909 910 qgroup = add_qgroup_rb(fs_info, found_key.offset); 911 if (IS_ERR(qgroup)) { 912 ret = PTR_ERR(qgroup); 913 goto out_free_path; 914 } 915 } 916 ret = btrfs_next_item(tree_root, path); 917 if (ret < 0) 918 goto out_free_path; 919 if (ret) 920 break; 921 } 922 923 out_add_root: 924 btrfs_release_path(path); 925 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 926 if (ret) 927 goto out_free_path; 928 929 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 930 if (IS_ERR(qgroup)) { 931 ret = PTR_ERR(qgroup); 932 goto out_free_path; 933 } 934 spin_lock(&fs_info->qgroup_lock); 935 fs_info->quota_root = quota_root; 936 fs_info->pending_quota_state = 1; 937 spin_unlock(&fs_info->qgroup_lock); 938 out_free_path: 939 btrfs_free_path(path); 940 out_free_root: 941 if (ret) { 942 free_extent_buffer(quota_root->node); 943 free_extent_buffer(quota_root->commit_root); 944 kfree(quota_root); 945 } 946 out: 947 if (ret) { 948 ulist_free(fs_info->qgroup_ulist); 
949 fs_info->qgroup_ulist = NULL; 950 } 951 mutex_unlock(&fs_info->qgroup_ioctl_lock); 952 return ret; 953 } 954 955 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 956 struct btrfs_fs_info *fs_info) 957 { 958 struct btrfs_root *tree_root = fs_info->tree_root; 959 struct btrfs_root *quota_root; 960 int ret = 0; 961 962 mutex_lock(&fs_info->qgroup_ioctl_lock); 963 if (!fs_info->quota_root) 964 goto out; 965 spin_lock(&fs_info->qgroup_lock); 966 fs_info->quota_enabled = 0; 967 fs_info->pending_quota_state = 0; 968 quota_root = fs_info->quota_root; 969 fs_info->quota_root = NULL; 970 spin_unlock(&fs_info->qgroup_lock); 971 972 btrfs_free_qgroup_config(fs_info); 973 974 ret = btrfs_clean_quota_tree(trans, quota_root); 975 if (ret) 976 goto out; 977 978 ret = btrfs_del_root(trans, tree_root, "a_root->root_key); 979 if (ret) 980 goto out; 981 982 list_del("a_root->dirty_list); 983 984 btrfs_tree_lock(quota_root->node); 985 clean_tree_block(trans, tree_root, quota_root->node); 986 btrfs_tree_unlock(quota_root->node); 987 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 988 989 free_extent_buffer(quota_root->node); 990 free_extent_buffer(quota_root->commit_root); 991 kfree(quota_root); 992 out: 993 mutex_unlock(&fs_info->qgroup_ioctl_lock); 994 return ret; 995 } 996 997 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 998 struct btrfs_qgroup *qgroup) 999 { 1000 if (list_empty(&qgroup->dirty)) 1001 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1002 } 1003 1004 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1005 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1006 { 1007 struct btrfs_root *quota_root; 1008 struct btrfs_qgroup *parent; 1009 struct btrfs_qgroup *member; 1010 struct btrfs_qgroup_list *list; 1011 int ret = 0; 1012 1013 mutex_lock(&fs_info->qgroup_ioctl_lock); 1014 quota_root = fs_info->quota_root; 1015 if (!quota_root) { 1016 ret = -EINVAL; 1017 goto out; 1018 } 1019 member = find_qgroup_rb(fs_info, src); 1020 parent = find_qgroup_rb(fs_info, dst); 1021 if (!member || !parent) { 1022 ret = -EINVAL; 1023 goto out; 1024 } 1025 1026 /* check if such qgroup relation exist firstly */ 1027 list_for_each_entry(list, &member->groups, next_group) { 1028 if (list->group == parent) { 1029 ret = -EEXIST; 1030 goto out; 1031 } 1032 } 1033 1034 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1035 if (ret) 1036 goto out; 1037 1038 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1039 if (ret) { 1040 del_qgroup_relation_item(trans, quota_root, src, dst); 1041 goto out; 1042 } 1043 1044 spin_lock(&fs_info->qgroup_lock); 1045 ret = add_relation_rb(quota_root->fs_info, src, dst); 1046 spin_unlock(&fs_info->qgroup_lock); 1047 out: 1048 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1049 return ret; 1050 } 1051 1052 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1053 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1054 { 1055 struct btrfs_root *quota_root; 1056 struct btrfs_qgroup *parent; 1057 struct btrfs_qgroup *member; 1058 struct btrfs_qgroup_list *list; 1059 int ret = 0; 1060 int err; 1061 1062 mutex_lock(&fs_info->qgroup_ioctl_lock); 1063 quota_root = fs_info->quota_root; 1064 if (!quota_root) { 1065 ret = -EINVAL; 1066 goto out; 1067 } 1068 1069 member = find_qgroup_rb(fs_info, src); 1070 parent = find_qgroup_rb(fs_info, dst); 1071 if (!member || !parent) { 1072 ret = -EINVAL; 1073 goto out; 1074 } 1075 1076 /* check if such qgroup relation exist firstly */ 1077 list_for_each_entry(list, &member->groups, 
next_group) { 1078 if (list->group == parent) 1079 goto exist; 1080 } 1081 ret = -ENOENT; 1082 goto out; 1083 exist: 1084 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1085 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1086 if (err && !ret) 1087 ret = err; 1088 1089 spin_lock(&fs_info->qgroup_lock); 1090 del_relation_rb(fs_info, src, dst); 1091 spin_unlock(&fs_info->qgroup_lock); 1092 out: 1093 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1094 return ret; 1095 } 1096 1097 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1098 struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) 1099 { 1100 struct btrfs_root *quota_root; 1101 struct btrfs_qgroup *qgroup; 1102 int ret = 0; 1103 1104 mutex_lock(&fs_info->qgroup_ioctl_lock); 1105 quota_root = fs_info->quota_root; 1106 if (!quota_root) { 1107 ret = -EINVAL; 1108 goto out; 1109 } 1110 qgroup = find_qgroup_rb(fs_info, qgroupid); 1111 if (qgroup) { 1112 ret = -EEXIST; 1113 goto out; 1114 } 1115 1116 ret = add_qgroup_item(trans, quota_root, qgroupid); 1117 if (ret) 1118 goto out; 1119 1120 spin_lock(&fs_info->qgroup_lock); 1121 qgroup = add_qgroup_rb(fs_info, qgroupid); 1122 spin_unlock(&fs_info->qgroup_lock); 1123 1124 if (IS_ERR(qgroup)) 1125 ret = PTR_ERR(qgroup); 1126 out: 1127 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1128 return ret; 1129 } 1130 1131 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1132 struct btrfs_fs_info *fs_info, u64 qgroupid) 1133 { 1134 struct btrfs_root *quota_root; 1135 struct btrfs_qgroup *qgroup; 1136 int ret = 0; 1137 1138 mutex_lock(&fs_info->qgroup_ioctl_lock); 1139 quota_root = fs_info->quota_root; 1140 if (!quota_root) { 1141 ret = -EINVAL; 1142 goto out; 1143 } 1144 1145 qgroup = find_qgroup_rb(fs_info, qgroupid); 1146 if (!qgroup) { 1147 ret = -ENOENT; 1148 goto out; 1149 } else { 1150 /* check if there are no relations to this qgroup */ 1151 if (!list_empty(&qgroup->groups) || 1152 !list_empty(&qgroup->members)) { 1153 ret = -EBUSY; 1154 goto out; 1155 } 1156 } 1157 ret = del_qgroup_item(trans, quota_root, qgroupid); 1158 1159 spin_lock(&fs_info->qgroup_lock); 1160 del_qgroup_rb(quota_root->fs_info, qgroupid); 1161 spin_unlock(&fs_info->qgroup_lock); 1162 out: 1163 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1164 return ret; 1165 } 1166 1167 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1168 struct btrfs_fs_info *fs_info, u64 qgroupid, 1169 struct btrfs_qgroup_limit *limit) 1170 { 1171 struct btrfs_root *quota_root; 1172 struct btrfs_qgroup *qgroup; 1173 int ret = 0; 1174 1175 mutex_lock(&fs_info->qgroup_ioctl_lock); 1176 quota_root = fs_info->quota_root; 1177 if (!quota_root) { 1178 ret = -EINVAL; 1179 goto out; 1180 } 1181 1182 qgroup = find_qgroup_rb(fs_info, qgroupid); 1183 if (!qgroup) { 1184 ret = -ENOENT; 1185 goto out; 1186 } 1187 ret = update_qgroup_limit_item(trans, quota_root, qgroupid, 1188 limit->flags, limit->max_rfer, 1189 limit->max_excl, limit->rsv_rfer, 1190 limit->rsv_excl); 1191 if (ret) { 1192 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1193 btrfs_info(fs_info, "unable to update quota limit for %llu", 1194 qgroupid); 1195 } 1196 1197 spin_lock(&fs_info->qgroup_lock); 1198 qgroup->lim_flags = limit->flags; 1199 qgroup->max_rfer = limit->max_rfer; 1200 qgroup->max_excl = limit->max_excl; 1201 qgroup->rsv_rfer = limit->rsv_rfer; 1202 qgroup->rsv_excl = limit->rsv_excl; 1203 spin_unlock(&fs_info->qgroup_lock); 1204 out: 1205 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1206 return ret; 1207 } 1208 1209 static int 
comp_oper_exist(struct btrfs_qgroup_operation *oper1,
		struct btrfs_qgroup_operation *oper2)
{
	/*
	 * Ignore seq and type here, we're looking for any operation
	 * at all related to this extent on that root.
	 */
	if (oper1->bytenr < oper2->bytenr)
		return -1;
	if (oper1->bytenr > oper2->bytenr)
		return 1;
	if (oper1->ref_root < oper2->ref_root)
		return -1;
	if (oper1->ref_root > oper2->ref_root)
		return 1;
	return 0;
}

static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
			      struct btrfs_qgroup_operation *oper)
{
	struct rb_node *n;
	struct btrfs_qgroup_operation *cur;
	int cmp;

	spin_lock(&fs_info->qgroup_op_lock);
	n = fs_info->qgroup_op_tree.rb_node;
	while (n) {
		cur = rb_entry(n, struct btrfs_qgroup_operation, n);
		cmp = comp_oper_exist(cur, oper);
		if (cmp < 0) {
			n = n->rb_right;
		} else if (cmp) {
			n = n->rb_left;
		} else {
			spin_unlock(&fs_info->qgroup_op_lock);
			return -EEXIST;
		}
	}
	spin_unlock(&fs_info->qgroup_op_lock);
	return 0;
}

static int comp_oper(struct btrfs_qgroup_operation *oper1,
		     struct btrfs_qgroup_operation *oper2)
{
	if (oper1->bytenr < oper2->bytenr)
		return -1;
	if (oper1->bytenr > oper2->bytenr)
		return 1;
	if (oper1->seq < oper2->seq)
		return -1;
	if (oper1->seq > oper2->seq)
		return 1;
	if (oper1->ref_root < oper2->ref_root)
		return -1;
	if (oper1->ref_root > oper2->ref_root)
		return 1;
	if (oper1->type < oper2->type)
		return -1;
	if (oper1->type > oper2->type)
		return 1;
	return 0;
}

static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
			      struct btrfs_qgroup_operation *oper)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup_operation *cur;
	int cmp;

	spin_lock(&fs_info->qgroup_op_lock);
	p = &fs_info->qgroup_op_tree.rb_node;
	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
		cmp = comp_oper(cur, oper);
		if (cmp < 0) {
			p = &(*p)->rb_right;
		} else if (cmp) {
			p = &(*p)->rb_left;
		} else {
			spin_unlock(&fs_info->qgroup_op_lock);
			return -EEXIST;
		}
	}
	rb_link_node(&oper->n, parent, p);
	rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
	spin_unlock(&fs_info->qgroup_op_lock);
	return 0;
}
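/*
 * For illustration only (not part of the kernel build): the comparators
 * above must be antisymmetric for the rb-tree to stay consistent; a
 * comparison step that returns the same sign for both "less" and "greater"
 * silently breaks lookups and insertions.  The userspace sketch below uses
 * a simplified stand-in struct to show a quick sanity check of such a
 * comparator.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct op { uint64_t bytenr, seq, ref_root, type; };

static int cmp_op(const struct op *a, const struct op *b)
{
	if (a->bytenr != b->bytenr)
		return a->bytenr < b->bytenr ? -1 : 1;
	if (a->seq != b->seq)
		return a->seq < b->seq ? -1 : 1;
	if (a->ref_root != b->ref_root)
		return a->ref_root < b->ref_root ? -1 : 1;
	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;
	return 0;
}

int main(void)
{
	struct op a = { .bytenr = 4096, .seq = 1, .ref_root = 257, .type = 0 };
	struct op b = { .bytenr = 4096, .seq = 2, .ref_root = 257, .type = 0 };

	/* antisymmetry: cmp(a, b) and cmp(b, a) must have opposite signs */
	assert(cmp_op(&a, &b) == -cmp_op(&b, &a));
	assert(cmp_op(&a, &a) == 0);
	printf("comparator looks sane\n");
	return 0;
}
#endif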
/*
 * Record a quota operation for processing later on.
 * @trans: the transaction we are adding the delayed op to.
 * @fs_info: the fs_info for this fs.
 * @ref_root: the root of the reference we are acting on,
 * @bytenr: the bytenr we are acting on.
 * @num_bytes: the number of bytes in the reference.
 * @type: the type of operation this is.
 * @mod_seq: do we need to get a sequence number for looking up roots.
 *
 * We just add it to our trans qgroup_ref_list and carry on and process these
 * operations in order at some later point.  If the reference root isn't a fs
 * root then we don't bother with doing anything.
 *
 * MUST BE HOLDING THE REF LOCK.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info, u64 ref_root,
			    u64 bytenr, u64 num_bytes,
			    enum btrfs_qgroup_operation_type type, int mod_seq)
{
	struct btrfs_qgroup_operation *oper;
	int ret;

	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
		return 0;

	oper = kmalloc(sizeof(*oper), GFP_NOFS);
	if (!oper)
		return -ENOMEM;

	oper->ref_root = ref_root;
	oper->bytenr = bytenr;
	oper->num_bytes = num_bytes;
	oper->type = type;
	oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
	INIT_LIST_HEAD(&oper->elem.list);
	oper->elem.seq = 0;

	trace_btrfs_qgroup_record_ref(oper);

	if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
		/*
		 * If any operation for this bytenr/ref_root combo
		 * exists, then we know it's not exclusively owned and
		 * shouldn't be queued up.
		 *
		 * This also catches the case where we have a cloned
		 * extent that gets queued up multiple times during
		 * drop snapshot.
		 */
		if (qgroup_oper_exists(fs_info, oper)) {
			kfree(oper);
			return 0;
		}
	}

	ret = insert_qgroup_oper(fs_info, oper);
	if (ret) {
		/* Shouldn't happen so have an assert for developers */
		ASSERT(0);
		kfree(oper);
		return ret;
	}
	list_add_tail(&oper->list, &trans->qgroup_ref_list);

	if (mod_seq)
		btrfs_get_tree_mod_seq(fs_info, &oper->elem);

	return 0;
}

/*
 * The easy accounting, if we are adding/removing the only ref for an extent
 * then this qgroup and all of the parent qgroups get their reference and
 * exclusive counts adjusted.
 */
static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				  struct btrfs_qgroup_operation *oper)
{
	struct btrfs_qgroup *qgroup;
	struct ulist *tmp;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int sign = 0;
	int ret = 0;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;

	spin_lock(&fs_info->qgroup_lock);
	if (!fs_info->quota_root)
		goto out;
	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
	if (!qgroup)
		goto out;
	switch (oper->type) {
	case BTRFS_QGROUP_OPER_ADD_EXCL:
		sign = 1;
		break;
	case BTRFS_QGROUP_OPER_SUB_EXCL:
		sign = -1;
		break;
	default:
		ASSERT(0);
	}
	qgroup->rfer += sign * oper->num_bytes;
	qgroup->rfer_cmpr += sign * oper->num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
	qgroup->excl += sign * oper->num_bytes;
	qgroup->excl_cmpr += sign * oper->num_bytes;

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				ptr_to_u64(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = u64_to_ptr(unode->aux);
		qgroup->rfer += sign * oper->num_bytes;
		qgroup->rfer_cmpr += sign * oper->num_bytes;
		qgroup->excl += sign * oper->num_bytes;
		if (sign < 0)
			WARN_ON(qgroup->excl < oper->num_bytes);
		qgroup->excl_cmpr += sign * oper->num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(tmp);
	return ret;
}
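/*
 * For illustration only (not part of the kernel build): the walk above uses
 * a ulist both as a worklist and as a visited set, so every ancestor group
 * is adjusted exactly once even if several membership paths lead to it.
 * The userspace sketch below shows the same idea with made-up minimal types
 * and a fixed-size visited array.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define MAX_GROUPS 16

struct group {
	uint64_t excl;
	int nr_parents;
	int parents[4];		/* indices into the groups[] array */
};

/* add "delta" to a group and to every ancestor, each exactly once */
static void propagate(struct group *groups, int start, int64_t delta)
{
	int queue[MAX_GROUPS], visited[MAX_GROUPS] = { 0 };
	int head = 0, tail = 0, i;

	queue[tail++] = start;
	visited[start] = 1;
	while (head < tail) {
		struct group *g = &groups[queue[head++]];

		g->excl += delta;
		for (i = 0; i < g->nr_parents; i++) {
			int p = g->parents[i];

			if (!visited[p]) {	/* skip already-queued parents */
				visited[p] = 1;
				queue[tail++] = p;
			}
		}
	}
}

int main(void)
{
	/* groups 0 and 1 are children of 2; group 2 is a child of 3 */
	struct group groups[4] = {
		[0] = { .nr_parents = 1, .parents = { 2 } },
		[1] = { .nr_parents = 1, .parents = { 2 } },
		[2] = { .nr_parents = 1, .parents = { 3 } },
		[3] = { .nr_parents = 0 },
	};

	propagate(groups, 0, 4096);
	printf("group 2 excl = %llu, group 3 excl = %llu\n",
	       (unsigned long long)groups[2].excl,
	       (unsigned long long)groups[3].excl);
	return 0;
}
#endif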
/*
 * Walk all of the roots that pointed to our bytenr and adjust their refcnts
 * properly.
 */
static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
				  u64 root_to_skip, struct ulist *tmp,
				  struct ulist *roots, struct ulist *qgroups,
				  u64 seq, int *old_roots, int rescan)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		/* We don't count our current root here */
		if (unode->val == root_to_skip)
			continue;
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;
		/*
		 * We could have a pending removal of this same ref so we may
		 * not have actually found our ref root when doing
		 * btrfs_find_all_roots, so we need to keep track of how many
		 * old roots we find in case we removed ours and added a
		 * different one at the same time.  I don't think this could
		 * happen in practice but that sort of thinking leads to pain
		 * and suffering and to the dark side.
		 */
		(*old_roots)++;

		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = u64_to_ptr(tmp_unode->aux);
			/*
			 * We use this sequence number to keep from having to
			 * run the whole list and 0 out the refcnt every time.
			 * We basically use sequence as the known 0 count and
			 * then add 1 every time we see a qgroup.  This is how
			 * we get how many of the roots actually point up to
			 * the upper level qgroups in order to determine
			 * exclusive counts.
			 *
			 * For rescan we want to set old_refcnt to seq so our
			 * exclusive calculations end up correct.
			 */
			if (rescan)
				qg->old_refcnt = seq;
			else if (qg->old_refcnt < seq)
				qg->old_refcnt = seq + 1;
			else
				qg->old_refcnt++;

			if (qg->new_refcnt < seq)
				qg->new_refcnt = seq + 1;
			else
				qg->new_refcnt++;
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(qgroups, glist->group->qgroupid,
						ptr_to_u64(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, glist->group->qgroupid,
						ptr_to_u64(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}
	return 0;
}

/*
 * We need to walk forward in our operation tree and account for any roots that
 * were deleted after we made this operation.
1546 */ 1547 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info, 1548 struct btrfs_qgroup_operation *oper, 1549 struct ulist *tmp, 1550 struct ulist *qgroups, u64 seq, 1551 int *old_roots) 1552 { 1553 struct ulist_node *unode; 1554 struct ulist_iterator uiter; 1555 struct btrfs_qgroup *qg; 1556 struct btrfs_qgroup_operation *tmp_oper; 1557 struct rb_node *n; 1558 int ret; 1559 1560 ulist_reinit(tmp); 1561 1562 /* 1563 * We only walk forward in the tree since we're only interested in 1564 * removals that happened _after_ our operation. 1565 */ 1566 spin_lock(&fs_info->qgroup_op_lock); 1567 n = rb_next(&oper->n); 1568 spin_unlock(&fs_info->qgroup_op_lock); 1569 if (!n) 1570 return 0; 1571 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1572 while (tmp_oper->bytenr == oper->bytenr) { 1573 /* 1574 * If it's not a removal we don't care, additions work out 1575 * properly with our refcnt tracking. 1576 */ 1577 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED && 1578 tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL) 1579 goto next; 1580 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root); 1581 if (!qg) 1582 goto next; 1583 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), 1584 GFP_ATOMIC); 1585 if (ret) { 1586 if (ret < 0) 1587 return ret; 1588 /* 1589 * We only want to increase old_roots if this qgroup is 1590 * not already in the list of qgroups. If it is already 1591 * there then that means it must have been re-added or 1592 * the delete will be discarded because we had an 1593 * existing ref that we haven't looked up yet. In this 1594 * case we don't want to increase old_roots. So if ret 1595 * == 1 then we know that this is the first time we've 1596 * seen this qgroup and we can bump the old_roots. 1597 */ 1598 (*old_roots)++; 1599 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), 1600 GFP_ATOMIC); 1601 if (ret < 0) 1602 return ret; 1603 } 1604 next: 1605 spin_lock(&fs_info->qgroup_op_lock); 1606 n = rb_next(&tmp_oper->n); 1607 spin_unlock(&fs_info->qgroup_op_lock); 1608 if (!n) 1609 break; 1610 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1611 } 1612 1613 /* Ok now process the qgroups we found */ 1614 ULIST_ITER_INIT(&uiter); 1615 while ((unode = ulist_next(tmp, &uiter))) { 1616 struct btrfs_qgroup_list *glist; 1617 1618 qg = u64_to_ptr(unode->aux); 1619 if (qg->old_refcnt < seq) 1620 qg->old_refcnt = seq + 1; 1621 else 1622 qg->old_refcnt++; 1623 if (qg->new_refcnt < seq) 1624 qg->new_refcnt = seq + 1; 1625 else 1626 qg->new_refcnt++; 1627 list_for_each_entry(glist, &qg->groups, next_group) { 1628 ret = ulist_add(qgroups, glist->group->qgroupid, 1629 ptr_to_u64(glist->group), GFP_ATOMIC); 1630 if (ret < 0) 1631 return ret; 1632 ret = ulist_add(tmp, glist->group->qgroupid, 1633 ptr_to_u64(glist->group), GFP_ATOMIC); 1634 if (ret < 0) 1635 return ret; 1636 } 1637 } 1638 return 0; 1639 } 1640 1641 /* Add refcnt for the newly added reference. 
*/ 1642 static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info, 1643 struct btrfs_qgroup_operation *oper, 1644 struct btrfs_qgroup *qgroup, 1645 struct ulist *tmp, struct ulist *qgroups, 1646 u64 seq) 1647 { 1648 struct ulist_node *unode; 1649 struct ulist_iterator uiter; 1650 struct btrfs_qgroup *qg; 1651 int ret; 1652 1653 ulist_reinit(tmp); 1654 ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup), 1655 GFP_ATOMIC); 1656 if (ret < 0) 1657 return ret; 1658 ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup), 1659 GFP_ATOMIC); 1660 if (ret < 0) 1661 return ret; 1662 ULIST_ITER_INIT(&uiter); 1663 while ((unode = ulist_next(tmp, &uiter))) { 1664 struct btrfs_qgroup_list *glist; 1665 1666 qg = u64_to_ptr(unode->aux); 1667 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { 1668 if (qg->new_refcnt < seq) 1669 qg->new_refcnt = seq + 1; 1670 else 1671 qg->new_refcnt++; 1672 } else { 1673 if (qg->old_refcnt < seq) 1674 qg->old_refcnt = seq + 1; 1675 else 1676 qg->old_refcnt++; 1677 } 1678 list_for_each_entry(glist, &qg->groups, next_group) { 1679 ret = ulist_add(tmp, glist->group->qgroupid, 1680 ptr_to_u64(glist->group), GFP_ATOMIC); 1681 if (ret < 0) 1682 return ret; 1683 ret = ulist_add(qgroups, glist->group->qgroupid, 1684 ptr_to_u64(glist->group), GFP_ATOMIC); 1685 if (ret < 0) 1686 return ret; 1687 } 1688 } 1689 return 0; 1690 } 1691 1692 /* 1693 * This adjusts the counters for all referenced qgroups if need be. 1694 */ 1695 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, 1696 u64 root_to_skip, u64 num_bytes, 1697 struct ulist *qgroups, u64 seq, 1698 int old_roots, int new_roots, int rescan) 1699 { 1700 struct ulist_node *unode; 1701 struct ulist_iterator uiter; 1702 struct btrfs_qgroup *qg; 1703 u64 cur_new_count, cur_old_count; 1704 1705 ULIST_ITER_INIT(&uiter); 1706 while ((unode = ulist_next(qgroups, &uiter))) { 1707 bool dirty = false; 1708 1709 qg = u64_to_ptr(unode->aux); 1710 /* 1711 * Wasn't referenced before but is now, add to the reference 1712 * counters. 1713 */ 1714 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) { 1715 qg->rfer += num_bytes; 1716 qg->rfer_cmpr += num_bytes; 1717 dirty = true; 1718 } 1719 1720 /* 1721 * Was referenced before but isn't now, subtract from the 1722 * reference counters. 1723 */ 1724 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) { 1725 qg->rfer -= num_bytes; 1726 qg->rfer_cmpr -= num_bytes; 1727 dirty = true; 1728 } 1729 1730 if (qg->old_refcnt < seq) 1731 cur_old_count = 0; 1732 else 1733 cur_old_count = qg->old_refcnt - seq; 1734 if (qg->new_refcnt < seq) 1735 cur_new_count = 0; 1736 else 1737 cur_new_count = qg->new_refcnt - seq; 1738 1739 /* 1740 * If our refcount was the same as the roots previously but our 1741 * new count isn't the same as the number of roots now then we 1742 * went from having a exclusive reference on this range to not. 1743 */ 1744 if (old_roots && cur_old_count == old_roots && 1745 (cur_new_count != new_roots || new_roots == 0)) { 1746 WARN_ON(cur_new_count != new_roots && new_roots == 0); 1747 qg->excl -= num_bytes; 1748 qg->excl_cmpr -= num_bytes; 1749 dirty = true; 1750 } 1751 1752 /* 1753 * If we didn't reference all the roots before but now we do we 1754 * have an exclusive reference to this range. 
1755 */ 1756 if ((!old_roots || (old_roots && cur_old_count != old_roots)) 1757 && cur_new_count == new_roots) { 1758 qg->excl += num_bytes; 1759 qg->excl_cmpr += num_bytes; 1760 dirty = true; 1761 } 1762 1763 if (dirty) 1764 qgroup_dirty(fs_info, qg); 1765 } 1766 return 0; 1767 } 1768 1769 /* 1770 * If we removed a data extent and there were other references for that bytenr 1771 * then we need to lookup all referenced roots to make sure we still don't 1772 * reference this bytenr. If we do then we can just discard this operation. 1773 */ 1774 static int check_existing_refs(struct btrfs_trans_handle *trans, 1775 struct btrfs_fs_info *fs_info, 1776 struct btrfs_qgroup_operation *oper) 1777 { 1778 struct ulist *roots = NULL; 1779 struct ulist_node *unode; 1780 struct ulist_iterator uiter; 1781 int ret = 0; 1782 1783 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1784 oper->elem.seq, &roots); 1785 if (ret < 0) 1786 return ret; 1787 ret = 0; 1788 1789 ULIST_ITER_INIT(&uiter); 1790 while ((unode = ulist_next(roots, &uiter))) { 1791 if (unode->val == oper->ref_root) { 1792 ret = 1; 1793 break; 1794 } 1795 } 1796 ulist_free(roots); 1797 btrfs_put_tree_mod_seq(fs_info, &oper->elem); 1798 1799 return ret; 1800 } 1801 1802 /* 1803 * If we share a reference across multiple roots then we may need to adjust 1804 * various qgroups referenced and exclusive counters. The basic premise is this 1805 * 1806 * 1) We have seq to represent a 0 count. Instead of looping through all of the 1807 * qgroups and resetting their refcount to 0 we just constantly bump this 1808 * sequence number to act as the base reference count. This means that if 1809 * anybody is equal to or below this sequence they were never referenced. We 1810 * jack this sequence up by the number of roots we found each time in order to 1811 * make sure we don't have any overlap. 1812 * 1813 * 2) We first search all the roots that reference the area _except_ the root 1814 * we're acting on currently. This makes up the old_refcnt of all the qgroups 1815 * before. 1816 * 1817 * 3) We walk all of the qgroups referenced by the root we are currently acting 1818 * on, and will either adjust old_refcnt in the case of a removal or the 1819 * new_refcnt in the case of an addition. 1820 * 1821 * 4) Finally we walk all the qgroups that are referenced by this range 1822 * including the root we are acting on currently. We will adjust the counters 1823 * based on the number of roots we had and will have after this operation. 1824 * 1825 * Take this example as an illustration 1826 * 1827 * [qgroup 1/0] 1828 * / | \ 1829 * [qg 0/0] [qg 0/1] [qg 0/2] 1830 * \ | / 1831 * [ extent ] 1832 * 1833 * Say we are adding a reference that is covered by qg 0/0. The first step 1834 * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with 1835 * old_roots being 2. Because it is adding new_roots will be 1. We then go 1836 * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's 1837 * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we 1838 * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a 1839 * reference and thus must add the size to the referenced bytes. Everything 1840 * else is the same so nothing else changes. 
 */
static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
				    struct btrfs_fs_info *fs_info,
				    struct btrfs_qgroup_operation *oper)
{
	struct ulist *roots = NULL;
	struct ulist *qgroups, *tmp;
	struct btrfs_qgroup *qgroup;
	struct seq_list elem = {};
	u64 seq;
	int old_roots = 0;
	int new_roots = 0;
	int ret = 0;

	if (oper->elem.seq) {
		ret = check_existing_refs(trans, fs_info, oper);
		if (ret < 0)
			return ret;
		if (ret)
			return 0;
	}

	qgroups = ulist_alloc(GFP_NOFS);
	if (!qgroups)
		return -ENOMEM;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp) {
		ulist_free(qgroups);
		return -ENOMEM;
	}

	btrfs_get_tree_mod_seq(fs_info, &elem);
	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
				   &roots);
	btrfs_put_tree_mod_seq(fs_info, &elem);
	if (ret < 0) {
		ulist_free(qgroups);
		ulist_free(tmp);
		return ret;
	}
	spin_lock(&fs_info->qgroup_lock);
	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
	if (!qgroup)
		goto out;
	seq = fs_info->qgroup_seq;

	/*
	 * So roots is the list of all the roots currently pointing at the
	 * bytenr, including the ref we are adding if we are adding, or not if
	 * we are removing a ref.  So we pass in the ref_root to skip that root
	 * in our calculations.  We set old_refcnt and new_refcnt because who
	 * the hell knows what everything looked like before, and it doesn't
	 * matter except...
	 */
	ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
				     seq, &old_roots, 0);
	if (ret < 0)
		goto out;

	/*
	 * Now adjust the refcounts of the qgroups that care about this
	 * reference, either the old_count in the case of removal or new_count
	 * in the case of an addition.
	 */
	ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
				     seq);
	if (ret < 0)
		goto out;

	/*
	 * ...in the case of removals.  If we had a removal before we got
	 * around to processing this operation then we need to find that guy
	 * and count his references as if they really existed so we don't end
	 * up screwing up the exclusive counts.  Then whenever we go to process
	 * the delete everything will be grand and we can account for whatever
	 * exclusive changes need to be made there.  We also have to pass in
	 * old_roots so we have an accurate count of the roots as it pertains
	 * to this operation's view of the world.
	 */
	ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
					  &old_roots);
	if (ret < 0)
		goto out;

	/*
	 * We are adding our root, need to adjust up the number of roots,
	 * otherwise old_roots is the number of roots we want.
	 */
	if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
		new_roots = old_roots + 1;
	} else {
		new_roots = old_roots;
		old_roots++;
	}
	fs_info->qgroup_seq += old_roots + 1;

	/*
	 * And now the magic happens, bless Arne for having a pretty elegant
	 * solution for this.
	 */
	qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
			       qgroups, seq, old_roots, new_roots, 0);
out:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(qgroups);
	ulist_free(roots);
	ulist_free(tmp);
	return ret;
}

/*
 * Process a reference to a shared subtree.
This type of operation is 1955 * queued during snapshot removal when we encounter extents which are 1956 * shared between more than one root. 1957 */ 1958 static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, 1959 struct btrfs_fs_info *fs_info, 1960 struct btrfs_qgroup_operation *oper) 1961 { 1962 struct ulist *roots = NULL; 1963 struct ulist_node *unode; 1964 struct ulist_iterator uiter; 1965 struct btrfs_qgroup_list *glist; 1966 struct ulist *parents; 1967 int ret = 0; 1968 int err; 1969 struct btrfs_qgroup *qg; 1970 u64 root_obj = 0; 1971 struct seq_list elem = {}; 1972 1973 parents = ulist_alloc(GFP_NOFS); 1974 if (!parents) 1975 return -ENOMEM; 1976 1977 btrfs_get_tree_mod_seq(fs_info, &elem); 1978 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1979 elem.seq, &roots); 1980 btrfs_put_tree_mod_seq(fs_info, &elem); 1981 if (ret < 0) 1982 goto out; 1983 1984 if (roots->nnodes != 1) 1985 goto out; 1986 1987 ULIST_ITER_INIT(&uiter); 1988 unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */ 1989 /* 1990 * If we find our ref root then that means all refs 1991 * this extent has to the root have not yet been 1992 * deleted. In that case, we do nothing and let the 1993 * last ref for this bytenr drive our update. 1994 * 1995 * This can happen for example if an extent is 1996 * referenced multiple times in a snapshot (clone, 1997 * etc). If we are in the middle of snapshot removal, 1998 * queued updates for such an extent will find the 1999 * root if we have not yet finished removing the 2000 * snapshot. 2001 */ 2002 if (unode->val == oper->ref_root) 2003 goto out; 2004 2005 root_obj = unode->val; 2006 BUG_ON(!root_obj); 2007 2008 spin_lock(&fs_info->qgroup_lock); 2009 qg = find_qgroup_rb(fs_info, root_obj); 2010 if (!qg) 2011 goto out_unlock; 2012 2013 qg->excl += oper->num_bytes; 2014 qg->excl_cmpr += oper->num_bytes; 2015 qgroup_dirty(fs_info, qg); 2016 2017 /* 2018 * Adjust counts for parent groups. First we find all 2019 * parents, then in the 2nd loop we do the adjustment 2020 * while adding parents of the parents to our ulist. 2021 */ 2022 list_for_each_entry(glist, &qg->groups, next_group) { 2023 err = ulist_add(parents, glist->group->qgroupid, 2024 ptr_to_u64(glist->group), GFP_ATOMIC); 2025 if (err < 0) { 2026 ret = err; 2027 goto out_unlock; 2028 } 2029 } 2030 2031 ULIST_ITER_INIT(&uiter); 2032 while ((unode = ulist_next(parents, &uiter))) { 2033 qg = u64_to_ptr(unode->aux); 2034 qg->excl += oper->num_bytes; 2035 qg->excl_cmpr += oper->num_bytes; 2036 qgroup_dirty(fs_info, qg); 2037 2038 /* Add any parents of the parents */ 2039 list_for_each_entry(glist, &qg->groups, next_group) { 2040 err = ulist_add(parents, glist->group->qgroupid, 2041 ptr_to_u64(glist->group), GFP_ATOMIC); 2042 if (err < 0) { 2043 ret = err; 2044 goto out_unlock; 2045 } 2046 } 2047 } 2048 2049 out_unlock: 2050 spin_unlock(&fs_info->qgroup_lock); 2051 2052 out: 2053 ulist_free(roots); 2054 ulist_free(parents); 2055 return ret; 2056 } 2057 2058 /* 2059 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 2060 * from the fs. First, all roots referencing the extent are searched, and 2061 * then the space is accounted accordingly to the different roots. The 2062 * accounting algorithm works in 3 steps documented inline. 
2063 */ 2064 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, 2065 struct btrfs_fs_info *fs_info, 2066 struct btrfs_qgroup_operation *oper) 2067 { 2068 int ret = 0; 2069 2070 if (!fs_info->quota_enabled) 2071 return 0; 2072 2073 BUG_ON(!fs_info->quota_root); 2074 2075 mutex_lock(&fs_info->qgroup_rescan_lock); 2076 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 2077 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { 2078 mutex_unlock(&fs_info->qgroup_rescan_lock); 2079 return 0; 2080 } 2081 } 2082 mutex_unlock(&fs_info->qgroup_rescan_lock); 2083 2084 ASSERT(is_fstree(oper->ref_root)); 2085 2086 trace_btrfs_qgroup_account(oper); 2087 2088 switch (oper->type) { 2089 case BTRFS_QGROUP_OPER_ADD_EXCL: 2090 case BTRFS_QGROUP_OPER_SUB_EXCL: 2091 ret = qgroup_excl_accounting(fs_info, oper); 2092 break; 2093 case BTRFS_QGROUP_OPER_ADD_SHARED: 2094 case BTRFS_QGROUP_OPER_SUB_SHARED: 2095 ret = qgroup_shared_accounting(trans, fs_info, oper); 2096 break; 2097 case BTRFS_QGROUP_OPER_SUB_SUBTREE: 2098 ret = qgroup_subtree_accounting(trans, fs_info, oper); 2099 break; 2100 default: 2101 ASSERT(0); 2102 } 2103 return ret; 2104 } 2105 2106 /* 2107 * Needs to be called every time we run delayed refs, even if there is an error, 2108 * in order to clean up outstanding operations. 2109 */ 2110 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, 2111 struct btrfs_fs_info *fs_info) 2112 { 2113 struct btrfs_qgroup_operation *oper; 2114 int ret = 0; 2115 2116 while (!list_empty(&trans->qgroup_ref_list)) { 2117 oper = list_first_entry(&trans->qgroup_ref_list, 2118 struct btrfs_qgroup_operation, list); 2119 list_del_init(&oper->list); 2120 if (!ret || !trans->aborted) 2121 ret = btrfs_qgroup_account(trans, fs_info, oper); 2122 spin_lock(&fs_info->qgroup_op_lock); 2123 rb_erase(&oper->n, &fs_info->qgroup_op_tree); 2124 spin_unlock(&fs_info->qgroup_op_lock); 2125 btrfs_put_tree_mod_seq(fs_info, &oper->elem); 2126 kfree(oper); 2127 } 2128 return ret; 2129 } 2130 2131 /* 2132 * called from commit_transaction. Writes all changed qgroups to disk. 
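 *
 * Editor's note (summary added for clarity, not from the original author):
 * "changed" means every qgroup currently on fs_info->dirty_qgroups; each is
 * written back with update_qgroup_info_item().  If any of those writes or
 * the final update_qgroup_status_item() fails, the qgroup flags gain
 * BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT so a later rescan can repair the
 * counters.  When quota has just been switched on (pending_quota_state set
 * while quota_enabled was still clear), the initial rescan worker is also
 * queued from here.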
2133 */ 2134 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2135 struct btrfs_fs_info *fs_info) 2136 { 2137 struct btrfs_root *quota_root = fs_info->quota_root; 2138 int ret = 0; 2139 int start_rescan_worker = 0; 2140 2141 if (!quota_root) 2142 goto out; 2143 2144 if (!fs_info->quota_enabled && fs_info->pending_quota_state) 2145 start_rescan_worker = 1; 2146 2147 fs_info->quota_enabled = fs_info->pending_quota_state; 2148 2149 spin_lock(&fs_info->qgroup_lock); 2150 while (!list_empty(&fs_info->dirty_qgroups)) { 2151 struct btrfs_qgroup *qgroup; 2152 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2153 struct btrfs_qgroup, dirty); 2154 list_del_init(&qgroup->dirty); 2155 spin_unlock(&fs_info->qgroup_lock); 2156 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2157 if (ret) 2158 fs_info->qgroup_flags |= 2159 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2160 spin_lock(&fs_info->qgroup_lock); 2161 } 2162 if (fs_info->quota_enabled) 2163 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2164 else 2165 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2166 spin_unlock(&fs_info->qgroup_lock); 2167 2168 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2169 if (ret) 2170 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2171 2172 if (!ret && start_rescan_worker) { 2173 ret = qgroup_rescan_init(fs_info, 0, 1); 2174 if (!ret) { 2175 qgroup_rescan_zero_tracking(fs_info); 2176 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2177 &fs_info->qgroup_rescan_work); 2178 } 2179 ret = 0; 2180 } 2181 2182 out: 2183 2184 return ret; 2185 } 2186 2187 /* 2188 * copy the accounting information between qgroups. This is necessary when a 2189 * snapshot or a subvolume is created. 2190 */ 2191 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2192 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2193 struct btrfs_qgroup_inherit *inherit) 2194 { 2195 int ret = 0; 2196 int i; 2197 u64 *i_qgroups; 2198 struct btrfs_root *quota_root = fs_info->quota_root; 2199 struct btrfs_qgroup *srcgroup; 2200 struct btrfs_qgroup *dstgroup; 2201 u32 level_size = 0; 2202 u64 nums; 2203 2204 mutex_lock(&fs_info->qgroup_ioctl_lock); 2205 if (!fs_info->quota_enabled) 2206 goto out; 2207 2208 if (!quota_root) { 2209 ret = -EINVAL; 2210 goto out; 2211 } 2212 2213 if (inherit) { 2214 i_qgroups = (u64 *)(inherit + 1); 2215 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2216 2 * inherit->num_excl_copies; 2217 for (i = 0; i < nums; ++i) { 2218 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2219 if (!srcgroup) { 2220 ret = -EINVAL; 2221 goto out; 2222 } 2223 ++i_qgroups; 2224 } 2225 } 2226 2227 /* 2228 * create a tracking group for the subvol itself 2229 */ 2230 ret = add_qgroup_item(trans, quota_root, objectid); 2231 if (ret) 2232 goto out; 2233 2234 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2235 ret = update_qgroup_limit_item(trans, quota_root, objectid, 2236 inherit->lim.flags, 2237 inherit->lim.max_rfer, 2238 inherit->lim.max_excl, 2239 inherit->lim.rsv_rfer, 2240 inherit->lim.rsv_excl); 2241 if (ret) 2242 goto out; 2243 } 2244 2245 if (srcid) { 2246 struct btrfs_root *srcroot; 2247 struct btrfs_key srckey; 2248 2249 srckey.objectid = srcid; 2250 srckey.type = BTRFS_ROOT_ITEM_KEY; 2251 srckey.offset = (u64)-1; 2252 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2253 if (IS_ERR(srcroot)) { 2254 ret = PTR_ERR(srcroot); 2255 goto out; 2256 } 2257 2258 rcu_read_lock(); 2259 level_size = srcroot->nodesize; 2260 rcu_read_unlock(); 2261 } 
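	/*
	 * Editor's note (layout sketch inferred from the validation loop
	 * above; not part of the original code): the variable sized tail of
	 * struct btrfs_qgroup_inherit is a u64 array laid out as
	 *
	 *   qgroups[0 .. num_qgroups-1]   qgroupids the new subvol joins
	 *   then num_ref_copies pairs     (src qgroupid, dst qgroupid)
	 *   then num_excl_copies pairs    (src qgroupid, dst qgroupid)
	 *
	 * which is why the check above walks
	 * num_qgroups + 2 * num_ref_copies + 2 * num_excl_copies entries and
	 * why the loops below advance i_qgroups by one or by two.
	 */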
2262 2263 /* 2264 * add qgroup to all inherited groups 2265 */ 2266 if (inherit) { 2267 i_qgroups = (u64 *)(inherit + 1); 2268 for (i = 0; i < inherit->num_qgroups; ++i) { 2269 ret = add_qgroup_relation_item(trans, quota_root, 2270 objectid, *i_qgroups); 2271 if (ret) 2272 goto out; 2273 ret = add_qgroup_relation_item(trans, quota_root, 2274 *i_qgroups, objectid); 2275 if (ret) 2276 goto out; 2277 ++i_qgroups; 2278 } 2279 } 2280 2281 2282 spin_lock(&fs_info->qgroup_lock); 2283 2284 dstgroup = add_qgroup_rb(fs_info, objectid); 2285 if (IS_ERR(dstgroup)) { 2286 ret = PTR_ERR(dstgroup); 2287 goto unlock; 2288 } 2289 2290 if (srcid) { 2291 srcgroup = find_qgroup_rb(fs_info, srcid); 2292 if (!srcgroup) 2293 goto unlock; 2294 2295 /* 2296 * We call inherit after we clone the root in order to make sure 2297 * our counts don't go crazy, so at this point the only 2298 * difference between the two roots should be the root node. 2299 */ 2300 dstgroup->rfer = srcgroup->rfer; 2301 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2302 dstgroup->excl = level_size; 2303 dstgroup->excl_cmpr = level_size; 2304 srcgroup->excl = level_size; 2305 srcgroup->excl_cmpr = level_size; 2306 qgroup_dirty(fs_info, dstgroup); 2307 qgroup_dirty(fs_info, srcgroup); 2308 } 2309 2310 if (!inherit) 2311 goto unlock; 2312 2313 i_qgroups = (u64 *)(inherit + 1); 2314 for (i = 0; i < inherit->num_qgroups; ++i) { 2315 ret = add_relation_rb(quota_root->fs_info, objectid, 2316 *i_qgroups); 2317 if (ret) 2318 goto unlock; 2319 ++i_qgroups; 2320 } 2321 2322 for (i = 0; i < inherit->num_ref_copies; ++i) { 2323 struct btrfs_qgroup *src; 2324 struct btrfs_qgroup *dst; 2325 2326 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2327 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2328 2329 if (!src || !dst) { 2330 ret = -EINVAL; 2331 goto unlock; 2332 } 2333 2334 dst->rfer = src->rfer - level_size; 2335 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2336 i_qgroups += 2; 2337 } 2338 for (i = 0; i < inherit->num_excl_copies; ++i) { 2339 struct btrfs_qgroup *src; 2340 struct btrfs_qgroup *dst; 2341 2342 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2343 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2344 2345 if (!src || !dst) { 2346 ret = -EINVAL; 2347 goto unlock; 2348 } 2349 2350 dst->excl = src->excl + level_size; 2351 dst->excl_cmpr = src->excl_cmpr + level_size; 2352 i_qgroups += 2; 2353 } 2354 2355 unlock: 2356 spin_unlock(&fs_info->qgroup_lock); 2357 out: 2358 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2359 return ret; 2360 } 2361 2362 /* 2363 * reserve some space for a qgroup and all its parents. The reservation takes 2364 * place with start_transaction or dealloc_reserve, similar to ENOSPC 2365 * accounting. If not enough space is available, EDQUOT is returned. 2366 * We assume that the requested space is new for all qgroups. 
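 *
 * Worked example (editor's illustration, not from the original comment):
 * with max_rfer = 1MiB, rfer = 768KiB and reserved = 128KiB somewhere in the
 * hierarchy, a 256KiB reservation fails with -EDQUOT because
 * 768KiB + 128KiB + 256KiB exceeds the limit, while a 64KiB reservation
 * succeeds and bumps 'reserved' to 192KiB on that qgroup and on all of its
 * parent qgroups.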
2367 */ 2368 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2369 { 2370 struct btrfs_root *quota_root; 2371 struct btrfs_qgroup *qgroup; 2372 struct btrfs_fs_info *fs_info = root->fs_info; 2373 u64 ref_root = root->root_key.objectid; 2374 int ret = 0; 2375 struct ulist_node *unode; 2376 struct ulist_iterator uiter; 2377 2378 if (!is_fstree(ref_root)) 2379 return 0; 2380 2381 if (num_bytes == 0) 2382 return 0; 2383 2384 spin_lock(&fs_info->qgroup_lock); 2385 quota_root = fs_info->quota_root; 2386 if (!quota_root) 2387 goto out; 2388 2389 qgroup = find_qgroup_rb(fs_info, ref_root); 2390 if (!qgroup) 2391 goto out; 2392 2393 /* 2394 * in a first step, we check all affected qgroups if any limits would 2395 * be exceeded 2396 */ 2397 ulist_reinit(fs_info->qgroup_ulist); 2398 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2399 (uintptr_t)qgroup, GFP_ATOMIC); 2400 if (ret < 0) 2401 goto out; 2402 ULIST_ITER_INIT(&uiter); 2403 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2404 struct btrfs_qgroup *qg; 2405 struct btrfs_qgroup_list *glist; 2406 2407 qg = u64_to_ptr(unode->aux); 2408 2409 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2410 qg->reserved + (s64)qg->rfer + num_bytes > 2411 qg->max_rfer) { 2412 ret = -EDQUOT; 2413 goto out; 2414 } 2415 2416 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2417 qg->reserved + (s64)qg->excl + num_bytes > 2418 qg->max_excl) { 2419 ret = -EDQUOT; 2420 goto out; 2421 } 2422 2423 list_for_each_entry(glist, &qg->groups, next_group) { 2424 ret = ulist_add(fs_info->qgroup_ulist, 2425 glist->group->qgroupid, 2426 (uintptr_t)glist->group, GFP_ATOMIC); 2427 if (ret < 0) 2428 goto out; 2429 } 2430 } 2431 ret = 0; 2432 /* 2433 * no limits exceeded, now record the reservation into all qgroups 2434 */ 2435 ULIST_ITER_INIT(&uiter); 2436 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2437 struct btrfs_qgroup *qg; 2438 2439 qg = u64_to_ptr(unode->aux); 2440 2441 qg->reserved += num_bytes; 2442 } 2443 2444 out: 2445 spin_unlock(&fs_info->qgroup_lock); 2446 return ret; 2447 } 2448 2449 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) 2450 { 2451 struct btrfs_root *quota_root; 2452 struct btrfs_qgroup *qgroup; 2453 struct btrfs_fs_info *fs_info = root->fs_info; 2454 struct ulist_node *unode; 2455 struct ulist_iterator uiter; 2456 u64 ref_root = root->root_key.objectid; 2457 int ret = 0; 2458 2459 if (!is_fstree(ref_root)) 2460 return; 2461 2462 if (num_bytes == 0) 2463 return; 2464 2465 spin_lock(&fs_info->qgroup_lock); 2466 2467 quota_root = fs_info->quota_root; 2468 if (!quota_root) 2469 goto out; 2470 2471 qgroup = find_qgroup_rb(fs_info, ref_root); 2472 if (!qgroup) 2473 goto out; 2474 2475 ulist_reinit(fs_info->qgroup_ulist); 2476 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2477 (uintptr_t)qgroup, GFP_ATOMIC); 2478 if (ret < 0) 2479 goto out; 2480 ULIST_ITER_INIT(&uiter); 2481 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2482 struct btrfs_qgroup *qg; 2483 struct btrfs_qgroup_list *glist; 2484 2485 qg = u64_to_ptr(unode->aux); 2486 2487 qg->reserved -= num_bytes; 2488 2489 list_for_each_entry(glist, &qg->groups, next_group) { 2490 ret = ulist_add(fs_info->qgroup_ulist, 2491 glist->group->qgroupid, 2492 (uintptr_t)glist->group, GFP_ATOMIC); 2493 if (ret < 0) 2494 goto out; 2495 } 2496 } 2497 2498 out: 2499 spin_unlock(&fs_info->qgroup_lock); 2500 } 2501 2502 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) 2503 { 2504 if 
(list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 2505 return; 2506 btrfs_err(trans->root->fs_info, 2507 "qgroups not uptodate in trans handle %p: list is%s empty, " 2508 "seq is %#x.%x", 2509 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 2510 (u32)(trans->delayed_ref_elem.seq >> 32), 2511 (u32)trans->delayed_ref_elem.seq); 2512 BUG(); 2513 } 2514 2515 /* 2516 * returns < 0 on error, 0 when more leafs are to be scanned. 2517 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. 2518 */ 2519 static int 2520 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2521 struct btrfs_trans_handle *trans, struct ulist *qgroups, 2522 struct ulist *tmp, struct extent_buffer *scratch_leaf) 2523 { 2524 struct btrfs_key found; 2525 struct ulist *roots = NULL; 2526 struct seq_list tree_mod_seq_elem = {}; 2527 u64 num_bytes; 2528 u64 seq; 2529 int new_roots; 2530 int slot; 2531 int ret; 2532 2533 path->leave_spinning = 1; 2534 mutex_lock(&fs_info->qgroup_rescan_lock); 2535 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2536 &fs_info->qgroup_rescan_progress, 2537 path, 1, 0); 2538 2539 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", 2540 fs_info->qgroup_rescan_progress.objectid, 2541 fs_info->qgroup_rescan_progress.type, 2542 fs_info->qgroup_rescan_progress.offset, ret); 2543 2544 if (ret) { 2545 /* 2546 * The rescan is about to end, we will not be scanning any 2547 * further blocks. We cannot unset the RESCAN flag here, because 2548 * we want to commit the transaction if everything went well. 2549 * To make the live accounting work in this phase, we set our 2550 * scan progress pointer such that every real extent objectid 2551 * will be smaller. 2552 */ 2553 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2554 btrfs_release_path(path); 2555 mutex_unlock(&fs_info->qgroup_rescan_lock); 2556 return ret; 2557 } 2558 2559 btrfs_item_key_to_cpu(path->nodes[0], &found, 2560 btrfs_header_nritems(path->nodes[0]) - 1); 2561 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2562 2563 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2564 memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf)); 2565 slot = path->slots[0]; 2566 btrfs_release_path(path); 2567 mutex_unlock(&fs_info->qgroup_rescan_lock); 2568 2569 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2570 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2571 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2572 found.type != BTRFS_METADATA_ITEM_KEY) 2573 continue; 2574 if (found.type == BTRFS_METADATA_ITEM_KEY) 2575 num_bytes = fs_info->extent_root->nodesize; 2576 else 2577 num_bytes = found.offset; 2578 2579 ulist_reinit(qgroups); 2580 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2581 &roots); 2582 if (ret < 0) 2583 goto out; 2584 spin_lock(&fs_info->qgroup_lock); 2585 seq = fs_info->qgroup_seq; 2586 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 2587 2588 new_roots = 0; 2589 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, 2590 seq, &new_roots, 1); 2591 if (ret < 0) { 2592 spin_unlock(&fs_info->qgroup_lock); 2593 ulist_free(roots); 2594 goto out; 2595 } 2596 2597 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, 2598 seq, 0, new_roots, 1); 2599 if (ret < 0) { 2600 spin_unlock(&fs_info->qgroup_lock); 2601 ulist_free(roots); 2602 goto out; 2603 } 2604 spin_unlock(&fs_info->qgroup_lock); 2605 ulist_free(roots); 2606 } 2607 out: 2608 btrfs_put_tree_mod_seq(fs_info, 
&tree_mod_seq_elem); 2609 2610 return ret; 2611 } 2612 2613 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2614 { 2615 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 2616 qgroup_rescan_work); 2617 struct btrfs_path *path; 2618 struct btrfs_trans_handle *trans = NULL; 2619 struct ulist *tmp = NULL, *qgroups = NULL; 2620 struct extent_buffer *scratch_leaf = NULL; 2621 int err = -ENOMEM; 2622 2623 path = btrfs_alloc_path(); 2624 if (!path) 2625 goto out; 2626 qgroups = ulist_alloc(GFP_NOFS); 2627 if (!qgroups) 2628 goto out; 2629 tmp = ulist_alloc(GFP_NOFS); 2630 if (!tmp) 2631 goto out; 2632 scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); 2633 if (!scratch_leaf) 2634 goto out; 2635 2636 err = 0; 2637 while (!err) { 2638 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2639 if (IS_ERR(trans)) { 2640 err = PTR_ERR(trans); 2641 break; 2642 } 2643 if (!fs_info->quota_enabled) { 2644 err = -EINTR; 2645 } else { 2646 err = qgroup_rescan_leaf(fs_info, path, trans, 2647 qgroups, tmp, scratch_leaf); 2648 } 2649 if (err > 0) 2650 btrfs_commit_transaction(trans, fs_info->fs_root); 2651 else 2652 btrfs_end_transaction(trans, fs_info->fs_root); 2653 } 2654 2655 out: 2656 kfree(scratch_leaf); 2657 ulist_free(qgroups); 2658 ulist_free(tmp); 2659 btrfs_free_path(path); 2660 2661 mutex_lock(&fs_info->qgroup_rescan_lock); 2662 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2663 2664 if (err == 2 && 2665 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2666 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2667 } else if (err < 0) { 2668 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2669 } 2670 mutex_unlock(&fs_info->qgroup_rescan_lock); 2671 2672 if (err >= 0) { 2673 btrfs_info(fs_info, "qgroup scan completed%s", 2674 err == 2 ? " (inconsistency flag cleared)" : ""); 2675 } else { 2676 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2677 } 2678 2679 complete_all(&fs_info->qgroup_rescan_completion); 2680 } 2681 2682 /* 2683 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2684 * memory required for the rescan context. 
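 *
 * Editor's note (behaviour summary, not from the original author): when
 * called with init_flags set, this returns -EINPROGRESS if a rescan is
 * already running and -EINVAL if quota is not enabled; on success it sets
 * BTRFS_QGROUP_STATUS_FLAG_RESCAN and resets the rescan progress key to
 * progress_objectid so the worker starts scanning from there.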
2685 */ 2686 static int 2687 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2688 int init_flags) 2689 { 2690 int ret = 0; 2691 2692 if (!init_flags && 2693 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2694 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2695 ret = -EINVAL; 2696 goto err; 2697 } 2698 2699 mutex_lock(&fs_info->qgroup_rescan_lock); 2700 spin_lock(&fs_info->qgroup_lock); 2701 2702 if (init_flags) { 2703 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2704 ret = -EINPROGRESS; 2705 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2706 ret = -EINVAL; 2707 2708 if (ret) { 2709 spin_unlock(&fs_info->qgroup_lock); 2710 mutex_unlock(&fs_info->qgroup_rescan_lock); 2711 goto err; 2712 } 2713 2714 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2715 } 2716 2717 memset(&fs_info->qgroup_rescan_progress, 0, 2718 sizeof(fs_info->qgroup_rescan_progress)); 2719 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2720 2721 spin_unlock(&fs_info->qgroup_lock); 2722 mutex_unlock(&fs_info->qgroup_rescan_lock); 2723 2724 init_completion(&fs_info->qgroup_rescan_completion); 2725 2726 memset(&fs_info->qgroup_rescan_work, 0, 2727 sizeof(fs_info->qgroup_rescan_work)); 2728 btrfs_init_work(&fs_info->qgroup_rescan_work, 2729 btrfs_qgroup_rescan_helper, 2730 btrfs_qgroup_rescan_worker, NULL, NULL); 2731 2732 if (ret) { 2733 err: 2734 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret); 2735 return ret; 2736 } 2737 2738 return 0; 2739 } 2740 2741 static void 2742 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2743 { 2744 struct rb_node *n; 2745 struct btrfs_qgroup *qgroup; 2746 2747 spin_lock(&fs_info->qgroup_lock); 2748 /* clear all current qgroup tracking information */ 2749 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2750 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2751 qgroup->rfer = 0; 2752 qgroup->rfer_cmpr = 0; 2753 qgroup->excl = 0; 2754 qgroup->excl_cmpr = 0; 2755 } 2756 spin_unlock(&fs_info->qgroup_lock); 2757 } 2758 2759 int 2760 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2761 { 2762 int ret = 0; 2763 struct btrfs_trans_handle *trans; 2764 2765 ret = qgroup_rescan_init(fs_info, 0, 1); 2766 if (ret) 2767 return ret; 2768 2769 /* 2770 * We have set the rescan_progress to 0, which means no more 2771 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2772 * However, btrfs_qgroup_account_ref may be right after its call 2773 * to btrfs_find_all_roots, in which case it would still do the 2774 * accounting. 2775 * To solve this, we're committing the transaction, which will 2776 * ensure we run all delayed refs and only after that, we are 2777 * going to clear all tracking information for a clean start. 
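 *
 * Editor's illustration of the window being closed (hypothetical
 * interleaving, not from the original comment): an accounting operation may
 * already have passed its rescan_progress check and be sitting in
 * btrfs_find_all_roots() when the progress key is reset to 0; it would
 * still add its counts afterwards.  Committing the transaction first
 * drains all such delayed refs, so the tracking information that is zeroed
 * below really does describe a quiescent state.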
2778 */ 2779 2780 trans = btrfs_join_transaction(fs_info->fs_root); 2781 if (IS_ERR(trans)) { 2782 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2783 return PTR_ERR(trans); 2784 } 2785 ret = btrfs_commit_transaction(trans, fs_info->fs_root); 2786 if (ret) { 2787 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2788 return ret; 2789 } 2790 2791 qgroup_rescan_zero_tracking(fs_info); 2792 2793 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2794 &fs_info->qgroup_rescan_work); 2795 2796 return 0; 2797 } 2798 2799 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) 2800 { 2801 int running; 2802 int ret = 0; 2803 2804 mutex_lock(&fs_info->qgroup_rescan_lock); 2805 spin_lock(&fs_info->qgroup_lock); 2806 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2807 spin_unlock(&fs_info->qgroup_lock); 2808 mutex_unlock(&fs_info->qgroup_rescan_lock); 2809 2810 if (running) 2811 ret = wait_for_completion_interruptible( 2812 &fs_info->qgroup_rescan_completion); 2813 2814 return ret; 2815 } 2816 2817 /* 2818 * this is only called from open_ctree where we're still single threaded, thus 2819 * locking is omitted here. 2820 */ 2821 void 2822 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 2823 { 2824 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2825 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2826 &fs_info->qgroup_rescan_work); 2827 } 2828
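
/*
 * Editor's usage sketch (illustrative userspace code, not part of this
 * file): the rescan machinery above is normally driven through the btrfs
 * ioctl interface, e.g. by "btrfs quota rescan -w".  A minimal caller,
 * assuming the uapi definitions from <linux/btrfs.h>, could look like:
 *
 *	int fd = open("/mnt", O_RDONLY);	// any fd on the filesystem
 *	struct btrfs_ioctl_quota_rescan_args args;
 *
 *	memset(&args, 0, sizeof(args));		// flags/progress must be zero
 *	if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
 *		perror("BTRFS_IOC_QUOTA_RESCAN");
 *	// returns once btrfs_qgroup_rescan_worker() has completed
 *	if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
 *		perror("BTRFS_IOC_QUOTA_RESCAN_WAIT");
 *	close(fd);
 */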