1 /* 2 * Copyright (C) 2011 STRATO. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include <linux/slab.h> 25 #include <linux/workqueue.h> 26 #include <linux/btrfs.h> 27 28 #include "ctree.h" 29 #include "transaction.h" 30 #include "disk-io.h" 31 #include "locking.h" 32 #include "ulist.h" 33 #include "backref.h" 34 #include "extent_io.h" 35 #include "qgroup.h" 36 37 38 /* TODO XXX FIXME 39 * - subvol delete -> delete when ref goes to 0? delete limits also? 40 * - reorganize keys 41 * - compressed 42 * - sync 43 * - copy also limits on subvol creation 44 * - limit 45 * - caches fuer ulists 46 * - performance benchmarks 47 * - check all ioctl parameters 48 */ 49 50 /* 51 * one struct for each qgroup, organized in fs_info->qgroup_tree. 52 */ 53 struct btrfs_qgroup { 54 u64 qgroupid; 55 56 /* 57 * state 58 */ 59 u64 rfer; /* referenced */ 60 u64 rfer_cmpr; /* referenced compressed */ 61 u64 excl; /* exclusive */ 62 u64 excl_cmpr; /* exclusive compressed */ 63 64 /* 65 * limits 66 */ 67 u64 lim_flags; /* which limits are set */ 68 u64 max_rfer; 69 u64 max_excl; 70 u64 rsv_rfer; 71 u64 rsv_excl; 72 73 /* 74 * reservation tracking 75 */ 76 u64 reserved; 77 78 /* 79 * lists 80 */ 81 struct list_head groups; /* groups this group is member of */ 82 struct list_head members; /* groups that are members of this group */ 83 struct list_head dirty; /* dirty groups */ 84 struct rb_node node; /* tree of qgroups */ 85 86 /* 87 * temp variables for accounting operations 88 * Refer to qgroup_shared_accounting() for details. 89 */ 90 u64 old_refcnt; 91 u64 new_refcnt; 92 }; 93 94 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 95 int mod) 96 { 97 if (qg->old_refcnt < seq) 98 qg->old_refcnt = seq; 99 qg->old_refcnt += mod; 100 } 101 102 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 103 int mod) 104 { 105 if (qg->new_refcnt < seq) 106 qg->new_refcnt = seq; 107 qg->new_refcnt += mod; 108 } 109 110 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) 111 { 112 if (qg->old_refcnt < seq) 113 return 0; 114 return qg->old_refcnt - seq; 115 } 116 117 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) 118 { 119 if (qg->new_refcnt < seq) 120 return 0; 121 return qg->new_refcnt - seq; 122 } 123 124 /* 125 * glue structure to represent the relations between qgroups. 
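 *
 * One such node is allocated per parent/member edge and sits on two lists
 * at once.  For example (hypothetical IDs), making qgroup 0/257 a member
 * of 1/100 allocates a single btrfs_qgroup_list for which add_relation_rb()
 * does:
 *
 *	list->member = the 0/257 qgroup;
 *	list->group  = the 1/100 qgroup;
 *	list_add_tail(&list->next_group,  &member->groups);
 *	list_add_tail(&list->next_member, &parent->members);
 *
 * so walking member->groups yields all parents of a qgroup, and walking
 * parent->members yields all of its direct members.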
126 */ 127 struct btrfs_qgroup_list { 128 struct list_head next_group; 129 struct list_head next_member; 130 struct btrfs_qgroup *group; 131 struct btrfs_qgroup *member; 132 }; 133 134 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 135 { 136 return (u64)(uintptr_t)qg; 137 } 138 139 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 140 { 141 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 142 } 143 144 static int 145 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 146 int init_flags); 147 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 148 149 /* must be called with qgroup_ioctl_lock held */ 150 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 151 u64 qgroupid) 152 { 153 struct rb_node *n = fs_info->qgroup_tree.rb_node; 154 struct btrfs_qgroup *qgroup; 155 156 while (n) { 157 qgroup = rb_entry(n, struct btrfs_qgroup, node); 158 if (qgroup->qgroupid < qgroupid) 159 n = n->rb_left; 160 else if (qgroup->qgroupid > qgroupid) 161 n = n->rb_right; 162 else 163 return qgroup; 164 } 165 return NULL; 166 } 167 168 /* must be called with qgroup_lock held */ 169 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 170 u64 qgroupid) 171 { 172 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 173 struct rb_node *parent = NULL; 174 struct btrfs_qgroup *qgroup; 175 176 while (*p) { 177 parent = *p; 178 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 179 180 if (qgroup->qgroupid < qgroupid) 181 p = &(*p)->rb_left; 182 else if (qgroup->qgroupid > qgroupid) 183 p = &(*p)->rb_right; 184 else 185 return qgroup; 186 } 187 188 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 189 if (!qgroup) 190 return ERR_PTR(-ENOMEM); 191 192 qgroup->qgroupid = qgroupid; 193 INIT_LIST_HEAD(&qgroup->groups); 194 INIT_LIST_HEAD(&qgroup->members); 195 INIT_LIST_HEAD(&qgroup->dirty); 196 197 rb_link_node(&qgroup->node, parent, p); 198 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 199 200 return qgroup; 201 } 202 203 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 204 { 205 struct btrfs_qgroup_list *list; 206 207 list_del(&qgroup->dirty); 208 while (!list_empty(&qgroup->groups)) { 209 list = list_first_entry(&qgroup->groups, 210 struct btrfs_qgroup_list, next_group); 211 list_del(&list->next_group); 212 list_del(&list->next_member); 213 kfree(list); 214 } 215 216 while (!list_empty(&qgroup->members)) { 217 list = list_first_entry(&qgroup->members, 218 struct btrfs_qgroup_list, next_member); 219 list_del(&list->next_group); 220 list_del(&list->next_member); 221 kfree(list); 222 } 223 kfree(qgroup); 224 } 225 226 /* must be called with qgroup_lock held */ 227 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 228 { 229 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 230 231 if (!qgroup) 232 return -ENOENT; 233 234 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 235 __del_qgroup_rb(qgroup); 236 return 0; 237 } 238 239 /* must be called with qgroup_lock held */ 240 static int add_relation_rb(struct btrfs_fs_info *fs_info, 241 u64 memberid, u64 parentid) 242 { 243 struct btrfs_qgroup *member; 244 struct btrfs_qgroup *parent; 245 struct btrfs_qgroup_list *list; 246 247 member = find_qgroup_rb(fs_info, memberid); 248 parent = find_qgroup_rb(fs_info, parentid); 249 if (!member || !parent) 250 return -ENOENT; 251 252 list = kzalloc(sizeof(*list), GFP_ATOMIC); 253 if (!list) 254 return -ENOMEM; 255 256 list->group = parent; 257 list->member = member; 
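	/*
	 * Hook the new edge into both endpoints: next_group goes onto the
	 * member's list of parents, next_member onto the parent's list of
	 * members (see the btrfs_qgroup_list comment above).
	 */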
258 list_add_tail(&list->next_group, &member->groups); 259 list_add_tail(&list->next_member, &parent->members); 260 261 return 0; 262 } 263 264 /* must be called with qgroup_lock held */ 265 static int del_relation_rb(struct btrfs_fs_info *fs_info, 266 u64 memberid, u64 parentid) 267 { 268 struct btrfs_qgroup *member; 269 struct btrfs_qgroup *parent; 270 struct btrfs_qgroup_list *list; 271 272 member = find_qgroup_rb(fs_info, memberid); 273 parent = find_qgroup_rb(fs_info, parentid); 274 if (!member || !parent) 275 return -ENOENT; 276 277 list_for_each_entry(list, &member->groups, next_group) { 278 if (list->group == parent) { 279 list_del(&list->next_group); 280 list_del(&list->next_member); 281 kfree(list); 282 return 0; 283 } 284 } 285 return -ENOENT; 286 } 287 288 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 289 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 290 u64 rfer, u64 excl) 291 { 292 struct btrfs_qgroup *qgroup; 293 294 qgroup = find_qgroup_rb(fs_info, qgroupid); 295 if (!qgroup) 296 return -EINVAL; 297 if (qgroup->rfer != rfer || qgroup->excl != excl) 298 return -EINVAL; 299 return 0; 300 } 301 #endif 302 303 /* 304 * The full config is read in one go, only called from open_ctree() 305 * It doesn't use any locking, as at this point we're still single-threaded 306 */ 307 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 308 { 309 struct btrfs_key key; 310 struct btrfs_key found_key; 311 struct btrfs_root *quota_root = fs_info->quota_root; 312 struct btrfs_path *path = NULL; 313 struct extent_buffer *l; 314 int slot; 315 int ret = 0; 316 u64 flags = 0; 317 u64 rescan_progress = 0; 318 319 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 320 return 0; 321 322 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 323 if (!fs_info->qgroup_ulist) { 324 ret = -ENOMEM; 325 goto out; 326 } 327 328 path = btrfs_alloc_path(); 329 if (!path) { 330 ret = -ENOMEM; 331 goto out; 332 } 333 334 /* default this to quota off, in case no status key is found */ 335 fs_info->qgroup_flags = 0; 336 337 /* 338 * pass 1: read status, all qgroup infos and limits 339 */ 340 key.objectid = 0; 341 key.type = 0; 342 key.offset = 0; 343 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 344 if (ret) 345 goto out; 346 347 while (1) { 348 struct btrfs_qgroup *qgroup; 349 350 slot = path->slots[0]; 351 l = path->nodes[0]; 352 btrfs_item_key_to_cpu(l, &found_key, slot); 353 354 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 355 struct btrfs_qgroup_status_item *ptr; 356 357 ptr = btrfs_item_ptr(l, slot, 358 struct btrfs_qgroup_status_item); 359 360 if (btrfs_qgroup_status_version(l, ptr) != 361 BTRFS_QGROUP_STATUS_VERSION) { 362 btrfs_err(fs_info, 363 "old qgroup version, quota disabled"); 364 goto out; 365 } 366 if (btrfs_qgroup_status_generation(l, ptr) != 367 fs_info->generation) { 368 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 369 btrfs_err(fs_info, 370 "qgroup generation mismatch, marked as inconsistent"); 371 } 372 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 373 ptr); 374 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 375 goto next1; 376 } 377 378 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 379 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 380 goto next1; 381 382 qgroup = find_qgroup_rb(fs_info, found_key.offset); 383 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 384 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 385 btrfs_err(fs_info, "inconsistent qgroup config"); 386 flags |= 
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 387 } 388 if (!qgroup) { 389 qgroup = add_qgroup_rb(fs_info, found_key.offset); 390 if (IS_ERR(qgroup)) { 391 ret = PTR_ERR(qgroup); 392 goto out; 393 } 394 } 395 switch (found_key.type) { 396 case BTRFS_QGROUP_INFO_KEY: { 397 struct btrfs_qgroup_info_item *ptr; 398 399 ptr = btrfs_item_ptr(l, slot, 400 struct btrfs_qgroup_info_item); 401 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 402 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 403 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 404 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 405 /* generation currently unused */ 406 break; 407 } 408 case BTRFS_QGROUP_LIMIT_KEY: { 409 struct btrfs_qgroup_limit_item *ptr; 410 411 ptr = btrfs_item_ptr(l, slot, 412 struct btrfs_qgroup_limit_item); 413 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 414 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 415 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 416 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 417 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 418 break; 419 } 420 } 421 next1: 422 ret = btrfs_next_item(quota_root, path); 423 if (ret < 0) 424 goto out; 425 if (ret) 426 break; 427 } 428 btrfs_release_path(path); 429 430 /* 431 * pass 2: read all qgroup relations 432 */ 433 key.objectid = 0; 434 key.type = BTRFS_QGROUP_RELATION_KEY; 435 key.offset = 0; 436 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 437 if (ret) 438 goto out; 439 while (1) { 440 slot = path->slots[0]; 441 l = path->nodes[0]; 442 btrfs_item_key_to_cpu(l, &found_key, slot); 443 444 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 445 goto next2; 446 447 if (found_key.objectid > found_key.offset) { 448 /* parent <- member, not needed to build config */ 449 /* FIXME should we omit the key completely? */ 450 goto next2; 451 } 452 453 ret = add_relation_rb(fs_info, found_key.objectid, 454 found_key.offset); 455 if (ret == -ENOENT) { 456 btrfs_warn(fs_info, 457 "orphan qgroup relation 0x%llx->0x%llx", 458 found_key.objectid, found_key.offset); 459 ret = 0; /* ignore the error */ 460 } 461 if (ret) 462 goto out; 463 next2: 464 ret = btrfs_next_item(quota_root, path); 465 if (ret < 0) 466 goto out; 467 if (ret) 468 break; 469 } 470 out: 471 fs_info->qgroup_flags |= flags; 472 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 473 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 474 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 475 ret >= 0) 476 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 477 btrfs_free_path(path); 478 479 if (ret < 0) { 480 ulist_free(fs_info->qgroup_ulist); 481 fs_info->qgroup_ulist = NULL; 482 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 483 } 484 485 return ret < 0 ? ret : 0; 486 } 487 488 /* 489 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 490 * first two are in single-threaded paths.And for the third one, we have set 491 * quota_root to be null with qgroup_lock held before, so it is safe to clean 492 * up the in-memory structures without qgroup_lock held. 
493 */ 494 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 495 { 496 struct rb_node *n; 497 struct btrfs_qgroup *qgroup; 498 499 while ((n = rb_first(&fs_info->qgroup_tree))) { 500 qgroup = rb_entry(n, struct btrfs_qgroup, node); 501 rb_erase(n, &fs_info->qgroup_tree); 502 __del_qgroup_rb(qgroup); 503 } 504 /* 505 * we call btrfs_free_qgroup_config() when umounting 506 * filesystem and disabling quota, so we set qgroup_ulist 507 * to be null here to avoid double free. 508 */ 509 ulist_free(fs_info->qgroup_ulist); 510 fs_info->qgroup_ulist = NULL; 511 } 512 513 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 514 struct btrfs_root *quota_root, 515 u64 src, u64 dst) 516 { 517 int ret; 518 struct btrfs_path *path; 519 struct btrfs_key key; 520 521 path = btrfs_alloc_path(); 522 if (!path) 523 return -ENOMEM; 524 525 key.objectid = src; 526 key.type = BTRFS_QGROUP_RELATION_KEY; 527 key.offset = dst; 528 529 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 530 531 btrfs_mark_buffer_dirty(path->nodes[0]); 532 533 btrfs_free_path(path); 534 return ret; 535 } 536 537 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 538 struct btrfs_root *quota_root, 539 u64 src, u64 dst) 540 { 541 int ret; 542 struct btrfs_path *path; 543 struct btrfs_key key; 544 545 path = btrfs_alloc_path(); 546 if (!path) 547 return -ENOMEM; 548 549 key.objectid = src; 550 key.type = BTRFS_QGROUP_RELATION_KEY; 551 key.offset = dst; 552 553 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 554 if (ret < 0) 555 goto out; 556 557 if (ret > 0) { 558 ret = -ENOENT; 559 goto out; 560 } 561 562 ret = btrfs_del_item(trans, quota_root, path); 563 out: 564 btrfs_free_path(path); 565 return ret; 566 } 567 568 static int add_qgroup_item(struct btrfs_trans_handle *trans, 569 struct btrfs_root *quota_root, u64 qgroupid) 570 { 571 int ret; 572 struct btrfs_path *path; 573 struct btrfs_qgroup_info_item *qgroup_info; 574 struct btrfs_qgroup_limit_item *qgroup_limit; 575 struct extent_buffer *leaf; 576 struct btrfs_key key; 577 578 if (btrfs_is_testing(quota_root->fs_info)) 579 return 0; 580 581 path = btrfs_alloc_path(); 582 if (!path) 583 return -ENOMEM; 584 585 key.objectid = 0; 586 key.type = BTRFS_QGROUP_INFO_KEY; 587 key.offset = qgroupid; 588 589 /* 590 * Avoid a transaction abort by catching -EEXIST here. In that 591 * case, we proceed by re-initializing the existing structure 592 * on disk. 
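 *
 * For reference, both items live in the quota tree and share the same key
 * layout (the objectid is unused and always 0):
 *
 *	(0, BTRFS_QGROUP_INFO_KEY,  qgroupid) -> struct btrfs_qgroup_info_item
 *	(0, BTRFS_QGROUP_LIMIT_KEY, qgroupid) -> struct btrfs_qgroup_limit_item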
593 */ 594 595 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 596 sizeof(*qgroup_info)); 597 if (ret && ret != -EEXIST) 598 goto out; 599 600 leaf = path->nodes[0]; 601 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 602 struct btrfs_qgroup_info_item); 603 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 604 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 605 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 606 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 607 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 608 609 btrfs_mark_buffer_dirty(leaf); 610 611 btrfs_release_path(path); 612 613 key.type = BTRFS_QGROUP_LIMIT_KEY; 614 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 615 sizeof(*qgroup_limit)); 616 if (ret && ret != -EEXIST) 617 goto out; 618 619 leaf = path->nodes[0]; 620 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 621 struct btrfs_qgroup_limit_item); 622 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 623 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 624 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 625 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 626 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 627 628 btrfs_mark_buffer_dirty(leaf); 629 630 ret = 0; 631 out: 632 btrfs_free_path(path); 633 return ret; 634 } 635 636 static int del_qgroup_item(struct btrfs_trans_handle *trans, 637 struct btrfs_root *quota_root, u64 qgroupid) 638 { 639 int ret; 640 struct btrfs_path *path; 641 struct btrfs_key key; 642 643 path = btrfs_alloc_path(); 644 if (!path) 645 return -ENOMEM; 646 647 key.objectid = 0; 648 key.type = BTRFS_QGROUP_INFO_KEY; 649 key.offset = qgroupid; 650 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 651 if (ret < 0) 652 goto out; 653 654 if (ret > 0) { 655 ret = -ENOENT; 656 goto out; 657 } 658 659 ret = btrfs_del_item(trans, quota_root, path); 660 if (ret) 661 goto out; 662 663 btrfs_release_path(path); 664 665 key.type = BTRFS_QGROUP_LIMIT_KEY; 666 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 667 if (ret < 0) 668 goto out; 669 670 if (ret > 0) { 671 ret = -ENOENT; 672 goto out; 673 } 674 675 ret = btrfs_del_item(trans, quota_root, path); 676 677 out: 678 btrfs_free_path(path); 679 return ret; 680 } 681 682 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 683 struct btrfs_root *root, 684 struct btrfs_qgroup *qgroup) 685 { 686 struct btrfs_path *path; 687 struct btrfs_key key; 688 struct extent_buffer *l; 689 struct btrfs_qgroup_limit_item *qgroup_limit; 690 int ret; 691 int slot; 692 693 key.objectid = 0; 694 key.type = BTRFS_QGROUP_LIMIT_KEY; 695 key.offset = qgroup->qgroupid; 696 697 path = btrfs_alloc_path(); 698 if (!path) 699 return -ENOMEM; 700 701 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 702 if (ret > 0) 703 ret = -ENOENT; 704 705 if (ret) 706 goto out; 707 708 l = path->nodes[0]; 709 slot = path->slots[0]; 710 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 711 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 712 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 713 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 714 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 715 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 716 717 btrfs_mark_buffer_dirty(l); 718 719 out: 720 btrfs_free_path(path); 721 return ret; 722 } 723 724 static int update_qgroup_info_item(struct 
btrfs_trans_handle *trans, 725 struct btrfs_root *root, 726 struct btrfs_qgroup *qgroup) 727 { 728 struct btrfs_path *path; 729 struct btrfs_key key; 730 struct extent_buffer *l; 731 struct btrfs_qgroup_info_item *qgroup_info; 732 int ret; 733 int slot; 734 735 if (btrfs_is_testing(root->fs_info)) 736 return 0; 737 738 key.objectid = 0; 739 key.type = BTRFS_QGROUP_INFO_KEY; 740 key.offset = qgroup->qgroupid; 741 742 path = btrfs_alloc_path(); 743 if (!path) 744 return -ENOMEM; 745 746 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 747 if (ret > 0) 748 ret = -ENOENT; 749 750 if (ret) 751 goto out; 752 753 l = path->nodes[0]; 754 slot = path->slots[0]; 755 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 756 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 757 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 758 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 759 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 760 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 761 762 btrfs_mark_buffer_dirty(l); 763 764 out: 765 btrfs_free_path(path); 766 return ret; 767 } 768 769 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 770 struct btrfs_fs_info *fs_info, 771 struct btrfs_root *root) 772 { 773 struct btrfs_path *path; 774 struct btrfs_key key; 775 struct extent_buffer *l; 776 struct btrfs_qgroup_status_item *ptr; 777 int ret; 778 int slot; 779 780 key.objectid = 0; 781 key.type = BTRFS_QGROUP_STATUS_KEY; 782 key.offset = 0; 783 784 path = btrfs_alloc_path(); 785 if (!path) 786 return -ENOMEM; 787 788 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 789 if (ret > 0) 790 ret = -ENOENT; 791 792 if (ret) 793 goto out; 794 795 l = path->nodes[0]; 796 slot = path->slots[0]; 797 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 798 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 799 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 800 btrfs_set_qgroup_status_rescan(l, ptr, 801 fs_info->qgroup_rescan_progress.objectid); 802 803 btrfs_mark_buffer_dirty(l); 804 805 out: 806 btrfs_free_path(path); 807 return ret; 808 } 809 810 /* 811 * called with qgroup_lock held 812 */ 813 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 814 struct btrfs_root *root) 815 { 816 struct btrfs_path *path; 817 struct btrfs_key key; 818 struct extent_buffer *leaf = NULL; 819 int ret; 820 int nr = 0; 821 822 path = btrfs_alloc_path(); 823 if (!path) 824 return -ENOMEM; 825 826 path->leave_spinning = 1; 827 828 key.objectid = 0; 829 key.offset = 0; 830 key.type = 0; 831 832 while (1) { 833 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 834 if (ret < 0) 835 goto out; 836 leaf = path->nodes[0]; 837 nr = btrfs_header_nritems(leaf); 838 if (!nr) 839 break; 840 /* 841 * delete the leaf one by one 842 * since the whole tree is going 843 * to be deleted. 
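 *
 * Each pass searches from the smallest key (0, 0, 0), so path->nodes[0] is
 * the leftmost leaf; btrfs_del_items() then drops all of its nritems
 * entries at once and the loop repeats until an empty leaf is returned.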
844 */ 845 path->slots[0] = 0; 846 ret = btrfs_del_items(trans, root, path, 0, nr); 847 if (ret) 848 goto out; 849 850 btrfs_release_path(path); 851 } 852 ret = 0; 853 out: 854 set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); 855 btrfs_free_path(path); 856 return ret; 857 } 858 859 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 860 struct btrfs_fs_info *fs_info) 861 { 862 struct btrfs_root *quota_root; 863 struct btrfs_root *tree_root = fs_info->tree_root; 864 struct btrfs_path *path = NULL; 865 struct btrfs_qgroup_status_item *ptr; 866 struct extent_buffer *leaf; 867 struct btrfs_key key; 868 struct btrfs_key found_key; 869 struct btrfs_qgroup *qgroup = NULL; 870 int ret = 0; 871 int slot; 872 873 mutex_lock(&fs_info->qgroup_ioctl_lock); 874 if (fs_info->quota_root) { 875 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 876 goto out; 877 } 878 879 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 880 if (!fs_info->qgroup_ulist) { 881 ret = -ENOMEM; 882 goto out; 883 } 884 885 /* 886 * initially create the quota tree 887 */ 888 quota_root = btrfs_create_tree(trans, fs_info, 889 BTRFS_QUOTA_TREE_OBJECTID); 890 if (IS_ERR(quota_root)) { 891 ret = PTR_ERR(quota_root); 892 goto out; 893 } 894 895 path = btrfs_alloc_path(); 896 if (!path) { 897 ret = -ENOMEM; 898 goto out_free_root; 899 } 900 901 key.objectid = 0; 902 key.type = BTRFS_QGROUP_STATUS_KEY; 903 key.offset = 0; 904 905 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 906 sizeof(*ptr)); 907 if (ret) 908 goto out_free_path; 909 910 leaf = path->nodes[0]; 911 ptr = btrfs_item_ptr(leaf, path->slots[0], 912 struct btrfs_qgroup_status_item); 913 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 914 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 915 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 916 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 917 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 918 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 919 920 btrfs_mark_buffer_dirty(leaf); 921 922 key.objectid = 0; 923 key.type = BTRFS_ROOT_REF_KEY; 924 key.offset = 0; 925 926 btrfs_release_path(path); 927 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 928 if (ret > 0) 929 goto out_add_root; 930 if (ret < 0) 931 goto out_free_path; 932 933 934 while (1) { 935 slot = path->slots[0]; 936 leaf = path->nodes[0]; 937 btrfs_item_key_to_cpu(leaf, &found_key, slot); 938 939 if (found_key.type == BTRFS_ROOT_REF_KEY) { 940 ret = add_qgroup_item(trans, quota_root, 941 found_key.offset); 942 if (ret) 943 goto out_free_path; 944 945 qgroup = add_qgroup_rb(fs_info, found_key.offset); 946 if (IS_ERR(qgroup)) { 947 ret = PTR_ERR(qgroup); 948 goto out_free_path; 949 } 950 } 951 ret = btrfs_next_item(tree_root, path); 952 if (ret < 0) 953 goto out_free_path; 954 if (ret) 955 break; 956 } 957 958 out_add_root: 959 btrfs_release_path(path); 960 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 961 if (ret) 962 goto out_free_path; 963 964 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 965 if (IS_ERR(qgroup)) { 966 ret = PTR_ERR(qgroup); 967 goto out_free_path; 968 } 969 spin_lock(&fs_info->qgroup_lock); 970 fs_info->quota_root = quota_root; 971 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 972 spin_unlock(&fs_info->qgroup_lock); 973 out_free_path: 974 btrfs_free_path(path); 975 out_free_root: 976 if (ret) { 977 free_extent_buffer(quota_root->node); 978 free_extent_buffer(quota_root->commit_root); 979 kfree(quota_root); 980 } 981 out: 
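	/*
	 * Error path: the quota root (if it was created) is torn down under
	 * out_free_root above; here we only have to drop the ulist allocated
	 * at the start so a later enable attempt starts from a clean slate.
	 */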
982 if (ret) { 983 ulist_free(fs_info->qgroup_ulist); 984 fs_info->qgroup_ulist = NULL; 985 } 986 mutex_unlock(&fs_info->qgroup_ioctl_lock); 987 return ret; 988 } 989 990 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 991 struct btrfs_fs_info *fs_info) 992 { 993 struct btrfs_root *tree_root = fs_info->tree_root; 994 struct btrfs_root *quota_root; 995 int ret = 0; 996 997 mutex_lock(&fs_info->qgroup_ioctl_lock); 998 if (!fs_info->quota_root) 999 goto out; 1000 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1001 set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); 1002 btrfs_qgroup_wait_for_completion(fs_info, false); 1003 spin_lock(&fs_info->qgroup_lock); 1004 quota_root = fs_info->quota_root; 1005 fs_info->quota_root = NULL; 1006 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 1007 spin_unlock(&fs_info->qgroup_lock); 1008 1009 btrfs_free_qgroup_config(fs_info); 1010 1011 ret = btrfs_clean_quota_tree(trans, quota_root); 1012 if (ret) 1013 goto out; 1014 1015 ret = btrfs_del_root(trans, tree_root, "a_root->root_key); 1016 if (ret) 1017 goto out; 1018 1019 list_del("a_root->dirty_list); 1020 1021 btrfs_tree_lock(quota_root->node); 1022 clean_tree_block(fs_info, quota_root->node); 1023 btrfs_tree_unlock(quota_root->node); 1024 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 1025 1026 free_extent_buffer(quota_root->node); 1027 free_extent_buffer(quota_root->commit_root); 1028 kfree(quota_root); 1029 out: 1030 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1031 return ret; 1032 } 1033 1034 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 1035 struct btrfs_qgroup *qgroup) 1036 { 1037 if (list_empty(&qgroup->dirty)) 1038 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1039 } 1040 1041 static void report_reserved_underflow(struct btrfs_fs_info *fs_info, 1042 struct btrfs_qgroup *qgroup, 1043 u64 num_bytes) 1044 { 1045 btrfs_warn(fs_info, 1046 "qgroup %llu reserved space underflow, have: %llu, to free: %llu", 1047 qgroup->qgroupid, qgroup->reserved, num_bytes); 1048 qgroup->reserved = 0; 1049 } 1050 /* 1051 * The easy accounting, if we are adding/removing the only ref for an extent 1052 * then this qgroup and all of the parent qgroups get their reference and 1053 * exclusive counts adjusted. 1054 * 1055 * Caller should hold fs_info->qgroup_lock. 
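 *
 * Worked example (hypothetical layout): an extent of num_bytes = 16K whose
 * only reference belongs to subvolume qgroup 0/257, itself a member of
 * 1/100.  Dropping that reference calls this with sign = -1: 0/257 gets
 * rfer -= 16K and excl -= 16K, then the ulist walk visits 1/100 (and any
 * higher-level parents) and applies the same adjustment there.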
1056 */ 1057 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1058 struct ulist *tmp, u64 ref_root, 1059 u64 num_bytes, int sign) 1060 { 1061 struct btrfs_qgroup *qgroup; 1062 struct btrfs_qgroup_list *glist; 1063 struct ulist_node *unode; 1064 struct ulist_iterator uiter; 1065 int ret = 0; 1066 1067 qgroup = find_qgroup_rb(fs_info, ref_root); 1068 if (!qgroup) 1069 goto out; 1070 1071 qgroup->rfer += sign * num_bytes; 1072 qgroup->rfer_cmpr += sign * num_bytes; 1073 1074 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1075 qgroup->excl += sign * num_bytes; 1076 qgroup->excl_cmpr += sign * num_bytes; 1077 if (sign > 0) { 1078 if (WARN_ON(qgroup->reserved < num_bytes)) 1079 report_reserved_underflow(fs_info, qgroup, num_bytes); 1080 else 1081 qgroup->reserved -= num_bytes; 1082 } 1083 1084 qgroup_dirty(fs_info, qgroup); 1085 1086 /* Get all of the parent groups that contain this qgroup */ 1087 list_for_each_entry(glist, &qgroup->groups, next_group) { 1088 ret = ulist_add(tmp, glist->group->qgroupid, 1089 qgroup_to_aux(glist->group), GFP_ATOMIC); 1090 if (ret < 0) 1091 goto out; 1092 } 1093 1094 /* Iterate all of the parents and adjust their reference counts */ 1095 ULIST_ITER_INIT(&uiter); 1096 while ((unode = ulist_next(tmp, &uiter))) { 1097 qgroup = unode_aux_to_qgroup(unode); 1098 qgroup->rfer += sign * num_bytes; 1099 qgroup->rfer_cmpr += sign * num_bytes; 1100 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1101 qgroup->excl += sign * num_bytes; 1102 if (sign > 0) { 1103 if (WARN_ON(qgroup->reserved < num_bytes)) 1104 report_reserved_underflow(fs_info, qgroup, 1105 num_bytes); 1106 else 1107 qgroup->reserved -= num_bytes; 1108 } 1109 qgroup->excl_cmpr += sign * num_bytes; 1110 qgroup_dirty(fs_info, qgroup); 1111 1112 /* Add any parents of the parents */ 1113 list_for_each_entry(glist, &qgroup->groups, next_group) { 1114 ret = ulist_add(tmp, glist->group->qgroupid, 1115 qgroup_to_aux(glist->group), GFP_ATOMIC); 1116 if (ret < 0) 1117 goto out; 1118 } 1119 } 1120 ret = 0; 1121 out: 1122 return ret; 1123 } 1124 1125 1126 /* 1127 * Quick path for updating a qgroup with only excl refs. 1128 * 1129 * In that case, just updating all parents will be enough. 1130 * Otherwise we need to do a full rescan. 1131 * Caller should also hold fs_info->qgroup_lock. 1132 * 1133 * Return 0 for a quick update, return >0 when a full rescan is needed 1134 * and the INCONSISTENT flag has been set. 1135 * Return < 0 for other errors.
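 *
 * Intuition: when the child's excl == rfer it shares nothing with other
 * qgroups, so attaching or detaching it (sign = +1 / -1) only has to add
 * or subtract its excl byte count on every ancestor, which
 * __qgroup_excl_accounting() does.  Once some of its extents are shared
 * (excl != rfer) that shortcut is no longer valid, so the status is marked
 * inconsistent and a rescan has to recompute the numbers.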
1136 */ 1137 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1138 struct ulist *tmp, u64 src, u64 dst, 1139 int sign) 1140 { 1141 struct btrfs_qgroup *qgroup; 1142 int ret = 1; 1143 int err = 0; 1144 1145 qgroup = find_qgroup_rb(fs_info, src); 1146 if (!qgroup) 1147 goto out; 1148 if (qgroup->excl == qgroup->rfer) { 1149 ret = 0; 1150 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1151 qgroup->excl, sign); 1152 if (err < 0) { 1153 ret = err; 1154 goto out; 1155 } 1156 } 1157 out: 1158 if (ret) 1159 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1160 return ret; 1161 } 1162 1163 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1164 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1165 { 1166 struct btrfs_root *quota_root; 1167 struct btrfs_qgroup *parent; 1168 struct btrfs_qgroup *member; 1169 struct btrfs_qgroup_list *list; 1170 struct ulist *tmp; 1171 int ret = 0; 1172 1173 /* Check the level of src and dst first */ 1174 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1175 return -EINVAL; 1176 1177 tmp = ulist_alloc(GFP_KERNEL); 1178 if (!tmp) 1179 return -ENOMEM; 1180 1181 mutex_lock(&fs_info->qgroup_ioctl_lock); 1182 quota_root = fs_info->quota_root; 1183 if (!quota_root) { 1184 ret = -EINVAL; 1185 goto out; 1186 } 1187 member = find_qgroup_rb(fs_info, src); 1188 parent = find_qgroup_rb(fs_info, dst); 1189 if (!member || !parent) { 1190 ret = -EINVAL; 1191 goto out; 1192 } 1193 1194 /* check if such qgroup relation exist firstly */ 1195 list_for_each_entry(list, &member->groups, next_group) { 1196 if (list->group == parent) { 1197 ret = -EEXIST; 1198 goto out; 1199 } 1200 } 1201 1202 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1203 if (ret) 1204 goto out; 1205 1206 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1207 if (ret) { 1208 del_qgroup_relation_item(trans, quota_root, src, dst); 1209 goto out; 1210 } 1211 1212 spin_lock(&fs_info->qgroup_lock); 1213 ret = add_relation_rb(fs_info, src, dst); 1214 if (ret < 0) { 1215 spin_unlock(&fs_info->qgroup_lock); 1216 goto out; 1217 } 1218 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1219 spin_unlock(&fs_info->qgroup_lock); 1220 out: 1221 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1222 ulist_free(tmp); 1223 return ret; 1224 } 1225 1226 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1227 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1228 { 1229 struct btrfs_root *quota_root; 1230 struct btrfs_qgroup *parent; 1231 struct btrfs_qgroup *member; 1232 struct btrfs_qgroup_list *list; 1233 struct ulist *tmp; 1234 int ret = 0; 1235 int err; 1236 1237 tmp = ulist_alloc(GFP_KERNEL); 1238 if (!tmp) 1239 return -ENOMEM; 1240 1241 quota_root = fs_info->quota_root; 1242 if (!quota_root) { 1243 ret = -EINVAL; 1244 goto out; 1245 } 1246 1247 member = find_qgroup_rb(fs_info, src); 1248 parent = find_qgroup_rb(fs_info, dst); 1249 if (!member || !parent) { 1250 ret = -EINVAL; 1251 goto out; 1252 } 1253 1254 /* check if such qgroup relation exist firstly */ 1255 list_for_each_entry(list, &member->groups, next_group) { 1256 if (list->group == parent) 1257 goto exist; 1258 } 1259 ret = -ENOENT; 1260 goto out; 1261 exist: 1262 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1263 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1264 if (err && !ret) 1265 ret = err; 1266 1267 spin_lock(&fs_info->qgroup_lock); 1268 del_relation_rb(fs_info, src, dst); 1269 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1270 
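	/*
	 * A return of 1 from quick_update_accounting() only means the fast
	 * path could not be used and the status was marked inconsistent; the
	 * relation itself was already removed above.
	 */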
spin_unlock(&fs_info->qgroup_lock); 1271 out: 1272 ulist_free(tmp); 1273 return ret; 1274 } 1275 1276 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1277 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1278 { 1279 int ret = 0; 1280 1281 mutex_lock(&fs_info->qgroup_ioctl_lock); 1282 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1283 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1284 1285 return ret; 1286 } 1287 1288 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1289 struct btrfs_fs_info *fs_info, u64 qgroupid) 1290 { 1291 struct btrfs_root *quota_root; 1292 struct btrfs_qgroup *qgroup; 1293 int ret = 0; 1294 1295 mutex_lock(&fs_info->qgroup_ioctl_lock); 1296 quota_root = fs_info->quota_root; 1297 if (!quota_root) { 1298 ret = -EINVAL; 1299 goto out; 1300 } 1301 qgroup = find_qgroup_rb(fs_info, qgroupid); 1302 if (qgroup) { 1303 ret = -EEXIST; 1304 goto out; 1305 } 1306 1307 ret = add_qgroup_item(trans, quota_root, qgroupid); 1308 if (ret) 1309 goto out; 1310 1311 spin_lock(&fs_info->qgroup_lock); 1312 qgroup = add_qgroup_rb(fs_info, qgroupid); 1313 spin_unlock(&fs_info->qgroup_lock); 1314 1315 if (IS_ERR(qgroup)) 1316 ret = PTR_ERR(qgroup); 1317 out: 1318 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1319 return ret; 1320 } 1321 1322 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1323 struct btrfs_fs_info *fs_info, u64 qgroupid) 1324 { 1325 struct btrfs_root *quota_root; 1326 struct btrfs_qgroup *qgroup; 1327 struct btrfs_qgroup_list *list; 1328 int ret = 0; 1329 1330 mutex_lock(&fs_info->qgroup_ioctl_lock); 1331 quota_root = fs_info->quota_root; 1332 if (!quota_root) { 1333 ret = -EINVAL; 1334 goto out; 1335 } 1336 1337 qgroup = find_qgroup_rb(fs_info, qgroupid); 1338 if (!qgroup) { 1339 ret = -ENOENT; 1340 goto out; 1341 } else { 1342 /* check if there are no children of this qgroup */ 1343 if (!list_empty(&qgroup->members)) { 1344 ret = -EBUSY; 1345 goto out; 1346 } 1347 } 1348 ret = del_qgroup_item(trans, quota_root, qgroupid); 1349 1350 while (!list_empty(&qgroup->groups)) { 1351 list = list_first_entry(&qgroup->groups, 1352 struct btrfs_qgroup_list, next_group); 1353 ret = __del_qgroup_relation(trans, fs_info, 1354 qgroupid, 1355 list->group->qgroupid); 1356 if (ret) 1357 goto out; 1358 } 1359 1360 spin_lock(&fs_info->qgroup_lock); 1361 del_qgroup_rb(fs_info, qgroupid); 1362 spin_unlock(&fs_info->qgroup_lock); 1363 out: 1364 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1365 return ret; 1366 } 1367 1368 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1369 struct btrfs_fs_info *fs_info, u64 qgroupid, 1370 struct btrfs_qgroup_limit *limit) 1371 { 1372 struct btrfs_root *quota_root; 1373 struct btrfs_qgroup *qgroup; 1374 int ret = 0; 1375 /* Sometimes we would want to clear the limit on this qgroup. 1376 * To meet this requirement, we treat the -1 as a special value 1377 * which tell kernel to clear the limit on this qgroup. 
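 *
 * For instance, a caller that wants to drop a previously set referenced
 * limit passes the flag together with the magic value (sketch using only
 * the fields handled below):
 *
 *	struct btrfs_qgroup_limit lim = {
 *		.flags    = BTRFS_QGROUP_LIMIT_MAX_RFER,
 *		.max_rfer = (u64)-1,
 *	};
 *
 * which makes the code below clear BTRFS_QGROUP_LIMIT_MAX_RFER from
 * lim_flags and reset max_rfer to 0 instead of storing the huge value.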
1378 */ 1379 const u64 CLEAR_VALUE = -1; 1380 1381 mutex_lock(&fs_info->qgroup_ioctl_lock); 1382 quota_root = fs_info->quota_root; 1383 if (!quota_root) { 1384 ret = -EINVAL; 1385 goto out; 1386 } 1387 1388 qgroup = find_qgroup_rb(fs_info, qgroupid); 1389 if (!qgroup) { 1390 ret = -ENOENT; 1391 goto out; 1392 } 1393 1394 spin_lock(&fs_info->qgroup_lock); 1395 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1396 if (limit->max_rfer == CLEAR_VALUE) { 1397 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1398 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1399 qgroup->max_rfer = 0; 1400 } else { 1401 qgroup->max_rfer = limit->max_rfer; 1402 } 1403 } 1404 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1405 if (limit->max_excl == CLEAR_VALUE) { 1406 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1407 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1408 qgroup->max_excl = 0; 1409 } else { 1410 qgroup->max_excl = limit->max_excl; 1411 } 1412 } 1413 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1414 if (limit->rsv_rfer == CLEAR_VALUE) { 1415 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1416 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1417 qgroup->rsv_rfer = 0; 1418 } else { 1419 qgroup->rsv_rfer = limit->rsv_rfer; 1420 } 1421 } 1422 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1423 if (limit->rsv_excl == CLEAR_VALUE) { 1424 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1425 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1426 qgroup->rsv_excl = 0; 1427 } else { 1428 qgroup->rsv_excl = limit->rsv_excl; 1429 } 1430 } 1431 qgroup->lim_flags |= limit->flags; 1432 1433 spin_unlock(&fs_info->qgroup_lock); 1434 1435 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1436 if (ret) { 1437 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1438 btrfs_info(fs_info, "unable to update quota limit for %llu", 1439 qgroupid); 1440 } 1441 1442 out: 1443 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1444 return ret; 1445 } 1446 1447 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 1448 struct btrfs_fs_info *fs_info) 1449 { 1450 struct btrfs_qgroup_extent_record *record; 1451 struct btrfs_delayed_ref_root *delayed_refs; 1452 struct rb_node *node; 1453 u64 qgroup_to_skip; 1454 int ret = 0; 1455 1456 delayed_refs = &trans->transaction->delayed_refs; 1457 qgroup_to_skip = delayed_refs->qgroup_to_skip; 1458 1459 /* 1460 * No need to do lock, since this function will only be called in 1461 * btrfs_commit_transaction(). 
1462 */ 1463 node = rb_first(&delayed_refs->dirty_extent_root); 1464 while (node) { 1465 record = rb_entry(node, struct btrfs_qgroup_extent_record, 1466 node); 1467 if (WARN_ON(!record->old_roots)) 1468 ret = btrfs_find_all_roots(NULL, fs_info, 1469 record->bytenr, 0, &record->old_roots); 1470 if (ret < 0) 1471 break; 1472 if (qgroup_to_skip) 1473 ulist_del(record->old_roots, qgroup_to_skip, 0); 1474 node = rb_next(node); 1475 } 1476 return ret; 1477 } 1478 1479 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1480 struct btrfs_delayed_ref_root *delayed_refs, 1481 struct btrfs_qgroup_extent_record *record) 1482 { 1483 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1484 struct rb_node *parent_node = NULL; 1485 struct btrfs_qgroup_extent_record *entry; 1486 u64 bytenr = record->bytenr; 1487 1488 assert_spin_locked(&delayed_refs->lock); 1489 trace_btrfs_qgroup_trace_extent(fs_info, record); 1490 1491 while (*p) { 1492 parent_node = *p; 1493 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1494 node); 1495 if (bytenr < entry->bytenr) 1496 p = &(*p)->rb_left; 1497 else if (bytenr > entry->bytenr) 1498 p = &(*p)->rb_right; 1499 else 1500 return 1; 1501 } 1502 1503 rb_link_node(&record->node, parent_node, p); 1504 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1505 return 0; 1506 } 1507 1508 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1509 struct btrfs_qgroup_extent_record *qrecord) 1510 { 1511 struct ulist *old_root; 1512 u64 bytenr = qrecord->bytenr; 1513 int ret; 1514 1515 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); 1516 if (ret < 0) 1517 return ret; 1518 1519 /* 1520 * Here we don't need to get the lock of 1521 * trans->transaction->delayed_refs, since inserted qrecord won't 1522 * be deleted, only qrecord->node may be modified (new qrecord insert) 1523 * 1524 * So modifying qrecord->old_roots is safe here 1525 */ 1526 qrecord->old_roots = old_root; 1527 return 0; 1528 } 1529 1530 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1531 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1532 gfp_t gfp_flag) 1533 { 1534 struct btrfs_qgroup_extent_record *record; 1535 struct btrfs_delayed_ref_root *delayed_refs; 1536 int ret; 1537 1538 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1539 || bytenr == 0 || num_bytes == 0) 1540 return 0; 1541 if (WARN_ON(trans == NULL)) 1542 return -EINVAL; 1543 record = kmalloc(sizeof(*record), gfp_flag); 1544 if (!record) 1545 return -ENOMEM; 1546 1547 delayed_refs = &trans->transaction->delayed_refs; 1548 record->bytenr = bytenr; 1549 record->num_bytes = num_bytes; 1550 record->old_roots = NULL; 1551 1552 spin_lock(&delayed_refs->lock); 1553 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1554 spin_unlock(&delayed_refs->lock); 1555 if (ret > 0) { 1556 kfree(record); 1557 return 0; 1558 } 1559 return btrfs_qgroup_trace_extent_post(fs_info, record); 1560 } 1561 1562 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1563 struct btrfs_fs_info *fs_info, 1564 struct extent_buffer *eb) 1565 { 1566 int nr = btrfs_header_nritems(eb); 1567 int i, extent_type, ret; 1568 struct btrfs_key key; 1569 struct btrfs_file_extent_item *fi; 1570 u64 bytenr, num_bytes; 1571 1572 /* We can be called directly from walk_up_proc() */ 1573 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1574 return 0; 1575 1576 for (i = 0; i < nr; i++) { 1577 btrfs_item_key_to_cpu(eb, &key, i); 1578 1579 if (key.type 
!= BTRFS_EXTENT_DATA_KEY) 1580 continue; 1581 1582 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1583 /* filter out non qgroup-accountable extents */ 1584 extent_type = btrfs_file_extent_type(eb, fi); 1585 1586 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1587 continue; 1588 1589 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1590 if (!bytenr) 1591 continue; 1592 1593 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1594 1595 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1596 num_bytes, GFP_NOFS); 1597 if (ret) 1598 return ret; 1599 } 1600 return 0; 1601 } 1602 1603 /* 1604 * Walk up the tree from the bottom, freeing leaves and any interior 1605 * nodes which have had all slots visited. If a node (leaf or 1606 * interior) is freed, the node above it will have it's slot 1607 * incremented. The root node will never be freed. 1608 * 1609 * At the end of this function, we should have a path which has all 1610 * slots incremented to the next position for a search. If we need to 1611 * read a new node it will be NULL and the node above it will have the 1612 * correct slot selected for a later read. 1613 * 1614 * If we increment the root nodes slot counter past the number of 1615 * elements, 1 is returned to signal completion of the search. 1616 */ 1617 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 1618 { 1619 int level = 0; 1620 int nr, slot; 1621 struct extent_buffer *eb; 1622 1623 if (root_level == 0) 1624 return 1; 1625 1626 while (level <= root_level) { 1627 eb = path->nodes[level]; 1628 nr = btrfs_header_nritems(eb); 1629 path->slots[level]++; 1630 slot = path->slots[level]; 1631 if (slot >= nr || level == 0) { 1632 /* 1633 * Don't free the root - we will detect this 1634 * condition after our loop and return a 1635 * positive value for caller to stop walking the tree. 1636 */ 1637 if (level != root_level) { 1638 btrfs_tree_unlock_rw(eb, path->locks[level]); 1639 path->locks[level] = 0; 1640 1641 free_extent_buffer(eb); 1642 path->nodes[level] = NULL; 1643 path->slots[level] = 0; 1644 } 1645 } else { 1646 /* 1647 * We have a valid slot to walk back down 1648 * from. Stop here so caller can process these 1649 * new nodes. 1650 */ 1651 break; 1652 } 1653 1654 level++; 1655 } 1656 1657 eb = path->nodes[root_level]; 1658 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1659 return 1; 1660 1661 return 0; 1662 } 1663 1664 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1665 struct btrfs_root *root, 1666 struct extent_buffer *root_eb, 1667 u64 root_gen, int root_level) 1668 { 1669 struct btrfs_fs_info *fs_info = root->fs_info; 1670 int ret = 0; 1671 int level; 1672 struct extent_buffer *eb = root_eb; 1673 struct btrfs_path *path = NULL; 1674 1675 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); 1676 BUG_ON(root_eb == NULL); 1677 1678 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1679 return 0; 1680 1681 if (!extent_buffer_uptodate(root_eb)) { 1682 ret = btrfs_read_buffer(root_eb, root_gen); 1683 if (ret) 1684 goto out; 1685 } 1686 1687 if (root_level == 0) { 1688 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1689 goto out; 1690 } 1691 1692 path = btrfs_alloc_path(); 1693 if (!path) 1694 return -ENOMEM; 1695 1696 /* 1697 * Walk down the tree. Missing extent blocks are filled in as 1698 * we go. Metadata is accounted every time we read a new 1699 * extent block. 
1700 * 1701 * When we reach a leaf, we account for file extent items in it, 1702 * walk back up the tree (adjusting slot pointers as we go) 1703 * and restart the search process. 1704 */ 1705 extent_buffer_get(root_eb); /* For path */ 1706 path->nodes[root_level] = root_eb; 1707 path->slots[root_level] = 0; 1708 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 1709 walk_down: 1710 level = root_level; 1711 while (level >= 0) { 1712 if (path->nodes[level] == NULL) { 1713 int parent_slot; 1714 u64 child_gen; 1715 u64 child_bytenr; 1716 1717 /* 1718 * We need to get child blockptr/gen from parent before 1719 * we can read it. 1720 */ 1721 eb = path->nodes[level + 1]; 1722 parent_slot = path->slots[level + 1]; 1723 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 1724 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 1725 1726 eb = read_tree_block(fs_info, child_bytenr, child_gen); 1727 if (IS_ERR(eb)) { 1728 ret = PTR_ERR(eb); 1729 goto out; 1730 } else if (!extent_buffer_uptodate(eb)) { 1731 free_extent_buffer(eb); 1732 ret = -EIO; 1733 goto out; 1734 } 1735 1736 path->nodes[level] = eb; 1737 path->slots[level] = 0; 1738 1739 btrfs_tree_read_lock(eb); 1740 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1741 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1742 1743 ret = btrfs_qgroup_trace_extent(trans, fs_info, 1744 child_bytenr, 1745 fs_info->nodesize, 1746 GFP_NOFS); 1747 if (ret) 1748 goto out; 1749 } 1750 1751 if (level == 0) { 1752 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1753 path->nodes[level]); 1754 if (ret) 1755 goto out; 1756 1757 /* Nonzero return here means we completed our search */ 1758 ret = adjust_slots_upwards(path, root_level); 1759 if (ret) 1760 break; 1761 1762 /* Restart search with new slots */ 1763 goto walk_down; 1764 } 1765 1766 level--; 1767 } 1768 1769 ret = 0; 1770 out: 1771 btrfs_free_path(path); 1772 1773 return ret; 1774 } 1775 1776 #define UPDATE_NEW 0 1777 #define UPDATE_OLD 1 1778 /* 1779 * Walk all of the roots that points to the bytenr and adjust their refcnts. 
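 *
 * The per-qgroup old/new refcnt fields are reused across accounting rounds:
 * btrfs_qgroup_update_*_refcnt() lazily resets a counter to the current seq
 * before adding, and btrfs_qgroup_get_*_refcnt() reads it back as "count
 * accumulated since seq".  After this walk a qgroup's count says how many of
 * the roots in the ulist reach it, e.g. (hypothetical) two subvolumes under
 * the same parent both referencing the extent give the parent a count of 2
 * while each subvolume qgroup gets 1.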
1780 */ 1781 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 1782 struct ulist *roots, struct ulist *tmp, 1783 struct ulist *qgroups, u64 seq, int update_old) 1784 { 1785 struct ulist_node *unode; 1786 struct ulist_iterator uiter; 1787 struct ulist_node *tmp_unode; 1788 struct ulist_iterator tmp_uiter; 1789 struct btrfs_qgroup *qg; 1790 int ret = 0; 1791 1792 if (!roots) 1793 return 0; 1794 ULIST_ITER_INIT(&uiter); 1795 while ((unode = ulist_next(roots, &uiter))) { 1796 qg = find_qgroup_rb(fs_info, unode->val); 1797 if (!qg) 1798 continue; 1799 1800 ulist_reinit(tmp); 1801 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg), 1802 GFP_ATOMIC); 1803 if (ret < 0) 1804 return ret; 1805 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); 1806 if (ret < 0) 1807 return ret; 1808 ULIST_ITER_INIT(&tmp_uiter); 1809 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1810 struct btrfs_qgroup_list *glist; 1811 1812 qg = unode_aux_to_qgroup(tmp_unode); 1813 if (update_old) 1814 btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1815 else 1816 btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1817 list_for_each_entry(glist, &qg->groups, next_group) { 1818 ret = ulist_add(qgroups, glist->group->qgroupid, 1819 qgroup_to_aux(glist->group), 1820 GFP_ATOMIC); 1821 if (ret < 0) 1822 return ret; 1823 ret = ulist_add(tmp, glist->group->qgroupid, 1824 qgroup_to_aux(glist->group), 1825 GFP_ATOMIC); 1826 if (ret < 0) 1827 return ret; 1828 } 1829 } 1830 } 1831 return 0; 1832 } 1833 1834 /* 1835 * Update qgroup rfer/excl counters. 1836 * The rfer update is easy, the code explains itself. 1837 * 1838 * The excl update is tricky, it is split into 2 parts. 1839 * Part 1: Possible exclusive <-> sharing detect: 1840 * | A | !A | 1841 * ------------------------------------- 1842 * B | * | - | 1843 * ------------------------------------- 1844 * !B | + | ** | 1845 * ------------------------------------- 1846 * 1847 * Conditions: 1848 * A: cur_old_roots < nr_old_roots (not exclusive before) 1849 * !A: cur_old_roots == nr_old_roots (possibly exclusive before) 1850 * B: cur_new_roots < nr_new_roots (not exclusive now) 1851 * !B: cur_new_roots == nr_new_roots (possibly exclusive now) 1852 * 1853 * Results: 1854 * +: Possible sharing -> exclusive -: Possible exclusive -> sharing 1855 * *: Definitely not changed. **: Possibly unchanged. 1856 * 1857 * For the !A and !B conditions, the exception is the cur_old/new_roots == 0 case. 1858 * 1859 * To make the logic clear, we first use conditions A and B to split the 1860 * combinations into 4 results. 1861 * 1862 * Then, for results "+" and "-", check the old/new_roots == 0 case, as there 1863 * only one variant may be 0. 1864 * 1865 * Lastly, check result **; since 2 variants may be 0 there, split it 1866 * again (2x2). 1867 * But this time we don't need to consider other things; the code and logic 1868 * are easy to follow now.
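 *
 * Worked example (hypothetical): an extent goes from 1 root to 2 roots
 * (nr_old_roots = 1, nr_new_roots = 2).  The qgroup that already owned it
 * has cur_old_count = 1 and cur_new_count = 1: condition !A holds
 * (1 == nr_old_roots) and B holds (1 < nr_new_roots), which is the "-"
 * cell, so it loses num_bytes of excl while keeping its rfer.  The qgroup
 * that just gained the extent has cur_old_count = 0 and cur_new_count = 1:
 * it picks up num_bytes of rfer but, since 1 < nr_new_roots, no excl.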
1869 */ 1870 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 1871 struct ulist *qgroups, 1872 u64 nr_old_roots, 1873 u64 nr_new_roots, 1874 u64 num_bytes, u64 seq) 1875 { 1876 struct ulist_node *unode; 1877 struct ulist_iterator uiter; 1878 struct btrfs_qgroup *qg; 1879 u64 cur_new_count, cur_old_count; 1880 1881 ULIST_ITER_INIT(&uiter); 1882 while ((unode = ulist_next(qgroups, &uiter))) { 1883 bool dirty = false; 1884 1885 qg = unode_aux_to_qgroup(unode); 1886 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 1887 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 1888 1889 trace_qgroup_update_counters(fs_info, qg->qgroupid, 1890 cur_old_count, cur_new_count); 1891 1892 /* Rfer update part */ 1893 if (cur_old_count == 0 && cur_new_count > 0) { 1894 qg->rfer += num_bytes; 1895 qg->rfer_cmpr += num_bytes; 1896 dirty = true; 1897 } 1898 if (cur_old_count > 0 && cur_new_count == 0) { 1899 qg->rfer -= num_bytes; 1900 qg->rfer_cmpr -= num_bytes; 1901 dirty = true; 1902 } 1903 1904 /* Excl update part */ 1905 /* Exclusive/none -> shared case */ 1906 if (cur_old_count == nr_old_roots && 1907 cur_new_count < nr_new_roots) { 1908 /* Exclusive -> shared */ 1909 if (cur_old_count != 0) { 1910 qg->excl -= num_bytes; 1911 qg->excl_cmpr -= num_bytes; 1912 dirty = true; 1913 } 1914 } 1915 1916 /* Shared -> exclusive/none case */ 1917 if (cur_old_count < nr_old_roots && 1918 cur_new_count == nr_new_roots) { 1919 /* Shared->exclusive */ 1920 if (cur_new_count != 0) { 1921 qg->excl += num_bytes; 1922 qg->excl_cmpr += num_bytes; 1923 dirty = true; 1924 } 1925 } 1926 1927 /* Exclusive/none -> exclusive/none case */ 1928 if (cur_old_count == nr_old_roots && 1929 cur_new_count == nr_new_roots) { 1930 if (cur_old_count == 0) { 1931 /* None -> exclusive/none */ 1932 1933 if (cur_new_count != 0) { 1934 /* None -> exclusive */ 1935 qg->excl += num_bytes; 1936 qg->excl_cmpr += num_bytes; 1937 dirty = true; 1938 } 1939 /* None -> none, nothing changed */ 1940 } else { 1941 /* Exclusive -> exclusive/none */ 1942 1943 if (cur_new_count == 0) { 1944 /* Exclusive -> none */ 1945 qg->excl -= num_bytes; 1946 qg->excl_cmpr -= num_bytes; 1947 dirty = true; 1948 } 1949 /* Exclusive -> exclusive, nothing changed */ 1950 } 1951 } 1952 1953 if (dirty) 1954 qgroup_dirty(fs_info, qg); 1955 } 1956 return 0; 1957 } 1958 1959 int 1960 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 1961 struct btrfs_fs_info *fs_info, 1962 u64 bytenr, u64 num_bytes, 1963 struct ulist *old_roots, struct ulist *new_roots) 1964 { 1965 struct ulist *qgroups = NULL; 1966 struct ulist *tmp = NULL; 1967 u64 seq; 1968 u64 nr_new_roots = 0; 1969 u64 nr_old_roots = 0; 1970 int ret = 0; 1971 1972 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1973 return 0; 1974 1975 if (new_roots) 1976 nr_new_roots = new_roots->nnodes; 1977 if (old_roots) 1978 nr_old_roots = old_roots->nnodes; 1979 1980 BUG_ON(!fs_info->quota_root); 1981 1982 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, 1983 nr_old_roots, nr_new_roots); 1984 1985 qgroups = ulist_alloc(GFP_NOFS); 1986 if (!qgroups) { 1987 ret = -ENOMEM; 1988 goto out_free; 1989 } 1990 tmp = ulist_alloc(GFP_NOFS); 1991 if (!tmp) { 1992 ret = -ENOMEM; 1993 goto out_free; 1994 } 1995 1996 mutex_lock(&fs_info->qgroup_rescan_lock); 1997 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1998 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 1999 mutex_unlock(&fs_info->qgroup_rescan_lock); 2000 ret = 0; 2001 goto out_free; 2002 } 2003 } 2004 
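	/*
	 * If a rescan is running and has not passed this bytenr yet
	 * (rescan_progress <= bytenr), the rescan worker will account the
	 * extent when it gets there, so it can be skipped here (the goto
	 * above) without touching any counters.
	 */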
mutex_unlock(&fs_info->qgroup_rescan_lock); 2005 2006 spin_lock(&fs_info->qgroup_lock); 2007 seq = fs_info->qgroup_seq; 2008 2009 /* Update old refcnts using old_roots */ 2010 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 2011 UPDATE_OLD); 2012 if (ret < 0) 2013 goto out; 2014 2015 /* Update new refcnts using new_roots */ 2016 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 2017 UPDATE_NEW); 2018 if (ret < 0) 2019 goto out; 2020 2021 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 2022 num_bytes, seq); 2023 2024 /* 2025 * Bump qgroup_seq to avoid seq overlap 2026 */ 2027 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 2028 out: 2029 spin_unlock(&fs_info->qgroup_lock); 2030 out_free: 2031 ulist_free(tmp); 2032 ulist_free(qgroups); 2033 ulist_free(old_roots); 2034 ulist_free(new_roots); 2035 return ret; 2036 } 2037 2038 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 2039 struct btrfs_fs_info *fs_info) 2040 { 2041 struct btrfs_qgroup_extent_record *record; 2042 struct btrfs_delayed_ref_root *delayed_refs; 2043 struct ulist *new_roots = NULL; 2044 struct rb_node *node; 2045 u64 qgroup_to_skip; 2046 int ret = 0; 2047 2048 delayed_refs = &trans->transaction->delayed_refs; 2049 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2050 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2051 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2052 node); 2053 2054 trace_btrfs_qgroup_account_extents(fs_info, record); 2055 2056 if (!ret) { 2057 /* 2058 * Use (u64)-1 as time_seq to do special search, which 2059 * doesn't lock tree or delayed_refs and search current 2060 * root. It's safe inside commit_transaction(). 2061 */ 2062 ret = btrfs_find_all_roots(trans, fs_info, 2063 record->bytenr, (u64)-1, &new_roots); 2064 if (ret < 0) 2065 goto cleanup; 2066 if (qgroup_to_skip) 2067 ulist_del(new_roots, qgroup_to_skip, 0); 2068 ret = btrfs_qgroup_account_extent(trans, fs_info, 2069 record->bytenr, record->num_bytes, 2070 record->old_roots, new_roots); 2071 record->old_roots = NULL; 2072 new_roots = NULL; 2073 } 2074 cleanup: 2075 ulist_free(record->old_roots); 2076 ulist_free(new_roots); 2077 new_roots = NULL; 2078 rb_erase(node, &delayed_refs->dirty_extent_root); 2079 kfree(record); 2080 2081 } 2082 return ret; 2083 } 2084 2085 /* 2086 * called from commit_transaction. Writes all changed qgroups to disk. 
2087 */ 2088 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2089 struct btrfs_fs_info *fs_info) 2090 { 2091 struct btrfs_root *quota_root = fs_info->quota_root; 2092 int ret = 0; 2093 int start_rescan_worker = 0; 2094 2095 if (!quota_root) 2096 goto out; 2097 2098 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 2099 test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2100 start_rescan_worker = 1; 2101 2102 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2103 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2104 if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) 2105 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2106 2107 spin_lock(&fs_info->qgroup_lock); 2108 while (!list_empty(&fs_info->dirty_qgroups)) { 2109 struct btrfs_qgroup *qgroup; 2110 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2111 struct btrfs_qgroup, dirty); 2112 list_del_init(&qgroup->dirty); 2113 spin_unlock(&fs_info->qgroup_lock); 2114 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2115 if (ret) 2116 fs_info->qgroup_flags |= 2117 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2118 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2119 if (ret) 2120 fs_info->qgroup_flags |= 2121 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2122 spin_lock(&fs_info->qgroup_lock); 2123 } 2124 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2125 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2126 else 2127 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2128 spin_unlock(&fs_info->qgroup_lock); 2129 2130 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2131 if (ret) 2132 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2133 2134 if (!ret && start_rescan_worker) { 2135 ret = qgroup_rescan_init(fs_info, 0, 1); 2136 if (!ret) { 2137 qgroup_rescan_zero_tracking(fs_info); 2138 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2139 &fs_info->qgroup_rescan_work); 2140 } 2141 ret = 0; 2142 } 2143 2144 out: 2145 2146 return ret; 2147 } 2148 2149 /* 2150 * Copy the accounting information between qgroups. This is necessary 2151 * when a snapshot or a subvolume is created. Throwing an error will 2152 * cause a transaction abort so we take extra care here to only error 2153 * when a readonly fs is a reasonable outcome. 2154 */ 2155 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2156 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2157 struct btrfs_qgroup_inherit *inherit) 2158 { 2159 int ret = 0; 2160 int i; 2161 u64 *i_qgroups; 2162 struct btrfs_root *quota_root = fs_info->quota_root; 2163 struct btrfs_qgroup *srcgroup; 2164 struct btrfs_qgroup *dstgroup; 2165 u32 level_size = 0; 2166 u64 nums; 2167 2168 mutex_lock(&fs_info->qgroup_ioctl_lock); 2169 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2170 goto out; 2171 2172 if (!quota_root) { 2173 ret = -EINVAL; 2174 goto out; 2175 } 2176 2177 if (inherit) { 2178 i_qgroups = (u64 *)(inherit + 1); 2179 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2180 2 * inherit->num_excl_copies; 2181 for (i = 0; i < nums; ++i) { 2182 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2183 2184 /* 2185 * Zero out invalid groups so we can ignore 2186 * them later. 
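 * An entry is treated as invalid if no such qgroup exists or if its level
 * (the top 16 bits of the qgroupid) does not exceed the level of the new
 * subvolume, since only higher-level qgroups may be inherited from here.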
2187 */ 2188 if (!srcgroup || 2189 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2190 *i_qgroups = 0ULL; 2191 2192 ++i_qgroups; 2193 } 2194 } 2195 2196 /* 2197 * create a tracking group for the subvol itself 2198 */ 2199 ret = add_qgroup_item(trans, quota_root, objectid); 2200 if (ret) 2201 goto out; 2202 2203 if (srcid) { 2204 struct btrfs_root *srcroot; 2205 struct btrfs_key srckey; 2206 2207 srckey.objectid = srcid; 2208 srckey.type = BTRFS_ROOT_ITEM_KEY; 2209 srckey.offset = (u64)-1; 2210 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2211 if (IS_ERR(srcroot)) { 2212 ret = PTR_ERR(srcroot); 2213 goto out; 2214 } 2215 2216 level_size = fs_info->nodesize; 2217 } 2218 2219 /* 2220 * add qgroup to all inherited groups 2221 */ 2222 if (inherit) { 2223 i_qgroups = (u64 *)(inherit + 1); 2224 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2225 if (*i_qgroups == 0) 2226 continue; 2227 ret = add_qgroup_relation_item(trans, quota_root, 2228 objectid, *i_qgroups); 2229 if (ret && ret != -EEXIST) 2230 goto out; 2231 ret = add_qgroup_relation_item(trans, quota_root, 2232 *i_qgroups, objectid); 2233 if (ret && ret != -EEXIST) 2234 goto out; 2235 } 2236 ret = 0; 2237 } 2238 2239 2240 spin_lock(&fs_info->qgroup_lock); 2241 2242 dstgroup = add_qgroup_rb(fs_info, objectid); 2243 if (IS_ERR(dstgroup)) { 2244 ret = PTR_ERR(dstgroup); 2245 goto unlock; 2246 } 2247 2248 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2249 dstgroup->lim_flags = inherit->lim.flags; 2250 dstgroup->max_rfer = inherit->lim.max_rfer; 2251 dstgroup->max_excl = inherit->lim.max_excl; 2252 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2253 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2254 2255 ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2256 if (ret) { 2257 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2258 btrfs_info(fs_info, 2259 "unable to update quota limit for %llu", 2260 dstgroup->qgroupid); 2261 goto unlock; 2262 } 2263 } 2264 2265 if (srcid) { 2266 srcgroup = find_qgroup_rb(fs_info, srcid); 2267 if (!srcgroup) 2268 goto unlock; 2269 2270 /* 2271 * We call inherit after we clone the root in order to make sure 2272 * our counts don't go crazy, so at this point the only 2273 * difference between the two roots should be the root node. 
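 * The counters below reflect exactly that: the new group starts with the
 * same referenced counts as the source, while the exclusive count of both
 * groups collapses to a single tree node (level_size, i.e.
 * fs_info->nodesize), because that root node is now the only extent either
 * subvolume owns exclusively.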
2274 */ 2275 dstgroup->rfer = srcgroup->rfer; 2276 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2277 dstgroup->excl = level_size; 2278 dstgroup->excl_cmpr = level_size; 2279 srcgroup->excl = level_size; 2280 srcgroup->excl_cmpr = level_size; 2281 2282 /* inherit the limit info */ 2283 dstgroup->lim_flags = srcgroup->lim_flags; 2284 dstgroup->max_rfer = srcgroup->max_rfer; 2285 dstgroup->max_excl = srcgroup->max_excl; 2286 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2287 dstgroup->rsv_excl = srcgroup->rsv_excl; 2288 2289 qgroup_dirty(fs_info, dstgroup); 2290 qgroup_dirty(fs_info, srcgroup); 2291 } 2292 2293 if (!inherit) 2294 goto unlock; 2295 2296 i_qgroups = (u64 *)(inherit + 1); 2297 for (i = 0; i < inherit->num_qgroups; ++i) { 2298 if (*i_qgroups) { 2299 ret = add_relation_rb(fs_info, objectid, *i_qgroups); 2300 if (ret) 2301 goto unlock; 2302 } 2303 ++i_qgroups; 2304 } 2305 2306 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { 2307 struct btrfs_qgroup *src; 2308 struct btrfs_qgroup *dst; 2309 2310 if (!i_qgroups[0] || !i_qgroups[1]) 2311 continue; 2312 2313 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2314 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2315 2316 if (!src || !dst) { 2317 ret = -EINVAL; 2318 goto unlock; 2319 } 2320 2321 dst->rfer = src->rfer - level_size; 2322 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2323 } 2324 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { 2325 struct btrfs_qgroup *src; 2326 struct btrfs_qgroup *dst; 2327 2328 if (!i_qgroups[0] || !i_qgroups[1]) 2329 continue; 2330 2331 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2332 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2333 2334 if (!src || !dst) { 2335 ret = -EINVAL; 2336 goto unlock; 2337 } 2338 2339 dst->excl = src->excl + level_size; 2340 dst->excl_cmpr = src->excl_cmpr + level_size; 2341 } 2342 2343 unlock: 2344 spin_unlock(&fs_info->qgroup_lock); 2345 out: 2346 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2347 return ret; 2348 } 2349 2350 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) 2351 { 2352 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2353 qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer) 2354 return false; 2355 2356 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2357 qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl) 2358 return false; 2359 2360 return true; 2361 } 2362 2363 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) 2364 { 2365 struct btrfs_root *quota_root; 2366 struct btrfs_qgroup *qgroup; 2367 struct btrfs_fs_info *fs_info = root->fs_info; 2368 u64 ref_root = root->root_key.objectid; 2369 int ret = 0; 2370 struct ulist_node *unode; 2371 struct ulist_iterator uiter; 2372 2373 if (!is_fstree(ref_root)) 2374 return 0; 2375 2376 if (num_bytes == 0) 2377 return 0; 2378 2379 spin_lock(&fs_info->qgroup_lock); 2380 quota_root = fs_info->quota_root; 2381 if (!quota_root) 2382 goto out; 2383 2384 qgroup = find_qgroup_rb(fs_info, ref_root); 2385 if (!qgroup) 2386 goto out; 2387 2388 /* 2389 * in a first step, we check all affected qgroups if any limits would 2390 * be exceeded 2391 */ 2392 ulist_reinit(fs_info->qgroup_ulist); 2393 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2394 (uintptr_t)qgroup, GFP_ATOMIC); 2395 if (ret < 0) 2396 goto out; 2397 ULIST_ITER_INIT(&uiter); 2398 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2399 struct btrfs_qgroup *qg; 2400 struct btrfs_qgroup_list *glist; 2401 2402 qg = unode_aux_to_qgroup(unode); 2403 2404 if 
(enforce && !qgroup_check_limits(qg, num_bytes)) { 2405 ret = -EDQUOT; 2406 goto out; 2407 } 2408 2409 list_for_each_entry(glist, &qg->groups, next_group) { 2410 ret = ulist_add(fs_info->qgroup_ulist, 2411 glist->group->qgroupid, 2412 (uintptr_t)glist->group, GFP_ATOMIC); 2413 if (ret < 0) 2414 goto out; 2415 } 2416 } 2417 ret = 0; 2418 /* 2419 * no limits exceeded, now record the reservation into all qgroups 2420 */ 2421 ULIST_ITER_INIT(&uiter); 2422 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2423 struct btrfs_qgroup *qg; 2424 2425 qg = unode_aux_to_qgroup(unode); 2426 2427 qg->reserved += num_bytes; 2428 } 2429 2430 out: 2431 spin_unlock(&fs_info->qgroup_lock); 2432 return ret; 2433 } 2434 2435 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 2436 u64 ref_root, u64 num_bytes) 2437 { 2438 struct btrfs_root *quota_root; 2439 struct btrfs_qgroup *qgroup; 2440 struct ulist_node *unode; 2441 struct ulist_iterator uiter; 2442 int ret = 0; 2443 2444 if (!is_fstree(ref_root)) 2445 return; 2446 2447 if (num_bytes == 0) 2448 return; 2449 2450 spin_lock(&fs_info->qgroup_lock); 2451 2452 quota_root = fs_info->quota_root; 2453 if (!quota_root) 2454 goto out; 2455 2456 qgroup = find_qgroup_rb(fs_info, ref_root); 2457 if (!qgroup) 2458 goto out; 2459 2460 ulist_reinit(fs_info->qgroup_ulist); 2461 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2462 (uintptr_t)qgroup, GFP_ATOMIC); 2463 if (ret < 0) 2464 goto out; 2465 ULIST_ITER_INIT(&uiter); 2466 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2467 struct btrfs_qgroup *qg; 2468 struct btrfs_qgroup_list *glist; 2469 2470 qg = unode_aux_to_qgroup(unode); 2471 2472 if (WARN_ON(qg->reserved < num_bytes)) 2473 report_reserved_underflow(fs_info, qg, num_bytes); 2474 else 2475 qg->reserved -= num_bytes; 2476 2477 list_for_each_entry(glist, &qg->groups, next_group) { 2478 ret = ulist_add(fs_info->qgroup_ulist, 2479 glist->group->qgroupid, 2480 (uintptr_t)glist->group, GFP_ATOMIC); 2481 if (ret < 0) 2482 goto out; 2483 } 2484 } 2485 2486 out: 2487 spin_unlock(&fs_info->qgroup_lock); 2488 } 2489 2490 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) 2491 { 2492 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 2493 return; 2494 btrfs_err(trans->fs_info, 2495 "qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x", 2496 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 2497 (u32)(trans->delayed_ref_elem.seq >> 32), 2498 (u32)trans->delayed_ref_elem.seq); 2499 BUG(); 2500 } 2501 2502 /* 2503 * returns < 0 on error, 0 when more leafs are to be scanned. 2504 * returns 1 when done. 
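 * Each invocation handles one extent tree leaf: the leaf at the current
 * qgroup_rescan_progress key is cloned, the progress key is advanced past
 * it, and each EXTENT_ITEM / METADATA_ITEM on the clone is accounted via
 * btrfs_qgroup_account_extent() with old_roots passed as NULL, since
 * nothing had been accounted before the rescan started.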
2505 */ 2506 static int 2507 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2508 struct btrfs_trans_handle *trans) 2509 { 2510 struct btrfs_key found; 2511 struct extent_buffer *scratch_leaf = NULL; 2512 struct ulist *roots = NULL; 2513 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); 2514 u64 num_bytes; 2515 int slot; 2516 int ret; 2517 2518 mutex_lock(&fs_info->qgroup_rescan_lock); 2519 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2520 &fs_info->qgroup_rescan_progress, 2521 path, 1, 0); 2522 2523 btrfs_debug(fs_info, 2524 "current progress key (%llu %u %llu), search_slot ret %d", 2525 fs_info->qgroup_rescan_progress.objectid, 2526 fs_info->qgroup_rescan_progress.type, 2527 fs_info->qgroup_rescan_progress.offset, ret); 2528 2529 if (ret) { 2530 /* 2531 * The rescan is about to end, we will not be scanning any 2532 * further blocks. We cannot unset the RESCAN flag here, because 2533 * we want to commit the transaction if everything went well. 2534 * To make the live accounting work in this phase, we set our 2535 * scan progress pointer such that every real extent objectid 2536 * will be smaller. 2537 */ 2538 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2539 btrfs_release_path(path); 2540 mutex_unlock(&fs_info->qgroup_rescan_lock); 2541 return ret; 2542 } 2543 2544 btrfs_item_key_to_cpu(path->nodes[0], &found, 2545 btrfs_header_nritems(path->nodes[0]) - 1); 2546 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2547 2548 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2549 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); 2550 if (!scratch_leaf) { 2551 ret = -ENOMEM; 2552 mutex_unlock(&fs_info->qgroup_rescan_lock); 2553 goto out; 2554 } 2555 extent_buffer_get(scratch_leaf); 2556 btrfs_tree_read_lock(scratch_leaf); 2557 btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK); 2558 slot = path->slots[0]; 2559 btrfs_release_path(path); 2560 mutex_unlock(&fs_info->qgroup_rescan_lock); 2561 2562 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2563 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2564 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2565 found.type != BTRFS_METADATA_ITEM_KEY) 2566 continue; 2567 if (found.type == BTRFS_METADATA_ITEM_KEY) 2568 num_bytes = fs_info->nodesize; 2569 else 2570 num_bytes = found.offset; 2571 2572 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2573 &roots); 2574 if (ret < 0) 2575 goto out; 2576 /* For rescan, just pass old_roots as NULL */ 2577 ret = btrfs_qgroup_account_extent(trans, fs_info, 2578 found.objectid, num_bytes, NULL, roots); 2579 if (ret < 0) 2580 goto out; 2581 } 2582 out: 2583 if (scratch_leaf) { 2584 btrfs_tree_read_unlock_blocking(scratch_leaf); 2585 free_extent_buffer(scratch_leaf); 2586 } 2587 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2588 2589 return ret; 2590 } 2591 2592 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2593 { 2594 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 2595 qgroup_rescan_work); 2596 struct btrfs_path *path; 2597 struct btrfs_trans_handle *trans = NULL; 2598 int err = -ENOMEM; 2599 int ret = 0; 2600 2601 path = btrfs_alloc_path(); 2602 if (!path) 2603 goto out; 2604 2605 err = 0; 2606 while (!err && !btrfs_fs_closing(fs_info)) { 2607 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2608 if (IS_ERR(trans)) { 2609 err = PTR_ERR(trans); 2610 break; 2611 } 2612 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 2613 err = -EINTR; 2614 } 
else { 2615 err = qgroup_rescan_leaf(fs_info, path, trans); 2616 } 2617 if (err > 0) 2618 btrfs_commit_transaction(trans); 2619 else 2620 btrfs_end_transaction(trans); 2621 } 2622 2623 out: 2624 btrfs_free_path(path); 2625 2626 mutex_lock(&fs_info->qgroup_rescan_lock); 2627 if (!btrfs_fs_closing(fs_info)) 2628 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2629 2630 if (err > 0 && 2631 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2632 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2633 } else if (err < 0) { 2634 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2635 } 2636 mutex_unlock(&fs_info->qgroup_rescan_lock); 2637 2638 /* 2639 * only update status, since the previous part has already updated the 2640 * qgroup info. 2641 */ 2642 trans = btrfs_start_transaction(fs_info->quota_root, 1); 2643 if (IS_ERR(trans)) { 2644 err = PTR_ERR(trans); 2645 btrfs_err(fs_info, 2646 "failed to start transaction for status update: %d", 2647 err); 2648 goto done; 2649 } 2650 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); 2651 if (ret < 0) { 2652 err = ret; 2653 btrfs_err(fs_info, "failed to update qgroup status: %d", err); 2654 } 2655 btrfs_end_transaction(trans); 2656 2657 if (btrfs_fs_closing(fs_info)) { 2658 btrfs_info(fs_info, "qgroup scan paused"); 2659 } else if (err >= 0) { 2660 btrfs_info(fs_info, "qgroup scan completed%s", 2661 err > 0 ? " (inconsistency flag cleared)" : ""); 2662 } else { 2663 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2664 } 2665 2666 done: 2667 mutex_lock(&fs_info->qgroup_rescan_lock); 2668 fs_info->qgroup_rescan_running = false; 2669 mutex_unlock(&fs_info->qgroup_rescan_lock); 2670 complete_all(&fs_info->qgroup_rescan_completion); 2671 } 2672 2673 /* 2674 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2675 * memory required for the rescan context.
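 * With a non-zero init_flags this is the initial setup path (e.g. the
 * rescan ioctl or quota enable) and it sets BTRFS_QGROUP_STATUS_FLAG_RESCAN
 * itself; with init_flags == 0 it is the resume path, which requires the
 * RESCAN and ON status flags to be present already (e.g. when picking up an
 * interrupted rescan at mount time).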
2676 */ 2677 static int 2678 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2679 int init_flags) 2680 { 2681 int ret = 0; 2682 2683 if (!init_flags && 2684 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2685 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2686 ret = -EINVAL; 2687 goto err; 2688 } 2689 2690 mutex_lock(&fs_info->qgroup_rescan_lock); 2691 spin_lock(&fs_info->qgroup_lock); 2692 2693 if (init_flags) { 2694 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2695 ret = -EINPROGRESS; 2696 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2697 ret = -EINVAL; 2698 2699 if (ret) { 2700 spin_unlock(&fs_info->qgroup_lock); 2701 mutex_unlock(&fs_info->qgroup_rescan_lock); 2702 goto err; 2703 } 2704 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2705 } 2706 2707 memset(&fs_info->qgroup_rescan_progress, 0, 2708 sizeof(fs_info->qgroup_rescan_progress)); 2709 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2710 init_completion(&fs_info->qgroup_rescan_completion); 2711 fs_info->qgroup_rescan_running = true; 2712 2713 spin_unlock(&fs_info->qgroup_lock); 2714 mutex_unlock(&fs_info->qgroup_rescan_lock); 2715 2716 memset(&fs_info->qgroup_rescan_work, 0, 2717 sizeof(fs_info->qgroup_rescan_work)); 2718 btrfs_init_work(&fs_info->qgroup_rescan_work, 2719 btrfs_qgroup_rescan_helper, 2720 btrfs_qgroup_rescan_worker, NULL, NULL); 2721 2722 if (ret) { 2723 err: 2724 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret); 2725 return ret; 2726 } 2727 2728 return 0; 2729 } 2730 2731 static void 2732 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2733 { 2734 struct rb_node *n; 2735 struct btrfs_qgroup *qgroup; 2736 2737 spin_lock(&fs_info->qgroup_lock); 2738 /* clear all current qgroup tracking information */ 2739 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2740 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2741 qgroup->rfer = 0; 2742 qgroup->rfer_cmpr = 0; 2743 qgroup->excl = 0; 2744 qgroup->excl_cmpr = 0; 2745 } 2746 spin_unlock(&fs_info->qgroup_lock); 2747 } 2748 2749 int 2750 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2751 { 2752 int ret = 0; 2753 struct btrfs_trans_handle *trans; 2754 2755 ret = qgroup_rescan_init(fs_info, 0, 1); 2756 if (ret) 2757 return ret; 2758 2759 /* 2760 * We have set the rescan_progress to 0, which means no more 2761 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2762 * However, btrfs_qgroup_account_ref may be right after its call 2763 * to btrfs_find_all_roots, in which case it would still do the 2764 * accounting. 2765 * To solve this, we're committing the transaction, which will 2766 * ensure we run all delayed refs and only after that, we are 2767 * going to clear all tracking information for a clean start. 
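 * Committing here should also persist the freshly set RESCAN status flag
 * (the status item is rewritten from the commit path), so an interrupted
 * rescan can be picked up again by btrfs_qgroup_rescan_resume() on the
 * next mount.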
2768 */ 2769 2770 trans = btrfs_join_transaction(fs_info->fs_root); 2771 if (IS_ERR(trans)) { 2772 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2773 return PTR_ERR(trans); 2774 } 2775 ret = btrfs_commit_transaction(trans); 2776 if (ret) { 2777 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2778 return ret; 2779 } 2780 2781 qgroup_rescan_zero_tracking(fs_info); 2782 2783 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2784 &fs_info->qgroup_rescan_work); 2785 2786 return 0; 2787 } 2788 2789 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 2790 bool interruptible) 2791 { 2792 int running; 2793 int ret = 0; 2794 2795 mutex_lock(&fs_info->qgroup_rescan_lock); 2796 spin_lock(&fs_info->qgroup_lock); 2797 running = fs_info->qgroup_rescan_running; 2798 spin_unlock(&fs_info->qgroup_lock); 2799 mutex_unlock(&fs_info->qgroup_rescan_lock); 2800 2801 if (!running) 2802 return 0; 2803 2804 if (interruptible) 2805 ret = wait_for_completion_interruptible( 2806 &fs_info->qgroup_rescan_completion); 2807 else 2808 wait_for_completion(&fs_info->qgroup_rescan_completion); 2809 2810 return ret; 2811 } 2812 2813 /* 2814 * this is only called from open_ctree where we're still single threaded, thus 2815 * locking is omitted here. 2816 */ 2817 void 2818 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 2819 { 2820 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2821 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2822 &fs_info->qgroup_rescan_work); 2823 } 2824 2825 /* 2826 * Reserve qgroup space for range [start, start + len). 2827 * 2828 * This function will either reserve space from related qgroups or do 2829 * nothing if the range is already reserved. 2830 * 2831 * Return 0 for successful reserve 2832 * Return <0 for error (including -EDQUOT) 2833 * 2834 * NOTE: this function may sleep for memory allocation.
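 * A typical caller pattern looks roughly like the following (illustrative
 * sketch only, not lifted from a specific call site):
 *
 *	ret = btrfs_qgroup_reserve_data(inode, start, len);
 *	if (ret < 0)		// e.g. -EDQUOT, a qgroup limit would be exceeded
 *		return ret;
 *	// ... dirty and write back the range ...
 *	btrfs_qgroup_release_data(inode, start, len);	// data reached disk
 *	// or, if the write failed or the pages were invalidated:
 *	btrfs_qgroup_free_data(inode, start, len);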
2835 */ 2836 int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) 2837 { 2838 struct btrfs_root *root = BTRFS_I(inode)->root; 2839 struct extent_changeset changeset; 2840 struct ulist_node *unode; 2841 struct ulist_iterator uiter; 2842 int ret; 2843 2844 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || 2845 !is_fstree(root->objectid) || len == 0) 2846 return 0; 2847 2848 changeset.bytes_changed = 0; 2849 ulist_init(&changeset.range_changed); 2850 ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2851 start + len -1, EXTENT_QGROUP_RESERVED, &changeset); 2852 trace_btrfs_qgroup_reserve_data(inode, start, len, 2853 changeset.bytes_changed, 2854 QGROUP_RESERVE); 2855 if (ret < 0) 2856 goto cleanup; 2857 ret = qgroup_reserve(root, changeset.bytes_changed, true); 2858 if (ret < 0) 2859 goto cleanup; 2860 2861 ulist_release(&changeset.range_changed); 2862 return ret; 2863 2864 cleanup: 2865 /* cleanup already reserved ranges */ 2866 ULIST_ITER_INIT(&uiter); 2867 while ((unode = ulist_next(&changeset.range_changed, &uiter))) 2868 clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, 2869 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, 2870 GFP_NOFS); 2871 ulist_release(&changeset.range_changed); 2872 return ret; 2873 } 2874 2875 static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, 2876 int free) 2877 { 2878 struct extent_changeset changeset; 2879 int trace_op = QGROUP_RELEASE; 2880 int ret; 2881 2882 changeset.bytes_changed = 0; 2883 ulist_init(&changeset.range_changed); 2884 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2885 start + len -1, EXTENT_QGROUP_RESERVED, &changeset); 2886 if (ret < 0) 2887 goto out; 2888 2889 if (free) { 2890 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, 2891 BTRFS_I(inode)->root->objectid, 2892 changeset.bytes_changed); 2893 trace_op = QGROUP_FREE; 2894 } 2895 trace_btrfs_qgroup_release_data(inode, start, len, 2896 changeset.bytes_changed, trace_op); 2897 out: 2898 ulist_release(&changeset.range_changed); 2899 return ret; 2900 } 2901 2902 /* 2903 * Free a reserved space range from io_tree and related qgroups 2904 * 2905 * Should be called when a range of pages get invalidated before reaching disk. 2906 * Or for error cleanup case. 2907 * 2908 * For data written to disk, use btrfs_qgroup_release_data(). 2909 * 2910 * NOTE: This function may sleep for memory allocation. 2911 */ 2912 int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len) 2913 { 2914 return __btrfs_qgroup_release_data(inode, start, len, 1); 2915 } 2916 2917 /* 2918 * Release a reserved space range from io_tree only. 2919 * 2920 * Should be called when a range of pages get written to disk and corresponding 2921 * FILE_EXTENT is inserted into corresponding root. 2922 * 2923 * Since new qgroup accounting framework will only update qgroup numbers at 2924 * commit_transaction() time, its reserved space shouldn't be freed from 2925 * related qgroups. 2926 * 2927 * But we should release the range from io_tree, to allow further write to be 2928 * COWed. 2929 * 2930 * NOTE: This function may sleep for memory allocation. 
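 * In short: "release" only clears the EXTENT_QGROUP_RESERVED bits and lets
 * the accounting at commit time turn the reservation into real rfer/excl
 * numbers, while btrfs_qgroup_free_data() above additionally returns the
 * bytes to the qgroups via btrfs_qgroup_free_refroot().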
2931 */ 2932 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) 2933 { 2934 return __btrfs_qgroup_release_data(inode, start, len, 0); 2935 } 2936 2937 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 2938 bool enforce) 2939 { 2940 struct btrfs_fs_info *fs_info = root->fs_info; 2941 int ret; 2942 2943 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 2944 !is_fstree(root->objectid) || num_bytes == 0) 2945 return 0; 2946 2947 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 2948 ret = qgroup_reserve(root, num_bytes, enforce); 2949 if (ret < 0) 2950 return ret; 2951 atomic_add(num_bytes, &root->qgroup_meta_rsv); 2952 return ret; 2953 } 2954 2955 void btrfs_qgroup_free_meta_all(struct btrfs_root *root) 2956 { 2957 struct btrfs_fs_info *fs_info = root->fs_info; 2958 int reserved; 2959 2960 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 2961 !is_fstree(root->objectid)) 2962 return; 2963 2964 reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); 2965 if (reserved == 0) 2966 return; 2967 btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); 2968 } 2969 2970 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) 2971 { 2972 struct btrfs_fs_info *fs_info = root->fs_info; 2973 2974 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 2975 !is_fstree(root->objectid)) 2976 return; 2977 2978 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 2979 WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes); 2980 atomic_sub(num_bytes, &root->qgroup_meta_rsv); 2981 btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); 2982 } 2983 2984 /* 2985 * Check qgroup reserved space leaking, normally at destroy inode 2986 * time 2987 */ 2988 void btrfs_qgroup_check_reserved_leak(struct inode *inode) 2989 { 2990 struct extent_changeset changeset; 2991 struct ulist_node *unode; 2992 struct ulist_iterator iter; 2993 int ret; 2994 2995 changeset.bytes_changed = 0; 2996 ulist_init(&changeset.range_changed); 2997 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 2998 EXTENT_QGROUP_RESERVED, &changeset); 2999 3000 WARN_ON(ret < 0); 3001 if (WARN_ON(changeset.bytes_changed)) { 3002 ULIST_ITER_INIT(&iter); 3003 while ((unode = ulist_next(&changeset.range_changed, &iter))) { 3004 btrfs_warn(BTRFS_I(inode)->root->fs_info, 3005 "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", 3006 inode->i_ino, unode->val, unode->aux); 3007 } 3008 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, 3009 BTRFS_I(inode)->root->objectid, 3010 changeset.bytes_changed); 3011 3012 } 3013 ulist_release(&changeset.range_changed); 3014 } 3015