1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2011 STRATO. All rights reserved. 4 */ 5 6 #include <linux/sched.h> 7 #include <linux/pagemap.h> 8 #include <linux/writeback.h> 9 #include <linux/blkdev.h> 10 #include <linux/rbtree.h> 11 #include <linux/slab.h> 12 #include <linux/workqueue.h> 13 #include <linux/btrfs.h> 14 #include <linux/sched/mm.h> 15 16 #include "ctree.h" 17 #include "transaction.h" 18 #include "disk-io.h" 19 #include "locking.h" 20 #include "ulist.h" 21 #include "backref.h" 22 #include "extent_io.h" 23 #include "qgroup.h" 24 #include "block-group.h" 25 #include "sysfs.h" 26 #include "tree-mod-log.h" 27 #include "fs.h" 28 #include "accessors.h" 29 #include "extent-tree.h" 30 #include "root-tree.h" 31 #include "tree-checker.h" 32 33 enum btrfs_qgroup_mode btrfs_qgroup_mode(const struct btrfs_fs_info *fs_info) 34 { 35 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 36 return BTRFS_QGROUP_MODE_DISABLED; 37 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE) 38 return BTRFS_QGROUP_MODE_SIMPLE; 39 return BTRFS_QGROUP_MODE_FULL; 40 } 41 42 bool btrfs_qgroup_enabled(const struct btrfs_fs_info *fs_info) 43 { 44 return btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_DISABLED; 45 } 46 47 bool btrfs_qgroup_full_accounting(const struct btrfs_fs_info *fs_info) 48 { 49 return btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL; 50 } 51 52 /* 53 * Helpers to access qgroup reservation 54 * 55 * Callers should ensure the lock context and type are valid 56 */ 57 58 static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup) 59 { 60 u64 ret = 0; 61 int i; 62 63 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 64 ret += qgroup->rsv.values[i]; 65 66 return ret; 67 } 68 69 #ifdef CONFIG_BTRFS_DEBUG 70 static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type) 71 { 72 if (type == BTRFS_QGROUP_RSV_DATA) 73 return "data"; 74 if (type == BTRFS_QGROUP_RSV_META_PERTRANS) 75 return "meta_pertrans"; 76 if (type == 
BTRFS_QGROUP_RSV_META_PREALLOC) 77 return "meta_prealloc"; 78 return NULL; 79 } 80 #endif 81 82 static void qgroup_rsv_add(struct btrfs_fs_info *fs_info, 83 struct btrfs_qgroup *qgroup, u64 num_bytes, 84 enum btrfs_qgroup_rsv_type type) 85 { 86 trace_btrfs_qgroup_update_reserve(fs_info, qgroup, num_bytes, type); 87 qgroup->rsv.values[type] += num_bytes; 88 } 89 90 static void qgroup_rsv_release(struct btrfs_fs_info *fs_info, 91 struct btrfs_qgroup *qgroup, u64 num_bytes, 92 enum btrfs_qgroup_rsv_type type) 93 { 94 trace_btrfs_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type); 95 if (qgroup->rsv.values[type] >= num_bytes) { 96 qgroup->rsv.values[type] -= num_bytes; 97 return; 98 } 99 #ifdef CONFIG_BTRFS_DEBUG 100 WARN_RATELIMIT(1, 101 "qgroup %llu %s reserved space underflow, have %llu to free %llu", 102 qgroup->qgroupid, qgroup_rsv_type_str(type), 103 qgroup->rsv.values[type], num_bytes); 104 #endif 105 qgroup->rsv.values[type] = 0; 106 } 107 108 static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info, 109 struct btrfs_qgroup *dest, 110 const struct btrfs_qgroup *src) 111 { 112 int i; 113 114 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 115 qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i); 116 } 117 118 static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info, 119 struct btrfs_qgroup *dest, 120 const struct btrfs_qgroup *src) 121 { 122 int i; 123 124 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 125 qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i); 126 } 127 128 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 129 int mod) 130 { 131 if (qg->old_refcnt < seq) 132 qg->old_refcnt = seq; 133 qg->old_refcnt += mod; 134 } 135 136 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 137 int mod) 138 { 139 if (qg->new_refcnt < seq) 140 qg->new_refcnt = seq; 141 qg->new_refcnt += mod; 142 } 143 144 static inline u64 btrfs_qgroup_get_old_refcnt(const struct btrfs_qgroup *qg, u64 
seq) 145 { 146 if (qg->old_refcnt < seq) 147 return 0; 148 return qg->old_refcnt - seq; 149 } 150 151 static inline u64 btrfs_qgroup_get_new_refcnt(const struct btrfs_qgroup *qg, u64 seq) 152 { 153 if (qg->new_refcnt < seq) 154 return 0; 155 return qg->new_refcnt - seq; 156 } 157 158 static int 159 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 160 int init_flags); 161 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 162 163 static int btrfs_qgroup_qgroupid_key_cmp(const void *key, const struct rb_node *node) 164 { 165 const u64 *qgroupid = key; 166 const struct btrfs_qgroup *qgroup = rb_entry(node, struct btrfs_qgroup, node); 167 168 if (qgroup->qgroupid < *qgroupid) 169 return -1; 170 else if (qgroup->qgroupid > *qgroupid) 171 return 1; 172 173 return 0; 174 } 175 176 /* must be called with qgroup_ioctl_lock held */ 177 static struct btrfs_qgroup *find_qgroup_rb(const struct btrfs_fs_info *fs_info, 178 u64 qgroupid) 179 { 180 struct rb_node *node; 181 182 node = rb_find(&qgroupid, &fs_info->qgroup_tree, btrfs_qgroup_qgroupid_key_cmp); 183 return rb_entry_safe(node, struct btrfs_qgroup, node); 184 } 185 186 static int btrfs_qgroup_qgroupid_cmp(struct rb_node *new, const struct rb_node *existing) 187 { 188 const struct btrfs_qgroup *new_qgroup = rb_entry(new, struct btrfs_qgroup, node); 189 190 return btrfs_qgroup_qgroupid_key_cmp(&new_qgroup->qgroupid, existing); 191 } 192 193 /* 194 * Add qgroup to the filesystem's qgroup tree. 195 * 196 * Must be called with qgroup_lock held and @prealloc preallocated. 197 * 198 * The control on the lifespan of @prealloc would be transferred to this 199 * function, thus caller should no longer touch @prealloc. 200 */ 201 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 202 struct btrfs_qgroup *prealloc, 203 u64 qgroupid) 204 { 205 struct rb_node *node; 206 207 /* Caller must have pre-allocated @prealloc. 
*/ 208 ASSERT(prealloc); 209 210 prealloc->qgroupid = qgroupid; 211 node = rb_find_add(&prealloc->node, &fs_info->qgroup_tree, btrfs_qgroup_qgroupid_cmp); 212 if (node) { 213 kfree(prealloc); 214 return rb_entry(node, struct btrfs_qgroup, node); 215 } 216 217 INIT_LIST_HEAD(&prealloc->groups); 218 INIT_LIST_HEAD(&prealloc->members); 219 INIT_LIST_HEAD(&prealloc->dirty); 220 INIT_LIST_HEAD(&prealloc->iterator); 221 INIT_LIST_HEAD(&prealloc->nested_iterator); 222 223 return prealloc; 224 } 225 226 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 227 { 228 struct btrfs_qgroup_list *list; 229 230 list_del(&qgroup->dirty); 231 while (!list_empty(&qgroup->groups)) { 232 list = list_first_entry(&qgroup->groups, 233 struct btrfs_qgroup_list, next_group); 234 list_del(&list->next_group); 235 list_del(&list->next_member); 236 kfree(list); 237 } 238 239 while (!list_empty(&qgroup->members)) { 240 list = list_first_entry(&qgroup->members, 241 struct btrfs_qgroup_list, next_member); 242 list_del(&list->next_group); 243 list_del(&list->next_member); 244 kfree(list); 245 } 246 } 247 248 /* must be called with qgroup_lock held */ 249 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 250 { 251 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 252 253 if (!qgroup) 254 return -ENOENT; 255 256 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 257 __del_qgroup_rb(qgroup); 258 return 0; 259 } 260 261 /* 262 * Add relation specified by two qgroups. 263 * 264 * Must be called with qgroup_lock held, the ownership of @prealloc is 265 * transferred to this function and caller should not touch it anymore. 
266 * 267 * Return: 0 on success 268 * -ENOENT if one of the qgroups is NULL 269 * <0 other errors 270 */ 271 static int __add_relation_rb(struct btrfs_qgroup_list *prealloc, 272 struct btrfs_qgroup *member, 273 struct btrfs_qgroup *parent) 274 { 275 if (!member || !parent) { 276 kfree(prealloc); 277 return -ENOENT; 278 } 279 280 prealloc->group = parent; 281 prealloc->member = member; 282 list_add_tail(&prealloc->next_group, &member->groups); 283 list_add_tail(&prealloc->next_member, &parent->members); 284 285 return 0; 286 } 287 288 /* 289 * Add relation specified by two qgroup ids. 290 * 291 * Must be called with qgroup_lock held. 292 * 293 * Return: 0 on success 294 * -ENOENT if one of the ids does not exist 295 * <0 other errors 296 */ 297 static int add_relation_rb(struct btrfs_fs_info *fs_info, 298 struct btrfs_qgroup_list *prealloc, 299 u64 memberid, u64 parentid) 300 { 301 struct btrfs_qgroup *member; 302 struct btrfs_qgroup *parent; 303 304 member = find_qgroup_rb(fs_info, memberid); 305 parent = find_qgroup_rb(fs_info, parentid); 306 307 return __add_relation_rb(prealloc, member, parent); 308 } 309 310 /* Must be called with qgroup_lock held */ 311 static int del_relation_rb(struct btrfs_fs_info *fs_info, 312 u64 memberid, u64 parentid) 313 { 314 struct btrfs_qgroup *member; 315 struct btrfs_qgroup *parent; 316 struct btrfs_qgroup_list *list; 317 318 member = find_qgroup_rb(fs_info, memberid); 319 parent = find_qgroup_rb(fs_info, parentid); 320 if (!member || !parent) 321 return -ENOENT; 322 323 list_for_each_entry(list, &member->groups, next_group) { 324 if (list->group == parent) { 325 list_del(&list->next_group); 326 list_del(&list->next_member); 327 kfree(list); 328 return 0; 329 } 330 } 331 return -ENOENT; 332 } 333 334 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 335 int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid, 336 u64 rfer, u64 excl) 337 { 338 struct btrfs_qgroup *qgroup; 339 340 qgroup = find_qgroup_rb(fs_info, 
qgroupid); 341 if (!qgroup) 342 return -EINVAL; 343 if (qgroup->rfer != rfer || qgroup->excl != excl) 344 return -EINVAL; 345 return 0; 346 } 347 #endif 348 349 static bool squota_check_parent_usage(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *parent) 350 { 351 u64 excl_sum = 0; 352 u64 rfer_sum = 0; 353 u64 excl_cmpr_sum = 0; 354 u64 rfer_cmpr_sum = 0; 355 struct btrfs_qgroup_list *glist; 356 int nr_members = 0; 357 bool mismatch; 358 359 if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE) 360 return false; 361 if (btrfs_qgroup_level(parent->qgroupid) == 0) 362 return false; 363 364 /* Eligible parent qgroup. Squota; level > 0; empty members list. */ 365 list_for_each_entry(glist, &parent->members, next_member) { 366 excl_sum += glist->member->excl; 367 rfer_sum += glist->member->rfer; 368 excl_cmpr_sum += glist->member->excl_cmpr; 369 rfer_cmpr_sum += glist->member->rfer_cmpr; 370 nr_members++; 371 } 372 mismatch = (parent->excl != excl_sum || parent->rfer != rfer_sum || 373 parent->excl_cmpr != excl_cmpr_sum || parent->rfer_cmpr != rfer_cmpr_sum); 374 375 WARN(mismatch, 376 "parent squota qgroup " BTRFS_QGROUP_FMT " has mismatched usage from its %d members. " 377 "%llu %llu %llu %llu vs %llu %llu %llu %llu\n", 378 BTRFS_QGROUP_FMT_VALUE(parent), nr_members, parent->excl, 379 parent->rfer, parent->excl_cmpr, parent->rfer_cmpr, excl_sum, 380 rfer_sum, excl_cmpr_sum, rfer_cmpr_sum); 381 return mismatch; 382 } 383 384 __printf(2, 3) 385 static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info, const char *fmt, ...) 
386 { 387 const u64 old_flags = fs_info->qgroup_flags; 388 389 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) 390 return; 391 fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT | 392 BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN | 393 BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING); 394 if (!(old_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) { 395 struct va_format vaf; 396 va_list args; 397 398 va_start(args, fmt); 399 vaf.fmt = fmt; 400 vaf.va = &args; 401 402 btrfs_warn_rl(fs_info, "qgroup marked inconsistent, %pV", &vaf); 403 va_end(args); 404 } 405 } 406 407 static void qgroup_read_enable_gen(struct btrfs_fs_info *fs_info, 408 struct extent_buffer *leaf, int slot, 409 struct btrfs_qgroup_status_item *ptr) 410 { 411 ASSERT(btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)); 412 ASSERT(btrfs_item_size(leaf, slot) >= sizeof(*ptr)); 413 fs_info->qgroup_enable_gen = btrfs_qgroup_status_enable_gen(leaf, ptr); 414 } 415 416 /* 417 * The full config is read in one go, only called from open_ctree() 418 * It doesn't use any locking, as at this point we're still single-threaded 419 */ 420 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 421 { 422 struct btrfs_key key; 423 struct btrfs_key found_key; 424 struct btrfs_root *quota_root = fs_info->quota_root; 425 struct btrfs_path *path = NULL; 426 struct extent_buffer *l; 427 int slot; 428 int ret = 0; 429 u64 flags = 0; 430 u64 rescan_progress = 0; 431 432 if (!fs_info->quota_root) 433 return 0; 434 435 path = btrfs_alloc_path(); 436 if (!path) { 437 ret = -ENOMEM; 438 goto out; 439 } 440 441 ret = btrfs_sysfs_add_qgroups(fs_info); 442 if (ret < 0) 443 goto out; 444 /* default this to quota off, in case no status key is found */ 445 fs_info->qgroup_flags = 0; 446 447 /* 448 * pass 1: read status, all qgroup infos and limits 449 */ 450 key.objectid = 0; 451 key.type = 0; 452 key.offset = 0; 453 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 454 if (ret) 455 goto out; 456 457 while (1) { 458 
struct btrfs_qgroup *qgroup; 459 460 slot = path->slots[0]; 461 l = path->nodes[0]; 462 btrfs_item_key_to_cpu(l, &found_key, slot); 463 464 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 465 struct btrfs_qgroup_status_item *ptr; 466 467 ptr = btrfs_item_ptr(l, slot, 468 struct btrfs_qgroup_status_item); 469 470 if (btrfs_qgroup_status_version(l, ptr) != 471 BTRFS_QGROUP_STATUS_VERSION) { 472 btrfs_err(fs_info, 473 "old qgroup version, quota disabled"); 474 goto out; 475 } 476 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, ptr); 477 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE) 478 qgroup_read_enable_gen(fs_info, l, slot, ptr); 479 else if (btrfs_qgroup_status_generation(l, ptr) != fs_info->generation) 480 qgroup_mark_inconsistent(fs_info, "qgroup generation mismatch"); 481 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 482 goto next1; 483 } 484 485 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 486 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 487 goto next1; 488 489 qgroup = find_qgroup_rb(fs_info, found_key.offset); 490 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 491 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) 492 qgroup_mark_inconsistent(fs_info, "inconsistent qgroup config"); 493 if (!qgroup) { 494 struct btrfs_qgroup *prealloc; 495 struct btrfs_root *tree_root = fs_info->tree_root; 496 497 prealloc = kzalloc_obj(*prealloc); 498 if (!prealloc) { 499 ret = -ENOMEM; 500 goto out; 501 } 502 qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset); 503 /* 504 * If a qgroup exists for a subvolume ID, it is possible 505 * that subvolume has been deleted, in which case 506 * reusing that ID would lead to incorrect accounting. 507 * 508 * Ensure that we skip any such subvol ids. 509 * 510 * We don't need to lock because this is only called 511 * during mount before we start doing things like creating 512 * subvolumes. 
513 */ 514 if (btrfs_is_fstree(qgroup->qgroupid) && 515 qgroup->qgroupid > tree_root->free_objectid) 516 /* 517 * Don't need to check against BTRFS_LAST_FREE_OBJECTID, 518 * as it will get checked on the next call to 519 * btrfs_get_free_objectid. 520 */ 521 tree_root->free_objectid = qgroup->qgroupid + 1; 522 } 523 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 524 if (ret < 0) 525 goto out; 526 527 switch (found_key.type) { 528 case BTRFS_QGROUP_INFO_KEY: { 529 struct btrfs_qgroup_info_item *ptr; 530 531 ptr = btrfs_item_ptr(l, slot, 532 struct btrfs_qgroup_info_item); 533 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 534 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 535 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 536 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 537 /* generation currently unused */ 538 break; 539 } 540 case BTRFS_QGROUP_LIMIT_KEY: { 541 struct btrfs_qgroup_limit_item *ptr; 542 543 ptr = btrfs_item_ptr(l, slot, 544 struct btrfs_qgroup_limit_item); 545 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 546 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 547 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 548 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 549 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 550 break; 551 } 552 } 553 next1: 554 ret = btrfs_next_item(quota_root, path); 555 if (ret < 0) 556 goto out; 557 if (ret) 558 break; 559 } 560 btrfs_release_path(path); 561 562 /* 563 * pass 2: read all qgroup relations 564 */ 565 key.objectid = 0; 566 key.type = BTRFS_QGROUP_RELATION_KEY; 567 key.offset = 0; 568 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 569 if (ret) 570 goto out; 571 while (1) { 572 struct btrfs_qgroup_list *list = NULL; 573 574 slot = path->slots[0]; 575 l = path->nodes[0]; 576 btrfs_item_key_to_cpu(l, &found_key, slot); 577 578 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 579 goto next2; 580 581 if (found_key.objectid > 
found_key.offset) { 582 /* parent <- member, not needed to build config */ 583 /* FIXME should we omit the key completely? */ 584 goto next2; 585 } 586 587 list = kzalloc_obj(*list); 588 if (!list) { 589 ret = -ENOMEM; 590 goto out; 591 } 592 ret = add_relation_rb(fs_info, list, found_key.objectid, 593 found_key.offset); 594 list = NULL; 595 if (ret == -ENOENT) { 596 btrfs_warn(fs_info, 597 "orphan qgroup relation 0x%llx->0x%llx", 598 found_key.objectid, found_key.offset); 599 ret = 0; /* ignore the error */ 600 } 601 if (ret) 602 goto out; 603 next2: 604 ret = btrfs_next_item(quota_root, path); 605 if (ret < 0) 606 goto out; 607 if (ret) 608 break; 609 } 610 out: 611 btrfs_free_path(path); 612 fs_info->qgroup_flags |= flags; 613 if (ret >= 0) { 614 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON) 615 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 616 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 617 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 618 } else { 619 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 620 btrfs_sysfs_del_qgroups(fs_info); 621 } 622 623 return ret < 0 ? ret : 0; 624 } 625 626 /* 627 * Called in close_ctree() when quota is still enabled. This verifies we don't 628 * leak some reserved space. 629 * 630 * Return false if no reserved space is left. 631 * Return true if some reserved space is leaked. 632 */ 633 bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info) 634 { 635 struct rb_node *node; 636 bool ret = false; 637 638 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED) 639 return ret; 640 /* 641 * Since we're unmounting, there is no race and no need to grab qgroup 642 * lock. And here we don't go post-order to provide a more user 643 * friendly sorted result. 
644 */ 645 for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) { 646 struct btrfs_qgroup *qgroup; 647 int i; 648 649 qgroup = rb_entry(node, struct btrfs_qgroup, node); 650 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) { 651 if (qgroup->rsv.values[i]) { 652 ret = true; 653 btrfs_warn(fs_info, 654 "qgroup " BTRFS_QGROUP_FMT " has unreleased space, type %d rsv %llu", 655 BTRFS_QGROUP_FMT_VALUE(qgroup), 656 i, qgroup->rsv.values[i]); 657 } 658 } 659 } 660 return ret; 661 } 662 663 /* 664 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 665 * first two are in single-threaded paths. 666 */ 667 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 668 { 669 struct rb_node *n; 670 struct btrfs_qgroup *qgroup; 671 672 /* 673 * btrfs_quota_disable() can be called concurrently with 674 * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the 675 * lock. 676 */ 677 spin_lock(&fs_info->qgroup_lock); 678 while ((n = rb_first(&fs_info->qgroup_tree))) { 679 qgroup = rb_entry(n, struct btrfs_qgroup, node); 680 rb_erase(n, &fs_info->qgroup_tree); 681 __del_qgroup_rb(qgroup); 682 spin_unlock(&fs_info->qgroup_lock); 683 btrfs_sysfs_del_one_qgroup(fs_info, qgroup); 684 kfree(qgroup); 685 spin_lock(&fs_info->qgroup_lock); 686 } 687 spin_unlock(&fs_info->qgroup_lock); 688 689 btrfs_sysfs_del_qgroups(fs_info); 690 } 691 692 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 693 u64 dst) 694 { 695 struct btrfs_root *quota_root = trans->fs_info->quota_root; 696 BTRFS_PATH_AUTO_FREE(path); 697 struct btrfs_key key; 698 699 path = btrfs_alloc_path(); 700 if (!path) 701 return -ENOMEM; 702 703 key.objectid = src; 704 key.type = BTRFS_QGROUP_RELATION_KEY; 705 key.offset = dst; 706 707 return btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 708 } 709 710 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 711 u64 dst) 712 { 713 int ret; 714 struct btrfs_root 
*quota_root = trans->fs_info->quota_root; 715 BTRFS_PATH_AUTO_FREE(path); 716 struct btrfs_key key; 717 718 path = btrfs_alloc_path(); 719 if (!path) 720 return -ENOMEM; 721 722 key.objectid = src; 723 key.type = BTRFS_QGROUP_RELATION_KEY; 724 key.offset = dst; 725 726 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 727 if (ret < 0) 728 return ret; 729 730 if (ret > 0) 731 return -ENOENT; 732 733 return btrfs_del_item(trans, quota_root, path); 734 } 735 736 static int add_qgroup_item(struct btrfs_trans_handle *trans, 737 struct btrfs_root *quota_root, u64 qgroupid) 738 { 739 int ret; 740 BTRFS_PATH_AUTO_FREE(path); 741 struct btrfs_qgroup_info_item *qgroup_info; 742 struct btrfs_qgroup_limit_item *qgroup_limit; 743 struct extent_buffer *leaf; 744 struct btrfs_key key; 745 746 if (btrfs_is_testing(quota_root->fs_info)) 747 return 0; 748 749 path = btrfs_alloc_path(); 750 if (!path) 751 return -ENOMEM; 752 753 key.objectid = 0; 754 key.type = BTRFS_QGROUP_INFO_KEY; 755 key.offset = qgroupid; 756 757 /* 758 * Avoid a transaction abort by catching -EEXIST here. In that 759 * case, we proceed by re-initializing the existing structure 760 * on disk. 
761 */ 762 763 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 764 sizeof(*qgroup_info)); 765 if (ret && ret != -EEXIST) 766 return ret; 767 768 leaf = path->nodes[0]; 769 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 770 struct btrfs_qgroup_info_item); 771 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 772 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 773 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 774 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 775 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 776 777 btrfs_release_path(path); 778 779 key.type = BTRFS_QGROUP_LIMIT_KEY; 780 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 781 sizeof(*qgroup_limit)); 782 if (ret && ret != -EEXIST) 783 return ret; 784 785 leaf = path->nodes[0]; 786 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 787 struct btrfs_qgroup_limit_item); 788 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 789 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 790 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 791 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 792 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 793 794 return 0; 795 } 796 797 static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) 798 { 799 int ret; 800 struct btrfs_root *quota_root = trans->fs_info->quota_root; 801 BTRFS_PATH_AUTO_FREE(path); 802 struct btrfs_key key; 803 804 path = btrfs_alloc_path(); 805 if (!path) 806 return -ENOMEM; 807 808 key.objectid = 0; 809 key.type = BTRFS_QGROUP_INFO_KEY; 810 key.offset = qgroupid; 811 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 812 if (ret < 0) 813 return ret; 814 815 if (ret > 0) 816 return -ENOENT; 817 818 ret = btrfs_del_item(trans, quota_root, path); 819 if (ret) 820 return ret; 821 822 btrfs_release_path(path); 823 824 key.type = BTRFS_QGROUP_LIMIT_KEY; 825 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 826 if (ret 
< 0) 827 return ret; 828 829 if (ret > 0) 830 return -ENOENT; 831 832 return btrfs_del_item(trans, quota_root, path); 833 } 834 835 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 836 struct btrfs_qgroup *qgroup) 837 { 838 struct btrfs_root *quota_root = trans->fs_info->quota_root; 839 BTRFS_PATH_AUTO_FREE(path); 840 struct btrfs_key key; 841 struct extent_buffer *l; 842 struct btrfs_qgroup_limit_item *qgroup_limit; 843 int ret; 844 int slot; 845 846 key.objectid = 0; 847 key.type = BTRFS_QGROUP_LIMIT_KEY; 848 key.offset = qgroup->qgroupid; 849 850 path = btrfs_alloc_path(); 851 if (!path) 852 return -ENOMEM; 853 854 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 855 if (ret > 0) 856 ret = -ENOENT; 857 858 if (ret) 859 return ret; 860 861 l = path->nodes[0]; 862 slot = path->slots[0]; 863 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 864 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 865 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 866 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 867 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 868 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 869 870 return ret; 871 } 872 873 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 874 struct btrfs_qgroup *qgroup) 875 { 876 struct btrfs_fs_info *fs_info = trans->fs_info; 877 struct btrfs_root *quota_root = fs_info->quota_root; 878 BTRFS_PATH_AUTO_FREE(path); 879 struct btrfs_key key; 880 struct extent_buffer *l; 881 struct btrfs_qgroup_info_item *qgroup_info; 882 int ret; 883 int slot; 884 885 if (btrfs_is_testing(fs_info)) 886 return 0; 887 888 key.objectid = 0; 889 key.type = BTRFS_QGROUP_INFO_KEY; 890 key.offset = qgroup->qgroupid; 891 892 path = btrfs_alloc_path(); 893 if (!path) 894 return -ENOMEM; 895 896 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 897 if (ret > 0) 898 ret = 
-ENOENT; 899 900 if (ret) 901 return ret; 902 903 l = path->nodes[0]; 904 slot = path->slots[0]; 905 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 906 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 907 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 908 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 909 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 910 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 911 912 return ret; 913 } 914 915 static int update_qgroup_status_item(struct btrfs_trans_handle *trans) 916 { 917 struct btrfs_fs_info *fs_info = trans->fs_info; 918 struct btrfs_root *quota_root = fs_info->quota_root; 919 BTRFS_PATH_AUTO_FREE(path); 920 struct btrfs_key key; 921 struct extent_buffer *l; 922 struct btrfs_qgroup_status_item *ptr; 923 int ret; 924 int slot; 925 926 key.objectid = 0; 927 key.type = BTRFS_QGROUP_STATUS_KEY; 928 key.offset = 0; 929 930 path = btrfs_alloc_path(); 931 if (!path) 932 return -ENOMEM; 933 934 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 935 if (ret > 0) 936 ret = -ENOENT; 937 938 if (ret) 939 return ret; 940 941 l = path->nodes[0]; 942 slot = path->slots[0]; 943 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 944 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags & 945 BTRFS_QGROUP_STATUS_FLAGS_MASK); 946 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 947 btrfs_set_qgroup_status_rescan(l, ptr, 948 fs_info->qgroup_rescan_progress.objectid); 949 950 return ret; 951 } 952 953 /* 954 * called with qgroup_lock held 955 */ 956 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 957 struct btrfs_root *root) 958 { 959 BTRFS_PATH_AUTO_FREE(path); 960 struct btrfs_key key; 961 struct extent_buffer *leaf = NULL; 962 int ret; 963 int nr = 0; 964 965 path = btrfs_alloc_path(); 966 if (!path) 967 return -ENOMEM; 968 969 key.objectid = 0; 970 key.type = 0; 971 
key.offset = 0; 972 973 while (1) { 974 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 975 if (ret < 0) 976 return ret; 977 leaf = path->nodes[0]; 978 nr = btrfs_header_nritems(leaf); 979 if (!nr) 980 break; 981 /* 982 * delete the leaf one by one 983 * since the whole tree is going 984 * to be deleted. 985 */ 986 path->slots[0] = 0; 987 ret = btrfs_del_items(trans, root, path, 0, nr); 988 if (ret) 989 return ret; 990 991 btrfs_release_path(path); 992 } 993 994 return 0; 995 } 996 997 int btrfs_quota_enable(struct btrfs_fs_info *fs_info, 998 struct btrfs_ioctl_quota_ctl_args *quota_ctl_args) 999 { 1000 struct btrfs_root *quota_root; 1001 struct btrfs_root *tree_root = fs_info->tree_root; 1002 struct btrfs_path *path = NULL; 1003 struct btrfs_qgroup_status_item *ptr; 1004 struct extent_buffer *leaf; 1005 struct btrfs_key key; 1006 struct btrfs_key found_key; 1007 struct btrfs_qgroup *qgroup = NULL; 1008 struct btrfs_qgroup *prealloc = NULL; 1009 struct btrfs_trans_handle *trans = NULL; 1010 const bool simple = (quota_ctl_args->cmd == BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA); 1011 int ret = 0; 1012 int slot; 1013 1014 /* 1015 * We need to have subvol_sem write locked, to prevent races between 1016 * concurrent tasks trying to enable quotas, because we will unlock 1017 * and relock qgroup_ioctl_lock before setting fs_info->quota_root 1018 * and before setting BTRFS_FS_QUOTA_ENABLED. 1019 */ 1020 lockdep_assert_held_write(&fs_info->subvol_sem); 1021 1022 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { 1023 btrfs_err(fs_info, 1024 "qgroups are currently unsupported in extent tree v2"); 1025 return -EINVAL; 1026 } 1027 1028 mutex_lock(&fs_info->qgroup_ioctl_lock); 1029 if (fs_info->quota_root) 1030 goto out; 1031 1032 ret = btrfs_sysfs_add_qgroups(fs_info); 1033 if (ret < 0) 1034 goto out; 1035 1036 /* 1037 * Unlock qgroup_ioctl_lock before starting the transaction. 
This is to 1038 * avoid lock acquisition inversion problems (reported by lockdep) between 1039 * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we 1040 * start a transaction. 1041 * After we started the transaction lock qgroup_ioctl_lock again and 1042 * check if someone else created the quota root in the meanwhile. If so, 1043 * just return success and release the transaction handle. 1044 * 1045 * Also we don't need to worry about someone else calling 1046 * btrfs_sysfs_add_qgroups() after we unlock and getting an error because 1047 * that function returns 0 (success) when the sysfs entries already exist. 1048 */ 1049 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1050 1051 /* 1052 * 1 for quota root item 1053 * 1 for BTRFS_QGROUP_STATUS item 1054 * 1055 * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items 1056 * per subvolume. However those are not currently reserved since it 1057 * would be a lot of overkill. 1058 */ 1059 trans = btrfs_start_transaction(tree_root, 2); 1060 1061 mutex_lock(&fs_info->qgroup_ioctl_lock); 1062 if (IS_ERR(trans)) { 1063 ret = PTR_ERR(trans); 1064 trans = NULL; 1065 goto out; 1066 } 1067 1068 if (fs_info->quota_root) 1069 goto out; 1070 1071 /* 1072 * initially create the quota tree 1073 */ 1074 quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID); 1075 if (IS_ERR(quota_root)) { 1076 ret = PTR_ERR(quota_root); 1077 btrfs_abort_transaction(trans, ret); 1078 goto out; 1079 } 1080 1081 path = btrfs_alloc_path(); 1082 if (unlikely(!path)) { 1083 ret = -ENOMEM; 1084 btrfs_abort_transaction(trans, ret); 1085 goto out_free_root; 1086 } 1087 1088 key.objectid = 0; 1089 key.type = BTRFS_QGROUP_STATUS_KEY; 1090 key.offset = 0; 1091 1092 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 1093 sizeof(*ptr)); 1094 if (unlikely(ret)) { 1095 btrfs_abort_transaction(trans, ret); 1096 goto out_free_path; 1097 } 1098 1099 leaf = path->nodes[0]; 1100 ptr = btrfs_item_ptr(leaf, path->slots[0], 1101 struct 
btrfs_qgroup_status_item); 1102 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 1103 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 1104 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON; 1105 if (simple) { 1106 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE; 1107 btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA); 1108 /* 1109 * Set the enable generation to the next transaction, as we cannot 1110 * ensure that extents written during this transaction will see any 1111 * state we have set here. So we should treat all extents of the 1112 * transaction as coming in before squotas was enabled. 1113 */ 1114 btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid + 1); 1115 } else { 1116 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1117 } 1118 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags & 1119 BTRFS_QGROUP_STATUS_FLAGS_MASK); 1120 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 1121 1122 key.objectid = 0; 1123 key.type = BTRFS_ROOT_REF_KEY; 1124 key.offset = 0; 1125 1126 btrfs_release_path(path); 1127 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 1128 if (ret > 0) 1129 goto out_add_root; 1130 if (unlikely(ret < 0)) { 1131 btrfs_abort_transaction(trans, ret); 1132 goto out_free_path; 1133 } 1134 1135 while (1) { 1136 slot = path->slots[0]; 1137 leaf = path->nodes[0]; 1138 btrfs_item_key_to_cpu(leaf, &found_key, slot); 1139 1140 if (found_key.type == BTRFS_ROOT_REF_KEY) { 1141 1142 /* Release locks on tree_root before we access quota_root */ 1143 btrfs_release_path(path); 1144 1145 /* We should not have a stray @prealloc pointer. 
*/ 1146 ASSERT(prealloc == NULL); 1147 prealloc = kzalloc_obj(*prealloc, GFP_NOFS); 1148 if (unlikely(!prealloc)) { 1149 ret = -ENOMEM; 1150 btrfs_abort_transaction(trans, ret); 1151 goto out_free_path; 1152 } 1153 1154 ret = add_qgroup_item(trans, quota_root, 1155 found_key.offset); 1156 if (unlikely(ret)) { 1157 btrfs_abort_transaction(trans, ret); 1158 goto out_free_path; 1159 } 1160 1161 qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset); 1162 prealloc = NULL; 1163 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1164 if (unlikely(ret < 0)) { 1165 btrfs_abort_transaction(trans, ret); 1166 goto out_free_path; 1167 } 1168 ret = btrfs_search_slot_for_read(tree_root, &found_key, 1169 path, 1, 0); 1170 if (unlikely(ret < 0)) { 1171 btrfs_abort_transaction(trans, ret); 1172 goto out_free_path; 1173 } 1174 if (ret > 0) { 1175 /* 1176 * Shouldn't happen because the key should still 1177 * be there (return 0), but in case it does it 1178 * means we have reached the end of the tree - 1179 * there are no more leaves with items that have 1180 * a key greater than or equals to @found_key, 1181 * so just stop the search loop. 
1182 */ 1183 break; 1184 } 1185 } 1186 ret = btrfs_next_item(tree_root, path); 1187 if (unlikely(ret < 0)) { 1188 btrfs_abort_transaction(trans, ret); 1189 goto out_free_path; 1190 } 1191 if (ret) 1192 break; 1193 } 1194 1195 out_add_root: 1196 btrfs_release_path(path); 1197 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 1198 if (unlikely(ret)) { 1199 btrfs_abort_transaction(trans, ret); 1200 goto out_free_path; 1201 } 1202 1203 ASSERT(prealloc == NULL); 1204 prealloc = kzalloc_obj(*prealloc, GFP_NOFS); 1205 if (!prealloc) { 1206 ret = -ENOMEM; 1207 goto out_free_path; 1208 } 1209 qgroup = add_qgroup_rb(fs_info, prealloc, BTRFS_FS_TREE_OBJECTID); 1210 prealloc = NULL; 1211 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1212 if (unlikely(ret < 0)) { 1213 btrfs_abort_transaction(trans, ret); 1214 goto out_free_path; 1215 } 1216 1217 /* 1218 * Set fs_info->qgroup_enable_gen and BTRFS_FS_SQUOTA_ENABLING 1219 * under the transaction handle. We want to ensure that all extents in 1220 * the next transaction definitely see them. 1221 */ 1222 if (simple) { 1223 fs_info->qgroup_enable_gen = trans->transid + 1; 1224 set_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); 1225 } 1226 1227 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1228 /* 1229 * Commit the transaction while not holding qgroup_ioctl_lock, to avoid 1230 * a deadlock with tasks concurrently doing other qgroup operations, such 1231 * adding/removing qgroups or adding/deleting qgroup relations for example, 1232 * because all qgroup operations first start or join a transaction and then 1233 * lock the qgroup_ioctl_lock mutex. 1234 * We are safe from a concurrent task trying to enable quotas, by calling 1235 * this function, since we are serialized by fs_info->subvol_sem. 
1236 */ 1237 ret = btrfs_commit_transaction(trans); 1238 trans = NULL; 1239 1240 mutex_lock(&fs_info->qgroup_ioctl_lock); 1241 if (ret) { 1242 if (simple) { 1243 clear_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); 1244 fs_info->qgroup_enable_gen = 0; 1245 } 1246 goto out_free_path; 1247 } 1248 1249 /* 1250 * Set quota enabled flag after committing the transaction, to avoid 1251 * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot 1252 * creation. 1253 */ 1254 spin_lock(&fs_info->qgroup_lock); 1255 fs_info->quota_root = quota_root; 1256 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1257 if (simple) 1258 clear_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags); 1259 spin_unlock(&fs_info->qgroup_lock); 1260 1261 /* Skip rescan for simple qgroups. */ 1262 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) 1263 goto out_free_path; 1264 1265 ret = qgroup_rescan_init(fs_info, 0, 1); 1266 if (!ret) { 1267 qgroup_rescan_zero_tracking(fs_info); 1268 fs_info->qgroup_rescan_running = true; 1269 btrfs_queue_work(fs_info->qgroup_rescan_workers, 1270 &fs_info->qgroup_rescan_work); 1271 } else { 1272 /* 1273 * We have set both BTRFS_FS_QUOTA_ENABLED and 1274 * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with 1275 * -EINPROGRESS. That can happen because someone started the 1276 * rescan worker by calling quota rescan ioctl before we 1277 * attempted to initialize the rescan worker. Failure due to 1278 * quotas disabled in the meanwhile is not possible, because 1279 * we are holding a write lock on fs_info->subvol_sem, which 1280 * is also acquired when disabling quotas. 1281 * Ignore such error, and any other error would need to undo 1282 * everything we did in the transaction we just committed. 
1283 */ 1284 ASSERT(ret == -EINPROGRESS); 1285 ret = 0; 1286 } 1287 1288 out_free_path: 1289 btrfs_free_path(path); 1290 out_free_root: 1291 if (ret) 1292 btrfs_put_root(quota_root); 1293 out: 1294 if (ret) 1295 btrfs_sysfs_del_qgroups(fs_info); 1296 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1297 if (ret && trans) 1298 btrfs_end_transaction(trans); 1299 else if (trans) 1300 ret = btrfs_end_transaction(trans); 1301 kfree(prealloc); 1302 return ret; 1303 } 1304 1305 /* 1306 * It is possible to have outstanding ordered extents which reserved bytes 1307 * before we disabled. We need to fully flush delalloc, ordered extents, and a 1308 * commit to ensure that we don't leak such reservations, only to have them 1309 * come back if we re-enable. 1310 * 1311 * - enable simple quotas 1312 * - reserve space 1313 * - release it, store rsv_bytes in OE 1314 * - disable quotas 1315 * - enable simple quotas (qgroup rsv are all 0) 1316 * - OE finishes 1317 * - run delayed refs 1318 * - free rsv_bytes, resulting in miscounting or even underflow 1319 */ 1320 static int flush_reservations(struct btrfs_fs_info *fs_info) 1321 { 1322 int ret; 1323 1324 ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false); 1325 if (ret) 1326 return ret; 1327 btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL); 1328 1329 return btrfs_commit_current_transaction(fs_info->tree_root); 1330 } 1331 1332 int btrfs_quota_disable(struct btrfs_fs_info *fs_info) 1333 { 1334 struct btrfs_root *quota_root = NULL; 1335 struct btrfs_trans_handle *trans = NULL; 1336 int ret = 0; 1337 1338 /* 1339 * We need to have subvol_sem write locked to prevent races with 1340 * snapshot creation. 1341 */ 1342 lockdep_assert_held_write(&fs_info->subvol_sem); 1343 1344 /* 1345 * Relocation will mess with backrefs, so make sure we have the 1346 * cleaner_mutex held to protect us from relocate. 
1347 */ 1348 lockdep_assert_held(&fs_info->cleaner_mutex); 1349 1350 mutex_lock(&fs_info->qgroup_ioctl_lock); 1351 if (!fs_info->quota_root) 1352 goto out; 1353 1354 /* 1355 * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to 1356 * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs 1357 * to lock that mutex while holding a transaction handle and the rescan 1358 * worker needs to commit a transaction. 1359 */ 1360 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1361 1362 /* 1363 * Request qgroup rescan worker to complete and wait for it. This wait 1364 * must be done before transaction start for quota disable since it may 1365 * deadlock with transaction by the qgroup rescan worker. 1366 */ 1367 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1368 btrfs_qgroup_wait_for_completion(fs_info, false); 1369 1370 /* 1371 * We have nothing held here and no trans handle, just return the error 1372 * if there is one and set back the quota enabled bit since we didn't 1373 * actually disable quotas. 1374 */ 1375 ret = flush_reservations(fs_info); 1376 if (ret) { 1377 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1378 return ret; 1379 } 1380 1381 /* 1382 * 1 For the root item 1383 * 1384 * We should also reserve enough items for the quota tree deletion in 1385 * btrfs_clean_quota_tree but this is not done. 1386 * 1387 * Also, we must always start a transaction without holding the mutex 1388 * qgroup_ioctl_lock, see btrfs_quota_enable(). 
1389 */ 1390 trans = btrfs_start_transaction(fs_info->tree_root, 1); 1391 1392 mutex_lock(&fs_info->qgroup_ioctl_lock); 1393 if (IS_ERR(trans)) { 1394 ret = PTR_ERR(trans); 1395 trans = NULL; 1396 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1397 goto out; 1398 } 1399 1400 if (!fs_info->quota_root) 1401 goto out; 1402 1403 spin_lock(&fs_info->qgroup_lock); 1404 quota_root = fs_info->quota_root; 1405 fs_info->quota_root = NULL; 1406 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 1407 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE; 1408 fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT; 1409 spin_unlock(&fs_info->qgroup_lock); 1410 1411 btrfs_free_qgroup_config(fs_info); 1412 1413 ret = btrfs_clean_quota_tree(trans, quota_root); 1414 if (unlikely(ret)) { 1415 btrfs_abort_transaction(trans, ret); 1416 goto out; 1417 } 1418 1419 ret = btrfs_del_root(trans, "a_root->root_key); 1420 if (unlikely(ret)) { 1421 btrfs_abort_transaction(trans, ret); 1422 goto out; 1423 } 1424 1425 spin_lock(&fs_info->trans_lock); 1426 list_del("a_root->dirty_list); 1427 spin_unlock(&fs_info->trans_lock); 1428 1429 btrfs_tree_lock(quota_root->node); 1430 btrfs_clear_buffer_dirty(trans, quota_root->node); 1431 btrfs_tree_unlock(quota_root->node); 1432 ret = btrfs_free_tree_block(trans, btrfs_root_id(quota_root), 1433 quota_root->node, 0, 1); 1434 1435 if (ret < 0) 1436 btrfs_abort_transaction(trans, ret); 1437 1438 out: 1439 btrfs_put_root(quota_root); 1440 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1441 if (ret && trans) 1442 btrfs_end_transaction(trans); 1443 else if (trans) 1444 ret = btrfs_commit_transaction(trans); 1445 return ret; 1446 } 1447 1448 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 1449 struct btrfs_qgroup *qgroup) 1450 { 1451 if (list_empty(&qgroup->dirty)) 1452 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1453 } 1454 1455 static void qgroup_iterator_add(struct list_head *head, struct btrfs_qgroup *qgroup) 
1456 { 1457 if (!list_empty(&qgroup->iterator)) 1458 return; 1459 1460 list_add_tail(&qgroup->iterator, head); 1461 } 1462 1463 static void qgroup_iterator_clean(struct list_head *head) 1464 { 1465 while (!list_empty(head)) { 1466 struct btrfs_qgroup *qgroup; 1467 1468 qgroup = list_first_entry(head, struct btrfs_qgroup, iterator); 1469 list_del_init(&qgroup->iterator); 1470 } 1471 } 1472 1473 /* 1474 * The easy accounting, we're updating qgroup relationship whose child qgroup 1475 * only has exclusive extents. 1476 * 1477 * In this case, all exclusive extents will also be exclusive for parent, so 1478 * excl/rfer just get added/removed. 1479 * 1480 * So is qgroup reservation space, which should also be added/removed to 1481 * parent. 1482 * Or when child tries to release reservation space, parent will underflow its 1483 * reservation (for relationship adding case). 1484 * 1485 * Caller should hold fs_info->qgroup_lock. 1486 */ 1487 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, 1488 struct btrfs_qgroup *src, int sign) 1489 { 1490 struct btrfs_qgroup *qgroup; 1491 LIST_HEAD(qgroup_list); 1492 u64 num_bytes = src->excl; 1493 u64 num_bytes_cmpr = src->excl_cmpr; 1494 int ret = 0; 1495 1496 qgroup = find_qgroup_rb(fs_info, ref_root); 1497 if (!qgroup) 1498 goto out; 1499 1500 qgroup_iterator_add(&qgroup_list, qgroup); 1501 list_for_each_entry(qgroup, &qgroup_list, iterator) { 1502 struct btrfs_qgroup_list *glist; 1503 1504 qgroup->rfer += sign * num_bytes; 1505 qgroup->rfer_cmpr += sign * num_bytes_cmpr; 1506 1507 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1508 WARN_ON(sign < 0 && qgroup->excl_cmpr < num_bytes_cmpr); 1509 qgroup->excl += sign * num_bytes; 1510 qgroup->excl_cmpr += sign * num_bytes_cmpr; 1511 1512 if (sign > 0) 1513 qgroup_rsv_add_by_qgroup(fs_info, qgroup, src); 1514 else 1515 qgroup_rsv_release_by_qgroup(fs_info, qgroup, src); 1516 qgroup_dirty(fs_info, qgroup); 1517 1518 /* Append parent qgroups to @qgroup_list. 
*/ 1519 list_for_each_entry(glist, &qgroup->groups, next_group) 1520 qgroup_iterator_add(&qgroup_list, glist->group); 1521 } 1522 ret = 0; 1523 out: 1524 qgroup_iterator_clean(&qgroup_list); 1525 return ret; 1526 } 1527 1528 1529 /* 1530 * Quick path for updating qgroup with only excl refs. 1531 * 1532 * In that case, just update all parent will be enough. 1533 * Or we needs to do a full rescan. 1534 * Caller should also hold fs_info->qgroup_lock. 1535 * 1536 * Return 0 for quick update, return >0 for need to full rescan 1537 * and mark INCONSISTENT flag. 1538 * Return < 0 for other error. 1539 */ 1540 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1541 u64 src, u64 dst, int sign) 1542 { 1543 struct btrfs_qgroup *qgroup; 1544 int ret = 1; 1545 1546 qgroup = find_qgroup_rb(fs_info, src); 1547 if (!qgroup) 1548 goto out; 1549 if (qgroup->excl == qgroup->rfer) { 1550 ret = __qgroup_excl_accounting(fs_info, dst, qgroup, sign); 1551 if (ret < 0) 1552 goto out; 1553 ret = 0; 1554 } 1555 out: 1556 if (ret) 1557 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1558 return ret; 1559 } 1560 1561 /* 1562 * Add relation between @src and @dst qgroup. The @prealloc is allocated by the 1563 * callers and transferred here (either used or freed on error). 
1564 */ 1565 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst, 1566 struct btrfs_qgroup_list *prealloc) 1567 { 1568 struct btrfs_fs_info *fs_info = trans->fs_info; 1569 struct btrfs_qgroup *parent; 1570 struct btrfs_qgroup *member; 1571 struct btrfs_qgroup_list *list; 1572 int ret = 0; 1573 1574 ASSERT(prealloc); 1575 1576 /* Check the level of src and dst first */ 1577 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) { 1578 kfree(prealloc); 1579 return -EINVAL; 1580 } 1581 1582 mutex_lock(&fs_info->qgroup_ioctl_lock); 1583 if (!fs_info->quota_root) { 1584 ret = -ENOTCONN; 1585 goto out; 1586 } 1587 member = find_qgroup_rb(fs_info, src); 1588 parent = find_qgroup_rb(fs_info, dst); 1589 if (!member || !parent) { 1590 ret = -EINVAL; 1591 goto out; 1592 } 1593 1594 /* check if such qgroup relation exist firstly */ 1595 list_for_each_entry(list, &member->groups, next_group) { 1596 if (list->group == parent) { 1597 ret = -EEXIST; 1598 goto out; 1599 } 1600 } 1601 1602 ret = add_qgroup_relation_item(trans, src, dst); 1603 if (ret) 1604 goto out; 1605 1606 ret = add_qgroup_relation_item(trans, dst, src); 1607 if (ret) { 1608 del_qgroup_relation_item(trans, src, dst); 1609 goto out; 1610 } 1611 1612 spin_lock(&fs_info->qgroup_lock); 1613 ret = __add_relation_rb(prealloc, member, parent); 1614 prealloc = NULL; 1615 if (ret < 0) { 1616 spin_unlock(&fs_info->qgroup_lock); 1617 goto out; 1618 } 1619 ret = quick_update_accounting(fs_info, src, dst, 1); 1620 squota_check_parent_usage(fs_info, parent); 1621 spin_unlock(&fs_info->qgroup_lock); 1622 out: 1623 kfree(prealloc); 1624 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1625 return ret; 1626 } 1627 1628 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1629 u64 dst) 1630 { 1631 struct btrfs_fs_info *fs_info = trans->fs_info; 1632 struct btrfs_qgroup *parent; 1633 struct btrfs_qgroup *member; 1634 struct btrfs_qgroup_list *list; 1635 bool found = false; 1636 int ret 
= 0; 1637 int ret2; 1638 1639 if (!fs_info->quota_root) 1640 return -ENOTCONN; 1641 1642 member = find_qgroup_rb(fs_info, src); 1643 parent = find_qgroup_rb(fs_info, dst); 1644 /* 1645 * The parent/member pair doesn't exist, then try to delete the dead 1646 * relation items only. 1647 */ 1648 if (!member || !parent) 1649 goto delete_item; 1650 1651 /* check if such qgroup relation exist firstly */ 1652 list_for_each_entry(list, &member->groups, next_group) { 1653 if (list->group == parent) { 1654 found = true; 1655 break; 1656 } 1657 } 1658 1659 delete_item: 1660 ret = del_qgroup_relation_item(trans, src, dst); 1661 if (ret < 0 && ret != -ENOENT) 1662 return ret; 1663 ret2 = del_qgroup_relation_item(trans, dst, src); 1664 if (ret2 < 0 && ret2 != -ENOENT) 1665 return ret2; 1666 1667 /* At least one deletion succeeded, return 0 */ 1668 if (!ret || !ret2) 1669 ret = 0; 1670 1671 if (found) { 1672 spin_lock(&fs_info->qgroup_lock); 1673 del_relation_rb(fs_info, src, dst); 1674 ret = quick_update_accounting(fs_info, src, dst, -1); 1675 ASSERT(parent); 1676 squota_check_parent_usage(fs_info, parent); 1677 spin_unlock(&fs_info->qgroup_lock); 1678 } 1679 1680 return ret; 1681 } 1682 1683 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1684 u64 dst) 1685 { 1686 struct btrfs_fs_info *fs_info = trans->fs_info; 1687 int ret = 0; 1688 1689 mutex_lock(&fs_info->qgroup_ioctl_lock); 1690 ret = __del_qgroup_relation(trans, src, dst); 1691 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1692 1693 return ret; 1694 } 1695 1696 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) 1697 { 1698 struct btrfs_fs_info *fs_info = trans->fs_info; 1699 struct btrfs_root *quota_root; 1700 struct btrfs_qgroup *qgroup; 1701 struct btrfs_qgroup *prealloc = NULL; 1702 int ret = 0; 1703 1704 mutex_lock(&fs_info->qgroup_ioctl_lock); 1705 if (!fs_info->quota_root) { 1706 ret = -ENOTCONN; 1707 goto out; 1708 } 1709 quota_root = fs_info->quota_root; 1710 qgroup = 
find_qgroup_rb(fs_info, qgroupid); 1711 if (qgroup) { 1712 ret = -EEXIST; 1713 goto out; 1714 } 1715 1716 prealloc = kzalloc_obj(*prealloc, GFP_NOFS); 1717 if (!prealloc) { 1718 ret = -ENOMEM; 1719 goto out; 1720 } 1721 1722 ret = add_qgroup_item(trans, quota_root, qgroupid); 1723 if (ret) 1724 goto out; 1725 1726 spin_lock(&fs_info->qgroup_lock); 1727 qgroup = add_qgroup_rb(fs_info, prealloc, qgroupid); 1728 spin_unlock(&fs_info->qgroup_lock); 1729 prealloc = NULL; 1730 1731 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1732 out: 1733 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1734 kfree(prealloc); 1735 return ret; 1736 } 1737 1738 static bool can_delete_parent_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup) 1739 { 1740 ASSERT(btrfs_qgroup_level(qgroup->qgroupid)); 1741 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) 1742 squota_check_parent_usage(fs_info, qgroup); 1743 return list_empty(&qgroup->members); 1744 } 1745 1746 /* 1747 * Because a shared extent can outlive its owning subvolume, we cannot delete a 1748 * subvol squota qgroup until all of the extents it owns are gone, even if the 1749 * subvolume itself has been deleted. 1750 */ 1751 static bool can_delete_squota_subvol_qgroup(struct btrfs_fs_info *fs_info, 1752 struct btrfs_qgroup *qgroup) 1753 { 1754 ASSERT(btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE); 1755 ASSERT(btrfs_qgroup_level(qgroup->qgroupid) == 0); 1756 1757 return !(qgroup->rfer || qgroup->excl || qgroup->rfer_cmpr || qgroup->excl_cmpr); 1758 } 1759 1760 /* 1761 * Return 0 if we can not delete the qgroup (not empty or has children etc). 1762 * Return >0 if we can delete the qgroup. 1763 * Return <0 for other errors during tree search. 1764 */ 1765 static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup) 1766 { 1767 struct btrfs_key key; 1768 BTRFS_PATH_AUTO_FREE(path); 1769 int ret; 1770 1771 /* For higher level qgroup, we can only delete it if it has no child. 
*/ 1772 if (btrfs_qgroup_level(qgroup->qgroupid)) 1773 return can_delete_parent_qgroup(fs_info, qgroup); 1774 1775 /* 1776 * For level-0 qgroups, we can only delete it if it has no subvolume 1777 * for it. 1778 * This means even a subvolume is unlinked but not yet fully dropped, 1779 * we can not delete the qgroup. 1780 */ 1781 key.objectid = qgroup->qgroupid; 1782 key.type = BTRFS_ROOT_ITEM_KEY; 1783 key.offset = -1ULL; 1784 path = btrfs_alloc_path(); 1785 if (!path) 1786 return -ENOMEM; 1787 1788 /* 1789 * Any subvol qgroup, regardless of mode, cannot be deleted if the 1790 * subvol still exists. 1791 */ 1792 ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL); 1793 /* 1794 * btrfs_find_root returns <0 on error, 0 if found, and >0 if not, 1795 * so the "found" and "error" cases match our desired return values. 1796 */ 1797 if (ret <= 0) 1798 return ret; 1799 1800 /* Squotas require additional checks, even if the subvol is deleted. */ 1801 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) 1802 return can_delete_squota_subvol_qgroup(fs_info, qgroup); 1803 return 1; 1804 } 1805 1806 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) 1807 { 1808 struct btrfs_fs_info *fs_info = trans->fs_info; 1809 struct btrfs_qgroup *qgroup; 1810 struct btrfs_qgroup_list *list; 1811 int ret = 0; 1812 1813 mutex_lock(&fs_info->qgroup_ioctl_lock); 1814 if (!fs_info->quota_root) { 1815 ret = -ENOTCONN; 1816 goto out; 1817 } 1818 1819 qgroup = find_qgroup_rb(fs_info, qgroupid); 1820 if (!qgroup) { 1821 ret = -ENOENT; 1822 goto out; 1823 } 1824 1825 ret = can_delete_qgroup(fs_info, qgroup); 1826 if (ret < 0) 1827 goto out; 1828 if (ret == 0) { 1829 ret = -EBUSY; 1830 goto out; 1831 } 1832 1833 /* Check if there are no children of this qgroup */ 1834 if (!list_empty(&qgroup->members)) { 1835 ret = -EBUSY; 1836 goto out; 1837 } 1838 1839 ret = del_qgroup_item(trans, qgroupid); 1840 if (ret && ret != -ENOENT) 1841 goto out; 1842 1843 while 
(!list_empty(&qgroup->groups)) { 1844 list = list_first_entry(&qgroup->groups, 1845 struct btrfs_qgroup_list, next_group); 1846 ret = __del_qgroup_relation(trans, qgroupid, 1847 list->group->qgroupid); 1848 if (ret) 1849 goto out; 1850 } 1851 1852 spin_lock(&fs_info->qgroup_lock); 1853 /* 1854 * Warn on reserved space. The subvolume should has no child nor 1855 * corresponding subvolume. 1856 * Thus its reserved space should all be zero, no matter if qgroup 1857 * is consistent or the mode. 1858 */ 1859 if (unlikely(qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] || 1860 qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] || 1861 qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS])) { 1862 DEBUG_WARN(); 1863 btrfs_warn_rl(fs_info, 1864 "to be deleted qgroup " BTRFS_QGROUP_FMT " has non-zero numbers, data %llu meta prealloc %llu meta pertrans %llu", 1865 BTRFS_QGROUP_FMT_VALUE(qgroup), 1866 qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA], 1867 qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC], 1868 qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]); 1869 1870 } 1871 /* 1872 * The same for rfer/excl numbers, but that's only if our qgroup is 1873 * consistent and if it's in regular qgroup mode. 1874 * For simple mode it's not as accurate thus we can hit non-zero values 1875 * very frequently. 
1876 */ 1877 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL && 1878 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) { 1879 if (unlikely(qgroup->rfer || qgroup->excl || 1880 qgroup->rfer_cmpr || qgroup->excl_cmpr)) { 1881 DEBUG_WARN(); 1882 qgroup_mark_inconsistent(fs_info, 1883 "to be deleted qgroup " BTRFS_QGROUP_FMT " has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu", 1884 BTRFS_QGROUP_FMT_VALUE(qgroup), 1885 qgroup->rfer, qgroup->rfer_cmpr, 1886 qgroup->excl, qgroup->excl_cmpr); 1887 } 1888 } 1889 del_qgroup_rb(fs_info, qgroupid); 1890 spin_unlock(&fs_info->qgroup_lock); 1891 1892 /* 1893 * Remove the qgroup from sysfs now without holding the qgroup_lock 1894 * spinlock, since the sysfs_remove_group() function needs to take 1895 * the mutex kernfs_mutex through kernfs_remove_by_name_ns(). 1896 */ 1897 btrfs_sysfs_del_one_qgroup(fs_info, qgroup); 1898 kfree(qgroup); 1899 out: 1900 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1901 return ret; 1902 } 1903 1904 int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 subvolid) 1905 { 1906 struct btrfs_trans_handle *trans; 1907 int ret; 1908 1909 if (!btrfs_is_fstree(subvolid) || !btrfs_qgroup_enabled(fs_info) || 1910 !fs_info->quota_root) 1911 return 0; 1912 1913 /* 1914 * Commit current transaction to make sure all the rfer/excl numbers 1915 * get updated. 1916 */ 1917 ret = btrfs_commit_current_transaction(fs_info->quota_root); 1918 if (ret < 0) 1919 return ret; 1920 1921 /* Start new trans to delete the qgroup info and limit items. */ 1922 trans = btrfs_start_transaction(fs_info->quota_root, 2); 1923 if (IS_ERR(trans)) 1924 return PTR_ERR(trans); 1925 ret = btrfs_remove_qgroup(trans, subvolid); 1926 btrfs_end_transaction(trans); 1927 /* 1928 * It's squota and the subvolume still has numbers needed for future 1929 * accounting, in this case we can not delete it. Just skip it. 1930 * 1931 * Or the qgroup is already removed by a qgroup rescan. 
For both cases we're 1932 * safe to ignore them. 1933 */ 1934 if (ret == -EBUSY || ret == -ENOENT) 1935 ret = 0; 1936 return ret; 1937 } 1938 1939 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, 1940 struct btrfs_qgroup_limit *limit) 1941 { 1942 struct btrfs_fs_info *fs_info = trans->fs_info; 1943 struct btrfs_qgroup *qgroup; 1944 int ret = 0; 1945 /* Sometimes we would want to clear the limit on this qgroup. 1946 * To meet this requirement, we treat the -1 as a special value 1947 * which tell kernel to clear the limit on this qgroup. 1948 */ 1949 const u64 CLEAR_VALUE = -1; 1950 1951 mutex_lock(&fs_info->qgroup_ioctl_lock); 1952 if (!fs_info->quota_root) { 1953 ret = -ENOTCONN; 1954 goto out; 1955 } 1956 1957 qgroup = find_qgroup_rb(fs_info, qgroupid); 1958 if (!qgroup) { 1959 ret = -ENOENT; 1960 goto out; 1961 } 1962 1963 spin_lock(&fs_info->qgroup_lock); 1964 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1965 if (limit->max_rfer == CLEAR_VALUE) { 1966 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1967 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1968 qgroup->max_rfer = 0; 1969 } else { 1970 qgroup->max_rfer = limit->max_rfer; 1971 } 1972 } 1973 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1974 if (limit->max_excl == CLEAR_VALUE) { 1975 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1976 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1977 qgroup->max_excl = 0; 1978 } else { 1979 qgroup->max_excl = limit->max_excl; 1980 } 1981 } 1982 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1983 if (limit->rsv_rfer == CLEAR_VALUE) { 1984 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1985 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1986 qgroup->rsv_rfer = 0; 1987 } else { 1988 qgroup->rsv_rfer = limit->rsv_rfer; 1989 } 1990 } 1991 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1992 if (limit->rsv_excl == CLEAR_VALUE) { 1993 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1994 limit->flags &= 
~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1995 qgroup->rsv_excl = 0; 1996 } else { 1997 qgroup->rsv_excl = limit->rsv_excl; 1998 } 1999 } 2000 qgroup->lim_flags |= limit->flags; 2001 2002 spin_unlock(&fs_info->qgroup_lock); 2003 2004 ret = update_qgroup_limit_item(trans, qgroup); 2005 if (ret) 2006 qgroup_mark_inconsistent(fs_info, "qgroup item update error %d", ret); 2007 2008 out: 2009 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2010 return ret; 2011 } 2012 2013 /* 2014 * Inform qgroup to trace one dirty extent, its info is recorded in @record. 2015 * So qgroup can account it at transaction committing time. 2016 * 2017 * No lock version, caller must acquire delayed ref lock and allocated memory, 2018 * then call btrfs_qgroup_trace_extent_post() after exiting lock context. 2019 * 2020 * Return 0 for success insert 2021 * Return >0 for existing record, caller can free @record safely. 2022 * Return <0 for insertion failure, caller can free @record safely. 2023 */ 2024 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 2025 struct btrfs_delayed_ref_root *delayed_refs, 2026 struct btrfs_qgroup_extent_record *record, 2027 u64 bytenr) 2028 { 2029 struct btrfs_qgroup_extent_record *existing, *ret; 2030 const unsigned long index = (bytenr >> fs_info->sectorsize_bits); 2031 2032 if (!btrfs_qgroup_full_accounting(fs_info)) 2033 return 1; 2034 2035 #if BITS_PER_LONG == 32 2036 if (bytenr >= MAX_LFS_FILESIZE) { 2037 btrfs_err_rl(fs_info, 2038 "qgroup record for extent at %llu is beyond 32bit page cache and xarray index limit", 2039 bytenr); 2040 btrfs_err_32bit_limit(fs_info); 2041 return -EOVERFLOW; 2042 } 2043 #endif 2044 2045 trace_btrfs_qgroup_trace_extent(fs_info, record, bytenr); 2046 2047 xa_lock(&delayed_refs->dirty_extents); 2048 existing = xa_load(&delayed_refs->dirty_extents, index); 2049 if (existing) { 2050 if (record->data_rsv && !existing->data_rsv) { 2051 existing->data_rsv = record->data_rsv; 2052 existing->data_rsv_refroot = record->data_rsv_refroot; 
2053 } 2054 xa_unlock(&delayed_refs->dirty_extents); 2055 return 1; 2056 } 2057 2058 ret = __xa_store(&delayed_refs->dirty_extents, index, record, GFP_ATOMIC); 2059 xa_unlock(&delayed_refs->dirty_extents); 2060 if (xa_is_err(ret)) { 2061 qgroup_mark_inconsistent(fs_info, "xarray insert error: %d", xa_err(ret)); 2062 return xa_err(ret); 2063 } 2064 2065 return 0; 2066 } 2067 2068 /* 2069 * Post handler after qgroup_trace_extent_nolock(). 2070 * 2071 * NOTE: Current qgroup does the expensive backref walk at transaction 2072 * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming 2073 * new transaction. 2074 * This is designed to allow btrfs_find_all_roots() to get correct new_roots 2075 * result. 2076 * 2077 * However for old_roots there is no need to do backref walk at that time, 2078 * since we search commit roots to walk backref and result will always be 2079 * correct. 2080 * 2081 * Due to the nature of no lock version, we can't do backref there. 2082 * So we must call btrfs_qgroup_trace_extent_post() after exiting 2083 * spinlock context. 2084 * 2085 * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result 2086 * using current root, then we can move all expensive backref walk out of 2087 * transaction committing, but not now as qgroup accounting will be wrong again. 2088 */ 2089 int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, 2090 struct btrfs_qgroup_extent_record *qrecord, 2091 u64 bytenr) 2092 { 2093 struct btrfs_fs_info *fs_info = trans->fs_info; 2094 struct btrfs_backref_walk_ctx ctx = { 2095 .bytenr = bytenr, 2096 .fs_info = fs_info, 2097 }; 2098 int ret; 2099 2100 if (!btrfs_qgroup_full_accounting(fs_info)) 2101 return 0; 2102 /* 2103 * We are always called in a context where we are already holding a 2104 * transaction handle. 
Often we are called when adding a data delayed 2105 * reference from btrfs_truncate_inode_items() (truncating or unlinking), 2106 * in which case we will be holding a write lock on extent buffer from a 2107 * subvolume tree. In this case we can't allow btrfs_find_all_roots() to 2108 * acquire fs_info->commit_root_sem, because that is a higher level lock 2109 * that must be acquired before locking any extent buffers. 2110 * 2111 * So we want btrfs_find_all_roots() to not acquire the commit_root_sem 2112 * but we can't pass it a non-NULL transaction handle, because otherwise 2113 * it would not use commit roots and would lock extent buffers, causing 2114 * a deadlock if it ends up trying to read lock the same extent buffer 2115 * that was previously write locked at btrfs_truncate_inode_items(). 2116 * 2117 * So pass a NULL transaction handle to btrfs_find_all_roots() and 2118 * explicitly tell it to not acquire the commit_root_sem - if we are 2119 * holding a transaction handle we don't need its protection. 2120 */ 2121 ASSERT(trans != NULL); 2122 2123 if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING) 2124 return 0; 2125 2126 ret = btrfs_find_all_roots(&ctx, true); 2127 if (ret < 0) { 2128 qgroup_mark_inconsistent(fs_info, 2129 "error accounting new delayed refs extent: %d", ret); 2130 return 0; 2131 } 2132 2133 /* 2134 * Here we don't need to get the lock of 2135 * trans->transaction->delayed_refs, since inserted qrecord won't 2136 * be deleted, only qrecord->node may be modified (new qrecord insert) 2137 * 2138 * So modifying qrecord->old_roots is safe here 2139 */ 2140 qrecord->old_roots = ctx.roots; 2141 return 0; 2142 } 2143 2144 /* 2145 * Inform qgroup to trace one dirty extent, specified by @bytenr and 2146 * @num_bytes. 2147 * So qgroup can account it at commit trans time. 2148 * 2149 * Better encapsulated version, with memory allocation and backref walk for 2150 * commit roots. 2151 * So this can sleep. 
2152 * 2153 * Return 0 if the operation is done. 2154 * Return <0 for error, like memory allocation failure or invalid parameter 2155 * (NULL trans) 2156 */ 2157 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, 2158 u64 num_bytes) 2159 { 2160 struct btrfs_fs_info *fs_info = trans->fs_info; 2161 struct btrfs_qgroup_extent_record *record; 2162 struct btrfs_delayed_ref_root *delayed_refs = &trans->transaction->delayed_refs; 2163 const unsigned long index = (bytenr >> fs_info->sectorsize_bits); 2164 int ret; 2165 2166 if (!btrfs_qgroup_full_accounting(fs_info) || bytenr == 0 || num_bytes == 0) 2167 return 0; 2168 record = kzalloc_obj(*record, GFP_NOFS); 2169 if (!record) 2170 return -ENOMEM; 2171 2172 if (xa_reserve(&delayed_refs->dirty_extents, index, GFP_NOFS)) { 2173 kfree(record); 2174 return -ENOMEM; 2175 } 2176 2177 record->num_bytes = num_bytes; 2178 2179 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record, bytenr); 2180 if (ret) { 2181 /* Clean up if insertion fails or item exists. 
*/ 2182 xa_release(&delayed_refs->dirty_extents, index); 2183 kfree(record); 2184 return 0; 2185 } 2186 return btrfs_qgroup_trace_extent_post(trans, record, bytenr); 2187 } 2188 2189 /* 2190 * Inform qgroup to trace all leaf items of data 2191 * 2192 * Return 0 for success 2193 * Return <0 for error(ENOMEM) 2194 */ 2195 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 2196 struct extent_buffer *eb) 2197 { 2198 struct btrfs_fs_info *fs_info = trans->fs_info; 2199 int nr = btrfs_header_nritems(eb); 2200 int i, extent_type, ret; 2201 struct btrfs_key key; 2202 struct btrfs_file_extent_item *fi; 2203 u64 bytenr, num_bytes; 2204 2205 /* We can be called directly from walk_up_proc() */ 2206 if (!btrfs_qgroup_full_accounting(fs_info)) 2207 return 0; 2208 2209 for (i = 0; i < nr; i++) { 2210 btrfs_item_key_to_cpu(eb, &key, i); 2211 2212 if (key.type != BTRFS_EXTENT_DATA_KEY) 2213 continue; 2214 2215 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 2216 /* filter out non qgroup-accountable extents */ 2217 extent_type = btrfs_file_extent_type(eb, fi); 2218 2219 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 2220 continue; 2221 2222 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 2223 if (!bytenr) 2224 continue; 2225 2226 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 2227 2228 ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes); 2229 if (ret) 2230 return ret; 2231 } 2232 cond_resched(); 2233 return 0; 2234 } 2235 2236 /* 2237 * Walk up the tree from the bottom, freeing leaves and any interior 2238 * nodes which have had all slots visited. If a node (leaf or 2239 * interior) is freed, the node above it will have it's slot 2240 * incremented. The root node will never be freed. 2241 * 2242 * At the end of this function, we should have a path which has all 2243 * slots incremented to the next position for a search. 
If we need to 2244 * read a new node it will be NULL and the node above it will have the 2245 * correct slot selected for a later read. 2246 * 2247 * If we increment the root nodes slot counter past the number of 2248 * elements, 1 is returned to signal completion of the search. 2249 */ 2250 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 2251 { 2252 int level = 0; 2253 int nr, slot; 2254 struct extent_buffer *eb; 2255 2256 if (root_level == 0) 2257 return 1; 2258 2259 while (level <= root_level) { 2260 eb = path->nodes[level]; 2261 nr = btrfs_header_nritems(eb); 2262 path->slots[level]++; 2263 slot = path->slots[level]; 2264 if (slot >= nr || level == 0) { 2265 /* 2266 * Don't free the root - we will detect this 2267 * condition after our loop and return a 2268 * positive value for caller to stop walking the tree. 2269 */ 2270 if (level != root_level) { 2271 btrfs_tree_unlock_rw(eb, path->locks[level]); 2272 path->locks[level] = 0; 2273 2274 free_extent_buffer(eb); 2275 path->nodes[level] = NULL; 2276 path->slots[level] = 0; 2277 } 2278 } else { 2279 /* 2280 * We have a valid slot to walk back down 2281 * from. Stop here so caller can process these 2282 * new nodes. 2283 */ 2284 break; 2285 } 2286 2287 level++; 2288 } 2289 2290 eb = path->nodes[root_level]; 2291 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 2292 return 1; 2293 2294 return 0; 2295 } 2296 2297 /* 2298 * Helper function to trace a subtree tree block swap. 2299 * 2300 * The swap will happen in highest tree block, but there may be a lot of 2301 * tree blocks involved. 
 *
 * For example:
 *  OO = Old tree blocks
 *  NN = New tree blocks allocated during balance
 *
 *           File tree (257)                  Reloc tree for 257
 * L2              OO                                NN
 *               /    \                            /    \
 * L1          OO      OO (a)                    OO      NN (a)
 *            / \     / \                       / \     / \
 * L0       OO   OO OO   OO                   OO   OO NN   NN
 *                  (b)  (c)                          (b)  (c)
 *
 * When calling qgroup_trace_extent_swap(), we will pass:
 * @src_eb = OO(a)
 * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
 * @dst_level = 0
 * @root_level = 1
 *
 * In that case, qgroup_trace_extent_swap() will search from OO(a) to
 * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
 *
 * The main work of qgroup_trace_extent_swap() can be split into 3 parts:
 *
 * 1) Tree search from @src_eb
 *    It should act as a simplified btrfs_search_slot().
 *    The key for search can be extracted from @dst_path->nodes[dst_level]
 *    (first key).
 *
 * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
 *    NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty.
 *    They should be marked during previous (@dst_level = 1) iteration.
 *
 * 3) Mark file extents in leaves dirty
 *    We don't have good way to pick out new file extents only.
 *    So we still follow the old method by scanning all file extents in
 *    the leaf.
 *
 * This function can free us from keeping two paths, thus later we only need
 * to care about how to iterate all new tree blocks in reloc tree.
2342 */ 2343 static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, 2344 struct extent_buffer *src_eb, 2345 struct btrfs_path *dst_path, 2346 int dst_level, int root_level, 2347 bool trace_leaf) 2348 { 2349 struct btrfs_key key; 2350 BTRFS_PATH_AUTO_FREE(src_path); 2351 struct btrfs_fs_info *fs_info = trans->fs_info; 2352 u32 nodesize = fs_info->nodesize; 2353 int cur_level = root_level; 2354 int ret; 2355 2356 BUG_ON(dst_level > root_level); 2357 /* Level mismatch */ 2358 if (btrfs_header_level(src_eb) != root_level) 2359 return -EINVAL; 2360 2361 src_path = btrfs_alloc_path(); 2362 if (!src_path) 2363 return -ENOMEM; 2364 2365 if (dst_level) 2366 btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0); 2367 else 2368 btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0); 2369 2370 /* For src_path */ 2371 refcount_inc(&src_eb->refs); 2372 src_path->nodes[root_level] = src_eb; 2373 src_path->slots[root_level] = dst_path->slots[root_level]; 2374 src_path->locks[root_level] = 0; 2375 2376 /* A simplified version of btrfs_search_slot() */ 2377 while (cur_level >= dst_level) { 2378 struct btrfs_key src_key; 2379 struct btrfs_key dst_key; 2380 2381 if (src_path->nodes[cur_level] == NULL) { 2382 struct extent_buffer *eb; 2383 int parent_slot; 2384 2385 eb = src_path->nodes[cur_level + 1]; 2386 parent_slot = src_path->slots[cur_level + 1]; 2387 2388 eb = btrfs_read_node_slot(eb, parent_slot); 2389 if (IS_ERR(eb)) 2390 return PTR_ERR(eb); 2391 2392 src_path->nodes[cur_level] = eb; 2393 2394 btrfs_tree_read_lock(eb); 2395 src_path->locks[cur_level] = BTRFS_READ_LOCK; 2396 } 2397 2398 src_path->slots[cur_level] = dst_path->slots[cur_level]; 2399 if (cur_level) { 2400 btrfs_node_key_to_cpu(dst_path->nodes[cur_level], 2401 &dst_key, dst_path->slots[cur_level]); 2402 btrfs_node_key_to_cpu(src_path->nodes[cur_level], 2403 &src_key, src_path->slots[cur_level]); 2404 } else { 2405 btrfs_item_key_to_cpu(dst_path->nodes[cur_level], 2406 &dst_key, 
dst_path->slots[cur_level]); 2407 btrfs_item_key_to_cpu(src_path->nodes[cur_level], 2408 &src_key, src_path->slots[cur_level]); 2409 } 2410 /* Content mismatch, something went wrong */ 2411 if (btrfs_comp_cpu_keys(&dst_key, &src_key)) 2412 return -ENOENT; 2413 cur_level--; 2414 } 2415 2416 /* 2417 * Now both @dst_path and @src_path have been populated, record the tree 2418 * blocks for qgroup accounting. 2419 */ 2420 ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start, 2421 nodesize); 2422 if (ret < 0) 2423 return ret; 2424 ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start, 2425 nodesize); 2426 if (ret < 0) 2427 return ret; 2428 2429 /* Record leaf file extents */ 2430 if (dst_level == 0 && trace_leaf) { 2431 ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]); 2432 if (ret < 0) 2433 return ret; 2434 ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]); 2435 } 2436 2437 return ret; 2438 } 2439 2440 /* 2441 * Helper function to do recursive generation-aware depth-first search, to 2442 * locate all new tree blocks in a subtree of reloc tree. 2443 * 2444 * E.g. (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot) 2445 * reloc tree 2446 * L2 NN (a) 2447 * / \ 2448 * L1 OO NN (b) 2449 * / \ / \ 2450 * L0 OO OO OO NN 2451 * (c) (d) 2452 * If we pass: 2453 * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ], 2454 * @cur_level = 1 2455 * @root_level = 1 2456 * 2457 * We will iterate through tree blocks NN(b), NN(d) and info qgroup to trace 2458 * above tree blocks along with their counter parts in file tree. 2459 * While during search, old tree blocks OO(c) will be skipped as tree block swap 2460 * won't affect OO(c). 
2461 */ 2462 static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, 2463 struct extent_buffer *src_eb, 2464 struct btrfs_path *dst_path, 2465 int cur_level, int root_level, 2466 u64 last_snapshot, bool trace_leaf) 2467 { 2468 struct btrfs_fs_info *fs_info = trans->fs_info; 2469 struct extent_buffer *eb; 2470 bool need_cleanup = false; 2471 int ret = 0; 2472 int i; 2473 2474 /* Level sanity check */ 2475 if (unlikely(cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || 2476 root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || 2477 root_level < cur_level)) { 2478 btrfs_err_rl(fs_info, 2479 "%s: bad levels, cur_level=%d root_level=%d", 2480 __func__, cur_level, root_level); 2481 return -EUCLEAN; 2482 } 2483 2484 /* Read the tree block if needed */ 2485 if (dst_path->nodes[cur_level] == NULL) { 2486 int parent_slot; 2487 u64 child_gen; 2488 2489 /* 2490 * dst_path->nodes[root_level] must be initialized before 2491 * calling this function. 2492 */ 2493 if (unlikely(cur_level == root_level)) { 2494 btrfs_err_rl(fs_info, 2495 "%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d", 2496 __func__, root_level, root_level, cur_level); 2497 return -EUCLEAN; 2498 } 2499 2500 /* 2501 * We need to get child blockptr/gen from parent before we can 2502 * read it. 
2503 */ 2504 eb = dst_path->nodes[cur_level + 1]; 2505 parent_slot = dst_path->slots[cur_level + 1]; 2506 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 2507 2508 /* This node is old, no need to trace */ 2509 if (child_gen < last_snapshot) 2510 return ret; 2511 2512 eb = btrfs_read_node_slot(eb, parent_slot); 2513 if (IS_ERR(eb)) 2514 return PTR_ERR(eb); 2515 2516 dst_path->nodes[cur_level] = eb; 2517 dst_path->slots[cur_level] = 0; 2518 2519 btrfs_tree_read_lock(eb); 2520 dst_path->locks[cur_level] = BTRFS_READ_LOCK; 2521 need_cleanup = true; 2522 } 2523 2524 /* Now record this tree block and its counter part for qgroups */ 2525 ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level, 2526 root_level, trace_leaf); 2527 if (ret < 0) 2528 goto cleanup; 2529 2530 eb = dst_path->nodes[cur_level]; 2531 2532 if (cur_level > 0) { 2533 /* Iterate all child tree blocks */ 2534 for (i = 0; i < btrfs_header_nritems(eb); i++) { 2535 /* Skip old tree blocks as they won't be swapped */ 2536 if (btrfs_node_ptr_generation(eb, i) < last_snapshot) 2537 continue; 2538 dst_path->slots[cur_level] = i; 2539 2540 /* Recursive call (at most 7 times) */ 2541 ret = qgroup_trace_new_subtree_blocks(trans, src_eb, 2542 dst_path, cur_level - 1, root_level, 2543 last_snapshot, trace_leaf); 2544 if (ret < 0) 2545 goto cleanup; 2546 } 2547 } 2548 2549 cleanup: 2550 if (need_cleanup) { 2551 /* Clean up */ 2552 btrfs_tree_unlock_rw(dst_path->nodes[cur_level], 2553 dst_path->locks[cur_level]); 2554 free_extent_buffer(dst_path->nodes[cur_level]); 2555 dst_path->nodes[cur_level] = NULL; 2556 dst_path->slots[cur_level] = 0; 2557 dst_path->locks[cur_level] = 0; 2558 } 2559 2560 return ret; 2561 } 2562 2563 static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans, 2564 struct extent_buffer *src_eb, 2565 struct extent_buffer *dst_eb, 2566 u64 last_snapshot, bool trace_leaf) 2567 { 2568 struct btrfs_fs_info *fs_info = trans->fs_info; 2569 struct btrfs_path *dst_path = NULL; 
2570 int level; 2571 int ret; 2572 2573 if (!btrfs_qgroup_full_accounting(fs_info)) 2574 return 0; 2575 2576 /* Wrong parameter order */ 2577 if (unlikely(btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb))) { 2578 btrfs_err_rl(fs_info, 2579 "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__, 2580 btrfs_header_generation(src_eb), 2581 btrfs_header_generation(dst_eb)); 2582 return -EUCLEAN; 2583 } 2584 2585 if (unlikely(!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb))) { 2586 ret = -EIO; 2587 goto out; 2588 } 2589 2590 level = btrfs_header_level(dst_eb); 2591 dst_path = btrfs_alloc_path(); 2592 if (!dst_path) { 2593 ret = -ENOMEM; 2594 goto out; 2595 } 2596 /* For dst_path */ 2597 refcount_inc(&dst_eb->refs); 2598 dst_path->nodes[level] = dst_eb; 2599 dst_path->slots[level] = 0; 2600 dst_path->locks[level] = 0; 2601 2602 /* Do the generation aware breadth-first search */ 2603 ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level, 2604 level, last_snapshot, trace_leaf); 2605 if (ret < 0) 2606 goto out; 2607 ret = 0; 2608 2609 out: 2610 btrfs_free_path(dst_path); 2611 if (ret < 0) 2612 qgroup_mark_inconsistent(fs_info, "%s error: %d", __func__, ret); 2613 return ret; 2614 } 2615 2616 /* 2617 * Inform qgroup to trace a whole subtree, including all its child tree 2618 * blocks and data. 2619 * The root tree block is specified by @root_eb. 2620 * 2621 * Normally used by relocation(tree block swap) and subvolume deletion. 
2622 * 2623 * Return 0 for success 2624 * Return <0 for error(ENOMEM or tree search error) 2625 */ 2626 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 2627 struct extent_buffer *root_eb, 2628 u64 root_gen, int root_level) 2629 { 2630 struct btrfs_fs_info *fs_info = trans->fs_info; 2631 int ret = 0; 2632 int level; 2633 u8 drop_subptree_thres; 2634 struct extent_buffer *eb = root_eb; 2635 BTRFS_PATH_AUTO_FREE(path); 2636 2637 ASSERT(0 <= root_level && root_level < BTRFS_MAX_LEVEL); 2638 ASSERT(root_eb != NULL); 2639 2640 if (!btrfs_qgroup_full_accounting(fs_info)) 2641 return 0; 2642 2643 spin_lock(&fs_info->qgroup_lock); 2644 drop_subptree_thres = fs_info->qgroup_drop_subtree_thres; 2645 spin_unlock(&fs_info->qgroup_lock); 2646 2647 /* 2648 * This function only gets called for snapshot drop, if we hit a high 2649 * node here, it means we are going to change ownership for quite a lot 2650 * of extents, which will greatly slow down btrfs_commit_transaction(). 2651 * 2652 * So here if we find a high tree here, we just skip the accounting and 2653 * mark qgroup inconsistent. 2654 */ 2655 if (root_level >= drop_subptree_thres) { 2656 qgroup_mark_inconsistent(fs_info, "subtree level reached threshold"); 2657 return 0; 2658 } 2659 2660 if (!extent_buffer_uptodate(root_eb)) { 2661 struct btrfs_tree_parent_check check = { 2662 .transid = root_gen, 2663 .level = root_level 2664 }; 2665 2666 ret = btrfs_read_extent_buffer(root_eb, &check); 2667 if (ret) 2668 return ret; 2669 } 2670 2671 if (root_level == 0) 2672 return btrfs_qgroup_trace_leaf_items(trans, root_eb); 2673 2674 path = btrfs_alloc_path(); 2675 if (!path) 2676 return -ENOMEM; 2677 2678 /* 2679 * Walk down the tree. Missing extent blocks are filled in as 2680 * we go. Metadata is accounted every time we read a new 2681 * extent block. 
2682 * 2683 * When we reach a leaf, we account for file extent items in it, 2684 * walk back up the tree (adjusting slot pointers as we go) 2685 * and restart the search process. 2686 */ 2687 refcount_inc(&root_eb->refs); /* For path */ 2688 path->nodes[root_level] = root_eb; 2689 path->slots[root_level] = 0; 2690 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 2691 walk_down: 2692 level = root_level; 2693 while (level >= 0) { 2694 if (path->nodes[level] == NULL) { 2695 int parent_slot; 2696 u64 child_bytenr; 2697 2698 /* 2699 * We need to get child blockptr from parent before we 2700 * can read it. 2701 */ 2702 eb = path->nodes[level + 1]; 2703 parent_slot = path->slots[level + 1]; 2704 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 2705 2706 eb = btrfs_read_node_slot(eb, parent_slot); 2707 if (IS_ERR(eb)) 2708 return PTR_ERR(eb); 2709 2710 path->nodes[level] = eb; 2711 path->slots[level] = 0; 2712 2713 btrfs_tree_read_lock(eb); 2714 path->locks[level] = BTRFS_READ_LOCK; 2715 2716 ret = btrfs_qgroup_trace_extent(trans, child_bytenr, 2717 fs_info->nodesize); 2718 if (ret) 2719 return ret; 2720 } 2721 2722 if (level == 0) { 2723 ret = btrfs_qgroup_trace_leaf_items(trans, 2724 path->nodes[level]); 2725 if (ret) 2726 return ret; 2727 2728 /* Nonzero return here means we completed our search */ 2729 ret = adjust_slots_upwards(path, root_level); 2730 if (ret) 2731 break; 2732 2733 /* Restart search with new slots */ 2734 goto walk_down; 2735 } 2736 2737 level--; 2738 } 2739 2740 return 0; 2741 } 2742 2743 static void qgroup_iterator_nested_add(struct list_head *head, struct btrfs_qgroup *qgroup) 2744 { 2745 if (!list_empty(&qgroup->nested_iterator)) 2746 return; 2747 2748 list_add_tail(&qgroup->nested_iterator, head); 2749 } 2750 2751 static void qgroup_iterator_nested_clean(struct list_head *head) 2752 { 2753 while (!list_empty(head)) { 2754 struct btrfs_qgroup *qgroup; 2755 2756 qgroup = list_first_entry(head, struct btrfs_qgroup, 
nested_iterator); 2757 list_del_init(&qgroup->nested_iterator); 2758 } 2759 } 2760 2761 /* 2762 * Walk all of the roots that points to the bytenr and adjust their refcnts. 2763 */ 2764 static void qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 2765 struct ulist *roots, struct list_head *qgroups, 2766 u64 seq, bool update_old) 2767 { 2768 struct ulist_node *unode; 2769 struct ulist_iterator uiter; 2770 struct btrfs_qgroup *qg; 2771 2772 if (!roots) 2773 return; 2774 ULIST_ITER_INIT(&uiter); 2775 while ((unode = ulist_next(roots, &uiter))) { 2776 LIST_HEAD(tmp); 2777 2778 qg = find_qgroup_rb(fs_info, unode->val); 2779 if (!qg) 2780 continue; 2781 2782 qgroup_iterator_nested_add(qgroups, qg); 2783 qgroup_iterator_add(&tmp, qg); 2784 list_for_each_entry(qg, &tmp, iterator) { 2785 struct btrfs_qgroup_list *glist; 2786 2787 if (update_old) 2788 btrfs_qgroup_update_old_refcnt(qg, seq, 1); 2789 else 2790 btrfs_qgroup_update_new_refcnt(qg, seq, 1); 2791 2792 list_for_each_entry(glist, &qg->groups, next_group) { 2793 qgroup_iterator_nested_add(qgroups, glist->group); 2794 qgroup_iterator_add(&tmp, glist->group); 2795 } 2796 } 2797 qgroup_iterator_clean(&tmp); 2798 } 2799 } 2800 2801 /* 2802 * Update qgroup rfer/excl counters. 2803 * Rfer update is easy, codes can explain themselves. 2804 * 2805 * Excl update is tricky, the update is split into 2 parts. 
 * Part 1: Possible exclusive <-> sharing detect:
 *	|	A	|	!A	|
 *	-------------------------------------
 *	B	|	*	|	-	|
 *	-------------------------------------
 *	!B	|	+	|	**	|
 *	-------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possible exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possible unchanged.
 *
 * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
 *
 * To make the logic clear, we first use condition A and B to split
 * combination into 4 results.
 *
 * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
 * only one variant may be 0.
 *
 * Lastly, check result **, since there are 2 variants that may be 0, split
 * them again (2x2).
 * But this time we don't need to consider other things, the code and logic
 * are easy to understand now.
2836 */ 2837 static void qgroup_update_counters(struct btrfs_fs_info *fs_info, 2838 struct list_head *qgroups, u64 nr_old_roots, 2839 u64 nr_new_roots, u64 num_bytes, u64 seq) 2840 { 2841 struct btrfs_qgroup *qg; 2842 2843 list_for_each_entry(qg, qgroups, nested_iterator) { 2844 u64 cur_new_count, cur_old_count; 2845 bool dirty = false; 2846 2847 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 2848 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 2849 2850 trace_btrfs_qgroup_update_counters(fs_info, qg, cur_old_count, 2851 cur_new_count); 2852 2853 /* Rfer update part */ 2854 if (cur_old_count == 0 && cur_new_count > 0) { 2855 qg->rfer += num_bytes; 2856 qg->rfer_cmpr += num_bytes; 2857 dirty = true; 2858 } 2859 if (cur_old_count > 0 && cur_new_count == 0) { 2860 qg->rfer -= num_bytes; 2861 qg->rfer_cmpr -= num_bytes; 2862 dirty = true; 2863 } 2864 2865 /* Excl update part */ 2866 /* Exclusive/none -> shared case */ 2867 if (cur_old_count == nr_old_roots && 2868 cur_new_count < nr_new_roots) { 2869 /* Exclusive -> shared */ 2870 if (cur_old_count != 0) { 2871 qg->excl -= num_bytes; 2872 qg->excl_cmpr -= num_bytes; 2873 dirty = true; 2874 } 2875 } 2876 2877 /* Shared -> exclusive/none case */ 2878 if (cur_old_count < nr_old_roots && 2879 cur_new_count == nr_new_roots) { 2880 /* Shared->exclusive */ 2881 if (cur_new_count != 0) { 2882 qg->excl += num_bytes; 2883 qg->excl_cmpr += num_bytes; 2884 dirty = true; 2885 } 2886 } 2887 2888 /* Exclusive/none -> exclusive/none case */ 2889 if (cur_old_count == nr_old_roots && 2890 cur_new_count == nr_new_roots) { 2891 if (cur_old_count == 0) { 2892 /* None -> exclusive/none */ 2893 2894 if (cur_new_count != 0) { 2895 /* None -> exclusive */ 2896 qg->excl += num_bytes; 2897 qg->excl_cmpr += num_bytes; 2898 dirty = true; 2899 } 2900 /* None -> none, nothing changed */ 2901 } else { 2902 /* Exclusive -> exclusive/none */ 2903 2904 if (cur_new_count == 0) { 2905 /* Exclusive -> none */ 2906 qg->excl -= num_bytes; 2907 
qg->excl_cmpr -= num_bytes; 2908 dirty = true; 2909 } 2910 /* Exclusive -> exclusive, nothing changed */ 2911 } 2912 } 2913 2914 if (dirty) 2915 qgroup_dirty(fs_info, qg); 2916 } 2917 } 2918 2919 /* 2920 * Check if the @roots potentially is a list of fs tree roots 2921 * 2922 * Return 0 for definitely not a fs/subvol tree roots ulist 2923 * Return 1 for possible fs/subvol tree roots in the list (considering an empty 2924 * one as well) 2925 */ 2926 static int maybe_fs_roots(struct ulist *roots) 2927 { 2928 struct ulist_node *unode; 2929 struct ulist_iterator uiter; 2930 2931 /* Empty one, still possible for fs roots */ 2932 if (!roots || roots->nnodes == 0) 2933 return 1; 2934 2935 ULIST_ITER_INIT(&uiter); 2936 unode = ulist_next(roots, &uiter); 2937 if (!unode) 2938 return 1; 2939 2940 /* 2941 * If it contains fs tree roots, then it must belong to fs/subvol 2942 * trees. 2943 * If it contains a non-fs tree, it won't be shared with fs/subvol trees. 2944 */ 2945 return btrfs_is_fstree(unode->val); 2946 } 2947 2948 int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, 2949 u64 num_bytes, struct ulist *old_roots, 2950 struct ulist *new_roots) 2951 { 2952 struct btrfs_fs_info *fs_info = trans->fs_info; 2953 LIST_HEAD(qgroups); 2954 u64 seq; 2955 u64 nr_new_roots = 0; 2956 u64 nr_old_roots = 0; 2957 int ret = 0; 2958 2959 /* 2960 * If quotas get disabled meanwhile, the resources need to be freed and 2961 * we can't just exit here. 
2962 */ 2963 if (!btrfs_qgroup_full_accounting(fs_info) || 2964 fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING) 2965 goto out_free; 2966 2967 if (new_roots) { 2968 if (!maybe_fs_roots(new_roots)) 2969 goto out_free; 2970 nr_new_roots = new_roots->nnodes; 2971 } 2972 if (old_roots) { 2973 if (!maybe_fs_roots(old_roots)) 2974 goto out_free; 2975 nr_old_roots = old_roots->nnodes; 2976 } 2977 2978 /* Quick exit, either not fs tree roots, or won't affect any qgroup */ 2979 if (nr_old_roots == 0 && nr_new_roots == 0) 2980 goto out_free; 2981 2982 trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr, 2983 num_bytes, nr_old_roots, nr_new_roots); 2984 2985 mutex_lock(&fs_info->qgroup_rescan_lock); 2986 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 2987 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 2988 mutex_unlock(&fs_info->qgroup_rescan_lock); 2989 ret = 0; 2990 goto out_free; 2991 } 2992 } 2993 mutex_unlock(&fs_info->qgroup_rescan_lock); 2994 2995 spin_lock(&fs_info->qgroup_lock); 2996 seq = fs_info->qgroup_seq; 2997 2998 /* Update old refcnts using old_roots */ 2999 qgroup_update_refcnt(fs_info, old_roots, &qgroups, seq, true); 3000 3001 /* Update new refcnts using new_roots */ 3002 qgroup_update_refcnt(fs_info, new_roots, &qgroups, seq, false); 3003 3004 qgroup_update_counters(fs_info, &qgroups, nr_old_roots, nr_new_roots, 3005 num_bytes, seq); 3006 3007 /* 3008 * We're done using the iterator, release all its qgroups while holding 3009 * fs_info->qgroup_lock so that we don't race with btrfs_remove_qgroup() 3010 * and trigger use-after-free accesses to qgroups. 
3011 */ 3012 qgroup_iterator_nested_clean(&qgroups); 3013 3014 /* 3015 * Bump qgroup_seq to avoid seq overlap 3016 */ 3017 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 3018 spin_unlock(&fs_info->qgroup_lock); 3019 out_free: 3020 ulist_free(old_roots); 3021 ulist_free(new_roots); 3022 return ret; 3023 } 3024 3025 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) 3026 { 3027 struct btrfs_fs_info *fs_info = trans->fs_info; 3028 struct btrfs_qgroup_extent_record *record; 3029 struct btrfs_delayed_ref_root *delayed_refs; 3030 struct ulist *new_roots = NULL; 3031 unsigned long index; 3032 u64 num_dirty_extents = 0; 3033 u64 qgroup_to_skip; 3034 int ret = 0; 3035 3036 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) 3037 return 0; 3038 3039 delayed_refs = &trans->transaction->delayed_refs; 3040 qgroup_to_skip = delayed_refs->qgroup_to_skip; 3041 xa_for_each(&delayed_refs->dirty_extents, index, record) { 3042 const u64 bytenr = (((u64)index) << fs_info->sectorsize_bits); 3043 3044 num_dirty_extents++; 3045 trace_btrfs_qgroup_account_extents(fs_info, record, bytenr); 3046 3047 if (!ret && !(fs_info->qgroup_flags & 3048 BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) { 3049 struct btrfs_backref_walk_ctx ctx = { 0 }; 3050 3051 ctx.bytenr = bytenr; 3052 ctx.fs_info = fs_info; 3053 3054 /* 3055 * Old roots should be searched when inserting qgroup 3056 * extent record. 3057 * 3058 * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case, 3059 * we may have some record inserted during 3060 * NO_ACCOUNTING (thus no old_roots populated), but 3061 * later we start rescan, which clears NO_ACCOUNTING, 3062 * leaving some inserted records without old_roots 3063 * populated. 3064 * 3065 * Those cases are rare and should not cause too much 3066 * time spent during commit_transaction(). 
3067 */ 3068 if (!record->old_roots) { 3069 /* Search commit root to find old_roots */ 3070 ret = btrfs_find_all_roots(&ctx, false); 3071 if (ret < 0) 3072 goto cleanup; 3073 record->old_roots = ctx.roots; 3074 ctx.roots = NULL; 3075 } 3076 3077 /* 3078 * Use BTRFS_SEQ_LAST as time_seq to do special search, 3079 * which doesn't lock tree or delayed_refs and search 3080 * current root. It's safe inside commit_transaction(). 3081 */ 3082 ctx.trans = trans; 3083 ctx.time_seq = BTRFS_SEQ_LAST; 3084 ret = btrfs_find_all_roots(&ctx, false); 3085 if (ret < 0) 3086 goto cleanup; 3087 new_roots = ctx.roots; 3088 if (qgroup_to_skip) { 3089 ulist_del(new_roots, qgroup_to_skip, 0); 3090 ulist_del(record->old_roots, qgroup_to_skip, 3091 0); 3092 } 3093 ret = btrfs_qgroup_account_extent(trans, bytenr, 3094 record->num_bytes, 3095 record->old_roots, 3096 new_roots); 3097 record->old_roots = NULL; 3098 new_roots = NULL; 3099 } 3100 /* Free the reserved data space */ 3101 btrfs_qgroup_free_refroot(fs_info, 3102 record->data_rsv_refroot, 3103 record->data_rsv, 3104 BTRFS_QGROUP_RSV_DATA); 3105 cleanup: 3106 ulist_free(record->old_roots); 3107 ulist_free(new_roots); 3108 new_roots = NULL; 3109 xa_erase(&delayed_refs->dirty_extents, index); 3110 kfree(record); 3111 3112 } 3113 trace_btrfs_qgroup_num_dirty_extents(fs_info, trans->transid, num_dirty_extents); 3114 return ret; 3115 } 3116 3117 /* 3118 * Writes all changed qgroups to disk. 3119 * Called by the transaction commit path and the qgroup assign ioctl. 3120 */ 3121 int btrfs_run_qgroups(struct btrfs_trans_handle *trans) 3122 { 3123 struct btrfs_fs_info *fs_info = trans->fs_info; 3124 int ret = 0; 3125 3126 /* 3127 * In case we are called from the qgroup assign ioctl, assert that we 3128 * are holding the qgroup_ioctl_lock, otherwise we can race with a quota 3129 * disable operation (ioctl) and access a freed quota root. 
3130 */ 3131 if (trans->transaction->state != TRANS_STATE_COMMIT_DOING) 3132 lockdep_assert_held(&fs_info->qgroup_ioctl_lock); 3133 3134 if (!fs_info->quota_root) 3135 return ret; 3136 3137 spin_lock(&fs_info->qgroup_lock); 3138 while (!list_empty(&fs_info->dirty_qgroups)) { 3139 struct btrfs_qgroup *qgroup; 3140 qgroup = list_first_entry(&fs_info->dirty_qgroups, 3141 struct btrfs_qgroup, dirty); 3142 list_del_init(&qgroup->dirty); 3143 spin_unlock(&fs_info->qgroup_lock); 3144 ret = update_qgroup_info_item(trans, qgroup); 3145 if (ret) 3146 qgroup_mark_inconsistent(fs_info, 3147 "qgroup info item update error %d", ret); 3148 ret = update_qgroup_limit_item(trans, qgroup); 3149 if (ret) 3150 qgroup_mark_inconsistent(fs_info, 3151 "qgroup limit item update error %d", ret); 3152 spin_lock(&fs_info->qgroup_lock); 3153 } 3154 if (btrfs_qgroup_enabled(fs_info)) 3155 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 3156 else 3157 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 3158 spin_unlock(&fs_info->qgroup_lock); 3159 3160 ret = update_qgroup_status_item(trans); 3161 if (ret) 3162 qgroup_mark_inconsistent(fs_info, 3163 "qgroup status item update error %d", ret); 3164 3165 return ret; 3166 } 3167 3168 int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, 3169 struct btrfs_qgroup_inherit *inherit, 3170 size_t size) 3171 { 3172 if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP) 3173 return -EOPNOTSUPP; 3174 if (size < sizeof(*inherit) || size > PAGE_SIZE) 3175 return -EINVAL; 3176 3177 /* 3178 * In the past we allowed btrfs_qgroup_inherit to specify to copy 3179 * rfer/excl numbers directly from other qgroups. This behavior has 3180 * been disabled in userspace for a very long time, but here we should 3181 * also disable it in kernel, as this behavior is known to mark qgroup 3182 * inconsistent, and a rescan would wipe out the changes anyway. 3183 * 3184 * Reject any btrfs_qgroup_inherit with num_ref_copies or num_excl_copies. 
3185 */ 3186 if (inherit->num_ref_copies > 0 || inherit->num_excl_copies > 0) 3187 return -EINVAL; 3188 3189 if (size != struct_size(inherit, qgroups, inherit->num_qgroups)) 3190 return -EINVAL; 3191 3192 /* 3193 * Skip the inherit source qgroups check if qgroup is not enabled. 3194 * Qgroup can still be later enabled causing problems, but in that case 3195 * btrfs_qgroup_inherit() would just ignore those invalid ones. 3196 */ 3197 if (!btrfs_qgroup_enabled(fs_info)) 3198 return 0; 3199 3200 /* 3201 * Now check all the remaining qgroups, they should all: 3202 * 3203 * - Exist 3204 * - Be higher level qgroups. 3205 */ 3206 for (int i = 0; i < inherit->num_qgroups; i++) { 3207 struct btrfs_qgroup *qgroup; 3208 u64 qgroupid = inherit->qgroups[i]; 3209 3210 if (btrfs_qgroup_level(qgroupid) == 0) 3211 return -EINVAL; 3212 3213 spin_lock(&fs_info->qgroup_lock); 3214 qgroup = find_qgroup_rb(fs_info, qgroupid); 3215 if (!qgroup) { 3216 spin_unlock(&fs_info->qgroup_lock); 3217 return -ENOENT; 3218 } 3219 spin_unlock(&fs_info->qgroup_lock); 3220 } 3221 return 0; 3222 } 3223 3224 static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info, 3225 u64 inode_rootid, 3226 struct btrfs_qgroup_inherit **inherit) 3227 { 3228 int i = 0; 3229 u64 num_qgroups = 0; 3230 struct btrfs_qgroup *inode_qg; 3231 struct btrfs_qgroup_list *qg_list; 3232 struct btrfs_qgroup_inherit *res; 3233 size_t struct_sz; 3234 u64 *qgids; 3235 3236 if (*inherit) 3237 return -EEXIST; 3238 3239 inode_qg = find_qgroup_rb(fs_info, inode_rootid); 3240 if (!inode_qg) 3241 return -ENOENT; 3242 3243 num_qgroups = list_count_nodes(&inode_qg->groups); 3244 3245 if (!num_qgroups) 3246 return 0; 3247 3248 struct_sz = struct_size(res, qgroups, num_qgroups); 3249 if (struct_sz == SIZE_MAX) 3250 return -ERANGE; 3251 3252 res = kzalloc(struct_sz, GFP_NOFS); 3253 if (!res) 3254 return -ENOMEM; 3255 res->num_qgroups = num_qgroups; 3256 qgids = res->qgroups; 3257 3258 list_for_each_entry(qg_list, &inode_qg->groups, next_group) 
3259 qgids[i++] = qg_list->group->qgroupid; 3260 3261 *inherit = res; 3262 return 0; 3263 } 3264 3265 /* 3266 * Check if we can skip rescan when inheriting qgroups. If @src has a single 3267 * @parent, and that @parent is owning all its bytes exclusively, we can skip 3268 * the full rescan, by just adding nodesize to the @parent's excl/rfer. 3269 * 3270 * Return <0 for fatal errors (like srcid/parentid has no qgroup). 3271 * Return 0 if a quick inherit is done. 3272 * Return >0 if a quick inherit is not possible, and a full rescan is needed. 3273 */ 3274 static int qgroup_snapshot_quick_inherit(struct btrfs_fs_info *fs_info, 3275 u64 srcid, u64 parentid) 3276 { 3277 struct btrfs_qgroup *src; 3278 struct btrfs_qgroup *parent; 3279 struct btrfs_qgroup *qgroup; 3280 struct btrfs_qgroup_list *list; 3281 LIST_HEAD(qgroup_list); 3282 const u32 nodesize = fs_info->nodesize; 3283 int nr_parents = 0; 3284 3285 if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_FULL) 3286 return 0; 3287 3288 src = find_qgroup_rb(fs_info, srcid); 3289 if (!src) 3290 return -ENOENT; 3291 parent = find_qgroup_rb(fs_info, parentid); 3292 if (!parent) 3293 return -ENOENT; 3294 3295 /* 3296 * Source has no parent qgroup, but our new qgroup would have one. 3297 * Qgroup numbers would become inconsistent. 3298 */ 3299 if (list_empty(&src->groups)) 3300 return 1; 3301 3302 list_for_each_entry(list, &src->groups, next_group) { 3303 /* The parent is not the same, quick update is not possible. */ 3304 if (list->group->qgroupid != parentid) 3305 return 1; 3306 nr_parents++; 3307 /* 3308 * More than one parent qgroup, we can't be sure about accounting 3309 * consistency. 3310 */ 3311 if (nr_parents > 1) 3312 return 1; 3313 } 3314 3315 /* 3316 * The parent is not exclusively owning all its bytes. We're not sure 3317 * if the source has any bytes not fully owned by the parent. 
3318 */ 3319 if (parent->excl != parent->rfer) 3320 return 1; 3321 3322 qgroup_iterator_add(&qgroup_list, parent); 3323 list_for_each_entry(qgroup, &qgroup_list, iterator) { 3324 qgroup->rfer += nodesize; 3325 qgroup->rfer_cmpr += nodesize; 3326 qgroup->excl += nodesize; 3327 qgroup->excl_cmpr += nodesize; 3328 qgroup_dirty(fs_info, qgroup); 3329 3330 /* Append parent qgroups to @qgroup_list. */ 3331 list_for_each_entry(list, &qgroup->groups, next_group) 3332 qgroup_iterator_add(&qgroup_list, list->group); 3333 } 3334 qgroup_iterator_clean(&qgroup_list); 3335 return 0; 3336 } 3337 3338 /* 3339 * Copy the accounting information between qgroups. This is necessary 3340 * when a snapshot or a subvolume is created. Throwing an error will 3341 * cause a transaction abort so we take extra care here to only error 3342 * when a readonly fs is a reasonable outcome. 3343 */ 3344 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, 3345 u64 objectid, u64 inode_rootid, 3346 struct btrfs_qgroup_inherit *inherit) 3347 { 3348 int ret = 0; 3349 u64 *i_qgroups; 3350 bool committing = false; 3351 struct btrfs_fs_info *fs_info = trans->fs_info; 3352 struct btrfs_root *quota_root; 3353 struct btrfs_qgroup *srcgroup; 3354 struct btrfs_qgroup *dstgroup; 3355 struct btrfs_qgroup *prealloc; 3356 struct btrfs_qgroup_list **qlist_prealloc = NULL; 3357 bool free_inherit = false; 3358 bool need_rescan = false; 3359 u32 level_size = 0; 3360 u64 nums; 3361 3362 if (!btrfs_qgroup_enabled(fs_info)) 3363 return 0; 3364 3365 prealloc = kzalloc_obj(*prealloc, GFP_NOFS); 3366 if (!prealloc) 3367 return -ENOMEM; 3368 3369 /* 3370 * There are only two callers of this function. 3371 * 3372 * One in create_subvol() in the ioctl context, which needs to hold 3373 * the qgroup_ioctl_lock. 
3374 * 3375 * The other one in create_pending_snapshot() where no other qgroup 3376 * code can modify the fs as they all need to either start a new trans 3377 * or hold a trans handler, thus we don't need to hold 3378 * qgroup_ioctl_lock. 3379 * This would avoid long and complex lock chain and make lockdep happy. 3380 */ 3381 spin_lock(&fs_info->trans_lock); 3382 if (trans->transaction->state == TRANS_STATE_COMMIT_DOING) 3383 committing = true; 3384 spin_unlock(&fs_info->trans_lock); 3385 3386 if (!committing) 3387 mutex_lock(&fs_info->qgroup_ioctl_lock); 3388 3389 quota_root = fs_info->quota_root; 3390 if (!quota_root) { 3391 ret = -EINVAL; 3392 goto out; 3393 } 3394 3395 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE && !inherit) { 3396 ret = qgroup_auto_inherit(fs_info, inode_rootid, &inherit); 3397 if (ret) 3398 goto out; 3399 free_inherit = true; 3400 } 3401 3402 if (inherit) { 3403 i_qgroups = (u64 *)(inherit + 1); 3404 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 3405 2 * inherit->num_excl_copies; 3406 for (int i = 0; i < nums; i++) { 3407 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 3408 3409 /* 3410 * Zero out invalid groups so we can ignore 3411 * them later. 
3412 */ 3413 if (!srcgroup || 3414 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 3415 *i_qgroups = 0ULL; 3416 3417 ++i_qgroups; 3418 } 3419 } 3420 3421 /* 3422 * create a tracking group for the subvol itself 3423 */ 3424 ret = add_qgroup_item(trans, quota_root, objectid); 3425 if (ret) 3426 goto out; 3427 3428 /* 3429 * add qgroup to all inherited groups 3430 */ 3431 if (inherit) { 3432 i_qgroups = (u64 *)(inherit + 1); 3433 for (int i = 0; i < inherit->num_qgroups; i++, i_qgroups++) { 3434 if (*i_qgroups == 0) 3435 continue; 3436 ret = add_qgroup_relation_item(trans, objectid, 3437 *i_qgroups); 3438 if (ret && ret != -EEXIST) 3439 goto out; 3440 ret = add_qgroup_relation_item(trans, *i_qgroups, 3441 objectid); 3442 if (ret && ret != -EEXIST) 3443 goto out; 3444 } 3445 ret = 0; 3446 3447 qlist_prealloc = kzalloc_objs(struct btrfs_qgroup_list *, 3448 inherit->num_qgroups, GFP_NOFS); 3449 if (!qlist_prealloc) { 3450 ret = -ENOMEM; 3451 goto out; 3452 } 3453 for (int i = 0; i < inherit->num_qgroups; i++) { 3454 qlist_prealloc[i] = kzalloc_obj(struct btrfs_qgroup_list, 3455 GFP_NOFS); 3456 if (!qlist_prealloc[i]) { 3457 ret = -ENOMEM; 3458 goto out; 3459 } 3460 } 3461 } 3462 3463 spin_lock(&fs_info->qgroup_lock); 3464 3465 dstgroup = add_qgroup_rb(fs_info, prealloc, objectid); 3466 prealloc = NULL; 3467 3468 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 3469 dstgroup->lim_flags = inherit->lim.flags; 3470 dstgroup->max_rfer = inherit->lim.max_rfer; 3471 dstgroup->max_excl = inherit->lim.max_excl; 3472 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 3473 dstgroup->rsv_excl = inherit->lim.rsv_excl; 3474 3475 qgroup_dirty(fs_info, dstgroup); 3476 } 3477 3478 if (srcid && btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL) { 3479 srcgroup = find_qgroup_rb(fs_info, srcid); 3480 if (!srcgroup) 3481 goto unlock; 3482 3483 /* 3484 * We call inherit after we clone the root in order to make sure 3485 * our counts don't go crazy, so at this point the only 
3486 * difference between the two roots should be the root node. 3487 */ 3488 level_size = fs_info->nodesize; 3489 dstgroup->rfer = srcgroup->rfer; 3490 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 3491 dstgroup->excl = level_size; 3492 dstgroup->excl_cmpr = level_size; 3493 srcgroup->excl = level_size; 3494 srcgroup->excl_cmpr = level_size; 3495 3496 /* inherit the limit info */ 3497 dstgroup->lim_flags = srcgroup->lim_flags; 3498 dstgroup->max_rfer = srcgroup->max_rfer; 3499 dstgroup->max_excl = srcgroup->max_excl; 3500 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 3501 dstgroup->rsv_excl = srcgroup->rsv_excl; 3502 3503 qgroup_dirty(fs_info, dstgroup); 3504 qgroup_dirty(fs_info, srcgroup); 3505 3506 /* 3507 * If the source qgroup has parent but the new one doesn't, 3508 * we need a full rescan. 3509 */ 3510 if (!inherit && !list_empty(&srcgroup->groups)) 3511 need_rescan = true; 3512 } 3513 3514 if (!inherit) 3515 goto unlock; 3516 3517 i_qgroups = (u64 *)(inherit + 1); 3518 for (int i = 0; i < inherit->num_qgroups; i++) { 3519 if (*i_qgroups) { 3520 ret = add_relation_rb(fs_info, qlist_prealloc[i], objectid, 3521 *i_qgroups); 3522 qlist_prealloc[i] = NULL; 3523 if (ret) 3524 goto unlock; 3525 } 3526 if (srcid) { 3527 /* Check if we can do a quick inherit. 
*/ 3528 ret = qgroup_snapshot_quick_inherit(fs_info, srcid, *i_qgroups); 3529 if (ret < 0) 3530 goto unlock; 3531 if (ret > 0) 3532 need_rescan = true; 3533 ret = 0; 3534 } 3535 ++i_qgroups; 3536 } 3537 3538 for (int i = 0; i < inherit->num_ref_copies; i++, i_qgroups += 2) { 3539 struct btrfs_qgroup *src; 3540 struct btrfs_qgroup *dst; 3541 3542 if (!i_qgroups[0] || !i_qgroups[1]) 3543 continue; 3544 3545 src = find_qgroup_rb(fs_info, i_qgroups[0]); 3546 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 3547 3548 if (!src || !dst) { 3549 ret = -EINVAL; 3550 goto unlock; 3551 } 3552 3553 dst->rfer = src->rfer - level_size; 3554 dst->rfer_cmpr = src->rfer_cmpr - level_size; 3555 3556 /* Manually tweaking numbers certainly needs a rescan */ 3557 need_rescan = true; 3558 } 3559 for (int i = 0; i < inherit->num_excl_copies; i++, i_qgroups += 2) { 3560 struct btrfs_qgroup *src; 3561 struct btrfs_qgroup *dst; 3562 3563 if (!i_qgroups[0] || !i_qgroups[1]) 3564 continue; 3565 3566 src = find_qgroup_rb(fs_info, i_qgroups[0]); 3567 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 3568 3569 if (!src || !dst) { 3570 ret = -EINVAL; 3571 goto unlock; 3572 } 3573 3574 dst->excl = src->excl + level_size; 3575 dst->excl_cmpr = src->excl_cmpr + level_size; 3576 need_rescan = true; 3577 } 3578 3579 unlock: 3580 spin_unlock(&fs_info->qgroup_lock); 3581 if (!ret) 3582 ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup); 3583 out: 3584 if (!committing) 3585 mutex_unlock(&fs_info->qgroup_ioctl_lock); 3586 if (need_rescan) 3587 qgroup_mark_inconsistent(fs_info, "qgroup inherit needs a rescan"); 3588 if (qlist_prealloc) { 3589 for (int i = 0; i < inherit->num_qgroups; i++) 3590 kfree(qlist_prealloc[i]); 3591 kfree(qlist_prealloc); 3592 } 3593 if (free_inherit) 3594 kfree(inherit); 3595 kfree(prealloc); 3596 return ret; 3597 } 3598 3599 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) 3600 { 3601 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 3602 
qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) 3603 return false; 3604 3605 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 3606 qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) 3607 return false; 3608 3609 return true; 3610 } 3611 3612 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, 3613 enum btrfs_qgroup_rsv_type type) 3614 { 3615 struct btrfs_qgroup *qgroup; 3616 struct btrfs_fs_info *fs_info = root->fs_info; 3617 u64 ref_root = btrfs_root_id(root); 3618 int ret = 0; 3619 LIST_HEAD(qgroup_list); 3620 3621 if (!btrfs_is_fstree(ref_root)) 3622 return 0; 3623 3624 if (num_bytes == 0) 3625 return 0; 3626 3627 if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) && 3628 capable(CAP_SYS_RESOURCE)) 3629 enforce = false; 3630 3631 spin_lock(&fs_info->qgroup_lock); 3632 if (!fs_info->quota_root) 3633 goto out; 3634 3635 qgroup = find_qgroup_rb(fs_info, ref_root); 3636 if (!qgroup) 3637 goto out; 3638 3639 qgroup_iterator_add(&qgroup_list, qgroup); 3640 list_for_each_entry(qgroup, &qgroup_list, iterator) { 3641 struct btrfs_qgroup_list *glist; 3642 3643 if (enforce && !qgroup_check_limits(qgroup, num_bytes)) { 3644 ret = -EDQUOT; 3645 goto out; 3646 } 3647 3648 list_for_each_entry(glist, &qgroup->groups, next_group) 3649 qgroup_iterator_add(&qgroup_list, glist->group); 3650 } 3651 3652 ret = 0; 3653 /* 3654 * no limits exceeded, now record the reservation into all qgroups 3655 */ 3656 list_for_each_entry(qgroup, &qgroup_list, iterator) 3657 qgroup_rsv_add(fs_info, qgroup, num_bytes, type); 3658 3659 out: 3660 qgroup_iterator_clean(&qgroup_list); 3661 spin_unlock(&fs_info->qgroup_lock); 3662 return ret; 3663 } 3664 3665 /* 3666 * Free @num_bytes of reserved space with @type for qgroup. (Normally level 0 3667 * qgroup). 3668 * 3669 * Will handle all higher level qgroup too. 3670 * 3671 * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup. 
3672 * This special case is only used for META_PERTRANS type. 3673 */ 3674 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 3675 u64 ref_root, u64 num_bytes, 3676 enum btrfs_qgroup_rsv_type type) 3677 { 3678 struct btrfs_qgroup *qgroup; 3679 LIST_HEAD(qgroup_list); 3680 3681 if (!btrfs_is_fstree(ref_root)) 3682 return; 3683 3684 if (num_bytes == 0) 3685 return; 3686 3687 if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) { 3688 WARN(1, "%s: Invalid type to free", __func__); 3689 return; 3690 } 3691 spin_lock(&fs_info->qgroup_lock); 3692 3693 if (!fs_info->quota_root) 3694 goto out; 3695 3696 qgroup = find_qgroup_rb(fs_info, ref_root); 3697 if (!qgroup) 3698 goto out; 3699 3700 if (num_bytes == (u64)-1) 3701 /* 3702 * We're freeing all pertrans rsv, get reserved value from 3703 * level 0 qgroup as real num_bytes to free. 3704 */ 3705 num_bytes = qgroup->rsv.values[type]; 3706 3707 qgroup_iterator_add(&qgroup_list, qgroup); 3708 list_for_each_entry(qgroup, &qgroup_list, iterator) { 3709 struct btrfs_qgroup_list *glist; 3710 3711 qgroup_rsv_release(fs_info, qgroup, num_bytes, type); 3712 list_for_each_entry(glist, &qgroup->groups, next_group) { 3713 qgroup_iterator_add(&qgroup_list, glist->group); 3714 } 3715 } 3716 out: 3717 qgroup_iterator_clean(&qgroup_list); 3718 spin_unlock(&fs_info->qgroup_lock); 3719 } 3720 3721 /* 3722 * Check if the leaf is the last leaf. Which means all node pointers 3723 * are at their last position. 3724 */ 3725 static bool is_last_leaf(struct btrfs_path *path) 3726 { 3727 int i; 3728 3729 for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 3730 if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1) 3731 return false; 3732 } 3733 return true; 3734 } 3735 3736 /* 3737 * returns < 0 on error, 0 when more leafs are to be scanned. 3738 * returns 1 when done. 
3739 */ 3740 static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, 3741 struct btrfs_path *path) 3742 { 3743 struct btrfs_fs_info *fs_info = trans->fs_info; 3744 struct btrfs_root *extent_root; 3745 struct btrfs_key found; 3746 struct extent_buffer *scratch_leaf = NULL; 3747 u64 num_bytes; 3748 bool done; 3749 int slot; 3750 int ret; 3751 3752 if (!btrfs_qgroup_full_accounting(fs_info)) 3753 return 1; 3754 3755 mutex_lock(&fs_info->qgroup_rescan_lock); 3756 extent_root = btrfs_extent_root(fs_info, 3757 fs_info->qgroup_rescan_progress.objectid); 3758 if (unlikely(!extent_root)) { 3759 btrfs_err(fs_info, 3760 "missing extent root for extent at bytenr %llu", 3761 fs_info->qgroup_rescan_progress.objectid); 3762 mutex_unlock(&fs_info->qgroup_rescan_lock); 3763 return -EUCLEAN; 3764 } 3765 3766 ret = btrfs_search_slot_for_read(extent_root, 3767 &fs_info->qgroup_rescan_progress, 3768 path, 1, 0); 3769 3770 btrfs_debug(fs_info, 3771 "current progress key " BTRFS_KEY_FMT ", search_slot ret %d", 3772 BTRFS_KEY_FMT_VALUE(&fs_info->qgroup_rescan_progress), ret); 3773 3774 if (ret) { 3775 /* 3776 * The rescan is about to end, we will not be scanning any 3777 * further blocks. We cannot unset the RESCAN flag here, because 3778 * we want to commit the transaction if everything went well. 3779 * To make the live accounting work in this phase, we set our 3780 * scan progress pointer such that every real extent objectid 3781 * will be smaller. 
3782 */ 3783 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 3784 btrfs_release_path(path); 3785 mutex_unlock(&fs_info->qgroup_rescan_lock); 3786 return ret; 3787 } 3788 done = is_last_leaf(path); 3789 3790 btrfs_item_key_to_cpu(path->nodes[0], &found, 3791 btrfs_header_nritems(path->nodes[0]) - 1); 3792 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 3793 3794 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); 3795 if (!scratch_leaf) { 3796 ret = -ENOMEM; 3797 mutex_unlock(&fs_info->qgroup_rescan_lock); 3798 goto out; 3799 } 3800 slot = path->slots[0]; 3801 btrfs_release_path(path); 3802 mutex_unlock(&fs_info->qgroup_rescan_lock); 3803 3804 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 3805 struct btrfs_backref_walk_ctx ctx = { 0 }; 3806 3807 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 3808 if (found.type != BTRFS_EXTENT_ITEM_KEY && 3809 found.type != BTRFS_METADATA_ITEM_KEY) 3810 continue; 3811 if (found.type == BTRFS_METADATA_ITEM_KEY) 3812 num_bytes = fs_info->nodesize; 3813 else 3814 num_bytes = found.offset; 3815 3816 ctx.bytenr = found.objectid; 3817 ctx.fs_info = fs_info; 3818 3819 ret = btrfs_find_all_roots(&ctx, false); 3820 if (ret < 0) 3821 goto out; 3822 /* For rescan, just pass old_roots as NULL */ 3823 ret = btrfs_qgroup_account_extent(trans, found.objectid, 3824 num_bytes, NULL, ctx.roots); 3825 if (ret < 0) 3826 goto out; 3827 } 3828 out: 3829 if (scratch_leaf) 3830 free_extent_buffer(scratch_leaf); 3831 3832 if (done && !ret) { 3833 ret = 1; 3834 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 3835 } 3836 return ret; 3837 } 3838 3839 static bool rescan_should_stop(struct btrfs_fs_info *fs_info) 3840 { 3841 if (btrfs_fs_closing(fs_info)) 3842 return true; 3843 if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)) 3844 return true; 3845 if (!btrfs_qgroup_enabled(fs_info)) 3846 return true; 3847 if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) 3848 return true; 3849 return 
false; 3850 } 3851 3852 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 3853 { 3854 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 3855 qgroup_rescan_work); 3856 struct btrfs_path *path; 3857 struct btrfs_trans_handle *trans = NULL; 3858 int ret = 0; 3859 bool stopped = false; 3860 bool did_leaf_rescans = false; 3861 3862 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) 3863 return; 3864 3865 path = btrfs_alloc_path(); 3866 if (!path) { 3867 ret = -ENOMEM; 3868 goto out; 3869 } 3870 /* 3871 * Rescan should only search for commit root, and any later difference 3872 * should be recorded by qgroup 3873 */ 3874 path->search_commit_root = true; 3875 path->skip_locking = true; 3876 3877 while (!ret && !(stopped = rescan_should_stop(fs_info))) { 3878 trans = btrfs_start_transaction(fs_info->fs_root, 0); 3879 if (IS_ERR(trans)) { 3880 ret = PTR_ERR(trans); 3881 break; 3882 } 3883 3884 ret = qgroup_rescan_leaf(trans, path); 3885 did_leaf_rescans = true; 3886 3887 if (ret > 0) 3888 btrfs_commit_transaction(trans); 3889 else 3890 btrfs_end_transaction(trans); 3891 } 3892 3893 out: 3894 btrfs_free_path(path); 3895 3896 mutex_lock(&fs_info->qgroup_rescan_lock); 3897 if (ret > 0 && 3898 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 3899 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 3900 } else if (ret < 0 || stopped) { 3901 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 3902 } 3903 mutex_unlock(&fs_info->qgroup_rescan_lock); 3904 3905 /* 3906 * Only update status, since the previous part has already updated the 3907 * qgroup info, and only if we did any actual work. This also prevents 3908 * race with a concurrent quota disable, which has already set 3909 * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at 3910 * btrfs_quota_disable(). 
3911 */ 3912 if (did_leaf_rescans) { 3913 trans = btrfs_start_transaction(fs_info->quota_root, 1); 3914 if (IS_ERR(trans)) { 3915 ret = PTR_ERR(trans); 3916 trans = NULL; 3917 btrfs_err(fs_info, 3918 "fail to start transaction for status update: %d", 3919 ret); 3920 } 3921 } else { 3922 trans = NULL; 3923 } 3924 3925 mutex_lock(&fs_info->qgroup_rescan_lock); 3926 if (!stopped || 3927 fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) 3928 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3929 if (trans) { 3930 int ret2 = update_qgroup_status_item(trans); 3931 3932 if (ret2 < 0) { 3933 ret = ret2; 3934 btrfs_err(fs_info, "fail to update qgroup status: %d", ret); 3935 } 3936 } 3937 fs_info->qgroup_rescan_running = false; 3938 fs_info->qgroup_flags &= ~BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN; 3939 complete_all(&fs_info->qgroup_rescan_completion); 3940 mutex_unlock(&fs_info->qgroup_rescan_lock); 3941 3942 if (!trans) 3943 return; 3944 3945 btrfs_end_transaction(trans); 3946 3947 if (stopped) { 3948 btrfs_info(fs_info, "qgroup scan paused"); 3949 } else if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) { 3950 btrfs_info(fs_info, "qgroup scan cancelled"); 3951 } else if (ret >= 0) { 3952 btrfs_info(fs_info, "qgroup scan completed%s", 3953 ret > 0 ? " (inconsistency flag cleared)" : ""); 3954 } else { 3955 btrfs_err(fs_info, "qgroup scan failed with %d", ret); 3956 } 3957 } 3958 3959 /* 3960 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 3961 * memory required for the rescan context. 
3962 */ 3963 static int 3964 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 3965 int init_flags) 3966 { 3967 int ret = 0; 3968 3969 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE) { 3970 btrfs_warn(fs_info, "qgroup rescan init failed, running in simple mode"); 3971 return -EINVAL; 3972 } 3973 3974 if (!init_flags) { 3975 /* we're resuming qgroup rescan at mount time */ 3976 if (!(fs_info->qgroup_flags & 3977 BTRFS_QGROUP_STATUS_FLAG_RESCAN)) { 3978 btrfs_debug(fs_info, 3979 "qgroup rescan init failed, qgroup rescan is not queued"); 3980 ret = -EINVAL; 3981 } else if (!(fs_info->qgroup_flags & 3982 BTRFS_QGROUP_STATUS_FLAG_ON)) { 3983 btrfs_debug(fs_info, 3984 "qgroup rescan init failed, qgroup is not enabled"); 3985 ret = -ENOTCONN; 3986 } 3987 3988 if (ret) 3989 return ret; 3990 } 3991 3992 mutex_lock(&fs_info->qgroup_rescan_lock); 3993 3994 if (init_flags) { 3995 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 3996 ret = -EINPROGRESS; 3997 } else if (!(fs_info->qgroup_flags & 3998 BTRFS_QGROUP_STATUS_FLAG_ON)) { 3999 btrfs_debug(fs_info, 4000 "qgroup rescan init failed, qgroup is not enabled"); 4001 ret = -ENOTCONN; 4002 } else if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED) { 4003 /* Quota disable is in progress */ 4004 ret = -EBUSY; 4005 } 4006 4007 if (ret) { 4008 mutex_unlock(&fs_info->qgroup_rescan_lock); 4009 return ret; 4010 } 4011 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 4012 } 4013 4014 memset(&fs_info->qgroup_rescan_progress, 0, 4015 sizeof(fs_info->qgroup_rescan_progress)); 4016 fs_info->qgroup_flags &= ~(BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN | 4017 BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING); 4018 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 4019 init_completion(&fs_info->qgroup_rescan_completion); 4020 mutex_unlock(&fs_info->qgroup_rescan_lock); 4021 4022 btrfs_init_work(&fs_info->qgroup_rescan_work, 4023 btrfs_qgroup_rescan_worker, NULL); 4024 return 
0; 4025 } 4026 4027 static void 4028 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 4029 { 4030 struct rb_node *n; 4031 struct btrfs_qgroup *qgroup; 4032 4033 spin_lock(&fs_info->qgroup_lock); 4034 /* clear all current qgroup tracking information */ 4035 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 4036 qgroup = rb_entry(n, struct btrfs_qgroup, node); 4037 qgroup->rfer = 0; 4038 qgroup->rfer_cmpr = 0; 4039 qgroup->excl = 0; 4040 qgroup->excl_cmpr = 0; 4041 qgroup_dirty(fs_info, qgroup); 4042 } 4043 spin_unlock(&fs_info->qgroup_lock); 4044 } 4045 4046 int 4047 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 4048 { 4049 int ret = 0; 4050 4051 ret = qgroup_rescan_init(fs_info, 0, 1); 4052 if (ret) 4053 return ret; 4054 4055 /* 4056 * We have set the rescan_progress to 0, which means no more 4057 * delayed refs will be accounted by btrfs_qgroup_account_ref. 4058 * However, btrfs_qgroup_account_ref may be right after its call 4059 * to btrfs_find_all_roots, in which case it would still do the 4060 * accounting. 4061 * To solve this, we're committing the transaction, which will 4062 * ensure we run all delayed refs and only after that, we are 4063 * going to clear all tracking information for a clean start. 4064 */ 4065 4066 ret = btrfs_commit_current_transaction(fs_info->fs_root); 4067 if (ret) { 4068 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 4069 return ret; 4070 } 4071 4072 qgroup_rescan_zero_tracking(fs_info); 4073 4074 mutex_lock(&fs_info->qgroup_rescan_lock); 4075 /* 4076 * The rescan worker is only for full accounting qgroups, check if it's 4077 * enabled as it is pointless to queue it otherwise. A concurrent quota 4078 * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED. 
4079 */ 4080 if (btrfs_qgroup_full_accounting(fs_info)) { 4081 fs_info->qgroup_rescan_running = true; 4082 btrfs_queue_work(fs_info->qgroup_rescan_workers, 4083 &fs_info->qgroup_rescan_work); 4084 } else { 4085 ret = -ENOTCONN; 4086 } 4087 mutex_unlock(&fs_info->qgroup_rescan_lock); 4088 4089 return ret; 4090 } 4091 4092 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 4093 bool interruptible) 4094 { 4095 int running; 4096 int ret = 0; 4097 4098 mutex_lock(&fs_info->qgroup_rescan_lock); 4099 running = fs_info->qgroup_rescan_running; 4100 mutex_unlock(&fs_info->qgroup_rescan_lock); 4101 4102 if (!running) 4103 return 0; 4104 4105 if (interruptible) 4106 ret = wait_for_completion_interruptible( 4107 &fs_info->qgroup_rescan_completion); 4108 else 4109 wait_for_completion(&fs_info->qgroup_rescan_completion); 4110 4111 return ret; 4112 } 4113 4114 /* 4115 * this is only called from open_ctree where we're still single threaded, thus 4116 * locking is omitted here. 4117 */ 4118 void 4119 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 4120 { 4121 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 4122 mutex_lock(&fs_info->qgroup_rescan_lock); 4123 fs_info->qgroup_rescan_running = true; 4124 btrfs_queue_work(fs_info->qgroup_rescan_workers, 4125 &fs_info->qgroup_rescan_work); 4126 mutex_unlock(&fs_info->qgroup_rescan_lock); 4127 } 4128 } 4129 4130 #define rbtree_iterate_from_safe(node, next, start) \ 4131 for (node = start; node && ({ next = rb_next(node); 1;}); node = next) 4132 4133 static int qgroup_unreserve_range(struct btrfs_inode *inode, 4134 struct extent_changeset *reserved, u64 start, 4135 u64 len) 4136 { 4137 struct rb_node *node; 4138 struct rb_node *next; 4139 struct ulist_node *entry; 4140 int ret = 0; 4141 4142 node = reserved->range_changed.root.rb_node; 4143 if (!node) 4144 return 0; 4145 while (node) { 4146 entry = rb_entry(node, struct ulist_node, rb_node); 4147 if (entry->val < start) 4148 node = node->rb_right; 
4149 else 4150 node = node->rb_left; 4151 } 4152 4153 if (entry->val > start && rb_prev(&entry->rb_node)) 4154 entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, 4155 rb_node); 4156 4157 rbtree_iterate_from_safe(node, next, &entry->rb_node) { 4158 u64 entry_start; 4159 u64 entry_end; 4160 u64 entry_len; 4161 int clear_ret; 4162 4163 entry = rb_entry(node, struct ulist_node, rb_node); 4164 entry_start = entry->val; 4165 entry_end = entry->aux; 4166 entry_len = entry_end - entry_start + 1; 4167 4168 if (entry_start >= start + len) 4169 break; 4170 if (entry_start + entry_len <= start) 4171 continue; 4172 /* 4173 * Now the entry is in [start, start + len), revert the 4174 * EXTENT_QGROUP_RESERVED bit. 4175 */ 4176 clear_ret = btrfs_clear_extent_bit(&inode->io_tree, entry_start, entry_end, 4177 EXTENT_QGROUP_RESERVED, NULL); 4178 if (!ret && clear_ret < 0) 4179 ret = clear_ret; 4180 4181 ulist_del(&reserved->range_changed, entry->val, entry->aux); 4182 if (likely(reserved->bytes_changed >= entry_len)) { 4183 reserved->bytes_changed -= entry_len; 4184 } else { 4185 WARN_ON(1); 4186 reserved->bytes_changed = 0; 4187 } 4188 } 4189 4190 return ret; 4191 } 4192 4193 /* 4194 * Try to free some space for qgroup. 4195 * 4196 * For qgroup, there are only 3 ways to free qgroup space: 4197 * - Flush nodatacow write 4198 * Any nodatacow write will free its reserved data space at run_delalloc_range(). 4199 * In theory, we should only flush nodatacow inodes, but it's not yet 4200 * possible, so we need to flush the whole root. 4201 * 4202 * - Wait for ordered extents 4203 * When ordered extents are finished, their reserved metadata is finally 4204 * converted to per_trans status, which can be freed by later commit 4205 * transaction. 4206 * 4207 * - Commit transaction 4208 * This would free the meta_per_trans space. 4209 * In theory this shouldn't provide much space, but any more qgroup space 4210 * is needed. 
4211 */ 4212 static int try_flush_qgroup(struct btrfs_root *root) 4213 { 4214 int ret; 4215 4216 /* Can't hold an open transaction or we run the risk of deadlocking. */ 4217 ASSERT(current->journal_info == NULL); 4218 if (WARN_ON(current->journal_info)) 4219 return 0; 4220 4221 /* 4222 * We don't want to run flush again and again, so if there is a running 4223 * one, we won't try to start a new flush, but exit directly. 4224 */ 4225 if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { 4226 wait_event(root->qgroup_flush_wait, 4227 !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); 4228 return 0; 4229 } 4230 4231 ret = btrfs_start_delalloc_snapshot(root, true); 4232 if (ret < 0) 4233 goto out; 4234 btrfs_wait_ordered_extents(root, U64_MAX, NULL); 4235 4236 /* 4237 * After waiting for ordered extents run delayed iputs in order to free 4238 * space from unlinked files before committing the current transaction, 4239 * as ordered extents may have been holding the last reference of an 4240 * inode and they add a delayed iput when they complete. 
4241 */ 4242 btrfs_run_delayed_iputs(root->fs_info); 4243 btrfs_wait_on_delayed_iputs(root->fs_info); 4244 4245 ret = btrfs_commit_current_transaction(root); 4246 out: 4247 clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); 4248 wake_up(&root->qgroup_flush_wait); 4249 return ret; 4250 } 4251 4252 static int qgroup_reserve_data(struct btrfs_inode *inode, 4253 struct extent_changeset **reserved_ret, u64 start, 4254 u64 len) 4255 { 4256 struct btrfs_root *root = inode->root; 4257 struct extent_changeset *reserved; 4258 bool new_reserved = false; 4259 u64 orig_reserved; 4260 u64 to_reserve; 4261 int ret; 4262 4263 if (btrfs_qgroup_mode(root->fs_info) == BTRFS_QGROUP_MODE_DISABLED || 4264 !btrfs_is_fstree(btrfs_root_id(root)) || len == 0) 4265 return 0; 4266 4267 /* @reserved parameter is mandatory for qgroup */ 4268 if (WARN_ON(!reserved_ret)) 4269 return -EINVAL; 4270 if (!*reserved_ret) { 4271 new_reserved = true; 4272 *reserved_ret = extent_changeset_alloc(); 4273 if (!*reserved_ret) 4274 return -ENOMEM; 4275 } 4276 reserved = *reserved_ret; 4277 /* Record already reserved space */ 4278 orig_reserved = reserved->bytes_changed; 4279 ret = btrfs_set_record_extent_bits(&inode->io_tree, start, 4280 start + len - 1, EXTENT_QGROUP_RESERVED, 4281 reserved); 4282 4283 /* Newly reserved space */ 4284 to_reserve = reserved->bytes_changed - orig_reserved; 4285 trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len, 4286 to_reserve, QGROUP_RESERVE); 4287 if (ret < 0) 4288 goto out; 4289 ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA); 4290 if (ret < 0) 4291 goto cleanup; 4292 4293 return ret; 4294 4295 cleanup: 4296 qgroup_unreserve_range(inode, reserved, start, len); 4297 out: 4298 if (new_reserved) { 4299 extent_changeset_free(reserved); 4300 *reserved_ret = NULL; 4301 } 4302 return ret; 4303 } 4304 4305 /* 4306 * Reserve qgroup space for range [start, start + len). 
4307 * 4308 * This function will either reserve space from related qgroups or do nothing 4309 * if the range is already reserved. 4310 * 4311 * Return 0 for successful reservation 4312 * Return <0 for error (including -EQUOT) 4313 * 4314 * NOTE: This function may sleep for memory allocation, dirty page flushing and 4315 * commit transaction. So caller should not hold any dirty page locked. 4316 */ 4317 int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, 4318 struct extent_changeset **reserved_ret, u64 start, 4319 u64 len) 4320 { 4321 int ret; 4322 4323 ret = qgroup_reserve_data(inode, reserved_ret, start, len); 4324 if (ret <= 0 && ret != -EDQUOT) 4325 return ret; 4326 4327 ret = try_flush_qgroup(inode->root); 4328 if (ret < 0) 4329 return ret; 4330 return qgroup_reserve_data(inode, reserved_ret, start, len); 4331 } 4332 4333 /* Free ranges specified by @reserved, normally in error path */ 4334 static int qgroup_free_reserved_data(struct btrfs_inode *inode, 4335 struct extent_changeset *reserved, 4336 u64 start, u64 len, u64 *freed_ret) 4337 { 4338 struct btrfs_root *root = inode->root; 4339 struct ulist_node *unode; 4340 struct ulist_iterator uiter; 4341 struct extent_changeset changeset; 4342 u64 freed = 0; 4343 int ret; 4344 4345 extent_changeset_init_bytes_only(&changeset); 4346 len = round_up(start + len, root->fs_info->sectorsize); 4347 start = round_down(start, root->fs_info->sectorsize); 4348 4349 ULIST_ITER_INIT(&uiter); 4350 while ((unode = ulist_next(&reserved->range_changed, &uiter))) { 4351 u64 range_start = unode->val; 4352 /* unode->aux is the inclusive end */ 4353 u64 range_len = unode->aux - range_start + 1; 4354 u64 free_start; 4355 u64 free_len; 4356 4357 extent_changeset_release(&changeset); 4358 4359 /* Only free range in range [start, start + len) */ 4360 if (range_start >= start + len || 4361 range_start + range_len <= start) 4362 continue; 4363 free_start = max(range_start, start); 4364 free_len = min(start + len, range_start + 
range_len) - 4365 free_start; 4366 /* 4367 * TODO: To also modify reserved->ranges_reserved to reflect 4368 * the modification. 4369 * 4370 * However as long as we free qgroup reserved according to 4371 * EXTENT_QGROUP_RESERVED, we won't double free. 4372 * So not need to rush. 4373 */ 4374 ret = btrfs_clear_record_extent_bits(&inode->io_tree, free_start, 4375 free_start + free_len - 1, 4376 EXTENT_QGROUP_RESERVED, 4377 &changeset); 4378 if (ret < 0) 4379 goto out; 4380 freed += changeset.bytes_changed; 4381 } 4382 btrfs_qgroup_free_refroot(root->fs_info, btrfs_root_id(root), freed, 4383 BTRFS_QGROUP_RSV_DATA); 4384 if (freed_ret) 4385 *freed_ret = freed; 4386 ret = 0; 4387 out: 4388 extent_changeset_release(&changeset); 4389 return ret; 4390 } 4391 4392 static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, 4393 struct extent_changeset *reserved, u64 start, u64 len, 4394 u64 *released, int free) 4395 { 4396 struct extent_changeset changeset; 4397 int trace_op = QGROUP_RELEASE; 4398 int ret; 4399 4400 if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) { 4401 return btrfs_clear_record_extent_bits(&inode->io_tree, start, 4402 start + len - 1, 4403 EXTENT_QGROUP_RESERVED, NULL); 4404 } 4405 4406 /* In release case, we shouldn't have @reserved */ 4407 WARN_ON(!free && reserved); 4408 if (free && reserved) 4409 return qgroup_free_reserved_data(inode, reserved, start, len, released); 4410 extent_changeset_init_bytes_only(&changeset); 4411 ret = btrfs_clear_record_extent_bits(&inode->io_tree, start, start + len - 1, 4412 EXTENT_QGROUP_RESERVED, &changeset); 4413 if (ret < 0) 4414 goto out; 4415 4416 if (free) 4417 trace_op = QGROUP_FREE; 4418 trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len, 4419 changeset.bytes_changed, trace_op); 4420 if (free) 4421 btrfs_qgroup_free_refroot(inode->root->fs_info, 4422 btrfs_root_id(inode->root), 4423 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); 4424 if (released) 4425 *released = 
changeset.bytes_changed; 4426 out: 4427 extent_changeset_release(&changeset); 4428 return ret; 4429 } 4430 4431 /* 4432 * Free a reserved space range from io_tree and related qgroups 4433 * 4434 * Should be called when a range of pages get invalidated before reaching disk. 4435 * Or for error cleanup case. 4436 * if @reserved is given, only reserved range in [@start, @start + @len) will 4437 * be freed. 4438 * 4439 * For data written to disk, use btrfs_qgroup_release_data(). 4440 * 4441 * NOTE: This function may sleep for memory allocation. 4442 */ 4443 int btrfs_qgroup_free_data(struct btrfs_inode *inode, 4444 struct extent_changeset *reserved, 4445 u64 start, u64 len, u64 *freed) 4446 { 4447 return __btrfs_qgroup_release_data(inode, reserved, start, len, freed, 1); 4448 } 4449 4450 /* 4451 * Release a reserved space range from io_tree only. 4452 * 4453 * Should be called when a range of pages get written to disk and corresponding 4454 * FILE_EXTENT is inserted into corresponding root. 4455 * 4456 * Since new qgroup accounting framework will only update qgroup numbers at 4457 * commit_transaction() time, its reserved space shouldn't be freed from 4458 * related qgroups. 4459 * 4460 * But we should release the range from io_tree, to allow further write to be 4461 * COWed. 4462 * 4463 * NOTE: This function may sleep for memory allocation. 
4464 */ 4465 int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len, u64 *released) 4466 { 4467 return __btrfs_qgroup_release_data(inode, NULL, start, len, released, 0); 4468 } 4469 4470 static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, 4471 enum btrfs_qgroup_rsv_type type) 4472 { 4473 if (type != BTRFS_QGROUP_RSV_META_PREALLOC && 4474 type != BTRFS_QGROUP_RSV_META_PERTRANS) 4475 return; 4476 if (num_bytes == 0) 4477 return; 4478 4479 spin_lock(&root->qgroup_meta_rsv_lock); 4480 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) 4481 root->qgroup_meta_rsv_prealloc += num_bytes; 4482 else 4483 root->qgroup_meta_rsv_pertrans += num_bytes; 4484 spin_unlock(&root->qgroup_meta_rsv_lock); 4485 } 4486 4487 static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes, 4488 enum btrfs_qgroup_rsv_type type) 4489 { 4490 if (type != BTRFS_QGROUP_RSV_META_PREALLOC && 4491 type != BTRFS_QGROUP_RSV_META_PERTRANS) 4492 return 0; 4493 if (num_bytes == 0) 4494 return 0; 4495 4496 spin_lock(&root->qgroup_meta_rsv_lock); 4497 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) { 4498 num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc, 4499 num_bytes); 4500 root->qgroup_meta_rsv_prealloc -= num_bytes; 4501 } else { 4502 num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans, 4503 num_bytes); 4504 root->qgroup_meta_rsv_pertrans -= num_bytes; 4505 } 4506 spin_unlock(&root->qgroup_meta_rsv_lock); 4507 return num_bytes; 4508 } 4509 4510 static int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 4511 enum btrfs_qgroup_rsv_type type, bool enforce) 4512 { 4513 struct btrfs_fs_info *fs_info = root->fs_info; 4514 int ret; 4515 4516 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || 4517 !btrfs_is_fstree(btrfs_root_id(root)) || num_bytes == 0) 4518 return 0; 4519 4520 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 4521 trace_btrfs_qgroup_meta_reserve(root, (s64)num_bytes, type); 4522 ret = qgroup_reserve(root, 
num_bytes, enforce, type); 4523 if (ret < 0) 4524 return ret; 4525 /* 4526 * Record what we have reserved into root. 4527 * 4528 * To avoid quota disabled->enabled underflow. 4529 * In that case, we may try to free space we haven't reserved 4530 * (since quota was disabled), so record what we reserved into root. 4531 * And ensure later release won't underflow this number. 4532 */ 4533 add_root_meta_rsv(root, num_bytes, type); 4534 return ret; 4535 } 4536 4537 int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root, int num_bytes, 4538 bool enforce, bool noflush) 4539 { 4540 int ret; 4541 4542 ret = btrfs_qgroup_reserve_meta(root, num_bytes, 4543 BTRFS_QGROUP_RSV_META_PREALLOC, enforce); 4544 if ((ret <= 0 && ret != -EDQUOT) || noflush) 4545 return ret; 4546 4547 ret = try_flush_qgroup(root); 4548 if (ret < 0) 4549 return ret; 4550 return btrfs_qgroup_reserve_meta(root, num_bytes, 4551 BTRFS_QGROUP_RSV_META_PREALLOC, enforce); 4552 } 4553 4554 /* 4555 * Per-transaction meta reservation should be all freed at transaction commit 4556 * time 4557 */ 4558 void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) 4559 { 4560 struct btrfs_fs_info *fs_info = root->fs_info; 4561 4562 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || 4563 !btrfs_is_fstree(btrfs_root_id(root))) 4564 return; 4565 4566 /* TODO: Update trace point to handle such free */ 4567 trace_btrfs_qgroup_meta_free_all_pertrans(root); 4568 /* Special value -1 means to free all reserved space */ 4569 btrfs_qgroup_free_refroot(fs_info, btrfs_root_id(root), (u64)-1, 4570 BTRFS_QGROUP_RSV_META_PERTRANS); 4571 } 4572 4573 void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root, int num_bytes) 4574 { 4575 struct btrfs_fs_info *fs_info = root->fs_info; 4576 4577 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || 4578 !btrfs_is_fstree(btrfs_root_id(root))) 4579 return; 4580 4581 /* 4582 * reservation for META_PREALLOC can happen before quota is enabled, 4583 * which 
can lead to underflow. 4584 * Here ensure we will only free what we really have reserved. 4585 */ 4586 num_bytes = sub_root_meta_rsv(root, num_bytes, 4587 BTRFS_QGROUP_RSV_META_PREALLOC); 4588 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 4589 trace_btrfs_qgroup_meta_reserve(root, -(s64)num_bytes, 4590 BTRFS_QGROUP_RSV_META_PREALLOC); 4591 btrfs_qgroup_free_refroot(fs_info, btrfs_root_id(root), num_bytes, 4592 BTRFS_QGROUP_RSV_META_PREALLOC); 4593 } 4594 4595 static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, 4596 int num_bytes) 4597 { 4598 struct btrfs_qgroup *qgroup; 4599 LIST_HEAD(qgroup_list); 4600 4601 if (num_bytes == 0) 4602 return; 4603 if (!fs_info->quota_root) 4604 return; 4605 4606 spin_lock(&fs_info->qgroup_lock); 4607 qgroup = find_qgroup_rb(fs_info, ref_root); 4608 if (!qgroup) 4609 goto out; 4610 4611 qgroup_iterator_add(&qgroup_list, qgroup); 4612 list_for_each_entry(qgroup, &qgroup_list, iterator) { 4613 struct btrfs_qgroup_list *glist; 4614 4615 qgroup_rsv_release(fs_info, qgroup, num_bytes, 4616 BTRFS_QGROUP_RSV_META_PREALLOC); 4617 if (!sb_rdonly(fs_info->sb)) 4618 qgroup_rsv_add(fs_info, qgroup, num_bytes, 4619 BTRFS_QGROUP_RSV_META_PERTRANS); 4620 4621 list_for_each_entry(glist, &qgroup->groups, next_group) 4622 qgroup_iterator_add(&qgroup_list, glist->group); 4623 } 4624 out: 4625 qgroup_iterator_clean(&qgroup_list); 4626 spin_unlock(&fs_info->qgroup_lock); 4627 } 4628 4629 /* 4630 * Convert @num_bytes of META_PREALLOCATED reservation to META_PERTRANS. 4631 * 4632 * This is called when preallocated meta reservation needs to be used. 4633 * Normally after btrfs_join_transaction() call. 
4634 */ 4635 void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) 4636 { 4637 struct btrfs_fs_info *fs_info = root->fs_info; 4638 4639 if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED || 4640 !btrfs_is_fstree(btrfs_root_id(root))) 4641 return; 4642 /* Same as btrfs_qgroup_free_meta_prealloc() */ 4643 num_bytes = sub_root_meta_rsv(root, num_bytes, 4644 BTRFS_QGROUP_RSV_META_PREALLOC); 4645 trace_btrfs_qgroup_meta_convert(root, num_bytes); 4646 qgroup_convert_meta(fs_info, btrfs_root_id(root), num_bytes); 4647 if (!sb_rdonly(fs_info->sb)) 4648 add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS); 4649 } 4650 4651 /* 4652 * Check qgroup reserved space leaking, normally at destroy inode 4653 * time 4654 */ 4655 void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode) 4656 { 4657 struct extent_changeset changeset; 4658 struct ulist_node *unode; 4659 struct ulist_iterator iter; 4660 int ret; 4661 4662 extent_changeset_init(&changeset); 4663 ret = btrfs_clear_record_extent_bits(&inode->io_tree, 0, (u64)-1, 4664 EXTENT_QGROUP_RESERVED, &changeset); 4665 4666 WARN_ON(ret < 0); 4667 if (WARN_ON(changeset.bytes_changed)) { 4668 ASSERT(extent_changeset_tracks_ranges(&changeset)); 4669 ULIST_ITER_INIT(&iter); 4670 while ((unode = ulist_next(&changeset.range_changed, &iter))) { 4671 btrfs_warn(inode->root->fs_info, 4672 "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu", 4673 btrfs_ino(inode), unode->val, unode->aux); 4674 } 4675 btrfs_qgroup_free_refroot(inode->root->fs_info, 4676 btrfs_root_id(inode->root), 4677 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); 4678 4679 } 4680 extent_changeset_release(&changeset); 4681 } 4682 4683 void btrfs_qgroup_init_swapped_blocks( 4684 struct btrfs_qgroup_swapped_blocks *swapped_blocks) 4685 { 4686 int i; 4687 4688 spin_lock_init(&swapped_blocks->lock); 4689 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 4690 swapped_blocks->blocks[i] = RB_ROOT; 4691 
swapped_blocks->swapped = false; 4692 } 4693 4694 /* 4695 * Delete all swapped blocks record of @root. 4696 * Every record here means we skipped a full subtree scan for qgroup. 4697 * 4698 * Gets called when committing one transaction. 4699 */ 4700 void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root) 4701 { 4702 struct btrfs_qgroup_swapped_blocks *swapped_blocks; 4703 int i; 4704 4705 swapped_blocks = &root->swapped_blocks; 4706 4707 spin_lock(&swapped_blocks->lock); 4708 if (!swapped_blocks->swapped) 4709 goto out; 4710 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4711 struct rb_root *cur_root = &swapped_blocks->blocks[i]; 4712 struct btrfs_qgroup_swapped_block *entry; 4713 struct btrfs_qgroup_swapped_block *next; 4714 4715 rbtree_postorder_for_each_entry_safe(entry, next, cur_root, 4716 node) 4717 kfree(entry); 4718 swapped_blocks->blocks[i] = RB_ROOT; 4719 } 4720 swapped_blocks->swapped = false; 4721 out: 4722 spin_unlock(&swapped_blocks->lock); 4723 } 4724 4725 static int qgroup_swapped_block_bytenr_key_cmp(const void *key, const struct rb_node *node) 4726 { 4727 const u64 *bytenr = key; 4728 const struct btrfs_qgroup_swapped_block *block = rb_entry(node, 4729 struct btrfs_qgroup_swapped_block, node); 4730 4731 if (block->subvol_bytenr < *bytenr) 4732 return -1; 4733 else if (block->subvol_bytenr > *bytenr) 4734 return 1; 4735 4736 return 0; 4737 } 4738 4739 static int qgroup_swapped_block_bytenr_cmp(struct rb_node *new, const struct rb_node *existing) 4740 { 4741 const struct btrfs_qgroup_swapped_block *new_block = rb_entry(new, 4742 struct btrfs_qgroup_swapped_block, node); 4743 4744 return qgroup_swapped_block_bytenr_key_cmp(&new_block->subvol_bytenr, existing); 4745 } 4746 4747 /* 4748 * Add subtree roots record into @subvol_root. 
4749 * 4750 * @subvol_root: tree root of the subvolume tree get swapped 4751 * @bg: block group under balance 4752 * @subvol_parent/slot: pointer to the subtree root in subvolume tree 4753 * @reloc_parent/slot: pointer to the subtree root in reloc tree 4754 * BOTH POINTERS ARE BEFORE TREE SWAP 4755 * @last_snapshot: last snapshot generation of the subvolume tree 4756 */ 4757 int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root, 4758 struct btrfs_block_group *bg, 4759 struct extent_buffer *subvol_parent, int subvol_slot, 4760 struct extent_buffer *reloc_parent, int reloc_slot, 4761 u64 last_snapshot) 4762 { 4763 struct btrfs_fs_info *fs_info = subvol_root->fs_info; 4764 struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks; 4765 struct btrfs_qgroup_swapped_block *block; 4766 struct rb_node *node; 4767 int level = btrfs_header_level(subvol_parent) - 1; 4768 int ret = 0; 4769 4770 if (!btrfs_qgroup_full_accounting(fs_info)) 4771 return 0; 4772 4773 if (unlikely(btrfs_node_ptr_generation(subvol_parent, subvol_slot) > 4774 btrfs_node_ptr_generation(reloc_parent, reloc_slot))) { 4775 btrfs_err_rl(fs_info, 4776 "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu", 4777 __func__, 4778 btrfs_node_ptr_generation(subvol_parent, subvol_slot), 4779 btrfs_node_ptr_generation(reloc_parent, reloc_slot)); 4780 return -EUCLEAN; 4781 } 4782 4783 block = kmalloc_obj(*block, GFP_NOFS); 4784 if (!block) { 4785 ret = -ENOMEM; 4786 goto out; 4787 } 4788 4789 /* 4790 * @reloc_parent/slot is still before swap, while @block is going to 4791 * record the bytenr after swap, so we do the swap here. 
4792 */ 4793 block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot); 4794 block->subvol_generation = btrfs_node_ptr_generation(reloc_parent, 4795 reloc_slot); 4796 block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot); 4797 block->reloc_generation = btrfs_node_ptr_generation(subvol_parent, 4798 subvol_slot); 4799 block->last_snapshot = last_snapshot; 4800 block->level = level; 4801 4802 /* 4803 * If we have bg == NULL, we're called from btrfs_recover_relocation(), 4804 * no one else can modify tree blocks thus we qgroup will not change 4805 * no matter the value of trace_leaf. 4806 */ 4807 if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA) 4808 block->trace_leaf = true; 4809 else 4810 block->trace_leaf = false; 4811 btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot); 4812 4813 /* Insert @block into @blocks */ 4814 spin_lock(&blocks->lock); 4815 node = rb_find_add(&block->node, &blocks->blocks[level], qgroup_swapped_block_bytenr_cmp); 4816 if (node) { 4817 struct btrfs_qgroup_swapped_block *entry; 4818 4819 entry = rb_entry(node, struct btrfs_qgroup_swapped_block, node); 4820 4821 if (unlikely(entry->subvol_generation != block->subvol_generation || 4822 entry->reloc_bytenr != block->reloc_bytenr || 4823 entry->reloc_generation != block->reloc_generation)) { 4824 /* 4825 * Duplicated but mismatch entry found. Shouldn't happen. 4826 * Marking qgroup inconsistent should be enough for end 4827 * users. 4828 */ 4829 DEBUG_WARN("duplicated but mismatched entry found"); 4830 ret = -EEXIST; 4831 } 4832 kfree(block); 4833 goto out_unlock; 4834 } 4835 blocks->swapped = true; 4836 out_unlock: 4837 spin_unlock(&blocks->lock); 4838 out: 4839 if (ret < 0) 4840 qgroup_mark_inconsistent(fs_info, "%s error: %d", __func__, ret); 4841 return ret; 4842 } 4843 4844 /* 4845 * Check if the tree block is a subtree root, and if so do the needed 4846 * delayed subtree trace for qgroup. 4847 * 4848 * This is called during btrfs_cow_block(). 
4849 */ 4850 int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, 4851 struct btrfs_root *root, 4852 struct extent_buffer *subvol_eb) 4853 { 4854 struct btrfs_fs_info *fs_info = root->fs_info; 4855 struct btrfs_tree_parent_check check = { 0 }; 4856 struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks; 4857 struct btrfs_qgroup_swapped_block AUTO_KFREE(block); 4858 struct extent_buffer *reloc_eb = NULL; 4859 struct rb_node *node; 4860 bool swapped = false; 4861 int level = btrfs_header_level(subvol_eb); 4862 int ret = 0; 4863 int i; 4864 4865 if (!btrfs_qgroup_full_accounting(fs_info)) 4866 return 0; 4867 if (!btrfs_is_fstree(btrfs_root_id(root)) || !root->reloc_root) 4868 return 0; 4869 4870 spin_lock(&blocks->lock); 4871 if (!blocks->swapped) { 4872 spin_unlock(&blocks->lock); 4873 return 0; 4874 } 4875 node = rb_find(&subvol_eb->start, &blocks->blocks[level], 4876 qgroup_swapped_block_bytenr_key_cmp); 4877 if (!node) { 4878 spin_unlock(&blocks->lock); 4879 goto out; 4880 } 4881 block = rb_entry(node, struct btrfs_qgroup_swapped_block, node); 4882 4883 /* Found one, remove it from @blocks first and update blocks->swapped */ 4884 rb_erase(&block->node, &blocks->blocks[level]); 4885 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4886 if (RB_EMPTY_ROOT(&blocks->blocks[i])) { 4887 swapped = true; 4888 break; 4889 } 4890 } 4891 blocks->swapped = swapped; 4892 spin_unlock(&blocks->lock); 4893 4894 check.level = block->level; 4895 check.transid = block->reloc_generation; 4896 check.has_first_key = true; 4897 memcpy(&check.first_key, &block->first_key, sizeof(check.first_key)); 4898 4899 /* Read out reloc subtree root */ 4900 reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, &check); 4901 if (IS_ERR(reloc_eb)) { 4902 ret = PTR_ERR(reloc_eb); 4903 reloc_eb = NULL; 4904 goto free_out; 4905 } 4906 4907 ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb, 4908 block->last_snapshot, block->trace_leaf); 4909 free_out: 4910 
free_extent_buffer(reloc_eb); 4911 out: 4912 if (ret < 0) { 4913 qgroup_mark_inconsistent(fs_info, 4914 "failed to account subtree at bytenr %llu: %d", 4915 subvol_eb->start, ret); 4916 } 4917 return ret; 4918 } 4919 4920 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) 4921 { 4922 struct btrfs_qgroup_extent_record *entry; 4923 unsigned long index; 4924 4925 xa_for_each(&trans->delayed_refs.dirty_extents, index, entry) { 4926 ulist_free(entry->old_roots); 4927 kfree(entry); 4928 } 4929 xa_destroy(&trans->delayed_refs.dirty_extents); 4930 } 4931 4932 int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, 4933 const struct btrfs_squota_delta *delta) 4934 { 4935 int ret; 4936 struct btrfs_qgroup *qgroup; 4937 struct btrfs_qgroup *qg; 4938 LIST_HEAD(qgroup_list); 4939 u64 root = delta->root; 4940 u64 num_bytes = delta->num_bytes; 4941 const int sign = (delta->is_inc ? 1 : -1); 4942 4943 if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE && 4944 !test_bit(BTRFS_FS_SQUOTA_ENABLING, &fs_info->flags)) 4945 return 0; 4946 4947 if (!btrfs_is_fstree(root)) 4948 return 0; 4949 4950 /* If the extent predates enabling quotas, don't count it. 
*/ 4951 if (delta->generation < fs_info->qgroup_enable_gen) 4952 return 0; 4953 4954 spin_lock(&fs_info->qgroup_lock); 4955 qgroup = find_qgroup_rb(fs_info, root); 4956 if (WARN_ON_ONCE(!qgroup)) { 4957 btrfs_warn(fs_info, "squota failed to find qgroup for root %llu", root); 4958 ret = 0; 4959 goto out; 4960 } 4961 4962 ret = 0; 4963 qgroup_iterator_add(&qgroup_list, qgroup); 4964 list_for_each_entry(qg, &qgroup_list, iterator) { 4965 struct btrfs_qgroup_list *glist; 4966 4967 ASSERT(qg->excl == qg->rfer); 4968 if (WARN_ON_ONCE(sign < 0 && qg->excl < num_bytes)) { 4969 btrfs_warn(fs_info, 4970 "squota underflow qg " BTRFS_QGROUP_FMT " excl %llu num_bytes %llu", 4971 BTRFS_QGROUP_FMT_VALUE(qg), 4972 qg->excl, num_bytes); 4973 qg->excl = 0; 4974 qg->rfer = 0; 4975 } else { 4976 qg->excl += num_bytes * sign; 4977 qg->rfer += num_bytes * sign; 4978 } 4979 qgroup_dirty(fs_info, qg); 4980 4981 list_for_each_entry(glist, &qg->groups, next_group) 4982 qgroup_iterator_add(&qgroup_list, glist->group); 4983 } 4984 qgroup_iterator_clean(&qgroup_list); 4985 4986 out: 4987 spin_unlock(&fs_info->qgroup_lock); 4988 return ret; 4989 } 4990