1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/jiffies.h> 4 #include <linux/kernel.h> 5 #include <linux/ktime.h> 6 #include <linux/list.h> 7 #include <linux/math64.h> 8 #include <linux/sizes.h> 9 #include <linux/workqueue.h> 10 #include "ctree.h" 11 #include "block-group.h" 12 #include "discard.h" 13 #include "free-space-cache.h" 14 15 /* 16 * This contains the logic to handle async discard. 17 * 18 * Async discard manages trimming of free space outside of transaction commit. 19 * Discarding is done by managing the block_groups on a LRU list based on free 20 * space recency. Two passes are used to first prioritize discarding extents 21 * and then allow for trimming in the bitmap the best opportunity to coalesce. 22 * The block_groups are maintained on multiple lists to allow for multiple 23 * passes with different discard filter requirements. A delayed work item is 24 * used to manage discarding with timeout determined by a max of the delay 25 * incurred by the iops rate limit, the byte rate limit, and the max delay of 26 * BTRFS_DISCARD_MAX_DELAY. 27 * 28 * Note, this only keeps track of block_groups that are explicitly for data. 29 * Mixed block_groups are not supported. 30 * 31 * The first list is special to manage discarding of fully free block groups. 32 * This is necessary because we issue a final trim for a full free block group 33 * after forgetting it. When a block group becomes unused, instead of directly 34 * being added to the unused_bgs list, we add it to this first list. Then 35 * from there, if it becomes fully discarded, we place it onto the unused_bgs 36 * list. 37 * 38 * The in-memory free space cache serves as the backing state for discard. 39 * Consequently this means there is no persistence. We opt to load all the 40 * block groups in as not discarded, so the mount case degenerates to the 41 * crashing case. 42 * 43 * As the free space cache uses bitmaps, there exists a tradeoff between 44 * ease/efficiency for find_free_extent() and the accuracy of discard state. 45 * Here we opt to let untrimmed regions merge with everything while only letting 46 * trimmed regions merge with other trimmed regions. This can cause 47 * overtrimming, but the coalescing benefit seems to be worth it. Additionally, 48 * bitmap state is tracked as a whole. If we're able to fully trim a bitmap, 49 * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in, 50 * this resets the state and we will retry trimming the whole bitmap. This is a 51 * tradeoff between discard state accuracy and the cost of accounting. 52 */ 53 54 /* This is an initial delay to give some chance for block reuse */ 55 #define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC) 56 #define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC) 57 58 /* Target completion latency of discarding all discardable extents */ 59 #define BTRFS_DISCARD_TARGET_MSEC (6 * 60 * 60UL * MSEC_PER_SEC) 60 #define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL) 61 #define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL) 62 #define BTRFS_DISCARD_MAX_IOPS (10U) 63 64 /* Montonically decreasing minimum length filters after index 0 */ 65 static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = { 66 0, 67 BTRFS_ASYNC_DISCARD_MAX_FILTER, 68 BTRFS_ASYNC_DISCARD_MIN_FILTER 69 }; 70 71 static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl, 72 struct btrfs_block_group *block_group) 73 { 74 return &discard_ctl->discard_list[block_group->discard_index]; 75 } 76 77 static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, 78 struct btrfs_block_group *block_group) 79 { 80 if (!btrfs_run_discard_work(discard_ctl)) 81 return; 82 83 if (list_empty(&block_group->discard_list) || 84 block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { 85 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) 86 block_group->discard_index = BTRFS_DISCARD_INDEX_START; 87 block_group->discard_eligible_time = (ktime_get_ns() + 88 BTRFS_DISCARD_DELAY); 89 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR; 90 } 91 92 list_move_tail(&block_group->discard_list, 93 get_discard_list(discard_ctl, block_group)); 94 } 95 96 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, 97 struct btrfs_block_group *block_group) 98 { 99 if (!btrfs_is_block_group_data_only(block_group)) 100 return; 101 102 spin_lock(&discard_ctl->lock); 103 __add_to_discard_list(discard_ctl, block_group); 104 spin_unlock(&discard_ctl->lock); 105 } 106 107 static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl, 108 struct btrfs_block_group *block_group) 109 { 110 spin_lock(&discard_ctl->lock); 111 112 if (!btrfs_run_discard_work(discard_ctl)) { 113 spin_unlock(&discard_ctl->lock); 114 return; 115 } 116 117 list_del_init(&block_group->discard_list); 118 119 block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED; 120 block_group->discard_eligible_time = (ktime_get_ns() + 121 BTRFS_DISCARD_UNUSED_DELAY); 122 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR; 123 list_add_tail(&block_group->discard_list, 124 &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]); 125 126 spin_unlock(&discard_ctl->lock); 127 } 128 129 static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl, 130 struct btrfs_block_group *block_group) 131 { 132 bool running = false; 133 134 spin_lock(&discard_ctl->lock); 135 136 if (block_group == discard_ctl->block_group) { 137 running = true; 138 discard_ctl->block_group = NULL; 139 } 140 141 block_group->discard_eligible_time = 0; 142 list_del_init(&block_group->discard_list); 143 144 spin_unlock(&discard_ctl->lock); 145 146 return running; 147 } 148 149 /** 150 * find_next_block_group - find block_group that's up next for discarding 151 * @discard_ctl: discard control 152 * @now: current time 153 * 154 * Iterate over the discard lists to find the next block_group up for 155 * discarding checking the discard_eligible_time of block_group. 156 */ 157 static struct btrfs_block_group *find_next_block_group( 158 struct btrfs_discard_ctl *discard_ctl, 159 u64 now) 160 { 161 struct btrfs_block_group *ret_block_group = NULL, *block_group; 162 int i; 163 164 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { 165 struct list_head *discard_list = &discard_ctl->discard_list[i]; 166 167 if (!list_empty(discard_list)) { 168 block_group = list_first_entry(discard_list, 169 struct btrfs_block_group, 170 discard_list); 171 172 if (!ret_block_group) 173 ret_block_group = block_group; 174 175 if (ret_block_group->discard_eligible_time < now) 176 break; 177 178 if (ret_block_group->discard_eligible_time > 179 block_group->discard_eligible_time) 180 ret_block_group = block_group; 181 } 182 } 183 184 return ret_block_group; 185 } 186 187 /** 188 * peek_discard_list - wrap find_next_block_group() 189 * @discard_ctl: discard control 190 * @discard_state: the discard_state of the block_group after state management 191 * @discard_index: the discard_index of the block_group after state management 192 * 193 * This wraps find_next_block_group() and sets the block_group to be in use. 194 * discard_state's control flow is managed here. Variables related to 195 * discard_state are reset here as needed (eg discard_cursor). @discard_state 196 * and @discard_index are remembered as it may change while we're discarding, 197 * but we want the discard to execute in the context determined here. 198 */ 199 static struct btrfs_block_group *peek_discard_list( 200 struct btrfs_discard_ctl *discard_ctl, 201 enum btrfs_discard_state *discard_state, 202 int *discard_index) 203 { 204 struct btrfs_block_group *block_group; 205 const u64 now = ktime_get_ns(); 206 207 spin_lock(&discard_ctl->lock); 208 again: 209 block_group = find_next_block_group(discard_ctl, now); 210 211 if (block_group && now > block_group->discard_eligible_time) { 212 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED && 213 block_group->used != 0) { 214 if (btrfs_is_block_group_data_only(block_group)) 215 __add_to_discard_list(discard_ctl, block_group); 216 else 217 list_del_init(&block_group->discard_list); 218 goto again; 219 } 220 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) { 221 block_group->discard_cursor = block_group->start; 222 block_group->discard_state = BTRFS_DISCARD_EXTENTS; 223 } 224 discard_ctl->block_group = block_group; 225 *discard_state = block_group->discard_state; 226 *discard_index = block_group->discard_index; 227 } else { 228 block_group = NULL; 229 } 230 231 spin_unlock(&discard_ctl->lock); 232 233 return block_group; 234 } 235 236 /** 237 * btrfs_discard_check_filter - updates a block groups filters 238 * @block_group: block group of interest 239 * @bytes: recently freed region size after coalescing 240 * 241 * Async discard maintains multiple lists with progressively smaller filters 242 * to prioritize discarding based on size. Should a free space that matches 243 * a larger filter be returned to the free_space_cache, prioritize that discard 244 * by moving @block_group to the proper filter. 245 */ 246 void btrfs_discard_check_filter(struct btrfs_block_group *block_group, 247 u64 bytes) 248 { 249 struct btrfs_discard_ctl *discard_ctl; 250 251 if (!block_group || 252 !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 253 return; 254 255 discard_ctl = &block_group->fs_info->discard_ctl; 256 257 if (block_group->discard_index > BTRFS_DISCARD_INDEX_START && 258 bytes >= discard_minlen[block_group->discard_index - 1]) { 259 int i; 260 261 remove_from_discard_list(discard_ctl, block_group); 262 263 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS; 264 i++) { 265 if (bytes >= discard_minlen[i]) { 266 block_group->discard_index = i; 267 add_to_discard_list(discard_ctl, block_group); 268 break; 269 } 270 } 271 } 272 } 273 274 /** 275 * btrfs_update_discard_index - moves a block group along the discard lists 276 * @discard_ctl: discard control 277 * @block_group: block_group of interest 278 * 279 * Increment @block_group's discard_index. If it falls of the list, let it be. 280 * Otherwise add it back to the appropriate list. 281 */ 282 static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl, 283 struct btrfs_block_group *block_group) 284 { 285 block_group->discard_index++; 286 if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) { 287 block_group->discard_index = 1; 288 return; 289 } 290 291 add_to_discard_list(discard_ctl, block_group); 292 } 293 294 /** 295 * btrfs_discard_cancel_work - remove a block_group from the discard lists 296 * @discard_ctl: discard control 297 * @block_group: block_group of interest 298 * 299 * This removes @block_group from the discard lists. If necessary, it waits on 300 * the current work and then reschedules the delayed work. 301 */ 302 void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl, 303 struct btrfs_block_group *block_group) 304 { 305 if (remove_from_discard_list(discard_ctl, block_group)) { 306 cancel_delayed_work_sync(&discard_ctl->work); 307 btrfs_discard_schedule_work(discard_ctl, true); 308 } 309 } 310 311 /** 312 * btrfs_discard_queue_work - handles queuing the block_groups 313 * @discard_ctl: discard control 314 * @block_group: block_group of interest 315 * 316 * This maintains the LRU order of the discard lists. 317 */ 318 void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, 319 struct btrfs_block_group *block_group) 320 { 321 if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 322 return; 323 324 if (block_group->used == 0) 325 add_to_discard_unused_list(discard_ctl, block_group); 326 else 327 add_to_discard_list(discard_ctl, block_group); 328 329 if (!delayed_work_pending(&discard_ctl->work)) 330 btrfs_discard_schedule_work(discard_ctl, false); 331 } 332 333 /** 334 * btrfs_discard_schedule_work - responsible for scheduling the discard work 335 * @discard_ctl: discard control 336 * @override: override the current timer 337 * 338 * Discards are issued by a delayed workqueue item. @override is used to 339 * update the current delay as the baseline delay interval is reevaluated on 340 * transaction commit. This is also maxed with any other rate limit. 341 */ 342 void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, 343 bool override) 344 { 345 struct btrfs_block_group *block_group; 346 const u64 now = ktime_get_ns(); 347 348 spin_lock(&discard_ctl->lock); 349 350 if (!btrfs_run_discard_work(discard_ctl)) 351 goto out; 352 353 if (!override && delayed_work_pending(&discard_ctl->work)) 354 goto out; 355 356 block_group = find_next_block_group(discard_ctl, now); 357 if (block_group) { 358 unsigned long delay = discard_ctl->delay; 359 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit); 360 361 /* 362 * A single delayed workqueue item is responsible for 363 * discarding, so we can manage the bytes rate limit by keeping 364 * track of the previous discard. 365 */ 366 if (kbps_limit && discard_ctl->prev_discard) { 367 u64 bps_limit = ((u64)kbps_limit) * SZ_1K; 368 u64 bps_delay = div64_u64(discard_ctl->prev_discard * 369 MSEC_PER_SEC, bps_limit); 370 371 delay = max(delay, msecs_to_jiffies(bps_delay)); 372 } 373 374 /* 375 * This timeout is to hopefully prevent immediate discarding 376 * in a recently allocated block group. 377 */ 378 if (now < block_group->discard_eligible_time) { 379 u64 bg_timeout = block_group->discard_eligible_time - now; 380 381 delay = max(delay, nsecs_to_jiffies(bg_timeout)); 382 } 383 384 mod_delayed_work(discard_ctl->discard_workers, 385 &discard_ctl->work, delay); 386 } 387 out: 388 spin_unlock(&discard_ctl->lock); 389 } 390 391 /** 392 * btrfs_finish_discard_pass - determine next step of a block_group 393 * @discard_ctl: discard control 394 * @block_group: block_group of interest 395 * 396 * This determines the next step for a block group after it's finished going 397 * through a pass on a discard list. If it is unused and fully trimmed, we can 398 * mark it unused and send it to the unused_bgs path. Otherwise, pass it onto 399 * the appropriate filter list or let it fall off. 400 */ 401 static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl, 402 struct btrfs_block_group *block_group) 403 { 404 remove_from_discard_list(discard_ctl, block_group); 405 406 if (block_group->used == 0) { 407 if (btrfs_is_free_space_trimmed(block_group)) 408 btrfs_mark_bg_unused(block_group); 409 else 410 add_to_discard_unused_list(discard_ctl, block_group); 411 } else { 412 btrfs_update_discard_index(discard_ctl, block_group); 413 } 414 } 415 416 /** 417 * btrfs_discard_workfn - discard work function 418 * @work: work 419 * 420 * This finds the next block_group to start discarding and then discards a 421 * single region. It does this in a two-pass fashion: first extents and second 422 * bitmaps. Completely discarded block groups are sent to the unused_bgs path. 423 */ 424 static void btrfs_discard_workfn(struct work_struct *work) 425 { 426 struct btrfs_discard_ctl *discard_ctl; 427 struct btrfs_block_group *block_group; 428 enum btrfs_discard_state discard_state; 429 int discard_index = 0; 430 u64 trimmed = 0; 431 u64 minlen = 0; 432 433 discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); 434 435 block_group = peek_discard_list(discard_ctl, &discard_state, 436 &discard_index); 437 if (!block_group || !btrfs_run_discard_work(discard_ctl)) 438 return; 439 440 /* Perform discarding */ 441 minlen = discard_minlen[discard_index]; 442 443 if (discard_state == BTRFS_DISCARD_BITMAPS) { 444 u64 maxlen = 0; 445 446 /* 447 * Use the previous levels minimum discard length as the max 448 * length filter. In the case something is added to make a 449 * region go beyond the max filter, the entire bitmap is set 450 * back to BTRFS_TRIM_STATE_UNTRIMMED. 451 */ 452 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED) 453 maxlen = discard_minlen[discard_index - 1]; 454 455 btrfs_trim_block_group_bitmaps(block_group, &trimmed, 456 block_group->discard_cursor, 457 btrfs_block_group_end(block_group), 458 minlen, maxlen, true); 459 discard_ctl->discard_bitmap_bytes += trimmed; 460 } else { 461 btrfs_trim_block_group_extents(block_group, &trimmed, 462 block_group->discard_cursor, 463 btrfs_block_group_end(block_group), 464 minlen, true); 465 discard_ctl->discard_extent_bytes += trimmed; 466 } 467 468 discard_ctl->prev_discard = trimmed; 469 470 /* Determine next steps for a block_group */ 471 if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) { 472 if (discard_state == BTRFS_DISCARD_BITMAPS) { 473 btrfs_finish_discard_pass(discard_ctl, block_group); 474 } else { 475 block_group->discard_cursor = block_group->start; 476 spin_lock(&discard_ctl->lock); 477 if (block_group->discard_state != 478 BTRFS_DISCARD_RESET_CURSOR) 479 block_group->discard_state = 480 BTRFS_DISCARD_BITMAPS; 481 spin_unlock(&discard_ctl->lock); 482 } 483 } 484 485 spin_lock(&discard_ctl->lock); 486 discard_ctl->block_group = NULL; 487 spin_unlock(&discard_ctl->lock); 488 489 btrfs_discard_schedule_work(discard_ctl, false); 490 } 491 492 /** 493 * btrfs_run_discard_work - determines if async discard should be running 494 * @discard_ctl: discard control 495 * 496 * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set. 497 */ 498 bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl) 499 { 500 struct btrfs_fs_info *fs_info = container_of(discard_ctl, 501 struct btrfs_fs_info, 502 discard_ctl); 503 504 return (!(fs_info->sb->s_flags & SB_RDONLY) && 505 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags)); 506 } 507 508 /** 509 * btrfs_discard_calc_delay - recalculate the base delay 510 * @discard_ctl: discard control 511 * 512 * Recalculate the base delay which is based off the total number of 513 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms) 514 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC). 515 */ 516 void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl) 517 { 518 s32 discardable_extents; 519 s64 discardable_bytes; 520 u32 iops_limit; 521 unsigned long delay; 522 unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC; 523 524 discardable_extents = atomic_read(&discard_ctl->discardable_extents); 525 if (!discardable_extents) 526 return; 527 528 spin_lock(&discard_ctl->lock); 529 530 /* 531 * The following is to fix a potential -1 discrepenancy that we're not 532 * sure how to reproduce. But given that this is the only place that 533 * utilizes these numbers and this is only called by from 534 * btrfs_finish_extent_commit() which is synchronized, we can correct 535 * here. 536 */ 537 if (discardable_extents < 0) 538 atomic_add(-discardable_extents, 539 &discard_ctl->discardable_extents); 540 541 discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes); 542 if (discardable_bytes < 0) 543 atomic64_add(-discardable_bytes, 544 &discard_ctl->discardable_bytes); 545 546 if (discardable_extents <= 0) { 547 spin_unlock(&discard_ctl->lock); 548 return; 549 } 550 551 iops_limit = READ_ONCE(discard_ctl->iops_limit); 552 if (iops_limit) 553 lower_limit = max_t(unsigned long, lower_limit, 554 MSEC_PER_SEC / iops_limit); 555 556 delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents; 557 delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC); 558 discard_ctl->delay = msecs_to_jiffies(delay); 559 560 spin_unlock(&discard_ctl->lock); 561 } 562 563 /** 564 * btrfs_discard_update_discardable - propagate discard counters 565 * @block_group: block_group of interest 566 * @ctl: free_space_ctl of @block_group 567 * 568 * This propagates deltas of counters up to the discard_ctl. It maintains a 569 * current counter and a previous counter passing the delta up to the global 570 * stat. Then the current counter value becomes the previous counter value. 571 */ 572 void btrfs_discard_update_discardable(struct btrfs_block_group *block_group, 573 struct btrfs_free_space_ctl *ctl) 574 { 575 struct btrfs_discard_ctl *discard_ctl; 576 s32 extents_delta; 577 s64 bytes_delta; 578 579 if (!block_group || 580 !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) || 581 !btrfs_is_block_group_data_only(block_group)) 582 return; 583 584 discard_ctl = &block_group->fs_info->discard_ctl; 585 586 extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] - 587 ctl->discardable_extents[BTRFS_STAT_PREV]; 588 if (extents_delta) { 589 atomic_add(extents_delta, &discard_ctl->discardable_extents); 590 ctl->discardable_extents[BTRFS_STAT_PREV] = 591 ctl->discardable_extents[BTRFS_STAT_CURR]; 592 } 593 594 bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] - 595 ctl->discardable_bytes[BTRFS_STAT_PREV]; 596 if (bytes_delta) { 597 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes); 598 ctl->discardable_bytes[BTRFS_STAT_PREV] = 599 ctl->discardable_bytes[BTRFS_STAT_CURR]; 600 } 601 } 602 603 /** 604 * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists 605 * @fs_info: fs_info of interest 606 * 607 * The unused_bgs list needs to be punted to the discard lists because the 608 * order of operations is changed. In the normal sychronous discard path, the 609 * block groups are trimmed via a single large trim in transaction commit. This 610 * is ultimately what we are trying to avoid with asynchronous discard. Thus, 611 * it must be done before going down the unused_bgs path. 612 */ 613 void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) 614 { 615 struct btrfs_block_group *block_group, *next; 616 617 spin_lock(&fs_info->unused_bgs_lock); 618 /* We enabled async discard, so punt all to the queue */ 619 list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, 620 bg_list) { 621 list_del_init(&block_group->bg_list); 622 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 623 } 624 spin_unlock(&fs_info->unused_bgs_lock); 625 } 626 627 /** 628 * btrfs_discard_purge_list - purge discard lists 629 * @discard_ctl: discard control 630 * 631 * If we are disabling async discard, we may have intercepted block groups that 632 * are completely free and ready for the unused_bgs path. As discarding will 633 * now happen in transaction commit or not at all, we can safely mark the 634 * corresponding block groups as unused and they will be sent on their merry 635 * way to the unused_bgs list. 636 */ 637 static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl) 638 { 639 struct btrfs_block_group *block_group, *next; 640 int i; 641 642 spin_lock(&discard_ctl->lock); 643 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { 644 list_for_each_entry_safe(block_group, next, 645 &discard_ctl->discard_list[i], 646 discard_list) { 647 list_del_init(&block_group->discard_list); 648 spin_unlock(&discard_ctl->lock); 649 if (block_group->used == 0) 650 btrfs_mark_bg_unused(block_group); 651 spin_lock(&discard_ctl->lock); 652 } 653 } 654 spin_unlock(&discard_ctl->lock); 655 } 656 657 void btrfs_discard_resume(struct btrfs_fs_info *fs_info) 658 { 659 if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) { 660 btrfs_discard_cleanup(fs_info); 661 return; 662 } 663 664 btrfs_discard_punt_unused_bgs_list(fs_info); 665 666 set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags); 667 } 668 669 void btrfs_discard_stop(struct btrfs_fs_info *fs_info) 670 { 671 clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags); 672 } 673 674 void btrfs_discard_init(struct btrfs_fs_info *fs_info) 675 { 676 struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl; 677 int i; 678 679 spin_lock_init(&discard_ctl->lock); 680 INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn); 681 682 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) 683 INIT_LIST_HEAD(&discard_ctl->discard_list[i]); 684 685 discard_ctl->prev_discard = 0; 686 atomic_set(&discard_ctl->discardable_extents, 0); 687 atomic64_set(&discard_ctl->discardable_bytes, 0); 688 discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE; 689 discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC; 690 discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS; 691 discard_ctl->kbps_limit = 0; 692 discard_ctl->discard_extent_bytes = 0; 693 discard_ctl->discard_bitmap_bytes = 0; 694 atomic64_set(&discard_ctl->discard_bytes_saved, 0); 695 } 696 697 void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info) 698 { 699 btrfs_discard_stop(fs_info); 700 cancel_delayed_work_sync(&fs_info->discard_ctl.work); 701 btrfs_discard_purge_list(&fs_info->discard_ctl); 702 } 703