// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"
#include "fs.h"

/*
 * This contains the logic to handle async discard.
 *
 * Async discard manages trimming of free space outside of transaction commit.
 * Discarding is done by managing the block_groups on an LRU list based on free
 * space recency. Two passes are used: the first prioritizes discarding extents,
 * and the second trims bitmaps, which gives coalescing the best opportunity.
 * The block_groups are maintained on multiple lists to allow for multiple
 * passes with different discard filter requirements. A delayed work item is
 * used to manage discarding with a timeout determined by the max of the delay
 * incurred by the iops rate limit, the byte rate limit, and the max delay of
 * BTRFS_DISCARD_MAX_DELAY.
 *
 * Note, this only keeps track of block_groups that are explicitly for data.
 * Mixed block_groups are not supported.
 *
 * The first list is special to manage discarding of fully free block groups.
 * This is necessary because we issue a final trim for a fully free block group
 * after forgetting it. When a block group becomes unused, instead of directly
 * being added to the unused_bgs list, we add it to this first list. Then
 * from there, if it becomes fully discarded, we place it onto the unused_bgs
 * list.
 *
 * The in-memory free space cache serves as the backing state for discard.
 * Consequently this means there is no persistence. We opt to load all the
 * block groups in as not discarded, so the mount case degenerates to the
 * crashing case.
 *
 * As the free space cache uses bitmaps, there exists a tradeoff between
 * ease/efficiency for find_free_extent() and the accuracy of discard state.
 * Here we opt to let untrimmed regions merge with everything while only letting
 * trimmed regions merge with other trimmed regions. This can cause
 * overtrimming, but the coalescing benefit seems to be worth it. Additionally,
 * bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
 * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
 * this resets the state and we will retry trimming the whole bitmap. This is a
 * tradeoff between discard state accuracy and the cost of accounting.
 */

/* This is an initial delay to give some chance for block reuse */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(1000U)

/* Monotonically decreasing minimum length filters after index 0 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};

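/* Return the discard list matching @block_group's current discard_index. */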
static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  const struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}

/*
 * Determine if async discard should be running.
 *
 * @discard_ctl: discard control
 *
 * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 */
static bool btrfs_run_discard_work(const struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

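/*
 * Queue @block_group at the tail of its discard list, taking a reference if it
 * was not already queued. Newly queued block groups, and those moving off the
 * unused list, get a fresh eligible time and a cursor reset. Does nothing if
 * async discard is not running. Caller must hold discard_ctl->lock.
 */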
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	lockdep_assert_held(&discard_ctl->lock);
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}
	if (list_empty(&block_group->discard_list))
		btrfs_get_block_group(block_group);

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}

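/*
 * Lock and queue a block_group on its discard list. Only data block groups are
 * tracked for async discard, so anything else is ignored.
 */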
static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				struct btrfs_block_group *block_group)
{
	if (!btrfs_is_block_group_data_only(block_group))
		return;

	spin_lock(&discard_ctl->lock);
	__add_to_discard_list(discard_ctl, block_group);
	spin_unlock(&discard_ctl->lock);
}

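/*
 * Queue a now empty block_group on the special unused discard list with the
 * shorter BTRFS_DISCARD_UNUSED_DELAY, so it can be fully trimmed before being
 * handed to the unused_bgs path.
 */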
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	bool queued;

	spin_lock(&discard_ctl->lock);

	queued = !list_empty(&block_group->discard_list);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	if (!queued)
		btrfs_get_block_group(block_group);
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}

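/*
 * Remove @block_group from any discard list, dropping the list's reference if
 * it was queued. Returns true if @block_group was the one currently being
 * discarded, in which case discard_ctl->block_group is also cleared.
 */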
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;
	bool queued = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	queued = !list_empty(&block_group->discard_list);
	list_del_init(&block_group->discard_list);
	if (queued)
		btrfs_put_block_group(block_group);

	spin_unlock(&discard_ctl->lock);

	return running;
}

/*
 * Find the block_group that's up next for discarding.
 *
 * @discard_ctl: discard control
 * @now: current time
 *
 * Iterate over the discard lists to find the next block_group up for
 * discarding, checking the discard_eligible_time of each block_group.
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}

/*
 * Look up the next block group and set it for use.
 *
 * @discard_ctl: discard control
 * @discard_state: the discard_state of the block_group after state management
 * @discard_index: the discard_index of the block_group after state management
 * @now: time when discard was invoked, in ns
 *
 * Wrap find_next_block_group() and set the block_group to be in use.
 * @discard_state's control flow is managed here. Variables related to
 * @discard_state are reset here as needed (e.g. @discard_cursor). @discard_state
 * and @discard_index are remembered as they may change while we're discarding,
 * but we want the discard to execute in the context determined here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group)) {
				__add_to_discard_list(discard_ctl, block_group);
			} else {
				list_del_init(&block_group->discard_list);
				btrfs_put_block_group(block_group);
			}
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
	}
	if (block_group) {
		btrfs_get_block_group(block_group);
		discard_ctl->block_group = block_group;
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/*
 * Update a block group's filters.
 *
 * @block_group: block group of interest
 * @bytes: recently freed region size after coalescing
 *
 * Async discard maintains multiple lists with progressively smaller filters
 * to prioritize discarding based on size. Should a free space region that
 * matches a larger filter be returned to the free_space_cache, prioritize that
 * discard by moving @block_group to the proper filter.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}

/*
 * Move a block group along the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Increment @block_group's discard_index. If it falls off the list, let it be.
 * Otherwise add it back to the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/*
 * Remove a block_group from the discard lists.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Remove @block_group from the discard lists. If necessary, wait on the
 * current work and then reschedule the delayed work.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/*
 * Handles queuing the block_groups.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Maintain the LRU order of the discard lists.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

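/*
 * Schedule the delayed discard work for the next eligible block_group. The
 * delay is the max of the configured delay_ms, the delay implied by the
 * kbps_limit given the previous discard, and the remaining time until the
 * block_group becomes eligible. With @override, time already elapsed since the
 * previous discard is credited against the delay. Called with
 * discard_ctl->lock held.
 */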
static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so we can manage the bytes rate limit by keeping
		 * track of the previous discard.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}

/*
 * Responsible for scheduling the discard work.
 *
 * @discard_ctl: discard control
 * @override: override the current timer
 *
 * Discards are issued by a delayed workqueue item. @override is used to
 * update the current delay as the baseline delay interval is reevaluated on
 * transaction commit. This is also maxed with any other rate limit.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine next step of a block_group.
 *
 * @discard_ctl: discard control
 * @block_group: block_group of interest
 *
 * Determine the next step for a block group after it's finished going through
 * a pass on a discard list. If it is unused and fully trimmed, we can mark it
 * unused and send it to the unused_bgs path. Otherwise, pass it onto the
 * appropriate filter list or let it fall off.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/*
 * Discard work queue callback
 *
 * @work: work
 *
 * Find the next block_group to start discarding and then discard a single
 * region. It does this in a two-pass fashion: first extents and second
 * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group)
		return;
	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_lock(&discard_ctl->lock);
		btrfs_put_block_group(block_group);
		discard_ctl->block_group = NULL;
		spin_unlock(&discard_ctl->lock);
		return;
	}
	if (now < block_group->discard_eligible_time) {
		spin_lock(&discard_ctl->lock);
		btrfs_put_block_group(block_group);
		discard_ctl->block_group = NULL;
		spin_unlock(&discard_ctl->lock);
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the max
		 * length filter. In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	btrfs_put_block_group(block_group);
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Recalculate the base delay.
 *
 * @discard_ctl: discard control
 *
 * Recalculate the base delay which is based on the total number of
 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The following is to fix a potential -1 discrepancy that we're not
	 * sure how to reproduce. But given that this is the only place that
	 * utilizes these numbers and this is only called from
	 * btrfs_finish_extent_commit() which is synchronized, we can correct
	 * it here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);

	if (iops_limit) {
		delay = MSEC_PER_SEC / iops_limit;
	} else {
		/*
		 * Unset iops_limit means go as fast as possible, so allow a
		 * delay of 0.
		 */
		delay = 0;
		min_delay = 0;
	}

	delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}

/*
 * Propagate discard counters.
 *
 * @block_group: block_group of interest
 *
 * Propagate deltas of counters up to the discard_ctl. It maintains a current
 * counter and a previous counter, passing the delta up to the global stat.
 * Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}

/*
 * Punt unused_bgs list to discard lists.
 *
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed. In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit. This
 * is ultimately what we are trying to avoid with asynchronous discard. Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * This put is for the get done by btrfs_mark_bg_unused.
		 * Queueing discard incremented it for discard's reference.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/*
 * Purge discard lists.
 *
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path. As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
			btrfs_put_block_group(block_group);
		}
	}
	spin_unlock(&discard_ctl->lock);
}

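/*
 * Resume async discard: punt any pending unused block groups to the discard
 * lists and mark discard as running. If async discard is not enabled, clean up
 * any leftover discard state instead.
 */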
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

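/* Stop async discard from doing any further work. */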
void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

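/* Initialize discard_ctl: lock, work item, lists, counters, and default limits. */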
void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}

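/*
 * Stop async discard, cancel the pending delayed work, and release any block
 * groups still sitting on the discard lists.
 */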
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}