// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_icache.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code
 * and the allocator itself use file system blocks interchangeably with
 * realtime extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes
 * available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further reduced by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return xfs_rtgs_to_rfsbs(mp, XFS_RESERVED_ZONES) +
				mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return xfs_rtgs_to_rfsbs(mp, XFS_GC_ZONES);
	default:
		ASSERT(0);
		return 0;
	}
}

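/*
 * Wake all tasks on the reclaim reservation list so that they re-evaluate
 * their wait condition, e.g. after a file system shutdown.
 */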
void
xfs_zoned_resv_wake_all(
	struct xfs_mount		*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

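/*
 * Add freed blocks back to the XC_FREE_RTAVAILABLE counter and wake waiting
 * reservations in FIFO order for as long as the updated counter value can
 * satisfy their requests.
 */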
void
xfs_zoned_add_available(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

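	/*
	 * Fast path: if no task is waiting for space, add the blocks directly
	 * to the percpu counter without taking the reservation lock.
	 */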
	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

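/*
 * Check for conditions that end the wait for space: a file system shutdown
 * (-EIO) or a fatal signal pending for the current task (-EINTR).
 */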
static int
xfs_zoned_space_wait_error(
	struct xfs_mount		*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

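/*
 * Reserve blocks from XC_FREE_RTAVAILABLE, i.e. space that can be written
 * without first waiting for GC.  Unless XFS_ZR_NOWAIT is set, sleep on the
 * reservation list until enough blocks become available or an error occurs.
 */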
static int
xfs_zoned_reserve_available(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb,
	unsigned int			flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
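		/*
		 * Mark the task TASK_KILLABLE before re-checking the free
		 * counter so that a wakeup from xfs_zoned_add_available()
		 * between the check and the call to schedule() is not lost.
		 */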
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up, as we're fully
		 * out of space.
		 */
		if (!xfs_zoned_have_reclaimable(mp->m_zone_info) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount		*mp,
	xfs_filblks_t			*count_fsb,
	unsigned int			flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	s64				len = *count_fsb;
	int				error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

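/*
 * Reserve space for a zoned write: first reserve from the user-visible
 * capacity (XC_FREE_RTEXTENTS), then from the instantly writable space
 * (XC_FREE_RTAVAILABLE), possibly waiting for GC to make progress.  On
 * success the reservation is recorded in @ac and must be undone by a
 * matching call to xfs_zoned_space_unreserve().
 *
 * XFS_ZR_NOWAIT returns -EAGAIN instead of waiting for space, XFS_ZR_GREEDY
 * allows trimming a multi-block request down to whatever is left, and
 * XFS_ZR_RESERVED allows dipping into the reserved pool.
 *
 * Typical calling convention (an illustrative sketch, not a copy of an
 * actual caller; the write itself is elided):
 *
 *	struct xfs_zone_alloc_ctx	ac = { };
 *	int				error;
 *
 *	error = xfs_zoned_space_reserve(mp, count_fsb, XFS_ZR_GREEDY, &ac);
 *	if (error)
 *		return error;
 *	(write up to ac.reserved_blocks blocks, consuming the reservation)
 *	xfs_zoned_space_unreserve(mp, &ac);
 */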
int
xfs_zoned_space_reserve(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb,
	unsigned int			flags,
	struct xfs_zone_alloc_ctx	*ac)
{
	int				error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
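	/*
	 * On -ENOSPC, flushing pending inode inactivations may free up
	 * blocks; retry once after the flush before giving up or falling
	 * back to a trimmed (greedy) reservation.
	 */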
	if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) {
		xfs_inodegc_flush(mp);
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

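/*
 * Undo a reservation: return any unused blocks to both free space counters,
 * waking tasks that wait for reclaimed space, and drop the reference to the
 * cached open zone (if any).
 */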
void
xfs_zoned_space_unreserve(
	struct xfs_mount		*mp,
	struct xfs_zone_alloc_ctx	*ac)
{
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}