xref: /linux/fs/xfs/xfs_zone_space_resv.c (revision 469447200aed04c383189b64aa07070be052c48a)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_icache.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself use file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on the stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}

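/*
 * Wake up every task waiting for a space reservation so that it can re-check
 * the free counters or notice a file system shutdown.
 */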
void
xfs_zoned_resv_wake_all(
	struct xfs_mount		*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

void
xfs_zoned_add_available(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

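	/*
	 * If no task is waiting for space, return the blocks directly to the
	 * available counter without taking the reservation lock.
	 */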
	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
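	/*
	 * Wake the waiters in FIFO order as long as the total space now
	 * available covers their reservations, and stop at the first one that
	 * does not fit.
	 */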
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

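/*
 * Check for conditions that should end a wait for free space: a file system
 * shutdown or a fatal signal for the waiting task.
 */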
static int
xfs_zoned_space_wait_error(
	struct xfs_mount		*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

static int
xfs_zoned_reserve_available(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb,
	unsigned int			flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

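	/*
	 * Queue up on the reservation wait list and sleep until GC frees
	 * enough blocks.  The task state is set before retrying the counter
	 * so that a wake-up between the failed retry and schedule() is not
	 * lost.
	 */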
	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already. As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount		*mp,
	xfs_filblks_t			*count_fsb,
	unsigned int			flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	s64				len = *count_fsb;
	int				error = -ENOSPC;

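	/*
	 * Taking the reservation lock serializes concurrent greedy callers,
	 * so reading the remaining space and decrementing it below act as a
	 * single step and two callers cannot claim the same blocks.
	 */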
	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

int
xfs_zoned_space_reserve(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb,
	unsigned int			flags,
	struct xfs_zone_alloc_ctx	*ac)
{
	int				error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

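	/*
	 * First reserve against the user-visible capacity (XC_FREE_RTEXTENTS).
	 * If that fails, flush the deferred inode inactivation work, which may
	 * return recently freed blocks, and retry.
	 */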
	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) {
		xfs_inodegc_flush(mp);
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
				flags & XFS_ZR_RESERVED);
	}
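	/*
	 * Greedy callers accept a short reservation: grab whatever is still
	 * available and shrink count_fsb to match.
	 */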
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

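	/*
	 * Also reserve from the instantly writable blocks
	 * (XC_FREE_RTAVAILABLE), possibly waiting for GC to free up space.
	 * On failure undo the XC_FREE_RTEXTENTS reservation taken above.
	 */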
	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_mount		*mp,
	struct xfs_zone_alloc_ctx	*ac)
{
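	/*
	 * Return any unused reserved blocks to both the instantly available
	 * and the user-visible free space counters.
	 */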
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
265