// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support an rtextsize > 1, so this code
 * and the allocator itself use file system blocks interchangeably with
 * realtime extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one additional zone as well as the
 * optional persistently reserved blocks.  This allows the allocator to run
 * more smoothly by not always triggering GC.
 */
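/*
 * Worked example with purely illustrative numbers (not the actual macro
 * values): for 65536-block zones, XFS_RESERVED_ZONES == 3, XFS_GC_ZONES == 2
 * and sb_rtreserved == 0, this would reserve 3 * 65536 = 196608 blocks for
 * XC_FREE_RTEXTENTS and 2 * 65536 = 131072 blocks for XC_FREE_RTAVAILABLE.
 */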
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}

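/*
 * Wake every task currently waiting on the reclaim reservation list.
 */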
void
xfs_zoned_resv_wake_all(
	struct xfs_mount		*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

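/*
 * Add count_fsb newly freed blocks to the available counter and wake waiting
 * reservations, oldest first, as long as their requests can be satisfied.
 */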
void
xfs_zoned_add_available(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

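/*
 * Check for conditions that should abort waiting for space to become
 * available.
 */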
static int
xfs_zoned_space_wait_error(
	struct xfs_mount		*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

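/*
 * Reserve count_fsb blocks from the instantly available block counter,
 * waiting for GC to free up space if needed and allowed by the flags.
 */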
static int
xfs_zoned_reserve_available(
	struct xfs_inode		*ip,
	xfs_filblks_t			count_fsb,
	unsigned int			flags)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_inode		*ip,
	xfs_filblks_t			*count_fsb,
	unsigned int			flags)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_zone_info		*zi = mp->m_zone_info;
	s64				len = *count_fsb;
	int				error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

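/*
 * Reserve both user-visible capacity (XC_FREE_RTEXTENTS) and instantly
 * writable blocks (XC_FREE_RTAVAILABLE) for a zoned write of count_fsb
 * blocks, recording the reservation in the allocation context.
 */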
int
xfs_zoned_space_reserve(
	struct xfs_inode		*ip,
	xfs_filblks_t			count_fsb,
	unsigned int			flags,
	struct xfs_zone_alloc_ctx	*ac)
{
	struct xfs_mount		*mp = ip->i_mount;
	int				error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(ip, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(ip, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

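/*
 * Release the blocks still reserved in the allocation context and drop the
 * reference on the cached open zone, if any.
 */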
void
xfs_zoned_space_unreserve(
	struct xfs_inode		*ip,
	struct xfs_zone_alloc_ctx	*ac)
{
	if (ac->reserved_blocks > 0) {
		struct xfs_mount	*mp = ip->i_mount;

		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
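
/*
 * Illustrative usage sketch only, not taken from an actual caller; real call
 * sites size count_fsb and pick flags to match their I/O path:
 *
 *	struct xfs_zone_alloc_ctx	ac = { };
 *	int				error;
 *
 *	error = xfs_zoned_space_reserve(ip, count_fsb, XFS_ZR_GREEDY, &ac);
 *	if (error)
 *		return error;
 *	... perform the zoned write, consuming blocks from the context ...
 *	xfs_zoned_space_unreserve(ip, &ac);
 */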
264