// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_icache.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself use file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks. This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}
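
/*
 * Worked example of the defaults computed above, using purely illustrative
 * numbers (the real XFS_RESERVED_ZONES and XFS_GC_ZONES constants live in
 * xfs_zones.h, and sb_rtreserved depends on the mkfs options):
 *
 *	zone size (m_groups[XG_TYPE_RTG].blocks)	= 65536 blocks
 *	XFS_GC_ZONES (assumed)				= 1
 *	XFS_RESERVED_ZONES (assumed)			= 2
 *	sb_rtreserved					= 0
 *
 *	XC_FREE_RTAVAILABLE default	= 1 * 65536	=  65536 blocks
 *	XC_FREE_RTEXTENTS default	= 2 * 65536 + 0	= 131072 blocks
 *
 * As described above, more capacity is held back from the user-visible
 * XC_FREE_RTEXTENTS counter than from XC_FREE_RTAVAILABLE, so the allocator
 * does not have to trigger GC for every zone it fills.
 */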

void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}
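
/*
 * Example of the wake-up pass above: if adding count_fsb brings the summed
 * XC_FREE_RTAVAILABLE count to 100 blocks and the reservation list holds
 * waiters asking for 40, 30 and 50 blocks (in FIFO order), the first two are
 * woken (40 + 30 <= 100) while the third keeps sleeping until more space is
 * reclaimed, as only 30 blocks of the running budget remain for it.
 */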

static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

static int
xfs_zoned_reserve_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list. This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space. If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already. As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount	*mp,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}
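
/*
 * Example of the greedy path above: a short write asking to reserve 8 blocks
 * when only 3 are left in XC_FREE_RTEXTENTS has *count_fsb trimmed to 3 and
 * those 3 blocks reserved, so it can still make partial progress instead of
 * failing with -ENOSPC outright.
 */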

int
xfs_zoned_space_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) {
		xfs_inodegc_flush(mp);
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_mount	*mp,
	struct xfs_zone_alloc_ctx *ac)
{
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
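
/*
 * Minimal usage sketch (hypothetical caller, for illustration only): a writer
 * reserves space up front and drops whatever it did not consume once the I/O
 * completes, e.g.:
 *
 *	struct xfs_zone_alloc_ctx	ac = { };
 *	int				error;
 *
 *	error = xfs_zoned_space_reserve(mp, count_fsb, 0, &ac);
 *	if (error)
 *		return error;
 *	... allocate and write the reserved blocks, tracked in ac ...
 *	xfs_zoned_space_unreserve(mp, &ac);
 *
 * Passing XFS_ZR_NOWAIT instead of 0 turns the potentially long wait for GC
 * into an immediate -EAGAIN, and XFS_ZR_RESERVED lets internal callers dip
 * into the reserved pool as described in xfs_zoned_reserve_available().
 */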