// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_icache.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself use file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on the stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes
 * available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return xfs_rtgs_to_rfsbs(mp, XFS_RESERVED_ZONES) +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return xfs_rtgs_to_rfsbs(mp, XFS_GC_ZONES);
	default:
		ASSERT(0);
		return 0;
	}
}

void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

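/*
 * Reserve count_fsb blocks from the instantly available space.
 *
 * Try to take the blocks directly from the XC_FREE_RTAVAILABLE counter; if
 * that fails and the caller is allowed to sleep, queue up on the
 * zi_reclaim_reservations list and wait for GC to make more blocks
 * available, giving up on shutdown, on a fatal signal, or when there is
 * nothing left to reclaim.
 */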
static int
xfs_zoned_reserve_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_zoned_have_reclaimable(mp->m_zone_info) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount	*mp,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

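/*
 * Reserve blocks for a data write.
 *
 * The blocks are first taken from the user-visible XC_FREE_RTEXTENTS pool,
 * flushing pending inodegc work and optionally falling back to a partial
 * greedy reservation on ENOSPC, and then from XC_FREE_RTAVAILABLE, possibly
 * waiting for GC to make blocks available.  If the latter fails, the
 * XC_FREE_RTEXTENTS reservation is given back.
 */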
int
xfs_zoned_space_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) {
		xfs_inodegc_flush(mp);
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_mount	*mp,
	struct xfs_zone_alloc_ctx *ac)
{
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
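
/*
 * Illustrative sketch only, kept out of the build with #if 0: how a
 * hypothetical caller could pair xfs_zoned_space_reserve() with
 * xfs_zoned_space_unreserve() around a zoned write.  The function name and
 * the flags value used here are assumptions for illustration, not an
 * existing caller of this code.  A real caller would normally decrement
 * ac.reserved_blocks as blocks are consumed, so that unreserving only
 * returns the leftover to the free counters.
 */
#if 0
static int
example_zoned_write_sketch(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_alloc_ctx ac = { };
	int			error;

	/* Block until both pools can cover count_fsb or the fs shuts down. */
	error = xfs_zoned_space_reserve(mp, count_fsb, 0, &ac);
	if (error)
		return error;

	/* ... allocate and write up to ac.reserved_blocks here ... */

	/* Return whatever was not consumed by the write. */
	xfs_zoned_space_unreserve(mp, &ac);
	return 0;
}
#endif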