// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_icache.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself use file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on the stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, i.e. how far the
 * file system can be filled, while XC_FREE_RTAVAILABLE counts the blocks
 * instantly available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE, only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}

void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}
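
/*
 * The wait/wake protocol implemented by xfs_zoned_reserve_available() below
 * together with xfs_zoned_add_available() above roughly works as follows
 * (illustrative summary only, the code is authoritative):
 *
 *   waiter:
 *	1. take zi_reservation_lock and add an on-stack reservation to the
 *	   tail of zi_reclaim_reservations
 *	2. set TASK_KILLABLE and retry the XC_FREE_RTAVAILABLE decrement
 *	3. on -ENOSPC kick GC if it isn't running, drop the lock and
 *	   schedule()
 *	4. repeat from 2 after being woken, until the decrement succeeds,
 *	   the file system shuts down, a fatal signal is pending, or there
 *	   is nothing left for GC to reclaim
 *
 *   GC / freeing side:
 *	1. return blocks to XC_FREE_RTAVAILABLE
 *	2. walk zi_reclaim_reservations in order and wake waiters as long as
 *	   the remaining summed counter covers their count_fsb
 *
 * Waking in list order gives rough FIFO fairness; a woken waiter still has
 * to win the xfs_dec_freecounter() race itself.
 */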
static int
xfs_zoned_reserve_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool, also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}
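
/*
 * The public entry points below build on xfs_zoned_reserve_available().
 * A rough summary of the reservation flags as they are used in this file:
 *
 *  XFS_ZR_NOWAIT	do not block: skip the inodegc flush retry and return
 *			-EAGAIN instead of waiting for GC to free up available
 *			blocks.
 *  XFS_ZR_RESERVED	allow dipping into the reserved pool and bypass the
 *			wait list.
 *  XFS_ZR_GREEDY	for requests of more than one block, try to shrink the
 *			request to whatever is still free rather than failing
 *			outright with -ENOSPC.
 */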

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount	*mp,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

/*
 * Reserve both user-visible capacity (XC_FREE_RTEXTENTS) and instantly
 * writable blocks (XC_FREE_RTAVAILABLE) for a write of count_fsb blocks.
 * On success the reservation is recorded in @ac; any unused part must be
 * returned with xfs_zoned_space_unreserve().
 */
int
xfs_zoned_space_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && !(flags & XFS_ZR_NOWAIT)) {
		xfs_inodegc_flush(mp);
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

/*
 * Release whatever is left of a reservation taken by
 * xfs_zoned_space_reserve() and drop the reference on the open zone, if any.
 */
void
xfs_zoned_space_unreserve(
	struct xfs_mount	*mp,
	struct xfs_zone_alloc_ctx *ac)
{
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
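
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * a write path would bracket its block allocation with a reservation
 * roughly like this, with the zoned allocator expected to consume
 * ac.reserved_blocks elsewhere as blocks are actually mapped, and
 * xfs_zoned_space_unreserve() giving back whatever is left.
 *
 *	struct xfs_zone_alloc_ctx	ac = { };
 *	int				error;
 *
 *	error = xfs_zoned_space_reserve(mp, count_fsb, 0, &ac);
 *	if (error)
 *		return error;
 *
 *	error = do_zoned_write(mp, &ac);	// hypothetical consumer
 *
 *	xfs_zoned_space_unreserve(mp, &ac);
 *	return error;
 */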