// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself use file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE, only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}
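/*
 * Worked example of the calculation above, using purely illustrative numbers
 * (the real XFS_RESERVED_ZONES and XFS_GC_ZONES constants are defined in
 * xfs_zones.h):  assuming XFS_RESERVED_ZONES == 2, XFS_GC_ZONES == 1, a zone
 * size of 65536 blocks and sb_rtreserved == 0, the default reservations are
 *
 *	XC_FREE_RTEXTENTS:	2 * 65536 + 0 = 131072 blocks
 *	XC_FREE_RTAVAILABLE:	1 * 65536     =  65536 blocks
 *
 * i.e. more capacity is withheld from the user-visible free space than from
 * the instantly writable space, which gives the allocator headroom before
 * writers have to wait for GC.
 */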
void
xfs_zoned_resv_wake_all(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation *reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

void
xfs_zoned_add_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation *reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}
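/*
 * Illustration of the wakeup loop above (numbers are hypothetical): if waiters
 * for 8, 4 and 16 blocks are queued in that order and the freed space brings
 * the XC_FREE_RTAVAILABLE sum to 10 blocks, only the first waiter is woken
 * (8 <= 10); the running budget then drops to 2, the second waiter needs
 * 4 > 2, and the loop stops.  Waiters are thus served strictly in FIFO order
 * and are never overtaken by a smaller request queued behind them.
 */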
static int
xfs_zoned_space_wait_error(
	struct xfs_mount	*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

static int
xfs_zoned_reserve_available(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_reservation reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int			error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
	 *
	 * If the caller wants to dip into the reserved pool also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
		   (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_mount	*mp,
	xfs_filblks_t		*count_fsb,
	unsigned int		flags)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	s64			len = *count_fsb;
	int			error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

int
xfs_zoned_space_reserve(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb,
	unsigned int		flags,
	struct xfs_zone_alloc_ctx *ac)
{
	int			error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(mp, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(mp, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_mount	*mp,
	struct xfs_zone_alloc_ctx *ac)
{
	if (ac->reserved_blocks > 0) {
		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
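/*
 * Minimal usage sketch, not part of the original file: it shows how a caller
 * is expected to pair xfs_zoned_space_reserve() and
 * xfs_zoned_space_unreserve() around an allocation.  The function name and
 * the bare XFS_ZR_GREEDY flag choice are hypothetical; real call sites live
 * elsewhere in fs/xfs and pick flags to match their locking and I/O context.
 */
static int __maybe_unused
xfs_zoned_space_reserve_sketch(
	struct xfs_mount	*mp,
	xfs_filblks_t		count_fsb)
{
	struct xfs_zone_alloc_ctx ac = { };
	int			error;

	/*
	 * XFS_ZR_GREEDY lets the reservation shrink to whatever space is
	 * left instead of failing a short write with -ENOSPC.
	 */
	error = xfs_zoned_space_reserve(mp, count_fsb, XFS_ZR_GREEDY, &ac);
	if (error)
		return error;

	/*
	 * A real caller would consume up to ac.reserved_blocks here, with
	 * ac.open_zone tracking the zone picked by the allocator.
	 */

	/* Return anything still reserved to both free counters. */
	xfs_zoned_space_unreserve(mp, &ac);
	return 0;
}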