// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"
#include "xfs_trace.h"

/*
 * Implement Garbage Collection (GC) of partially used zones.
 *
 * To support the purely sequential writes in each zone, zoned XFS needs to be
 * able to move data remaining in a zone out of it to reset the zone to prepare
 * for writing to it again.
 *
 * This is done by the GC thread implemented in this file.  To support that a
 * number of zones (XFS_GC_ZONES) is reserved from the user visible capacity to
 * write the garbage collected data into.
 *
 * Whenever the available space is below the chosen threshold, the GC thread
 * looks for potential non-empty but not fully used zones that are worth
 * reclaiming.  Once found the rmap for the victim zone is queried, and after
 * a bit of sorting to reduce fragmentation, the still live extents are read
 * into memory and written to the GC target zone, and the bmap btree of the
 * files is updated to point to the new location.  To avoid taking the IOLOCK
 * and MMAPLOCK for the entire GC process and thus affecting the latency of
 * user reads and writes to the files, the GC writes are speculative and the
 * I/O completion checks that no other writes happened for the affected regions
 * before remapping.
 *
 * Once a zone does not contain any valid data, be that through GC or user
 * block removal, it is queued for a zone reset.  The reset operation
 * carefully ensures that the RT device cache is flushed and all transactions
 * referencing the rmap have been committed to disk.
 */

/*
 * Size of each GC scratch pad.  This is also the upper bound for each
 * GC I/O, which helps to keep latency down.
 */
#define XFS_GC_CHUNK_SIZE	SZ_1M

/*
 * Scratchpad data to read GCed data into.
 *
 * The offset member tracks where the next allocation starts, and freed tracks
 * the amount of space that is not used anymore.
 */
#define XFS_ZONE_GC_NR_SCRATCH	2
struct xfs_zone_scratch {
	struct folio			*folio;
	unsigned int			offset;
	unsigned int			freed;
};

/*
 * Chunk that is read and written for each GC operation.
 *
 * Note that for writes to actual zoned devices, the chunk can be split when
 * reaching the hardware limit.
 */
struct xfs_gc_bio {
	struct xfs_zone_gc_data		*data;

	/*
	 * Entry into the reading/writing/resetting list.  Only accessed from
	 * the GC thread, so no locking needed.
	 */
	struct list_head		entry;

	/*
	 * State of this gc_bio.  Done means the current I/O completed.
	 * Set from the bio end I/O handler, read from the GC thread.
	 */
	enum {
		XFS_GC_BIO_NEW,
		XFS_GC_BIO_DONE,
	} state;

	/*
	 * Pointer to the inode and byte range in the inode that this
	 * GC chunk is operating on.
	 */
	struct xfs_inode		*ip;
	loff_t				offset;
	unsigned int			len;

	/*
	 * Existing startblock (in the zone to be freed) and newly assigned
	 * daddr in the zone GCed into.
	 */
	xfs_fsblock_t			old_startblock;
	xfs_daddr_t			new_daddr;
	struct xfs_zone_scratch		*scratch;

	/* Are we writing to a sequential write required zone? */
	bool				is_seq;

	/* Open Zone being written to */
	struct xfs_open_zone		*oz;

	/* Bio used for reads and writes, including the bvec used by it */
	struct bio_vec			bv;
	struct bio			bio;	/* must be last */
};

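/*
 * Number of rmap records fetched from the victim zone per rmap query.
 */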
#define XFS_ZONE_GC_RECS		1024

/* iterator, needs to be reinitialized for each victim zone */
struct xfs_zone_gc_iter {
	struct xfs_rtgroup	*victim_rtg;
	unsigned int		rec_count;
	unsigned int		rec_idx;
	xfs_agblock_t		next_startblock;
	struct xfs_rmap_irec	*recs;
};

/*
 * Per-mount GC state.
 */
struct xfs_zone_gc_data {
	struct xfs_mount	*mp;

	/* bioset used to allocate the gc_bios */
	struct bio_set		bio_set;

	/*
	 * Scratchpads used, and index to indicate which one is in use.
	 */
	struct xfs_zone_scratch	scratch[XFS_ZONE_GC_NR_SCRATCH];
	unsigned int		scratch_idx;

	/*
	 * List of bios currently being read, written and reset.
	 * These lists are only accessed by the GC thread itself, and must only
	 * be processed in order.
	 */
	struct list_head	reading;
	struct list_head	writing;
	struct list_head	resetting;

	/*
	 * Iterator for the victim zone.
	 */
	struct xfs_zone_gc_iter	iter;
};

/*
 * We aim to keep enough zones free in stock to fully use the open zone limit
 * for data placement purposes.
 */
bool
xfs_zoned_need_gc(
	struct xfs_mount	*mp)
{
	if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE))
		return false;
	if (xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE) <
	    mp->m_groups[XG_TYPE_RTG].blocks *
	    (mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
		return true;
	return false;
}

static struct xfs_zone_gc_data *
xfs_zone_gc_data_alloc(
	struct xfs_mount	*mp)
{
	struct xfs_zone_gc_data	*data;
	int			i;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;
	data->iter.recs = kcalloc(XFS_ZONE_GC_RECS, sizeof(*data->iter.recs),
			GFP_KERNEL);
	if (!data->iter.recs)
		goto out_free_data;

	/*
	 * We actually only need a single bio_vec.  It would be nice to have
	 * a flag that only allocates the inline bvecs and not the separate
	 * bvec pool.
	 */
	if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio),
			BIOSET_NEED_BVECS))
		goto out_free_recs;
	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) {
		data->scratch[i].folio =
			folio_alloc(GFP_KERNEL, get_order(XFS_GC_CHUNK_SIZE));
		if (!data->scratch[i].folio)
			goto out_free_scratch;
	}
	INIT_LIST_HEAD(&data->reading);
	INIT_LIST_HEAD(&data->writing);
	INIT_LIST_HEAD(&data->resetting);
	data->mp = mp;
	return data;

out_free_scratch:
	while (--i >= 0)
		folio_put(data->scratch[i].folio);
	bioset_exit(&data->bio_set);
out_free_recs:
	kfree(data->iter.recs);
out_free_data:
	kfree(data);
	return NULL;
}

static void
xfs_zone_gc_data_free(
	struct xfs_zone_gc_data	*data)
{
	int			i;

	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++)
		folio_put(data->scratch[i].folio);
	bioset_exit(&data->bio_set);
	kfree(data->iter.recs);
	kfree(data);
}

static void
xfs_zone_gc_iter_init(
	struct xfs_zone_gc_iter	*iter,
	struct xfs_rtgroup	*victim_rtg)
{
	iter->next_startblock = 0;
	iter->rec_count = 0;
	iter->rec_idx = 0;
	iter->victim_rtg = victim_rtg;
}

/*
 * Query the rmap of the victim zone to gather the records to evacuate.
 */
static int
xfs_zone_gc_query_cb(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *irec,
	void			*private)
{
	struct xfs_zone_gc_iter	*iter = private;

	ASSERT(!XFS_RMAP_NON_INODE_OWNER(irec->rm_owner));
	ASSERT(!xfs_is_sb_inum(cur->bc_mp, irec->rm_owner));
	ASSERT(!(irec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)));

	iter->recs[iter->rec_count] = *irec;
	if (++iter->rec_count == XFS_ZONE_GC_RECS) {
		iter->next_startblock =
			irec->rm_startblock + irec->rm_blockcount;
		return 1;
	}
	return 0;
}

#define cmp_int(l, r)		((l > r) - (l < r))

static int
xfs_zone_gc_rmap_rec_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_rmap_irec	*reca = a;
	const struct xfs_rmap_irec	*recb = b;
	int				diff;

	diff = cmp_int(reca->rm_owner, recb->rm_owner);
	if (diff)
		return diff;
	return cmp_int(reca->rm_offset, recb->rm_offset);
}

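/*
 * Fill the iterator with up to XFS_ZONE_GC_RECS rmap records for the victim
 * zone, sorted by inode number and file offset.
 */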
static int
xfs_zone_gc_query(
	struct xfs_mount	*mp,
	struct xfs_zone_gc_iter	*iter)
{
	struct xfs_rtgroup	*rtg = iter->victim_rtg;
	struct xfs_rmap_irec	ri_low = { };
	struct xfs_rmap_irec	ri_high;
	struct xfs_btree_cur	*cur;
	struct xfs_trans	*tp;
	int			error;

	ASSERT(iter->next_startblock <= rtg_blocks(rtg));
	if (iter->next_startblock == rtg_blocks(rtg))
		goto done;

	ASSERT(iter->next_startblock < rtg_blocks(rtg));
	ri_low.rm_startblock = iter->next_startblock;
	memset(&ri_high, 0xFF, sizeof(ri_high));

	iter->rec_idx = 0;
	iter->rec_count = 0;

	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
			xfs_zone_gc_query_cb, iter);
	xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
	xfs_btree_del_cursor(cur, error < 0 ? error : 0);
	xfs_trans_cancel(tp);

	if (error < 0)
		return error;

	/*
	 * Sort the rmap records by inode number and increasing offset to
	 * defragment the mappings.
	 *
	 * This could be further enhanced by an even bigger look ahead window,
	 * but that's better left until we have better detection of changes to
	 * inode mapping to avoid the potential of GCing already dead data.
	 */
	sort(iter->recs, iter->rec_count, sizeof(iter->recs[0]),
			xfs_zone_gc_rmap_rec_cmp, NULL);

	if (error == 0) {
		/*
		 * We finished iterating through the zone.
		 */
		iter->next_startblock = rtg_blocks(rtg);
		if (iter->rec_count == 0)
			goto done;
	}

	return 0;
done:
	xfs_rtgroup_rele(iter->victim_rtg);
	iter->victim_rtg = NULL;
	return 0;
}

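/*
 * Return the next rmap record to garbage collect, with a reference to the
 * inode owning it.  Records for inodes that were deleted in the meantime, or
 * that are not regular realtime files, are skipped.
 */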
static bool
xfs_zone_gc_iter_next(
	struct xfs_mount	*mp,
	struct xfs_zone_gc_iter	*iter,
	struct xfs_rmap_irec	*chunk_rec,
	struct xfs_inode	**ipp)
{
	struct xfs_rmap_irec	*irec;
	int			error;

	if (!iter->victim_rtg)
		return false;

retry:
	if (iter->rec_idx == iter->rec_count) {
		error = xfs_zone_gc_query(mp, iter);
		if (error)
			goto fail;
		if (!iter->victim_rtg)
			return false;
	}

	irec = &iter->recs[iter->rec_idx];
	error = xfs_iget(mp, NULL, irec->rm_owner,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, ipp);
	if (error) {
		/*
		 * If the inode was already deleted, skip over it.
		 */
		if (error == -ENOENT) {
			iter->rec_idx++;
			goto retry;
		}
		goto fail;
	}

	if (!S_ISREG(VFS_I(*ipp)->i_mode) || !XFS_IS_REALTIME_INODE(*ipp)) {
		iter->rec_idx++;
		xfs_irele(*ipp);
		goto retry;
	}

	*chunk_rec = *irec;
	return true;

fail:
	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	return false;
}

static void
xfs_zone_gc_iter_advance(
	struct xfs_zone_gc_iter	*iter,
	xfs_extlen_t		count_fsb)
{
	struct xfs_rmap_irec	*irec = &iter->recs[iter->rec_idx];

	irec->rm_offset += count_fsb;
	irec->rm_startblock += count_fsb;
	irec->rm_blockcount -= count_fsb;
	if (!irec->rm_blockcount)
		iter->rec_idx++;
}

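/*
 * Scan one bucket of the per-usage reclaimable zone bitmaps and return the
 * zone with the fewest used blocks found in it.
 */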
static struct xfs_rtgroup *
xfs_zone_gc_pick_victim_from(
	struct xfs_mount	*mp,
	uint32_t		bucket)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	uint32_t		victim_used = U32_MAX;
	struct xfs_rtgroup	*victim_rtg = NULL;
	uint32_t		bit;

	if (!zi->zi_used_bucket_entries[bucket])
		return NULL;

	for_each_set_bit(bit, zi->zi_used_bucket_bitmap[bucket],
			mp->m_sb.sb_rgcount) {
		struct xfs_rtgroup *rtg = xfs_rtgroup_grab(mp, bit);

		if (!rtg)
			continue;

		/* skip zones that are just waiting for a reset */
		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
			xfs_rtgroup_rele(rtg);
			continue;
		}

		if (victim_rtg)
			xfs_rtgroup_rele(victim_rtg);
		victim_rtg = rtg;
		victim_used = rtg_rmap(rtg)->i_used_blocks;

		/*
		 * Any zone that is less than 1 percent used is fair game for
		 * instant reclaim.  All of these zones are in the lowest
		 * bucket, so avoid the expensive division for the zones
		 * in the other buckets.
		 */
		if (bucket == 0 &&
		    rtg_rmap(rtg)->i_used_blocks < rtg_blocks(rtg) / 100)
			break;
	}

	return victim_rtg;
}

/*
 * Iterate through all zones marked as reclaimable and find a candidate to
 * reclaim.
 */
static bool
xfs_zone_gc_select_victim(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_zone_gc_iter	*iter = &data->iter;
	struct xfs_mount	*mp = data->mp;
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_rtgroup	*victim_rtg = NULL;
	unsigned int		bucket;

	if (xfs_is_shutdown(mp))
		return false;

	if (iter->victim_rtg)
		return true;

	/*
	 * Don't start new work if we are asked to stop or park.
	 */
	if (kthread_should_stop() || kthread_should_park())
		return false;

	if (!xfs_zoned_need_gc(mp))
		return false;

	spin_lock(&zi->zi_used_buckets_lock);
	for (bucket = 0; bucket < XFS_ZONE_USED_BUCKETS; bucket++) {
		victim_rtg = xfs_zone_gc_pick_victim_from(mp, bucket);
		if (victim_rtg)
			break;
	}
	spin_unlock(&zi->zi_used_buckets_lock);

	if (!victim_rtg)
		return false;

	trace_xfs_zone_gc_select_victim(victim_rtg, bucket);
	xfs_zone_gc_iter_init(iter, victim_rtg);
	return true;
}

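/*
 * Steal the open zone with the most remaining free space from the normal
 * open zone rotation and dedicate it to garbage collection.
 */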
static struct xfs_open_zone *
xfs_zone_gc_steal_open(
	struct xfs_zone_info	*zi)
{
	struct xfs_open_zone	*oz, *found = NULL;

	spin_lock(&zi->zi_open_zones_lock);
	list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) {
		if (!found ||
		    oz->oz_write_pointer < found->oz_write_pointer)
			found = oz;
	}

	if (found) {
		found->oz_is_gc = true;
		list_del_init(&found->oz_entry);
		zi->zi_nr_open_zones--;
	}

	spin_unlock(&zi->zi_open_zones_lock);
	return found;
}

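/*
 * Pick a new zone as the GC target once all writes to the previous target
 * zone have completed.
 */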
static struct xfs_open_zone *
xfs_zone_gc_select_target(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_open_zone	*oz = zi->zi_open_gc_zone;

	/*
	 * We need to wait for pending writes to finish.
	 */
	if (oz && oz->oz_written < rtg_blocks(oz->oz_rtg))
		return NULL;

	ASSERT(zi->zi_nr_open_zones <=
		mp->m_max_open_zones - XFS_OPEN_GC_ZONES);
	oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true);
	if (oz)
		trace_xfs_zone_gc_target_opened(oz->oz_rtg);
	spin_lock(&zi->zi_open_zones_lock);
	zi->zi_open_gc_zone = oz;
	spin_unlock(&zi->zi_open_zones_lock);
	return oz;
}

/*
 * Ensure we have a valid open zone to write the GC data to.
 *
 * If the current target zone has space keep writing to it, else first wait for
 * all pending writes and then pick a new one.
 */
static struct xfs_open_zone *
xfs_zone_gc_ensure_target(
	struct xfs_mount	*mp)
{
	struct xfs_open_zone	*oz = mp->m_zone_info->zi_open_gc_zone;

	if (!oz || oz->oz_write_pointer == rtg_blocks(oz->oz_rtg))
		return xfs_zone_gc_select_target(mp);
	return oz;
}

static unsigned int
xfs_zone_gc_scratch_available(
	struct xfs_zone_gc_data	*data)
{
	return XFS_GC_CHUNK_SIZE - data->scratch[data->scratch_idx].offset;
}

static bool
xfs_zone_gc_space_available(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_open_zone	*oz;

	oz = xfs_zone_gc_ensure_target(data->mp);
	if (!oz)
		return false;
	return oz->oz_write_pointer < rtg_blocks(oz->oz_rtg) &&
		xfs_zone_gc_scratch_available(data);
}

static void
xfs_zone_gc_end_io(
	struct bio		*bio)
{
	struct xfs_gc_bio	*chunk =
		container_of(bio, struct xfs_gc_bio, bio);
	struct xfs_zone_gc_data	*data = chunk->data;

	WRITE_ONCE(chunk->state, XFS_GC_BIO_DONE);
	wake_up_process(data->mp->m_zone_info->zi_gc_thread);
}

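/*
 * Allocate blocks in the GC target zone for the next chunk, capped by the
 * remaining scratch space, the space left in the target zone, and the
 * reserved free space pools.
 */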
static struct xfs_open_zone *
xfs_zone_gc_alloc_blocks(
	struct xfs_zone_gc_data	*data,
	xfs_extlen_t		*count_fsb,
	xfs_daddr_t		*daddr,
	bool			*is_seq)
{
	struct xfs_mount	*mp = data->mp;
	struct xfs_open_zone	*oz;

	oz = xfs_zone_gc_ensure_target(mp);
	if (!oz)
		return NULL;

	*count_fsb = min(*count_fsb,
		XFS_B_TO_FSB(mp, xfs_zone_gc_scratch_available(data)));

	/*
	 * Directly allocate GC blocks from the reserved pool.
	 *
	 * If we'd take them from the normal pool we could be stealing blocks
	 * from a regular writer, which would then have to wait for GC and
	 * deadlock.
	 */
	spin_lock(&mp->m_sb_lock);
	*count_fsb = min(*count_fsb,
			rtg_blocks(oz->oz_rtg) - oz->oz_write_pointer);
	*count_fsb = min3(*count_fsb,
			mp->m_free[XC_FREE_RTEXTENTS].res_avail,
			mp->m_free[XC_FREE_RTAVAILABLE].res_avail);
	mp->m_free[XC_FREE_RTEXTENTS].res_avail -= *count_fsb;
	mp->m_free[XC_FREE_RTAVAILABLE].res_avail -= *count_fsb;
	spin_unlock(&mp->m_sb_lock);

	if (!*count_fsb)
		return NULL;

	*daddr = xfs_gbno_to_daddr(&oz->oz_rtg->rtg_group, 0);
	*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr);
	if (!*is_seq)
		*daddr += XFS_FSB_TO_BB(mp, oz->oz_write_pointer);
	oz->oz_write_pointer += *count_fsb;
	atomic_inc(&oz->oz_ref);
	return oz;
}

static bool
xfs_zone_gc_start_chunk(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_zone_gc_iter	*iter = &data->iter;
	struct xfs_mount	*mp = data->mp;
	struct block_device	*bdev = mp->m_rtdev_targp->bt_bdev;
	struct xfs_open_zone	*oz;
	struct xfs_rmap_irec	irec;
	struct xfs_gc_bio	*chunk;
	struct xfs_inode	*ip;
	struct bio		*bio;
	xfs_daddr_t		daddr;
	bool			is_seq;

	if (xfs_is_shutdown(mp))
		return false;

	if (!xfs_zone_gc_iter_next(mp, iter, &irec, &ip))
		return false;
	oz = xfs_zone_gc_alloc_blocks(data, &irec.rm_blockcount, &daddr,
			&is_seq);
	if (!oz) {
		xfs_irele(ip);
		return false;
	}

	bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_NOFS, &data->bio_set);

	chunk = container_of(bio, struct xfs_gc_bio, bio);
	chunk->ip = ip;
	chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset);
	chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
	chunk->old_startblock =
		xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock);
	chunk->new_daddr = daddr;
	chunk->is_seq = is_seq;
	chunk->scratch = &data->scratch[data->scratch_idx];
	chunk->data = data;
	chunk->oz = oz;

	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
	bio->bi_end_io = xfs_zone_gc_end_io;
	bio_add_folio_nofail(bio, chunk->scratch->folio, chunk->len,
			chunk->scratch->offset);
	chunk->scratch->offset += chunk->len;
	if (chunk->scratch->offset == XFS_GC_CHUNK_SIZE) {
		data->scratch_idx =
			(data->scratch_idx + 1) % XFS_ZONE_GC_NR_SCRATCH;
	}
	WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
	list_add_tail(&chunk->entry, &data->reading);
	xfs_zone_gc_iter_advance(iter, irec.rm_blockcount);

	submit_bio(bio);
	return true;
}

static void
xfs_zone_gc_free_chunk(
	struct xfs_gc_bio	*chunk)
{
	list_del(&chunk->entry);
	xfs_open_zone_put(chunk->oz);
	xfs_irele(chunk->ip);
	bio_put(&chunk->bio);
}

static void
xfs_zone_gc_submit_write(
	struct xfs_zone_gc_data	*data,
	struct xfs_gc_bio	*chunk)
{
	if (chunk->is_seq) {
		chunk->bio.bi_opf &= ~REQ_OP_WRITE;
		chunk->bio.bi_opf |= REQ_OP_ZONE_APPEND;
	}
	chunk->bio.bi_iter.bi_sector = chunk->new_daddr;
	chunk->bio.bi_end_io = xfs_zone_gc_end_io;
	submit_bio(&chunk->bio);
}

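/*
 * Zone append writes cannot exceed the hardware limit for a single append.
 * Split off a maximally sized, block size aligned chunk and queue it right
 * before the remainder of the original chunk.
 */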
static struct xfs_gc_bio *
xfs_zone_gc_split_write(
	struct xfs_zone_gc_data	*data,
	struct xfs_gc_bio	*chunk)
{
	struct queue_limits	*lim =
		&bdev_get_queue(chunk->bio.bi_bdev)->limits;
	struct xfs_gc_bio	*split_chunk;
	int			split_sectors;
	unsigned int		split_len;
	struct bio		*split;
	unsigned int		nsegs;

	if (!chunk->is_seq)
		return NULL;

	split_sectors = bio_split_rw_at(&chunk->bio, lim, &nsegs,
			lim->max_zone_append_sectors << SECTOR_SHIFT);
	if (!split_sectors)
		return NULL;

	/* ensure the split chunk is still block size aligned */
	split_sectors = ALIGN_DOWN(split_sectors << SECTOR_SHIFT,
			data->mp->m_sb.sb_blocksize) >> SECTOR_SHIFT;
	split_len = split_sectors << SECTOR_SHIFT;

	split = bio_split(&chunk->bio, split_sectors, GFP_NOFS, &data->bio_set);
	split_chunk = container_of(split, struct xfs_gc_bio, bio);
	split_chunk->data = data;
	ihold(VFS_I(chunk->ip));
	split_chunk->ip = chunk->ip;
	split_chunk->is_seq = chunk->is_seq;
	split_chunk->scratch = chunk->scratch;
	split_chunk->offset = chunk->offset;
	split_chunk->len = split_len;
	split_chunk->old_startblock = chunk->old_startblock;
	split_chunk->new_daddr = chunk->new_daddr;
	split_chunk->oz = chunk->oz;
	atomic_inc(&chunk->oz->oz_ref);

	chunk->offset += split_len;
	chunk->len -= split_len;
	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);

	/* add right before the original chunk */
	WRITE_ONCE(split_chunk->state, XFS_GC_BIO_NEW);
	list_add_tail(&split_chunk->entry, &chunk->entry);
	return split_chunk;
}

static void
xfs_zone_gc_write_chunk(
	struct xfs_gc_bio	*chunk)
{
	struct xfs_zone_gc_data	*data = chunk->data;
	struct xfs_mount	*mp = chunk->ip->i_mount;
	unsigned int		folio_offset = chunk->bio.bi_io_vec->bv_offset;
	struct xfs_gc_bio	*split_chunk;

	if (chunk->bio.bi_status)
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	if (xfs_is_shutdown(mp)) {
		xfs_zone_gc_free_chunk(chunk);
		return;
	}

	WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
	list_move_tail(&chunk->entry, &data->writing);

	bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE);
	bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len,
			folio_offset);

	while ((split_chunk = xfs_zone_gc_split_write(data, chunk)))
		xfs_zone_gc_submit_write(data, split_chunk);
	xfs_zone_gc_submit_write(data, chunk);
}

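/*
 * A GC write completed: remap the file range to the new blocks, unless
 * another writer raced with us and changed the mapping in the meantime.
 */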
static void
xfs_zone_gc_finish_chunk(
	struct xfs_gc_bio	*chunk)
{
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	struct xfs_inode	*ip = chunk->ip;
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	if (chunk->bio.bi_status)
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	if (xfs_is_shutdown(mp)) {
		xfs_zone_gc_free_chunk(chunk);
		return;
	}

	chunk->scratch->freed += chunk->len;
	if (chunk->scratch->freed == chunk->scratch->offset) {
		chunk->scratch->offset = 0;
		chunk->scratch->freed = 0;
	}

	/*
	 * Cycle through the iolock and wait for direct I/O and layouts to
	 * ensure no one is reading from the old mapping before it goes away.
	 *
	 * Note that xfs_zoned_end_io() below checks that no other writer raced
	 * with us to update the mapping by checking that the old startblock
	 * didn't change.
	 */
	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(VFS_I(ip), &iolock, BREAK_UNMAP);
	if (!error)
		inode_dio_wait(VFS_I(ip));
	xfs_iunlock(ip, iolock);
	if (error)
		goto free;

	if (chunk->is_seq)
		chunk->new_daddr = chunk->bio.bi_iter.bi_sector;
	error = xfs_zoned_end_io(ip, chunk->offset, chunk->len,
			chunk->new_daddr, chunk->oz, chunk->old_startblock);
free:
	if (error)
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	xfs_zone_gc_free_chunk(chunk);
}

static void
xfs_zone_gc_finish_reset(
	struct xfs_gc_bio	*chunk)
{
	struct xfs_rtgroup	*rtg = chunk->bio.bi_private;
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_zone_info	*zi = mp->m_zone_info;

	if (chunk->bio.bi_status) {
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		goto out;
	}

	xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE);
	atomic_inc(&zi->zi_nr_free_zones);

	xfs_zoned_add_available(mp, rtg_blocks(rtg));

	wake_up_all(&zi->zi_zone_wait);
out:
	list_del(&chunk->entry);
	bio_put(&chunk->bio);
}

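/*
 * Set up a bio to reset the given zone.  Conventional zones are discarded
 * instead; return false if the device does not support discard either.
 */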
static bool
xfs_zone_gc_prepare_reset(
	struct bio		*bio,
	struct xfs_rtgroup	*rtg)
{
	trace_xfs_zone_reset(rtg);

	ASSERT(rtg_rmap(rtg)->i_used_blocks == 0);
	bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
	if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) {
		if (!bdev_max_discard_sectors(bio->bi_bdev))
			return false;
		bio->bi_opf = REQ_OP_DISCARD | REQ_SYNC;
		bio->bi_iter.bi_size =
			XFS_FSB_TO_B(rtg_mount(rtg), rtg_blocks(rtg));
	}

	return true;
}

int
xfs_zone_gc_reset_sync(
	struct xfs_rtgroup	*rtg)
{
	int			error = 0;
	struct bio		bio;

	bio_init(&bio, rtg_mount(rtg)->m_rtdev_targp->bt_bdev, NULL, 0,
			REQ_OP_ZONE_RESET);
	if (xfs_zone_gc_prepare_reset(&bio, rtg))
		error = submit_bio_wait(&bio);
	bio_uninit(&bio);

	return error;
}

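/*
 * Reset all zones on the passed-in list.  The RT device cache is flushed and
 * the rmap inode for each zone is forced to the log first, so that all
 * transactions referencing the rmap are on disk before the reset.
 */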
static void
xfs_zone_gc_reset_zones(
	struct xfs_zone_gc_data	*data,
	struct xfs_group	*reset_list)
{
	struct xfs_group	*next = reset_list;

	if (blkdev_issue_flush(data->mp->m_rtdev_targp->bt_bdev) < 0) {
		xfs_force_shutdown(data->mp, SHUTDOWN_META_IO_ERROR);
		return;
	}

	do {
		struct xfs_rtgroup	*rtg = to_rtg(next);
		struct xfs_gc_bio	*chunk;
		struct bio		*bio;

		xfs_log_force_inode(rtg_rmap(rtg));

		next = rtg_group(rtg)->xg_next_reset;
		rtg_group(rtg)->xg_next_reset = NULL;

		bio = bio_alloc_bioset(rtg_mount(rtg)->m_rtdev_targp->bt_bdev,
				0, REQ_OP_ZONE_RESET, GFP_NOFS, &data->bio_set);
		bio->bi_private = rtg;
		bio->bi_end_io = xfs_zone_gc_end_io;

		chunk = container_of(bio, struct xfs_gc_bio, bio);
		chunk->data = data;
		WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
		list_add_tail(&chunk->entry, &data->resetting);

		/*
		 * Also use the bio to drive the state machine when neither
		 * zone reset nor discard is supported to keep things simple.
		 */
		if (xfs_zone_gc_prepare_reset(bio, rtg))
			submit_bio(bio);
		else
			bio_endio(bio);
	} while (next);
}

/*
 * Handle the work to read and write data for GC and to reset the zones,
 * including handling all completions.
 *
 * Note that the order of the chunks is preserved so that we don't undo the
 * optimal order established by xfs_zone_gc_query().
 */
static bool
xfs_zone_gc_handle_work(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_zone_info	*zi = data->mp->m_zone_info;
	struct xfs_gc_bio	*chunk, *next;
	struct xfs_group	*reset_list;
	struct blk_plug		plug;

	spin_lock(&zi->zi_reset_list_lock);
	reset_list = zi->zi_reset_list;
	zi->zi_reset_list = NULL;
	spin_unlock(&zi->zi_reset_list_lock);

	if (!xfs_zone_gc_select_victim(data) ||
	    !xfs_zone_gc_space_available(data)) {
		if (list_empty(&data->reading) &&
		    list_empty(&data->writing) &&
		    list_empty(&data->resetting) &&
		    !reset_list)
			return false;
	}

	__set_current_state(TASK_RUNNING);
	try_to_freeze();

	if (reset_list)
		xfs_zone_gc_reset_zones(data, reset_list);

	list_for_each_entry_safe(chunk, next, &data->resetting, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		xfs_zone_gc_finish_reset(chunk);
	}

	list_for_each_entry_safe(chunk, next, &data->writing, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		xfs_zone_gc_finish_chunk(chunk);
	}

	blk_start_plug(&plug);
	list_for_each_entry_safe(chunk, next, &data->reading, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		xfs_zone_gc_write_chunk(chunk);
	}
	blk_finish_plug(&plug);

	blk_start_plug(&plug);
	while (xfs_zone_gc_start_chunk(data))
		;
	blk_finish_plug(&plug);
	return true;
}

/*
 * Note that the current GC algorithm would break reflinks and thus duplicate
 * data that was shared by multiple owners before.  Because of that, reflinks
 * are currently not supported on zoned file systems: a file system with both
 * features can't be created or mounted.
 */
static int
xfs_zoned_gcd(
	void			*private)
{
	struct xfs_zone_gc_data	*data = private;
	struct xfs_mount	*mp = data->mp;
	struct xfs_zone_info	*zi = mp->m_zone_info;
	unsigned int		nofs_flag;

	nofs_flag = memalloc_nofs_save();
	set_freezable();

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
		xfs_set_zonegc_running(mp);
		if (xfs_zone_gc_handle_work(data))
			continue;

		if (list_empty(&data->reading) &&
		    list_empty(&data->writing) &&
		    list_empty(&data->resetting) &&
		    !zi->zi_reset_list) {
			xfs_clear_zonegc_running(mp);
			xfs_zoned_resv_wake_all(mp);

			if (kthread_should_stop()) {
				__set_current_state(TASK_RUNNING);
				break;
			}

			if (kthread_should_park()) {
				__set_current_state(TASK_RUNNING);
				kthread_parkme();
				continue;
			}
		}

		schedule();
	}
	xfs_clear_zonegc_running(mp);

	if (data->iter.victim_rtg)
		xfs_rtgroup_rele(data->iter.victim_rtg);

	memalloc_nofs_restore(nofs_flag);
	xfs_zone_gc_data_free(data);
	return 0;
}

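/*
 * Unpark the GC thread so that it starts processing work again.  Read-only
 * mounts keep the thread parked (see xfs_zone_gc_mount()).
 */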
void
xfs_zone_gc_start(
	struct xfs_mount	*mp)
{
	if (xfs_has_zoned(mp))
		kthread_unpark(mp->m_zone_info->zi_gc_thread);
}

void
xfs_zone_gc_stop(
	struct xfs_mount	*mp)
{
	if (xfs_has_zoned(mp))
		kthread_park(mp->m_zone_info->zi_gc_thread);
}

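/*
 * Set up the GC infrastructure at mount time: pick or open the initial GC
 * target zone, allocate the per-mount GC state and create the (initially
 * parked) GC thread.
 */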
int
xfs_zone_gc_mount(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_gc_data	*data;
	struct xfs_open_zone	*oz;
	int			error;

	/*
	 * If there are no free zones available for GC, pick the open zone with
	 * the least used space to GC into.  This should only happen after an
	 * unclean shutdown near ENOSPC while GC was ongoing.
	 *
	 * We also need to do this for the first gc zone allocation if we
	 * unmounted while at the open limit.
	 */
	if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_FREE) ||
	    zi->zi_nr_open_zones == mp->m_max_open_zones)
		oz = xfs_zone_gc_steal_open(zi);
	else
		oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true);
	if (!oz) {
		xfs_warn(mp, "unable to allocate a zone for gc");
		error = -EIO;
		goto out;
	}

	trace_xfs_zone_gc_target_opened(oz->oz_rtg);
	zi->zi_open_gc_zone = oz;

	data = xfs_zone_gc_data_alloc(mp);
	if (!data) {
		error = -ENOMEM;
		goto out_put_gc_zone;
	}

	mp->m_zone_info->zi_gc_thread = kthread_create(xfs_zoned_gcd, data,
			"xfs-zone-gc/%s", mp->m_super->s_id);
	if (IS_ERR(mp->m_zone_info->zi_gc_thread)) {
		xfs_warn(mp, "unable to create zone gc thread");
		error = PTR_ERR(mp->m_zone_info->zi_gc_thread);
		goto out_free_gc_data;
	}

	/* xfs_zone_gc_start will unpark for rw mounts */
	kthread_park(mp->m_zone_info->zi_gc_thread);
	return 0;

out_free_gc_data:
	kfree(data);
out_put_gc_zone:
	xfs_open_zone_put(zi->zi_open_gc_zone);
out:
	return error;
}

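/*
 * Tear down the GC thread and release the GC target zone at unmount time.
 */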
void
xfs_zone_gc_unmount(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;

	kthread_stop(zi->zi_gc_thread);
	if (zi->zi_open_gc_zone)
		xfs_open_zone_put(zi->zi_open_gc_zone);
}