// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"
#include "xfs_trace.h"

/*
 * Implement Garbage Collection (GC) of partially used zones.
 *
 * To support the purely sequential writes in each zone, zoned XFS needs to be
 * able to move data remaining in a zone out of it to reset the zone to prepare
 * for writing to it again.
 *
 * This is done by the GC thread implemented in this file.  To support that a
 * number of zones (XFS_GC_ZONES) is reserved from the user visible capacity to
 * write the garbage collected data into.
 *
 * Whenever the available space is below the chosen threshold, the GC thread
 * looks for potential non-empty but not fully used zones that are worth
 * reclaiming.  Once found, the rmap for the victim zone is queried, and after
 * a bit of sorting to reduce fragmentation, the still live extents are read
 * into memory and written to the GC target zone, and the bmap btree of the
 * files is updated to point to the new location.  To avoid taking the IOLOCK
 * and MMAPLOCK for the entire GC process and thus affecting the latency of
 * user reads and writes to the files, the GC writes are speculative and the
 * I/O completion checks that no other writes happened for the affected regions
 * before remapping.
 *
 * Once a zone does not contain any valid data, be that through GC or user
 * block removal, it is queued for a zone reset.  The reset operation
 * carefully ensures that the RT device cache is flushed and all transactions
 * referencing the rmap have been committed to disk.
 */

/*
 * Size of each GC scratch pad.  This is also the upper bound for each
 * GC I/O, which helps to keep latency down.
 */
#define XFS_GC_CHUNK_SIZE	SZ_1M

/*
 * Scratchpad data to read GCed data into.
 *
 * The offset member tracks where the next allocation starts, and freed tracks
 * the amount of space that is not used anymore.
 */
#define XFS_ZONE_GC_NR_SCRATCH	2
struct xfs_zone_scratch {
	struct folio		*folio;
	unsigned int		offset;
	unsigned int		freed;
};

/*
 * Chunk that is read and written for each GC operation.
 *
 * Note that for writes to actual zoned devices, the chunk can be split when
 * reaching the hardware limit.
 */
struct xfs_gc_bio {
	struct xfs_zone_gc_data	*data;

	/*
	 * Entry into the reading/writing/resetting list.  Only accessed from
	 * the GC thread, so no locking needed.
	 */
	struct list_head	entry;

	/*
	 * State of this gc_bio.  Done means the current I/O completed.
	 * Set from the bio end I/O handler, read from the GC thread.
	 */
	enum {
		XFS_GC_BIO_NEW,
		XFS_GC_BIO_DONE,
	} state;

	/*
	 * Pointer to the inode and byte range in the inode that this
	 * GC chunk is operating on.
	 */
	struct xfs_inode	*ip;
	loff_t			offset;
	unsigned int		len;

	/*
	 * Existing startblock (in the zone to be freed) and newly assigned
	 * daddr in the zone GCed into.
	 */
	xfs_fsblock_t		old_startblock;
	xfs_daddr_t		new_daddr;
	struct xfs_zone_scratch	*scratch;

	/* Are we writing to a sequential write required zone? */
	bool			is_seq;

	/* Open Zone being written to */
	struct xfs_open_zone	*oz;

	/* Bio used for reads and writes, including the bvec used by it */
	struct bio_vec		bv;
	struct bio		bio;	/* must be last */
};

#define XFS_ZONE_GC_RECS	1024

/* iterator, needs to be reinitialized for each victim zone */
struct xfs_zone_gc_iter {
	struct xfs_rtgroup	*victim_rtg;
	unsigned int		rec_count;
	unsigned int		rec_idx;
	xfs_agblock_t		next_startblock;
	struct xfs_rmap_irec	*recs;
};

/*
 * Per-mount GC state.
 */
struct xfs_zone_gc_data {
	struct xfs_mount	*mp;

	/* bioset used to allocate the gc_bios */
	struct bio_set		bio_set;

	/*
	 * Scratchpad used, and index to indicate which one is used.
	 */
	struct xfs_zone_scratch	scratch[XFS_ZONE_GC_NR_SCRATCH];
	unsigned int		scratch_idx;

	/*
	 * List of bios currently being read, written and reset.
	 * These lists are only accessed by the GC thread itself, and must only
	 * be processed in order.
	 */
	struct list_head	reading;
	struct list_head	writing;
	struct list_head	resetting;

	/*
	 * Iterator for the victim zone.
	 */
	struct xfs_zone_gc_iter	iter;
};

/*
 * We aim to keep enough zones free in stock to fully use the open zone limit
 * for data placement purposes.
 */
bool
xfs_zoned_need_gc(
	struct xfs_mount	*mp)
{
	if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE))
		return false;
	if (xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE) <
	    mp->m_groups[XG_TYPE_RTG].blocks *
			(mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
		return true;
	return false;
}

static struct xfs_zone_gc_data *
xfs_zone_gc_data_alloc(
	struct xfs_mount	*mp)
{
	struct xfs_zone_gc_data	*data;
	int			i;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;
	data->iter.recs = kcalloc(XFS_ZONE_GC_RECS, sizeof(*data->iter.recs),
			GFP_KERNEL);
	if (!data->iter.recs)
		goto out_free_data;

	/*
	 * We actually only need a single bio_vec.  It would be nice to have
	 * a flag that only allocates the inline bvecs and not the separate
	 * bvec pool.
	 */
	if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio),
			BIOSET_NEED_BVECS))
		goto out_free_recs;
	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) {
		data->scratch[i].folio =
			folio_alloc(GFP_KERNEL, get_order(XFS_GC_CHUNK_SIZE));
		if (!data->scratch[i].folio)
			goto out_free_scratch;
	}
	INIT_LIST_HEAD(&data->reading);
	INIT_LIST_HEAD(&data->writing);
	INIT_LIST_HEAD(&data->resetting);
	data->mp = mp;
	return data;

out_free_scratch:
	while (--i >= 0)
		folio_put(data->scratch[i].folio);
	bioset_exit(&data->bio_set);
out_free_recs:
	kfree(data->iter.recs);
out_free_data:
	kfree(data);
	return NULL;
}
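
/* Tear down the per-mount GC state set up by xfs_zone_gc_data_alloc(). */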
static void
xfs_zone_gc_data_free(
	struct xfs_zone_gc_data	*data)
{
	int			i;

	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++)
		folio_put(data->scratch[i].folio);
	bioset_exit(&data->bio_set);
	kfree(data->iter.recs);
	kfree(data);
}

static void
xfs_zone_gc_iter_init(
	struct xfs_zone_gc_iter	*iter,
	struct xfs_rtgroup	*victim_rtg)
{
	iter->next_startblock = 0;
	iter->rec_count = 0;
	iter->rec_idx = 0;
	iter->victim_rtg = victim_rtg;
}

/*
 * Query the rmap of the victim zone to gather the records to evacuate.
 */
static int
xfs_zone_gc_query_cb(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *irec,
	void			*private)
{
	struct xfs_zone_gc_iter	*iter = private;

	ASSERT(!XFS_RMAP_NON_INODE_OWNER(irec->rm_owner));
	ASSERT(!xfs_is_sb_inum(cur->bc_mp, irec->rm_owner));
	ASSERT(!(irec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)));

	iter->recs[iter->rec_count] = *irec;
	if (++iter->rec_count == XFS_ZONE_GC_RECS) {
		iter->next_startblock =
			irec->rm_startblock + irec->rm_blockcount;
		return 1;
	}
	return 0;
}

#define cmp_int(l, r)		((l > r) - (l < r))

static int
xfs_zone_gc_rmap_rec_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_rmap_irec *reca = a;
	const struct xfs_rmap_irec *recb = b;
	int			diff;

	diff = cmp_int(reca->rm_owner, recb->rm_owner);
	if (diff)
		return diff;
	return cmp_int(reca->rm_offset, recb->rm_offset);
}
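
/*
 * Fetch the next batch of up to XFS_ZONE_GC_RECS rmap records from the victim
 * zone and sort them for evacuation.  Drops the reference on the victim zone
 * once it has been fully queried and no records remain.
 */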
static int
xfs_zone_gc_query(
	struct xfs_mount	*mp,
	struct xfs_zone_gc_iter	*iter)
{
	struct xfs_rtgroup	*rtg = iter->victim_rtg;
	struct xfs_rmap_irec	ri_low = { };
	struct xfs_rmap_irec	ri_high;
	struct xfs_btree_cur	*cur;
	struct xfs_trans	*tp;
	int			error;

	ASSERT(iter->next_startblock <= rtg_blocks(rtg));
	if (iter->next_startblock == rtg_blocks(rtg))
		goto done;

	ASSERT(iter->next_startblock < rtg_blocks(rtg));
	ri_low.rm_startblock = iter->next_startblock;
	memset(&ri_high, 0xFF, sizeof(ri_high));

	iter->rec_idx = 0;
	iter->rec_count = 0;

	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
			xfs_zone_gc_query_cb, iter);
	xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
	xfs_btree_del_cursor(cur, error < 0 ? error : 0);
	xfs_trans_cancel(tp);

	if (error < 0)
		return error;

	/*
	 * Sort the rmap records by inode number and increasing offset to
	 * defragment the mappings.
	 *
	 * This could be further enhanced by an even bigger look ahead window,
	 * but that's better left until we have better detection of changes to
	 * inode mapping to avoid the potential of GCing already dead data.
	 */
	sort(iter->recs, iter->rec_count, sizeof(iter->recs[0]),
			xfs_zone_gc_rmap_rec_cmp, NULL);

	if (error == 0) {
		/*
		 * We finished iterating through the zone.
		 */
		iter->next_startblock = rtg_blocks(rtg);
		if (iter->rec_count == 0)
			goto done;
	}

	return 0;
done:
	xfs_rtgroup_rele(iter->victim_rtg);
	iter->victim_rtg = NULL;
	return 0;
}

static bool
xfs_zone_gc_iter_next(
	struct xfs_mount	*mp,
	struct xfs_zone_gc_iter	*iter,
	struct xfs_rmap_irec	*chunk_rec,
	struct xfs_inode	**ipp)
{
	struct xfs_rmap_irec	*irec;
	int			error;

	if (!iter->victim_rtg)
		return false;

retry:
	if (iter->rec_idx == iter->rec_count) {
		error = xfs_zone_gc_query(mp, iter);
		if (error)
			goto fail;
		if (!iter->victim_rtg)
			return false;
	}

	irec = &iter->recs[iter->rec_idx];
	error = xfs_iget(mp, NULL, irec->rm_owner,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, ipp);
	if (error) {
		/*
		 * If the inode was already deleted, skip over it.
		 */
		if (error == -ENOENT) {
			iter->rec_idx++;
			goto retry;
		}
		goto fail;
	}

	if (!S_ISREG(VFS_I(*ipp)->i_mode) || !XFS_IS_REALTIME_INODE(*ipp)) {
		iter->rec_idx++;
		xfs_irele(*ipp);
		goto retry;
	}

	*chunk_rec = *irec;
	return true;

fail:
	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	return false;
}

static void
xfs_zone_gc_iter_advance(
	struct xfs_zone_gc_iter	*iter,
	xfs_extlen_t		count_fsb)
{
	struct xfs_rmap_irec	*irec = &iter->recs[iter->rec_idx];

	irec->rm_offset += count_fsb;
	irec->rm_startblock += count_fsb;
	irec->rm_blockcount -= count_fsb;
	if (!irec->rm_blockcount)
		iter->rec_idx++;
}

static struct xfs_rtgroup *
xfs_zone_gc_pick_victim_from(
	struct xfs_mount	*mp,
	uint32_t		bucket)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	uint32_t		victim_used = U32_MAX;
	struct xfs_rtgroup	*victim_rtg = NULL;
	uint32_t		bit;

	if (!zi->zi_used_bucket_entries[bucket])
		return NULL;

	for_each_set_bit(bit, zi->zi_used_bucket_bitmap[bucket],
			mp->m_sb.sb_rgcount) {
		struct xfs_rtgroup *rtg = xfs_rtgroup_grab(mp, bit);

		if (!rtg)
			continue;

		/* skip zones that are just waiting for a reset */
		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
			xfs_rtgroup_rele(rtg);
			continue;
		}

		if (victim_rtg)
			xfs_rtgroup_rele(victim_rtg);
		victim_rtg = rtg;
		victim_used = rtg_rmap(rtg)->i_used_blocks;

		/*
		 * Any zone that is less than 1 percent used is fair game for
		 * instant reclaim.  All of these zones are in the last
		 * bucket, so avoid the expensive division for the zones
		 * in the other buckets.
		 */
		if (bucket == 0 &&
		    rtg_rmap(rtg)->i_used_blocks < rtg_blocks(rtg) / 100)
			break;
	}

	return victim_rtg;
}

/*
 * Iterate through all zones marked as reclaimable and find a candidate to
 * reclaim.
 */
static bool
xfs_zone_gc_select_victim(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_zone_gc_iter	*iter = &data->iter;
	struct xfs_mount	*mp = data->mp;
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_rtgroup	*victim_rtg = NULL;
	unsigned int		bucket;

	if (xfs_is_shutdown(mp))
		return false;

	if (iter->victim_rtg)
		return true;

	/*
	 * Don't start new work if we are asked to stop or park.
	 */
	if (kthread_should_stop() || kthread_should_park())
		return false;

	if (!xfs_zoned_need_gc(mp))
		return false;

	spin_lock(&zi->zi_used_buckets_lock);
	for (bucket = 0; bucket < XFS_ZONE_USED_BUCKETS; bucket++) {
		victim_rtg = xfs_zone_gc_pick_victim_from(mp, bucket);
		if (victim_rtg)
			break;
	}
	spin_unlock(&zi->zi_used_buckets_lock);

	if (!victim_rtg)
		return false;

	trace_xfs_zone_gc_select_victim(victim_rtg, bucket);
	xfs_zone_gc_iter_init(iter, victim_rtg);
	return true;
}
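
/*
 * Take the open zone with the lowest write pointer (i.e. the least data
 * written so far) away from regular writers and dedicate it to GC.  Used at
 * mount time when no zone can be opened for GC the normal way, e.g. after an
 * unclean shutdown near ENOSPC or when unmounting happened at the open zone
 * limit.
 */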
static struct xfs_open_zone *
xfs_zone_gc_steal_open(
	struct xfs_zone_info	*zi)
{
	struct xfs_open_zone	*oz, *found = NULL;

	spin_lock(&zi->zi_open_zones_lock);
	list_for_each_entry(oz, &zi->zi_open_zones, oz_entry) {
		if (!found ||
		    oz->oz_write_pointer < found->oz_write_pointer)
			found = oz;
	}

	if (found) {
		found->oz_is_gc = true;
		list_del_init(&found->oz_entry);
		zi->zi_nr_open_zones--;
	}

	spin_unlock(&zi->zi_open_zones_lock);
	return found;
}

static struct xfs_open_zone *
xfs_zone_gc_select_target(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_open_zone	*oz = zi->zi_open_gc_zone;

	/*
	 * We need to wait for pending writes to finish.
	 */
	if (oz && oz->oz_written < rtg_blocks(oz->oz_rtg))
		return NULL;

	ASSERT(zi->zi_nr_open_zones <=
		mp->m_max_open_zones - XFS_OPEN_GC_ZONES);
	oz = xfs_open_zone(mp, true);
	if (oz)
		trace_xfs_zone_gc_target_opened(oz->oz_rtg);
	spin_lock(&zi->zi_open_zones_lock);
	zi->zi_open_gc_zone = oz;
	spin_unlock(&zi->zi_open_zones_lock);
	return oz;
}

/*
 * Ensure we have a valid open zone to write the GC data to.
 *
 * If the current target zone has space keep writing to it, else first wait for
 * all pending writes and then pick a new one.
 */
static struct xfs_open_zone *
xfs_zone_gc_ensure_target(
	struct xfs_mount	*mp)
{
	struct xfs_open_zone	*oz = mp->m_zone_info->zi_open_gc_zone;

	if (!oz || oz->oz_write_pointer == rtg_blocks(oz->oz_rtg))
		return xfs_zone_gc_select_target(mp);
	return oz;
}

static unsigned int
xfs_zone_gc_scratch_available(
	struct xfs_zone_gc_data	*data)
{
	return XFS_GC_CHUNK_SIZE - data->scratch[data->scratch_idx].offset;
}

static bool
xfs_zone_gc_space_available(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_open_zone	*oz;

	oz = xfs_zone_gc_ensure_target(data->mp);
	if (!oz)
		return false;
	return oz->oz_write_pointer < rtg_blocks(oz->oz_rtg) &&
		xfs_zone_gc_scratch_available(data);
}

static void
xfs_zone_gc_end_io(
	struct bio		*bio)
{
	struct xfs_gc_bio	*chunk =
		container_of(bio, struct xfs_gc_bio, bio);
	struct xfs_zone_gc_data	*data = chunk->data;

	WRITE_ONCE(chunk->state, XFS_GC_BIO_DONE);
	wake_up_process(data->mp->m_zone_info->zi_gc_thread);
}

static struct xfs_open_zone *
xfs_zone_gc_alloc_blocks(
	struct xfs_zone_gc_data	*data,
	xfs_extlen_t		*count_fsb,
	xfs_daddr_t		*daddr,
	bool			*is_seq)
{
	struct xfs_mount	*mp = data->mp;
	struct xfs_open_zone	*oz;

	oz = xfs_zone_gc_ensure_target(mp);
	if (!oz)
		return NULL;

	*count_fsb = min(*count_fsb,
		XFS_B_TO_FSB(mp, xfs_zone_gc_scratch_available(data)));

	/*
	 * Directly allocate GC blocks from the reserved pool.
	 *
	 * If we'd take them from the normal pool we could be stealing blocks
	 * from a regular writer, which would then have to wait for GC and
	 * deadlock.
	 */
	spin_lock(&mp->m_sb_lock);
	*count_fsb = min(*count_fsb,
		rtg_blocks(oz->oz_rtg) - oz->oz_write_pointer);
	*count_fsb = min3(*count_fsb,
		mp->m_free[XC_FREE_RTEXTENTS].res_avail,
		mp->m_free[XC_FREE_RTAVAILABLE].res_avail);
	mp->m_free[XC_FREE_RTEXTENTS].res_avail -= *count_fsb;
	mp->m_free[XC_FREE_RTAVAILABLE].res_avail -= *count_fsb;
	spin_unlock(&mp->m_sb_lock);

	if (!*count_fsb)
		return NULL;

	*daddr = xfs_gbno_to_daddr(&oz->oz_rtg->rtg_group, 0);
	*is_seq = bdev_zone_is_seq(mp->m_rtdev_targp->bt_bdev, *daddr);
	if (!*is_seq)
		*daddr += XFS_FSB_TO_BB(mp, oz->oz_write_pointer);
	oz->oz_write_pointer += *count_fsb;
	atomic_inc(&oz->oz_ref);
	return oz;
}

static bool
xfs_zone_gc_start_chunk(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_zone_gc_iter	*iter = &data->iter;
	struct xfs_mount	*mp = data->mp;
	struct block_device	*bdev = mp->m_rtdev_targp->bt_bdev;
	struct xfs_open_zone	*oz;
	struct xfs_rmap_irec	irec;
	struct xfs_gc_bio	*chunk;
	struct xfs_inode	*ip;
	struct bio		*bio;
	xfs_daddr_t		daddr;
	bool			is_seq;

	if (xfs_is_shutdown(mp))
		return false;

	if (!xfs_zone_gc_iter_next(mp, iter, &irec, &ip))
		return false;
	oz = xfs_zone_gc_alloc_blocks(data, &irec.rm_blockcount, &daddr,
			&is_seq);
	if (!oz) {
		xfs_irele(ip);
		return false;
	}

	bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_NOFS, &data->bio_set);

	chunk = container_of(bio, struct xfs_gc_bio, bio);
	chunk->ip = ip;
	chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset);
	chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
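	/*
	 * Remember the old location so that I/O completion can verify that
	 * no other write raced with GC before remapping the extent to its
	 * new address.
	 */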
	chunk->old_startblock =
		xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock);
	chunk->new_daddr = daddr;
	chunk->is_seq = is_seq;
	chunk->scratch = &data->scratch[data->scratch_idx];
	chunk->data = data;
	chunk->oz = oz;

	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
	bio->bi_end_io = xfs_zone_gc_end_io;
	bio_add_folio_nofail(bio, chunk->scratch->folio, chunk->len,
			chunk->scratch->offset);
	chunk->scratch->offset += chunk->len;
	if (chunk->scratch->offset == XFS_GC_CHUNK_SIZE) {
		data->scratch_idx =
			(data->scratch_idx + 1) % XFS_ZONE_GC_NR_SCRATCH;
	}
	WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
	list_add_tail(&chunk->entry, &data->reading);
	xfs_zone_gc_iter_advance(iter, irec.rm_blockcount);

	submit_bio(bio);
	return true;
}

static void
xfs_zone_gc_free_chunk(
	struct xfs_gc_bio	*chunk)
{
	list_del(&chunk->entry);
	xfs_open_zone_put(chunk->oz);
	xfs_irele(chunk->ip);
	bio_put(&chunk->bio);
}

static void
xfs_zone_gc_submit_write(
	struct xfs_zone_gc_data	*data,
	struct xfs_gc_bio	*chunk)
{
	if (chunk->is_seq) {
		chunk->bio.bi_opf &= ~REQ_OP_WRITE;
		chunk->bio.bi_opf |= REQ_OP_ZONE_APPEND;
	}
	chunk->bio.bi_iter.bi_sector = chunk->new_daddr;
	chunk->bio.bi_end_io = xfs_zone_gc_end_io;
	submit_bio(&chunk->bio);
}

static struct xfs_gc_bio *
xfs_zone_gc_split_write(
	struct xfs_zone_gc_data	*data,
	struct xfs_gc_bio	*chunk)
{
	struct queue_limits	*lim =
		&bdev_get_queue(chunk->bio.bi_bdev)->limits;
	struct xfs_gc_bio	*split_chunk;
	int			split_sectors;
	unsigned int		split_len;
	struct bio		*split;
	unsigned int		nsegs;

	if (!chunk->is_seq)
		return NULL;
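
	/*
	 * Check if the bio needs to be split to fit into a single zone append
	 * command; a return value of 0 means no split is needed.
	 */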
	split_sectors = bio_split_rw_at(&chunk->bio, lim, &nsegs,
			lim->max_zone_append_sectors << SECTOR_SHIFT);
	if (!split_sectors)
		return NULL;

	/* ensure the split chunk is still block size aligned */
	split_sectors = ALIGN_DOWN(split_sectors << SECTOR_SHIFT,
			data->mp->m_sb.sb_blocksize) >> SECTOR_SHIFT;
	split_len = split_sectors << SECTOR_SHIFT;

	split = bio_split(&chunk->bio, split_sectors, GFP_NOFS, &data->bio_set);
	split_chunk = container_of(split, struct xfs_gc_bio, bio);
	split_chunk->data = data;
	ihold(VFS_I(chunk->ip));
	split_chunk->ip = chunk->ip;
	split_chunk->is_seq = chunk->is_seq;
	split_chunk->scratch = chunk->scratch;
	split_chunk->offset = chunk->offset;
	split_chunk->len = split_len;
	split_chunk->old_startblock = chunk->old_startblock;
	split_chunk->new_daddr = chunk->new_daddr;
	split_chunk->oz = chunk->oz;
	atomic_inc(&chunk->oz->oz_ref);

	chunk->offset += split_len;
	chunk->len -= split_len;
	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);

	/* add right before the original chunk */
	WRITE_ONCE(split_chunk->state, XFS_GC_BIO_NEW);
	list_add_tail(&split_chunk->entry, &chunk->entry);
	return split_chunk;
}

static void
xfs_zone_gc_write_chunk(
	struct xfs_gc_bio	*chunk)
{
	struct xfs_zone_gc_data	*data = chunk->data;
	struct xfs_mount	*mp = chunk->ip->i_mount;
	unsigned int		folio_offset = chunk->bio.bi_io_vec->bv_offset;
	struct xfs_gc_bio	*split_chunk;

	if (chunk->bio.bi_status)
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	if (xfs_is_shutdown(mp)) {
		xfs_zone_gc_free_chunk(chunk);
		return;
	}

	WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
	list_move_tail(&chunk->entry, &data->writing);

	bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE);
	bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len,
			folio_offset);
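
	/*
	 * Writes to sequential write required zones may exceed the device's
	 * zone append limit; split off and submit chunks until the remainder
	 * fits into a single write.
	 */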
	while ((split_chunk = xfs_zone_gc_split_write(data, chunk)))
		xfs_zone_gc_submit_write(data, split_chunk);
	xfs_zone_gc_submit_write(data, chunk);
}

static void
xfs_zone_gc_finish_chunk(
	struct xfs_gc_bio	*chunk)
{
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	struct xfs_inode	*ip = chunk->ip;
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	if (chunk->bio.bi_status)
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	if (xfs_is_shutdown(mp)) {
		xfs_zone_gc_free_chunk(chunk);
		return;
	}

	chunk->scratch->freed += chunk->len;
	if (chunk->scratch->freed == chunk->scratch->offset) {
		chunk->scratch->offset = 0;
		chunk->scratch->freed = 0;
	}

	/*
	 * Cycle through the iolock and wait for direct I/O and layouts to
	 * ensure no one is reading from the old mapping before it goes away.
	 *
	 * Note that xfs_zoned_end_io() below checks that no other writer raced
	 * with us to update the mapping by checking that the old startblock
	 * didn't change.
	 */
	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(VFS_I(ip), &iolock, BREAK_UNMAP);
	if (!error)
		inode_dio_wait(VFS_I(ip));
	xfs_iunlock(ip, iolock);
	if (error)
		goto free;

	if (chunk->is_seq)
		chunk->new_daddr = chunk->bio.bi_iter.bi_sector;
	error = xfs_zoned_end_io(ip, chunk->offset, chunk->len,
			chunk->new_daddr, chunk->oz, chunk->old_startblock);
free:
	if (error)
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
	xfs_zone_gc_free_chunk(chunk);
}

static void
xfs_zone_gc_finish_reset(
	struct xfs_gc_bio	*chunk)
{
	struct xfs_rtgroup	*rtg = chunk->bio.bi_private;
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_zone_info	*zi = mp->m_zone_info;

	if (chunk->bio.bi_status) {
		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
		goto out;
	}

	xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_FREE);
	atomic_inc(&zi->zi_nr_free_zones);

	xfs_zoned_add_available(mp, rtg_blocks(rtg));

	wake_up_all(&zi->zi_zone_wait);
out:
	list_del(&chunk->entry);
	bio_put(&chunk->bio);
}

static bool
xfs_zone_gc_prepare_reset(
	struct bio		*bio,
	struct xfs_rtgroup	*rtg)
{
	trace_xfs_zone_reset(rtg);

	ASSERT(rtg_rmap(rtg)->i_used_blocks == 0);
	bio->bi_iter.bi_sector = xfs_gbno_to_daddr(&rtg->rtg_group, 0);
	if (!bdev_zone_is_seq(bio->bi_bdev, bio->bi_iter.bi_sector)) {
		if (!bdev_max_discard_sectors(bio->bi_bdev))
			return false;
		bio->bi_opf = REQ_OP_DISCARD | REQ_SYNC;
		bio->bi_iter.bi_size =
			XFS_FSB_TO_B(rtg_mount(rtg), rtg_blocks(rtg));
	}

	return true;
}
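
/*
 * Reset a single zone synchronously, outside of the GC thread's asynchronous
 * reset handling.
 */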
int
xfs_zone_gc_reset_sync(
	struct xfs_rtgroup	*rtg)
{
	int			error = 0;
	struct bio		bio;

	bio_init(&bio, rtg_mount(rtg)->m_rtdev_targp->bt_bdev, NULL, 0,
			REQ_OP_ZONE_RESET);
	if (xfs_zone_gc_prepare_reset(&bio, rtg))
		error = submit_bio_wait(&bio);
	bio_uninit(&bio);

	return error;
}

static void
xfs_zone_gc_reset_zones(
	struct xfs_zone_gc_data	*data,
	struct xfs_group	*reset_list)
{
	struct xfs_group	*next = reset_list;

	if (blkdev_issue_flush(data->mp->m_rtdev_targp->bt_bdev) < 0) {
		xfs_force_shutdown(data->mp, SHUTDOWN_META_IO_ERROR);
		return;
	}

	do {
		struct xfs_rtgroup	*rtg = to_rtg(next);
		struct xfs_gc_bio	*chunk;
		struct bio		*bio;

		xfs_log_force_inode(rtg_rmap(rtg));

		next = rtg_group(rtg)->xg_next_reset;
		rtg_group(rtg)->xg_next_reset = NULL;

		bio = bio_alloc_bioset(rtg_mount(rtg)->m_rtdev_targp->bt_bdev,
				0, REQ_OP_ZONE_RESET, GFP_NOFS, &data->bio_set);
		bio->bi_private = rtg;
		bio->bi_end_io = xfs_zone_gc_end_io;

		chunk = container_of(bio, struct xfs_gc_bio, bio);
		chunk->data = data;
		WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
		list_add_tail(&chunk->entry, &data->resetting);

		/*
		 * Also use the bio to drive the state machine when neither
		 * zone reset nor discard is supported to keep things simple.
		 */
		if (xfs_zone_gc_prepare_reset(bio, rtg))
			submit_bio(bio);
		else
			bio_endio(bio);
	} while (next);
}

/*
 * Handle the work to read and write data for GC and to reset the zones,
 * including handling all completions.
 *
 * Note that the order of the chunks is preserved so that we don't undo the
 * optimal order established by xfs_zone_gc_query().
 */
static bool
xfs_zone_gc_handle_work(
	struct xfs_zone_gc_data	*data)
{
	struct xfs_zone_info	*zi = data->mp->m_zone_info;
	struct xfs_gc_bio	*chunk, *next;
	struct xfs_group	*reset_list;
	struct blk_plug		plug;

	spin_lock(&zi->zi_reset_list_lock);
	reset_list = zi->zi_reset_list;
	zi->zi_reset_list = NULL;
	spin_unlock(&zi->zi_reset_list_lock);

	if (!xfs_zone_gc_select_victim(data) ||
	    !xfs_zone_gc_space_available(data)) {
		if (list_empty(&data->reading) &&
		    list_empty(&data->writing) &&
		    list_empty(&data->resetting) &&
		    !reset_list)
			return false;
	}

	__set_current_state(TASK_RUNNING);
	try_to_freeze();

	if (reset_list)
		xfs_zone_gc_reset_zones(data, reset_list);

	list_for_each_entry_safe(chunk, next, &data->resetting, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		xfs_zone_gc_finish_reset(chunk);
	}

	list_for_each_entry_safe(chunk, next, &data->writing, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		xfs_zone_gc_finish_chunk(chunk);
	}

	blk_start_plug(&plug);
	list_for_each_entry_safe(chunk, next, &data->reading, entry) {
		if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
			break;
		xfs_zone_gc_write_chunk(chunk);
	}
	blk_finish_plug(&plug);

	blk_start_plug(&plug);
	while (xfs_zone_gc_start_chunk(data))
		;
	blk_finish_plug(&plug);
	return true;
}

/*
 * Note that the current GC algorithm would break reflinks and thus duplicate
 * data that was shared by multiple owners before.  Because of that reflinks
 * are currently not supported on zoned file systems and can't be created or
 * mounted.
 */
static int
xfs_zoned_gcd(
	void			*private)
{
	struct xfs_zone_gc_data	*data = private;
	struct xfs_mount	*mp = data->mp;
	struct xfs_zone_info	*zi = mp->m_zone_info;
	unsigned int		nofs_flag;

	nofs_flag = memalloc_nofs_save();
	set_freezable();

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
		xfs_set_zonegc_running(mp);
		if (xfs_zone_gc_handle_work(data))
			continue;

		if (list_empty(&data->reading) &&
		    list_empty(&data->writing) &&
		    list_empty(&data->resetting) &&
		    !zi->zi_reset_list) {
			xfs_clear_zonegc_running(mp);
			xfs_zoned_resv_wake_all(mp);

			if (kthread_should_stop()) {
				__set_current_state(TASK_RUNNING);
				break;
			}

			if (kthread_should_park()) {
				__set_current_state(TASK_RUNNING);
				kthread_parkme();
				continue;
			}
		}

		schedule();
	}
	xfs_clear_zonegc_running(mp);

	if (data->iter.victim_rtg)
		xfs_rtgroup_rele(data->iter.victim_rtg);

	memalloc_nofs_restore(nofs_flag);
	xfs_zone_gc_data_free(data);
	return 0;
}

void
xfs_zone_gc_start(
	struct xfs_mount	*mp)
{
	if (xfs_has_zoned(mp))
		kthread_unpark(mp->m_zone_info->zi_gc_thread);
}

void
xfs_zone_gc_stop(
	struct xfs_mount	*mp)
{
	if (xfs_has_zoned(mp))
		kthread_park(mp->m_zone_info->zi_gc_thread);
}
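
/*
 * Set up the GC infrastructure at mount time: pick or open a GC target zone,
 * allocate the per-mount GC state and create the GC thread.  The thread is
 * created parked; xfs_zone_gc_start() unparks it for read-write mounts.
 */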
int
xfs_zone_gc_mount(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;
	struct xfs_zone_gc_data	*data;
	struct xfs_open_zone	*oz;
	int			error;

	/*
	 * If there are no free zones available for GC, pick the open zone with
	 * the least used space to GC into.  This should only happen after an
	 * unclean shutdown near ENOSPC while GC was ongoing.
	 *
	 * We also need to do this for the first gc zone allocation if we
	 * unmounted while at the open limit.
	 */
	if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_FREE) ||
	    zi->zi_nr_open_zones == mp->m_max_open_zones)
		oz = xfs_zone_gc_steal_open(zi);
	else
		oz = xfs_open_zone(mp, true);
	if (!oz) {
		xfs_warn(mp, "unable to allocate a zone for gc");
		error = -EIO;
		goto out;
	}

	trace_xfs_zone_gc_target_opened(oz->oz_rtg);
	zi->zi_open_gc_zone = oz;

	data = xfs_zone_gc_data_alloc(mp);
	if (!data) {
		error = -ENOMEM;
		goto out_put_gc_zone;
	}

	mp->m_zone_info->zi_gc_thread = kthread_create(xfs_zoned_gcd, data,
			"xfs-zone-gc/%s", mp->m_super->s_id);
	if (IS_ERR(mp->m_zone_info->zi_gc_thread)) {
		xfs_warn(mp, "unable to create zone gc thread");
		error = PTR_ERR(mp->m_zone_info->zi_gc_thread);
		goto out_free_gc_data;
	}

	/* xfs_zone_gc_start will unpark for rw mounts */
	kthread_park(mp->m_zone_info->zi_gc_thread);
	return 0;

out_free_gc_data:
	kfree(data);
out_put_gc_zone:
	xfs_open_zone_put(zi->zi_open_gc_zone);
out:
	return error;
}

void
xfs_zone_gc_unmount(
	struct xfs_mount	*mp)
{
	struct xfs_zone_info	*zi = mp->m_zone_info;

	kthread_stop(zi->zi_gc_thread);
	if (zi->zi_open_gc_zone)
		xfs_open_zone_put(zi->zi_open_gc_zone);
}