// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024-2025 Christoph Hellwig.
 */
#include <linux/iomap.h>
#include <linux/list_sort.h>
#include "internal.h"

struct bio_set iomap_ioend_bioset;
EXPORT_SYMBOL_GPL(iomap_ioend_bioset);

/*
 * Initialize the ioend embedded in @bio for an I/O starting at @file_offset.
 * The submitter holds the initial io_remaining reference.
 */
struct iomap_ioend *iomap_init_ioend(struct inode *inode,
		struct bio *bio, loff_t file_offset, u16 ioend_flags)
{
	struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);

	atomic_set(&ioend->io_remaining, 1);
	ioend->io_error = 0;
	ioend->io_parent = NULL;
	INIT_LIST_HEAD(&ioend->io_list);
	ioend->io_flags = ioend_flags;
	ioend->io_inode = inode;
	ioend->io_offset = file_offset;
	ioend->io_size = bio->bi_iter.bi_size;
	ioend->io_sector = bio->bi_iter.bi_sector;
	ioend->io_private = NULL;
	return ioend;
}
EXPORT_SYMBOL_GPL(iomap_init_ioend);

/*
 * Complete a single ioend.  For a split child ioend, drop the child bio and
 * account the completion against the parent.  The final (buffered or direct)
 * completion handler only runs once the last io_remaining reference is
 * dropped.
 */
static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
{
	if (ioend->io_parent) {
		struct bio *bio = &ioend->io_bio;

		ioend = ioend->io_parent;
		bio_put(bio);
	}

	if (error)
		cmpxchg(&ioend->io_error, 0, error);

	if (!atomic_dec_and_test(&ioend->io_remaining))
		return 0;
	if (ioend->io_flags & IOMAP_IOEND_DIRECT)
		return iomap_finish_ioend_direct(ioend);
	return iomap_finish_ioend_buffered(ioend);
}

/*
 * Ioend completion routine for merged bios.  This can only be called from task
 * context as merged ioends can be of unbounded length.  Hence we have to break
 * up the writeback completions into manageable chunks to avoid long scheduler
 * holdoffs.  We aim to keep scheduler holdoffs below 10ms so that we get good
 * batch processing throughput without creating adverse scheduler latency
 * conditions.
 */
void iomap_finish_ioends(struct iomap_ioend *ioend, int error)
{
	struct list_head tmp;
	u32 completions;

	might_sleep();

	list_replace_init(&ioend->io_list, &tmp);
	completions = iomap_finish_ioend(ioend, error);

	while (!list_empty(&tmp)) {
		if (completions > IOEND_BATCH_SIZE * 8) {
			cond_resched();
			completions = 0;
		}
		ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
		list_del_init(&ioend->io_list);
		completions += iomap_finish_ioend(ioend, error);
	}
}
EXPORT_SYMBOL_GPL(iomap_finish_ioends);

/*
 * We can merge two adjacent ioends if they have the same set of work to do.
 */
static bool iomap_ioend_can_merge(struct iomap_ioend *ioend,
		struct iomap_ioend *next)
{
	if (ioend->io_bio.bi_status != next->io_bio.bi_status)
		return false;
	if (next->io_flags & IOMAP_IOEND_BOUNDARY)
		return false;
	if ((ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
	    (next->io_flags & IOMAP_IOEND_NOMERGE_FLAGS))
		return false;
	if (ioend->io_offset + ioend->io_size != next->io_offset)
		return false;
	/*
	 * Do not merge physically discontiguous ioends.  The filesystem
	 * completion functions will have to iterate the physical
	 * discontiguities even if we merge the ioends at a logical level, so
	 * we don't gain anything by merging physical discontiguities here.
	 *
	 * We cannot use bio->bi_iter.bi_sector here as it is modified during
	 * submission, so it does not point to the start sector of the bio at
	 * completion.
	 */
	if (ioend->io_sector + (ioend->io_size >> SECTOR_SHIFT) !=
	    next->io_sector)
		return false;
	return true;
}

/*
 * Merge as many consecutive mergeable ioends from @more_ioends into @ioend as
 * possible.  @more_ioends is expected to be sorted by file offset, e.g. by
 * iomap_sort_ioends().
 */
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
		struct list_head *more_ioends)
{
	struct iomap_ioend *next;

	INIT_LIST_HEAD(&ioend->io_list);

	while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
			io_list))) {
		if (!iomap_ioend_can_merge(ioend, next))
			break;
		list_move_tail(&next->io_list, &ioend->io_list);
		ioend->io_size += next->io_size;
	}
}
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);

static int iomap_ioend_compare(void *priv, const struct list_head *a,
		const struct list_head *b)
{
	struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
	struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);

	if (ia->io_offset < ib->io_offset)
		return -1;
	if (ia->io_offset > ib->io_offset)
		return 1;
	return 0;
}

/*
 * Sort pending completions by file offset so that adjacent ioends can be
 * merged by iomap_ioend_try_merge().
 */
void iomap_sort_ioends(struct list_head *ioend_list)
{
	list_sort(NULL, ioend_list, iomap_ioend_compare);
}
EXPORT_SYMBOL_GPL(iomap_sort_ioends);

/*
 * Split up to the first @max_len bytes from @ioend if the ioend covers more
 * than @max_len bytes.
 *
 * If @is_append is set, the split will be based on the hardware limits for
 * REQ_OP_ZONE_APPEND commands and can be less than @max_len if the hardware
 * limits don't allow the entire @max_len length.
 *
 * The bio embedded into @ioend must be a REQ_OP_WRITE because the block layer
 * does not allow splitting REQ_OP_ZONE_APPEND bios.  The file system has to
 * switch the operation after this call, but before submitting the bio.
 */
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
		unsigned int max_len, bool is_append)
{
	struct bio *bio = &ioend->io_bio;
	struct iomap_ioend *split_ioend;
	unsigned int nr_segs;
	int sector_offset;
	struct bio *split;

	if (is_append) {
		struct queue_limits *lim = bdev_limits(bio->bi_bdev);

		max_len = min(max_len,
			      lim->max_zone_append_sectors << SECTOR_SHIFT);

		sector_offset = bio_split_rw_at(bio, lim, &nr_segs, max_len);
		if (unlikely(sector_offset < 0))
			return ERR_PTR(sector_offset);
		if (!sector_offset)
			return NULL;
	} else {
		if (bio->bi_iter.bi_size <= max_len)
			return NULL;
		sector_offset = max_len >> SECTOR_SHIFT;
	}

	/* ensure the split ioend is still block size aligned */
	sector_offset = ALIGN_DOWN(sector_offset << SECTOR_SHIFT,
			i_blocksize(ioend->io_inode)) >> SECTOR_SHIFT;

	split = bio_split(bio, sector_offset, GFP_NOFS, &iomap_ioend_bioset);
	if (IS_ERR(split))
		return ERR_CAST(split);
	split->bi_private = bio->bi_private;
	split->bi_end_io = bio->bi_end_io;

	split_ioend = iomap_init_ioend(ioend->io_inode, split, ioend->io_offset,
			ioend->io_flags);
	split_ioend->io_parent = ioend;

	atomic_inc(&ioend->io_remaining);
	ioend->io_offset += split_ioend->io_size;
	ioend->io_size -= split_ioend->io_size;

	split_ioend->io_sector = ioend->io_sector;
	if (!is_append)
		ioend->io_sector += (split_ioend->io_size >> SECTOR_SHIFT);
	return split_ioend;
}
EXPORT_SYMBOL_GPL(iomap_split_ioend);

static int __init iomap_ioend_init(void)
{
	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
			offsetof(struct iomap_ioend, io_bio),
			BIOSET_NEED_BVECS);
}
fs_initcall(iomap_ioend_init);
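
/*
 * Completion-side usage sketch (illustrative only, not part of this file's
 * API): a filesystem's writeback completion worker typically drains its list
 * of pending ioends, sorts it, and then merges and finishes adjacent ioends.
 * The fs_end_io() name and the @pending list are hypothetical here.
 *
 *	static void fs_end_io(struct list_head *pending)
 *	{
 *		struct iomap_ioend *ioend;
 *
 *		iomap_sort_ioends(pending);
 *		while ((ioend = list_first_entry_or_null(pending,
 *				struct iomap_ioend, io_list))) {
 *			list_del_init(&ioend->io_list);
 *			iomap_ioend_try_merge(ioend, pending);
 *			iomap_finish_ioends(ioend,
 *				blk_status_to_errno(ioend->io_bio.bi_status));
 *		}
 *	}
 */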