// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024-2025 Christoph Hellwig.
 */
#include <linux/iomap.h>
#include <linux/list_sort.h>
#include "internal.h"

struct bio_set iomap_ioend_bioset;
EXPORT_SYMBOL_GPL(iomap_ioend_bioset);

struct iomap_ioend *iomap_init_ioend(struct inode *inode,
		struct bio *bio, loff_t file_offset, u16 ioend_flags)
{
	struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);

	atomic_set(&ioend->io_remaining, 1);
	ioend->io_error = 0;
	ioend->io_parent = NULL;
	INIT_LIST_HEAD(&ioend->io_list);
	ioend->io_flags = ioend_flags;
	ioend->io_inode = inode;
	ioend->io_offset = file_offset;
	ioend->io_size = bio->bi_iter.bi_size;
	ioend->io_sector = bio->bi_iter.bi_sector;
	ioend->io_private = NULL;
	return ioend;
}
EXPORT_SYMBOL_GPL(iomap_init_ioend);
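
/*
 * Illustrative sketch, not part of the original file: a writeback path is
 * expected to allocate the bio from iomap_ioend_bioset (so that
 * iomap_ioend_from_bio() can find the containing ioend) and then call
 * iomap_init_ioend() before adding data and submitting.  The surrounding
 * variables (iomap, wbc, pos, ioend_flags) are assumptions for the example,
 * not requirements of this API:
 *
 *	bio = bio_alloc_bioset(iomap->bdev, BIO_MAX_VECS,
 *			REQ_OP_WRITE | wbc_to_write_flags(wbc),
 *			GFP_NOFS, &iomap_ioend_bioset);
 *	bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
 *	ioend = iomap_init_ioend(inode, bio, pos, ioend_flags);
 */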

static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
{
	if (ioend->io_parent) {
		struct bio *bio = &ioend->io_bio;

		ioend = ioend->io_parent;
		bio_put(bio);
	}

	if (error)
		cmpxchg(&ioend->io_error, 0, error);

	if (!atomic_dec_and_test(&ioend->io_remaining))
		return 0;
	if (ioend->io_flags & IOMAP_IOEND_DIRECT)
		return iomap_finish_ioend_direct(ioend);
	return iomap_finish_ioend_buffered(ioend);
}

/*
 * Ioend completion routine for merged bios. This can only be called from task
 * context as merged ioends can be of unbounded length. Hence we have to break
 * up the writeback completions into manageable chunks to avoid long scheduler
 * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
 * good batch processing throughput without creating adverse scheduler latency
 * conditions.
 */
void iomap_finish_ioends(struct iomap_ioend *ioend, int error)
{
	struct list_head tmp;
	u32 completions;

	might_sleep();

	list_replace_init(&ioend->io_list, &tmp);
	completions = iomap_finish_ioend(ioend, error);

	while (!list_empty(&tmp)) {
		if (completions > IOEND_BATCH_SIZE * 8) {
			cond_resched();
			completions = 0;
		}
		ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
		list_del_init(&ioend->io_list);
		completions += iomap_finish_ioend(ioend, error);
	}
}
EXPORT_SYMBOL_GPL(iomap_finish_ioends);

/*
 * We can merge two adjacent ioends if they have the same set of work to do.
 */
static bool iomap_ioend_can_merge(struct iomap_ioend *ioend,
		struct iomap_ioend *next)
{
	if (ioend->io_bio.bi_status != next->io_bio.bi_status)
		return false;
	if (next->io_flags & IOMAP_IOEND_BOUNDARY)
		return false;
	if ((ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
	    (next->io_flags & IOMAP_IOEND_NOMERGE_FLAGS))
		return false;
	if (ioend->io_offset + ioend->io_size != next->io_offset)
		return false;
	/*
	 * Do not merge physically discontiguous ioends. The filesystem
	 * completion functions will have to iterate the physical
	 * discontiguities even if we merge the ioends at a logical level, so
	 * we don't gain anything by merging physical discontiguities here.
	 *
	 * We cannot use bio->bi_iter.bi_sector here as it is modified during
	 * submission so does not point to the start sector of the bio at
	 * completion.
	 */
	if (ioend->io_sector + (ioend->io_size >> SECTOR_SHIFT) !=
	    next->io_sector)
		return false;
	return true;
}

void iomap_ioend_try_merge(struct iomap_ioend *ioend,
		struct list_head *more_ioends)
{
	struct iomap_ioend *next;

	INIT_LIST_HEAD(&ioend->io_list);

	while ((next = list_first_entry_or_null(more_ioends, struct iomap_ioend,
			io_list))) {
		if (!iomap_ioend_can_merge(ioend, next))
			break;
		list_move_tail(&next->io_list, &ioend->io_list);
		ioend->io_size += next->io_size;
	}
}
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);

static int iomap_ioend_compare(void *priv, const struct list_head *a,
		const struct list_head *b)
{
	struct iomap_ioend *ia = container_of(a, struct iomap_ioend, io_list);
	struct iomap_ioend *ib = container_of(b, struct iomap_ioend, io_list);

	if (ia->io_offset < ib->io_offset)
		return -1;
	if (ia->io_offset > ib->io_offset)
		return 1;
	return 0;
}

void iomap_sort_ioends(struct list_head *ioend_list)
{
	list_sort(NULL, ioend_list, iomap_ioend_compare);
}
EXPORT_SYMBOL_GPL(iomap_sort_ioends);
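
/*
 * Illustrative sketch, not part of the original file: a filesystem completion
 * worker can combine the helpers above by sorting its pending ioends by file
 * offset, merging mergeable neighbours, and then completing each merged
 * chain.  completion_list and error are assumed caller-side state:
 *
 *	iomap_sort_ioends(&completion_list);
 *	while ((ioend = list_first_entry_or_null(&completion_list,
 *			struct iomap_ioend, io_list))) {
 *		list_del_init(&ioend->io_list);
 *		iomap_ioend_try_merge(ioend, &completion_list);
 *		iomap_finish_ioends(ioend, error);
 *	}
 */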

/*
 * Split up to the first @max_len bytes from @ioend if the ioend covers more
 * than @max_len bytes.
 *
 * If @is_append is set, the split will be based on the hardware limits for
 * REQ_OP_ZONE_APPEND commands and can be less than @max_len if the hardware
 * limits don't allow the entire @max_len length.
 *
 * The bio embedded into @ioend must be a REQ_OP_WRITE because the block layer
 * does not allow splitting REQ_OP_ZONE_APPEND bios.  The file system has to
 * switch the operation after this call, but before submitting the bio.
 */
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
		unsigned int max_len, bool is_append)
{
	struct bio *bio = &ioend->io_bio;
	struct iomap_ioend *split_ioend;
	unsigned int nr_segs;
	int sector_offset;
	struct bio *split;

	if (is_append) {
		struct queue_limits *lim = bdev_limits(bio->bi_bdev);

		max_len = min(max_len,
			      lim->max_zone_append_sectors << SECTOR_SHIFT);

		sector_offset = bio_split_rw_at(bio, lim, &nr_segs, max_len);
		if (unlikely(sector_offset < 0))
			return ERR_PTR(sector_offset);
		if (!sector_offset)
			return NULL;
	} else {
		if (bio->bi_iter.bi_size <= max_len)
			return NULL;
		sector_offset = max_len >> SECTOR_SHIFT;
	}

	/* ensure the split ioend is still block size aligned */
	sector_offset = ALIGN_DOWN(sector_offset << SECTOR_SHIFT,
			i_blocksize(ioend->io_inode)) >> SECTOR_SHIFT;

	split = bio_split(bio, sector_offset, GFP_NOFS, &iomap_ioend_bioset);
	if (IS_ERR(split))
		return ERR_CAST(split);
	split->bi_private = bio->bi_private;
	split->bi_end_io = bio->bi_end_io;

	split_ioend = iomap_init_ioend(ioend->io_inode, split, ioend->io_offset,
			ioend->io_flags);
	split_ioend->io_parent = ioend;

	atomic_inc(&ioend->io_remaining);
	ioend->io_offset += split_ioend->io_size;
	ioend->io_size -= split_ioend->io_size;

	split_ioend->io_sector = ioend->io_sector;
	if (!is_append)
		ioend->io_sector += (split_ioend->io_size >> SECTOR_SHIFT);
	return split_ioend;
}
EXPORT_SYMBOL_GPL(iomap_split_ioend);
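
/*
 * Illustrative sketch, not part of the original file: a zoned filesystem
 * would split the ioend to the zone append hardware limit and switch each
 * piece from REQ_OP_WRITE to REQ_OP_ZONE_APPEND just before submission, as
 * required by the comment above, repeating until iomap_split_ioend() returns
 * NULL.  max_len is an assumed caller-provided limit, and clearing
 * ~REQ_OP_WRITE relies on the op currently being exactly REQ_OP_WRITE:
 *
 *	split = iomap_split_ioend(ioend, max_len, true);
 *	if (IS_ERR(split))
 *		return PTR_ERR(split);
 *	if (split) {
 *		split->io_bio.bi_opf &= ~REQ_OP_WRITE;
 *		split->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
 *		submit_bio(&split->io_bio);
 *	}
 */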

static int __init iomap_ioend_init(void)
{
	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
			   offsetof(struct iomap_ioend, io_bio),
			   BIOSET_NEED_BVECS);
}
fs_initcall(iomap_ioend_init);