// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-mq-dma.h>
#include "blk.h"

struct phys_vec {
	phys_addr_t paddr;
	u32 len;
};

static bool blk_map_iter_next(struct request *req, struct req_iterator *iter,
		struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
		if (!iter->bio)
			return false;
		vec->paddr = bvec_phys(&req->special_vec);
		vec->len = req->special_vec.bv_len;
		iter->bio = NULL;
		return true;
	}

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it.  This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tight.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!iter->iter.bi_size) {
			if (!iter->bio->bi_next)
				break;
			iter->bio = iter->bio->bi_next;
			iter->iter = iter->bio->bi_iter;
		}

		next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !((queue_virt_boundary(req->q) + 1) &
		 dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
	iter->len = vec->len;
	return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
			offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int mapped = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, 0);
		if (error)
			break;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error) {
		iter->status = errno_to_blk_status(error);
		return false;
	}

	return true;
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @state:	DMA IOVA state
 * @iter:	block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev.  @state and @iter are provided by the
 * caller and don't need to be initialized.  @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	unsigned int total_len = blk_rq_payload_bytes(req);
	struct phys_vec vec;

	iter->iter.bio = req->bio;
	iter->iter.iter = req->bio->bi_iter;
	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (IS_ENABLED(CONFIG_PCI_P2PDMA) && (req->cmd_flags & REQ_P2PDMA)) {
		switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
					 phys_to_page(vec.paddr))) {
		case PCI_P2PDMA_MAP_BUS_ADDR:
			return blk_dma_map_bus(iter, &vec);
		case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
			/*
			 * P2P transfers through the host bridge are treated
			 * the same as non-P2P transfers below and during
			 * unmap.
			 */
			req->cmd_flags &= ~REQ_P2PDMA;
			break;
		default:
			iter->status = BLK_STS_INVAL;
			return false;
		}
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req:	request to map
 * @dma_dev:	device to map to
 * @state:	DMA IOVA state
 * @iter:	block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start().  See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);

static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping.  We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to a scatterlist and return the number of sg entries set up.
 * The caller must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg)
{
	struct req_iterator iter = {
		.bio	= rq->bio,
	};
	struct phys_vec vec;
	int nsegs = 0;

	/* the internal flush request may not have a bio attached */
	if (iter.bio)
		iter.iter = iter.bio->bi_iter;

	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must have gone wrong if the computed number of segments
	 * is bigger than the number of the request's physical segments.
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
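
/*
 * Example usage (editorial sketch, not part of the kernel sources):
 *
 * The snippet below shows how a driver might consume the iterator API
 * documented above to fill a hardware descriptor list.  Everything with a
 * "foo_" prefix (the device and descriptor structures and the mapping
 * helper) is hypothetical; only blk_rq_dma_map_iter_start(),
 * blk_rq_dma_map_iter_next() and the @iter.addr/@iter.len/@iter.status
 * fields come from this file.  @state must be kept by the driver for unmap
 * time, as described in the kerneldoc above, and error unwinding of a
 * partially mapped request is omitted here for brevity.
 */
#if 0	/* illustrative only, never compiled */
struct foo_desc {
	__le64	addr;
	__le32	len;
};

struct foo_dev {
	struct device	*dev;
};

static blk_status_t foo_map_rq(struct foo_dev *fdev, struct request *req,
		struct dma_iova_state *state, struct foo_desc *descs,
		unsigned int max_descs)
{
	struct blk_dma_iter iter;
	unsigned int i = 0;

	if (!blk_rq_dma_map_iter_start(req, fdev->dev, state, &iter))
		return iter.status;

	do {
		if (i == max_descs)
			return BLK_STS_IOERR;
		descs[i].addr = cpu_to_le64(iter.addr);
		descs[i].len = cpu_to_le32(iter.len);
		i++;
	} while (blk_rq_dma_map_iter_next(req, fdev->dev, state, &iter));

	/*
	 * A failed blk_rq_dma_map_iter_next() call sets iter.status before
	 * returning false, while simply running out of segments leaves it
	 * at BLK_STS_OK, so returning it covers both cases.
	 */
	return iter.status;
}
#endif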