// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

struct phys_vec {
	phys_addr_t	paddr;
	u32		len;
};

static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
	if (iter->iter.bi_size)
		return true;
	if (!iter->bio || !iter->bio->bi_next)
		return false;

	iter->bio = iter->bio->bi_next;
	if (iter->is_integrity) {
		iter->iter = bio_integrity(iter->bio)->bip_iter;
		iter->bvecs = bio_integrity(iter->bio)->bip_vec;
	} else {
		iter->iter = iter->bio->bi_iter;
		iter->bvecs = iter->bio->bi_io_vec;
	}
	return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
			      struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it.  This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tight.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!__blk_map_iter_next(iter))
			break;

		next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !((queue_virt_boundary(req->q) + 1) &
		dma_get_merge_boundary(dma_dev));
}
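/*
 * Worked example for the blk_can_dma_map_iova() check above, using typical
 * values rather than anything mandated by this file: an NVMe-style queue
 * usually has a queue_virt_boundary() mask of 4095, and an IOMMU with a 4k
 * granule reports a dma_get_merge_boundary() of 4095 as well, so
 * (4095 + 1) & 4095 == 0 and the IOVA path may be used.  A queue without a
 * virt boundary (mask 0) gives (0 + 1) & 4095 != 0 and always falls back to
 * the dma_map_page() path below.
 */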
static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(&iter->p2pdma, vec->paddr);
	iter->len = vec->len;
	return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = dma_map_page(dma_dev, phys_to_page(vec->paddr),
			offset_in_page(vec->paddr), vec->len, rq_dma_dir(req));
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int mapped = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, 0);
		if (error)
			break;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error) {
		iter->status = errno_to_blk_status(error);
		return false;
	}

	return true;
}
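/*
 * Rough shape of the IOVA path above, for readers unfamiliar with the
 * dma_iova_* API: blk_dma_map_iter_start() below first reserves one
 * contiguous IOVA range for the whole payload with dma_iova_try_alloc(),
 * blk_rq_dma_map_iova() then links each physical segment into that range at
 * its running offset with dma_iova_link(), and a single dma_iova_sync() call
 * syncs the IOMMU mappings for everything that was linked.  The range is
 * later torn down as a whole at unmap time instead of segment by segment.
 */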
static inline void blk_rq_map_iter_init(struct request *rq,
		struct blk_map_iter *iter)
{
	struct bio *bio = rq->bio;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		*iter = (struct blk_map_iter) {
			.bvecs	= &rq->special_vec,
			.iter	= {
				.bi_size	= rq->special_vec.bv_len,
			}
		};
	} else if (bio) {
		*iter = (struct blk_map_iter) {
			.bio	= bio,
			.bvecs	= bio->bi_io_vec,
			.iter	= bio->bi_iter,
		};
	} else {
		/* the internal flush request may not have bio attached */
		*iter = (struct blk_map_iter) {};
	}
}

static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		unsigned int total_len)
{
	struct phys_vec vec;

	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
				 phys_to_page(vec.paddr))) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		if (iter->iter.is_integrity)
			bio_integrity(req->bio)->bip_flags |= BIP_P2P_DMA;
		else
			req->cmd_flags |= REQ_P2PDMA;
		return blk_dma_map_bus(iter, &vec);
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/*
		 * P2P transfers through the host bridge are treated the
		 * same as non-P2P transfers below and during unmap.
		 */
	case PCI_P2PDMA_MAP_NONE:
		break;
	default:
		iter->status = BLK_STS_INVAL;
		return false;
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev.  @state and @iter are provided by the
 * caller and don't need to be initialized.  @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	blk_rq_map_iter_init(req, &iter->iter);
	return blk_dma_map_iter_start(req, dma_dev, state, iter,
			blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start().  See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
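/*
 * Sketch of how a driver might consume the two helpers above; the foo_*
 * names are hypothetical placeholders, only the blk_* calls are real:
 *
 *	struct blk_dma_iter iter;
 *
 *	if (!blk_rq_dma_map_iter_start(req, foo->dma_dev, &foo_req->dma_state,
 *				       &iter))
 *		return iter.status;
 *	do {
 *		foo_add_hw_segment(foo_req, iter.addr, iter.len);
 *	} while (blk_rq_dma_map_iter_next(req, foo->dma_dev,
 *					  &foo_req->dma_state, &iter));
 *	if (iter.status != BLK_STS_OK)
 *		return iter.status;
 *
 * iter.status stays BLK_STS_OK when the loop simply runs out of segments, so
 * checking it after the loop distinguishes normal completion from a mapping
 * failure in a later segment.
 */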
static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping.  We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to a scatterlist, returning the number of sg entries set up.
 * The caller must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg)
{
	struct blk_map_iter iter;
	struct phys_vec vec;
	int nsegs = 0;

	blk_rq_map_iter_init(rq, &iter);
	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must have gone wrong if the number of segments we set up
	 * is bigger than the number of the request's physical segments.
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
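/*
 * Minimal usage sketch for __blk_rq_map_sg(), assuming a scatterlist sized
 * for blk_rq_nr_phys_segments(rq); the blk_rq_map_sg() convenience wrapper
 * in the headers does essentially this when the caller does not care about
 * the last mapped entry:
 *
 *	struct scatterlist *last_sg = NULL;
 *	int nsegs = __blk_rq_map_sg(rq, sglist, &last_sg);
 */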
#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *	for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping the integrity data of @req to @dma_dev.  @state and @iter
 * are provided by the caller and don't need to be initialized.  @state needs
 * to be stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to
 * blk_rq_integrity_dma_map_iter_next() to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
		struct device *dma_dev, struct dma_iova_state *state,
		struct blk_dma_iter *iter)
{
	unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
					   blk_rq_sectors(req));
	struct bio *bio = req->bio;

	iter->iter = (struct blk_map_iter) {
		.bio		= bio,
		.iter		= bio_integrity(bio)->bip_iter,
		.bvecs		= bio_integrity(bio)->bip_vec,
		.is_integrity	= true,
	};
	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *	a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start().  See there for a detailed description
 * of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
		struct device *dma_dev, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq: request to map
 * @sglist: target scatterlist
 *
 * Description: Map the integrity vectors in request into a
 * scatterlist.  The scatterlist must be big enough to hold all
 * elements, i.e. sized using blk_rq_count_integrity_sg() or
 * rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
	struct request_queue *q = rq->q;
	struct scatterlist *sg = NULL;
	struct bio *bio = rq->bio;
	unsigned int segments = 0;
	struct phys_vec vec;

	struct blk_map_iter iter = {
		.bio		= bio,
		.iter		= bio_integrity(bio)->bip_iter,
		.bvecs		= bio_integrity(bio)->bip_vec,
		.is_integrity	= true,
	};

	while (blk_map_iter_next(rq, &iter, &vec)) {
		sg = blk_next_sg(&sg, sglist);
		sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		segments++;
	}

	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must have gone wrong if the number of segments we set up
	 * is bigger than the number of the request's physical integrity
	 * segments.
	 */
	BUG_ON(segments > rq->nr_integrity_segments);
	BUG_ON(segments > queue_max_integrity_segments(q));
	return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
#endif