// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
	if (iter->iter.bi_size)
		return true;
	if (!iter->bio || !iter->bio->bi_next)
		return false;

	iter->bio = iter->bio->bi_next;
	if (iter->is_integrity) {
		iter->iter = bio_integrity(iter->bio)->bip_iter;
		iter->bvecs = bio_integrity(iter->bio)->bip_vec;
	} else {
		iter->iter = iter->bio->bi_iter;
		iter->bvecs = iter->bio->bi_io_vec;
	}
	return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
		struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it.  This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tight.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!__blk_map_iter_next(iter))
			break;

		next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU
 * page size granularity (which is guaranteed to be <= PAGE_SIZE and usually
 * 4k), so we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA
 * based path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(iter->p2pdma.mem, vec->paddr);
	iter->len = vec->len;
	return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	unsigned int attrs = 0;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
			rq_dma_dir(req), attrs);
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int attrs = 0;
	size_t mapped = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, attrs);
		if (error)
			goto out_unlink;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error)
		goto out_unlink;

	return true;

out_unlink:
	dma_iova_destroy(dma_dev, state, mapped, dir, attrs);
	iter->status = errno_to_blk_status(error);
	return false;
}

static inline void blk_rq_map_iter_init(struct request *rq,
		struct blk_map_iter *iter)
{
	struct bio *bio = rq->bio;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		*iter = (struct blk_map_iter) {
			.bvecs = &rq->special_vec,
			.iter = {
				.bi_size = rq->special_vec.bv_len,
			}
		};
	} else if (bio) {
		*iter = (struct blk_map_iter) {
			.bio = bio,
			.bvecs = bio->bi_io_vec,
			.iter = bio->bi_iter,
		};
	} else {
		/* the internal flush request may not have a bio attached */
		*iter = (struct blk_map_iter) {};
	}
}

static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		unsigned int total_len)
{
	struct phys_vec vec;

	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;
	iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
				 phys_to_page(vec.paddr))) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		return blk_dma_map_bus(iter, &vec);
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/*
		 * P2P transfers through the host bridge are treated the
		 * same as non-P2P transfers below and during unmap.
		 */
	case PCI_P2PDMA_MAP_NONE:
		break;
	default:
		iter->status = BLK_STS_INVAL;
		return false;
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	memset(state, 0, sizeof(*state));
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev.  @state and @iter are provided by the
 * caller and don't need to be initialized.  @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	blk_rq_map_iter_init(req, &iter->iter);
	return blk_dma_map_iter_start(req, dma_dev, state, iter,
			blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start().  See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
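
/*
 * Illustrative sketch, not part of this file: one way a driver could consume
 * the iterator above to fill its hardware SGL.  All foo_* and fq->* names,
 * the descriptor layout, and the assumption that fq->descs has room for
 * blk_rq_nr_phys_segments(req) entries are hypothetical; unwinding of
 * already mapped segments on failure is omitted for brevity.
 *
 *	static blk_status_t foo_map_data(struct foo_queue *fq,
 *					 struct request *req)
 *	{
 *		struct blk_dma_iter iter;
 *		int i = 0;
 *
 *		if (!blk_rq_dma_map_iter_start(req, fq->dev, &fq->iova_state,
 *					       &iter))
 *			return iter.status;
 *		do {
 *			fq->descs[i].addr = cpu_to_le64(iter.addr);
 *			fq->descs[i].len = cpu_to_le32(iter.len);
 *			i++;
 *		} while (blk_rq_dma_map_iter_next(req, fq->dev, &iter));
 *
 *		return iter.status;
 *	}
 *
 * If the IOVA path was used the whole payload is coalesced into a single
 * mapping, so the loop above terminates after the first descriptor.
 */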

static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping.  We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to scatterlist, return number of sg entries setup.  Caller
 * must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg)
{
	struct blk_map_iter iter;
	struct phys_vec vec;
	int nsegs = 0;

	blk_rq_map_iter_init(rq, &iter);
	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);

		WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must have gone wrong if the computed number of segments
	 * is bigger than the number of the request's physical segments.
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
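
/*
 * Illustrative sketch with hypothetical foo_* names: sizing and filling a
 * scatterlist for a request as described in the comment above, then handing
 * the resulting entries to the DMA API.
 *
 *	static int foo_map_sg(struct device *dev, struct request *rq,
 *			      struct scatterlist *sgl)
 *	{
 *		struct scatterlist *last_sg = NULL;
 *		int nsegs;
 *
 *		sg_init_table(sgl, blk_rq_nr_phys_segments(rq));
 *		nsegs = __blk_rq_map_sg(rq, sgl, &last_sg);
 *		if (!nsegs)
 *			return -EIO;
 *		return dma_map_sg(dev, sgl, nsegs, rq_dma_dir(rq));
 *	}
 *
 * Note that __blk_rq_map_sg() clears a stale termination marker itself (see
 * blk_next_sg() above), so a full sg_init_table() on every command is not
 * strictly required once the table has been initialized.
 */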

#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *	for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping the integrity data of @req to @dma_dev.  @state and @iter
 * are provided by the caller and don't need to be initialized.  @state needs
 * to be stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to
 * blk_rq_integrity_dma_map_iter_next() to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
		struct device *dma_dev, struct dma_iova_state *state,
		struct blk_dma_iter *iter)
{
	unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
			blk_rq_sectors(req));
	struct bio *bio = req->bio;

	iter->iter = (struct blk_map_iter) {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};
	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *	a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start().  See there for a detailed description
 * of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
		struct device *dma_dev, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);
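
/*
 * Illustrative sketch (hypothetical fq->* names, mirroring the data-path
 * example earlier in this file): mapping the integrity metadata follows the
 * same calling convention, just with the _integrity_ iterator variants.
 *
 *	struct blk_dma_iter iter;
 *	int i = 0;
 *
 *	if (!blk_rq_integrity_dma_map_iter_start(req, fq->dev,
 *						 &fq->meta_iova_state, &iter))
 *		return iter.status;
 *	do {
 *		fq->meta_descs[i].addr = cpu_to_le64(iter.addr);
 *		fq->meta_descs[i].len = cpu_to_le32(iter.len);
 *		i++;
 *	} while (blk_rq_integrity_dma_map_iter_next(req, fq->dev, &iter));
 */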

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq: request to map
 * @sglist: target scatterlist
 *
 * Description: Map the integrity vectors in the request into a scatterlist.
 * The scatterlist must be big enough to hold all elements, i.e. sized using
 * blk_rq_count_integrity_sg() or rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
	struct request_queue *q = rq->q;
	struct scatterlist *sg = NULL;
	struct bio *bio = rq->bio;
	unsigned int segments = 0;
	struct phys_vec vec;

	struct blk_map_iter iter = {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};

	while (blk_map_iter_next(rq, &iter, &vec)) {
		sg = blk_next_sg(&sg, sglist);

		WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
		sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		segments++;
	}

	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must have gone wrong if the computed number of segments
	 * is bigger than the number of the request's physical integrity
	 * segments.
	 */
	BUG_ON(segments > rq->nr_integrity_segments);
	BUG_ON(segments > queue_max_integrity_segments(q));
	return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
#endif