// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

struct phys_vec {
	phys_addr_t paddr;
	u32 len;
};

static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
	if (iter->iter.bi_size)
		return true;
	if (!iter->bio || !iter->bio->bi_next)
		return false;

	iter->bio = iter->bio->bi_next;
	if (iter->is_integrity) {
		iter->iter = bio_integrity(iter->bio)->bip_iter;
		iter->bvecs = bio_integrity(iter->bio)->bip_vec;
	} else {
		iter->iter = iter->bio->bi_iter;
		iter->bvecs = iter->bio->bi_io_vec;
	}
	return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
		struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it.  This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tight.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!__blk_map_iter_next(iter))
			break;

		next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}
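/*
 * Overview of the mapping helpers below: blk_can_dma_map_iova() decides
 * whether the IOVA-based path can be used for a request,
 * blk_rq_dma_map_iova() then links all segments into a single IOVA range
 * using dma_iova_link()/dma_iova_sync(), blk_dma_map_direct() maps one
 * physical segment at a time with dma_map_phys(), and blk_dma_map_bus()
 * handles P2P segments that use PCIe bus addresses and therefore bypass
 * the DMA API entirely.
 */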
/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU
 * page size granularity (which is guaranteed to be <= PAGE_SIZE and usually
 * 4k), so we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA
 * based path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(iter->p2pdma.mem, vec->paddr);
	iter->len = vec->len;
	return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	unsigned int attrs = 0;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
			rq_dma_dir(req), attrs);
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int mapped = 0;
	unsigned int attrs = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, attrs);
		if (error)
			break;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error) {
		iter->status = errno_to_blk_status(error);
		return false;
	}

	return true;
}

static inline void blk_rq_map_iter_init(struct request *rq,
		struct blk_map_iter *iter)
{
	struct bio *bio = rq->bio;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		*iter = (struct blk_map_iter) {
			.bvecs = &rq->special_vec,
			.iter = {
				.bi_size = rq->special_vec.bv_len,
			}
		};
	} else if (bio) {
		*iter = (struct blk_map_iter) {
			.bio = bio,
			.bvecs = bio->bi_io_vec,
			.iter = bio->bi_iter,
		};
	} else {
		/* the internal flush request may not have a bio attached */
		*iter = (struct blk_map_iter) {};
	}
}

static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		unsigned int total_len)
{
	struct phys_vec vec;

	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;
	iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
				 phys_to_page(vec.paddr))) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		return blk_dma_map_bus(iter, &vec);
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/*
		 * P2P transfers through the host bridge are treated the
		 * same as non-P2P transfers below and during unmap.
		 */
	case PCI_P2PDMA_MAP_NONE:
		break;
	default:
		iter->status = BLK_STS_INVAL;
		return false;
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev.  @state and @iter are provided by the
 * caller and don't need to be initialized.  @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	blk_rq_map_iter_init(req, &iter->iter);
	return blk_dma_map_iter_start(req, dma_dev, state, iter,
			blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start().  See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
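/*
 * Usage sketch (illustrative only): a minimal driver-side mapping loop built
 * on the two iterator calls above.  The "foo_*" names and the fixed-size
 * descriptor array are hypothetical; a real driver would fill its hardware
 * specific SGL/PRP descriptors here instead, bound the number of segments,
 * and keep @dma_state around for unmapping at completion time.
 *
 *	struct foo_iod {
 *		struct dma_iova_state dma_state;
 *		dma_addr_t addrs[FOO_MAX_SEGS];
 *		u32 lens[FOO_MAX_SEGS];
 *		unsigned int nr_segs;
 *	};
 *
 *	static blk_status_t foo_map_rq(struct device *dma_dev,
 *			struct request *req, struct foo_iod *iod)
 *	{
 *		struct blk_dma_iter iter;
 *
 *		iod->nr_segs = 0;
 *		if (!blk_rq_dma_map_iter_start(req, dma_dev, &iod->dma_state,
 *				&iter))
 *			return iter.status;
 *
 *		do {
 *			// bounds checking against FOO_MAX_SEGS omitted
 *			iod->addrs[iod->nr_segs] = iter.addr;
 *			iod->lens[iod->nr_segs] = iter.len;
 *			iod->nr_segs++;
 *		} while (blk_rq_dma_map_iter_next(req, dma_dev,
 *				&iod->dma_state, &iter));
 *
 *		// BLK_STS_OK if the iterator simply ran out of segments
 *		return iter.status;
 *	}
 */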
static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping.  We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to scatterlist, return number of sg entries setup.  Caller
 * must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg)
{
	struct blk_map_iter iter;
	struct phys_vec vec;
	int nsegs = 0;

	blk_rq_map_iter_init(rq, &iter);
	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must have gone wrong if the calculated number of segments
	 * is bigger than the number of the request's physical segments.
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
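/*
 * Usage sketch (illustrative only): mapping a request into a driver-owned
 * scatterlist and handing it to the DMA API.  The "foo_map_sg" name is
 * hypothetical; the scatterlist must provide room for
 * blk_rq_nr_phys_segments() entries as noted above.
 *
 *	static int foo_map_sg(struct device *dma_dev, struct request *req,
 *			struct scatterlist *sgl)
 *	{
 *		struct scatterlist *last_sg = NULL;
 *		int nsegs, nr_dma;
 *
 *		nsegs = __blk_rq_map_sg(req, sgl, &last_sg);
 *		if (!nsegs)
 *			return -EIO;
 *
 *		nr_dma = dma_map_sg(dma_dev, sgl, nsegs, rq_dma_dir(req));
 *		if (!nr_dma)
 *			return -EIO;
 *		return nr_dma;
 *	}
 */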
#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *	for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping the integrity data of @req to @dma_dev.  @state and
 * @iter are provided by the caller and don't need to be initialized.  @state
 * needs to be stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to
 * blk_rq_integrity_dma_map_iter_next() to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
		struct device *dma_dev, struct dma_iova_state *state,
		struct blk_dma_iter *iter)
{
	unsigned int len = bio_integrity_bytes(&req->q->limits.integrity,
			blk_rq_sectors(req));
	struct bio *bio = req->bio;

	iter->iter = (struct blk_map_iter) {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};
	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *	a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start().  See there for a detailed
 * description of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len.  If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
		struct device *dma_dev, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);
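/*
 * Usage note: the integrity iterators above follow the same calling pattern
 * as blk_rq_dma_map_iter_start()/blk_rq_dma_map_iter_next(), just operating
 * on the bio_integrity() metadata vectors instead of the data bvecs, so the
 * mapping loop sketched after those helpers applies here as well.
 */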
/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq: request to map
 * @sglist: target scatterlist
 *
 * Description: Map the integrity vectors in request into a
 * scatterlist.  The scatterlist must be big enough to hold all
 * elements, i.e. sized using blk_rq_count_integrity_sg() or
 * rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
	struct request_queue *q = rq->q;
	struct scatterlist *sg = NULL;
	struct bio *bio = rq->bio;
	unsigned int segments = 0;
	struct phys_vec vec;

	struct blk_map_iter iter = {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};

	while (blk_map_iter_next(rq, &iter, &vec)) {
		sg = blk_next_sg(&sg, sglist);
		sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		segments++;
	}

	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must have gone wrong if the calculated number of segments
	 * is bigger than the number of the request's physical integrity
	 * segments.
	 */
	BUG_ON(segments > rq->nr_integrity_segments);
	BUG_ON(segments > queue_max_integrity_segments(q));
	return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
#endif /* CONFIG_BLK_DEV_INTEGRITY */