// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

struct phys_vec {
	phys_addr_t paddr;
	u32 len;
};

static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
	if (iter->iter.bi_size)
		return true;
	if (!iter->bio || !iter->bio->bi_next)
		return false;

	iter->bio = iter->bio->bi_next;
	if (iter->is_integrity) {
		iter->iter = bio_integrity(iter->bio)->bip_iter;
		iter->bvecs = bio_integrity(iter->bio)->bip_vec;
	} else {
		iter->iter = iter->bio->bi_iter;
		iter->bvecs = iter->bio->bi_io_vec;
	}
	return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
		struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it. This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tightly.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!__blk_map_iter_next(iter))
			break;

		next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA-based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(iter->p2pdma.mem, vec->paddr);
	iter->len = vec->len;
	return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	unsigned int attrs = 0;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
			rq_dma_dir(req), attrs);
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int mapped = 0;
	unsigned int attrs = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, attrs);
		if (error)
			break;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error) {
		iter->status = errno_to_blk_status(error);
		return false;
	}

	return true;
}

static inline void blk_rq_map_iter_init(struct request *rq,
		struct blk_map_iter *iter)
{
	struct bio *bio = rq->bio;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		*iter = (struct blk_map_iter) {
			.bvecs = &rq->special_vec,
			.iter = {
				.bi_size = rq->special_vec.bv_len,
			}
		};
	} else if (bio) {
		*iter = (struct blk_map_iter) {
			.bio = bio,
			.bvecs = bio->bi_io_vec,
			.iter = bio->bi_iter,
		};
	} else {
		/* the internal flush request may not have a bio attached */
		*iter = (struct blk_map_iter) {};
	}
}

static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		unsigned int total_len)
{
	struct phys_vec vec;

	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;
	iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
				 phys_to_page(vec.paddr))) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		return blk_dma_map_bus(iter, &vec);
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/*
		 * P2P transfers through the host bridge are treated the
		 * same as non-P2P transfers below and during unmap.
		 */
	case PCI_P2PDMA_MAP_NONE:
		break;
	default:
		iter->status = BLK_STS_INVAL;
		return false;
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	memset(state, 0, sizeof(*state));
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev. @state and @iter are provided by the
 * caller and don't need to be initialized. @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	blk_rq_map_iter_init(req, &iter->iter);
	return blk_dma_map_iter_start(req, dma_dev, state, iter,
				      blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start(). See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
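
/*
 * Illustrative usage sketch (an assumption for documentation purposes, not
 * lifted from any in-tree driver): a driver would typically walk all DMA
 * segments of a request as below, where foo_dev, foo_req and
 * foo_add_segment() are hypothetical placeholders for driver state and
 * per-segment handling:
 *
 *	struct blk_dma_iter iter;
 *
 *	if (!blk_rq_dma_map_iter_start(req, foo_dev, &foo_req->dma_state,
 *				       &iter))
 *		return iter.status;
 *	do {
 *		foo_add_segment(foo_req, iter.addr, iter.len);
 *	} while (blk_rq_dma_map_iter_next(req, foo_dev, &foo_req->dma_state,
 *					  &iter));
 *
 * If blk_rq_dma_map_coalesce() reports that the payload was coalesced into a
 * single IOVA mapping, no further segments need to be mapped after the first
 * one and the loop above sees exactly one address/length pair.
 */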
static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping. We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		    struct scatterlist **last_sg)
{
	struct blk_map_iter iter;
	struct phys_vec vec;
	int nsegs = 0;

	blk_rq_map_iter_init(rq, &iter);
	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
			    offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must be wrong if the computed number of segments is bigger
	 * than the number of the request's physical segments.
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
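
/*
 * Illustrative usage sketch (an assumption, not taken from an in-tree
 * driver): a driver that still consumes scatterlists would size the table
 * from blk_rq_nr_phys_segments() and then map into it, e.g.:
 *
 *	struct scatterlist *last = NULL;
 *	int nsegs;
 *
 *	sg_init_table(foo_req->sgt.sgl, blk_rq_nr_phys_segments(req));
 *	nsegs = __blk_rq_map_sg(req, foo_req->sgt.sgl, &last);
 *
 * foo_req is a hypothetical per-request driver structure; the nsegs entries
 * filled in here are then handed to the DMA API (e.g. dma_map_sgtable()).
 */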
#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *	for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req integrity data to @dma_dev. @state and @iter are
 * provided by the caller and don't need to be initialized. @state needs to be
 * stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
		struct device *dma_dev, struct dma_iova_state *state,
		struct blk_dma_iter *iter)
{
	unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
					   blk_rq_sectors(req));
	struct bio *bio = req->bio;

	iter->iter = (struct blk_map_iter) {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};
	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *	a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start(). See there for a detailed description
 * of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
		struct device *dma_dev, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq: request to map
 * @sglist: target scatterlist
 *
 * Description: Map the integrity vectors in request into a
 * scatterlist. The scatterlist must be big enough to hold all
 * elements. I.e. sized using blk_rq_count_integrity_sg() or
 * rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
	struct request_queue *q = rq->q;
	struct scatterlist *sg = NULL;
	struct bio *bio = rq->bio;
	unsigned int segments = 0;
	struct phys_vec vec;

	struct blk_map_iter iter = {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};

	while (blk_map_iter_next(rq, &iter, &vec)) {
		sg = blk_next_sg(&sg, sglist);
		sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
			    offset_in_page(vec.paddr));
		segments++;
	}

	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must be wrong if the computed number of segments is bigger
	 * than the number of the request's physical integrity segments.
	 */
	BUG_ON(segments > rq->nr_integrity_segments);
	BUG_ON(segments > queue_max_integrity_segments(q));
	return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
#endif
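
/*
 * Illustrative usage sketch for the integrity mapping helpers above (an
 * assumption, not taken from an in-tree driver): metadata segments are walked
 * just like data segments, with foo_dev, foo_req and foo_add_meta_segment()
 * as hypothetical placeholders:
 *
 *	struct blk_dma_iter iter;
 *
 *	if (!blk_rq_integrity_dma_map_iter_start(req, foo_dev,
 *			&foo_req->meta_dma_state, &iter))
 *		return iter.status;
 *	do {
 *		foo_add_meta_segment(foo_req, iter.addr, iter.len);
 *	} while (blk_rq_integrity_dma_map_iter_next(req, foo_dev, &iter));
 */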