// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Christoph Hellwig
 */
#include <linux/blk-integrity.h>
#include <linux/blk-mq-dma.h>
#include "blk.h"

static bool __blk_map_iter_next(struct blk_map_iter *iter)
{
	if (iter->iter.bi_size)
		return true;
	if (!iter->bio || !iter->bio->bi_next)
		return false;

	iter->bio = iter->bio->bi_next;
	if (iter->is_integrity) {
		iter->iter = bio_integrity(iter->bio)->bip_iter;
		iter->bvecs = bio_integrity(iter->bio)->bip_vec;
	} else {
		iter->iter = iter->bio->bi_iter;
		iter->bvecs = iter->bio->bi_io_vec;
	}
	return true;
}

static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
		struct phys_vec *vec)
{
	unsigned int max_size;
	struct bio_vec bv;

	if (!iter->iter.bi_size)
		return false;

	bv = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
	vec->paddr = bvec_phys(&bv);
	max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
	bv.bv_len = min(bv.bv_len, max_size);
	bvec_iter_advance_single(iter->bvecs, &iter->iter, bv.bv_len);

	/*
	 * If we are entirely done with this bi_io_vec entry, check if the next
	 * one could be merged into it. This typically happens when moving to
	 * the next bio, but some callers also don't pack bvecs tight.
	 */
	while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
		struct bio_vec next;

		if (!__blk_map_iter_next(iter))
			break;

		next = mp_bvec_iter_bvec(iter->bvecs, iter->iter);
		if (bv.bv_len + next.bv_len > max_size ||
		    !biovec_phys_mergeable(req->q, &bv, &next))
			break;

		bv.bv_len += next.bv_len;
		bvec_iter_advance_single(iter->bvecs, &iter->iter, next.bv_len);
	}

	vec->len = bv.bv_len;
	return true;
}

/*
 * The IOVA-based DMA API wants to be able to coalesce at the minimal IOMMU page
 * size granularity (which is guaranteed to be <= PAGE_SIZE and usually 4k), so
 * we need to ensure our segments are aligned to this as well.
 *
 * Note that there is no point in using the slightly more complicated IOVA based
 * path for single segment mappings.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
{
	iter->addr = pci_p2pdma_bus_addr_map(iter->p2pdma.mem, vec->paddr);
	iter->len = vec->len;
	return true;
}

static bool blk_dma_map_direct(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter, struct phys_vec *vec)
{
	unsigned int attrs = 0;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	iter->addr = dma_map_phys(dma_dev, vec->paddr, vec->len,
			rq_dma_dir(req), attrs);
	if (dma_mapping_error(dma_dev, iter->addr)) {
		iter->status = BLK_STS_RESOURCE;
		return false;
	}
	iter->len = vec->len;
	return true;
}

static bool blk_rq_dma_map_iova(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		struct phys_vec *vec)
{
	enum dma_data_direction dir = rq_dma_dir(req);
	unsigned int attrs = 0;
	size_t mapped = 0;
	int error;

	iter->addr = state->addr;
	iter->len = dma_iova_size(state);

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		attrs |= DMA_ATTR_MMIO;

	do {
		error = dma_iova_link(dma_dev, state, vec->paddr, mapped,
				vec->len, dir, attrs);
		if (error)
			break;
		mapped += vec->len;
	} while (blk_map_iter_next(req, &iter->iter, vec));

	error = dma_iova_sync(dma_dev, state, 0, mapped);
	if (error) {
		iter->status = errno_to_blk_status(error);
		return false;
	}

	return true;
}

static inline void blk_rq_map_iter_init(struct request *rq,
		struct blk_map_iter *iter)
{
	struct bio *bio = rq->bio;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		*iter = (struct blk_map_iter) {
			.bvecs = &rq->special_vec,
			.iter = {
				.bi_size = rq->special_vec.bv_len,
			}
		};
	} else if (bio) {
		*iter = (struct blk_map_iter) {
			.bio = bio,
			.bvecs = bio->bi_io_vec,
			.iter = bio->bi_iter,
		};
	} else {
		/* the internal flush request may not have bio attached */
		*iter = (struct blk_map_iter) {};
	}
}

static bool blk_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter,
		unsigned int total_len)
{
	struct phys_vec vec;

	memset(&iter->p2pdma, 0, sizeof(iter->p2pdma));
	iter->status = BLK_STS_OK;
	iter->p2pdma.map = PCI_P2PDMA_MAP_NONE;

	/*
	 * Grab the first segment ASAP because we'll need it to check for P2P
	 * transfers.
	 */
	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	switch (pci_p2pdma_state(&iter->p2pdma, dma_dev,
				 phys_to_page(vec.paddr))) {
	case PCI_P2PDMA_MAP_BUS_ADDR:
		return blk_dma_map_bus(iter, &vec);
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		/*
		 * P2P transfers through the host bridge are treated the
		 * same as non-P2P transfers below and during unmap.
		 */
	case PCI_P2PDMA_MAP_NONE:
		break;
	default:
		iter->status = BLK_STS_INVAL;
		return false;
	}

	if (blk_can_dma_map_iova(req, dma_dev) &&
	    dma_iova_try_alloc(dma_dev, state, vec.paddr, total_len))
		return blk_rq_dma_map_iova(req, dma_dev, state, iter, &vec);
	memset(state, 0, sizeof(*state));
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}

/**
 * blk_rq_dma_map_iter_start - map the first DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req to @dma_dev. @state and @iter are provided by the
 * caller and don't need to be initialized. @state needs to be stored for use
 * at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
		struct dma_iova_state *state, struct blk_dma_iter *iter)
{
	blk_rq_map_iter_init(req, &iter->iter);
	return blk_dma_map_iter_start(req, dma_dev, state, iter,
			blk_rq_payload_bytes(req));
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_start);

/**
 * blk_rq_dma_map_iter_next - map the next DMA segment for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next mapping after a previous call to
 * blk_rq_dma_map_iter_start(). See there for a detailed description of the
 * arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_dma_map_iter_next(struct request *req, struct device *dma_dev,
		struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_dma_map_iter_next);
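
/*
 * Example (illustrative sketch, not part of this file): a driver consuming
 * the iterator API above might map all DMA segments of a request like this.
 * "my_dev", "dev->dma_dev" and my_append_segment() are hypothetical
 * driver-local names.
 *
 *	static blk_status_t my_map_rq(struct my_dev *dev, struct request *req,
 *			struct dma_iova_state *state)
 *	{
 *		struct blk_dma_iter iter;
 *
 *		if (!blk_rq_dma_map_iter_start(req, dev->dma_dev, state, &iter))
 *			return iter.status;
 *		do {
 *			// hand iter.addr / iter.len to the hardware
 *			my_append_segment(dev, iter.addr, iter.len);
 *		} while (blk_rq_dma_map_iter_next(req, dev->dma_dev, &iter));
 *		return iter.status;
 *	}
 *
 * If the request was coalesced into a single IOVA mapping (see
 * blk_rq_dma_map_coalesce()), the loop body runs exactly once with the full
 * payload length.
 */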

static inline struct scatterlist *
blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping. We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

/*
 * Map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries.
 */
int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
		struct scatterlist **last_sg)
{
	struct blk_map_iter iter;
	struct phys_vec vec;
	int nsegs = 0;

	blk_rq_map_iter_init(rq, &iter);
	while (blk_map_iter_next(rq, &iter, &vec)) {
		*last_sg = blk_next_sg(last_sg, sglist);

		WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
		sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		nsegs++;
	}

	if (*last_sg)
		sg_mark_end(*last_sg);

	/*
	 * Something must be wrong if the calculated number of segments exceeds
	 * the request's number of physical segments.
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(__blk_rq_map_sg);
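
/*
 * Example (illustrative sketch): mapping a request to a pre-allocated
 * scatterlist.  As noted above, "sgl" is assumed to have been allocated and
 * initialized by the caller with room for blk_rq_nr_phys_segments(rq)
 * entries.
 *
 *	struct scatterlist *last_sg = NULL;
 *	int nsegs;
 *
 *	nsegs = __blk_rq_map_sg(rq, sgl, &last_sg);
 *
 * Most drivers use the blk_rq_map_sg() wrapper, which supplies the @last_sg
 * argument internally.
 */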

#ifdef CONFIG_BLK_DEV_INTEGRITY
/**
 * blk_rq_integrity_dma_map_iter_start - map the first integrity DMA segment
 *	for a request
 * @req: request to map
 * @dma_dev: device to map to
 * @state: DMA IOVA state
 * @iter: block layer DMA iterator
 *
 * Start DMA mapping @req integrity data to @dma_dev. @state and @iter are
 * provided by the caller and don't need to be initialized. @state needs to be
 * stored for use at unmap time, @iter is only needed at map time.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr
 * and the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 *
 * The caller can call blk_rq_dma_map_coalesce() to check if further segments
 * need to be mapped after this, or go straight to blk_rq_dma_map_iter_next()
 * to try to map the following segments.
 */
bool blk_rq_integrity_dma_map_iter_start(struct request *req,
		struct device *dma_dev, struct dma_iova_state *state,
		struct blk_dma_iter *iter)
{
	unsigned len = bio_integrity_bytes(&req->q->limits.integrity,
			blk_rq_sectors(req));
	struct bio *bio = req->bio;

	iter->iter = (struct blk_map_iter) {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};
	return blk_dma_map_iter_start(req, dma_dev, state, iter, len);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_start);

/**
 * blk_rq_integrity_dma_map_iter_next - map the next integrity DMA segment for
 *	a request
 * @req: request to map
 * @dma_dev: device to map to
 * @iter: block layer DMA iterator
 *
 * Iterate to the next integrity mapping after a previous call to
 * blk_rq_integrity_dma_map_iter_start(). See there for a detailed description
 * of the arguments.
 *
 * Returns %false if there is no segment to map, including due to an error, or
 * %true if it did map a segment.
 *
 * If a segment was mapped, the DMA address for it is returned in @iter.addr and
 * the length in @iter.len. If no segment was mapped the status code is
 * returned in @iter.status.
 */
bool blk_rq_integrity_dma_map_iter_next(struct request *req,
		struct device *dma_dev, struct blk_dma_iter *iter)
{
	struct phys_vec vec;

	if (!blk_map_iter_next(req, &iter->iter, &vec))
		return false;

	if (iter->p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
		return blk_dma_map_bus(iter, &vec);
	return blk_dma_map_direct(req, dma_dev, iter, &vec);
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_dma_map_iter_next);

/**
 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
 * @rq: request to map
 * @sglist: target scatterlist
 *
 * Description: Map the integrity vectors in the request into a
 * scatterlist. The scatterlist must be big enough to hold all
 * elements. I.e. sized using blk_rq_count_integrity_sg() or
 * rq->nr_integrity_segments.
 */
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
	struct request_queue *q = rq->q;
	struct scatterlist *sg = NULL;
	struct bio *bio = rq->bio;
	unsigned int segments = 0;
	struct phys_vec vec;
	struct blk_map_iter iter = {
		.bio = bio,
		.iter = bio_integrity(bio)->bip_iter,
		.bvecs = bio_integrity(bio)->bip_vec,
		.is_integrity = true,
	};

	while (blk_map_iter_next(rq, &iter, &vec)) {
		sg = blk_next_sg(&sg, sglist);

		WARN_ON_ONCE(overflows_type(vec.len, unsigned int));
		sg_set_page(sg, phys_to_page(vec.paddr), vec.len,
				offset_in_page(vec.paddr));
		segments++;
	}

	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must be wrong if the calculated number of segments exceeds
	 * the request's number of integrity segments.
	 */
	BUG_ON(segments > rq->nr_integrity_segments);
	BUG_ON(segments > queue_max_integrity_segments(q));
	return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
#endif
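
/*
 * Example (illustrative sketch): the integrity iterator above mirrors the
 * data-path iterator, so a driver maps protection information with the same
 * loop shape.  "my_dev", "dev->dma_dev" and my_append_meta_segment() are
 * hypothetical driver-local names; "meta_state" is a second, caller-provided
 * dma_iova_state used only for the metadata mapping.
 *
 *	struct blk_dma_iter iter;
 *
 *	if (!blk_rq_integrity_dma_map_iter_start(req, dev->dma_dev,
 *			meta_state, &iter))
 *		return iter.status;
 *	do {
 *		my_append_meta_segment(dev, iter.addr, iter.len);
 *	} while (blk_rq_integrity_dma_map_iter_next(req, dev->dma_dev, &iter));
 *	return iter.status;
 */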