// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 HGST, a Western Digital Company.
 */
#include <linux/memremap.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>

enum {
	RDMA_RW_SINGLE_WR,
	RDMA_RW_MULTI_WR,
	RDMA_RW_MR,
	RDMA_RW_SIG_MR,
	RDMA_RW_IOVA,
};

static bool rdma_rw_force_mr;
module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");

/*
 * Report whether memory registration should be used. Memory registration must
 * be used for iWarp devices because of iWARP-specific limitations. Memory
 * registration is also enabled if registering memory might yield better
 * performance than using multiple SGE entries, see rdma_rw_io_needs_mr().
 */
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u32 port_num)
{
	if (rdma_protocol_iwarp(dev, port_num))
		return true;
	if (dev->attrs.max_sgl_rd)
		return true;
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

/*
 * Check if the device will use memory registration for this RW operation.
 * For RDMA READs we must use MRs on iWarp and can optionally use them as an
 * optimization otherwise. Additionally we have a debug option to force usage
 * of MRs to help testing this code path.
 */
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u32 port_num,
		enum dma_data_direction dir, int dma_nents)
{
	if (dir == DMA_FROM_DEVICE) {
		if (rdma_protocol_iwarp(dev, port_num))
			return true;
		if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
			return true;
	}
	if (unlikely(rdma_rw_force_mr))
		return true;
	return false;
}

static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
					   bool pi_support)
{
	u32 max_pages;

	if (pi_support)
		max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
	else
		max_pages = dev->attrs.max_fast_reg_page_list_len;

	/* arbitrary limit to avoid allocating gigantic resources */
	return min_t(u32, max_pages, 256);
}

static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
{
	int count = 0;

	if (reg->mr->need_inval) {
		reg->inv_wr.opcode = IB_WR_LOCAL_INV;
		reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
		reg->inv_wr.next = &reg->reg_wr.wr;
		count++;
	} else {
		reg->inv_wr.next = NULL;
	}

	return count;
}

/* Caller must have zero-initialized *reg. */
static int rdma_rw_init_one_mr(struct ib_qp *qp, u32 port_num,
		struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
		u32 sg_cnt, u32 offset)
{
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	u32 nents = min(sg_cnt, pages_per_mr);
	int count = 0, ret;

	reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
	if (!reg->mr)
		return -EAGAIN;

	count += rdma_rw_inv_key(reg);

	ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
	if (ret < 0 || ret < nents) {
		ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
		return -EINVAL;
	}

	reg->reg_wr.wr.opcode = IB_WR_REG_MR;
	reg->reg_wr.mr = reg->mr;
	reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
	if (rdma_protocol_iwarp(qp->device, port_num))
		reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
	count++;

	reg->sge.addr = reg->mr->iova;
	reg->sge.length = reg->mr->length;
	return count;
}

static int rdma_rw_init_reg_wr(struct rdma_rw_reg_ctx *reg,
		struct rdma_rw_reg_ctx *prev, struct ib_qp *qp, u32 port_num,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	if (prev) {
		if (reg->mr->need_inval)
			prev->wr.wr.next = &reg->inv_wr;
		else
			prev->wr.wr.next = &reg->reg_wr.wr;
	}

	reg->reg_wr.wr.next = &reg->wr.wr;

	reg->wr.wr.sg_list = &reg->sge;
	reg->wr.wr.num_sge = 1;
	reg->wr.remote_addr = remote_addr;
	reg->wr.rkey = rkey;

	if (dir == DMA_TO_DEVICE) {
		reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
	} else if (!rdma_cap_read_inv(qp->device, port_num)) {
		reg->wr.wr.opcode = IB_WR_RDMA_READ;
	} else {
		reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
	}

	return 1;
}

static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct rdma_rw_reg_ctx *prev = NULL;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	int i, j, ret = 0, count = 0;

	ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
	ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg) {
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < ctx->nr_ops; i++) {
		struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
		u32 nents = min(sg_cnt, pages_per_mr);

		ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
					  offset);
		if (ret < 0)
			goto out_free;
		count += ret;
		count += rdma_rw_init_reg_wr(reg, prev, qp, port_num,
					     remote_addr, rkey, dir);
		remote_addr += reg->sge.length;
		sg_cnt -= nents;
		for (j = 0; j < nents; j++)
			sg = sg_next(sg);
		prev = reg;
		offset = 0;
	}

	if (prev)
		prev->wr.wr.next = NULL;

	ctx->type = RDMA_RW_MR;
	return count;

out_free:
	while (--i >= 0)
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
	kfree(ctx->reg);
out:
	return ret;
}

static int rdma_rw_init_mr_wrs_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
		struct bvec_iter *iter, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	struct rdma_rw_reg_ctx *prev = NULL;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(dev, qp->integrity_en);
	struct scatterlist *sg;
	int i, ret, count = 0;
	u32 nents = 0;

	ctx->reg = kcalloc(DIV_ROUND_UP(nr_bvec, pages_per_mr),
			   sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg)
		return -ENOMEM;

	/*
	 * Build scatterlist from bvecs using the iterator. This follows
	 * the pattern from __blk_rq_map_sg.
	 */
	ctx->reg[0].sgt.sgl = kmalloc_array(nr_bvec,
					    sizeof(*ctx->reg[0].sgt.sgl),
					    GFP_KERNEL);
	if (!ctx->reg[0].sgt.sgl) {
		ret = -ENOMEM;
		goto out_free_reg;
	}
	sg_init_table(ctx->reg[0].sgt.sgl, nr_bvec);

	for (sg = ctx->reg[0].sgt.sgl; iter->bi_size; sg = sg_next(sg)) {
		struct bio_vec bv = mp_bvec_iter_bvec(bvecs, *iter);

		if (nents >= nr_bvec) {
			ret = -EINVAL;
			goto out_free_sgl;
		}
		sg_set_page(sg, bv.bv_page, bv.bv_len, bv.bv_offset);
		bvec_iter_advance(bvecs, iter, bv.bv_len);
		nents++;
	}
	sg_mark_end(sg_last(ctx->reg[0].sgt.sgl, nents));
	ctx->reg[0].sgt.orig_nents = nents;

	/* DMA map the scatterlist */
	ret = ib_dma_map_sgtable_attrs(dev, &ctx->reg[0].sgt, dir, 0);
	if (ret)
		goto out_free_sgl;

	ctx->nr_ops = DIV_ROUND_UP(ctx->reg[0].sgt.nents, pages_per_mr);

	sg = ctx->reg[0].sgt.sgl;
	nents = ctx->reg[0].sgt.nents;
	for (i = 0; i < ctx->nr_ops; i++) {
		struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
		u32 sge_cnt = min(nents, pages_per_mr);

		ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sge_cnt, 0);
		if (ret < 0)
			goto out_free_mrs;
		count += ret;
		count += rdma_rw_init_reg_wr(reg, prev, qp, port_num,
					     remote_addr, rkey, dir);
		remote_addr += reg->sge.length;
		nents -= sge_cnt;
		sg += sge_cnt;
		prev = reg;
	}

	if (prev)
		prev->wr.wr.next = NULL;

	ctx->type = RDMA_RW_MR;
	return count;

out_free_mrs:
	while (--i >= 0)
		ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
	ib_dma_unmap_sgtable_attrs(dev, &ctx->reg[0].sgt, dir, 0);
out_free_sgl:
	kfree(ctx->reg[0].sgt.sgl);
out_free_reg:
	kfree(ctx->reg);
	return ret;
}

static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 sg_cnt, u32 offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
		      qp->max_read_sge;
	struct ib_sge *sge;
	u32 total_len = 0, i, j;

	ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);

	ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
	if (!ctx->map.sges)
		goto out;

	ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
	if (!ctx->map.wrs)
		goto out_free_sges;

	for (i = 0; i < ctx->nr_ops; i++) {
		struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
		u32 nr_sge = min(sg_cnt, max_sge);

		if (dir == DMA_TO_DEVICE)
			rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
		else
			rdma_wr->wr.opcode = IB_WR_RDMA_READ;
		rdma_wr->remote_addr = remote_addr + total_len;
		rdma_wr->rkey = rkey;
		rdma_wr->wr.num_sge = nr_sge;
		rdma_wr->wr.sg_list = sge;

		for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
			sge->addr = sg_dma_address(sg) + offset;
			sge->length = sg_dma_len(sg) - offset;
			sge->lkey = qp->pd->local_dma_lkey;

			total_len += sge->length;
			sge++;
			sg_cnt--;
			offset = 0;
		}

		rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
			&ctx->map.wrs[i + 1].wr : NULL;
	}

	ctx->type = RDMA_RW_MULTI_WR;
	return ctx->nr_ops;

out_free_sges:
	kfree(ctx->map.sges);
out:
	return -ENOMEM;
}

static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_rdma_wr *rdma_wr = &ctx->single.wr;

	ctx->nr_ops = 1;

	ctx->single.sge.lkey = qp->pd->local_dma_lkey;
	ctx->single.sge.addr = sg_dma_address(sg) + offset;
	ctx->single.sge.length = sg_dma_len(sg) - offset;

	memset(rdma_wr, 0, sizeof(*rdma_wr));
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	rdma_wr->wr.sg_list = &ctx->single.sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;

	ctx->type = RDMA_RW_SINGLE_WR;
	return 1;
}

static int rdma_rw_init_single_wr_bvec(struct rdma_rw_ctx *ctx,
		struct ib_qp *qp, const struct bio_vec *bvecs,
		struct bvec_iter *iter, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
	struct bio_vec bv = mp_bvec_iter_bvec(bvecs, *iter);
	u64 dma_addr;

	ctx->nr_ops = 1;

	dma_addr = ib_dma_map_bvec(dev, &bv, dir);
	if (ib_dma_mapping_error(dev, dma_addr))
		return -ENOMEM;

	ctx->single.sge.lkey = qp->pd->local_dma_lkey;
	ctx->single.sge.addr = dma_addr;
	ctx->single.sge.length = bv.bv_len;

	memset(rdma_wr, 0, sizeof(*rdma_wr));
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	rdma_wr->wr.sg_list = &ctx->single.sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;

	ctx->type = RDMA_RW_SINGLE_WR;
	return 1;
}

static int rdma_rw_init_map_wrs_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		const struct bio_vec *bvecs, u32 nr_bvec, struct bvec_iter *iter,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
		      qp->max_read_sge;
	struct ib_sge *sge;
	u32 total_len = 0, i, j;
	u32 mapped_bvecs = 0;
	u32 nr_ops = DIV_ROUND_UP(nr_bvec, max_sge);
	size_t sges_size = array_size(nr_bvec, sizeof(*ctx->map.sges));
	size_t wrs_offset = ALIGN(sges_size, __alignof__(*ctx->map.wrs));
	size_t wrs_size = array_size(nr_ops, sizeof(*ctx->map.wrs));
	void *mem;

	if (sges_size == SIZE_MAX || wrs_size == SIZE_MAX ||
	    check_add_overflow(wrs_offset, wrs_size, &wrs_size))
		return -ENOMEM;

	mem = kzalloc(wrs_size, GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	ctx->map.sges = sge = mem;
	ctx->map.wrs = mem + wrs_offset;

	for (i = 0; i < nr_ops; i++) {
		struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
		u32 nr_sge = min(nr_bvec - mapped_bvecs, max_sge);

		if (dir == DMA_TO_DEVICE)
			rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
		else
			rdma_wr->wr.opcode = IB_WR_RDMA_READ;
		rdma_wr->remote_addr = remote_addr + total_len;
		rdma_wr->rkey = rkey;
		rdma_wr->wr.num_sge = nr_sge;
		rdma_wr->wr.sg_list = sge;

		for (j = 0; j < nr_sge; j++) {
			struct bio_vec bv = mp_bvec_iter_bvec(bvecs, *iter);
			u64 dma_addr;

			dma_addr = ib_dma_map_bvec(dev, &bv, dir);
			if (ib_dma_mapping_error(dev, dma_addr))
				goto out_unmap;

			mapped_bvecs++;
			sge->addr = dma_addr;
			sge->length = bv.bv_len;
			sge->lkey = qp->pd->local_dma_lkey;

			total_len += bv.bv_len;
			sge++;

			bvec_iter_advance_single(bvecs, iter, bv.bv_len);
		}

		rdma_wr->wr.next = i + 1 < nr_ops ?
			&ctx->map.wrs[i + 1].wr : NULL;
	}

	ctx->nr_ops = nr_ops;
	ctx->type = RDMA_RW_MULTI_WR;
	return nr_ops;

out_unmap:
	for (i = 0; i < mapped_bvecs; i++)
		ib_dma_unmap_bvec(dev, ctx->map.sges[i].addr,
				  ctx->map.sges[i].length, dir);
	kfree(ctx->map.sges);
	return -ENOMEM;
}

/*
 * Try to use the two-step IOVA API to map bvecs into a contiguous DMA range.
 * This reduces IOTLB sync overhead by doing one sync at the end instead of
 * one per bvec, and produces a contiguous DMA address range that can be
 * described by a single SGE.
 *
 * Returns the number of WQEs (always 1) on success, -EOPNOTSUPP if IOVA
 * mapping is not available, or another negative error code on failure.
 */
static int rdma_rw_init_iova_wrs_bvec(struct rdma_rw_ctx *ctx,
		struct ib_qp *qp, const struct bio_vec *bvec,
		struct bvec_iter *iter, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	struct device *dma_dev = dev->dma_device;
	size_t total_len = iter->bi_size;
	struct bio_vec first_bv;
	size_t mapped_len = 0;
	int ret;

	/* Virtual DMA devices cannot support IOVA allocators */
	if (ib_uses_virt_dma(dev))
		return -EOPNOTSUPP;

	/* Try to allocate contiguous IOVA space */
	first_bv = mp_bvec_iter_bvec(bvec, *iter);
	if (!dma_iova_try_alloc(dma_dev, &ctx->iova.state,
				bvec_phys(&first_bv), total_len))
		return -EOPNOTSUPP;

	/* Link all bvecs into the IOVA space */
	while (iter->bi_size) {
		struct bio_vec bv = mp_bvec_iter_bvec(bvec, *iter);

		ret = dma_iova_link(dma_dev, &ctx->iova.state, bvec_phys(&bv),
				    mapped_len, bv.bv_len, dir, 0);
		if (ret)
			goto out_destroy;

		mapped_len += bv.bv_len;
		bvec_iter_advance(bvec, iter, bv.bv_len);
	}

	/* Sync the IOTLB once for all linked pages */
	ret = dma_iova_sync(dma_dev, &ctx->iova.state, 0, mapped_len);
	if (ret)
		goto out_destroy;

	ctx->iova.mapped_len = mapped_len;

	/* Single SGE covers the entire contiguous IOVA range */
	ctx->iova.sge.addr = ctx->iova.state.addr;
	ctx->iova.sge.length = mapped_len;
	ctx->iova.sge.lkey = qp->pd->local_dma_lkey;

	/* Single WR for the whole transfer */
	memset(&ctx->iova.wr, 0, sizeof(ctx->iova.wr));
	if (dir == DMA_TO_DEVICE)
		ctx->iova.wr.wr.opcode = IB_WR_RDMA_WRITE;
	else
		ctx->iova.wr.wr.opcode = IB_WR_RDMA_READ;
	ctx->iova.wr.wr.num_sge = 1;
	ctx->iova.wr.wr.sg_list = &ctx->iova.sge;
	ctx->iova.wr.remote_addr = remote_addr;
	ctx->iova.wr.rkey = rkey;

	ctx->type = RDMA_RW_IOVA;
	ctx->nr_ops = 1;
	return 1;

out_destroy:
	/*
	 * dma_iova_destroy() expects the actual mapped length, not the
	 * total allocation size. It unlinks only the successfully linked
	 * range and frees the entire IOVA allocation.
	 */
	dma_iova_destroy(dma_dev, &ctx->iova.state, mapped_len, dir, 0);
	return ret;
}

/**
 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
 * @ctx: context to initialize
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @sg_offset: current byte offset into @sg
 * @remote_addr:remote address to read/write (relative to @rkey)
 * @rkey: remote key to operate on
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
		struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	struct sg_table sgt = {
		.sgl = sg,
		.orig_nents = sg_cnt,
	};
	int ret;

	ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
	if (ret)
		return ret;
	sg_cnt = sgt.nents;

	/*
	 * Skip to the S/G entry that sg_offset falls into:
	 */
	for (;;) {
		u32 len = sg_dma_len(sg);

		if (sg_offset < len)
			break;

		sg = sg_next(sg);
		sg_offset -= len;
		sg_cnt--;
	}

	ret = -EIO;
	if (WARN_ON_ONCE(sg_cnt == 0))
		goto out_unmap_sg;

	if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
		ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
				sg_offset, remote_addr, rkey, dir);
	} else if (sg_cnt > 1) {
		ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
				remote_addr, rkey, dir);
	} else {
		ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
				remote_addr, rkey, dir);
	}

	if (ret < 0)
		goto out_unmap_sg;
	return ret;

out_unmap_sg:
	ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);

/**
 * rdma_rw_ctx_init_bvec - initialize a RDMA READ/WRITE context from bio_vec
 * @ctx: context to initialize
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @bvecs: bio_vec array to READ/WRITE from/to
 * @nr_bvec: number of entries in @bvecs
 * @iter: bvec iterator describing offset and length
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey: remote key to operate on
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Maps the bio_vec array directly, avoiding intermediate scatterlist
 * conversion. Supports MR registration for iWARP devices and force_mr mode.
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code:
 *
 * * -EINVAL - @nr_bvec is zero or @iter.bi_size is zero
 * * -ENOMEM - DMA mapping or memory allocation failed
 */
int rdma_rw_ctx_init_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u32 port_num, const struct bio_vec *bvecs, u32 nr_bvec,
		struct bvec_iter iter, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	int ret;

	if (nr_bvec == 0 || iter.bi_size == 0)
		return -EINVAL;

	/*
	 * iWARP requires MR registration for all RDMA READs. The force_mr
	 * debug option also mandates MR usage.
	 */
	if (dir == DMA_FROM_DEVICE && rdma_protocol_iwarp(dev, port_num))
		return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs,
						nr_bvec, &iter, remote_addr,
						rkey, dir);
	if (unlikely(rdma_rw_force_mr))
		return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs,
						nr_bvec, &iter, remote_addr,
						rkey, dir);

	if (nr_bvec == 1)
		return rdma_rw_init_single_wr_bvec(ctx, qp, bvecs, &iter,
						   remote_addr, rkey, dir);

	/*
	 * Try IOVA-based mapping first for multi-bvec transfers.
	 * IOVA coalesces bvecs into a single DMA-contiguous region,
	 * reducing the number of WRs needed and avoiding MR overhead.
	 */
	ret = rdma_rw_init_iova_wrs_bvec(ctx, qp, bvecs, &iter, remote_addr,
					 rkey, dir);
	if (ret != -EOPNOTSUPP)
		return ret;

	/*
	 * IOVA mapping not available. Check if MR registration provides
	 * better performance than multiple SGE entries.
	 */
	if (rdma_rw_io_needs_mr(dev, port_num, dir, nr_bvec))
		return rdma_rw_init_mr_wrs_bvec(ctx, qp, port_num, bvecs,
						nr_bvec, &iter, remote_addr,
						rkey, dir);

	return rdma_rw_init_map_wrs_bvec(ctx, qp, bvecs, nr_bvec, &iter,
					 remote_addr, rkey, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_init_bvec);

/**
 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 * @ctx: context to initialize
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist to READ/WRITE from/to
 * @sg_cnt: number of entries in @sg
 * @prot_sg: scatterlist to READ/WRITE protection information from/to
 * @prot_sg_cnt: number of entries in @prot_sg
 * @sig_attrs: signature offloading algorithms
 * @remote_addr:remote address to read/write (relative to @rkey)
 * @rkey: remote key to operate on
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the workqueue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u32 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		struct ib_sig_attrs *sig_attrs,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
						    qp->integrity_en);
	struct sg_table sgt = {
		.sgl = sg,
		.orig_nents = sg_cnt,
	};
	struct sg_table prot_sgt = {
		.sgl = prot_sg,
		.orig_nents = prot_sg_cnt,
	};
	struct ib_rdma_wr *rdma_wr;
	int count = 0, ret;

	if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
		pr_err("SG count too large: sg_cnt=%u, prot_sg_cnt=%u, pages_per_mr=%u\n",
		       sg_cnt, prot_sg_cnt, pages_per_mr);
		return -EINVAL;
	}

	ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
	if (ret)
		return ret;

	if (prot_sg_cnt) {
		ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0);
		if (ret)
			goto out_unmap_sg;
	}

	ctx->type = RDMA_RW_SIG_MR;
	ctx->nr_ops = 1;
	ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
	if (!ctx->reg) {
		ret = -ENOMEM;
		goto out_unmap_prot_sg;
	}

	ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
	if (!ctx->reg->mr) {
		ret = -EAGAIN;
		goto out_free_ctx;
	}

	count += rdma_rw_inv_key(ctx->reg);

	memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));

	ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg,
			      prot_sgt.nents, NULL, SZ_4K);
	if (unlikely(ret)) {
		pr_err("failed to map PI sg (%u)\n",
		       sgt.nents + prot_sgt.nents);
		goto out_destroy_sig_mr;
	}

	ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
	ctx->reg->reg_wr.wr.wr_cqe = NULL;
	ctx->reg->reg_wr.wr.num_sge = 0;
	ctx->reg->reg_wr.wr.send_flags = 0;
	ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
	if (rdma_protocol_iwarp(qp->device, port_num))
		ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
	ctx->reg->reg_wr.mr = ctx->reg->mr;
	ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
	count++;

	ctx->reg->sge.addr = ctx->reg->mr->iova;
	ctx->reg->sge.length = ctx->reg->mr->length;
	if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
		ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;

	rdma_wr = &ctx->reg->wr;
	rdma_wr->wr.sg_list = &ctx->reg->sge;
	rdma_wr->wr.num_sge = 1;
	rdma_wr->remote_addr = remote_addr;
	rdma_wr->rkey = rkey;
	if (dir == DMA_TO_DEVICE)
		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
	else
		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
	ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
	count++;

	return count;

out_destroy_sig_mr:
	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
	kfree(ctx->reg);
out_unmap_prot_sg:
	if (prot_sgt.nents)
		ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0);
out_unmap_sg:
	ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
	return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);

/*
 * Now that we are going to post the WRs we can update the lkey and need_inval
 * state on the MRs. If we were doing this at init time, we would get double
 * or missing invalidations if a context was initialized but not actually
 * posted.
 */
static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
{
	reg->mr->need_inval = need_inval;
	ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
	reg->reg_wr.key = reg->mr->lkey;
	reg->sge.lkey = reg->mr->lkey;
}

/**
 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
 * @ctx: context to operate on
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @cqe: completion queue entry for the last WR
 * @chain_wr: WR to append to the posted chain
 *
 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 * @ctx, as well as any memory registration operations needed. If @chain_wr
 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 * If @chain_wr is not set @cqe must be set so that the caller gets a
 * completion notification.
 */
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr, *last_wr;
	int i;

	switch (ctx->type) {
	case RDMA_RW_SIG_MR:
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++) {
			rdma_rw_update_lkey(&ctx->reg[i],
				ctx->reg[i].wr.wr.opcode !=
					IB_WR_RDMA_READ_WITH_INV);
		}

		if (ctx->reg[0].inv_wr.next)
			first_wr = &ctx->reg[0].inv_wr;
		else
			first_wr = &ctx->reg[0].reg_wr.wr;
		last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
		break;
	case RDMA_RW_IOVA:
		first_wr = &ctx->iova.wr.wr;
		last_wr = &ctx->iova.wr.wr;
		break;
	case RDMA_RW_MULTI_WR:
		first_wr = &ctx->map.wrs[0].wr;
		last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
		break;
	case RDMA_RW_SINGLE_WR:
		first_wr = &ctx->single.wr.wr;
		last_wr = &ctx->single.wr.wr;
		break;
	default:
		BUG();
	}

	if (chain_wr) {
		last_wr->next = chain_wr;
	} else {
		last_wr->wr_cqe = cqe;
		last_wr->send_flags |= IB_SEND_SIGNALED;
	}

	return first_wr;
}
EXPORT_SYMBOL(rdma_rw_ctx_wrs);

/**
 * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
 * @ctx: context to operate on
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @cqe: completion queue entry for the last WR
 * @chain_wr: WR to append to the posted chain
 *
 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 * any memory registration operations needed. If @chain_wr is non-NULL the
 * WR it points to will be appended to the chain of WRs posted. If @chain_wr
 * is not set @cqe must be set so that the caller gets a completion
 * notification.
 */
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
		struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
	struct ib_send_wr *first_wr;

	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
	return ib_post_send(qp, first_wr, NULL);
}
EXPORT_SYMBOL(rdma_rw_ctx_post);

/**
 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 * @ctx: context to release
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
			 u32 port_num, struct scatterlist *sg, u32 sg_cnt,
			 enum dma_data_direction dir)
{
	int i;

	switch (ctx->type) {
	case RDMA_RW_MR:
		/* Bvec MR contexts must use rdma_rw_ctx_destroy_bvec() */
		WARN_ON_ONCE(ctx->reg[0].sgt.sgl);
		for (i = 0; i < ctx->nr_ops; i++)
			ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
		kfree(ctx->reg);
		break;
	case RDMA_RW_MULTI_WR:
		kfree(ctx->map.wrs);
		kfree(ctx->map.sges);
		break;
	case RDMA_RW_SINGLE_WR:
		break;
	case RDMA_RW_IOVA:
		/* IOVA contexts must use rdma_rw_ctx_destroy_bvec() */
		WARN_ON_ONCE(1);
		return;
	default:
		BUG();
		break;
	}

	ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);

/**
 * rdma_rw_ctx_destroy_bvec - release resources from rdma_rw_ctx_init_bvec
 * @ctx: context to release
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound (unused)
 * @bvecs: bio_vec array that was used for the READ/WRITE (unused)
 * @nr_bvec: number of entries in @bvecs
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Releases all resources allocated by a successful rdma_rw_ctx_init_bvec()
 * call. Must not be called if rdma_rw_ctx_init_bvec() returned an error.
 *
 * The @port_num and @bvecs parameters are unused but present for API
 * symmetry with rdma_rw_ctx_destroy().
 */
void rdma_rw_ctx_destroy_bvec(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
			      u32 __maybe_unused port_num,
			      const struct bio_vec __maybe_unused *bvecs,
			      u32 nr_bvec, enum dma_data_direction dir)
{
	struct ib_device *dev = qp->pd->device;
	u32 i;

	switch (ctx->type) {
	case RDMA_RW_MR:
		for (i = 0; i < ctx->nr_ops; i++)
			ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
		ib_dma_unmap_sgtable_attrs(dev, &ctx->reg[0].sgt, dir, 0);
		kfree(ctx->reg[0].sgt.sgl);
		kfree(ctx->reg);
		break;
	case RDMA_RW_IOVA:
		dma_iova_destroy(dev->dma_device, &ctx->iova.state,
				 ctx->iova.mapped_len, dir, 0);
		break;
	case RDMA_RW_MULTI_WR:
		for (i = 0; i < nr_bvec; i++)
			ib_dma_unmap_bvec(dev, ctx->map.sges[i].addr,
					  ctx->map.sges[i].length, dir);
		kfree(ctx->map.sges);
		break;
	case RDMA_RW_SINGLE_WR:
		ib_dma_unmap_bvec(dev, ctx->single.sge.addr,
				  ctx->single.sge.length, dir);
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_bvec);

/**
 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 *	rdma_rw_ctx_signature_init
 * @ctx: context to release
 * @qp: queue pair to operate on
 * @port_num: port num to which the connection is bound
 * @sg: scatterlist that was used for the READ/WRITE
 * @sg_cnt: number of entries in @sg
 * @prot_sg: scatterlist that was used for the READ/WRITE of the PI
 * @prot_sg_cnt: number of entries in @prot_sg
 * @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u32 port_num, struct scatterlist *sg, u32 sg_cnt,
		struct scatterlist *prot_sg, u32 prot_sg_cnt,
		enum dma_data_direction dir)
{
	if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
		return;

	ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
	kfree(ctx->reg);

	if (prot_sg_cnt)
		ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
	ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);

/**
 * rdma_rw_mr_factor - return number of MRs required for a payload
 * @device: device handling the connection
 * @port_num: port num to which the connection is bound
 * @maxpages: maximum payload pages per rdma_rw_ctx
 *
 * Returns the number of MRs the device requires to move @maxpages
 * pages. The returned value is used during transport creation to
 * compute max_rdma_ctxs and the size of the transport's Send and
 * Send Completion Queues.
 */
unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
			       unsigned int maxpages)
{
	unsigned int mr_pages;

	if (rdma_rw_can_use_mr(device, port_num))
		mr_pages = rdma_rw_fr_page_list_len(device, false);
	else
		mr_pages = device->attrs.max_sge_rd;
	return DIV_ROUND_UP(maxpages, mr_pages);
}
EXPORT_SYMBOL(rdma_rw_mr_factor);

/**
 * rdma_rw_max_send_wr - compute max Send WRs needed for RDMA R/W contexts
 * @dev: RDMA device
 * @port_num: port number
 * @max_rdma_ctxs: number of rdma_rw_ctx structures
 * @create_flags: QP create flags (pass IB_QP_CREATE_INTEGRITY_EN if
 *	data integrity will be enabled on the QP)
 *
 * Returns the total number of Send Queue entries needed for
 * @max_rdma_ctxs. The result accounts for memory registration and
 * invalidation work requests when the device requires them.
 *
 * ULPs use this to size Send Queues and Send CQs before creating a
 * Queue Pair.
 */
unsigned int rdma_rw_max_send_wr(struct ib_device *dev, u32 port_num,
				 unsigned int max_rdma_ctxs, u32 create_flags)
{
	unsigned int factor = 1;
	unsigned int result;

	if (create_flags & IB_QP_CREATE_INTEGRITY_EN ||
	    rdma_rw_can_use_mr(dev, port_num))
		factor += 2;	/* reg + inv */

	if (check_mul_overflow(factor, max_rdma_ctxs, &result))
		return UINT_MAX;
	return result;
}
EXPORT_SYMBOL(rdma_rw_max_send_wr);
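
/*
 * Example (illustrative sketch only, not compiled): a typical ULP flow using
 * the scatterlist-based API, rdma_rw_ctx_init()/rdma_rw_ctx_post()/
 * rdma_rw_ctx_destroy().  "qp", "cqe", "sg", "sg_cnt", "remote_addr" and
 * "rkey" are assumed to be provided by the caller; error handling is
 * abbreviated.
 *
 *	struct rdma_rw_ctx ctx;
 *	int ret;
 *
 *	ret = rdma_rw_ctx_init(&ctx, qp, port_num, sg, sg_cnt, 0,
 *			       remote_addr, rkey, DMA_FROM_DEVICE);
 *	if (ret < 0)
 *		return ret;
 *
 *	ret = rdma_rw_ctx_post(&ctx, qp, port_num, cqe, NULL);
 *	if (ret)
 *		goto out_destroy;
 *
 *	... wait for the completion signalled through cqe ...
 *
 * out_destroy:
 *	rdma_rw_ctx_destroy(&ctx, qp, port_num, sg, sg_cnt, DMA_FROM_DEVICE);
 */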
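
/*
 * Example (illustrative sketch only, not compiled): the bio_vec based flow
 * using rdma_rw_ctx_init_bvec()/rdma_rw_ctx_destroy_bvec().  "bvecs",
 * "nr_bvec" and "iter" are assumed to describe the pages of the request,
 * e.g. as obtained from the block layer.
 *
 *	ret = rdma_rw_ctx_init_bvec(&ctx, qp, port_num, bvecs, nr_bvec, iter,
 *				    remote_addr, rkey, DMA_TO_DEVICE);
 *	if (ret < 0)
 *		return ret;
 *
 *	ret = rdma_rw_ctx_post(&ctx, qp, port_num, cqe, NULL);
 *	...
 *	rdma_rw_ctx_destroy_bvec(&ctx, qp, port_num, bvecs, nr_bvec,
 *				 DMA_TO_DEVICE);
 */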
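
/*
 * Example (illustrative sketch only, not compiled): chaining a ULP send
 * behind the RDMA WRITEs via rdma_rw_ctx_wrs() instead of signalling the RW
 * context itself, as target drivers do when sending a response after the
 * data transfer.  "send_wr" is the ULP's own IB_WR_SEND and must carry its
 * own wr_cqe.
 *
 *	first_wr = rdma_rw_ctx_wrs(&ctx, qp, port_num, NULL, &send_wr);
 *	ret = ib_post_send(qp, first_wr, NULL);
 *
 * Passing &send_wr as the chain_wr argument of rdma_rw_ctx_post() is
 * equivalent.
 */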
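
/*
 * Example (illustrative sketch only, not compiled): sizing queues at
 * transport creation time with rdma_rw_mr_factor() and rdma_rw_max_send_wr().
 * "queue_depth" and "max_pages" (the largest payload in pages per I/O) are
 * ULP-specific assumptions.
 *
 *	ctxs_per_io   = rdma_rw_mr_factor(dev, port_num, max_pages);
 *	max_rdma_ctxs = queue_depth * ctxs_per_io;
 *	send_wrs      = rdma_rw_max_send_wr(dev, port_num, max_rdma_ctxs, 0) +
 *			queue_depth;	(plus the ULP's own sends)
 */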

void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
	unsigned int factor = 1;

	WARN_ON_ONCE(attr->port_num == 0);

	/*
	 * If the device uses MRs to perform RDMA READ or WRITE operations,
	 * or if data integrity is enabled, account for registration and
	 * invalidation work requests.
	 */
	if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
	    rdma_rw_can_use_mr(dev, attr->port_num))
		factor += 2;	/* reg + inv */

	attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;

	/*
	 * The device might not support all we need, and we'll have to
	 * live with what we get.
	 */
	attr->cap.max_send_wr =
		min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
}

int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
	struct ib_device *dev = qp->pd->device;
	u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
	int ret = 0;

	if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
		nr_sig_mrs = attr->cap.max_rdma_ctxs;
		nr_mrs = attr->cap.max_rdma_ctxs;
		max_num_sg = rdma_rw_fr_page_list_len(dev, true);
	} else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
		nr_mrs = attr->cap.max_rdma_ctxs;
		max_num_sg = rdma_rw_fr_page_list_len(dev, false);
	}

	if (nr_mrs) {
		ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
				      IB_MR_TYPE_MEM_REG,
				      max_num_sg, 0);
		if (ret) {
			pr_err("%s: failed to allocate %u MRs\n",
			       __func__, nr_mrs);
			return ret;
		}
	}

	if (nr_sig_mrs) {
		ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
				      IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
		if (ret) {
			pr_err("%s: failed to allocate %u SIG MRs\n",
			       __func__, nr_sig_mrs);
			goto out_free_rdma_mrs;
		}
	}

	return 0;

out_free_rdma_mrs:
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
	return ret;
}

void rdma_rw_cleanup_mrs(struct ib_qp *qp)
{
	ib_mr_pool_destroy(qp, &qp->sig_mrs);
	ib_mr_pool_destroy(qp, &qp->rdma_mrs);
}
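
/*
 * Example (illustrative sketch only, not compiled): rdma_rw_init_qp(),
 * rdma_rw_init_mrs() and rdma_rw_cleanup_mrs() are invoked by the RDMA core,
 * not by ULPs directly.  A ULP opts in simply by setting max_rdma_ctxs in its
 * QP init attributes; ib_create_qp() then grows the send queue and
 * pre-allocates the MR pools as needed.
 *
 *	struct ib_qp_init_attr init_attr = {
 *		.cap.max_send_wr   = queue_depth,
 *		.cap.max_rdma_ctxs = max_rdma_ctxs,
 *		.qp_type	   = IB_QPT_RC,
 *		.send_cq	   = send_cq,
 *		.recv_cq	   = recv_cq,
 *		.sq_sig_type	   = IB_SIGNAL_REQ_WR,
 *		.port_num	   = port_num,
 *	};
 *
 *	qp = ib_create_qp(pd, &init_attr);
 */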