// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */

#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "umr.h"
#include "wr.h"

/*
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel modules memory
 */
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);

/* Mkey mask bits needed to enable (un-free) an mkey via UMR. */
static __be64 get_umr_enable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_KEY |
		 MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}

/* Mkey mask bit needed to disable (free) an mkey via UMR. */
static __be64 get_umr_disable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}

/* Mkey mask bits for updating length, page size and start address. */
static __be64 get_umr_update_translation_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_LEN |
		 MLX5_MKEY_MASK_PAGE_SIZE |
		 MLX5_MKEY_MASK_START_ADDR;

	return cpu_to_be64(result);
}

/*
 * Mkey mask bits for updating access rights. Atomic and relaxed-ordering
 * bits are included only when the device capabilities report them.
 */
static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
{
	u64 result;

	result = MLX5_MKEY_MASK_LR |
		 MLX5_MKEY_MASK_LW |
		 MLX5_MKEY_MASK_RR |
		 MLX5_MKEY_MASK_RW;

	if (MLX5_CAP_GEN(dev->mdev, atomic))
		result |= MLX5_MKEY_MASK_A;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;

	return cpu_to_be64(result);
}

/* Mkey mask bit for updating the PD of an mkey. */
static __be64 get_umr_update_pd_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_PD;

	return cpu_to_be64(result);
}

/*
 * Reject an mkey mask that asks UMR to modify properties the device
 * capabilities say UMR may not modify. Returns 0 if allowed, -EPERM if not.
 */
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_A &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		return -EPERM;

	return 0;
}

enum {
	/* Max outstanding WRs on the UMR QP; also the UMR semaphore count. */
	MAX_UMR_WR = 128,
};

/*
 * Drive the UMR QP through the RESET -> INIT -> RTR -> RTS state
 * transitions so it can post send WRs.
 */
static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
	struct ib_qp_attr attr = {};
	int ret;

	attr.qp_state = IB_QPS_INIT;
	attr.port_num = 1;
	ret = ib_modify_qp(qp, &attr,
			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTR;

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTS;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
		return ret;
	}

	return 0;
}

/*
 * Lazily allocate the UMR CQ and QP on first use. The qp pointer is
 * checked locklessly first and re-checked under init_lock; it is
 * published last so a non-NULL qp implies a fully initialized umrc.
 */
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
	struct ib_qp_init_attr init_attr = {};
	struct ib_cq *cq;
	struct ib_qp *qp;
	int ret = 0;

	/*
	 * UMR qp is set once, never changed until device unload.
	 * Avoid taking the mutex if initialization is already done.
	 */
	if (dev->umrc.qp)
		return 0;

	mutex_lock(&dev->umrc.init_lock);
	/* First user allocates the UMR resources. Skip if already allocated. */
	if (dev->umrc.qp)
		goto unlock;

	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq)) {
		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
		ret = PTR_ERR(cq);
		goto unlock;
	}

	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr.cap.max_send_wr = MAX_UMR_WR;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
	init_attr.port_num = 1;
	qp = ib_create_qp(dev->umrc.pd, &init_attr);
	if (IS_ERR(qp)) {
		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
		ret = PTR_ERR(qp);
		goto destroy_cq;
	}

	ret = mlx5r_umr_qp_rst2rts(dev, qp);
	if (ret)
		goto destroy_qp;

	dev->umrc.cq = cq;

	sema_init(&dev->umrc.sem, MAX_UMR_WR);
	mutex_init(&dev->umrc.lock);
	dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
	/* Publish the qp last; it gates the lockless fast path above. */
	dev->umrc.qp = qp;

	mutex_unlock(&dev->umrc.init_lock);
	return 0;

destroy_qp:
	ib_destroy_qp(qp);
destroy_cq:
	ib_free_cq(cq);
unlock:
	mutex_unlock(&dev->umrc.init_lock);
	return ret;
}

/*
 * Tear down the UMR QP and CQ created by mlx5r_umr_resource_init().
 * A state of MLX5_UMR_STATE_UNINIT means init never ran, so there is
 * nothing to free.
 */
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
	if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
		return;
	mutex_destroy(&dev->umrc.lock);
	/* After device init, UMR cq/qp are not unset during the lifetime. */
	ib_destroy_qp(dev->umrc.qp);
	ib_free_cq(dev->umrc.cq);
}

/* Allocate the PD used by the UMR QP and set up the init lock. */
int mlx5r_umr_init(struct mlx5_ib_dev *dev)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(&dev->ib_dev, 0);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		return PTR_ERR(pd);
	}
	dev->umrc.pd = pd;

	mutex_init(&dev->umrc.init_lock);

	return 0;
}

/* Release the UMR PD; a NULL pd means mlx5r_umr_init() never succeeded. */
void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
{
	if (!dev->umrc.pd)
		return;

	mutex_destroy(&dev->umrc.init_lock);
	ib_dealloc_pd(dev->umrc.pd);
}

/*
 * Post a single UMR WQE on the dedicated UMR QP, under the SQ spinlock.
 * @with_data selects whether the trailing data segment of the WQE is
 * included in the copied WQE size. The completion is delivered to @cqe.
 */
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
			       struct mlx5r_umr_wqe *wqe, bool with_data)
{
	unsigned int wqe_size =
		with_data ? sizeof(struct mlx5r_umr_wqe) :
			    sizeof(struct mlx5r_umr_wqe) -
				    sizeof(struct mlx5_wqe_data_seg);
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_ctrl_seg *ctrl;
	union {
		struct ib_cqe *ib_cqe;
		u64 wr_id;
	} id;
	void *cur_edge, *seg;
	unsigned long flags;
	unsigned int idx;
	int size, err;

	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	spin_lock_irqsave(&qp->sq.lock, flags);

	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
			      cpu_to_be32(mkey), false, false);
	if (WARN_ON(err))
		goto out;

	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;

	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);

	/* Smuggle the cqe pointer through the 64-bit wr_id. */
	id.ib_cqe = cqe;
	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
			 MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);

	mlx5r_ring_db(qp, 1, ctrl);

out:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}

/*
 * Recover the UMR QP after a WR completed with an unexpected error:
 * block new WRs, drain the QP using the failed WR as a flush barrier,
 * then cycle the QP through RESET back to RTS. On any failure the umrc
 * is left in MLX5_UMR_STATE_ERR.
 */
static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
			     struct mlx5r_umr_context *umr_context,
			     struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_qp_attr attr;
	int err;

	mutex_lock(&umrc->lock);
	/* Preventing any further WRs to be sent now */
	if (umrc->state != MLX5_UMR_STATE_RECOVER) {
		mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
			     umrc->state);
		umrc->state = MLX5_UMR_STATE_RECOVER;
	}
	mutex_unlock(&umrc->lock);

	/* Sending a final/barrier WR (the failed one) and wait for its completion.
	 * This will ensure that all the previous WRs got a completion before
	 * we set the QP state to RESET.
	 */
	err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
				  with_data);
	if (err) {
		mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
		goto err;
	}

	/* Since the QP is in an error state, it will only receive
	 * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier
	 * we don't care about its status.
	 */
	wait_for_completion(&umr_context->done);

	attr.qp_state = IB_QPS_RESET;
	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
	if (err) {
		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
		goto err;
	}

	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
	if (err) {
		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
		goto err;
	}

	umrc->state = MLX5_UMR_STATE_ACTIVE;
	return 0;

err:
	umrc->state = MLX5_UMR_STATE_ERR;
	return err;
}

/*
 * CQ completion handler for UMR WRs: record the WC status and wake the
 * waiter blocked in mlx5r_umr_post_send_wait()/mlx5r_umr_recover().
 *
 * NOTE(review): this uses struct mlx5_ib_umr_context while the rest of
 * this file uses struct mlx5r_umr_context — confirm both name the same
 * (or layout-compatible) type so the container_of() is valid.
 */
static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

/* Prepare a UMR context: hook the done callback and init the completion. */
static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
{
	context->cqe.done = mlx5r_umr_done;
	init_completion(&context->done);
}

/*
 * Post a UMR WQE and sleep until its completion arrives. Throttled by
 * the umrc semaphore (MAX_UMR_WR concurrent WRs). Spins (with sleeps)
 * while another thread runs recovery; retries on flush errors; on an
 * unexpected WC status triggers mlx5r_umr_recover() and returns -EFAULT.
 */
static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
				    struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5r_umr_context umr_context;
	int err;

	err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
	if (WARN_ON(err))
		return err;

	mlx5r_umr_init_context(&umr_context);

	down(&umrc->sem);
	while (true) {
		mutex_lock(&umrc->lock);
		if (umrc->state == MLX5_UMR_STATE_ERR) {
			mutex_unlock(&umrc->lock);
			err = -EFAULT;
			break;
		}

		if (umrc->state == MLX5_UMR_STATE_RECOVER) {
			/* Recovery in progress elsewhere; back off and retry. */
			mutex_unlock(&umrc->lock);
			usleep_range(3000, 5000);
			continue;
		}

		err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
					  with_data);
		mutex_unlock(&umrc->lock);
		if (err) {
			mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
				     err);
			break;
		}

		wait_for_completion(&umr_context.done);

		if (umr_context.status == IB_WC_SUCCESS)
			break;

		/* Flushed by a recovery elsewhere; resubmit the WR. */
		if (umr_context.status == IB_WC_WR_FLUSH_ERR)
			continue;

		WARN_ON_ONCE(1);
		mlx5_ib_warn(dev,
			"reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
			umr_context.status, mkey);
		err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
		if (err)
			mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
				     err);
		err = -EFAULT;
		break;
	}
	up(&umrc->sem);
	return err;
}

/**
 * mlx5r_umr_revoke_mr - Fence all DMA on the MR
 * @mr: The MR to fence
 *
 * Upon return the NIC will not be doing any DMA to the pages under the MR,
 * and any DMA in progress will be completed. Failure of this function
 * indicates the HW has failed catastrophically.
 */
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};

	/* In internal error the HW does no DMA anyway; nothing to revoke. */
	if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	/* Free the mkey and repoint it at the UMR PD. */
	MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}

/*
 * Translate IB access flags into the mkey segment access bits. Relaxed
 * ordering read is set only if the device or the PCIe link supports it.
 */
static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
				       struct mlx5_mkey_seg *seg,
				       unsigned int access_flags)
{
	bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) &&
		       (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
			pcie_relaxed_ordering_enabled(dev->mdev->pdev));

	MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, seg, lr, 1);
	MLX5_SET(mkc, seg, relaxed_ordering_write,
		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
	MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read);
}

/*
 * Re-register an MR's PD and access flags via UMR without re-sending
 * the translation table. Updates mr->access_flags on success.
 */
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
			      int access_flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};
	int err;

	wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
	if (err)
		return err;

	mr->access_flags = access_flags;
	return 0;
}

#define MLX5_MAX_UMR_CHUNK \
	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000

/*
 * Allocate a temporary buffer to hold the per-page information to transfer to
 * HW. For efficiency this should be as large as it can be, but buffer
 * allocation failure is not allowed, so try smaller sizes.
 */
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
	const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size;
	size_t size;
	void *res = NULL;

	static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0);

	/*
	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
	 * allocation can't trigger any kind of reclaim.
	 */
	might_sleep();

	gfp_mask |= __GFP_ZERO | __GFP_NORETRY;

	/*
	 * If the system already has a suitable high order page then just use
	 * that, but don't try hard to create one. This max is about 1M, so a
	 * free x86 huge page will satisfy it.
	 */
	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
		     MLX5_MAX_UMR_CHUNK);
	*nents = size / ent_size;
	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
				       get_order(size));
	if (res)
		return res;

	/* Fall back to a smaller spare chunk size. */
	if (size > MLX5_SPARE_UMR_CHUNK) {
		size = MLX5_SPARE_UMR_CHUNK;
		*nents = size / ent_size;
		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
					       get_order(size));
		if (res)
			return res;
	}

	/* Then a single page. */
	*nents = PAGE_SIZE / ent_size;
	res = (void *)__get_free_page(gfp_mask);
	if (res)
		return res;

	/*
	 * Last resort: the shared emergency page, serialized by its mutex
	 * (released in mlx5r_umr_free_xlt()).
	 */
	mutex_lock(&xlt_emergency_page_mutex);
	memset(xlt_emergency_page, 0, PAGE_SIZE);
	return xlt_emergency_page;
}

/*
 * Free an XLT buffer from mlx5r_umr_alloc_xlt(); the emergency page is
 * not freed, only its mutex is dropped.
 */
static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
	if (xlt == xlt_emergency_page) {
		mutex_unlock(&xlt_emergency_page_mutex);
		return;
	}

	free_pages((unsigned long)xlt, get_order(length));
}

/* DMA-unmap and free an XLT buffer described by @sg. */
static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
				     struct ib_sge *sg)
{
	struct device *ddev = &dev->mdev->pdev->dev;

	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
	mlx5r_umr_free_xlt(xlt, sg->length);
}

/*
 * Create an XLT buffer ready for submission.
 */
static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
				  size_t nents, size_t ent_size,
				  unsigned int flags)
{
	struct device *ddev = &dev->mdev->pdev->dev;
	dma_addr_t dma;
	void *xlt;

	xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
				  flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
								   GFP_KERNEL);
	sg->length = nents * ent_size;
	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		mlx5r_umr_free_xlt(xlt, sg->length);
		return NULL;
	}
	sg->addr = dma;
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	return xlt;
}

/* Fill the UMR ctrl segment for an XLT update described by @sg. */
static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				  unsigned int flags, struct ib_sge *sg)
{
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		/* fail if free */
		ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
	else
		/* fail if not free */
		ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
	ctrl_seg->xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
}

/* Fill the mkey segment for an XLT update from the MR's attributes. */
static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
					      struct mlx5_mkey_seg *mkey_seg,
					      struct mlx5_ib_mr *mr,
					      unsigned int page_shift)
{
	mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
	MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
	MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
	MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
	MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
	MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
}

/* Fill the WQE data segment pointing at the DMA-mapped XLT buffer. */
static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
				  struct ib_sge *sg)
{
	data_seg->byte_count = cpu_to_be32(sg->length);
	data_seg->lkey = cpu_to_be32(sg->lkey);
	data_seg->addr = cpu_to_be64(sg->addr);
}

/*
 * Set the XLT byte offset (in octowords, split across two ctrl-segment
 * fields) and enable translation offsetting for a partial XLT update.
 */
static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				    u64 offset)
{
	u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);

	ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}

/*
 * Finalize the WQE for the last XLT chunk: add the enable / PD+access /
 * translation mkey mask bits implied by @flags and refresh the lengths.
 */
static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
				       struct mlx5r_umr_wqe *wqe,
				       struct mlx5_ib_mr *mr, struct ib_sge *sg,
				       unsigned int flags)
{
	bool update_pd_access, update_translation;

	if (flags & MLX5_IB_UPD_XLT_ENABLE)
		wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();

	update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
			   flags & MLX5_IB_UPD_XLT_PD ||
			   flags & MLX5_IB_UPD_XLT_ACCESS;

	if (update_pd_access) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
		wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	}

	update_translation =
		flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;

	if (update_translation) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
		/* A zero-length MR is expressed via the length64 bit. */
		if (!mr->ibmr.length)
			MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
	}

	wqe->ctrl_seg.xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
	wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}

/*
 * Send the MR's DMA address list to HW in XLT-buffer-sized chunks.
 * @dd selects KSM entries against the data-direct PD/mkey instead of
 * MTT entries. Each full buffer is synced and posted, then refilled at
 * an advanced offset; the final partial chunk is zero-padded to
 * MLX5_UMR_FLEX_ALIGNMENT and posted with the finalizing mkey mask.
 */
static int
_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
{
	size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	struct mlx5r_umr_wqe wqe = {};
	struct ib_block_iter biter;
	struct mlx5_ksm *cur_ksm;
	struct mlx5_mtt *cur_mtt;
	size_t orig_sg_length;
	size_t final_size;
	void *curr_entry;
	struct ib_sge sg;
	void *entry;
	u64 offset = 0;
	int err = 0;

	entry = mlx5r_umr_create_xlt(dev, &sg,
				     ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
				     ent_size, flags);
	if (!entry)
		return -ENOMEM;

	orig_sg_length = sg.length;
	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
					  mr->page_shift);
	if (dd) {
		/* Use the data direct internal kernel PD */
		MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
		cur_ksm = entry;
	} else {
		cur_mtt = entry;
	}

	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	curr_entry = entry;
	rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
		/* Buffer full: flush this chunk to HW and restart filling. */
		if (curr_entry == entry + sg.length) {
			dma_sync_single_for_device(ddev, sg.addr, sg.length,
						   DMA_TO_DEVICE);

			err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
						       true);
			if (err)
				goto err;
			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
						DMA_TO_DEVICE);
			offset += sg.length;
			mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
			if (dd)
				cur_ksm = entry;
			else
				cur_mtt = entry;
		}

		if (dd) {
			cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
			cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
			cur_ksm++;
			curr_entry = cur_ksm;
		} else {
			cur_mtt->ptag =
				cpu_to_be64(rdma_block_iter_dma_address(&biter) |
					    MLX5_IB_MTT_PRESENT);
			/* ZAP on a dmabuf MR writes cleared (absent) MTTs. */
			if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
				cur_mtt->ptag = 0;
			cur_mtt++;
			curr_entry = cur_mtt;
		}
	}

	final_size = curr_entry - entry;
	sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
	/* Zero-pad the tail up to the alignment boundary. */
	memset(curr_entry, 0, sg.length - final_size);
	mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);

	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

err:
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
	return err;
}

/* Send the KSM list of a data-direct dmabuf MR to HW via UMR. */
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	/* No invalidation flow is expected */
	if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
		return -EINVAL;

	return _mlx5r_umr_update_mr_pas(mr, flags, true);
}

/*
 * Send the DMA list to the HW for a normal MR using UMR.
 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
 * flag may be used.
 */
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	if (WARN_ON(mr->umem->is_odp))
		return -EINVAL;

	return _mlx5r_umr_update_mr_pas(mr, flags, false);
}

/* True if the device allows UMR on indirect (KLM) mkeys. */
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

/*
 * Update @npages XLT entries of an ODP MR starting at entry @idx via
 * UMR. With MLX5_IB_UPD_XLT_INDIRECT, KLM descriptors are written
 * instead of MTTs (requires device support). Entries are produced by
 * mlx5_odp_populate_xlt() and posted in XLT-buffer-sized batches.
 */
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
			 int page_shift, int flags)
{
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
				? sizeof(struct mlx5_klm)
				: sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size;
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	const int page_mask = page_align - 1;
	struct mlx5r_umr_wqe wqe = {};
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t size_to_map = 0;
	size_t orig_sg_length;
	size_t pages_iter;
	struct ib_sge sg;
	int err = 0;
	void *xlt;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	if (WARN_ON(!mr->umem->is_odp))
		return -EINVAL;

	/* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
	pages_to_map = ALIGN(npages, page_align);

	xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
	if (!xlt)
		return -ENOMEM;

	pages_iter = sg.length / desc_size;
	orig_sg_length = sg.length;

	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

		/* Clamp to the number of ODP pages actually available. */
		pages_to_map = min_t(size_t, pages_to_map, max_pages);
	}

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
					DMA_TO_DEVICE);
		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);
		sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map)
			mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
		mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
	}
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
	return err;
}