// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
		    iova > mr->ibmr.iova + mr->ibmr.length - length)
			return -EFAULT;
		return 0;

	default:
		pr_warn("%s: mr type (%d) not supported\n",
			__func__, mr->type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* Set ibmr->l/rkey and also copy into the private l/rkey.
	 * For user MRs these will always be the same; for cases where
	 * the caller 'owns' the key portion they may differ until the
	 * REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
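	/* Walk the pinned umem pages and record each page's kernel
	 * virtual address and size in the map[]->buf[] tables, moving
	 * to the next rxe_map after every RXE_BUF_PER_MAP entries.
	 */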
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->umem = umem;
	mr->access = access;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_USER;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}

static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->ibmr.iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * an mr object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ?
			((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

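	/* For user and fast-reg MRs, translate iova through the
	 * map[]/buf[] tables built at registration time and copy one
	 * physical buffer at a time.
	 */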
	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd *pd,
	int access,
	struct rxe_dma_info *dma,
	void *addr,
	int length,
	enum rxe_mr_copy_dir dir)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
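	/* The low byte of an lkey/rkey is the random key from
	 * rxe_get_next_key(); the upper bits hold the MR pool index
	 * (see rxe_mr_init()), recovered here with key >> 8.
	 * e.g. pool index 0x1234 with key byte 0xab gives lkey
	 * 0x1234ab, and 0x1234ab >> 8 == 0x1234.
	 */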
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}

int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* user can (re)register fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to maintain
 * the ib mr keys in sync with rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}