/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2007 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <sys/ib/clients/of/rdma/ib_verbs.h>
#include <sys/ib/clients/of/rdma/ib_addr.h>
#include <sys/ib/clients/of/rdma/rdma_cm.h>

#include <sys/ib/clients/rdsv3/ib.h>
#include <sys/ib/clients/rdsv3/rdma.h>
#include <sys/ib/clients/rdsv3/rdsv3_debug.h>

#define	DMA_TO_DEVICE	0
#define	DMA_FROM_DEVICE	1
#define	RB_CLEAR_NODE(nodep)	AVL_SETPARENT(nodep, nodep);

/*
 * XXX
 *  - build with sparse
 *  - should we limit the size of a mr region?  let transport return failure?
 *  - should we detect duplicate keys on a socket?  hmm.
 *  - an rdma is an mlock, apply rlimit?
 */

/*
 * get the number of pages by looking at the page indices that the start and
 * end addresses fall in.
 *
 * Returns 0 if the vec is invalid.  It is invalid if the number of bytes
 * causes the address to wrap or overflows an unsigned int.  This comes
 * from being stored in the 'length' member of 'struct rdsv3_scatterlist'.
 */
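/*
 * Worked example of the arithmetic below, assuming a PAGESIZE of 4096
 * (PAGESHIFT 12); both constants are platform dependent, so the numbers
 * are only illustrative.  A vec with addr = 0x1ffc and bytes = 0x10 ends
 * at 0x200c and straddles a page boundary:
 *
 *	((0x200c + 0xfff) >> 12) - (0x1ffc >> 12) = 3 - 1 = 2 pages
 */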
static unsigned int
rdsv3_pages_in_vec(struct rdsv3_iovec *vec)
{
	if ((vec->addr + vec->bytes <= vec->addr) ||
	    (vec->bytes > (uint64_t)UINT_MAX)) {
		return (0);
	}

	return (((vec->addr + vec->bytes + PAGESIZE - 1) >>
	    PAGESHIFT) - (vec->addr >> PAGESHIFT));
}

static struct rdsv3_mr *
rdsv3_mr_tree_walk(struct avl_tree *root, uint32_t key,
    struct rdsv3_mr *insert)
{
	struct rdsv3_mr *mr;
	avl_index_t where;

	mr = avl_find(root, &key, &where);
	if ((mr == NULL) && (insert != NULL)) {
		avl_insert(root, (void *)insert, where);
		atomic_add_32(&insert->r_refcount, 1);
		return (NULL);
	}

	return (mr);
}

/*
 * Destroy the transport-specific part of an MR.
 */
static void
rdsv3_destroy_mr(struct rdsv3_mr *mr)
{
	struct rdsv3_sock *rs = mr->r_sock;
	void *trans_private = NULL;
	avl_node_t *np;

	RDSV3_DPRINTF5("rdsv3_destroy_mr",
	    "RDS: destroy mr key is %x refcnt %u",
	    mr->r_key, atomic_get(&mr->r_refcount));

	if (test_and_set_bit(RDSV3_MR_DEAD, &mr->r_state))
		return;

	mutex_enter(&rs->rs_rdma_lock);
	np = &mr->r_rb_node;
	if (AVL_XPARENT(np) != np)
		avl_remove(&rs->rs_rdma_keys, mr);
	trans_private = mr->r_trans_private;
	mr->r_trans_private = NULL;
	mutex_exit(&rs->rs_rdma_lock);

	if (trans_private)
		mr->r_trans->free_mr(trans_private, mr->r_invalidate);
}

void
__rdsv3_put_mr_final(struct rdsv3_mr *mr)
{
	rdsv3_destroy_mr(mr);
	kmem_free(mr, sizeof (*mr));
}

/*
 * By the time this is called we can't have any more ioctls called on
 * the socket, so we don't need to worry about racing with others.
 */
void
rdsv3_rdma_drop_keys(struct rdsv3_sock *rs)
{
	struct rdsv3_mr *mr;
	struct avl_node *node;

	/* Release any MRs associated with this socket */
	mutex_enter(&rs->rs_rdma_lock);
	while ((node = avl_first(&rs->rs_rdma_keys))) {
		mr = container_of(node, struct rdsv3_mr, r_rb_node);
		if (mr->r_trans == rs->rs_transport)
			mr->r_invalidate = 0;
		avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
		RB_CLEAR_NODE(&mr->r_rb_node)
		/*
		 * Drop the lock before tearing the MR down;
		 * rdsv3_destroy_mr() acquires rs_rdma_lock itself.
		 */
		mutex_exit(&rs->rs_rdma_lock);
		rdsv3_destroy_mr(mr);
		rdsv3_mr_put(mr);
		mutex_enter(&rs->rs_rdma_lock);
	}
	mutex_exit(&rs->rs_rdma_lock);

	if (rs->rs_transport && rs->rs_transport->flush_mrs)
		rs->rs_transport->flush_mrs();
}

/*
 * Helper function to pin user pages.
 */
#if 0
static int
rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
    struct page **pages, int write)
{
	unsigned long l_user_addr = user_addr;
	unsigned int l_nr_pages = nr_pages;
	struct page **l_pages = pages;
	int l_write = write;

	/* memory pin in rds_ib_get_mr() */
	return (0);
}
#endif

static int
__rdsv3_rdma_map(struct rdsv3_sock *rs, struct rdsv3_get_mr_args *args,
    uint64_t *cookie_ret, struct rdsv3_mr **mr_ret)
{
	struct rdsv3_mr *mr = NULL, *found;
	void *trans_private;
	rdsv3_rdma_cookie_t cookie;
	unsigned int nents = 0;
	int ret;

	if (rs->rs_bound_addr == 0) {
		ret = -ENOTCONN; /* XXX not a great errno */
		goto out;
	}

	if (rs->rs_transport->get_mr == NULL) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	mr = kmem_zalloc(sizeof (struct rdsv3_mr), KM_NOSLEEP);
	if (mr == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	mr->r_refcount = 1;
	RB_CLEAR_NODE(&mr->r_rb_node);
	mr->r_trans = rs->rs_transport;
	mr->r_sock = rs;

	if (args->flags & RDSV3_RDMA_USE_ONCE)
		mr->r_use_once = 1;
	if (args->flags & RDSV3_RDMA_INVALIDATE)
		mr->r_invalidate = 1;
	if (args->flags & RDSV3_RDMA_READWRITE)
		mr->r_write = 1;

	/*
	 * Obtain a transport-specific MR.  If this succeeds, the
	 * s/g list is now owned by the MR.
	 * Note that dma_map() implies that pending writes are
	 * flushed to RAM, so no dma_sync is needed here.
	 */
	trans_private = rs->rs_transport->get_mr(&args->vec, nents, rs,
	    &mr->r_key);

	if (IS_ERR(trans_private)) {
		ret = PTR_ERR(trans_private);
		goto out;
	}

	mr->r_trans_private = trans_private;

	/*
	 * The user may pass us an unaligned address, but we can only
	 * map page-aligned regions.  So we keep the offset, and build
	 * a 64-bit cookie containing <R_Key, offset> and pass that
	 * around.
	 */
	cookie = rdsv3_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGEMASK);
	if (cookie_ret)
		*cookie_ret = cookie;

	/*
	 * Copy the cookie out to the user address given in args->cookie_addr.
	 */
	if (args->cookie_addr) {
		ret = ddi_copyout((void *)&cookie,
		    (void *)((intptr_t)args->cookie_addr),
		    sizeof (rdsv3_rdma_cookie_t), 0);
		if (ret != 0) {
			ret = -EFAULT;
			goto out;
		}
	}

	RDSV3_DPRINTF5("__rdsv3_rdma_map",
	    "RDS: get_mr mr 0x%p addr 0x%llx key 0x%x",
	    mr, args->vec.addr, mr->r_key);
	/*
	 * Inserting the new MR into the rbtree bumps its
	 * reference count.
	 */
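	/*
	 * Reference accounting: the MR is allocated with r_refcount of 1
	 * (owned by this function), the tree insert below adds one, and
	 * an optional *mr_ret adds another.  The initial reference is
	 * dropped by rdsv3_mr_put() in the common exit path, so on
	 * success the surviving references belong to the tree and, if
	 * requested, to the caller.
	 */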
	mutex_enter(&rs->rs_rdma_lock);
	found = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr);
	mutex_exit(&rs->rs_rdma_lock);

	ASSERT(!(found && found != mr));

	if (mr_ret) {
		atomic_add_32(&mr->r_refcount, 1);
		*mr_ret = mr;
	}

	ret = 0;
out:
	if (mr)
		rdsv3_mr_put(mr);
	return (ret);
}

int
rdsv3_get_mr(struct rdsv3_sock *rs, const void *optval, int optlen)
{
	struct rdsv3_get_mr_args args;

	if (optlen != sizeof (struct rdsv3_get_mr_args))
		return (-EINVAL);

#if 1
	bcopy((struct rdsv3_get_mr_args *)optval, &args,
	    sizeof (struct rdsv3_get_mr_args));
#else
	if (ddi_copyin(optval, &args, optlen, 0))
		return (-EFAULT);
#endif

	return (__rdsv3_rdma_map(rs, &args, NULL, NULL));
}

int
rdsv3_get_mr_for_dest(struct rdsv3_sock *rs, const void *optval,
    int optlen)
{
	struct rdsv3_get_mr_for_dest_args args;
	struct rdsv3_get_mr_args new_args;

	if (optlen != sizeof (struct rdsv3_get_mr_for_dest_args))
		return (-EINVAL);

#if 1
	bcopy((struct rdsv3_get_mr_for_dest_args *)optval, &args,
	    sizeof (struct rdsv3_get_mr_for_dest_args));
#else
	if (ddi_copyin(optval, &args, optlen, 0))
		return (-EFAULT);
#endif

	/*
	 * Initially, just behave like get_mr().
	 * TODO: Implement get_mr as a wrapper around this
	 * and deprecate it.
	 */
	new_args.vec = args.vec;
	new_args.cookie_addr = args.cookie_addr;
	new_args.flags = args.flags;

	return (__rdsv3_rdma_map(rs, &new_args, NULL, NULL));
}

/*
 * Free the MR indicated by the given R_Key.
 */
int
rdsv3_free_mr(struct rdsv3_sock *rs, const void *optval, int optlen)
{
	struct rdsv3_free_mr_args args;
	struct rdsv3_mr *mr;

	if (optlen != sizeof (struct rdsv3_free_mr_args))
		return (-EINVAL);

#if 1
	bcopy((struct rdsv3_free_mr_args *)optval, &args,
	    sizeof (struct rdsv3_free_mr_args));
#else
	if (ddi_copyin((struct rdsv3_free_mr_args *)optval, &args,
	    sizeof (struct rdsv3_free_mr_args), 0))
		return (-EFAULT);
#endif

	/* Special case - a null cookie means flush all unused MRs */
	if (args.cookie == 0) {
		if (!rs->rs_transport || !rs->rs_transport->flush_mrs)
			return (-EINVAL);
		rs->rs_transport->flush_mrs();
		return (0);
	}

	/*
	 * Look up the MR given its R_Key and remove it from the rbtree
	 * so nobody else finds it.
	 * This should also prevent races with rdsv3_rdma_unuse.
	 */
	mutex_enter(&rs->rs_rdma_lock);
	mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys,
	    rdsv3_rdma_cookie_key(args.cookie), NULL);
	if (mr) {
		avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
		RB_CLEAR_NODE(&mr->r_rb_node);
		if (args.flags & RDSV3_RDMA_INVALIDATE)
			mr->r_invalidate = 1;
	}
	mutex_exit(&rs->rs_rdma_lock);

	if (!mr)
		return (-EINVAL);

	/*
	 * Call rdsv3_destroy_mr() ourselves so that we're sure it's done
	 * by the time we return.  If we let rdsv3_mr_put() do it, it might
	 * not happen until someone else drops their ref.
	 */
	rdsv3_destroy_mr(mr);
	rdsv3_mr_put(mr);
	return (0);
}

/*
 * This is called when we receive an extension header that
 * tells us this MR was used.
 * It allows us to implement use_once semantics.
 */
void
rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force)
{
	struct rdsv3_mr *mr;
	int zot_me = 0;

	RDSV3_DPRINTF4("rdsv3_rdma_unuse", "Enter rkey: 0x%x", r_key);

	mutex_enter(&rs->rs_rdma_lock);
	mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
	if (mr && (mr->r_use_once || force)) {
		avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
		RB_CLEAR_NODE(&mr->r_rb_node);
		zot_me = 1;
	} else if (mr)
		atomic_add_32(&mr->r_refcount, 1);
	mutex_exit(&rs->rs_rdma_lock);

	/*
	 * May have to issue a dma_sync on this memory region.
	 * Note we could avoid this if the operation was an RDMA READ,
	 * but at this point we can't tell.
	 */
	if (mr != NULL) {
		RDSV3_DPRINTF4("rdsv3_rdma_unuse", "mr: %p zot_me %d",
		    mr, zot_me);
		if (mr->r_trans->sync_mr)
			mr->r_trans->sync_mr(mr->r_trans_private,
			    DMA_FROM_DEVICE);

		/*
		 * If the MR was marked as invalidate, this will
		 * trigger an async flush.
		 */
		if (zot_me)
			rdsv3_destroy_mr(mr);
		rdsv3_mr_put(mr);
	}
	RDSV3_DPRINTF4("rdsv3_rdma_unuse", "Return");
}

void
rdsv3_rdma_free_op(struct rdsv3_rdma_op *ro)
{
	unsigned int i;

	/* deallocate RDMA resources on rdsv3_message */

	for (i = 0; i < ro->r_nents; i++) {
		ddi_umem_unlock(ro->r_rdma_sg[i].umem_cookie);
	}

	if (ro->r_notifier)
		kmem_free(ro->r_notifier, sizeof (*ro->r_notifier));
	kmem_free(ro, sizeof (*ro));
}

/*
 * args points to an in-kernel copy of the RDMA args taken from the
 * sendmsg cmsg.
 */
static struct rdsv3_rdma_op *
rdsv3_rdma_prepare(struct rdsv3_sock *rs, struct rdsv3_rdma_args *args)
{
	struct rdsv3_iovec vec;
	struct rdsv3_rdma_op *op = NULL;
	unsigned int nr_bytes;
	struct rdsv3_iovec *local_vec;
	unsigned int nr;
	unsigned int i;
	ddi_umem_cookie_t umem_cookie;
	size_t umem_len;
	caddr_t umem_addr;
	int ret;

	if (rs->rs_bound_addr == 0) {
		ret = -ENOTCONN; /* XXX not a great errno */
		goto out;
	}

	if (args->nr_local > (uint64_t)UINT_MAX) {
		ret = -EMSGSIZE;
		goto out;
	}

	op = kmem_zalloc(offsetof(struct rdsv3_rdma_op,
	    r_rdma_sg[args->nr_local]), KM_NOSLEEP);
	if (op == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	op->r_write = !!(args->flags & RDSV3_RDMA_READWRITE);
	op->r_fence = !!(args->flags & RDSV3_RDMA_FENCE);
	op->r_notify = !!(args->flags & RDSV3_RDMA_NOTIFY_ME);
	op->r_recverr = rs->rs_recverr;

	if (op->r_notify || op->r_recverr) {
		/*
		 * We allocate an uninitialized notifier here, because
		 * we don't want to do that in the completion handler.  We
		 * would have to use GFP_ATOMIC there, and don't want to
		 * deal with failed allocations.
		 */
		op->r_notifier = kmem_alloc(sizeof (struct rdsv3_notifier),
		    KM_NOSLEEP);
		if (!op->r_notifier) {
			ret = -ENOMEM;
			goto out;
		}
		op->r_notifier->n_user_token = args->user_token;
		op->r_notifier->n_status = RDSV3_RDMA_SUCCESS;
	}

	/*
	 * The cookie contains the R_Key of the remote memory region, and
	 * optionally an offset into it.  This is how we implement RDMA into
	 * unaligned memory.
	 * When setting up the RDMA, we need to add that offset to the
	 * destination address (which is really an offset into the MR).
	 * FIXME: We may want to move this into ib_rdma.c
	 */
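	/*
	 * rdsv3_rdma_cookie_key() and rdsv3_rdma_cookie_offset() recover
	 * the two values that rdsv3_rdma_make_cookie() packed when the
	 * remote MR was mapped (see __rdsv3_rdma_map()): the R_Key and the
	 * offset of the registered buffer within its first page.  The
	 * exact bit layout lives with the cookie helpers in rdma.h.
	 */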
	op->r_key = rdsv3_rdma_cookie_key(args->cookie);
	op->r_remote_addr = args->remote_vec.addr +
	    rdsv3_rdma_cookie_offset(args->cookie);

	nr_bytes = 0;

	RDSV3_DPRINTF5("rdsv3_rdma_prepare",
	    "RDS: rdma prepare nr_local %llu rva %llx rkey %x",
	    (unsigned long long)args->nr_local,
	    (unsigned long long)args->remote_vec.addr,
	    op->r_key);

	local_vec = (struct rdsv3_iovec *)(unsigned long)args->local_vec_addr;

	/* pin the scatter list of user buffers */
	for (i = 0; i < args->nr_local; i++) {
		if (ddi_copyin(&local_vec[i], &vec,
		    sizeof (struct rdsv3_iovec), 0)) {
			ret = -EFAULT;
			goto out;
		}

		nr = rdsv3_pages_in_vec(&vec);
		if (nr == 0) {
			RDSV3_DPRINTF2("rdsv3_rdma_prepare",
			    "rdsv3_pages_in_vec returned 0");
			ret = -EINVAL;
			goto out;
		}

		rs->rs_user_addr = vec.addr;
		rs->rs_user_bytes = vec.bytes;

		/* pin user memory pages */
		umem_len = ptob(btopr(vec.bytes +
		    ((uintptr_t)vec.addr & PAGEOFFSET)));
		umem_addr = (caddr_t)((uintptr_t)vec.addr & ~PAGEOFFSET);
		ret = umem_lockmemory(umem_addr, umem_len,
		    DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ,
		    &umem_cookie, NULL, NULL);
		if (ret != 0) {
			RDSV3_DPRINTF2("rdsv3_rdma_prepare",
			    "umem_lockmemory() returned %d", ret);
			ret = -EFAULT;
			goto out;
		}
		op->r_rdma_sg[i].umem_cookie = umem_cookie;
		op->r_rdma_sg[i].iovec = vec;
		/*
		 * Count this entry as soon as it is locked, so that
		 * rdsv3_rdma_free_op() unlocks it if a later iovec fails.
		 */
		op->r_nents = i + 1;
		nr_bytes += vec.bytes;

		RDSV3_DPRINTF5("rdsv3_rdma_prepare",
		    "RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx",
		    nr_bytes, nr, vec.bytes, vec.addr);
	}

	if (nr_bytes > args->remote_vec.bytes) {
		RDSV3_DPRINTF2("rdsv3_rdma_prepare",
		    "RDS nr_bytes %u remote_bytes %u do not match",
		    nr_bytes, (unsigned int)args->remote_vec.bytes);
		ret = -EINVAL;
		goto out;
	}
	op->r_bytes = nr_bytes;

	ret = 0;
out:
	if (ret) {
		if (op)
			rdsv3_rdma_free_op(op);
		op = ERR_PTR(ret);
	}
	return (op);
}
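/*
 * Note on the calling convention: rdsv3_rdma_prepare() returns either a
 * fully set up rdsv3_rdma_op or ERR_PTR(-errno); callers test the result
 * with IS_ERR() and extract the errno with PTR_ERR(), as
 * rdsv3_cmsg_rdma_args() does below.
 */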
/*
 * The application asks for an RDMA transfer.
 * Extract all arguments and set up the rdma_op.
 */
int
rdsv3_cmsg_rdma_args(struct rdsv3_sock *rs, struct rdsv3_message *rm,
    struct cmsghdr *cmsg)
{
	struct rdsv3_rdma_op *op;
	struct rdsv3_rdma_args *ap;

	if (cmsg->cmsg_len < CMSG_LEN(sizeof (struct rdsv3_rdma_args)) ||
	    rm->m_rdma_op != NULL)
		return (-EINVAL);

	/* uint64_t alignment on struct rdsv3_rdma_args */
	ap = (struct rdsv3_rdma_args *)kmem_alloc(cmsg->cmsg_len, KM_SLEEP);
	bcopy(CMSG_DATA(cmsg), ap, cmsg->cmsg_len);
	op = rdsv3_rdma_prepare(rs, ap);
	kmem_free(ap, cmsg->cmsg_len);
	if (IS_ERR(op))
		return (PTR_ERR(op));
	rdsv3_stats_inc(s_send_rdma);
	rm->m_rdma_op = op;
	return (0);
}

/*
 * The application wants us to pass an RDMA destination (aka MR)
 * to the remote.
 */
int
rdsv3_cmsg_rdma_dest(struct rdsv3_sock *rs, struct rdsv3_message *rm,
    struct cmsghdr *cmsg)
{
	struct rdsv3_mr *mr;
	uint32_t r_key;
	int err = 0;

	if (cmsg->cmsg_len < CMSG_LEN(sizeof (rdsv3_rdma_cookie_t)) ||
	    rm->m_rdma_cookie != 0)
		return (-EINVAL);

	(void) memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg),
	    sizeof (rm->m_rdma_cookie));

	/*
	 * We are reusing a previously mapped MR here.  Most likely, the
	 * application has written to the buffer, so we need to explicitly
	 * flush those writes to RAM.  Otherwise the HCA may not see them
	 * when doing a DMA from that buffer.
	 */
	r_key = rdsv3_rdma_cookie_key(rm->m_rdma_cookie);

	mutex_enter(&rs->rs_rdma_lock);
	mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
	if (mr == NULL)
		err = -EINVAL;	/* invalid r_key */
	else
		atomic_add_32(&mr->r_refcount, 1);
	mutex_exit(&rs->rs_rdma_lock);

	if (mr) {
		mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
		rm->m_rdma_mr = mr;
	}
	return (err);
}

/*
 * The application passes us an address range it wants to enable RDMA
 * to/from.  We map the area, and save the <R_Key,offset> pair
 * in rm->m_rdma_cookie.  This causes it to be sent along to the peer
 * in an extension header.
 */
int
rdsv3_cmsg_rdma_map(struct rdsv3_sock *rs, struct rdsv3_message *rm,
    struct cmsghdr *cmsg)
{
	struct rdsv3_get_mr_args *mrp;
	int status;

	if (cmsg->cmsg_len < CMSG_LEN(sizeof (struct rdsv3_get_mr_args)) ||
	    rm->m_rdma_cookie != 0)
		return (-EINVAL);

	/* uint64_t alignment on struct rdsv3_get_mr_args */
	mrp = (struct rdsv3_get_mr_args *)kmem_alloc(cmsg->cmsg_len, KM_SLEEP);
	bcopy(CMSG_DATA(cmsg), mrp, cmsg->cmsg_len);
	status = __rdsv3_rdma_map(rs, mrp, &rm->m_rdma_cookie, &rm->m_rdma_mr);
	kmem_free(mrp, cmsg->cmsg_len);
	return (status);
}