1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 26 /* All Rights Reserved */ 27 /* 28 * Portions of this source code were derived from Berkeley 29 * 4.3 BSD under license from the Regents of the University of 30 * California. 
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/systm.h>	/* NOTE(review): duplicate of <sys/systm.h> above */
#include <sys/kstat.h>
#include <sys/t_lock.h>
#include <sys/ddi.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/isa_defs.h>
#include <sys/zone.h>
#include <sys/sdt.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>
#include <rpc/rpc_msg.h>
#include <rpc/rpc_rdma.h>
#include <nfs/nfs.h>
#include <nfs/nfs4_kprot.h>

/* Number of receive buffers requested from the peer (credit request). */
static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;

static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
    XDR *, xdrproc_t, caddr_t);
static int clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
    XDR **, uint_t *);
static int clnt_setup_rlist(CONN *, XDR *, XDR *);
static int clnt_setup_wlist(CONN *, XDR *, XDR *, rdma_buf_t *);
static int clnt_setup_long_reply(CONN *, struct clist **, uint_t);
static void clnt_check_credit(CONN *);
static void clnt_return_credit(CONN *);
static void clnt_decode_long_reply(CONN *, struct clist *,
    struct clist *, XDR *, XDR **, struct clist *,
    struct clist *, uint_t, uint_t);

static void clnt_update_credit(CONN *, uint32_t);

static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
    caddr_t, xdrproc_t, caddr_t, struct timeval);
static void clnt_rdma_kabort(CLIENT *);
static void clnt_rdma_kerror(CLIENT *, struct rpc_err *);
static bool_t clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static void clnt_rdma_kdestroy(CLIENT *);
static bool_t clnt_rdma_kcontrol(CLIENT *, int, char *);
static int clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
    struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);

/*
 * Operations vector for RDMA based RPC
 */
static struct clnt_ops rdma_clnt_ops = {
	clnt_rdma_kcallit,	/* do rpc call */
	clnt_rdma_kabort,	/* abort call */
	clnt_rdma_kerror,	/* return error status */
	clnt_rdma_kfreeres,	/* free results */
	clnt_rdma_kdestroy,	/* destroy rpc handle */
	clnt_rdma_kcontrol,	/* the ioctl() of rpc */
	clnt_rdma_ksettimers,	/* set retry timers */
};

/*
 * The size of the preserialized RPC header information.
 */
#define	CKU_HDRSIZE	20
#define	CLNT_RDMA_SUCCESS	0
#define	CLNT_RDMA_FAIL		(-1)

/* How many AUTH_REFRESH retries a single kcallit may perform. */
#define	AUTH_REFRESH_COUNT	2

#define	IS_RPCSEC_GSS(authh) \
	(authh->cl_auth->ah_cred.oa_flavor == RPCSEC_GSS)

/*
 * Per RPC RDMA endpoint details
 */
typedef struct cku_private {
	CLIENT		cku_client;	/* client handle */
	rdma_mod_t	*cku_rd_mod;	/* underlying RDMA mod */
	void		*cku_rd_handle;	/* underlying RDMA device */
	struct netbuf	cku_addr;	/* remote netbuf address */
	int		cku_addrfmly;	/* for finding addr_type */
	struct rpc_err	cku_err;	/* error status */
	struct cred	*cku_cred;	/* credentials */
	XDR		cku_outxdr;	/* xdr stream for output */
	uint32_t	cku_outsz;	/* size of encoded output */
	XDR		cku_inxdr;	/* xdr stream for input */
	char		cku_rpchdr[CKU_HDRSIZE+4];	/* rpc header */
	uint32_t	cku_xid;	/* current XID */
} cku_private_t;

#define	CLNT_RDMA_DELAY	10	/* secs to delay after a connection failure */
static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;

/* Client-side RPC/RDMA statistics, exported through kstat below. */
struct {
	kstat_named_t	rccalls;
	kstat_named_t	rcbadcalls;
	kstat_named_t	rcbadxids;
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rccantconn;
	kstat_named_t	rcnomem;
	kstat_named_t	rcintrs;
	kstat_named_t	rclongrpcs;
} rdmarcstat = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "badxids",	KSTAT_DATA_UINT64 },
	{ "timeouts",	KSTAT_DATA_UINT64 },
	{ "newcreds",	KSTAT_DATA_UINT64 },
	{ "badverfs",	KSTAT_DATA_UINT64 },
	{ "timers",	KSTAT_DATA_UINT64 },
	{ "cantconn",	KSTAT_DATA_UINT64 },
	{ "nomem",	KSTAT_DATA_UINT64 },
	{ "interrupts", KSTAT_DATA_UINT64 },
	{ "longrpc",	KSTAT_DATA_UINT64 }
};

kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat;
uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t);

#ifdef DEBUG
int rdma_clnt_debug = 0;
#endif

#ifdef accurate_stats
extern kmutex_t rdmarcstat_lock;	/* mutex for rcstat updates */

#define	RCSTAT_INCR(x)	\
	mutex_enter(&rdmarcstat_lock); \
	rdmarcstat.x.value.ui64++; \
	mutex_exit(&rdmarcstat_lock);
#else
/* Default: unlocked increment; counters are advisory only. */
#define	RCSTAT_INCR(x)	\
	rdmarcstat.x.value.ui64++;
#endif

#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((cku_private_t *)((h)->cl_private))

/*
 * Round a buffer length up to one of the fixed RDMA buffer bucket
 * sizes: 8K, 16K, 32K or 64K.  Lengths above 64K are only rounded
 * up to the XDR unit boundary (RNDUP) and returned unchanged otherwise.
 */
uint_t
calc_length(uint_t len)
{
	len = RNDUP(len);

	if (len <= 64 * 1024) {
		if (len > 32 * 1024) {
			len = 64 * 1024;
		} else {
			if (len > 16 * 1024) {
				len = 32 * 1024;
			} else {
				if (len > 8 * 1024) {
					len = 16 * 1024;
				} else {
					len = 8 * 1024;
				}
			}
		}
	}
	return (len);
}

/*
 * Create an RPC/RDMA client handle for program `pgm' version `vers'
 * reachable at `raddr' via the RDMA transport plugin named `proto'.
 *
 * On success *cl holds the new handle and 0 is returned; on failure
 * EINVAL is returned and no resources are retained.  The handle is
 * created with a kernel (AUTH_NONE-style) auth; the call header is
 * pre-serialized into cku_rpchdr so kcallit can patch in the XID.
 */
int
clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
    rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
{
	CLIENT *h;
	struct cku_private *p;
	struct rpc_msg call_msg;
	rdma_registry_t *rp;

	ASSERT(INGLOBALZONE(curproc));

	if (cl == NULL)
		return (EINVAL);
	*cl = NULL;

	p = kmem_zalloc(sizeof (*p), KM_SLEEP);

	/*
	 * Find underlying RDMATF plugin
	 */
	rw_enter(&rdma_lock, RW_READER);
	rp = rdma_mod_head;
	while (rp != NULL) {
		if (strcmp(rp->r_mod->rdma_api, proto))
			rp = rp->r_next;
		else {
			p->cku_rd_mod = rp->r_mod;
			p->cku_rd_handle = handle;
			break;
		}
	}
	rw_exit(&rdma_lock);

	if (p->cku_rd_mod == NULL) {
		/*
		 * Should not happen.
		 * No matching RDMATF plugin.
		 */
		kmem_free(p, sizeof (struct cku_private));
		return (EINVAL);
	}

	h = ptoh(p);
	h->cl_ops = &rdma_clnt_ops;
	h->cl_private = (caddr_t)p;
	h->cl_auth = authkern_create();

	/* call message, just used to pre-serialize below */
	call_msg.rm_xid = 0;
	call_msg.rm_direction = CALL;
	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
	call_msg.rm_call.cb_prog = pgm;
	call_msg.rm_call.cb_vers = vers;

	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
	/* pre-serialize call message header */
	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
		XDR_DESTROY(&p->cku_outxdr);
		auth_destroy(h->cl_auth);
		kmem_free(p, sizeof (struct cku_private));
		return (EINVAL);
	}

	/*
	 * Set up the rpc information
	 */
	p->cku_cred = cred;
	p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
	p->cku_addr.maxlen = raddr->maxlen;
	p->cku_addr.len = raddr->len;
	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
	p->cku_addrfmly = family;

	*cl = h;
	return (0);
}

/*
 * Tear down a handle built by clnt_rdma_kcreate()/clnt_rdma_kinit().
 * Frees the copied address and the private structure.
 * NOTE(review): h->cl_auth is not destroyed here — presumably owned
 * and released by the consumer; confirm against callers.
 */
static void
clnt_rdma_kdestroy(CLIENT *h)
{
	struct cku_private *p = htop(h);

	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
	kmem_free(p, sizeof (*p));
}

/*
 * Re-initialize an existing client handle for a (possibly different)
 * RDMA plugin, server address and credential, reusing the private
 * structure allocated at create time.
 */
void
clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
    struct cred *cred)
{
	struct cku_private *p = htop(h);
	rdma_registry_t *rp;

	ASSERT(INGLOBALZONE(curproc));
	/*
	 * Find underlying RDMATF plugin
	 */
	p->cku_rd_mod = NULL;
	rw_enter(&rdma_lock, RW_READER);
	rp = rdma_mod_head;
	while (rp != NULL) {
		if (strcmp(rp->r_mod->rdma_api, proto))
			rp = rp->r_next;
		else {
			p->cku_rd_mod = rp->r_mod;
			p->cku_rd_handle = handle;
			break;
		}

	}
	rw_exit(&rdma_lock);

	/*
	 * Set up the rpc information
	 */
	p->cku_cred = cred;
319 p->cku_xid = 0; 320 321 if (p->cku_addr.maxlen < raddr->len) { 322 if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL) 323 kmem_free(p->cku_addr.buf, p->cku_addr.maxlen); 324 p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP); 325 p->cku_addr.maxlen = raddr->maxlen; 326 } 327 328 p->cku_addr.len = raddr->len; 329 bcopy(raddr->buf, p->cku_addr.buf, raddr->len); 330 h->cl_ops = &rdma_clnt_ops; 331 } 332 333 static int 334 clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum, 335 rdma_buf_t *rpcmsg, XDR *xdrs, 336 xdrproc_t xdr_args, caddr_t argsp) 337 { 338 cku_private_t *p = htop(h); 339 340 if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { 341 /* 342 * Copy in the preserialized RPC header 343 * information. 344 */ 345 bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE); 346 347 /* 348 * transaction id is the 1st thing in the output 349 * buffer. 350 */ 351 /* LINTED pointer alignment */ 352 (*(uint32_t *)(rpcmsg->addr)) = p->cku_xid; 353 354 /* Skip the preserialized stuff. */ 355 XDR_SETPOS(xdrs, CKU_HDRSIZE); 356 357 /* Serialize dynamic stuff into the output buffer. */ 358 if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) || 359 (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) || 360 (!(*xdr_args)(xdrs, argsp))) { 361 DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__dynargs); 362 return (CLNT_RDMA_FAIL); 363 } 364 p->cku_outsz = XDR_GETPOS(xdrs); 365 } else { 366 uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE]; 367 IXDR_PUT_U_INT32(uproc, procnum); 368 (*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid; 369 XDR_SETPOS(xdrs, 0); 370 371 /* Serialize the procedure number and the arguments. 
*/ 372 if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr, 373 CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) { 374 if (rpcmsg->addr != xdrs->x_base) { 375 rpcmsg->addr = xdrs->x_base; 376 rpcmsg->len = xdr_getbufsize(xdrs); 377 } 378 DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__procnum); 379 return (CLNT_RDMA_FAIL); 380 } 381 /* 382 * If we had to allocate a new buffer while encoding 383 * then update the addr and len. 384 */ 385 if (rpcmsg->addr != xdrs->x_base) { 386 rpcmsg->addr = xdrs->x_base; 387 rpcmsg->len = xdr_getbufsize(xdrs); 388 } 389 390 p->cku_outsz = XDR_GETPOS(xdrs); 391 DTRACE_PROBE1(krpc__i__compose__size__sec, int, p->cku_outsz) 392 } 393 394 return (CLNT_RDMA_SUCCESS); 395 } 396 397 static int 398 clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg, 399 XDR **xdrs, uint_t *op) 400 { 401 cku_private_t *p = htop(h); 402 uint_t vers; 403 uint32_t rdma_credit = rdma_bufs_rqst; 404 405 vers = RPCRDMA_VERS; 406 clmsg->type = SEND_BUFFER; 407 408 if (rdma_buf_alloc(conn, clmsg)) { 409 return (CLNT_RDMA_FAIL); 410 } 411 412 *xdrs = &p->cku_outxdr; 413 xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE); 414 415 (*(uint32_t *)clmsg->addr) = p->cku_xid; 416 XDR_SETPOS(*xdrs, sizeof (uint32_t)); 417 (void) xdr_u_int(*xdrs, &vers); 418 (void) xdr_u_int(*xdrs, &rdma_credit); 419 (void) xdr_u_int(*xdrs, op); 420 421 return (CLNT_RDMA_SUCCESS); 422 } 423 424 /* 425 * If xp_cl is NULL value, then the RPC payload will NOT carry 426 * an RDMA READ chunk list, in this case we insert FALSE into 427 * the XDR stream. Otherwise we use the clist and RDMA register 428 * the memory and encode the clist into the outbound XDR stream. 
429 */ 430 static int 431 clnt_setup_rlist(CONN *conn, XDR *xdrs, XDR *call_xdrp) 432 { 433 int status; 434 struct clist *rclp; 435 int32_t xdr_flag = XDR_RDMA_RLIST_REG; 436 437 XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &rclp); 438 439 if (rclp != NULL) { 440 status = clist_register(conn, rclp, CLIST_REG_SOURCE); 441 if (status != RDMA_SUCCESS) { 442 return (CLNT_RDMA_FAIL); 443 } 444 XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag); 445 } 446 (void) xdr_do_clist(xdrs, &rclp); 447 448 return (CLNT_RDMA_SUCCESS); 449 } 450 451 /* 452 * If xp_wcl is NULL value, then the RPC payload will NOT carry 453 * an RDMA WRITE chunk list, in this case we insert FALSE into 454 * the XDR stream. Otherwise we use the clist and RDMA register 455 * the memory and encode the clist into the outbound XDR stream. 456 */ 457 static int 458 clnt_setup_wlist(CONN *conn, XDR *xdrs, XDR *call_xdrp, rdma_buf_t *rndbuf) 459 { 460 int status; 461 struct clist *wlist, *rndcl; 462 int wlen, rndlen; 463 int32_t xdr_flag = XDR_RDMA_WLIST_REG; 464 465 XDR_CONTROL(call_xdrp, XDR_RDMA_GET_WLIST, &wlist); 466 467 if (wlist != NULL) { 468 /* 469 * If we are sending a non 4-byte alligned length 470 * the server will roundup the length to 4-byte 471 * boundary. In such a case, a trailing chunk is 472 * added to take any spill over roundup bytes. 473 */ 474 wlen = clist_len(wlist); 475 rndlen = (roundup(wlen, BYTES_PER_XDR_UNIT) - wlen); 476 if (rndlen) { 477 rndcl = clist_alloc(); 478 /* 479 * calc_length() will allocate a PAGESIZE 480 * buffer below. 
481 */ 482 rndcl->c_len = calc_length(rndlen); 483 rndcl->rb_longbuf.type = RDMA_LONG_BUFFER; 484 rndcl->rb_longbuf.len = rndcl->c_len; 485 if (rdma_buf_alloc(conn, &rndcl->rb_longbuf)) { 486 clist_free(rndcl); 487 return (CLNT_RDMA_FAIL); 488 } 489 490 /* Roundup buffer freed back in caller */ 491 *rndbuf = rndcl->rb_longbuf; 492 493 rndcl->u.c_daddr3 = rndcl->rb_longbuf.addr; 494 rndcl->c_next = NULL; 495 rndcl->c_dmemhandle = rndcl->rb_longbuf.handle; 496 wlist->c_next = rndcl; 497 } 498 499 status = clist_register(conn, wlist, CLIST_REG_DST); 500 if (status != RDMA_SUCCESS) { 501 rdma_buf_free(conn, rndbuf); 502 bzero(rndbuf, sizeof (rdma_buf_t)); 503 return (CLNT_RDMA_FAIL); 504 } 505 XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag); 506 } 507 508 if (!xdr_encode_wlist(xdrs, wlist)) { 509 if (rndlen) { 510 rdma_buf_free(conn, rndbuf); 511 bzero(rndbuf, sizeof (rdma_buf_t)); 512 } 513 return (CLNT_RDMA_FAIL); 514 } 515 516 return (CLNT_RDMA_SUCCESS); 517 } 518 519 static int 520 clnt_setup_long_reply(CONN *conn, struct clist **clpp, uint_t length) 521 { 522 if (length == 0) { 523 *clpp = NULL; 524 return (CLNT_RDMA_SUCCESS); 525 } 526 527 *clpp = clist_alloc(); 528 529 (*clpp)->rb_longbuf.len = calc_length(length); 530 (*clpp)->rb_longbuf.type = RDMA_LONG_BUFFER; 531 532 if (rdma_buf_alloc(conn, &((*clpp)->rb_longbuf))) { 533 clist_free(*clpp); 534 *clpp = NULL; 535 return (CLNT_RDMA_FAIL); 536 } 537 538 (*clpp)->u.c_daddr3 = (*clpp)->rb_longbuf.addr; 539 (*clpp)->c_len = (*clpp)->rb_longbuf.len; 540 (*clpp)->c_next = NULL; 541 (*clpp)->c_dmemhandle = (*clpp)->rb_longbuf.handle; 542 543 if (clist_register(conn, *clpp, CLIST_REG_DST)) { 544 DTRACE_PROBE(krpc__e__clntrdma__longrep_regbuf); 545 rdma_buf_free(conn, &((*clpp)->rb_longbuf)); 546 clist_free(*clpp); 547 return (CLNT_RDMA_FAIL); 548 } 549 550 return (CLNT_RDMA_SUCCESS); 551 } 552 553 /* ARGSUSED */ 554 static enum clnt_stat 555 clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args, 556 
    caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp,
    struct timeval wait)
{
	cku_private_t *p = htop(h);

	int try_call_again;
	int refresh_attempt = AUTH_REFRESH_COUNT;
	int status;
	int msglen;

	XDR *call_xdrp, callxdr; /* for xdrrdma encoding the RPC call */
	XDR *reply_xdrp, replyxdr; /* for xdrrdma decoding the RPC reply */
	XDR *rdmahdr_o_xdrs, *rdmahdr_i_xdrs;

	struct rpc_msg reply_msg;
	rdma_registry_t *m;

	struct clist *cl_sendlist;
	struct clist *cl_recvlist;
	struct clist *cl;
	struct clist *cl_rpcmsg;
	struct clist *cl_rdma_reply;
	struct clist *cl_rpcreply_wlist;
	struct clist *cl_long_reply;
	rdma_buf_t rndup;

	uint_t vers;
	uint_t op;
	uint_t off;
	uint32_t seg_array_len;
	uint_t long_reply_len;
	uint_t rpcsec_gss;
	uint_t gss_i_or_p;

	CONN *conn = NULL;
	rdma_buf_t clmsg;
	rdma_buf_t rpcmsg;
	rdma_chunkinfo_lengths_t rcil;

	clock_t ticks;
	bool_t wlist_exists_reply;

	uint32_t rdma_credit = rdma_bufs_rqst;

	RCSTAT_INCR(rccalls);

call_again:

	/*
	 * Reset all per-attempt state: the AUTH_REFRESH retry path jumps
	 * back here, so everything freed in the `done' cleanup below must
	 * be re-initialized on each pass.
	 */
	bzero(&clmsg, sizeof (clmsg));
	bzero(&rpcmsg, sizeof (rpcmsg));
	bzero(&rndup, sizeof (rndup));
	try_call_again = 0;
	cl_sendlist = NULL;
	cl_recvlist = NULL;
	cl = NULL;
	cl_rpcmsg = NULL;
	cl_rdma_reply = NULL;
	call_xdrp = NULL;
	reply_xdrp = NULL;
	wlist_exists_reply = FALSE;
	cl_rpcreply_wlist = NULL;
	cl_long_reply = NULL;
	rcil.rcil_len = 0;
	rcil.rcil_len_alt = 0;
	long_reply_len = 0;

	rw_enter(&rdma_lock, RW_READER);
	m = (rdma_registry_t *)p->cku_rd_handle;
	if (m->r_mod_state == RDMA_MOD_INACTIVE) {
		/*
		 * If we didn't find a matching RDMA module in the registry
		 * then there is no transport.
		 */
		rw_exit(&rdma_lock);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
		if (h->cl_nosignal == TRUE) {
			delay(ticks);
		} else {
			if (delay_sig(ticks) == EINTR) {
				p->cku_err.re_status = RPC_INTR;
				p->cku_err.re_errno = EINTR;
			}
		}
		return (RPC_CANTSEND);
	}
	/*
	 * Get unique xid
	 */
	if (p->cku_xid == 0)
		p->cku_xid = alloc_xid();

	status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
	    p->cku_addrfmly, p->cku_rd_handle, &conn);
	rw_exit(&rdma_lock);

	/*
	 * If there is a problem with the connection reflect the issue
	 * back to the higher level to address, we MAY delay for a short
	 * period so that we are kind to the transport.
	 */
	if (conn == NULL) {
		/*
		 * Connect failed to server. Could be because of one
		 * of several things. In some cases we don't want
		 * the caller to retry immediately - delay before
		 * returning to caller.
		 */
		switch (status) {
		case RDMA_TIMEDOUT:
			/*
			 * Already timed out. No need to delay
			 * some more.
			 */
			p->cku_err.re_status = RPC_TIMEDOUT;
			p->cku_err.re_errno = ETIMEDOUT;
			break;
		case RDMA_INTR:
			/*
			 * Failed because of an signal. Very likely
			 * the caller will not retry.
			 */
			p->cku_err.re_status = RPC_INTR;
			p->cku_err.re_errno = EINTR;
			break;
		default:
			/*
			 * All other failures - server down or service
			 * down or temporary resource failure. Delay before
			 * returning to caller.
			 */
			ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
			p->cku_err.re_status = RPC_CANTCONNECT;
			p->cku_err.re_errno = EIO;

			if (h->cl_nosignal == TRUE) {
				delay(ticks);
			} else {
				if (delay_sig(ticks) == EINTR) {
					p->cku_err.re_status = RPC_INTR;
					p->cku_err.re_errno = EINTR;
				}
			}
			break;
		}

		return (p->cku_err.re_status);
	}

	/* Flow control: may block until the server grants us a credit. */
	clnt_check_credit(conn);

	status = CLNT_RDMA_FAIL;

	rpcsec_gss = gss_i_or_p = FALSE;

	if (IS_RPCSEC_GSS(h)) {
		rpcsec_gss = TRUE;
		if (rpc_gss_get_service_type(h->cl_auth) ==
		    rpc_gss_svc_integrity ||
		    rpc_gss_get_service_type(h->cl_auth) ==
		    rpc_gss_svc_privacy)
			gss_i_or_p = TRUE;
	}

	/*
	 * Try a regular RDMA message if RPCSEC_GSS is not being used
	 * or if RPCSEC_GSS is being used for authentication only.
	 */
	if (rpcsec_gss == FALSE ||
	    (rpcsec_gss == TRUE && gss_i_or_p == FALSE)) {
		/*
		 * Grab a send buffer for the request. Try to
		 * encode it to see if it fits. If not, then it
		 * needs to be sent in a chunk.
		 */
		rpcmsg.type = SEND_BUFFER;
		if (rdma_buf_alloc(conn, &rpcmsg)) {
			DTRACE_PROBE(krpc__e__clntrdma__callit_nobufs);
			goto done;
		}

		/* First try to encode into regular send buffer */
		op = RDMA_MSG;

		call_xdrp = &callxdr;

		xdrrdma_create(call_xdrp, rpcmsg.addr, rpcmsg.len,
		    rdma_minchunk, NULL, XDR_ENCODE, conn);

		status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
		    xdr_args, argsp);

		if (status != CLNT_RDMA_SUCCESS) {
			/* Clean up from previous encode attempt */
			rdma_buf_free(conn, &rpcmsg);
			XDR_DESTROY(call_xdrp);
		} else {
			XDR_CONTROL(call_xdrp, XDR_RDMA_GET_CHUNK_LEN, &rcil);
		}
	}

	/* If the encode didn't work, then try a NOMSG */
	if (status != CLNT_RDMA_SUCCESS) {

		msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT + MAX_AUTH_BYTES +
		    xdr_sizeof(xdr_args, argsp);

		msglen = calc_length(msglen);

		/* pick up the lengths for the reply buffer needed */
		(void) xdrrdma_sizeof(xdr_args, argsp, 0,
		    &rcil.rcil_len, &rcil.rcil_len_alt);

		/*
		 * Construct a clist to describe the CHUNK_BUFFER
		 * for the rpcmsg.
		 */
		cl_rpcmsg = clist_alloc();
		cl_rpcmsg->c_len = msglen;
		cl_rpcmsg->rb_longbuf.type = RDMA_LONG_BUFFER;
		cl_rpcmsg->rb_longbuf.len = msglen;
		if (rdma_buf_alloc(conn, &cl_rpcmsg->rb_longbuf)) {
			clist_free(cl_rpcmsg);
			goto done;
		}
		cl_rpcmsg->w.c_saddr3 = cl_rpcmsg->rb_longbuf.addr;

		op = RDMA_NOMSG;
		call_xdrp = &callxdr;

		xdrrdma_create(call_xdrp, cl_rpcmsg->rb_longbuf.addr,
		    cl_rpcmsg->rb_longbuf.len, 0,
		    cl_rpcmsg, XDR_ENCODE, conn);

		status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
		    xdr_args, argsp);

		if (status != CLNT_RDMA_SUCCESS) {
			p->cku_err.re_status = RPC_CANTENCODEARGS;
			p->cku_err.re_errno = EIO;
			DTRACE_PROBE(krpc__e__clntrdma__callit__composemsg);
			goto done;
		}
	}

	/*
	 * During the XDR_ENCODE we may have "allocated" an RDMA READ or
	 * RDMA WRITE clist.
	 *
	 * First pull the RDMA READ chunk list from the XDR private
	 * area to keep it handy.
	 */
	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &cl);

	if (gss_i_or_p) {
		long_reply_len = rcil.rcil_len + rcil.rcil_len_alt;
		long_reply_len += MAX_AUTH_BYTES;
	} else {
		long_reply_len = rcil.rcil_len;
	}

	/*
	 * Update the chunk size information for the Long RPC msg.
	 */
	if (cl && op == RDMA_NOMSG)
		cl->c_len = p->cku_outsz;

	/*
	 * Prepare the RDMA header. On success xdrs will hold the result
	 * of xdrmem_create() for a SEND_BUFFER.
	 */
	status = clnt_compose_rdma_header(conn, h, &clmsg,
	    &rdmahdr_o_xdrs, &op);

	if (status != CLNT_RDMA_SUCCESS) {
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		RCSTAT_INCR(rcnomem);
		DTRACE_PROBE(krpc__e__clntrdma__callit__nobufs2);
		goto done;
	}

	/*
	 * Now insert the RDMA READ list iff present
	 */
	status = clnt_setup_rlist(conn, rdmahdr_o_xdrs, call_xdrp);
	if (status != CLNT_RDMA_SUCCESS) {
		DTRACE_PROBE(krpc__e__clntrdma__callit__clistreg);
		rdma_buf_free(conn, &clmsg);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/*
	 * Setup RDMA WRITE chunk list for nfs read operation
	 * other operations will have a NULL which will result
	 * as a NULL list in the XDR stream.
	 */
	status = clnt_setup_wlist(conn, rdmahdr_o_xdrs, call_xdrp, &rndup);
	if (status != CLNT_RDMA_SUCCESS) {
		rdma_buf_free(conn, &clmsg);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/*
	 * If NULL call and RPCSEC_GSS, provide a chunk such that
	 * large responses can flow back to the client.
	 * If RPCSEC_GSS with integrity or privacy is in use, get chunk.
	 */
	if ((procnum == 0 && rpcsec_gss == TRUE) ||
	    (rpcsec_gss == TRUE && gss_i_or_p == TRUE))
		long_reply_len += 1024;

	status = clnt_setup_long_reply(conn, &cl_long_reply, long_reply_len);

	if (status != CLNT_RDMA_SUCCESS) {
		rdma_buf_free(conn, &clmsg);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/*
	 * XDR encode the RDMA_REPLY write chunk
	 */
	seg_array_len = (cl_long_reply ? 1 : 0);
	(void) xdr_encode_reply_wchunk(rdmahdr_o_xdrs, cl_long_reply,
	    seg_array_len);

	/*
	 * Construct a clist in "sendlist" that represents what we
	 * will push over the wire.
	 *
	 * Start with the RDMA header and clist (if any)
	 */
	clist_add(&cl_sendlist, 0, XDR_GETPOS(rdmahdr_o_xdrs), &clmsg.handle,
	    clmsg.addr, NULL, NULL);

	/*
	 * Put the RPC call message in sendlist if small RPC
	 */
	if (op == RDMA_MSG) {
		clist_add(&cl_sendlist, 0, p->cku_outsz, &rpcmsg.handle,
		    rpcmsg.addr, NULL, NULL);
	} else {
		/* Long RPC already in chunk list */
		RCSTAT_INCR(rclongrpcs);
	}

	/*
	 * Set up a reply buffer ready for the reply
	 */
	status = rdma_clnt_postrecv(conn, p->cku_xid);
	if (status != RDMA_SUCCESS) {
		rdma_buf_free(conn, &clmsg);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/*
	 * sync the memory for dma
	 */
	if (cl != NULL) {
		status = clist_syncmem(conn, cl, CLIST_REG_SOURCE);
		if (status != RDMA_SUCCESS) {
			(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
			rdma_buf_free(conn, &clmsg);
			p->cku_err.re_status = RPC_CANTSEND;
			p->cku_err.re_errno = EIO;
			goto done;
		}
	}

	/*
	 * Send the RDMA Header and RPC call message to the server
	 */
	status = RDMA_SEND(conn, cl_sendlist, p->cku_xid);
	if (status != RDMA_SUCCESS) {
		(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
		p->cku_err.re_status = RPC_CANTSEND;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/*
	 * RDMA plugin now owns the send msg buffers.
	 * Clear them out and don't free them.
	 */
	clmsg.addr = NULL;
	if (rpcmsg.type == SEND_BUFFER)
		rpcmsg.addr = NULL;

	/*
	 * Recv rpc reply
	 */
	status = RDMA_RECV(conn, &cl_recvlist, p->cku_xid);

	/*
	 * Now check recv status
	 */
	if (status != 0) {
		if (status == RDMA_INTR) {
			p->cku_err.re_status = RPC_INTR;
			p->cku_err.re_errno = EINTR;
			RCSTAT_INCR(rcintrs);
		} else if (status == RPC_TIMEDOUT) {
			p->cku_err.re_status = RPC_TIMEDOUT;
			p->cku_err.re_errno = ETIMEDOUT;
			RCSTAT_INCR(rctimeouts);
		} else {
			p->cku_err.re_status = RPC_CANTRECV;
			p->cku_err.re_errno = EIO;
		}
		goto done;
	}

	/*
	 * Process the reply message.
	 *
	 * First the chunk list (if any)
	 */
	rdmahdr_i_xdrs = &(p->cku_inxdr);
	xdrmem_create(rdmahdr_i_xdrs,
	    (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3,
	    cl_recvlist->c_len, XDR_DECODE);

	/*
	 * Treat xid as opaque (xid is the first entity
	 * in the rpc rdma message).
	 * Skip xid and set the xdr position accordingly.
	 */
	XDR_SETPOS(rdmahdr_i_xdrs, sizeof (uint32_t));
	(void) xdr_u_int(rdmahdr_i_xdrs, &vers);
	(void) xdr_u_int(rdmahdr_i_xdrs, &rdma_credit);
	(void) xdr_u_int(rdmahdr_i_xdrs, &op);
	(void) xdr_do_clist(rdmahdr_i_xdrs, &cl);

	/* Absorb the credit grant the server sent back in the header. */
	clnt_update_credit(conn, rdma_credit);

	wlist_exists_reply = FALSE;
	if (! xdr_decode_wlist(rdmahdr_i_xdrs, &cl_rpcreply_wlist,
	    &wlist_exists_reply)) {
		DTRACE_PROBE(krpc__e__clntrdma__callit__wlist_decode);
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		goto done;
	}

	/*
	 * The server shouldn't have sent a RDMA_SEND that
	 * the client needs to RDMA_WRITE a reply back to
	 * the server. So silently ignoring what the
	 * server returns in the rdma_reply section of the
	 * header.
	 */
	(void) xdr_decode_reply_wchunk(rdmahdr_i_xdrs, &cl_rdma_reply);
	off = xdr_getpos(rdmahdr_i_xdrs);

	clnt_decode_long_reply(conn, cl_long_reply,
	    cl_rdma_reply, &replyxdr, &reply_xdrp,
	    cl, cl_recvlist, op, off);

	if (reply_xdrp == NULL)
		goto done;

	if (wlist_exists_reply) {
		XDR_CONTROL(reply_xdrp, XDR_RDMA_SET_WLIST, cl_rpcreply_wlist);
	}

	reply_msg.rm_direction = REPLY;
	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
	reply_msg.acpted_rply.ar_stat = SUCCESS;
	reply_msg.acpted_rply.ar_verf = _null_auth;

	/*
	 * xdr_results will be done in AUTH_UNWRAP.
	 */
	reply_msg.acpted_rply.ar_results.where = NULL;
	reply_msg.acpted_rply.ar_results.proc = xdr_void;

	/*
	 * Decode and validate the response.
	 */
	if (xdr_replymsg(reply_xdrp, &reply_msg)) {
		enum clnt_stat re_status;

		_seterr_reply(&reply_msg, &(p->cku_err));

		re_status = p->cku_err.re_status;
		if (re_status == RPC_SUCCESS) {
			/*
			 * Reply is good, check auth.
			 */
			if (!AUTH_VALIDATE(h->cl_auth,
			    &reply_msg.acpted_rply.ar_verf)) {
				p->cku_err.re_status = RPC_AUTHERROR;
				p->cku_err.re_why = AUTH_INVALIDRESP;
				RCSTAT_INCR(rcbadverfs);
				DTRACE_PROBE(
				    krpc__e__clntrdma__callit__authvalidate);
			} else if (!AUTH_UNWRAP(h->cl_auth, reply_xdrp,
			    xdr_results, resultsp)) {
				p->cku_err.re_status = RPC_CANTDECODERES;
				p->cku_err.re_errno = EIO;
				DTRACE_PROBE(
				    krpc__e__clntrdma__callit__authunwrap);
			}
		} else {
			/* set errno in case we can't recover */
			if (re_status != RPC_VERSMISMATCH &&
			    re_status != RPC_AUTHERROR &&
			    re_status != RPC_PROGVERSMISMATCH)
				p->cku_err.re_errno = EIO;

			if (re_status == RPC_AUTHERROR) {
				/*
				 * Maybe our credential needs to be
				 * refreshed; retry the call (bounded by
				 * refresh_attempt) if AUTH_REFRESH works.
				 */
				if ((refresh_attempt > 0) &&
				    AUTH_REFRESH(h->cl_auth, &reply_msg,
				    p->cku_cred)) {
					refresh_attempt--;
					try_call_again = 1;
					goto done;
				}

				try_call_again = 0;

				/*
				 * We have used the client handle to
				 * do an AUTH_REFRESH and the RPC status may
				 * be set to RPC_SUCCESS; Let's make sure to
				 * set it to RPC_AUTHERROR.
				 */
				p->cku_err.re_status = RPC_AUTHERROR;

				/*
				 * Map recoverable and unrecoverable
				 * authentication errors to appropriate
				 * errno
				 */
				switch (p->cku_err.re_why) {
				case AUTH_BADCRED:
				case AUTH_BADVERF:
				case AUTH_INVALIDRESP:
				case AUTH_TOOWEAK:
				case AUTH_FAILED:
				case RPCSEC_GSS_NOCRED:
				case RPCSEC_GSS_FAILED:
					p->cku_err.re_errno = EACCES;
					break;
				case AUTH_REJECTEDCRED:
				case AUTH_REJECTEDVERF:
				default:
					p->cku_err.re_errno = EIO;
					break;
				}
			}
			DTRACE_PROBE1(krpc__e__clntrdma__callit__rpcfailed,
			    int, p->cku_err.re_why);
		}
	} else {
		p->cku_err.re_status = RPC_CANTDECODERES;
		p->cku_err.re_errno = EIO;
		DTRACE_PROBE(krpc__e__clntrdma__callit__replymsg);
	}

done:
	/* Common cleanup; also reached by the AUTH_REFRESH retry path. */
	clnt_return_credit(conn);

	if (cl_sendlist != NULL)
		clist_free(cl_sendlist);

	/*
	 * If rpc reply is in a chunk, free it now.
	 */
	if (cl_long_reply) {
		(void) clist_deregister(conn, cl_long_reply);
		rdma_buf_free(conn, &cl_long_reply->rb_longbuf);
		clist_free(cl_long_reply);
	}

	if (call_xdrp)
		XDR_DESTROY(call_xdrp);

	if (rndup.rb_private) {
		rdma_buf_free(conn, &rndup);
	}

	if (reply_xdrp) {
		(void) xdr_rpc_free_verifier(reply_xdrp, &reply_msg);
		XDR_DESTROY(reply_xdrp);
	}

	if (cl_rdma_reply) {
		clist_free(cl_rdma_reply);
	}

	if (cl_recvlist) {
		rdma_buf_t	recvmsg = {0};
		recvmsg.addr = (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3;
		recvmsg.type = RECV_BUFFER;
		RDMA_BUF_FREE(conn, &recvmsg);
		clist_free(cl_recvlist);
	}

	RDMA_REL_CONN(conn);

	if (try_call_again)
		goto call_again;

	if (p->cku_err.re_status != RPC_SUCCESS) {
		RCSTAT_INCR(rcbadcalls);
	}
	return (p->cku_err.re_status);
}


static void
clnt_decode_long_reply(CONN *conn,
1178 struct clist *cl_long_reply, 1179 struct clist *cl_rdma_reply, XDR *xdrs, 1180 XDR **rxdrp, struct clist *cl, 1181 struct clist *cl_recvlist, 1182 uint_t op, uint_t off) 1183 { 1184 if (op != RDMA_NOMSG) { 1185 DTRACE_PROBE1(krpc__i__longrepl__rdmamsg__len, 1186 int, cl_recvlist->c_len - off); 1187 xdrrdma_create(xdrs, 1188 (caddr_t)(uintptr_t)(cl_recvlist->w.c_saddr3 + off), 1189 cl_recvlist->c_len - off, 0, cl, XDR_DECODE, conn); 1190 *rxdrp = xdrs; 1191 return; 1192 } 1193 1194 /* op must be RDMA_NOMSG */ 1195 if (cl) { 1196 DTRACE_PROBE(krpc__e__clntrdma__declongreply__serverreadlist); 1197 return; 1198 } 1199 1200 if (cl_long_reply->u.c_daddr) { 1201 DTRACE_PROBE1(krpc__i__longrepl__rdmanomsg__len, 1202 int, cl_rdma_reply->c_len); 1203 1204 xdrrdma_create(xdrs, (caddr_t)cl_long_reply->u.c_daddr3, 1205 cl_rdma_reply->c_len, 0, NULL, XDR_DECODE, conn); 1206 1207 *rxdrp = xdrs; 1208 } 1209 } 1210 1211 static void 1212 clnt_return_credit(CONN *conn) 1213 { 1214 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1215 1216 mutex_enter(&conn->c_lock); 1217 cc_info->clnt_cc_in_flight_ops--; 1218 cv_signal(&cc_info->clnt_cc_cv); 1219 mutex_exit(&conn->c_lock); 1220 } 1221 1222 static void 1223 clnt_update_credit(CONN *conn, uint32_t rdma_credit) 1224 { 1225 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1226 1227 /* 1228 * If the granted has not altered, avoid taking the 1229 * mutex, to essentially do nothing.. 1230 */ 1231 if (cc_info->clnt_cc_granted_ops == rdma_credit) 1232 return; 1233 /* 1234 * Get the granted number of buffers for credit control. 
1235 */ 1236 mutex_enter(&conn->c_lock); 1237 cc_info->clnt_cc_granted_ops = rdma_credit; 1238 mutex_exit(&conn->c_lock); 1239 } 1240 1241 static void 1242 clnt_check_credit(CONN *conn) 1243 { 1244 rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc; 1245 1246 /* 1247 * Make sure we are not going over our allowed buffer use 1248 * (and make sure we have gotten a granted value before). 1249 */ 1250 mutex_enter(&conn->c_lock); 1251 while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops && 1252 cc_info->clnt_cc_granted_ops != 0) { 1253 /* 1254 * Client has maxed out its granted buffers due to 1255 * credit control. Current handling is to block and wait. 1256 */ 1257 cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock); 1258 } 1259 cc_info->clnt_cc_in_flight_ops++; 1260 mutex_exit(&conn->c_lock); 1261 } 1262 1263 /* ARGSUSED */ 1264 static void 1265 clnt_rdma_kabort(CLIENT *h) 1266 { 1267 } 1268 1269 static void 1270 clnt_rdma_kerror(CLIENT *h, struct rpc_err *err) 1271 { 1272 struct cku_private *p = htop(h); 1273 *err = p->cku_err; 1274 } 1275 1276 static bool_t 1277 clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr) 1278 { 1279 struct cku_private *p = htop(h); 1280 XDR *xdrs; 1281 1282 xdrs = &(p->cku_outxdr); 1283 xdrs->x_op = XDR_FREE; 1284 return ((*xdr_res)(xdrs, res_ptr)); 1285 } 1286 1287 /* ARGSUSED */ 1288 static bool_t 1289 clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg) 1290 { 1291 return (TRUE); 1292 } 1293 1294 /* ARGSUSED */ 1295 static int 1296 clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all, 1297 int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg, 1298 uint32_t xid) 1299 { 1300 RCSTAT_INCR(rctimers); 1301 return (0); 1302 } 1303 1304 int 1305 rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf) 1306 { 1307 rdma_registry_t *rp; 1308 void *handle = NULL; 1309 struct knetconfig *knc; 1310 char *pf, *p; 1311 rdma_stat status; 1312 int error = 0; 
1313 1314 if (!INGLOBALZONE(curproc)) 1315 return (-1); 1316 1317 /* 1318 * modload the RDMA plugins if not already done. 1319 */ 1320 if (!rdma_modloaded) { 1321 mutex_enter(&rdma_modload_lock); 1322 if (!rdma_modloaded) { 1323 error = rdma_modload(); 1324 } 1325 mutex_exit(&rdma_modload_lock); 1326 if (error) 1327 return (-1); 1328 } 1329 1330 if (!rdma_dev_available) 1331 return (-1); 1332 1333 rw_enter(&rdma_lock, RW_READER); 1334 rp = rdma_mod_head; 1335 while (rp != NULL) { 1336 if (rp->r_mod_state == RDMA_MOD_INACTIVE) { 1337 rp = rp->r_next; 1338 continue; 1339 } 1340 status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr, 1341 &handle); 1342 if (status == RDMA_SUCCESS) { 1343 knc = kmem_zalloc(sizeof (struct knetconfig), 1344 KM_SLEEP); 1345 knc->knc_semantics = NC_TPI_RDMA; 1346 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1347 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP); 1348 if (addr_type == AF_INET) 1349 (void) strncpy(pf, NC_INET, KNC_STRSIZE); 1350 else if (addr_type == AF_INET6) 1351 (void) strncpy(pf, NC_INET6, KNC_STRSIZE); 1352 pf[KNC_STRSIZE - 1] = '\0'; 1353 1354 (void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE); 1355 p[KNC_STRSIZE - 1] = '\0'; 1356 1357 knc->knc_protofmly = pf; 1358 knc->knc_proto = p; 1359 knc->knc_rdev = (dev_t)rp; 1360 *knconf = knc; 1361 rw_exit(&rdma_lock); 1362 return (0); 1363 } 1364 rp = rp->r_next; 1365 } 1366 rw_exit(&rdma_lock); 1367 return (-1); 1368 } 1369