1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 2008, The Ohio State University. All rights reserved. 28 * 29 * Portions of this source code is developed by the team members of 30 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 31 * headed by Professor Dhabaleswar K. (DK) Panda. 
 *
 * Acknowledgements to contributions from developers:
 *   Ranjit Noronha: noronha@cse.ohio-state.edu
 *   Lei Chai      : chail@cse.ohio-state.edu
 *   Weikuan Yu    : yuw@cse.ohio-state.edu
 *
 */

#include <sys/systm.h>
#include <sys/kstat.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#include <rpc/rpc_rdma.h>

#include <sys/ib/ibtl/ibti.h>

/*
 * Initialised from RDMA_MINCHUNK; not referenced elsewhere in this file.
 * NOTE(review): presumably a tunable minimum chunk size -- confirm against
 * the users of this variable.
 */
uint_t rdma_minchunk = RDMA_MINCHUNK;

/*
 * Globals
 */
int rdma_modloaded = 0;		/* flag to load RDMA plugin modules */
int rdma_dev_available = 0;	/* if any RDMA device is loaded */
kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */

/*
 * Attach/detach notification state: rdma_register_mod() and
 * rdma_unregister_mod() set svc_stat and signal svc_cv under svc_lock;
 * rdma_kwait() blocks on svc_cv and returns svc_stat.
 */
rdma_svc_wait_t rdma_wait;

rdma_registry_t *rdma_mod_head = NULL;	/* head for RDMA modules */
krwlock_t rdma_lock;		/* protects rdma_mod_head list */
ldi_ident_t rpcmod_li = NULL;	/* identifies us with ldi_ framework */

/*
 * kmem cache backing struct clist entries (see clist_alloc() and
 * clist_free()); created on the first successful rdma_modload().
 */
kmem_cache_t *clist_cache = NULL;

/*
 * Statics
 *
 * NOTE(review): rpcib_handle is listed under "Statics" but is not declared
 * static, so it has external linkage.
 */
ldi_handle_t rpcib_handle = NULL;	/* LDI handle used by rdma_modload() */

/*
 * Externs
 */
extern kstat_named_t *rdmarcstat_ptr;	/* data for "rpc_rdma_client" kstat */
extern uint_t rdmarcstat_ndata;
extern kstat_named_t *rdmarsstat_ptr;	/* data for "rpc_rdma_server" kstat */
extern uint_t rdmarsstat_ndata;

void rdma_kstat_init();

/*
 * RDMATF module registration routine.
 * This routine is expected to be called by the init routine in
 * the plugin modules.
84 */ 85 rdma_stat 86 rdma_register_mod(rdma_mod_t *mod) 87 { 88 rdma_registry_t **mp, *m; 89 90 if (mod->rdma_version != RDMATF_VERS) { 91 return (RDMA_BADVERS); 92 } 93 94 rw_enter(&rdma_lock, RW_WRITER); 95 /* 96 * Ensure not already registered 97 */ 98 mp = &rdma_mod_head; 99 while (*mp != NULL) { 100 if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api, 101 KNC_STRSIZE) == 0) { 102 if ((*mp)->r_mod_state == RDMA_MOD_INACTIVE) { 103 (*mp)->r_mod_state = RDMA_MOD_ACTIVE; 104 (*mp)->r_mod->rdma_ops = mod->rdma_ops; 105 (*mp)->r_mod->rdma_count = mod->rdma_count; 106 goto announce_hca; 107 } 108 rw_exit(&rdma_lock); 109 return (RDMA_REG_EXIST); 110 } 111 mp = &((*mp)->r_next); 112 } 113 114 /* 115 * New one, create and add to registry 116 */ 117 m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP); 118 m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP); 119 *m->r_mod = *mod; 120 m->r_next = NULL; 121 m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 122 (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE); 123 m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0'; 124 m->r_mod_state = RDMA_MOD_ACTIVE; 125 *mp = m; 126 127 announce_hca: 128 rw_exit(&rdma_lock); 129 /* 130 * Start the nfs service on the rdma xprts. 131 * (this notification mechanism will need to change when we support 132 * multiple hcas and have support for multiple rdma plugins). 133 */ 134 mutex_enter(&rdma_wait.svc_lock); 135 rdma_wait.svc_stat = RDMA_HCA_ATTACH; 136 cv_signal(&rdma_wait.svc_cv); 137 mutex_exit(&rdma_wait.svc_lock); 138 139 return (RDMA_SUCCESS); 140 } 141 142 /* 143 * RDMATF module unregistration routine. 144 * This routine is expected to be called by the fini routine in 145 * the plugin modules. 
146 */ 147 rdma_stat 148 rdma_unregister_mod(rdma_mod_t *mod) 149 { 150 rdma_registry_t **m, *mmod = NULL; 151 152 rw_enter(&rdma_lock, RW_WRITER); 153 154 m = &rdma_mod_head; 155 while (*m != NULL) { 156 if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api, 157 KNC_STRSIZE) != 0) { 158 m = &((*m)->r_next); 159 continue; 160 } 161 /* 162 * Check if any device attached, if so return error 163 */ 164 if (mod->rdma_count != 0) { 165 rw_exit(&rdma_lock); 166 return (RDMA_FAILED); 167 } 168 /* 169 * Found entry. Mark it inactive. 170 */ 171 mmod = *m; 172 mmod->r_mod->rdma_count = 0; 173 mmod->r_mod_state = RDMA_MOD_INACTIVE; 174 break; 175 } 176 177 rdma_modloaded = 0; 178 rdma_dev_available = 0; 179 rw_exit(&rdma_lock); 180 181 /* 182 * Stop the nfs service running on the rdma xprts. 183 * (this notification mechanism will need to change when we support 184 * multiple hcas and have support for multiple rdma plugins). 185 */ 186 mutex_enter(&rdma_wait.svc_lock); 187 rdma_wait.svc_stat = RDMA_HCA_DETACH; 188 cv_signal(&rdma_wait.svc_cv); 189 mutex_exit(&rdma_wait.svc_lock); 190 191 /* 192 * Not found. 193 */ 194 return (RDMA_SUCCESS); 195 } 196 197 struct clist * 198 clist_alloc(void) 199 { 200 struct clist *clp; 201 202 clp = kmem_cache_alloc(clist_cache, KM_SLEEP); 203 204 bzero(clp, sizeof (*clp)); 205 206 return (clp); 207 } 208 209 uint32_t 210 clist_len(struct clist *cl) 211 { 212 uint32_t len = 0; 213 while (cl) { 214 len += cl->c_len; 215 cl = cl->c_next; 216 } 217 return (len); 218 } 219 220 void 221 clist_zero_len(struct clist *cl) 222 { 223 while (cl != NULL) { 224 if (cl->c_dmemhandle.mrc_rmr == 0) 225 break; 226 cl->c_len = 0; 227 cl = cl->c_next; 228 } 229 } 230 231 /* 232 * Creates a new chunk list entry, and 233 * adds it to the end of a chunk list. 
234 */ 235 void 236 clist_add(struct clist **clp, uint32_t xdroff, int len, 237 struct mrc *shandle, caddr_t saddr, 238 struct mrc *dhandle, caddr_t daddr) 239 { 240 struct clist *cl; 241 242 /* Find the end of the list */ 243 244 while (*clp != NULL) 245 clp = &((*clp)->c_next); 246 247 cl = clist_alloc(); 248 cl->c_xdroff = xdroff; 249 cl->c_len = len; 250 cl->w.c_saddr = (uint64_t)(uintptr_t)saddr; 251 if (shandle) 252 cl->c_smemhandle = *shandle; 253 cl->u.c_daddr = (uint64_t)(uintptr_t)daddr; 254 if (dhandle) 255 cl->c_dmemhandle = *dhandle; 256 cl->c_next = NULL; 257 258 *clp = cl; 259 } 260 261 rdma_stat 262 clist_register(CONN *conn, struct clist *cl, clist_dstsrc dstsrc) 263 { 264 struct clist *c; 265 int status; 266 267 for (c = cl; c; c = c->c_next) { 268 if (c->c_len <= 0) 269 continue; 270 271 c->c_regtype = dstsrc; 272 273 switch (dstsrc) { 274 case CLIST_REG_SOURCE: 275 status = RDMA_REGMEMSYNC(conn, 276 (caddr_t)(struct as *)c->c_adspc, 277 (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len, 278 &c->c_smemhandle, (void **)&c->c_ssynchandle, 279 (void *)c->rb_longbuf.rb_private); 280 break; 281 case CLIST_REG_DST: 282 status = RDMA_REGMEMSYNC(conn, 283 (caddr_t)(struct as *)c->c_adspc, 284 (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len, 285 &c->c_dmemhandle, (void **)&c->c_dsynchandle, 286 (void *)c->rb_longbuf.rb_private); 287 break; 288 default: 289 return (RDMA_INVAL); 290 } 291 if (status != RDMA_SUCCESS) { 292 (void) clist_deregister(conn, cl); 293 return (status); 294 } 295 } 296 297 return (RDMA_SUCCESS); 298 } 299 300 rdma_stat 301 clist_deregister(CONN *conn, struct clist *cl) 302 { 303 struct clist *c; 304 305 for (c = cl; c; c = c->c_next) { 306 switch (c->c_regtype) { 307 case CLIST_REG_SOURCE: 308 if (c->c_smemhandle.mrc_rmr != 0) { 309 (void) RDMA_DEREGMEMSYNC(conn, 310 (caddr_t)(uintptr_t)c->w.c_saddr3, 311 c->c_smemhandle, 312 (void *)(uintptr_t)c->c_ssynchandle, 313 (void *)c->rb_longbuf.rb_private); 314 c->c_smemhandle.mrc_rmr = 0; 315 
c->c_ssynchandle = 0; 316 } 317 break; 318 case CLIST_REG_DST: 319 if (c->c_dmemhandle.mrc_rmr != 0) { 320 (void) RDMA_DEREGMEMSYNC(conn, 321 (caddr_t)(uintptr_t)c->u.c_daddr3, 322 c->c_dmemhandle, 323 (void *)(uintptr_t)c->c_dsynchandle, 324 (void *)c->rb_longbuf.rb_private); 325 c->c_dmemhandle.mrc_rmr = 0; 326 c->c_dsynchandle = 0; 327 } 328 break; 329 default: 330 /* clist unregistered. continue */ 331 break; 332 } 333 } 334 335 return (RDMA_SUCCESS); 336 } 337 338 rdma_stat 339 clist_syncmem(CONN *conn, struct clist *cl, clist_dstsrc dstsrc) 340 { 341 struct clist *c; 342 rdma_stat status; 343 344 c = cl; 345 switch (dstsrc) { 346 case CLIST_REG_SOURCE: 347 while (c != NULL) { 348 if (c->c_ssynchandle) { 349 status = RDMA_SYNCMEM(conn, 350 (void *)(uintptr_t)c->c_ssynchandle, 351 (caddr_t)(uintptr_t)c->w.c_saddr3, 352 c->c_len, 0); 353 if (status != RDMA_SUCCESS) 354 return (status); 355 } 356 c = c->c_next; 357 } 358 break; 359 case CLIST_REG_DST: 360 while (c != NULL) { 361 if (c->c_ssynchandle) { 362 status = RDMA_SYNCMEM(conn, 363 (void *)(uintptr_t)c->c_dsynchandle, 364 (caddr_t)(uintptr_t)c->u.c_daddr3, 365 c->c_len, 1); 366 if (status != RDMA_SUCCESS) 367 return (status); 368 } 369 c = c->c_next; 370 } 371 break; 372 default: 373 return (RDMA_INVAL); 374 } 375 376 return (RDMA_SUCCESS); 377 } 378 379 /* 380 * Frees up entries in chunk list 381 */ 382 void 383 clist_free(struct clist *cl) 384 { 385 struct clist *c = cl; 386 387 while (c != NULL) { 388 cl = cl->c_next; 389 kmem_cache_free(clist_cache, c); 390 c = cl; 391 } 392 } 393 394 rdma_stat 395 rdma_clnt_postrecv(CONN *conn, uint32_t xid) 396 { 397 struct clist *cl = NULL; 398 rdma_stat retval; 399 rdma_buf_t rbuf = {0}; 400 401 rbuf.type = RECV_BUFFER; 402 if (RDMA_BUF_ALLOC(conn, &rbuf)) { 403 return (RDMA_NORESOURCE); 404 } 405 406 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, 407 NULL, NULL); 408 retval = RDMA_CLNT_RECVBUF(conn, cl, xid); 409 clist_free(cl); 410 411 return (retval); 412 } 
413 414 rdma_stat 415 rdma_clnt_postrecv_remove(CONN *conn, uint32_t xid) 416 { 417 return (RDMA_CLNT_RECVBUF_REMOVE(conn, xid)); 418 } 419 420 rdma_stat 421 rdma_svc_postrecv(CONN *conn) 422 { 423 struct clist *cl = NULL; 424 rdma_stat retval; 425 rdma_buf_t rbuf = {0}; 426 427 rbuf.type = RECV_BUFFER; 428 if (RDMA_BUF_ALLOC(conn, &rbuf)) { 429 retval = RDMA_NORESOURCE; 430 } else { 431 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, 432 NULL, NULL); 433 retval = RDMA_SVC_RECVBUF(conn, cl); 434 clist_free(cl); 435 } 436 return (retval); 437 } 438 439 rdma_stat 440 rdma_buf_alloc(CONN *conn, rdma_buf_t *rbuf) 441 { 442 return (RDMA_BUF_ALLOC(conn, rbuf)); 443 } 444 445 void 446 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf) 447 { 448 if (!rbuf || rbuf->addr == NULL) { 449 return; 450 } 451 RDMA_BUF_FREE(conn, rbuf); 452 bzero(rbuf, sizeof (rdma_buf_t)); 453 } 454 455 /* 456 * Caller is holding rdma_modload_lock mutex 457 */ 458 int 459 rdma_modload() 460 { 461 int status; 462 ASSERT(MUTEX_HELD(&rdma_modload_lock)); 463 /* 464 * Load all available RDMA plugins which right now is only IB plugin. 465 * If no IB hardware is present, then quit right away. 466 * ENODEV -- For no device on the system 467 * EPROTONOSUPPORT -- For module not avilable either due to failure to 468 * load or some other reason. 469 */ 470 rdma_modloaded = 1; 471 if (ibt_hw_is_present() == 0) { 472 rdma_dev_available = 0; 473 return (ENODEV); 474 } 475 476 rdma_dev_available = 1; 477 if (rpcmod_li == NULL) 478 return (EPROTONOSUPPORT); 479 480 status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib", 481 FREAD | FWRITE, kcred, 482 &rpcib_handle, rpcmod_li); 483 484 if (status != 0) 485 return (EPROTONOSUPPORT); 486 487 488 /* 489 * We will need to reload the plugin module after it was unregistered 490 * but the resources below need to allocated only the first time. 
491 */ 492 if (!clist_cache) { 493 clist_cache = kmem_cache_create("rdma_clist", 494 sizeof (struct clist), _POINTER_ALIGNMENT, NULL, 495 NULL, NULL, NULL, 0, 0); 496 rdma_kstat_init(); 497 } 498 499 (void) ldi_close(rpcib_handle, FREAD|FWRITE, kcred); 500 501 return (0); 502 } 503 504 void 505 rdma_kstat_init(void) 506 { 507 kstat_t *ksp; 508 509 /* 510 * The RDMA framework doesn't know how to deal with Zones, and is 511 * only available in the global zone. 512 */ 513 ASSERT(INGLOBALZONE(curproc)); 514 ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc", 515 KSTAT_TYPE_NAMED, rdmarcstat_ndata, 516 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); 517 if (ksp) { 518 ksp->ks_data = (void *) rdmarcstat_ptr; 519 kstat_install(ksp); 520 } 521 522 ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc", 523 KSTAT_TYPE_NAMED, rdmarsstat_ndata, 524 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); 525 if (ksp) { 526 ksp->ks_data = (void *) rdmarsstat_ptr; 527 kstat_install(ksp); 528 } 529 } 530 531 rdma_stat 532 rdma_kwait(void) 533 { 534 int ret; 535 rdma_stat stat; 536 537 mutex_enter(&rdma_wait.svc_lock); 538 539 ret = cv_wait_sig(&rdma_wait.svc_cv, &rdma_wait.svc_lock); 540 541 /* 542 * If signalled by a hca attach/detach, pass the right 543 * stat back. 544 */ 545 546 if (ret) 547 stat = rdma_wait.svc_stat; 548 else 549 stat = RDMA_INTR; 550 551 mutex_exit(&rdma_wait.svc_lock); 552 553 return (stat); 554 } 555