1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 2007, The Ohio State University. All rights reserved. 28 * 29 * Portions of this source code is developed by the team members of 30 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 31 * headed by Professor Dhabaleswar K. (DK) Panda. 32 * 33 * Acknowledgements to contributions from developors: 34 * Ranjit Noronha: noronha@cse.ohio-state.edu 35 * Lei Chai : chail@cse.ohio-state.edu 36 * Weikuan Yu : yuw@cse.ohio-state.edu 37 * 38 */ 39 40 #include <sys/systm.h> 41 #include <sys/kstat.h> 42 #include <sys/modctl.h> 43 #include <sys/sdt.h> 44 #include <rpc/rpc_rdma.h> 45 46 #include <sys/ib/ibtl/ibti.h> 47 48 uint_t rdma_minchunk = RDMA_MINCHUNK; 49 50 /* 51 * Globals 52 */ 53 int rdma_modloaded = 0; /* flag to load RDMA plugin modules */ 54 int rdma_dev_available = 0; /* if any RDMA device is loaded */ 55 kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */ 56 rdma_registry_t *rdma_mod_head = NULL; /* head for RDMA modules */ 57 krwlock_t rdma_lock; /* protects rdma_mod_head list */ 58 ldi_ident_t rpcmod_li = NULL; /* identifies us with ldi_ framework */ 59 60 kmem_cache_t *clist_cache = NULL; 61 62 /* 63 * Statics 64 */ 65 static ldi_handle_t rpcib_handle = NULL; 66 67 /* 68 * Externs 69 */ 70 extern kstat_named_t *rdmarcstat_ptr; 71 extern uint_t rdmarcstat_ndata; 72 extern kstat_named_t *rdmarsstat_ptr; 73 extern uint_t rdmarsstat_ndata; 74 75 void rdma_kstat_init(); 76 77 /* 78 * RDMATF module registration routine. 79 * This routine is expected to be called by the init routine in 80 * the plugin modules. 81 */ 82 rdma_stat 83 rdma_register_mod(rdma_mod_t *mod) 84 { 85 rdma_registry_t **mp, *m; 86 87 if (mod->rdma_version != RDMATF_VERS) { 88 return (RDMA_BADVERS); 89 } 90 91 rw_enter(&rdma_lock, RW_WRITER); 92 /* 93 * Ensure not already registered 94 */ 95 mp = &rdma_mod_head; 96 while (*mp != NULL) { 97 if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api, 98 KNC_STRSIZE) == 0) { 99 rw_exit(&rdma_lock); 100 return (RDMA_REG_EXIST); 101 } 102 mp = &((*mp)->r_next); 103 } 104 105 /* 106 * New one, create and add to registry 107 */ 108 m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP); 109 m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP); 110 *m->r_mod = *mod; 111 m->r_next = NULL; 112 m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 113 (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE); 114 m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0'; 115 *mp = m; 116 rw_exit(&rdma_lock); 117 118 return (RDMA_SUCCESS); 119 } 120 121 /* 122 * RDMATF module unregistration routine. 123 * This routine is expected to be called by the fini routine in 124 * the plugin modules. 125 */ 126 rdma_stat 127 rdma_unregister_mod(rdma_mod_t *mod) 128 { 129 rdma_registry_t **m, *mmod = NULL; 130 131 rw_enter(&rdma_lock, RW_WRITER); 132 133 m = &rdma_mod_head; 134 while (*m != NULL) { 135 if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api, 136 KNC_STRSIZE) != 0) { 137 m = &((*m)->r_next); 138 continue; 139 } 140 /* 141 * Check if any device attached, if so return error 142 */ 143 if ((*m)->r_mod->rdma_count != 0) { 144 rw_exit(&rdma_lock); 145 return (RDMA_FAILED); 146 } 147 /* 148 * Found entry. Now remove it. 149 */ 150 mmod = *m; 151 *m = (*m)->r_next; 152 kmem_free(mmod->r_mod->rdma_api, KNC_STRSIZE); 153 kmem_free(mmod->r_mod, sizeof (rdma_mod_t)); 154 kmem_free(mmod, sizeof (rdma_registry_t)); 155 rw_exit(&rdma_lock); 156 return (RDMA_SUCCESS); 157 } 158 159 /* 160 * Not found. 161 */ 162 rw_exit(&rdma_lock); 163 return (RDMA_FAILED); 164 } 165 166 struct clist * 167 clist_alloc(void) 168 { 169 struct clist *clp; 170 171 clp = kmem_cache_alloc(clist_cache, KM_SLEEP); 172 173 bzero(clp, sizeof (*clp)); 174 175 return (clp); 176 } 177 178 /* 179 * Creates a new chunk list entry, and 180 * adds it to the end of a chunk list. 181 */ 182 void 183 clist_add(struct clist **clp, uint32_t xdroff, int len, 184 struct mrc *shandle, caddr_t saddr, 185 struct mrc *dhandle, caddr_t daddr) 186 { 187 struct clist *cl; 188 189 /* Find the end of the list */ 190 191 while (*clp != NULL) 192 clp = &((*clp)->c_next); 193 194 cl = clist_alloc(); 195 cl->c_xdroff = xdroff; 196 cl->c_len = len; 197 cl->w.c_saddr = (uint64_t)(uintptr_t)saddr; 198 if (shandle) 199 cl->c_smemhandle = *shandle; 200 cl->u.c_daddr = (uint64_t)(uintptr_t)daddr; 201 if (dhandle) 202 cl->c_dmemhandle = *dhandle; 203 cl->c_next = NULL; 204 205 *clp = cl; 206 } 207 208 rdma_stat 209 clist_register(CONN *conn, struct clist *cl, clist_dstsrc dstsrc) 210 { 211 struct clist *c; 212 int status; 213 214 for (c = cl; c; c = c->c_next) { 215 if (c->c_len <= 0) 216 continue; 217 switch (dstsrc) { 218 case CLIST_REG_SOURCE: 219 status = RDMA_REGMEMSYNC(conn, 220 (caddr_t)(struct as *)cl->c_adspc, 221 (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len, 222 &c->c_smemhandle, (void **)&c->c_ssynchandle, 223 (void *)c->rb_longbuf.rb_private); 224 break; 225 case CLIST_REG_DST: 226 status = RDMA_REGMEMSYNC(conn, 227 (caddr_t)(struct as *)cl->c_adspc, 228 (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len, 229 &c->c_dmemhandle, (void **)&c->c_dsynchandle, 230 (void *)c->rb_longbuf.rb_private); 231 break; 232 default: 233 return (RDMA_INVAL); 234 } 235 if (status != RDMA_SUCCESS) { 236 (void) clist_deregister(conn, cl, dstsrc); 237 return (status); 238 } 239 } 240 241 return (RDMA_SUCCESS); 242 } 243 244 rdma_stat 245 clist_deregister(CONN *conn, struct clist *cl, clist_dstsrc dstsrc) 246 { 247 struct clist *c; 248 249 for (c = cl; c; c = c->c_next) { 250 switch (dstsrc) { 251 case CLIST_REG_SOURCE: 252 if (c->c_smemhandle.mrc_rmr != 0) { 253 (void) RDMA_DEREGMEMSYNC(conn, 254 (caddr_t)(uintptr_t)c->w.c_saddr3, 255 c->c_smemhandle, 256 (void *)(uintptr_t)c->c_ssynchandle, 257 (void *)c->rb_longbuf.rb_private); 258 c->c_smemhandle.mrc_rmr = 0; 259 c->c_ssynchandle = NULL; 260 } 261 break; 262 case CLIST_REG_DST: 263 if (c->c_dmemhandle.mrc_rmr != 0) { 264 (void) RDMA_DEREGMEMSYNC(conn, 265 (caddr_t)(uintptr_t)c->u.c_daddr3, 266 c->c_dmemhandle, 267 (void *)(uintptr_t)c->c_dsynchandle, 268 (void *)c->rb_longbuf.rb_private); 269 c->c_dmemhandle.mrc_rmr = 0; 270 c->c_dsynchandle = NULL; 271 } 272 break; 273 default: 274 return (RDMA_INVAL); 275 } 276 } 277 278 return (RDMA_SUCCESS); 279 } 280 281 rdma_stat 282 clist_syncmem(CONN *conn, struct clist *cl, clist_dstsrc dstsrc) 283 { 284 struct clist *c; 285 rdma_stat status; 286 287 c = cl; 288 switch (dstsrc) { 289 case CLIST_REG_SOURCE: 290 while (c != NULL) { 291 if (c->c_ssynchandle) { 292 status = RDMA_SYNCMEM(conn, 293 (void *)(uintptr_t)c->c_ssynchandle, 294 (caddr_t)(uintptr_t)c->w.c_saddr3, 295 c->c_len, 0); 296 if (status != RDMA_SUCCESS) 297 return (status); 298 } 299 c = c->c_next; 300 } 301 break; 302 case CLIST_REG_DST: 303 while (c != NULL) { 304 if (c->c_ssynchandle) { 305 status = RDMA_SYNCMEM(conn, 306 (void *)(uintptr_t)c->c_dsynchandle, 307 (caddr_t)(uintptr_t)c->u.c_daddr3, 308 c->c_len, 1); 309 if (status != RDMA_SUCCESS) 310 return (status); 311 } 312 c = c->c_next; 313 } 314 break; 315 default: 316 return (RDMA_INVAL); 317 } 318 319 return (RDMA_SUCCESS); 320 } 321 322 /* 323 * Frees up entries in chunk list 324 */ 325 void 326 clist_free(struct clist *cl) 327 { 328 struct clist *c = cl; 329 330 while (c != NULL) { 331 cl = cl->c_next; 332 kmem_cache_free(clist_cache, c); 333 c = cl; 334 } 335 } 336 337 rdma_stat 338 rdma_clnt_postrecv(CONN *conn, uint32_t xid) 339 { 340 struct clist *cl = NULL; 341 rdma_stat retval; 342 rdma_buf_t rbuf = {0}; 343 344 rbuf.type = RECV_BUFFER; 345 if (RDMA_BUF_ALLOC(conn, &rbuf)) { 346 return (RDMA_NORESOURCE); 347 } 348 349 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, 350 NULL, NULL); 351 retval = RDMA_CLNT_RECVBUF(conn, cl, xid); 352 clist_free(cl); 353 354 return (retval); 355 } 356 357 rdma_stat 358 rdma_clnt_postrecv_remove(CONN *conn, uint32_t xid) 359 { 360 return (RDMA_CLNT_RECVBUF_REMOVE(conn, xid)); 361 } 362 363 rdma_stat 364 rdma_svc_postrecv(CONN *conn) 365 { 366 struct clist *cl = NULL; 367 rdma_stat retval; 368 rdma_buf_t rbuf = {0}; 369 370 rbuf.type = RECV_BUFFER; 371 if (RDMA_BUF_ALLOC(conn, &rbuf)) { 372 retval = RDMA_NORESOURCE; 373 } else { 374 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, 375 NULL, NULL); 376 retval = RDMA_SVC_RECVBUF(conn, cl); 377 clist_free(cl); 378 } 379 return (retval); 380 } 381 382 rdma_stat 383 rdma_buf_alloc(CONN *conn, rdma_buf_t *rbuf) 384 { 385 return (RDMA_BUF_ALLOC(conn, rbuf)); 386 } 387 388 void 389 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf) 390 { 391 if (!rbuf || rbuf->addr == NULL) { 392 return; 393 } 394 RDMA_BUF_FREE(conn, rbuf); 395 bzero(rbuf, sizeof (rdma_buf_t)); 396 } 397 398 /* 399 * Caller is holding rdma_modload_lock mutex 400 */ 401 int 402 rdma_modload() 403 { 404 int status; 405 ASSERT(MUTEX_HELD(&rdma_modload_lock)); 406 /* 407 * Load all available RDMA plugins which right now is only IB plugin. 408 * If no IB hardware is present, then quit right away. 409 * ENODEV -- For no device on the system 410 * EPROTONOSUPPORT -- For module not avilable either due to failure to 411 * load or some other reason. 412 */ 413 rdma_modloaded = 1; 414 if (ibt_hw_is_present() == 0) { 415 rdma_dev_available = 0; 416 return (ENODEV); 417 } 418 419 rdma_dev_available = 1; 420 if (rpcmod_li == NULL) 421 return (EPROTONOSUPPORT); 422 423 status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib", 424 FREAD | FWRITE, kcred, 425 &rpcib_handle, rpcmod_li); 426 if (status != 0) 427 return (EPROTONOSUPPORT); 428 429 /* success */ 430 rdma_kstat_init(); 431 432 clist_cache = kmem_cache_create("rdma_clist", 433 sizeof (struct clist), _POINTER_ALIGNMENT, NULL, 434 NULL, NULL, NULL, 0, 0); 435 436 return (0); 437 } 438 439 void 440 rdma_kstat_init(void) 441 { 442 kstat_t *ksp; 443 444 /* 445 * The RDMA framework doesn't know how to deal with Zones, and is 446 * only available in the global zone. 447 */ 448 ASSERT(INGLOBALZONE(curproc)); 449 ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc", 450 KSTAT_TYPE_NAMED, rdmarcstat_ndata, 451 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); 452 if (ksp) { 453 ksp->ks_data = (void *) rdmarcstat_ptr; 454 kstat_install(ksp); 455 } 456 457 ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc", 458 KSTAT_TYPE_NAMED, rdmarsstat_ndata, 459 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); 460 if (ksp) { 461 ksp->ks_data = (void *) rdmarsstat_ptr; 462 kstat_install(ksp); 463 } 464 } 465