1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2008, The Ohio State University. All rights reserved.
28 *
29 * Portions of this source code is developed by the team members of
30 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
31 * headed by Professor Dhabaleswar K. (DK) Panda.
32 *
33 * Acknowledgements to contributions from developors:
34 * Ranjit Noronha: noronha@cse.ohio-state.edu
35 * Lei Chai : chail@cse.ohio-state.edu
36 * Weikuan Yu : yuw@cse.ohio-state.edu
37 *
38 */
39
40 #include <sys/systm.h>
41 #include <sys/kstat.h>
42 #include <sys/modctl.h>
43 #include <sys/sdt.h>
44 #include <rpc/rpc_rdma.h>
45
46 #include <sys/ib/ibtl/ibti.h>
47
/*
 * Tunable initialised from RDMA_MINCHUNK.
 * NOTE(review): presumably the smallest payload sent as an RDMA chunk;
 * no user of it is visible in this file -- confirm against the callers.
 */
uint_t rdma_minchunk = RDMA_MINCHUNK;

/*
 * Globals
 */
/* Set to 1 by rdma_modload(); cleared by rdma_unregister_mod(). */
int rdma_modloaded = 0;		/* flag to load RDMA plugin modules */
/* Set from ibt_hw_is_present() in rdma_modload(); cleared on unregister. */
int rdma_dev_available = 0;	/* if any RDMA device is loaded */
kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */

/*
 * Hand-off area for HCA attach/detach events: rdma_register_mod() and
 * rdma_unregister_mod() store a status in svc_stat and signal svc_cv
 * under svc_lock; rdma_kwait() sleeps on svc_cv and returns svc_stat.
 */
rdma_svc_wait_t rdma_wait;

rdma_registry_t	*rdma_mod_head = NULL;	/* head for RDMA modules */
krwlock_t	rdma_lock;		/* protects rdma_mod_head list */
ldi_ident_t rpcmod_li = NULL;	/* identifies us with ldi_ framework */

/*
 * kmem cache backing clist_alloc()/clist_free(); created the first time
 * rdma_modload() succeeds in opening the rpcib device.
 */
kmem_cache_t *clist_cache = NULL;

/*
 * Statics
 */
/*
 * LDI handle filled in by ldi_open_by_name() in rdma_modload() and closed
 * again before that function returns.  Despite the section title this
 * object is not declared static.
 */
ldi_handle_t rpcib_handle = NULL;

/*
 * Externs
 */
/*
 * Client and server RDMA statistics, defined elsewhere; rdma_kstat_init()
 * publishes them as the "rpc_rdma_client" and "rpc_rdma_server" kstats.
 */
extern kstat_named_t	*rdmarcstat_ptr;
extern uint_t		rdmarcstat_ndata;
extern kstat_named_t	*rdmarsstat_ptr;
extern uint_t		rdmarsstat_ndata;

/* Forward declaration; the definition appears later in this file. */
void rdma_kstat_init();
79
80 /*
81 * RDMATF module registration routine.
82 * This routine is expected to be called by the init routine in
83 * the plugin modules.
84 */
85 rdma_stat
rdma_register_mod(rdma_mod_t * mod)86 rdma_register_mod(rdma_mod_t *mod)
87 {
88 rdma_registry_t **mp, *m;
89
90 if (mod->rdma_version != RDMATF_VERS) {
91 return (RDMA_BADVERS);
92 }
93
94 rw_enter(&rdma_lock, RW_WRITER);
95 /*
96 * Ensure not already registered
97 */
98 mp = &rdma_mod_head;
99 while (*mp != NULL) {
100 if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
101 KNC_STRSIZE) == 0) {
102 if ((*mp)->r_mod_state == RDMA_MOD_INACTIVE) {
103 (*mp)->r_mod_state = RDMA_MOD_ACTIVE;
104 (*mp)->r_mod->rdma_ops = mod->rdma_ops;
105 (*mp)->r_mod->rdma_count = mod->rdma_count;
106 goto announce_hca;
107 }
108 rw_exit(&rdma_lock);
109 return (RDMA_REG_EXIST);
110 }
111 mp = &((*mp)->r_next);
112 }
113
114 /*
115 * New one, create and add to registry
116 */
117 m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP);
118 m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP);
119 *m->r_mod = *mod;
120 m->r_next = NULL;
121 m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
122 (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
123 m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
124 m->r_mod_state = RDMA_MOD_ACTIVE;
125 *mp = m;
126
127 announce_hca:
128 rw_exit(&rdma_lock);
129 /*
130 * Start the nfs service on the rdma xprts.
131 * (this notification mechanism will need to change when we support
132 * multiple hcas and have support for multiple rdma plugins).
133 */
134 mutex_enter(&rdma_wait.svc_lock);
135 rdma_wait.svc_stat = RDMA_HCA_ATTACH;
136 cv_signal(&rdma_wait.svc_cv);
137 mutex_exit(&rdma_wait.svc_lock);
138
139 return (RDMA_SUCCESS);
140 }
141
142 /*
143 * RDMATF module unregistration routine.
144 * This routine is expected to be called by the fini routine in
145 * the plugin modules.
146 */
147 rdma_stat
rdma_unregister_mod(rdma_mod_t * mod)148 rdma_unregister_mod(rdma_mod_t *mod)
149 {
150 rdma_registry_t **m, *mmod = NULL;
151
152 rw_enter(&rdma_lock, RW_WRITER);
153
154 m = &rdma_mod_head;
155 while (*m != NULL) {
156 if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api,
157 KNC_STRSIZE) != 0) {
158 m = &((*m)->r_next);
159 continue;
160 }
161 /*
162 * Check if any device attached, if so return error
163 */
164 if (mod->rdma_count != 0) {
165 rw_exit(&rdma_lock);
166 return (RDMA_FAILED);
167 }
168 /*
169 * Found entry. Mark it inactive.
170 */
171 mmod = *m;
172 mmod->r_mod->rdma_count = 0;
173 mmod->r_mod_state = RDMA_MOD_INACTIVE;
174 break;
175 }
176
177 rdma_modloaded = 0;
178 rdma_dev_available = 0;
179 rw_exit(&rdma_lock);
180
181 /*
182 * Stop the nfs service running on the rdma xprts.
183 * (this notification mechanism will need to change when we support
184 * multiple hcas and have support for multiple rdma plugins).
185 */
186 mutex_enter(&rdma_wait.svc_lock);
187 rdma_wait.svc_stat = RDMA_HCA_DETACH;
188 cv_signal(&rdma_wait.svc_cv);
189 mutex_exit(&rdma_wait.svc_lock);
190
191 /*
192 * Not found.
193 */
194 return (RDMA_SUCCESS);
195 }
196
197 struct clist *
clist_alloc(void)198 clist_alloc(void)
199 {
200 struct clist *clp;
201
202 clp = kmem_cache_alloc(clist_cache, KM_SLEEP);
203
204 bzero(clp, sizeof (*clp));
205
206 return (clp);
207 }
208
209 uint32_t
clist_len(struct clist * cl)210 clist_len(struct clist *cl)
211 {
212 uint32_t len = 0;
213 while (cl) {
214 len += cl->c_len;
215 cl = cl->c_next;
216 }
217 return (len);
218 }
219
220 void
clist_zero_len(struct clist * cl)221 clist_zero_len(struct clist *cl)
222 {
223 while (cl != NULL) {
224 if (cl->c_dmemhandle.mrc_rmr == 0)
225 break;
226 cl->c_len = 0;
227 cl = cl->c_next;
228 }
229 }
230
231 /*
232 * Creates a new chunk list entry, and
233 * adds it to the end of a chunk list.
234 */
235 void
clist_add(struct clist ** clp,uint32_t xdroff,int len,struct mrc * shandle,caddr_t saddr,struct mrc * dhandle,caddr_t daddr)236 clist_add(struct clist **clp, uint32_t xdroff, int len,
237 struct mrc *shandle, caddr_t saddr,
238 struct mrc *dhandle, caddr_t daddr)
239 {
240 struct clist *cl;
241
242 /* Find the end of the list */
243
244 while (*clp != NULL)
245 clp = &((*clp)->c_next);
246
247 cl = clist_alloc();
248 cl->c_xdroff = xdroff;
249 cl->c_len = len;
250 cl->w.c_saddr = (uint64_t)(uintptr_t)saddr;
251 if (shandle)
252 cl->c_smemhandle = *shandle;
253 cl->u.c_daddr = (uint64_t)(uintptr_t)daddr;
254 if (dhandle)
255 cl->c_dmemhandle = *dhandle;
256 cl->c_next = NULL;
257
258 *clp = cl;
259 }
260
261 rdma_stat
clist_register(CONN * conn,struct clist * cl,clist_dstsrc dstsrc)262 clist_register(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
263 {
264 struct clist *c;
265 int status;
266
267 for (c = cl; c; c = c->c_next) {
268 if (c->c_len <= 0)
269 continue;
270
271 c->c_regtype = dstsrc;
272
273 switch (dstsrc) {
274 case CLIST_REG_SOURCE:
275 status = RDMA_REGMEMSYNC(conn,
276 (caddr_t)(struct as *)c->c_adspc,
277 (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len,
278 &c->c_smemhandle, (void **)&c->c_ssynchandle,
279 (void *)c->rb_longbuf.rb_private);
280 break;
281 case CLIST_REG_DST:
282 status = RDMA_REGMEMSYNC(conn,
283 (caddr_t)(struct as *)c->c_adspc,
284 (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len,
285 &c->c_dmemhandle, (void **)&c->c_dsynchandle,
286 (void *)c->rb_longbuf.rb_private);
287 break;
288 default:
289 return (RDMA_INVAL);
290 }
291 if (status != RDMA_SUCCESS) {
292 (void) clist_deregister(conn, cl);
293 return (status);
294 }
295 }
296
297 return (RDMA_SUCCESS);
298 }
299
300 rdma_stat
clist_deregister(CONN * conn,struct clist * cl)301 clist_deregister(CONN *conn, struct clist *cl)
302 {
303 struct clist *c;
304
305 for (c = cl; c; c = c->c_next) {
306 switch (c->c_regtype) {
307 case CLIST_REG_SOURCE:
308 if (c->c_smemhandle.mrc_rmr != 0) {
309 (void) RDMA_DEREGMEMSYNC(conn,
310 (caddr_t)(uintptr_t)c->w.c_saddr3,
311 c->c_smemhandle,
312 (void *)(uintptr_t)c->c_ssynchandle,
313 (void *)c->rb_longbuf.rb_private);
314 c->c_smemhandle.mrc_rmr = 0;
315 c->c_ssynchandle = NULL;
316 }
317 break;
318 case CLIST_REG_DST:
319 if (c->c_dmemhandle.mrc_rmr != 0) {
320 (void) RDMA_DEREGMEMSYNC(conn,
321 (caddr_t)(uintptr_t)c->u.c_daddr3,
322 c->c_dmemhandle,
323 (void *)(uintptr_t)c->c_dsynchandle,
324 (void *)c->rb_longbuf.rb_private);
325 c->c_dmemhandle.mrc_rmr = 0;
326 c->c_dsynchandle = NULL;
327 }
328 break;
329 default:
330 /* clist unregistered. continue */
331 break;
332 }
333 }
334
335 return (RDMA_SUCCESS);
336 }
337
338 rdma_stat
clist_syncmem(CONN * conn,struct clist * cl,clist_dstsrc dstsrc)339 clist_syncmem(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
340 {
341 struct clist *c;
342 rdma_stat status;
343
344 c = cl;
345 switch (dstsrc) {
346 case CLIST_REG_SOURCE:
347 while (c != NULL) {
348 if (c->c_ssynchandle) {
349 status = RDMA_SYNCMEM(conn,
350 (void *)(uintptr_t)c->c_ssynchandle,
351 (caddr_t)(uintptr_t)c->w.c_saddr3,
352 c->c_len, 0);
353 if (status != RDMA_SUCCESS)
354 return (status);
355 }
356 c = c->c_next;
357 }
358 break;
359 case CLIST_REG_DST:
360 while (c != NULL) {
361 if (c->c_ssynchandle) {
362 status = RDMA_SYNCMEM(conn,
363 (void *)(uintptr_t)c->c_dsynchandle,
364 (caddr_t)(uintptr_t)c->u.c_daddr3,
365 c->c_len, 1);
366 if (status != RDMA_SUCCESS)
367 return (status);
368 }
369 c = c->c_next;
370 }
371 break;
372 default:
373 return (RDMA_INVAL);
374 }
375
376 return (RDMA_SUCCESS);
377 }
378
379 /*
380 * Frees up entries in chunk list
381 */
382 void
clist_free(struct clist * cl)383 clist_free(struct clist *cl)
384 {
385 struct clist *c = cl;
386
387 while (c != NULL) {
388 cl = cl->c_next;
389 kmem_cache_free(clist_cache, c);
390 c = cl;
391 }
392 }
393
394 rdma_stat
rdma_clnt_postrecv(CONN * conn,uint32_t xid)395 rdma_clnt_postrecv(CONN *conn, uint32_t xid)
396 {
397 struct clist *cl = NULL;
398 rdma_stat retval;
399 rdma_buf_t rbuf = {0};
400
401 rbuf.type = RECV_BUFFER;
402 if (RDMA_BUF_ALLOC(conn, &rbuf)) {
403 return (RDMA_NORESOURCE);
404 }
405
406 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
407 NULL, NULL);
408 retval = RDMA_CLNT_RECVBUF(conn, cl, xid);
409 clist_free(cl);
410
411 return (retval);
412 }
413
414 rdma_stat
rdma_clnt_postrecv_remove(CONN * conn,uint32_t xid)415 rdma_clnt_postrecv_remove(CONN *conn, uint32_t xid)
416 {
417 return (RDMA_CLNT_RECVBUF_REMOVE(conn, xid));
418 }
419
420 rdma_stat
rdma_svc_postrecv(CONN * conn)421 rdma_svc_postrecv(CONN *conn)
422 {
423 struct clist *cl = NULL;
424 rdma_stat retval;
425 rdma_buf_t rbuf = {0};
426
427 rbuf.type = RECV_BUFFER;
428 if (RDMA_BUF_ALLOC(conn, &rbuf)) {
429 retval = RDMA_NORESOURCE;
430 } else {
431 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
432 NULL, NULL);
433 retval = RDMA_SVC_RECVBUF(conn, cl);
434 clist_free(cl);
435 }
436 return (retval);
437 }
438
439 rdma_stat
rdma_buf_alloc(CONN * conn,rdma_buf_t * rbuf)440 rdma_buf_alloc(CONN *conn, rdma_buf_t *rbuf)
441 {
442 return (RDMA_BUF_ALLOC(conn, rbuf));
443 }
444
445 void
rdma_buf_free(CONN * conn,rdma_buf_t * rbuf)446 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf)
447 {
448 if (!rbuf || rbuf->addr == NULL) {
449 return;
450 }
451 RDMA_BUF_FREE(conn, rbuf);
452 bzero(rbuf, sizeof (rdma_buf_t));
453 }
454
455 /*
456 * Caller is holding rdma_modload_lock mutex
457 */
458 int
rdma_modload()459 rdma_modload()
460 {
461 int status;
462 ASSERT(MUTEX_HELD(&rdma_modload_lock));
463 /*
464 * Load all available RDMA plugins which right now is only IB plugin.
465 * If no IB hardware is present, then quit right away.
466 * ENODEV -- For no device on the system
467 * EPROTONOSUPPORT -- For module not avilable either due to failure to
468 * load or some other reason.
469 */
470 rdma_modloaded = 1;
471 if (ibt_hw_is_present() == 0) {
472 rdma_dev_available = 0;
473 return (ENODEV);
474 }
475
476 rdma_dev_available = 1;
477 if (rpcmod_li == NULL)
478 return (EPROTONOSUPPORT);
479
480 status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
481 FREAD | FWRITE, kcred,
482 &rpcib_handle, rpcmod_li);
483
484 if (status != 0)
485 return (EPROTONOSUPPORT);
486
487
488 /*
489 * We will need to reload the plugin module after it was unregistered
490 * but the resources below need to allocated only the first time.
491 */
492 if (!clist_cache) {
493 clist_cache = kmem_cache_create("rdma_clist",
494 sizeof (struct clist), _POINTER_ALIGNMENT, NULL,
495 NULL, NULL, NULL, 0, 0);
496 rdma_kstat_init();
497 }
498
499 (void) ldi_close(rpcib_handle, FREAD|FWRITE, kcred);
500
501 return (0);
502 }
503
504 void
rdma_kstat_init(void)505 rdma_kstat_init(void)
506 {
507 kstat_t *ksp;
508
509 /*
510 * The RDMA framework doesn't know how to deal with Zones, and is
511 * only available in the global zone.
512 */
513 ASSERT(INGLOBALZONE(curproc));
514 ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
515 KSTAT_TYPE_NAMED, rdmarcstat_ndata,
516 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
517 if (ksp) {
518 ksp->ks_data = (void *) rdmarcstat_ptr;
519 kstat_install(ksp);
520 }
521
522 ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
523 KSTAT_TYPE_NAMED, rdmarsstat_ndata,
524 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
525 if (ksp) {
526 ksp->ks_data = (void *) rdmarsstat_ptr;
527 kstat_install(ksp);
528 }
529 }
530
531 rdma_stat
rdma_kwait(void)532 rdma_kwait(void)
533 {
534 int ret;
535 rdma_stat stat;
536
537 mutex_enter(&rdma_wait.svc_lock);
538
539 ret = cv_wait_sig(&rdma_wait.svc_cv, &rdma_wait.svc_lock);
540
541 /*
542 * If signalled by a hca attach/detach, pass the right
543 * stat back.
544 */
545
546 if (ret)
547 stat = rdma_wait.svc_stat;
548 else
549 stat = RDMA_INTR;
550
551 mutex_exit(&rdma_wait.svc_lock);
552
553 return (stat);
554 }
555