xref: /illumos-gate/usr/src/uts/common/rpc/rdma_subr.c (revision 1b22764f59e3a183ca5db98b6bfd27fdf2b20e02)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2007, The Ohio State University. All rights reserved.
28  *
29  * Portions of this source code is developed by the team members of
30  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
31  * headed by Professor Dhabaleswar K. (DK) Panda.
32  *
33  * Acknowledgements to contributions from developors:
34  *   Ranjit Noronha: noronha@cse.ohio-state.edu
35  *   Lei Chai      : chail@cse.ohio-state.edu
36  *   Weikuan Yu    : yuw@cse.ohio-state.edu
37  *
38  */
39 
40 #include <sys/systm.h>
41 #include <sys/kstat.h>
42 #include <sys/modctl.h>
43 #include <sys/sdt.h>
44 #include <rpc/rpc_rdma.h>
45 
46 #include <sys/ib/ibtl/ibti.h>
47 
48 uint_t rdma_minchunk = RDMA_MINCHUNK;
49 
50 /*
51  * Globals
52  */
53 int rdma_modloaded = 0;		/* flag to load RDMA plugin modules */
54 int rdma_dev_available = 0;	/* if any RDMA device is loaded */
55 kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
56 rdma_registry_t	*rdma_mod_head = NULL;	/* head for RDMA modules */
57 krwlock_t	rdma_lock;		/* protects rdma_mod_head list */
58 ldi_ident_t rpcmod_li = NULL;	/* identifies us with ldi_ framework */
59 
60 kmem_cache_t *clist_cache = NULL;
61 
62 /*
63  * Statics
64  */
65 static ldi_handle_t rpcib_handle = NULL;
66 
67 /*
68  * Externs
69  */
70 extern	kstat_named_t	*rdmarcstat_ptr;
71 extern	uint_t		rdmarcstat_ndata;
72 extern	kstat_named_t	*rdmarsstat_ptr;
73 extern	uint_t		rdmarsstat_ndata;
74 
75 void rdma_kstat_init();
76 
77 /*
78  * RDMATF module registration routine.
79  * This routine is expected to be called by the init routine in
80  * the plugin modules.
81  */
82 rdma_stat
83 rdma_register_mod(rdma_mod_t *mod)
84 {
85 	rdma_registry_t **mp, *m;
86 
87 	if (mod->rdma_version != RDMATF_VERS) {
88 		return (RDMA_BADVERS);
89 	}
90 
91 	rw_enter(&rdma_lock, RW_WRITER);
92 	/*
93 	 * Ensure not already registered
94 	 */
95 	mp = &rdma_mod_head;
96 	while (*mp != NULL) {
97 		if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
98 		    KNC_STRSIZE) == 0) {
99 			rw_exit(&rdma_lock);
100 			return (RDMA_REG_EXIST);
101 		}
102 		mp = &((*mp)->r_next);
103 	}
104 
105 	/*
106 	 * New one, create and add to registry
107 	 */
108 	m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP);
109 	m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP);
110 	*m->r_mod = *mod;
111 	m->r_next = NULL;
112 	m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
113 	(void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
114 	m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
115 	*mp = m;
116 	rw_exit(&rdma_lock);
117 
118 	return (RDMA_SUCCESS);
119 }
120 
121 /*
122  * RDMATF module unregistration routine.
123  * This routine is expected to be called by the fini routine in
124  * the plugin modules.
125  */
126 rdma_stat
127 rdma_unregister_mod(rdma_mod_t *mod)
128 {
129 	rdma_registry_t **m, *mmod = NULL;
130 
131 	rw_enter(&rdma_lock, RW_WRITER);
132 
133 	m = &rdma_mod_head;
134 	while (*m != NULL) {
135 		if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api,
136 		    KNC_STRSIZE) != 0) {
137 			m = &((*m)->r_next);
138 			continue;
139 		}
140 		/*
141 		 * Check if any device attached, if so return error
142 		 */
143 		if ((*m)->r_mod->rdma_count != 0) {
144 			rw_exit(&rdma_lock);
145 			return (RDMA_FAILED);
146 		}
147 		/*
148 		 * Found entry. Now remove it.
149 		 */
150 		mmod = *m;
151 		*m = (*m)->r_next;
152 		kmem_free(mmod->r_mod->rdma_api, KNC_STRSIZE);
153 		kmem_free(mmod->r_mod, sizeof (rdma_mod_t));
154 		kmem_free(mmod, sizeof (rdma_registry_t));
155 		rw_exit(&rdma_lock);
156 		return (RDMA_SUCCESS);
157 	}
158 
159 	/*
160 	 * Not found.
161 	 */
162 	rw_exit(&rdma_lock);
163 	return (RDMA_FAILED);
164 }
165 
166 struct clist *
167 clist_alloc(void)
168 {
169 	struct clist *clp;
170 
171 	clp = kmem_cache_alloc(clist_cache, KM_SLEEP);
172 
173 	bzero(clp, sizeof (*clp));
174 
175 	return (clp);
176 }
177 
178 /*
179  * Creates a new chunk list entry, and
180  * adds it to the end of a chunk list.
181  */
182 void
183 clist_add(struct clist **clp, uint32_t xdroff, int len,
184 	struct mrc *shandle, caddr_t saddr,
185 	struct mrc *dhandle, caddr_t daddr)
186 {
187 	struct clist *cl;
188 
189 	/* Find the end of the list */
190 
191 	while (*clp != NULL)
192 		clp = &((*clp)->c_next);
193 
194 	cl = clist_alloc();
195 	cl->c_xdroff = xdroff;
196 	cl->c_len = len;
197 	cl->w.c_saddr = (uint64_t)(uintptr_t)saddr;
198 	if (shandle)
199 		cl->c_smemhandle = *shandle;
200 	cl->u.c_daddr = (uint64_t)(uintptr_t)daddr;
201 	if (dhandle)
202 		cl->c_dmemhandle = *dhandle;
203 	cl->c_next = NULL;
204 
205 	*clp = cl;
206 }
207 
208 rdma_stat
209 clist_register(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
210 {
211 	struct clist *c;
212 	int status;
213 
214 	for (c = cl; c; c = c->c_next) {
215 		if (c->c_len <= 0)
216 			continue;
217 		switch (dstsrc) {
218 		case CLIST_REG_SOURCE:
219 			status = RDMA_REGMEMSYNC(conn,
220 			    (caddr_t)(struct as *)cl->c_adspc,
221 			    (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len,
222 			    &c->c_smemhandle, (void **)&c->c_ssynchandle,
223 			    (void *)c->rb_longbuf.rb_private);
224 			break;
225 		case CLIST_REG_DST:
226 			status = RDMA_REGMEMSYNC(conn,
227 			    (caddr_t)(struct as *)cl->c_adspc,
228 			    (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len,
229 			    &c->c_dmemhandle, (void **)&c->c_dsynchandle,
230 			    (void *)c->rb_longbuf.rb_private);
231 			break;
232 		default:
233 			return (RDMA_INVAL);
234 		}
235 		if (status != RDMA_SUCCESS) {
236 			(void) clist_deregister(conn, cl, dstsrc);
237 			return (status);
238 		}
239 	}
240 
241 	return (RDMA_SUCCESS);
242 }
243 
244 rdma_stat
245 clist_deregister(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
246 {
247 	struct clist *c;
248 
249 	for (c = cl; c; c = c->c_next) {
250 		switch (dstsrc) {
251 		case CLIST_REG_SOURCE:
252 			if (c->c_smemhandle.mrc_rmr != 0) {
253 				(void) RDMA_DEREGMEMSYNC(conn,
254 				    (caddr_t)(uintptr_t)c->w.c_saddr3,
255 				    c->c_smemhandle,
256 				    (void *)(uintptr_t)c->c_ssynchandle,
257 				    (void *)c->rb_longbuf.rb_private);
258 				c->c_smemhandle.mrc_rmr = 0;
259 				c->c_ssynchandle = NULL;
260 			}
261 			break;
262 		case CLIST_REG_DST:
263 			if (c->c_dmemhandle.mrc_rmr != 0) {
264 				(void) RDMA_DEREGMEMSYNC(conn,
265 				    (caddr_t)(uintptr_t)c->u.c_daddr3,
266 				    c->c_dmemhandle,
267 				    (void *)(uintptr_t)c->c_dsynchandle,
268 				    (void *)c->rb_longbuf.rb_private);
269 				c->c_dmemhandle.mrc_rmr = 0;
270 				c->c_dsynchandle = NULL;
271 			}
272 			break;
273 		default:
274 			return (RDMA_INVAL);
275 		}
276 	}
277 
278 	return (RDMA_SUCCESS);
279 }
280 
281 rdma_stat
282 clist_syncmem(CONN *conn, struct clist *cl, clist_dstsrc dstsrc)
283 {
284 	struct clist *c;
285 	rdma_stat status;
286 
287 	c = cl;
288 	switch (dstsrc) {
289 	case CLIST_REG_SOURCE:
290 		while (c != NULL) {
291 			if (c->c_ssynchandle) {
292 				status = RDMA_SYNCMEM(conn,
293 				    (void *)(uintptr_t)c->c_ssynchandle,
294 				    (caddr_t)(uintptr_t)c->w.c_saddr3,
295 				    c->c_len, 0);
296 				if (status != RDMA_SUCCESS)
297 					return (status);
298 			}
299 			c = c->c_next;
300 		}
301 		break;
302 	case CLIST_REG_DST:
303 		while (c != NULL) {
304 			if (c->c_ssynchandle) {
305 				status = RDMA_SYNCMEM(conn,
306 				    (void *)(uintptr_t)c->c_dsynchandle,
307 				    (caddr_t)(uintptr_t)c->u.c_daddr3,
308 				    c->c_len, 1);
309 				if (status != RDMA_SUCCESS)
310 					return (status);
311 			}
312 			c = c->c_next;
313 		}
314 		break;
315 	default:
316 		return (RDMA_INVAL);
317 	}
318 
319 	return (RDMA_SUCCESS);
320 }
321 
322 /*
323  * Frees up entries in chunk list
324  */
325 void
326 clist_free(struct clist *cl)
327 {
328 	struct clist *c = cl;
329 
330 	while (c != NULL) {
331 		cl = cl->c_next;
332 		kmem_cache_free(clist_cache, c);
333 		c = cl;
334 	}
335 }
336 
337 rdma_stat
338 rdma_clnt_postrecv(CONN *conn, uint32_t xid)
339 {
340 	struct clist *cl = NULL;
341 	rdma_stat retval;
342 	rdma_buf_t rbuf = {0};
343 
344 	rbuf.type = RECV_BUFFER;
345 	if (RDMA_BUF_ALLOC(conn, &rbuf)) {
346 		return (RDMA_NORESOURCE);
347 	}
348 
349 	clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
350 	    NULL, NULL);
351 	retval = RDMA_CLNT_RECVBUF(conn, cl, xid);
352 	clist_free(cl);
353 
354 	return (retval);
355 }
356 
357 rdma_stat
358 rdma_clnt_postrecv_remove(CONN *conn, uint32_t xid)
359 {
360 	return (RDMA_CLNT_RECVBUF_REMOVE(conn, xid));
361 }
362 
363 rdma_stat
364 rdma_svc_postrecv(CONN *conn)
365 {
366 	struct clist *cl = NULL;
367 	rdma_stat retval;
368 	rdma_buf_t rbuf = {0};
369 
370 	rbuf.type = RECV_BUFFER;
371 	if (RDMA_BUF_ALLOC(conn, &rbuf)) {
372 		retval = RDMA_NORESOURCE;
373 	} else {
374 		clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
375 		    NULL, NULL);
376 		retval = RDMA_SVC_RECVBUF(conn, cl);
377 		clist_free(cl);
378 	}
379 	return (retval);
380 }
381 
382 rdma_stat
383 rdma_buf_alloc(CONN *conn, rdma_buf_t *rbuf)
384 {
385 	return (RDMA_BUF_ALLOC(conn, rbuf));
386 }
387 
388 void
389 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf)
390 {
391 	if (!rbuf || rbuf->addr == NULL) {
392 		return;
393 	}
394 	RDMA_BUF_FREE(conn, rbuf);
395 	bzero(rbuf, sizeof (rdma_buf_t));
396 }
397 
398 /*
399  * Caller is holding rdma_modload_lock mutex
400  */
401 int
402 rdma_modload()
403 {
404 	int status;
405 	ASSERT(MUTEX_HELD(&rdma_modload_lock));
406 	/*
407 	 * Load all available RDMA plugins which right now is only IB plugin.
408 	 * If no IB hardware is present, then quit right away.
409 	 * ENODEV -- For no device on the system
410 	 * EPROTONOSUPPORT -- For module not avilable either due to failure to
411 	 * load or some other reason.
412 	 */
413 	rdma_modloaded = 1;
414 	if (ibt_hw_is_present() == 0) {
415 		rdma_dev_available = 0;
416 		return (ENODEV);
417 	}
418 
419 	rdma_dev_available = 1;
420 	if (rpcmod_li == NULL)
421 		return (EPROTONOSUPPORT);
422 
423 	status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
424 	    FREAD | FWRITE, kcred,
425 	    &rpcib_handle, rpcmod_li);
426 	if (status != 0)
427 		return (EPROTONOSUPPORT);
428 
429 	/* success */
430 	rdma_kstat_init();
431 
432 	clist_cache = kmem_cache_create("rdma_clist",
433 	    sizeof (struct clist), _POINTER_ALIGNMENT, NULL,
434 	    NULL, NULL, NULL, 0, 0);
435 
436 	return (0);
437 }
438 
439 void
440 rdma_kstat_init(void)
441 {
442 	kstat_t *ksp;
443 
444 	/*
445 	 * The RDMA framework doesn't know how to deal with Zones, and is
446 	 * only available in the global zone.
447 	 */
448 	ASSERT(INGLOBALZONE(curproc));
449 	ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
450 	    KSTAT_TYPE_NAMED, rdmarcstat_ndata,
451 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
452 	if (ksp) {
453 		ksp->ks_data = (void *) rdmarcstat_ptr;
454 		kstat_install(ksp);
455 	}
456 
457 	ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
458 	    KSTAT_TYPE_NAMED, rdmarsstat_ndata,
459 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
460 	if (ksp) {
461 		ksp->ks_data = (void *) rdmarsstat_ptr;
462 		kstat_install(ksp);
463 	}
464 }
465