xref: /illumos-gate/usr/src/uts/common/rpc/rdma_subr.c (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/systm.h>
30 #include <sys/kstat.h>
31 #include <sys/modctl.h>
32 #include <rpc/rpc_rdma.h>
33 
34 #include <sys/ib/ibtl/ibti.h>
35 
36 /*
37  * RDMA chunk size
38  */
39 #define	RDMA_MINCHUNK	1024
40 uint_t rdma_minchunk = RDMA_MINCHUNK;
41 
42 /*
43  * Globals
44  */
45 int rdma_modloaded = 0;		/* flag to load RDMA plugin modules */
46 int rdma_dev_available = 0;	/* if any RDMA device is loaded */
47 kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
48 rdma_registry_t	*rdma_mod_head = NULL;	/* head for RDMA modules */
49 krwlock_t	rdma_lock;		/* protects rdma_mod_head list */
50 ldi_ident_t rpcmod_li = NULL;	/* identifies us with ldi_ framework */
51 
52 /*
53  * Statics
54  */
55 static ldi_handle_t rpcib_handle = NULL;
56 
57 /*
58  * Externs
59  */
60 extern	kstat_named_t	*rdmarcstat_ptr;
61 extern	uint_t		rdmarcstat_ndata;
62 extern	kstat_named_t	*rdmarsstat_ptr;
63 extern	uint_t		rdmarsstat_ndata;
64 
65 void rdma_kstat_init();
66 
67 /*
68  * RDMATF module registration routine.
69  * This routine is expected to be called by the init routine in
70  * the plugin modules.
71  */
72 rdma_stat
73 rdma_register_mod(rdma_mod_t *mod)
74 {
75 	rdma_registry_t **mp, *m;
76 
77 	if (mod->rdma_version != RDMATF_VERS) {
78 		return (RDMA_BADVERS);
79 	}
80 
81 	rw_enter(&rdma_lock, RW_WRITER);
82 	/*
83 	 * Ensure not already registered
84 	 */
85 	mp = &rdma_mod_head;
86 	while (*mp != NULL) {
87 		if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
88 		    KNC_STRSIZE) == 0) {
89 			rw_exit(&rdma_lock);
90 			return (RDMA_REG_EXIST);
91 		}
92 		mp = &((*mp)->r_next);
93 	}
94 
95 	/*
96 	 * New one, create and add to registry
97 	 */
98 	m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP);
99 	m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP);
100 	*m->r_mod = *mod;
101 	m->r_next = NULL;
102 	m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
103 	(void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
104 	m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
105 	*mp = m;
106 	rw_exit(&rdma_lock);
107 
108 	return (RDMA_SUCCESS);
109 }
110 
111 /*
112  * RDMATF module unregistration routine.
113  * This routine is expected to be called by the fini routine in
114  * the plugin modules.
115  */
116 rdma_stat
117 rdma_unregister_mod(rdma_mod_t *mod)
118 {
119 	rdma_registry_t **m, *mmod = NULL;
120 
121 	rw_enter(&rdma_lock, RW_WRITER);
122 
123 	m = &rdma_mod_head;
124 	while (*m != NULL) {
125 		if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api,
126 		    KNC_STRSIZE) != 0) {
127 			m = &((*m)->r_next);
128 			continue;
129 		}
130 		/*
131 		 * Check if any device attached, if so return error
132 		 */
133 		if ((*m)->r_mod->rdma_count != 0) {
134 			rw_exit(&rdma_lock);
135 			return (RDMA_FAILED);
136 		}
137 		/*
138 		 * Found entry. Now remove it.
139 		 */
140 		mmod = *m;
141 		*m = (*m)->r_next;
142 		kmem_free(mmod->r_mod->rdma_api, KNC_STRSIZE);
143 		kmem_free(mmod->r_mod, sizeof (rdma_mod_t));
144 		kmem_free(mmod, sizeof (rdma_registry_t));
145 		rw_exit(&rdma_lock);
146 		return (RDMA_SUCCESS);
147 	}
148 
149 	/*
150 	 * Not found.
151 	 */
152 	rw_exit(&rdma_lock);
153 	return (RDMA_FAILED);
154 }
155 
156 /*
157  * Creates a new chunk list entry, and
158  * adds it to the end of a chunk list.
159  */
160 void
161 clist_add(struct clist **clp, uint32_t xdroff, int len,
162 	struct mrc *shandle, caddr_t saddr,
163 	struct mrc *dhandle, caddr_t daddr)
164 {
165 	struct clist *cl;
166 
167 	/* Find the end of the list */
168 
169 	while (*clp != NULL)
170 		clp = &((*clp)->c_next);
171 
172 	cl = kmem_zalloc(sizeof (*cl), KM_SLEEP);
173 	cl->c_xdroff = xdroff;
174 	cl->c_len = len;
175 	cl->c_saddr = (uint64_t)(uintptr_t)saddr;
176 	if (shandle)
177 		cl->c_smemhandle = *shandle;
178 	cl->c_daddr = (uint64_t)(uintptr_t)daddr;
179 	if (dhandle)
180 		cl->c_dmemhandle = *dhandle;
181 	cl->c_next = NULL;
182 
183 	*clp = cl;
184 }
185 
186 int
187 clist_register(CONN *conn, struct clist *cl, bool_t src)
188 {
189 	struct clist *c;
190 	int status;
191 
192 	for (c = cl; c; c = c->c_next) {
193 		if (src) {
194 			status = RDMA_REGMEMSYNC(conn,
195 			    (caddr_t)(uintptr_t)c->c_saddr, c->c_len,
196 			    &c->c_smemhandle, (void **)&c->c_ssynchandle);
197 		} else {
198 			status = RDMA_REGMEMSYNC(conn,
199 			    (caddr_t)(uintptr_t)c->c_daddr, c->c_len,
200 			    &c->c_dmemhandle, (void **)&c->c_dsynchandle);
201 		}
202 		if (status != RDMA_SUCCESS) {
203 			(void) clist_deregister(conn, cl, src);
204 			return (status);
205 		}
206 	}
207 
208 	return (RDMA_SUCCESS);
209 }
210 
211 int
212 clist_deregister(CONN *conn, struct clist *cl, bool_t src)
213 {
214 	struct clist *c;
215 
216 	for (c = cl; c; c = c->c_next) {
217 		if (src) {
218 			if (c->c_smemhandle.mrc_rmr != 0) {
219 				(void) RDMA_DEREGMEMSYNC(conn,
220 				    (caddr_t)(uintptr_t)c->c_saddr,
221 				    c->c_smemhandle,
222 				    (void *)(uintptr_t)c->c_ssynchandle);
223 				c->c_smemhandle.mrc_rmr = 0;
224 				c->c_ssynchandle = NULL;
225 			}
226 		} else {
227 			if (c->c_dmemhandle.mrc_rmr != 0) {
228 				(void) RDMA_DEREGMEMSYNC(conn,
229 				    (caddr_t)(uintptr_t)c->c_daddr,
230 				    c->c_dmemhandle,
231 				    (void *)(uintptr_t)c->c_dsynchandle);
232 				c->c_dmemhandle.mrc_rmr = 0;
233 				c->c_dsynchandle = NULL;
234 			}
235 		}
236 	}
237 
238 	return (RDMA_SUCCESS);
239 }
240 
241 /*
242  * Frees up entries in chunk list
243  */
244 void
245 clist_free(struct clist *cl)
246 {
247 	struct clist *c = cl;
248 
249 	while (c != NULL) {
250 		cl = cl->c_next;
251 		kmem_free(c, sizeof (struct clist));
252 		c = cl;
253 	}
254 }
255 
256 rdma_stat
257 rdma_clnt_postrecv(CONN *conn, uint32_t xid)
258 {
259 	struct clist *cl = NULL;
260 	rdma_stat retval;
261 	rdma_buf_t rbuf;
262 
263 	rbuf.type = RECV_BUFFER;
264 	if (RDMA_BUF_ALLOC(conn, &rbuf)) {
265 		retval = RDMA_NORESOURCE;
266 	} else {
267 		clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
268 			NULL, NULL);
269 		retval = RDMA_CLNT_RECVBUF(conn, cl, xid);
270 		clist_free(cl);
271 	}
272 	return (retval);
273 }
274 
275 rdma_stat
276 rdma_svc_postrecv(CONN *conn)
277 {
278 	struct clist *cl = NULL;
279 	rdma_stat retval;
280 	rdma_buf_t rbuf;
281 
282 	rbuf.type = RECV_BUFFER;
283 	if (RDMA_BUF_ALLOC(conn, &rbuf)) {
284 		retval = RDMA_NORESOURCE;
285 	} else {
286 		clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr,
287 			NULL, NULL);
288 		retval = RDMA_SVC_RECVBUF(conn, cl);
289 		clist_free(cl);
290 	}
291 	return (retval);
292 }
293 
294 rdma_stat
295 clist_syncmem(CONN *conn, struct clist *cl, bool_t src)
296 {
297 	struct clist *c;
298 	rdma_stat status;
299 
300 	c = cl;
301 	if (src) {
302 		while (c != NULL) {
303 			status = RDMA_SYNCMEM(conn,
304 			    (void *)(uintptr_t)c->c_ssynchandle,
305 			    (caddr_t)(uintptr_t)c->c_saddr, c->c_len, 0);
306 			if (status != RDMA_SUCCESS)
307 				return (status);
308 			c = c->c_next;
309 		}
310 	} else {
311 		while (c != NULL) {
312 			status = RDMA_SYNCMEM(conn,
313 			    (void *)(uintptr_t)c->c_dsynchandle,
314 			    (caddr_t)(uintptr_t)c->c_daddr, c->c_len, 1);
315 			if (status != RDMA_SUCCESS)
316 				return (status);
317 			c = c->c_next;
318 		}
319 	}
320 	return (RDMA_SUCCESS);
321 }
322 
323 void
324 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf)
325 {
326 	if (!rbuf || rbuf->addr == NULL) {
327 		return;
328 	}
329 	if (rbuf->type != CHUNK_BUFFER) {
330 		/* pool buffer */
331 		RDMA_BUF_FREE(conn, rbuf);
332 	} else {
333 		kmem_free(rbuf->addr, rbuf->len);
334 	}
335 	rbuf->addr = NULL;
336 	rbuf->len = 0;
337 }
338 
339 /*
340  * Caller is holding rdma_modload_lock mutex
341  */
342 int
343 rdma_modload()
344 {
345 	int status;
346 	ASSERT(MUTEX_HELD(&rdma_modload_lock));
347 	/*
348 	 * Load all available RDMA plugins which right now is only IB plugin.
349 	 * If no IB hardware is present, then quit right away.
350 	 * ENODEV -- For no device on the system
351 	 * EPROTONOSUPPORT -- For module not avilable either due to failure to
352 	 * load or some other reason.
353 	 */
354 	rdma_modloaded = 1;
355 	if (ibt_hw_is_present() == 0) {
356 		rdma_dev_available = 0;
357 		return (ENODEV);
358 	}
359 
360 	rdma_dev_available = 1;
361 	if (rpcmod_li == NULL)
362 		return (EPROTONOSUPPORT);
363 
364 	status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
365 	    FREAD | FWRITE, kcred,
366 	    &rpcib_handle, rpcmod_li);
367 	if (status != 0)
368 		return (EPROTONOSUPPORT);
369 
370 	/* success */
371 	rdma_kstat_init();
372 	return (0);
373 }
374 
375 void
376 rdma_kstat_init(void)
377 {
378 	kstat_t *ksp;
379 
380 	/*
381 	 * The RDMA framework doesn't know how to deal with Zones, and is
382 	 * only available in the global zone.
383 	 */
384 	ASSERT(INGLOBALZONE(curproc));
385 	ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc",
386 	    KSTAT_TYPE_NAMED, rdmarcstat_ndata,
387 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
388 	if (ksp) {
389 		ksp->ks_data = (void *) rdmarcstat_ptr;
390 		kstat_install(ksp);
391 	}
392 
393 	ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc",
394 	    KSTAT_TYPE_NAMED, rdmarsstat_ndata,
395 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID);
396 	if (ksp) {
397 		ksp->ks_data = (void *) rdmarsstat_ptr;
398 		kstat_install(ksp);
399 	}
400 }
401