xref: /illumos-gate/usr/src/uts/common/rpc/ib.h (revision fec509a05ddbf645268fe2e537314def7d1b67c8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef _IB_H
28 #define	_IB_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * ib.h, rpcib plugin interface.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/conf.h>
40 #include <sys/stat.h>
41 #include <rpc/rpc.h>
42 #include <rpc/rpc_rdma.h>
43 #include <sys/ib/ibtl/ibti.h>
44 
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48 
/*
 * Sizing defaults and miscellaneous constants for the rpcib plugin.
 */

/* max no. of buffers per pool */
#define	MAX_BUFS	256

/*
 * Default CQ size.
 *
 * Tavor returns the next higher power of 2 CQ entries than the requested
 * size.  For instance, if you request (2^12 - 1) CQ entries, Tavor returns
 * 2^12 entries.  4K CQ entries suffice.  Hence, 4096 - 1.
 *
 * The replacement list is parenthesized so that the macro acts as a single
 * value inside a larger expression: without the parentheses
 * "DEF_CQ_SIZE * 2" would expand to "4096 - 1 * 2".
 */
#define	DEF_CQ_SIZE	(4096 - 1)

/* default SendQ size */
#define	DEF_SQ_SIZE	128
/* default RecvQ size */
#define	DEF_RQ_SIZE	256

/* dimension of the sbufaddr[] array in struct send_wid */
#define	DSEG_MAX	2
/* default RQ data seg */
#define	RQ_DSEG_MAX	1

/* high order bit of pkey */
#define	IBSRM_HB	0x8000

/* randomly selected NFS security key */
#define	NFS_SEC_KEY0	0x6878
#define	NFS_SEC_KEY1	0x8679

/* max no. of refresh attempts on IBT_CM_CONN_STALE error */
#define	REFRESH_ATTEMPTS	3
68 
/*
 * Forward declarations of the per-CQ, per-QP and per-HCA structures, so
 * that they can refer to one another by pointer before their full
 * definitions appear.
 */
typedef struct rib_cq_s		rib_cq_t;
typedef struct rib_qp_s		rib_qp_t;
typedef struct rib_hca_s	rib_hca_t;
72 
73 /*
74  * Notification for RDMA_DONE is based on xid
75  */
76 struct rdma_done_list {
77 	uint32_t	xid;		/* XID waiting for RDMA_DONE */
78 	kcondvar_t	rdma_done_cv;	/* cv for RDMA_DONE */
79 	struct rdma_done_list	*next;
80 	struct rdma_done_list	*prev;
81 };
82 
/*
 * State of the plugin.
 * ACCEPT = accepting new connections and requests
 * NO_ACCEPT = not accepting new connections and requests
 */
#define	ACCEPT		1
#define	NO_ACCEPT	2

/*
 * Send Wait states
 *
 * The negative value is parenthesized so that it is always treated as one
 * operand, whatever operator ends up next to the macro at the point of
 * use.
 */
#define	SEND_WAIT	(-1)

/*
 * Reply states
 *
 * Parenthesized for the same reason as SEND_WAIT.
 */
#define	REPLY_WAIT	(-1)
100 
/*
 * rib_pvoid is a plain untyped pointer; RIB_SYNCMEM_HANDLE is an alias
 * of it.
 */
typedef void		*rib_pvoid;
typedef rib_pvoid	RIB_SYNCMEM_HANDLE;
103 
104 /*
105  * IB buffer pool management structure
106  */
107 
108 /*
109  * Buffer pool info
110  */
111 typedef struct {
112 	kmutex_t	buflock;	/* lock for this structure */
113 	caddr_t		buf;		/* pool address */
114 	uint32_t	bufhandle;	/* rkey for this pool */
115 	ulong_t		bufsize;	/* size of pool */
116 	int		rsize;		/* size of each element */
117 	int		numelems;	/* no. of elements allocated */
118 	int		buffree;	/* no. of free elements */
119 	void		*buflist[1];	/* free elements in pool */
120 } bufpool_t;
121 
122 typedef struct {
123 	bufpool_t	*bpool;
124 	ibt_mr_hdl_t	*mr_hdl;
125 	ibt_mr_desc_t	*mr_desc;	/* vaddr, lkey, rkey */
126 } rib_bufpool_t;
127 
/*
 * ATS related defines and structures.
 */

/* NOTE(review): unit (bytes vs. elements) is not stated here -- confirm */
#define	ATS_AR_DATA_LEN		16

/* the string "ibd"; NOTE(review): presumably a driver name -- confirm */
#define	IBD_NAME		"ibd"

/* NOTE(review): presumably a count of ibd instances -- confirm usage */
#define	N_IBD_INSTANCES		4
134 
135 typedef struct rpcib_ats_s {
136 	int			ras_inst;
137 	ib_pkey_t		ras_pkey;
138 	ib_gid_t		ras_port_gid;
139 	sa_family_t		ras_inet_type;
140 	union {
141 		struct sockaddr_in	ras_sockaddr;
142 		struct sockaddr_in6	ras_sockaddr6;
143 	} ra_sin;
144 #define	ras_sin			ra_sin.ras_sockaddr
145 #define	ras_sin6		ra_sin.ras_sockaddr6
146 } rpcib_ats_t;
147 
148 typedef struct rpcib_ibd_insts_s {
149 	int			rib_ibd_alloc;
150 	int			rib_ibd_cnt;
151 	rpcib_ats_t		*rib_ats;
152 } rpcib_ibd_insts_t;
153 
/*
 * Service types supported by RPCIB.
 * Both identifiers are defined, but for now only NFS is supported.
 */
#define	NFS	1
#define	NLM	2
160 
/*
 * Tracks consumer state (client or server).
 * The values are spelled out; they are the same ones the compiler would
 * assign by default.
 */
typedef enum {
	RIB_SERVER = 0,
	RIB_CLIENT = 1
} rib_mode_t;
168 
169 /*
170  * CQ structure
171  */
172 struct rib_cq_s {
173 	rib_hca_t		*rib_hca;
174 	ibt_cq_hdl_t		rib_cq_hdl;
175 };
176 
177 /*
178  * RPCIB plugin state
179  */
180 typedef struct rpcib_state {
181 	ibt_clnt_hdl_t		ibt_clnt_hdl;
182 	uint32_t		hca_count;
183 	uint32_t		nhca_inited;
184 	ib_guid_t		*hca_guids;
185 	rib_hca_t		*hcas;
186 	int			refcount;
187 	kmutex_t		open_hca_lock;
188 	rib_hca_t		*hca;		/* the hca being used */
189 	queue_t			*q;		/* up queue for a serv_type */
190 	uint32_t		service_type;	/* NFS, NLM, etc */
191 	void			*private;
192 } rpcib_state_t;
193 
194 /*
195  * Each registered service's data structure.
196  * Each HCA has a list of these structures, which are the registered
197  * services on this HCA.
198  */
199 typedef struct rib_service rib_service_t;
200 struct rib_service {
201 	uint32_t		srv_type;	/* i.e, NFS, NLM, v4CBD */
202 
203 	/*
204 	 * service name, i.e, <IP>::NFS or <IP>::NLM. Since
205 	 * each type of service can be registered with many
206 	 * IP addrs(srv_name) and is running on all ports
207 	 * for all HCAs.
208 	 */
209 	char			*srv_name;
210 
211 	uint32_t		srv_port;	/* port on which registered */
212 	ib_svc_id_t		srv_id;		/* from ibt_register call */
213 	ibt_srv_hdl_t		srv_hdl;	/* from ibt_register call */
214 	ibt_sbind_hdl_t		*srv_sbind_hdl;	/* from ibt_bind call */
215 	ibt_ar_t		srv_ar;
216 
217 	/*
218 	 * pointer to the next service registered on this
219 	 * particular HCA
220 	 */
221 	rib_service_t		*srv_next;
222 };
223 
224 /*
225  * Connection lists
226  */
227 typedef struct {
228 	krwlock_t	conn_lock;	/* list lock */
229 	CONN		*conn_hd;	/* list head */
230 } rib_conn_list_t;
231 
/*
 * State of an HCA.  The values are spelled out; they are the same ones
 * the compiler would assign by default.
 */
enum hca_state {
	/* hca in up and running state */
	HCA_INITED = 0,
	/* hca in detached state */
	HCA_DETACHED = 1
};
236 
237 /*
238  * RPCIB per HCA structure
239  */
240 struct rib_hca_s {
241 	ibt_clnt_hdl_t		ibt_clnt_hdl;
242 
243 	/*
244 	 * per HCA.
245 	 */
246 	ibt_hca_hdl_t		hca_hdl;	/* HCA handle */
247 	ibt_hca_attr_t		hca_attrs;	/* HCA attributes */
248 	ibt_pd_hdl_t		pd_hdl;
249 	ib_guid_t		hca_guid;
250 	uint32_t		hca_nports;
251 	ibt_hca_portinfo_t	*hca_ports;
252 	size_t			hca_pinfosz;
253 	enum hca_state		state;		/* state of HCA */
254 	krwlock_t		state_lock;	/* protects state field */
255 	bool_t			inuse;		/* indicates HCA usage */
256 	kmutex_t		inuse_lock;	/* protects inuse field */
257 	/*
258 	 * List of services registered on all ports available
259 	 * on this HCA. Only one consumer of KRPC can register
260 	 * its services at one time or tear them down at one
261 	 * time.
262 	 */
263 	rib_service_t	*service_list;
264 	krwlock_t		service_list_lock;
265 
266 	rib_service_t	*ats_list;		/* Service list for ATS */
267 
268 	rib_conn_list_t		cl_conn_list;	/* client conn list */
269 	rib_conn_list_t		srv_conn_list;	/* server conn list */
270 
271 	rib_cq_t		*clnt_scq;
272 	rib_cq_t		*clnt_rcq;
273 	rib_cq_t		*svc_scq;
274 	rib_cq_t		*svc_rcq;
275 	kmutex_t		cb_lock;
276 	kcondvar_t		cb_cv;
277 
278 	rib_bufpool_t		*recv_pool;	/* recv buf pool */
279 	rib_bufpool_t		*send_pool;	/* send buf pool */
280 
281 	void			*iblock;	/* interrupt cookie */
282 };
283 
284 
285 /*
286  * Structure on wait state of a post send
287  */
288 struct send_wid {
289 	uint32_t 	xid;
290 	int		cv_sig;
291 	kmutex_t	sendwait_lock;
292 	kcondvar_t	wait_cv;
293 	uint_t		status;
294 	rib_qp_t	*qp;
295 	int		nsbufs;			/* # of send buffers posted */
296 	uint64_t	sbufaddr[DSEG_MAX];	/* posted send buffers */
297 };
298 
299 /*
300  * Structure on reply descriptor for recv queue.
301  * Different from the above posting of a descriptor.
302  */
303 struct reply {
304 	uint32_t 	xid;
305 	uint_t		status;
306 	uint64_t	vaddr_cq;	/* buf addr from CQ */
307 	uint_t		bytes_xfer;
308 	kcondvar_t	wait_cv;
309 	struct reply	*next;
310 	struct reply 	*prev;
311 };
312 
313 struct svc_recv {
314 	rib_qp_t	*qp;
315 	uint64_t	vaddr;
316 	uint_t		bytes_xfer;
317 };
318 
319 struct recv_wid {
320 	uint32_t 	xid;
321 	rib_qp_t	*qp;
322 	uint64_t	addr;	/* posted buf addr */
323 };
324 
325 /*
326  * Per QP data structure
327  */
328 struct rib_qp_s {
329 	rib_hca_t		*hca;
330 	rib_mode_t		mode;	/* RIB_SERVER or RIB_CLIENT */
331 	CONN			rdmaconn;
332 	ibt_channel_hdl_t	qp_hdl;
333 	uint_t			port_num;
334 	ib_qpn_t		qpn;
335 	int			chan_flags;
336 	clock_t			timeout;
337 	ibt_rc_chan_query_attr_t	qp_q_attrs;
338 	rib_cq_t		*send_cq;	/* send CQ */
339 	rib_cq_t		*recv_cq;	/* recv CQ */
340 
341 	/*
342 	 * Number of pre-posted rbufs
343 	 */
344 	uint_t			n_posted_rbufs;
345 	kcondvar_t 		posted_rbufs_cv;
346 	kmutex_t		posted_rbufs_lock;
347 
348 	/*
349 	 * RPC reply
350 	 */
351 	uint_t			rep_list_size;
352 	struct reply		*replylist;
353 	kmutex_t		replylist_lock;
354 
355 	/*
356 	 * server only, RDMA_DONE
357 	 */
358 	struct rdma_done_list	*rdlist;
359 	kmutex_t		rdlist_lock;
360 
361 	kmutex_t		cb_lock;
362 	kcondvar_t 		cb_conn_cv;
363 
364 	caddr_t			q;	/* upstream queue */
365 };
366 
/*
 * Conversions between a CONN and the rib_qp_t that contains it.
 *
 * ctoqp: read the c_private pointer of a CONN as a rib_qp_t pointer.
 * qptoc: take the address of the CONN (rdmaconn) embedded in a rib_qp_t.
 */
#define	ctoqp(c)	((rib_qp_t *)((c)->c_private))
#define	qptoc(qp)	((CONN *)&((qp)->rdmaconn))
369 
/*
 * Timeout for various calls
 * NOTE(review): the unit of these values is not stated here -- confirm
 * at the call sites.
 */
#define	CONN_WAIT_TIME	40
/* time for send completion */
#define	SEND_WAIT_TIME	40
/* time to get reply from remote QP */
#define	REPLY_WAIT_TIME	40
377 
378 #ifdef __cplusplus
379 }
380 #endif
381 
382 #endif	/* !_IB_H */
383