xref: /titanic_44/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_impl.h (revision 03494a9880d80f834bec10a1e8f0a2f8f7c97bf4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef _SYS_IB_MGT_IBMF_IBMF_IMPL_H
27 #define	_SYS_IB_MGT_IBMF_IBMF_IMPL_H
28 
29 
30 /*
31  * This file contains the IBMF implementation dependent structures and defines.
32  */
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 
38 #include <sys/types.h>
39 #include <sys/conf.h>
40 #include <sys/modctl.h>
41 #include <sys/kmem.h>
42 #include <sys/ksynch.h>
43 #include <sys/taskq.h>
44 #include <sys/sunddi.h>
45 #include <sys/disp.h>
46 #include <sys/ib/ibtl/ibvti.h>
47 #include <sys/ib/mgt/ibmf/ibmf.h>
48 #include <sys/ib/mgt/ibmf/ibmf_rmpp.h>
49 #include <sys/ib/mgt/ibmf/ibmf_kstat.h>
50 #include <sys/ib/mgt/ibmf/ibmf_trace.h>
51 
/* Memory needed per WQE: one MAD (IBMF_MAD_SIZE) plus one ib_grh_t */
#define	IBMF_MEM_PER_WQE		(IBMF_MAD_SIZE + sizeof (ib_grh_t))
#define	IBMF_MAX_SQ_WRE			64
#define	IBMF_MAX_RQ_WRE			64
#define	IBMF_MAX_POSTED_RQ_PER_QP	512
#define	IBMF_MAX_POSTED_SQ_PER_QP	512
#define	IBMF_MAX_SQ_WR_SGL_ELEMENTS	1
#define	IBMF_MAX_RQ_WR_SGL_ELEMENTS	1
#define	IBMF_MGMT_Q_KEY			0x80010000
#define	IBMF_P_KEY_DEF_FULL		0xFFFF
#define	IBMF_P_KEY_DEF_LIMITED		0x7FFF
#define	IBMF_P_KEY_BASE_MASK		0x7FFF
#define	IBMF_PKEY_MEMBERSHIP_MASK	0x8000

/* Taskq thread-count values */
#define	IBMF_TASKQ_1THREAD		1
#define	IBMF_TASKQ_NTHREADS		128
67 
/*
 * Work request ID format used for receive requests.
 *
 *  bit 0 set to 1
 *
 * IBMF_IS_RECV_WR_ID() tests this bit to tell receive work request IDs
 * apart from send work request IDs.
 */
#define	IBMF_RCV_CQE			0x1
74 
/*
 * Convenience macro used in the RMPP protocol to obtain R_Method field
 * of MAD header with Response bit flipped.
 *
 * Bit 7 (0x80) is inverted and bits 0-6 are passed through unchanged;
 * any bits above bit 7 are discarded.  The argument is parenthesized so
 * that an expression argument (e.g. "a | b") is handled correctly.  It
 * is evaluated twice, so it must not have side effects.
 */
#define	IBMF_FLIP_RESP_BIT(r_method)					\
	((((r_method) & 0x80) ^ 0x80) | ((r_method) & 0x7F))
81 
/*
 * Work Request ID macros
 *
 * A work request ID with the IBMF_RCV_CQE bit set identifies a receive
 * request; every other ID is treated as a send request.
 */
#define	IBMF_IS_RECV_WR_ID(id)				\
	(((uint64_t)(id) & IBMF_RCV_CQE) ? B_TRUE : B_FALSE)
#define	IBMF_IS_SEND_WR_ID(id)				\
	(!(IBMF_IS_RECV_WR_ID((id))))
87 
/*
 * Decrement IBMF message reference count
 *
 * The caller must hold msg->im_mutex (asserted).
 * The macro expands to a brace-enclosed block, so it cannot be used as
 * the unbraced body of an "if" that is followed by an "else".
 */
#define	IBMF_MSG_DECR_REFCNT(msg)			{	\
	ASSERT(MUTEX_HELD(&(msg)->im_mutex));			\
	(msg)->im_ref_count--;					\
}
93 
/*
 * Increment IBMF message reference count
 *
 * Unlike IBMF_MSG_DECR_REFCNT(), this macro does not assert that
 * im_mutex is held.  The expansion already ends with a semicolon, which
 * is kept for compatibility with existing callers; as a result it must
 * not be used as the unbraced body of an "if" that has an "else".
 */
#define	IBMF_MSG_INCR_REFCNT(msg)				\
	(msg)->im_ref_count++;
97 
/* Callback setup/cleanup macros */

/*
 * Mark a receive callback as active on a client.
 * The caller must hold clp->ic_mutex (asserted).  Sets
 * IBMF_CLIENT_RECV_CB_ACTIVE in ic_flags, bumps ic_recvs_active and
 * adds one to the recvs_active kstat under ic_kstat_mutex.
 */
#define	IBMF_RECV_CB_SETUP(clp)				{	\
	ASSERT(MUTEX_HELD(&(clp)->ic_mutex));			\
	(clp)->ic_flags |= IBMF_CLIENT_RECV_CB_ACTIVE;		\
	(clp)->ic_recvs_active++;				\
	mutex_enter(&(clp)->ic_kstat_mutex);			\
	IBMF_ADD32_KSTATS((clp), recvs_active, 1);		\
	mutex_exit(&(clp)->ic_kstat_mutex);			\
}
107 
/*
 * Undo IBMF_RECV_CB_SETUP() once a receive callback has finished.
 * The caller must hold clp->ic_mutex (asserted).  Drops ic_recvs_active
 * and the recvs_active kstat; when the count reaches zero the
 * IBMF_CLIENT_RECV_CB_ACTIVE flag is cleared.  If no receive callback
 * is active and IBMF_CLIENT_TEAR_DOWN_CB is set, ic_recv_cb_teardown_cv
 * is signalled.
 */
#define	IBMF_RECV_CB_CLEANUP(clp)			{		\
	ASSERT(MUTEX_HELD(&(clp)->ic_mutex));				\
	(clp)->ic_recvs_active--;					\
	mutex_enter(&(clp)->ic_kstat_mutex);				\
	IBMF_SUB32_KSTATS((clp), recvs_active, 1);			\
	mutex_exit(&(clp)->ic_kstat_mutex);				\
	if ((clp)->ic_recvs_active == 0)				\
		(clp)->ic_flags &= ~IBMF_CLIENT_RECV_CB_ACTIVE;		\
	if ((((clp)->ic_flags & IBMF_CLIENT_RECV_CB_ACTIVE) == 0) &&	\
	    (((clp)->ic_flags & IBMF_CLIENT_TEAR_DOWN_CB) != 0))	\
		cv_signal(&(clp)->ic_recv_cb_teardown_cv);		\
}
120 
/*
 * Mark a receive callback as active on an alternate QP context.
 * The caller must hold altqp->isq_mutex (asserted).  Sets
 * IBMF_CLIENT_RECV_CB_ACTIVE in isq_flags, bumps isq_recvs_active and
 * adds one to the owning client's recvs_active kstat under that
 * client's ic_kstat_mutex.
 */
#define	IBMF_ALT_RECV_CB_SETUP(altqp)			{		\
	ASSERT(MUTEX_HELD(&(altqp)->isq_mutex));			\
	(altqp)->isq_flags |= IBMF_CLIENT_RECV_CB_ACTIVE;		\
	(altqp)->isq_recvs_active++;					\
	mutex_enter(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
	IBMF_ADD32_KSTATS((altqp)->isq_client_hdl, recvs_active, 1);	\
	mutex_exit(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
}
129 
/*
 * Undo IBMF_ALT_RECV_CB_SETUP() once a receive callback has finished.
 * The caller must hold altqp->isq_mutex (asserted).  Drops
 * isq_recvs_active and the owning client's recvs_active kstat; when the
 * count reaches zero the IBMF_CLIENT_RECV_CB_ACTIVE flag is cleared.  If
 * no receive callback is active and IBMF_CLIENT_TEAR_DOWN_CB is set,
 * isq_recv_cb_teardown_cv is signalled.
 */
#define	IBMF_ALT_RECV_CB_CLEANUP(altqp)			{		\
	ASSERT(MUTEX_HELD(&(altqp)->isq_mutex));			\
	(altqp)->isq_recvs_active--;					\
	mutex_enter(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
	IBMF_SUB32_KSTATS((altqp)->isq_client_hdl, recvs_active, 1);	\
	mutex_exit(&(altqp)->isq_client_hdl->ic_kstat_mutex);		\
	if ((altqp)->isq_recvs_active == 0)				\
		(altqp)->isq_flags &= ~IBMF_CLIENT_RECV_CB_ACTIVE;	\
	if ((((altqp)->isq_flags & IBMF_CLIENT_RECV_CB_ACTIVE) == 0) &&	\
	    (((altqp)->isq_flags & IBMF_CLIENT_TEAR_DOWN_CB) != 0))	\
		cv_signal(&(altqp)->isq_recv_cb_teardown_cv);		\
}
142 
143 /* warlock annotations for ibmf.h and ibmf_msg.h structures */
144 _NOTE(READ_ONLY_DATA(_ibmf_msg::im_msgbufs_send.im_bufs_cl_data
145 	_ibmf_msg::im_msgbufs_send.im_bufs_cl_data_len
146 	_ibmf_msg::im_msgbufs_send.im_bufs_cl_hdr
147 	_ibmf_msg::im_msgbufs_send.im_bufs_cl_hdr_len
148 	_ibmf_msg::im_msgbufs_send.im_bufs_mad_hdr
149 	_ib_mad_hdr_t))
150 
151 /*
152  * WQE pool management contexts
153  */
154 typedef struct _ibmf_wqe_mgt {
155 	struct _ibmf_wqe_mgt	*wqe_mgt_next; /* next wqe management entry */
156 	void			*wqes_kmem;	/* kmem allocated for WQEs */
157 	uint64_t		wqes_kmem_sz; /* sizeof WQE kmem allocated */
158 	ib_vaddr_t		wqes_ib_mem;	/* Registered memory */
159 	ibt_lkey_t		wqes_ib_lkey;	/* Lkey that goes with it */
160 	ibt_mr_hdl_t		wqes_ib_mem_hdl; /* IB mem handle */
161 	kmutex_t		wqes_mutex;	/* WQE mgt context mutex */
162 } ibmf_wqe_mgt_t;
163 _NOTE(MUTEX_PROTECTS_DATA(ibmf_wqe_mgt_t::wqes_mutex,
164     ibmf_wqe_mgt_t::wqes_kmem
165     ibmf_wqe_mgt_t::wqes_kmem_sz
166     ibmf_wqe_mgt_t::wqes_ib_mem
167     ibmf_wqe_mgt_t::wqes_ib_lkey
168     ibmf_wqe_mgt_t::wqes_ib_mem_hdl))
169 
170 /*
171  * structure used to keep track of qp handles
172  */
173 typedef struct _ibmf_qp_t {
174 	struct _ibmf_qp_t	*iq_next;	/* next in the list */
175 	ibt_qp_hdl_t		iq_qp_handle;	/* qp handle from IB xport */
176 	int			iq_port_num;	/* port num for this qp */
177 	int			iq_qp_num;	/* qp num */
178 	int			iq_qp_ref;	/* no. of clients using this */
179 	uint_t			iq_flags;	/* for implementing state m/c */
180 	uint_t			iq_rwqes_posted; /* posted receive wqes */
181 	kmutex_t		iq_mutex;	/* mutex for some fields */
182 } ibmf_qp_t;
183 _NOTE(READ_ONLY_DATA(ibmf_qp_t::iq_port_num ibmf_qp_t::iq_qp_handle))
184 _NOTE(MUTEX_PROTECTS_DATA(ibmf_qp_t::iq_mutex,
185     ibmf_qp_t::iq_rwqes_posted))
186 
/* defines for iq_flags (one bit per state of the QP state machine) */
#define	IBMF_QP_FLAGS_INVALID				0x0001
#define	IBMF_QP_FLAGS_INITING				0x0002
#define	IBMF_QP_FLAGS_INITED				0x0004
#define	IBMF_QP_FLAGS_UNINITING				0x0008
192 
193 /*
194  * structure used to keep track of qp handles for qps other than
195  * the special qps
196  */
197 typedef struct _ibmf_alt_qp_t {
198 	struct _ibmf_alt_qp_t	*isq_next;	/* next qp ctx on list */
199 	ibt_qp_hdl_t		isq_qp_handle;	/* qp handle from IB xport */
200 	ibt_chan_sizes_t	isq_qp_sizes;	/* qp sizes returned by alloc */
201 	struct _ibmf_client	*isq_client_hdl; /* associated client handle */
202 	ibmf_msg_cb_t		isq_recv_cb;	/* recv callback for this qp */
203 	void			*isq_recv_cb_arg; /* arg for recv cb */
204 	kcondvar_t		isq_recv_cb_teardown_cv; /* wait on teardown */
205 	kmutex_t		isq_mutex;		/* qp context mutex */
206 	int			isq_flags;	/* to keep track of state */
207 	int			isq_sends_active; /* outstanding sends */
208 	int			isq_recvs_active; /* outstanding recvs */
209 	ib_qpn_t		isq_qpn;	/* qp number */
210 	ib_pkey_t		isq_pkey;	/* qp's partition key */
211 	ib_qkey_t		isq_qkey;	/* qp's queue keye */
212 	int			isq_port_num;	/* port num for this qp */
213 	boolean_t		isq_supports_rmpp; /* qp supports rmpp */
214 	kcondvar_t		isq_sqd_cv; 	/* wait on SQD event */
215 	int			isq_wqes_alloced; /* wqes allocated for QP */
216 	kcondvar_t		isq_wqes_cv; 	/* wait on wqes destruction */
217 	uint_t			isq_rwqes_posted; /* posted receive wqes */
218 
219 	/* Manage Send/Receive WQEs for Special QPs */
220 	struct kmem_cache	*isq_send_wqes_cache; /* Send WQE cache */
221 	struct kmem_cache	*isq_recv_wqes_cache; /* Receive WQE cache */
222 	vmem_t			*isq_wqe_ib_vmem; /* IB virtual address arena */
223 	kmutex_t		isq_wqe_mutex;	/* WQE management list mutex */
224 	ibmf_wqe_mgt_t		*isq_wqe_mgt_list; /* WQE management list */
225 } ibmf_alt_qp_t;
226 _NOTE(MUTEX_PROTECTS_DATA(ibmf_alt_qp_t::isq_mutex,
227     ibmf_alt_qp_t::isq_sends_active
228     ibmf_alt_qp_t::isq_recvs_active
229     ibmf_alt_qp_t::isq_pkey
230     ibmf_alt_qp_t::isq_qkey
231     ibmf_alt_qp_t::isq_recv_cb
232     ibmf_alt_qp_t::isq_recv_cb_arg
233     ibmf_alt_qp_t::isq_flags
234     ibmf_alt_qp_t::isq_rwqes_posted))
235 _NOTE(MUTEX_PROTECTS_DATA(ibmf_alt_qp_t::isq_wqe_mutex,
236     ibmf_alt_qp_t::isq_wqe_mgt_list))
237 _NOTE(READ_ONLY_DATA(ibmf_alt_qp_t::isq_port_num))
238 
/* Message flag bits (meaning given by the per-flag comments) */
#define	IBMF_MSG_FLAGS_QUEUED		0x00001000	/* in the ib xport */
#define	IBMF_MSG_FLAGS_DONE		0x00002000	/* xport done */
#define	IBMF_MSG_FLAGS_BLOCKING		0x00004000	/* sync command */
242 
243 /*
244  * This structure is used to keep track of IBT returned ibt_ud_dest_t
245  * structures.
246  */
247 typedef struct ibmf_ud_dest_s {
248 	ibt_ud_dest_t		ud_dest;
249 	struct ibmf_ud_dest_s	*ud_next;
250 } ibmf_ud_dest_t;
251 
252 /*
253  * ibmf_msg_impl definition
254  *	The IBMF client initializes various members of the msg while sending
255  *	the message. IBMF fills in the various members of the msg when a message
256  *	is received.
257  */
258 typedef struct _ibmf_msg_impl {
259 	ibmf_addr_info_t	im_local_addr;	/* local addressing info */
260 	ibmf_global_addr_info_t	im_global_addr;	/* global addressing info */
261 	int32_t			im_msg_status;	/* completion status */
262 	uint32_t		im_msg_flags;	/* flags */
263 	size_t			im_msg_sz_limit; /* max. message size */
264 	ibmf_msg_bufs_t		im_msgbufs_send; /* input data to ibmf */
265 	ibmf_msg_bufs_t		im_msgbufs_recv; /* output data from ibmf */
266 	struct _ibmf_msg_impl	*im_msg_next;	/* next message on the list */
267 	struct _ibmf_msg_impl	*im_msg_prev;	/* prev message on the list */
268 	void			*im_client;	/* client that allocd the pkt */
269 	ibmf_qp_handle_t	im_qp_hdl;	/* qp handle */
270 	ibt_ud_dest_t		*im_ud_dest;	/* ptr to the pkt's ud_dest */
271 	ibmf_ud_dest_t		*im_ibmf_ud_dest; /* ptr to the pkt's ud_dest */
272 	ibmf_msg_cb_t		im_trans_cb;	/* transaction completion cb */
273 	void			*im_trans_cb_arg; /* arg for completion cb */
274 	uint64_t		im_tid;		/* transaction ID */
275 	uint8_t			im_mgt_class; 	/* management class */
276 	kmutex_t		im_mutex;	/* protects trans context */
277 	uint32_t		im_state;	/* message state */
278 	uint32_t		im_transp_op_flags; /* transaction operation */
279 	uint32_t		im_flags;	/* message flags */
280 	uint32_t		im_trans_state_flags;	/* state flags */
281 	kcondvar_t		im_trans_cv;	/* wait for op completion */
282 	ibmf_rmpp_ctx_t		im_rmpp_ctx; 	/* RMPP context */
283 	ibmf_retrans_t		im_retrans;	/* retransmission info */
284 	timeout_id_t		im_rp_timeout_id; /* response timeout ID */
285 	timeout_id_t		im_tr_timeout_id; /* transaction timeout ID */
286 	timeout_id_t		im_rp_unset_timeout_id; /* id for untimeout() */
287 	timeout_id_t		im_tr_unset_timeout_id; /* id for untimeout() */
288 	int			im_ref_count;	/* reference count */
289 	boolean_t		im_unsolicited; /* msg was unsolicited recv */
290 	int			im_pending_send_compls; /* send completions */
291 } ibmf_msg_impl_t;
292 _NOTE(READ_ONLY_DATA(ibmf_msg_impl_t::im_trans_cb
293     ibmf_msg_impl_t::im_trans_cb_arg
294     ibmf_msg_impl_t::im_transp_op_flags
295     ibmf_msg_impl_t::im_local_addr
296     ibmf_msg_impl_t::im_unsolicited
297     ibmf_msg_impl_t::im_client))
298 _NOTE(MUTEX_PROTECTS_DATA(ibmf_msg_impl_t::im_mutex,
299     ibmf_msg_impl_t::im_flags
300     ibmf_msg_impl_t::im_trans_state_flags
301     ibmf_msg_impl_t::im_msgbufs_recv
302     ibmf_msg_impl_t::im_msg_status
303     ibmf_msg_impl_t::im_rmpp_ctx))
304 
/* im_flags (each value is a distinct bit) */
#define	IBMF_MSG_FLAGS_SEQUENCED	0x1
#define	IBMF_MSG_FLAGS_SEND_RMPP	0x2
#define	IBMF_MSG_FLAGS_RECV_RMPP	0x4
#define	IBMF_MSG_FLAGS_NOT_RMPP		0x8
#define	IBMF_MSG_FLAGS_BUSY		0x10
#define	IBMF_MSG_FLAGS_FREE		0x20
#define	IBMF_MSG_FLAGS_ON_LIST		0x40
#define	IBMF_MSG_FLAGS_SET_TERMINATION	0x80
#define	IBMF_MSG_FLAGS_TERMINATION	0x100
315 
/*
 * retransmission parameter defaults for im_retrans field
 * (time values are in microseconds, per the per-line comments)
 */
#define	IBMF_RETRANS_DEF_RTV		4000000		/* 4 seconds */
#define	IBMF_RETRANS_DEF_RTTV		100000		/* 100 milliseconds */
#define	IBMF_RETRANS_DEF_TRANS_TO	40000000	/* 40 seconds */
#define	IBMF_RETRANS_DEF_RETRIES	0
321 
/*
 * Transaction state flags (im_trans_state_flags) definitions
 * Don't use 0x0 as a flag value since clients OR and AND the flags
 */
#define	IBMF_TRANS_STATE_FLAG_UNINIT		0x1
#define	IBMF_TRANS_STATE_FLAG_INIT		0x2
#define	IBMF_TRANS_STATE_FLAG_WAIT		0x4
#define	IBMF_TRANS_STATE_FLAG_DONE		0x8
#define	IBMF_TRANS_STATE_FLAG_SIGNALED		0x10
#define	IBMF_TRANS_STATE_FLAG_TIMEOUT		0x20
#define	IBMF_TRANS_STATE_FLAG_RECV_ACTIVE	0x40
#define	IBMF_TRANS_STATE_FLAG_RECV_DONE		0x80
#define	IBMF_TRANS_STATE_FLAG_SEND_DONE		0x100
335 
/*
 * Timer types
 * Passed as the "type" argument of ibmf_i_set_timer() and
 * ibmf_i_unset_timer().
 */
typedef	enum _ibmf_timer_t {
	IBMF_RESP_TIMER			= 1,
	IBMF_TRANS_TIMER		= 2
} ibmf_timer_t;
341 
342 /*
343  * structure to hold specific client info taken from ibmf_register_info_t
344  * since we can register for more than one client at a time, but each specific
345  * ibmf_client_t only holds one client itself.
346  */
347 typedef struct _ibmf_client_info {
348 	ib_guid_t		ci_guid;
349 	uint_t			port_num;
350 	ibmf_client_type_t	client_class;
351 } ibmf_client_info_t;
352 
/*
 * Defines for the client type (agent/manager/agent+manager)
 * Bits 16-19 of the client_class specify the client type.
 * IBMF_AGENT_MANAGER_ID is the OR of the agent and manager IDs.
 */
#define	IBMF_AGENT_ID			0x00010000
#define	IBMF_MANAGER_ID			0x00020000
#define	IBMF_AGENT_MANAGER_ID		0x00030000
360 
361 /*
362  * structure used to keep track of clients
363  */
364 typedef struct _ibmf_client {
365 	void			*ic_client_sig;	/* set for valid handles */
366 	struct _ibmf_ci		*ic_myci;	/* pointer to CI */
367 	struct _ibmf_client	*ic_next;	/* next client on list */
368 	struct _ibmf_client	*ic_prev;	/* previous client on list */
369 
370 	taskq_t			*ic_send_taskq;	/* taskq for send cb */
371 	taskq_t			*ic_recv_taskq;	/* taskq for receive cb */
372 	uint_t			ic_init_state_class; /* taskq initialization */
373 
374 	ibmf_msg_impl_t		*ic_msg_list; /* protected by ic_mutex */
375 	ibmf_msg_impl_t		*ic_msg_last; /* last message on list */
376 	ibmf_msg_impl_t		*ic_term_msg_list; /* termination loop mesgs */
377 	ibmf_msg_impl_t		*ic_term_msg_last; /* last message on list */
378 	kmutex_t		ic_msg_mutex; /* protect the message list */
379 
380 	/* IBTL asynchronous event callback (eg. HCA offline) */
381 	ibmf_async_event_cb_t	ic_async_cb; /* async/unsolicited handling */
382 	void			*ic_async_cb_arg; /* args for async cb */
383 
384 	/* Asynchronous/Unsolicited message handler */
385 	ibmf_msg_cb_t		ic_recv_cb;
386 	void			*ic_recv_cb_arg;
387 	kcondvar_t		ic_recv_cb_teardown_cv; /* wait on teardown */
388 
389 	ibmf_client_info_t	ic_client_info; /* client registration info */
390 	ibmf_qp_t		*ic_qp;		/* special qp context */
391 	ibt_hca_hdl_t		ic_ci_handle;	/* == ic_myci->ic_ci_handle */
392 	kmutex_t		ic_mutex;	/* prot the client struct */
393 	int			ic_flags;	/* to keep track of state */
394 	int			ic_reg_flags;	/* flags specified during */
395 						/* registration */
396 
397 	/* Statistics */
398 	int			ic_msgs_alloced; /* no. msgs alloced by/for */
399 	int			ic_msgs_active; /* no. msgs active */
400 	int			ic_trans_active; /* outstanding transacts  */
401 	int			ic_sends_active; /* outstanding sends */
402 	int			ic_recvs_active; /* outstanding recvs */
403 
404 	ib_lid_t		ic_base_lid;	/* used to calculate pathbits */
405 	kmutex_t		ic_kstat_mutex;	/* protect the kstat */
406 	struct kstat		*ic_kstatp;	/* kstats for client */
407 } ibmf_client_t;
408 _NOTE(READ_ONLY_DATA(ibmf_client_t::ic_ci_handle
409     ibmf_client_t::ic_client_info
410     ibmf_client_t::ic_client_sig))
411 _NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_msg_mutex,
412     ibmf_client_t::ic_msg_list
413     ibmf_client_t::ic_msg_last
414     ibmf_client_t::ic_term_msg_list
415     ibmf_client_t::ic_term_msg_last))
416 _NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_mutex,
417     ibmf_client_t::ic_msgs_alloced
418     ibmf_client_t::ic_flags
419     ibmf_client_t::ic_recv_cb
420     ibmf_client_t::ic_recv_cb_arg))
421 _NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_kstat_mutex,
422     ibmf_client_t::ic_kstatp))
423 
/* Callback state bits kept in ic_flags and isq_flags */
#define	IBMF_CLIENT_RECV_CB_ACTIVE		0x00000001 /* rcv CB active */
#define	IBMF_CLIENT_SEND_CB_ACTIVE		0x00000010 /* send CB active */
#define	IBMF_CLIENT_TEAR_DOWN_CB		0x00000100 /* client wants to */
							    /* remove recv_cb */

/* IBMF_MAD_ONLY is used by the alternate QP context only (isq_flags) */
#define	IBMF_MAD_ONLY				0x00002000
#define	IBMF_RAW_ONLY				0x00004000

/* Message list selectors: regular list vs. termination list */
#define	IBMF_REG_MSG_LIST	0
#define	IBMF_TERM_MSG_LIST	1
435 
436 /*
437  * Send WQE context
438  */
439 typedef struct _ibmf_send_wqe {
440 	struct _ibmf_send_wqe	*send_wqe_next;
441 	ibt_send_wr_t		send_wr;	/* IBT send work request */
442 	ibmf_client_t		*send_client;	/* client that sent this */
443 	void			*send_mem;	/* memory used in send */
444 	ib_vaddr_t		send_sg_mem;	/* registered memory */
445 	ibt_lkey_t		send_sg_lkey;	/* Lkey that goes with it */
446 	ibt_mr_hdl_t		send_mem_hdl;	/* == ci_send_mr_handle in ci */
447 	uint_t			send_wqe_flags;
448 	uchar_t			send_port_num;	/* port this is posted to */
449 	ibt_qp_hdl_t		send_qp_handle;	/* qp handle for this wqe */
450 	ibmf_qp_handle_t	send_ibmf_qp_handle; /* ibmf qp handle */
451 	ibmf_msg_impl_t		*send_msg;	/* message context */
452 	uint32_t		send_status;	/* completion status */
453 	uint32_t		send_rmpp_segment; /* rmpp segment */
454 } ibmf_send_wqe_t;
455 
456 /*
457  * Receive WQE context
458  */
459 typedef struct _ibmf_recv_wqe {
460 	struct _ibmf_recv_wqe	*recv_wqe_next;
461 	ibt_recv_wr_t		recv_wr;
462 	ibmf_client_t		*recv_client;	/* client that received this */
463 	void			*recv_mem;	/* memory used in WQEs */
464 	ibmf_qp_t		*recv_qpp;	/* qp this is posted */
465 	ibt_wc_t		recv_wc;	/* corresponding  cqe */
466 	ib_vaddr_t		recv_sg_mem;	/* registered mem */
467 	ibt_lkey_t		recv_sg_lkey;	/* Lkey that goes with it */
468 	ibt_mr_hdl_t		recv_mem_hdl;	/* == ci_recv_mr_handle in ci */
469 	uint_t			recv_wqe_flags;
470 	uchar_t			recv_port_num;	/* port this is posted to */
471 	ibt_qp_hdl_t		recv_qp_handle;	/* ibt qp handle for this wqe */
472 	ibmf_qp_handle_t	recv_ibmf_qp_handle; /* ibmf qp handle */
473 	ibmf_msg_impl_t		*recv_msg;	/* message context */
474 } ibmf_recv_wqe_t;
475 
476 #define	IBMF_RECV_WQE_FREE		0x00000001	/* WQE is free */
477 
478 /*
479  * Struct that keeps track of the underlying IB channel interface. There
480  * is one per CI. Each clients on a given ci gets a reference to the CI.
481  * References are tracked used ci_ref field; when ci_ref drops to 0, the
482  * structure can be freed.
483  */
484 typedef struct _ibmf_ci {
485 	struct _ibmf_ci		*ci_next;
486 	kmutex_t		ci_mutex;	/* protects the CI struct */
487 	ibmf_client_t		*ci_clients;	/* list of clients;head */
488 	ibmf_client_t		*ci_clients_last; /* tail */
489 	kmutex_t		ci_clients_mutex; /* protect the client list */
490 	ib_guid_t		ci_node_guid;	/* node GUID */
491 	ibt_hca_hdl_t		ci_ci_handle;	/* HCA handle */
492 	ibt_pd_hdl_t		ci_pd;		/* protection domain */
493 	ibmf_qp_t		*ci_qp_list;	/* sp. QP list for all ports */
494 	ibmf_qp_t		*ci_qp_list_tail;
495 	kcondvar_t		ci_qp_cv;	/* wait for QP valid state */
496 	ibt_cq_hdl_t		ci_cq_handle;	/* CQ handle for sp. QPs */
497 	ibt_cq_hdl_t		ci_alt_cq_handle; /* CQ handle for alt. QPs */
498 	ibmf_alt_qp_t		*ci_alt_qp_list; /* alternate QP list */
499 
500 	/* UD destination resources */
501 	uint32_t		ci_ud_dest_list_count; /* resources in pool */
502 	kmutex_t		ci_ud_dest_list_mutex; /* UD dest list mutex */
503 	ibmf_ud_dest_t		*ci_ud_dest_list_head; /* start of list */
504 
505 	/* Send/Receive WQEs for Special QPs */
506 	struct kmem_cache	*ci_send_wqes_cache; /* Send WQE cache */
507 	struct kmem_cache	*ci_recv_wqes_cache; /* Receive WQE cache */
508 	vmem_t			*ci_wqe_ib_vmem; /* IB virtual address arena */
509 	kmutex_t		ci_wqe_mutex;	/* WQE management list mutex */
510 	ibmf_wqe_mgt_t		*ci_wqe_mgt_list; /* WQE management list */
511 
512 	uint_t			ci_nports;	/* num ports on the CI */
513 	uint32_t		ci_vendor_id:24; /* HCA vendor ID */
514 	uint16_t		ci_device_id;	/* HCA device ID */
515 	uint_t			ci_ref;		/* reference count */
516 	uint16_t		ci_state;	/* CI context state */
517 	uint16_t		ci_state_flags;	/* CI context state flags */
518 	kcondvar_t		ci_state_cv;	/* wait on a state change */
519 	uint_t			ci_init_state;	/* used in cleanup */
520 
521 	/* free QP synchronization with WQE completion processing */
522 	int			ci_wqes_alloced; /* wqes alloced for sp QPs */
523 	kcondvar_t		ci_wqes_cv; 	/* wait on wqes destruction */
524 
525 	/* port kstats */
526 	struct kstat		*ci_port_kstatp;	/* kstats for client */
527 } ibmf_ci_t;
528 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_ud_dest_list_mutex,
529     ibmf_ci_t::ci_ud_dest_list_count
530     ibmf_ci_t::ci_ud_dest_list_head))
531 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_mutex,
532     ibmf_ci_t::ci_state
533     ibmf_ci_t::ci_port_kstatp))
534 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_clients_mutex,
535     ibmf_ci_t::ci_clients
536     ibmf_ci_t::ci_clients_last))
537 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_mutex,
538     ibmf_qp_t::iq_next
539     ibmf_qp_t::iq_flags))
540 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_wqe_mutex,
541     ibmf_ci_t::ci_wqe_mgt_list))
542 _NOTE(READ_ONLY_DATA(ibmf_ci_t::ci_cq_handle))
543 
#define	IBMF_CI_BLOCKED_ON_SEND_WQE		0x00000001 /* blockers on wqe */

/* defines for ci_init_state (one bit per completed init step) */
#define	IBMF_CI_INIT_HCA_INITED				0x0001
#define	IBMF_CI_INIT_MUTEX_CV_INITED			0x0002
#define	IBMF_CI_INIT_SEND_TASKQ_DONE			0x0004
#define	IBMF_CI_INIT_RECV_TASKQ_DONE			0x0008
#define	IBMF_CI_INIT_CQ_INITED				0x0010
#define	IBMF_CI_INIT_WQES_ALLOCED			0x0020
#define	IBMF_CI_INIT_HCA_LINKED				0x0040
#define	IBMF_CI_INIT_QP_LIST_INITED			0x0080

/*
 * defines for ci_state
 * Note that IBMF_CI_STATE_GONE (0x0003) equals the OR of the other two
 * values, so these cannot be tested as independent bits.
 */
#define	IBMF_CI_STATE_PRESENT				0x0001
#define	IBMF_CI_STATE_INITED				0x0002
#define	IBMF_CI_STATE_GONE				0x0003

/* defines for ci_state_flags */
#define	IBMF_CI_STATE_INIT_WAIT				0x0001
#define	IBMF_CI_STATE_UNINIT_WAIT			0x0002
#define	IBMF_CI_STATE_VALIDATE_WAIT			0x0004

#define	IBMF_CI_STATE_INVALIDATING			0x0100
#define	IBMF_CI_STATE_VALIDATING			0x0200
#define	IBMF_CI_STATE_UNINITING				0x0400
#define	IBMF_CI_STATE_INITING				0x0800
570 
571 /*
572  * for keeping track of ibmf state
573  */
574 typedef struct _ibmf_state {
575 	struct _ibmf_ci		*ibmf_ci_list;
576 	struct _ibmf_ci		*ibmf_ci_list_tail;
577 	ibt_clnt_hdl_t		ibmf_ibt_handle;
578 	ibt_cq_handler_t	ibmf_cq_handler;
579 	kmutex_t		ibmf_mutex;
580 	ibt_clnt_modinfo_t	ibmf_ibt_modinfo;
581 	taskq_t			*ibmf_taskq;	/* taskq for MAD processing */
582 						/* for classes not registered */
583 } ibmf_state_t;
584 _NOTE(MUTEX_PROTECTS_DATA(ibmf_state_t::ibmf_mutex,
585     ibmf_ci_t::ci_next))
586 
/* UD Destination resource cache definitions */
/*
 * It is preferred that the difference between the hi and lo water
 * marks be only a few ud_dest resources. The intent is that a
 * thread that needs to run ibmf_i_populate_ud_dest_list() does not
 * spend too much time in this ud_dest resource population process
 * before it returns to its caller. A benefit of a higher lo water
 * mark is that the larger available pool of resources supports high
 * stress scenarios better.
 */
#define	IBMF_UD_DEST_HI_WATER_MARK	512
#define	IBMF_UD_DEST_LO_WATER_MARK	500
599 
600 /*
601  * Prototypes
602  */
603 /* ci related functions */
604 int ibmf_i_validate_ci_guid_and_port(ib_guid_t hca_guid, uint8_t port_num);
605 int ibmf_i_get_ci(ibmf_register_info_t *client_infop, ibmf_ci_t **cipp);
606 void ibmf_i_release_ci(ibmf_ci_t *cip);
607 
608 /* client related functions */
609 int ibmf_i_validate_classes_and_port(ibmf_ci_t *ibmf_cip,
610     ibmf_register_info_t *client_infop);
611 int ibmf_i_validate_class_mask(ibmf_register_info_t *client_infop);
612 int ibmf_i_alloc_client(ibmf_register_info_t *client_infop, uint_t flags,
613     ibmf_client_t **clientpp);
614 void ibmf_i_add_client(ibmf_ci_t *ibmf_ci, ibmf_client_t *ibmf_clientp);
615 
616 void ibmf_i_free_client(ibmf_client_t *clientp);
617 void ibmf_i_delete_client(ibmf_ci_t *ibmf_ci, ibmf_client_t *ibmf_clientp);
618 int ibmf_i_lookup_client_by_mgmt_class(ibmf_ci_t *ibmf_cip, int port_num,
619     ibmf_client_type_t class, ibmf_client_t **clientpp);
620 
621 /* qp related functions */
622 int ibmf_i_get_qp(ibmf_ci_t *ibmf_cip, uint_t port_num,
623     ibmf_client_type_t class, ibmf_qp_t **qppp);
624 void ibmf_i_release_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t **qpp);
625 int ibmf_i_alloc_qp(ibmf_client_t *clientp, ib_pkey_t p_key,
626     ib_qkey_t q_key, uint_t flags, ibmf_qp_handle_t *ibmf_qp_handlep);
627 int ibmf_i_free_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags);
628 int ibmf_i_query_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags,
629     uint_t *qp_nump, ib_pkey_t *p_keyp, ib_qkey_t *q_keyp, uint8_t *portnump);
630 int ibmf_i_modify_qp(ibmf_qp_handle_t ibmf_qp_handle, ib_pkey_t p_key,
631     ib_qkey_t q_key, uint_t flags);
632 int ibmf_i_get_pkeyix(ibt_hca_hdl_t hca_handle, ib_pkey_t pkey,
633     uint8_t port, ib_pkey_t *pkeyixp);
634 int ibmf_i_pkey_ix_to_key(ibmf_ci_t *cip, uint_t port_num, uint_t pkey_ix,
635     ib_pkey_t *pkeyp);
636 
637 /* pkt related functions */
638 int ibmf_i_issue_pkt(ibmf_client_t *clientp, ibmf_msg_impl_t *msgp,
639     ibmf_qp_handle_t ibmf_qp_handle, ibmf_send_wqe_t *send_wqep);
640 int ibmf_i_alloc_ud_dest(ibmf_client_t *clientp,
641     ibmf_msg_impl_t *msgimplp, ibt_ud_dest_hdl_t *ud_dest_p, boolean_t block);
642 void ibmf_i_free_ud_dest(ibmf_client_t *clientp,
643     ibmf_msg_impl_t *msgimplp);
644 void ibmf_i_init_ud_dest(ibmf_ci_t *cip);
645 void ibmf_i_fini_ud_dest(ibmf_ci_t *cip);
646 ibmf_ud_dest_t *ibmf_i_get_ud_dest(ibmf_ci_t *cip);
647 void ibmf_i_put_ud_dest(ibmf_ci_t *cip, ibmf_ud_dest_t *ud_dest);
648 void ibmf_i_pop_ud_dest_thread(void *argp);
649 void ibmf_i_clean_ud_dest_list(ibmf_ci_t *cip, boolean_t all);
650 int ibmf_i_alloc_send_resources(ibmf_ci_t *cip, ibmf_msg_impl_t *msgp,
651     boolean_t block, ibmf_send_wqe_t **swqepp);
652 void ibmf_i_free_send_resources(ibmf_ci_t *cip, ibmf_msg_impl_t *msgimplp,
653     ibmf_send_wqe_t *swqep);
654 int ibmf_i_post_recv_buffer(ibmf_ci_t *cip, ibmf_qp_t *qpp, boolean_t block,
655     ibmf_qp_handle_t ibmf_qp_handle);
656 int ibmf_i_is_ibmf_handle_valid(ibmf_handle_t ibmf_handle);
657 int ibmf_i_is_qp_handle_valid(ibmf_handle_t ibmf_handle,
658     ibmf_qp_handle_t ibmf_qp_handle);
659 int ibmf_i_check_for_loopback(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t msgp,
660     void *msg_cb_args, ibmf_retrans_t *retrans, boolean_t *loopback);
661 int ibmf_i_ibt_to_ibmf_status(ibt_status_t ibt_status);
662 int ibmf_i_ibt_wc_to_ibmf_status(ibt_wc_status_t ibt_wc_status);
663 int ibmf_i_send_pkt(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
664     ibmf_msg_impl_t *msgimplp, int block);
665 int ibmf_i_send_single_pkt(ibmf_client_t *clientp,
666     ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, int block);
667 
668 /* WQE related functions */
669 int ibmf_i_init_wqes(ibmf_ci_t *cip);
670 void ibmf_i_fini_wqes(ibmf_ci_t *cip);
671 void ibmf_i_init_send_wqe(ibmf_client_t *clientp,
672     ibmf_msg_impl_t *msgimplp, ibt_wr_ds_t *sglp, ibmf_send_wqe_t *wqep,
673     ibt_ud_dest_hdl_t ud_dest, ibt_qp_hdl_t ibt_qp_handle,
674     ibmf_qp_handle_t ibmf_qp_handle);
675 void ibmf_i_init_recv_wqe(ibmf_qp_t *qpp, ibt_wr_ds_t *sglp,
676     ibmf_recv_wqe_t *wqep, ibt_qp_hdl_t ibt_qp_handle,
677     ibmf_qp_handle_t ibmf_qp_handle);
678 void ibmf_i_mad_completions(ibt_cq_hdl_t cq_handle, void *arg);
679 #ifdef DEBUG
680 void ibmf_i_dump_wcp(ibmf_ci_t *cip, ibt_wc_t *wcp, ibmf_recv_wqe_t *recv_wqep);
681 #endif
682 
683 void ibmf_ibt_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
684     ibt_async_code_t code, ibt_async_event_t *event);
685 
686 /* msg related functions */
687 void ibmf_i_init_msg(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t trans_cb,
688     void *trans_cb_arg, ibmf_retrans_t *retrans, boolean_t block);
689 void ibmf_i_client_add_msg(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp);
690 void ibmf_i_client_rem_msg(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
691     uint_t *refcnt);
692 int ibmf_i_alloc_msg(ibmf_client_t *clientp, ibmf_msg_impl_t **msgp,
693     int km_flags);
694 void ibmf_i_free_msg(ibmf_msg_impl_t *msgimplp);
695 int ibmf_i_msg_transport(ibmf_client_t *clientp,
696     ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, int blocking);
697 void ibmf_i_decrement_ref_count(ibmf_msg_impl_t *msgimplp);
698 void ibmf_i_handle_send_completion(ibmf_ci_t *cip, ibt_wc_t *wcp);
699 void ibmf_i_handle_recv_completion(ibmf_ci_t *cip, ibt_wc_t *wcp);
700 int ibmf_setup_recvbuf_on_error(ibmf_msg_impl_t *msgimplp, uchar_t *mad);
701 
702 /* transaction related functions */
703 void ibmf_i_terminate_transaction(ibmf_client_t *clientp,
704     ibmf_msg_impl_t *msgimplp, uint32_t status);
705 void ibmf_i_notify_client(ibmf_msg_impl_t *msgimplp);
706 void ibmf_i_notify_sequence(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
707     int msg_flags);
708 
709 /* timer related functions */
710 void ibmf_i_set_timer(void (*func)(void *), ibmf_msg_impl_t *msgimplp,
711     ibmf_timer_t type);
712 void ibmf_i_unset_timer(ibmf_msg_impl_t *msgimplp, ibmf_timer_t type);
713 void ibmf_i_recv_timeout(void *argp);
714 void ibmf_i_send_timeout(void *argp);
715 void ibmf_i_err_terminate_timeout(void *msgp);
716 
717 /* rmpp related functions */
718 boolean_t ibmf_i_find_msg_client(ibmf_client_t *cl, ibmf_msg_impl_t *msgimplp,
719     boolean_t inc_refcnt);
720 boolean_t ibmf_i_is_rmpp(ibmf_client_t *clientp,
721     ibmf_qp_handle_t ibmf_qp_handle);
722 void ibmf_i_mgt_class_to_hdr_sz_off(uint32_t mgt_class, uint32_t *szp,
723     uint32_t *offp);
724 ibmf_msg_impl_t *ibmf_i_find_msg(ibmf_client_t *clientp, uint64_t tid,
725     uint8_t mgt_class, uint8_t r_method, ib_lid_t lid, ib_gid_t *gid,
726     boolean_t gid_pr, ibmf_rmpp_hdr_t *rmpp_hdr, boolean_t msg_list);
727 #ifdef NOTDEF
728 ibmf_msg_impl_t *ibmf_i_find_term_msg(ibmf_client_t *clientp, uint64_t tid,
729     uint8_t mgt_class, ib_lid_t lid, ib_gid_t *gid, boolean_t gid_pr,
730     ibmf_rmpp_hdr_t *rmpp_hd);
731 #endif
732 void ibmf_i_handle_rmpp(ibmf_client_t *clientp, ibmf_qp_handle_t qp_hdl,
733     ibmf_msg_impl_t *msgimpl, uchar_t *madp);
734 int ibmf_i_send_rmpp(ibmf_msg_impl_t *msgimplp, uint8_t rmpp_type,
735     uint8_t rmpp_status, uint32_t segno, uint32_t nwl, int block);
736 int ibmf_i_send_rmpp_pkts(ibmf_client_t *clientp,
737     ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, boolean_t isDS,
738     int block);
739 void ibmf_i_send_rmpp_window(ibmf_msg_impl_t *msgimplp, int block);
740 int ibmf_setup_term_ctx(ibmf_client_t *clientp, ibmf_msg_impl_t *regmsgimplp);
741 
742 /* Alternate QP WQE cache functions */
743 int ibmf_altqp_send_wqe_cache_constructor(void *buf, void *cdrarg,
744     int kmflags);
745 void ibmf_altqp_send_wqe_cache_destructor(void *buf, void *cdrarg);
746 int ibmf_altqp_recv_wqe_cache_constructor(void *buf, void *cdrarg,
747     int kmflags);
748 void ibmf_altqp_recv_wqe_cache_destructor(void *buf, void *cdrarg);
749 int ibmf_i_init_altqp_wqes(ibmf_alt_qp_t *qp_ctx);
750 void ibmf_i_fini_altqp_wqes(ibmf_alt_qp_t *qp_ctx);
751 int ibmf_i_extend_wqe_cache(ibmf_ci_t *cip, ibmf_qp_handle_t ibmf_qp_handle,
752     boolean_t block);
753 
754 /* Receive callback functions */
755 void ibmf_i_recv_cb_setup(ibmf_client_t *clientp);
756 void ibmf_i_recv_cb_cleanup(ibmf_client_t *clientp);
757 void ibmf_i_alt_recv_cb_setup(ibmf_alt_qp_t *qpp);
758 void ibmf_i_alt_recv_cb_cleanup(ibmf_alt_qp_t *qpp);
759 
760 /* UD Dest population thread */
761 int ibmf_ud_dest_tq_disp(ibmf_ci_t *cip);
762 
763 #ifdef __cplusplus
764 }
765 #endif
766 
767 #endif /* _SYS_IB_MGT_IBMF_IBMF_IMPL_H */
768