xref: /titanic_51/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c (revision 2c2d21e98a95cba5687ec6574c974a5c6c4a6adb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
26 #include <sys/ib/ibtl/ibti.h>
27 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
28 
29 /*
30  * ibcm_ti.c
31  *	These routines implement the Communication Manager's interfaces to IBTL.
32  */
33 
34 /* CM rc recycle task args structure definition */
35 typedef struct ibcm_taskq_recycle_arg_s {
36 	ibt_channel_hdl_t	rc_chan;	/* RC channel handle */
37 	ibt_cep_flags_t		control;	/* CEP control flags */
38 	uint8_t			hca_port_num;	/* HCA port number */
39 	ibt_recycle_handler_t	func;		/* recycle handler */
40 	void			*arg;		/* presumably func's argument */
41 } ibcm_taskq_recycle_arg_t;
42 
43 _NOTE(READ_ONLY_DATA(ibcm_taskq_recycle_arg_s))
44 
45 static ibt_status_t	ibcm_init_reply_addr(ibcm_hca_info_t *hcap,
46     ibcm_mad_addr_t *reply_addr, ibt_chan_open_args_t *chan_args,
47     ibt_chan_open_flags_t flags, ib_time_t *cm_pkt_lt, ib_lid_t prim_slid);
48 static void		ibcm_process_abort_via_taskq(void *args);
49 static ibt_status_t	ibcm_process_rc_recycle_ret(void *recycle_arg);
50 static ibt_status_t	ibcm_process_join_mcg(void *taskq_arg);
51 static void		ibcm_process_async_join_mcg(void *tq_arg);
52 
53 ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *,
54     uint64_t c_mask, void *, size_t *);
55 
56 static ibt_status_t ibcm_close_rc_channel(ibt_channel_hdl_t channel,
57     ibcm_state_data_t *statep, ibt_execution_mode_t mode);
58 
59 /* Address Record management: DAPL Address Translation Service (ATS) */
60 #define	IBCM_DAPL_ATS_NAME	"DAPL Address Translation Service"
61 #define	IBCM_DAPL_ATS_SID	0x10000CE100415453ULL /* low 3 bytes = "ATS" */
62 #define	IBCM_DAPL_ATS_NBYTES	16
63 ibcm_svc_info_t *ibcm_ar_svcinfop;
64 ibcm_ar_t	*ibcm_ar_list;
65 
66 /*
67  * Tunable parameter to turn off the overriding of the pi_path_mtu value.
68  *	1 	By default, override the path record's pi_path_mtu value with
69  *		IB_MTU_1K for all RC channels. This is done only for
70  *		channels established on a Tavor HCA whose path's pi_path_mtu
71  *		is greater than IB_MTU_1K.
72  *	0	Do not override; use the path's pi_path_mtu as supplied.
73  */
74 int	ibcm_override_path_mtu = 1;
75 
76 #ifdef DEBUG
77 static void	ibcm_print_reply_addr(ibt_channel_hdl_t channel,
78 		    ibcm_mad_addr_t *cm_reply_addr);
79 #endif
80 
81 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_hdl}))
82 
83 /* access is controlled between ibcm_sm.c and ibcm_ti.c by CVs */
84 _NOTE(SCHEME_PROTECTS_DATA("Serialized access by CV", {ibt_rc_returns_t
85     ibt_ud_returns_t ibt_ap_returns_t ibt_ar_t}))
86 
87 /*
88  * Typically, clients initialize these args in one API call, and use them
89  * in another API call.
90  */
91 _NOTE(SCHEME_PROTECTS_DATA("Expected usage of ibtl api by client",
92     {ibt_path_info_s ibt_cep_path_s ibt_adds_vect_s ibt_mcg_info_s ib_gid_s
93     ibt_ud_dest_attr_s ibt_ud_dest_s ibt_srv_data_s ibt_redirect_info_s}))
94 
95 /*
96  * ibt_open_rc_channel()
97  *	ibt_open_rc_channel opens a communication channel on the specified
98  *	channel to the specified service. For connection service type qp's
99  *	the CM initiates the CEP to establish the connection and transitions
100  *	the QP/EEC to the "Ready to send" State modifying the QP/EEC's
101  *	attributes as necessary.
102  *	The implementation of this function assumes that alt path is different
103  *	from primary path. It is assumed that the Path functions ensure that.
104  *
105  * RETURN VALUES:
106  *	IBT_SUCCESS	on success (or respective failure on error)
107  */
108 ibt_status_t
109 ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags,
110     ibt_execution_mode_t mode, ibt_chan_open_args_t *chan_args,
111     ibt_rc_returns_t *ret_args)
112 {
113 	/* all fields that are related to REQ MAD formation */
114 
115 	ib_pkey_t		prim_pkey;
116 	ib_lid_t		primary_slid, alternate_slid;
117 	ib_qpn_t		local_qpn = 0;
118 	ib_guid_t		hca_guid;
119 	ib_qkey_t		local_qkey = 0;
120 	ib_eecn_t		local_eecn = 0;
121 	ib_eecn_t		remote_eecn = 0;
122 	boolean_t		primary_grh;
123 	boolean_t		alternate_grh = B_FALSE;
124 	ib_lid_t		base_lid;
125 	ib_com_id_t		local_comid;
126 	ibmf_msg_t		*ibmf_msg, *ibmf_msg_dreq;
127 	ibcm_req_msg_t		*req_msgp;
128 
129 	uint8_t			rdma_in, rdma_out;
130 	uint8_t			cm_retries;
131 	uint64_t		local_cm_proc_time;	/* In usec */
132 	uint8_t			local_cm_resp_time;	/* IB time */
133 	uint64_t		remote_cm_resp_time;	/* In usec */
134 	uint32_t		starting_psn = 0;
135 
136 	/* CM path related fields */
137 	ibmf_handle_t		ibmf_hdl;
138 	ibcm_qp_list_t		*cm_qp_entry;
139 	ibcm_mad_addr_t		cm_reply_addr;
140 
141 	uint8_t			cm_pkt_lt;
142 
143 	/* Local args for ibtl/internal CM functions called within */
144 	ibt_status_t		status;
145 	ibcm_status_t		lkup_status;
146 	ibt_qp_query_attr_t	qp_query_attr;
147 
148 	/* Other misc local args */
149 	ibt_priv_data_len_t	len;
150 	ibcm_hca_info_t		*hcap;
151 	ibcm_state_data_t	*statep;
152 	uint8_t			port_no;
153 
154 	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel(chan %p, %X, %x, %p, %p)",
155 	    channel, flags, mode, chan_args, ret_args);
156 
157 	if (IBCM_INVALID_CHANNEL(channel)) {
158 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: invalid channel");
159 		return (IBT_CHAN_HDL_INVALID);
160 	}
161 
162 	/* cm handler should always be specified */
163 	if (chan_args->oc_cm_handler == NULL) {
164 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
165 		    "CM handler is not be specified", channel);
166 		return (IBT_INVALID_PARAM);
167 	}
168 
169 	if (mode == IBT_NONBLOCKING) {
170 		if (ret_args != NULL) {
171 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
172 			    " ret_args should be NULL when called in "
173 			    "non-blocking mode", channel);
174 			return (IBT_INVALID_PARAM);
175 		}
176 	} else if (mode == IBT_BLOCKING) {
177 		if (ret_args == NULL) {
178 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
179 			    " ret_args should be Non-NULL when called in "
180 			    "blocking mode", channel);
181 			return (IBT_INVALID_PARAM);
182 		}
183 		if (ret_args->rc_priv_data_len > IBT_REP_PRIV_DATA_SZ) {
184 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
185 			    " private data length is too large", channel);
186 			return (IBT_INVALID_PARAM);
187 		}
188 		if ((ret_args->rc_priv_data_len > 0) &&
189 		    (ret_args->rc_priv_data == NULL)) {
190 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
191 			    " rc_priv_data_len > 0, but rc_priv_data NULL",
192 			    channel);
193 			return (IBT_INVALID_PARAM);
194 		}
195 	} else { /* any other mode is not valid for ibt_open_rc_channel */
196 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
197 		    "invalid mode %x specified", channel, mode);
198 		return (IBT_INVALID_PARAM);
199 	}
200 
201 	/*
202 	 * XXX: no support yet for ibt_chan_open_flags_t - IBT_OCHAN_DUP
203 	 */
204 	if (flags & IBT_OCHAN_DUP) {
205 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
206 		    "Unsupported Flags specified: 0x%X", channel, flags);
207 		return (IBT_INVALID_PARAM);
208 	}
209 
210 	if ((flags & IBT_OCHAN_REDIRECTED) &&
211 	    (flags & IBT_OCHAN_PORT_REDIRECTED)) {
212 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
213 		    "Illegal to specify IBT_OCHAN_REDIRECTED and "
214 		    "IBT_OCHAN_PORT_REDIRECTED flags together", channel);
215 		return (IBT_INVALID_PARAM);
216 	}
217 
218 	if (((flags & IBT_OCHAN_REDIRECTED) &&
219 	    (chan_args->oc_cm_redirect_info == NULL)) ||
220 	    ((flags & IBT_OCHAN_PORT_REDIRECTED) &&
221 	    (chan_args->oc_cm_cep_path == NULL))) {
222 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
223 		    "Redirect flag specified, but respective arg is NULL",
224 		    channel);
225 		return (IBT_INVALID_PARAM);
226 	}
227 
228 	if ((flags & IBT_OCHAN_REDIRECTED) &&
229 	    (chan_args->oc_cm_redirect_info->rdi_dlid == 0) &&
230 	    (chan_args->oc_cm_redirect_info->rdi_gid.gid_guid == 0)) {
231 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
232 		    "Either rdi_dlid or rdi_gid must be specified for"
233 		    " IBT_OCHAN_REDIRECTED", channel);
234 		return (IBT_INVALID_PARAM);
235 	}
236 
237 	/* primary dlid and hca_port_num should never be zero */
238 	port_no = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
239 
240 	if ((IBCM_PRIM_ADDS_VECT(chan_args).av_dlid == 0) && (port_no == 0)) {
241 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
242 		    "Primary Path's information is not valid", channel);
243 		return (IBT_INVALID_PARAM);
244 	}
245 
246 	/* validate SID */
247 	if (chan_args->oc_path->pi_sid == 0) {
248 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
249 		    "ERROR: Service ID in path information is 0", channel);
250 		return (IBT_INVALID_PARAM);
251 	}
252 	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p  SID %llX",
253 	    channel, chan_args->oc_path->pi_sid);
254 
255 	/* validate rnr_retry_cnt (enum has more than 3 bits) */
256 	if ((uint_t)chan_args->oc_path_rnr_retry_cnt > IBT_RNR_INFINITE_RETRY) {
257 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
258 		    "ERROR: oc_path_rnr_retry_cnt(%d) is out of range",
259 		    channel, chan_args->oc_path_rnr_retry_cnt);
260 		return (IBT_INVALID_PARAM);
261 	}
262 
263 	/*
264 	 * Ensure that client is not re-using a QP that is still associated
265 	 * with a statep
266 	 */
267 	IBCM_GET_CHAN_PRIVATE(channel, statep);
268 	if (statep != NULL) {
269 		IBCM_RELEASE_CHAN_PRIVATE(channel);
270 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
271 		    "Channel being re-used on active side", channel);
272 		return (IBT_CHAN_IN_USE);
273 	}
274 
275 	/* Get GUID from Channel */
276 	hca_guid = ibt_channel_to_hca_guid(channel);
277 
278 	/* validate QP's hca guid with that from primary path  */
279 	if (hca_guid != chan_args->oc_path->pi_hca_guid) {
280 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
281 		    "GUID from Channel and primary path don't match", channel);
282 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
283 		    "Channel GUID %llX primary path GUID %llX", channel,
284 		    hca_guid, chan_args->oc_path->pi_hca_guid);
285 		return (IBT_CHAN_HDL_INVALID);
286 	}
287 
288 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
289 	    "Local HCA GUID %llX", channel, hca_guid);
290 
291 	status = ibt_query_qp(channel, &qp_query_attr);
292 	if (status != IBT_SUCCESS) {
293 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
294 		    "ibt_query_qp failed %d", channel, status);
295 		return (status);
296 	}
297 
298 	/* If client specified "no port change on QP" */
299 	if ((qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
300 	    port_no) && (flags & IBT_OCHAN_PORT_FIXED)) {
301 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
302 		    "chan port %d and path port %d does not match", channel,
303 		    qp_query_attr.qp_info.qp_transport.rc.rc_path. \
304 		    cep_hca_port_num, port_no);
305 		return (IBT_INVALID_PARAM);
306 	}
307 
308 	if (qp_query_attr.qp_info.qp_trans != IBT_RC_SRV) {
309 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
310 		    "Invalid Channel type: Applicable only to RC Channel",
311 		    channel);
312 		return (IBT_CHAN_SRV_TYPE_INVALID);
313 	}
314 
315 	/* Check if QP is in INIT state or not */
316 	if (qp_query_attr.qp_info.qp_state != IBT_STATE_INIT) {
317 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
318 		    "QP is not in INIT state %x", channel,
319 		    qp_query_attr.qp_info.qp_state);
320 		return (IBT_CHAN_STATE_INVALID);
321 	}
322 
323 	local_qpn = qp_query_attr.qp_qpn;
324 
325 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p Active QPN 0x%x",
326 	    channel, local_qpn);
327 
328 #ifdef	NO_EEC_SUPPORT_YET
329 
330 	if (flags & IBT_OCHAN_RDC_EXISTS) {
331 		ibt_eec_query_attr_t	eec_query_attr;
332 
333 		local_qkey = qp_query_attr.qp_info.qp_transport.rd_qkey;
334 
335 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: RD");
336 
337 		status = ibt_query_eec(channel, &eec_query_attr);
338 		if (status != IBT_SUCCESS) {
339 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
340 			    " ibt_query_eec failed %d", channel, status);
341 			return (status);
342 		}
343 		local_eecn = eec_query_attr.eec_eecn;
344 	}
345 
346 #endif
347 	if (chan_args->oc_path->pi_prim_pkt_lt > ibcm_max_ib_pkt_lt) {
348 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
349 		    "Huge PktLifeTime %d, Max is %d", channel,
350 		    chan_args->oc_path->pi_prim_pkt_lt, ibcm_max_ib_pkt_lt);
351 		return (IBT_PATH_PKT_LT_TOO_HIGH);
352 	}
353 
354 	/* If no HCA found return failure */
355 	if ((hcap = ibcm_find_hca_entry(hca_guid)) == NULL) {
356 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
357 		    "hcap is NULL. Probably hca is not in active state",
358 		    channel);
359 		return (IBT_CHAN_HDL_INVALID);
360 	}
361 
362 	rdma_out = chan_args->oc_rdma_ra_out;
363 	rdma_in = chan_args->oc_rdma_ra_in;
364 
365 	if ((rdma_in > hcap->hca_max_rdma_in_qp) ||
366 	    (rdma_out > hcap->hca_max_rdma_out_qp)) {
367 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
368 		    "rdma in %d/out %d values exceed hca limits(%d/%d)",
369 		    channel, rdma_in, rdma_out, hcap->hca_max_rdma_in_qp,
370 		    hcap->hca_max_rdma_out_qp);
371 		ibcm_dec_hca_acc_cnt(hcap);
372 		return (IBT_INVALID_PARAM);
373 	}
374 
375 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
376 	    "rdma_in %d rdma_out %d", channel, rdma_in, rdma_out);
377 
378 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no,
379 	    NULL, &base_lid);
380 	if (status != IBT_SUCCESS) {
381 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
382 		    "primary port_num %d not active", channel, port_no);
383 		ibcm_dec_hca_acc_cnt(hcap);
384 		return (status);
385 	}
386 
387 	/* Validate P_KEY Index */
388 	status = ibt_index2pkey_byguid(hcap->hca_guid, port_no,
389 	    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix, &prim_pkey);
390 	if (status != IBT_SUCCESS) {
391 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
392 		    "Invalid Primary PKeyIx %x", channel,
393 		    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix);
394 		ibcm_dec_hca_acc_cnt(hcap);
395 		return (status);
396 	}
397 
398 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
399 	    "primary_port_num %d primary_pkey 0x%x", channel, port_no,
400 	    prim_pkey);
401 
402 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
403 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
404 	    != IBT_SUCCESS)) {
405 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
406 		    "ibmf reg or callback setup failed during re-initialize",
407 		    channel);
408 		ibcm_dec_hca_acc_cnt(hcap);
409 		return (status);
410 	}
411 
412 	ibmf_hdl = hcap->hca_port_info[port_no - 1].port_ibmf_hdl;
413 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
414 	    "primary ibmf_hdl = 0x%p", channel, ibmf_hdl);
415 
416 	primary_slid = base_lid + IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
417 
418 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: channel 0x%p "
419 	    "primary SLID = %x", channel, primary_slid);
420 
421 	/* check first if alternate path exists or not as it is OPTIONAL */
422 	if (IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num != 0) {
423 		uint8_t	alt_port_no;
424 
425 		alt_port_no = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
426 
427 		if (chan_args->oc_path->pi_alt_pkt_lt > ibcm_max_ib_pkt_lt) {
428 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
429 			    "Huge Alt Pkt lt %d", channel,
430 			    chan_args->oc_path->pi_alt_pkt_lt);
431 			ibcm_dec_hca_acc_cnt(hcap);
432 			return (IBT_PATH_PKT_LT_TOO_HIGH);
433 		}
434 
435 		if (port_no != alt_port_no) {
436 
437 			status = ibt_get_port_state_byguid(hcap->hca_guid,
438 			    alt_port_no, NULL, &base_lid);
439 			if (status != IBT_SUCCESS) {
440 
441 				IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
442 				    "chan 0x%p alt_port_num %d inactive %d",
443 				    channel, alt_port_no, status);
444 				ibcm_dec_hca_acc_cnt(hcap);
445 				return (status);
446 			}
447 
448 		}
449 		alternate_slid =
450 		    base_lid + IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
451 
452 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
453 		    "alternate SLID = %x", channel, alternate_slid);
454 	}
455 
456 	/*
457 	 * only pkey needs to be zero'ed, because all other fields are set in
458 	 * ibcm_init_reply_addr. But, let's bzero the complete struct for
459 	 * any future modifications.
460 	 */
461 	bzero(&cm_reply_addr, sizeof (cm_reply_addr));
462 
463 	/* Initialize the MAD destination address in stored_reply_addr */
464 	if ((status = ibcm_init_reply_addr(hcap, &cm_reply_addr, chan_args,
465 	    flags, &cm_pkt_lt, primary_slid)) != IBT_SUCCESS) {
466 
467 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
468 		    "ibcm_init_reply_addr failed status %d ", channel, status);
469 		ibcm_dec_hca_acc_cnt(hcap);
470 		return (status);
471 	}
472 
473 
474 	/* Initialize the pkey for CM MAD communication */
475 	if (cm_reply_addr.rcvd_addr.ia_p_key == 0)
476 		cm_reply_addr.rcvd_addr.ia_p_key = prim_pkey;
477 
478 #ifdef DEBUG
479 	ibcm_print_reply_addr(channel, &cm_reply_addr);
480 #endif
481 
482 	/* Retrieve an ibmf qp for sending CM MADs */
483 	if ((cm_qp_entry = ibcm_find_qp(hcap, port_no,
484 	    cm_reply_addr.rcvd_addr.ia_p_key)) == NULL) {
485 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
486 		    "unable to allocate ibmf qp for CM MADs", channel);
487 		ibcm_dec_hca_acc_cnt(hcap);
488 		return (IBT_INSUFF_RESOURCE);
489 	}
490 
491 
492 	if (ibcm_alloc_comid(hcap, &local_comid) != IBCM_SUCCESS) {
493 		ibcm_release_qp(cm_qp_entry);
494 		ibcm_dec_hca_acc_cnt(hcap);
495 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
496 		    " Unable to allocate comid", channel);
497 		return (IBT_INSUFF_KERNEL_RESOURCE);
498 	}
499 
500 	/* allocate an IBMF mad buffer (REQ) */
501 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg,
502 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
503 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
504 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
505 		ibcm_release_qp(cm_qp_entry);
506 		ibcm_free_comid(hcap, local_comid);
507 		ibcm_dec_hca_acc_cnt(hcap);
508 		return (status);
509 	}
510 
511 	/* allocate an IBMF mad buffer (DREQ) */
512 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg_dreq,
513 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
514 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
515 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
516 		(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
517 		ibcm_release_qp(cm_qp_entry);
518 		ibcm_free_comid(hcap, local_comid);
519 		ibcm_dec_hca_acc_cnt(hcap);
520 		return (status);
521 	}
522 
523 	/* Init to Init, if QP's port does not match with path information */
524 	if (qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
525 	    IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num) {
526 
527 		ibt_qp_info_t		qp_info;
528 		ibt_cep_modify_flags_t	cep_flags;
529 
530 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
531 		    "chan 0x%p chan port %d", channel,
532 		    qp_query_attr.qp_info.qp_transport.rc.rc_path.\
533 		    cep_hca_port_num);
534 
535 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
536 		    "chan 0x%p path port %d", channel, port_no);
537 
538 		bzero(&qp_info, sizeof (qp_info));
539 		/* For now, set it to RC type */
540 
541 		qp_info.qp_trans = IBT_RC_SRV;
542 		qp_info.qp_state = IBT_STATE_INIT;
543 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num = port_no;
544 
545 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
546 
547 		status = ibt_modify_qp(channel, cep_flags, &qp_info, NULL);
548 
549 		if (status != IBT_SUCCESS) {
550 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
551 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
552 			ibcm_release_qp(cm_qp_entry);
553 			ibcm_free_comid(hcap, local_comid);
554 			ibcm_dec_hca_acc_cnt(hcap);
555 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
556 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg_dreq);
557 			return (status);
558 		} else
559 			IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
560 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
561 	}
562 
563 	/* allocate ibcm_state_data_t before grabbing the WRITER lock */
564 	statep = kmem_zalloc(sizeof (ibcm_state_data_t), KM_SLEEP);
565 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
566 	lkup_status = ibcm_lookup_msg(IBCM_OUTGOING_REQ, local_comid, 0, 0,
567 	    hcap, &statep);
568 	rw_exit(&hcap->hca_state_rwlock);
569 
570 	/* CM should be seeing this for the first time */
571 	ASSERT(lkup_status == IBCM_LOOKUP_NEW);
572 
573 	/* Increment the hca's resource count */
574 	ibcm_inc_hca_res_cnt(hcap);
575 
576 	/* Once a resource created on hca, no need to hold the acc cnt */
577 	ibcm_dec_hca_acc_cnt(hcap);
578 
579 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
580 
581 	statep->timerid = 0;
582 	statep->local_hca_guid = hca_guid;
583 	statep->local_qpn = local_qpn;
584 	statep->stored_reply_addr.cm_qp_entry = cm_qp_entry;
585 	statep->prim_port = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
586 	statep->alt_port = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
587 
588 
589 	/* Save "statep" as channel's CM private data.  */
590 	statep->channel = channel;
591 	IBCM_SET_CHAN_PRIVATE(statep->channel, statep);
592 
593 	statep->stored_msg = ibmf_msg;
594 	statep->dreq_msg = ibmf_msg_dreq;
595 
596 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req_msgp))
597 
598 	/* Start filling in the REQ MAD */
599 	req_msgp = (ibcm_req_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
600 	req_msgp->req_local_comm_id = h2b32(local_comid);
601 	req_msgp->req_svc_id = h2b64(chan_args->oc_path->pi_sid);
602 	req_msgp->req_local_ca_guid = h2b64(hca_guid);
603 	req_msgp->req_local_qkey = h2b32(local_qkey);	/* for EEC/RD */
604 
605 	/* Bytes 32-35 are req_local_qpn and req_off_resp_resources */
606 	req_msgp->req_local_qpn_plus = h2b32(local_qpn << 8 | rdma_in);
607 
608 	/* Bytes 36-39 are req_local_eec_no and req_off_initiator_depth */
609 	req_msgp->req_local_eec_no_plus = h2b32(local_eecn << 8 | rdma_out);
610 
611 	if (flags & IBT_OCHAN_REMOTE_CM_TM)
612 		remote_cm_resp_time = chan_args->oc_remote_cm_time;
613 	else
614 		remote_cm_resp_time = ibcm_remote_response_time;
615 
616 	/*
617 	 * Bytes 40-43 - remote_eecn, remote_cm_resp_time, tran_type,
618 	 * IBT_CM_FLOW_CONTROL is always set by default.
619 	 */
620 	req_msgp->req_remote_eecn_plus = h2b32(
621 	    remote_eecn << 8 | (ibt_usec2ib(remote_cm_resp_time) & 0x1f) << 3 |
622 	    IBT_RC_SRV << 1 | IBT_CM_FLOW_CONTROL);
623 
624 	if (flags & IBT_OCHAN_LOCAL_CM_TM)
625 		local_cm_proc_time = chan_args->oc_local_cm_time;
626 	else
627 		local_cm_proc_time = ibcm_local_processing_time;
628 
629 	local_cm_resp_time = ibt_usec2ib(local_cm_proc_time +
630 	    2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt) +
631 	    ibcm_sw_delay);
632 
633 	/* save retry count */
634 	statep->cep_retry_cnt = chan_args->oc_path_retry_cnt;
635 
636 	if (flags & IBT_OCHAN_STARTING_PSN)
637 		starting_psn = chan_args->oc_starting_psn;
638 
639 	if (local_cm_resp_time > 0x1f)
640 		local_cm_resp_time = 0x1f;
641 
642 	/* Bytes 44-47 are req_starting_psn, local_cm_resp_time and retry_cnt */
643 	req_msgp->req_starting_psn_plus = h2b32(starting_psn << 8 |
644 	    local_cm_resp_time << 3 | statep->cep_retry_cnt);
645 
646 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
647 	    "Prim Pkt lt (IB time) 0x%x", channel,
648 	    chan_args->oc_path->pi_prim_pkt_lt);
649 
650 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
651 	    "local_cm_proc_time(usec) %d ", channel, local_cm_proc_time);
652 
653 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
654 	    "local_cm_resp_time(ib_time) %d", channel, local_cm_resp_time);
655 
656 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
657 	    "remote_cm_resp_time (usec) %d", channel, remote_cm_resp_time);
658 
659 	statep->starting_psn = starting_psn;
660 
661 	/* Pkey - bytes 48-49 */
662 	req_msgp->req_part_key = h2b16(prim_pkey);
663 
664 	if (flags & IBT_OCHAN_CM_RETRY)
665 		cm_retries = chan_args->oc_cm_retry_cnt;
666 	else
667 		cm_retries = ibcm_max_retries;
668 
669 	statep->max_cm_retries = statep->remaining_retry_cnt = cm_retries;
670 	req_msgp->req_max_cm_retries_plus = statep->max_cm_retries << 4;
671 
672 	/*
673 	 * Check whether SRQ is associated with this Channel, if yes, then
674 	 * set the SRQ Exists bit in the REQ.
675 	 */
676 	if (qp_query_attr.qp_srq != NULL) {
677 		req_msgp->req_max_cm_retries_plus |= (1 << 3);
678 	}
679 
680 	/*
681 	 * By default on Tavor, we override the PathMTU to 1K.
682 	 * To turn this off, set ibcm_override_path_mtu = 0.
683 	 */
684 	if (ibcm_override_path_mtu && IBCM_IS_HCA_TAVOR(hcap) &&
685 	    (chan_args->oc_path->pi_path_mtu > IB_MTU_1K)) {
686 		req_msgp->req_mtu_plus = IB_MTU_1K << 4 |
687 		    chan_args->oc_path_rnr_retry_cnt;
688 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU"
689 		    " overridden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
690 		    chan_args->oc_path->pi_path_mtu);
691 	} else
692 		req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 |
693 		    chan_args->oc_path_rnr_retry_cnt;
694 
695 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p CM retry cnt %d"
696 	    " staring PSN %x", channel, cm_retries, starting_psn);
697 
698 
699 #ifdef	NO_EEC_SUPPORT_YET
700 	if (flags & IBT_OCHAN_RDC_EXISTS)
701 		req_msgp->req_mtu_plus |= 8;
702 #endif
703 
704 	/* Initialize the "primary" port stuff next - bytes 52-95 */
705 	req_msgp->req_primary_l_port_lid = h2b16(primary_slid);
706 	req_msgp->req_primary_r_port_lid =
707 	    h2b16(IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
708 	req_msgp->req_primary_l_port_gid.gid_prefix =
709 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_prefix);
710 	req_msgp->req_primary_l_port_gid.gid_guid =
711 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_guid);
712 	req_msgp->req_primary_r_port_gid.gid_prefix =
713 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix);
714 	req_msgp->req_primary_r_port_gid.gid_guid =
715 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
716 	primary_grh = IBCM_PRIM_ADDS_VECT(chan_args).av_send_grh;
717 
718 	statep->remote_hca_guid = /* not correct, but helpful for debugging */
719 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid;
720 
721 	/* Bytes 88-91 - primary_flowlbl, and primary_srate */
722 	req_msgp->req_primary_flow_label_plus =
723 	    h2b32(((primary_grh == B_TRUE) ?
724 	    (IBCM_PRIM_ADDS_VECT(chan_args).av_flow << 12) : 0) |
725 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srate);
726 	req_msgp->req_primary_traffic_class = (primary_grh == B_TRUE) ?
727 	    IBCM_PRIM_ADDS_VECT(chan_args).av_tclass : 0;
728 	req_msgp->req_primary_hop_limit = (primary_grh == B_TRUE) ?
729 	    IBCM_PRIM_ADDS_VECT(chan_args).av_hop : 1;
730 	req_msgp->req_primary_sl_plus =
731 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srvl << 4 |
732 	    ((primary_grh == B_TRUE) ? 0 : 8);
733 
734 	req_msgp->req_primary_localtime_plus =
735 	    ibt_usec2ib((2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt)) +
736 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
737 
738 	IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan %p statep %p",
739 	    channel, statep);
740 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
741 	    "active hca_ack_delay (usec) %d", channel,
742 	    req_msgp->req_primary_localtime_plus);
743 
744 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
745 	    "Sent primary cep timeout (IB Time) %d", channel,
746 	    hcap->hca_ack_delay);
747 
748 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p prim_dlid %x ",
749 	    channel, IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
750 
751 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
752 	    "prim GID %llX:%llX", channel,
753 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix,
754 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
755 
756 	/* Initialize the "alternate" port stuff - optional */
757 	if (chan_args->oc_path->pi_alt_cep_path.cep_hca_port_num != 0) {
758 		ib_gid_t	tmp_gid;
759 
760 		req_msgp->req_alt_l_port_lid = h2b16(alternate_slid);
761 		req_msgp->req_alt_r_port_lid =
762 		    h2b16(IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
763 		/*
764 		 * doing all this as req_alt_r/l_port_gid is at offset
765 		 * 100, 116 which is not divisible by 8
766 		 */
767 
768 		tmp_gid.gid_prefix =
769 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix);
770 		tmp_gid.gid_guid =
771 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
772 		bcopy(&tmp_gid, &req_msgp->req_alt_r_port_gid[0],
773 		    sizeof (ib_gid_t));
774 		tmp_gid.gid_prefix =
775 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_prefix);
776 		tmp_gid.gid_guid =
777 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_guid);
778 
779 		bcopy(&tmp_gid, &req_msgp->req_alt_l_port_gid[0],
780 		    sizeof (ib_gid_t));
781 		alternate_grh = IBCM_ALT_ADDS_VECT(chan_args).av_send_grh;
782 
783 		/* Bytes 132-135 - alternate_flow_label, and alternate srate */
784 		req_msgp->req_alt_flow_label_plus = h2b32(
785 		    (((alternate_grh == B_TRUE) ?
786 		    (IBCM_ALT_ADDS_VECT(chan_args).av_flow << 12) : 0) |
787 		    IBCM_ALT_ADDS_VECT(chan_args).av_srate));
788 		req_msgp->req_alt_traffic_class = (alternate_grh == B_TRUE) ?
789 		    IBCM_ALT_ADDS_VECT(chan_args).av_tclass : 0;
790 		req_msgp->req_alt_hop_limit = (alternate_grh == B_TRUE) ?
791 		    IBCM_ALT_ADDS_VECT(chan_args).av_hop : 1;
792 		req_msgp->req_alt_sl_plus =
793 		    IBCM_ALT_ADDS_VECT(chan_args).av_srvl << 4 |
794 		    ((alternate_grh == B_TRUE) ? 0 : 8);
795 		req_msgp->req_alt_localtime_plus = ibt_usec2ib((2 *
796 		    ibt_ib2usec(chan_args->oc_path->pi_alt_pkt_lt)) +
797 		    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
798 
799 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
800 		    "alt_dlid %x ", channel,
801 		    IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
802 
803 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
804 		    "alt GID %llX:%llX", channel,
805 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix,
806 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
807 	}
808 
809 	len = min(chan_args->oc_priv_data_len, IBT_REQ_PRIV_DATA_SZ);
810 	if ((len > 0) && chan_args->oc_priv_data)
811 		bcopy(chan_args->oc_priv_data, req_msgp->req_private_data, len);
812 
813 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*req_msgp))
814 
815 	/* return_data is filled up in the state machine code */
816 	if (ret_args != NULL) {
817 		statep->open_return_data = ret_args;
818 	}
819 
820 	/* initialize some statep fields here */
821 	statep->mode = IBCM_ACTIVE_MODE;
822 	statep->hcap = hcap;
823 
824 	statep->cm_handler = chan_args->oc_cm_handler;
825 	statep->state_cm_private = chan_args->oc_cm_clnt_private;
826 
827 	statep->pkt_life_time =
828 	    ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt);
829 
830 	statep->timer_value = ibt_ib2usec(ibt_usec2ib(
831 	    2 * ibt_ib2usec(cm_pkt_lt) + remote_cm_resp_time));
832 
833 	/* Initialize statep->stored_reply_addr */
834 	statep->stored_reply_addr.ibmf_hdl = ibmf_hdl;
835 
836 	/* Initialize stored reply addr fields */
837 	statep->stored_reply_addr.grh_hdr = cm_reply_addr.grh_hdr;
838 	statep->stored_reply_addr.rcvd_addr = cm_reply_addr.rcvd_addr;
839 	statep->stored_reply_addr.grh_exists = cm_reply_addr.grh_exists;
840 	statep->stored_reply_addr.port_num = cm_reply_addr.port_num;
841 
842 	/*
843 	 * The IPD on local/active side is calculated by path functions,
844 	 * hence available in the args of ibt_open_rc_channel
845 	 */
846 	statep->local_srate = IBCM_PRIM_ADDS_VECT(chan_args).av_srate;
847 	statep->local_alt_srate = IBCM_ALT_ADDS_VECT(chan_args).av_srate;
848 
849 	/* Store the source path bits for primary and alt paths */
850 	statep->prim_src_path_bits = IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
851 	statep->alt_src_path_bits = IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
852 
853 	statep->open_flow = 1;
854 	statep->open_done = B_FALSE;
855 	statep->state = statep->timer_stored_state = IBCM_STATE_REQ_SENT;
856 	IBCM_REF_CNT_INCR(statep);	/* Decremented before return */
857 	IBCM_REF_CNT_INCR(statep);	/* Decremented after REQ is posted */
858 	statep->send_mad_flags |= IBCM_REQ_POST_BUSY;
859 
860 	/*
861 	 * Skip moving channel to error state during close, for OFUV clients.
862 	 * OFUV clients transition the channel to error state by itself.
863 	 */
864 	if (flags & IBT_OCHAN_OFUV)
865 		statep->is_this_ofuv_chan = B_TRUE;
866 
867 	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
868 	    h2b16(IBCM_INCOMING_REQ + IBCM_ATTR_BASE_ID);
869 
870 	IBCM_OUT_HDRP(statep->stored_msg)->TransactionID =
871 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_REQ, statep->local_comid,
872 	    0));
873 
874 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
875 
876 	ibtl_cm_chan_is_opening(channel);
877 
878 	ibcm_open_enqueue(statep);
879 
880 	mutex_enter(&statep->state_mutex);
881 
882 	if (mode == IBT_BLOCKING) {
883 
884 		/* wait for REQ/REP/RTU */
885 		while (statep->open_done != B_TRUE) {
886 			cv_wait(&statep->block_client_cv, &statep->state_mutex);
887 		}
888 
889 		/*
890 		 * In the case that open_channel() fails because of a
891 		 * REJ or timeout, change retval to IBT_CM_FAILURE
892 		 */
893 		if (statep->open_return_data->rc_status != IBT_CM_SUCCESS) {
894 			status = IBT_CM_FAILURE;
895 			ibtl_cm_chan_open_is_aborted(channel);
896 		}
897 
898 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p "
899 		    "ret status %d cm status %d", channel, status,
900 		    statep->open_return_data->rc_status);
901 	}
902 
903 	/* decrement the ref-count before leaving here */
904 	IBCM_REF_CNT_DECR(statep);
905 
906 	mutex_exit(&statep->state_mutex);
907 
908 	IBTF_DPRINTF_L4(cmlog, "ibt_open_rc_channel: chan 0x%p done", channel);
909 	return (status);
910 }
911 
912 /*
913  * ibcm_init_reply_addr:
914  *
915  * The brief description of functionality below.
916  *
917  * For IBT_OCHAN_PORT_REDIRECTED (ie., port redirected case):
918  *	Build CM path from chan_args->oc_cm_cep_path
919  *	Set CM pkt lt (ie.,life time) to chan_args->oc_cm_pkt_lt
920  *
921  * For IBT_OCHAN_REDIRECTED (ie., port and CM redirected case):
922  *	If Redirect LID is specified,
923  *		If Redirect GID is not specified or specified to be on the same
924  *		    subnet, then
925  *			Build CM path from chan_args->oc_cm_redirect_info
926  *			Set CM pkt lt to subnet timeout
927  *		Else (ie., GID specified, but on a different subnet)
928  *			Do a path lookup to build CM Path and set CM pkt lt
929  *
930  */
931 static ibt_status_t
932 ibcm_init_reply_addr(ibcm_hca_info_t *hcap, ibcm_mad_addr_t *reply_addr,
933     ibt_chan_open_args_t *chan_args, ibt_chan_open_flags_t flags,
934     ib_time_t *cm_pkt_lt, ib_lid_t prim_slid)
935 {
936 	ibt_adds_vect_t	*cm_adds;
937 	ibt_path_info_t	path;
938 	boolean_t	cm_grh;
939 	ibt_status_t	status;
940 
941 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_reply_addr:");
942 
943 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*reply_addr))
944 
945 	/*
946 	 * sending side CM lid/gid/port num are not based on any redirect
947 	 * params. These values are set to primary RC path lid/gid/port num.
948 	 * In the future, these values can be set based on framework policy
949 	 * decisions ensuring reachability.
950 	 */
951 	reply_addr->grh_hdr.ig_sender_gid =
952 	    IBCM_PRIM_ADDS_VECT(chan_args).av_sgid;
953 	reply_addr->rcvd_addr.ia_local_lid = prim_slid;
954 	reply_addr->port_num = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
955 
956 	if (flags & IBT_OCHAN_PORT_REDIRECTED) {
957 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
958 		    "IBT_OCHAN_PORT_REDIRECTED specified");
959 
960 		status = ibt_index2pkey_byguid(hcap->hca_guid,
961 		    chan_args->oc_cm_cep_path->cep_hca_port_num,
962 		    chan_args->oc_cm_cep_path->cep_pkey_ix,
963 		    &reply_addr->rcvd_addr.ia_p_key);
964 
965 		if (status != IBT_SUCCESS) {
966 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_rely_addr: Invalid "
967 			    "CM PKeyIx %x port_num %x",
968 			    chan_args->oc_cm_cep_path->cep_pkey_ix,
969 			    chan_args->oc_cm_cep_path->cep_hca_port_num);
970 			return (status);
971 		}
972 
973 		cm_adds = &(chan_args->oc_cm_cep_path->cep_adds_vect);
974 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: dlid = %x",
975 		    cm_adds->av_dlid);
976 
977 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
978 		reply_addr->rcvd_addr.ia_remote_qno = 1;
979 		*cm_pkt_lt = chan_args->oc_cm_pkt_lt;
980 
981 	} else if (flags & IBT_OCHAN_REDIRECTED) {
982 		ibt_redirect_info_t	*redirect_info;
983 		ibt_hca_portinfo_t	*port_infop;
984 		uint_t			psize, nports;
985 
986 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
987 		    "IBT_OCHAN_REDIRECTED specified");
988 
989 		redirect_info = chan_args->oc_cm_redirect_info;
990 
991 		if ((redirect_info->rdi_gid.gid_prefix == 0) ||
992 		    (redirect_info->rdi_gid.gid_guid == 0)) {
993 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
994 			    "ERROR: Re-direct GID value NOT Provided.");
995 			return (IBT_INVALID_PARAM);
996 		}
997 
998 		/* As per spec definition 1.1, it's always IB_GSI_QKEY */
999 		reply_addr->rcvd_addr.ia_q_key = redirect_info->rdi_qkey;
1000 		reply_addr->rcvd_addr.ia_remote_qno = redirect_info->rdi_qpn;
1001 		reply_addr->rcvd_addr.ia_p_key = redirect_info->rdi_pkey;
1002 
1003 		/*
1004 		 * if LID is non-zero in classportinfo then use classportinfo
1005 		 * fields to form CM MAD destination address.
1006 		 */
1007 		if (redirect_info->rdi_dlid != 0) {
1008 			status = ibtl_cm_query_hca_ports_byguid(hcap->hca_guid,
1009 			    reply_addr->port_num, &port_infop, &nports, &psize);
1010 			if ((status != IBT_SUCCESS) || (nports == 0)) {
1011 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1012 				    "Query Ports Failed: %d", status);
1013 				return (status);
1014 			} else if (port_infop->p_subnet_timeout >
1015 			    ibcm_max_ib_pkt_lt) {
1016 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1017 				    "large subnet timeout %x port_no %x",
1018 				    port_infop->p_subnet_timeout,
1019 				    reply_addr->port_num);
1020 				ibt_free_portinfo(port_infop, psize);
1021 				return (IBT_PATH_PKT_LT_TOO_HIGH);
1022 			} else {
1023 				IBTF_DPRINTF_L3(cmlog, "ibcm_init_reply_addr: "
1024 				    "subnet timeout %x port_no %x",
1025 				    port_infop->p_subnet_timeout,
1026 				    reply_addr->port_num);
1027 
1028 				*cm_pkt_lt =
1029 				    ibt_ib2usec(min(ibcm_max_ib_mad_pkt_lt,
1030 				    port_infop->p_subnet_timeout));
1031 
1032 				ibt_free_portinfo(port_infop, psize);
1033 			}
1034 
1035 			reply_addr->rcvd_addr.ia_remote_lid =
1036 			    redirect_info->rdi_dlid;
1037 			reply_addr->rcvd_addr.ia_service_level =
1038 			    redirect_info->rdi_sl;
1039 			reply_addr->grh_exists = B_TRUE;
1040 			reply_addr->grh_hdr.ig_recver_gid =
1041 			    redirect_info->rdi_gid;
1042 			reply_addr->grh_hdr.ig_tclass =
1043 			    redirect_info->rdi_tclass;
1044 			reply_addr->grh_hdr.ig_flow_label =
1045 			    redirect_info->rdi_flow;
1046 
1047 			/* Classportinfo doesn't have hoplimit field */
1048 			reply_addr->grh_hdr.ig_hop_limit = 1;
1049 			return (IBT_SUCCESS);
1050 
1051 		} else {
1052 			ibt_path_attr_t	path_attr;
1053 			ib_gid_t	path_dgid[1];
1054 
1055 			/*
1056 			 * If GID is specified, and LID is zero in classportinfo
1057 			 * do a path lookup using specified GID, Pkey,
1058 			 * in classportinfo
1059 			 */
1060 
1061 			bzero(&path_attr, sizeof (path_attr));
1062 
1063 			path_attr.pa_dgids = &path_dgid[0];
1064 			path_attr.pa_dgids[0] = redirect_info->rdi_gid;
1065 
1066 			/*
1067 			 * use reply_addr below, as sender_gid in reply_addr
1068 			 * may have been set above based on some policy decision
1069 			 * for originating end point for CM MADs above
1070 			 */
1071 			path_attr.pa_sgid = reply_addr->grh_hdr.ig_sender_gid;
1072 			path_attr.pa_num_dgids = 1;
1073 			path_attr.pa_pkey = redirect_info->rdi_pkey;
1074 
1075 			if ((status = ibt_get_paths(ibcm_ibt_handle,
1076 			    IBT_PATH_PKEY, &path_attr, 1, &path, NULL)) !=
1077 			    IBT_SUCCESS)
1078 				return (status);
1079 
1080 			/* Initialize cm_adds */
1081 			cm_adds = &path.pi_prim_cep_path.cep_adds_vect;
1082 			*cm_pkt_lt = path.pi_prim_pkt_lt;
1083 		}
1084 
1085 	} else	{ /* cm_pkey initialized in ibt_open_rc_channel */
1086 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
1087 		reply_addr->rcvd_addr.ia_remote_qno = 1;
1088 		*cm_pkt_lt = chan_args->oc_path->pi_prim_pkt_lt;
1089 		cm_adds = &(IBCM_PRIM_ADDS_VECT(chan_args));
1090 	}
1091 
1092 
1093 	cm_grh = cm_adds->av_send_grh;
1094 	reply_addr->grh_exists = cm_grh;
1095 
1096 	reply_addr->rcvd_addr.ia_remote_lid =
1097 	    cm_adds->av_dlid;
1098 	reply_addr->grh_hdr.ig_recver_gid =
1099 	    cm_adds->av_dgid;
1100 	reply_addr->grh_hdr.ig_flow_label =
1101 	    cm_adds->av_flow & IB_GRH_FLOW_LABEL_MASK;
1102 	reply_addr->grh_hdr.ig_tclass =
1103 	    (cm_grh == B_TRUE) ? cm_adds->av_tclass : 0;
1104 	reply_addr->grh_hdr.ig_hop_limit =
1105 	    (cm_grh == B_TRUE) ? cm_adds->av_hop : 1;
1106 	reply_addr->rcvd_addr.ia_service_level =
1107 	    cm_adds->av_srvl;
1108 
1109 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*reply_addr))
1110 
1111 	return (IBT_SUCCESS);
1112 }
1113 
1114 
1115 /*
1116  * ibt_prime_close_rc_channel()
1117  *	It allocates resources required for close channel operation, so
1118  *	ibt_close_rc_channel can be called from interrupt routine.
1119  *
1120  * INPUTS:
1121  *	channel			The address of an ibt_channel_t struct that
1122  *				specifies the channel to open.
1123  *
1124  * RETURN VALUES:
1125  *	IBT_SUCCESS	on success(or respective failure on error)
1126  *
1127  * Clients are typically expected to call this function in established state
1128  */
1129 ibt_status_t
1130 ibt_prime_close_rc_channel(ibt_channel_hdl_t channel)
1131 {
1132 	ibcm_state_data_t	*statep;
1133 	ibt_status_t		status = IBT_SUCCESS;
1134 
1135 	IBTF_DPRINTF_L3(cmlog, "ibt_prime_close_rc_channel(%p)", channel);
1136 
1137 	/* validate channel, first */
1138 	if (IBCM_INVALID_CHANNEL(channel)) {
1139 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1140 		    "invalid channel", channel);
1141 		return (IBT_CHAN_HDL_INVALID);
1142 	}
1143 
1144 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1145 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1146 		    "Invalid Channel type: Applicable only to RC Channel",
1147 		    channel);
1148 		return (IBT_CHAN_SRV_TYPE_INVALID);
1149 	}
1150 
1151 	/* get the statep */
1152 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1153 
1154 	/*
1155 	 * This can happen, if the statep is already gone by a DREQ from
1156 	 * the remote side
1157 	 */
1158 
1159 	if (statep == NULL) {
1160 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1161 		    "statep NULL", channel);
1162 		return (IBT_SUCCESS);
1163 	}
1164 
1165 	mutex_enter(&statep->state_mutex);
1166 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1167 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1168 		mutex_exit(&statep->state_mutex);
1169 		return (IBT_CHAN_STATE_INVALID);
1170 	}
1171 	IBCM_REF_CNT_INCR(statep);
1172 	IBTF_DPRINTF_L4(cmlog, "ibt_prime_close_rc_channel: chan 0x%p statep %p"
1173 	    " state %x", channel, statep, statep->state);
1174 	mutex_exit(&statep->state_mutex);
1175 
1176 	/* clients could pre-allocate dreq mad, even before connection est */
1177 	if (statep->dreq_msg == NULL)
1178 		status = ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
1179 		    &statep->dreq_msg, MAD_METHOD_SEND);
1180 
1181 	mutex_enter(&statep->state_mutex);
1182 	IBCM_REF_CNT_DECR(statep);
1183 	mutex_exit(&statep->state_mutex);
1184 
1185 	if (status != IBT_SUCCESS) {
1186 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1187 		    "ibcm_alloc_out_msg failed ", channel);
1188 		return (status);
1189 	}
1190 
1191 	/* If this message isn't seen then ibt_prime_close_rc_channel failed */
1192 	IBTF_DPRINTF_L5(cmlog, "ibt_prime_close_rc_channel: chan 0x%p done",
1193 	    channel);
1194 
1195 	return (IBT_SUCCESS);
1196 }
1197 
1198 /*
1199  * ibt_close_rc_channel()
1200  *	It closes an established channel.
1201  *
1202  * RETURN VALUES:
1203  *	IBT_SUCCESS	on success(or respective failure on error)
1204  */
1205 ibt_status_t
1206 ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
1207     void *priv_data, ibt_priv_data_len_t priv_data_len, uint8_t *ret_status,
1208     void *ret_priv_data, ibt_priv_data_len_t *ret_priv_data_len_p)
1209 {
1210 	ibcm_state_data_t	*statep;
1211 
1212 	IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel(%p, %x, %p, %d, %p)",
1213 	    channel, mode, priv_data, priv_data_len,
1214 	    (ret_priv_data_len_p == NULL) ? 0 : *ret_priv_data_len_p);
1215 
1216 	/* validate channel, first */
1217 	if (IBCM_INVALID_CHANNEL(channel)) {
1218 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1219 		    "invalid channel", channel);
1220 		return (IBT_CHAN_HDL_INVALID);
1221 	}
1222 
1223 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1224 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1225 		    "Invalid Channel type: Applicable only to RC Channel",
1226 		    channel);
1227 		return (IBT_CHAN_SRV_TYPE_INVALID);
1228 	}
1229 
1230 	if (mode == IBT_BLOCKING) {
1231 		/* valid only for BLOCKING MODE */
1232 		if ((ret_priv_data_len_p != NULL) &&
1233 		    (*ret_priv_data_len_p > IBT_DREP_PRIV_DATA_SZ)) {
1234 			IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p"
1235 			    " private data len %d is too large", channel,
1236 			    *ret_priv_data_len_p);
1237 			return (IBT_INVALID_PARAM);
1238 		}
1239 	} else if ((mode != IBT_NONBLOCKING) && (mode != IBT_NOCALLBACKS)) {
1240 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1241 		    "invalid mode %x specified", channel, mode);
1242 		return (IBT_INVALID_PARAM);
1243 	}
1244 
1245 	if (ibtl_cm_is_chan_closing(channel) ||
1246 	    ibtl_cm_is_chan_closed(channel)) {
1247 		if (ret_status)
1248 			*ret_status = IBT_CM_CLOSED_ALREADY;
1249 
1250 		/* No private data to return to the client */
1251 		if (ret_priv_data_len_p != NULL)
1252 			*ret_priv_data_len_p = 0;
1253 
1254 		if ((mode == IBT_BLOCKING) ||
1255 		    (mode == IBT_NOCALLBACKS)) {
1256 			IBCM_GET_CHAN_PRIVATE(channel, statep);
1257 			if (statep == NULL)
1258 				return (IBT_SUCCESS);
1259 			mutex_enter(&statep->state_mutex);
1260 			IBCM_RELEASE_CHAN_PRIVATE(channel);
1261 			IBCM_REF_CNT_INCR(statep);
1262 			while (statep->close_done != B_TRUE)
1263 				cv_wait(&statep->block_client_cv,
1264 				    &statep->state_mutex);
1265 			IBCM_REF_CNT_DECR(statep);
1266 			mutex_exit(&statep->state_mutex);
1267 		}
1268 
1269 		IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p "
1270 		    "already marked for closing", channel);
1271 
1272 		return (IBT_SUCCESS);
1273 	}
1274 
1275 	/* get the statep */
1276 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1277 	if (statep == NULL) {
1278 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1279 		    "statep NULL", channel);
1280 		return (IBT_CHAN_STATE_INVALID);
1281 	}
1282 
1283 	mutex_enter(&statep->state_mutex);
1284 
1285 	if (statep->dreq_msg == NULL) {
1286 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1287 		    "Fatal Error: dreq_msg is NULL", channel);
1288 		IBCM_RELEASE_CHAN_PRIVATE(channel);
1289 		mutex_exit(&statep->state_mutex);
1290 		return (IBT_CHAN_STATE_INVALID);
1291 	}
1292 
1293 	if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) {
1294 		statep->close_ret_priv_data = NULL;
1295 		statep->close_ret_priv_data_len = NULL;
1296 	} else {
1297 		statep->close_ret_priv_data = ret_priv_data;
1298 		statep->close_ret_priv_data_len = ret_priv_data_len_p;
1299 	}
1300 
1301 	priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ);
1302 	if ((priv_data != NULL) && (priv_data_len > 0)) {
1303 		bcopy(priv_data, ((ibcm_dreq_msg_t *)
1304 		    IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data,
1305 		    priv_data_len);
1306 	}
1307 	statep->close_ret_status = ret_status;
1308 
1309 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1310 	IBCM_REF_CNT_INCR(statep);
1311 
1312 	if (mode != IBT_NONBLOCKING) {
1313 		return (ibcm_close_rc_channel(channel, statep, mode));
1314 	}
1315 
1316 	/* IBT_NONBLOCKING */
1317 	ibcm_close_enqueue(statep);
1318 	mutex_exit(&statep->state_mutex);
1319 
1320 	return (IBT_SUCCESS);
1321 }
1322 
1323 void
1324 ibcm_close_start(ibcm_state_data_t *statep)
1325 {
1326 	mutex_enter(&statep->state_mutex);
1327 	(void) ibcm_close_rc_channel(statep->channel, statep, IBT_NONBLOCKING);
1328 }
1329 
1330 static
1331 ibt_status_t
1332 ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t *statep,
1333     ibt_execution_mode_t mode)
1334 {
1335 	ibcm_hca_info_t		*hcap;
1336 
1337 	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex));
1338 	ASSERT(MUTEX_HELD(&statep->state_mutex));
1339 
1340 	IBTF_DPRINTF_L3(cmlog, "ibcm_close_rc_channel: chan 0x%p statep %p",
1341 	    channel, statep);
1342 
1343 	hcap = statep->hcap;
1344 
1345 	/* HCA must have been in active state. If not, it's a client bug */
1346 	if (!IBCM_ACCESS_HCA_OK(hcap)) {
1347 		IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p "
1348 		    "hcap 0x%p not active", channel, hcap);
1349 		IBCM_REF_CNT_DECR(statep);
1350 		mutex_exit(&statep->state_mutex);
1351 		return (IBT_CHAN_HDL_INVALID);
1352 	}
1353 
1354 	if (statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
1355 		while (statep->cep_in_rts == IBCM_BLOCK)
1356 			cv_wait(&statep->block_mad_cv, &statep->state_mutex);
1357 	}
1358 
1359 	/* Do TRANSIENT_DREQ check after TRANSIENT_ESTABLISHED check */
1360 	while (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT)
1361 		cv_wait(&statep->block_mad_cv, &statep->state_mutex);
1362 
1363 	IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
1364 	    "connection state is %x", channel, statep->state);
1365 
1366 	/* If state is in pre-established states, abort the connection est */
1367 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1368 		statep->cm_retries++;	/* ensure connection trace is dumped */
1369 
1370 		/* No DREP private data possible */
1371 		if (statep->close_ret_priv_data_len != NULL)
1372 			*statep->close_ret_priv_data_len = 0;
1373 
1374 		/*
1375 		 * If waiting for a response mad, then cancel the timer,
1376 		 * and delete the connection
1377 		 */
1378 		if (statep->state == IBCM_STATE_REQ_SENT ||
1379 		    statep->state == IBCM_STATE_REP_SENT ||
1380 		    statep->state == IBCM_STATE_REP_WAIT ||
1381 		    statep->state == IBCM_STATE_MRA_REP_RCVD) {
1382 			timeout_id_t		timer_val = statep->timerid;
1383 			ibcm_conn_state_t	old_state;
1384 
1385 			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
1386 			    "chan 0x%p connection aborted in state %x", channel,
1387 			    statep->state);
1388 
1389 			old_state = statep->state;
1390 			statep->state = IBCM_STATE_DELETE;
1391 
1392 			if (mode == IBT_NONBLOCKING) {
1393 				if (taskq_dispatch(ibcm_taskq,
1394 				    ibcm_process_abort_via_taskq, statep,
1395 				    TQ_NOSLEEP) == 0) {
1396 
1397 					IBCM_REF_CNT_DECR(statep);
1398 					statep->state = old_state;
1399 					mutex_exit(&statep->state_mutex);
1400 					return (IBT_INSUFF_KERNEL_RESOURCE);
1401 				}	/* if taskq_dispatch succeeds */
1402 				/* Cancel the timer */
1403 				statep->timerid = 0;
1404 				mutex_exit(&statep->state_mutex);
1405 			} else {
1406 				/* Cancel the timer */
1407 				statep->timerid = 0;
1408 				mutex_exit(&statep->state_mutex);
1409 				(void) taskq_dispatch(ibcm_taskq,
1410 				    ibcm_process_abort_via_taskq, statep,
1411 				    TQ_SLEEP);
1412 			}
1413 
1414 			/* cancel the currently running timer */
1415 			if (timer_val != 0)
1416 				(void) untimeout(timer_val);
1417 
1418 			/* wait until cm handler returns for BLOCKING cases */
1419 			mutex_enter(&statep->state_mutex);
1420 			if ((mode == IBT_BLOCKING) ||
1421 			    (mode == IBT_NOCALLBACKS)) {
1422 				while (statep->close_done != B_TRUE)
1423 					cv_wait(&statep->block_client_cv,
1424 					    &statep->state_mutex);
1425 			}
1426 
1427 			if (statep->close_ret_status)
1428 				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
1429 			mutex_exit(&statep->state_mutex);
1430 
1431 			/*
1432 			 * It would ideal to post a REJ MAD, but that would
1433 			 * be non-conformance to spec. Hence, delete the state
1434 			 * data. Assuming that happens quickly, any retransmits
1435 			 * from the remote are replied by CM with reject
1436 			 * reason " no valid com id". That would stop remote
1437 			 * sending any more MADs.
1438 			 */
1439 			ibcm_delete_state_data(statep);
1440 			return (IBT_SUCCESS);
1441 
1442 		/* if CM busy in cm handler, wait until cm handler returns */
1443 		} else if (statep->state == IBCM_STATE_REQ_RCVD ||
1444 		    statep->state == IBCM_STATE_REP_RCVD ||
1445 		    statep->state == IBCM_STATE_MRA_SENT ||
1446 		    statep->state == IBCM_STATE_MRA_REP_SENT) {
1447 
1448 			/* take control of statep */
1449 			statep->abort_flag |= IBCM_ABORT_CLIENT;
1450 
1451 			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
1452 			    "chan 0x%p connection aborted in state = %x",
1453 			    channel, statep->state);
1454 
1455 			/*
1456 			 * wait until state machine modifies qp state to error,
1457 			 * including disassociating statep and QP
1458 			 */
1459 			if ((mode == IBT_BLOCKING) || (mode == IBT_NOCALLBACKS))
1460 				while (statep->close_done != B_TRUE)
1461 					cv_wait(&statep->block_client_cv,
1462 					    &statep->state_mutex);
1463 
1464 			/* a sanity setting */
1465 			if (mode == IBT_NOCALLBACKS)
1466 				statep->cm_handler = NULL;
1467 			IBCM_REF_CNT_DECR(statep);
1468 
1469 			/*
1470 			 * In rare situations, connection attempt could be
1471 			 * terminated for some other reason, before abort is
1472 			 * processed, but CM still returns ret_status as abort
1473 			 */
1474 			if (statep->close_ret_status)
1475 				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
1476 			mutex_exit(&statep->state_mutex);
1477 
1478 			/*
1479 			 * REJ MAD is posted by the CM state machine for this
1480 			 * case, hence state structure is deleted in the
1481 			 * state machine processing.
1482 			 */
1483 			return (IBT_SUCCESS);
1484 
1485 		} else if ((statep->state == IBCM_STATE_TIMEWAIT) ||
1486 		    (statep->state == IBCM_STATE_DELETE)) {
1487 
1488 			/* State already in timewait, so no return priv data */
1489 			IBCM_REF_CNT_DECR(statep);
1490 
1491 			/* The teardown has already been done */
1492 			if (statep->close_ret_status)
1493 				*statep->close_ret_status =
1494 				    IBT_CM_CLOSED_ALREADY;
1495 			mutex_exit(&statep->state_mutex);
1496 
1497 			return (IBT_SUCCESS);
1498 
1499 		} else if ((statep->state == IBCM_STATE_DREQ_RCVD) ||
1500 		    (statep->state == IBCM_STATE_DREQ_SENT) ||
1501 		    (statep->state == IBCM_STATE_DREP_RCVD) ||
1502 		    ((statep->state == IBCM_STATE_TIMED_OUT) &&
1503 		    (statep->timedout_state == IBCM_STATE_DREQ_SENT))) {
1504 
1505 			/*
1506 			 * Either the remote or local client has already
1507 			 * initiated the teardown.  IBCM_STATE_DREP_RCVD is
1508 			 * possible, if CM initiated teardown without client's
1509 			 * knowledge, for stale handling, etc.,
1510 			 */
1511 			if (mode == IBT_NOCALLBACKS) {
1512 				if (statep->close_nocb_state == IBCM_UNBLOCK) {
1513 					statep->close_nocb_state = IBCM_FAIL;
1514 					/* enable free qp after return */
1515 					ibtl_cm_chan_is_closing(
1516 					    statep->channel);
1517 				} else while (statep->close_nocb_state ==
1518 				    IBCM_BLOCK)
1519 					cv_wait(&statep->block_client_cv,
1520 					    &statep->state_mutex);
1521 				statep->cm_handler = NULL; /* sanity setting */
1522 				if (statep->close_ret_status)
1523 					*statep->close_ret_status =
1524 					    IBT_CM_CLOSED_ALREADY;
1525 			} else if (mode == IBT_BLOCKING) {
1526 				/* wait until state is moved to timewait */
1527 				while (statep->close_done != B_TRUE)
1528 					cv_wait(&statep->block_client_cv,
1529 					    &statep->state_mutex);
1530 			}
1531 
1532 			IBCM_REF_CNT_DECR(statep);
1533 			mutex_exit(&statep->state_mutex);
1534 
1535 			/* ret_status is set in state machine code */
1536 			return (IBT_SUCCESS);
1537 
1538 		} else if (statep->state == IBCM_STATE_TIMED_OUT) {
1539 
1540 			if ((mode == IBT_BLOCKING) ||
1541 			    (mode == IBT_NOCALLBACKS)) {
1542 
1543 				/*
1544 				 * wait until cm handler invocation and
1545 				 * disassociation between statep and channel
1546 				 * is complete
1547 				 */
1548 				while (statep->close_done != B_TRUE)
1549 					cv_wait(&statep->block_client_cv,
1550 					    &statep->state_mutex);
1551 			}
1552 
1553 			if (statep->close_ret_status)
1554 				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
1555 			IBCM_REF_CNT_DECR(statep);
1556 			mutex_exit(&statep->state_mutex);
1557 
1558 			return (IBT_SUCCESS);
1559 		} else {
1560 			IBCM_REF_CNT_DECR(statep);
1561 			mutex_exit(&statep->state_mutex);
1562 
1563 			return (IBT_CM_FAILURE);
1564 		}
1565 	}
1566 
1567 	ASSERT(statep->close_nocb_state != IBCM_BLOCK);
1568 
1569 	if (mode == IBT_NOCALLBACKS) {
1570 		statep->close_nocb_state = IBCM_FAIL;
1571 		statep->cm_handler = NULL;
1572 		ibtl_cm_chan_is_closing(statep->channel);
1573 		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
1574 		    "NOCALLBACKS on in statep = %p", statep);
1575 	}
1576 
1577 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1578 		goto lost_race;
1579 	}
1580 
1581 	/*
1582 	 * Cancel/wait for any pending ibt_set_alt_path, and
1583 	 * release state mutex
1584 	 */
1585 	ibcm_sync_lapr_idle(statep);
1586 
1587 	ibcm_close_enter();
1588 
1589 	mutex_enter(&statep->state_mutex);
1590 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1591 		ibcm_close_exit();
1592 		goto lost_race;
1593 	}
1594 
1595 	statep->state = IBCM_STATE_TRANSIENT_DREQ_SENT;
1596 	statep->timerid = 0;
1597 	statep->close_done = B_FALSE;
1598 	statep->close_flow = 1;
1599 	mutex_exit(&statep->state_mutex);
1600 
1601 	ibcm_post_dreq_mad(statep);
1602 
1603 	mutex_enter(&statep->state_mutex);
1604 
1605 lost_race:
1606 	if (mode == IBT_BLOCKING) {
1607 
1608 		/* wait for DREP */
1609 		while (statep->close_done != B_TRUE)
1610 			cv_wait(&statep->block_client_cv,
1611 			    &statep->state_mutex);
1612 
1613 		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
1614 		    "done blocking", channel);
1615 	}
1616 
1617 	IBCM_REF_CNT_DECR(statep);
1618 	mutex_exit(&statep->state_mutex);
1619 
1620 	/* If this message isn't seen then ibt_close_rc_channel failed */
1621 	IBTF_DPRINTF_L5(cmlog, "ibcm_close_rc_channel: chan 0x%p done",
1622 	    channel);
1623 
1624 	return (IBT_SUCCESS);
1625 }
1626 
1627 ibt_status_t
1628 ibt_recycle_rc(ibt_channel_hdl_t rc_chan, ibt_cep_flags_t control,
1629     uint8_t hca_port_num, ibt_recycle_handler_t func, void *arg)
1630 {
1631 	ibcm_state_data_t		*statep;
1632 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg;
1633 	ibt_qp_query_attr_t		qp_attr;
1634 	ibt_status_t			retval;
1635 
1636 	IBTF_DPRINTF_L3(cmlog, "ibt_recycle_rc (%p, 0x%X, %d, %p, %p)", rc_chan,
1637 	    control, hca_port_num, func, arg);
1638 
1639 	if (IBCM_INVALID_CHANNEL(rc_chan)) {
1640 		IBTF_DPRINTF_L2(cmlog, "ibt_recycle_rc: invalid channel");
1641 		return (IBT_CHAN_HDL_INVALID);
1642 	}
1643 
1644 	/* check qp state */
1645 	retval = ibt_query_qp(rc_chan, &qp_attr);
1646 
1647 	if (retval != IBT_SUCCESS)
1648 		return (retval);
1649 
1650 	if (qp_attr.qp_info.qp_trans != IBT_RC_SRV)
1651 		return (IBT_CHAN_SRV_TYPE_INVALID);
1652 
1653 	if (qp_attr.qp_info.qp_state != IBT_STATE_ERROR)
1654 		return (IBT_CHAN_STATE_INVALID);
1655 
1656 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1657 
1658 	ibcm_tq_recycle_arg = kmem_alloc(sizeof (ibcm_taskq_recycle_arg_t),
1659 	    KM_SLEEP);
1660 
1661 	ibcm_tq_recycle_arg->rc_chan		= rc_chan;
1662 	ibcm_tq_recycle_arg->control		= control;
1663 	ibcm_tq_recycle_arg->hca_port_num	= hca_port_num;
1664 	ibcm_tq_recycle_arg->func		= func;
1665 	ibcm_tq_recycle_arg->arg		= arg;
1666 
1667 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1668 
1669 	IBCM_GET_CHAN_PRIVATE(rc_chan, statep);
1670 
1671 	/*
1672 	 * If non-blocking ie., func specified and channel has not yet completed
1673 	 * the timewait, then schedule the work for later
1674 	 */
1675 	if ((func != NULL) && (statep != NULL)) {
1676 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1677 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1678 		statep->recycle_arg = ibcm_tq_recycle_arg;
1679 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1680 		return (IBT_SUCCESS);
1681 	}
1682 
1683 	/*
1684 	 * if blocking ie., func specified, and channel has not yet completed
1685 	 * the timewait, then block until the channel completes the timewait
1686 	 */
1687 	if (statep != NULL)
1688 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1689 	IBCM_WAIT_CHAN_PRIVATE(rc_chan);
1690 
1691 	if (func) {	/* NON BLOCKING case. Taskq for QP state change */
1692 		(void) taskq_dispatch(ibcm_taskq, ibcm_process_rc_recycle,
1693 		    ibcm_tq_recycle_arg, TQ_SLEEP);
1694 		return (IBT_SUCCESS);
1695 	} else	/* BLOCKING case */
1696 		return (ibcm_process_rc_recycle_ret(ibcm_tq_recycle_arg));
1697 }
1698 
1699 void
1700 ibcm_process_rc_recycle(void *recycle_arg)
1701 {
1702 	(void) ibcm_process_rc_recycle_ret(recycle_arg);
1703 }
1704 
1705 static ibt_status_t
1706 ibcm_process_rc_recycle_ret(void *recycle_arg)
1707 {
1708 	ibt_qp_info_t			qp_info;
1709 	ibt_status_t			ibt_status = IBT_SUCCESS;
1710 	ibt_cep_modify_flags_t		cep_flags;
1711 	ibt_qp_query_attr_t		qp_attr;
1712 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg =
1713 	    (ibcm_taskq_recycle_arg_t *)recycle_arg;
1714 
1715 	/* QP must have been in error state */
1716 	ibt_status = ibt_query_qp(ibcm_tq_recycle_arg->rc_chan, &qp_attr);
1717 	if (ibt_status != IBT_SUCCESS)
1718 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1719 		    "chanp %p ibt_query_qp() = %d",
1720 		    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1721 	else {
1722 		/* perform the QP state change from ERROR to RESET */
1723 		bzero(&qp_info, sizeof (qp_info));
1724 
1725 		qp_info.qp_trans = IBT_RC_SRV;
1726 		qp_info.qp_state = IBT_STATE_RESET;
1727 
1728 		/* Call modify_qp to move to RESET state */
1729 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1730 		    IBT_CEP_SET_STATE, &qp_info, NULL);
1731 
1732 		if (ibt_status != IBT_SUCCESS)
1733 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1734 			    "chanp %p ibt_modify_qp() = %d for ERROR to RESET",
1735 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1736 	}
1737 
1738 	if (ibt_status == IBT_SUCCESS) {
1739 
1740 		qp_info.qp_state = IBT_STATE_INIT;
1741 
1742 		/* set flags for all mandatory args from RESET to INIT */
1743 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
1744 		cep_flags |= IBT_CEP_SET_RDMA_R | IBT_CEP_SET_RDMA_W;
1745 		cep_flags |= IBT_CEP_SET_ATOMIC;
1746 
1747 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num =
1748 		    ibcm_tq_recycle_arg->hca_port_num;
1749 		qp_info.qp_flags |=
1750 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_RD;
1751 		qp_info.qp_flags |=
1752 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_WR;
1753 		qp_info.qp_flags |=
1754 		    ibcm_tq_recycle_arg->control & IBT_CEP_ATOMIC;
1755 
1756 		/* Always use the existing pkey */
1757 		qp_info.qp_transport.rc.rc_path.cep_pkey_ix =
1758 		    qp_attr. qp_info.qp_transport.rc.rc_path.cep_pkey_ix;
1759 
1760 		/* Call modify_qp to move to INIT state */
1761 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1762 		    cep_flags, &qp_info, NULL);
1763 
1764 		if (ibt_status != IBT_SUCCESS)
1765 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1766 			    "chanp %p ibt_modify_qp() = %d for RESET to INIT",
1767 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1768 	}
1769 
1770 	/* Change the QP CM state to indicate QP being re-used */
1771 	if (ibt_status == IBT_SUCCESS)
1772 		ibtl_cm_chan_is_reused(ibcm_tq_recycle_arg->rc_chan);
1773 
1774 	/* Call func, if defined */
1775 	if (ibcm_tq_recycle_arg->func)
1776 		(*(ibcm_tq_recycle_arg->func))(ibt_status,
1777 		    ibcm_tq_recycle_arg->arg);
1778 
1779 	kmem_free(ibcm_tq_recycle_arg, sizeof (ibcm_taskq_recycle_arg_t));
1780 
1781 	return (ibt_status);
1782 }
1783 
1784 static void
1785 ibcm_process_abort_via_taskq(void *args)
1786 {
1787 	ibcm_state_data_t	*statep = (ibcm_state_data_t *)args;
1788 
1789 	ibcm_process_abort(statep);
1790 	mutex_enter(&statep->state_mutex);
1791 	IBCM_REF_CNT_DECR(statep);
1792 	mutex_exit(&statep->state_mutex);
1793 }
1794 
1795 /*
1796  * Local UD CM Handler's private data, used during ibt_request_ud_dest() in
1797  * Non-Blocking mode operations.
1798  */
1799 typedef struct ibcm_local_handler_s {
1800 	ibt_cm_ud_handler_t	actual_cm_handler;
1801 	void			*actual_cm_private;
1802 	ibt_ud_dest_t		*dest_hdl;
1803 } ibcm_local_handler_t;
1804 
1805 _NOTE(READ_ONLY_DATA(ibcm_local_handler_s))
1806 
1807 /*
1808  * Local UD CM Handler, used when ibt_alloc_ud_dest() is issued in
1809  * NON-Blocking mode.
1810  *
1811  * Out here, we update the UD Destination handle with
1812  * the obtained DQPN and QKey (from SIDR REP) and invokes actual client
1813  * handler that was specified by the client.
1814  */
1815 static ibt_cm_status_t
1816 ibcm_local_cm_handler(void *priv, ibt_cm_ud_event_t *event,
1817     ibt_cm_ud_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
1818 {
1819 	ibcm_local_handler_t	*handler_priv = (ibcm_local_handler_t *)priv;
1820 
1821 	IBTF_DPRINTF_L4(cmlog, "ibcm_local_cm_handler: event %d",
1822 	    event->cm_type);
1823 
1824 	ASSERT(handler_priv != NULL);
1825 
1826 	switch (event->cm_type) {
1827 	case IBT_CM_UD_EVENT_SIDR_REP:
1828 		/* Update QPN & QKey from event into destination handle. */
1829 		if (handler_priv->dest_hdl != NULL) {
1830 			handler_priv->dest_hdl->ud_dst_qpn =
1831 			    event->cm_event.sidr_rep.srep_remote_qpn;
1832 			handler_priv->dest_hdl->ud_qkey =
1833 			    event->cm_event.sidr_rep.srep_remote_qkey;
1834 		}
1835 
1836 		/* Invoke the client handler - inform only, so ignore retval */
1837 		(void) handler_priv->actual_cm_handler(
1838 		    handler_priv->actual_cm_private, event, ret_args, priv_data,
1839 		    len);
1840 
1841 		/* Free memory allocated for local handler's private data. */
1842 		if (handler_priv != NULL)
1843 			kmem_free(handler_priv, sizeof (*handler_priv));
1844 
1845 		break;
1846 	default:
1847 		IBTF_DPRINTF_L2(cmlog, "ibcm_local_cm_handler: ERROR");
1848 		break;
1849 	}
1850 
1851 	return (IBT_CM_ACCEPT);
1852 }
1853 
1854 
1855 /* Validate the input UD destination attributes.  */
1856 static ibt_status_t
1857 ibcm_validate_dqpn_data(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1858     ibt_ud_returns_t *ret_args)
1859 {
1860 	/* cm handler must always be specified */
1861 	if (mode == IBT_NONBLOCKING && attr->ud_cm_handler == NULL) {
1862 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1863 		    "CM handler is not specified ");
1864 		return (IBT_INVALID_PARAM);
1865 	}
1866 
1867 	if (mode == IBT_NONBLOCKING) {
1868 		if (ret_args != NULL) {
1869 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1870 			    "ret_args should be NULL when called in "
1871 			    "non-blocking mode");
1872 			return (IBT_INVALID_PARAM);
1873 		}
1874 	} else if (mode == IBT_BLOCKING) {
1875 		if (ret_args == NULL) {
1876 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1877 			    "ret_args should be Non-NULL when called in "
1878 			    "blocking mode");
1879 			return (IBT_INVALID_PARAM);
1880 		}
1881 	} else {
1882 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1883 		    "invalid mode %x specified ", mode);
1884 		return (IBT_INVALID_PARAM);
1885 	}
1886 
1887 	if (attr->ud_sid == 0) {
1888 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1889 		    "ServiceID must be specified. ");
1890 		return (IBT_INVALID_PARAM);
1891 	}
1892 
1893 	if (attr->ud_addr == NULL) {
1894 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1895 		    "Address Info NULL");
1896 		return (IBT_INVALID_PARAM);
1897 	}
1898 
1899 	/* Validate SGID */
1900 	if ((attr->ud_addr->av_sgid.gid_prefix == 0) ||
1901 	    (attr->ud_addr->av_sgid.gid_guid == 0)) {
1902 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid SGID");
1903 		return (IBT_INVALID_PARAM);
1904 	}
1905 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: SGID<%llX:%llX>",
1906 	    attr->ud_addr->av_sgid.gid_prefix,
1907 	    attr->ud_addr->av_sgid.gid_guid);
1908 
1909 	/* Validate DGID */
1910 	if ((attr->ud_addr->av_dgid.gid_prefix == 0) ||
1911 	    (attr->ud_addr->av_dgid.gid_guid == 0)) {
1912 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid DGID");
1913 		return (IBT_INVALID_PARAM);
1914 	}
1915 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: DGID<%llX:%llX>",
1916 	    attr->ud_addr->av_dgid.gid_prefix,
1917 	    attr->ud_addr->av_dgid.gid_guid);
1918 
1919 	return (IBT_SUCCESS);
1920 }
1921 
1922 
1923 /* Perform SIDR to retrieve DQPN and QKey.  */
1924 static ibt_status_t
1925 ibcm_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1926     ibt_ud_returns_t *ret_args)
1927 {
1928 	ibt_status_t		retval;
1929 	ib_pkey_t		ud_pkey;
1930 	ibmf_handle_t		ibmf_hdl;
1931 	ibmf_msg_t		*ibmf_msg;
1932 	ibcm_hca_info_t		*hcap;
1933 	ibcm_sidr_req_msg_t	*sidr_req_msgp;
1934 	ibcm_ud_state_data_t	*ud_statep;
1935 	ibtl_cm_hca_port_t	port;
1936 	ibcm_sidr_srch_t	sidr_entry;
1937 	ibcm_qp_list_t		*cm_qp_entry;
1938 
1939 	/* Retrieve HCA GUID value from the available SGID info. */
1940 	retval = ibtl_cm_get_hca_port(attr->ud_addr->av_sgid, 0, &port);
1941 	if ((retval != IBT_SUCCESS) || (port.hp_port == 0)) {
1942 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1943 		    "ibtl_cm_get_hca_port failed: %d", retval);
1944 		return (retval);
1945 	}
1946 
1947 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: "
1948 	    "HCA GUID:%llX, port_num:%d", port.hp_hca_guid, port.hp_port);
1949 
1950 	/* Lookup the HCA info for this GUID */
1951 	if ((hcap = ibcm_find_hca_entry(port.hp_hca_guid)) == NULL) {
1952 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: hcap is NULL");
1953 		return (IBT_HCA_INVALID);
1954 	}
1955 
1956 	/* Return failure if the HCA device or Port is not operational */
1957 
1958 	if ((retval = ibt_get_port_state_byguid(port.hp_hca_guid, port.hp_port,
1959 	    NULL, NULL)) != IBT_SUCCESS) {
1960 		/* Device Port is not in good state, don't use it. */
1961 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: Invalid "
1962 		    "port specified or port not active");
1963 		ibcm_dec_hca_acc_cnt(hcap);
1964 		return (retval);
1965 	}
1966 
1967 	retval = ibt_index2pkey_byguid(port.hp_hca_guid, port.hp_port,
1968 	    attr->ud_pkey_ix, &ud_pkey);
1969 	if (retval != IBT_SUCCESS) {
1970 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1971 		    "Failed to convert index2pkey: %d", retval);
1972 		ibcm_dec_hca_acc_cnt(hcap);
1973 		return (retval);
1974 	}
1975 
1976 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(sidr_entry))
1977 
1978 	/* Allocate a new request id */
1979 	if (ibcm_alloc_reqid(hcap, &sidr_entry.srch_req_id) == IBCM_FAILURE) {
1980 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1981 		    "no req id available");
1982 		ibcm_dec_hca_acc_cnt(hcap);
1983 		return (IBT_INSUFF_KERNEL_RESOURCE);
1984 	}
1985 
1986 	if ((hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl == NULL) &&
1987 	    ((retval = ibcm_hca_reinit_port(hcap, port.hp_port - 1))
1988 	    != IBT_SUCCESS)) {
1989 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1990 		    "ibmf reg or callback setup failed during re-initialize");
1991 		return (retval);
1992 	}
1993 
1994 	ibmf_hdl = hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl;
1995 
1996 	/* find the ibmf QP to post the SIDR REQ */
1997 	if ((cm_qp_entry = ibcm_find_qp(hcap, port.hp_port, ud_pkey)) ==
1998 	    NULL) {
1999 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF QP allocation"
2000 		    " failed");
2001 		ibcm_dec_hca_acc_cnt(hcap);
2002 		return (IBT_INSUFF_RESOURCE);
2003 	}
2004 
2005 	if ((retval = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg, MAD_METHOD_SEND))
2006 	    != IBT_SUCCESS) {
2007 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF MSG allocation"
2008 		    " failed");
2009 		ibcm_release_qp(cm_qp_entry);
2010 		ibcm_dec_hca_acc_cnt(hcap);
2011 		return (retval);
2012 	}
2013 
2014 	sidr_entry.srch_lid = port.hp_base_lid;
2015 	sidr_entry.srch_gid = attr->ud_addr->av_sgid;
2016 	sidr_entry.srch_grh_exists = attr->ud_addr->av_send_grh;
2017 	sidr_entry.srch_mode = IBCM_ACTIVE_MODE;
2018 
2019 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(sidr_entry))
2020 
2021 	/* do various allocations needed here */
2022 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
2023 
2024 	(void) ibcm_find_sidr_entry(&sidr_entry, hcap, &ud_statep,
2025 	    IBCM_FLAG_ADD);
2026 	rw_exit(&hcap->hca_sidr_list_lock);
2027 
2028 	/* Increment hca's resource count */
2029 	ibcm_inc_hca_res_cnt(hcap);
2030 
2031 	/* After a resource created on hca, no need to hold the acc cnt */
2032 	ibcm_dec_hca_acc_cnt(hcap);
2033 
2034 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_statep))
2035 
2036 	/* Initialize some ud_statep fields */
2037 	ud_statep->ud_stored_msg = ibmf_msg;
2038 	ud_statep->ud_svc_id = attr->ud_sid;
2039 	ud_statep->ud_pkt_life_time =
2040 	    ibt_ib2usec(attr->ud_pkt_lt);
2041 	ud_statep->ud_stored_reply_addr.cm_qp_entry = cm_qp_entry;
2042 
2043 	/* set remaining retry cnt */
2044 	ud_statep->ud_remaining_retry_cnt = ud_statep->ud_max_cm_retries;
2045 
2046 	/*
2047 	 * Get UD handler and corresponding args which is pass it back
2048 	 * as first argument for the handler.
2049 	 */
2050 	ud_statep->ud_state_cm_private = attr->ud_cm_private;
2051 
2052 	if (mode == IBT_BLOCKING)
2053 		ud_statep->ud_return_data = ret_args;
2054 	else
2055 		ud_statep->ud_cm_handler = attr->ud_cm_handler;
2056 
2057 	/* Initialize the fields of ud_statep->ud_stored_reply_addr */
2058 	ud_statep->ud_stored_reply_addr.grh_exists = attr->ud_addr->av_send_grh;
2059 	ud_statep->ud_stored_reply_addr.ibmf_hdl = ibmf_hdl;
2060 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_hop_limit =
2061 	    attr->ud_addr->av_hop;
2062 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_sender_gid =
2063 	    attr->ud_addr->av_sgid;
2064 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_recver_gid =
2065 	    attr->ud_addr->av_dgid;
2066 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_tclass =
2067 	    attr->ud_addr->av_tclass;
2068 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_flow_label =
2069 	    attr->ud_addr->av_flow & IB_GRH_FLOW_LABEL_MASK;
2070 
2071 	/* needs to be derived based on the base LID and path bits */
2072 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_local_lid =
2073 	    port.hp_base_lid;
2074 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_lid =
2075 	    attr->ud_addr->av_dlid;
2076 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_p_key = ud_pkey;
2077 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_q_key = IB_GSI_QKEY;
2078 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_service_level =
2079 	    attr->ud_addr->av_srvl;
2080 
2081 	/*
2082 	 * This may be enchanced later, to use a remote qno based on past
2083 	 * redirect rej mad responses. This would be the place to specify
2084 	 * appropriate remote qno
2085 	 */
2086 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_qno = 1;
2087 
2088 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2089 
2090 	/* Initialize the SIDR REQ message fields */
2091 	sidr_req_msgp =
2092 	    (ibcm_sidr_req_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);
2093 
2094 	sidr_req_msgp->sidr_req_request_id = h2b32(ud_statep->ud_req_id);
2095 	sidr_req_msgp->sidr_req_service_id = h2b64(attr->ud_sid);
2096 	sidr_req_msgp->sidr_req_pkey = h2b16(ud_pkey);
2097 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->AttributeID =
2098 	    h2b16(IBCM_INCOMING_SIDR_REQ + IBCM_ATTR_BASE_ID);
2099 
2100 	if ((attr->ud_priv_data != NULL) && (attr->ud_priv_data_len > 0)) {
2101 		bcopy(attr->ud_priv_data, sidr_req_msgp->sidr_req_private_data,
2102 		    min(attr->ud_priv_data_len, IBT_SIDR_REQ_PRIV_DATA_SZ));
2103 	}
2104 
2105 	/* Send out the SIDR REQ message */
2106 	ud_statep->ud_state = IBCM_STATE_SIDR_REQ_SENT;
2107 	ud_statep->ud_timer_stored_state = IBCM_STATE_SIDR_REQ_SENT;
2108 	IBCM_UD_REF_CNT_INCR(ud_statep); /* for non-blocking SIDR REQ post */
2109 	ud_statep->ud_timer_value = ibt_ib2usec(ibcm_max_sidr_rep_proctime) +
2110 	    (ud_statep->ud_pkt_life_time * 2);
2111 
2112 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID =
2113 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_SIDR_REQ,
2114 	    ud_statep->ud_req_id, 0));
2115 
2116 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: timer_value in HZ = %x",
2117 	    ud_statep->ud_timer_value);
2118 
2119 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ud_statep))
2120 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2121 
2122 	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
2123 	    ibcm_post_sidr_req_complete, ud_statep);
2124 
2125 	mutex_enter(&ud_statep->ud_state_mutex);
2126 
2127 	/* Wait for SIDR_REP */
2128 	if (mode == IBT_BLOCKING) {
2129 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: blocking");
2130 
2131 		while (ud_statep->ud_blocking_done != B_TRUE) {
2132 			cv_wait(&ud_statep->ud_block_client_cv,
2133 			    &ud_statep->ud_state_mutex);
2134 		}
2135 
2136 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: finished blocking");
2137 
2138 		if (ret_args->ud_status == IBT_CM_SREP_QPN_VALID) {
2139 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: DQPN = %x, "
2140 			    "status = %x, QKey = %x", ret_args->ud_dqpn,
2141 			    ret_args->ud_status, ret_args->ud_qkey);
2142 
2143 		} else {
2144 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: Status<%x>",
2145 			    ret_args->ud_status);
2146 			retval = IBT_CM_FAILURE;
2147 		}
2148 	}
2149 
2150 	IBCM_UD_REF_CNT_DECR(ud_statep);
2151 	mutex_exit(&ud_statep->ud_state_mutex);
2152 
2153 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: done");
2154 
2155 	return (retval);
2156 }
2157 
2158 
2159 /*
2160  * Function:
2161  *	ibt_request_ud_dest
2162  * Input:
2163  *	ud_dest		A previously allocated UD destination handle.
2164  *	mode		This function can execute in blocking or non blocking
2165  *			modes.
2166  *	attr		UD destination attributes to be modified.
2167  * Output:
2168  *	ud_ret_args	If the function is called in blocking mode, ud_ret_args
2169  *			should be a pointer to an ibt_ud_returns_t struct.
2170  * Returns:
2171  *	IBT_SUCCESS
2172  * Description:
2173  *	Modify a previously allocated UD destination handle based on the
2174  *	results of doing the SIDR protocol.
2175  */
2176 ibt_status_t
2177 ibt_request_ud_dest(ibt_ud_dest_hdl_t ud_dest, ibt_execution_mode_t mode,
2178     ibt_ud_dest_attr_t *attr, ibt_ud_returns_t *ud_ret_args)
2179 {
2180 	ibt_status_t		retval;
2181 	ibt_ud_dest_t		*ud_destp;
2182 	ibcm_local_handler_t	*local_handler_priv = NULL;
2183 
2184 	IBTF_DPRINTF_L3(cmlog, "ibt_request_ud_dest(%p, %x, %p, %p)",
2185 	    ud_dest, mode, attr, ud_ret_args);
2186 
2187 	retval = ibcm_validate_dqpn_data(attr, mode, ud_ret_args);
2188 	if (retval != IBT_SUCCESS) {
2189 		return (retval);
2190 	}
2191 
2192 	ud_destp = ud_dest;
2193 
2194 	/* Allocate an Address handle. */
2195 	retval = ibt_modify_ah(ud_destp->ud_dest_hca, ud_destp->ud_ah,
2196 	    attr->ud_addr);
2197 	if (retval != IBT_SUCCESS) {
2198 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2199 		    "Address Handle Modification failed: %d", retval);
2200 		return (retval);
2201 	}
2202 
2203 	if (mode == IBT_NONBLOCKING) {
2204 		/*
2205 		 * In NON-BLOCKING mode, and we need to update the destination
2206 		 * handle with the DQPN and QKey that are obtained from
2207 		 * SIDR REP, hook-up our own handler, so that we can catch
2208 		 * the event, and we ourselves call the actual client's
2209 		 * ud_cm_handler, in our handler.
2210 		 */
2211 
2212 		/* Allocate memory for local handler's private data. */
2213 		local_handler_priv =
2214 		    kmem_alloc(sizeof (*local_handler_priv), KM_SLEEP);
2215 
2216 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2217 
2218 		local_handler_priv->actual_cm_handler = attr->ud_cm_handler;
2219 		local_handler_priv->actual_cm_private = attr->ud_cm_private;
2220 		local_handler_priv->dest_hdl = ud_destp;
2221 
2222 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2223 
2224 		attr->ud_cm_handler = ibcm_local_cm_handler;
2225 		attr->ud_cm_private = local_handler_priv;
2226 	}
2227 
2228 	/* In order to get DQPN and Destination QKey, perform SIDR */
2229 	retval = ibcm_ud_get_dqpn(attr, mode, ud_ret_args);
2230 	if (retval != IBT_SUCCESS) {
2231 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2232 		    "Failed to get DQPN: %d", retval);
2233 
2234 		/* Free memory allocated for local handler's private data. */
2235 		if (local_handler_priv != NULL)
2236 			kmem_free(local_handler_priv,
2237 			    sizeof (*local_handler_priv));
2238 		return (retval);
2239 	}
2240 
2241 	/*
2242 	 * Fill in the dqpn and dqkey as obtained from ud_ret_args,
2243 	 * values will be valid only on BLOCKING mode.
2244 	 */
2245 	if (mode == IBT_BLOCKING) {
2246 		ud_destp->ud_dst_qpn = ud_ret_args->ud_dqpn;
2247 		ud_destp->ud_qkey = ud_ret_args->ud_qkey;
2248 	}
2249 
2250 	return (retval);
2251 }
2252 
2253 /*
2254  * Function:
2255  *	ibt_ud_get_dqpn
2256  * Input:
2257  *	attr		A pointer to an ibt_ud_dest_attr_t struct that are
2258  *			required for SIDR REQ message. Not specified attributes
2259  *			should be set to "NULL" or "0".
2260  *			ud_sid, ud_addr and ud_pkt_lt must be specified.
2261  *	mode		This function can execute in blocking or non blocking
2262  *			modes.
2263  * Output:
2264  *	returns		If the function is called in blocking mode, returns
2265  *			should be a pointer to an ibt_ud_returns_t struct.
2266  * Return:
2267  *	IBT_SUCCESS	on success or respective failure on error.
2268  * Description:
2269  *	Finds the destination QPN at the specified destination that the
2270  *	specified service can be reached on. The IBTF CM initiates the
2271  *	service ID resolution protocol (SIDR) to determine a destination QPN.
2272  *
2273  * NOTE: SIDR_REQ is initiated from active side.
2274  */
2275 ibt_status_t
2276 ibt_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
2277     ibt_ud_returns_t *returns)
2278 {
2279 	ibt_status_t		retval;
2280 
2281 	IBTF_DPRINTF_L3(cmlog, "ibt_ud_get_dqpn(%p, %x, %p)",
2282 	    attr, mode, returns);
2283 
2284 	retval = ibcm_validate_dqpn_data(attr, mode, returns);
2285 	if (retval != IBT_SUCCESS) {
2286 		return (retval);
2287 	}
2288 
2289 	return (ibcm_ud_get_dqpn(attr, mode, returns));
2290 }
2291 
2292 
2293 /*
2294  * ibt_cm_delay:
2295  *	A client CM handler function can call this function
2296  *	to extend its response time to a CM event.
2297  * INPUTS:
2298  *	flags		Indicates what CM message processing is being delayed
2299  *			by the CM handler, valid values are:
2300  *				IBT_CM_DELAY_REQ
2301  *				IBT_CM_DELAY_REP
2302  *				IBT_CM_DELAY_LAP
2303  *	cm_session_id	The session ID that was passed to client srv_handler
2304  *			by the CM
2305  *	service_time	The extended service time
2306  *	priv_data	Vendor specific data to be sent in the CM generated
2307  *			MRA message. Should be NULL if not specified.
2308  *	len		The number of bytes of data specified by priv_data.
2309  *
2310  * RETURN VALUES:
2311  *	IBT_SUCCESS	on success (or respective failure on error)
2312  */
2313 ibt_status_t
2314 ibt_cm_delay(ibt_cmdelay_flags_t flags, void *cm_session_id,
2315     clock_t service_time, void *priv_data, ibt_priv_data_len_t len)
2316 {
2317 	uint8_t			msg_typ = 0;
2318 	ibcm_mra_msg_t		*mra_msgp;
2319 	ibcm_state_data_t	*statep;
2320 	ibt_status_t		status;
2321 
2322 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay(0x%x, %p, 0x%x)",
2323 	    flags, cm_session_id, service_time);
2324 
2325 	/*
2326 	 * Make sure channel is associated with a statep
2327 	 */
2328 	statep = (ibcm_state_data_t *)cm_session_id;
2329 
2330 	if (statep == NULL) {
2331 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: statep NULL");
2332 		return (IBT_INVALID_PARAM);
2333 	}
2334 
2335 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_delay: statep %p", statep);
2336 
2337 	/* Allocate an ibmf msg for mra, if not allocated yet */
2338 	if (statep->mra_msg == NULL) {
2339 		if ((status = ibcm_alloc_out_msg(
2340 		    statep->stored_reply_addr.ibmf_hdl, &statep->mra_msg,
2341 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
2342 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: chan 0x%p"
2343 			    "IBMF MSG allocation failed", statep->channel);
2344 			return (status);
2345 		}
2346 	}
2347 
2348 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mra_msgp))
2349 
2350 	mra_msgp = (ibcm_mra_msg_t *)IBCM_OUT_MSGP(statep->mra_msg);
2351 	mra_msgp->mra_local_comm_id = h2b32(statep->local_comid);
2352 	mra_msgp->mra_remote_comm_id = h2b32(statep->remote_comid);
2353 
2354 	/* fill in rest of MRA's fields - Message MRAed and Service Timeout */
2355 	if (flags == IBT_CM_DELAY_REQ) {
2356 		msg_typ = IBT_CM_MRA_TYPE_REQ;
2357 	} else if (flags == IBT_CM_DELAY_REP) {
2358 		msg_typ = IBT_CM_MRA_TYPE_REP;
2359 	} else if (flags == IBT_CM_DELAY_LAP) {
2360 		msg_typ = IBT_CM_MRA_TYPE_LAP;
2361 	}
2362 
2363 	mra_msgp->mra_message_type_plus = msg_typ << 6;
2364 	mra_msgp->mra_service_timeout_plus = ibt_usec2ib(service_time) << 3;
2365 
2366 	len = min(len, IBT_MRA_PRIV_DATA_SZ);
2367 	if (priv_data && (len > 0))
2368 		bcopy(priv_data, mra_msgp->mra_private_data, len);
2369 
2370 	IBCM_OUT_HDRP(statep->mra_msg)->AttributeID =
2371 	    h2b16(IBCM_INCOMING_MRA + IBCM_ATTR_BASE_ID);
2372 
2373 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mra_msgp))
2374 
2375 	mutex_enter(&statep->state_mutex);
2376 
2377 	if ((statep->mode == IBCM_ACTIVE_MODE) &&
2378 	    (statep->state == IBCM_STATE_REP_RCVD)) {
2379 		statep->state = IBCM_STATE_MRA_REP_SENT;
2380 	} else if (statep->mode == IBCM_PASSIVE_MODE) {
2381 		if (statep->state == IBCM_STATE_REQ_RCVD) {
2382 			statep->state = IBCM_STATE_MRA_SENT;
2383 		} else if (statep->ap_state == IBCM_AP_STATE_LAP_RCVD) {
2384 			statep->ap_state = IBCM_AP_STATE_MRA_LAP_RCVD;
2385 		} else {
2386 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2387 			    "/ap_state/mode %x, %x, %x", statep->state,
2388 			    statep->ap_state, statep->mode);
2389 			mutex_exit(&statep->state_mutex);
2390 			return (IBT_CHAN_STATE_INVALID);
2391 		}
2392 	} else {
2393 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2394 		    "/ap_state/mode %x, %x, %x", statep->state,
2395 		    statep->ap_state, statep->mode);
2396 		mutex_exit(&statep->state_mutex);
2397 
2398 		return (IBT_CHAN_STATE_INVALID);
2399 	}
2400 	/* service time is usecs, stale_clock is nsecs */
2401 	statep->stale_clock = gethrtime() +
2402 	    (hrtime_t)ibt_ib2usec(ibt_usec2ib(service_time)) * (1000 *
2403 	    statep->max_cm_retries);
2404 
2405 	statep->send_mad_flags |= IBCM_MRA_POST_BUSY;
2406 	IBCM_REF_CNT_INCR(statep);	/* for ibcm_post_mra_complete */
2407 	mutex_exit(&statep->state_mutex);
2408 
2409 	IBCM_OUT_HDRP(statep->mra_msg)->TransactionID =
2410 	    IBCM_OUT_HDRP(statep->stored_msg)->TransactionID;
2411 
2412 	/* post the MRA mad in blocking mode, as no timers involved */
2413 	ibcm_post_rc_mad(statep, statep->mra_msg, ibcm_post_mra_complete,
2414 	    statep);
2415 	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_MRA);
2416 	/* If this message isn't seen then ibt_cm_delay failed */
2417 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay: done !!");
2418 
2419 	return (IBT_SUCCESS);
2420 }
2421 
2422 
2423 /*
2424  * ibt_register_service()
2425  *	Register a service with the IBCM
2426  *
2427  * INPUTS:
2428  *	ibt_hdl		The IBT client handle returned to the client
2429  *			on an ibt_attach() call.
2430  *
2431  *	srv		The address of a ibt_srv_desc_t that describes
2432  *			the service, containing the following:
2433  *
2434  *		sd_ud_handler	The Service CM UD event Handler.
2435  *		sd_handler	The Service CM RC/UC/RD event Handler.
2436  *		sd_flags	Service flags (peer-to-peer, or not).
2437  *
2438  *	sid		This tells CM if the service is local (sid is 0) or
2439  *			wellknown (sid is the starting service id of the range).
2440  *
2441  *	num_sids	The number of contiguous service-ids to reserve.
2442  *
2443  *	srv_hdl		The address of a service identification handle, used
2444  *			to deregister a service, and to bind GIDs to.
2445  *
2446  *	ret_sid		The address to store the Service ID return value.
2447  *			If num_sids > 1, ret_sid is the first Service ID
2448  *			in the range.
2449  *
2450  * ibt_register_service() returns:
2451  *	IBT_SUCCESS		- added a service successfully.
2452  *	IBT_INVALID_PARAM	- invalid input parameter.
2453  *	IBT_CM_FAILURE		- failed to add the service.
2454  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2455  *	IBT_INSUFF_KERNEL_RESOURCE - ran out of local service ids (should
2456  *				     never happen).
2457  */
2458 ibt_status_t
2459 ibt_register_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_desc_t *srv,
2460     ib_svc_id_t sid, int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
2461 {
2462 	ibcm_svc_info_t		*svcinfop;
2463 
2464 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service(%p (%s), %p, 0x%llX, %d)",
2465 	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl), srv, (longlong_t)sid,
2466 	    num_sids);
2467 
2468 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*svcinfop))
2469 
2470 	*srv_hdl = NULL;
2471 
2472 	if (num_sids <= 0) {
2473 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2474 		    "Invalid number of service-ids specified (%d)", num_sids);
2475 		return (IBT_INVALID_PARAM);
2476 	}
2477 
2478 	if (sid == 0) {
2479 		if (ret_sid == NULL)
2480 			return (IBT_INVALID_PARAM);
2481 		sid = ibcm_alloc_local_sids(num_sids);
2482 		if (sid == 0)
2483 			return (IBT_INSUFF_KERNEL_RESOURCE);
2484 
2485 	/* Make sure that the ServiceId specified is not of LOCAL AGN type. */
2486 	} else if ((sid & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL) {
2487 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2488 		    "Invalid non-LOCAL SID specified: 0x%llX",
2489 		    (longlong_t)sid);
2490 		return (IBT_INVALID_PARAM);
2491 	}
2492 
2493 	svcinfop = ibcm_create_svc_entry(sid, num_sids);
2494 
2495 	if (svcinfop == NULL) {
2496 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2497 		    "Service-ID 0x%llx already registered", (longlong_t)sid);
2498 		return (IBT_CM_SERVICE_EXISTS);
2499 	}
2500 
2501 	/*
2502 	 * 'sid' and 'num_sids' are filled in ibcm_create_svc_entry()
2503 	 */
2504 	svcinfop->svc_flags = srv->sd_flags;
2505 	svcinfop->svc_rc_handler = srv->sd_handler;
2506 	svcinfop->svc_ud_handler = srv->sd_ud_handler;
2507 
2508 	if (ret_sid != NULL)
2509 		*ret_sid = sid;
2510 
2511 	*srv_hdl = svcinfop;
2512 
2513 	ibtl_cm_change_service_cnt(ibt_hdl, num_sids);
2514 
2515 	/* If this message isn't seen, then ibt_register_service failed. */
2516 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service: done (%p, %llX)",
2517 	    svcinfop, sid);
2518 
2519 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*svcinfop))
2520 
2521 	return (IBT_SUCCESS);
2522 }
2523 
2524 
2525 static ibt_status_t
2526 ibcm_write_service_record(ibmf_saa_handle_t saa_handle,
2527     sa_service_record_t *srv_recp, ibmf_saa_access_type_t saa_type)
2528 {
2529 	int	rval;
2530 	int	retry;
2531 
2532 	ibcm_sa_access_enter();
2533 	for (retry = 0; retry < ibcm_max_sa_retries; retry++) {
2534 		rval = ibmf_saa_update_service_record(
2535 		    saa_handle, srv_recp, saa_type, 0);
2536 		if (rval != IBMF_TRANS_TIMEOUT) {
2537 			break;
2538 		}
2539 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2540 		    "ibmf_saa_update_service_record timed out"
2541 		    " SID = %llX, rval = %d, saa_type = %d",
2542 		    (longlong_t)srv_recp->ServiceID, rval, saa_type);
2543 		delay(ibcm_sa_timeout_delay);
2544 	}
2545 	ibcm_sa_access_exit();
2546 
2547 	if (rval != IBMF_SUCCESS) {
2548 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2549 		    "ibmf_saa_update_service_record() : Failed - %d", rval);
2550 		return (ibcm_ibmf_analyze_error(rval));
2551 	} else
2552 		return (IBT_SUCCESS);
2553 }
2554 
2555 
2556 static void
2557 ibcm_rem_stale_srec(ibmf_saa_handle_t saa_handle, sa_service_record_t *srec)
2558 {
2559 	ibt_status_t		retval;
2560 	uint_t			num_found;
2561 	size_t			length;
2562 	sa_service_record_t	*srv_resp;
2563 	void			*results_p;
2564 	uint_t			i;
2565 	uint64_t		component_mask;
2566 	ibmf_saa_access_args_t	access_args;
2567 
2568 	component_mask =
2569 	    SA_SR_COMPMASK_PKEY | SA_SR_COMPMASK_NAME | SA_SR_COMPMASK_GID;
2570 
2571 	/* Call in SA Access retrieve routine to get Service Records. */
2572 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
2573 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
2574 	access_args.sq_component_mask = component_mask;
2575 	access_args.sq_template = srec;
2576 	access_args.sq_template_length = sizeof (sa_service_record_t);
2577 	access_args.sq_callback = NULL;
2578 	access_args.sq_callback_arg = NULL;
2579 
2580 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
2581 	    &results_p);
2582 	if (retval != IBT_SUCCESS) {
2583 		IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2584 		    "SA Access Failure");
2585 		return;
2586 	}
2587 
2588 	num_found = length / sizeof (sa_service_record_t);
2589 
2590 	if (num_found)
2591 		IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2592 		    "Found %d matching Service Records.", num_found);
2593 
2594 	/* Validate the returned number of records. */
2595 	if ((results_p != NULL) && (num_found > 0)) {
2596 
2597 		/* Remove all the records. */
2598 		for (i = 0; i < num_found; i++) {
2599 
2600 			srv_resp = (sa_service_record_t *)
2601 			    ((uchar_t *)results_p +
2602 			    i * sizeof (sa_service_record_t));
2603 
2604 			/*
2605 			 * Found some matching records, but check out whether
2606 			 * this Record is really stale or just happens to match
2607 			 * the current session records. If yes, don't remove it.
2608 			 */
2609 			mutex_enter(&ibcm_svc_info_lock);
2610 			if (ibcm_find_svc_entry(srv_resp->ServiceID) != NULL) {
2611 				/* This record is NOT STALE. */
2612 				mutex_exit(&ibcm_svc_info_lock);
2613 				IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2614 				    "This is not Stale, it's an active record");
2615 				continue;
2616 			}
2617 			mutex_exit(&ibcm_svc_info_lock);
2618 
2619 			IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2620 			    "Removing Stale Rec: %s, %llX",
2621 			    srv_resp->ServiceName, srv_resp->ServiceID);
2622 
2623 			IBCM_DUMP_SERVICE_REC(srv_resp);
2624 
2625 			/*
2626 			 * Remove the Service Record Entry from SA.
2627 			 *
2628 			 * Get ServiceID info from Response Buf, other
2629 			 * attributes are already filled-in.
2630 			 */
2631 
2632 			 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2633 
2634 			srec->ServiceID = srv_resp->ServiceID;
2635 
2636 			 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2637 
2638 			(void) ibcm_write_service_record(saa_handle, srec,
2639 			    IBMF_SAA_DELETE);
2640 		}
2641 
2642 		/* Deallocate the memory for results_p. */
2643 		kmem_free(results_p, length);
2644 	}
2645 }
2646 
2647 
2648 
2649 /*
2650  * ibt_bind_service()
2651  *	Register a service with the IBCM
2652  *
2653  * INPUTS:
2654  *	srv_hdl		The service id handle returned to the client
2655  *			on an ibt_service_register() call.
2656  *
2657  *	gid		The GID to which to bind the service.
2658  *
2659  *	srv_bind	The address of a ibt_srv_bind_t that describes
2660  *			the service record.  This should be NULL if there
2661  *			is to be no service record.  This contains:
2662  *
2663  *		sb_lease	Lease period
2664  *		sb_pkey		Partition
2665  *		sb_name		pointer to ASCII string Service Name,
2666  *				NULL terminated.
2667  *		sb_key[]	Key to secure the service record.
2668  *		sb_data		Service Data structure (64-byte)
2669  *
2670  *	cm_private	First argument of Service handler.
2671  *
2672  *	sb_hdl_p	The address of a service bind handle, used
2673  *			to undo the service binding.
2674  *
2675  * ibt_bind_service() returns:
2676  *	IBT_SUCCESS		- added a service successfully.
2677  *	IBT_INVALID_PARAM	- invalid input parameter.
2678  *	IBT_CM_FAILURE		- failed to add the service.
2679  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2680  */
2681 ibt_status_t
2682 ibt_bind_service(ibt_srv_hdl_t srv_hdl, ib_gid_t gid, ibt_srv_bind_t *srv_bind,
2683     void *cm_private, ibt_sbind_hdl_t *sb_hdl_p)
2684 {
2685 	ibt_status_t		status;
2686 	ibtl_cm_hca_port_t	port;
2687 	ibcm_svc_bind_t		*sbindp, *sbp;
2688 	ibcm_hca_info_t		*hcap;
2689 	ib_svc_id_t		sid, start_sid, end_sid;
2690 	ibmf_saa_handle_t	saa_handle;
2691 	sa_service_record_t	srv_rec;
2692 	uint16_t		pkey_ix;
2693 
2694 	if (sb_hdl_p != NULL)
2695 		*sb_hdl_p = NULL;	/* return value for error cases */
2696 
2697 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: srv_hdl %p, gid (%llX:%llX)",
2698 	    srv_hdl, (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
2699 
2700 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sbindp))
2701 
2702 	/* Call ibtl_cm_get_hca_port to get the port number and the HCA GUID. */
2703 	if ((status = ibtl_cm_get_hca_port(gid, 0, &port)) != IBT_SUCCESS) {
2704 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2705 		    "ibtl_cm_get_hca_port failed: %d", status);
2706 		return (status);
2707 	}
2708 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: Port:%d HCA GUID:%llX",
2709 	    port.hp_port, port.hp_hca_guid);
2710 
2711 	hcap = ibcm_find_hca_entry(port.hp_hca_guid);
2712 	if (hcap == NULL) {
2713 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: NO HCA found");
2714 		return (IBT_HCA_BUSY_DETACHING);
2715 	}
2716 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: hcap = %p", hcap);
2717 
2718 	if (srv_bind != NULL) {
2719 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2720 		if (saa_handle == NULL) {
2721 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2722 			    "saa_handle is NULL");
2723 			ibcm_dec_hca_acc_cnt(hcap);
2724 			return (IBT_HCA_PORT_NOT_ACTIVE);
2725 		}
2726 		if (srv_bind->sb_pkey == 0) {
2727 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2728 			    "P_Key must not be 0");
2729 			ibcm_dec_hca_acc_cnt(hcap);
2730 			return (IBT_INVALID_PARAM);
2731 		}
2732 		if (strlen(srv_bind->sb_name) >= IB_SVC_NAME_LEN) {
2733 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2734 			    "Service Name is too long");
2735 			ibcm_dec_hca_acc_cnt(hcap);
2736 			return (IBT_INVALID_PARAM);
2737 		} else
2738 			IBTF_DPRINTF_L3(cmlog, "ibt_bind_service: "
2739 			    "Service Name='%s'", srv_bind->sb_name);
2740 		status = ibt_pkey2index_byguid(port.hp_hca_guid,
2741 		    port.hp_port, srv_bind->sb_pkey, &pkey_ix);
2742 		if (status != IBT_SUCCESS) {
2743 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2744 			    "P_Key 0x%x not found in P_Key_Table",
2745 			    srv_bind->sb_pkey);
2746 			ibcm_dec_hca_acc_cnt(hcap);
2747 			return (status);
2748 		}
2749 	}
2750 
2751 	/* assume success - allocate before locking */
2752 	sbindp = kmem_zalloc(sizeof (*sbindp), KM_SLEEP);
2753 	sbindp->sbind_cm_private = cm_private;
2754 	sbindp->sbind_gid = gid;
2755 	sbindp->sbind_hcaguid = port.hp_hca_guid;
2756 	sbindp->sbind_port = port.hp_port;
2757 
2758 	mutex_enter(&ibcm_svc_info_lock);
2759 
2760 	sbp = srv_hdl->svc_bind_list;
2761 	while (sbp != NULL) {
2762 		if (sbp->sbind_gid.gid_guid == gid.gid_guid &&
2763 		    sbp->sbind_gid.gid_prefix == gid.gid_prefix) {
2764 			if (srv_bind == NULL ||
2765 			    srv_bind->sb_pkey == sbp->sbind_pkey) {
2766 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2767 				    "failed: GID %llX:%llX and PKEY %x is "
2768 				    "already bound", gid.gid_prefix,
2769 				    gid.gid_guid, sbp->sbind_pkey);
2770 				mutex_exit(&ibcm_svc_info_lock);
2771 				ibcm_dec_hca_acc_cnt(hcap);
2772 				kmem_free(sbindp, sizeof (*sbindp));
2773 				return (IBT_CM_SERVICE_EXISTS);
2774 			}
2775 		}
2776 		sbp = sbp->sbind_link;
2777 	}
2778 	/* no entry found */
2779 
2780 	sbindp->sbind_link = srv_hdl->svc_bind_list;
2781 	srv_hdl->svc_bind_list = sbindp;
2782 
2783 	mutex_exit(&ibcm_svc_info_lock);
2784 
2785 	if (srv_bind != NULL) {
2786 		bzero(&srv_rec, sizeof (srv_rec));
2787 
2788 		srv_rec.ServiceLease =
2789 		    sbindp->sbind_lease = srv_bind->sb_lease;
2790 		srv_rec.ServiceP_Key =
2791 		    sbindp->sbind_pkey = srv_bind->sb_pkey;
2792 		srv_rec.ServiceKey_hi =
2793 		    sbindp->sbind_key[0] = srv_bind->sb_key[0];
2794 		srv_rec.ServiceKey_lo =
2795 		    sbindp->sbind_key[1] = srv_bind->sb_key[1];
2796 		(void) strcpy(sbindp->sbind_name, srv_bind->sb_name);
2797 		(void) strcpy((char *)srv_rec.ServiceName, srv_bind->sb_name);
2798 		srv_rec.ServiceGID = gid;
2799 
2800 		/*
2801 		 * Find out whether we have any stale Local Service records
2802 		 * matching the current attributes.  If yes, we shall try to
2803 		 * remove them from SA using the current request's ServiceKey.
2804 		 *
2805 		 * We will perform this operation only for Local Services, as
2806 		 * it is handled by SA automatically for WellKnown Services.
2807 		 *
2808 		 * Ofcourse, clients can specify NOT to do this clean-up by
2809 		 * setting IBT_SBIND_NO_CLEANUP flag (srv_bind->sb_flag).
2810 		 */
2811 		if ((srv_hdl->svc_id & IB_SID_AGN_LOCAL) &&
2812 		    (!(srv_bind->sb_flag & IBT_SBIND_NO_CLEANUP))) {
2813 			ibcm_rem_stale_srec(saa_handle, &srv_rec);
2814 		}
2815 
2816 		/* Handle endianess for service data. */
2817 		ibcm_swizzle_from_srv(&srv_bind->sb_data, sbindp->sbind_data);
2818 
2819 		bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
2820 
2821 		/* insert srv record into the SA */
2822 		start_sid = srv_hdl->svc_id;
2823 		end_sid = start_sid + srv_hdl->svc_num_sids - 1;
2824 		for (sid = start_sid; sid <= end_sid; sid++) {
2825 
2826 			srv_rec.ServiceID = sid;
2827 
2828 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2829 
2830 			IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: "
2831 			    "ibmf_saa_write_service_record, SvcId = %llX",
2832 			    (longlong_t)sid);
2833 
2834 			status = ibcm_write_service_record(saa_handle, &srv_rec,
2835 			    IBMF_SAA_UPDATE);
2836 			if (status != IBT_SUCCESS) {
2837 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service:"
2838 				    " ibcm_write_service_record fails %d, "
2839 				    "sid %llX", status, (longlong_t)sid);
2840 
2841 				if (sid != start_sid) {
2842 					/*
2843 					 * Bind failed while bind SID other than
2844 					 * first in the sid_range.  So we need
2845 					 * to unbind those, which are passed.
2846 					 *
2847 					 * Need to increment svc count to
2848 					 * compensate for ibt_unbind_service().
2849 					 */
2850 					ibcm_inc_hca_svc_cnt(hcap);
2851 					ibcm_dec_hca_acc_cnt(hcap);
2852 
2853 					(void) ibt_unbind_service(srv_hdl,
2854 					    sbindp);
2855 				} else {
2856 					ibcm_svc_bind_t		**sbpp;
2857 
2858 					/*
2859 					 * Bind failed for the first SID or the
2860 					 * only SID in question, then no need
2861 					 * to unbind, just free memory and
2862 					 * return error.
2863 					 */
2864 					mutex_enter(&ibcm_svc_info_lock);
2865 
2866 					sbpp = &srv_hdl->svc_bind_list;
2867 					sbp = *sbpp;
2868 					while (sbp != NULL) {
2869 						if (sbp == sbindp) {
2870 							*sbpp = sbp->sbind_link;
2871 							break;
2872 						}
2873 						sbpp = &sbp->sbind_link;
2874 						sbp = *sbpp;
2875 					}
2876 					mutex_exit(&ibcm_svc_info_lock);
2877 					ibcm_dec_hca_acc_cnt(hcap);
2878 
2879 					kmem_free(sbindp, sizeof (*sbindp));
2880 				}
2881 				return (status);
2882 			}
2883 		}
2884 	}
2885 	ibcm_inc_hca_svc_cnt(hcap);
2886 	ibcm_dec_hca_acc_cnt(hcap);
2887 
2888 	/* If this message isn't seen then ibt_bind_service failed */
2889 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: DONE (%p, %llX:%llX)",
2890 	    srv_hdl, gid.gid_prefix, gid.gid_guid);
2891 
2892 	if (sb_hdl_p != NULL)
2893 		*sb_hdl_p = sbindp;
2894 
2895 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sbindp))
2896 
2897 	return (IBT_SUCCESS);
2898 }
2899 
2900 ibt_status_t
2901 ibt_unbind_service(ibt_srv_hdl_t srv_hdl, ibt_sbind_hdl_t sbindp)
2902 {
2903 	ib_svc_id_t	sid, end_sid;
2904 	ibt_status_t	rval;
2905 	ibcm_hca_info_t	*hcap;
2906 	ibcm_svc_bind_t	*sbp, **sbpp;
2907 
2908 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service(%p, %p)",
2909 	    srv_hdl, sbindp);
2910 
2911 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
2912 
2913 	/* If there is a service on hca, respective hcap cannot go away */
2914 	ASSERT(hcap != NULL);
2915 
2916 	mutex_enter(&ibcm_svc_info_lock);
2917 
2918 	sbpp = &srv_hdl->svc_bind_list;
2919 	sbp = *sbpp;
2920 	while (sbp != NULL) {
2921 		if (sbp == sbindp) {
2922 			*sbpp = sbp->sbind_link;
2923 			break;
2924 		}
2925 		sbpp = &sbp->sbind_link;
2926 		sbp = *sbpp;
2927 	}
2928 	sid = srv_hdl->svc_id;
2929 	end_sid = srv_hdl->svc_id + srv_hdl->svc_num_sids - 1;
2930 	if (sbp != NULL)
2931 		while (sbp->sbind_rewrite_state == IBCM_REWRITE_BUSY)
2932 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
2933 	mutex_exit(&ibcm_svc_info_lock);
2934 
2935 	if (sbp == NULL) {
2936 		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2937 		    "service binding not found: srv_hdl %p, srv_bind %p",
2938 		    srv_hdl, sbindp);
2939 		ibcm_dec_hca_acc_cnt(hcap);
2940 		return (IBT_INVALID_PARAM);
2941 	}
2942 
2943 	if (sbindp->sbind_pkey != 0) {	/* Are there service records? */
2944 		ibtl_cm_hca_port_t	port;
2945 		sa_service_record_t	srv_rec;
2946 		ibmf_saa_handle_t	saa_handle;
2947 		ibt_status_t		status;
2948 
2949 		/* get the default SGID of the port */
2950 		if ((status = ibtl_cm_get_hca_port(sbindp->sbind_gid, 0, &port))
2951 		    != IBT_SUCCESS) {
2952 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2953 			    "ibtl_cm_get_hca_port failed: %d", status);
2954 			/* we're done, but there may be stale service records */
2955 			goto done;
2956 		}
2957 
2958 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2959 		if (saa_handle == NULL) {
2960 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2961 			    "saa_handle is NULL");
2962 			/* we're done, but there may be stale service records */
2963 			goto done;
2964 		}
2965 
2966 		/* Fill in fields of srv_rec */
2967 		bzero(&srv_rec, sizeof (srv_rec));
2968 
2969 		srv_rec.ServiceP_Key = sbindp->sbind_pkey;
2970 		srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
2971 		srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
2972 		srv_rec.ServiceGID = sbindp->sbind_gid;
2973 		(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
2974 
2975 		while (sid <= end_sid) {
2976 
2977 			srv_rec.ServiceID = sid;
2978 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2979 
2980 			rval = ibcm_write_service_record(saa_handle, &srv_rec,
2981 			    IBMF_SAA_DELETE);
2982 
2983 			IBTF_DPRINTF_L4(cmlog, "ibt_unbind_service: "
2984 			    "ibcm_write_service_record rval = %d, SID %llx",
2985 			    rval, sid);
2986 			if (rval != IBT_SUCCESS) {
2987 				/* this is not considered a reason to fail */
2988 				IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2989 				    "ibcm_write_service_record fails %d, "
2990 				    "sid %llx", rval, sid);
2991 			}
2992 			sid++;
2993 		}
2994 	}
2995 done:
2996 	ibcm_dec_hca_svc_cnt(hcap);
2997 	ibcm_dec_hca_acc_cnt(hcap);
2998 	kmem_free(sbindp, sizeof (*sbindp));
2999 
3000 	/* If this message isn't seen then ibt_unbind_service failed */
3001 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: done !!");
3002 
3003 	return (IBT_SUCCESS);
3004 }
3005 
3006 /*
3007  * Simply pull off each binding from the list and unbind it.
3008  * If any of the unbind calls fail, we fail.
3009  */
3010 ibt_status_t
3011 ibt_unbind_all_services(ibt_srv_hdl_t srv_hdl)
3012 {
3013 	ibt_status_t	status;
3014 	ibcm_svc_bind_t	*sbp;
3015 
3016 	mutex_enter(&ibcm_svc_info_lock);
3017 	sbp = NULL;
3018 
3019 	/* this compare keeps the loop from being infinite */
3020 	while (sbp != srv_hdl->svc_bind_list) {
3021 		sbp = srv_hdl->svc_bind_list;
3022 		mutex_exit(&ibcm_svc_info_lock);
3023 		status = ibt_unbind_service(srv_hdl, sbp);
3024 		if (status != IBT_SUCCESS)
3025 			return (status);
3026 		mutex_enter(&ibcm_svc_info_lock);
3027 		if (srv_hdl->svc_bind_list == NULL)
3028 			break;
3029 	}
3030 	mutex_exit(&ibcm_svc_info_lock);
3031 	return (IBT_SUCCESS);
3032 }
3033 
3034 /*
3035  * ibt_deregister_service()
3036  *	Deregister a service with the IBCM
3037  *
3038  * INPUTS:
3039  *	ibt_hdl		The IBT client handle returned to the client
3040  *			on an ibt_attach() call.
3041  *
3042  *	srv_hdl		The address of a service identification handle, used
3043  *			to de-register a service.
3044  * RETURN VALUES:
3045  *	IBT_SUCCESS	on success (or respective failure on error)
3046  */
3047 ibt_status_t
3048 ibt_deregister_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_hdl_t srv_hdl)
3049 {
3050 	ibcm_svc_info_t		*svcp;
3051 	ibcm_svc_lookup_t	svc;
3052 
3053 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(%p (%s), %p)",
3054 	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl), srv_hdl);
3055 
3056 	mutex_enter(&ibcm_svc_info_lock);
3057 
3058 	if (srv_hdl->svc_bind_list != NULL) {
3059 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service:"
3060 		    " srv_hdl %p still has bindings", srv_hdl);
3061 		mutex_exit(&ibcm_svc_info_lock);
3062 		return (IBT_CM_SERVICE_BUSY);
3063 	}
3064 	svc.sid = srv_hdl->svc_id;
3065 	svc.num_sids = 1;
3066 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_service: SID 0x%llX, numsids %d",
3067 	    srv_hdl->svc_id, srv_hdl->svc_num_sids);
3068 
3069 #ifdef __lock_lint
3070 	ibcm_svc_compare(NULL, NULL);
3071 #endif
3072 	svcp = avl_find(&ibcm_svc_avl_tree, &svc, NULL);
3073 	if (svcp != srv_hdl) {
3074 		mutex_exit(&ibcm_svc_info_lock);
3075 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(): "
3076 		    "srv_hdl %p not found", srv_hdl);
3077 		return (IBT_INVALID_PARAM);
3078 	}
3079 	avl_remove(&ibcm_svc_avl_tree, svcp);
3080 
3081 	/* wait for active REQ/SREQ handling to be done */
3082 	svcp->svc_to_delete = 1;
3083 	while (svcp->svc_ref_cnt != 0)
3084 		cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3085 
3086 	mutex_exit(&ibcm_svc_info_lock);
3087 
3088 	if ((srv_hdl->svc_id & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL)
3089 		ibcm_free_local_sids(srv_hdl->svc_id, srv_hdl->svc_num_sids);
3090 
3091 	ibtl_cm_change_service_cnt(ibt_hdl, -srv_hdl->svc_num_sids);
3092 	kmem_free(srv_hdl, sizeof (*srv_hdl));
3093 
3094 	/* If this message isn't seen then ibt_deregister_service failed */
3095 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service: done !!");
3096 
3097 	return (IBT_SUCCESS);
3098 }
3099 
3100 ibcm_status_t
3101 ibcm_ar_init(void)
3102 {
3103 	ib_svc_id_t	sid = IBCM_DAPL_ATS_SID;
3104 	ibcm_svc_info_t *tmp_svcp;
3105 
3106 	IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init()");
3107 
3108 	/* remove this special SID from the pool of available SIDs */
3109 	if ((tmp_svcp = ibcm_create_svc_entry(sid, 1)) == NULL) {
3110 		IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init: "
3111 		    "DAPL ATS SID 0x%llx already registered", (longlong_t)sid);
3112 		return (IBCM_FAILURE);
3113 	}
3114 	mutex_enter(&ibcm_svc_info_lock);
3115 	ibcm_ar_svcinfop = tmp_svcp;
3116 	ibcm_ar_list = NULL;	/* no address records registered yet */
3117 	mutex_exit(&ibcm_svc_info_lock);
3118 	return (IBCM_SUCCESS);
3119 }
3120 
3121 ibcm_status_t
3122 ibcm_ar_fini(void)
3123 {
3124 	ibcm_ar_t	*ar_list;
3125 	ibcm_svc_info_t	*tmp_svcp;
3126 
3127 	mutex_enter(&ibcm_svc_info_lock);
3128 	ar_list = ibcm_ar_list;
3129 
3130 	if (ar_list == NULL &&
3131 	    avl_numnodes(&ibcm_svc_avl_tree) == 1 &&
3132 	    avl_first(&ibcm_svc_avl_tree) == ibcm_ar_svcinfop) {
3133 		avl_remove(&ibcm_svc_avl_tree, ibcm_ar_svcinfop);
3134 		tmp_svcp = ibcm_ar_svcinfop;
3135 		mutex_exit(&ibcm_svc_info_lock);
3136 		kmem_free(tmp_svcp, sizeof (*ibcm_ar_svcinfop));
3137 		return (IBCM_SUCCESS);
3138 	}
3139 	mutex_exit(&ibcm_svc_info_lock);
3140 	return (IBCM_FAILURE);
3141 }
3142 
3143 
3144 /*
3145  * Return to the caller:
3146  *	IBT_SUCCESS		Found a perfect match.
3147  *				*arpp is set to the record.
3148  *	IBT_INCONSISTENT_AR	Found a record that's inconsistent.
3149  *	IBT_AR_NOT_REGISTERED	Found no record with same GID/pkey and
3150  *				found no record with same data.
3151  */
3152 static ibt_status_t
3153 ibcm_search_ar(ibt_ar_t *arp, ibcm_ar_t **arpp)
3154 {
3155 	ibcm_ar_t	*tmp;
3156 	int		i;
3157 
3158 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3159 	tmp = ibcm_ar_list;
3160 	while (tmp != NULL) {
3161 		if (tmp->ar.ar_gid.gid_prefix == arp->ar_gid.gid_prefix &&
3162 		    tmp->ar.ar_gid.gid_guid == arp->ar_gid.gid_guid &&
3163 		    tmp->ar.ar_pkey == arp->ar_pkey) {
3164 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3165 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3166 					return (IBT_INCONSISTENT_AR);
3167 			*arpp = tmp;
3168 			return (IBT_SUCCESS);
3169 		} else {
3170 			/* if all the data bytes match, we have inconsistency */
3171 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3172 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3173 					break;
3174 			if (i == IBCM_DAPL_ATS_NBYTES)
3175 				return (IBT_INCONSISTENT_AR);
3176 			/* try next address record */
3177 		}
3178 		tmp = tmp->ar_link;
3179 	}
3180 	return (IBT_AR_NOT_REGISTERED);
3181 }
3182 
3183 ibt_status_t
3184 ibt_register_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3185 {
3186 	ibcm_ar_t		*found;
3187 	ibcm_ar_t		*tmp;
3188 	ibt_status_t		status;
3189 	ibt_status_t		s1, s2;
3190 	char			*s;
3191 	ibcm_ar_ref_t		*hdlp;
3192 	ibcm_ar_t		*new;
3193 	ibcm_ar_t		**linkp;
3194 	ibtl_cm_hca_port_t	cm_port;
3195 	uint16_t		pkey_ix;
3196 	ibcm_hca_info_t		*hcap;
3197 	ibmf_saa_handle_t	saa_handle;
3198 	sa_service_record_t	*srv_recp;
3199 	uint64_t		gid_ored;
3200 
3201 	IBTF_DPRINTF_L3(cmlog, "ibt_register_ar: PKey 0x%X GID %llX:%llX",
3202 	    arp->ar_pkey, (longlong_t)arp->ar_gid.gid_prefix,
3203 	    (longlong_t)arp->ar_gid.gid_guid);
3204 
3205 	/*
3206 	 * If P_Key is 0, but GID is not, this query is invalid.
3207 	 * If GID is 0, but P_Key is not, this query is invalid.
3208 	 */
3209 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3210 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3211 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3212 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3213 		    "GID/P_Key is not valid");
3214 		return (IBT_INVALID_PARAM);
3215 	}
3216 
3217 	/* assume success, so these might be needed */
3218 	hdlp = kmem_alloc(sizeof (*hdlp), KM_SLEEP);
3219 	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
3220 
3221 	mutex_enter(&ibcm_svc_info_lock);
3222 	/* search for existing GID/pkey (there can be at most 1) */
3223 	status = ibcm_search_ar(arp, &found);
3224 	if (status == IBT_INCONSISTENT_AR) {
3225 		mutex_exit(&ibcm_svc_info_lock);
3226 		kmem_free(new, sizeof (*new));
3227 		kmem_free(hdlp, sizeof (*hdlp));
3228 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3229 		    "address record is inconsistent with a known one");
3230 		return (IBT_INCONSISTENT_AR);
3231 	} else if (status == IBT_SUCCESS) {
3232 		if (found->ar_flags == IBCM_AR_INITING) {
3233 			found->ar_waiters++;
3234 			cv_wait(&found->ar_cv, &ibcm_svc_info_lock);
3235 			found->ar_waiters--;
3236 		}
3237 		if (found->ar_flags == IBCM_AR_FAILED) {
3238 			if (found->ar_waiters == 0) {
3239 				cv_destroy(&found->ar_cv);
3240 				kmem_free(found, sizeof (*found));
3241 			}
3242 			mutex_exit(&ibcm_svc_info_lock);
3243 			kmem_free(new, sizeof (*new));
3244 			kmem_free(hdlp, sizeof (*hdlp));
3245 			return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0));
3246 		}
3247 		hdlp->ar_ibt_hdl = ibt_hdl;
3248 		hdlp->ar_ref_link = found->ar_ibt_hdl_list;
3249 		found->ar_ibt_hdl_list = hdlp;
3250 		mutex_exit(&ibcm_svc_info_lock);
3251 		kmem_free(new, sizeof (*new));
3252 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3253 		return (IBT_SUCCESS);
3254 	} else {
3255 		ASSERT(status == IBT_AR_NOT_REGISTERED);
3256 	}
3257 	hdlp->ar_ref_link = NULL;
3258 	hdlp->ar_ibt_hdl = ibt_hdl;
3259 	new->ar_ibt_hdl_list = hdlp;
3260 	new->ar = *arp;
3261 	new->ar_flags = IBCM_AR_INITING;
3262 	new->ar_waiters = 0;
3263 	cv_init(&new->ar_cv, NULL, CV_DEFAULT, NULL);
3264 	new->ar_link = ibcm_ar_list;
3265 	ibcm_ar_list = new;
3266 
3267 	/* verify GID/pkey is valid for a local port, etc. */
3268 	hcap = NULL;
3269 	if ((s1 = ibtl_cm_get_hca_port(arp->ar_gid, 0, &cm_port))
3270 	    != IBT_SUCCESS ||
3271 	    (s2 = ibt_pkey2index_byguid(cm_port.hp_hca_guid, cm_port.hp_port,
3272 	    arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS ||
3273 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL) {
3274 		cv_destroy(&new->ar_cv);
3275 		ibcm_ar_list = new->ar_link;
3276 		mutex_exit(&ibcm_svc_info_lock);
3277 		kmem_free(new, sizeof (*new));
3278 		kmem_free(hdlp, sizeof (*hdlp));
3279 		status = IBT_INVALID_PARAM;
3280 		if (s1 == IBT_HCA_PORT_NOT_ACTIVE) {
3281 			s = "PORT DOWN";
3282 			status = IBT_HCA_PORT_NOT_ACTIVE;
3283 		} else if (s1 != IBT_SUCCESS)
3284 			s = "GID not found";
3285 		else if (s2 != IBT_SUCCESS)
3286 			s = "PKEY not found";
3287 		else
3288 			s = "CM could not find its HCA entry";
3289 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: %s, status = %d",
3290 		    s, status);
3291 		return (status);
3292 	}
3293 	mutex_exit(&ibcm_svc_info_lock);
3294 	saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port);
3295 
3296 	/* create service record */
3297 	srv_recp = kmem_zalloc(sizeof (*srv_recp), KM_SLEEP);
3298 	srv_recp->ServiceLease = 0xFFFFFFFF;	/* infinite */
3299 	srv_recp->ServiceP_Key = arp->ar_pkey;
3300 	srv_recp->ServiceKey_hi = 0xDA410000ULL;	/* DAPL */
3301 	srv_recp->ServiceKey_lo = 0xA7500000ULL;	/* ATS */
3302 	(void) strcpy((char *)srv_recp->ServiceName, IBCM_DAPL_ATS_NAME);
3303 	srv_recp->ServiceGID = arp->ar_gid;
3304 	bcopy(arp->ar_data, srv_recp->ServiceData, IBCM_DAPL_ATS_NBYTES);
3305 	srv_recp->ServiceID = IBCM_DAPL_ATS_SID;
3306 
3307 	/* insert service record into the SA */
3308 
3309 	IBCM_DUMP_SERVICE_REC(srv_recp);
3310 
3311 	if (saa_handle != NULL)
3312 		status = ibcm_write_service_record(saa_handle, srv_recp,
3313 		    IBMF_SAA_UPDATE);
3314 	else
3315 		status = IBT_HCA_PORT_NOT_ACTIVE;
3316 
3317 	if (status != IBT_SUCCESS) {
3318 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: sa access fails %d, "
3319 		    "sid %llX", status, (longlong_t)srv_recp->ServiceID);
3320 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: FAILED for gid "
3321 		    "%llX:%llX pkey 0x%X", (longlong_t)arp->ar_gid.gid_prefix,
3322 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3323 
3324 		kmem_free(srv_recp, sizeof (*srv_recp));
3325 		kmem_free(hdlp, sizeof (*hdlp));
3326 
3327 		mutex_enter(&ibcm_svc_info_lock);
3328 		linkp = &ibcm_ar_list;
3329 		tmp = *linkp;
3330 		while (tmp != NULL) {
3331 			if (tmp == new) {
3332 				*linkp = new->ar_link;
3333 				break;
3334 			}
3335 			linkp = &tmp->ar_link;
3336 			tmp = *linkp;
3337 		}
3338 		if (new->ar_waiters > 0) {
3339 			new->ar_flags = IBCM_AR_FAILED;
3340 			cv_broadcast(&new->ar_cv);
3341 			mutex_exit(&ibcm_svc_info_lock);
3342 		} else {
3343 			cv_destroy(&new->ar_cv);
3344 			mutex_exit(&ibcm_svc_info_lock);
3345 			kmem_free(new, sizeof (*new));
3346 		}
3347 		ibcm_dec_hca_acc_cnt(hcap);
3348 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3349 		    "IBMF_SAA failed to write address record");
3350 	} else {					/* SUCCESS */
3351 		uint8_t		*b;
3352 
3353 		IBTF_DPRINTF_L3(cmlog, "ibt_register_ar: SUCCESS for gid "
3354 		    "%llx:%llx pkey %x", (longlong_t)arp->ar_gid.gid_prefix,
3355 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3356 		b = arp->ar_data;
3357 
3358 		IBTF_DPRINTF_L3(cmlog, "ibt_register_ar:"
3359 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3360 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3361 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3362 		mutex_enter(&ibcm_svc_info_lock);
3363 		new->ar_srv_recp = srv_recp;
3364 		new->ar_saa_handle = saa_handle;
3365 		new->ar_port = cm_port.hp_port;
3366 		new->ar_hcap = hcap;
3367 		new->ar_flags = IBCM_AR_SUCCESS;
3368 		if (new->ar_waiters > 0)
3369 			cv_broadcast(&new->ar_cv);
3370 		mutex_exit(&ibcm_svc_info_lock);
3371 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3372 		/* do not call ibcm_dec_hca_acc_cnt(hcap) until deregister */
3373 	}
3374 	return (status);
3375 }
3376 
3377 ibt_status_t
3378 ibt_deregister_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3379 {
3380 	ibcm_ar_t		*found;
3381 	ibcm_ar_t		*tmp;
3382 	ibcm_ar_t		**linkp;
3383 	ibcm_ar_ref_t		*hdlp;
3384 	ibcm_ar_ref_t		**hdlpp;
3385 	ibt_status_t		status;
3386 	ibmf_saa_handle_t	saa_handle;
3387 	sa_service_record_t	*srv_recp;
3388 	uint64_t		gid_ored;
3389 
3390 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_ar: pkey %x", arp->ar_pkey);
3391 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_ar: gid %llx:%llx",
3392 	    (longlong_t)arp->ar_gid.gid_prefix,
3393 	    (longlong_t)arp->ar_gid.gid_guid);
3394 
3395 	/*
3396 	 * If P_Key is 0, but GID is not, this query is invalid.
3397 	 * If GID is 0, but P_Key is not, this query is invalid.
3398 	 */
3399 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3400 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3401 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3402 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3403 		    "GID/P_Key is not valid");
3404 		return (IBT_INVALID_PARAM);
3405 	}
3406 
3407 	mutex_enter(&ibcm_svc_info_lock);
3408 	/* search for existing GID/pkey (there can be at most 1) */
3409 	status = ibcm_search_ar(arp, &found);
3410 	if (status == IBT_INCONSISTENT_AR || status == IBT_AR_NOT_REGISTERED) {
3411 		mutex_exit(&ibcm_svc_info_lock);
3412 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3413 		    "address record not found");
3414 		return (IBT_AR_NOT_REGISTERED);
3415 	}
3416 	ASSERT(status == IBT_SUCCESS);
3417 
3418 	hdlpp = &found->ar_ibt_hdl_list;
3419 	hdlp = *hdlpp;
3420 	while (hdlp != NULL) {
3421 		if (hdlp->ar_ibt_hdl == ibt_hdl)
3422 			break;
3423 		hdlpp = &hdlp->ar_ref_link;
3424 		hdlp = *hdlpp;
3425 	}
3426 	if (hdlp == NULL) {	/* could not find ibt_hdl on list */
3427 		mutex_exit(&ibcm_svc_info_lock);
3428 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3429 		    "address record found, but not for this client");
3430 		return (IBT_AR_NOT_REGISTERED);
3431 	}
3432 	*hdlpp = hdlp->ar_ref_link;	/* remove ref for this client */
3433 	if (found->ar_ibt_hdl_list == NULL && found->ar_waiters == 0) {
3434 		/* last entry was removed */
3435 		found->ar_flags = IBCM_AR_INITING; /* hold off register_ar */
3436 		saa_handle = found->ar_saa_handle;
3437 		srv_recp = found->ar_srv_recp;
3438 
3439 		/* wait if this service record is being rewritten */
3440 		while (found->ar_rewrite_state == IBCM_REWRITE_BUSY)
3441 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3442 		mutex_exit(&ibcm_svc_info_lock);
3443 
3444 		/* remove service record */
3445 		status = ibcm_write_service_record(saa_handle, srv_recp,
3446 		    IBMF_SAA_DELETE);
3447 		if (status != IBT_SUCCESS)
3448 			IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3449 			    "IBMF_SAA failed to delete address record");
3450 		mutex_enter(&ibcm_svc_info_lock);
3451 		if (found->ar_waiters == 0) {	/* still no waiters */
3452 			linkp = &ibcm_ar_list;
3453 			tmp = *linkp;
3454 			while (tmp != found) {
3455 				linkp = &tmp->ar_link;
3456 				tmp = *linkp;
3457 			}
3458 			*linkp = tmp->ar_link;
3459 			ibcm_dec_hca_acc_cnt(found->ar_hcap);
3460 			kmem_free(srv_recp, sizeof (*srv_recp));
3461 			cv_destroy(&found->ar_cv);
3462 			kmem_free(found, sizeof (*found));
3463 		} else {
3464 			/* add service record back in for the waiters */
3465 			mutex_exit(&ibcm_svc_info_lock);
3466 			status = ibcm_write_service_record(saa_handle, srv_recp,
3467 			    IBMF_SAA_UPDATE);
3468 			mutex_enter(&ibcm_svc_info_lock);
3469 			if (status == IBT_SUCCESS)
3470 				found->ar_flags = IBCM_AR_SUCCESS;
3471 			else {
3472 				found->ar_flags = IBCM_AR_FAILED;
3473 				IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3474 				    "IBMF_SAA failed to write address record");
3475 			}
3476 			cv_broadcast(&found->ar_cv);
3477 		}
3478 	}
3479 	mutex_exit(&ibcm_svc_info_lock);
3480 	kmem_free(hdlp, sizeof (*hdlp));
3481 	ibtl_cm_change_service_cnt(ibt_hdl, -1);
3482 	return (status);
3483 }
3484 
3485 ibt_status_t
3486 ibt_query_ar(ib_gid_t *sgid, ibt_ar_t *queryp, ibt_ar_t *resultp)
3487 {
3488 	sa_service_record_t	svcrec_req;
3489 	sa_service_record_t	*svcrec_resp;
3490 	void			*results_p;
3491 	uint64_t		component_mask = 0;
3492 	uint64_t		gid_ored;
3493 	size_t			length;
3494 	int			num_rec;
3495 	int			i;
3496 	ibmf_saa_access_args_t	access_args;
3497 	ibt_status_t		retval;
3498 	ibtl_cm_hca_port_t	cm_port;
3499 	ibcm_hca_info_t		*hcap;
3500 	ibmf_saa_handle_t	saa_handle;
3501 
3502 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar(%p, %p)", queryp, resultp);
3503 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: sgid %llx:%llx",
3504 	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid);
3505 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_pkey %x", queryp->ar_pkey);
3506 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_gid %llx:%llx",
3507 	    (longlong_t)queryp->ar_gid.gid_prefix,
3508 	    (longlong_t)queryp->ar_gid.gid_guid);
3509 
3510 	/*
3511 	 * If P_Key is 0, but GID is not, this query is invalid.
3512 	 * If GID is 0, but P_Key is not, this query is invalid.
3513 	 */
3514 	gid_ored = queryp->ar_gid.gid_guid | queryp->ar_gid.gid_prefix;
3515 	if ((queryp->ar_pkey == 0 && gid_ored != 0ULL) ||
3516 	    (queryp->ar_pkey != 0 && gid_ored == 0ULL)) {
3517 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: GID/P_Key is not valid");
3518 		return (IBT_INVALID_PARAM);
3519 	}
3520 
3521 	hcap = NULL;
3522 	if (ibtl_cm_get_hca_port(*sgid, 0, &cm_port) != IBT_SUCCESS ||
3523 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL ||
3524 	    (saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port)) == NULL) {
3525 		if (hcap != NULL)
3526 			ibcm_dec_hca_acc_cnt(hcap);
3527 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: sgid is not valid");
3528 		return (IBT_INVALID_PARAM);
3529 	}
3530 
3531 	bzero(&svcrec_req, sizeof (svcrec_req));
3532 
3533 	/* Is GID/P_Key Specified. */
3534 	if (queryp->ar_pkey != 0) {	/* GID is non-zero from check above */
3535 		svcrec_req.ServiceP_Key = queryp->ar_pkey;
3536 		component_mask |= SA_SR_COMPMASK_PKEY;
3537 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: P_Key %X",
3538 		    queryp->ar_pkey);
3539 		svcrec_req.ServiceGID = queryp->ar_gid;
3540 		component_mask |= SA_SR_COMPMASK_GID;
3541 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: GID %llX:%llX",
3542 		    (longlong_t)queryp->ar_gid.gid_prefix,
3543 		    (longlong_t)queryp->ar_gid.gid_guid);
3544 	}
3545 
3546 	/* Is ServiceData Specified. */
3547 	for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++) {
3548 		if (queryp->ar_data[i] != 0) {
3549 			bcopy(queryp->ar_data, svcrec_req.ServiceData,
3550 			    IBCM_DAPL_ATS_NBYTES);
3551 			component_mask |= 0xFFFF << 7;	/* all 16 Data8 */
3552 							/* components */
3553 			break;
3554 		}
3555 	}
3556 
3557 	/* Service Name */
3558 	(void) strcpy((char *)svcrec_req.ServiceName, IBCM_DAPL_ATS_NAME);
3559 	component_mask |= SA_SR_COMPMASK_NAME;
3560 
3561 	svcrec_req.ServiceID = IBCM_DAPL_ATS_SID;
3562 	component_mask |= SA_SR_COMPMASK_ID;
3563 
3564 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3565 	    "Perform SA Access: Mask: 0x%X", component_mask);
3566 
3567 	/*
3568 	 * Call in SA Access retrieve routine to get Service Records.
3569 	 *
3570 	 * SA Access framework allocated memory for the "results_p".
3571 	 * Make sure to deallocate once we are done with the results_p.
3572 	 * The size of the buffer allocated will be as returned in
3573 	 * "length" field.
3574 	 */
3575 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
3576 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
3577 	access_args.sq_component_mask = component_mask;
3578 	access_args.sq_template = &svcrec_req;
3579 	access_args.sq_template_length = sizeof (sa_service_record_t);
3580 	access_args.sq_callback = NULL;
3581 	access_args.sq_callback_arg = NULL;
3582 
3583 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
3584 	    &results_p);
3585 
3586 	ibcm_dec_hca_acc_cnt(hcap);
3587 	if (retval != IBT_SUCCESS) {
3588 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: SA Access Failed");
3589 		return (retval);
3590 	}
3591 
3592 	num_rec = length / sizeof (sa_service_record_t);
3593 
3594 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3595 	    "Found %d Service Records.", num_rec);
3596 
3597 	/* Validate the returned number of records. */
3598 	if ((results_p != NULL) && (num_rec > 0)) {
3599 		uint8_t		*b;
3600 
3601 		/* Just return info from the first service record. */
3602 		svcrec_resp = (sa_service_record_t *)results_p;
3603 
3604 		/* The Service GID and Service ID */
3605 		resultp->ar_gid = svcrec_resp->ServiceGID;
3606 		resultp->ar_pkey = svcrec_resp->ServiceP_Key;
3607 		bcopy(svcrec_resp->ServiceData,
3608 		    resultp->ar_data, IBCM_DAPL_ATS_NBYTES);
3609 
3610 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3611 		    "Found: pkey %x dgid %llX:%llX", resultp->ar_pkey,
3612 		    (longlong_t)resultp->ar_gid.gid_prefix,
3613 		    (longlong_t)resultp->ar_gid.gid_guid);
3614 		b = resultp->ar_data;
3615 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar:"
3616 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3617 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3618 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3619 
3620 		/* Deallocate the memory for results_p. */
3621 		kmem_free(results_p, length);
3622 		if (num_rec > 1)
3623 			retval = IBT_MULTIPLE_AR;
3624 		else
3625 			retval = IBT_SUCCESS;
3626 	} else {
3627 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3628 		    "ibmf_sa_access found 0 matching records");
3629 		retval = IBT_AR_NOT_REGISTERED;
3630 	}
3631 	return (retval);
3632 }
3633 
3634 /* mark all ATS service records associated with the port */
3635 static void
3636 ibcm_mark_ar(ib_guid_t hca_guid, uint8_t port)
3637 {
3638 	ibcm_ar_t	*tmp;
3639 
3640 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3641 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3642 		if (tmp->ar_hcap == NULL)
3643 			continue;
3644 		if (tmp->ar_hcap->hca_guid == hca_guid &&
3645 		    tmp->ar_port == port) {
3646 			/* even if it's busy, we mark it for rewrite */
3647 			tmp->ar_rewrite_state = IBCM_REWRITE_NEEDED;
3648 		}
3649 	}
3650 }
3651 
3652 /* rewrite all ATS service records */
3653 static int
3654 ibcm_rewrite_ar(void)
3655 {
3656 	ibcm_ar_t		*tmp;
3657 	ibmf_saa_handle_t	saa_handle;
3658 	sa_service_record_t	*srv_recp;
3659 	ibt_status_t		rval;
3660 	int			did_something = 0;
3661 
3662 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3663 check_for_work:
3664 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3665 		if (tmp->ar_rewrite_state == IBCM_REWRITE_NEEDED) {
3666 			tmp->ar_rewrite_state = IBCM_REWRITE_BUSY;
3667 			saa_handle = tmp->ar_saa_handle;
3668 			srv_recp = tmp->ar_srv_recp;
3669 			mutex_exit(&ibcm_svc_info_lock);
3670 			IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_ar: "
3671 			    "rewriting ar @ %p", tmp);
3672 			did_something = 1;
3673 			rval = ibcm_write_service_record(saa_handle, srv_recp,
3674 			    IBMF_SAA_UPDATE);
3675 			if (rval != IBT_SUCCESS)
3676 				IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_ar: "
3677 				    "ibcm_write_service_record failed: "
3678 				    "status = %d", rval);
3679 			mutex_enter(&ibcm_svc_info_lock);
3680 			/* if it got marked again, then we want to rewrite */
3681 			if (tmp->ar_rewrite_state == IBCM_REWRITE_BUSY)
3682 				tmp->ar_rewrite_state = IBCM_REWRITE_IDLE;
3683 			/* in case there was a waiter... */
3684 			cv_broadcast(&ibcm_svc_info_cv);
3685 			goto check_for_work;
3686 		}
3687 	}
3688 	return (did_something);
3689 }
3690 
3691 static void
3692 ibcm_rewrite_svc_record(ibcm_svc_info_t *srv_hdl, ibcm_svc_bind_t *sbindp)
3693 {
3694 	ibcm_hca_info_t		*hcap;
3695 	ib_svc_id_t		sid, start_sid, end_sid;
3696 	ibmf_saa_handle_t	saa_handle;
3697 	sa_service_record_t	srv_rec;
3698 	ibt_status_t		rval;
3699 
3700 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
3701 	if (hcap == NULL) {
3702 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3703 		    "NO HCA found for HCA GUID %llX", sbindp->sbind_hcaguid);
3704 		return;
3705 	}
3706 
3707 	saa_handle = ibcm_get_saa_handle(hcap, sbindp->sbind_port);
3708 	if (saa_handle == NULL) {
3709 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3710 		    "saa_handle is NULL");
3711 		ibcm_dec_hca_acc_cnt(hcap);
3712 		return;
3713 	}
3714 
3715 	IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_svc_record: "
3716 	    "rewriting svc '%s', port_guid = %llX", sbindp->sbind_name,
3717 	    sbindp->sbind_gid.gid_guid);
3718 
3719 	bzero(&srv_rec, sizeof (srv_rec));
3720 
3721 	srv_rec.ServiceLease = sbindp->sbind_lease;
3722 	srv_rec.ServiceP_Key = sbindp->sbind_pkey;
3723 	srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
3724 	srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
3725 	(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
3726 	srv_rec.ServiceGID = sbindp->sbind_gid;
3727 
3728 	bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
3729 
3730 	/* insert srv record into the SA */
3731 	start_sid = srv_hdl->svc_id;
3732 	end_sid = start_sid + srv_hdl->svc_num_sids - 1;
3733 	for (sid = start_sid; sid <= end_sid; sid++) {
3734 		srv_rec.ServiceID = sid;
3735 
3736 		rval = ibcm_write_service_record(saa_handle, &srv_rec,
3737 		    IBMF_SAA_UPDATE);
3738 
3739 		IBTF_DPRINTF_L4(cmlog, "ibcm_rewrite_svc_record: "
3740 		    "ibcm_write_service_record, SvcId = %llX, "
3741 		    "rval = %d", (longlong_t)sid, rval);
3742 		if (rval != IBT_SUCCESS) {
3743 			IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record:"
3744 			    " ibcm_write_service_record fails %d sid %llX",
3745 			    rval, (longlong_t)sid);
3746 		}
3747 	}
3748 	ibcm_dec_hca_acc_cnt(hcap);
3749 }
3750 
3751 /*
3752  * Task to mark all service records as needing to be rewritten to the SM/SA.
3753  * This task does not return until all of them have been rewritten.
3754  */
3755 void
3756 ibcm_service_record_rewrite_task(void *arg)
3757 {
3758 	ibcm_port_up_t	*pup = (ibcm_port_up_t *)arg;
3759 	ib_guid_t	hca_guid = pup->pup_hca_guid;
3760 	uint8_t		port = pup->pup_port;
3761 	ibcm_svc_info_t	*svcp;
3762 	ibcm_svc_bind_t	*sbp;
3763 	avl_tree_t	*avl_tree = &ibcm_svc_avl_tree;
3764 	static int	task_is_running = 0;
3765 
3766 	IBTF_DPRINTF_L3(cmlog, "ibcm_service_record_rewrite_task STARTED "
3767 	    "for hca_guid %llX, port %d", hca_guid, port);
3768 
3769 	mutex_enter(&ibcm_svc_info_lock);
3770 	ibcm_mark_ar(hca_guid, port);
3771 	for (svcp = avl_first(avl_tree); svcp != NULL;
3772 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3773 		sbp = svcp->svc_bind_list;
3774 		while (sbp != NULL) {
3775 			if (sbp->sbind_pkey != 0 &&
3776 			    sbp->sbind_port == port &&
3777 			    sbp->sbind_hcaguid == hca_guid) {
3778 				/* even if it's busy, we mark it for rewrite */
3779 				sbp->sbind_rewrite_state = IBCM_REWRITE_NEEDED;
3780 			}
3781 			sbp = sbp->sbind_link;
3782 		}
3783 	}
3784 	if (task_is_running) {
3785 		/* let the other task thread finish the work */
3786 		mutex_exit(&ibcm_svc_info_lock);
3787 		return;
3788 	}
3789 	task_is_running = 1;
3790 
3791 	(void) ibcm_rewrite_ar();
3792 
3793 check_for_work:
3794 	for (svcp = avl_first(avl_tree); svcp != NULL;
3795 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3796 		sbp = svcp->svc_bind_list;
3797 		while (sbp != NULL) {
3798 			if (sbp->sbind_rewrite_state == IBCM_REWRITE_NEEDED) {
3799 				sbp->sbind_rewrite_state = IBCM_REWRITE_BUSY;
3800 				mutex_exit(&ibcm_svc_info_lock);
3801 				ibcm_rewrite_svc_record(svcp, sbp);
3802 				mutex_enter(&ibcm_svc_info_lock);
3803 				/* if it got marked again, we want to rewrite */
3804 				if (sbp->sbind_rewrite_state ==
3805 				    IBCM_REWRITE_BUSY)
3806 					sbp->sbind_rewrite_state =
3807 					    IBCM_REWRITE_IDLE;
3808 				/* in case there was a waiter... */
3809 				cv_broadcast(&ibcm_svc_info_cv);
3810 				goto check_for_work;
3811 			}
3812 			sbp = sbp->sbind_link;
3813 		}
3814 	}
3815 	/*
3816 	 * If there were no service records to write, and we failed to
3817 	 * have to rewrite any more ATS service records, then we're done.
3818 	 */
3819 	if (ibcm_rewrite_ar() != 0)
3820 		goto check_for_work;
3821 	task_is_running = 0;
3822 	mutex_exit(&ibcm_svc_info_lock);
3823 
3824 	IBTF_DPRINTF_L3(cmlog, "ibcm_service_record_rewrite_task DONE");
3825 	kmem_free(pup, sizeof (ibcm_port_up_t));
3826 }
3827 
3828 ibt_status_t
3829 ibt_ofuvcm_get_req_data(void *session_id, ibt_ofuvcm_req_data_t *req_data)
3830 {
3831 	ibcm_state_data_t 	*statep = (ibcm_state_data_t *)session_id;
3832 	ibcm_req_msg_t 		*req_msgp;
3833 
3834 	IBTF_DPRINTF_L3(cmlog, "ibt_get_ofuvcm_req_data: session_id %p",
3835 	    session_id);
3836 	mutex_enter(&statep->state_mutex);
3837 	if ((statep->state != IBCM_STATE_REQ_RCVD) &&
3838 	    (statep->state != IBCM_STATE_MRA_SENT)) {
3839 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ofuvcm_req_data: Invalid "
3840 		    "State %x", statep->state);
3841 		mutex_exit(&statep->state_mutex);
3842 		return (IBT_CHAN_STATE_INVALID);
3843 	}
3844 	if (statep->mode == IBCM_ACTIVE_MODE) {
3845 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ofuvcm_req_data: Active mode "
3846 		    "not supported");
3847 		mutex_exit(&statep->state_mutex);
3848 		return (IBT_INVALID_PARAM);
3849 	}
3850 	ASSERT(statep->req_msgp);
3851 
3852 	/*
3853 	 * Fill in the additional req message values reqired for
3854 	 * RTR transition.
3855 	 * Should the PSN be same as the active side??
3856 	 */
3857 	req_msgp = (ibcm_req_msg_t *)statep->req_msgp;
3858 	req_data->req_rnr_nak_time = ibcm_default_rnr_nak_time;
3859 	req_data->req_path_mtu = req_msgp->req_mtu_plus >> 4;
3860 	req_data->req_rq_psn = b2h32(req_msgp->req_starting_psn_plus) >> 8;
3861 	mutex_exit(&statep->state_mutex);
3862 	return (IBT_SUCCESS);
3863 }
3864 
3865 ibt_status_t
3866 ibt_ofuvcm_proceed(ibt_cm_event_type_t event, void *session_id,
3867     ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
3868     void *priv_data, ibt_priv_data_len_t priv_data_len)
3869 {
3870 	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
3871 	ibt_status_t		ret;
3872 
3873 	IBTF_DPRINTF_L3(cmlog, "ibt_ofuvcm_proceed chan 0x%p event %x "
3874 	    "status %x session_id %p", statep->channel, event, status,
3875 	    session_id);
3876 
3877 	IBTF_DPRINTF_L5(cmlog, "ibt_ofuvcm_proceed chan 0x%p "
3878 	    "cm_event_data %p, priv_data %p priv_data_len %x",
3879 	    statep->channel, cm_event_data, priv_data, priv_data_len);
3880 
3881 	/* validate session_id and status */
3882 	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
3883 		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : Invalid Args");
3884 		return (IBT_INVALID_PARAM);
3885 	}
3886 
3887 	if (event != IBT_CM_EVENT_REQ_RCV) {
3888 		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : only for REQ_RCV");
3889 		return (IBT_INVALID_PARAM);
3890 	}
3891 	mutex_enter(&statep->state_mutex);
3892 	statep->is_this_ofuv_chan = B_TRUE;
3893 	mutex_exit(&statep->state_mutex);
3894 
3895 	ret = ibt_cm_proceed(event, session_id, status, cm_event_data,
3896 	    priv_data, priv_data_len);
3897 	return (ret);
3898 }
3899 
3900 /*
3901  * Function:
3902  * 	ibt_cm_proceed
3903  *
3904  * Verifies the arguments and dispatches the cm state machine processing
3905  * via taskq
3906  */
3907 
3908 ibt_status_t
3909 ibt_cm_proceed(ibt_cm_event_type_t event, void *session_id,
3910     ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
3911     void *priv_data, ibt_priv_data_len_t priv_data_len)
3912 {
3913 	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
3914 	ibcm_proceed_targs_t	*proceed_targs;
3915 	ibcm_proceed_error_t	proceed_error;
3916 
3917 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_proceed chan 0x%p event %x status %x "
3918 	    "session_id %p", statep->channel, event, status, session_id);
3919 
3920 	IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed chan 0x%p cm_event_data %p, "
3921 	    "priv_data %p priv_data_len %x", statep->channel, cm_event_data,
3922 	    priv_data, priv_data_len);
3923 
3924 	/* validate session_id and status */
3925 	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
3926 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : Invalid Args");
3927 		return (IBT_INVALID_PARAM);
3928 	}
3929 
3930 	/* If priv data len specified, then priv_data cannot be NULL */
3931 	if ((priv_data_len > 0) && (priv_data == NULL))
3932 		return (IBT_INVALID_PARAM);
3933 
3934 	proceed_error = IBCM_PROCEED_INVALID_NONE;
3935 
3936 	mutex_enter(&statep->state_mutex);
3937 	if (event == IBT_CM_EVENT_REQ_RCV) {
3938 
3939 		if ((statep->state != IBCM_STATE_REQ_RCVD) &&
3940 		    (statep->state != IBCM_STATE_MRA_SENT))
3941 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3942 		else if (priv_data_len > IBT_REP_PRIV_DATA_SZ)
3943 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3944 
3945 	} else if (event == IBT_CM_EVENT_REP_RCV) {
3946 		if ((statep->state != IBCM_STATE_REP_RCVD) &&
3947 		    (statep->state != IBCM_STATE_MRA_REP_SENT))
3948 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3949 		else if (priv_data_len > IBT_RTU_PRIV_DATA_SZ)
3950 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3951 	} else if (event == IBT_CM_EVENT_LAP_RCV) {
3952 		if ((statep->ap_state != IBCM_AP_STATE_LAP_RCVD) &&
3953 		    (statep->ap_state != IBCM_AP_STATE_MRA_LAP_SENT))
3954 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3955 		else if (priv_data_len > IBT_APR_PRIV_DATA_SZ)
3956 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3957 	} else if (event == IBT_CM_EVENT_CONN_CLOSED) {
3958 		if (statep->state != IBCM_STATE_DREQ_RCVD)
3959 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3960 		else if (priv_data_len > IBT_DREP_PRIV_DATA_SZ)
3961 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3962 	} else {
3963 			proceed_error = IBCM_PROCEED_INVALID_EVENT;
3964 	}
3965 
3966 	/* if there is an error, print an error message and return */
3967 	if (proceed_error != IBCM_PROCEED_INVALID_NONE) {
3968 		mutex_exit(&statep->state_mutex);
3969 		if (proceed_error == IBCM_PROCEED_INVALID_EVENT_STATE) {
3970 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3971 			    "Invalid Event/State combination specified",
3972 			    statep->channel);
3973 			return (IBT_INVALID_PARAM);
3974 		} else if (proceed_error == IBCM_PROCEED_INVALID_PRIV_SZ) {
3975 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3976 			    "Invalid Event/priv len combination specified",
3977 			    statep->channel);
3978 			return (IBT_INVALID_PARAM);
3979 		} else if (proceed_error == IBCM_PROCEED_INVALID_EVENT) {
3980 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3981 			    "Invalid Event specified", statep->channel);
3982 			return (IBT_INVALID_PARAM);
3983 		} else {
3984 			ASSERT(proceed_error == IBCM_PROCEED_INVALID_LAP);
3985 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3986 			    "IBT_CM_EVENT_LAP_RCV not supported",
3987 			    statep->channel);
3988 			/* UNTIL HCA DRIVER ENABLES AP SUPPORT, FAIL THE CALL */
3989 			return (IBT_APM_NOT_SUPPORTED);
3990 		}
3991 	}
3992 
3993 
3994 	/* wait until client's CM handler returns DEFER status back to CM */
3995 
3996 	while (statep->clnt_proceed == IBCM_BLOCK) {
3997 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed : chan 0x%p blocked for "
3998 		    "return of client's cm handler", statep->channel);
3999 		cv_wait(&statep->block_client_cv, &statep->state_mutex);
4000 	}
4001 
4002 	if (statep->clnt_proceed == IBCM_FAIL) {
4003 		mutex_exit(&statep->state_mutex);
4004 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p Failed as "
4005 		    "client returned non-DEFER status from cm handler",
4006 		    statep->channel);
4007 		return (IBT_CHAN_STATE_INVALID);
4008 	}
4009 
4010 	ASSERT(statep->clnt_proceed == IBCM_UNBLOCK);
4011 	statep->clnt_proceed = IBCM_FAIL;
4012 	mutex_exit(&statep->state_mutex);
4013 
4014 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
4015 
4016 	/* the state machine processing is done in a separate thread */
4017 
4018 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
4019 	proceed_targs = kmem_alloc(sizeof (ibcm_proceed_targs_t),
4020 	    KM_SLEEP);
4021 
4022 	proceed_targs->event  = event;
4023 	proceed_targs->status = status;
4024 	proceed_targs->priv_data_len = priv_data_len;
4025 
4026 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
4027 
4028 	proceed_targs->tst.rc.statep = statep;
4029 	bcopy(cm_event_data, &proceed_targs->tst.rc.rc_cm_event_data,
4030 	    sizeof (ibt_cm_proceed_reply_t));
4031 
4032 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
4033 
4034 	(void) taskq_dispatch(ibcm_taskq, ibcm_proceed_via_taskq,
4035 	    proceed_targs, TQ_SLEEP);
4036 
4037 	return (IBT_SUCCESS);
4038 }
4039 
4040 /*
4041  * Function:
4042  * 	ibcm_proceed_via_taskq
4043  *
4044  * Called from taskq, dispatched by ibt_cm_proceed
4045  * Completes the cm state processing for ibt_cm_proceed
4046  */
4047 void
4048 ibcm_proceed_via_taskq(void *targs)
4049 {
4050 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
4051 	ibcm_state_data_t *statep = proceed_targs->tst.rc.statep;
4052 	ibt_cm_reason_t reject_reason;
4053 	uint8_t arej_len;
4054 	ibcm_status_t response;
4055 	ibcm_clnt_reply_info_t clnt_info;
4056 
4057 	clnt_info.reply_event = &proceed_targs->tst.rc.rc_cm_event_data;
4058 	clnt_info.priv_data = proceed_targs->priv_data;
4059 	clnt_info.priv_data_len = proceed_targs->priv_data_len;
4060 
4061 	IBTF_DPRINTF_L4(cmlog, "ibcm_proceed_via_taskq chan 0x%p targs %x",
4062 	    statep->channel, targs);
4063 
4064 	if (proceed_targs->event == IBT_CM_EVENT_REQ_RCV) {
4065 		response =
4066 		    ibcm_process_cep_req_cm_hdlr(statep, proceed_targs->status,
4067 		    &clnt_info, &reject_reason, &arej_len,
4068 		    (ibcm_req_msg_t *)statep->defer_cm_msg);
4069 
4070 		ibcm_handle_cep_req_response(statep, response, reject_reason,
4071 		    arej_len);
4072 
4073 	} else if (proceed_targs->event == IBT_CM_EVENT_REP_RCV) {
4074 		response =
4075 		    ibcm_process_cep_rep_cm_hdlr(statep, proceed_targs->status,
4076 		    &clnt_info, &reject_reason, &arej_len,
4077 		    (ibcm_rep_msg_t *)statep->defer_cm_msg);
4078 
4079 		ibcm_handle_cep_rep_response(statep, response, reject_reason,
4080 		    arej_len, (ibcm_rep_msg_t *)statep->defer_cm_msg);
4081 
4082 	} else if (proceed_targs->event == IBT_CM_EVENT_LAP_RCV) {
4083 		ibcm_process_cep_lap_cm_hdlr(statep, proceed_targs->status,
4084 		    &clnt_info, (ibcm_lap_msg_t *)statep->defer_cm_msg,
4085 		    (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg));
4086 
4087 		ibcm_post_apr_mad(statep);
4088 
4089 	} else {
4090 		ASSERT(proceed_targs->event == IBT_CM_EVENT_CONN_CLOSED);
4091 		ibcm_handle_cep_dreq_response(statep, proceed_targs->priv_data,
4092 		    proceed_targs->priv_data_len);
4093 	}
4094 
4095 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4096 }
4097 
4098 /*
4099  * Function:
4100  * 	ibt_cm_ud_proceed
4101  *
4102  * Verifies the arguments and dispatches the cm state machine processing
4103  * via taskq
4104  */
4105 ibt_status_t
4106 ibt_cm_ud_proceed(void *session_id, ibt_channel_hdl_t ud_channel,
4107     ibt_cm_status_t status, ibt_redirect_info_t *redirect_infop,
4108     void *priv_data, ibt_priv_data_len_t priv_data_len)
4109 {
4110 	ibcm_ud_state_data_t *ud_statep = (ibcm_ud_state_data_t *)session_id;
4111 	ibcm_proceed_targs_t	*proceed_targs;
4112 	ibt_qp_query_attr_t	qp_attr;
4113 	ibt_status_t		retval;
4114 
4115 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_ud_proceed session_id %p "
4116 	    "ud_channel %p ", session_id, ud_channel);
4117 
4118 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_ud_proceed status %x priv_data %p "
4119 	    "priv_data_len %x",  status, priv_data, priv_data_len);
4120 
4121 	/* validate session_id and status */
4122 	if ((ud_statep == NULL) || (status == IBT_CM_DEFER)) {
4123 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid Args");
4124 		return (IBT_INVALID_PARAM);
4125 	}
4126 
4127 	/* If priv data len specified, then priv_data cannot be NULL */
4128 	if ((priv_data_len > 0) && (priv_data == NULL))
4129 		return (IBT_INVALID_PARAM);
4130 
4131 	if (priv_data_len > IBT_SIDR_REP_PRIV_DATA_SZ)
4132 		return (IBT_INVALID_PARAM);
4133 
4134 	/* retrieve qpn and qkey from ud channel */
4135 
4136 	/* validate event and statep's state */
4137 
4138 	if (status == IBT_CM_ACCEPT) {
4139 		retval = ibt_query_qp(ud_channel, &qp_attr);
4140 		if ((retval != IBT_SUCCESS) ||
4141 		    (qp_attr.qp_info.qp_trans != IBT_UD_SRV)) {
4142 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed: "
4143 			    "Failed to retrieve QPN from the channel: %d",
4144 			    retval);
4145 			return (IBT_INVALID_PARAM);
4146 		}
4147 	}
4148 
4149 
4150 	mutex_enter(&ud_statep->ud_state_mutex);
4151 
4152 	if (ud_statep->ud_state != IBCM_STATE_SIDR_REQ_RCVD) {
4153 		mutex_exit(&ud_statep->ud_state_mutex);
4154 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid State "
4155 		    "specified");
4156 		return (IBT_INVALID_PARAM);
4157 	}
4158 
4159 	/* wait until client's CM handler returns DEFER status back to CM */
4160 
4161 	while (ud_statep->ud_clnt_proceed == IBCM_BLOCK) {
4162 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_ud_proceed : Blocked for return"
4163 		    " of client's ud cm handler");
4164 		cv_wait(&ud_statep->ud_block_client_cv,
4165 		    &ud_statep->ud_state_mutex);
4166 	}
4167 
4168 	if (ud_statep->ud_clnt_proceed == IBCM_FAIL) {
4169 		mutex_exit(&ud_statep->ud_state_mutex);
4170 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Failed as client "
4171 		    "returned non-DEFER status from cm handler");
4172 		return (IBT_INVALID_PARAM);
4173 	}
4174 
4175 	ASSERT(ud_statep->ud_clnt_proceed == IBCM_UNBLOCK);
4176 	ud_statep->ud_clnt_proceed = IBCM_FAIL;
4177 	mutex_exit(&ud_statep->ud_state_mutex);
4178 
4179 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
4180 
4181 	/* the state machine processing is done in a separate thread */
4182 
4183 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
4184 	proceed_targs = kmem_zalloc(sizeof (ibcm_proceed_targs_t),
4185 	    KM_SLEEP);
4186 
4187 	proceed_targs->status = status;
4188 	proceed_targs->priv_data_len = priv_data_len;
4189 
4190 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
4191 
4192 	if (status == IBT_CM_ACCEPT) {
4193 		proceed_targs->tst.ud.ud_qkey =
4194 		    qp_attr.qp_info.qp_transport.ud.ud_qkey;
4195 		proceed_targs->tst.ud.ud_qpn = qp_attr.qp_qpn;
4196 	}
4197 
4198 	proceed_targs->tst.ud.ud_statep = ud_statep;
4199 
4200 	/* copy redirect info based on status */
4201 	if (status == IBT_CM_REDIRECT)
4202 		bcopy(redirect_infop, &proceed_targs->tst.ud.ud_redirect_info,
4203 		    sizeof (ibt_redirect_info_t));
4204 
4205 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
4206 
4207 	(void) taskq_dispatch(ibcm_taskq, ibcm_ud_proceed_via_taskq,
4208 	    proceed_targs, TQ_SLEEP);
4209 
4210 	return (IBT_SUCCESS);
4211 }
4212 
4213 /*
4214  * Function:
4215  * 	ibcm_ud_proceed_via_taskq
4216  *
4217  * Called from taskq, dispatched by ibt_cm_ud_proceed
4218  * Completes the cm state processing for ibt_cm_ud_proceed
4219  */
4220 void
4221 ibcm_ud_proceed_via_taskq(void *targs)
4222 {
4223 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
4224 	ibcm_ud_state_data_t	*ud_statep = proceed_targs->tst.ud.ud_statep;
4225 	ibcm_ud_clnt_reply_info_t ud_clnt_info;
4226 	ibt_sidr_status_t	sidr_status;
4227 
4228 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_proceed_via_taskq(%p)", targs);
4229 
4230 	ud_clnt_info.ud_qpn  = proceed_targs->tst.ud.ud_qpn;
4231 	ud_clnt_info.ud_qkey  = proceed_targs->tst.ud.ud_qkey;
4232 	ud_clnt_info.priv_data = proceed_targs->priv_data;
4233 	ud_clnt_info.priv_data_len = proceed_targs->priv_data_len;
4234 	ud_clnt_info.redirect_infop = &proceed_targs->tst.ud.ud_redirect_info;
4235 
4236 	/* validate event and statep's state */
4237 	ibcm_process_sidr_req_cm_hdlr(ud_statep, proceed_targs->status,
4238 	    &ud_clnt_info, &sidr_status,
4239 	    (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg));
4240 
4241 	ibcm_post_sidr_rep_mad(ud_statep, sidr_status);
4242 
4243 	/* decr the statep ref cnt incremented in ibcm_process_sidr_req_msg */
4244 	mutex_enter(&ud_statep->ud_state_mutex);
4245 	IBCM_UD_REF_CNT_DECR(ud_statep);
4246 	mutex_exit(&ud_statep->ud_state_mutex);
4247 
4248 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4249 }
4250 
4251 /*
4252  * Function:
4253  *	ibt_set_alt_path
4254  * Input:
4255  *	channel		Channel handle returned from ibt_alloc_rc_channel(9F).
4256  *
4257  *	mode		Execute in blocking or non blocking mode.
4258  *
4259  *	alt_path	A pointer to an ibt_alt_path_info_t as returned from an
4260  *			ibt_get_alt_path(9F) call that specifies the new
4261  *			alternate path.
4262  *
4263  *	priv_data       A pointer to a buffer specified by caller for the
4264  *			private data in the outgoing CM Load Alternate Path
4265  *			(LAP) message sent to the remote host. This can be NULL
4266  *			if no private data is available to communicate to the
4267  *			remote node.
4268  *
4269  *	priv_data_len   Length of valid data in priv_data, this should be less
4270  *			than or equal to IBT_LAP_PRIV_DATA_SZ.
4271  *
4272  * Output:
4273  *	ret_args	If called in blocking mode, points to a return argument
4274  *			structure of type ibt_ap_returns_t.
4275  *
4276  * Returns:
4277  *	IBT_SUCCESS on Success else appropriate error.
4278  * Description:
4279  *	Load the specified alternate path. Causes the CM to send an LAP message
4280  *	to the remote node.
4281  *	Can only be called on a previously opened RC channel.
4282  */
4283 ibt_status_t
4284 ibt_set_alt_path(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
4285     ibt_alt_path_info_t *alt_path, void *priv_data,
4286     ibt_priv_data_len_t priv_data_len, ibt_ap_returns_t *ret_args)
4287 {
4288 	ibmf_handle_t		ibmf_hdl;
4289 	ibt_status_t		status = IBT_SUCCESS;
4290 	ibcm_lap_msg_t		*lap_msgp;
4291 	ibcm_hca_info_t		*hcap;
4292 	ibcm_state_data_t	*statep;
4293 	uint8_t			port_no;
4294 	ib_lid_t		alternate_slid;
4295 	ibt_priv_data_len_t	len;
4296 	ib_lid_t		base_lid;
4297 	boolean_t		alt_grh;
4298 
4299 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path(%p, %x, %p, %p, %x, %p)",
4300 	    channel, mode, alt_path, priv_data, priv_data_len, ret_args);
4301 
4302 	/* validate channel */
4303 	if (IBCM_INVALID_CHANNEL(channel)) {
4304 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: invalid channel");
4305 		return (IBT_CHAN_HDL_INVALID);
4306 	}
4307 
4308 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
4309 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4310 		    "Invalid Channel type: Applicable only to RC Channel");
4311 		return (IBT_CHAN_SRV_TYPE_INVALID);
4312 	}
4313 
4314 	if (mode == IBT_NONBLOCKING) {
4315 		if (ret_args != NULL) {
4316 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4317 			    "ret_args should be NULL when called in "
4318 			    "non-blocking mode");
4319 			return (IBT_INVALID_PARAM);
4320 		}
4321 	} else if (mode == IBT_BLOCKING) {
4322 		if (ret_args == NULL) {
4323 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4324 			    "ret_args should be Non-NULL when called in "
4325 			    "blocking mode");
4326 			return (IBT_INVALID_PARAM);
4327 		}
4328 		if (ret_args->ap_priv_data_len > IBT_APR_PRIV_DATA_SZ) {
4329 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4330 			    "expected private data length is too large");
4331 			return (IBT_INVALID_PARAM);
4332 		}
4333 		if ((ret_args->ap_priv_data_len > 0) &&
4334 		    (ret_args->ap_priv_data == NULL)) {
4335 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4336 			    "apr_priv_data_len > 0, but apr_priv_data NULL");
4337 			return (IBT_INVALID_PARAM);
4338 		}
4339 	} else { /* any other mode is not valid for ibt_set_alt_path */
4340 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4341 		    "invalid mode %x specified", mode);
4342 		return (IBT_INVALID_PARAM);
4343 	}
4344 
4345 	if ((port_no = alt_path->ap_alt_cep_path.cep_hca_port_num) == 0)
4346 		return (IBT_INVALID_PARAM);
4347 
4348 	/* get the statep */
4349 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4350 	if (statep == NULL) {
4351 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: statep NULL");
4352 		return (IBT_CM_FAILURE);
4353 	}
4354 
4355 	mutex_enter(&statep->state_mutex);
4356 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4357 	IBCM_REF_CNT_INCR(statep);
4358 	mutex_exit(&statep->state_mutex);
4359 
4360 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: statep %p", statep);
4361 
4362 	hcap = statep->hcap;
4363 
4364 	/* HCA must have been in active state. If not, it's a client bug */
4365 	if (!IBCM_ACCESS_HCA_OK(hcap))
4366 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: hca in error state");
4367 
4368 	ASSERT(statep->cm_handler != NULL);
4369 
4370 	/* Check Alternate port */
4371 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no, NULL,
4372 	    &base_lid);
4373 	if (status != IBT_SUCCESS) {
4374 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4375 		    "ibt_get_port_state_byguid status %d ", status);
4376 		mutex_enter(&statep->state_mutex);
4377 		IBCM_REF_CNT_DECR(statep);
4378 		mutex_exit(&statep->state_mutex);
4379 		return (status);
4380 	}
4381 
4382 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
4383 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
4384 	    != IBT_SUCCESS)) {
4385 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4386 		    "ibmf reg or callback setup failed during re-initialize");
4387 		mutex_enter(&statep->state_mutex);
4388 		IBCM_REF_CNT_DECR(statep);
4389 		mutex_exit(&statep->state_mutex);
4390 		return (status);
4391 	}
4392 
4393 	ibmf_hdl = statep->stored_reply_addr.ibmf_hdl;
4394 
4395 	alternate_slid = base_lid +
4396 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_src_path;
4397 
4398 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: alternate SLID = %x",
4399 	    h2b16(alternate_slid));
4400 
4401 	ibcm_lapr_enter();	/* limit how many run simultaneously */
4402 
4403 	/* Allocate MAD for LAP */
4404 	if (statep->lapr_msg == NULL)
4405 		if ((status = ibcm_alloc_out_msg(ibmf_hdl, &statep->lapr_msg,
4406 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
4407 			ibcm_lapr_exit();
4408 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4409 			    "chan 0x%p ibcm_alloc_out_msg failed", channel);
4410 			mutex_enter(&statep->state_mutex);
4411 			IBCM_REF_CNT_DECR(statep);
4412 			mutex_exit(&statep->state_mutex);
4413 			return (status);
4414 		}
4415 
4416 	mutex_enter(&statep->state_mutex);
4417 
4418 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: connection state is"
4419 	    " %x", statep->state);
4420 
4421 	/* Check state */
4422 	if ((statep->state != IBCM_STATE_ESTABLISHED) ||
4423 	    (statep->ap_state != IBCM_AP_STATE_IDLE)) {
4424 		IBCM_REF_CNT_DECR(statep);
4425 		mutex_exit(&statep->state_mutex);
4426 		(void) ibcm_free_out_msg(ibmf_hdl, &statep->lapr_msg);
4427 		ibcm_lapr_exit();
4428 		return (IBT_CHAN_STATE_INVALID);
4429 	} else {
4430 		/* Set to LAP Sent state */
4431 		statep->ap_state = IBCM_AP_STATE_LAP_SENT;
4432 		statep->ap_done = B_FALSE;
4433 		statep->remaining_retry_cnt = statep->max_cm_retries;
4434 		statep->timer_stored_state = statep->state;
4435 		statep->timer_stored_ap_state = statep->ap_state;
4436 		IBCM_REF_CNT_INCR(statep); /* for ibcm_post_lap_complete */
4437 	}
4438 
4439 	mutex_exit(&statep->state_mutex);
4440 
4441 	/* No more failure returns below */
4442 
4443 	/* Allocate MAD for LAP */
4444 	IBTF_DPRINTF_L5(cmlog, "ibt_set_alt_path:"
4445 	    " statep's mad addr = 0x%p", IBCM_OUT_HDRP(statep->lapr_msg));
4446 
4447 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*lap_msgp))
4448 
4449 	lap_msgp = (ibcm_lap_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);
4450 
4451 	lap_msgp->lap_alt_l_port_lid = h2b16(alternate_slid);
4452 	lap_msgp->lap_alt_r_port_lid =
4453 	    h2b16(alt_path->ap_alt_cep_path.cep_adds_vect.av_dlid);
4454 
4455 	/* Fill in remote port gid */
4456 	lap_msgp->lap_alt_r_port_gid.gid_prefix =
4457 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_prefix);
4458 	lap_msgp->lap_alt_r_port_gid.gid_guid =
4459 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_guid);
4460 
4461 	/* Fill in local port gid */
4462 	lap_msgp->lap_alt_l_port_gid.gid_prefix =
4463 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_prefix);
4464 	lap_msgp->lap_alt_l_port_gid.gid_guid =
4465 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_guid);
4466 
4467 	alt_grh = alt_path->ap_alt_cep_path.cep_adds_vect.av_send_grh;
4468 
4469 	/* alternate_flow_label, and alternate srate, alternate traffic class */
4470 	lap_msgp->lap_alt_srate_plus =
4471 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srate & 0x3f;
4472 	lap_msgp->lap_alt_flow_label_plus = h2b32(((alt_grh == B_TRUE) ?
4473 	    (alt_path->ap_alt_cep_path.cep_adds_vect.av_flow << 12) : 0) |
4474 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_tclass);
4475 
4476 	/* Alternate hop limit, service level */
4477 	lap_msgp->lap_alt_hop_limit = (alt_grh == B_TRUE) ?
4478 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_hop : 1;
4479 	lap_msgp->lap_alt_sl_plus =
4480 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srvl << 4 |
4481 	    ((alt_grh == B_FALSE) ? 0x8 : 0);
4482 
4483 	lap_msgp->lap_alt_local_acktime_plus = ibt_usec2ib(
4484 	    (2 * statep->rc_alt_pkt_lt) +
4485 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
4486 
4487 	lap_msgp->lap_local_comm_id = h2b32(statep->local_comid);
4488 	lap_msgp->lap_remote_comm_id = h2b32(statep->remote_comid);
4489 
4490 	lap_msgp->lap_remote_qpn_eecn_plus =
4491 	    h2b32((statep->remote_qpn << 8) |
4492 	    ibt_usec2ib(ibcm_remote_response_time) << 3);
4493 
4494 	len = min(priv_data_len, IBT_LAP_PRIV_DATA_SZ);
4495 	if ((len > 0) && priv_data) {
4496 		bcopy(priv_data, lap_msgp->lap_private_data, len);
4497 	}
4498 
4499 	/* only rc_alt_pkt_lt and ap_return_data fields are initialized */
4500 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
4501 
4502 	statep->rc_alt_pkt_lt = ibt_ib2usec(alt_path->ap_alt_pkt_lt);
4503 
4504 	/* return_data is filled up in the state machine code */
4505 	statep->ap_return_data = ret_args;
4506 
4507 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
4508 
4509 	IBCM_OUT_HDRP(statep->lapr_msg)->AttributeID =
4510 	    h2b16(IBCM_INCOMING_LAP + IBCM_ATTR_BASE_ID);
4511 
4512 	IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID =
4513 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_LAP, statep->local_comid,
4514 	    0));
4515 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path: statep %p, tid %llx",
4516 	    statep, IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID);
4517 
4518 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*lap_msgp))
4519 
4520 	/* Send LAP */
4521 	ibcm_post_rc_mad(statep, statep->lapr_msg, ibcm_post_lap_complete,
4522 	    statep);
4523 
4524 	mutex_enter(&statep->state_mutex);
4525 
4526 	if (mode == IBT_BLOCKING) {
4527 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: blocking");
4528 
4529 		/* wait for APR */
4530 		while (statep->ap_done != B_TRUE) {
4531 			cv_wait(&statep->block_client_cv,
4532 			    &statep->state_mutex);
4533 		}
4534 
4535 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done blocking");
4536 
4537 		/*
4538 		 * In the case that ibt_set_alt_path fails,
4539 		 * change retval to IBT_CM_FAILURE
4540 		 */
4541 		if (statep->ap_return_data->ap_status != IBT_CM_AP_LOADED)
4542 			status = IBT_CM_FAILURE;
4543 
4544 	}
4545 
4546 	/* decrement the ref-count before leaving here */
4547 	IBCM_REF_CNT_DECR(statep);
4548 
4549 	mutex_exit(&statep->state_mutex);
4550 
4551 	ibcm_lapr_exit();
4552 
4553 	/* If this message isn't seen then ibt_set_alt_path failed */
4554 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done");
4555 
4556 	return (status);
4557 }
4558 
4559 
4560 #ifdef DEBUG
4561 
4562 /*
4563  * ibcm_query_classport_info:
4564  *	Query classportinfo
4565  *
4566  * INPUTS:
4567  *	channel		- Channel that is associated with a statep
4568  *
4569  * RETURN VALUE: NONE
4570  * This function is currently used to generate a valid get method classport
4571  * info, and test CM functionality. There is no ibtl client interface to
4572  * generate a classportinfo. It is possible that CM may use classportinfo
4573  * from other nodes in the future, and most of the code below could be re-used.
4574  */
4575 void
4576 ibcm_query_classport_info(ibt_channel_hdl_t channel)
4577 {
4578 	ibcm_state_data_t	*statep;
4579 	ibmf_msg_t		*msgp;
4580 
4581 	IBTF_DPRINTF_L3(cmlog, "ibcm_query_classport_info(%p)", channel);
4582 
4583 	/* validate channel, first */
4584 	if (IBCM_INVALID_CHANNEL(channel)) {
4585 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4586 		    "invalid channel (%p)", channel);
4587 		return;
4588 	}
4589 
4590 	/* get the statep */
4591 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4592 
4593 	/*
4594 	 * This can happen, if the statep is already gone by a DREQ from
4595 	 * the remote side
4596 	 */
4597 	if (statep == NULL) {
4598 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4599 		    "statep NULL");
4600 		return;
4601 	}
4602 
4603 	mutex_enter(&statep->state_mutex);
4604 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4605 	IBCM_REF_CNT_INCR(statep);
4606 	mutex_exit(&statep->state_mutex);
4607 
4608 	/* Debug/test code, so don't care about return status */
4609 	(void) ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp,
4610 	    MAD_METHOD_GET);
4611 
4612 	IBCM_OUT_HDRP(msgp)->TransactionID = h2b64(ibcm_generate_tranid(
4613 	    MAD_ATTR_ID_CLASSPORTINFO, statep->local_comid, 0));
4614 	IBCM_OUT_HDRP(msgp)->AttributeID = h2b16(MAD_ATTR_ID_CLASSPORTINFO);
4615 
4616 	(void) ibcm_post_mad(msgp, &statep->stored_reply_addr, NULL, NULL);
4617 
4618 	IBTF_DPRINTF_L3(cmlog, "ibcm_query_classport_info(%p) "
4619 	    "Get method MAD posted ", channel);
4620 
4621 	(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp);
4622 
4623 	mutex_enter(&statep->state_mutex);
4624 	IBCM_REF_CNT_DECR(statep);
4625 	mutex_exit(&statep->state_mutex);
4626 }
4627 
/*
 * ibcm_print_reply_addr:
 *	Debug-only helper (compiled under DEBUG) that traces, through
 *	IBTF_DPRINTF_L4, the contents of a CM MAD address.
 *
 * INPUTS:
 *	channel		- Channel handle; only printed, never dereferenced.
 *	cm_reply_addr	- MAD address whose rcvd_addr (LIDs, Q_Key, P_Key,
 *			  remote QP number, service level) and grh_hdr
 *			  (sender/receiver GIDs, flow label, traffic class,
 *			  hop limit) fields are printed.
 *
 * RETURN VALUE: NONE
 */
static void
ibcm_print_reply_addr(ibt_channel_hdl_t channel, ibcm_mad_addr_t *cm_reply_addr)
{
	/* Local and remote LIDs of the address */
	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: chan 0x%p, SLID %x, "
	    "DLID %x", channel, cm_reply_addr->rcvd_addr.ia_local_lid,
	    cm_reply_addr->rcvd_addr.ia_remote_lid);

	/* Keys, remote QP number and service level */
	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: QKEY %x, PKEY %x, "
	    "RQPN %x SL %x", cm_reply_addr->rcvd_addr.ia_q_key,
	    cm_reply_addr->rcvd_addr.ia_p_key,
	    cm_reply_addr->rcvd_addr.ia_remote_qno,
	    cm_reply_addr->rcvd_addr.ia_service_level);

	/* GRH: sender GID */
	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM SGID %llX:%llX ",
	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_prefix,
	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_guid);

	/* GRH: receiver GID */
	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM DGID %llX:%llX",
	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_prefix,
	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_guid);

	/* GRH: flow label, traffic class and hop limit */
	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM FL %x TC %x HL %x",
	    cm_reply_addr->grh_hdr.ig_flow_label,
	    cm_reply_addr->grh_hdr.ig_tclass,
	    cm_reply_addr->grh_hdr.ig_hop_limit);
}
4654 
4655 #endif
4656 
/*
 * For MCG List search
 *
 * One entry is kept per joined multicast group / port pair.  Entries are
 * created and reference counted by ibcm_add_incr_mcg_entry() and released by
 * ibcm_del_decr_mcg_entry(), so that the SA is asked to leave a group only
 * when the last local join of it goes away.
 */
typedef struct ibcm_mcg_list_s {
	struct ibcm_mcg_list_s	*ml_next;	/* next entry, NULL at tail */
	ib_gid_t		ml_sgid;	/* PortGID from the SA response */
	ib_gid_t		ml_mgid;	/* MGID from the SA response */
	ib_pkey_t		ml_pkey;	/* P_Key from the join request */
	ib_qkey_t		ml_qkey;	/* Q_Key from the join request */
	uint_t			ml_refcnt;	/* number of joins outstanding */
	uint8_t			ml_jstate;	/* OR of all joins' JoinState */
} ibcm_mcg_list_t;

/* Head of the singly linked list of joined MCG entries; new entries go first */
ibcm_mcg_list_t	*ibcm_mcglist = NULL;

/* Both the list head and every entry are accessed under ibcm_mcglist_lock */
_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcg_list_s))
_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcglist))
4672 
/*
 * Argument block handed from ibt_join_mcg() to ibcm_process_join_mcg().
 * It is allocated by ibt_join_mcg() and freed by ibcm_process_join_mcg()
 * (or by ibt_join_mcg() itself if the taskq dispatch fails).
 */
typedef struct ibcm_join_mcg_tqarg_s {
	ib_gid_t		rgid;		/* request GID from the caller */
	ibt_mcg_attr_t		mcg_attr;	/* copy of caller's attributes */
	ibt_mcg_info_t		*mcg_infop;	/* caller's buffer for results */
	ibt_mcg_handler_t	func;		/* completion handler, or NULL */
	void			*arg;		/* argument passed to 'func' */
} ibcm_join_mcg_tqarg_t;

_NOTE(READ_ONLY_DATA(ibcm_join_mcg_tqarg_s))
4682 
4683 void
4684 ibcm_add_incr_mcg_entry(sa_mcmember_record_t *mcg_req,
4685     sa_mcmember_record_t *mcg_resp)
4686 {
4687 	ibcm_mcg_list_t	*new = NULL;
4688 	ibcm_mcg_list_t	*head = NULL;
4689 
4690 	IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: MGID %llX:%llX"
4691 	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
4692 	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
4693 	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);
4694 
4695 	mutex_enter(&ibcm_mcglist_lock);
4696 	head = ibcm_mcglist;
4697 
4698 	while (head != NULL) {
4699 		if ((head->ml_mgid.gid_guid == mcg_resp->MGID.gid_guid) &&
4700 		    (head->ml_mgid.gid_prefix == mcg_resp->MGID.gid_prefix) &&
4701 		    (head->ml_sgid.gid_guid == mcg_resp->PortGID.gid_guid)) {
4702 			/* Increment the count */
4703 			head->ml_refcnt++;
4704 			/* OR the join_state value, we need this during leave */
4705 			head->ml_jstate |= mcg_req->JoinState;
4706 
4707 			IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: Entry "
4708 			    "FOUND: refcnt %d JState %X", head->ml_refcnt,
4709 			    head->ml_jstate);
4710 
4711 			mutex_exit(&ibcm_mcglist_lock);
4712 			return;
4713 		}
4714 		head = head->ml_next;
4715 	}
4716 	mutex_exit(&ibcm_mcglist_lock);
4717 
4718 	IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: Create NEW Entry ");
4719 
4720 	/* If we are here, either list is empty or match couldn't be found */
4721 	new = kmem_zalloc(sizeof (ibcm_mcg_list_t), KM_SLEEP);
4722 
4723 	mutex_enter(&ibcm_mcglist_lock);
4724 	/* Initialize the fields */
4725 	new->ml_sgid = mcg_resp->PortGID;
4726 	new->ml_mgid = mcg_resp->MGID;
4727 	new->ml_qkey = mcg_req->Q_Key;
4728 	new->ml_pkey = mcg_req->P_Key;
4729 	new->ml_refcnt = 1; /* As this is the first entry */
4730 	new->ml_jstate = mcg_req->JoinState;
4731 	new->ml_next = NULL;
4732 
4733 	new->ml_next = ibcm_mcglist;
4734 	ibcm_mcglist = new;
4735 	mutex_exit(&ibcm_mcglist_lock);
4736 }
4737 
4738 /*
4739  * ibcm_del_decr_mcg_entry
4740  *
4741  * Return value:
4742  * IBCM_SUCCESS		Entry found and ref_cnt is now zero. So go-ahead and
4743  * 			leave the MCG group. The return arg *jstate will have
4744  * 			a valid join_state value that needed to be used by
4745  * 			xxx_leave_mcg().
4746  * IBCM_LOOKUP_EXISTS	Entry found and ref_cnt is decremented but is NOT zero.
4747  * 			So do not leave the MCG group yet.
4748  * IBCM_LOOKUP_FAIL	Entry is NOT found.
4749  */
4750 ibcm_status_t
4751 ibcm_del_decr_mcg_entry(sa_mcmember_record_t *mcg_req, uint8_t *jstate)
4752 {
4753 	ibcm_mcg_list_t	*head, *prev;
4754 
4755 	IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry: MGID %llX:%llX"
4756 	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
4757 	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
4758 	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);
4759 
4760 	*jstate = 0;
4761 
4762 	mutex_enter(&ibcm_mcglist_lock);
4763 	head = ibcm_mcglist;
4764 	prev = NULL;
4765 
4766 	while (head != NULL) {
4767 		if ((head->ml_mgid.gid_guid == mcg_req->MGID.gid_guid) &&
4768 		    (head->ml_mgid.gid_prefix == mcg_req->MGID.gid_prefix) &&
4769 		    (head->ml_sgid.gid_guid == mcg_req->PortGID.gid_guid)) {
4770 			if (!(head->ml_jstate & mcg_req->JoinState)) {
4771 				IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry"
4772 				    ": JoinState mismatch %X %X)",
4773 				    head->ml_jstate, mcg_req->JoinState);
4774 			}
4775 			/* Decrement the count */
4776 			head->ml_refcnt--;
4777 
4778 			if (head->ml_refcnt == 0) {
4779 				*jstate = head->ml_jstate;
4780 
4781 				IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry"
4782 				    ": refcnt is ZERO, so delete the entry ");
4783 				if ((head == ibcm_mcglist) || (prev == NULL)) {
4784 					ibcm_mcglist = head->ml_next;
4785 				} else if (prev != NULL) {
4786 					prev->ml_next = head->ml_next;
4787 				}
4788 				mutex_exit(&ibcm_mcglist_lock);
4789 
4790 				kmem_free(head, sizeof (ibcm_mcg_list_t));
4791 				return (IBCM_SUCCESS);
4792 			}
4793 			mutex_exit(&ibcm_mcglist_lock);
4794 			return (IBCM_LOOKUP_EXISTS);
4795 		}
4796 		prev = head;
4797 		head = head->ml_next;
4798 	}
4799 	mutex_exit(&ibcm_mcglist_lock);
4800 
4801 	/*
4802 	 * If we are here, something went wrong, we don't have the entry
4803 	 * for that MCG being joined.
4804 	 */
4805 	IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry: Match NOT "
4806 	    "Found ");
4807 
4808 	return (IBCM_LOOKUP_FAIL);
4809 }
4810 
4811 
4812 /*
4813  * Function:
4814  *	ibt_join_mcg
4815  * Input:
4816  *	rgid		The request GID that defines the HCA port from which a
4817  *			contact to SA Access is performed to add the specified
4818  *			endport GID ((mcg_attr->mc_pgid) to a multicast group.
4819  *			If mcg_attr->mc_pgid is null, then this (rgid) will be
4820  *			treated as endport GID that is to be added to the
4821  *			multicast group.
4822  *
4823  *	mcg_attr	A pointer to an ibt_mcg_attr_t structure that defines
4824  *			the attributes of the desired multicast group to be
4825  *			created or joined.
4826  *
4827  *	func		NULL or a pointer to a function to call when
4828  *			ibt_join_mcg() completes. If 'func' is not NULL then
4829  *			ibt_join_mcg() will return as soon as possible after
4830  *			initiating the multicast group join/create process.
4831  *			'func' is then called when the process completes.
4832  *
4833  *	arg		Argument to the 'func'.
4834  *
4835  * Output:
4836  *	mcg_info_p	A pointer to the ibt_mcg_info_t structure, allocated
4837  *			by the caller, where the attributes of the created or
4838  *			joined multicast group are copied.
4839  * Returns:
4840  *	IBT_SUCCESS
4841  *	IBT_INVALID_PARAM
4842  *	IBT_MCG_RECORDS_NOT_FOUND
4843  *	IBT_INSUFF_RESOURCE
4844  * Description:
4845  *	Join a multicast group.  The first full member "join" causes the MCG
4846  *	to be created.
4847  */
4848 ibt_status_t
4849 ibt_join_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr,
4850     ibt_mcg_info_t *mcg_info_p, ibt_mcg_handler_t func, void  *arg)
4851 {
4852 	ibcm_join_mcg_tqarg_t	*mcg_tq;
4853 	int			flag = ((func == NULL) ? KM_SLEEP : KM_NOSLEEP);
4854 
4855 	IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg(%llX:%llX, %p)", rgid.gid_prefix,
4856 	    rgid.gid_guid, mcg_attr);
4857 
4858 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4859 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Request GID is required");
4860 		return (IBT_INVALID_PARAM);
4861 	}
4862 
4863 	if ((mcg_attr->mc_pkey == IB_PKEY_INVALID_LIMITED) ||
4864 	    (mcg_attr->mc_pkey == IB_PKEY_INVALID_FULL)) {
4865 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Invalid P_Key specified");
4866 		return (IBT_INVALID_PARAM);
4867 	}
4868 
4869 	if (mcg_attr->mc_join_state == 0) {
4870 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: JoinState not specified");
4871 		return (IBT_INVALID_PARAM);
4872 	}
4873 
4874 	if (mcg_info_p == NULL) {
4875 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: mcg_info_p is NULL");
4876 		return (IBT_INVALID_PARAM);
4877 	}
4878 
4879 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mcg_tq))
4880 
4881 	mcg_tq = kmem_alloc(sizeof (ibcm_join_mcg_tqarg_t), flag);
4882 	if (mcg_tq == NULL) {
4883 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: "
4884 		    "Unable to allocate memory for local usage.");
4885 		return (IBT_INSUFF_KERNEL_RESOURCE);
4886 	}
4887 
4888 	mcg_tq->rgid = rgid;
4889 	bcopy(mcg_attr, &mcg_tq->mcg_attr, sizeof (ibt_mcg_attr_t));
4890 	mcg_tq->mcg_infop = mcg_info_p;
4891 	mcg_tq->func = func;
4892 	mcg_tq->arg = arg;
4893 
4894 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mcg_tq))
4895 
4896 	if (func != NULL) {	/* Non-Blocking */
4897 		IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg: Non-Blocking Call");
4898 		if (taskq_dispatch(ibcm_taskq, ibcm_process_async_join_mcg,
4899 		    mcg_tq, TQ_NOSLEEP) == 0) {
4900 			IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Failed to "
4901 			    "Dispatch the TaskQ");
4902 			kmem_free(mcg_tq, sizeof (ibcm_join_mcg_tqarg_t));
4903 			return (IBT_INSUFF_KERNEL_RESOURCE);
4904 		} else
4905 			return (IBT_SUCCESS);
4906 	} else {		/* Blocking */
4907 		return (ibcm_process_join_mcg(mcg_tq));
4908 	}
4909 }
4910 
/*
 * ibcm_process_async_join_mcg:
 *	Taskq entry point used by ibt_join_mcg() for the non-blocking case.
 *	It simply runs ibcm_process_join_mcg() on the dispatched argument
 *	block.  The returned status is dropped here because
 *	ibcm_process_join_mcg() already passes it to the client's handler
 *	when one was supplied.
 */
static void
ibcm_process_async_join_mcg(void *tq_arg)
{
	(void) ibcm_process_join_mcg(tq_arg);
}
4916 
4917 static ibt_status_t
4918 ibcm_process_join_mcg(void *taskq_arg)
4919 {
4920 	sa_mcmember_record_t	mcg_req;
4921 	sa_mcmember_record_t	*mcg_resp;
4922 	ibmf_saa_access_args_t	access_args;
4923 	ibmf_saa_handle_t	saa_handle;
4924 	uint64_t		component_mask = 0;
4925 	ibt_status_t		retval;
4926 	ibtl_cm_hca_port_t	hca_port;
4927 	uint_t			num_records;
4928 	size_t			length;
4929 	ibcm_hca_info_t		*hcap;
4930 	ibcm_join_mcg_tqarg_t	*mcg_arg = (ibcm_join_mcg_tqarg_t *)taskq_arg;
4931 	ibt_mcg_info_t		*mcg_info_p = mcg_arg->mcg_infop;
4932 
4933 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg(%p)", mcg_arg);
4934 
4935 	retval = ibtl_cm_get_hca_port(mcg_arg->rgid, 0, &hca_port);
4936 	if (retval != IBT_SUCCESS) {
4937 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed to get "
4938 		    "port info from specified RGID: status = %d", retval);
4939 		goto ibcm_join_mcg_exit1;
4940 	}
4941 
4942 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4943 
4944 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix == 0) ||
4945 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid == 0)) {
4946 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4947 		    "Request GID is Port GID");
4948 		mcg_req.PortGID = mcg_arg->rgid;
4949 	} else {
4950 		mcg_req.PortGID = mcg_arg->mcg_attr.mc_pgid;
4951 	}
4952 	component_mask |= SA_MC_COMPMASK_PORTGID;
4953 
4954 	mcg_req.Q_Key = mcg_arg->mcg_attr.mc_qkey;
4955 	mcg_req.P_Key = mcg_arg->mcg_attr.mc_pkey;
4956 	mcg_req.JoinState = mcg_arg->mcg_attr.mc_join_state;
4957 	mcg_req.TClass = mcg_arg->mcg_attr.mc_tclass;
4958 	mcg_req.FlowLabel = mcg_arg->mcg_attr.mc_flow;
4959 	mcg_req.SL = mcg_arg->mcg_attr.mc_sl;
4960 
4961 	component_mask |= SA_MC_COMPMASK_QKEY | SA_MC_COMPMASK_PKEY |
4962 	    SA_MC_COMPMASK_JOINSTATE | SA_MC_COMPMASK_TCLASS |
4963 	    SA_MC_COMPMASK_FLOWLABEL | SA_MC_COMPMASK_SL;
4964 
4965 	/* If client has specified MGID, use it else SA will assign one. */
4966 	if ((mcg_arg->mcg_attr.mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4967 		mcg_req.MGID = mcg_arg->mcg_attr.mc_mgid;
4968 		component_mask |= SA_MC_COMPMASK_MGID;
4969 	}
4970 
4971 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: ");
4972 	IBTF_DPRINTF_L3(cmlog, "PGID=%016llX:%016llX, ",
4973 	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
4974 	IBTF_DPRINTF_L3(cmlog, "MGID=%016llX:%016llX",
4975 	    mcg_req.MGID.gid_prefix, mcg_req.MGID.gid_guid);
4976 	IBTF_DPRINTF_L3(cmlog, "JoinState = %X",
4977 	    mcg_arg->mcg_attr.mc_join_state);
4978 	IBTF_DPRINTF_L5(cmlog, "QKey %lX, PKey %lX",
4979 	    mcg_arg->mcg_attr.mc_qkey, mcg_arg->mcg_attr.mc_pkey);
4980 	IBTF_DPRINTF_L5(cmlog, "Scope %X, MLID %X",
4981 	    mcg_arg->mcg_attr.mc_scope, mcg_arg->mcg_attr.mc_mlid);
4982 
4983 	/* Is MTU specified. */
4984 	if (mcg_arg->mcg_attr.mc_mtu_req.r_mtu) {
4985 		mcg_req.MTU = mcg_arg->mcg_attr.mc_mtu_req.r_mtu;
4986 		mcg_req.MTUSelector = mcg_arg->mcg_attr.mc_mtu_req.r_selector;
4987 
4988 		component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
4989 		    SA_MC_COMPMASK_MTU;
4990 	}
4991 
4992 	/* Is RATE specified. */
4993 	if (mcg_arg->mcg_attr.mc_rate_req.r_srate) {
4994 		mcg_req.Rate = mcg_arg->mcg_attr.mc_rate_req.r_srate;
4995 		mcg_req.RateSelector =
4996 		    mcg_arg->mcg_attr.mc_rate_req.r_selector;
4997 
4998 		component_mask |= SA_MC_COMPMASK_RATESELECTOR |
4999 		    SA_MC_COMPMASK_RATE;
5000 	}
5001 
5002 	/* Is Packet Life Time specified. */
5003 	if (mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt) {
5004 		mcg_req.Rate = mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt;
5005 		mcg_req.RateSelector =
5006 		    mcg_arg->mcg_attr.mc_pkt_lt_req.p_selector;
5007 
5008 		component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
5009 		    SA_MC_COMPMASK_PKTLT;
5010 	}
5011 
5012 	if (mcg_arg->mcg_attr.mc_hop) {
5013 		mcg_req.HopLimit = mcg_arg->mcg_attr.mc_hop;
5014 		component_mask |= SA_MC_COMPMASK_HOPLIMIT;
5015 	}
5016 
5017 	if (mcg_arg->mcg_attr.mc_scope) {
5018 		mcg_req.Scope = mcg_arg->mcg_attr.mc_scope;
5019 		component_mask |= SA_MC_COMPMASK_SCOPE;
5020 	}
5021 
5022 	if (mcg_arg->mcg_attr.mc_mlid) {
5023 		mcg_req.MLID = mcg_arg->mcg_attr.mc_mlid;
5024 		component_mask |= SA_MC_COMPMASK_MLID;
5025 	}
5026 
5027 	/* Get SA Access Handle. */
5028 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
5029 	if (hcap == NULL) {
5030 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: NO HCA found");
5031 
5032 		retval = IBT_HCA_BUSY_DETACHING;
5033 		goto ibcm_join_mcg_exit1;
5034 	}
5035 
5036 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
5037 	if (saa_handle == NULL) {
5038 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: SA Handle NULL");
5039 
5040 		retval = IBT_HCA_PORT_NOT_ACTIVE;
5041 		goto ibcm_join_mcg_exit;
5042 	}
5043 
5044 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix != 0) &&
5045 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid != 0)) {
5046 		retval = ibtl_cm_get_hca_port(mcg_arg->mcg_attr.mc_pgid, 0,
5047 		    &hca_port);
5048 		if (retval != IBT_SUCCESS) {
5049 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed "
5050 			    "to get PortInfo of specified PGID: status = %d",
5051 			    retval);
5052 			goto ibcm_join_mcg_exit1;
5053 		}
5054 	}
5055 
5056 	/* Contact SA Access */
5057 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5058 	access_args.sq_access_type = IBMF_SAA_UPDATE;
5059 	access_args.sq_component_mask = component_mask;
5060 	access_args.sq_template = &mcg_req;
5061 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5062 	access_args.sq_callback = NULL;
5063 	access_args.sq_callback_arg = NULL;
5064 
5065 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
5066 	    (void **)&mcg_resp);
5067 	if (retval != IBT_SUCCESS) {
5068 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: "
5069 		    "SA Access Failed");
5070 		goto ibcm_join_mcg_exit;
5071 	}
5072 
5073 	num_records = length/sizeof (sa_mcmember_record_t);
5074 
5075 	IBTF_DPRINTF_L4(cmlog, "ibcm_process_join_mcg: "
5076 	    "Found %d MCMember Records", num_records);
5077 
5078 	/* Validate the returned number of records. */
5079 	if ((mcg_resp != NULL) && (num_records > 0)) {
5080 		/* Update the return values. */
5081 		mcg_info_p->mc_adds_vect.av_dgid = mcg_resp->MGID;
5082 		mcg_info_p->mc_adds_vect.av_sgid = mcg_resp->PortGID;
5083 		mcg_info_p->mc_adds_vect.av_srate = mcg_resp->Rate;
5084 		mcg_info_p->mc_adds_vect.av_srvl = mcg_resp->SL;
5085 		mcg_info_p->mc_adds_vect.av_flow = mcg_resp->FlowLabel;
5086 		mcg_info_p->mc_adds_vect.av_tclass = mcg_resp->TClass;
5087 		mcg_info_p->mc_adds_vect.av_hop = mcg_resp->HopLimit;
5088 		mcg_info_p->mc_adds_vect.av_send_grh = B_TRUE;
5089 		mcg_info_p->mc_adds_vect.av_dlid = mcg_resp->MLID;
5090 		mcg_info_p->mc_mtu = mcg_resp->MTU;
5091 		mcg_info_p->mc_qkey = mcg_resp->Q_Key;
5092 
5093 		retval = ibt_pkey2index_byguid(hca_port.hp_hca_guid,
5094 		    hca_port.hp_port, mcg_resp->P_Key, &mcg_info_p->mc_pkey_ix);
5095 		if (retval != IBT_SUCCESS) {
5096 			IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
5097 			    "Pkey2Index Conversion failed<%d>", retval);
5098 			mcg_info_p->mc_pkey_ix = 0;
5099 		}
5100 
5101 		mcg_info_p->mc_scope = mcg_resp->Scope;
5102 		mcg_info_p->mc_pkt_lt = mcg_resp->PacketLifeTime;
5103 
5104 		mcg_info_p->mc_adds_vect.av_port_num = hca_port.hp_port;
5105 		mcg_info_p->mc_adds_vect.av_sgid_ix = hca_port.hp_sgid_ix;
5106 		mcg_info_p->mc_adds_vect.av_src_path = 0;
5107 
5108 		/* Add or Incr the matching MCG entry. */
5109 		ibcm_add_incr_mcg_entry(&mcg_req, mcg_resp);
5110 		/* Deallocate the memory allocated by SA for mcg_resp. */
5111 		kmem_free(mcg_resp, length);
5112 
5113 		retval = IBT_SUCCESS;
5114 	} else {
5115 		retval = IBT_MCG_RECORDS_NOT_FOUND;
5116 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
5117 		    "MCG RECORDS NOT FOUND");
5118 	}
5119 
5120 ibcm_join_mcg_exit:
5121 	ibcm_dec_hca_acc_cnt(hcap);
5122 
5123 ibcm_join_mcg_exit1:
5124 	if (mcg_arg->func)
5125 		(*(mcg_arg->func))(mcg_arg->arg, retval, mcg_info_p);
5126 
5127 	kmem_free(mcg_arg, sizeof (ibcm_join_mcg_tqarg_t));
5128 
5129 	return (retval);
5130 }
5131 
5132 
5133 /*
5134  * Function:
5135  *	ibt_leave_mcg
5136  * Input:
5137  *	rgid		The request GID that defines the HCA port upon which
5138  *			to send the request to the Subnet Administrator, to
5139  *			remove the specified port (port_gid) from the multicast
5140  *			group.  If 'port_gid' is the Reserved GID (i.e.
5141  *			port_gid.gid_prefix = 0 and port_gid.gid_guid = 0),
5142  *			then the end-port associated with 'rgid' is removed
5143  *			from the multicast group.
5144  *
5145  *	mc_gid		A multicast group GID as returned from ibt_join_mcg()
5146  *			call.  This is optional, if not specified (i.e.
5147  *			mc_gid.gid_prefix has 0xFF in its upper 8 bits to
5148  *			identify this as being a multicast GID), then the
5149  *			port is removed from all the multicast groups of
5150  *			which it is a member.
5151  *
5152  *	port_gid	This is optional, if not the Reserved GID (gid_prefix
5153  *			and gid_guid not equal to 0), then this specifies the
5154  *			endport GID of the multicast group member being deleted
5155  *			from the group. If it is the Reserved GID (gid_prefix
5156  *			and gid_guid equal to 0) then the member endport GID is
5157  *			determined from 'rgid'.
5158  *
5159  *	mc_join_state	The Join State attribute used when the group was joined
5160  *			using ibt_join_mcg(). This Join State component must
5161  *			contains at least one bit set to 1 in the same position
5162  *			as that used during ibt_join_mcg(). i.e. the logical
5163  *			AND of the two JoinState components is not all zeros.
5164  *			This Join State component must not have some bits set
5165  *			which are not set using ibt_join_mcg().
5166  * Output:
5167  *	None.
5168  * Returns:
5169  *	IBT_SUCCESS
5170  *	IBT_INVALID_PARAM
5171  *	IBT_MC_GROUP_INVALID
5172  *	IBT_INSUFF_RESOURCE
5173  * Description:
5174  *	The port associated with the port GID shall be removed from the
5175  *	multicast group specified by MGID (mc_gid) or from all the multicast
5176  *	groups of which it is a member if the MGID (mc_gid) is not specified.
5177  *
5178  *	The last full member to leave causes the destruction of the Multicast
5179  *	Group.
5180  */
5181 ibt_status_t
5182 ibt_leave_mcg(ib_gid_t rgid, ib_gid_t mc_gid, ib_gid_t port_gid,
5183     uint8_t mc_join_state)
5184 {
5185 	sa_mcmember_record_t	mcg_req;
5186 	ibmf_saa_access_args_t	access_args;
5187 	ibmf_saa_handle_t	saa_handle;
5188 	uint64_t		component_mask = 0;
5189 	int			sa_retval;
5190 	ibt_status_t		retval;
5191 	ibcm_status_t		ret;
5192 	ibtl_cm_hca_port_t	hca_port;
5193 	size_t			length;
5194 	void			*results_p;
5195 	ibcm_hca_info_t		*hcap;
5196 	uint8_t			jstate = 0;
5197 
5198 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, %llX:%llX)",
5199 	    rgid.gid_prefix, rgid.gid_guid, mc_gid.gid_prefix, mc_gid.gid_guid);
5200 
5201 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, 0x%X)",
5202 	    port_gid.gid_prefix, port_gid.gid_guid, mc_join_state);
5203 
5204 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
5205 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: RequestGID is required");
5206 		return (IBT_INVALID_PARAM);
5207 	}
5208 
5209 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
5210 
5211 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: MGID: %llX%llX",
5212 	    mc_gid.gid_prefix, mc_gid.gid_guid);
5213 
5214 	/* Validate MGID */
5215 	if ((mc_gid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
5216 		mcg_req.MGID = mc_gid;
5217 		component_mask |= SA_MC_COMPMASK_MGID;
5218 	} else if ((mc_gid.gid_prefix != 0) || (mc_gid.gid_guid != 0)) {
5219 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Invalid MGID specified");
5220 		return (IBT_MC_MGID_INVALID);
5221 	}
5222 
5223 	if ((port_gid.gid_prefix == 0) || (port_gid.gid_guid == 0)) {
5224 		mcg_req.PortGID = rgid;
5225 	} else {
5226 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Performing PROXY Leave");
5227 		mcg_req.PortGID = port_gid;
5228 	}
5229 	component_mask |= SA_MC_COMPMASK_PORTGID;
5230 
5231 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Port GID <%llX:%llX>",
5232 	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
5233 
5234 	/* Join State */
5235 	mcg_req.JoinState = mc_join_state;
5236 	component_mask |= SA_MC_COMPMASK_JOINSTATE;
5237 
5238 	ret = ibcm_del_decr_mcg_entry(&mcg_req, &jstate);
5239 	if (ret == IBCM_LOOKUP_EXISTS) {
5240 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Multiple JoinMCG record "
5241 		    " still exists, we shall leave for last leave_mcg call");
5242 		return (IBT_SUCCESS);
5243 	} else if (ret == IBCM_LOOKUP_FAIL) {
5244 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: No Record found, "
5245 		    "continue with leave_mcg call");
5246 	} else if ((ret == IBCM_SUCCESS) && (jstate != 0)) {
5247 		/*
5248 		 * Update with cached "jstate", as this will be OR'ed of
5249 		 * all ibt_join_mcg() calls for this record.
5250 		 */
5251 		mcg_req.JoinState = jstate;
5252 	}
5253 
5254 	retval = ibtl_cm_get_hca_port(rgid, 0, &hca_port);
5255 	if (retval != IBT_SUCCESS) {
5256 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: Failed to get port info "
5257 		    "from specified RGID : status = %d", retval);
5258 		return (retval);
5259 	}
5260 
5261 	/* Get SA Access Handle. */
5262 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
5263 	if (hcap == NULL) {
5264 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: "
5265 		    "NO HCA found");
5266 		return (IBT_HCA_BUSY_DETACHING);
5267 	}
5268 
5269 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
5270 	if (saa_handle == NULL) {
5271 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: saa_handle is NULL");
5272 		ibcm_dec_hca_acc_cnt(hcap);
5273 		return (IBT_HCA_PORT_NOT_ACTIVE);
5274 	}
5275 
5276 	/* Contact SA Access */
5277 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5278 	access_args.sq_access_type = IBMF_SAA_DELETE;
5279 	access_args.sq_component_mask = component_mask;
5280 	access_args.sq_template = &mcg_req;
5281 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5282 	access_args.sq_callback = NULL;
5283 	access_args.sq_callback_arg = NULL;
5284 
5285 	ibcm_sa_access_enter();
5286 
5287 	sa_retval = ibmf_sa_access(saa_handle, &access_args, 0, &length,
5288 	    &results_p);
5289 	if (sa_retval != IBMF_SUCCESS) {
5290 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: SA access Failed: %d",
5291 		    sa_retval);
5292 		(void) ibcm_ibmf_analyze_error(sa_retval);
5293 		retval = IBT_MC_GROUP_INVALID;
5294 	}
5295 
5296 	ibcm_sa_access_exit();
5297 
5298 	ibcm_dec_hca_acc_cnt(hcap);
5299 
5300 	return (retval);
5301 }
5302 
5303 
5304 /*
5305  * Function:
5306  *	ibt_query_mcg
5307  * Input:
5308  *	rgid		The request GID that defines the HCA port upon which
5309  *			to send the request to the Subnet Administrator, to
5310  *			retrieve Multicast Records matching attributes as
5311  *			specified through 'mcg_attr' argument.
5312  *
5313  *	mcg_attr	NULL or a pointer to an ibt_mcg_attr_t structure that
5314  *			specifies MCG attributes that are to be matched.
5315  *			Attributes that are not required can be wild carded
5316  *			by specifying as '0'.
5317  *
5318  *	mcgs_max_num	The maximum number of matching multicast groups to
5319  *			return.  If zero, then all available matching multicast
5320  *			groups are returned.
5321  * Output:
5322  *	mcgs_info_p	The address of an ibt_mcg_info_t pointer, where
5323  *			multicast group information is returned. The actual
5324  *			number of entries filled in the array is returned in
5325  *			entries_p.
5326  *
5327  *	entries_p	The number of ibt_mcg_attr_t entries returned.
5328  * Returns:
5329  *	IBT_SUCCESS
5330  *	IBT_INVALID_PARAM
5331  *	IBT_MCG_RECORDS_NOT_FOUND
5332  * Description:
5333  *	Request information on multicast groups that match the parameters
5334  *	specified in mcg_attr. Information on each multicast group is returned
5335  *	to the caller in the form of an array of ibt_mcg_info_t.
5336  *	ibt_query_mcg() allocates the memory for this array and returns a
5337  *	pointer to the array (mcgs_p) and the number of entries in the array
5338  *	(entries_p). This memory should be freed by the client using
5339  *	ibt_free_mcg_info().
5340  */
5341 ibt_status_t
5342 ibt_query_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr, uint_t mcgs_max_num,
5343     ibt_mcg_info_t **mcgs_info_p, uint_t *entries_p)
5344 {
5345 	sa_mcmember_record_t	mcg_req;
5346 	sa_mcmember_record_t	*mcg_resp;
5347 	ibt_mcg_info_t		*mcg_infop;
5348 	ibmf_saa_access_args_t	access_args;
5349 	ibmf_saa_handle_t	saa_handle;
5350 	uint64_t		component_mask = 0;
5351 	ibt_status_t		retval;
5352 	ibtl_cm_hca_port_t	hport;
5353 	uint_t			num_records;
5354 	size_t			length;
5355 	void			*results_p;
5356 	ib_gid_t		port_gid;
5357 	ibcm_hca_info_t		*hcap;
5358 
5359 	IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg(%p, %d)", mcg_attr, mcgs_max_num);
5360 
5361 	if ((entries_p == NULL) || (mcgs_info_p == NULL)) {
5362 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5363 		    "entries_p or mcgs_info_p is NULL");
5364 		return (IBT_INVALID_PARAM);
5365 	}
5366 
5367 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
5368 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: RequestGID is required");
5369 		return (IBT_INVALID_PARAM);
5370 	}
5371 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Request GID <%llX:%llX>",
5372 	    rgid.gid_prefix, rgid.gid_guid);
5373 
5374 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
5375 	port_gid.gid_prefix = port_gid.gid_guid = 0;
5376 
5377 	if (mcg_attr != NULL) {
5378 		port_gid = mcg_attr->mc_pgid;
5379 
5380 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5381 			mcg_req.PortGID = mcg_attr->mc_pgid;
5382 			component_mask |= SA_MC_COMPMASK_PORTGID;
5383 
5384 			IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: PGID %llX:%llX",
5385 			    port_gid.gid_prefix, port_gid.gid_guid);
5386 		}
5387 
5388 		/* Is Q_Key specified. */
5389 		if (mcg_attr->mc_qkey != 0) {
5390 			mcg_req.Q_Key = mcg_attr->mc_qkey;
5391 			component_mask |= SA_MC_COMPMASK_QKEY;
5392 		}
5393 
5394 		/* Is P_Key specified. */
5395 		if (mcg_attr->mc_pkey != 0) {
5396 			mcg_req.P_Key = mcg_attr->mc_pkey;
5397 			component_mask |= SA_MC_COMPMASK_PKEY;
5398 		}
5399 
5400 		/* Is MGID specified. */
5401 		if ((mcg_attr->mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
5402 			mcg_req.MGID = mcg_attr->mc_mgid;
5403 			component_mask |= SA_MC_COMPMASK_MGID;
5404 		}
5405 
5406 		/* Is MTU specified. */
5407 		if (mcg_attr->mc_mtu_req.r_mtu) {
5408 			mcg_req.MTU = mcg_attr->mc_mtu_req.r_mtu;
5409 			mcg_req.MTUSelector = mcg_attr->mc_mtu_req.r_selector;
5410 
5411 			component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
5412 			    SA_MC_COMPMASK_MTU;
5413 		}
5414 
5415 		if (mcg_attr->mc_tclass) {
5416 			mcg_req.TClass = mcg_attr->mc_tclass;
5417 			component_mask |= SA_MC_COMPMASK_TCLASS;
5418 		}
5419 
5420 		/* Is RATE specified. */
5421 		if (mcg_attr->mc_rate_req.r_srate) {
5422 			mcg_req.Rate = mcg_attr->mc_rate_req.r_srate;
5423 			mcg_req.RateSelector = mcg_attr->mc_rate_req.r_selector;
5424 
5425 			component_mask |= SA_MC_COMPMASK_RATESELECTOR |
5426 			    SA_MC_COMPMASK_RATE;
5427 		}
5428 
5429 		/* Is Packet Life Time specified. */
5430 		if (mcg_attr->mc_pkt_lt_req.p_pkt_lt) {
5431 			mcg_req.Rate = mcg_attr->mc_pkt_lt_req.p_pkt_lt;
5432 			mcg_req.RateSelector =
5433 			    mcg_attr->mc_pkt_lt_req.p_selector;
5434 
5435 			component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
5436 			    SA_MC_COMPMASK_PKTLT;
5437 		}
5438 
5439 		if (mcg_attr->mc_hop) {
5440 			mcg_req.HopLimit = mcg_attr->mc_hop;
5441 			component_mask |= SA_MC_COMPMASK_HOPLIMIT;
5442 		}
5443 
5444 		if (mcg_attr->mc_flow) {
5445 			mcg_req.FlowLabel = mcg_attr->mc_flow;
5446 			component_mask |= SA_MC_COMPMASK_FLOWLABEL;
5447 		}
5448 
5449 		if (mcg_attr->mc_sl) {
5450 			mcg_req.SL = mcg_attr->mc_sl;
5451 			component_mask |= SA_MC_COMPMASK_SL;
5452 		}
5453 
5454 		if (mcg_attr->mc_scope) {
5455 			mcg_req.Scope = mcg_attr->mc_scope;
5456 			component_mask |= SA_MC_COMPMASK_SCOPE;
5457 		}
5458 
5459 		if (mcg_attr->mc_join_state) {
5460 			mcg_req.JoinState = mcg_attr->mc_join_state;
5461 			component_mask |= SA_MC_COMPMASK_JOINSTATE;
5462 		}
5463 
5464 		if (mcg_attr->mc_mlid) {
5465 			mcg_req.MLID = mcg_attr->mc_mlid;
5466 			component_mask |= SA_MC_COMPMASK_MLID;
5467 		}
5468 	}
5469 
5470 	retval = ibtl_cm_get_hca_port(rgid, 0, &hport);
5471 	if (retval != IBT_SUCCESS) {
5472 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: Failed to get port info "
5473 		    "from specified RGID : status = %d", retval);
5474 		return (retval);
5475 	}
5476 
5477 	/* Get SA Access Handle. */
5478 	hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5479 	if (hcap == NULL) {
5480 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: NO HCA found");
5481 		return (IBT_HCA_BUSY_DETACHING);
5482 	}
5483 
5484 	saa_handle = ibcm_get_saa_handle(hcap, hport.hp_port);
5485 	if (saa_handle == NULL) {
5486 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: saa_handle is NULL");
5487 		ibcm_dec_hca_acc_cnt(hcap);
5488 		return (IBT_HCA_PORT_NOT_ACTIVE);
5489 	}
5490 
5491 	/* Contact SA Access */
5492 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5493 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
5494 	access_args.sq_component_mask = component_mask;
5495 	access_args.sq_template = &mcg_req;
5496 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5497 	access_args.sq_callback = NULL;
5498 	access_args.sq_callback_arg = NULL;
5499 
5500 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
5501 	    &results_p);
5502 	if (retval != IBT_SUCCESS) {
5503 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: SA access Failed");
5504 		ibcm_dec_hca_acc_cnt(hcap);
5505 		return (retval);
5506 	}
5507 
5508 	num_records = length/sizeof (sa_mcmember_record_t);
5509 
5510 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Found %d MCMember Records",
5511 	    num_records);
5512 
5513 	/* Validate the returned number of records. */
5514 	if ((results_p != NULL) && (num_records > 0)) {
5515 		uint_t	i;
5516 
5517 		/*
5518 		 * If mcgs_max_num is zero, then return all records else
5519 		 * return only requested number of records
5520 		 */
5521 		if ((mcgs_max_num != 0) && (num_records > mcgs_max_num)) {
5522 			/* we are interested in only mcgs_max_num records */
5523 			num_records = mcgs_max_num;
5524 		}
5525 
5526 		/*
5527 		 * The SGID returned in "mcg_info_p" buffer should be PortGID,
5528 		 * (mcg_attr->mc_pgid), if 'mcg_attr->mc_pgid' was specified,
5529 		 * else RequestGID (rgid) should be returned.
5530 		 */
5531 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5532 
5533 			/* Get sgid_ix and port number of 'port_gid' */
5534 			retval = ibtl_cm_get_hca_port(port_gid, 0, &hport);
5535 			if (retval != IBT_SUCCESS) {
5536 				IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5537 				    "Failed to Get Portinfo for PortGID :"
5538 				    "status = %d", retval);
5539 				return (retval);
5540 			}
5541 		} else {
5542 			/*
5543 			 * The sgid_ix and port number related to RequestGID
5544 			 * are already obtained at the beginning.
5545 			 */
5546 			port_gid = rgid;
5547 		}
5548 
5549 		/*
5550 		 * Allocate memory for return buffer, to be freed in
5551 		 * ibt_free_mcg_info().
5552 		 */
5553 		mcg_infop = kmem_alloc((num_records * sizeof (ibt_mcg_info_t)),
5554 		    KM_SLEEP);
5555 
5556 		*mcgs_info_p = mcg_infop;
5557 		*entries_p = num_records;
5558 
5559 		/* Update the return values. */
5560 		for (i = 0; i < num_records; i++) {
5561 
5562 			mcg_resp = (sa_mcmember_record_t *)((uchar_t *)
5563 			    results_p + i * sizeof (sa_mcmember_record_t));
5564 
5565 			mcg_infop[i].mc_adds_vect.av_dgid = mcg_resp->MGID;
5566 			mcg_infop[i].mc_adds_vect.av_sgid = port_gid;
5567 			mcg_infop[i].mc_adds_vect.av_srate = mcg_resp->Rate;
5568 			mcg_infop[i].mc_adds_vect.av_srvl = mcg_resp->SL;
5569 			mcg_infop[i].mc_adds_vect.av_flow = mcg_resp->FlowLabel;
5570 			mcg_infop[i].mc_adds_vect.av_tclass = mcg_resp->TClass;
5571 			mcg_infop[i].mc_adds_vect.av_hop = mcg_resp->HopLimit;
5572 			mcg_infop[i].mc_adds_vect.av_port_num = hport.hp_port;
5573 			mcg_infop[i].mc_adds_vect.av_send_grh = B_TRUE;
5574 			mcg_infop[i].mc_adds_vect.av_dlid = mcg_resp->MLID;
5575 			mcg_infop[i].mc_adds_vect.av_sgid_ix = hport.hp_sgid_ix;
5576 			mcg_infop[i].mc_adds_vect.av_src_path = 0;
5577 			mcg_infop[i].mc_mtu = mcg_resp->MTU;
5578 			mcg_infop[i].mc_qkey = mcg_resp->Q_Key;
5579 			mcg_infop[i].mc_scope = mcg_resp->Scope;
5580 			mcg_infop[i].mc_pkt_lt = mcg_resp->PacketLifeTime;
5581 
5582 			if (ibt_pkey2index_byguid(hport.hp_hca_guid,
5583 			    hport.hp_port, mcg_resp->P_Key,
5584 			    &mcg_infop[i].mc_pkey_ix) != IBT_SUCCESS) {
5585 				IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: "
5586 				    "Pkey2Index Conversion failed");
5587 				mcg_infop[i].mc_pkey_ix = 0;
5588 			}
5589 		}
5590 
5591 		/*
5592 		 * Deallocate the memory allocated by SA for results_p.
5593 		 */
5594 		kmem_free(results_p, length);
5595 		retval = IBT_SUCCESS;
5596 
5597 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: returning %d MCGRecords",
5598 		    num_records);
5599 
5600 	} else {
5601 		retval = IBT_MCG_RECORDS_NOT_FOUND;
5602 		*entries_p = 0;
5603 
5604 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: MCG RECORDS NOT FOUND");
5605 	}
5606 
5607 	ibcm_dec_hca_acc_cnt(hcap);
5608 
5609 	return (retval);
5610 }
5611 
5612 
5613 /*
5614  * ibt_free_mcg_info()
5615  *	Free the memory allocated by successful ibt_query_mcg()
5616  *
5617  *	mcgs_info	Pointer returned by ibt_query_mcg().
5618  *
5619  *	entries		The number of ibt_mcg_info_t entries to free.
5620  */
5621 void
5622 ibt_free_mcg_info(ibt_mcg_info_t *mcgs_info, uint_t entries)
5623 {
5624 	IBTF_DPRINTF_L3(cmlog, "ibt_free_mcg_info: "
5625 	    "Free <%d> entries from 0x%p", entries, mcgs_info);
5626 
5627 	if ((mcgs_info != NULL) && (entries > 0))
5628 		kmem_free(mcgs_info, entries * sizeof (ibt_mcg_info_t));
5629 	else
5630 		IBTF_DPRINTF_L2(cmlog, "ibt_free_mcg_info: "
5631 		    "ERROR: NULL buf pointer or length specified.");
5632 }
5633 
5634 
5635 /*
5636  * Function:
5637  *	ibt_gid_to_node_info()
5638  * Input:
5639  *	gid		Identifies the IB Node and port for which to obtain
5640  *			Node information.
5641  * Output:
5642  *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
5643  *			by the caller) in which to return the node information.
5644  * Returns:
5645  *	IBT_SUCCESS
5646  *	IBT_INVALID_PARAM
5647  *	IBT_NODE_RECORDS_NOT_FOUND
5648  *	IBT_NO_HCAS_AVAILABLE
5649  * Description:
5650  *	Retrieve Node Information for the specified GID.
5651  */
5652 ibt_status_t
5653 ibt_gid_to_node_info(ib_gid_t gid, ibt_node_info_t *node_info_p)
5654 {
5655 	sa_node_record_t	nr_req, *nr_resp;
5656 	ibmf_saa_handle_t	saa_handle;
5657 	ibt_status_t		retval;
5658 	ibcm_hca_info_t		*hcap;
5659 	ibtl_cm_hca_port_t	hport;
5660 	int			i, j;
5661 	uint_t			num_rec;
5662 	ib_guid_t		*guid_array = NULL;
5663 	sa_path_record_t	*path;
5664 	size_t			len;
5665 	uint8_t			npaths;
5666 	uint32_t		num_hcas = 0;
5667 	ib_lid_t		node_lid;
5668 	boolean_t		local_node = B_FALSE;
5669 	void			*res_p;
5670 	uint8_t			num_ports = 0;
5671 
5672 
5673 	IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info(%llX:%llX, %p)",
5674 	    gid.gid_prefix, gid.gid_guid, node_info_p);
5675 
5676 	if ((gid.gid_prefix == 0) || (gid.gid_guid == 0)) {
5677 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: GID is required");
5678 		return (IBT_INVALID_PARAM);
5679 	}
5680 
5681 	if (node_info_p == NULL) {
5682 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5683 		    "Return Buf (node_info_p) is NULL.");
5684 		return (IBT_INVALID_PARAM);
5685 	}
5686 
5687 	/*
5688 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5689 	 * associated port) info via ibtl_cm_get_hca_port() call.
5690 	 */
5691 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5692 	if (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS) {
5693 
5694 		hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5695 		if (hcap == NULL) {
5696 			IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5697 			    "HCA(%llX) info not found", hport.hp_hca_guid);
5698 			return (IBT_NO_HCAS_AVAILABLE);
5699 		}
5700 		num_ports = 1;
5701 		num_hcas = 1;
5702 		node_lid = hport.hp_base_lid;
5703 		local_node = B_TRUE;
5704 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: Local Node: "
5705 		    "LID = 0x%X", node_lid);
5706 	} else {
5707 		/* Get the number of HCAs and their GUIDs */
5708 		num_hcas = ibt_get_hca_list(&guid_array);
5709 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: ibt_get_hca_list "
5710 		    "returned %d hcas", num_hcas);
5711 
5712 		if (num_hcas == 0) {
5713 			IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5714 			    "NO HCA's Found on this system");
5715 			return (IBT_NO_HCAS_AVAILABLE);
5716 		}
5717 	}
5718 
5719 	for (i = 0; i < num_hcas; i++) {
5720 		if (local_node == B_FALSE) {
5721 			hcap = ibcm_find_hca_entry(guid_array[i]);
5722 			if (hcap == NULL) {
5723 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5724 				    "HCA(%llX) info not found", guid_array[i]);
5725 				retval = IBT_NO_HCAS_AVAILABLE;
5726 				continue;
5727 			}
5728 			num_ports = hcap->hca_num_ports;
5729 		}
5730 
5731 		for (j = 0; j < num_ports; j++) {
5732 			uint8_t		port = 0;
5733 
5734 			if (local_node == B_TRUE)
5735 				port = hport.hp_port;
5736 			else
5737 				port = j + 1;
5738 
5739 			/* Get SA Access Handle. */
5740 			saa_handle = ibcm_get_saa_handle(hcap, port);
5741 			if (saa_handle == NULL) {
5742 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5743 				    "Port %d of HCA (%llX) is NOT ACTIVE",
5744 				    port, hport.hp_hca_guid);
5745 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5746 				continue;
5747 			}
5748 
5749 			if (local_node == B_FALSE) {
5750 				ib_gid_t	sgid;
5751 				int		sa_ret;
5752 
5753 				/*
5754 				 * Check whether 'gid' and this port has same
5755 				 * subnet prefix. If not, then there is no use
5756 				 * in searching from this port.
5757 				 */
5758 				sgid = hcap->hca_port_info[j].port_sgid0;
5759 				if (gid.gid_prefix != sgid.gid_prefix) {
5760 					IBTF_DPRINTF_L3(cmlog,
5761 					    "ibt_gid_to_node_info:Sn_Prefix of "
5762 					    "GID(%llX) and Port's(%llX) differ",
5763 					    gid.gid_prefix, sgid.gid_prefix);
5764 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5765 					continue;
5766 				}
5767 
5768 				/*
5769 				 * First Get Path Records for the specified DGID
5770 				 * from this port (SGID). From Path Records,
5771 				 * note down DLID, then use this DLID as Input
5772 				 * attribute to get NodeRecords from SA Access.
5773 				 */
5774 				npaths = 1;
5775 				path = NULL;
5776 
5777 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5778 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5779 				    &path);
5780 				if (sa_ret != IBMF_SUCCESS) {
5781 					IBTF_DPRINTF_L2(cmlog,
5782 					    "ibt_gid_to_node_info: "
5783 					    "ibmf_saa_gid_to_pathrecords() "
5784 					    "returned error: %d ", sa_ret);
5785 					retval =
5786 					    ibcm_ibmf_analyze_error(sa_ret);
5787 					continue;
5788 				} else if ((npaths == 0) || (path == NULL)) {
5789 					IBTF_DPRINTF_L3(cmlog,
5790 					    "ibt_gid_to_node_info: failed (%d) "
5791 					    "to get path records for the DGID "
5792 					    "0x%llX from SGID 0x%llX", sa_ret,
5793 					    gid.gid_guid, sgid.gid_guid);
5794 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5795 					continue;
5796 				}
5797 				node_lid = path->DLID;	/* LID */
5798 
5799 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5800 				    "Remote Node: LID = 0x%X", node_lid);
5801 
5802 				/* Free SA_Access memory for path record. */
5803 				kmem_free(path, len);
5804 			}
5805 
5806 			/* Retrieve Node Records from SA Access. */
5807 			bzero(&nr_req, sizeof (sa_node_record_t));
5808 
5809 			nr_req.LID = node_lid;	/* LID */
5810 
5811 			retval = ibcm_get_node_rec(saa_handle, &nr_req,
5812 			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5813 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5814 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5815 				    "failed (%d) to get Node records", retval);
5816 				continue;
5817 			} else if (retval != IBT_SUCCESS) {
5818 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5819 				    "failed (%d) to get Node records", retval);
5820 				ibcm_dec_hca_acc_cnt(hcap);
5821 				goto gid_to_ni_exit;
5822 			}
5823 
5824 			num_rec = len/sizeof (sa_node_record_t);
5825 			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5826 
5827 			/* Validate the returned number of records. */
5828 			if ((nr_resp != NULL) && (num_rec > 0)) {
5829 
5830 				IBCM_DUMP_NODE_REC(nr_resp);
5831 
5832 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
5833 				    *node_info_p))
5834 
5835 				node_info_p->n_sys_img_guid =
5836 				    nr_resp->NodeInfo.SystemImageGUID;
5837 				node_info_p->n_node_guid =
5838 				    nr_resp->NodeInfo.NodeGUID;
5839 				node_info_p->n_port_guid =
5840 				    nr_resp->NodeInfo.PortGUID;
5841 				node_info_p->n_dev_id =
5842 				    nr_resp->NodeInfo.DeviceID;
5843 				node_info_p->n_revision =
5844 				    nr_resp->NodeInfo.Revision;
5845 				node_info_p->n_vendor_id =
5846 				    nr_resp->NodeInfo.VendorID;
5847 				node_info_p->n_num_ports =
5848 				    nr_resp->NodeInfo.NumPorts;
5849 				node_info_p->n_port_num =
5850 				    nr_resp->NodeInfo.LocalPortNum;
5851 				node_info_p->n_node_type =
5852 				    nr_resp->NodeInfo.NodeType;
5853 				(void) strncpy(node_info_p->n_description,
5854 				    (char *)&nr_resp->NodeDescription, 64);
5855 
5856 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
5857 				    *node_info_p))
5858 
5859 				/*
5860 				 * Deallocate the memory allocated by SA for
5861 				 * 'nr_resp'.
5862 				 */
5863 				ibcm_dec_hca_acc_cnt(hcap);
5864 				kmem_free(nr_resp, len);
5865 				retval = IBT_SUCCESS;
5866 
5867 				goto gid_to_ni_exit;
5868 			} else {
5869 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5870 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5871 				    "Node Records NOT found - PortGUID %016llX",
5872 				    gid.gid_guid);
5873 			}
5874 		}
5875 		ibcm_dec_hca_acc_cnt(hcap);
5876 
5877 		if (local_node == B_TRUE)
5878 			break;
5879 	}
5880 
5881 gid_to_ni_exit:
5882 	if (guid_array)
5883 		ibt_free_hca_list(guid_array, num_hcas);
5884 
5885 	IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: done. Status %d", retval);
5886 
5887 	return (retval);
5888 }
5889 
5890 
5891 ibt_status_t
5892 ibcm_get_node_rec(ibmf_saa_handle_t saa_handle, sa_node_record_t *nr_req,
5893     uint64_t component_mask, void *result_p, size_t *len)
5894 {
5895 	ibmf_saa_access_args_t  args;
5896 	size_t			length;
5897 	ibt_status_t		retval;
5898 
5899 	args.sq_attr_id = SA_NODERECORD_ATTRID;
5900 	args.sq_template = nr_req;
5901 	args.sq_access_type = IBMF_SAA_RETRIEVE;
5902 	args.sq_template_length = sizeof (sa_node_record_t);
5903 	args.sq_component_mask = component_mask;
5904 	args.sq_callback = NULL;
5905 	args.sq_callback_arg = NULL;
5906 
5907 	retval = ibcm_contact_sa_access(saa_handle, &args, &length, result_p);
5908 	if (retval != IBT_SUCCESS) {
5909 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: SA Call Failed");
5910 		return (retval);
5911 	}
5912 
5913 	*len = length;
5914 
5915 	/* Validate the returned number of records. */
5916 	if ((result_p != NULL) && (length > 0)) {
5917 		IBTF_DPRINTF_L3(cmlog, "ibcm_get_node_rec: Node Records FOUND");
5918 
5919 		/* Got it, done!. */
5920 		return (IBT_SUCCESS);
5921 	} else {
5922 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: Node Rec NOT found");
5923 		return (IBT_NODE_RECORDS_NOT_FOUND);
5924 	}
5925 }
5926 
5927 
5928 /*
5929  * Function:
5930  *	ibt_lid_to_node_info()
5931  * Input:
5932  *	lid		Identifies the IB Node and port for which to obtain
5933  *			Node information.
5934  * Output:
5935  *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
5936  *			by the caller) in which to return the node information.
5937  * Returns:
5938  *	IBT_SUCCESS
5939  *	IBT_INVALID_PARAM
5940  *	IBT_NODE_RECORDS_NOT_FOUND
5941  *	IBT_NO_HCAS_AVAILABLE
5942  * Description:
5943  *	Retrieve Node Information for the specified LID.
5944  */
5945 ibt_status_t
5946 ibt_lid_to_node_info(ib_lid_t lid, ibt_node_info_t *node_info_p)
5947 {
5948 	ibt_status_t	retval;
5949 	ibcm_hca_info_t	*hcap;
5950 	uint8_t		i, j;
5951 	ib_guid_t	*guid_array = NULL;
5952 	uint_t		num_hcas = 0;
5953 
5954 
5955 	IBTF_DPRINTF_L4(cmlog, "ibt_lid_to_node_info(0x%lX, %p)",
5956 	    lid, node_info_p);
5957 
5958 	if ((lid == 0) || (node_info_p == NULL)) {
5959 		IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
5960 		    "Lid is zero, or node_info_p is NULL.");
5961 		return (IBT_INVALID_PARAM);
5962 	}
5963 
5964 	/* Get the number of HCAs and their GUIDs */
5965 	num_hcas = ibt_get_hca_list(&guid_array);
5966 	IBTF_DPRINTF_L4(cmlog, "ibt_lid_to_node_info: ibt_get_hca_list "
5967 	    "returned %d hcas", num_hcas);
5968 
5969 	if (num_hcas == 0) {
5970 		IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
5971 		    "NO HCA's Found on this system");
5972 		return (IBT_NO_HCAS_AVAILABLE);
5973 	}
5974 
5975 	for (i = 0; i < num_hcas; i++) {
5976 		hcap = ibcm_find_hca_entry(guid_array[i]);
5977 		if (hcap == NULL) {
5978 			IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: "
5979 			    "HCA(%llX) info not found", guid_array[i]);
5980 			retval = IBT_NO_HCAS_AVAILABLE;
5981 			continue;
5982 		}
5983 
5984 		for (j = 0; j < hcap->hca_num_ports; j++) {
5985 			uint8_t			port;
5986 			ibmf_saa_handle_t	saa_handle;
5987 			uint_t			num_rec;
5988 			size_t			len;
5989 			void			*res_p;
5990 			sa_node_record_t	nr_req, *nr_resp;
5991 
5992 			port = j + 1;
5993 
5994 			/* Get SA Access Handle. */
5995 			saa_handle = ibcm_get_saa_handle(hcap, port);
5996 			if (saa_handle == NULL) {
5997 				IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: "
5998 				    "Port %d of HCA (%llX) is NOT ACTIVE",
5999 				    port, guid_array[i]);
6000 				retval = IBT_NODE_RECORDS_NOT_FOUND;
6001 				continue;
6002 			}
6003 
6004 			/* Retrieve Node Records from SA Access. */
6005 			bzero(&nr_req, sizeof (sa_node_record_t));
6006 
6007 			nr_req.LID = lid;	/* LID */
6008 
6009 			retval = ibcm_get_node_rec(saa_handle, &nr_req,
6010 			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
6011 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
6012 				IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
6013 				    "failed (%d) to get Node records", retval);
6014 				continue;
6015 			} else if (retval != IBT_SUCCESS) {
6016 				IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
6017 				    "failed (%d) to get Node records", retval);
6018 				ibcm_dec_hca_acc_cnt(hcap);
6019 				goto lid_to_ni_exit;
6020 			}
6021 
6022 			num_rec = len/sizeof (sa_node_record_t);
6023 			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
6024 
6025 			/* Validate the returned number of records. */
6026 			if ((nr_resp != NULL) && (num_rec > 0)) {
6027 
6028 				IBCM_DUMP_NODE_REC(nr_resp);
6029 
6030 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
6031 				    *node_info_p))
6032 
6033 				node_info_p->n_sys_img_guid =
6034 				    nr_resp->NodeInfo.SystemImageGUID;
6035 				node_info_p->n_node_guid =
6036 				    nr_resp->NodeInfo.NodeGUID;
6037 				node_info_p->n_port_guid =
6038 				    nr_resp->NodeInfo.PortGUID;
6039 				node_info_p->n_dev_id =
6040 				    nr_resp->NodeInfo.DeviceID;
6041 				node_info_p->n_revision =
6042 				    nr_resp->NodeInfo.Revision;
6043 				node_info_p->n_vendor_id =
6044 				    nr_resp->NodeInfo.VendorID;
6045 				node_info_p->n_num_ports =
6046 				    nr_resp->NodeInfo.NumPorts;
6047 				node_info_p->n_port_num =
6048 				    nr_resp->NodeInfo.LocalPortNum;
6049 				node_info_p->n_node_type =
6050 				    nr_resp->NodeInfo.NodeType;
6051 				(void) strncpy(node_info_p->n_description,
6052 				    (char *)&nr_resp->NodeDescription, 64);
6053 
6054 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
6055 				    *node_info_p))
6056 
6057 				/*
6058 				 * Deallocate the memory allocated by SA for
6059 				 * 'nr_resp'.
6060 				 */
6061 				ibcm_dec_hca_acc_cnt(hcap);
6062 				kmem_free(nr_resp, len);
6063 				retval = IBT_SUCCESS;
6064 
6065 				goto lid_to_ni_exit;
6066 			} else {
6067 				retval = IBT_NODE_RECORDS_NOT_FOUND;
6068 				IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: "
6069 				    "Node Records NOT found - LID 0x%lX",
6070 				    lid);
6071 			}
6072 		}
6073 		ibcm_dec_hca_acc_cnt(hcap);
6074 	}
6075 
6076 lid_to_ni_exit:
6077 	if (guid_array)
6078 		ibt_free_hca_list(guid_array, num_hcas);
6079 
6080 	IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: done. Status %d", retval);
6081 
6082 	return (retval);
6083 }
6084 
6085 /*
6086  * Function:
6087  *	ibt_get_companion_port_gids()
6088  * Description:
6089  *	Get list of GID's available on a companion port(s) of the specified
6090  *	GID or list of GIDs available on a specified Node GUID/SystemImage GUID.
6091  */
6092 ibt_status_t
6093 ibt_get_companion_port_gids(ib_gid_t gid, ib_guid_t hca_guid,
6094     ib_guid_t sysimg_guid, ib_gid_t **gids_p, uint_t *num_gids_p)
6095 {
6096 	sa_node_record_t	nr_req, *nr_resp;
6097 	void			*res_p;
6098 	ibmf_saa_handle_t	saa_handle;
6099 	int			sa_ret;
6100 	ibt_status_t		retval = IBT_SUCCESS;
6101 	ibcm_hca_info_t		*hcap;
6102 	ibtl_cm_hca_port_t	hport;
6103 	int			i, j;
6104 	uint_t			num_rec;
6105 	ib_guid_t		*guid_array = NULL;
6106 	sa_path_record_t	*path;
6107 	size_t			len;
6108 	uint8_t			npaths;
6109 	uint32_t		num_hcas = 0;
6110 	boolean_t		local_node = B_FALSE;
6111 	boolean_t		local_hca = B_FALSE;
6112 	ib_guid_t		h_guid = hca_guid;
6113 	ib_gid_t		*gidp = NULL, *t_gidp = NULL;
6114 	int			multi_hca_loop = 0;
6115 
6116 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids(%llX:%llX, %llX, "
6117 	    "%llX)", gid.gid_prefix, gid.gid_guid, hca_guid, sysimg_guid);
6118 
6119 	if (((gid.gid_prefix == 0) || (gid.gid_guid == 0)) && (hca_guid == 0) &&
6120 	    (sysimg_guid == 0)) {
6121 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
6122 		    "Null Input attribute specified.");
6123 		return (IBT_INVALID_PARAM);
6124 	}
6125 
6126 	if ((num_gids_p == NULL) || (gids_p == NULL)) {
6127 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
6128 		    "num_gids_p or gids_p is NULL");
6129 		return (IBT_INVALID_PARAM);
6130 	}
6131 
6132 	*num_gids_p = 0;
6133 
6134 	/* Get the number of HCAs and their GUIDs */
6135 	if ((num_hcas = ibt_get_hca_list(&guid_array)) == 0) {
6136 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
6137 		    "NO HCA's Found on this system");
6138 		return (IBT_NO_HCAS_AVAILABLE);
6139 	}
6140 
6141 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
6142 	    "ibt_get_hca_list() returned %d hcas", num_hcas);
6143 
6144 	/*
6145 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
6146 	 * associated port) info via ibtl_cm_get_hca_port() call.
6147 	 */
6148 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
6149 	if ((gid.gid_prefix != 0) && (gid.gid_guid != 0) &&
6150 	    (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS)) {
6151 
6152 		if ((hca_guid != 0) && (hca_guid != hport.hp_hca_guid)) {
6153 			IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
6154 			    "Invalid GID<->HCAGUID combination specified.");
6155 			retval = IBT_INVALID_PARAM;
6156 			goto get_comp_pgid_exit;
6157 		}
6158 		h_guid = hport.hp_hca_guid;
6159 		local_node = B_TRUE;
6160 
6161 		IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
6162 		    "Local Node: HCA (0x%llX)", h_guid);
6163 	} else if (h_guid) {	/* Is specified HCA GUID - local? */
6164 		for (i = 0; i < num_hcas; i++) {
6165 			if (h_guid == guid_array[i]) {
6166 				local_hca = B_TRUE;
6167 				break;
6168 			}
6169 		}
6170 	} else if (sysimg_guid) { /* Is specified SystemImage GUID - local? */
6171 		for (i = 0; i < num_hcas; i++) {
6172 			ibt_status_t	ret;
6173 			ibt_hca_attr_t	hca_attr;
6174 
6175 			ret = ibt_query_hca_byguid(guid_array[i], &hca_attr);
6176 			if (ret != IBT_SUCCESS) {
6177 				IBTF_DPRINTF_L2(cmlog,
6178 				    "ibt_get_companion_port_gids: HCA(%llX) "
6179 				    "info not found", guid_array[i]);
6180 				retval = IBT_NO_HCAS_AVAILABLE;
6181 				continue;
6182 			}
6183 			if (hca_attr.hca_si_guid == sysimg_guid) {
6184 				if ((hca_guid != 0) &&
6185 				    (hca_guid != hca_attr.hca_node_guid)) {
6186 					IBTF_DPRINTF_L2(cmlog,
6187 					    "ibt_get_companion_port_gids: "
6188 					    "Invalid SysImg<->HCA GUID "
6189 					    "combination specified.");
6190 					retval = IBT_INVALID_PARAM;
6191 					goto get_comp_pgid_exit;
6192 				}
6193 				local_hca = B_TRUE;
6194 				h_guid = hca_attr.hca_node_guid;
6195 				break;
6196 			}
6197 		}
6198 	}
6199 
6200 	if ((local_node == B_TRUE) || (local_hca == B_TRUE)) {
6201 		retval = ibtl_cm_get_local_comp_gids(h_guid, gid, gids_p,
6202 		    num_gids_p);
6203 		goto get_comp_pgid_exit;
6204 	}
6205 
6206 get_comp_for_multihca:
6207 	/* We will be here, if request is for remote node */
6208 	for (i = 0; i < num_hcas; i++) {
6209 		int		multism;
6210 		uint_t		count = 0;
6211 		int		multi_sm_loop = 0;
6212 		uint_t		k = 0, l;
6213 
6214 		hcap = ibcm_find_hca_entry(guid_array[i]);
6215 		if (hcap == NULL) {
6216 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
6217 			    "HCA(%llX) info not found", guid_array[i]);
6218 			retval = IBT_NO_HCAS_AVAILABLE;
6219 			continue;
6220 		}
6221 
6222 		/* 1 - MultiSM, 0 - Single SM */
6223 		multism = ibtl_cm_is_multi_sm(guid_array[i]);
6224 
6225 		for (j = 0; j < hcap->hca_num_ports; j++) {
6226 			ib_gid_t	sgid;
6227 			uint64_t	c_mask = 0;
6228 			ib_guid_t	pg;
6229 			uint_t		port = j;
6230 
6231 get_comp_for_multism:
6232 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
6233 			    "Port %d, HCA %llX, MultiSM= %d, Loop=%d",
6234 			    port + 1, h_guid, multism, multi_sm_loop);
6235 
6236 			/* Get SA Access Handle. */
6237 			saa_handle = ibcm_get_saa_handle(hcap, port + 1);
6238 			if (saa_handle == NULL) {
6239 				IBTF_DPRINTF_L2(cmlog,
6240 				    "ibt_get_companion_port_gids: "
6241 				    "Port (%d)  - NOT ACTIVE", port + 1);
6242 				retval = IBT_GIDS_NOT_FOUND;
6243 				continue;
6244 			}
6245 
6246 			/*
6247 			 * Check whether 'gid' and this port has same subnet
6248 			 * prefix. If not, then there is no use in searching
6249 			 * from this port.
6250 			 */
6251 			sgid = hcap->hca_port_info[port].port_sgid0;
6252 			if ((h_guid == 0) && (gid.gid_prefix != 0) &&
6253 			    (multi_sm_loop == 0) &&
6254 			    (gid.gid_prefix != sgid.gid_prefix)) {
6255 				IBTF_DPRINTF_L2(cmlog,
6256 				    "ibt_get_companion_port_gids: SnPrefix of "
6257 				    "GID(%llX) and Port SN_Pfx(%llX) differ",
6258 				    gid.gid_prefix, sgid.gid_prefix);
6259 				retval = IBT_GIDS_NOT_FOUND;
6260 				continue;
6261 			}
6262 
6263 			/*
6264 			 * If HCA GUID or System Image GUID is specified, then
6265 			 * we can achieve our goal sooner!.
6266 			 */
6267 			if ((h_guid == 0) && (sysimg_guid == 0)) {
6268 				/* So only GID info is provided. */
6269 
6270 				/*
6271 				 * First Get Path Records for the specified DGID
6272 				 * from this port (SGID). From Path Records,
6273 				 * note down DLID, then use this DLID as Input
6274 				 * attribute to get NodeRecords.
6275 				 */
6276 				npaths = 1;
6277 				path = NULL;
6278 
6279 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
6280 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
6281 				    &path);
6282 				if (sa_ret != IBMF_SUCCESS) {
6283 					IBTF_DPRINTF_L2(cmlog,
6284 					    "ibt_get_companion_port_gids: "
6285 					    "ibmf_saa_gid_to_pathrecords() "
6286 					    "returned error: %d ", sa_ret);
6287 					retval =
6288 					    ibcm_ibmf_analyze_error(sa_ret);
6289 					ibcm_dec_hca_acc_cnt(hcap);
6290 					goto get_comp_pgid_exit;
6291 				} else if ((npaths == 0) || (path == NULL)) {
6292 					IBTF_DPRINTF_L2(cmlog,
6293 					    "ibt_get_companion_port_gids: "
6294 					    "failed (%d) to get path records "
6295 					    "for the DGID (0x%llX) from SGID "
6296 					    "(0x%llX)", sa_ret, gid.gid_guid,
6297 					    sgid.gid_guid);
6298 					retval = IBT_GIDS_NOT_FOUND;
6299 					continue;
6300 				}
6301 
6302 				bzero(&nr_req, sizeof (sa_node_record_t));
6303 				nr_req.LID = path->DLID;	/* LID */
6304 
6305 				IBTF_DPRINTF_L3(cmlog,
6306 				    "ibt_get_companion_port_gids: "
6307 				    "Remote Node: LID = 0x%X", nr_req.LID);
6308 
6309 				/* Free SA_Access memory for path record. */
6310 				kmem_free(path, len);
6311 
6312 				IBTF_DPRINTF_L3(cmlog,
6313 				    "ibt_get_companion_port_gids: SAA Call: "
6314 				    "based on LID ");
6315 
6316 				retval = ibcm_get_node_rec(saa_handle, &nr_req,
6317 				    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
6318 				if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
6319 					IBTF_DPRINTF_L2(cmlog,
6320 					    "ibt_get_companion_port_gids: "
6321 					    "failed (%d) to get Node records",
6322 					    retval);
6323 					continue;
6324 				} else if (retval != IBT_SUCCESS) {
6325 					IBTF_DPRINTF_L2(cmlog,
6326 					    "ibt_get_companion_port_gids: "
6327 					    "failed (%d) to get Node records",
6328 					    retval);
6329 					ibcm_dec_hca_acc_cnt(hcap);
6330 					goto get_comp_pgid_exit;
6331 				}
6332 
6333 				nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
6334 				/* Note down HCA GUID info. */
6335 				h_guid = nr_resp->NodeInfo.NodeGUID;
6336 
6337 				IBTF_DPRINTF_L3(cmlog,
6338 				    "ibt_get_companion_port_gids: "
6339 				    "Remote HCA GUID: 0x%llX", h_guid);
6340 
6341 				IBCM_DUMP_NODE_REC(nr_resp);
6342 
6343 				kmem_free(res_p, len);
6344 			}
6345 
6346 			bzero(&nr_req, sizeof (sa_node_record_t));
6347 			if (h_guid != 0) {
6348 				nr_req.NodeInfo.NodeGUID = h_guid;
6349 				c_mask = SA_NODEINFO_COMPMASK_NODEGUID;
6350 			}
6351 
6352 			if (sysimg_guid != 0) {
6353 				nr_req.NodeInfo.SystemImageGUID = sysimg_guid;
6354 				c_mask |= SA_NODEINFO_COMPMASK_SYSIMAGEGUID;
6355 			}
6356 
6357 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
6358 			    "SAA Call: CMASK= 0x%llX", c_mask);
6359 
6360 			retval = ibcm_get_node_rec(saa_handle, &nr_req, c_mask,
6361 			    &res_p, &len);
6362 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
6363 				IBTF_DPRINTF_L3(cmlog,
6364 				    "ibt_get_companion_port_gids: "
6365 				    "failed (%d) to get Node records", retval);
6366 				continue;
6367 			} else if (retval != IBT_SUCCESS) {
6368 				IBTF_DPRINTF_L2(cmlog,
6369 				    "ibt_get_companion_port_gids: Error: (%d) "
6370 				    "while getting Node records", retval);
6371 				ibcm_dec_hca_acc_cnt(hcap);
6372 				goto get_comp_pgid_exit;
6373 			}
6374 
6375 			num_rec = len/sizeof (sa_node_record_t);
6376 
6377 			/* We will be here, only if we found some NodeRec */
6378 			if (gid.gid_prefix && gid.gid_guid) {
6379 				nr_resp = (sa_node_record_t *)res_p;
6380 				for (l = 0; l < num_rec; l++, nr_resp++) {
6381 					pg = nr_resp->NodeInfo.PortGUID;
6382 					if (gid.gid_guid != pg)
6383 						count++;
6384 				}
6385 			} else {
6386 				count = num_rec;
6387 			}
6388 
6389 			if (count != 0) {
6390 				if (multi_sm_loop == 1) {
6391 					count += k;
6392 					t_gidp = kmem_zalloc(count *
6393 					    sizeof (ib_gid_t), KM_SLEEP);
6394 
6395 					if ((k != 0) && (gidp != NULL)) {
6396 						bcopy(gidp, t_gidp,
6397 						    k * sizeof (ib_gid_t));
6398 						kmem_free(gidp,
6399 						    k * sizeof (ib_gid_t));
6400 					}
6401 					gidp = t_gidp;
6402 				} else {
6403 					gidp = kmem_zalloc(count *
6404 					    sizeof (ib_gid_t), KM_SLEEP);
6405 				}
6406 				*num_gids_p = count;
6407 				*gids_p = gidp;
6408 
6409 				nr_resp = (sa_node_record_t *)res_p;
6410 				for (l = 0; l < num_rec; l++, nr_resp++) {
6411 					IBCM_DUMP_NODE_REC(nr_resp);
6412 
6413 					pg = nr_resp->NodeInfo.PortGUID;
6414 					IBTF_DPRINTF_L4(cmlog,
6415 					    "ibt_get_companion_port_gids: "
6416 					    "PortGID %llX", pg);
6417 
6418 					if (pg != gid.gid_guid) {
6419 						gidp[k].gid_prefix =
6420 						    sgid.gid_prefix;
6421 						gidp[k].gid_guid = pg;
6422 
6423 						IBTF_DPRINTF_L3(cmlog,
6424 						    "ibt_get_companion_pgids: "
6425 						    "GID[%d] = %llX:%llX", k,
6426 						    gidp[k].gid_prefix,
6427 						    gidp[k].gid_guid);
6428 
6429 						k++;
6430 						if (k == count)
6431 							break;
6432 					}
6433 				}
6434 				retval = IBT_SUCCESS;	/* done!. */
6435 				kmem_free(res_p, len);
6436 				ibcm_dec_hca_acc_cnt(hcap);
6437 				goto get_comp_pgid_exit;
6438 			} else {
6439 				IBTF_DPRINTF_L2(cmlog,
6440 				    "ibt_get_companion_port_gids: "
6441 				    "Companion PortGIDs not available");
6442 				retval = IBT_GIDS_NOT_FOUND;
6443 			}
6444 			/* Deallocate the memory for 'res_p'. */
6445 			kmem_free(res_p, len);
6446 
6447 			/*
6448 			 * If we are on MultiSM setup, then we need to lookout
6449 			 * from that subnet port too.
6450 			 */
6451 			if (multism) {
6452 				/* break if already searched both the subnet */
6453 				if (multi_sm_loop == 1)
6454 					break;
6455 
6456 				port = (j == 0) ? 1 : 0;
6457 				multi_sm_loop = 1;
6458 				goto get_comp_for_multism;
6459 			} else {
6460 				break;
6461 			}
6462 		}
6463 		ibcm_dec_hca_acc_cnt(hcap);
6464 
6465 		/*
6466 		 * We may be on dual HCA with dual SM configured system.  And
6467 		 * the input attr GID was visible from second HCA. So in order
6468 		 * to get the companion portgid we need to re-look from the
6469 		 * first HCA ports.
6470 		 */
6471 		if ((num_hcas > 1) && (i > 0) && (h_guid != 0) &&
6472 		    (multi_hca_loop != 1)) {
6473 			multi_hca_loop = 1;
6474 			goto get_comp_for_multihca;
6475 		}
6476 	}
6477 	if (*num_gids_p == 0)
6478 		retval = IBT_GIDS_NOT_FOUND;
6479 
6480 get_comp_pgid_exit:
6481 	if (guid_array)
6482 		ibt_free_hca_list(guid_array, num_hcas);
6483 
6484 	if ((retval != IBT_SUCCESS) && (*num_gids_p != 0)) {
6485 		retval = IBT_SUCCESS;
6486 	}
6487 
6488 	IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: done. Status %d, "
6489 	    "Found %d GIDs", retval, *num_gids_p);
6490 
6491 	return (retval);
6492 }
6493 
6494 /* RDMA IP CM Support routines */
6495 ibt_status_t
6496 ibt_get_src_ip(ibt_srcip_attr_t *sattr, ibt_srcip_info_t **src_info_p,
6497     uint_t *entries_p)
6498 {
6499 	ibt_srcip_info_t	*s_ip;
6500 	ibcm_arp_ip_t		*ipp;
6501 	ibcm_arp_ibd_insts_t	ibds;
6502 	uint8_t			i, j;
6503 	uint_t			count;
6504 	ibt_status_t		retval = IBT_SUCCESS;
6505 
6506 	IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%p, %p, %p)",
6507 	    sattr, src_info_p, entries_p);
6508 
6509 	if (sattr == NULL || entries_p == NULL) {
6510 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid I/P Args.");
6511 		return (IBT_INVALID_PARAM);
6512 	}
6513 
6514 	if (sattr->sip_gid.gid_prefix == 0 || sattr->sip_gid.gid_guid == 0) {
6515 		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid GID.");
6516 		return (IBT_INVALID_PARAM);
6517 	}
6518 
6519 	/* TBD: Zoneid */
6520 	retval = ibcm_arp_get_ibds(&ibds, sattr->sip_family);
6521 	if (retval != IBT_SUCCESS) {
6522 		IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds "
6523 		    "failed to get IBD Instances: ret 0x%x", retval);
6524 		goto get_src_ip_end;
6525 	}
6526 
6527 	count = 0;
6528 	for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt;
6529 	    i++, ipp++) {
6530 		if (ipp->ip_inet_family == AF_UNSPEC)
6531 			continue;
6532 		if (ipp->ip_port_gid.gid_prefix == sattr->sip_gid.gid_prefix &&
6533 		    ipp->ip_port_gid.gid_guid == sattr->sip_gid.gid_guid) {
6534 			if ((sattr->sip_pkey) &&
6535 			    (ipp->ip_pkey != sattr->sip_pkey))
6536 				continue;
6537 
6538 			if ((sattr->sip_zoneid != ALL_ZONES) &&
6539 			    (sattr->sip_zoneid != ipp->ip_zoneid))
6540 				continue;
6541 
6542 			count++;
6543 			break;
6544 		}
6545 	}
6546 
6547 	if (count) {
6548 		/*
6549 		 * Allocate memory for return buffer, to be freed by
6550 		 * ibt_free_srcip_info().
6551 		 */
6552 		s_ip = kmem_alloc((count * sizeof (ibt_srcip_info_t)),
6553 		    KM_SLEEP);
6554 
6555 		*src_info_p = s_ip;
6556 		*entries_p = count;
6557 
6558 		j = 0;
6559 		for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt;
6560 		    i++, ipp++) {
6561 			if (ipp->ip_inet_family == AF_UNSPEC)
6562 				continue;
6563 			if ((ipp->ip_port_gid.gid_prefix ==
6564 			    sattr->sip_gid.gid_prefix) &&
6565 			    (ipp->ip_port_gid.gid_guid ==
6566 			    sattr->sip_gid.gid_guid)) {
6567 				if ((sattr->sip_pkey) &&
6568 				    (ipp->ip_pkey != sattr->sip_pkey))
6569 					continue;
6570 
6571 				if ((sattr->sip_zoneid != ALL_ZONES) &&
6572 				    (sattr->sip_zoneid != ipp->ip_zoneid))
6573 					continue;
6574 
6575 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*s_ip))
6576 				s_ip[j].ip_addr.family = ipp->ip_inet_family;
6577 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*s_ip))
6578 				if (s_ip[j].ip_addr.family == AF_INET) {
6579 					bcopy(&ipp->ip_cm_sin.sin_addr,
6580 					    &s_ip[j].ip_addr.un.ip4addr,
6581 					    sizeof (in_addr_t));
6582 				} else if (s_ip[j].ip_addr.family == AF_INET6) {
6583 					bcopy(&ipp->ip_cm_sin6.sin6_addr,
6584 					    &s_ip[j].ip_addr.un.ip6addr,
6585 					    sizeof (in6_addr_t));
6586 					/* TBD: scope_id */
6587 				}
6588 				IBCM_PRINT_IP("ibt_get_src_ip",
6589 				    &s_ip[j].ip_addr);
6590 				j++;
6591 			}
6592 		}
6593 	} else {
6594 		retval = IBT_SRC_IP_NOT_FOUND;
6595 	}
6596 
6597 get_src_ip_end:
6598 	ibcm_arp_free_ibds(&ibds);
6599 	return (retval);
6600 }
6601 
6602 /*
6603  * ibt_free_srcip_info()
6604  *	Free the memory allocated by successful ibt_get_src_ip()
6605  *
6606  *	src_info	Pointer returned by ibt_get_src_ip().
6607  *
6608  *	entries		The number of ibt_ip_addr_t entries to free.
6609  */
6610 void
6611 ibt_free_srcip_info(ibt_srcip_info_t *src_info, uint_t entries)
6612 {
6613 	IBTF_DPRINTF_L3(cmlog, "ibt_free_srcip_info: "
6614 	    "Free <%d> entries from 0x%p", entries, src_info);
6615 
6616 	if ((src_info != NULL) && (entries > 0))
6617 		kmem_free(src_info, entries * sizeof (ibt_srcip_info_t));
6618 	else
6619 		IBTF_DPRINTF_L2(cmlog, "ibt_free_srcip_info: "
6620 		    "ERROR: NULL buf pointer or ZERO length specified.");
6621 }
6622 
6623 
6624 ib_svc_id_t
6625 ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port)
6626 {
6627 	ib_svc_id_t	sid;
6628 
6629 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_sid(%X, %lX)", protocol_num,
6630 	    dst_port);
6631 
6632 	/*
6633 	 * If protocol_num is non-zero, then formulate the SID and return it.
6634 	 * If protocol_num is zero, then we need to assign a locally generated
6635 	 * IP SID with IB_SID_IPADDR_PREFIX.
6636 	 */
6637 	if (protocol_num) {
6638 		sid = IB_SID_IPADDR_PREFIX | protocol_num << 16 | dst_port;
6639 	} else {
6640 		sid = ibcm_alloc_ip_sid();
6641 	}
6642 
6643 	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_sid: SID: 0x%016llX", sid);
6644 	return (sid);
6645 }
6646 
6647 ibt_status_t
6648 ibt_release_ip_sid(ib_svc_id_t ip_sid)
6649 {
6650 	IBTF_DPRINTF_L4(cmlog, "ibt_release_ip_sid(%llX)", ip_sid);
6651 
6652 	if (((ip_sid & IB_SID_IPADDR_PREFIX_MASK) != 0) ||
6653 	    (!(ip_sid & IB_SID_IPADDR_PREFIX))) {
6654 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6655 		    "Called for Non-RDMA IP SID", ip_sid);
6656 		return (IBT_INVALID_PARAM);
6657 	}
6658 
6659 	/*
6660 	 * If protocol_num in ip_sid are all ZEROs, then this SID is allocated
6661 	 * by IBTF. If not, then the specified ip_sid is invalid.
6662 	 */
6663 	if (ip_sid & IB_SID_IPADDR_IPNUM_MASK) {
6664 		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
6665 		    "Called for Non-IBTF assigned RDMA IP SID", ip_sid);
6666 		return (IBT_INVALID_PARAM);
6667 	}
6668 
6669 	ibcm_free_ip_sid(ip_sid);
6670 
6671 	return (IBT_SUCCESS);
6672 }
6673 
6674 
6675 uint8_t
6676 ibt_get_ip_protocol_num(ib_svc_id_t sid)
6677 {
6678 	return ((sid & IB_SID_IPADDR_IPNUM_MASK) >> 16);
6679 }
6680 
6681 in_port_t
6682 ibt_get_ip_dst_port(ib_svc_id_t sid)
6683 {
6684 	return (sid & IB_SID_IPADDR_PORTNUM_MASK);
6685 }
6686 
6687 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_cm_info_t))
6688 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_ip_pvtdata_t))
6689 
6690 ibt_status_t
6691 ibt_format_ip_private_data(ibt_ip_cm_info_t *ip_cm_info,
6692     ibt_priv_data_len_t priv_data_len, void *priv_data_p)
6693 {
6694 	ibcm_ip_pvtdata_t	ip_data;
6695 
6696 	IBTF_DPRINTF_L4(cmlog, "ibt_format_ip_private_data(%p, %d, %p)",
6697 	    ip_cm_info, priv_data_len, priv_data_p);
6698 
6699 	if ((ip_cm_info == NULL) || (priv_data_p == NULL) ||
6700 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6701 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6702 		    "Invalid Inputs.");
6703 		return (IBT_INVALID_PARAM);
6704 	}
6705 
6706 	bzero(&ip_data, sizeof (ibcm_ip_pvtdata_t));
6707 	ip_data.ip_srcport = ip_cm_info->src_port; /* Source Port */
6708 
6709 	IBCM_PRINT_IP("format_ip_pvt: src", &ip_cm_info->src_addr);
6710 	IBCM_PRINT_IP("format_ip_pvt: dst", &ip_cm_info->dst_addr);
6711 	/* IPV = 0x4, if IP-Addr are IPv4 format, else 0x6 for IPv6 */
6712 	if (ip_cm_info->src_addr.family == AF_INET) {
6713 		ip_data.ip_ipv = IBT_CM_IP_IPV_V4;
6714 		ip_data.ip_srcv4 = ip_cm_info->src_addr.un.ip4addr;
6715 		ip_data.ip_dstv4 = ip_cm_info->dst_addr.un.ip4addr;
6716 	} else if (ip_cm_info->src_addr.family == AF_INET6) {
6717 		ip_data.ip_ipv = IBT_CM_IP_IPV_V6;
6718 		bcopy(&ip_cm_info->src_addr.un.ip6addr,
6719 		    &ip_data.ip_srcv6, sizeof (in6_addr_t));
6720 		bcopy(&ip_cm_info->dst_addr.un.ip6addr,
6721 		    &ip_data.ip_dstv6, sizeof (in6_addr_t));
6722 	} else {
6723 		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
6724 		    "IP Addr needs to be either AF_INET or AF_INET6 family.");
6725 		return (IBT_INVALID_PARAM);
6726 	}
6727 
6728 	ip_data.ip_MajV = IBT_CM_IP_MAJ_VER;
6729 	ip_data.ip_MinV = IBT_CM_IP_MIN_VER;
6730 
6731 	bcopy(&ip_data, priv_data_p, IBT_IP_HDR_PRIV_DATA_SZ);
6732 
6733 	return (IBT_SUCCESS);
6734 }
6735 
6736 
6737 ibt_status_t
6738 ibt_get_ip_data(ibt_priv_data_len_t priv_data_len, void *priv_data,
6739     ibt_ip_cm_info_t *ip_cm_infop)
6740 {
6741 	ibcm_ip_pvtdata_t	ip_data;
6742 
6743 	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_data(%d, %p, %p)",
6744 	    priv_data_len, priv_data, ip_cm_infop);
6745 
6746 	if ((ip_cm_infop == NULL) || (priv_data == NULL) ||
6747 	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
6748 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR Invalid Inputs");
6749 		return (IBT_INVALID_PARAM);
6750 	}
6751 
6752 	bcopy(priv_data, &ip_data, IBT_IP_HDR_PRIV_DATA_SZ);
6753 	ip_cm_infop->src_port = ip_data.ip_srcport; /* Source Port */
6754 
6755 	/* IPV = 0x4, if IP Address are IPv4 format, else 0x6 for IPv6 */
6756 	if (ip_data.ip_ipv == IBT_CM_IP_IPV_V4) {
6757 		/* Copy IPv4 Addr */
6758 		ip_cm_infop->src_addr.family = ip_cm_infop->dst_addr.family =
6759 		    AF_INET;
6760 		ip_cm_infop->src_addr.un.ip4addr = ip_data.ip_srcv4;
6761 		ip_cm_infop->dst_addr.un.ip4addr = ip_data.ip_dstv4;
6762 	} else if (ip_data.ip_ipv == IBT_CM_IP_IPV_V6) {
6763 		/* Copy IPv6 Addr */
6764 		ip_cm_infop->src_addr.family = ip_cm_infop->dst_addr.family =
6765 		    AF_INET6;
6766 		bcopy(&ip_data.ip_srcv6, &ip_cm_infop->src_addr.un.ip6addr,
6767 		    sizeof (in6_addr_t));
6768 		bcopy(&ip_data.ip_dstv6, &ip_cm_infop->dst_addr.un.ip6addr,
6769 		    sizeof (in6_addr_t));
6770 	} else {
6771 		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR: IP Addr needs"
6772 		    " to be either AF_INET or AF_INET6 family.");
6773 		return (IBT_INVALID_PARAM);
6774 	}
6775 	IBCM_PRINT_IP("ibt_get_ip_data: src", &ip_cm_infop->src_addr);
6776 	IBCM_PRINT_IP("ibt_get_ip_data: dst", &ip_cm_infop->dst_addr);
6777 
6778 	return (IBT_SUCCESS);
6779 }
6780 
6781 
6782 /* Routines for warlock */
6783 
6784 /* ARGSUSED */
6785 static void
6786 ibcm_dummy_mcg_handler(void *arg, ibt_status_t retval, ibt_mcg_info_t *minfo)
6787 {
6788 	ibcm_join_mcg_tqarg_t	dummy_mcg;
6789 
6790 	dummy_mcg.func = ibcm_dummy_mcg_handler;
6791 
6792 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_mcg_handler: "
6793 	    "dummy_mcg.func %p", dummy_mcg.func);
6794 }
6795 
6796 
6797 /* ARGSUSED */
6798 static void
6799 ibcm_dummy_recycle_rc_handler(ibt_status_t retval, void *arg)
6800 {
6801 	ibcm_taskq_recycle_arg_t	dummy_rc_recycle;
6802 
6803 	dummy_rc_recycle.func = ibcm_dummy_recycle_rc_handler;
6804 
6805 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_recycle_rc_handler: "
6806 	    "dummy_rc_recycle.func %p", dummy_rc_recycle.func);
6807 }
6808 
6809 
6810 /* ARGSUSED */
6811 static ibt_cm_status_t
6812 ibcm_dummy_ud_handler(void *priv, ibt_cm_ud_event_t *event,
6813     ibt_cm_ud_return_args_t *ret_args,
6814     void *priv_data, ibt_priv_data_len_t len)
6815 {
6816 	/*
6817 	 * Let warlock see that ibcm_local_handler_s::actual_cm_handler
6818 	 * points to this routine.
6819 	 */
6820 	ibcm_local_handler_t	p;
6821 	ibcm_ud_state_data_t	dummy_ud;
6822 
6823 	p.actual_cm_handler = ibcm_dummy_ud_handler;
6824 	dummy_ud.ud_cm_handler = ibcm_dummy_ud_handler;
6825 
6826 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_ud_handler: p.actual_cm_handler %p"
6827 	    "dummy_ud.ud_cm_handler %p", p.actual_cm_handler,
6828 	    dummy_ud.ud_cm_handler);
6829 	/*
6830 	 * Call all routines that the client's callback routine could call.
6831 	 */
6832 
6833 	return (IBT_CM_ACCEPT);
6834 }
6835 
6836 /* ARGSUSED */
6837 static ibt_cm_status_t
6838 ibcm_dummy_rc_handler(void *priv, ibt_cm_event_t *event,
6839     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6840 {
6841 	ibcm_state_data_t	dummy_rc;
6842 
6843 	dummy_rc.cm_handler = ibcm_dummy_rc_handler;
6844 
6845 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_rc_handler: "
6846 	    "dummy_ud.ud_cm_handler %p", dummy_rc.cm_handler);
6847 	/*
6848 	 * Call all routines that the client's callback routine could call.
6849 	 */
6850 
6851 	return (IBT_CM_ACCEPT);
6852 }
6853