xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c (revision 058561cbaa119a6f2659bc27ef343e1b47266bb2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
29 #include <sys/ib/ibtl/ibti.h>
30 
31 /*
32  * ibcm_ti.c
33  *	These routines implement the Communication Manager's interfaces to IBTL.
34  */
35 
36 /* Argument bundle handed to the CM RC channel recycle task */
37 typedef struct ibcm_taskq_recycle_arg_s {
38 	ibt_channel_hdl_t	rc_chan;	/* channel handle */
39 	ibt_cep_flags_t		control;	/* CEP flags */
40 	uint8_t			hca_port_num;	/* HCA port number */
41 	ibt_recycle_handler_t	func;		/* recycle handler */
42 	void			*arg;	/* presumably func's argument; confirm */
43 } ibcm_taskq_recycle_arg_t;
44 
45 _NOTE(READ_ONLY_DATA(ibcm_taskq_recycle_arg_s))
46 
47 static ibt_status_t	ibcm_init_reply_addr(ibcm_hca_info_t *hcap,
48     ibcm_mad_addr_t *reply_addr, ibt_chan_open_args_t *chan_args,
49     ibt_chan_open_flags_t flags, ib_time_t *cm_pkt_lt, ib_lid_t prim_slid);
50 static void		ibcm_process_abort_via_taskq(void *args);
51 static ibt_status_t	ibcm_process_rc_recycle_ret(void *recycle_arg);
52 static ibt_status_t	ibcm_process_join_mcg(void *taskq_arg);
53 static void		ibcm_process_async_join_mcg(void *tq_arg);
54 
55 static ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *,
56     uint64_t c_mask, void *, size_t *);
57 
58 static ibt_status_t ibcm_close_rc_channel(ibt_channel_hdl_t channel,
59     ibcm_state_data_t *statep, ibt_execution_mode_t mode);
60 
61 /* Address Record (AR) management: DAPL ATS identifiers and AR globals */
62 #define	IBCM_DAPL_ATS_NAME	"DAPL Address Translation Service"
63 #define	IBCM_DAPL_ATS_SID	0x10000CE100415453ULL	/* ends in ASCII "ATS" */
64 #define	IBCM_DAPL_ATS_NBYTES	16	/* NOTE(review): presumably a byte count */
65 ibcm_svc_info_t *ibcm_ar_svcinfop;	/* NOTE(review): presumably ATS svc info */
66 ibcm_ar_t	*ibcm_ar_list;		/* NOTE(review): presumably AR list head */
67 
68 /*
69  * Tunable parameter to turn off the overriding of the pi_path_mtu value.
70  *	1	(Default) Override the path record's pi_path_mtu value with
71  *		IB_MTU_1K for RC channels. This is done only when the
72  *		channel is established on a Tavor HCA and the path's
73  *		pi_path_mtu is greater than IB_MTU_1K.
74  *	0	Do not override; use pi_path_mtu as is.
75  */
76 int	ibcm_override_path_mtu = 1;
77 
78 #ifdef DEBUG
79 static void	ibcm_print_reply_addr(ibt_channel_hdl_t channel,
80 		    ibcm_mad_addr_t *cm_reply_addr);
81 #endif
82 
83 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_hdl}))
84 
85 /* access is controlled between ibcm_sm.c and ibcm_ti.c by CVs */
86 _NOTE(SCHEME_PROTECTS_DATA("Serialized access by CV", {ibt_rc_returns_t
87     ibt_ud_returns_t ibt_ap_returns_t ibt_ar_t}))
88 
89 /*
90  * Typically, clients initialize these args in one API call, and use them in
91  * another API call.
92  */
93 _NOTE(SCHEME_PROTECTS_DATA("Expected usage of ibtl api by client",
94     {ibt_path_info_s ibt_cep_path_s ibt_adds_vect_s ibt_mcg_info_s ib_gid_s
95     ibt_ud_dest_attr_s ibt_ud_dest_s ibt_srv_data_s ibt_redirect_info_s}))
96 
97 /*
98  * ibt_open_rc_channel()
99  *	ibt_open_rc_channel opens a communication channel on the specified
100  *	channel to the specified service. For connection service type qp's
101  *	the CM initiates the CEP to establish the connection and transitions
102  *	the QP/EEC to the "Ready to send" State modifying the QP/EEC's
103  *	attributes as necessary.
104  *	The implementation of this function assumes that alt path is different
105  *	from primary path. It is assumed that the Path functions ensure that.
106  *
107  * RETURN VALUES:
108  *	IBT_SUCCESS	on success (or respective failure on error)
109  */
110 ibt_status_t
111 ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags,
112     ibt_execution_mode_t mode, ibt_chan_open_args_t *chan_args,
113     ibt_rc_returns_t *ret_args)
114 {
115 	/* all fields that are related to REQ MAD formation */
116 
117 	ib_pkey_t		prim_pkey;
118 	ib_lid_t		primary_slid, alternate_slid;
119 	ib_qpn_t		local_qpn = 0;
120 	ib_guid_t		hca_guid;
121 	ib_qkey_t		local_qkey = 0;
122 	ib_eecn_t		local_eecn = 0;
123 	ib_eecn_t		remote_eecn = 0;
124 	boolean_t		primary_grh;
125 	boolean_t		alternate_grh = B_FALSE;
126 	ib_lid_t		base_lid;
127 	ib_com_id_t		local_comid;
128 	ibmf_msg_t		*ibmf_msg, *ibmf_msg_dreq;
129 	ibcm_req_msg_t		*req_msgp;
130 
131 	uint8_t			rdma_in, rdma_out;
132 	uint8_t			cm_retries;
133 	uint64_t		local_cm_proc_time;	/* In usec */
134 	uint8_t			local_cm_resp_time;	/* IB time */
135 	uint64_t		remote_cm_resp_time;	/* In usec */
136 	uint32_t		starting_psn = 0;
137 
138 	/* CM path related fields */
139 	ibmf_handle_t		ibmf_hdl;
140 	ibcm_qp_list_t		*cm_qp_entry;
141 	ibcm_mad_addr_t		cm_reply_addr;
142 
143 	uint8_t			cm_pkt_lt;
144 
145 	/* Local args for ibtl/internal CM functions called within */
146 	ibt_status_t		status;
147 	ibcm_status_t		lkup_status;
148 	ibt_qp_query_attr_t	qp_query_attr;
149 
150 	/* Other misc local args */
151 	ibt_priv_data_len_t	len;
152 	ibcm_hca_info_t		*hcap;
153 	ibcm_state_data_t	*statep;
154 	uint8_t			port_no;
155 
156 	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel(chan %p, %X, %x, %p, %p)",
157 	    channel, flags, mode, chan_args, ret_args);
158 
159 	if (IBCM_INVALID_CHANNEL(channel)) {
160 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: invalid channel");
161 		return (IBT_CHAN_HDL_INVALID);
162 	}
163 
164 	/* cm handler should always be specified */
165 	if (chan_args->oc_cm_handler == NULL) {
166 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
167 		    "CM handler is not be specified", channel);
168 		return (IBT_INVALID_PARAM);
169 	}
170 
171 	if (mode == IBT_NONBLOCKING) {
172 		if (ret_args != NULL) {
173 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
174 			    " ret_args should be NULL when called in "
175 			    "non-blocking mode", channel);
176 			return (IBT_INVALID_PARAM);
177 		}
178 	} else if (mode == IBT_BLOCKING) {
179 		if (ret_args == NULL) {
180 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
181 			    " ret_args should be Non-NULL when called in "
182 			    "blocking mode", channel);
183 			return (IBT_INVALID_PARAM);
184 		}
185 		if (ret_args->rc_priv_data_len > IBT_REP_PRIV_DATA_SZ) {
186 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
187 			    " private data length is too large", channel);
188 			return (IBT_INVALID_PARAM);
189 		}
190 		if ((ret_args->rc_priv_data_len > 0) &&
191 		    (ret_args->rc_priv_data == NULL)) {
192 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
193 			    " rc_priv_data_len > 0, but rc_priv_data NULL",
194 			    channel);
195 			return (IBT_INVALID_PARAM);
196 		}
197 	} else { /* any other mode is not valid for ibt_open_rc_channel */
198 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
199 		    "invalid mode %x specified", channel, mode);
200 		return (IBT_INVALID_PARAM);
201 	}
202 
203 	/*
204 	 * XXX: no support yet for ibt_chan_open_flags_t - IBT_OCHAN_DUP
205 	 */
206 	if (flags & IBT_OCHAN_DUP) {
207 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
208 		    "Unsupported Flags specified: 0x%X", channel, flags);
209 		return (IBT_INVALID_PARAM);
210 	}
211 
212 	if ((flags & IBT_OCHAN_REDIRECTED) &&
213 	    (flags & IBT_OCHAN_PORT_REDIRECTED)) {
214 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
215 		    "Illegal to specify IBT_OCHAN_REDIRECTED and "
216 		    "IBT_OCHAN_PORT_REDIRECTED flags together", channel);
217 		return (IBT_INVALID_PARAM);
218 	}
219 
220 	if (((flags & IBT_OCHAN_REDIRECTED) &&
221 	    (chan_args->oc_cm_redirect_info == NULL)) ||
222 	    ((flags & IBT_OCHAN_PORT_REDIRECTED) &&
223 	    (chan_args->oc_cm_cep_path == NULL))) {
224 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
225 		    "Redirect flag specified, but respective arg is NULL",
226 		    channel);
227 		return (IBT_INVALID_PARAM);
228 	}
229 
230 	if ((flags & IBT_OCHAN_REDIRECTED) &&
231 	    (chan_args->oc_cm_redirect_info->rdi_dlid == 0) &&
232 	    (chan_args->oc_cm_redirect_info->rdi_gid.gid_guid == 0)) {
233 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
234 		    "Either rdi_dlid or rdi_gid must be specified for"
235 		    " IBT_OCHAN_REDIRECTED", channel);
236 		return (IBT_INVALID_PARAM);
237 	}
238 
239 	/* primary dlid and hca_port_num should never be zero */
240 	port_no = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
241 
242 	if ((IBCM_PRIM_ADDS_VECT(chan_args).av_dlid == 0) && (port_no == 0)) {
243 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
244 		    "Primary Path's information is not valid", channel);
245 		return (IBT_INVALID_PARAM);
246 	}
247 
248 	/* validate SID */
249 	if (chan_args->oc_path->pi_sid == 0) {
250 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
251 		    "ERROR: Service ID in path information is 0", channel);
252 		return (IBT_INVALID_PARAM);
253 	}
254 
255 	/* validate rnr_retry_cnt (enum has more than 3 bits) */
256 	if ((uint_t)chan_args->oc_path_rnr_retry_cnt > IBT_RNR_INFINITE_RETRY) {
257 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
258 		    "ERROR: oc_path_rnr_retry_cnt(%d) is out of range",
259 		    channel, chan_args->oc_path_rnr_retry_cnt);
260 		return (IBT_INVALID_PARAM);
261 	}
262 
263 	/*
264 	 * Ensure that client is not re-using a QP that is still associated
265 	 * with a statep
266 	 */
267 	IBCM_GET_CHAN_PRIVATE(channel, statep);
268 	if (statep != NULL) {
269 		IBCM_RELEASE_CHAN_PRIVATE(channel);
270 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
271 		    "Channel being re-used on active side", channel);
272 		return (IBT_CHAN_IN_USE);
273 	}
274 
275 	/* Get GUID from Channel */
276 	hca_guid = ibt_channel_to_hca_guid(channel);
277 
278 	/* validate QP's hca guid with that from primary path  */
279 	if (hca_guid != chan_args->oc_path->pi_hca_guid) {
280 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
281 		    "GUID from Channel and primary path don't match", channel);
282 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
283 		    "Channel GUID %llX primary path GUID %llX", channel,
284 		    hca_guid, chan_args->oc_path->pi_hca_guid);
285 		return (IBT_CHAN_HDL_INVALID);
286 	}
287 
288 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
289 	    "Local HCA GUID %llX", channel, hca_guid);
290 
291 	status = ibt_query_qp(channel, &qp_query_attr);
292 	if (status != IBT_SUCCESS) {
293 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
294 		    "ibt_query_qp failed %d", channel, status);
295 		return (status);
296 	}
297 
298 	/* If client specified "no port change on QP" */
299 	if ((qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
300 	    port_no) && (flags & IBT_OCHAN_PORT_FIXED)) {
301 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
302 		    "chan port %d and path port %d does not match", channel,
303 		    qp_query_attr.qp_info.qp_transport.rc.rc_path. \
304 		    cep_hca_port_num, port_no);
305 		return (IBT_INVALID_PARAM);
306 	}
307 
308 	if (qp_query_attr.qp_info.qp_trans != IBT_RC_SRV) {
309 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
310 		    "Invalid Channel type: Applicable only to RC Channel",
311 		    channel);
312 		return (IBT_CHAN_SRV_TYPE_INVALID);
313 	}
314 
315 	/* Check if QP is in INIT state or not */
316 	if (qp_query_attr.qp_info.qp_state != IBT_STATE_INIT) {
317 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
318 		    "QP is not in INIT state %x", channel,
319 		    qp_query_attr.qp_info.qp_state);
320 		return (IBT_CHAN_STATE_INVALID);
321 	}
322 
323 	local_qpn = qp_query_attr.qp_qpn;
324 
325 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p Active QPN 0x%x",
326 	    channel, local_qpn);
327 
328 #ifdef	NO_EEC_SUPPORT_YET
329 
330 	if (flags & IBT_OCHAN_RDC_EXISTS) {
331 		ibt_eec_query_attr_t	eec_query_attr;
332 
333 		local_qkey = qp_query_attr.qp_info.qp_transport.rd_qkey;
334 
335 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: RD");
336 
337 		status = ibt_query_eec(channel, &eec_query_attr);
338 		if (status != IBT_SUCCESS) {
339 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
340 			    " ibt_query_eec failed %d", channel, status);
341 			return (status);
342 		}
343 		local_eecn = eec_query_attr.eec_eecn;
344 	}
345 
346 #endif
347 
348 	/* If no HCA found return failure */
349 	if ((hcap = ibcm_find_hca_entry(hca_guid)) == NULL) {
350 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
351 		    "hcap is NULL. Probably hca is not in active state",
352 		    channel);
353 		return (IBT_CHAN_HDL_INVALID);
354 	}
355 
356 	rdma_out = chan_args->oc_rdma_ra_out;
357 	rdma_in = chan_args->oc_rdma_ra_in;
358 
359 	if ((rdma_in > hcap->hca_max_rdma_in_qp) ||
360 	    (rdma_out > hcap->hca_max_rdma_out_qp)) {
361 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
362 		    "rdma in %d/out %d values exceed hca limits", channel,
363 		    rdma_in, rdma_out);
364 		ibcm_dec_hca_acc_cnt(hcap);
365 		return (IBT_INVALID_PARAM);
366 	}
367 
368 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
369 	    "rdma_in %d rdma_out %d", channel, rdma_in, rdma_out);
370 
371 	if (chan_args->oc_path->pi_prim_pkt_lt > ibcm_max_ib_pkt_lt) {
372 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
373 		    "Huge Primary Pkt lt %d", channel,
374 		    chan_args->oc_path->pi_prim_pkt_lt);
375 		ibcm_dec_hca_acc_cnt(hcap);
376 		return (IBT_PATH_PKT_LT_TOO_HIGH);
377 	}
378 
379 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no,
380 	    NULL, &base_lid);
381 	if (status != IBT_SUCCESS) {
382 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
383 		    "primary port_num %d not active", channel, port_no);
384 		ibcm_dec_hca_acc_cnt(hcap);
385 		return (status);
386 	}
387 
388 	/* Validate P_KEY Index */
389 	status = ibt_index2pkey_byguid(hcap->hca_guid, port_no,
390 	    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix, &prim_pkey);
391 	if (status != IBT_SUCCESS) {
392 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
393 		    "Invalid Primary PKeyIx %x", channel,
394 		    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix);
395 		ibcm_dec_hca_acc_cnt(hcap);
396 		return (status);
397 	}
398 
399 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
400 	    "primary_port_num %d primary_pkey 0x%x", channel, port_no,
401 	    prim_pkey);
402 
403 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
404 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
405 	    != IBT_SUCCESS)) {
406 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
407 		    "ibmf reg or callback setup failed during re-initialize",
408 		    channel);
409 		ibcm_dec_hca_acc_cnt(hcap);
410 		return (status);
411 	}
412 
413 	ibmf_hdl = hcap->hca_port_info[port_no - 1].port_ibmf_hdl;
414 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
415 	    "primary ibmf_hdl = 0x%p", channel, ibmf_hdl);
416 
417 
418 	primary_slid = base_lid + IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
419 
420 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: channel 0x%p "
421 	    "primary SLID = %x", channel, primary_slid);
422 
423 	/* check first if alternate path exists or not as it is OPTIONAL */
424 	if (IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num != 0) {
425 		uint8_t	alt_port_no;
426 
427 		alt_port_no = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
428 
429 		if (chan_args->oc_path->pi_alt_pkt_lt > ibcm_max_ib_pkt_lt) {
430 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
431 			    "Huge Alt Pkt lt %d", channel,
432 			    chan_args->oc_path->pi_alt_pkt_lt);
433 			ibcm_dec_hca_acc_cnt(hcap);
434 			return (IBT_PATH_PKT_LT_TOO_HIGH);
435 		}
436 
437 		if (port_no != alt_port_no) {
438 
439 			status = ibt_get_port_state_byguid(hcap->hca_guid,
440 			    alt_port_no, NULL, &base_lid);
441 			if (status != IBT_SUCCESS) {
442 
443 				IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
444 				    "chan 0x%p alt_port_num %d inactive %d",
445 				    channel, alt_port_no, status);
446 				ibcm_dec_hca_acc_cnt(hcap);
447 				return (status);
448 			}
449 
450 		}
451 		alternate_slid =
452 		    base_lid + IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
453 
454 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan %0xp "
455 		    "alternate SLID = %x", channel, alternate_slid);
456 	}
457 
458 	/*
459 	 * only pkey needs to be zero'ed, because all other fields are set in
460 	 * ibcm_init_reply_addr. But, let's bzero the complete struct for
461 	 * any future modifications.
462 	 */
463 	bzero(&cm_reply_addr, sizeof (cm_reply_addr));
464 
465 	/* Initialize the MAD destination address in stored_reply_addr */
466 	if ((status = ibcm_init_reply_addr(hcap, &cm_reply_addr, chan_args,
467 	    flags, &cm_pkt_lt, primary_slid)) != IBT_SUCCESS) {
468 
469 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
470 		    "ibcm_init_reply_addr failed status %d ", channel, status);
471 		ibcm_dec_hca_acc_cnt(hcap);
472 		return (status);
473 	}
474 
475 
476 	/* Initialize the pkey for CM MAD communication */
477 	if (cm_reply_addr.rcvd_addr.ia_p_key == 0)
478 		cm_reply_addr.rcvd_addr.ia_p_key = prim_pkey;
479 
480 #ifdef DEBUG
481 	ibcm_print_reply_addr(channel, &cm_reply_addr);
482 #endif
483 
484 	/* Retrieve an ibmf qp for sending CM MADs */
485 	if ((cm_qp_entry = ibcm_find_qp(hcap, port_no,
486 	    cm_reply_addr.rcvd_addr.ia_p_key)) == NULL) {
487 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
488 		    "unable to allocate ibmf qp for CM MADs", channel);
489 		ibcm_dec_hca_acc_cnt(hcap);
490 		return (IBT_INSUFF_RESOURCE);
491 	}
492 
493 
494 	if (ibcm_alloc_comid(hcap, &local_comid) != IBCM_SUCCESS) {
495 		ibcm_release_qp(cm_qp_entry);
496 		ibcm_dec_hca_acc_cnt(hcap);
497 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
498 		    " Unable to allocate comid", channel);
499 		return (IBT_INSUFF_KERNEL_RESOURCE);
500 	}
501 
502 	/* allocate an IBMF mad buffer (REQ) */
503 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg,
504 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
505 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
506 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
507 		ibcm_release_qp(cm_qp_entry);
508 		ibcm_free_comid(hcap, local_comid);
509 		ibcm_dec_hca_acc_cnt(hcap);
510 		return (status);
511 	}
512 
513 	/* allocate an IBMF mad buffer (DREQ) */
514 	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg_dreq,
515 	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
516 		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
517 		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
518 		(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
519 		ibcm_release_qp(cm_qp_entry);
520 		ibcm_free_comid(hcap, local_comid);
521 		ibcm_dec_hca_acc_cnt(hcap);
522 		return (status);
523 	}
524 
525 	/* Init to Init, if QP's port does not match with path information */
526 	if (qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
527 	    IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num) {
528 
529 		ibt_qp_info_t		qp_info;
530 		ibt_cep_modify_flags_t	cep_flags;
531 
532 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
533 		    "chan 0x%p chan port %d", channel,
534 		    qp_query_attr.qp_info.qp_transport.rc.rc_path.\
535 		    cep_hca_port_num);
536 
537 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
538 		    "chan 0x%p path port %d", channel, port_no);
539 
540 		bzero(&qp_info, sizeof (qp_info));
541 		/* For now, set it to RC type */
542 
543 		qp_info.qp_trans = IBT_RC_SRV;
544 		qp_info.qp_state = IBT_STATE_INIT;
545 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num = port_no;
546 
547 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
548 
549 		status = ibt_modify_qp(channel, cep_flags, &qp_info, NULL);
550 
551 		if (status != IBT_SUCCESS) {
552 			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
553 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
554 			ibcm_release_qp(cm_qp_entry);
555 			ibcm_free_comid(hcap, local_comid);
556 			ibcm_dec_hca_acc_cnt(hcap);
557 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
558 			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg_dreq);
559 			return (status);
560 		} else
561 			IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
562 			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
563 	}
564 
565 	/* allocate ibcm_state_data_t before grabbing the WRITER lock */
566 	statep = kmem_zalloc(sizeof (ibcm_state_data_t), KM_SLEEP);
567 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
568 	lkup_status = ibcm_lookup_msg(IBCM_OUTGOING_REQ, local_comid, 0, 0,
569 	    hcap, &statep);
570 	rw_exit(&hcap->hca_state_rwlock);
571 
572 	/* CM should be seeing this for the first time */
573 	ASSERT(lkup_status == IBCM_LOOKUP_NEW);
574 
575 	/* Increment the hca's resource count */
576 	ibcm_inc_hca_res_cnt(hcap);
577 
578 	/* Once a resource created on hca, no need to hold the acc cnt */
579 	ibcm_dec_hca_acc_cnt(hcap);
580 
581 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
582 
583 	statep->timerid = 0;
584 	statep->local_hca_guid = hca_guid;
585 	statep->local_qpn = local_qpn;
586 	statep->stored_reply_addr.cm_qp_entry = cm_qp_entry;
587 	statep->prim_port = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
588 	statep->alt_port = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;
589 
590 
591 	/* Save "statep" as channel's CM private data.  */
592 	statep->channel = channel;
593 	IBCM_SET_CHAN_PRIVATE(statep->channel, statep);
594 
595 	statep->stored_msg = ibmf_msg;
596 	statep->dreq_msg = ibmf_msg_dreq;
597 
598 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req_msgp))
599 
600 	/* Start filling in the REQ MAD */
601 	req_msgp = (ibcm_req_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
602 	req_msgp->req_local_comm_id = h2b32(local_comid);
603 	req_msgp->req_svc_id = h2b64(chan_args->oc_path->pi_sid);
604 	req_msgp->req_local_ca_guid = h2b64(hca_guid);
605 	req_msgp->req_local_qkey = h2b32(local_qkey);	/* for EEC/RD */
606 
607 	/* Bytes 32-35 are req_local_qpn and req_off_resp_resources */
608 	req_msgp->req_local_qpn_plus = h2b32(local_qpn << 8 | rdma_in);
609 
610 	/* Bytes 36-39 are req_local_eec_no and req_off_initiator_depth */
611 	req_msgp->req_local_eec_no_plus = h2b32(local_eecn << 8 | rdma_out);
612 
613 	if (flags & IBT_OCHAN_REMOTE_CM_TM)
614 		remote_cm_resp_time = chan_args->oc_remote_cm_time;
615 	else
616 		remote_cm_resp_time = ibcm_remote_response_time;
617 
618 	/*
619 	 * Bytes 40-43 - remote_eecn, remote_cm_resp_time, tran_type,
620 	 * IBT_CM_FLOW_CONTROL is always set by default.
621 	 */
622 	req_msgp->req_remote_eecn_plus = h2b32(
623 	    remote_eecn << 8 | (ibt_usec2ib(remote_cm_resp_time) & 0x1f) << 3 |
624 	    IBT_RC_SRV << 1 | IBT_CM_FLOW_CONTROL);
625 
626 	if (flags & IBT_OCHAN_LOCAL_CM_TM)
627 		local_cm_proc_time = chan_args->oc_local_cm_time;
628 	else
629 		local_cm_proc_time = ibcm_local_processing_time;
630 
631 	local_cm_resp_time = ibt_usec2ib(local_cm_proc_time +
632 	    2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt) +
633 	    ibcm_sw_delay);
634 
635 	/* save retry count */
636 	statep->cep_retry_cnt = chan_args->oc_path_retry_cnt;
637 
638 	if (flags & IBT_OCHAN_STARTING_PSN)
639 		starting_psn = chan_args->oc_starting_psn;
640 
641 	if (local_cm_resp_time > 0x1f)
642 		local_cm_resp_time = 0x1f;
643 
644 	/* Bytes 44-47 are req_starting_psn, local_cm_resp_time and retry_cnt */
645 	req_msgp->req_starting_psn_plus = h2b32(starting_psn << 8 |
646 	    local_cm_resp_time << 3 | statep->cep_retry_cnt);
647 
648 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
649 	    "Prim Pkt lt (IB time) 0x%x", channel,
650 	    chan_args->oc_path->pi_prim_pkt_lt);
651 
652 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
653 	    "local_cm_proc_time(usec) %d ", channel, local_cm_proc_time);
654 
655 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
656 	    "local_cm_resp_time(ib_time) %d", channel, local_cm_resp_time);
657 
658 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
659 	    "remote_cm_resp_time (usec) %d", channel, remote_cm_resp_time);
660 
661 	statep->starting_psn = starting_psn;
662 
663 	/* Pkey - bytes 48-49 */
664 	req_msgp->req_part_key = h2b16(prim_pkey);
665 
666 	if (flags & IBT_OCHAN_CM_RETRY)
667 		cm_retries = chan_args->oc_cm_retry_cnt;
668 	else
669 		cm_retries = ibcm_max_retries;
670 
671 	statep->max_cm_retries = statep->remaining_retry_cnt = cm_retries;
672 	req_msgp->req_max_cm_retries_plus = statep->max_cm_retries << 4;
673 
674 	/*
675 	 * Check whether SRQ is associated with this Channel, if yes, then
676 	 * set the SRQ Exists bit in the REQ.
677 	 */
678 	if (qp_query_attr.qp_srq != NULL) {
679 		req_msgp->req_max_cm_retries_plus |= (1 << 3);
680 	}
681 
682 	/*
683 	 * By default on Tavor, we override the PathMTU to 1K.
684 	 * To turn this off, set ibcm_override_path_mtu = 0.
685 	 */
686 	if (ibcm_override_path_mtu && IBCM_IS_HCA_TAVOR(hcap) &&
687 	    (chan_args->oc_path->pi_path_mtu > IB_MTU_1K)) {
688 		req_msgp->req_mtu_plus = IB_MTU_1K << 4 |
689 		    chan_args->oc_path_rnr_retry_cnt;
690 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU"
691 		    " overidden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
692 		    chan_args->oc_path->pi_path_mtu);
693 	} else
694 		req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 |
695 		    chan_args->oc_path_rnr_retry_cnt;
696 
697 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p CM retry cnt %d"
698 	    " staring PSN %x", channel, cm_retries, starting_psn);
699 
700 
701 #ifdef	NO_EEC_SUPPORT_YET
702 	if (flags & IBT_OCHAN_RDC_EXISTS)
703 		req_msgp->req_mtu_plus |= 8;
704 #endif
705 
706 	/* Initialize the "primary" port stuff next - bytes 52-95 */
707 	req_msgp->req_primary_l_port_lid = h2b16(primary_slid);
708 	req_msgp->req_primary_r_port_lid =
709 	    h2b16(IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
710 	req_msgp->req_primary_l_port_gid.gid_prefix =
711 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_prefix);
712 	req_msgp->req_primary_l_port_gid.gid_guid =
713 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_guid);
714 	req_msgp->req_primary_r_port_gid.gid_prefix =
715 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix);
716 	req_msgp->req_primary_r_port_gid.gid_guid =
717 	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
718 	primary_grh = IBCM_PRIM_ADDS_VECT(chan_args).av_send_grh;
719 
720 	statep->remote_hca_guid = /* not correct, but helpful for debugging */
721 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid;
722 
723 	/* Bytes 88-91 - primary_flowlbl, and primary_srate */
724 	req_msgp->req_primary_flow_label_plus =
725 	    h2b32(((primary_grh == B_TRUE) ?
726 	    (IBCM_PRIM_ADDS_VECT(chan_args).av_flow << 12) : 0) |
727 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srate);
728 	req_msgp->req_primary_traffic_class = (primary_grh == B_TRUE) ?
729 	    IBCM_PRIM_ADDS_VECT(chan_args).av_tclass : 0;
730 	req_msgp->req_primary_hop_limit = (primary_grh == B_TRUE) ?
731 	    IBCM_PRIM_ADDS_VECT(chan_args).av_hop : 0xff;
732 	req_msgp->req_primary_sl_plus =
733 	    IBCM_PRIM_ADDS_VECT(chan_args).av_srvl << 4 |
734 	    ((primary_grh == B_TRUE) ? 0 : 8);
735 
736 	req_msgp->req_primary_localtime_plus =
737 	    ibt_usec2ib((2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt)) +
738 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
739 
740 	IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan %p statep %p",
741 	    channel, statep);
742 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
743 	    "active hca_ack_delay (usec) %d", channel,
744 	    req_msgp->req_primary_localtime_plus);
745 
746 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
747 	    "Sent primary cep timeout (IB Time) %d", channel,
748 	    hcap->hca_ack_delay);
749 
750 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p prim_dlid %x ",
751 	    channel, IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
752 
753 	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
754 	    "prim GID %llX:%llX", channel,
755 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix,
756 	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
757 
758 	/* Initialize the "alternate" port stuff - optional */
759 	if (chan_args->oc_path->pi_alt_cep_path.cep_hca_port_num != 0) {
760 		ib_gid_t	tmp_gid;
761 
762 		req_msgp->req_alt_l_port_lid = h2b16(alternate_slid);
763 		req_msgp->req_alt_r_port_lid =
764 		    h2b16(IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
765 		/*
766 		 * doing all this as req_alt_r/l_port_gid is at offset
767 		 * 100, 116 which is not divisible by 8
768 		 */
769 
770 		tmp_gid.gid_prefix =
771 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix);
772 		tmp_gid.gid_guid =
773 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
774 		bcopy(&tmp_gid, &req_msgp->req_alt_r_port_gid[0],
775 		    sizeof (ib_gid_t));
776 		tmp_gid.gid_prefix =
777 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_prefix);
778 		tmp_gid.gid_guid =
779 		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_guid);
780 
781 		bcopy(&tmp_gid, &req_msgp->req_alt_l_port_gid[0],
782 		    sizeof (ib_gid_t));
783 		alternate_grh = IBCM_ALT_ADDS_VECT(chan_args).av_send_grh;
784 
785 		/* Bytes 132-135 - alternate_flow_label, and alternate srate */
786 		req_msgp->req_alt_flow_label_plus = h2b32(
787 		    (((alternate_grh == B_TRUE) ?
788 		    (IBCM_ALT_ADDS_VECT(chan_args).av_flow << 12) : 0) |
789 		    IBCM_ALT_ADDS_VECT(chan_args).av_srate));
790 		req_msgp->req_alt_traffic_class = (alternate_grh == B_TRUE) ?
791 		    IBCM_ALT_ADDS_VECT(chan_args).av_tclass : 0;
792 		req_msgp->req_alt_hop_limit = (alternate_grh == B_TRUE) ?
793 		    IBCM_ALT_ADDS_VECT(chan_args).av_hop : 0xff;
794 		req_msgp->req_alt_sl_plus =
795 		    IBCM_ALT_ADDS_VECT(chan_args).av_srvl << 4 |
796 		    ((alternate_grh == B_TRUE) ? 0 : 8);
797 		req_msgp->req_alt_localtime_plus = ibt_usec2ib((2 *
798 		    ibt_ib2usec(chan_args->oc_path->pi_alt_pkt_lt)) +
799 		    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
800 
801 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
802 		    "alt_dlid %x ", channel,
803 		    IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
804 
805 		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
806 		    "alt GID %llX:%llX", channel,
807 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix,
808 		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
809 	}
810 
811 	len = min(chan_args->oc_priv_data_len, IBT_REQ_PRIV_DATA_SZ);
812 	if ((len > 0) && chan_args->oc_priv_data)
813 		bcopy(chan_args->oc_priv_data, req_msgp->req_private_data, len);
814 
815 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*req_msgp))
816 
817 	/* return_data is filled up in the state machine code */
818 	if (ret_args != NULL) {
819 		statep->open_return_data = ret_args;
820 	}
821 
822 	/* initialize some statep fields here */
823 	statep->mode = IBCM_ACTIVE_MODE;
824 	statep->hcap = hcap;
825 
826 	statep->cm_handler = chan_args->oc_cm_handler;
827 	statep->state_cm_private = chan_args->oc_cm_clnt_private;
828 
829 	statep->pkt_life_time =
830 	    ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt);
831 
832 	statep->timer_value = ibt_ib2usec(ibt_usec2ib(
833 	    2 * ibt_ib2usec(cm_pkt_lt) + remote_cm_resp_time));
834 
835 	/* Initialize statep->stored_reply_addr */
836 	statep->stored_reply_addr.ibmf_hdl = ibmf_hdl;
837 
838 	/* Initialize stored reply addr fields */
839 	statep->stored_reply_addr.grh_hdr = cm_reply_addr.grh_hdr;
840 	statep->stored_reply_addr.rcvd_addr = cm_reply_addr.rcvd_addr;
841 	statep->stored_reply_addr.grh_exists = cm_reply_addr.grh_exists;
842 	statep->stored_reply_addr.port_num = cm_reply_addr.port_num;
843 
844 	/*
845 	 * The IPD on local/active side is calculated by path functions,
846 	 * hence available in the args of ibt_open_rc_channel
847 	 */
848 	statep->local_srate = IBCM_PRIM_ADDS_VECT(chan_args).av_srate;
849 	statep->local_alt_srate = IBCM_ALT_ADDS_VECT(chan_args).av_srate;
850 
851 	/* Store the source path bits for primary and alt paths */
852 	statep->prim_src_path_bits = IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
853 	statep->alt_src_path_bits = IBCM_ALT_ADDS_VECT(chan_args).av_src_path;
854 
855 	statep->open_flow = 1;
856 	statep->open_done = B_FALSE;
857 	statep->state = statep->timer_stored_state = IBCM_STATE_REQ_SENT;
858 	IBCM_REF_CNT_INCR(statep);	/* Decremented before return */
859 	IBCM_REF_CNT_INCR(statep);	/* Decremented after REQ is posted */
860 	statep->send_mad_flags |= IBCM_REQ_POST_BUSY;
861 
862 	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
863 	    h2b16(IBCM_INCOMING_REQ + IBCM_ATTR_BASE_ID);
864 
865 	IBCM_OUT_HDRP(statep->stored_msg)->TransactionID =
866 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_REQ, statep->local_comid,
867 	    0));
868 
869 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
870 
871 	ibcm_open_enqueue(statep);
872 
873 	mutex_enter(&statep->state_mutex);
874 
875 	if (mode == IBT_BLOCKING) {
876 
877 		/* wait for REQ/REP/RTU */
878 		while (statep->open_done != B_TRUE) {
879 			cv_wait(&statep->block_client_cv, &statep->state_mutex);
880 		}
881 
882 		/*
883 		 * In the case that open_channel() fails because of a
884 		 * REJ or timeout, change retval to IBT_CM_FAILURE
885 		 */
886 		if (statep->open_return_data->rc_status != IBT_CM_ACCEPT)
887 			status = IBT_CM_FAILURE;
888 
889 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p "
890 		    "ret status %d cm status %d", channel, status,
891 		    statep->open_return_data->rc_status);
892 	}
893 
894 	/* decrement the ref-count before leaving here */
895 	IBCM_REF_CNT_DECR(statep);
896 
897 	mutex_exit(&statep->state_mutex);
898 
899 	IBTF_DPRINTF_L4(cmlog, "ibt_open_rc_channel: chan 0x%p done", channel);
900 	return (status);
901 }
902 
903 /*
904  * ibcm_init_reply_addr:
905  *
906  * The brief description of functionality below.
907  *
908  * For IBT_OCHAN_PORT_REDIRECTED (ie., port redirected case):
909  *	Build CM path from chan_args->oc_cm_cep_path
910  *	Set CM pkt lt (ie.,life time) to chan_args->oc_cm_pkt_lt
911  *
912  * For IBT_OCHAN_REDIRECTED (ie., port and CM redirected case):
913  *	If Redirect LID is specified,
914  *		If Redirect GID is not specified or specified to be on the same
915  *		    subnet, then
916  *			Build CM path from chan_args->oc_cm_redirect_info
917  *			Set CM pkt lt to subnet timeout
918  *		Else (ie., GID specified, but on a different subnet)
919  *			Do a path lookup to build CM Path and set CM pkt lt
920  *
921  */
922 static ibt_status_t
923 ibcm_init_reply_addr(ibcm_hca_info_t *hcap, ibcm_mad_addr_t *reply_addr,
924     ibt_chan_open_args_t *chan_args, ibt_chan_open_flags_t flags,
925     ib_time_t *cm_pkt_lt, ib_lid_t prim_slid)
926 {
927 	ibt_adds_vect_t	*cm_adds;
928 	ibt_path_info_t	path;
929 	boolean_t	cm_grh;
930 	ibt_status_t	status;
931 
932 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_reply_addr:");
933 
934 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*reply_addr))
935 
936 	/*
937 	 * sending side CM lid/gid/port num are not based on any redirect
938 	 * params. These values are set to primary RC path lid/gid/port num.
939 	 * In the future, these values can be set based on framework policy
940 	 * decisions ensuring reachability.
941 	 */
942 	reply_addr->grh_hdr.ig_sender_gid =
943 	    IBCM_PRIM_ADDS_VECT(chan_args).av_sgid;
944 	reply_addr->rcvd_addr.ia_local_lid = prim_slid;
945 	reply_addr->port_num = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
946 
947 	if (flags & IBT_OCHAN_PORT_REDIRECTED) {
948 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
949 		    "IBT_OCHAN_PORT_REDIRECTED specified");
950 
951 		status = ibt_index2pkey_byguid(hcap->hca_guid,
952 		    chan_args->oc_cm_cep_path->cep_hca_port_num,
953 		    chan_args->oc_cm_cep_path->cep_pkey_ix,
954 		    &reply_addr->rcvd_addr.ia_p_key);
955 
956 		if (status != IBT_SUCCESS) {
957 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_rely_addr: Invalid "
958 			    "CM PKeyIx %x port_num %x",
959 			    chan_args->oc_cm_cep_path->cep_pkey_ix,
960 			    chan_args->oc_cm_cep_path->cep_hca_port_num);
961 			return (status);
962 		}
963 
964 		cm_adds = &(chan_args->oc_cm_cep_path->cep_adds_vect);
965 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: dlid = %x",
966 		    cm_adds->av_dlid);
967 
968 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
969 		reply_addr->rcvd_addr.ia_remote_qno = 1;
970 		*cm_pkt_lt = chan_args->oc_cm_pkt_lt;
971 
972 	} else if (flags & IBT_OCHAN_REDIRECTED) {
973 		ibt_redirect_info_t	*redirect_info;
974 		ibt_hca_portinfo_t	*port_infop;
975 		uint_t			psize, nports;
976 
977 		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
978 		    "IBT_OCHAN_REDIRECTED specified");
979 
980 		redirect_info = chan_args->oc_cm_redirect_info;
981 
982 		if ((redirect_info->rdi_gid.gid_prefix == 0) ||
983 		    (redirect_info->rdi_gid.gid_guid == 0)) {
984 			IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
985 			    "ERROR: Re-direct GID value NOT Provided.");
986 			return (IBT_INVALID_PARAM);
987 		}
988 
989 		/* As per spec definition 1.1, it's always IB_GSI_QKEY */
990 		reply_addr->rcvd_addr.ia_q_key = redirect_info->rdi_qkey;
991 		reply_addr->rcvd_addr.ia_remote_qno = redirect_info->rdi_qpn;
992 		reply_addr->rcvd_addr.ia_p_key = redirect_info->rdi_pkey;
993 
994 		/*
995 		 * if LID is non-zero in classportinfo then use classportinfo
996 		 * fields to form CM MAD destination address.
997 		 */
998 		if (redirect_info->rdi_dlid != 0) {
999 			status = ibtl_cm_query_hca_ports_byguid(hcap->hca_guid,
1000 			    reply_addr->port_num, &port_infop, &nports, &psize);
1001 			if ((status != IBT_SUCCESS) || (nports == 0)) {
1002 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1003 				    "Query Ports Failed: %d", status);
1004 				return (status);
1005 			} else if (port_infop->p_subnet_timeout >
1006 			    IBCM_MAX_IB_PKT_LT) {
1007 				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
1008 				    "large subnet timeout %x port_no %x",
1009 				    port_infop->p_subnet_timeout,
1010 				    reply_addr->port_num);
1011 				ibt_free_portinfo(port_infop, psize);
1012 				return (IBT_PATH_PKT_LT_TOO_HIGH);
1013 			} else {
1014 				IBTF_DPRINTF_L3(cmlog, "ibcm_init_reply_addr: "
1015 				    "subnet timeout %x port_no %x",
1016 				    port_infop->p_subnet_timeout,
1017 				    reply_addr->port_num);
1018 
1019 				*cm_pkt_lt =
1020 				    ibt_ib2usec(min(ibcm_max_ib_mad_pkt_lt,
1021 				    port_infop->p_subnet_timeout));
1022 
1023 				ibt_free_portinfo(port_infop, psize);
1024 			}
1025 
1026 			reply_addr->rcvd_addr.ia_remote_lid =
1027 			    redirect_info->rdi_dlid;
1028 			reply_addr->rcvd_addr.ia_service_level =
1029 			    redirect_info->rdi_sl;
1030 			reply_addr->grh_exists = B_TRUE;
1031 			reply_addr->grh_hdr.ig_recver_gid =
1032 			    redirect_info->rdi_gid;
1033 			reply_addr->grh_hdr.ig_tclass =
1034 			    redirect_info->rdi_tclass;
1035 			reply_addr->grh_hdr.ig_flow_label =
1036 			    redirect_info->rdi_flow;
1037 
1038 			/* Classportinfo doesn't have hoplimit field */
1039 			reply_addr->grh_hdr.ig_hop_limit = 0xff;
1040 			return (IBT_SUCCESS);
1041 
1042 		} else {
1043 			ibt_path_attr_t	path_attr;
1044 			ib_gid_t	path_dgid[1];
1045 
1046 			/*
1047 			 * If GID is specified, and LID is zero in classportinfo
1048 			 * do a path lookup using specified GID, Pkey,
1049 			 * in classportinfo
1050 			 */
1051 
1052 			bzero(&path_attr, sizeof (path_attr));
1053 
1054 			path_attr.pa_dgids = &path_dgid[0];
1055 			path_attr.pa_dgids[0] = redirect_info->rdi_gid;
1056 
1057 			/*
1058 			 * use reply_addr below, as sender_gid in reply_addr
1059 			 * may have been set above based on some policy decision
1060 			 * for originating end point for CM MADs above
1061 			 */
1062 			path_attr.pa_sgid = reply_addr->grh_hdr.ig_sender_gid;
1063 			path_attr.pa_num_dgids = 1;
1064 			path_attr.pa_pkey = redirect_info->rdi_pkey;
1065 
1066 			if ((status = ibt_get_paths(ibcm_ibt_handle,
1067 			    IBT_PATH_PKEY, &path_attr, 1, &path, NULL)) !=
1068 			    IBT_SUCCESS)
1069 				return (status);
1070 
1071 			/* Initialize cm_adds */
1072 			cm_adds = &path.pi_prim_cep_path.cep_adds_vect;
1073 			*cm_pkt_lt = path.pi_prim_pkt_lt;
1074 		}
1075 
1076 	} else	{ /* cm_pkey initialized in ibt_open_rc_channel */
1077 		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
1078 		reply_addr->rcvd_addr.ia_remote_qno = 1;
1079 		*cm_pkt_lt = chan_args->oc_path->pi_prim_pkt_lt;
1080 		cm_adds = &(IBCM_PRIM_ADDS_VECT(chan_args));
1081 	}
1082 
1083 
1084 	cm_grh = cm_adds->av_send_grh;
1085 	reply_addr->grh_exists = cm_grh;
1086 
1087 	reply_addr->rcvd_addr.ia_remote_lid =
1088 	    cm_adds->av_dlid;
1089 	reply_addr->grh_hdr.ig_recver_gid =
1090 	    cm_adds->av_dgid;
1091 	reply_addr->grh_hdr.ig_flow_label =
1092 	    cm_adds->av_flow & IB_GRH_FLOW_LABEL_MASK;
1093 	reply_addr->grh_hdr.ig_tclass =
1094 	    (cm_grh == B_TRUE) ? cm_adds->av_tclass : 0;
1095 	reply_addr->grh_hdr.ig_hop_limit =
1096 	    (cm_grh == B_TRUE) ? cm_adds->av_hop : 0xff;
1097 	reply_addr->rcvd_addr.ia_service_level =
1098 	    cm_adds->av_srvl;
1099 
1100 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*reply_addr))
1101 
1102 	return (IBT_SUCCESS);
1103 }
1104 
1105 
1106 /*
1107  * ibt_prime_close_rc_channel()
1108  *	It allocates resources required for close channel operation, so
1109  *	ibt_close_rc_channel can be called from interrupt routine.
1110  *
1111  * INPUTS:
1112  *	channel			The address of an ibt_channel_t struct that
1113  *				specifies the channel to open.
1114  *
1115  * RETURN VALUES:
1116  *	IBT_SUCCESS	on success(or respective failure on error)
1117  *
1118  * Clients are typically expected to call this function in established state
1119  */
1120 ibt_status_t
1121 ibt_prime_close_rc_channel(ibt_channel_hdl_t channel)
1122 {
1123 	ibcm_state_data_t	*statep;
1124 	ibt_status_t		status = IBT_SUCCESS;
1125 
1126 	IBTF_DPRINTF_L3(cmlog, "ibt_prime_close_rc_channel(%p)", channel);
1127 
1128 	/* validate channel, first */
1129 	if (IBCM_INVALID_CHANNEL(channel)) {
1130 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1131 		    "invalid channel", channel);
1132 		return (IBT_CHAN_HDL_INVALID);
1133 	}
1134 
1135 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1136 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1137 		    "Invalid Channel type: Applicable only to RC Channel",
1138 		    channel);
1139 		return (IBT_CHAN_SRV_TYPE_INVALID);
1140 	}
1141 
1142 	/* get the statep */
1143 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1144 
1145 	/*
1146 	 * This can happen, if the statep is already gone by a DREQ from
1147 	 * the remote side
1148 	 */
1149 
1150 	if (statep == NULL) {
1151 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1152 		    "statep NULL", channel);
1153 		return (IBT_SUCCESS);
1154 	}
1155 
1156 	mutex_enter(&statep->state_mutex);
1157 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1158 	if (statep->state != IBCM_STATE_ESTABLISHED) {
1159 		mutex_exit(&statep->state_mutex);
1160 		return (IBT_CHAN_STATE_INVALID);
1161 	}
1162 	IBCM_REF_CNT_INCR(statep);
1163 	IBTF_DPRINTF_L4(cmlog, "ibt_prime_close_rc_channel: chan 0x%p statep %p"
1164 	    " state %x", channel, statep, statep->state);
1165 	mutex_exit(&statep->state_mutex);
1166 
1167 	/* clients could pre-allocate dreq mad, even before connection est */
1168 	if (statep->dreq_msg == NULL)
1169 		status = ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
1170 		    &statep->dreq_msg, MAD_METHOD_SEND);
1171 
1172 	mutex_enter(&statep->state_mutex);
1173 	IBCM_REF_CNT_DECR(statep);
1174 	mutex_exit(&statep->state_mutex);
1175 
1176 	if (status != IBT_SUCCESS) {
1177 		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
1178 		    "ibcm_alloc_out_msg failed ", channel);
1179 		return (status);
1180 	}
1181 
1182 	/* If this message isn't seen then ibt_prime_close_rc_channel failed */
1183 	IBTF_DPRINTF_L5(cmlog, "ibt_prime_close_rc_channel: chan 0x%p done",
1184 	    channel);
1185 
1186 	return (IBT_SUCCESS);
1187 }
1188 
1189 /*
1190  * ibt_close_rc_channel()
1191  *	It closes an established channel.
1192  *
1193  * RETURN VALUES:
1194  *	IBT_SUCCESS	on success(or respective failure on error)
1195  */
1196 ibt_status_t
1197 ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
1198     void *priv_data, ibt_priv_data_len_t priv_data_len, uint8_t *ret_status,
1199     void *ret_priv_data, ibt_priv_data_len_t *ret_priv_data_len_p)
1200 {
1201 	ibcm_state_data_t	*statep;
1202 
1203 	IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel(%p, %x, %p, %d, %p)",
1204 	    channel, mode, priv_data, priv_data_len,
1205 	    (ret_priv_data_len_p == NULL) ? 0 : *ret_priv_data_len_p);
1206 
1207 	/* validate channel, first */
1208 	if (IBCM_INVALID_CHANNEL(channel)) {
1209 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1210 		    "invalid channel", channel);
1211 		return (IBT_CHAN_HDL_INVALID);
1212 	}
1213 
1214 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
1215 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1216 		    "Invalid Channel type: Applicable only to RC Channel",
1217 		    channel);
1218 		return (IBT_CHAN_SRV_TYPE_INVALID);
1219 	}
1220 
1221 	if (mode == IBT_BLOCKING) {
1222 		/* valid only for BLOCKING MODE */
1223 		if ((ret_priv_data_len_p != NULL) &&
1224 		    (*ret_priv_data_len_p > IBT_DREP_PRIV_DATA_SZ)) {
1225 			IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p"
1226 			    " private data len %d is too large", channel,
1227 			    *ret_priv_data_len_p);
1228 			return (IBT_INVALID_PARAM);
1229 		}
1230 	} else if ((mode != IBT_NONBLOCKING) && (mode != IBT_NOCALLBACKS)) {
1231 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1232 		    "invalid mode %x specified", channel, mode);
1233 		return (IBT_INVALID_PARAM);
1234 	}
1235 
1236 	if (ibtl_cm_is_chan_closing(channel) ||
1237 	    ibtl_cm_is_chan_closed(channel)) {
1238 		if (ret_status)
1239 			*ret_status = IBT_CM_CLOSED_ALREADY;
1240 
1241 		/* No private data to return to the client */
1242 		if (ret_priv_data_len_p != NULL)
1243 			*ret_priv_data_len_p = 0;
1244 
1245 		IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p "
1246 		    "already marked for closing", channel);
1247 
1248 		return (IBT_SUCCESS);
1249 	}
1250 
1251 	/* get the statep */
1252 	IBCM_GET_CHAN_PRIVATE(channel, statep);
1253 	if (statep == NULL) {
1254 		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
1255 		    "statep NULL", channel);
1256 		return (IBT_CHAN_STATE_INVALID);
1257 	}
1258 
1259 	mutex_enter(&statep->state_mutex);
1260 
1261 	if (statep->dreq_msg == NULL) {
1262 		IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p "
1263 		    "Fatal Error: dreq_msg is NULL", channel);
1264 		IBCM_RELEASE_CHAN_PRIVATE(channel);
1265 		mutex_exit(&statep->state_mutex);
1266 		return (IBT_CHAN_STATE_INVALID);
1267 	}
1268 
1269 	if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) {
1270 		statep->close_ret_priv_data = NULL;
1271 		statep->close_ret_priv_data_len = NULL;
1272 	} else {
1273 		statep->close_ret_priv_data = ret_priv_data;
1274 		statep->close_ret_priv_data_len = ret_priv_data_len_p;
1275 	}
1276 
1277 	priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ);
1278 	if ((priv_data != NULL) && (priv_data_len > 0)) {
1279 		bcopy(priv_data, ((ibcm_dreq_msg_t *)
1280 		    IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data,
1281 		    priv_data_len);
1282 	}
1283 	statep->close_ret_status = ret_status;
1284 
1285 	IBCM_RELEASE_CHAN_PRIVATE(channel);
1286 	IBCM_REF_CNT_INCR(statep);
1287 
1288 	if (mode != IBT_NONBLOCKING) {
1289 		return (ibcm_close_rc_channel(channel, statep, mode));
1290 	}
1291 
1292 	/* IBT_NONBLOCKING */
1293 	ibcm_close_enqueue(statep);
1294 	mutex_exit(&statep->state_mutex);
1295 
1296 	return (IBT_SUCCESS);
1297 }
1298 
/*
 * ibcm_close_start()
 *	Runs the IBT_NONBLOCKING close of statep's channel.
 *
 *	state_mutex is taken here and is not released in this function:
 *	ibcm_close_rc_channel() expects to be entered with it held and
 *	releases it before returning.  The status it returns is discarded.
 */
void
ibcm_close_start(ibcm_state_data_t *statep)
{
	mutex_enter(&statep->state_mutex);
	(void) ibcm_close_rc_channel(statep->channel, statep, IBT_NONBLOCKING);
}
1305 
/*
 * ibcm_close_rc_channel()
 *	Common close/abort processing for an RC channel, used by
 *	ibt_close_rc_channel() and ibcm_close_start().
 *
 *	Must be entered with statep->state_mutex held; the mutex is released
 *	on every return path.  A reference on statep is also dropped on every
 *	return path, except when the abort is handed to the taskq, where
 *	ibcm_process_abort_via_taskq() drops it.
 *
 *	What is done depends on the connection state found:
 *	  - established: a DREQ is posted, and IBT_BLOCKING callers wait
 *	    for close_done.
 *	  - waiting for a response MAD (REQ_SENT, REP_SENT, REP_WAIT,
 *	    MRA_REP_RCVD): the timer is cancelled, the abort is processed
 *	    via taskq and the state data is deleted.
 *	  - busy in the cm handler (REQ_RCVD, REP_RCVD, MRA_SENT,
 *	    MRA_REP_SENT): IBCM_ABORT_CLIENT is flagged and cleanup is left
 *	    to the state machine.
 *	  - teardown already started or done: optionally wait for it.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			close done, started, or not needed
 *	IBT_CHAN_HDL_INVALID		HCA is not accessible
 *	IBT_INSUFF_KERNEL_RESOURCE	taskq dispatch failed (IBT_NONBLOCKING)
 *	IBT_CM_FAILURE			connection is in an unexpected state
 */
static
ibt_status_t
ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t *statep,
    ibt_execution_mode_t mode)
{
	ibcm_hca_info_t		*hcap;

	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex));
	ASSERT(MUTEX_HELD(&statep->state_mutex));

	IBTF_DPRINTF_L3(cmlog, "ibcm_close_rc_channel: chan 0x%p statep %p",
	    channel, statep);

	hcap = statep->hcap;

	/* HCA must have been in active state. If not, it's a client bug */
	if (!IBCM_ACCESS_HCA_OK(hcap)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "hcap 0x%p not active", channel, hcap);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return (IBT_CHAN_HDL_INVALID);
	}

	/* let a transient state settle before looking at the state */
	if (statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
		while (statep->cep_in_rts == IBCM_BLOCK)
			cv_wait(&statep->block_mad_cv, &statep->state_mutex);
	}

	/* Do TRANSIENT_DREQ check after TRANSIENT_ESTABLISHED check */
	while (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT)
		cv_wait(&statep->block_mad_cv, &statep->state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
	    "connection state is %x", channel, statep->state);

	/* If state is in pre-established states, abort the connection est */
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		statep->cm_retries++;	/* ensure connection trace is dumped */

		/* No DREP private data possible */
		if (statep->close_ret_priv_data_len != NULL)
			*statep->close_ret_priv_data_len = 0;

		/*
		 * If waiting for a response mad, then cancel the timer,
		 * and delete the connection
		 */
		if (statep->state == IBCM_STATE_REQ_SENT ||
		    statep->state == IBCM_STATE_REP_SENT ||
		    statep->state == IBCM_STATE_REP_WAIT ||
		    statep->state == IBCM_STATE_MRA_REP_RCVD) {
			timeout_id_t		timer_val = statep->timerid;
			ibcm_conn_state_t	old_state;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state %x", channel,
			    statep->state);

			/* remembered so a failed dispatch can be undone */
			old_state = statep->state;
			statep->state = IBCM_STATE_DELETE;

			if (mode == IBT_NONBLOCKING) {
				/* must not sleep here, so dispatch may fail */
				if (taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_NOSLEEP) == 0) {

					IBCM_REF_CNT_DECR(statep);
					statep->state = old_state;
					mutex_exit(&statep->state_mutex);
					return (IBT_INSUFF_KERNEL_RESOURCE);
				}	/* if taskq_dispatch succeeds */
				/* Cancel the timer */
				statep->timerid = 0;
				mutex_exit(&statep->state_mutex);
			} else {
				/* Cancel the timer */
				statep->timerid = 0;
				mutex_exit(&statep->state_mutex);
				(void) taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_SLEEP);
			}

			/* cancel the currently running timer */
			if (timer_val != 0)
				(void) untimeout(timer_val);

			/* wait until cm handler returns for BLOCKING cases */
			mutex_enter(&statep->state_mutex);
			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * It would ideal to post a REJ MAD, but that would
			 * be non-conformance to spec. Hence, delete the state
			 * data. Assuming that happens quickly, any retransmits
			 * from the remote are replied by CM with reject
			 * reason " no valid com id". That would stop remote
			 * sending any more MADs.
			 */
			ibcm_delete_state_data(statep);
			return (IBT_SUCCESS);

		/* if CM busy in cm handler, wait until cm handler returns */
		} else if (statep->state == IBCM_STATE_REQ_RCVD ||
		    statep->state == IBCM_STATE_REP_RCVD ||
		    statep->state == IBCM_STATE_MRA_SENT ||
		    statep->state == IBCM_STATE_MRA_REP_SENT) {

			/* take control of statep */
			statep->abort_flag |= IBCM_ABORT_CLIENT;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state = %x",
			    channel, statep->state);

			/*
			 * wait until state machine modifies qp state to error,
			 * including disassociating statep and QP
			 */
			if ((mode == IBT_BLOCKING) || (mode == IBT_NOCALLBACKS))
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);

			/* a sanity setting */
			if (mode == IBT_NOCALLBACKS)
				statep->cm_handler = NULL;
			IBCM_REF_CNT_DECR(statep);

			/*
			 * In rare situations, connection attempt could be
			 * terminated for some other reason, before abort is
			 * processed, but CM still returns ret_status as abort
			 */
			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * REJ MAD is posted by the CM state machine for this
			 * case, hence state structure is deleted in the
			 * state machine processing.
			 */
			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_TIMEWAIT) ||
		    (statep->state == IBCM_STATE_DELETE)) {

			/* State already in timewait, so no return priv data */
			IBCM_REF_CNT_DECR(statep);

			/* The teardown has already been done */
			if (statep->close_ret_status)
				*statep->close_ret_status =
				    IBT_CM_CLOSED_ALREADY;
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_DREQ_RCVD) ||
		    (statep->state == IBCM_STATE_DREQ_SENT) ||
		    (statep->state == IBCM_STATE_DREP_RCVD) ||
		    ((statep->state == IBCM_STATE_TIMED_OUT) &&
		    (statep->timedout_state == IBCM_STATE_DREQ_SENT))) {

			/*
			 * Either the remote or local client has already
			 * initiated the teardown.  IBCM_STATE_DREP_RCVD is
			 * possible, if CM initiated teardown without client's
			 * knowledge, for stale handling, etc.,
			 */
			if (mode == IBT_NOCALLBACKS) {
				if (statep->close_nocb_state == IBCM_UNBLOCK) {
					statep->close_nocb_state = IBCM_FAIL;
					/* enable free qp after return */
					ibtl_cm_chan_is_closing(
					    statep->channel);
				} else while (statep->close_nocb_state ==
				    IBCM_BLOCK)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
				statep->cm_handler = NULL; /* sanity setting */
				if (statep->close_ret_status)
					*statep->close_ret_status =
					    IBT_CM_CLOSED_ALREADY;
			} else if (mode == IBT_BLOCKING) {
				/* wait until state is moved to timewait */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			/* ret_status is set in state machine code */
			return (IBT_SUCCESS);

		} else if (statep->state == IBCM_STATE_TIMED_OUT) {

			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {

				/*
				 * wait until cm handler invocation and
				 * disassociation between statep and channel
				 * is complete
				 */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);
		} else {
			/* none of the states handled above */
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_CM_FAILURE);
		}
	}

	/* From here on the connection was found in ESTABLISHED state */
	ASSERT(statep->close_nocb_state != IBCM_BLOCK);

	if (mode == IBT_NOCALLBACKS) {
		statep->close_nocb_state = IBCM_FAIL;
		statep->cm_handler = NULL;
		ibtl_cm_chan_is_closing(statep->channel);
		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
		    "NOCALLBACKS on in statep = %p", statep);
	}
	mutex_exit(&statep->state_mutex);

	/* the mutex was dropped above, so the state has to be re-checked */
	mutex_enter(&statep->state_mutex);
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		goto lost_race;
	}

	/*
	 * Cancel/wait for any pending ibt_set_alt_path, and
	 * release state mutex
	 */
	ibcm_sync_lapr_idle(statep);

	ibcm_close_enter();

	/* re-check once more after the mutex has been re-acquired */
	mutex_enter(&statep->state_mutex);
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		ibcm_close_exit();
		goto lost_race;
	}

	statep->state = IBCM_STATE_TRANSIENT_DREQ_SENT;
	statep->timerid = 0;
	statep->close_done = B_FALSE;
	statep->close_flow = 1;
	mutex_exit(&statep->state_mutex);

	/* the DREQ is posted without holding state_mutex */
	ibcm_post_dreq_mad(statep);

	mutex_enter(&statep->state_mutex);

	/* reached with state_mutex held, whether or not the DREQ was posted */
lost_race:
	if (mode == IBT_BLOCKING) {

		/* wait for DREP */
		while (statep->close_done != B_TRUE)
			cv_wait(&statep->block_client_cv,
			    &statep->state_mutex);

		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "done blocking", channel);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);

	/* If this message isn't seen then ibt_close_rc_channel failed */
	IBTF_DPRINTF_L5(cmlog, "ibcm_close_rc_channel: chan 0x%p done",
	    channel);

	return (IBT_SUCCESS);
}
1604 
1605 ibt_status_t
1606 ibt_recycle_rc(ibt_channel_hdl_t rc_chan, ibt_cep_flags_t control,
1607     uint8_t hca_port_num, ibt_recycle_handler_t func, void *arg)
1608 {
1609 	ibcm_state_data_t		*statep;
1610 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg;
1611 	ibt_qp_query_attr_t		qp_attr;
1612 	ibt_status_t			retval;
1613 
1614 	IBTF_DPRINTF_L3(cmlog, "ibt_recycle_rc (%p, 0x%X, %d, %p, %p)", rc_chan,
1615 	    control, hca_port_num, func, arg);
1616 
1617 	if (IBCM_INVALID_CHANNEL(rc_chan)) {
1618 		IBTF_DPRINTF_L2(cmlog, "ibt_recycle_rc: invalid channel");
1619 		return (IBT_CHAN_HDL_INVALID);
1620 	}
1621 
1622 	/* check qp state */
1623 	retval = ibt_query_qp(rc_chan, &qp_attr);
1624 
1625 	if (retval != IBT_SUCCESS)
1626 		return (retval);
1627 
1628 	if (qp_attr.qp_info.qp_trans != IBT_RC_SRV)
1629 		return (IBT_CHAN_SRV_TYPE_INVALID);
1630 
1631 	if (qp_attr.qp_info.qp_state != IBT_STATE_ERROR)
1632 		return (IBT_CHAN_STATE_INVALID);
1633 
1634 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1635 
1636 	ibcm_tq_recycle_arg = kmem_alloc(sizeof (ibcm_taskq_recycle_arg_t),
1637 	    KM_SLEEP);
1638 
1639 	ibcm_tq_recycle_arg->rc_chan		= rc_chan;
1640 	ibcm_tq_recycle_arg->control		= control;
1641 	ibcm_tq_recycle_arg->hca_port_num	= hca_port_num;
1642 	ibcm_tq_recycle_arg->func		= func;
1643 	ibcm_tq_recycle_arg->arg		= arg;
1644 
1645 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibcm_tq_recycle_arg))
1646 
1647 	IBCM_GET_CHAN_PRIVATE(rc_chan, statep);
1648 
1649 	/*
1650 	 * If non-blocking ie., func specified and channel has not yet completed
1651 	 * the timewait, then schedule the work for later
1652 	 */
1653 	if ((func != NULL) && (statep != NULL)) {
1654 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1655 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1656 		statep->recycle_arg = ibcm_tq_recycle_arg;
1657 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
1658 		return (IBT_SUCCESS);
1659 	}
1660 
1661 	/*
1662 	 * if blocking ie., func specified, and channel has not yet completed
1663 	 * the timewait, then block until the channel completes the timewait
1664 	 */
1665 	if (statep != NULL)
1666 		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);
1667 	IBCM_WAIT_CHAN_PRIVATE(rc_chan);
1668 
1669 	if (func) {	/* NON BLOCKING case. Taskq for QP state change */
1670 		(void) taskq_dispatch(ibcm_taskq, ibcm_process_rc_recycle,
1671 		    ibcm_tq_recycle_arg, TQ_SLEEP);
1672 		return (IBT_SUCCESS);
1673 	} else	/* BLOCKING case */
1674 		return (ibcm_process_rc_recycle_ret(ibcm_tq_recycle_arg));
1675 }
1676 
/*
 * ibcm_process_rc_recycle()
 *	void-returning wrapper around ibcm_process_rc_recycle_ret(), so the
 *	recycle work can be handed to taskq_dispatch() (as ibt_recycle_rc()
 *	does).  The resulting status is deliberately dropped.
 */
void
ibcm_process_rc_recycle(void *recycle_arg)
{
	(void) ibcm_process_rc_recycle_ret(recycle_arg);
}
1682 
1683 static ibt_status_t
1684 ibcm_process_rc_recycle_ret(void *recycle_arg)
1685 {
1686 	ibt_qp_info_t			qp_info;
1687 	ibt_status_t			ibt_status = IBT_SUCCESS;
1688 	ibt_cep_modify_flags_t		cep_flags;
1689 	ibt_qp_query_attr_t		qp_attr;
1690 	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg =
1691 	    (ibcm_taskq_recycle_arg_t *)recycle_arg;
1692 
1693 	/* QP must have been in error state */
1694 	ibt_status = ibt_query_qp(ibcm_tq_recycle_arg->rc_chan, &qp_attr);
1695 	if (ibt_status != IBT_SUCCESS)
1696 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1697 		    "chanp %p ibt_query_qp() = %d",
1698 		    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1699 	else {
1700 		/* perform the QP state change from ERROR to RESET */
1701 		bzero(&qp_info, sizeof (qp_info));
1702 
1703 		qp_info.qp_trans = IBT_RC_SRV;
1704 		qp_info.qp_state = IBT_STATE_RESET;
1705 
1706 		/* Call modify_qp to move to RESET state */
1707 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1708 		    IBT_CEP_SET_STATE, &qp_info, NULL);
1709 
1710 		if (ibt_status != IBT_SUCCESS)
1711 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1712 			    "chanp %p ibt_modify_qp() = %d for ERROR to RESET",
1713 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1714 	}
1715 
1716 	if (ibt_status == IBT_SUCCESS) {
1717 
1718 		qp_info.qp_state = IBT_STATE_INIT;
1719 
1720 		/* set flags for all mandatory args from RESET to INIT */
1721 		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
1722 		cep_flags |= IBT_CEP_SET_RDMA_R | IBT_CEP_SET_RDMA_W;
1723 		cep_flags |= IBT_CEP_SET_ATOMIC;
1724 
1725 		qp_info.qp_transport.rc.rc_path.cep_hca_port_num =
1726 		    ibcm_tq_recycle_arg->hca_port_num;
1727 		qp_info.qp_flags |=
1728 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_RD;
1729 		qp_info.qp_flags |=
1730 		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_WR;
1731 		qp_info.qp_flags |=
1732 		    ibcm_tq_recycle_arg->control & IBT_CEP_ATOMIC;
1733 
1734 		/* Always use the existing pkey */
1735 		qp_info.qp_transport.rc.rc_path.cep_pkey_ix =
1736 		    qp_attr. qp_info.qp_transport.rc.rc_path.cep_pkey_ix;
1737 
1738 		/* Call modify_qp to move to INIT state */
1739 		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
1740 		    cep_flags, &qp_info, NULL);
1741 
1742 		if (ibt_status != IBT_SUCCESS)
1743 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
1744 			    "chanp %p ibt_modify_qp() = %d for RESET to INIT",
1745 			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
1746 	}
1747 
1748 	/* Change the QP CM state to indicate QP being re-used */
1749 	if (ibt_status == IBT_SUCCESS)
1750 		ibtl_cm_chan_is_reused(ibcm_tq_recycle_arg->rc_chan);
1751 
1752 	/* Call func, if defined */
1753 	if (ibcm_tq_recycle_arg->func)
1754 		(*(ibcm_tq_recycle_arg->func))(ibt_status,
1755 		    ibcm_tq_recycle_arg->arg);
1756 
1757 	kmem_free(ibcm_tq_recycle_arg, sizeof (ibcm_taskq_recycle_arg_t));
1758 
1759 	return (ibt_status);
1760 }
1761 
1762 static void
1763 ibcm_process_abort_via_taskq(void *args)
1764 {
1765 	ibcm_state_data_t	*statep = (ibcm_state_data_t *)args;
1766 
1767 	ibcm_process_abort(statep);
1768 	mutex_enter(&statep->state_mutex);
1769 	IBCM_REF_CNT_DECR(statep);
1770 	mutex_exit(&statep->state_mutex);
1771 }
1772 
/*
 * Local UD CM Handler's private data, used during ibt_request_ud_dest() in
 * Non-Blocking mode operations.
 *
 * ibt_request_ud_dest() allocates and fills one of these per non-blocking
 * request; ibcm_local_cm_handler() frees it once the SIDR REP event has
 * been passed on to the client's handler.
 */
typedef struct ibcm_local_handler_s {
	/* Handler the client originally supplied in ud_cm_handler. */
	ibt_cm_ud_handler_t	actual_cm_handler;
	/* Client's private argument, passed as first arg to its handler. */
	void			*actual_cm_private;
	/* UD destination whose QPN and QKey are filled in from the SIDR REP. */
	ibt_ud_dest_t		*dest_hdl;
} ibcm_local_handler_t;

_NOTE(READ_ONLY_DATA(ibcm_local_handler_s))
1784 
1785 /*
1786  * Local UD CM Handler, used when ibt_alloc_ud_dest() is issued in
1787  * NON-Blocking mode.
1788  *
1789  * Out here, we update the UD Destination handle with
1790  * the obtained DQPN and QKey (from SIDR REP) and invokes actual client
1791  * handler that was specified by the client.
1792  */
1793 static ibt_cm_status_t
1794 ibcm_local_cm_handler(void *priv, ibt_cm_ud_event_t *event,
1795     ibt_cm_ud_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
1796 {
1797 	ibcm_local_handler_t	*handler_priv = (ibcm_local_handler_t *)priv;
1798 
1799 	IBTF_DPRINTF_L4(cmlog, "ibcm_local_cm_handler: event %d",
1800 	    event->cm_type);
1801 
1802 	ASSERT(handler_priv != NULL);
1803 
1804 	switch (event->cm_type) {
1805 	case IBT_CM_UD_EVENT_SIDR_REP:
1806 		/* Update QPN & QKey from event into destination handle. */
1807 		if (handler_priv->dest_hdl != NULL) {
1808 			handler_priv->dest_hdl->ud_dst_qpn =
1809 			    event->cm_event.sidr_rep.srep_remote_qpn;
1810 			handler_priv->dest_hdl->ud_qkey =
1811 			    event->cm_event.sidr_rep.srep_remote_qkey;
1812 		}
1813 
1814 		/* Invoke the client handler - inform only, so ignore retval */
1815 		(void) handler_priv->actual_cm_handler(
1816 		    handler_priv->actual_cm_private, event, ret_args, priv_data,
1817 		    len);
1818 
1819 		/* Free memory allocated for local handler's private data. */
1820 		if (handler_priv != NULL)
1821 			kmem_free(handler_priv, sizeof (*handler_priv));
1822 
1823 		break;
1824 	default:
1825 		IBTF_DPRINTF_L2(cmlog, "ibcm_local_cm_handler: ERROR");
1826 		break;
1827 	}
1828 
1829 	return (IBT_CM_ACCEPT);
1830 }
1831 
1832 
1833 /* Validate the input UD destination attributes.  */
1834 static ibt_status_t
1835 ibcm_validate_dqpn_data(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1836     ibt_ud_returns_t *ret_args)
1837 {
1838 	/* cm handler must always be specified */
1839 	if (mode == IBT_NONBLOCKING && attr->ud_cm_handler == NULL) {
1840 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1841 		    "CM handler is not specified ");
1842 		return (IBT_INVALID_PARAM);
1843 	}
1844 
1845 	if (mode == IBT_NONBLOCKING) {
1846 		if (ret_args != NULL) {
1847 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1848 			    "ret_args should be NULL when called in "
1849 			    "non-blocking mode");
1850 			return (IBT_INVALID_PARAM);
1851 		}
1852 	} else if (mode == IBT_BLOCKING) {
1853 		if (ret_args == NULL) {
1854 			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1855 			    "ret_args should be Non-NULL when called in "
1856 			    "blocking mode");
1857 			return (IBT_INVALID_PARAM);
1858 		}
1859 	} else {
1860 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1861 		    "invalid mode %x specified ", mode);
1862 		return (IBT_INVALID_PARAM);
1863 	}
1864 
1865 	if (attr->ud_sid == 0) {
1866 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1867 		    "ServiceID must be specified. ");
1868 		return (IBT_INVALID_PARAM);
1869 	}
1870 
1871 	if (attr->ud_addr == NULL) {
1872 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
1873 		    "Address Info NULL");
1874 		return (IBT_INVALID_PARAM);
1875 	}
1876 
1877 	/* Validate SGID */
1878 	if ((attr->ud_addr->av_sgid.gid_prefix == 0) ||
1879 	    (attr->ud_addr->av_sgid.gid_guid == 0)) {
1880 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid SGID");
1881 		return (IBT_INVALID_PARAM);
1882 	}
1883 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: SGID<%llX:%llX>",
1884 	    attr->ud_addr->av_sgid.gid_prefix,
1885 	    attr->ud_addr->av_sgid.gid_guid);
1886 
1887 	/* Validate DGID */
1888 	if ((attr->ud_addr->av_dgid.gid_prefix == 0) ||
1889 	    (attr->ud_addr->av_dgid.gid_guid == 0)) {
1890 		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid DGID");
1891 		return (IBT_INVALID_PARAM);
1892 	}
1893 	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: DGID<%llX:%llX>",
1894 	    attr->ud_addr->av_dgid.gid_prefix,
1895 	    attr->ud_addr->av_dgid.gid_guid);
1896 
1897 	return (IBT_SUCCESS);
1898 }
1899 
1900 
1901 /* Perform SIDR to retrieve DQPN and QKey.  */
1902 static ibt_status_t
1903 ibcm_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
1904     ibt_ud_returns_t *ret_args)
1905 {
1906 	ibt_status_t		retval;
1907 	ib_pkey_t		ud_pkey;
1908 	ibmf_handle_t		ibmf_hdl;
1909 	ibmf_msg_t		*ibmf_msg;
1910 	ibcm_hca_info_t		*hcap;
1911 	ibcm_sidr_req_msg_t	*sidr_req_msgp;
1912 	ibcm_ud_state_data_t	*ud_statep;
1913 	ibtl_cm_hca_port_t	port;
1914 	ibcm_sidr_srch_t	sidr_entry;
1915 	ibcm_qp_list_t		*cm_qp_entry;
1916 
1917 	/* Retrieve HCA GUID value from the available SGID info. */
1918 	retval = ibtl_cm_get_hca_port(attr->ud_addr->av_sgid, 0, &port);
1919 	if ((retval != IBT_SUCCESS) || (port.hp_port == 0)) {
1920 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1921 		    "ibtl_cm_get_hca_port failed: %d", retval);
1922 		return (retval);
1923 	}
1924 
1925 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: "
1926 	    "HCA GUID:%llX, port_num:%d", port.hp_hca_guid, port.hp_port);
1927 
1928 	/* Lookup the HCA info for this GUID */
1929 	if ((hcap = ibcm_find_hca_entry(port.hp_hca_guid)) == NULL) {
1930 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: hcap is NULL");
1931 		return (IBT_HCA_INVALID);
1932 	}
1933 
1934 	/* Return failure if the HCA device or Port is not operational */
1935 
1936 	if ((retval = ibt_get_port_state_byguid(port.hp_hca_guid, port.hp_port,
1937 	    NULL, NULL)) != IBT_SUCCESS) {
1938 		/* Device Port is not in good state, don't use it. */
1939 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: Invalid "
1940 		    "port specified or port not active");
1941 		ibcm_dec_hca_acc_cnt(hcap);
1942 		return (retval);
1943 	}
1944 
1945 	retval = ibt_index2pkey_byguid(port.hp_hca_guid, port.hp_port,
1946 	    attr->ud_pkey_ix, &ud_pkey);
1947 	if (retval != IBT_SUCCESS) {
1948 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1949 		    "Failed to convert index2pkey: %d", retval);
1950 		ibcm_dec_hca_acc_cnt(hcap);
1951 		return (retval);
1952 	}
1953 
1954 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(sidr_entry))
1955 
1956 	/* Allocate a new request id */
1957 	if (ibcm_alloc_reqid(hcap, &sidr_entry.srch_req_id) == IBCM_FAILURE) {
1958 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1959 		    "no req id available");
1960 		ibcm_dec_hca_acc_cnt(hcap);
1961 		return (IBT_INSUFF_KERNEL_RESOURCE);
1962 	}
1963 
1964 	if ((hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl == NULL) &&
1965 	    ((retval = ibcm_hca_reinit_port(hcap, port.hp_port - 1))
1966 	    != IBT_SUCCESS)) {
1967 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
1968 		    "ibmf reg or callback setup failed during re-initialize");
1969 		return (retval);
1970 	}
1971 
1972 	ibmf_hdl = hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl;
1973 
1974 	/* find the ibmf QP to post the SIDR REQ */
1975 	if ((cm_qp_entry = ibcm_find_qp(hcap, port.hp_port, ud_pkey)) ==
1976 	    NULL) {
1977 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF QP allocation"
1978 		    " failed");
1979 		ibcm_dec_hca_acc_cnt(hcap);
1980 		return (IBT_INSUFF_RESOURCE);
1981 	}
1982 
1983 	if ((retval = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg, MAD_METHOD_SEND))
1984 	    != IBT_SUCCESS) {
1985 		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF MSG allocation"
1986 		    " failed");
1987 		ibcm_release_qp(cm_qp_entry);
1988 		ibcm_dec_hca_acc_cnt(hcap);
1989 		return (retval);
1990 	}
1991 
1992 	sidr_entry.srch_lid = port.hp_base_lid;
1993 	sidr_entry.srch_gid = attr->ud_addr->av_sgid;
1994 	sidr_entry.srch_grh_exists = attr->ud_addr->av_send_grh;
1995 	sidr_entry.srch_mode = IBCM_ACTIVE_MODE;
1996 
1997 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(sidr_entry))
1998 
1999 	/* do various allocations needed here */
2000 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
2001 
2002 	(void) ibcm_find_sidr_entry(&sidr_entry, hcap, &ud_statep,
2003 	    IBCM_FLAG_ADD);
2004 	rw_exit(&hcap->hca_sidr_list_lock);
2005 
2006 	/* Increment hca's resource count */
2007 	ibcm_inc_hca_res_cnt(hcap);
2008 
2009 	/* After a resource created on hca, no need to hold the acc cnt */
2010 	ibcm_dec_hca_acc_cnt(hcap);
2011 
2012 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_statep))
2013 
2014 	/* Initialize some ud_statep fields */
2015 	ud_statep->ud_stored_msg = ibmf_msg;
2016 	ud_statep->ud_svc_id = attr->ud_sid;
2017 	ud_statep->ud_pkt_life_time =
2018 	    ibt_ib2usec(attr->ud_pkt_lt);
2019 	ud_statep->ud_stored_reply_addr.cm_qp_entry = cm_qp_entry;
2020 
2021 	/* set remaining retry cnt */
2022 	ud_statep->ud_remaining_retry_cnt = ud_statep->ud_max_cm_retries;
2023 
2024 	/*
2025 	 * Get UD handler and corresponding args which is pass it back
2026 	 * as first argument for the handler.
2027 	 */
2028 	ud_statep->ud_state_cm_private = attr->ud_cm_private;
2029 
2030 	if (mode == IBT_BLOCKING)
2031 		ud_statep->ud_return_data = ret_args;
2032 	else
2033 		ud_statep->ud_cm_handler = attr->ud_cm_handler;
2034 
2035 	/* Initialize the fields of ud_statep->ud_stored_reply_addr */
2036 	ud_statep->ud_stored_reply_addr.grh_exists = attr->ud_addr->av_send_grh;
2037 	ud_statep->ud_stored_reply_addr.ibmf_hdl = ibmf_hdl;
2038 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_hop_limit =
2039 	    attr->ud_addr->av_hop;
2040 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_sender_gid =
2041 	    attr->ud_addr->av_sgid;
2042 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_recver_gid =
2043 	    attr->ud_addr->av_dgid;
2044 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_tclass =
2045 	    attr->ud_addr->av_tclass;
2046 	ud_statep->ud_stored_reply_addr.grh_hdr.ig_flow_label =
2047 	    attr->ud_addr->av_flow & IB_GRH_FLOW_LABEL_MASK;
2048 
2049 	/* needs to be derived based on the base LID and path bits */
2050 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_local_lid =
2051 	    port.hp_base_lid;
2052 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_lid =
2053 	    attr->ud_addr->av_dlid;
2054 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_p_key = ud_pkey;
2055 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_q_key = IB_GSI_QKEY;
2056 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_service_level =
2057 	    attr->ud_addr->av_srvl;
2058 
2059 	/*
2060 	 * This may be enchanced later, to use a remote qno based on past
2061 	 * redirect rej mad responses. This would be the place to specify
2062 	 * appropriate remote qno
2063 	 */
2064 	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_qno = 1;
2065 
2066 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2067 
2068 	/* Initialize the SIDR REQ message fields */
2069 	sidr_req_msgp =
2070 	    (ibcm_sidr_req_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);
2071 
2072 	sidr_req_msgp->sidr_req_request_id = h2b32(ud_statep->ud_req_id);
2073 	sidr_req_msgp->sidr_req_service_id = h2b64(attr->ud_sid);
2074 	sidr_req_msgp->sidr_req_pkey = h2b16(ud_pkey);
2075 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->AttributeID =
2076 	    h2b16(IBCM_INCOMING_SIDR_REQ + IBCM_ATTR_BASE_ID);
2077 
2078 	if ((attr->ud_priv_data != NULL) && (attr->ud_priv_data_len > 0)) {
2079 		bcopy(attr->ud_priv_data, sidr_req_msgp->sidr_req_private_data,
2080 		    min(attr->ud_priv_data_len, IBT_SIDR_REQ_PRIV_DATA_SZ));
2081 	}
2082 
2083 	/* Send out the SIDR REQ message */
2084 	ud_statep->ud_state = IBCM_STATE_SIDR_REQ_SENT;
2085 	ud_statep->ud_timer_stored_state = IBCM_STATE_SIDR_REQ_SENT;
2086 	IBCM_UD_REF_CNT_INCR(ud_statep); /* for non-blocking SIDR REQ post */
2087 	ud_statep->ud_timer_value = ibt_ib2usec(ibcm_max_sidr_rep_proctime) +
2088 	    (ud_statep->ud_pkt_life_time * 2);
2089 
2090 	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID =
2091 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_SIDR_REQ,
2092 	    ud_statep->ud_req_id, 0));
2093 
2094 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: timer_value in HZ = %x",
2095 	    ud_statep->ud_timer_value);
2096 
2097 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ud_statep))
2098 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))
2099 
2100 	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
2101 	    ibcm_post_sidr_req_complete, ud_statep);
2102 
2103 	mutex_enter(&ud_statep->ud_state_mutex);
2104 
2105 	/* Wait for SIDR_REP */
2106 	if (mode == IBT_BLOCKING) {
2107 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: blocking");
2108 
2109 		while (ud_statep->ud_blocking_done != B_TRUE) {
2110 			cv_wait(&ud_statep->ud_block_client_cv,
2111 			    &ud_statep->ud_state_mutex);
2112 		}
2113 
2114 		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: finished blocking");
2115 
2116 		if (ret_args->ud_status == IBT_CM_SREP_QPN_VALID) {
2117 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: DQPN = %x, "
2118 			    "status = %x, QKey = %x", ret_args->ud_dqpn,
2119 			    ret_args->ud_status, ret_args->ud_qkey);
2120 
2121 		} else {
2122 			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: Status<%x>",
2123 			    ret_args->ud_status);
2124 			retval = IBT_CM_FAILURE;
2125 		}
2126 	}
2127 
2128 	IBCM_UD_REF_CNT_DECR(ud_statep);
2129 	mutex_exit(&ud_statep->ud_state_mutex);
2130 
2131 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: done");
2132 
2133 	return (retval);
2134 }
2135 
2136 
2137 /*
2138  * Function:
2139  *	ibt_request_ud_dest
2140  * Input:
2141  *	ud_dest		A previously allocated UD destination handle.
2142  *	mode		This function can execute in blocking or non blocking
2143  *			modes.
2144  *	attr		UD destination attributes to be modified.
2145  * Output:
2146  *	ud_ret_args	If the function is called in blocking mode, ud_ret_args
2147  *			should be a pointer to an ibt_ud_returns_t struct.
2148  * Returns:
2149  *	IBT_SUCCESS
2150  * Description:
2151  *	Modify a previously allocated UD destination handle based on the
2152  *	results of doing the SIDR protocol.
2153  */
2154 ibt_status_t
2155 ibt_request_ud_dest(ibt_ud_dest_hdl_t ud_dest, ibt_execution_mode_t mode,
2156     ibt_ud_dest_attr_t *attr, ibt_ud_returns_t *ud_ret_args)
2157 {
2158 	ibt_status_t		retval;
2159 	ibt_ud_dest_t		*ud_destp;
2160 	ibcm_local_handler_t	*local_handler_priv = NULL;
2161 
2162 	IBTF_DPRINTF_L3(cmlog, "ibt_request_ud_dest(%p, %x, %p, %p)",
2163 	    ud_dest, mode, attr, ud_ret_args);
2164 
2165 	retval = ibcm_validate_dqpn_data(attr, mode, ud_ret_args);
2166 	if (retval != IBT_SUCCESS) {
2167 		return (retval);
2168 	}
2169 
2170 	ud_destp = ud_dest;
2171 
2172 	/* Allocate an Address handle. */
2173 	retval = ibt_modify_ah(ud_destp->ud_dest_hca, ud_destp->ud_ah,
2174 	    attr->ud_addr);
2175 	if (retval != IBT_SUCCESS) {
2176 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2177 		    "Address Handle Modification failed: %d", retval);
2178 		return (retval);
2179 	}
2180 
2181 	if (mode == IBT_NONBLOCKING) {
2182 		/*
2183 		 * In NON-BLOCKING mode, and we need to update the destination
2184 		 * handle with the DQPN and QKey that are obtained from
2185 		 * SIDR REP, hook-up our own handler, so that we can catch
2186 		 * the event, and we ourselves call the actual client's
2187 		 * ud_cm_handler, in our handler.
2188 		 */
2189 
2190 		/* Allocate memory for local handler's private data. */
2191 		local_handler_priv =
2192 		    kmem_alloc(sizeof (*local_handler_priv), KM_SLEEP);
2193 
2194 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2195 
2196 		local_handler_priv->actual_cm_handler = attr->ud_cm_handler;
2197 		local_handler_priv->actual_cm_private = attr->ud_cm_private;
2198 		local_handler_priv->dest_hdl = ud_destp;
2199 
2200 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*local_handler_priv))
2201 
2202 		attr->ud_cm_handler = ibcm_local_cm_handler;
2203 		attr->ud_cm_private = local_handler_priv;
2204 	}
2205 
2206 	/* In order to get DQPN and Destination QKey, perform SIDR */
2207 	retval = ibcm_ud_get_dqpn(attr, mode, ud_ret_args);
2208 	if (retval != IBT_SUCCESS) {
2209 		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
2210 		    "Failed to get DQPN: %d", retval);
2211 
2212 		/* Free memory allocated for local handler's private data. */
2213 		if (local_handler_priv != NULL)
2214 			kmem_free(local_handler_priv,
2215 			    sizeof (*local_handler_priv));
2216 		return (retval);
2217 	}
2218 
2219 	/*
2220 	 * Fill in the dqpn and dqkey as obtained from ud_ret_args,
2221 	 * values will be valid only on BLOCKING mode.
2222 	 */
2223 	if (mode == IBT_BLOCKING) {
2224 		ud_destp->ud_dst_qpn = ud_ret_args->ud_dqpn;
2225 		ud_destp->ud_qkey = ud_ret_args->ud_qkey;
2226 	}
2227 
2228 	return (retval);
2229 }
2230 
2231 /*
2232  * Function:
2233  *	ibt_ud_get_dqpn
2234  * Input:
2235  *	attr		A pointer to an ibt_ud_dest_attr_t struct that are
2236  *			required for SIDR REQ message. Not specified attributes
2237  *			should be set to "NULL" or "0".
2238  *			ud_sid, ud_addr and ud_pkt_lt must be specified.
2239  *	mode		This function can execute in blocking or non blocking
2240  *			modes.
2241  * Output:
2242  *	returns		If the function is called in blocking mode, returns
2243  *			should be a pointer to an ibt_ud_returns_t struct.
2244  * Return:
2245  *	IBT_SUCCESS	on success or respective failure on error.
2246  * Description:
2247  *	Finds the destination QPN at the specified destination that the
2248  *	specified service can be reached on. The IBTF CM initiates the
2249  *	service ID resolution protocol (SIDR) to determine a destination QPN.
2250  *
2251  * NOTE: SIDR_REQ is initiated from active side.
2252  */
2253 ibt_status_t
2254 ibt_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
2255     ibt_ud_returns_t *returns)
2256 {
2257 	ibt_status_t		retval;
2258 
2259 	IBTF_DPRINTF_L3(cmlog, "ibt_ud_get_dqpn(%p, %x, %p)",
2260 	    attr, mode, returns);
2261 
2262 	retval = ibcm_validate_dqpn_data(attr, mode, returns);
2263 	if (retval != IBT_SUCCESS) {
2264 		return (retval);
2265 	}
2266 
2267 	return (ibcm_ud_get_dqpn(attr, mode, returns));
2268 }
2269 
2270 
2271 /*
2272  * ibt_cm_delay:
2273  *	A client CM handler function can call this function
2274  *	to extend its response time to a CM event.
2275  * INPUTS:
2276  *	flags		Indicates what CM message processing is being delayed
2277  *			by the CM handler, valid values are:
2278  *				IBT_CM_DELAY_REQ
2279  *				IBT_CM_DELAY_REP
2280  *				IBT_CM_DELAY_LAP
2281  *	cm_session_id	The session ID that was passed to client srv_handler
2282  *			by the CM
2283  *	service_time	The extended service time
2284  *	priv_data	Vendor specific data to be sent in the CM generated
2285  *			MRA message. Should be NULL if not specified.
2286  *	len		The number of bytes of data specified by priv_data.
2287  *
2288  * RETURN VALUES:
2289  *	IBT_SUCCESS	on success (or respective failure on error)
2290  */
2291 ibt_status_t
2292 ibt_cm_delay(ibt_cmdelay_flags_t flags, void *cm_session_id,
2293     clock_t service_time, void *priv_data, ibt_priv_data_len_t len)
2294 {
2295 	uint8_t			msg_typ = 0;
2296 	ibcm_mra_msg_t		*mra_msgp;
2297 	ibcm_state_data_t	*statep;
2298 	ibt_status_t		status;
2299 
2300 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay(0x%x, %p, 0x%x)",
2301 	    flags, cm_session_id, service_time);
2302 
2303 	/*
2304 	 * Make sure channel is associated with a statep
2305 	 */
2306 	statep = (ibcm_state_data_t *)cm_session_id;
2307 
2308 	if (statep == NULL) {
2309 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: statep NULL");
2310 		return (IBT_INVALID_PARAM);
2311 	}
2312 
2313 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_delay: statep %p", statep);
2314 
2315 	/* Allocate an ibmf msg for mra, if not allocated yet */
2316 	if (statep->mra_msg == NULL) {
2317 		if ((status = ibcm_alloc_out_msg(
2318 		    statep->stored_reply_addr.ibmf_hdl, &statep->mra_msg,
2319 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
2320 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: chan 0x%p"
2321 			    "IBMF MSG allocation failed", statep->channel);
2322 			return (status);
2323 		}
2324 	}
2325 
2326 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mra_msgp))
2327 
2328 	mra_msgp = (ibcm_mra_msg_t *)IBCM_OUT_MSGP(statep->mra_msg);
2329 	mra_msgp->mra_local_comm_id = h2b32(statep->local_comid);
2330 	mra_msgp->mra_remote_comm_id = h2b32(statep->remote_comid);
2331 
2332 	/* fill in rest of MRA's fields - Message MRAed and Service Timeout */
2333 	if (flags == IBT_CM_DELAY_REQ) {
2334 		msg_typ = IBT_CM_MRA_TYPE_REQ;
2335 	} else if (flags == IBT_CM_DELAY_REP) {
2336 		msg_typ = IBT_CM_MRA_TYPE_REP;
2337 	} else if (flags == IBT_CM_DELAY_LAP) {
2338 		msg_typ = IBT_CM_MRA_TYPE_LAP;
2339 	}
2340 
2341 	mra_msgp->mra_message_type_plus = msg_typ << 6;
2342 	mra_msgp->mra_service_timeout_plus = ibt_usec2ib(service_time) << 3;
2343 
2344 	len = min(len, IBT_MRA_PRIV_DATA_SZ);
2345 	if (priv_data && (len > 0))
2346 		bcopy(priv_data, mra_msgp->mra_private_data, len);
2347 
2348 	IBCM_OUT_HDRP(statep->mra_msg)->AttributeID =
2349 	    h2b16(IBCM_INCOMING_MRA + IBCM_ATTR_BASE_ID);
2350 
2351 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mra_msgp))
2352 
2353 	mutex_enter(&statep->state_mutex);
2354 
2355 	if ((statep->mode == IBCM_ACTIVE_MODE) &&
2356 	    (statep->state == IBCM_STATE_REP_RCVD)) {
2357 		statep->state = IBCM_STATE_MRA_REP_SENT;
2358 	} else if (statep->mode == IBCM_PASSIVE_MODE) {
2359 		if (statep->state == IBCM_STATE_REQ_RCVD) {
2360 			statep->state = IBCM_STATE_MRA_SENT;
2361 		} else if (statep->ap_state == IBCM_AP_STATE_LAP_RCVD) {
2362 			statep->ap_state = IBCM_AP_STATE_MRA_LAP_RCVD;
2363 		} else {
2364 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2365 			    "/ap_state/mode %x, %x, %x", statep->state,
2366 			    statep->ap_state, statep->mode);
2367 			mutex_exit(&statep->state_mutex);
2368 			return (IBT_CHAN_STATE_INVALID);
2369 		}
2370 	} else {
2371 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
2372 		    "/ap_state/mode %x, %x, %x", statep->state,
2373 		    statep->ap_state, statep->mode);
2374 		mutex_exit(&statep->state_mutex);
2375 
2376 		return (IBT_CHAN_STATE_INVALID);
2377 	}
2378 	/* service time is usecs, stale_clock is nsecs */
2379 	statep->stale_clock = gethrtime() +
2380 	    (hrtime_t)ibt_ib2usec(ibt_usec2ib(service_time)) * (1000 *
2381 	    statep->max_cm_retries);
2382 
2383 	statep->send_mad_flags |= IBCM_MRA_POST_BUSY;
2384 	IBCM_REF_CNT_INCR(statep);	/* for ibcm_post_mra_complete */
2385 	mutex_exit(&statep->state_mutex);
2386 
2387 	IBCM_OUT_HDRP(statep->mra_msg)->TransactionID =
2388 		IBCM_OUT_HDRP(statep->stored_msg)->TransactionID;
2389 
2390 	/* post the MRA mad in blocking mode, as no timers involved */
2391 	ibcm_post_rc_mad(statep, statep->mra_msg, ibcm_post_mra_complete,
2392 	    statep);
2393 	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_MRA);
2394 	/* If this message isn't seen then ibt_cm_delay failed */
2395 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay: done !!");
2396 
2397 	return (IBT_SUCCESS);
2398 }
2399 
2400 
2401 /*
2402  * ibt_register_service()
2403  *	Register a service with the IBCM
2404  *
2405  * INPUTS:
2406  *	ibt_hdl		The IBT client handle returned to the client
2407  *			on an ibt_attach() call.
2408  *
2409  *	srv		The address of a ibt_srv_desc_t that describes
2410  *			the service, containing the following:
2411  *
2412  *		sd_ud_handler	The Service CM UD event Handler.
2413  *		sd_handler	The Service CM RC/UC/RD event Handler.
2414  *		sd_flags	Service flags (peer-to-peer, or not).
2415  *
2416  *	sid		This tells CM if the service is local (sid is 0) or
2417  *			wellknown (sid is the starting service id of the range).
2418  *
2419  *	num_sids	The number of contiguous service-ids to reserve.
2420  *
2421  *	srv_hdl		The address of a service identification handle, used
2422  *			to deregister a service, and to bind GIDs to.
2423  *
2424  *	ret_sid		The address to store the Service ID return value.
2425  *			If num_sids > 1, ret_sid is the first Service ID
2426  *			in the range.
2427  *
2428  * ibt_register_service() returns:
2429  *	IBT_SUCCESS		- added a service successfully.
2430  *	IBT_INVALID_PARAM	- invalid input parameter.
2431  *	IBT_CM_FAILURE		- failed to add the service.
2432  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2433  *	IBT_INSUFF_KERNEL_RESOURCE - ran out of local service ids (should
2434  *				     never happen).
2435  */
2436 ibt_status_t
2437 ibt_register_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_desc_t *srv,
2438     ib_svc_id_t sid, int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
2439 {
2440 	ibcm_svc_info_t		*svcinfop;
2441 
2442 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service(%p, %p, %llx, %d)",
2443 	    ibt_hdl, srv, (longlong_t)sid, num_sids);
2444 
2445 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*svcinfop))
2446 
2447 	*srv_hdl = NULL;
2448 
2449 	if (num_sids <= 0) {
2450 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2451 		    "Invalid number of service-ids specified (%d)", num_sids);
2452 		return (IBT_INVALID_PARAM);
2453 	}
2454 
2455 	if (sid == 0) {
2456 		if (ret_sid == NULL)
2457 			return (IBT_INVALID_PARAM);
2458 		sid = ibcm_alloc_local_sids(num_sids);
2459 		if (sid == 0)
2460 			return (IBT_INSUFF_KERNEL_RESOURCE);
2461 
2462 	/* Make sure that the ServiceId specified is not of LOCAL AGN type. */
2463 	} else if ((sid & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL) {
2464 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2465 		    "Invalid non-LOCAL SID specified: 0x%llX",
2466 		    (longlong_t)sid);
2467 		return (IBT_INVALID_PARAM);
2468 	}
2469 
2470 	svcinfop = ibcm_create_svc_entry(sid, num_sids);
2471 
2472 	if (svcinfop == NULL) {
2473 		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
2474 		    "Service-ID 0x%llx already registered", (longlong_t)sid);
2475 		return (IBT_CM_SERVICE_EXISTS);
2476 	}
2477 
2478 	/*
2479 	 * 'sid' and 'num_sids' are filled in ibcm_create_svc_entry()
2480 	 */
2481 	svcinfop->svc_flags = srv->sd_flags;
2482 	svcinfop->svc_rc_handler = srv->sd_handler;
2483 	svcinfop->svc_ud_handler = srv->sd_ud_handler;
2484 
2485 	if (ret_sid != NULL)
2486 		*ret_sid = sid;
2487 
2488 	*srv_hdl = svcinfop;
2489 
2490 	ibtl_cm_change_service_cnt(ibt_hdl, num_sids);
2491 
2492 	/* If this message isn't seen, then ibt_register_service failed. */
2493 	IBTF_DPRINTF_L2(cmlog, "ibt_register_service: done (%p, %llX)",
2494 	    svcinfop, sid);
2495 
2496 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*svcinfop))
2497 
2498 	return (IBT_SUCCESS);
2499 }
2500 
2501 
2502 static ibt_status_t
2503 ibcm_write_service_record(ibmf_saa_handle_t saa_handle,
2504     sa_service_record_t *srv_recp, ibmf_saa_access_type_t saa_type)
2505 {
2506 	int	rval;
2507 	int	retry;
2508 
2509 	ibcm_sa_access_enter();
2510 	for (retry = 0; retry < ibcm_max_sa_retries; retry++) {
2511 		rval = ibmf_saa_update_service_record(
2512 		    saa_handle, srv_recp, saa_type, 0);
2513 		if (rval != IBMF_TRANS_TIMEOUT) {
2514 			break;
2515 		}
2516 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2517 		    "ibmf_saa_update_service_record timed out"
2518 		    " SID = %llX, rval = %d, saa_type = %d",
2519 		    (longlong_t)srv_recp->ServiceID, rval, saa_type);
2520 		delay(ibcm_sa_timeout_delay);
2521 	}
2522 	ibcm_sa_access_exit();
2523 
2524 	if (rval != IBMF_SUCCESS) {
2525 		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
2526 		    "ibmf_saa_update_service_record() : Failed - %d", rval);
2527 		return (ibcm_ibmf_analyze_error(rval));
2528 	} else
2529 		return (IBT_SUCCESS);
2530 }
2531 
2532 
2533 static void
2534 ibcm_rem_stale_srec(ibmf_saa_handle_t saa_handle, sa_service_record_t *srec)
2535 {
2536 	ibt_status_t		retval;
2537 	uint_t			num_found;
2538 	size_t			length;
2539 	sa_service_record_t	*srv_resp;
2540 	void			*results_p;
2541 	uint_t			i;
2542 	uint64_t		component_mask;
2543 	ibmf_saa_access_args_t	access_args;
2544 
2545 	component_mask =
2546 	    SA_SR_COMPMASK_PKEY | SA_SR_COMPMASK_NAME | SA_SR_COMPMASK_GID;
2547 
2548 	/* Call in SA Access retrieve routine to get Service Records. */
2549 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
2550 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
2551 	access_args.sq_component_mask = component_mask;
2552 	access_args.sq_template = srec;
2553 	access_args.sq_template_length = sizeof (sa_service_record_t);
2554 	access_args.sq_callback = NULL;
2555 	access_args.sq_callback_arg = NULL;
2556 
2557 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
2558 	    &results_p);
2559 	if (retval != IBT_SUCCESS) {
2560 		IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2561 		    "SA Access Failure");
2562 		return;
2563 	}
2564 
2565 	num_found = length / sizeof (sa_service_record_t);
2566 
2567 	if (num_found)
2568 		IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2569 		    "Found %d matching Service Records.", num_found);
2570 
2571 	/* Validate the returned number of records. */
2572 	if ((results_p != NULL) && (num_found > 0)) {
2573 
2574 		/* Remove all the records. */
2575 		for (i = 0; i < num_found; i++) {
2576 
2577 			srv_resp = (sa_service_record_t *)
2578 			    ((uchar_t *)results_p +
2579 			    i * sizeof (sa_service_record_t));
2580 
2581 			/*
2582 			 * Found some matching records, but check out whether
2583 			 * this Record is really stale or just happens to match
2584 			 * the current session records. If yes, don't remove it.
2585 			 */
2586 			mutex_enter(&ibcm_svc_info_lock);
2587 			if (ibcm_find_svc_entry(srv_resp->ServiceID) != NULL) {
2588 				/* This record is NOT STALE. */
2589 				mutex_exit(&ibcm_svc_info_lock);
2590 				IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
2591 				    "This is not Stale, it's an active record");
2592 				continue;
2593 			}
2594 			mutex_exit(&ibcm_svc_info_lock);
2595 
2596 			IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
2597 			    "Removing Stale Rec: %s, %llX",
2598 			    srv_resp->ServiceName, srv_resp->ServiceID);
2599 
2600 			IBCM_DUMP_SERVICE_REC(srv_resp);
2601 
2602 			/*
2603 			 * Remove the Service Record Entry from SA.
2604 			 *
2605 			 * Get ServiceID info from Response Buf, other
2606 			 * attributes are already filled-in.
2607 			 */
2608 
2609 			 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2610 
2611 			srec->ServiceID = srv_resp->ServiceID;
2612 
2613 			 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srec->ServiceID))
2614 
2615 			(void) ibcm_write_service_record(saa_handle, srec,
2616 			    IBMF_SAA_DELETE);
2617 		}
2618 
2619 		/* Deallocate the memory for results_p. */
2620 		kmem_free(results_p, length);
2621 	}
2622 }
2623 
2624 
2625 
2626 /*
 * ibt_bind_service()
 *	Bind a previously registered service to a GID
 *
 * INPUTS:
 *	srv_hdl		The service id handle returned to the client
 *			on an ibt_register_service() call.
2633  *
2634  *	gid		The GID to which to bind the service.
2635  *
2636  *	srv_bind	The address of a ibt_srv_bind_t that describes
2637  *			the service record.  This should be NULL if there
2638  *			is to be no service record.  This contains:
2639  *
2640  *		sb_lease	Lease period
2641  *		sb_pkey		Partition
2642  *		sb_name		pointer to ASCII string Service Name,
2643  *				NULL terminated.
2644  *		sb_key[]	Key to secure the service record.
2645  *		sb_data		Service Data structure (64-byte)
2646  *
2647  *	cm_private	First argument of Service handler.
2648  *
2649  *	sb_hdl_p	The address of a service bind handle, used
2650  *			to undo the service binding.
2651  *
2652  * ibt_bind_service() returns:
2653  *	IBT_SUCCESS		- added a service successfully.
2654  *	IBT_INVALID_PARAM	- invalid input parameter.
2655  *	IBT_CM_FAILURE		- failed to add the service.
2656  *	IBT_CM_SERVICE_EXISTS	- service already exists.
2657  */
2658 ibt_status_t
2659 ibt_bind_service(ibt_srv_hdl_t srv_hdl, ib_gid_t gid, ibt_srv_bind_t *srv_bind,
2660     void *cm_private, ibt_sbind_hdl_t *sb_hdl_p)
2661 {
2662 	ibt_status_t		status;
2663 	ibtl_cm_hca_port_t	port;
2664 	ibcm_svc_bind_t		*sbindp, *sbp;
2665 	ibcm_hca_info_t		*hcap;
2666 	ib_svc_id_t		sid, start_sid, end_sid;
2667 	ibmf_saa_handle_t	saa_handle;
2668 	sa_service_record_t	srv_rec;
2669 	uint16_t		pkey_ix;
2670 
2671 	if (sb_hdl_p != NULL)
2672 		*sb_hdl_p = NULL;	/* return value for error cases */
2673 
2674 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: srv_hdl %p, gid (%llX:%llX)",
2675 	    srv_hdl, (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
2676 
2677 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sbindp))
2678 
2679 	/* Call ibtl_cm_get_hca_port to get the port number and the HCA GUID. */
2680 	if ((status = ibtl_cm_get_hca_port(gid, 0, &port)) != IBT_SUCCESS) {
2681 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2682 		    "ibtl_cm_get_hca_port failed: %d", status);
2683 		return (status);
2684 	}
2685 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: Port:%d HCA GUID:%llX",
2686 	    port.hp_port, port.hp_hca_guid);
2687 
2688 	hcap = ibcm_find_hca_entry(port.hp_hca_guid);
2689 	if (hcap == NULL) {
2690 		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: NO HCA found");
2691 		return (IBT_HCA_BUSY_DETACHING);
2692 	}
2693 	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: hcap = %p", hcap);
2694 
2695 	if (srv_bind != NULL) {
2696 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2697 		if (saa_handle == NULL) {
2698 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2699 			    "saa_handle is NULL");
2700 			ibcm_dec_hca_acc_cnt(hcap);
2701 			return (IBT_HCA_PORT_NOT_ACTIVE);
2702 		}
2703 		if (srv_bind->sb_pkey == 0) {
2704 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2705 			    "P_Key must not be 0");
2706 			ibcm_dec_hca_acc_cnt(hcap);
2707 			return (IBT_INVALID_PARAM);
2708 		}
2709 		if (strlen(srv_bind->sb_name) >= IB_SVC_NAME_LEN) {
2710 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2711 			    "Service Name is too long");
2712 			ibcm_dec_hca_acc_cnt(hcap);
2713 			return (IBT_INVALID_PARAM);
2714 		} else
2715 			IBTF_DPRINTF_L3(cmlog, "ibt_bind_service: "
2716 			    "Service Name='%s'", srv_bind->sb_name);
2717 		status = ibt_pkey2index_byguid(port.hp_hca_guid,
2718 		    port.hp_port, srv_bind->sb_pkey, &pkey_ix);
2719 		if (status != IBT_SUCCESS) {
2720 			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2721 			    "P_Key 0x%x not found in P_Key_Table",
2722 			    srv_bind->sb_pkey);
2723 			ibcm_dec_hca_acc_cnt(hcap);
2724 			return (status);
2725 		}
2726 	}
2727 
2728 	/* assume success - allocate before locking */
2729 	sbindp = kmem_zalloc(sizeof (*sbindp), KM_SLEEP);
2730 	sbindp->sbind_cm_private = cm_private;
2731 	sbindp->sbind_gid = gid;
2732 	sbindp->sbind_hcaguid = port.hp_hca_guid;
2733 	sbindp->sbind_port = port.hp_port;
2734 
2735 	mutex_enter(&ibcm_svc_info_lock);
2736 
2737 	sbp = srv_hdl->svc_bind_list;
2738 	while (sbp != NULL) {
2739 		if (sbp->sbind_gid.gid_guid == gid.gid_guid &&
2740 		    sbp->sbind_gid.gid_prefix == gid.gid_prefix) {
2741 			if (srv_bind == NULL ||
2742 			    srv_bind->sb_pkey == sbp->sbind_pkey) {
2743 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
2744 				    "failed: GID %llX:%llX and PKEY %x is "
2745 				    "already bound", gid.gid_guid,
2746 				    gid.gid_prefix, sbp->sbind_pkey);
2747 				mutex_exit(&ibcm_svc_info_lock);
2748 				ibcm_dec_hca_acc_cnt(hcap);
2749 				kmem_free(sbindp, sizeof (*sbindp));
2750 				return (IBT_CM_SERVICE_EXISTS);
2751 			}
2752 		}
2753 		sbp = sbp->sbind_link;
2754 	}
2755 	/* no entry found */
2756 
2757 	sbindp->sbind_link = srv_hdl->svc_bind_list;
2758 	srv_hdl->svc_bind_list = sbindp;
2759 
2760 	mutex_exit(&ibcm_svc_info_lock);
2761 
2762 	if (srv_bind != NULL) {
2763 		bzero(&srv_rec, sizeof (srv_rec));
2764 
2765 		srv_rec.ServiceLease =
2766 		    sbindp->sbind_lease = srv_bind->sb_lease;
2767 		srv_rec.ServiceP_Key =
2768 		    sbindp->sbind_pkey = srv_bind->sb_pkey;
2769 		srv_rec.ServiceKey_hi =
2770 		    sbindp->sbind_key[0] = srv_bind->sb_key[0];
2771 		srv_rec.ServiceKey_lo =
2772 		    sbindp->sbind_key[1] = srv_bind->sb_key[1];
2773 		(void) strcpy(sbindp->sbind_name, srv_bind->sb_name);
2774 		(void) strcpy((char *)srv_rec.ServiceName, srv_bind->sb_name);
2775 		srv_rec.ServiceGID = gid;
2776 
2777 		/*
2778 		 * Find out whether we have any stale Local Service records
2779 		 * matching the current attributes.  If yes, we shall try to
2780 		 * remove them from SA using the current request's ServiceKey.
2781 		 *
2782 		 * We will perform this operation only for Local Services, as
2783 		 * it is handled by SA automatically for WellKnown Services.
2784 		 *
2785 		 * Ofcourse, clients can specify NOT to do this clean-up by
2786 		 * setting IBT_SBIND_NO_CLEANUP flag (srv_bind->sb_flag).
2787 		 */
2788 		if ((srv_hdl->svc_id & IB_SID_AGN_LOCAL) &&
2789 		    (!(srv_bind->sb_flag & IBT_SBIND_NO_CLEANUP))) {
2790 			ibcm_rem_stale_srec(saa_handle, &srv_rec);
2791 		}
2792 
2793 		/* Handle endianess for service data. */
2794 		ibcm_swizzle_from_srv(&srv_bind->sb_data, sbindp->sbind_data);
2795 
2796 		bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
2797 
2798 		/* insert srv record into the SA */
2799 		start_sid = srv_hdl->svc_id;
2800 		end_sid = start_sid + srv_hdl->svc_num_sids - 1;
2801 		for (sid = start_sid; sid <= end_sid; sid++) {
2802 
2803 			srv_rec.ServiceID = sid;
2804 
2805 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2806 
2807 			IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: "
2808 			    "ibmf_saa_write_service_record, SvcId = %llX",
2809 			    (longlong_t)sid);
2810 
2811 			status = ibcm_write_service_record(saa_handle, &srv_rec,
2812 			    IBMF_SAA_UPDATE);
2813 			if (status != IBT_SUCCESS) {
2814 				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service:"
2815 				    " ibcm_write_service_record fails %d, "
2816 				    "sid %llX", status, (longlong_t)sid);
2817 
2818 				if (sid != start_sid) {
2819 					/*
2820 					 * Bind failed while bind SID other than
2821 					 * first in the sid_range.  So we need
2822 					 * to unbind those, which are passed.
2823 					 *
2824 					 * Need to increment svc count to
2825 					 * compensate for ibt_unbind_service().
2826 					 */
2827 					ibcm_inc_hca_svc_cnt(hcap);
2828 					ibcm_dec_hca_acc_cnt(hcap);
2829 
2830 					(void) ibt_unbind_service(srv_hdl,
2831 					    sbindp);
2832 				} else {
2833 					ibcm_svc_bind_t		**sbpp;
2834 
2835 					/*
2836 					 * Bind failed for the first SID or the
2837 					 * only SID in question, then no need
2838 					 * to unbind, just free memory and
2839 					 * return error.
2840 					 */
2841 					mutex_enter(&ibcm_svc_info_lock);
2842 
2843 					sbpp = &srv_hdl->svc_bind_list;
2844 					sbp = *sbpp;
2845 					while (sbp != NULL) {
2846 						if (sbp == sbindp) {
2847 							*sbpp = sbp->sbind_link;
2848 							break;
2849 						}
2850 						sbpp = &sbp->sbind_link;
2851 						sbp = *sbpp;
2852 					}
2853 					mutex_exit(&ibcm_svc_info_lock);
2854 					ibcm_dec_hca_acc_cnt(hcap);
2855 
2856 					kmem_free(sbindp, sizeof (*sbindp));
2857 				}
2858 				return (status);
2859 			}
2860 		}
2861 	}
2862 	ibcm_inc_hca_svc_cnt(hcap);
2863 	ibcm_dec_hca_acc_cnt(hcap);
2864 
2865 	/* If this message isn't seen then ibt_bind_service failed */
2866 	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: DONE (%p, %llX:%llX)",
2867 	    srv_hdl, gid.gid_prefix, gid.gid_guid);
2868 
2869 	if (sb_hdl_p != NULL)
2870 		*sb_hdl_p = sbindp;
2871 
2872 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sbindp))
2873 
2874 	return (IBT_SUCCESS);
2875 }
2876 
2877 ibt_status_t
2878 ibt_unbind_service(ibt_srv_hdl_t srv_hdl, ibt_sbind_hdl_t sbindp)
2879 {
2880 	ib_svc_id_t	sid, end_sid;
2881 	ibt_status_t	rval;
2882 	ibcm_hca_info_t	*hcap;
2883 	ibcm_svc_bind_t	*sbp, **sbpp;
2884 
2885 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service(%p, %p)",
2886 	    srv_hdl, sbindp);
2887 
2888 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
2889 
2890 	/* If there is a service on hca, respective hcap cannot go away */
2891 	ASSERT(hcap != NULL);
2892 
2893 	mutex_enter(&ibcm_svc_info_lock);
2894 
2895 	sbpp = &srv_hdl->svc_bind_list;
2896 	sbp = *sbpp;
2897 	while (sbp != NULL) {
2898 		if (sbp == sbindp) {
2899 			*sbpp = sbp->sbind_link;
2900 			break;
2901 		}
2902 		sbpp = &sbp->sbind_link;
2903 		sbp = *sbpp;
2904 	}
2905 	sid = srv_hdl->svc_id;
2906 	end_sid = srv_hdl->svc_id + srv_hdl->svc_num_sids - 1;
2907 	if (sbp != NULL)
2908 		while (sbp->sbind_rewrite_state == IBCM_REWRITE_BUSY)
2909 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
2910 	mutex_exit(&ibcm_svc_info_lock);
2911 
2912 	if (sbp == NULL) {
2913 		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2914 		    "service binding not found: srv_hdl %p, srv_bind %p",
2915 		    srv_hdl, sbindp);
2916 		ibcm_dec_hca_acc_cnt(hcap);
2917 		return (IBT_INVALID_PARAM);
2918 	}
2919 
2920 	if (sbindp->sbind_pkey != 0) {	/* Are there service records? */
2921 		ibtl_cm_hca_port_t	port;
2922 		sa_service_record_t	srv_rec;
2923 		ibmf_saa_handle_t	saa_handle;
2924 		ibt_status_t		status;
2925 
2926 		/* get the default SGID of the port */
2927 		if ((status = ibtl_cm_get_hca_port(sbindp->sbind_gid, 0, &port))
2928 		    != IBT_SUCCESS) {
2929 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2930 			    "ibtl_cm_get_hca_port failed: %d", status);
2931 			/* we're done, but there may be stale service records */
2932 			goto done;
2933 		}
2934 
2935 		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
2936 		if (saa_handle == NULL) {
2937 			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2938 			    "saa_handle is NULL");
2939 			/* we're done, but there may be stale service records */
2940 			goto done;
2941 		}
2942 
2943 		/* Fill in fields of srv_rec */
2944 		bzero(&srv_rec, sizeof (srv_rec));
2945 
2946 		srv_rec.ServiceP_Key = sbindp->sbind_pkey;
2947 		srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
2948 		srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
2949 		srv_rec.ServiceGID = sbindp->sbind_gid;
2950 		(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
2951 
2952 		while (sid <= end_sid) {
2953 
2954 			srv_rec.ServiceID = sid;
2955 			IBCM_DUMP_SERVICE_REC(&srv_rec);
2956 
2957 			rval = ibcm_write_service_record(saa_handle, &srv_rec,
2958 			    IBMF_SAA_DELETE);
2959 
2960 			IBTF_DPRINTF_L4(cmlog, "ibt_unbind_service: "
2961 			    "ibcm_write_service_record rval = %d, SID %llx",
2962 			    rval, sid);
2963 			if (rval != IBT_SUCCESS) {
2964 				/* this is not considered a reason to fail */
2965 				IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
2966 				    "ibcm_write_service_record fails %d, "
2967 				    "sid %llx", rval, sid);
2968 			}
2969 			sid++;
2970 		}
2971 	}
2972 done:
2973 	ibcm_dec_hca_svc_cnt(hcap);
2974 	ibcm_dec_hca_acc_cnt(hcap);
2975 	kmem_free(sbindp, sizeof (*sbindp));
2976 
2977 	/* If this message isn't seen then ibt_unbind_service failed */
2978 	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: done !!");
2979 
2980 	return (IBT_SUCCESS);
2981 }
2982 
2983 /*
2984  * Simply pull off each binding from the list and unbind it.
2985  * If any of the unbind calls fail, we fail.
2986  */
2987 ibt_status_t
2988 ibt_unbind_all_services(ibt_srv_hdl_t srv_hdl)
2989 {
2990 	ibt_status_t	status;
2991 	ibcm_svc_bind_t	*sbp;
2992 
2993 	mutex_enter(&ibcm_svc_info_lock);
2994 	sbp = NULL;
2995 
2996 	/* this compare keeps the loop from being infinite */
2997 	while (sbp != srv_hdl->svc_bind_list) {
2998 		sbp = srv_hdl->svc_bind_list;
2999 		mutex_exit(&ibcm_svc_info_lock);
3000 		status = ibt_unbind_service(srv_hdl, sbp);
3001 		if (status != IBT_SUCCESS)
3002 			return (status);
3003 		mutex_enter(&ibcm_svc_info_lock);
3004 		if (srv_hdl->svc_bind_list == NULL)
3005 			break;
3006 	}
3007 	mutex_exit(&ibcm_svc_info_lock);
3008 	return (IBT_SUCCESS);
3009 }
3010 
3011 /*
3012  * ibt_deregister_service()
3013  *	Deregister a service with the IBCM
3014  *
3015  * INPUTS:
3016  *	ibt_hdl		The IBT client handle returned to the client
3017  *			on an ibt_attach() call.
3018  *
3019  *	srv_hdl		The address of a service identification handle, used
3020  *			to de-register a service.
3021  * RETURN VALUES:
3022  *	IBT_SUCCESS	on success (or respective failure on error)
3023  */
3024 ibt_status_t
3025 ibt_deregister_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_hdl_t srv_hdl)
3026 {
3027 	ibcm_svc_info_t		*svcp;
3028 	ibcm_svc_lookup_t	svc;
3029 
3030 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(%p, %p)",
3031 	    ibt_hdl, srv_hdl);
3032 
3033 	mutex_enter(&ibcm_svc_info_lock);
3034 
3035 	if (srv_hdl->svc_bind_list != NULL) {
3036 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service:"
3037 		    " srv_hdl %p still has bindings", srv_hdl);
3038 		mutex_exit(&ibcm_svc_info_lock);
3039 		return (IBT_CM_SERVICE_BUSY);
3040 	}
3041 	svc.sid = srv_hdl->svc_id;
3042 	svc.num_sids = 1;
3043 	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_service: SID 0x%llX, numsids %d",
3044 	    srv_hdl->svc_id, srv_hdl->svc_num_sids);
3045 
3046 #ifdef __lock_lint
3047 	ibcm_svc_compare(NULL, NULL);
3048 #endif
3049 	svcp = avl_find(&ibcm_svc_avl_tree, &svc, NULL);
3050 	if (svcp != srv_hdl) {
3051 		mutex_exit(&ibcm_svc_info_lock);
3052 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(): "
3053 		    "srv_hdl %p not found", srv_hdl);
3054 		return (IBT_INVALID_PARAM);
3055 	}
3056 	avl_remove(&ibcm_svc_avl_tree, svcp);
3057 
3058 	/* wait for active REQ/SREQ handling to be done */
3059 	svcp->svc_to_delete = 1;
3060 	while (svcp->svc_ref_cnt != 0)
3061 		cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3062 
3063 	mutex_exit(&ibcm_svc_info_lock);
3064 
3065 	if ((srv_hdl->svc_id & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL)
3066 		ibcm_free_local_sids(srv_hdl->svc_id, srv_hdl->svc_num_sids);
3067 
3068 	ibtl_cm_change_service_cnt(ibt_hdl, -srv_hdl->svc_num_sids);
3069 	kmem_free(srv_hdl, sizeof (*srv_hdl));
3070 
3071 	/* If this message isn't seen then ibt_deregister_service failed */
3072 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service: done !!");
3073 
3074 	return (IBT_SUCCESS);
3075 }
3076 
3077 ibcm_status_t
3078 ibcm_ar_init(void)
3079 {
3080 	ib_svc_id_t	sid = IBCM_DAPL_ATS_SID;
3081 	ibcm_svc_info_t *tmp_svcp;
3082 
3083 	IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init()");
3084 
3085 	/* remove this special SID from the pool of available SIDs */
3086 	if ((tmp_svcp = ibcm_create_svc_entry(sid, 1)) == NULL) {
3087 		IBTF_DPRINTF_L2(cmlog, "ibcm_ar_init: "
3088 		    "DAPL ATS SID 0x%llx already registered", (longlong_t)sid);
3089 		return (IBCM_FAILURE);
3090 	}
3091 	mutex_enter(&ibcm_svc_info_lock);
3092 	ibcm_ar_svcinfop = tmp_svcp;
3093 	ibcm_ar_list = NULL;	/* no address records registered yet */
3094 	mutex_exit(&ibcm_svc_info_lock);
3095 	return (IBCM_SUCCESS);
3096 }
3097 
3098 ibcm_status_t
3099 ibcm_ar_fini(void)
3100 {
3101 	ibcm_ar_t	*ar_list;
3102 	ibcm_svc_info_t	*tmp_svcp;
3103 
3104 	mutex_enter(&ibcm_svc_info_lock);
3105 	ar_list = ibcm_ar_list;
3106 
3107 	if (ar_list == NULL &&
3108 	    avl_numnodes(&ibcm_svc_avl_tree) == 1 &&
3109 	    avl_first(&ibcm_svc_avl_tree) == ibcm_ar_svcinfop) {
3110 		avl_remove(&ibcm_svc_avl_tree, ibcm_ar_svcinfop);
3111 		tmp_svcp = ibcm_ar_svcinfop;
3112 		mutex_exit(&ibcm_svc_info_lock);
3113 		kmem_free(tmp_svcp, sizeof (*ibcm_ar_svcinfop));
3114 		return (IBCM_SUCCESS);
3115 	}
3116 	mutex_exit(&ibcm_svc_info_lock);
3117 	return (IBCM_FAILURE);
3118 }
3119 
3120 
3121 /*
3122  * Return to the caller:
3123  *	IBT_SUCCESS		Found a perfect match.
3124  *				*arpp is set to the record.
3125  *	IBT_INCONSISTENT_AR	Found a record that's inconsistent.
3126  *	IBT_AR_NOT_REGISTERED	Found no record with same GID/pkey and
3127  *				found no record with same data.
3128  */
3129 static ibt_status_t
3130 ibcm_search_ar(ibt_ar_t *arp, ibcm_ar_t **arpp)
3131 {
3132 	ibcm_ar_t	*tmp;
3133 	int		i;
3134 
3135 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3136 	tmp = ibcm_ar_list;
3137 	while (tmp != NULL) {
3138 		if (tmp->ar.ar_gid.gid_prefix == arp->ar_gid.gid_prefix &&
3139 		    tmp->ar.ar_gid.gid_guid == arp->ar_gid.gid_guid &&
3140 		    tmp->ar.ar_pkey == arp->ar_pkey) {
3141 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3142 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3143 					return (IBT_INCONSISTENT_AR);
3144 			*arpp = tmp;
3145 			return (IBT_SUCCESS);
3146 		} else {
3147 			/* if all the data bytes match, we have inconsistency */
3148 			for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++)
3149 				if (tmp->ar.ar_data[i] != arp->ar_data[i])
3150 					break;
3151 			if (i == IBCM_DAPL_ATS_NBYTES)
3152 				return (IBT_INCONSISTENT_AR);
3153 			/* try next address record */
3154 		}
3155 		tmp = tmp->ar_link;
3156 	}
3157 	return (IBT_AR_NOT_REGISTERED);
3158 }
3159 
3160 ibt_status_t
3161 ibt_register_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3162 {
3163 	ibcm_ar_t		*found;
3164 	ibcm_ar_t		*tmp;
3165 	ibt_status_t		status;
3166 	ibt_status_t		s1, s2;
3167 	char			*s;
3168 	ibcm_ar_ref_t		*hdlp;
3169 	ibcm_ar_t		*new;
3170 	ibcm_ar_t		**linkp;
3171 	ibtl_cm_hca_port_t	cm_port;
3172 	uint16_t		pkey_ix;
3173 	ibcm_hca_info_t		*hcap;
3174 	ibmf_saa_handle_t	saa_handle;
3175 	sa_service_record_t	*srv_recp;
3176 	uint64_t		gid_ored;
3177 
3178 	IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: PKey 0x%X GID %llX:%llX",
3179 	    arp->ar_pkey, (longlong_t)arp->ar_gid.gid_prefix,
3180 	    (longlong_t)arp->ar_gid.gid_guid);
3181 
3182 	/*
3183 	 * If P_Key is 0, but GID is not, this query is invalid.
3184 	 * If GID is 0, but P_Key is not, this query is invalid.
3185 	 */
3186 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3187 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3188 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3189 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3190 		    "GID/P_Key is not valid");
3191 		return (IBT_INVALID_PARAM);
3192 	}
3193 
3194 	/* assume success, so these might be needed */
3195 	hdlp = kmem_alloc(sizeof (*hdlp), KM_SLEEP);
3196 	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
3197 
3198 	mutex_enter(&ibcm_svc_info_lock);
3199 	/* search for existing GID/pkey (there can be at most 1) */
3200 	status = ibcm_search_ar(arp, &found);
3201 	if (status == IBT_INCONSISTENT_AR) {
3202 		mutex_exit(&ibcm_svc_info_lock);
3203 		kmem_free(new, sizeof (*new));
3204 		kmem_free(hdlp, sizeof (*hdlp));
3205 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3206 		    "address record is inconsistent with a known one");
3207 		return (IBT_INCONSISTENT_AR);
3208 	} else if (status == IBT_SUCCESS) {
3209 		if (found->ar_flags == IBCM_AR_INITING) {
3210 			found->ar_waiters++;
3211 			cv_wait(&found->ar_cv, &ibcm_svc_info_lock);
3212 			found->ar_waiters--;
3213 		}
3214 		if (found->ar_flags == IBCM_AR_FAILED) {
3215 			if (found->ar_waiters == 0) {
3216 				cv_destroy(&found->ar_cv);
3217 				kmem_free(found, sizeof (*found));
3218 			}
3219 			mutex_exit(&ibcm_svc_info_lock);
3220 			kmem_free(new, sizeof (*new));
3221 			kmem_free(hdlp, sizeof (*hdlp));
3222 			return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0));
3223 		}
3224 		hdlp->ar_ibt_hdl = ibt_hdl;
3225 		hdlp->ar_ref_link = found->ar_ibt_hdl_list;
3226 		found->ar_ibt_hdl_list = hdlp;
3227 		mutex_exit(&ibcm_svc_info_lock);
3228 		kmem_free(new, sizeof (*new));
3229 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3230 		return (IBT_SUCCESS);
3231 	} else {
3232 		ASSERT(status == IBT_AR_NOT_REGISTERED);
3233 	}
3234 	hdlp->ar_ref_link = NULL;
3235 	hdlp->ar_ibt_hdl = ibt_hdl;
3236 	new->ar_ibt_hdl_list = hdlp;
3237 	new->ar = *arp;
3238 	new->ar_flags = IBCM_AR_INITING;
3239 	new->ar_waiters = 0;
3240 	cv_init(&new->ar_cv, NULL, CV_DEFAULT, NULL);
3241 	new->ar_link = ibcm_ar_list;
3242 	ibcm_ar_list = new;
3243 
3244 	/* verify GID/pkey is valid for a local port, etc. */
3245 	hcap = NULL;
3246 	if ((s1 = ibtl_cm_get_hca_port(arp->ar_gid, 0, &cm_port))
3247 		!= IBT_SUCCESS ||
3248 	    (s2 = ibt_pkey2index_byguid(cm_port.hp_hca_guid, cm_port.hp_port,
3249 		arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS ||
3250 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL) {
3251 		cv_destroy(&new->ar_cv);
3252 		ibcm_ar_list = new->ar_link;
3253 		mutex_exit(&ibcm_svc_info_lock);
3254 		kmem_free(new, sizeof (*new));
3255 		kmem_free(hdlp, sizeof (*hdlp));
3256 		status = IBT_INVALID_PARAM;
3257 		if (s1 == IBT_HCA_PORT_NOT_ACTIVE) {
3258 			s = "PORT DOWN";
3259 			status = IBT_HCA_PORT_NOT_ACTIVE;
3260 		} else if (s1 != IBT_SUCCESS)
3261 			s = "GID not found";
3262 		else if (s2 != IBT_SUCCESS)
3263 			s = "PKEY not found";
3264 		else
3265 			s = "CM could not find its HCA entry";
3266 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: %s, status = %d",
3267 		    s, status);
3268 		return (status);
3269 	}
3270 	mutex_exit(&ibcm_svc_info_lock);
3271 	saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port);
3272 
3273 	/* create service record */
3274 	srv_recp = kmem_zalloc(sizeof (*srv_recp), KM_SLEEP);
3275 	srv_recp->ServiceLease = 0xFFFFFFFF;	/* infinite */
3276 	srv_recp->ServiceP_Key = arp->ar_pkey;
3277 	srv_recp->ServiceKey_hi = 0xDA410000ULL;	/* DAPL */
3278 	srv_recp->ServiceKey_lo = 0xA7500000ULL;	/* ATS */
3279 	(void) strcpy((char *)srv_recp->ServiceName, IBCM_DAPL_ATS_NAME);
3280 	srv_recp->ServiceGID = arp->ar_gid;
3281 	bcopy(arp->ar_data, srv_recp->ServiceData, IBCM_DAPL_ATS_NBYTES);
3282 	srv_recp->ServiceID = IBCM_DAPL_ATS_SID;
3283 
3284 	/* insert service record into the SA */
3285 
3286 	IBCM_DUMP_SERVICE_REC(srv_recp);
3287 
3288 	if (saa_handle != NULL)
3289 		status = ibcm_write_service_record(saa_handle, srv_recp,
3290 		    IBMF_SAA_UPDATE);
3291 	else
3292 		status = IBT_HCA_PORT_NOT_ACTIVE;
3293 
3294 	if (status != IBT_SUCCESS) {
3295 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: sa access fails %d, "
3296 		    "sid %llX", status, (longlong_t)srv_recp->ServiceID);
3297 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: FAILED for gid "
3298 		    "%llX:%llX pkey 0x%X", (longlong_t)arp->ar_gid.gid_prefix,
3299 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3300 
3301 		kmem_free(srv_recp, sizeof (*srv_recp));
3302 		kmem_free(hdlp, sizeof (*hdlp));
3303 
3304 		mutex_enter(&ibcm_svc_info_lock);
3305 		linkp = &ibcm_ar_list;
3306 		tmp = *linkp;
3307 		while (tmp != NULL) {
3308 			if (tmp == new) {
3309 				*linkp = new->ar_link;
3310 				break;
3311 			}
3312 			linkp = &tmp->ar_link;
3313 			tmp = *linkp;
3314 		}
3315 		if (new->ar_waiters > 0) {
3316 			new->ar_flags = IBCM_AR_FAILED;
3317 			cv_broadcast(&new->ar_cv);
3318 			mutex_exit(&ibcm_svc_info_lock);
3319 		} else {
3320 			cv_destroy(&new->ar_cv);
3321 			mutex_exit(&ibcm_svc_info_lock);
3322 			kmem_free(new, sizeof (*new));
3323 		}
3324 		ibcm_dec_hca_acc_cnt(hcap);
3325 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
3326 		    "IBMF_SAA failed to write address record");
3327 	} else {					/* SUCCESS */
3328 		uint8_t		*b;
3329 
3330 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: SUCCESS for gid "
3331 		    "%llx:%llx pkey %x", (longlong_t)arp->ar_gid.gid_prefix,
3332 		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
3333 		b = arp->ar_data;
3334 
3335 		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar:"
3336 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3337 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3338 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3339 		mutex_enter(&ibcm_svc_info_lock);
3340 		new->ar_srv_recp = srv_recp;
3341 		new->ar_saa_handle = saa_handle;
3342 		new->ar_port = cm_port.hp_port;
3343 		new->ar_hcap = hcap;
3344 		new->ar_flags = IBCM_AR_SUCCESS;
3345 		if (new->ar_waiters > 0)
3346 			cv_broadcast(&new->ar_cv);
3347 		mutex_exit(&ibcm_svc_info_lock);
3348 		ibtl_cm_change_service_cnt(ibt_hdl, 1);
3349 		/* do not call ibcm_dec_hca_acc_cnt(hcap) until deregister */
3350 	}
3351 	return (status);
3352 }
3353 
3354 ibt_status_t
3355 ibt_deregister_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
3356 {
3357 	ibcm_ar_t		*found;
3358 	ibcm_ar_t		*tmp;
3359 	ibcm_ar_t		**linkp;
3360 	ibcm_ar_ref_t		*hdlp;
3361 	ibcm_ar_ref_t		**hdlpp;
3362 	ibt_status_t		status;
3363 	ibmf_saa_handle_t	saa_handle;
3364 	sa_service_record_t	*srv_recp;
3365 	uint64_t		gid_ored;
3366 
3367 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: pkey %x", arp->ar_pkey);
3368 	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: gid %llx:%llx",
3369 	    (longlong_t)arp->ar_gid.gid_prefix,
3370 	    (longlong_t)arp->ar_gid.gid_guid);
3371 
3372 	/*
3373 	 * If P_Key is 0, but GID is not, this query is invalid.
3374 	 * If GID is 0, but P_Key is not, this query is invalid.
3375 	 */
3376 	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
3377 	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
3378 	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
3379 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3380 		    "GID/P_Key is not valid");
3381 		return (IBT_INVALID_PARAM);
3382 	}
3383 
3384 	mutex_enter(&ibcm_svc_info_lock);
3385 	/* search for existing GID/pkey (there can be at most 1) */
3386 	status = ibcm_search_ar(arp, &found);
3387 	if (status == IBT_INCONSISTENT_AR || status == IBT_AR_NOT_REGISTERED) {
3388 		mutex_exit(&ibcm_svc_info_lock);
3389 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3390 		    "address record not found");
3391 		return (IBT_AR_NOT_REGISTERED);
3392 	}
3393 	ASSERT(status == IBT_SUCCESS);
3394 
3395 	hdlpp = &found->ar_ibt_hdl_list;
3396 	hdlp = *hdlpp;
3397 	while (hdlp != NULL) {
3398 		if (hdlp->ar_ibt_hdl == ibt_hdl)
3399 			break;
3400 		hdlpp = &hdlp->ar_ref_link;
3401 		hdlp = *hdlpp;
3402 	}
3403 	if (hdlp == NULL) {	/* could not find ibt_hdl on list */
3404 		mutex_exit(&ibcm_svc_info_lock);
3405 		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3406 		    "address record found, but not for this client");
3407 		return (IBT_AR_NOT_REGISTERED);
3408 	}
3409 	*hdlpp = hdlp->ar_ref_link;	/* remove ref for this client */
3410 	if (found->ar_ibt_hdl_list == NULL && found->ar_waiters == 0) {
3411 		/* last entry was removed */
3412 		found->ar_flags = IBCM_AR_INITING; /* hold off register_ar */
3413 		saa_handle = found->ar_saa_handle;
3414 		srv_recp = found->ar_srv_recp;
3415 
3416 		/* wait if this service record is being rewritten */
3417 		while (found->ar_rewrite_state == IBCM_REWRITE_BUSY)
3418 			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
3419 		mutex_exit(&ibcm_svc_info_lock);
3420 
3421 		/* remove service record */
3422 		status = ibcm_write_service_record(saa_handle, srv_recp,
3423 		    IBMF_SAA_DELETE);
3424 		if (status != IBT_SUCCESS)
3425 			IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3426 			    "IBMF_SAA failed to delete address record");
3427 		mutex_enter(&ibcm_svc_info_lock);
3428 		if (found->ar_waiters == 0) {	/* still no waiters */
3429 			linkp = &ibcm_ar_list;
3430 			tmp = *linkp;
3431 			while (tmp != found) {
3432 				linkp = &tmp->ar_link;
3433 				tmp = *linkp;
3434 			}
3435 			*linkp = tmp->ar_link;
3436 			ibcm_dec_hca_acc_cnt(found->ar_hcap);
3437 			kmem_free(srv_recp, sizeof (*srv_recp));
3438 			cv_destroy(&found->ar_cv);
3439 			kmem_free(found, sizeof (*found));
3440 		} else {
3441 			/* add service record back in for the waiters */
3442 			mutex_exit(&ibcm_svc_info_lock);
3443 			status = ibcm_write_service_record(saa_handle, srv_recp,
3444 			    IBMF_SAA_UPDATE);
3445 			mutex_enter(&ibcm_svc_info_lock);
3446 			if (status == IBT_SUCCESS)
3447 				found->ar_flags = IBCM_AR_SUCCESS;
3448 			else {
3449 				found->ar_flags = IBCM_AR_FAILED;
3450 				IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
3451 				    "IBMF_SAA failed to write address record");
3452 			}
3453 			cv_broadcast(&found->ar_cv);
3454 		}
3455 	}
3456 	mutex_exit(&ibcm_svc_info_lock);
3457 	kmem_free(hdlp, sizeof (*hdlp));
3458 	ibtl_cm_change_service_cnt(ibt_hdl, -1);
3459 	return (status);
3460 }
3461 
3462 ibt_status_t
3463 ibt_query_ar(ib_gid_t *sgid, ibt_ar_t *queryp, ibt_ar_t *resultp)
3464 {
3465 	sa_service_record_t	svcrec_req;
3466 	sa_service_record_t	*svcrec_resp;
3467 	void			*results_p;
3468 	uint64_t		component_mask = 0;
3469 	uint64_t		gid_ored;
3470 	size_t			length;
3471 	int			num_rec;
3472 	int			i;
3473 	ibmf_saa_access_args_t	access_args;
3474 	ibt_status_t		retval;
3475 	ibtl_cm_hca_port_t	cm_port;
3476 	ibcm_hca_info_t		*hcap;
3477 	ibmf_saa_handle_t	saa_handle;
3478 
3479 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar(%p, %p)", queryp, resultp);
3480 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: sgid %llx:%llx",
3481 	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid);
3482 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_pkey %x", queryp->ar_pkey);
3483 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_gid %llx:%llx",
3484 	    (longlong_t)queryp->ar_gid.gid_prefix,
3485 	    (longlong_t)queryp->ar_gid.gid_guid);
3486 
3487 	/*
3488 	 * If P_Key is 0, but GID is not, this query is invalid.
3489 	 * If GID is 0, but P_Key is not, this query is invalid.
3490 	 */
3491 	gid_ored = queryp->ar_gid.gid_guid | queryp->ar_gid.gid_prefix;
3492 	if ((queryp->ar_pkey == 0 && gid_ored != 0ULL) ||
3493 	    (queryp->ar_pkey != 0 && gid_ored == 0ULL)) {
3494 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: GID/P_Key is not valid");
3495 		return (IBT_INVALID_PARAM);
3496 	}
3497 
3498 	hcap = NULL;
3499 	if (ibtl_cm_get_hca_port(*sgid, 0, &cm_port) != IBT_SUCCESS ||
3500 	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL ||
3501 	    (saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port)) == NULL) {
3502 		if (hcap != NULL)
3503 			ibcm_dec_hca_acc_cnt(hcap);
3504 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: sgid is not valid");
3505 		return (IBT_INVALID_PARAM);
3506 	}
3507 
3508 	bzero(&svcrec_req, sizeof (svcrec_req));
3509 
3510 	/* Is GID/P_Key Specified. */
3511 	if (queryp->ar_pkey != 0) {	/* GID is non-zero from check above */
3512 		svcrec_req.ServiceP_Key = queryp->ar_pkey;
3513 		component_mask |= SA_SR_COMPMASK_PKEY;
3514 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: P_Key %X",
3515 		    queryp->ar_pkey);
3516 		svcrec_req.ServiceGID = queryp->ar_gid;
3517 		component_mask |= SA_SR_COMPMASK_GID;
3518 		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: GID %llX:%llX",
3519 		    (longlong_t)queryp->ar_gid.gid_prefix,
3520 		    (longlong_t)queryp->ar_gid.gid_guid);
3521 	}
3522 
3523 	/* Is ServiceData Specified. */
3524 	for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++) {
3525 		if (queryp->ar_data[i] != 0) {
3526 			bcopy(queryp->ar_data, svcrec_req.ServiceData,
3527 			    IBCM_DAPL_ATS_NBYTES);
3528 			component_mask |= 0xFFFF << 7;	/* all 16 Data8 */
3529 							/* components */
3530 			break;
3531 		}
3532 	}
3533 
3534 	/* Service Name */
3535 	(void) strcpy((char *)svcrec_req.ServiceName, IBCM_DAPL_ATS_NAME);
3536 	component_mask |= SA_SR_COMPMASK_NAME;
3537 
3538 	svcrec_req.ServiceID = IBCM_DAPL_ATS_SID;
3539 	component_mask |= SA_SR_COMPMASK_ID;
3540 
3541 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3542 	    "Perform SA Access: Mask: 0x%X", component_mask);
3543 
3544 	/*
3545 	 * Call in SA Access retrieve routine to get Service Records.
3546 	 *
3547 	 * SA Access framework allocated memory for the "results_p".
3548 	 * Make sure to deallocate once we are done with the results_p.
3549 	 * The size of the buffer allocated will be as returned in
3550 	 * "length" field.
3551 	 */
3552 	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
3553 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
3554 	access_args.sq_component_mask = component_mask;
3555 	access_args.sq_template = &svcrec_req;
3556 	access_args.sq_template_length = sizeof (sa_service_record_t);
3557 	access_args.sq_callback = NULL;
3558 	access_args.sq_callback_arg = NULL;
3559 
3560 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
3561 	    &results_p);
3562 
3563 	ibcm_dec_hca_acc_cnt(hcap);
3564 	if (retval != IBT_SUCCESS) {
3565 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: SA Access Failed");
3566 		return (retval);
3567 	}
3568 
3569 	num_rec = length / sizeof (sa_service_record_t);
3570 
3571 	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
3572 	    "Found %d Service Records.", num_rec);
3573 
3574 	/* Validate the returned number of records. */
3575 	if ((results_p != NULL) && (num_rec > 0)) {
3576 		uint8_t		*b;
3577 
3578 		/* Just return info from the first service record. */
3579 		svcrec_resp = (sa_service_record_t *)results_p;
3580 
3581 		/* The Service GID and Service ID */
3582 		resultp->ar_gid = svcrec_resp->ServiceGID;
3583 		resultp->ar_pkey = svcrec_resp->ServiceP_Key;
3584 		bcopy(svcrec_resp->ServiceData,
3585 		    resultp->ar_data, IBCM_DAPL_ATS_NBYTES);
3586 
3587 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3588 		    "Found: pkey %x dgid %llX:%llX", resultp->ar_pkey,
3589 		    (longlong_t)resultp->ar_gid.gid_prefix,
3590 		    (longlong_t)resultp->ar_gid.gid_guid);
3591 		b = resultp->ar_data;
3592 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar:"
3593 		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
3594 		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
3595 		    b[10], b[11], b[12], b[13], b[14], b[15]);
3596 
3597 		/* Deallocate the memory for results_p. */
3598 		kmem_free(results_p, length);
3599 		if (num_rec > 1)
3600 			retval = IBT_MULTIPLE_AR;
3601 		else
3602 			retval = IBT_SUCCESS;
3603 	} else {
3604 		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
3605 		    "ibmf_sa_access found 0 matching records");
3606 		retval = IBT_AR_NOT_REGISTERED;
3607 	}
3608 	return (retval);
3609 }
3610 
3611 /* mark all ATS service records associated with the port */
3612 static void
3613 ibcm_mark_ar(ib_guid_t hca_guid, uint8_t port)
3614 {
3615 	ibcm_ar_t	*tmp;
3616 
3617 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3618 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3619 		if (tmp->ar_hcap->hca_guid == hca_guid &&
3620 		    tmp->ar_port == port) {
3621 			/* even if it's busy, we mark it for rewrite */
3622 			tmp->ar_rewrite_state = IBCM_REWRITE_NEEDED;
3623 		}
3624 	}
3625 }
3626 
3627 /* rewrite all ATS service records */
3628 static int
3629 ibcm_rewrite_ar(void)
3630 {
3631 	ibcm_ar_t		*tmp;
3632 	ibmf_saa_handle_t	saa_handle;
3633 	sa_service_record_t	*srv_recp;
3634 	ibt_status_t		rval;
3635 	int			did_something = 0;
3636 
3637 	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
3638 check_for_work:
3639 	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
3640 		if (tmp->ar_rewrite_state == IBCM_REWRITE_NEEDED) {
3641 			tmp->ar_rewrite_state = IBCM_REWRITE_BUSY;
3642 			saa_handle = tmp->ar_saa_handle;
3643 			srv_recp = tmp->ar_srv_recp;
3644 			mutex_exit(&ibcm_svc_info_lock);
3645 			IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_ar: "
3646 			    "rewriting ar @ %p", tmp);
3647 			did_something = 1;
3648 			rval = ibcm_write_service_record(saa_handle, srv_recp,
3649 			    IBMF_SAA_UPDATE);
3650 			if (rval != IBT_SUCCESS)
3651 				IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_ar: "
3652 				    "ibcm_write_service_record failed: "
3653 				    "status = %d", rval);
3654 			mutex_enter(&ibcm_svc_info_lock);
3655 			/* if it got marked again, then we want to rewrite */
3656 			if (tmp->ar_rewrite_state == IBCM_REWRITE_BUSY)
3657 				tmp->ar_rewrite_state = IBCM_REWRITE_IDLE;
3658 			/* in case there was a waiter... */
3659 			cv_broadcast(&ibcm_svc_info_cv);
3660 			goto check_for_work;
3661 		}
3662 	}
3663 	return (did_something);
3664 }
3665 
3666 static void
3667 ibcm_rewrite_svc_record(ibcm_svc_info_t *srv_hdl, ibcm_svc_bind_t *sbindp)
3668 {
3669 	ibcm_hca_info_t		*hcap;
3670 	ib_svc_id_t		sid, start_sid, end_sid;
3671 	ibmf_saa_handle_t	saa_handle;
3672 	sa_service_record_t	srv_rec;
3673 	ibt_status_t		rval;
3674 
3675 	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);
3676 	if (hcap == NULL) {
3677 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3678 		    "NO HCA found for HCA GUID %llX", sbindp->sbind_hcaguid);
3679 		return;
3680 	}
3681 
3682 	saa_handle = ibcm_get_saa_handle(hcap, sbindp->sbind_port);
3683 	if (saa_handle == NULL) {
3684 		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
3685 		    "saa_handle is NULL");
3686 		ibcm_dec_hca_acc_cnt(hcap);
3687 		return;
3688 	}
3689 
3690 	IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_svc_record: "
3691 	    "rewriting svc '%s', port_guid = %llX", sbindp->sbind_name,
3692 	    sbindp->sbind_gid.gid_guid);
3693 
3694 	bzero(&srv_rec, sizeof (srv_rec));
3695 
3696 	srv_rec.ServiceLease = sbindp->sbind_lease;
3697 	srv_rec.ServiceP_Key = sbindp->sbind_pkey;
3698 	srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
3699 	srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
3700 	(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);
3701 	srv_rec.ServiceGID = sbindp->sbind_gid;
3702 
3703 	bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);
3704 
3705 	/* insert srv record into the SA */
3706 	start_sid = srv_hdl->svc_id;
3707 	end_sid = start_sid + srv_hdl->svc_num_sids - 1;
3708 	for (sid = start_sid; sid <= end_sid; sid++) {
3709 		srv_rec.ServiceID = sid;
3710 
3711 		rval = ibcm_write_service_record(saa_handle, &srv_rec,
3712 		    IBMF_SAA_UPDATE);
3713 
3714 		IBTF_DPRINTF_L4(cmlog, "ibcm_rewrite_svc_record: "
3715 		    "ibcm_write_service_record, SvcId = %llX, "
3716 		    "rval = %d", (longlong_t)sid, rval);
3717 		if (rval != IBT_SUCCESS) {
3718 			IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record:"
3719 			    " ibcm_write_service_record fails %d sid %llX",
3720 			    rval, (longlong_t)sid);
3721 		}
3722 	}
3723 	ibcm_dec_hca_acc_cnt(hcap);
3724 }
3725 
3726 /*
3727  * Task to mark all service records as needing to be rewritten to the SM/SA.
3728  * This task does not return until all of them have been rewritten.
3729  */
3730 void
3731 ibcm_service_record_rewrite_task(void *arg)
3732 {
3733 	ibcm_port_up_t	*pup = (ibcm_port_up_t *)arg;
3734 	ib_guid_t	hca_guid = pup->pup_hca_guid;
3735 	uint8_t		port = pup->pup_port;
3736 	ibcm_svc_info_t	*svcp;
3737 	ibcm_svc_bind_t	*sbp;
3738 	avl_tree_t	*avl_tree = &ibcm_svc_avl_tree;
3739 	static int	task_is_running = 0;
3740 
3741 	IBTF_DPRINTF_L2(cmlog, "ibcm_service_record_rewrite_task STARTED "
3742 	    "for hca_guid %llX, port %d", hca_guid, port);
3743 
3744 	mutex_enter(&ibcm_svc_info_lock);
3745 	ibcm_mark_ar(hca_guid, port);
3746 	for (svcp = avl_first(avl_tree); svcp != NULL;
3747 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3748 		sbp = svcp->svc_bind_list;
3749 		while (sbp != NULL) {
3750 			if (sbp->sbind_pkey != 0 &&
3751 			    sbp->sbind_port == port &&
3752 			    sbp->sbind_hcaguid == hca_guid) {
3753 				/* even if it's busy, we mark it for rewrite */
3754 				sbp->sbind_rewrite_state = IBCM_REWRITE_NEEDED;
3755 			}
3756 			sbp = sbp->sbind_link;
3757 		}
3758 	}
3759 	if (task_is_running) {
3760 		/* let the other task thread finish the work */
3761 		mutex_exit(&ibcm_svc_info_lock);
3762 		return;
3763 	}
3764 	task_is_running = 1;
3765 
3766 	(void) ibcm_rewrite_ar();
3767 
3768 check_for_work:
3769 	for (svcp = avl_first(avl_tree); svcp != NULL;
3770 	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
3771 		sbp = svcp->svc_bind_list;
3772 		while (sbp != NULL) {
3773 			if (sbp->sbind_rewrite_state == IBCM_REWRITE_NEEDED) {
3774 				sbp->sbind_rewrite_state = IBCM_REWRITE_BUSY;
3775 				mutex_exit(&ibcm_svc_info_lock);
3776 				ibcm_rewrite_svc_record(svcp, sbp);
3777 				mutex_enter(&ibcm_svc_info_lock);
3778 				/* if it got marked again, we want to rewrite */
3779 				if (sbp->sbind_rewrite_state ==
3780 				    IBCM_REWRITE_BUSY)
3781 					sbp->sbind_rewrite_state =
3782 					    IBCM_REWRITE_IDLE;
3783 				/* in case there was a waiter... */
3784 				cv_broadcast(&ibcm_svc_info_cv);
3785 				goto check_for_work;
3786 			}
3787 			sbp = sbp->sbind_link;
3788 		}
3789 	}
3790 	/*
3791 	 * If there were no service records to write, and we failed to
3792 	 * have to rewrite any more ATS service records, then we're done.
3793 	 */
3794 	if (ibcm_rewrite_ar() != 0)
3795 		goto check_for_work;
3796 	task_is_running = 0;
3797 	mutex_exit(&ibcm_svc_info_lock);
3798 
3799 	IBTF_DPRINTF_L2(cmlog, "ibcm_service_record_rewrite_task DONE");
3800 	kmem_free(pup, sizeof (ibcm_port_up_t));
3801 }
3802 
3803 
3804 /*
3805  * Function:
3806  * 	ibt_cm_proceed
3807  *
3808  * Verifies the arguments and dispatches the cm state machine processing
3809  * via taskq
3810  */
3811 
3812 ibt_status_t
3813 ibt_cm_proceed(ibt_cm_event_type_t event, void *session_id,
3814     ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
3815     void *priv_data, ibt_priv_data_len_t priv_data_len)
3816 {
3817 	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
3818 	ibcm_proceed_targs_t	*proceed_targs;
3819 	ibcm_proceed_error_t	proceed_error;
3820 
3821 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_proceed chan 0x%p event %x status %x "
3822 	    "session_id %p", statep->channel, event, status, session_id);
3823 
3824 	IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed chan 0x%p cm_event_data %p, "
3825 	    "priv_data %p priv_data_len %x", statep->channel, cm_event_data,
3826 	    priv_data, priv_data_len);
3827 
3828 	/* validate session_id and status */
3829 	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
3830 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : Invalid Args");
3831 		return (IBT_INVALID_PARAM);
3832 	}
3833 
3834 	/* If priv data len specified, then priv_data cannot be NULL */
3835 	if ((priv_data_len > 0) && (priv_data == NULL))
3836 		return (IBT_INVALID_PARAM);
3837 
3838 	proceed_error = IBCM_PROCEED_INVALID_NONE;
3839 
3840 	mutex_enter(&statep->state_mutex);
3841 	if (event == IBT_CM_EVENT_REQ_RCV) {
3842 
3843 		if ((statep->state != IBCM_STATE_REQ_RCVD) &&
3844 		    (statep->state != IBCM_STATE_MRA_SENT))
3845 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3846 		else if (priv_data_len > IBT_REP_PRIV_DATA_SZ)
3847 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3848 
3849 	} else if (event == IBT_CM_EVENT_REP_RCV) {
3850 		if ((statep->state != IBCM_STATE_REP_RCVD) &&
3851 		    (statep->state != IBCM_STATE_MRA_REP_SENT))
3852 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3853 		else if (priv_data_len > IBT_RTU_PRIV_DATA_SZ)
3854 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3855 	} else if (event == IBT_CM_EVENT_LAP_RCV) {
3856 		if ((statep->ap_state != IBCM_AP_STATE_LAP_RCVD) &&
3857 		    (statep->ap_state != IBCM_AP_STATE_MRA_LAP_SENT))
3858 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3859 		else if (priv_data_len > IBT_APR_PRIV_DATA_SZ)
3860 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3861 	} else if (event == IBT_CM_EVENT_CONN_CLOSED) {
3862 		if (statep->state != IBCM_STATE_DREQ_RCVD)
3863 			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
3864 		else if (priv_data_len > IBT_DREP_PRIV_DATA_SZ)
3865 			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
3866 	} else {
3867 			proceed_error = IBCM_PROCEED_INVALID_EVENT;
3868 	}
3869 
3870 	/* if there is an error, print an error message and return */
3871 	if (proceed_error != IBCM_PROCEED_INVALID_NONE) {
3872 		mutex_exit(&statep->state_mutex);
3873 		if (proceed_error == IBCM_PROCEED_INVALID_EVENT_STATE) {
3874 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3875 			    "Invalid Event/State combination specified",
3876 			    statep->channel);
3877 			return (IBT_INVALID_PARAM);
3878 		} else if (proceed_error == IBCM_PROCEED_INVALID_PRIV_SZ) {
3879 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3880 			    "Invalid Event/priv len combination specified",
3881 			    statep->channel);
3882 			return (IBT_INVALID_PARAM);
3883 		} else if (proceed_error == IBCM_PROCEED_INVALID_EVENT) {
3884 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3885 			    "Invalid Event specified", statep->channel);
3886 			return (IBT_INVALID_PARAM);
3887 		} else {
3888 			ASSERT(proceed_error == IBCM_PROCEED_INVALID_LAP);
3889 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p"
3890 			    "IBT_CM_EVENT_LAP_RCV not supported",
3891 			    statep->channel);
3892 			/* UNTIL HCA DRIVER ENABLES AP SUPPORT, FAIL THE CALL */
3893 			return (IBT_APM_NOT_SUPPORTED);
3894 		}
3895 	}
3896 
3897 
3898 	/* wait until client's CM handler returns DEFER status back to CM */
3899 
3900 	while (statep->clnt_proceed == IBCM_BLOCK) {
3901 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed : chan 0x%p blocked for "
3902 		    "return of client's cm handler", statep->channel);
3903 		cv_wait(&statep->block_client_cv, &statep->state_mutex);
3904 	}
3905 
3906 	if (statep->clnt_proceed == IBCM_FAIL) {
3907 		mutex_exit(&statep->state_mutex);
3908 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p Failed as "
3909 		    "client returned non-DEFER status from cm handler",
3910 		    statep->channel);
3911 		return (IBT_CHAN_STATE_INVALID);
3912 	}
3913 
3914 	ASSERT(statep->clnt_proceed == IBCM_UNBLOCK);
3915 	statep->clnt_proceed = IBCM_FAIL;
3916 	mutex_exit(&statep->state_mutex);
3917 
3918 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
3919 
3920 	/* the state machine processing is done in a separate thread */
3921 
3922 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
3923 	proceed_targs = kmem_alloc(sizeof (ibcm_proceed_targs_t),
3924 	    KM_SLEEP);
3925 
3926 	proceed_targs->event  = event;
3927 	proceed_targs->status = status;
3928 	proceed_targs->priv_data_len = priv_data_len;
3929 
3930 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
3931 
3932 	proceed_targs->tst.rc.statep = statep;
3933 	bcopy(cm_event_data, &proceed_targs->tst.rc.rc_cm_event_data,
3934 	    sizeof (ibt_cm_proceed_reply_t));
3935 
3936 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
3937 
3938 	(void) taskq_dispatch(ibcm_taskq, ibcm_proceed_via_taskq,
3939 	    proceed_targs, TQ_SLEEP);
3940 
3941 	return (IBT_SUCCESS);
3942 }
3943 
3944 /*
3945  * Function:
3946  * 	ibcm_proceed_via_taskq
3947  *
3948  * Called from taskq, dispatched by ibt_cm_proceed
3949  * Completes the cm state processing for ibt_cm_proceed
3950  */
3951 void
3952 ibcm_proceed_via_taskq(void *targs)
3953 {
3954 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
3955 	ibcm_state_data_t *statep = proceed_targs->tst.rc.statep;
3956 	ibt_cm_reason_t reject_reason;
3957 	uint8_t arej_len;
3958 	ibcm_status_t response;
3959 	ibcm_clnt_reply_info_t clnt_info;
3960 
3961 	clnt_info.reply_event = &proceed_targs->tst.rc.rc_cm_event_data;
3962 	clnt_info.priv_data = proceed_targs->priv_data;
3963 	clnt_info.priv_data_len = proceed_targs->priv_data_len;
3964 
3965 	IBTF_DPRINTF_L4(cmlog, "ibcm_proceed_via_taskq chan 0x%p targs %x",
3966 	    statep->channel, targs);
3967 
3968 	if (proceed_targs->event == IBT_CM_EVENT_REQ_RCV) {
3969 		response =
3970 		    ibcm_process_cep_req_cm_hdlr(statep, proceed_targs->status,
3971 		    &clnt_info, &reject_reason, &arej_len,
3972 		    (ibcm_req_msg_t *)statep->defer_cm_msg);
3973 
3974 		ibcm_handle_cep_req_response(statep, response, reject_reason,
3975 		    arej_len);
3976 
3977 	} else if (proceed_targs->event == IBT_CM_EVENT_REP_RCV) {
3978 		response =
3979 		    ibcm_process_cep_rep_cm_hdlr(statep, proceed_targs->status,
3980 		    &clnt_info, &reject_reason, &arej_len,
3981 		    (ibcm_rep_msg_t *)statep->defer_cm_msg);
3982 
3983 		ibcm_handle_cep_rep_response(statep, response, reject_reason,
3984 		    arej_len, (ibcm_rep_msg_t *)statep->defer_cm_msg);
3985 
3986 	} else if (proceed_targs->event == IBT_CM_EVENT_LAP_RCV) {
3987 		ibcm_process_cep_lap_cm_hdlr(statep, proceed_targs->status,
3988 		    &clnt_info, (ibcm_lap_msg_t *)statep->defer_cm_msg,
3989 		    (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg));
3990 
3991 		ibcm_post_apr_mad(statep);
3992 
3993 	} else {
3994 		ASSERT(proceed_targs->event == IBT_CM_EVENT_CONN_CLOSED);
3995 		ibcm_handle_cep_dreq_response(statep, proceed_targs->priv_data,
3996 		    proceed_targs->priv_data_len);
3997 	}
3998 
3999 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4000 }
4001 
4002 /*
4003  * Function:
4004  * 	ibt_cm_ud_proceed
4005  *
4006  * Verifies the arguments and dispatches the cm state machine processing
4007  * via taskq
4008  */
4009 ibt_status_t
4010 ibt_cm_ud_proceed(void *session_id, ibt_channel_hdl_t ud_channel,
4011     ibt_cm_status_t status, ibt_redirect_info_t *redirect_infop,
4012     void *priv_data, ibt_priv_data_len_t priv_data_len)
4013 {
4014 	ibcm_ud_state_data_t *ud_statep = (ibcm_ud_state_data_t *)session_id;
4015 	ibcm_proceed_targs_t	*proceed_targs;
4016 	ibt_qp_query_attr_t	qp_attr;
4017 	ibt_status_t		retval;
4018 
4019 	IBTF_DPRINTF_L3(cmlog, "ibt_cm_ud_proceed session_id %p "
4020 	    "ud_channel %p ", session_id, ud_channel);
4021 
4022 	IBTF_DPRINTF_L4(cmlog, "ibt_cm_ud_proceed status %x priv_data %p "
4023 	    "priv_data_len %x",  status, priv_data, priv_data_len);
4024 
4025 	/* validate session_id and status */
4026 	if ((ud_statep == NULL) || (status == IBT_CM_DEFER)) {
4027 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid Args");
4028 		return (IBT_INVALID_PARAM);
4029 	}
4030 
4031 	/* If priv data len specified, then priv_data cannot be NULL */
4032 	if ((priv_data_len > 0) && (priv_data == NULL))
4033 		return (IBT_INVALID_PARAM);
4034 
4035 	if (priv_data_len > IBT_SIDR_REP_PRIV_DATA_SZ)
4036 		return (IBT_INVALID_PARAM);
4037 
4038 	/* retrieve qpn and qkey from ud channel */
4039 
4040 	/* validate event and statep's state */
4041 
4042 	if (status == IBT_CM_ACCEPT) {
4043 		retval = ibt_query_qp(ud_channel, &qp_attr);
4044 		if ((retval != IBT_SUCCESS) ||
4045 		    (qp_attr.qp_info.qp_trans != IBT_UD_SRV)) {
4046 			IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed: "
4047 			    "Failed to retrieve QPN from the channel: %d",
4048 			    retval);
4049 			return (IBT_INVALID_PARAM);
4050 		}
4051 	}
4052 
4053 
4054 	mutex_enter(&ud_statep->ud_state_mutex);
4055 
4056 	if (ud_statep->ud_state != IBCM_STATE_SIDR_REQ_RCVD) {
4057 		mutex_exit(&ud_statep->ud_state_mutex);
4058 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid State "
4059 		    "specified");
4060 		return (IBT_INVALID_PARAM);
4061 	}
4062 
4063 	/* wait until client's CM handler returns DEFER status back to CM */
4064 
4065 	while (ud_statep->ud_clnt_proceed == IBCM_BLOCK) {
4066 		IBTF_DPRINTF_L5(cmlog, "ibt_cm_ud_proceed : Blocked for return"
4067 		    " of client's ud cm handler");
4068 		cv_wait(&ud_statep->ud_block_client_cv,
4069 		    &ud_statep->ud_state_mutex);
4070 	}
4071 
4072 	if (ud_statep->ud_clnt_proceed == IBCM_FAIL) {
4073 		mutex_exit(&ud_statep->ud_state_mutex);
4074 		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Failed as client "
4075 		    "returned non-DEFER status from cm handler");
4076 		return (IBT_INVALID_PARAM);
4077 	}
4078 
4079 	ASSERT(ud_statep->ud_clnt_proceed == IBCM_UNBLOCK);
4080 	ud_statep->ud_clnt_proceed = IBCM_FAIL;
4081 	mutex_exit(&ud_statep->ud_state_mutex);
4082 
4083 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))
4084 
4085 	/* the state machine processing is done in a separate thread */
4086 
4087 	/* proceed_targs is freed in ibcm_proceed_via_taskq */
4088 	proceed_targs = kmem_zalloc(sizeof (ibcm_proceed_targs_t),
4089 	    KM_SLEEP);
4090 
4091 	proceed_targs->status = status;
4092 	proceed_targs->priv_data_len = priv_data_len;
4093 
4094 	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);
4095 
4096 	if (status == IBT_CM_ACCEPT) {
4097 		proceed_targs->tst.ud.ud_qkey =
4098 		    qp_attr.qp_info.qp_transport.ud.ud_qkey;
4099 		proceed_targs->tst.ud.ud_qpn = qp_attr.qp_qpn;
4100 	}
4101 
4102 	proceed_targs->tst.ud.ud_statep = ud_statep;
4103 
4104 	/* copy redirect info based on status */
4105 	if (status == IBT_CM_REDIRECT)
4106 		bcopy(redirect_infop, &proceed_targs->tst.ud.ud_redirect_info,
4107 		    sizeof (ibt_redirect_info_t));
4108 
4109 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))
4110 
4111 	(void) taskq_dispatch(ibcm_taskq, ibcm_ud_proceed_via_taskq,
4112 	    proceed_targs, TQ_SLEEP);
4113 
4114 	return (IBT_SUCCESS);
4115 }
4116 
4117 /*
4118  * Function:
4119  * 	ibcm_ud_proceed_via_taskq
4120  *
4121  * Called from taskq, dispatched by ibt_cm_ud_proceed
4122  * Completes the cm state processing for ibt_cm_ud_proceed
4123  */
4124 void
4125 ibcm_ud_proceed_via_taskq(void *targs)
4126 {
4127 	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
4128 	ibcm_ud_state_data_t	*ud_statep = proceed_targs->tst.ud.ud_statep;
4129 	ibcm_ud_clnt_reply_info_t ud_clnt_info;
4130 	ibt_sidr_status_t	sidr_status;
4131 
4132 	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_proceed_via_taskq(%p)", targs);
4133 
4134 	ud_clnt_info.ud_qpn  = proceed_targs->tst.ud.ud_qpn;
4135 	ud_clnt_info.ud_qkey  = proceed_targs->tst.ud.ud_qkey;
4136 	ud_clnt_info.priv_data = proceed_targs->priv_data;
4137 	ud_clnt_info.priv_data_len = proceed_targs->priv_data_len;
4138 	ud_clnt_info.redirect_infop = &proceed_targs->tst.ud.ud_redirect_info;
4139 
4140 	/* validate event and statep's state */
4141 	ibcm_process_sidr_req_cm_hdlr(ud_statep, proceed_targs->status,
4142 	    &ud_clnt_info, &sidr_status,
4143 	    (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg));
4144 
4145 	ibcm_post_sidr_rep_mad(ud_statep, sidr_status);
4146 
4147 	/* decr the statep ref cnt incremented in ibcm_process_sidr_req_msg */
4148 	mutex_enter(&ud_statep->ud_state_mutex);
4149 	IBCM_UD_REF_CNT_DECR(ud_statep);
4150 	mutex_exit(&ud_statep->ud_state_mutex);
4151 
4152 	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
4153 }
4154 
4155 /*
4156  * Function:
4157  *	ibt_set_alt_path
4158  * Input:
4159  *	channel		Channel handle returned from ibt_alloc_rc_channel(9F).
4160  *
4161  *	mode		Execute in blocking or non blocking mode.
4162  *
4163  *	alt_path	A pointer to an ibt_alt_path_info_t as returned from an
4164  *			ibt_get_alt_path(9F) call that specifies the new
4165  *			alternate path.
4166  *
4167  *	priv_data       A pointer to a buffer specified by caller for the
4168  *			private data in the outgoing CM Load Alternate Path
4169  *			(LAP) message sent to the remote host. This can be NULL
4170  *			if no private data is available to communicate to the
4171  *			remote node.
4172  *
4173  *	priv_data_len   Length of valid data in priv_data, this should be less
4174  *			than or equal to IBT_LAP_PRIV_DATA_SZ.
4175  *
4176  * Output:
4177  *	ret_args	If called in blocking mode, points to a return argument
4178  *			structure of type ibt_ap_returns_t.
4179  *
4180  * Returns:
4181  *	IBT_SUCCESS on Success else appropriate error.
4182  * Description:
4183  *	Load the specified alternate path. Causes the CM to send an LAP message
4184  *	to the remote node.
4185  *	Can only be called on a previously opened RC channel.
4186  */
4187 ibt_status_t
4188 ibt_set_alt_path(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
4189     ibt_alt_path_info_t *alt_path, void *priv_data,
4190     ibt_priv_data_len_t priv_data_len, ibt_ap_returns_t *ret_args)
4191 {
4192 	ibmf_handle_t		ibmf_hdl;
4193 	ibt_status_t		status = IBT_SUCCESS;
4194 	ibcm_lap_msg_t		*lap_msgp;
4195 	ibcm_hca_info_t		*hcap;
4196 	ibcm_state_data_t	*statep;
4197 	uint8_t			port_no;
4198 	ib_lid_t		alternate_slid;
4199 	ibt_priv_data_len_t	len;
4200 	ib_lid_t		base_lid;
4201 	boolean_t		alt_grh;
4202 
4203 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path(%p, %x, %p, %p, %x, %p)",
4204 	    channel, mode, alt_path, priv_data, priv_data_len, ret_args);
4205 
4206 	/* validate channel */
4207 	if (IBCM_INVALID_CHANNEL(channel)) {
4208 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: invalid channel");
4209 		return (IBT_CHAN_HDL_INVALID);
4210 	}
4211 
4212 	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
4213 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4214 		    "Invalid Channel type: Applicable only to RC Channel");
4215 		return (IBT_CHAN_SRV_TYPE_INVALID);
4216 	}
4217 
4218 	if (mode == IBT_NONBLOCKING) {
4219 		if (ret_args != NULL) {
4220 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4221 			    "ret_args should be NULL when called in "
4222 			    "non-blocking mode");
4223 			return (IBT_INVALID_PARAM);
4224 		}
4225 	} else if (mode == IBT_BLOCKING) {
4226 		if (ret_args == NULL) {
4227 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4228 			    "ret_args should be Non-NULL when called in "
4229 			    "blocking mode");
4230 			return (IBT_INVALID_PARAM);
4231 		}
4232 		if (ret_args->ap_priv_data_len > IBT_APR_PRIV_DATA_SZ) {
4233 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4234 			    "expected private data length is too large");
4235 			return (IBT_INVALID_PARAM);
4236 		}
4237 		if ((ret_args->ap_priv_data_len > 0) &&
4238 		    (ret_args->ap_priv_data == NULL)) {
4239 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4240 			    "apr_priv_data_len > 0, but apr_priv_data NULL");
4241 			return (IBT_INVALID_PARAM);
4242 		}
4243 	} else { /* any other mode is not valid for ibt_set_alt_path */
4244 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4245 		    "invalid mode %x specified", mode);
4246 		return (IBT_INVALID_PARAM);
4247 	}
4248 
4249 	if ((port_no = alt_path->ap_alt_cep_path.cep_hca_port_num) == 0)
4250 		return (IBT_INVALID_PARAM);
4251 
4252 	/* get the statep */
4253 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4254 	if (statep == NULL) {
4255 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: statep NULL");
4256 		return (IBT_CM_FAILURE);
4257 	}
4258 
4259 	mutex_enter(&statep->state_mutex);
4260 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4261 	IBCM_REF_CNT_INCR(statep);
4262 	mutex_exit(&statep->state_mutex);
4263 
4264 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: statep %p", statep);
4265 
4266 	hcap = statep->hcap;
4267 
4268 	/* HCA must have been in active state. If not, it's a client bug */
4269 	if (!IBCM_ACCESS_HCA_OK(hcap))
4270 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: hca in error state");
4271 
4272 	ASSERT(statep->cm_handler != NULL);
4273 
4274 	/* Check Alternate port */
4275 	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no, NULL,
4276 	    &base_lid);
4277 	if (status != IBT_SUCCESS) {
4278 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4279 		    "ibt_get_port_state_byguid status %d ", status);
4280 		mutex_enter(&statep->state_mutex);
4281 		IBCM_REF_CNT_DECR(statep);
4282 		mutex_exit(&statep->state_mutex);
4283 		return (status);
4284 	}
4285 
4286 	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
4287 	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
4288 	    != IBT_SUCCESS)) {
4289 		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4290 		    "ibmf reg or callback setup failed during re-initialize");
4291 		mutex_enter(&statep->state_mutex);
4292 		IBCM_REF_CNT_DECR(statep);
4293 		mutex_exit(&statep->state_mutex);
4294 		return (status);
4295 	}
4296 
4297 	ibmf_hdl = statep->stored_reply_addr.ibmf_hdl;
4298 
4299 	alternate_slid = base_lid +
4300 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_src_path;
4301 
4302 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: alternate SLID = %x",
4303 	    h2b16(alternate_slid));
4304 
4305 	ibcm_lapr_enter();	/* limit how many run simultaneously */
4306 
4307 	/* Allocate MAD for LAP */
4308 	if (statep->lapr_msg == NULL)
4309 		if ((status = ibcm_alloc_out_msg(ibmf_hdl, &statep->lapr_msg,
4310 		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
4311 			ibcm_lapr_exit();
4312 			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
4313 			    "chan 0x%p ibcm_alloc_out_msg failed", channel);
4314 			mutex_enter(&statep->state_mutex);
4315 			IBCM_REF_CNT_DECR(statep);
4316 			mutex_exit(&statep->state_mutex);
4317 			return (status);
4318 		}
4319 
4320 	mutex_enter(&statep->state_mutex);
4321 
4322 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: connection state is"
4323 	    " %x", statep->state);
4324 
4325 	/* Check state */
4326 	if ((statep->state != IBCM_STATE_ESTABLISHED) ||
4327 	    (statep->ap_state != IBCM_AP_STATE_IDLE)) {
4328 		IBCM_REF_CNT_DECR(statep);
4329 		mutex_exit(&statep->state_mutex);
4330 		(void) ibcm_free_out_msg(ibmf_hdl, &statep->lapr_msg);
4331 		ibcm_lapr_exit();
4332 		return (IBT_CHAN_STATE_INVALID);
4333 	} else {
4334 		/* Set to LAP Sent state */
4335 		statep->ap_state = IBCM_AP_STATE_LAP_SENT;
4336 		statep->ap_done = B_FALSE;
4337 		statep->remaining_retry_cnt = statep->max_cm_retries;
4338 		statep->timer_stored_state = statep->state;
4339 		statep->timer_stored_ap_state = statep->ap_state;
4340 		IBCM_REF_CNT_INCR(statep); /* for ibcm_post_lap_complete */
4341 	}
4342 
4343 	mutex_exit(&statep->state_mutex);
4344 
4345 	/* No more failure returns below */
4346 
4347 	/* Allocate MAD for LAP */
4348 	IBTF_DPRINTF_L5(cmlog, "ibt_set_alt_path:"
4349 	    " statep's mad addr = 0x%p", IBCM_OUT_HDRP(statep->lapr_msg));
4350 
4351 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*lap_msgp))
4352 
4353 	lap_msgp = (ibcm_lap_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);
4354 
4355 	lap_msgp->lap_alt_l_port_lid = h2b16(alternate_slid);
4356 	lap_msgp->lap_alt_r_port_lid =
4357 	    h2b16(alt_path->ap_alt_cep_path.cep_adds_vect.av_dlid);
4358 
4359 	/* Fill in remote port gid */
4360 	lap_msgp->lap_alt_r_port_gid.gid_prefix =
4361 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_prefix);
4362 	lap_msgp->lap_alt_r_port_gid.gid_guid =
4363 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_guid);
4364 
4365 	/* Fill in local port gid */
4366 	lap_msgp->lap_alt_l_port_gid.gid_prefix =
4367 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_prefix);
4368 	lap_msgp->lap_alt_l_port_gid.gid_guid =
4369 	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_guid);
4370 
4371 	alt_grh = alt_path->ap_alt_cep_path.cep_adds_vect.av_send_grh;
4372 
4373 	/* alternate_flow_label, and alternate srate, alternate traffic class */
4374 	lap_msgp->lap_alt_srate_plus =
4375 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srate & 0x3f;
4376 	lap_msgp->lap_alt_flow_label_plus = h2b32(((alt_grh == B_TRUE) ?
4377 	    (alt_path->ap_alt_cep_path.cep_adds_vect.av_flow << 12) : 0) |
4378 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_tclass);
4379 
4380 	/* Alternate hop limit, service level */
4381 	lap_msgp->lap_alt_hop_limit = (alt_grh == B_TRUE) ?
4382 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_hop : 0xff;
4383 	lap_msgp->lap_alt_sl_plus =
4384 	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srvl << 4 |
4385 	    ((alt_grh == B_FALSE) ? 0x8 : 0);
4386 
4387 	lap_msgp->lap_alt_local_acktime_plus = ibt_usec2ib(
4388 	    (2 * statep->rc_alt_pkt_lt) +
4389 	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;
4390 
4391 	lap_msgp->lap_local_comm_id = h2b32(statep->local_comid);
4392 	lap_msgp->lap_remote_comm_id = h2b32(statep->remote_comid);
4393 
4394 	lap_msgp->lap_remote_qpn_eecn_plus =
4395 	    h2b32((statep->remote_qpn << 8) |
4396 	    ibt_usec2ib(ibcm_remote_response_time) << 3);
4397 
4398 	len = min(priv_data_len, IBT_LAP_PRIV_DATA_SZ);
4399 	if ((len > 0) && priv_data) {
4400 		bcopy(priv_data, lap_msgp->lap_private_data, len);
4401 	}
4402 
4403 	/* only rc_alt_pkt_lt and ap_return_data fields are initialized */
4404 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))
4405 
4406 	statep->rc_alt_pkt_lt = ibt_ib2usec(alt_path->ap_alt_pkt_lt);
4407 
4408 	/* return_data is filled up in the state machine code */
4409 	statep->ap_return_data = ret_args;
4410 
4411 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))
4412 
4413 	IBCM_OUT_HDRP(statep->lapr_msg)->AttributeID =
4414 	    h2b16(IBCM_INCOMING_LAP + IBCM_ATTR_BASE_ID);
4415 
4416 	IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID =
4417 	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_LAP, statep->local_comid,
4418 	    0));
4419 	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path: statep %p, tid %llx",
4420 	    statep, IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID);
4421 
4422 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*lap_msgp))
4423 
4424 	/* Send LAP */
4425 	ibcm_post_rc_mad(statep, statep->lapr_msg, ibcm_post_lap_complete,
4426 	    statep);
4427 
4428 	mutex_enter(&statep->state_mutex);
4429 
4430 	if (mode == IBT_BLOCKING) {
4431 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: blocking");
4432 
4433 		/* wait for APR */
4434 		while (statep->ap_done != B_TRUE) {
4435 			cv_wait(&statep->block_client_cv,
4436 			    &statep->state_mutex);
4437 		}
4438 
4439 		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done blocking");
4440 
4441 		/*
4442 		 * In the case that ibt_set_alt_path fails,
4443 		 * change retval to IBT_CM_FAILURE
4444 		 */
4445 		if (statep->ap_return_data->ap_status != IBT_CM_AP_LOADED)
4446 			status = IBT_CM_FAILURE;
4447 
4448 	}
4449 
4450 	/* decrement the ref-count before leaving here */
4451 	IBCM_REF_CNT_DECR(statep);
4452 
4453 	mutex_exit(&statep->state_mutex);
4454 
4455 	ibcm_lapr_exit();
4456 
4457 	/* If this message isn't seen then ibt_set_alt_path failed */
4458 	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done");
4459 
4460 	return (status);
4461 }
4462 
4463 
4464 #ifdef DEBUG
4465 
4466 /*
4467  * ibcm_query_classport_info:
4468  *	Query classportinfo
4469  *
4470  * INPUTS:
4471  *	channel		- Channel that is associated with a statep
4472  *
4473  * RETURN VALUE: NONE
4474  * This function is currently used to generate a valid get method classport
4475  * info, and test CM functionality. There is no ibtl client interface to
4476  * generate a classportinfo. It is possible that CM may use classportinfo
4477  * from other nodes in the future, and most of the code below could be re-used.
4478  */
4479 void
4480 ibcm_query_classport_info(ibt_channel_hdl_t channel)
4481 {
4482 	ibcm_state_data_t	*statep;
4483 	ibmf_msg_t		*msgp;
4484 
4485 	IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info(%p)", channel);
4486 
4487 	/* validate channel, first */
4488 	if (IBCM_INVALID_CHANNEL(channel)) {
4489 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4490 		    "invalid channel (%p)", channel);
4491 		return;
4492 	}
4493 
4494 	/* get the statep */
4495 	IBCM_GET_CHAN_PRIVATE(channel, statep);
4496 
4497 	/*
4498 	 * This can happen, if the statep is already gone by a DREQ from
4499 	 * the remote side
4500 	 */
4501 	if (statep == NULL) {
4502 		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
4503 		    "statep NULL");
4504 		return;
4505 	}
4506 
4507 	mutex_enter(&statep->state_mutex);
4508 	IBCM_RELEASE_CHAN_PRIVATE(channel);
4509 	IBCM_REF_CNT_INCR(statep);
4510 	mutex_exit(&statep->state_mutex);
4511 
4512 	/* Debug/test code, so don't care about return status */
4513 	(void) ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp,
4514 	    MAD_METHOD_GET);
4515 
4516 	IBCM_OUT_HDRP(msgp)->TransactionID = h2b64(ibcm_generate_tranid(
4517 	    MAD_ATTR_ID_CLASSPORTINFO, statep->local_comid, 0));
4518 	IBCM_OUT_HDRP(msgp)->AttributeID = h2b16(MAD_ATTR_ID_CLASSPORTINFO);
4519 
4520 	(void) ibcm_post_mad(msgp, &statep->stored_reply_addr, NULL, NULL);
4521 
4522 	IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info(%p) "
4523 	    "Get method MAD posted ", channel);
4524 
4525 	(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp);
4526 
4527 	mutex_enter(&statep->state_mutex);
4528 	IBCM_REF_CNT_DECR(statep);
4529 	mutex_exit(&statep->state_mutex);
4530 }
4531 
4532 static void
4533 ibcm_print_reply_addr(ibt_channel_hdl_t channel, ibcm_mad_addr_t *cm_reply_addr)
4534 {
4535 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: chan 0x%p, SLID %x, "
4536 	    "DLID %x", channel, cm_reply_addr->rcvd_addr.ia_local_lid,
4537 	    cm_reply_addr->rcvd_addr.ia_remote_lid);
4538 
4539 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: QKEY %x, PKEY %x, "
4540 	    "RQPN %x SL %x", cm_reply_addr->rcvd_addr.ia_q_key,
4541 	    cm_reply_addr->rcvd_addr.ia_p_key,
4542 	    cm_reply_addr->rcvd_addr.ia_remote_qno,
4543 	    cm_reply_addr->rcvd_addr.ia_service_level);
4544 
4545 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM SGID %llX:%llX ",
4546 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_prefix,
4547 	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_guid);
4548 
4549 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM DGID %llX:%llX",
4550 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_prefix,
4551 	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_guid);
4552 
4553 	IBTF_DPRINTF_L4(cmlog, "ibcm_print_reply_addr: CM FL %x TC %x HL %x",
4554 	    cm_reply_addr->grh_hdr.ig_flow_label,
4555 	    cm_reply_addr->grh_hdr.ig_tclass,
4556 	    cm_reply_addr->grh_hdr.ig_hop_limit);
4557 }
4558 
4559 #endif
4560 
/*
 * Arguments of one multicast group join request.  ibt_join_mcg() allocates
 * and fills in this structure and passes it to ibcm_process_join_mcg(),
 * either directly or through the CM task queue.
 */
typedef struct ibcm_join_mcg_tqarg_s {
	ib_gid_t		rgid;		/* request GID */
	ibt_mcg_attr_t		mcg_attr;	/* copy of caller's attrs */
	ibt_mcg_info_t		*mcg_infop;	/* caller's result buffer */
	ibt_mcg_handler_t	func;		/* completion handler or NULL */
	void			*arg;		/* argument passed to func */
} ibcm_join_mcg_tqarg_t;
4568 
4569 _NOTE(READ_ONLY_DATA(ibcm_join_mcg_tqarg_s))
4570 
4571 /*
4572  * Function:
4573  *	ibt_join_mcg
4574  * Input:
4575  *	rgid		The request GID that defines the HCA port from which a
4576  *			contact to SA Access is performed to add the specified
4577  *			endport GID ((mcg_attr->mc_pgid) to a multicast group.
4578  *			If mcg_attr->mc_pgid is null, then this (rgid) will be
4579  *			treated as endport GID that is to be added to the
4580  *			multicast group.
4581  *
4582  *	mcg_attr	A pointer to an ibt_mcg_attr_t structure that defines
4583  *			the attributes of the desired multicast group to be
4584  *			created or joined.
4585  *
4586  *	func		NULL or a pointer to a function to call when
4587  *			ibt_join_mcg() completes. If 'func' is not NULL then
4588  *			ibt_join_mcg() will return as soon as possible after
4589  *			initiating the multicast group join/create process.
4590  *			'func' is then called when the process completes.
4591  *
4592  *	arg		Argument to the 'func'.
4593  *
4594  * Output:
4595  *	mcg_info_p	A pointer to the ibt_mcg_info_t structure, allocated
4596  *			by the caller, where the attributes of the created or
4597  *			joined multicast group are copied.
4598  * Returns:
4599  *	IBT_SUCCESS
4600  *	IBT_INVALID_PARAM
4601  *	IBT_MCG_RECORDS_NOT_FOUND
4602  *	IBT_INSUFF_RESOURCE
4603  * Description:
4604  *	Join a multicast group.  The first full member "join" causes the MCG
4605  *	to be created.
4606  */
4607 ibt_status_t
4608 ibt_join_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr,
4609     ibt_mcg_info_t *mcg_info_p, ibt_mcg_handler_t func, void  *arg)
4610 {
4611 	ibcm_join_mcg_tqarg_t	*mcg_tq;
4612 	int			flag = ((func == NULL) ? KM_SLEEP : KM_NOSLEEP);
4613 
4614 	IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg(%llX:%llX, %p)", rgid.gid_prefix,
4615 	    rgid.gid_guid, mcg_attr);
4616 
4617 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4618 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Request GID is required");
4619 		return (IBT_INVALID_PARAM);
4620 	}
4621 
4622 	if ((mcg_attr->mc_pkey == IB_PKEY_INVALID_LIMITED) ||
4623 	    (mcg_attr->mc_pkey == IB_PKEY_INVALID_FULL)) {
4624 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Invalid P_Key specified");
4625 		return (IBT_INVALID_PARAM);
4626 	}
4627 
4628 	if (mcg_attr->mc_join_state == 0) {
4629 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: JoinState not specified");
4630 		return (IBT_INVALID_PARAM);
4631 	}
4632 
4633 	if (mcg_info_p == NULL) {
4634 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: mcg_info_p is NULL");
4635 		return (IBT_INVALID_PARAM);
4636 	}
4637 
4638 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mcg_tq))
4639 
4640 	mcg_tq = kmem_alloc(sizeof (ibcm_join_mcg_tqarg_t), flag);
4641 	if (mcg_tq == NULL) {
4642 		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: "
4643 		    "Unable to allocate memory for local usage.");
4644 		return (IBT_INSUFF_KERNEL_RESOURCE);
4645 	}
4646 
4647 	mcg_tq->rgid = rgid;
4648 	bcopy(mcg_attr, &mcg_tq->mcg_attr, sizeof (ibt_mcg_attr_t));
4649 	mcg_tq->mcg_infop = mcg_info_p;
4650 	mcg_tq->func = func;
4651 	mcg_tq->arg = arg;
4652 
4653 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mcg_tq))
4654 
4655 	if (func != NULL) {	/* Non-Blocking */
4656 		IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg: Non-Blocking Call");
4657 		if (taskq_dispatch(ibcm_taskq, ibcm_process_async_join_mcg,
4658 		    mcg_tq, TQ_NOSLEEP) == 0) {
4659 			IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Failed to "
4660 			    "Dispatch the TaskQ");
4661 			kmem_free(mcg_tq, sizeof (ibcm_join_mcg_tqarg_t));
4662 			return (IBT_INSUFF_KERNEL_RESOURCE);
4663 		} else
4664 			return (IBT_SUCCESS);
4665 	} else {		/* Blocking */
4666 		return (ibcm_process_join_mcg(mcg_tq));
4667 	}
4668 }
4669 
4670 static void
4671 ibcm_process_async_join_mcg(void *tq_arg)
4672 {
4673 	(void) ibcm_process_join_mcg(tq_arg);
4674 }
4675 
4676 static ibt_status_t
4677 ibcm_process_join_mcg(void *taskq_arg)
4678 {
4679 	sa_mcmember_record_t	mcg_req;
4680 	sa_mcmember_record_t	*mcg_resp;
4681 	ibmf_saa_access_args_t	access_args;
4682 	ibmf_saa_handle_t	saa_handle;
4683 	uint64_t		component_mask = 0;
4684 	ibt_status_t		retval;
4685 	ibtl_cm_hca_port_t	hca_port;
4686 	uint_t			num_records;
4687 	size_t			length;
4688 	ibcm_hca_info_t		*hcap;
4689 	ibcm_join_mcg_tqarg_t	*mcg_arg = (ibcm_join_mcg_tqarg_t *)taskq_arg;
4690 	ibt_mcg_info_t		*mcg_info_p = mcg_arg->mcg_infop;
4691 
4692 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg(%p)", mcg_arg);
4693 
4694 	retval = ibtl_cm_get_hca_port(mcg_arg->rgid, 0, &hca_port);
4695 	if (retval != IBT_SUCCESS) {
4696 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed to get "
4697 		    "port info from specified RGID: status = %d", retval);
4698 		goto ibcm_join_mcg_exit1;
4699 	}
4700 
4701 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4702 
4703 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix == 0) ||
4704 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid == 0)) {
4705 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4706 		    "Request GID is Port GID");
4707 		mcg_req.PortGID = mcg_arg->rgid;
4708 	} else {
4709 		mcg_req.PortGID = mcg_arg->mcg_attr.mc_pgid;
4710 	}
4711 	component_mask |= SA_MC_COMPMASK_PORTGID;
4712 
4713 	mcg_req.Q_Key = mcg_arg->mcg_attr.mc_qkey;
4714 	mcg_req.P_Key = mcg_arg->mcg_attr.mc_pkey;
4715 	mcg_req.JoinState = mcg_arg->mcg_attr.mc_join_state;
4716 	mcg_req.TClass = mcg_arg->mcg_attr.mc_tclass;
4717 	mcg_req.FlowLabel = mcg_arg->mcg_attr.mc_flow;
4718 	mcg_req.SL = mcg_arg->mcg_attr.mc_sl;
4719 
4720 	component_mask |= SA_MC_COMPMASK_QKEY | SA_MC_COMPMASK_PKEY |
4721 	    SA_MC_COMPMASK_JOINSTATE | SA_MC_COMPMASK_TCLASS |
4722 	    SA_MC_COMPMASK_FLOWLABEL | SA_MC_COMPMASK_SL;
4723 
4724 	/* If client has specified MGID, use it else SA will assign one. */
4725 	if ((mcg_arg->mcg_attr.mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4726 		mcg_req.MGID = mcg_arg->mcg_attr.mc_mgid;
4727 		component_mask |= SA_MC_COMPMASK_MGID;
4728 	}
4729 
4730 	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: PGID=%llX:%llX, "
4731 	    "MGID=%llX:%llX", mcg_req.PortGID.gid_prefix,
4732 	    mcg_req.PortGID.gid_guid, mcg_req.MGID.gid_prefix,
4733 	    mcg_req.MGID.gid_guid);
4734 
4735 	/* Is MTU specified. */
4736 	if (mcg_arg->mcg_attr.mc_mtu_req.r_mtu) {
4737 		mcg_req.MTU = mcg_arg->mcg_attr.mc_mtu_req.r_mtu;
4738 		mcg_req.MTUSelector = mcg_arg->mcg_attr.mc_mtu_req.r_selector;
4739 
4740 		component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
4741 		    SA_MC_COMPMASK_MTU;
4742 	}
4743 
4744 	/* Is RATE specified. */
4745 	if (mcg_arg->mcg_attr.mc_rate_req.r_srate) {
4746 		mcg_req.Rate = mcg_arg->mcg_attr.mc_rate_req.r_srate;
4747 		mcg_req.RateSelector =
4748 		    mcg_arg->mcg_attr.mc_rate_req.r_selector;
4749 
4750 		component_mask |= SA_MC_COMPMASK_RATESELECTOR |
4751 		    SA_MC_COMPMASK_RATE;
4752 	}
4753 
4754 	/* Is Packet Life Time specified. */
4755 	if (mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt) {
4756 		mcg_req.Rate = mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt;
4757 		mcg_req.RateSelector =
4758 		    mcg_arg->mcg_attr.mc_pkt_lt_req.p_selector;
4759 
4760 		component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
4761 		    SA_MC_COMPMASK_PKTLT;
4762 	}
4763 
4764 	if (mcg_arg->mcg_attr.mc_hop) {
4765 		mcg_req.HopLimit = mcg_arg->mcg_attr.mc_hop;
4766 		component_mask |= SA_MC_COMPMASK_HOPLIMIT;
4767 	}
4768 
4769 	if (mcg_arg->mcg_attr.mc_scope) {
4770 		mcg_req.Scope = mcg_arg->mcg_attr.mc_scope;
4771 		component_mask |= SA_MC_COMPMASK_SCOPE;
4772 	}
4773 
4774 	if (mcg_arg->mcg_attr.mc_mlid) {
4775 		mcg_req.MLID = mcg_arg->mcg_attr.mc_mlid;
4776 		component_mask |= SA_MC_COMPMASK_MLID;
4777 	}
4778 
4779 	/* Get SA Access Handle. */
4780 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
4781 	if (hcap == NULL) {
4782 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: NO HCA found");
4783 
4784 		retval = IBT_HCA_BUSY_DETACHING;
4785 		goto ibcm_join_mcg_exit1;
4786 	}
4787 
4788 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
4789 	if (saa_handle == NULL) {
4790 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: SA Handle NULL");
4791 
4792 		retval = IBT_HCA_PORT_NOT_ACTIVE;
4793 		goto ibcm_join_mcg_exit;
4794 	}
4795 
4796 	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix != 0) &&
4797 	    (mcg_arg->mcg_attr.mc_pgid.gid_guid != 0)) {
4798 		retval = ibtl_cm_get_hca_port(mcg_arg->mcg_attr.mc_pgid, 0,
4799 		    &hca_port);
4800 		if (retval != IBT_SUCCESS) {
4801 			IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed "
4802 			    "to get PortInfo of specified PGID: status = %d",
4803 			    retval);
4804 			goto ibcm_join_mcg_exit1;
4805 		}
4806 	}
4807 
4808 	/* Contact SA Access */
4809 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
4810 	access_args.sq_access_type = IBMF_SAA_UPDATE;
4811 	access_args.sq_component_mask = component_mask;
4812 	access_args.sq_template = &mcg_req;
4813 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
4814 	access_args.sq_callback = NULL;
4815 	access_args.sq_callback_arg = NULL;
4816 
4817 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
4818 	    (void **)&mcg_resp);
4819 	if (retval != IBT_SUCCESS) {
4820 		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: "
4821 		    "SA Access Failed");
4822 		goto ibcm_join_mcg_exit;
4823 	}
4824 
4825 	num_records = length/sizeof (sa_mcmember_record_t);
4826 
4827 	IBTF_DPRINTF_L4(cmlog, "ibcm_process_join_mcg: "
4828 	    "Found %d MCMember Records", num_records);
4829 
4830 	/* Validate the returned number of records. */
4831 	if ((mcg_resp != NULL) && (num_records > 0)) {
4832 		/* Update the return values. */
4833 		mcg_info_p->mc_adds_vect.av_dgid = mcg_resp->MGID;
4834 		mcg_info_p->mc_adds_vect.av_sgid = mcg_resp->PortGID;
4835 		mcg_info_p->mc_adds_vect.av_srate = mcg_resp->Rate;
4836 		mcg_info_p->mc_adds_vect.av_srvl = mcg_resp->SL;
4837 		mcg_info_p->mc_adds_vect.av_flow = mcg_resp->FlowLabel;
4838 		mcg_info_p->mc_adds_vect.av_tclass = mcg_resp->TClass;
4839 		mcg_info_p->mc_adds_vect.av_hop = mcg_resp->HopLimit;
4840 		mcg_info_p->mc_adds_vect.av_send_grh = B_TRUE;
4841 		mcg_info_p->mc_adds_vect.av_dlid = mcg_resp->MLID;
4842 		mcg_info_p->mc_mtu = mcg_resp->MTU;
4843 		mcg_info_p->mc_qkey = mcg_resp->Q_Key;
4844 
4845 		retval = ibt_pkey2index_byguid(hca_port.hp_hca_guid,
4846 		    hca_port.hp_port, mcg_resp->P_Key, &mcg_info_p->mc_pkey_ix);
4847 		if (retval != IBT_SUCCESS) {
4848 			IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4849 			    "Pkey2Index Conversion failed<%d>", retval);
4850 			mcg_info_p->mc_pkey_ix = 0;
4851 		}
4852 
4853 		mcg_info_p->mc_scope = mcg_resp->Scope;
4854 		mcg_info_p->mc_pkt_lt = mcg_resp->PacketLifeTime;
4855 
4856 		mcg_info_p->mc_adds_vect.av_port_num = hca_port.hp_port;
4857 		mcg_info_p->mc_adds_vect.av_sgid_ix = hca_port.hp_sgid_ix;
4858 		mcg_info_p->mc_adds_vect.av_src_path = 0;
4859 
4860 		/* Deallocate the memory allocated by SA for mcg_resp. */
4861 		kmem_free(mcg_resp, length);
4862 		retval = IBT_SUCCESS;
4863 	} else {
4864 		retval = IBT_MCG_RECORDS_NOT_FOUND;
4865 		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
4866 		    "MCG RECORDS NOT FOUND");
4867 	}
4868 
4869 ibcm_join_mcg_exit:
4870 	ibcm_dec_hca_acc_cnt(hcap);
4871 
4872 ibcm_join_mcg_exit1:
4873 	if (mcg_arg->func)
4874 		(*(mcg_arg->func))(mcg_arg->arg, retval, mcg_info_p);
4875 
4876 	kmem_free(mcg_arg, sizeof (ibcm_join_mcg_tqarg_t));
4877 
4878 	return (retval);
4879 }
4880 
4881 
4882 /*
4883  * Function:
4884  *	ibt_leave_mcg
4885  * Input:
4886  *	rgid		The request GID that defines the HCA port upon which
4887  *			to send the request to the Subnet Administrator, to
4888  *			remove the specified port (port_gid) from the multicast
4889  *			group.  If 'port_gid' is the Reserved GID (i.e.
4890  *			port_gid.gid_prefix = 0 and port_gid.gid_guid = 0),
4891  *			then the end-port associated with 'rgid' is removed
4892  *			from the multicast group.
4893  *
4894  *	mc_gid		A multicast group GID as returned from ibt_join_mcg()
4895  *			call.  This is optional, if not specified (i.e.
4896  *			mc_gid.gid_prefix has 0xFF in its upper 8 bits to
4897  *			identify this as being a multicast GID), then the
4898  *			port is removed from all the multicast groups of
4899  *			which it is a member.
4900  *
4901  *	port_gid	This is optional, if not the Reserved GID (gid_prefix
4902  *			and gid_guid not equal to 0), then this specifies the
4903  *			endport GID of the multicast group member being deleted
4904  *			from the group. If it is the Reserved GID (gid_prefix
4905  *			and gid_guid equal to 0) then the member endport GID is
4906  *			determined from 'rgid'.
4907  *
4908  *	mc_join_state	The Join State attribute used when the group was joined
4909  *			using ibt_join_mcg(). This Join State component must
4910  *			contains at least one bit set to 1 in the same position
4911  *			as that used during ibt_join_mcg(). i.e. the logical
4912  *			AND of the two JoinState components is not all zeros.
4913  *			This Join State component must not have some bits set
4914  *			which are not set using ibt_join_mcg().
4915  * Output:
4916  *	None.
4917  * Returns:
4918  *	IBT_SUCCESS
4919  *	IBT_INVALID_PARAM
4920  *	IBT_MC_GROUP_INVALID
4921  *	IBT_INSUFF_RESOURCE
4922  * Description:
4923  *	The port associated with the port GID shall be removed from the
4924  *	multicast group specified by MGID (mc_gid) or from all the multicast
4925  *	groups of which it is a member if the MGID (mc_gid) is not specified.
4926  *
4927  *	The last full member to leave causes the destruction of the Multicast
4928  *	Group.
4929  */
4930 ibt_status_t
4931 ibt_leave_mcg(ib_gid_t rgid, ib_gid_t mc_gid, ib_gid_t port_gid,
4932     uint8_t mc_join_state)
4933 {
4934 	sa_mcmember_record_t	mcg_req;
4935 	ibmf_saa_access_args_t	access_args;
4936 	ibmf_saa_handle_t	saa_handle;
4937 	uint64_t		component_mask = 0;
4938 	int			sa_retval;
4939 	ibt_status_t		retval;
4940 	ibtl_cm_hca_port_t	hca_port;
4941 	size_t			length;
4942 	void			*results_p;
4943 	ibcm_hca_info_t		*hcap;
4944 
4945 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, %llX:%llX)",
4946 	    rgid.gid_prefix, rgid.gid_guid, mc_gid.gid_prefix, mc_gid.gid_guid);
4947 
4948 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, 0x%X)",
4949 	    port_gid.gid_prefix, port_gid.gid_guid, mc_join_state);
4950 
4951 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
4952 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: RequestGID is required");
4953 		return (IBT_INVALID_PARAM);
4954 	}
4955 
4956 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
4957 
4958 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: MGID: %llX%llX",
4959 	    mc_gid.gid_prefix, mc_gid.gid_guid);
4960 
4961 	/* Validate MGID */
4962 	if ((mc_gid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
4963 		mcg_req.MGID = mc_gid;
4964 		component_mask |= SA_MC_COMPMASK_MGID;
4965 	} else if ((mc_gid.gid_prefix != 0) || (mc_gid.gid_guid != 0)) {
4966 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Invalid MGID specified");
4967 		return (IBT_MC_MGID_INVALID);
4968 	}
4969 
4970 	if ((port_gid.gid_prefix == 0) || (port_gid.gid_guid == 0)) {
4971 		mcg_req.PortGID = rgid;
4972 	} else {
4973 		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Performing PROXY Leave");
4974 		mcg_req.PortGID = port_gid;
4975 	}
4976 	component_mask |= SA_MC_COMPMASK_PORTGID;
4977 
4978 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Port GID <%llX:%llX>",
4979 	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
4980 
4981 	/* Join State */
4982 	mcg_req.JoinState = mc_join_state;
4983 	component_mask |= SA_MC_COMPMASK_JOINSTATE;
4984 
4985 	retval = ibtl_cm_get_hca_port(rgid, 0, &hca_port);
4986 	if (retval != IBT_SUCCESS) {
4987 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: Failed to get port info "
4988 		    "from specified RGID : status = %d", retval);
4989 		return (retval);
4990 	}
4991 
4992 	/* Get SA Access Handle. */
4993 	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
4994 	if (hcap == NULL) {
4995 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: "
4996 		    "NO HCA found");
4997 		return (IBT_HCA_BUSY_DETACHING);
4998 	}
4999 
5000 	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
5001 	if (saa_handle == NULL) {
5002 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: saa_handle is NULL");
5003 		ibcm_dec_hca_acc_cnt(hcap);
5004 		return (IBT_HCA_PORT_NOT_ACTIVE);
5005 	}
5006 
5007 	/* Contact SA Access */
5008 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5009 	access_args.sq_access_type = IBMF_SAA_DELETE;
5010 	access_args.sq_component_mask = component_mask;
5011 	access_args.sq_template = &mcg_req;
5012 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5013 	access_args.sq_callback = NULL;
5014 	access_args.sq_callback_arg = NULL;
5015 
5016 	ibcm_sa_access_enter();
5017 
5018 	sa_retval = ibmf_sa_access(saa_handle, &access_args, 0, &length,
5019 	    &results_p);
5020 	if (sa_retval != IBMF_SUCCESS) {
5021 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: SA access Failed: %d",
5022 		    sa_retval);
5023 		(void) ibcm_ibmf_analyze_error(sa_retval);
5024 		retval = IBT_MC_GROUP_INVALID;
5025 	}
5026 
5027 	ibcm_sa_access_exit();
5028 
5029 	ibcm_dec_hca_acc_cnt(hcap);
5030 
5031 	return (retval);
5032 }
5033 
5034 
5035 /*
5036  * Function:
5037  *	ibt_query_mcg
5038  * Input:
5039  *	rgid		The request GID that defines the HCA port upon which
5040  *			to send the request to the Subnet Administrator, to
5041  *			retrieve Multicast Records matching attributes as
5042  *			specified through 'mcg_attr' argument.
5043  *
5044  *	mcg_attr	NULL or a pointer to an ibt_mcg_attr_t structure that
5045  *			specifies MCG attributes that are to be matched.
5046  *			Attributes that are not required can be wild carded
5047  *			by specifying as '0'.
5048  *
5049  *	mcgs_max_num	The maximum number of matching multicast groups to
5050  *			return.  If zero, then all available matching multicast
5051  *			groups are returned.
5052  * Output:
5053  *	mcgs_info_p	The address of an ibt_mcg_info_t pointer, where
5054  *			multicast group information is returned. The actual
5055  *			number of entries filled in the array is returned in
5056  *			entries_p.
5057  *
5058  *	entries_p	The number of ibt_mcg_attr_t entries returned.
5059  * Returns:
5060  *	IBT_SUCCESS
5061  *	IBT_INVALID_PARAM
5062  *	IBT_MCG_RECORDS_NOT_FOUND
5063  * Description:
5064  *	Request information on multicast groups that match the parameters
5065  *	specified in mcg_attr. Information on each multicast group is returned
5066  *	to the caller in the form of an array of ibt_mcg_info_t.
5067  *	ibt_query_mcg() allocates the memory for this array and returns a
5068  *	pointer to the array (mcgs_p) and the number of entries in the array
5069  *	(entries_p). This memory should be freed by the client using
5070  *	ibt_free_mcg_info().
5071  */
5072 ibt_status_t
5073 ibt_query_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr, uint_t mcgs_max_num,
5074     ibt_mcg_info_t **mcgs_info_p, uint_t *entries_p)
5075 {
5076 	sa_mcmember_record_t	mcg_req;
5077 	sa_mcmember_record_t	*mcg_resp;
5078 	ibt_mcg_info_t		*mcg_infop;
5079 	ibmf_saa_access_args_t	access_args;
5080 	ibmf_saa_handle_t	saa_handle;
5081 	uint64_t		component_mask = 0;
5082 	ibt_status_t		retval;
5083 	ibtl_cm_hca_port_t	hport;
5084 	uint_t			num_records;
5085 	size_t			length;
5086 	void			*results_p;
5087 	ib_gid_t		port_gid;
5088 	ibcm_hca_info_t		*hcap;
5089 
5090 	IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg(%p, %d)", mcg_attr, mcgs_max_num);
5091 
5092 	if ((entries_p == NULL) || (mcgs_info_p == NULL)) {
5093 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5094 		    "entries_p or mcgs_info_p is NULL");
5095 		return (IBT_INVALID_PARAM);
5096 	}
5097 
5098 	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
5099 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: RequestGID is required");
5100 		return (IBT_INVALID_PARAM);
5101 	}
5102 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Request GID <%llX:%llX>",
5103 	    rgid.gid_prefix, rgid.gid_guid);
5104 
5105 	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
5106 	port_gid.gid_prefix = port_gid.gid_guid = 0;
5107 
5108 	if (mcg_attr != NULL) {
5109 		port_gid = mcg_attr->mc_pgid;
5110 
5111 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5112 			mcg_req.PortGID = mcg_attr->mc_pgid;
5113 			component_mask |= SA_MC_COMPMASK_PORTGID;
5114 
5115 			IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: PGID %llX:%llX",
5116 			    port_gid.gid_prefix, port_gid.gid_guid);
5117 		}
5118 
5119 		/* Is Q_Key specified. */
5120 		if (mcg_attr->mc_qkey != 0) {
5121 			mcg_req.Q_Key = mcg_attr->mc_qkey;
5122 			component_mask |= SA_MC_COMPMASK_QKEY;
5123 		}
5124 
5125 		/* Is P_Key specified. */
5126 		if (mcg_attr->mc_pkey != 0) {
5127 			mcg_req.P_Key = mcg_attr->mc_pkey;
5128 			component_mask |= SA_MC_COMPMASK_PKEY;
5129 		}
5130 
5131 		/* Is MGID specified. */
5132 		if ((mcg_attr->mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
5133 			mcg_req.MGID = mcg_attr->mc_mgid;
5134 			component_mask |= SA_MC_COMPMASK_MGID;
5135 		}
5136 
5137 		/* Is MTU specified. */
5138 		if (mcg_attr->mc_mtu_req.r_mtu) {
5139 			mcg_req.MTU = mcg_attr->mc_mtu_req.r_mtu;
5140 			mcg_req.MTUSelector = mcg_attr->mc_mtu_req.r_selector;
5141 
5142 			component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
5143 			    SA_MC_COMPMASK_MTU;
5144 		}
5145 
5146 		if (mcg_attr->mc_tclass) {
5147 			mcg_req.TClass = mcg_attr->mc_tclass;
5148 			component_mask |= SA_MC_COMPMASK_TCLASS;
5149 		}
5150 
5151 		/* Is RATE specified. */
5152 		if (mcg_attr->mc_rate_req.r_srate) {
5153 			mcg_req.Rate = mcg_attr->mc_rate_req.r_srate;
5154 			mcg_req.RateSelector = mcg_attr->mc_rate_req.r_selector;
5155 
5156 			component_mask |= SA_MC_COMPMASK_RATESELECTOR |
5157 			    SA_MC_COMPMASK_RATE;
5158 		}
5159 
5160 		/* Is Packet Life Time specified. */
5161 		if (mcg_attr->mc_pkt_lt_req.p_pkt_lt) {
5162 			mcg_req.Rate = mcg_attr->mc_pkt_lt_req.p_pkt_lt;
5163 			mcg_req.RateSelector =
5164 			    mcg_attr->mc_pkt_lt_req.p_selector;
5165 
5166 			component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
5167 			    SA_MC_COMPMASK_PKTLT;
5168 		}
5169 
5170 		if (mcg_attr->mc_hop) {
5171 			mcg_req.HopLimit = mcg_attr->mc_hop;
5172 			component_mask |= SA_MC_COMPMASK_HOPLIMIT;
5173 		}
5174 
5175 		if (mcg_attr->mc_flow) {
5176 			mcg_req.FlowLabel = mcg_attr->mc_flow;
5177 			component_mask |= SA_MC_COMPMASK_FLOWLABEL;
5178 		}
5179 
5180 		if (mcg_attr->mc_sl) {
5181 			mcg_req.SL = mcg_attr->mc_sl;
5182 			component_mask |= SA_MC_COMPMASK_SL;
5183 		}
5184 
5185 		if (mcg_attr->mc_scope) {
5186 			mcg_req.Scope = mcg_attr->mc_scope;
5187 			component_mask |= SA_MC_COMPMASK_SCOPE;
5188 		}
5189 
5190 		if (mcg_attr->mc_join_state) {
5191 			mcg_req.JoinState = mcg_attr->mc_join_state;
5192 			component_mask |= SA_MC_COMPMASK_JOINSTATE;
5193 		}
5194 
5195 		if (mcg_attr->mc_mlid) {
5196 			mcg_req.MLID = mcg_attr->mc_mlid;
5197 			component_mask |= SA_MC_COMPMASK_MLID;
5198 		}
5199 	}
5200 
5201 	retval = ibtl_cm_get_hca_port(rgid, 0, &hport);
5202 	if (retval != IBT_SUCCESS) {
5203 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: Failed to get port info "
5204 		    "from specified RGID : status = %d", retval);
5205 		return (retval);
5206 	}
5207 
5208 	/* Get SA Access Handle. */
5209 	hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5210 	if (hcap == NULL) {
5211 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: NO HCA found");
5212 		return (IBT_HCA_BUSY_DETACHING);
5213 	}
5214 
5215 	saa_handle = ibcm_get_saa_handle(hcap, hport.hp_port);
5216 	if (saa_handle == NULL) {
5217 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: saa_handle is NULL");
5218 		ibcm_dec_hca_acc_cnt(hcap);
5219 		return (IBT_HCA_PORT_NOT_ACTIVE);
5220 	}
5221 
5222 	/* Contact SA Access */
5223 	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
5224 	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
5225 	access_args.sq_component_mask = component_mask;
5226 	access_args.sq_template = &mcg_req;
5227 	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
5228 	access_args.sq_callback = NULL;
5229 	access_args.sq_callback_arg = NULL;
5230 
5231 	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
5232 	    &results_p);
5233 	if (retval != IBT_SUCCESS) {
5234 		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: SA access Failed");
5235 		ibcm_dec_hca_acc_cnt(hcap);
5236 		return (retval);
5237 	}
5238 
5239 	num_records = length/sizeof (sa_mcmember_record_t);
5240 
5241 	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Found %d MCMember Records",
5242 	    num_records);
5243 
5244 	/* Validate the returned number of records. */
5245 	if ((results_p != NULL) && (num_records > 0)) {
5246 		uint_t	i;
5247 
5248 		/*
5249 		 * If mcgs_max_num is zero, then return all records else
5250 		 * return only requested number of records
5251 		 */
5252 		if ((mcgs_max_num != 0) && (num_records > mcgs_max_num)) {
5253 			/* we are interested in only mcgs_max_num records */
5254 			num_records = mcgs_max_num;
5255 		}
5256 
5257 		/*
5258 		 * The SGID returned in "mcg_info_p" buffer should be PortGID,
5259 		 * (mcg_attr->mc_pgid), if 'mcg_attr->mc_pgid' was specified,
5260 		 * else RequestGID (rgid) should be returned.
5261 		 */
5262 		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
5263 
5264 			/* Get sgid_ix and port number of 'port_gid' */
5265 			retval = ibtl_cm_get_hca_port(port_gid, 0, &hport);
5266 			if (retval != IBT_SUCCESS) {
5267 				IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
5268 				    "Failed to Get Portinfo for PortGID :"
5269 				    "status = %d", retval);
5270 				return (retval);
5271 			}
5272 		} else {
5273 			/*
5274 			 * The sgid_ix and port number related to RequestGID
5275 			 * are already obtained at the beginning.
5276 			 */
5277 			port_gid = rgid;
5278 		}
5279 
5280 		/*
5281 		 * Allocate memory for return buffer, to be freed in
5282 		 * ibt_free_mcg_info().
5283 		 */
5284 		mcg_infop = kmem_alloc((num_records * sizeof (ibt_mcg_info_t)),
5285 		    KM_SLEEP);
5286 
5287 		*mcgs_info_p = mcg_infop;
5288 		*entries_p = num_records;
5289 
5290 		/* Update the return values. */
5291 		for (i = 0; i < num_records; i++) {
5292 
5293 			mcg_resp = (sa_mcmember_record_t *)((uchar_t *)
5294 			    results_p + i * sizeof (sa_mcmember_record_t));
5295 
5296 			mcg_infop[i].mc_adds_vect.av_dgid = mcg_resp->MGID;
5297 			mcg_infop[i].mc_adds_vect.av_sgid = port_gid;
5298 			mcg_infop[i].mc_adds_vect.av_srate = mcg_resp->Rate;
5299 			mcg_infop[i].mc_adds_vect.av_srvl = mcg_resp->SL;
5300 			mcg_infop[i].mc_adds_vect.av_flow = mcg_resp->FlowLabel;
5301 			mcg_infop[i].mc_adds_vect.av_tclass = mcg_resp->TClass;
5302 			mcg_infop[i].mc_adds_vect.av_hop = mcg_resp->HopLimit;
5303 			mcg_infop[i].mc_adds_vect.av_port_num = hport.hp_port;
5304 			mcg_infop[i].mc_adds_vect.av_send_grh = B_TRUE;
5305 			mcg_infop[i].mc_adds_vect.av_dlid = mcg_resp->MLID;
5306 			mcg_infop[i].mc_adds_vect.av_sgid_ix = hport.hp_sgid_ix;
5307 			mcg_infop[i].mc_adds_vect.av_src_path = 0;
5308 			mcg_infop[i].mc_mtu = mcg_resp->MTU;
5309 			mcg_infop[i].mc_qkey = mcg_resp->Q_Key;
5310 			mcg_infop[i].mc_scope = mcg_resp->Scope;
5311 			mcg_infop[i].mc_pkt_lt = mcg_resp->PacketLifeTime;
5312 
5313 			if (ibt_pkey2index_byguid(hport.hp_hca_guid,
5314 			    hport.hp_port, mcg_resp->P_Key,
5315 			    &mcg_infop[i].mc_pkey_ix) != IBT_SUCCESS) {
5316 				IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: "
5317 				    "Pkey2Index Conversion failed");
5318 				mcg_infop[i].mc_pkey_ix = 0;
5319 			}
5320 		}
5321 
5322 		/*
5323 		 * Deallocate the memory allocated by SA for results_p.
5324 		 */
5325 		kmem_free(results_p, length);
5326 		retval = IBT_SUCCESS;
5327 
5328 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: returning %d MCGRecords",
5329 		    num_records);
5330 
5331 	} else {
5332 		retval = IBT_MCG_RECORDS_NOT_FOUND;
5333 		*entries_p = 0;
5334 
5335 		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: MCG RECORDS NOT FOUND");
5336 	}
5337 
5338 	ibcm_dec_hca_acc_cnt(hcap);
5339 
5340 	return (retval);
5341 }
5342 
5343 
5344 /*
5345  * ibt_free_mcg_info()
5346  *	Free the memory allocated by successful ibt_query_mcg()
5347  *
5348  *	mcgs_info	Pointer returned by ibt_query_mcg().
5349  *
5350  *	entries		The number of ibt_mcg_info_t entries to free.
5351  */
5352 void
5353 ibt_free_mcg_info(ibt_mcg_info_t *mcgs_info, uint_t entries)
5354 {
5355 	IBTF_DPRINTF_L3(cmlog, "ibt_free_mcg_info: "
5356 	    "Free <%d> entries from 0x%p", entries, mcgs_info);
5357 
5358 	if ((mcgs_info != NULL) && (entries > 0))
5359 		kmem_free(mcgs_info, entries * sizeof (ibt_mcg_info_t));
5360 	else
5361 		IBTF_DPRINTF_L2(cmlog, "ibt_free_mcg_info: "
5362 		    "ERROR: NULL buf pointer or length specified.");
5363 }
5364 
5365 
5366 /*
5367  * Function:
5368  *	ibt_gid_to_node_info()
5369  * Input:
5370  *	gid		Identifies the IB Node and port for which to obtain
5371  *			Node information.
5372  * Output:
5373  *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
5374  *			by the caller) in which to return the node information.
5375  * Returns:
5376  *	IBT_SUCCESS
5377  *	IBT_INVALID_PARAM
5378  *	IBT_NODE_RECORDS_NOT_FOUND
5379  *	IBT_NO_HCAS_AVAILABLE
5380  * Description:
5381  *	Retrieve Node Information for the specified GID.
5382  */
5383 ibt_status_t
5384 ibt_gid_to_node_info(ib_gid_t gid, ibt_node_info_t *node_info_p)
5385 {
5386 	sa_node_record_t	nr_req, *nr_resp;
5387 	ibmf_saa_handle_t	saa_handle;
5388 	ibt_status_t		retval;
5389 	ibcm_hca_info_t		*hcap;
5390 	ibtl_cm_hca_port_t	hport;
5391 	int			i, j;
5392 	uint_t			num_rec;
5393 	ib_guid_t		*guid_array = NULL;
5394 	sa_path_record_t	*path;
5395 	size_t			len;
5396 	uint8_t			npaths;
5397 	uint32_t		num_hcas = 0;
5398 	ib_lid_t		node_lid;
5399 	boolean_t		local_node = B_FALSE;
5400 	void			*res_p;
5401 	uint8_t			num_ports = 0;
5402 
5403 
5404 	IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info(%llX:%llX, %p)",
5405 	    gid.gid_prefix, gid.gid_guid, node_info_p);
5406 
5407 	if ((gid.gid_prefix == 0) || (gid.gid_guid == 0)) {
5408 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: GID is required");
5409 		return (IBT_INVALID_PARAM);
5410 	}
5411 
5412 	if (node_info_p == NULL) {
5413 		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5414 		    "Return Buf (node_info_p) is NULL.");
5415 		return (IBT_INVALID_PARAM);
5416 	}
5417 
5418 	/*
5419 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5420 	 * associated port) info via ibtl_cm_get_hca_port() call.
5421 	 */
5422 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5423 	if (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS) {
5424 
5425 		hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
5426 		if (hcap == NULL) {
5427 			IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5428 			    "HCA(%llX) info not found", hport.hp_hca_guid);
5429 			return (IBT_NO_HCAS_AVAILABLE);
5430 		}
5431 		num_ports = 1;
5432 		num_hcas = 1;
5433 		node_lid = hport.hp_base_lid;
5434 		local_node = B_TRUE;
5435 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: Local Node: "
5436 		    "LID = 0x%X", node_lid);
5437 	} else {
5438 		/* Get the number of HCAs and their GUIDs */
5439 		num_hcas = ibt_get_hca_list(&guid_array);
5440 		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: ibt_get_hca_list "
5441 		    "returned %d hcas", num_hcas);
5442 
5443 		if (num_hcas == 0) {
5444 			IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5445 			    "NO HCA's Found on this system");
5446 			return (IBT_NO_HCAS_AVAILABLE);
5447 		}
5448 	}
5449 
5450 	for (i = 0; i < num_hcas; i++) {
5451 		if (local_node == B_FALSE) {
5452 			hcap = ibcm_find_hca_entry(guid_array[i]);
5453 			if (hcap == NULL) {
5454 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5455 				    "HCA(%llX) info not found", guid_array[i]);
5456 				retval = IBT_NO_HCAS_AVAILABLE;
5457 				continue;
5458 			}
5459 			num_ports = hcap->hca_num_ports;
5460 		}
5461 
5462 		for (j = 0; j < num_ports; j++) {
5463 			uint8_t		port = 0;
5464 
5465 			if (local_node == B_TRUE)
5466 				port = hport.hp_port;
5467 			else
5468 				port = j + 1;
5469 
5470 			/* Get SA Access Handle. */
5471 			saa_handle = ibcm_get_saa_handle(hcap, port);
5472 			if (saa_handle == NULL) {
5473 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5474 				    "Port %d of HCA (%llX) is NOT ACTIVE",
5475 				    port, hport.hp_hca_guid);
5476 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5477 				continue;
5478 			}
5479 
5480 			if (local_node == B_FALSE) {
5481 				ib_gid_t	sgid;
5482 				int		sa_ret;
5483 
5484 				/*
5485 				 * Check whether 'gid' and this port has same
5486 				 * subnet prefix. If not, then there is no use
5487 				 * in searching from this port.
5488 				 */
5489 				sgid = hcap->hca_port_info[j].port_sgid0;
5490 				if (gid.gid_prefix != sgid.gid_prefix) {
5491 					IBTF_DPRINTF_L3(cmlog,
5492 					    "ibt_gid_to_node_info:Sn_Prefix of "
5493 					    "GID(%llX) and Port's(%llX) differ",
5494 					    gid.gid_prefix, sgid.gid_prefix);
5495 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5496 					continue;
5497 				}
5498 
5499 				/*
5500 				 * First Get Path Records for the specified DGID
5501 				 * from this port (SGID). From Path Records,
5502 				 * note down DLID, then use this DLID as Input
5503 				 * attribute to get NodeRecords from SA Access.
5504 				 */
5505 				npaths = 1;
5506 				path = NULL;
5507 
5508 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5509 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5510 				    &path);
5511 				if (sa_ret != IBMF_SUCCESS) {
5512 					IBTF_DPRINTF_L2(cmlog,
5513 					    "ibt_gid_to_node_info: "
5514 					    "ibmf_saa_gid_to_pathrecords() "
5515 					    "returned error: %d ", sa_ret);
5516 					retval =
5517 					    ibcm_ibmf_analyze_error(sa_ret);
5518 					continue;
5519 				} else if ((npaths == 0) || (path == NULL)) {
5520 					IBTF_DPRINTF_L3(cmlog,
5521 					    "ibt_gid_to_node_info: failed (%d) "
5522 					    "to get path records for the DGID "
5523 					    "0x%llX from SGID 0x%llX", sa_ret,
5524 					    gid.gid_guid, sgid.gid_guid);
5525 					retval = IBT_NODE_RECORDS_NOT_FOUND;
5526 					continue;
5527 				}
5528 				node_lid = path->DLID;	/* LID */
5529 
5530 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5531 				    "Remote Node: LID = 0x%X", node_lid);
5532 
5533 				/* Free SA_Access memory for path record. */
5534 				kmem_free(path, len);
5535 			}
5536 
5537 			/* Retrieve Node Records from SA Access. */
5538 			bzero(&nr_req, sizeof (sa_node_record_t));
5539 
5540 			nr_req.LID = node_lid;	/* LID */
5541 
5542 			retval = ibcm_get_node_rec(saa_handle, &nr_req,
5543 			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5544 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5545 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5546 				    "failed (%d) to get Node records", retval);
5547 				continue;
5548 			} else if (retval != IBT_SUCCESS) {
5549 				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
5550 				    "failed (%d) to get Node records", retval);
5551 				ibcm_dec_hca_acc_cnt(hcap);
5552 				goto gid_to_ni_exit;
5553 			}
5554 
5555 			num_rec = len/sizeof (sa_node_record_t);
5556 			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5557 
5558 			/* Validate the returned number of records. */
5559 			if ((nr_resp != NULL) && (num_rec > 0)) {
5560 
5561 				IBCM_DUMP_NODE_REC(nr_resp);
5562 
5563 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
5564 				    *node_info_p))
5565 
5566 				node_info_p->n_sys_img_guid =
5567 				    nr_resp->NodeInfo.SystemImageGUID;
5568 				node_info_p->n_node_guid =
5569 				    nr_resp->NodeInfo.NodeGUID;
5570 				node_info_p->n_port_guid =
5571 				    nr_resp->NodeInfo.PortGUID;
5572 				node_info_p->n_dev_id =
5573 				    nr_resp->NodeInfo.DeviceID;
5574 				node_info_p->n_revision =
5575 				    nr_resp->NodeInfo.Revision;
5576 				node_info_p->n_vendor_id =
5577 				    nr_resp->NodeInfo.VendorID;
5578 				node_info_p->n_num_ports =
5579 				    nr_resp->NodeInfo.NumPorts;
5580 				node_info_p->n_port_num =
5581 				    nr_resp->NodeInfo.LocalPortNum;
5582 				node_info_p->n_node_type =
5583 				    nr_resp->NodeInfo.NodeType;
5584 				(void) strncpy(node_info_p->n_description,
5585 				    (char *)&nr_resp->NodeDescription, 64);
5586 
5587 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
5588 				    *node_info_p))
5589 
5590 				/*
5591 				 * Deallocate the memory allocated by SA for
5592 				 * 'nr_resp'.
5593 				 */
5594 				ibcm_dec_hca_acc_cnt(hcap);
5595 				kmem_free(nr_resp, len);
5596 				retval = IBT_SUCCESS;
5597 
5598 				goto gid_to_ni_exit;
5599 			} else {
5600 				retval = IBT_NODE_RECORDS_NOT_FOUND;
5601 				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
5602 				    "Node Records NOT found - PortGUID %016llX",
5603 				    gid.gid_guid);
5604 			}
5605 		}
5606 		ibcm_dec_hca_acc_cnt(hcap);
5607 
5608 		if (local_node == B_TRUE)
5609 			break;
5610 	}
5611 
5612 gid_to_ni_exit:
5613 	if (guid_array)
5614 		ibt_free_hca_list(guid_array, num_hcas);
5615 
5616 	IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: done. Status %d", retval);
5617 
5618 	return (retval);
5619 }
5620 
5621 
5622 static ibt_status_t
5623 ibcm_get_node_rec(ibmf_saa_handle_t saa_handle, sa_node_record_t *nr_req,
5624     uint64_t component_mask, void *result_p, size_t *len)
5625 {
5626 	ibmf_saa_access_args_t  args;
5627 	size_t			length;
5628 	ibt_status_t		retval;
5629 
5630 	args.sq_attr_id = SA_NODERECORD_ATTRID;
5631 	args.sq_template = nr_req;
5632 	args.sq_access_type = IBMF_SAA_RETRIEVE;
5633 	args.sq_template_length = sizeof (sa_node_record_t);
5634 	args.sq_component_mask = component_mask;
5635 	args.sq_callback = NULL;
5636 	args.sq_callback_arg = NULL;
5637 
5638 	retval = ibcm_contact_sa_access(saa_handle, &args, &length, result_p);
5639 	if (retval != IBT_SUCCESS) {
5640 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: SA Call Failed");
5641 		return (retval);
5642 	}
5643 
5644 	*len = length;
5645 
5646 	/* Validate the returned number of records. */
5647 	if ((result_p != NULL) && (length > 0)) {
5648 		IBTF_DPRINTF_L3(cmlog, "ibcm_get_node_rec: Node Records FOUND");
5649 
5650 		/* Got it, done!. */
5651 		return (IBT_SUCCESS);
5652 	} else {
5653 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: Node Rec NOT found");
5654 		return (IBT_NODE_RECORDS_NOT_FOUND);
5655 	}
5656 }
5657 
5658 
5659 /*
5660  * Function:
5661  *	ibt_get_companion_port_gids()
5662  * Description:
5663  *	Get list of GID's available on a companion port(s) of the specified
5664  *	GID or list of GIDs available on a specified Node GUID/SystemImage GUID.
5665  */
5666 ibt_status_t
5667 ibt_get_companion_port_gids(ib_gid_t gid, ib_guid_t hca_guid,
5668     ib_guid_t sysimg_guid, ib_gid_t **gids_p, uint_t *num_gids_p)
5669 {
5670 	sa_node_record_t	nr_req, *nr_resp;
5671 	void			*res_p;
5672 	ibmf_saa_handle_t	saa_handle;
5673 	int			sa_ret;
5674 	ibt_status_t		retval = IBT_SUCCESS;
5675 	ibcm_hca_info_t		*hcap;
5676 	ibtl_cm_hca_port_t	hport;
5677 	int			i, j;
5678 	uint_t			num_rec;
5679 	ib_guid_t		*guid_array = NULL;
5680 	sa_path_record_t	*path;
5681 	size_t			len;
5682 	uint8_t			npaths;
5683 	uint32_t		num_hcas = 0;
5684 	boolean_t		local_node = B_FALSE;
5685 	boolean_t		local_hca = B_FALSE;
5686 	ib_guid_t		h_guid = hca_guid;
5687 	ib_gid_t		*gidp = NULL, *t_gidp = NULL;
5688 	int			multi_hca_loop = 0;
5689 
5690 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids(%llX:%llX, %llX, "
5691 	    "%llX)", gid.gid_prefix, gid.gid_guid, hca_guid, sysimg_guid);
5692 
5693 	if (((gid.gid_prefix == 0) || (gid.gid_guid == 0)) && (hca_guid == 0) &&
5694 	    (sysimg_guid == 0)) {
5695 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5696 		    "Null Input attribute specified.");
5697 		return (IBT_INVALID_PARAM);
5698 	}
5699 
5700 	if ((num_gids_p == NULL) || (gids_p == NULL)) {
5701 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5702 		    "num_gids_p or gids_p is NULL");
5703 		return (IBT_INVALID_PARAM);
5704 	}
5705 
5706 	*num_gids_p = 0;
5707 
5708 	/* Get the number of HCAs and their GUIDs */
5709 	if ((num_hcas = ibt_get_hca_list(&guid_array)) == 0) {
5710 		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5711 		    "NO HCA's Found on this system");
5712 		return (IBT_NO_HCAS_AVAILABLE);
5713 	}
5714 
5715 	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
5716 	    "ibt_get_hca_list() returned %d hcas", num_hcas);
5717 
5718 	/*
5719 	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
5720 	 * associated port) info via ibtl_cm_get_hca_port() call.
5721 	 */
5722 	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
5723 	if ((gid.gid_prefix != 0) && (gid.gid_guid != 0) &&
5724 	    (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS)) {
5725 
5726 		if ((hca_guid != 0) && (hca_guid != hport.hp_hca_guid)) {
5727 			IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
5728 			    "Invalid GID<->HCAGUID combination specified.");
5729 			retval = IBT_INVALID_PARAM;
5730 			goto get_comp_pgid_exit;
5731 		}
5732 		h_guid = hport.hp_hca_guid;
5733 		local_node = B_TRUE;
5734 
5735 		IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
5736 		    "Local Node: HCA (0x%llX)", h_guid);
5737 	} else if (h_guid) {	/* Is specified HCA GUID - local? */
5738 		for (i = 0; i < num_hcas; i++) {
5739 			if (h_guid == guid_array[i]) {
5740 				local_hca = B_TRUE;
5741 				break;
5742 			}
5743 		}
5744 	} else if (sysimg_guid) { /* Is specified SystemImage GUID - local? */
5745 		for (i = 0; i < num_hcas; i++) {
5746 			ibt_status_t	ret;
5747 			ibt_hca_attr_t	hca_attr;
5748 
5749 			ret = ibt_query_hca_byguid(guid_array[i], &hca_attr);
5750 			if (ret != IBT_SUCCESS) {
5751 				IBTF_DPRINTF_L2(cmlog,
5752 				    "ibt_get_companion_port_gids: HCA(%llX) "
5753 				    "info not found", guid_array[i]);
5754 				retval = IBT_NO_HCAS_AVAILABLE;
5755 				continue;
5756 			}
5757 			if (hca_attr.hca_si_guid == sysimg_guid) {
5758 				if ((hca_guid != 0) &&
5759 				    (hca_guid != hca_attr.hca_node_guid)) {
5760 					IBTF_DPRINTF_L2(cmlog,
5761 					    "ibt_get_companion_port_gids: "
5762 					    "Invalid SysImg<->HCA GUID "
5763 					    "combination specified.");
5764 					retval = IBT_INVALID_PARAM;
5765 					goto get_comp_pgid_exit;
5766 				}
5767 				local_hca = B_TRUE;
5768 				h_guid = hca_attr.hca_node_guid;
5769 				break;
5770 			}
5771 		}
5772 	}
5773 
5774 	if ((local_node == B_TRUE) || (local_hca == B_TRUE)) {
5775 		retval = ibtl_cm_get_local_comp_gids(h_guid, gid, gids_p,
5776 		    num_gids_p);
5777 		goto get_comp_pgid_exit;
5778 	}
5779 
5780 get_comp_for_multihca:
5781 	/* We will be here, if request is for remote node */
5782 	for (i = 0; i < num_hcas; i++) {
5783 		int		multism;
5784 		uint8_t		count = 0;
5785 		int		multi_sm_loop = 0;
5786 		uint_t		k = 0, l;
5787 
5788 		hcap = ibcm_find_hca_entry(guid_array[i]);
5789 		if (hcap == NULL) {
5790 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5791 			    "HCA(%llX) info not found", guid_array[i]);
5792 			retval = IBT_NO_HCAS_AVAILABLE;
5793 			continue;
5794 		}
5795 
5796 		/* 1 - MultiSM, 0 - Single SM */
5797 		multism = ibtl_cm_is_multi_sm(guid_array[i]);
5798 
5799 		for (j = 0; j < hcap->hca_num_ports; j++) {
5800 			ib_gid_t	sgid;
5801 			uint64_t	c_mask = 0;
5802 			ib_guid_t	pg;
5803 			uint_t		port = j;
5804 
5805 get_comp_for_multism:
5806 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5807 			    "Port %d, HCA %llX, MultiSM= %d, Loop=%d",
5808 			    port + 1, h_guid, multism, multi_sm_loop);
5809 
5810 			/* Get SA Access Handle. */
5811 			saa_handle = ibcm_get_saa_handle(hcap, port + 1);
5812 			if (saa_handle == NULL) {
5813 				IBTF_DPRINTF_L2(cmlog,
5814 				    "ibt_get_companion_port_gids: "
5815 				    "Port (%d)  - NOT ACTIVE", port + 1);
5816 				retval = IBT_GIDS_NOT_FOUND;
5817 				continue;
5818 			}
5819 
5820 			/*
5821 			 * Check whether 'gid' and this port has same subnet
5822 			 * prefix. If not, then there is no use in searching
5823 			 * from this port.
5824 			 */
5825 			sgid = hcap->hca_port_info[port].port_sgid0;
5826 			if ((h_guid == 0) && (gid.gid_prefix != 0) &&
5827 			    (multi_sm_loop == 0) &&
5828 			    (gid.gid_prefix != sgid.gid_prefix)) {
5829 				IBTF_DPRINTF_L2(cmlog,
5830 				    "ibt_get_companion_port_gids: SnPrefix of "
5831 				    "GID(%llX) and Port SN_Pfx(%llX) differ",
5832 				    gid.gid_prefix, sgid.gid_prefix);
5833 				retval = IBT_GIDS_NOT_FOUND;
5834 				continue;
5835 			}
5836 
5837 			/*
5838 			 * If HCA GUID or System Image GUID is specified, then
5839 			 * we can achieve our goal sooner!.
5840 			 */
5841 			if ((h_guid == 0) && (sysimg_guid == 0)) {
5842 				/* So only GID info is provided. */
5843 
5844 				/*
5845 				 * First Get Path Records for the specified DGID
5846 				 * from this port (SGID). From Path Records,
5847 				 * note down DLID, then use this DLID as Input
5848 				 * attribute to get NodeRecords.
5849 				 */
5850 				npaths = 1;
5851 				path = NULL;
5852 
5853 				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
5854 				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
5855 				    &path);
5856 				if (sa_ret != IBMF_SUCCESS) {
5857 					IBTF_DPRINTF_L2(cmlog,
5858 					    "ibt_get_companion_port_gids: "
5859 					    "ibmf_saa_gid_to_pathrecords() "
5860 					    "returned error: %d ", sa_ret);
5861 					retval =
5862 					    ibcm_ibmf_analyze_error(sa_ret);
5863 					ibcm_dec_hca_acc_cnt(hcap);
5864 					goto get_comp_pgid_exit;
5865 				} else if ((npaths == 0) || (path == NULL)) {
5866 					IBTF_DPRINTF_L2(cmlog,
5867 					    "ibt_get_companion_port_gids: "
5868 					    "failed (%d) to get path records "
5869 					    "for the DGID (0x%llX) from SGID "
5870 					    "(0x%llX)", sa_ret, gid.gid_guid,
5871 					    sgid.gid_guid);
5872 					retval = IBT_GIDS_NOT_FOUND;
5873 					continue;
5874 				}
5875 
5876 				bzero(&nr_req, sizeof (sa_node_record_t));
5877 				nr_req.LID = path->DLID;	/* LID */
5878 
5879 				IBTF_DPRINTF_L3(cmlog,
5880 				    "ibt_get_companion_port_gids: "
5881 				    "Remote Node: LID = 0x%X", nr_req.LID);
5882 
5883 				/* Free SA_Access memory for path record. */
5884 				kmem_free(path, len);
5885 
5886 				IBTF_DPRINTF_L3(cmlog,
5887 				    "ibt_get_companion_port_gids: SAA Call: "
5888 				    "based on LID ");
5889 
5890 				retval = ibcm_get_node_rec(saa_handle, &nr_req,
5891 				    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
5892 				if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5893 					IBTF_DPRINTF_L2(cmlog,
5894 					    "ibt_get_companion_port_gids: "
5895 					    "failed (%d) to get Node records",
5896 					    retval);
5897 					continue;
5898 				} else if (retval != IBT_SUCCESS) {
5899 					IBTF_DPRINTF_L2(cmlog,
5900 					    "ibt_get_companion_port_gids: "
5901 					    "failed (%d) to get Node records",
5902 					    retval);
5903 					ibcm_dec_hca_acc_cnt(hcap);
5904 					goto get_comp_pgid_exit;
5905 				}
5906 
5907 				nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
5908 				/* Note down HCA GUID info. */
5909 				h_guid = nr_resp->NodeInfo.NodeGUID;
5910 
5911 				IBTF_DPRINTF_L3(cmlog,
5912 				    "ibt_get_companion_port_gids: "
5913 				    "Remote HCA GUID: 0x%llX", h_guid);
5914 
5915 				IBCM_DUMP_NODE_REC(nr_resp);
5916 
5917 				kmem_free(res_p, len);
5918 			}
5919 
5920 			bzero(&nr_req, sizeof (sa_node_record_t));
5921 			if (h_guid != 0) {
5922 				nr_req.NodeInfo.NodeGUID = h_guid;
5923 				c_mask = SA_NODEINFO_COMPMASK_NODEGUID;
5924 			}
5925 
5926 			if (sysimg_guid != 0) {
5927 				nr_req.NodeInfo.SystemImageGUID = sysimg_guid;
5928 				c_mask |= SA_NODEINFO_COMPMASK_SYSIMAGEGUID;
5929 			}
5930 
5931 			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
5932 			    "SAA Call: CMASK= 0x%llX", c_mask);
5933 
5934 			retval = ibcm_get_node_rec(saa_handle, &nr_req, c_mask,
5935 			    &res_p, &len);
5936 			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
5937 				IBTF_DPRINTF_L3(cmlog,
5938 				    "ibt_get_companion_port_gids: "
5939 				    "failed (%d) to get Node records", retval);
5940 				continue;
5941 			} else if (retval != IBT_SUCCESS) {
5942 				IBTF_DPRINTF_L2(cmlog,
5943 				    "ibt_get_companion_port_gids: Error: (%d) "
5944 				    "while getting Node records", retval);
5945 				ibcm_dec_hca_acc_cnt(hcap);
5946 				goto get_comp_pgid_exit;
5947 			}
5948 
5949 			num_rec = len/sizeof (sa_node_record_t);
5950 
5951 			/* We will be here, only if we found some NodeRec */
5952 			if (gid.gid_prefix && gid.gid_guid) {
5953 				nr_resp = (sa_node_record_t *)res_p;
5954 				for (l = 0; l < num_rec; l++, nr_resp++) {
5955 					pg = nr_resp->NodeInfo.PortGUID;
5956 					if (gid.gid_guid != pg)
5957 						count++;
5958 				}
5959 			} else {
5960 				count = num_rec;
5961 			}
5962 
5963 			if (count != 0) {
5964 				if (multi_sm_loop == 1) {
5965 					count += k;
5966 					t_gidp = kmem_zalloc(count *
5967 					    sizeof (ib_gid_t), KM_SLEEP);
5968 
5969 					if ((k != 0) && (gidp != NULL)) {
5970 						bcopy(gidp, t_gidp,
5971 						    k * sizeof (ib_gid_t));
5972 						kmem_free(gidp,
5973 						    k * sizeof (ib_gid_t));
5974 					}
5975 					gidp = t_gidp;
5976 				} else {
5977 					gidp = kmem_zalloc(count *
5978 					    sizeof (ib_gid_t), KM_SLEEP);
5979 				}
5980 				*num_gids_p = count;
5981 				*gids_p = gidp;
5982 
5983 				nr_resp = (sa_node_record_t *)res_p;
5984 				for (l = 0; l < num_rec; l++, nr_resp++) {
5985 					IBCM_DUMP_NODE_REC(nr_resp);
5986 
5987 					pg = nr_resp->NodeInfo.PortGUID;
5988 					IBTF_DPRINTF_L4(cmlog,
5989 					    "ibt_get_companion_port_gids: "
5990 					    "PortGID %llX", pg);
5991 
5992 					if (pg != gid.gid_guid) {
5993 						gidp[k].gid_prefix =
5994 						    sgid.gid_prefix;
5995 						gidp[k].gid_guid = pg;
5996 
5997 						IBTF_DPRINTF_L3(cmlog,
5998 						    "ibt_get_companion_pgids: "
5999 						    "GID[%d] = %llX:%llX", k,
6000 						    gidp[k].gid_prefix,
6001 						    gidp[k].gid_guid);
6002 
6003 						k++;
6004 						if (k == count)
6005 							break;
6006 					}
6007 				}
6008 				retval = IBT_SUCCESS;	/* done!. */
6009 				kmem_free(res_p, len);
6010 				ibcm_dec_hca_acc_cnt(hcap);
6011 				goto get_comp_pgid_exit;
6012 			} else {
6013 				IBTF_DPRINTF_L2(cmlog,
6014 				    "ibt_get_companion_port_gids: "
6015 				    "Companion PortGIDs not available");
6016 				retval = IBT_GIDS_NOT_FOUND;
6017 			}
6018 			/* Deallocate the memory for 'res_p'. */
6019 			kmem_free(res_p, len);
6020 
6021 			/*
6022 			 * If we are on MultiSM setup, then we need to lookout
6023 			 * from that subnet port too.
6024 			 */
6025 			if (multism) {
6026 				/* break if already searched both the subnet */
6027 				if (multi_sm_loop == 1)
6028 					break;
6029 
6030 				port = (j == 0) ? 1 : 0;
6031 				multi_sm_loop = 1;
6032 				goto get_comp_for_multism;
6033 			} else {
6034 				break;
6035 			}
6036 		}
6037 		ibcm_dec_hca_acc_cnt(hcap);
6038 
6039 		/*
6040 		 * We may be on dual HCA with dual SM configured system.  And
6041 		 * the input attr GID was visible from second HCA. So in order
6042 		 * to get the companion portgid we need to re-look from the
6043 		 * first HCA ports.
6044 		 */
6045 		if ((num_hcas > 1) && (i > 0) && (h_guid != 0) &&
6046 		    (multi_hca_loop != 1)) {
6047 			multi_hca_loop = 1;
6048 			goto get_comp_for_multihca;
6049 		}
6050 	}
6051 	if (*num_gids_p == 0)
6052 		retval = IBT_GIDS_NOT_FOUND;
6053 
6054 get_comp_pgid_exit:
6055 	if (guid_array)
6056 		ibt_free_hca_list(guid_array, num_hcas);
6057 
6058 	if ((retval != IBT_SUCCESS) && (*num_gids_p != 0)) {
6059 		retval = IBT_SUCCESS;
6060 	}
6061 
6062 	IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: done. Status %d, "
6063 	    "Found %d GIDs", retval, *num_gids_p);
6064 
6065 	return (retval);
6066 }
6067 
6068 
6069 /* Routines for warlock */
6070 
6071 /* ARGSUSED */
6072 static void
6073 ibcm_dummy_mcg_handler(void *arg, ibt_status_t retval, ibt_mcg_info_t *minfo)
6074 {
6075 	ibcm_join_mcg_tqarg_t	dummy_mcg;
6076 
6077 	dummy_mcg.func = ibcm_dummy_mcg_handler;
6078 
6079 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_mcg_handler: "
6080 	    "dummy_mcg.func %p", dummy_mcg.func);
6081 }
6082 
6083 
6084 /* ARGSUSED */
6085 static void
6086 ibcm_dummy_recycle_rc_handler(ibt_status_t retval, void *arg)
6087 {
6088 	ibcm_taskq_recycle_arg_t	dummy_rc_recycle;
6089 
6090 	dummy_rc_recycle.func = ibcm_dummy_recycle_rc_handler;
6091 
6092 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_recycle_rc_handler: "
6093 	    "dummy_rc_recycle.func %p", dummy_rc_recycle.func);
6094 }
6095 
6096 
6097 /* ARGSUSED */
6098 static ibt_cm_status_t
6099 ibcm_dummy_ud_handler(void *priv, ibt_cm_ud_event_t *event,
6100     ibt_cm_ud_return_args_t *ret_args,
6101     void *priv_data, ibt_priv_data_len_t len)
6102 {
6103 	/*
6104 	 * Let warlock see that ibcm_local_handler_s::actual_cm_handler
6105 	 * points to this routine.
6106 	 */
6107 	ibcm_local_handler_t	p;
6108 	ibcm_ud_state_data_t	dummy_ud;
6109 
6110 	p.actual_cm_handler = ibcm_dummy_ud_handler;
6111 	dummy_ud.ud_cm_handler = ibcm_dummy_ud_handler;
6112 
6113 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_ud_handler: p.actual_cm_handler %p"
6114 	    "dummy_ud.ud_cm_handler %p", p.actual_cm_handler,
6115 	    dummy_ud.ud_cm_handler);
6116 	/*
6117 	 * Call all routines that the client's callback routine could call.
6118 	 */
6119 
6120 	return (IBT_CM_ACCEPT);
6121 }
6122 
6123 /* ARGSUSED */
6124 static ibt_cm_status_t
6125 ibcm_dummy_rc_handler(void *priv, ibt_cm_event_t *event,
6126     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6127 {
6128 	ibcm_state_data_t	dummy_rc;
6129 
6130 	dummy_rc.cm_handler = ibcm_dummy_rc_handler;
6131 
6132 	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_rc_handler: "
6133 	    "dummy_ud.ud_cm_handler %p", dummy_rc.cm_handler);
6134 	/*
6135 	 * Call all routines that the client's callback routine could call.
6136 	 */
6137 
6138 	return (IBT_CM_ACCEPT);
6139 }
6140