xref: /titanic_51/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_recv.c (revision d1a5c8385583011b8adaf259d3460c22595b4a66)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * This file implements the MAD receive logic in IBMF.
29  */
30 
31 #include <sys/ib/mgt/ibmf/ibmf_impl.h>
32 #include <sys/ib/mgt/ibmf/ibmf_saa_impl.h>
33 
34 extern ibmf_state_t *ibmf_statep;
35 extern int ibmf_recv_wqes_per_port;
36 extern int ibmf_send_wqes_posted_per_qp;
37 extern int ibmf_recv_wqes_posted_per_qp;
38 
/*
 * Recover the receive WQE context pointer from a work request ID by
 * masking off the IBMF_RCV_CQE tag bit(s) and storing the result in ptr.
 * The expansion is a single parenthesized assignment expression so that it
 * stays one operand when used inside a larger expression.
 */
#define	IBMF_RECV_WR_ID_TO_ADDR(id, ptr)		 \
	((ptr) = (void *)(uintptr_t)((uint64_t)(id) & ~IBMF_RCV_CQE))

/* QP numbers compared against a QP's number (e.g. iq_qp_num) */
#define	IBMF_QP0_NUM			0
#define	IBMF_QP1_NUM			1

/*
 * BM MAD attribute modifier: request/response bit mask and the value that
 * bit takes for a response (as the names suggest; the users of these two
 * constants are outside this section of the file).
 */
#define	IBMF_BM_MAD_ATTR_MOD_REQRESP_BIT	0x00000001
#define	IBMF_BM_MAD_ATTR_MOD_RESP		0x1
46 
/*
 * Structure definition of entries in the module names table
 */
typedef struct _ibmf_mod_names_t {
	/* module name string; the buffer holds at most 8 bytes */
	char			mod_name[8];
	/*
	 * client type associated with the name
	 * NOTE(review): the table using this type is not in this section;
	 * confirm how mgt_class is matched.
	 */
	ibmf_client_type_t	mgt_class;
} ibmf_mod_names_t;
54 
/*
 * Argument block handed to ibmf_module_load() through taskq_dispatch()
 * when a MAD arrives for a class with no registered client.
 */
typedef struct _ibmf_mod_load_args_t {
	/* CI context the MAD was received on */
	ibmf_ci_t		*cip;
	/* receive WQE context that holds the received MAD */
	ibmf_recv_wqe_t		*recv_wqep;
	/* client module name string (not copied; points at caller storage) */
	char			*modname;
	/* client type resolved for the received MAD */
	ibmf_client_type_t	ibmf_class;
} ibmf_mod_load_args_t;
61 
/*
 * ibmf_send_wqes_posted_per_qp and ibmf_recv_wqes_posted_per_qp are already
 * declared with the other externs near the top of this file, so only
 * ibmf_trace_level needs to be declared here.
 */
extern int ibmf_trace_level;
65 
66 static void ibmf_i_do_recv_cb(void *taskq_arg);
67 static int ibmf_i_repost_recv_buffer(ibmf_ci_t *cip,
68     ibmf_recv_wqe_t *recv_wqep);
69 static int ibmf_i_get_class(ib_mad_hdr_t *madhdrp,
70     ibmf_qp_handle_t dest_ibmf_qp_handle, ib_lid_t slid,
71     ibmf_client_type_t *dest_classp);
72 static void ibmf_i_handle_non_rmpp(ibmf_client_t *clientp,
73     ibmf_msg_impl_t *msgimplp, uchar_t *mad);
74 static void ibmf_get_mod_name(uint8_t mad_class, ibmf_client_type_t class,
75     char *modname);
76 static void ibmf_module_load(void *taskq_arg);
77 static void ibmf_send_busy(ibmf_mod_load_args_t *modlargsp);
78 
/*
 * Client type classification helpers.
 *
 * The first three compare the 0x000F0000 field of a client type against
 * the agent, manager and agent+manager identifiers. IS_MANDATORY_CLASS()
 * is true for the PERF_AGENT and BM_AGENT client types.
 *
 * The argument is parenthesized in every use so that an expression
 * argument (for example "a | b") is evaluated as a unit before the mask
 * or comparison is applied. IS_MANDATORY_CLASS() evaluates its argument
 * twice, so do not pass an expression with side effects.
 */
#define	AGENT_CLASS(class)					\
	((((class) & 0x000F0000) == IBMF_AGENT_ID))
#define	MANAGER_CLASS(class)				\
	((((class) & 0x000F0000) == IBMF_MANAGER_ID))
#define	AGENT_MANAGER_CLASS(class)				\
	((((class) & 0x000F0000) == IBMF_AGENT_MANAGER_ID))
#define	IS_MANDATORY_CLASS(class)			\
	(((class) == PERF_AGENT) || ((class) == BM_AGENT))
87 
/*
 * Buffer that receives the client module name built by ibmf_get_mod_name()
 * in ibmf_i_handle_recv_completion(); a pointer to it is passed on to
 * ibmf_module_load().
 * NOTE(review): this is a single file-scope buffer written without any
 * visible locking -- confirm that concurrent receive completions cannot
 * overwrite the name before the module load task reads it.
 */
char 	ibmf_client_modname[16];
89 
90 /*
91  * ibmf_i_handle_recv_completion():
92  *	Process the WQE from the RQ, obtain the management class of the
93  *	packet and retrieve the corresponding client context
94  */
95 void
96 ibmf_i_handle_recv_completion(ibmf_ci_t *cip, ibt_wc_t *wcp)
97 {
98 	int			ret;
99 	ibmf_client_type_t	class;
100 	ibmf_client_t		*clientp;
101 	ib_mad_hdr_t		*madhdrp;
102 	ibmf_recv_wqe_t		*recv_wqep;
103 	ibt_recv_wr_t		*rwrp;
104 	ibmf_qp_handle_t	ibmf_qp_handle;
105 	struct kmem_cache	*kmem_cachep;
106 	ibmf_alt_qp_t		*altqp;
107 
108 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
109 	    ibmf_i_handle_recv_completion_start, IBMF_TNF_TRACE, "",
110 	    "ibmf_i_handle_recv_completion() enter, cip = %p, wcp = %p\n",
111 	    tnf_opaque, cip, cip, tnf_opaque, wcp, wcp);
112 
113 	mutex_enter(&cip->ci_ud_dest_list_mutex);
114 	if (cip->ci_ud_dest_list_count < IBMF_UD_DEST_LO_WATER_MARK) {
115 		ret = ibmf_ud_dest_tq_disp(cip);
116 		if (ret == 0) {
117 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L3,
118 			    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
119 			    "", "ibmf_i_handle_recv_completion(): %s\n",
120 			    tnf_string, msg, "taskq dispatch of ud_dest "
121 			    "population thread failed");
122 		}
123 	}
124 	mutex_exit(&cip->ci_ud_dest_list_mutex);
125 
126 	ASSERT(IBMF_IS_RECV_WR_ID(wcp->wc_id));
127 	IBMF_RECV_WR_ID_TO_ADDR(wcp->wc_id, recv_wqep);
128 
129 	rwrp = &recv_wqep->recv_wr;
130 
131 	/* Retrieve the QP handle from the receive WQE context */
132 	ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
133 
134 	/* Get the WQE kmem cache pointer based on the QP type */
135 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
136 		kmem_cachep = cip->ci_recv_wqes_cache;
137 	} else {
138 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
139 		kmem_cachep = altqp->isq_recv_wqes_cache;
140 	}
141 
142 	/*
143 	 * if the wqe is being flushed due to shutting down of the qp, free
144 	 * the wqe and return.
145 	 */
146 	if (wcp->wc_status == IBT_WC_WR_FLUSHED_ERR) {
147 		kmem_free(rwrp->wr_sgl, IBMF_MAX_RQ_WR_SGL_ELEMENTS *
148 		    sizeof (ibt_wr_ds_t));
149 		kmem_cache_free(kmem_cachep, recv_wqep);
150 		mutex_enter(&cip->ci_mutex);
151 		IBMF_SUB32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
152 		mutex_exit(&cip->ci_mutex);
153 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
154 			mutex_enter(&cip->ci_mutex);
155 			cip->ci_wqes_alloced--;
156 			if (cip->ci_wqes_alloced == 0)
157 				cv_signal(&cip->ci_wqes_cv);
158 			mutex_exit(&cip->ci_mutex);
159 		} else {
160 			mutex_enter(&altqp->isq_mutex);
161 			altqp->isq_wqes_alloced--;
162 			if (altqp->isq_wqes_alloced == 0)
163 				cv_signal(&altqp->isq_wqes_cv);
164 			mutex_exit(&altqp->isq_mutex);
165 		}
166 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3,
167 		    ibmf_i_handle_recv_completion, IBMF_TNF_TRACE,
168 		    "", "ibmf_i_handle_recv_completion(): %s\n",
169 		    tnf_string, msg, "recv wqe flushed");
170 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
171 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
172 		    "", "ibmf_i_handle_recv_completion() exit\n");
173 		return;
174 	}
175 
176 	/*
177 	 * Dynamic Posting of WQEs to the Receive Queue (RQ) of the QP:
178 	 * If the number of RQ WQEs posted to the QP drops below half
179 	 * the initial number of RQ WQEs posted to the QP, then, one additional
180 	 * WQE is posted to the RQ of the QP while processing this CQE.
181 	 */
182 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
183 		ibmf_qp_t *qpp = recv_wqep->recv_qpp;
184 
185 		mutex_enter(&qpp->iq_mutex);
186 		qpp->iq_rwqes_posted--;
187 		if (qpp->iq_rwqes_posted <= (ibmf_recv_wqes_per_port >> 1)) {
188 			mutex_exit(&qpp->iq_mutex);
189 
190 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
191 			    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE, "",
192 			    "ibmf_i_handle_recv_compl(): %s, "
193 			    "QP# = %d\n", tnf_string, msg,
194 			    "Posting more RQ WQEs",
195 			    tnf_int, qpnum, qpp->iq_qp_num);
196 
197 			/* Post an additional WQE to the RQ */
198 			ret = ibmf_i_post_recv_buffer(cip, qpp,
199 			    B_FALSE, ibmf_qp_handle);
200 			if (ret != IBMF_SUCCESS) {
201 				IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
202 				    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE,
203 				    "", "ibmf_i_handle_recv_compl(): %s, "
204 				    "status = %d\n", tnf_string, msg,
205 				    "ibmf_i_post_recv_buffer() failed",
206 				    tnf_int, status, ret);
207 			}
208 
209 			mutex_enter(&qpp->iq_mutex);
210 		}
211 		mutex_exit(&qpp->iq_mutex);
212 	} else {
213 		mutex_enter(&altqp->isq_mutex);
214 		altqp->isq_rwqes_posted--;
215 		if (altqp->isq_rwqes_posted <= (ibmf_recv_wqes_per_port >> 1)) {
216 			mutex_exit(&altqp->isq_mutex);
217 
218 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
219 			    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE, "",
220 			    "ibmf_i_handle_recv_compl(): %s, "
221 			    "QP# = %d\n", tnf_string, msg,
222 			    "Posting more RQ WQEs",
223 			    tnf_int, qpnum, altqp->isq_qpn);
224 
225 			/* Post an additional WQE to the RQ */
226 			ret = ibmf_i_post_recv_buffer(cip, NULL,
227 			    B_FALSE, ibmf_qp_handle);
228 			if (ret != IBMF_SUCCESS) {
229 				IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
230 				    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE,
231 				    "", "ibmf_i_handle_recv_compl(): %s, "
232 				    "status = %d\n", tnf_string, msg,
233 				    "ibmf_i_post_recv_buffer() failed",
234 				    tnf_int, status, ret);
235 			}
236 
237 			mutex_enter(&altqp->isq_mutex);
238 		}
239 		mutex_exit(&altqp->isq_mutex);
240 	}
241 
242 	/*
243 	 * for all other completion errors, repost the wqe, and if that
244 	 * fails, free the wqe and return.
245 	 */
246 	if (wcp->wc_status != IBT_WC_SUCCESS) {
247 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
248 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
249 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
250 		    "", "ibmf_i_handle_recv_completion(): %s, wc_status = %d\n",
251 		    tnf_string, msg, "bad completion status received",
252 		    tnf_uint, wc_status, wcp->wc_status);
253 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
254 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
255 		    "", "ibmf_i_handle_recv_completion() exit\n");
256 		return;
257 	}
258 
259 	/* find the client corresponding to this recv cqe */
260 	madhdrp = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
261 	    sizeof (ib_grh_t));
262 
263 	/* drop packet if MAD Base Version is not as expected */
264 	if (madhdrp->BaseVersion != MAD_CLASS_BASE_VERS_1) {
265 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
266 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
267 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
268 		    "", "ibmf_i_handle_recv_completion(): %s\n",
269 		    tnf_string, msg, "bad MAD version");
270 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
271 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
272 		    "", "ibmf_i_handle_recv_completion() exit\n");
273 		return;
274 	}
275 
276 	if (ibmf_i_get_class(madhdrp, recv_wqep->recv_ibmf_qp_handle,
277 	    wcp->wc_slid, &class) != IBMF_SUCCESS) {
278 		/* bad class & type? */
279 #ifdef DEBUG
280 		ibmf_i_dump_wcp(cip, wcp, recv_wqep);
281 #endif
282 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
283 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
284 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
285 		    "", "ibmf_i_handle_recv_completion(): %s\n",
286 		    tnf_string, msg, "bad class/type");
287 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
288 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
289 		    "", "ibmf_i_handle_recv_completion() exit\n");
290 		return;
291 	}
292 
293 	ret = ibmf_i_lookup_client_by_mgmt_class(cip, recv_wqep->recv_port_num,
294 	    class, &clientp);
295 	if (ret == IBMF_SUCCESS) {
296 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
297 		recv_wqep->recv_client = clientp;
298 		recv_wqep->recv_wc = *wcp; /* struct copy */
299 
300 		/*
301 		 * Increment the kstats for the number of active receiver side
302 		 * callbacks
303 		 */
304 		mutex_enter(&clientp->ic_kstat_mutex);
305 		IBMF_ADD32_KSTATS(clientp, recv_cb_active, 1);
306 		mutex_exit(&clientp->ic_kstat_mutex);
307 
308 		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
309 			/* Dispatch the taskq thread to do further processing */
310 			ret = taskq_dispatch(clientp->ic_recv_taskq,
311 			    ibmf_i_do_recv_cb, recv_wqep, TQ_NOSLEEP);
312 			if (ret == 0) {
313 				mutex_enter(&clientp->ic_kstat_mutex);
314 				IBMF_SUB32_KSTATS(clientp, recv_cb_active, 1);
315 				mutex_exit(&clientp->ic_kstat_mutex);
316 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
317 				    ibmf_i_handle_recv_completion_err,
318 				    IBMF_TNF_ERROR, "",
319 				    "ibmf_i_handle_recv_completion(): %s\n",
320 				    tnf_string, msg, "dispatch failed");
321 				(void) ibmf_i_repost_recv_buffer(cip,
322 				    recv_wqep);
323 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
324 				    ibmf_i_handle_recv_completion_end,
325 				    IBMF_TNF_TRACE, "",
326 				    "ibmf_i_handle_recv_completion() exit\n");
327 				return;
328 			}
329 		} else {
330 			ibmf_i_do_recv_cb((void *)recv_wqep);
331 		}
332 
333 		/*
334 		 * Decrement the kstats for the number of active receiver side
335 		 * callbacks
336 		 */
337 		mutex_enter(&clientp->ic_kstat_mutex);
338 		IBMF_SUB32_KSTATS(clientp, recv_cb_active, 1);
339 		mutex_exit(&clientp->ic_kstat_mutex);
340 
341 	} else {
342 		/*
343 		 * A client has not registered to receive MADs of this
344 		 * management class. IBMF must attempt to load the
345 		 * client and request a resend of the request MAD.
346 		 * The name of the client MAD is derived using a
347 		 * convention described in PSARC case 2003/753.
348 		 */
349 
350 		ibmf_mod_load_args_t	*modlargsp;
351 
352 		/*
353 		 * HCA driver handles the Performance management
354 		 * class MAD's. It registers with the IBMF during early
355 		 * boot and unregisters during detach and during
356 		 * HCA unconfigure operation. We come here
357 		 * 1. Before HCA registers with IBMF
358 		 * 	Drop the MAD. Since this is a UD MAD,
359 		 *	sender will resend the request
360 		 * 2. After HCA unregistered with IBMF during DR operation.
361 		 *	Since HCA is going away, we can safely drop the PMA
362 		 *	MAD's here.
363 		 * Solaris does not support BM_AGENT and so drop the BM MAD's
364 		 */
365 		if ((class == PERF_AGENT) || (class == BM_AGENT)) {
366 			(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
367 			return;
368 		}
369 
370 		recv_wqep->recv_wc = *wcp; /* struct copy */
371 
372 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L4,
373 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR, "",
374 		    "ibmf_i_handle_recv_completion(): %s, port = %d, "
375 		    "class = 0x%x\n",
376 		    tnf_string, msg, "no client registered", tnf_uint, port,
377 		    recv_wqep->recv_port_num, tnf_opaque, class, class);
378 
379 		/* Construct the IBMF client module name */
380 		ibmf_get_mod_name(madhdrp->MgmtClass, class,
381 		    ibmf_client_modname);
382 
383 		/* Load the module using a taskq thread */
384 		modlargsp = (ibmf_mod_load_args_t *)kmem_zalloc(
385 		    sizeof (ibmf_mod_load_args_t), KM_NOSLEEP);
386 		if (modlargsp != NULL) {
387 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*modlargsp))
388 			modlargsp->cip		= cip;
389 			modlargsp->recv_wqep	= recv_wqep;
390 			modlargsp->modname	= ibmf_client_modname;
391 			modlargsp->ibmf_class	= class;
392 			ret = taskq_dispatch(ibmf_statep->ibmf_taskq,
393 			    ibmf_module_load, modlargsp, TQ_NOSLEEP);
394 			if (ret == 0) {
395 				kmem_free(modlargsp,
396 				    sizeof (ibmf_mod_load_args_t));
397 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
398 				    ibmf_i_handle_recv_completion_error,
399 				    IBMF_TNF_TRACE, "",
400 				    "ibmf_i_handle_recv_completion(): Failed "
401 				    "to dispatch ibmf_module_load taskq\n");
402 				(void) ibmf_i_repost_recv_buffer(cip,
403 				    recv_wqep);
404 			}
405 		} else {
406 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
407 			    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
408 			    "", "ibmf_i_handle_recv_completion(): "
409 			    "Failed to allocate memory for modlargs\n");
410 			(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
411 		}
412 	}
413 
414 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
415 	    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE, "",
416 	    "ibmf_i_handle_recv_completion() exit\n");
417 }
418 
419 /*
420  * ibmf_i_do_recv_cb():
421  *	This routine does the following:
422  *	o looks for a message in the client's message list
423  *	o creates a new message if one does not exist for unsolicited data
424  *	o invoke routines to do specific handling for rmpp and non-rmpp cases
425  *	o on a failure, the receive WQE is reposted to the RQ
426  */
427 static void
428 ibmf_i_do_recv_cb(void *taskq_arg)
429 {
430 	ibt_wc_t		*wcp;
431 	ibmf_msg_impl_t		*msgimplp;
432 	ibmf_client_t		*clientp;
433 	ibmf_addr_info_t	addrinfo;
434 	ibmf_recv_wqe_t		*recv_wqep;
435 	ib_grh_t		*ib_grh;
436 	boolean_t		grhpresent;
437 	ibmf_qp_handle_t	ibmf_qp_handle;
438 	ib_mad_hdr_t		*mad_hdr;
439 	ibmf_rmpp_hdr_t		*rmpp_hdr;
440 	ibmf_alt_qp_t		*qpp;
441 	ib_gid_t		gid;
442 	ib_lid_t		lid;
443 	int			msg_trans_state_flags, msg_flags;
444 	uint_t			ref_cnt;
445 	timeout_id_t		msg_rp_unset_id, msg_tr_unset_id;
446 	timeout_id_t		msg_rp_set_id, msg_tr_set_id;
447 	int			status;
448 	saa_port_t		*saa_portp;
449 
450 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
451 
452 	/* The taskq_arg argument is a pointer to the receive WQE context */
453 	recv_wqep = taskq_arg;
454 
455 	/* Retrieve the QP handle from the receive WQE context */
456 	ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
457 
458 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
459 	    ibmf_i_do_recv_cb_start, IBMF_TNF_TRACE, "",
460 	    "ibmf_i_do_recv_cb() enter, recv_wqep = %p\n",
461 	    tnf_opaque, recv_wqep, recv_wqep);
462 
463 	/* Retrieve the client context pointer from the receive WQE context */
464 	clientp = recv_wqep->recv_client;
465 
466 	/* Get a pointer to the IBT work completion structure */
467 	wcp = &recv_wqep->recv_wc;
468 
469 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wcp))
470 
471 	/*
472 	 * Identify the port by the  LID or GID depending on whether the
473 	 * Global Route Header is valid or not
474 	 */
475 	if (wcp->wc_flags & IBT_WC_GRH_PRESENT) {
476 		grhpresent = B_TRUE;
477 		ib_grh = (ib_grh_t *)recv_wqep->recv_mem;
478 		gid.gid_prefix	= b2h64(ib_grh->SGID.gid_prefix);
479 		gid.gid_guid 	= b2h64(ib_grh->SGID.gid_guid);
480 	} else {
481 		grhpresent = B_FALSE;
482 		lid = wcp->wc_slid;
483 	}
484 
485 	/* Get a pointer to the MAD header */
486 	mad_hdr = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
487 	    sizeof (ib_grh_t));
488 
489 	/* Get a pointer to the RMPP header */
490 	rmpp_hdr = (ibmf_rmpp_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
491 	    sizeof (ib_grh_t) + sizeof (ib_mad_hdr_t));
492 
493 	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3,
494 	    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
495 	    "ibmf_i_do_recv_cb(): %s, tid = %016" PRIx64 ", class = 0x%x, "
496 	    "attrID = 0x%x, lid = 0x%x\n",
497 	    tnf_string, msg, "Received MAD", tnf_opaque, tid,
498 	    b2h64(mad_hdr->TransactionID), tnf_opaque, class,
499 	    mad_hdr->MgmtClass, tnf_opaque, attr_id,
500 	    b2h16(mad_hdr->AttributeID), tnf_opaque, remote_lid, lid);
501 
502 	/*
503 	 * Look for the matching message in the client's message list
504 	 * NOTE: if the message is found, the message reference count will
505 	 * have been increased by 1.
506 	 */
507 	msgimplp = ibmf_i_find_msg(clientp, b2h64(mad_hdr->TransactionID),
508 	    mad_hdr->MgmtClass, mad_hdr->R_Method, lid, &gid, grhpresent,
509 	    rmpp_hdr, IBMF_REG_MSG_LIST);
510 
511 	/*
512 	 * If the message is not on the regular message list, search
513 	 * for it in the termination message list.
514 	 */
515 	if (msgimplp == NULL) {
516 		msgimplp = ibmf_i_find_msg(clientp,
517 		    b2h64(mad_hdr->TransactionID), mad_hdr->MgmtClass,
518 		    mad_hdr->R_Method, lid, &gid, grhpresent, rmpp_hdr,
519 		    IBMF_TERM_MSG_LIST);
520 	}
521 
522 	if (msgimplp != NULL) {
523 
524 		/* if this packet is from the SA */
525 		if (clientp->ic_client_info.client_class == SUBN_ADM_MANAGER) {
526 
527 			/*
528 			 * ibmf_saa's callback arg is its saa_portp;
529 			 * take advantage of this fact to quickly update the
530 			 * port's SA uptime.  ibmf_saa uses the up time to
531 			 * determine if the SA is still alive
532 			 */
533 			saa_portp = clientp->ic_async_cb_arg;
534 
535 			/* update the SA uptime */
536 			mutex_enter(&saa_portp->saa_pt_mutex);
537 
538 			saa_portp->saa_pt_sa_uptime = gethrtime();
539 
540 			mutex_exit(&saa_portp->saa_pt_mutex);
541 		}
542 
543 		mutex_enter(&msgimplp->im_mutex);
544 
545 		/*
546 		 * Clear timers for transactions of solicited incoming packets
547 		 */
548 		if (msgimplp->im_rp_timeout_id != 0) {
549 			ibmf_i_unset_timer(msgimplp, IBMF_RESP_TIMER);
550 		}
551 
552 		/*
553 		 * If a MAD is received in the middle of an RMPP receive
554 		 * transaction, and the MAD's RMPPFlags.Active bit is 0,
555 		 * drop the MAD
556 		 */
557 		if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
558 		    (msgimplp->im_flags & IBMF_MSG_FLAGS_RECV_RMPP) &&
559 		    ((rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE) == 0)) {
560 			mutex_exit(&msgimplp->im_mutex);
561 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
562 			    recv_wqep);
563 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
564 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
565 			    "ibmf_i_do_recv_cb(): %s, msg = %p\n",
566 			    tnf_string, msg,
567 			    "Non-RMPP MAD received in RMPP transaction, "
568 			    "dropping MAD", tnf_opaque, msgimplp, msgimplp);
569 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
570 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
571 			    "ibmf_i_do_recv_cb() exit\n");
572 			return;
573 		}
574 
575 		/*
576 		 * If the message has been marked unitialized or done
577 		 * release the message mutex and return
578 		 */
579 		if ((msgimplp->im_trans_state_flags &
580 		    IBMF_TRANS_STATE_FLAG_DONE) ||
581 		    (msgimplp->im_trans_state_flags &
582 		    IBMF_TRANS_STATE_FLAG_UNINIT)) {
583 			IBMF_MSG_DECR_REFCNT(msgimplp);
584 			msg_trans_state_flags = msgimplp->im_trans_state_flags;
585 			msg_flags = msgimplp->im_flags;
586 			ref_cnt = msgimplp->im_ref_count;
587 			mutex_exit(&msgimplp->im_mutex);
588 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
589 			    recv_wqep);
590 			/*
591 			 * This thread may notify the client only if the
592 			 * transaction is done, the message has been removed
593 			 * from the client's message list, and the message
594 			 * reference count is 0.
595 			 * If the transaction is done, and the message reference
596 			 * count = 0, there is still a possibility that a
597 			 * packet could arrive for the message and its reference
598 			 * count increased if the message is still on the list.
599 			 * If the message is still on the list, it will be
600 			 * removed by a call to ibmf_i_client_rem_msg() at
601 			 * the completion point of the transaction.
602 			 * So, the reference count should be checked after the
603 			 * message has been removed.
604 			 */
605 			if ((msg_trans_state_flags &
606 			    IBMF_TRANS_STATE_FLAG_DONE) &&
607 			    !(msg_flags & IBMF_MSG_FLAGS_ON_LIST) &&
608 			    (ref_cnt == 0)) {
609 
610 				ibmf_i_notify_sequence(clientp, msgimplp,
611 				    msg_flags);
612 
613 			}
614 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
615 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
616 			    "ibmf_i_do_recv_cb(): %s, msg = %p\n",
617 			    tnf_string, msg,
618 			    "Message already marked for removal, dropping MAD",
619 			    tnf_opaque, msgimplp, msgimplp);
620 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
621 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
622 			    "ibmf_i_do_recv_cb() exit\n");
623 			return;
624 		}
625 	} else {
626 		/* unsolicited message packet */
627 
628 		/*
629 		 * Check if the client context, the alternate QP context
630 		 * (if not the default QP), and the incoming MAD support RMPP
631 		 */
632 		if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
633 		    (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE)) {
634 
635 			/* Only unsolicited packets should be data seg 1 */
636 			if ((rmpp_hdr->rmpp_flags &
637 			    IBMF_RMPP_FLAGS_FIRST_PKT) == 0) {
638 				(void) ibmf_i_repost_recv_buffer(
639 				    clientp->ic_myci, recv_wqep);
640 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L3,
641 				    ibmf_i_do_recv_cb_error, IBMF_TNF_TRACE, "",
642 				    "ibmf_i_do_recv_cb(): %s\n",
643 				    tnf_string, msg,
644 				    "unsolicited rmpp packet not first packet");
645 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
646 				    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
647 				    "ibmf_i_do_recv_cb() exit\n");
648 				return;
649 			}
650 		}
651 
652 		/*
653 		 * Before we alloc a message context, check to see if
654 		 * a callback has been registered with the client
655 		 * for this unsolicited message.
656 		 * If one has been registered, increment the recvs active
657 		 * count to get the teardown routine to wait until
658 		 * this callback is complete.
659 		 */
660 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
661 
662 			mutex_enter(&clientp->ic_mutex);
663 			if (clientp->ic_recv_cb == NULL) {
664 				mutex_exit(&clientp->ic_mutex);
665 				(void) ibmf_i_repost_recv_buffer(
666 				    clientp->ic_myci, recv_wqep);
667 				IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
668 				    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
669 				    "ibmf_i_do_recv_cb(): %s, class %x\n",
670 				    tnf_string, msg,
671 				    "ibmf_tear_down_recv_cb already occurred",
672 				    tnf_opaque, class,
673 				    clientp->ic_client_info.client_class);
674 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
675 				    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
676 				    "ibmf_i_do_recv_cb() exit\n");
677 				return;
678 			}
679 			IBMF_RECV_CB_SETUP(clientp);
680 			mutex_exit(&clientp->ic_mutex);
681 		} else {
682 			qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
683 
684 			mutex_enter(&qpp->isq_mutex);
685 			if (qpp->isq_recv_cb == NULL) {
686 				mutex_exit(&qpp->isq_mutex);
687 				(void) ibmf_i_repost_recv_buffer(
688 				    clientp->ic_myci, recv_wqep);
689 				IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
690 				    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
691 				    "ibmf_i_do_recv_cb(): %s, class %x\n",
692 				    tnf_string, msg,
693 				    "ibmf_tear_down_recv_cb already occurred",
694 				    tnf_opaque, class,
695 				    clientp->ic_client_info.client_class);
696 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
697 				    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
698 				    "ibmf_i_do_recv_cb() exit\n");
699 				return;
700 			}
701 			IBMF_ALT_RECV_CB_SETUP(qpp);
702 			mutex_exit(&qpp->isq_mutex);
703 		}
704 
705 		/*
706 		 * Allocate a message context
707 		 */
708 		msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(
709 		    sizeof (ibmf_msg_impl_t), KM_NOSLEEP);
710 
711 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
712 
713 		/* If we cannot allocate memory, drop the packet and clean up */
714 		if (msgimplp == NULL) {
715 			if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
716 				mutex_enter(&clientp->ic_mutex);
717 				IBMF_RECV_CB_CLEANUP(clientp);
718 				mutex_exit(&clientp->ic_mutex);
719 			} else {
720 				qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
721 				mutex_enter(&qpp->isq_mutex);
722 				IBMF_ALT_RECV_CB_CLEANUP(qpp);
723 				mutex_exit(&qpp->isq_mutex);
724 			}
725 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
726 			    recv_wqep);
727 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
728 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
729 			    "ibmf_i_do_recv_cb(): %s\n", tnf_string, msg,
730 			    "mem allocation failure");
731 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
732 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
733 			    "ibmf_i_do_recv_cb() exit\n");
734 			return;
735 		}
736 
737 		/* Get the port's base LID if it's not in the client context */
738 		if ((clientp->ic_base_lid == 0) &&
739 		    (clientp->ic_qp->iq_qp_num != 0)) {
740 			(void) ibt_get_port_state_byguid(
741 			    clientp->ic_client_info.ci_guid,
742 			    clientp->ic_client_info.port_num, NULL,
743 			    &clientp->ic_base_lid);
744 			if (clientp->ic_base_lid == 0) {
745 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
746 				    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
747 				    "ibmf_i_do_recv_cb(): %s\n",
748 				    tnf_string, msg, "base_lid is undefined");
749 			}
750 		}
751 
752 		/* Set up address information */
753 		addrinfo.ia_local_lid = clientp->ic_base_lid +
754 		    wcp->wc_path_bits;
755 		addrinfo.ia_remote_lid = wcp->wc_slid;
756 		addrinfo.ia_remote_qno = wcp->wc_qpn;
757 
758 		/* Get the pkey, including the correct partiton membership */
759 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
760 			if (recv_wqep->recv_qpp->iq_qp_num == IBMF_QP1_NUM) {
761 
762 				/*
763 				 * here too we expect the pkey index in the work
764 				 * completion belongs to a pkey in the pkey
765 				 * table
766 				 */
767 				status = ibmf_i_pkey_ix_to_key(
768 				    clientp->ic_myci, recv_wqep->recv_port_num,
769 				    wcp->wc_pkey_ix, &addrinfo.ia_p_key);
770 				if (status != IBMF_SUCCESS) {
771 					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
772 					    DPRINT_L1, ibmf_i_do_recv_cb_error,
773 					    IBMF_TNF_ERROR, "",
774 					    "ibmf_i_do_recv_cb(): "
775 					    "get_pkey failed for ix %d,"
776 					    "status = %d\n", tnf_uint,
777 					    pkeyix, wcp->wc_pkey_ix, tnf_uint,
778 					    ibmf_status, status);
779 					mutex_enter(&clientp->ic_mutex);
780 					IBMF_RECV_CB_CLEANUP(clientp);
781 					mutex_exit(&clientp->ic_mutex);
782 					(void) ibmf_i_repost_recv_buffer(
783 					    clientp->ic_myci, recv_wqep);
784 					mutex_destroy(&msgimplp->im_mutex);
785 					cv_destroy(&msgimplp->im_trans_cv);
786 					kmem_free(msgimplp,
787 					    sizeof (ibmf_msg_impl_t));
788 					IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
789 					    ibmf_i_do_recv_cb_end,
790 					    IBMF_TNF_TRACE, "",
791 					    "ibmf_i_do_recv_cb() exit\n");
792 					return;
793 				}
794 			}
795 			addrinfo.ia_q_key = IBMF_MGMT_Q_KEY;
796 		} else {
797 			qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
798 
799 			/* For alternate QPs, the pkey is in the QP context */
800 			mutex_enter(&qpp->isq_mutex);
801 			addrinfo.ia_p_key = qpp->isq_pkey;
802 			addrinfo.ia_q_key = qpp->isq_qkey;
803 			mutex_exit(&qpp->isq_mutex);
804 		}
805 
806 		addrinfo.ia_service_level = wcp->wc_sl;
807 		msgimplp->im_local_addr = addrinfo;
808 
809 		/* Initialize the message context */
810 		cv_init(&msgimplp->im_trans_cv, NULL, CV_DRIVER, NULL);
811 		mutex_init(&msgimplp->im_mutex, NULL, MUTEX_DRIVER, NULL);
812 		msgimplp->im_client = clientp;
813 		msgimplp->im_qp_hdl = ibmf_qp_handle;
814 		msgimplp->im_flags = 0;
815 		msgimplp->im_unsolicited = B_TRUE;
816 		msgimplp->im_tid = b2h64(mad_hdr->TransactionID);
817 		msgimplp->im_mgt_class = mad_hdr->MgmtClass;
818 		msgimplp->im_retrans.retrans_retries = IBMF_RETRANS_DEF_RETRIES;
819 		msgimplp->im_retrans.retrans_rtv = IBMF_RETRANS_DEF_RTV;
820 		msgimplp->im_retrans.retrans_rttv = IBMF_RETRANS_DEF_RTTV;
821 		msgimplp->im_retrans.retrans_trans_to =
822 		    IBMF_RETRANS_DEF_TRANS_TO;
823 		msgimplp->im_rmpp_ctx.rmpp_state = IBMF_RMPP_STATE_UNDEFINED;
824 		msgimplp->im_rmpp_ctx.rmpp_respt = IBMF_RMPP_DEFAULT_RRESPT;
825 		IBMF_MSG_INCR_REFCNT(msgimplp);
826 		msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_UNINIT;
827 
828 		/*
829 		 * Initialize (and possibly allocate) the IBT UD destination
830 		 * address handle.
831 		 */
832 		status = ibmf_i_alloc_ud_dest(clientp, msgimplp,
833 		    &msgimplp->im_ud_dest, B_FALSE);
834 		if (status != IBMF_SUCCESS) {
835 			if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
836 				mutex_enter(&clientp->ic_mutex);
837 				IBMF_RECV_CB_CLEANUP(clientp);
838 				mutex_exit(&clientp->ic_mutex);
839 			} else {
840 				qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
841 				mutex_enter(&qpp->isq_mutex);
842 				IBMF_ALT_RECV_CB_CLEANUP(qpp);
843 				mutex_exit(&qpp->isq_mutex);
844 			}
845 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
846 			    recv_wqep);
847 			mutex_destroy(&msgimplp->im_mutex);
848 			cv_destroy(&msgimplp->im_trans_cv);
849 			kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
850 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
851 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
852 			    "ibmf_i_do_recv_cb(): %s, status = %d\n",
853 			    tnf_string, msg, "alloc ah failed", tnf_uint,
854 			    ibmf_status, status);
855 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
856 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
857 			    "ibmf_i_do_recv_cb() exit\n");
858 			return;
859 		}
860 
861 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*msgimplp))
862 
863 		/* add message to client's list */
864 		ibmf_i_client_add_msg(clientp, msgimplp);
865 
866 		mutex_enter(&msgimplp->im_mutex);
867 
868 		/* no one should have touched our state */
869 		ASSERT(msgimplp->im_trans_state_flags ==
870 		    IBMF_TRANS_STATE_FLAG_UNINIT);
871 
872 		/* transition out of uninit state */
873 		msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_INIT;
874 	}
875 
876 	/* fill in the grh with the contents of the recv wqe */
877 	if (grhpresent == B_TRUE) {
878 		uint32_t tmp32;
879 
880 		msgimplp->im_msg_flags |= IBMF_MSG_FLAGS_GLOBAL_ADDRESS;
881 		ib_grh = (ib_grh_t *)recv_wqep->recv_mem;
882 		msgimplp->im_global_addr.ig_sender_gid.gid_prefix =
883 		    b2h64(ib_grh->SGID.gid_prefix);
884 		msgimplp->im_global_addr.ig_sender_gid.gid_guid =
885 		    b2h64(ib_grh->SGID.gid_guid);
886 		msgimplp->im_global_addr.ig_recver_gid.gid_prefix =
887 		    b2h64(ib_grh->DGID.gid_prefix);
888 		msgimplp->im_global_addr.ig_recver_gid.gid_guid =
889 		    b2h64(ib_grh->DGID.gid_guid);
890 		/*
891 		 * swap to get byte order back to wire format on little endian
892 		 * systems so we can apply the GRH masks
893 		 */
894 		tmp32 = b2h32(ib_grh->IPVer_TC_Flow);
895 		msgimplp->im_global_addr.ig_flow_label =
896 		    tmp32 & IB_GRH_FLOW_LABEL_MASK;
897 		msgimplp->im_global_addr.ig_tclass =
898 		    (tmp32 & IB_GRH_TCLASS_MASK) >> 20;
899 		msgimplp->im_global_addr.ig_hop_limit =
900 		    ib_grh->HopLmt;
901 	}
902 
903 	/* Perform RMPP or non-RMPP processing */
904 	if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
905 	    (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE)) {
906 		IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3,
907 		    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
908 		    "ibmf_i_do_recv_cb(): %s, tid = %016" PRIx64 ","
909 		    "flags = 0x%x rmpp_type = %d, rmpp_segnum = %d\n",
910 		    tnf_string, msg, "Handling rmpp MAD",
911 		    tnf_opaque, tid, b2h64(mad_hdr->TransactionID),
912 		    tnf_opaque, flags, rmpp_hdr->rmpp_flags,
913 		    tnf_opaque, type, rmpp_hdr->rmpp_type,
914 		    tnf_opaque, segment, b2h32(rmpp_hdr->rmpp_segnum));
915 
916 		/*
917 		 * Set the RMPP state to "receiver active" on the first packet
918 		 * of every RMPP message, and initialize the
919 		 * expected segment to 1.
920 		 */
921 		if ((msgimplp->im_rmpp_ctx.rmpp_state ==
922 		    IBMF_RMPP_STATE_UNDEFINED) &&
923 		    (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_FIRST_PKT)) {
924 
925 			msgimplp->im_flags |= IBMF_MSG_FLAGS_RECV_RMPP;
926 
927 			if (rmpp_hdr->rmpp_type == IBMF_RMPP_TYPE_DATA) {
928 				msgimplp->im_rmpp_ctx.rmpp_state =
929 				    IBMF_RMPP_STATE_RECEVR_ACTIVE;
930 
931 				IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
932 				    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
933 				    "ibmf_i_do_recv_cb(): %s, msgimplp = %p\n",
934 				    tnf_string, msg, "first RMPP pkt received",
935 				    tnf_opaque, msgimplp, msgimplp);
936 			}
937 
938 			msgimplp->im_rmpp_ctx.rmpp_es = 1;
939 			msgimplp->im_rmpp_ctx.rmpp_wl = 1;
940 			msgimplp->im_rmpp_ctx.rmpp_wf = 1;
941 
942 			/* set double-sided transfer flag for certain methods */
943 			if (mad_hdr->R_Method == SA_SUBN_ADM_GET_MULTI)
944 				msgimplp->im_rmpp_ctx.rmpp_is_ds = B_TRUE;
945 			else	msgimplp->im_rmpp_ctx.rmpp_is_ds = B_FALSE;
946 
947 			msgimplp->im_trans_state_flags |=
948 			    IBMF_TRANS_STATE_FLAG_RECV_ACTIVE;
949 		}
950 
951 		if (rmpp_hdr->rmpp_resp_time != IBMF_RMPP_DEFAULT_RRESPT) {
952 			msgimplp->im_retrans.retrans_rtv =
953 			    1 << rmpp_hdr->rmpp_resp_time;
954 
955 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
956 			    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
957 			    "ibmf_i_do_recv_cb: %s, resp_time %d\n",
958 			    tnf_string, msg, "new resp time received",
959 			    tnf_uint, resp_time, rmpp_hdr->rmpp_resp_time);
960 		}
961 
962 		ibmf_i_handle_rmpp(clientp, ibmf_qp_handle, msgimplp,
963 		    (uchar_t *)((uintptr_t)recv_wqep->recv_mem +
964 		    sizeof (ib_grh_t)));
965 	} else {
966 
967 		msgimplp->im_trans_state_flags |=
968 		    IBMF_TRANS_STATE_FLAG_RECV_ACTIVE;
969 
970 		ibmf_i_handle_non_rmpp(clientp, msgimplp,
971 		    (uchar_t *)((uintptr_t)recv_wqep->recv_mem +
972 		    sizeof (ib_grh_t)));
973 	}
974 
975 	msg_rp_unset_id = msg_tr_unset_id = msg_rp_set_id = msg_tr_set_id = 0;
976 
977 	/*
978 	 * Save the transaction state flags and the timeout IDs
979 	 * before releasing the mutex as they may be changed after that.
980 	 */
981 	msg_trans_state_flags = msgimplp->im_trans_state_flags;
982 	msg_flags = msgimplp->im_flags;
983 	msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
984 	msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
985 	msgimplp->im_rp_unset_timeout_id = 0;
986 	msgimplp->im_tr_unset_timeout_id = 0;
987 
988 	/*
989 	 * Decrement the message reference count
990 	 * This count was incremented either when the message was found
991 	 * on the client's message list (ibmf_i_find_msg()) or when
992 	 * a new message was created for unsolicited data
993 	 */
994 	IBMF_MSG_DECR_REFCNT(msgimplp);
995 
996 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
997 		if (msgimplp->im_rp_timeout_id != 0) {
998 			msg_rp_set_id = msgimplp->im_rp_timeout_id;
999 			msgimplp->im_rp_timeout_id = 0;
1000 		}
1001 		if (msgimplp->im_tr_timeout_id != 0) {
1002 			msg_tr_set_id = msgimplp->im_tr_timeout_id;
1003 			msgimplp->im_tr_timeout_id = 0;
1004 		}
1005 	}
1006 
1007 	mutex_exit(&msgimplp->im_mutex);
1008 
1009 	/*
1010 	 * Call untimeout() after releasing the lock because the
1011 	 * lock is acquired in the timeout handler as well. Untimeout()
1012 	 * does not return until the timeout handler has run, if it already
1013 	 * fired, which would result in a deadlock if we did not first
1014 	 * release the im_mutex lock.
1015 	 */
1016 	if (msg_rp_unset_id != 0) {
1017 		(void) untimeout(msg_rp_unset_id);
1018 	}
1019 
1020 	if (msg_tr_unset_id != 0) {
1021 		(void) untimeout(msg_tr_unset_id);
1022 	}
1023 
1024 	if (msg_rp_set_id != 0) {
1025 		(void) untimeout(msg_rp_set_id);
1026 	}
1027 
1028 	if (msg_tr_set_id != 0) {
1029 		(void) untimeout(msg_tr_set_id);
1030 	}
1031 
1032 	/* Increment the kstats for number of messages received */
1033 	mutex_enter(&clientp->ic_kstat_mutex);
1034 	IBMF_ADD32_KSTATS(clientp, msgs_received, 1);
1035 	mutex_exit(&clientp->ic_kstat_mutex);
1036 
1037 	/*
1038 	 * now that we are done gleaning all we want out of the receive
1039 	 * completion, we repost the receive request.
1040 	 */
1041 	(void) ibmf_i_repost_recv_buffer(clientp->ic_myci, recv_wqep);
1042 
1043 	/*
1044 	 * If the transaction flags indicate a completed transaction,
1045 	 * notify the client
1046 	 */
1047 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
1048 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
1049 		    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
1050 		    "ibmf_i_do_recv_cb(): %s, msgimplp = %p\n",
1051 		    tnf_string, msg, "notifying client",
1052 		    tnf_opaque, msgimplp, msgimplp);
1053 
1054 		/* Remove the message from the client's message list */
1055 		ibmf_i_client_rem_msg(clientp, msgimplp, &ref_cnt);
1056 
1057 		/*
1058 		 * Notify the client if the message reference count is zero.
1059 		 * At this point, we know that the transaction is done and
1060 		 * the message has been removed from the client's message list.
1061 		 * So, we only need to make sure the reference count is zero
1062 		 * before notifying the client.
1063 		 */
1064 		if (ref_cnt == 0) {
1065 
1066 			ibmf_i_notify_sequence(clientp, msgimplp, msg_flags);
1067 
1068 		}
1069 	}
1070 
1071 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1072 	    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
1073 	    "ibmf_i_do_recv_cb() exit, msgimplp = %p\n",
1074 	    tnf_opaque, msgimplp, msgimplp);
1075 }
1076 
1077 /*
1078  * ibmf_i_handle_non_rmpp():
1079  *	Handle non-RMPP processing of an incoming IB packet
1080  */
1081 void
1082 ibmf_i_handle_non_rmpp(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
1083     uchar_t *mad)
1084 {
1085 	ibmf_rmpp_ctx_t	*rmpp_ctx = &msgimplp->im_rmpp_ctx;
1086 	ib_mad_hdr_t	*mad_hdr;
1087 	size_t		offset;
1088 	uchar_t		*msgbufp;
1089 	uint32_t	clhdrsz, clhdroff;
1090 
1091 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
1092 	    ibmf_i_handle_non_rmpp_start, IBMF_TNF_TRACE, "",
1093 	    "ibmf_i_handle_non_rmpp(): clientp = 0x%p, "
1094 	    "msgp = 0x%p, madp = 0x%p\n", tnf_opaque, clientp, clientp,
1095 	    tnf_opaque, msg, msgimplp, tnf_opaque, mad, mad);
1096 
1097 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
1098 
1099 	/* Get the MAD header */
1100 	mad_hdr = (ib_mad_hdr_t *)mad;
1101 
1102 	/* Determine the MAD's class header size */
1103 	ibmf_i_mgt_class_to_hdr_sz_off(mad_hdr->MgmtClass, &clhdrsz, &clhdroff);
1104 
1105 	/* Allocate the message receive buffers if not already allocated */
1106 	if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
1107 
1108 		msgimplp->im_msgbufs_recv.im_bufs_mad_hdr =
1109 		    (ib_mad_hdr_t *)kmem_zalloc(IBMF_MAD_SIZE, KM_NOSLEEP);
1110 		if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
1111 
1112 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1113 			    ibmf_i_handle_non_rmpp_err, IBMF_TNF_ERROR, "",
1114 			    "ibmf_i_handle_non_rmpp(): %s\n", tnf_string, msg,
1115 			    "mem allocation failure (non-rmpp payload)");
1116 
1117 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1118 			    ibmf_i_handle_non_rmpp_end, IBMF_TNF_TRACE, "",
1119 			    "ibmf_i_handle_non_rmpp() exit\n");
1120 
1121 			return;
1122 		}
1123 		mutex_enter(&clientp->ic_kstat_mutex);
1124 		IBMF_ADD32_KSTATS(clientp, recv_bufs_alloced, 1);
1125 		mutex_exit(&clientp->ic_kstat_mutex);
1126 	}
1127 
1128 	/* Get a pointer to the MAD location in the receive buffer */
1129 	msgbufp = (uchar_t *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr;
1130 
1131 	/* Copy the incoming MAD into the receive buffer */
1132 	bcopy((const void *)mad, (void *)msgbufp, IBMF_MAD_SIZE);
1133 
1134 	/* Get the offset of the class header */
1135 	offset = sizeof (ib_mad_hdr_t) + clhdroff;
1136 
1137 	/* initialize class header pointer */
1138 	if (clhdrsz == 0) {
1139 		msgimplp->im_msgbufs_recv.im_bufs_cl_hdr = NULL;
1140 	} else {
1141 		msgimplp->im_msgbufs_recv.im_bufs_cl_hdr =
1142 		    (void *)(msgbufp + offset);
1143 	}
1144 	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len = clhdrsz;
1145 
1146 	offset += clhdrsz;
1147 
1148 	/* initialize data area pointer */
1149 	msgimplp->im_msgbufs_recv.im_bufs_cl_data = (void *)(msgbufp + offset);
1150 	msgimplp->im_msgbufs_recv.im_bufs_cl_data_len = IBMF_MAD_SIZE -
1151 	    sizeof (ib_mad_hdr_t) - clhdroff - clhdrsz;
1152 
1153 	rmpp_ctx->rmpp_state = IBMF_RMPP_STATE_DONE;
1154 	ibmf_i_terminate_transaction(clientp, msgimplp, IBMF_SUCCESS);
1155 
1156 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_handle_non_rmpp_end,
1157 	    IBMF_TNF_TRACE, "", "ibmf_i_handle_non_rmpp() exit\n");
1158 }
1159 
1160 /*
1161  * ibmf_i_repost_recv_buffer():
1162  *	Repost a WQE to the RQ after processing it
1163  */
1164 /* ARGSUSED */
1165 int
1166 ibmf_i_repost_recv_buffer(ibmf_ci_t *cip, ibmf_recv_wqe_t *recv_wqep)
1167 {
1168 	int			ret;
1169 	ibt_status_t		status;
1170 	ibmf_qp_handle_t	ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
1171 	struct kmem_cache	*kmem_cachep;
1172 	ibmf_alt_qp_t		*altqp;
1173 	ibmf_qp_t		*qpp;
1174 
1175 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1176 	    ibmf_i_repost_recv_buffer_start, IBMF_TNF_TRACE, "",
1177 	    "ibmf_i_repost_recv_buffer() enter, cip = %p, rwqep = %p\n",
1178 	    tnf_opaque, cip, cip, tnf_opaque, rwqep, recv_wqep);
1179 
1180 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
1181 
1182 	/* Get the WQE kmem cache pointer based on the QP type */
1183 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1184 		kmem_cachep = cip->ci_recv_wqes_cache;
1185 		qpp = recv_wqep->recv_qpp;
1186 	} else {
1187 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
1188 		kmem_cachep = altqp->isq_recv_wqes_cache;
1189 	}
1190 
1191 	/* post recv wqe; free it if the post fails */
1192 	status = ibt_post_recv(recv_wqep->recv_qp_handle, &recv_wqep->recv_wr,
1193 	    1, NULL);
1194 
1195 	ret = ibmf_i_ibt_to_ibmf_status(status);
1196 	if (ret != IBMF_SUCCESS) {
1197 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1198 		    ibmf_i_repost_recv_buffer_err, IBMF_TNF_ERROR, "",
1199 		    "ibmf_i_repost_recv_buffer(): %s, status = %d\n",
1200 		    tnf_string, msg, "repost_recv failed", tnf_uint,
1201 		    ibt_status, status);
1202 		kmem_free(recv_wqep->recv_wr.wr_sgl,
1203 		    IBMF_MAX_RQ_WR_SGL_ELEMENTS * sizeof (ibt_wr_ds_t));
1204 		kmem_cache_free(kmem_cachep, recv_wqep);
1205 		mutex_enter(&cip->ci_mutex);
1206 		IBMF_SUB32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
1207 		mutex_exit(&cip->ci_mutex);
1208 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1209 			mutex_enter(&cip->ci_mutex);
1210 			cip->ci_wqes_alloced--;
1211 			if (cip->ci_wqes_alloced == 0)
1212 				cv_signal(&cip->ci_wqes_cv);
1213 			mutex_exit(&cip->ci_mutex);
1214 		} else {
1215 			mutex_enter(&altqp->isq_mutex);
1216 			altqp->isq_wqes_alloced--;
1217 			if (altqp->isq_wqes_alloced == 0)
1218 				cv_signal(&altqp->isq_wqes_cv);
1219 			mutex_exit(&altqp->isq_mutex);
1220 		}
1221 	}
1222 
1223 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1224 		mutex_enter(&qpp->iq_mutex);
1225 		qpp->iq_rwqes_posted++;
1226 		mutex_exit(&qpp->iq_mutex);
1227 	} else {
1228 		mutex_enter(&altqp->isq_mutex);
1229 		altqp->isq_rwqes_posted++;
1230 		mutex_exit(&altqp->isq_mutex);
1231 	}
1232 
1233 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_repost_recv_buffer_end,
1234 	    IBMF_TNF_TRACE, "", "ibmf_i_repost_recv_buffer() exit\n");
1235 	return (ret);
1236 }
1237 
1238 /*
1239  * ibmf_i_get_class:
1240  * Parses the mad header and determines which class should be notified of the
1241  * notification.
1242  *
1243  * Input Argument
1244  * madhdrp    contents of mad header for the packet
1245  *
1246  * Output Argument
1247  * dest_classp pointer to the class type of the client that should be notified
1248  *
1249  * Returns
1250  * status
1251  */
1252 static int
1253 ibmf_i_get_class(ib_mad_hdr_t *madhdrp, ibmf_qp_handle_t dest_ibmf_qp_handle,
1254     ib_lid_t slid, ibmf_client_type_t *dest_classp)
1255 {
1256 	int		method = madhdrp->R_Method;
1257 	int		attrib = b2h16(madhdrp->AttributeID);
1258 	int		class = madhdrp->MgmtClass;
1259 	uint32_t	attrib_mod = b2h32(madhdrp->AttributeModifier);
1260 
1261 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
1262 	    ibmf_i_get_class_start, IBMF_TNF_TRACE, "",
1263 	    "ibmf_i_get_class() enter, class = 0x%x, method = 0x%x, "
1264 	    "attribute = 0x%x, dest_qp_hdl = 0x%p\n",
1265 	    tnf_opaque, class, class,
1266 	    tnf_opaque, method, method,
1267 	    tnf_opaque, attrib, attrib,
1268 	    tnf_opaque, ibmf_qp_handle, dest_ibmf_qp_handle);
1269 
1270 	/* set default for error checking */
1271 	*dest_classp = 0;
1272 
1273 	/*
1274 	 * Determine the class type
1275 	 */
1276 	switch (class) {
1277 	case MAD_MGMT_CLASS_SUBN_LID_ROUTED:
1278 	case MAD_MGMT_CLASS_SUBN_DIRECT_ROUTE:
1279 
1280 		/*
1281 		 * tavor generates trap by sending mad with slid 0;
1282 		 * deliver this to SMA
1283 		 */
1284 		if ((method == MAD_METHOD_TRAP) && (slid == 0)) {
1285 			*dest_classp = SUBN_AGENT;
1286 			break;
1287 		}
1288 
1289 		/* this is derived from table 109 of IB Spec 1.1, vol1 */
1290 		if (attrib == SM_SMINFO_ATTRID || method == MAD_METHOD_TRAP ||
1291 		    method == MAD_METHOD_GET_RESPONSE)
1292 			*dest_classp = SUBN_MANAGER;
1293 		else
1294 			*dest_classp = SUBN_AGENT;
1295 
1296 		break;
1297 	case MAD_MGMT_CLASS_SUBN_ADM:
1298 
1299 		/*
1300 		 * Deliver to SA client (agent) if packet was sent to default qp
1301 		 * Deliver to ibmf_saa client (manager) if packet was sent to
1302 		 * alternate qp
1303 		 */
1304 		if (dest_ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
1305 			*dest_classp = SUBN_ADM_AGENT;
1306 		else
1307 			*dest_classp = SUBN_ADM_MANAGER;
1308 		break;
1309 	case MAD_MGMT_CLASS_PERF:
1310 
1311 		/* Deliver to PM if response bit is set */
1312 		if ((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT)
1313 			*dest_classp = PERF_MANAGER;
1314 		else
1315 			*dest_classp = PERF_AGENT;
1316 		break;
1317 	case MAD_MGMT_CLASS_BM:
1318 
1319 		/*
1320 		 * Deliver to BM if response bit is set, packet is a trap,
1321 		 * or packet is a BMSend
1322 		 */
1323 		if (((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT) ||
1324 		    (method == MAD_METHOD_TRAP) ||
1325 		    ((method == MAD_METHOD_SEND) &&
1326 		    ((attrib_mod & IBMF_BM_MAD_ATTR_MOD_REQRESP_BIT) ==
1327 		    IBMF_BM_MAD_ATTR_MOD_RESP)))
1328 			*dest_classp = BM_MANAGER;
1329 		else
1330 			*dest_classp = BM_AGENT;
1331 
1332 		break;
1333 	case MAD_MGMT_CLASS_DEV_MGT:
1334 
1335 		/* Deliver to DM if response bit is set or packet is a trap */
1336 		if (((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT) ||
1337 		    (method == MAD_METHOD_TRAP))
1338 			*dest_classp = DEV_MGT_MANAGER;
1339 		else
1340 			*dest_classp = DEV_MGT_AGENT;
1341 		break;
1342 	case MAD_MGMT_CLASS_COMM_MGT:
1343 		*dest_classp = COMM_MGT_MANAGER_AGENT;
1344 		break;
1345 	case MAD_MGMT_CLASS_SNMP:
1346 		*dest_classp = SNMP_MANAGER_AGENT;
1347 		break;
1348 	default:
1349 
1350 		if ((class >= MAD_MGMT_CLASS_VENDOR_START) &&
1351 		    (class <= MAD_MGMT_CLASS_VENDOR_END)) {
1352 			*dest_classp = VENDOR_09_MANAGER_AGENT +
1353 			    (class - MAD_MGMT_CLASS_VENDOR_START);
1354 		} else if ((class >= MAD_MGMT_CLASS_VENDOR2_START) &&
1355 		    (class <= MAD_MGMT_CLASS_VENDOR2_END)) {
1356 			*dest_classp = VENDOR_30_MANAGER_AGENT +
1357 			    (class - MAD_MGMT_CLASS_VENDOR2_START);
1358 		} else if ((class >= MAD_MGMT_CLASS_APPLICATION_START) &&
1359 		    (class <= MAD_MGMT_CLASS_APPLICATION_END)) {
1360 			*dest_classp = APPLICATION_10_MANAGER_AGENT +
1361 			    (class - MAD_MGMT_CLASS_APPLICATION_START);
1362 		}
1363 
1364 		break;
1365 	}
1366 
1367 	if (*dest_classp == 0) {
1368 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1369 		    ibmf_i_get_class_type_err, IBMF_TNF_TRACE, "",
1370 		    "ibmf_i_get_class(): %s, class = 0x%x\n",
1371 		    tnf_string, msg, "invalid class", tnf_opaque, class, class);
1372 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_class_end,
1373 		    IBMF_TNF_TRACE, "", "ibmf_i_get_class() exit\n");
1374 		return (IBMF_FAILURE);
1375 	}
1376 
1377 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1378 	    ibmf_i_get_class_end, IBMF_TNF_TRACE, "",
1379 	    "ibmf_i_get_class() exit, class = 0x%x\n",
1380 	    tnf_opaque, class, *dest_classp);
1381 
1382 	return (IBMF_SUCCESS);
1383 }
1384 
1385 /*
1386  * ibmf_get_mod_name():
1387  * Constructs the module name based on the naming convention described in
1388  * PSARC case 2003/753.
1389  * The name should be "sunwibmgt<MgtClass><a_m>
1390  * where:
1391  *	MgtClass = Management class field in the MAD header.
1392  *		   Two lower-case characters are used to represent
1393  *		   this 8-bit value as 2 hex digits.
1394  *	a_m	 = "a" if the client is an agent-only module
1395  *		   "m" if the client is a manager-only module
1396  *		   ""  if the client is both agent and manager.
1397  *
1398  * Input Argument
1399  * mad_class	management class in the MAD header
1400  * class	IBMF management class of incoming MAD
1401  *
1402  * Output Argument
1403  * modname	pointer to the character array that holds the module name
1404  *
1405  * Status
1406  * None
1407  */
1408 static void
1409 ibmf_get_mod_name(uint8_t mad_class, ibmf_client_type_t class, char *modname)
1410 {
1411 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_get_mod_name_start,
1412 	    IBMF_TNF_TRACE, "", "ibmf_get_mod_name_qphdl() enter\n");
1413 
1414 	if (AGENT_CLASS(class)) {
1415 		(void) sprintf(modname, "sunwibmgt%02xa", mad_class);
1416 	} else if (MANAGER_CLASS(class)) {
1417 		(void) sprintf(modname, "sunwibmgt%02xm", mad_class);
1418 	} else {
1419 		/* AGENT+MANAGER class */
1420 		(void) sprintf(modname, "sunwibmgt%02x", mad_class);
1421 	}
1422 
1423 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_get_mod_name,
1424 	    IBMF_TNF_TRACE, "", "ibmf_get_mod_name(): name = %s\n",
1425 	    tnf_string, msg, modname);
1426 
1427 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_get_mod_name_end,
1428 	    IBMF_TNF_TRACE, "", "ibmf_get_mod_name() exit\n");
1429 }
1430 
1431 /*
1432  * ibmf_send_busy():
1433  *
1434  * When a MAD request is received for an IB mandatory agent (BMA or PMA),
1435  * which has not yet registered with IBMF, IBMF returns a BUSY MAD
1436  * to the source of the request to solicit a retry while IBMF attempts
1437  * to load the mandatory agent.
1438  * A temporary, alternate QP is allocated for the purpose of sending the
1439  * MAD. This QP is configured to be in the same partition as the manager
1440  * that sent the request.
1441  *
1442  * Input Argument
1443  * modlargsp	Pointer to ibmf_mod_load_args_t structure
1444  *
1445  * Output Argument
1446  * None
1447  *
1448  * Status
1449  * None
1450  */
1451 static void
1452 ibmf_send_busy(ibmf_mod_load_args_t *modlargsp)
1453 {
1454 	ibmf_ci_t		*cip = modlargsp->cip;
1455 	ibmf_recv_wqe_t		*recv_wqep = modlargsp->recv_wqep;
1456 	ibt_wr_ds_t		sgl[1];
1457 	ibmf_send_wqe_t		*send_wqep;
1458 	ibt_send_wr_t		*swrp;
1459 	ibmf_msg_impl_t 	*msgimplp;
1460 	ibmf_ud_dest_t		*ibmf_ud_dest;
1461 	ibt_ud_dest_t		*ud_dest;
1462 	ib_mad_hdr_t		*smadhdrp, *rmadhdrp;
1463 	ibt_adds_vect_t		adds_vec;
1464 	ibt_wc_t		*wcp = &recv_wqep->recv_wc;
1465 	ibt_status_t		ibtstatus;
1466 	uint_t			num_work_reqs;
1467 	ibt_qp_alloc_attr_t	qp_attrs;
1468 	ibt_qp_info_t		qp_modify_attr;
1469 	ibt_chan_sizes_t	qp_sizes;
1470 	ib_qpn_t		qp_num;
1471 	ibt_qp_hdl_t		ibt_qp_handle;
1472 	ibt_mr_hdl_t		mem_hdl;
1473 	ibt_mr_desc_t		mem_desc;
1474 	ibt_mr_attr_t		mem_attr;
1475 
1476 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_start,
1477 	    IBMF_TNF_TRACE, "", "ibmf_send_busy() enter\n");
1478 
1479 	/* setup the qp attrs for the alloc call */
1480 	qp_attrs.qp_scq_hdl = cip->ci_alt_cq_handle;
1481 	qp_attrs.qp_rcq_hdl = cip->ci_alt_cq_handle;
1482 	qp_attrs.qp_pd_hdl = cip->ci_pd;
1483 	qp_attrs.qp_sizes.cs_sq_sgl = IBMF_MAX_SQ_WR_SGL_ELEMENTS;
1484 	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
1485 	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
1486 	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
1487 	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
1488 	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1489 
1490 	/* request IBT for a qp with the desired attributes */
1491 	ibtstatus = ibt_alloc_qp(cip->ci_ci_handle, IBT_UD_RQP,
1492 	    &qp_attrs, &qp_sizes, &qp_num, &ibt_qp_handle);
1493 	if (ibtstatus != IBT_SUCCESS) {
1494 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1495 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1496 		    tnf_string, msg, "failed to allocate alternate QP",
1497 		    tnf_int, ibt_status, ibtstatus);
1498 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1499 		    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1500 		return;
1501 	}
1502 
1503 	qp_modify_attr.qp_trans = IBT_UD_SRV;
1504 	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
1505 	qp_modify_attr.qp_transport.ud.ud_qkey = IB_GSI_QKEY;
1506 	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
1507 	qp_modify_attr.qp_transport.ud.ud_pkey_ix = wcp->wc_pkey_ix;
1508 	qp_modify_attr.qp_transport.ud.ud_port = recv_wqep->recv_port_num;
1509 
1510 	/* call the IB transport to initialize the QP */
1511 	ibtstatus = ibt_initialize_qp(ibt_qp_handle, &qp_modify_attr);
1512 	if (ibtstatus != IBT_SUCCESS) {
1513 		(void) ibt_free_qp(ibt_qp_handle);
1514 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1515 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1516 		    tnf_string, msg, "failed to initialize alternate QP",
1517 		    tnf_int, ibt_status, ibtstatus);
1518 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1519 		    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1520 		return;
1521 	}
1522 
1523 	/* allocate the message context */
1524 	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
1525 	    KM_SLEEP);
1526 
1527 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
1528 
1529 	ibmf_i_pop_ud_dest_thread(cip);
1530 
1531 	/*
1532 	 * Get a UD dest structure from the pool, this will not fail
1533 	 * because ibmf_i_pop_ud_dest_thread() calls
1534 	 * ibmf_i_populate_ud_dest_list with the KM_SLEEP flag.
1535 	 */
1536 	ibmf_ud_dest = ibmf_i_get_ud_dest(cip);
1537 
1538 	msgimplp->im_ibmf_ud_dest = ibmf_ud_dest;
1539 	msgimplp->im_ud_dest = &ibmf_ud_dest->ud_dest;
1540 	msgimplp->im_qp_hdl = NULL;
1541 
1542 	/*
1543 	 * Reset send_done to indicate we have not received the completion
1544 	 * for this send yet.
1545 	 */
1546 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_SEND_DONE;
1547 
1548 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
1549 
1550 	/*
1551 	 * Allocate resources needed to send a UD packet including the
1552 	 * send WQE context
1553 	 */
1554 	send_wqep = (ibmf_send_wqe_t *)kmem_zalloc(sizeof (ibmf_send_wqe_t),
1555 	    KM_SLEEP);
1556 	send_wqep->send_mem = (void *)kmem_zalloc(IBMF_MEM_PER_WQE, KM_SLEEP);
1557 
1558 	mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)send_wqep->send_mem;
1559 	mem_attr.mr_len = IBMF_MEM_PER_WQE;
1560 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
1561 	mem_attr.mr_as = NULL;
1562 
1563 	/* Register the allocated memory */
1564 	ibtstatus = ibt_register_mr(cip->ci_ci_handle, cip->ci_pd, &mem_attr,
1565 	    &mem_hdl, &mem_desc);
1566 	if (ibtstatus != IBT_SUCCESS) {
1567 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1568 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1569 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1570 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1571 		(void) ibt_free_qp(ibt_qp_handle);
1572 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1573 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1574 		    tnf_string, msg, "failed to register memory",
1575 		    tnf_int, ibt_status, ibtstatus);
1576 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1577 		    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1578 		return;
1579 	}
1580 
1581 	send_wqep->send_sg_lkey = mem_desc.md_lkey;
1582 	send_wqep->send_mem_hdl = mem_hdl;
1583 
1584 	swrp = &send_wqep->send_wr;
1585 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrp))
1586 
1587 	/* use send wqe pointer as the WR ID */
1588 	swrp->wr_id		= (ibt_wrid_t)(uintptr_t)send_wqep;
1589 	ASSERT(swrp->wr_id != NULL);
1590 	swrp->wr_flags		= IBT_WR_NO_FLAGS;
1591 	swrp->wr_opcode		= IBT_WRC_SEND;
1592 	swrp->wr_trans		= IBT_UD_SRV;
1593 
1594 	send_wqep->send_client	= NULL;
1595 	send_wqep->send_msg	= msgimplp;
1596 
1597 	/* Initialize the scatter-gather list */
1598 	sgl[0].ds_va		= (ib_vaddr_t)(uintptr_t)send_wqep->send_mem;
1599 	sgl[0].ds_key		= send_wqep->send_sg_lkey;
1600 	sgl[0].ds_len		= IBMF_MAD_SIZE;
1601 
1602 	wcp			= &recv_wqep->recv_wc;
1603 
1604 	/* Initialize the address vector */
1605 	adds_vec.av_send_grh	= B_FALSE;
1606 	adds_vec.av_dlid	= wcp->wc_slid;
1607 	adds_vec.av_src_path	= wcp->wc_path_bits;
1608 	adds_vec.av_srvl	= 0;
1609 	adds_vec.av_srate	= IBT_SRATE_1X;
1610 	adds_vec.av_port_num	= recv_wqep->recv_port_num;
1611 
1612 	ud_dest			= msgimplp->im_ud_dest;
1613 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_dest))
1614 	ud_dest->ud_qkey	= IB_GSI_QKEY;
1615 	ud_dest->ud_dst_qpn	= wcp->wc_qpn;
1616 
1617 	/* modify the address handle with the address vector information */
1618 	ibtstatus = ibt_modify_ah(cip->ci_ci_handle, ud_dest->ud_ah, &adds_vec);
1619 	if (ibtstatus != IBT_SUCCESS) {
1620 		(void) ibt_deregister_mr(cip->ci_ci_handle, mem_hdl);
1621 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1622 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1623 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1624 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1625 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1626 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1627 		    tnf_string, msg, "ibt modify ah failed", tnf_uint,
1628 		    ibt_status, ibtstatus);
1629 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1630 		    IBMF_TNF_TRACE, "", "ibmf_send_busy(() exit\n");
1631 		return;
1632 	}
1633 
1634 	bzero(send_wqep->send_mem, IBMF_MAD_SIZE);
1635 
1636 	rmadhdrp = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
1637 	    sizeof (ib_grh_t));
1638 	smadhdrp = (ib_mad_hdr_t *)send_wqep->send_mem;
1639 
1640 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rmadhdrp))
1641 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smadhdrp))
1642 
1643 	/* Set up the MAD header */
1644 	smadhdrp->BaseVersion	= rmadhdrp->BaseVersion;
1645 	smadhdrp->MgmtClass	= rmadhdrp->MgmtClass;
1646 	smadhdrp->ClassVersion	= rmadhdrp->ClassVersion;
1647 	smadhdrp->R_Method	= MAD_METHOD_GET_RESPONSE;
1648 	smadhdrp->Status	= MAD_STATUS_BUSY;
1649 	smadhdrp->TransactionID	= rmadhdrp->TransactionID;
1650 	smadhdrp->AttributeID	= rmadhdrp->AttributeID;
1651 	smadhdrp->AttributeModifier = rmadhdrp->AttributeModifier;
1652 
1653 	swrp->wr_sgl		= sgl;
1654 	swrp->wr_nds		= 1;
1655 	swrp->wr.ud.udwr_dest	= msgimplp->im_ud_dest;
1656 	send_wqep->send_port_num = recv_wqep->recv_port_num;
1657 	send_wqep->send_qp_handle = ibt_qp_handle;
1658 	send_wqep->send_ibmf_qp_handle = NULL;
1659 
1660 	/* Post the MAD to the IBT layer */
1661 	num_work_reqs		= 1;
1662 
1663 	ibtstatus = ibt_post_send(ibt_qp_handle, &send_wqep->send_wr,
1664 	    num_work_reqs, NULL);
1665 	if (ibtstatus != IBT_SUCCESS) {
1666 		(void) ibt_deregister_mr(cip->ci_ci_handle, mem_hdl);
1667 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1668 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1669 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1670 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1671 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1672 		    ibmf_send_busy_err, IBMF_TNF_TRACE, "",
1673 		    "ibmf_send_busy(): %s, status = %d\n", tnf_string, msg,
1674 		    "post send failure", tnf_uint, ibt_status, ibtstatus);
1675 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1676 		    IBMF_TNF_TRACE, "", "ibmf_send_busy(() exit\n");
1677 		return;
1678 	}
1679 
1680 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1681 	    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1682 }
1683 
1684 /*
1685  * ibmf_module_load():
1686  * This function attempts to load a client module that has not yet
1687  * registered with IBMF at the time a request MAD arrives for it.
1688  * Prior to loading the module, it sends a busy MAD to the sender of
1689  * the request MAD, this soliciting a resend of the request MAD.
1690  *
1691  * Input Argument
1692  * modlargsp	Pointer to ibmf_mod_load_args_t structure
1693  *
1694  * Output Argument
1695  * None
1696  *
1697  * Status
1698  * None
1699  */
1700 static void
1701 ibmf_module_load(void *taskq_arg)
1702 {
1703 	char *modname;
1704 	ibmf_mod_load_args_t *modlargsp = (ibmf_mod_load_args_t *)taskq_arg;
1705 	ibmf_ci_t *cip = modlargsp->cip;
1706 	ibmf_recv_wqe_t	*recv_wqep = modlargsp->recv_wqep;
1707 	ibmf_client_type_t class = modlargsp->ibmf_class;
1708 
1709 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_start,
1710 	    IBMF_TNF_TRACE, "", "ibmf_module_load_busy() enter\n");
1711 	modname = modlargsp->modname;
1712 
1713 	if (IS_MANDATORY_CLASS(class)) {
1714 		ibmf_send_busy(modlargsp);
1715 	}
1716 
1717 	if (modload("misc", modname) < 0) {
1718 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
1719 		kmem_free(modlargsp, sizeof (ibmf_mod_load_args_t));
1720 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1, ibmf_module_load_error,
1721 		    IBMF_TNF_TRACE, "",
1722 		    "ibmf_module_load(): modload failed for %s\n",
1723 		    tnf_string, module, modname);
1724 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_end,
1725 		    IBMF_TNF_TRACE, "", "ibmf_module_load() exit\n");
1726 		return;
1727 	}
1728 
1729 	(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
1730 
1731 	kmem_free(modlargsp, sizeof (ibmf_mod_load_args_t));
1732 
1733 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_end,
1734 	    IBMF_TNF_TRACE, "", "ibmf_module_load_busy() exit\n");
1735 }
1736