xref: /titanic_52/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_send.c (revision d01412971af32f806ad2e91e40c00b485e893b2c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file implements the MAD send logic in IBMF.
31  */
32 
33 #include <sys/ib/mgt/ibmf/ibmf_impl.h>
34 
/*
 * Convert a work request ID into a pointer and store it in (ptr).
 * The whole expansion is parenthesized so that the macro is a single
 * assignment expression no matter what surrounds it at the use site.
 */
#define	IBMF_SEND_WR_ID_TO_ADDR(id, ptr)		\
	((ptr) = (void *)(uintptr_t)(id))
37 
38 extern int ibmf_trace_level;
39 
40 static void ibmf_i_do_send_cb(void *taskq_arg);
41 static void ibmf_i_do_send_compl(ibmf_handle_t ibmf_handle,
42     ibmf_msg_impl_t *msgimplp, ibmf_send_wqe_t *send_wqep);
43 
44 /*
45  * ibmf_i_issue_pkt():
46  *	Post an IB packet on the specified QP's send queue
47  */
48 int
49 ibmf_i_issue_pkt(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
50     ibmf_qp_handle_t ibmf_qp_handle, ibmf_send_wqe_t *send_wqep)
51 {
52 	int			ret;
53 	ibt_status_t		status;
54 	ibt_wr_ds_t		sgl[1];
55 	ibt_qp_hdl_t		ibt_qp_handle;
56 
57 	_NOTE(ASSUMING_PROTECTED(*send_wqep))
58 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
59 
60 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
61 	    ibmf_i_issue_pkt_start, IBMF_TNF_TRACE, "",
62 	    "ibmf_i_issue_pkt() enter, clientp = %p, msg = %p, "
63 	    "qp_hdl = %p,  swqep = %p\n", tnf_opaque, clientp, clientp,
64 	    tnf_opaque, msg, msgimplp, tnf_opaque, ibmf_qp_handle,
65 	    ibmf_qp_handle, tnf_opaque, send_wqep, send_wqep);
66 
67 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
68 	ASSERT(MUTEX_NOT_HELD(&clientp->ic_mutex));
69 
70 	/*
71 	 * if the qp handle provided in ibmf_send_pkt()
72 	 * is not the default qp handle for this client,
73 	 * then the wqe must be sent on this qp,
74 	 * else use the default qp handle set up during ibmf_register()
75 	 */
76 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
77 		ibt_qp_handle = clientp->ic_qp->iq_qp_handle;
78 	} else {
79 		ibt_qp_handle =
80 		    ((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_qp_handle;
81 	}
82 
83 	/* initialize the send WQE */
84 	ibmf_i_init_send_wqe(clientp, msgimplp, sgl, send_wqep,
85 	    msgimplp->im_ud_dest, ibt_qp_handle, ibmf_qp_handle);
86 
87 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*send_wqep))
88 
89 	/*
90 	 * Issue the wqe to the transport.
91 	 * NOTE: ibt_post_send() will not block, so, it is ok
92 	 * to hold the msgimpl mutex across this call.
93 	 */
94 	status = ibt_post_send(send_wqep->send_qp_handle, &send_wqep->send_wr,
95 	    1, NULL);
96 	if (status != IBT_SUCCESS) {
97 		mutex_enter(&clientp->ic_kstat_mutex);
98 		IBMF_ADD32_KSTATS(clientp, send_pkt_failed, 1);
99 		mutex_exit(&clientp->ic_kstat_mutex);
100 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
101 		    ibmf_i_issue_pkt_err, IBMF_TNF_TRACE, "",
102 		    "ibmf_i_issue_pkt(): %s, status = %d\n",
103 		    tnf_string, msg, "post send failure",
104 		    tnf_uint, ibt_status, status);
105 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_issue_pkt_end,
106 		    IBMF_TNF_TRACE, "", "ibmf_i_issue_pkt(() exit\n");
107 		return (IBMF_TRANSPORT_FAILURE);
108 	}
109 
110 	ret = IBMF_SUCCESS;
111 
112 	/* bump the number of active sends */
113 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
114 		mutex_enter(&clientp->ic_mutex);
115 		clientp->ic_sends_active++;
116 		mutex_exit(&clientp->ic_mutex);
117 		mutex_enter(&clientp->ic_kstat_mutex);
118 		IBMF_ADD32_KSTATS(clientp, sends_active, 1);
119 		mutex_exit(&clientp->ic_kstat_mutex);
120 	} else {
121 		ibmf_alt_qp_t *qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
122 		mutex_enter(&qpp->isq_mutex);
123 		qpp->isq_sends_active++;
124 		mutex_exit(&qpp->isq_mutex);
125 		mutex_enter(&clientp->ic_kstat_mutex);
126 		IBMF_ADD32_KSTATS(clientp, sends_active, 1);
127 		mutex_exit(&clientp->ic_kstat_mutex);
128 	}
129 
130 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_issue_pkt_end,
131 	    IBMF_TNF_TRACE, "", "ibmf_i_issue_pkt() exit\n");
132 	return (ret);
133 }
134 
135 /*
136  * ibmf_i_send_pkt()
137  *	Send an IB packet after allocating send resources
138  */
139 int
140 ibmf_i_send_pkt(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
141     ibmf_msg_impl_t *msgimplp, int block)
142 {
143 	ibmf_send_wqe_t	*send_wqep;
144 	int		status;
145 
146 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_send_pkt_start,
147 	    IBMF_TNF_TRACE, "",
148 	    "ibmf_i_send_pkt(): clientp = 0x%p, qp_hdl = 0x%p, "
149 	    "msgp = 0x%p, block = %d\n", tnf_opaque, clientp, clientp,
150 	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_opaque, msg, msgimplp,
151 	    tnf_uint, block, block);
152 
153 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
154 
155 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
156 
157 	/*
158 	 * Reset send_done to indicate we have not received the completion
159 	 * for this send yet.
160 	 */
161 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_SEND_DONE;
162 
163 	/*
164 	 * Allocate resources needed to send a UD packet including the
165 	 * send WQE context
166 	 */
167 	status = ibmf_i_alloc_send_resources(clientp->ic_myci,
168 	    msgimplp, block, &send_wqep);
169 	if (status != IBMF_SUCCESS) {
170 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_send_pkt_err,
171 		    IBMF_TNF_ERROR, "", "ibmf_i_send_pkt(): %s, status = %d\n",
172 		    tnf_string, msg, "unable to allocate send resources",
173 		    tnf_uint, status, status);
174 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_pkt_end,
175 		    IBMF_TNF_TRACE, "", "ibmf_i_send_pkt() exit\n");
176 		return (status);
177 	}
178 
179 	/* Set the segment number in the send WQE context */
180 	if (msgimplp->im_flags & IBMF_MSG_FLAGS_SEND_RMPP)
181 		send_wqep->send_rmpp_segment = msgimplp->im_rmpp_ctx.rmpp_ns;
182 
183 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*send_wqep))
184 
185 	/*
186 	 * Increment the count of pending send completions.
187 	 * Only when this count is zero should the client be notified
188 	 * of completion of the transaction.
189 	 */
190 	msgimplp->im_pending_send_compls += 1;
191 
192 	/* Send the packet */
193 	status = ibmf_i_issue_pkt(clientp, msgimplp, ibmf_qp_handle, send_wqep);
194 	if (status != IBMF_SUCCESS) {
195 		ibmf_i_free_send_resources(clientp->ic_myci, msgimplp,
196 		    send_wqep);
197 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_send_pkt_err,
198 		    IBMF_TNF_ERROR, "", "ibmf_i_send_pkt(): %s, status = %d\n",
199 		    tnf_string, msg, "unable to issue packet",
200 		    tnf_uint, status, status);
201 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_pkt_end,
202 		    IBMF_TNF_TRACE, "", "ibmf_i_send_pkt() exit\n");
203 		return (status);
204 	}
205 
206 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_pkt_end,
207 	    IBMF_TNF_TRACE, "", "ibmf_i_send_pkt() exit, status = %d\n",
208 	    tnf_uint, status, status);
209 
210 	return (IBMF_SUCCESS);
211 }
212 
213 /*
214  * ibmf_i_send_single_pkt():
215  *	Send a single IB packet.  Only used to send non-RMPP packets.
216  */
217 int
218 ibmf_i_send_single_pkt(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
219     ibmf_msg_impl_t *msgimplp, int block)
220 {
221 	int	status;
222 
223 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_send_single_pkt_start,
224 	    IBMF_TNF_TRACE, "",
225 	    "ibmf_i_send_single_pkt(): clientp = 0x%p, qp_hdl = 0x%p, "
226 	    "msgp = 0x%p, block = %d\n", tnf_opaque, clientp, clientp,
227 	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_opaque, msg, msgimplp,
228 	    tnf_uint, block, block);
229 
230 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
231 
232 	status = ibmf_i_send_pkt(clientp, ibmf_qp_handle, msgimplp, block);
233 	if (status != IBMF_SUCCESS) {
234 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
235 		    ibmf_i_send_single_pkt_err, IBMF_TNF_ERROR, "",
236 		    "ibmf_i_send_single_pkt(): %s, msgp = 0x%p\n",
237 		    tnf_string, msg, "unable to send packet",
238 		    tnf_uint, status, status);
239 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
240 		    ibmf_i_send_single_pkt_end, IBMF_TNF_TRACE, "",
241 		    "ibmf_i_send_single_pkt() exit\n");
242 		return (status);
243 	}
244 
245 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_single_pkt_end,
246 	    IBMF_TNF_TRACE, "", "ibmf_i_send_single_pkt() exit\n");
247 	return (IBMF_SUCCESS);
248 }
249 
250 /*
251  * ibmf_i_handle_send_completion():
252  *	Process the WQE from the SQ identified in the work completion entry.
253  */
254 /* ARGSUSED */
255 void
256 ibmf_i_handle_send_completion(ibmf_ci_t *cip, ibt_wc_t *wcp)
257 {
258 	ibmf_client_t		*clientp, *cclientp;
259 	ibmf_send_wqe_t		*send_wqep;
260 	ibmf_qp_handle_t	ibmf_qp_handle;
261 	ibmf_alt_qp_t		*qpp;
262 	int			ret;
263 
264 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
265 	    ibmf_i_handle_send_completion_start, IBMF_TNF_TRACE, "",
266 	    "ibmf_i_handle_send_completion() enter, cip = %p, wcp = %p\n",
267 	    tnf_opaque, cip, cip, tnf_opaque, wcp, wcp);
268 
269 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
270 
271 	ASSERT(wcp->wc_id != NULL);
272 
273 	ASSERT(IBMF_IS_SEND_WR_ID(wcp->wc_id));
274 
275 	/* get the IBMF send WQE context */
276 	IBMF_SEND_WR_ID_TO_ADDR(wcp->wc_id, send_wqep);
277 
278 	ASSERT(send_wqep != NULL);
279 
280 	/* get the client context */
281 	cclientp =  clientp = send_wqep->send_client;
282 
283 	/* Check if this is a completion for a BUSY MAD sent by IBMF */
284 	if (clientp == NULL) {
285 		ibmf_msg_impl_t		*msgimplp;
286 
287 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L3,
288 		    ibmf_i_handle_send_completion, IBMF_TNF_TRACE, "",
289 		    "ibmf_i_handle_send_completion(): NULL client\n");
290 
291 		msgimplp = send_wqep->send_msg;
292 
293 		/*
294 		 * Deregister registered memory and free it, and
295 		 * free up the send WQE context
296 		 */
297 		(void) ibt_deregister_mr(cip->ci_ci_handle,
298 		    send_wqep->send_mem_hdl);
299 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
300 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
301 
302 		/* Free up the message context */
303 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
304 		ibmf_i_clean_ud_dest_list(cip, B_FALSE);
305 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
306 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
307 		    ibmf_i_handle_send_completion_end, IBMF_TNF_TRACE, "",
308 		    "ibmf_i_handle_send_completion() exit\n");
309 		return;
310 	}
311 
312 	/* get the QP handle */
313 	ibmf_qp_handle = send_wqep->send_ibmf_qp_handle;
314 	qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
315 
316 	ASSERT(clientp != NULL);
317 
318 	/* decrement the number of active sends */
319 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
320 		mutex_enter(&clientp->ic_mutex);
321 		clientp->ic_sends_active--;
322 		mutex_exit(&clientp->ic_mutex);
323 	} else {
324 		mutex_enter(&qpp->isq_mutex);
325 		qpp->isq_sends_active--;
326 		mutex_exit(&qpp->isq_mutex);
327 	}
328 
329 	mutex_enter(&clientp->ic_kstat_mutex);
330 	IBMF_SUB32_KSTATS(clientp, sends_active, 1);
331 	mutex_exit(&clientp->ic_kstat_mutex);
332 
333 	send_wqep->send_status = ibmf_i_ibt_wc_to_ibmf_status(wcp->wc_status);
334 
335 	/*
336 	 * issue the callback using taskq. If no taskq or if the
337 	 * dispatch fails, we do the send processing in the callback context
338 	 * which is the interrupt context
339 	 */
340 	if (cclientp->ic_send_taskq == NULL) {
341 		/* Do the processing in callback context */
342 		mutex_enter(&clientp->ic_kstat_mutex);
343 		IBMF_ADD32_KSTATS(clientp, send_cb_active, 1);
344 		mutex_exit(&clientp->ic_kstat_mutex);
345 		ibmf_i_do_send_cb((void *)send_wqep);
346 		mutex_enter(&clientp->ic_kstat_mutex);
347 		IBMF_SUB32_KSTATS(clientp, send_cb_active, 1);
348 		mutex_exit(&clientp->ic_kstat_mutex);
349 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
350 		    ibmf_i_handle_send_err, IBMF_TNF_ERROR, "",
351 		    "ibmf_i_handle_send_completion(): %s\n",
352 		    tnf_string, msg, "ci_send_taskq == NULL");
353 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
354 		    ibmf_i_handle_send_completion_end, IBMF_TNF_TRACE, "",
355 		    "ibmf_i_handle_send_completion() exit\n");
356 		return;
357 	}
358 
359 	mutex_enter(&clientp->ic_kstat_mutex);
360 	IBMF_ADD32_KSTATS(clientp, send_cb_active, 1);
361 	mutex_exit(&clientp->ic_kstat_mutex);
362 
363 	/* Use taskq for processing if the IBMF_REG_FLAG_NO_OFFLOAD isn't set */
364 	if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
365 		ret = taskq_dispatch(cclientp->ic_send_taskq, ibmf_i_do_send_cb,
366 		    send_wqep, TQ_NOSLEEP);
367 		if (ret == 0) {
368 			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
369 			    ibmf_i_handle_send_err, IBMF_TNF_ERROR, "",
370 			    "ibmf_i_handle_send_completion(): %s\n",
371 			    tnf_string, msg, "send: dispatch failed");
372 			ibmf_i_do_send_cb((void *)send_wqep);
373 		}
374 	} else {
375 		ibmf_i_do_send_cb((void *)send_wqep);
376 	}
377 
378 	mutex_enter(&clientp->ic_kstat_mutex);
379 	IBMF_SUB32_KSTATS(clientp, send_cb_active, 1);
380 	mutex_exit(&clientp->ic_kstat_mutex);
381 
382 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*send_wqep))
383 
384 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
385 	    ibmf_i_handle_send_completion_end, IBMF_TNF_TRACE, "",
386 	    "ibmf_i_handle_send_completion() exit\n");
387 }
388 
389 /*
390  * ibmf_i_do_send_cb():
391  *	Do the send completion processing
392  */
393 static void
394 ibmf_i_do_send_cb(void *taskq_arg)
395 {
396 	ibmf_ci_t		*cip;
397 	ibmf_msg_impl_t		*msgimplp;
398 	ibmf_client_t		*clientp;
399 	ibmf_send_wqe_t		*send_wqep;
400 	boolean_t		found;
401 	int			msg_trans_state_flags, msg_flags;
402 	uint_t			ref_cnt;
403 	ibmf_qp_handle_t	ibmf_qp_handle;
404 	struct kmem_cache	*kmem_cachep;
405 	timeout_id_t		msg_rp_unset_id, msg_tr_unset_id;
406 	timeout_id_t		msg_rp_set_id, msg_tr_set_id;
407 	ibmf_alt_qp_t		*altqp;
408 	boolean_t		inc_refcnt;
409 
410 	send_wqep = taskq_arg;
411 
412 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
413 	    ibmf_i_do_send_cb_start, IBMF_TNF_TRACE, "",
414 	    "ibmf_i_do_send_cb() enter, send_wqep = %p\n",
415 	    tnf_opaque, send_wqep, send_wqep);
416 
417 	clientp = send_wqep->send_client;
418 	cip = clientp->ic_myci;
419 	msgimplp = send_wqep->send_msg;
420 
421 	/* get the QP handle */
422 	ibmf_qp_handle = send_wqep->send_ibmf_qp_handle;
423 
424 	/* Get the WQE kmem cache pointer based on the QP type */
425 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
426 		kmem_cachep = cip->ci_send_wqes_cache;
427 	else {
428 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
429 		kmem_cachep = altqp->isq_send_wqes_cache;
430 	}
431 
432 	/* Look for a message in the client's message list */
433 	inc_refcnt = B_TRUE;
434 	found = ibmf_i_find_msg_client(clientp, msgimplp, inc_refcnt);
435 
436 	/*
437 	 * If the message context was not found, then it's likely
438 	 * been freed up. So, do nothing in this timeout handler
439 	 */
440 	if (found == B_FALSE) {
441 		kmem_cache_free(kmem_cachep, send_wqep);
442 		mutex_enter(&cip->ci_mutex);
443 		IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
444 		mutex_exit(&cip->ci_mutex);
445 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
446 			mutex_enter(&cip->ci_mutex);
447 			cip->ci_wqes_alloced--;
448 			if (cip->ci_wqes_alloced == 0)
449 				cv_signal(&cip->ci_wqes_cv);
450 			mutex_exit(&cip->ci_mutex);
451 		} else {
452 			mutex_enter(&altqp->isq_mutex);
453 			altqp->isq_wqes_alloced--;
454 			if (altqp->isq_wqes_alloced == 0)
455 				cv_signal(&altqp->isq_wqes_cv);
456 			mutex_exit(&altqp->isq_mutex);
457 		}
458 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3,
459 		    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
460 		    "ibmf_i_do_send_cb(): %s\n", tnf_string, msg,
461 		    "Message not found, return without processing send cb");
462 		return;
463 	}
464 
465 	/* Grab the message context lock */
466 	mutex_enter(&msgimplp->im_mutex);
467 
468 	/*
469 	 * Decrement the count of pending send completions for
470 	 * this transaction
471 	 */
472 	msgimplp->im_pending_send_compls -= 1;
473 
474 	/*
475 	 * If the pending send completions is not zero, then we must
476 	 * not attempt to notify the client of a transaction completion
477 	 * in this instance of the send completion handler. Notification
478 	 * of transaction completion should be provided only by the
479 	 * last send completion so that all send completions are accounted
480 	 * for before the client is notified and subsequently attempts to
481 	 * reuse the message for an other transaction.
482 	 * If this is not done, the message may be reused while the
483 	 * send WR from the old transaction is still active in the QP's WQ.
484 	 * This could result in an attempt to modify the address handle with
485 	 * information for the new transaction which could be potentially
486 	 * incompatible, such as an incorrect port number. Such an
487 	 * incompatible modification of the address handle of the old
488 	 * transaction could result in a QP error.
489 	 */
490 	if (msgimplp->im_pending_send_compls != 0) {
491 		IBMF_MSG_DECR_REFCNT(msgimplp);
492 		mutex_exit(&msgimplp->im_mutex);
493 		kmem_cache_free(kmem_cachep, send_wqep);
494 		mutex_enter(&cip->ci_mutex);
495 		IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
496 		mutex_exit(&cip->ci_mutex);
497 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
498 			mutex_enter(&cip->ci_mutex);
499 			cip->ci_wqes_alloced--;
500 			if (cip->ci_wqes_alloced == 0)
501 				cv_signal(&cip->ci_wqes_cv);
502 			mutex_exit(&cip->ci_mutex);
503 		} else {
504 			mutex_enter(&altqp->isq_mutex);
505 			altqp->isq_wqes_alloced--;
506 			if (altqp->isq_wqes_alloced == 0)
507 				cv_signal(&altqp->isq_wqes_cv);
508 			mutex_exit(&altqp->isq_mutex);
509 		}
510 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3,
511 		    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
512 		    "ibmf_i_do_send_cb(): %s\n", tnf_string, msg,
513 		    "Message found with pending send completions, "
514 		    "return without processing send cb");
515 		return;
516 	}
517 
518 	/*
519 	 * If the message has been marked unitialized or done
520 	 * release the message mutex and return
521 	 */
522 	if ((msgimplp->im_trans_state_flags & IBMF_TRANS_STATE_FLAG_UNINIT) ||
523 	    (msgimplp->im_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE)) {
524 		IBMF_MSG_DECR_REFCNT(msgimplp);
525 		msg_trans_state_flags = msgimplp->im_trans_state_flags;
526 		msg_flags = msgimplp->im_flags;
527 		ref_cnt = msgimplp->im_ref_count;
528 		mutex_exit(&msgimplp->im_mutex);
529 		/*
530 		 * This thread may notify the client only if the
531 		 * transaction is done, the message has been removed
532 		 * from the client's message list, and the message
533 		 * reference count is 0.
534 		 * If the transaction is done, and the message reference
535 		 * count = 0, there is still a possibility that a
536 		 * packet could arrive for the message and its reference
537 		 * count increased if the message is still on the list.
538 		 * If the message is still on the list, it will be
539 		 * removed by a call to ibmf_i_client_rem_msg() at
540 		 * the completion point of the transaction.
541 		 * So, the reference count should be checked after the
542 		 * message has been removed.
543 		 */
544 		if ((msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) &&
545 		    !(msg_flags & IBMF_MSG_FLAGS_ON_LIST) &&
546 		    (ref_cnt == 0)) {
547 
548 			ibmf_i_notify_sequence(clientp, msgimplp, msg_flags);
549 
550 		}
551 		kmem_cache_free(kmem_cachep, send_wqep);
552 		mutex_enter(&cip->ci_mutex);
553 		IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
554 		mutex_exit(&cip->ci_mutex);
555 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
556 			mutex_enter(&cip->ci_mutex);
557 			cip->ci_wqes_alloced--;
558 			if (cip->ci_wqes_alloced == 0)
559 				cv_signal(&cip->ci_wqes_cv);
560 			mutex_exit(&cip->ci_mutex);
561 		} else {
562 			mutex_enter(&altqp->isq_mutex);
563 			altqp->isq_wqes_alloced--;
564 			if (altqp->isq_wqes_alloced == 0)
565 				cv_signal(&altqp->isq_wqes_cv);
566 			mutex_exit(&altqp->isq_mutex);
567 		}
568 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
569 		    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
570 		    "ibmf_i_do_send_cb(): %s, msg = %p\n", tnf_string, msg,
571 		    "Message marked for removal, return without processing "
572 		    "send cb", tnf_opaque, msgimplp, msgimplp);
573 		return;
574 	}
575 
576 	/* Perform send completion processing of the message context */
577 	ibmf_i_do_send_compl((ibmf_handle_t)clientp, msgimplp, send_wqep);
578 
579 	msg_rp_unset_id = msg_tr_unset_id = msg_rp_set_id = msg_tr_set_id = 0;
580 
581 	/* Save the message flags before releasing the mutex */
582 	msg_trans_state_flags = msgimplp->im_trans_state_flags;
583 	msg_flags = msgimplp->im_flags;
584 	msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
585 	msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
586 	msgimplp->im_rp_unset_timeout_id = 0;
587 	msgimplp->im_tr_unset_timeout_id = 0;
588 
589 	/*
590 	 * Decrement the message reference count
591 	 * This count was inceremented when the message was found on the
592 	 * client's message list
593 	 */
594 	IBMF_MSG_DECR_REFCNT(msgimplp);
595 
596 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
597 		if (msgimplp->im_rp_timeout_id != 0) {
598 			msg_rp_set_id = msgimplp->im_rp_timeout_id;
599 			msgimplp->im_rp_timeout_id = 0;
600 		}
601 		if (msgimplp->im_tr_timeout_id != 0) {
602 			msg_tr_set_id = msgimplp->im_tr_timeout_id;
603 			msgimplp->im_tr_timeout_id = 0;
604 		}
605 	}
606 
607 	mutex_exit(&msgimplp->im_mutex);
608 
609 	if (msg_rp_unset_id != 0) {
610 		(void) untimeout(msg_rp_unset_id);
611 	}
612 
613 	if (msg_tr_unset_id != 0) {
614 		(void) untimeout(msg_tr_unset_id);
615 	}
616 
617 	if (msg_rp_set_id != 0) {
618 		(void) untimeout(msg_rp_set_id);
619 	}
620 
621 	if (msg_tr_set_id != 0) {
622 		(void) untimeout(msg_tr_set_id);
623 	}
624 
625 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
626 	    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
627 	    "ibmf_i_do_send_cb(): %s, msg = %p\n",
628 	    tnf_string, msg, "Send callback done.  Dec ref count",
629 	    tnf_opaque, msgimplp, msgimplp);
630 
631 	/*
632 	 * If the transaction is done, signal the block thread if the
633 	 * transaction is blocking, or call the client's transaction done
634 	 * notification callback
635 	 */
636 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
637 
638 		/* Remove the message from the client's message list */
639 		ibmf_i_client_rem_msg(clientp, msgimplp, &ref_cnt);
640 
641 		/*
642 		 * Notify the client if the message reference count is zero.
643 		 * At this point, we know that the transaction is done and
644 		 * the message has been removed from the client's message list.
645 		 * So, we only need to make sure the reference count is zero
646 		 * before notifying the client.
647 		 */
648 		if (ref_cnt == 0) {
649 
650 			ibmf_i_notify_sequence(clientp, msgimplp, msg_flags);
651 
652 		}
653 	}
654 
655 	kmem_cache_free(kmem_cachep, send_wqep);
656 	mutex_enter(&cip->ci_mutex);
657 	IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
658 	mutex_exit(&cip->ci_mutex);
659 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
660 		mutex_enter(&cip->ci_mutex);
661 		cip->ci_wqes_alloced--;
662 		if (cip->ci_wqes_alloced == 0)
663 			cv_signal(&cip->ci_wqes_cv);
664 		mutex_exit(&cip->ci_mutex);
665 	} else {
666 		mutex_enter(&altqp->isq_mutex);
667 		altqp->isq_wqes_alloced--;
668 		if (altqp->isq_wqes_alloced == 0)
669 			cv_signal(&altqp->isq_wqes_cv);
670 		mutex_exit(&altqp->isq_mutex);
671 	}
672 
673 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
674 	    ibmf_i_do_send_cb_end, IBMF_TNF_TRACE, "",
675 	    "ibmf_i_do_send_cb() exit\n");
676 }
677 
678 /*
679  * ibmf_i_do_send_compl():
680  *	Determine if the transaction is complete
681  */
682 /* ARGSUSED */
683 static void
684 ibmf_i_do_send_compl(ibmf_handle_t ibmf_handle, ibmf_msg_impl_t *msgimplp,
685     ibmf_send_wqe_t *send_wqep)
686 {
687 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_do_send_compl_start,
688 	    IBMF_TNF_TRACE, "", "ibmf_i_do_send_compl(): ibmf_hdl = 0x%p "
689 	    "msgp = %p, send_wqep = 0x%p, msg_flags = 0x%x\n",
690 	    tnf_opaque, ibmf_hdl, ibmf_handle, tnf_opaque, msgimplp, msgimplp,
691 	    tnf_opaque, send_wqep, send_wqep,
692 	    tnf_opaque, msg_flags, msgimplp->im_flags);
693 
694 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
695 
696 	/*
697 	 * For RMPP transactions, we only care about the final packet of the
698 	 * transaction.  For others, the code does not need to wait for the send
699 	 * completion (although bad things can happen if it never occurs).
700 	 * The final packets of a transaction are sent when the state is either
701 	 * ABORT or RECEVR_TERMINATE.
702 	 * Don't mark the transaction as send_done if there are still more
703 	 * packets to be sent, including doing the second part of a double-sided
704 	 * transaction.
705 	 */
706 	if ((msgimplp->im_flags & IBMF_MSG_FLAGS_RECV_RMPP) ||
707 	    (msgimplp->im_flags & IBMF_MSG_FLAGS_SEND_RMPP)) {
708 
709 		IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3,
710 		    ibmf_i_do_send_compl, IBMF_TNF_TRACE, "",
711 		    "ibmf_i_do_send_compl(): %s msgp = %p, rmpp_state = 0x%x\n",
712 		    tnf_string, msg, "Received send callback for RMPP trans",
713 		    tnf_opaque, msg, msgimplp,
714 		    tnf_opaque, rmpp_state, msgimplp->im_rmpp_ctx.rmpp_state);
715 
716 		/*
717 		 * For ABORT state, we should not return control to
718 		 * the client from the send completion handler.
719 		 * Control should be returned in the error timeout handler.
720 		 *
721 		 * The exception is when the IBMF_TRANS_STATE_FLAG_RECV_DONE
722 		 * flag has already been set. This flag is set when
723 		 * ibmf_i_terminate_transaction is called from one of the
724 		 * three timeout handlers. In this case return control from
725 		 * here.
726 		 */
727 		if (msgimplp->im_rmpp_ctx.rmpp_state == IBMF_RMPP_STATE_ABORT) {
728 			msgimplp->im_trans_state_flags |=
729 			    IBMF_TRANS_STATE_FLAG_SEND_DONE;
730 			if (msgimplp->im_trans_state_flags &
731 			    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
732 				msgimplp->im_trans_state_flags |=
733 				    IBMF_TRANS_STATE_FLAG_DONE;
734 			}
735 		}
736 
737 		if ((msgimplp->im_rmpp_ctx.rmpp_state ==
738 		    IBMF_RMPP_STATE_RECEVR_TERMINATE) ||
739 		    (msgimplp->im_rmpp_ctx.rmpp_state ==
740 		    IBMF_RMPP_STATE_DONE)) {
741 			msgimplp->im_trans_state_flags |=
742 			    IBMF_TRANS_STATE_FLAG_SEND_DONE;
743 			if (msgimplp->im_trans_state_flags  &
744 			    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
745 				msgimplp->im_trans_state_flags |=
746 				    IBMF_TRANS_STATE_FLAG_DONE;
747 			}
748 		}
749 
750 		/*
751 		 * If the transaction is a send-only RMPP, then
752 		 * set the SEND_DONE flag on every send completion
753 		 * as long as there are no outstanding ones.
754 		 * This is needed so that the transaction can return
755 		 * in the receive path, where ibmf_i_terminate_transaction
756 		 * is called from ibmf_i_rmpp_sender_active_flow,
757 		 * after checking if the SEND_DONE flag is set.
758 		 * When a new MAD is sent as part of the RMPP transaction,
759 		 * the SEND_DONE flag will get reset.
760 		 * The RECV_DONE indicates that the last ACK was received.
761 		 */
762 		if ((msgimplp->im_flags & IBMF_MSG_FLAGS_SEQUENCED) == 0) {
763 			if (msgimplp->im_pending_send_compls == 0) {
764 				msgimplp->im_trans_state_flags |=
765 				    IBMF_TRANS_STATE_FLAG_SEND_DONE;
766 				if (msgimplp->im_trans_state_flags  &
767 				    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
768 					msgimplp->im_trans_state_flags |=
769 					    IBMF_TRANS_STATE_FLAG_DONE;
770 				}
771 			}
772 		}
773 
774 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
775 		    ibmf_i_do_send_compl_end, IBMF_TNF_TRACE, "",
776 		    "ibmf_i_do_send_compl() exit\n");
777 		return;
778 	}
779 
780 	/*
781 	 * Only non-RMPP send completion gets here.
782 	 * If the send is a single-packet send that does not use RMPP, and if
783 	 * the transaction is not a sequenced transaction, call the transaction
784 	 * callback handler after flagging the transaction as done.  If the
785 	 * message is sequenced, start a timer to bound the wait for the first
786 	 * data packet of the response.
787 	 */
788 	if (msgimplp->im_flags & IBMF_MSG_FLAGS_SEQUENCED) {
789 
790 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
791 		    ibmf_i_do_send_compl, IBMF_TNF_TRACE, "",
792 		    "ibmf_i_do_send_compl(): %s msgp = %p\n", tnf_string, msg,
793 		    "Sequenced transaction, setting response timer",
794 		    tnf_opaque, msg, msgimplp);
795 
796 		/*
797 		 * Check if the send completion already occured,
798 		 * which could imply that this is a send completion
799 		 * for some previous transaction that has come in very late.
800 		 * In this case exit here.
801 		 */
802 		if (msgimplp->im_trans_state_flags  &
803 		    IBMF_TRANS_STATE_FLAG_SEND_DONE) {
804 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
805 			    ibmf_i_do_send_compl_end, IBMF_TNF_TRACE, "",
806 			    "ibmf_i_do_send_compl() exit, "
807 			    "Duplicate SEND completion\n");
808 			return;
809 		}
810 
811 		/* mark as send_compl happened */
812 		msgimplp->im_trans_state_flags |=
813 		    IBMF_TRANS_STATE_FLAG_SEND_DONE;
814 
815 		if (msgimplp->im_trans_state_flags  &
816 		    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
817 			msgimplp->im_trans_state_flags |=
818 			    IBMF_TRANS_STATE_FLAG_DONE;
819 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
820 			    ibmf_i_do_send_compl_end, IBMF_TNF_TRACE, "",
821 			    "ibmf_i_do_send_compl() exit, RECV_DONE\n");
822 			return;
823 		}
824 
825 		/*
826 		 * check if response was received before send
827 		 * completion
828 		 */
829 		if (((msgimplp->im_trans_state_flags &
830 		    IBMF_TRANS_STATE_FLAG_DONE) == 0) &&
831 		    ((msgimplp->im_trans_state_flags &
832 		    IBMF_TRANS_STATE_FLAG_RECV_ACTIVE) == 0)) {
833 			/* set timer for first packet of response */
834 			ibmf_i_set_timer(ibmf_i_send_timeout, msgimplp,
835 			    IBMF_RESP_TIMER);
836 		}
837 	} else {
838 		msgimplp->im_msg_status = IBMF_SUCCESS;
839 		msgimplp->im_trans_state_flags |=
840 		    IBMF_TRANS_STATE_FLAG_SEND_DONE;
841 		msgimplp->im_trans_state_flags |= IBMF_TRANS_STATE_FLAG_DONE;
842 	}
843 
844 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_do_send_compl_end,
845 	    IBMF_TNF_TRACE, "", "ibmf_i_do_send_compl() exit\n");
846 }
847