xref: /illumos-gate/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c (revision 355b4669e025ff377602b6fc7caaf30dbc218371)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/ib/ibtl/impl/ibtl.h>
29 #include <sys/ib/ibtl/impl/ibtl_cm.h>
30 #include <sys/taskq.h>
31 #include <sys/disp.h>
32 #include <sys/callb.h>
33 #include <sys/proc.h>
34 
35 /*
36  * ibtl_handlers.c
37  */
38 
39 /*
40  * What's in this file?
41  *
42  *   This file started as an implementation of Asynchronous Event/Error
43  *   handling and Completion Queue handling.  As the implementation
44  *   evolved, code has been added for other ibc_* interfaces (resume,
45  *   predetach, etc.) that use the same mechanisms as used for asyncs.
46  *
47  * Async and CQ handling at interrupt level.
48  *
49  *   CQ handling is normally done at interrupt level using the CQ callback
50  *   handler to call the appropriate IBT Client (owner of the CQ).  For
51  *   clients that would prefer a fully flexible non-interrupt context to
52  *   do their CQ handling, a CQ can be created so that its handler is
53  *   called from a non-interrupt thread.  CQ handling is done frequently
54  *   whereas Async handling is expected to occur very infrequently.
55  *
56  *   Async handling is done by marking (or'ing in of an async_code of) the
57  *   pertinent IBTL data structure, and then notifying the async_thread(s)
58  *   that the data structure has async work to be done.  The notification
59  *   occurs by linking the data structure through its async_link onto a
60  *   list of like data structures and waking up an async_thread.  This
61  *   list append is not done if there is already async work pending on
62  *   this data structure (IBTL_ASYNC_PENDING).
63  *
64  * Async Mutex and CQ Mutex
65  *
66  *   The global ibtl_async_mutex is "the" mutex used to control access
67  *   to all the data needed by ibc_async_handler.  All the threads that
68  *   use this mutex are written so that the mutex is held for very short
69  *   periods of time, and never held while making calls to functions
70  *   that may block.
71  *
72  *   The global ibtl_cq_mutex is used similarly by ibc_cq_handler and
73  *   the ibtl_cq_thread(s).
74  *
75  * Mutex hierarchy
76  *
77  *   The ibtl_clnt_list_mutex is above the ibtl_async_mutex.
78  *   ibtl_clnt_list_mutex protects all of the various lists.
79  *   The ibtl_async_mutex is below this in the hierarchy.
80  *
81  *   The ibtl_cq_mutex is independent of the above mutexes.
82  *
83  * Threads
84  *
85  *   There are "ibtl_cq_threads" number of threads created for handling
86  *   Completion Queues in threads.  If this feature really gets used,
87  *   then we will want to do some suitable tuning.  Similarly, we may
88  *   want to tune the number of "ibtl_async_thread_init".
89  *
90  *   The function ibtl_cq_thread is the main loop for handling a CQ in a
91  *   thread.  There can be multiple threads executing this same code.
92  *   The code sleeps when there is no work to be done (list is empty),
93  *   otherwise it pulls the first CQ structure off the list and performs
94  *   the CQ handler callback to the client.  After that returns, a check
95  *   is made, and if another ibc_cq_handler call was made for this CQ,
96  *   the client is called again.
97  *
98  *   The function ibtl_async_thread is the main loop for handling async
99  *   events/errors.  There can be multiple threads executing this same code.
100  *   The code sleeps when there is no work to be done (lists are empty),
101  *   otherwise it pulls the first structure off one of the lists and
102  *   performs the async callback(s) to the client(s).  Note that HCA
103  *   async handling is done by calling each of the clients using the HCA.
104  *   When the async handling completes, the data structure having the async
105  *   event/error is checked for more work before it's considered "done".
106  *
107  * Taskq
108  *
109  *   The async_taskq is used here for allowing async handler callbacks to
110  *   occur simultaneously to multiple clients of an HCA.  This taskq could
111  *   be used for other purposes, e.g., if all the async_threads are in
112  *   use, but this is deemed as overkill since asyncs should occur rarely.
113  */
114 
/* Globals */
static char ibtf_handlers[] = "ibtl_handlers";

/* priority for IBTL threads (async, cq, and taskq) */
static pri_t ibtl_pri = MAXCLSYSPRI - 1; /* maybe override in /etc/system */

/* taskq used for HCA asyncs */
#define	ibtl_async_taskq system_taskq

/* data for async handling by threads */
static kmutex_t ibtl_async_mutex;	/* protects most *_async_* data */
static kcondvar_t ibtl_async_cv;	/* async_threads wait on this */
static kcondvar_t ibtl_clnt_cv;		/* ibt_detach might wait on this */
static void ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp);
static void ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp);

static kt_did_t *ibtl_async_did;	/* for thread_join() */
static int ibtl_async_thread_init = 4;	/* total # of async_threads to create */
static int ibtl_async_thread_exit = 0;	/* set if/when thread(s) should exit */

/*
 * Async work queues, one per object type.  Each is a singly linked
 * FIFO: *_start is the head (next to be serviced), *_end is the tail
 * (most recently appended).  Both are NULL when a list is empty.
 * All are protected by ibtl_async_mutex.
 */
static ibtl_hca_devinfo_t *ibtl_async_hca_list_start, *ibtl_async_hca_list_end;
static ibtl_eec_t *ibtl_async_eec_list_start, *ibtl_async_eec_list_end;
static ibtl_qp_t *ibtl_async_qp_list_start, *ibtl_async_qp_list_end;
static ibtl_cq_t *ibtl_async_cq_list_start, *ibtl_async_cq_list_end;
static ibtl_srq_t *ibtl_async_srq_list_start, *ibtl_async_srq_list_end;

/* data for CQ completion handling by threads */
static kmutex_t ibtl_cq_mutex;	/* protects the cv and the list below */
static kcondvar_t ibtl_cq_cv;
static ibtl_cq_t *ibtl_cq_list_start, *ibtl_cq_list_end;	/* CQ FIFO */

static int ibtl_cq_threads = 0;		/* total # of cq threads */
static int ibtl_cqs_using_threads = 0;	/* total # of cqs using threads */
static int ibtl_cq_thread_exit = 0;	/* set if/when thread(s) should exit */

/* value used to tell IBTL threads to exit */
#define	IBTL_THREAD_EXIT 0x1b7fdead	/* IBTF DEAD */

/* nonzero disables all EEC async handling (see ibc_async_handler) */
int ibtl_eec_not_supported = 1;

char *ibtl_last_client_name;	/* may help debugging */

_NOTE(LOCK_ORDER(ibtl_clnt_list_mutex ibtl_async_mutex))
159 
160 /*
161  * ibc_async_handler()
162  *
163  * Asynchronous Event/Error Handler.
164  *
 *	This is the function called by HCA drivers to post the various
 *	async events and errors mentioned in the IB architecture spec.  See
167  *	ibtl_types.h for additional details of this.
168  *
169  *	This function marks the pertinent IBTF object with the async_code,
170  *	and queues the object for handling by an ibtl_async_thread.  If
171  *	the object is NOT already marked for async processing, it is added
172  *	to the associated list for that type of object, and an
173  *	ibtl_async_thread is signaled to finish the async work.
174  */
175 void
176 ibc_async_handler(ibc_clnt_hdl_t hca_devp, ibt_async_code_t code,
177     ibc_async_event_t *event_p)
178 {
179 	ibtl_qp_t	*ibtl_qp;
180 	ibtl_cq_t	*ibtl_cq;
181 	ibtl_srq_t	*ibtl_srq;
182 	ibtl_eec_t	*ibtl_eec;
183 	uint8_t		port_minus1;
184 
185 	IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler(%p, 0x%x, %p)",
186 	    hca_devp, code, event_p);
187 
188 	mutex_enter(&ibtl_async_mutex);
189 
190 	switch (code) {
191 	case IBT_EVENT_PATH_MIGRATED_QP:
192 	case IBT_EVENT_SQD:
193 	case IBT_ERROR_CATASTROPHIC_QP:
194 	case IBT_ERROR_PATH_MIGRATE_REQ_QP:
195 	case IBT_EVENT_COM_EST_QP:
196 	case IBT_ERROR_INVALID_REQUEST_QP:
197 	case IBT_ERROR_ACCESS_VIOLATION_QP:
198 	case IBT_EVENT_EMPTY_QP:
199 		ibtl_qp = event_p->ev_qp_hdl;
200 		if (ibtl_qp == NULL) {
201 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
202 			    "bad qp handle");
203 			break;
204 		}
205 		switch (code) {
206 		case IBT_ERROR_CATASTROPHIC_QP:
207 			ibtl_qp->qp_cat_fma_ena = event_p->ev_fma_ena; break;
208 		case IBT_ERROR_PATH_MIGRATE_REQ_QP:
209 			ibtl_qp->qp_pth_fma_ena = event_p->ev_fma_ena; break;
210 		case IBT_ERROR_INVALID_REQUEST_QP:
211 			ibtl_qp->qp_inv_fma_ena = event_p->ev_fma_ena; break;
212 		case IBT_ERROR_ACCESS_VIOLATION_QP:
213 			ibtl_qp->qp_acc_fma_ena = event_p->ev_fma_ena; break;
214 		}
215 
216 		ibtl_qp->qp_async_codes |= code;
217 		if ((ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) == 0) {
218 			ibtl_qp->qp_async_flags |= IBTL_ASYNC_PENDING;
219 			ibtl_qp->qp_async_link = NULL;
220 			if (ibtl_async_qp_list_end == NULL)
221 				ibtl_async_qp_list_start = ibtl_qp;
222 			else
223 				ibtl_async_qp_list_end->qp_async_link = ibtl_qp;
224 			ibtl_async_qp_list_end = ibtl_qp;
225 			cv_signal(&ibtl_async_cv);
226 		}
227 		break;
228 
229 	case IBT_ERROR_CQ:
230 		ibtl_cq = event_p->ev_cq_hdl;
231 		if (ibtl_cq == NULL) {
232 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
233 			    "bad cq handle");
234 			break;
235 		}
236 		ibtl_cq->cq_async_codes |= code;
237 		ibtl_cq->cq_fma_ena = event_p->ev_fma_ena;
238 		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) == 0) {
239 			ibtl_cq->cq_async_flags |= IBTL_ASYNC_PENDING;
240 			ibtl_cq->cq_async_link = NULL;
241 			if (ibtl_async_cq_list_end == NULL)
242 				ibtl_async_cq_list_start = ibtl_cq;
243 			else
244 				ibtl_async_cq_list_end->cq_async_link = ibtl_cq;
245 			ibtl_async_cq_list_end = ibtl_cq;
246 			cv_signal(&ibtl_async_cv);
247 		}
248 		break;
249 
250 	case IBT_ERROR_CATASTROPHIC_SRQ:
251 	case IBT_EVENT_LIMIT_REACHED_SRQ:
252 		ibtl_srq = event_p->ev_srq_hdl;
253 		if (ibtl_srq == NULL) {
254 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
255 			    "bad srq handle");
256 			break;
257 		}
258 		ibtl_srq->srq_async_codes |= code;
259 		ibtl_srq->srq_fma_ena = event_p->ev_fma_ena;
260 		if ((ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) == 0) {
261 			ibtl_srq->srq_async_flags |= IBTL_ASYNC_PENDING;
262 			ibtl_srq->srq_async_link = NULL;
263 			if (ibtl_async_srq_list_end == NULL)
264 				ibtl_async_srq_list_start = ibtl_srq;
265 			else
266 				ibtl_async_srq_list_end->srq_async_link =
267 				    ibtl_srq;
268 			ibtl_async_srq_list_end = ibtl_srq;
269 			cv_signal(&ibtl_async_cv);
270 		}
271 		break;
272 
273 	case IBT_EVENT_PATH_MIGRATED_EEC:
274 	case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
275 	case IBT_ERROR_CATASTROPHIC_EEC:
276 	case IBT_EVENT_COM_EST_EEC:
277 		if (ibtl_eec_not_supported) {
278 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
279 			    "EEC events are disabled.");
280 			break;
281 		}
282 		ibtl_eec = event_p->ev_eec_hdl;
283 		if (ibtl_eec == NULL) {
284 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
285 			    "bad eec handle");
286 			break;
287 		}
288 		switch (code) {
289 		case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
290 			ibtl_eec->eec_pth_fma_ena = event_p->ev_fma_ena; break;
291 		case IBT_ERROR_CATASTROPHIC_EEC:
292 			ibtl_eec->eec_cat_fma_ena = event_p->ev_fma_ena; break;
293 		}
294 		ibtl_eec->eec_async_codes |= code;
295 		if ((ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) == 0) {
296 			ibtl_eec->eec_async_flags |= IBTL_ASYNC_PENDING;
297 			ibtl_eec->eec_async_link = NULL;
298 			if (ibtl_async_eec_list_end == NULL)
299 				ibtl_async_eec_list_start = ibtl_eec;
300 			else
301 				ibtl_async_eec_list_end->eec_async_link =
302 				    ibtl_eec;
303 			ibtl_async_eec_list_end = ibtl_eec;
304 			cv_signal(&ibtl_async_cv);
305 		}
306 		break;
307 
308 	case IBT_ERROR_LOCAL_CATASTROPHIC:
309 		hca_devp->hd_async_codes |= code;
310 		hca_devp->hd_fma_ena = event_p->ev_fma_ena;
311 		/* FALLTHROUGH */
312 
313 	case IBT_EVENT_PORT_UP:
314 	case IBT_ERROR_PORT_DOWN:
315 		if ((code == IBT_EVENT_PORT_UP) ||
316 		    (code == IBT_ERROR_PORT_DOWN)) {
317 			if ((port_minus1 = event_p->ev_port - 1) >=
318 			    hca_devp->hd_hca_attr->hca_nports) {
319 				IBTF_DPRINTF_L2(ibtf_handlers,
320 				    "ibc_async_handler: bad port #: %d",
321 				    event_p->ev_port);
322 			    break;
323 			}
324 			hca_devp->hd_async_port[port_minus1] =
325 			    ((code == IBT_EVENT_PORT_UP) ? IBTL_HCA_PORT_UP :
326 			    IBTL_HCA_PORT_DOWN) | IBTL_HCA_PORT_CHANGED;
327 			hca_devp->hd_async_codes |= code;
328 		}
329 
330 		if ((hca_devp->hd_async_flags & IBTL_ASYNC_PENDING) == 0) {
331 			hca_devp->hd_async_flags |= IBTL_ASYNC_PENDING;
332 			hca_devp->hd_async_link = NULL;
333 			if (ibtl_async_hca_list_end == NULL)
334 				ibtl_async_hca_list_start = hca_devp;
335 			else
336 				ibtl_async_hca_list_end->hd_async_link =
337 				    hca_devp;
338 			ibtl_async_hca_list_end = hca_devp;
339 			cv_signal(&ibtl_async_cv);
340 		}
341 
342 		break;
343 
344 	default:
345 		IBTF_DPRINTF_L1(ibtf_handlers, "ibc_async_handler: "
346 		    "invalid code (0x%x)", code);
347 	}
348 
349 	mutex_exit(&ibtl_async_mutex);
350 }
351 
352 
353 /* Finally, make the async call to the client. */
354 
355 static void
356 ibtl_async_client_call(ibtl_hca_t *ibt_hca, ibt_async_code_t code,
357     ibt_async_event_t *event_p)
358 {
359 	ibtl_clnt_t		*clntp;
360 	void			*client_private;
361 	ibt_async_handler_t	async_handler;
362 	char			*client_name;
363 
364 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call(%p, 0x%x, %p)",
365 	    ibt_hca, code, event_p);
366 
367 	clntp = ibt_hca->ha_clnt_devp;
368 
369 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
370 	/* Record who is being called (just a debugging aid) */
371 	ibtl_last_client_name = client_name = clntp->clnt_name;
372 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
373 
374 	client_private = clntp->clnt_private;
375 	async_handler = clntp->clnt_modinfop->mi_async_handler;
376 
377 	if (code & (IBT_EVENT_COM_EST_QP | IBT_EVENT_COM_EST_EEC)) {
378 		mutex_enter(&ibtl_clnt_list_mutex);
379 		async_handler = ibtl_cm_async_handler;
380 		client_private = ibtl_cm_clnt_private;
381 		mutex_exit(&ibtl_clnt_list_mutex);
382 		ibt_hca = NULL;
383 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
384 		    "calling CM for COM_EST");
385 	} else {
386 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
387 		    "calling client '%s'", client_name);
388 	}
389 	if (async_handler != NULL)
390 		async_handler(client_private, ibt_hca, code, event_p);
391 	else
392 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
393 		    "client '%s' has no async handler", client_name);
394 }
395 
396 /*
397  * Inform CM or DM about HCA events.
398  *
399  *	We use taskqs to allow simultaneous notification, with sleeping.
400  *	Since taskqs only allow one argument, we define a structure
401  *	because we need to pass in more than one argument.
402  */
403 
/* Argument bundle for one ibtl_do_mgr_async_task() taskq dispatch. */
struct ibtl_mgr_s {
	ibtl_hca_devinfo_t	*mgr_hca_devp;	/* HCA the event is for */
	ibt_async_handler_t	mgr_async_handler; /* CM/DM/IBMA handler */
	void			*mgr_clnt_private; /* handler's private arg */
};
409 
410 /*
411  * Asyncs of HCA level events for CM and DM.  Call CM or DM and tell them
412  * about the HCA for the event recorded in the ibtl_hca_devinfo_t.
413  */
414 static void
415 ibtl_do_mgr_async_task(void *arg)
416 {
417 	struct ibtl_mgr_s	*mgrp = (struct ibtl_mgr_s *)arg;
418 	ibtl_hca_devinfo_t	*hca_devp = mgrp->mgr_hca_devp;
419 
420 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_do_mgr_async_task(0x%x)",
421 	    hca_devp->hd_async_code);
422 
423 	mgrp->mgr_async_handler(mgrp->mgr_clnt_private, NULL,
424 	    hca_devp->hd_async_code, &hca_devp->hd_async_event);
425 	kmem_free(mgrp, sizeof (*mgrp));
426 
427 	mutex_enter(&ibtl_clnt_list_mutex);
428 	if (--hca_devp->hd_async_task_cnt == 0)
429 		cv_signal(&hca_devp->hd_async_task_cv);
430 	mutex_exit(&ibtl_clnt_list_mutex);
431 }
432 
433 static void
434 ibtl_tell_mgr(ibtl_hca_devinfo_t *hca_devp, ibt_async_handler_t async_handler,
435     void *clnt_private)
436 {
437 	struct ibtl_mgr_s *mgrp;
438 
439 	if (async_handler == NULL)
440 		return;
441 
442 	_NOTE(NO_COMPETING_THREADS_NOW)
443 	mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP);
444 	mgrp->mgr_hca_devp = hca_devp;
445 	mgrp->mgr_async_handler = async_handler;
446 	mgrp->mgr_clnt_private = clnt_private;
447 	hca_devp->hd_async_task_cnt++;
448 
449 	(void) taskq_dispatch(ibtl_async_taskq, ibtl_do_mgr_async_task, mgrp,
450 	    TQ_SLEEP);
451 	_NOTE(COMPETING_THREADS_NOW)
452 }
453 
454 /*
455  * Per client-device asyncs for HCA level events.  Call each client that is
456  * using the HCA for the event recorded in the ibtl_hca_devinfo_t.
457  */
458 static void
459 ibtl_hca_client_async_task(void *arg)
460 {
461 	ibtl_hca_t		*ibt_hca = (ibtl_hca_t *)arg;
462 	ibtl_hca_devinfo_t	*hca_devp = ibt_hca->ha_hca_devp;
463 	ibtl_clnt_t		*clntp = ibt_hca->ha_clnt_devp;
464 	ibt_async_event_t	async_event;
465 
466 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_hca_client_async_task(%p, 0x%x)",
467 	    ibt_hca, hca_devp->hd_async_code);
468 
469 	bcopy(&hca_devp->hd_async_event, &async_event, sizeof (async_event));
470 	ibtl_async_client_call(ibt_hca, hca_devp->hd_async_code, &async_event);
471 
472 	mutex_enter(&ibtl_async_mutex);
473 	if (--ibt_hca->ha_async_cnt == 0 &&
474 	    (ibt_hca->ha_async_flags & IBTL_ASYNC_FREE_OBJECT)) {
475 		mutex_exit(&ibtl_async_mutex);
476 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
477 	} else
478 		mutex_exit(&ibtl_async_mutex);
479 
480 	mutex_enter(&ibtl_clnt_list_mutex);
481 	if (--hca_devp->hd_async_task_cnt == 0)
482 		cv_signal(&hca_devp->hd_async_task_cv);
483 	if (--clntp->clnt_async_cnt == 0)
484 		cv_broadcast(&ibtl_clnt_cv);
485 
486 	mutex_exit(&ibtl_clnt_list_mutex);
487 }
488 
489 /*
490  * Asyncs for HCA level events.
491  *
492  * The function continues to run until there are no more async
493  * events/errors for this HCA.  An event is chosen for dispatch
494  * to all clients of this HCA.  This thread dispatches them via
495  * the ibtl_async_taskq, then sleeps until all tasks are done.
496  *
497  * This thread records the async_code and async_event in the
498  * ibtl_hca_devinfo_t for all client taskq threads to reference.
499  *
500  * This is called from an async or taskq thread with ibtl_async_mutex held.
501  */
static void
ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp)
{
	ibtl_hca_t			*ibt_hca;
	ibt_async_code_t		code;
	ibtl_async_port_status_t  	temp;
	uint8_t				nports;
	uint8_t				port_minus1;
	ibtl_async_port_status_t	*portp;

	mutex_exit(&ibtl_async_mutex);

	/*
	 * Serialize HCA async handling: only one thread at a time may
	 * own hd_async_busy for this HCA (ibtl_clnt_list_mutex is above
	 * ibtl_async_mutex in the lock order, so drop/reacquire).
	 */
	mutex_enter(&ibtl_clnt_list_mutex);
	while (hca_devp->hd_async_busy)
		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
	hca_devp->hd_async_busy = 1;
	mutex_enter(&ibtl_async_mutex);

	bzero(&hca_devp->hd_async_event, sizeof (hca_devp->hd_async_event));
	for (;;) {

		hca_devp->hd_async_event.ev_fma_ena = 0;

		/*
		 * Pick one pending code in priority order:
		 * local catastrophic, then port down, then port up.
		 */
		code = hca_devp->hd_async_codes;
		if (code & IBT_ERROR_LOCAL_CATASTROPHIC) {
			code = IBT_ERROR_LOCAL_CATASTROPHIC;
			hca_devp->hd_async_event.ev_fma_ena =
			    hca_devp->hd_fma_ena;
		} else if (code & IBT_ERROR_PORT_DOWN)
			code = IBT_ERROR_PORT_DOWN;
		else if (code & IBT_EVENT_PORT_UP)
			code = IBT_EVENT_PORT_UP;
		else {
			hca_devp->hd_async_codes = 0;
			code = 0;
		}

		if (code == 0) {
			/* Nothing left to handle for this HCA. */
			hca_devp->hd_async_flags &= ~IBTL_ASYNC_PENDING;
			break;
		}
		hca_devp->hd_async_codes &= ~code;

		if ((code == IBT_EVENT_PORT_UP) ||
		    (code == IBT_ERROR_PORT_DOWN)) {
			/* PORT_UP or PORT_DOWN */
			/* Find a port that changed into this up/down state. */
			portp = hca_devp->hd_async_port;
			nports = hca_devp->hd_hca_attr->hca_nports;
			for (port_minus1 = 0; port_minus1 < nports;
			    port_minus1++) {
				temp = ((code == IBT_EVENT_PORT_UP) ?
				    IBTL_HCA_PORT_UP : IBTL_HCA_PORT_DOWN) |
				    IBTL_HCA_PORT_CHANGED;
				if (portp[port_minus1] == temp)
					break;
			}
			if (port_minus1 >= nports) {
				/* we checked again, but found nothing */
				continue;
			}
			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_do_hca_asyncs: "
			    "async: port# %x code %x", port_minus1 + 1, code);
			/* mark it to check for other ports after we're done */
			hca_devp->hd_async_codes |= code;

			hca_devp->hd_async_event.ev_port = port_minus1 + 1;
			hca_devp->hd_async_port[port_minus1] &=
			    ~IBTL_HCA_PORT_CHANGED;

			/* Refresh cached portinfo without holding the mutex. */
			mutex_exit(&ibtl_async_mutex);
			ibtl_reinit_hca_portinfo(hca_devp, port_minus1 + 1);
			mutex_enter(&ibtl_async_mutex);
		}

		/* Publish the event for the taskq threads to read. */
		hca_devp->hd_async_code = code;
		hca_devp->hd_async_event.ev_hca_guid =
		    hca_devp->hd_hca_attr->hca_node_guid;
		mutex_exit(&ibtl_async_mutex);

		/*
		 * Make sure to inform CM, DM, and IBMA if we know of them.
		 * Also, make sure not to inform them a second time, which
		 * would occur if they have the HCA open.
		 */

		if (ibtl_ibma_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_ibma_async_handler,
			    ibtl_ibma_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		if (ibtl_dm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_dm_async_handler,
			    ibtl_dm_clnt_private);
		if (ibtl_cm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
			    ibtl_cm_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/* Fan the event out to every non-manager client of the HCA. */
		for (ibt_hca = hca_devp->hd_clnt_list;
		    ibt_hca != NULL;
		    ibt_hca = ibt_hca->ha_clnt_link) {

			/* Managers are handled above */
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_cm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_dm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_ibma_async_handler)
				continue;
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;

			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;
			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}

		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		mutex_enter(&ibtl_async_mutex);
	}
	/* Done; hand hd_async_busy ownership to any waiting thread. */
	hca_devp->hd_async_code = 0;
	hca_devp->hd_async_busy = 0;
	cv_broadcast(&hca_devp->hd_async_busy_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}
642 
643 /*
644  * Asyncs for QP objects.
645  *
646  * The function continues to run until there are no more async
647  * events/errors for this object.
648  */
static void
ibtl_do_qp_asyncs(ibtl_qp_t *ibtl_qp)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	/* Clients are handed the channel handle, not the raw QP. */
	async_event.ev_chan_hdl = IBTL_QP2CHAN(ibtl_qp);

	while ((code = ibtl_qp->qp_async_codes) != 0) {
		async_event.ev_fma_ena = 0;
		/*
		 * Select one pending code, errors first.  A QP marked
		 * IBTL_ASYNC_FREE_OBJECT gets no callback; we just fall
		 * through to the channel teardown below.
		 */
		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CATASTROPHIC_QP) {
			code = IBT_ERROR_CATASTROPHIC_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_cat_fma_ena;
		} else if (code & IBT_ERROR_INVALID_REQUEST_QP) {
			code = IBT_ERROR_INVALID_REQUEST_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_inv_fma_ena;
		} else if (code & IBT_ERROR_ACCESS_VIOLATION_QP) {
			code = IBT_ERROR_ACCESS_VIOLATION_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_acc_fma_ena;
		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_QP) {
			code = IBT_ERROR_PATH_MIGRATE_REQ_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_pth_fma_ena;
		} else if (code & IBT_EVENT_PATH_MIGRATED_QP)
			code = IBT_EVENT_PATH_MIGRATED_QP;
		else if (code & IBT_EVENT_SQD)
			code = IBT_EVENT_SQD;
		else if (code & IBT_EVENT_COM_EST_QP)
			code = IBT_EVENT_COM_EST_QP;
		else if (code & IBT_EVENT_EMPTY_QP)
			code = IBT_EVENT_EMPTY_QP;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_qp_asyncs: "
			    "async: unexpected QP async code 0x%x", code);
			ibtl_qp->qp_async_codes = 0;
			code = 0;
		}
		ibtl_qp->qp_async_codes &= ~code;

		if (code) {
			/* Drop the mutex around the (blocking) callback. */
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_qp->qp_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			/* Tear down and free the channel holding this QP. */
			mutex_exit(&ibtl_async_mutex);
			cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
			mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
			kmem_free(IBTL_QP2CHAN(ibtl_qp),
			    sizeof (ibtl_channel_t));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_qp->qp_async_flags &= ~IBTL_ASYNC_PENDING;
}
710 
711 /*
712  * Asyncs for SRQ objects.
713  *
714  * The function continues to run until there are no more async
715  * events/errors for this object.
716  */
717 static void
718 ibtl_do_srq_asyncs(ibtl_srq_t *ibtl_srq)
719 {
720 	ibt_async_code_t	code;
721 	ibt_async_event_t	async_event;
722 
723 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
724 	bzero(&async_event, sizeof (async_event));
725 	async_event.ev_srq_hdl = ibtl_srq;
726 	async_event.ev_fma_ena = ibtl_srq->srq_fma_ena;
727 
728 	while ((code = ibtl_srq->srq_async_codes) != 0) {
729 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT)
730 			code = 0;	/* fallthrough to "kmem_free" */
731 		else if (code & IBT_ERROR_CATASTROPHIC_SRQ)
732 			code = IBT_ERROR_CATASTROPHIC_SRQ;
733 		else if (code & IBT_EVENT_LIMIT_REACHED_SRQ)
734 			code = IBT_EVENT_LIMIT_REACHED_SRQ;
735 		else {
736 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_srq_asyncs: "
737 			    "async: unexpected SRQ async code 0x%x", code);
738 			ibtl_srq->srq_async_codes = 0;
739 			code = 0;
740 		}
741 		ibtl_srq->srq_async_codes &= ~code;
742 
743 		if (code) {
744 			mutex_exit(&ibtl_async_mutex);
745 			ibtl_async_client_call(ibtl_srq->srq_hca,
746 			    code, &async_event);
747 			mutex_enter(&ibtl_async_mutex);
748 		}
749 
750 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
751 			mutex_exit(&ibtl_async_mutex);
752 			kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
753 			mutex_enter(&ibtl_async_mutex);
754 			return;
755 		}
756 	}
757 	ibtl_srq->srq_async_flags &= ~IBTL_ASYNC_PENDING;
758 }
759 
760 /*
761  * Asyncs for CQ objects.
762  *
763  * The function continues to run until there are no more async
764  * events/errors for this object.
765  */
766 static void
767 ibtl_do_cq_asyncs(ibtl_cq_t *ibtl_cq)
768 {
769 	ibt_async_code_t	code;
770 	ibt_async_event_t	async_event;
771 
772 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
773 	bzero(&async_event, sizeof (async_event));
774 	async_event.ev_cq_hdl = ibtl_cq;
775 	async_event.ev_fma_ena = ibtl_cq->cq_fma_ena;
776 
777 	while ((code = ibtl_cq->cq_async_codes) != 0) {
778 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT)
779 			code = 0;	/* fallthrough to "kmem_free" */
780 		else if (code & IBT_ERROR_CQ)
781 			code = IBT_ERROR_CQ;
782 		else {
783 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_cq_asyncs: "
784 			    "async: unexpected CQ async code 0x%x", code);
785 			ibtl_cq->cq_async_codes = 0;
786 			code = 0;
787 		}
788 		ibtl_cq->cq_async_codes &= ~code;
789 
790 		if (code) {
791 			mutex_exit(&ibtl_async_mutex);
792 			ibtl_async_client_call(ibtl_cq->cq_hca,
793 			    code, &async_event);
794 			mutex_enter(&ibtl_async_mutex);
795 		}
796 
797 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
798 			mutex_exit(&ibtl_async_mutex);
799 			mutex_destroy(&ibtl_cq->cq_mutex);
800 			kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
801 			mutex_enter(&ibtl_async_mutex);
802 			return;
803 		}
804 	}
805 	ibtl_cq->cq_async_flags &= ~IBTL_ASYNC_PENDING;
806 }
807 
808 /*
809  * Asyncs for EEC objects.
810  *
811  * The function continues to run until there are no more async
812  * events/errors for this object.
813  */
static void
ibtl_do_eec_asyncs(ibtl_eec_t *ibtl_eec)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	async_event.ev_chan_hdl = ibtl_eec->eec_channel;

	while ((code = ibtl_eec->eec_async_codes) != 0) {
		async_event.ev_fma_ena = 0;
		/*
		 * Select one pending code; EEC-specific codes are
		 * translated to the generic channel codes before being
		 * reported to the client.
		 */
		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CATASTROPHIC_EEC) {
			code = IBT_ERROR_CATASTROPHIC_CHAN;
			async_event.ev_fma_ena = ibtl_eec->eec_cat_fma_ena;
		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_EEC) {
			code = IBT_ERROR_PATH_MIGRATE_REQ;
			async_event.ev_fma_ena = ibtl_eec->eec_pth_fma_ena;
		} else if (code & IBT_EVENT_PATH_MIGRATED_EEC)
			code = IBT_EVENT_PATH_MIGRATED;
		else if (code & IBT_EVENT_COM_EST_EEC)
			code = IBT_EVENT_COM_EST;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_eec_asyncs: "
			    "async: unexpected code 0x%x", code);
			ibtl_eec->eec_async_codes = 0;
			code = 0;
		}
		/*
		 * NOTE(review): this clears the *translated* code, not the
		 * original *_EEC bit set in eec_async_codes — it only
		 * terminates the loop if those bit values coincide.  EEC
		 * handling is disabled (ibtl_eec_not_supported), so this
		 * path is currently unreachable; confirm before enabling.
		 */
		ibtl_eec->eec_async_codes &= ~code;

		if (code) {
			/* Drop the mutex around the (blocking) callback. */
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_eec->eec_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			mutex_exit(&ibtl_async_mutex);
			kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_eec->eec_async_flags &= ~IBTL_ASYNC_PENDING;
}
862 
#ifdef __lock_lint
/*
 * For lock_lint analysis only: gives the CPR mutex file scope so the
 * annotations resolve; the real cpr_mutex lives on each IBTL thread's
 * stack (see ibtl_async_thread).
 */
kmutex_t cpr_mutex;
#endif
866 
867 /*
868  * Loop forever, calling async_handlers until all of the async lists
869  * are empty.
870  */
871 
static void
ibtl_async_thread(void)
{
#ifndef __lock_lint
	kmutex_t cpr_mutex;
#endif
	callb_cpr_t	cprinfo;

	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
	_NOTE(NO_COMPETING_THREADS_NOW)
	/* register with CPR (suspend/resume) using a thread-private mutex */
	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
	    "ibtl_async_thread");
	_NOTE(COMPETING_THREADS_NOW)

	mutex_enter(&ibtl_async_mutex);

	/*
	 * Service the async work lists in fixed priority order: HCA
	 * events first, then QP, SRQ, EEC, and finally CQ events.
	 * Sleep when all lists are empty; leave the loop only when
	 * ibtl_thread_fini() sets ibtl_async_thread_exit.
	 */
	for (;;) {
		if (ibtl_async_hca_list_start) {
			ibtl_hca_devinfo_t *hca_devp;

			/* remove first entry from list */
			hca_devp = ibtl_async_hca_list_start;
			ibtl_async_hca_list_start = hca_devp->hd_async_link;
			hca_devp->hd_async_link = NULL;
			if (ibtl_async_hca_list_start == NULL)
				ibtl_async_hca_list_end = NULL;

			ibtl_do_hca_asyncs(hca_devp);

		} else if (ibtl_async_qp_list_start) {
			ibtl_qp_t *ibtl_qp;

			/* remove from list */
			ibtl_qp = ibtl_async_qp_list_start;
			ibtl_async_qp_list_start = ibtl_qp->qp_async_link;
			ibtl_qp->qp_async_link = NULL;
			if (ibtl_async_qp_list_start == NULL)
				ibtl_async_qp_list_end = NULL;

			ibtl_do_qp_asyncs(ibtl_qp);

		} else if (ibtl_async_srq_list_start) {
			ibtl_srq_t *ibtl_srq;

			/* remove from list */
			ibtl_srq = ibtl_async_srq_list_start;
			ibtl_async_srq_list_start = ibtl_srq->srq_async_link;
			ibtl_srq->srq_async_link = NULL;
			if (ibtl_async_srq_list_start == NULL)
				ibtl_async_srq_list_end = NULL;

			ibtl_do_srq_asyncs(ibtl_srq);

		} else if (ibtl_async_eec_list_start) {
			ibtl_eec_t *ibtl_eec;

			/* remove from list */
			ibtl_eec = ibtl_async_eec_list_start;
			ibtl_async_eec_list_start = ibtl_eec->eec_async_link;
			ibtl_eec->eec_async_link = NULL;
			if (ibtl_async_eec_list_start == NULL)
				ibtl_async_eec_list_end = NULL;

			ibtl_do_eec_asyncs(ibtl_eec);

		} else if (ibtl_async_cq_list_start) {
			ibtl_cq_t *ibtl_cq;

			/* remove from list */
			ibtl_cq = ibtl_async_cq_list_start;
			ibtl_async_cq_list_start = ibtl_cq->cq_async_link;
			ibtl_cq->cq_async_link = NULL;
			if (ibtl_async_cq_list_start == NULL)
				ibtl_async_cq_list_end = NULL;

			ibtl_do_cq_asyncs(ibtl_cq);

		} else {
			if (ibtl_async_thread_exit == IBTL_THREAD_EXIT)
				break;
			/* mark ourselves CPR-safe before sleeping */
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			mutex_exit(&cpr_mutex);

			cv_wait(&ibtl_async_cv, &ibtl_async_mutex);

			/*
			 * cpr_mutex is only taken while ibtl_async_mutex
			 * is dropped; reacquire the work-list mutex last.
			 */
			mutex_exit(&ibtl_async_mutex);
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
			mutex_exit(&cpr_mutex);
			mutex_enter(&ibtl_async_mutex);
		}
	}

	mutex_exit(&ibtl_async_mutex);

#ifndef __lock_lint
	mutex_enter(&cpr_mutex);
	CALLB_CPR_EXIT(&cprinfo);	/* releases cpr_mutex */
#endif
	mutex_destroy(&cpr_mutex);
}
975 
976 
977 void
978 ibtl_free_qp_async_check(ibtl_qp_t *ibtl_qp)
979 {
980 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_qp_async_check(%p)", ibtl_qp);
981 
982 	mutex_enter(&ibtl_async_mutex);
983 
984 	/*
985 	 * If there is an active async, mark this object to be freed
986 	 * by the async_thread when it's done.
987 	 */
988 	if (ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) {
989 		ibtl_qp->qp_async_flags |= IBTL_ASYNC_FREE_OBJECT;
990 		mutex_exit(&ibtl_async_mutex);
991 	} else {	/* free the object now */
992 		mutex_exit(&ibtl_async_mutex);
993 		cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
994 		mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
995 		kmem_free(IBTL_QP2CHAN(ibtl_qp), sizeof (ibtl_channel_t));
996 	}
997 }
998 
999 void
1000 ibtl_free_cq_async_check(ibtl_cq_t *ibtl_cq)
1001 {
1002 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_cq_async_check(%p)", ibtl_cq);
1003 
1004 	mutex_enter(&ibtl_async_mutex);
1005 
1006 	/* if there is an active async, mark this object to be freed */
1007 	if (ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) {
1008 		ibtl_cq->cq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1009 		mutex_exit(&ibtl_async_mutex);
1010 	} else {	/* free the object now */
1011 		mutex_exit(&ibtl_async_mutex);
1012 		mutex_destroy(&ibtl_cq->cq_mutex);
1013 		kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
1014 	}
1015 }
1016 
1017 void
1018 ibtl_free_srq_async_check(ibtl_srq_t *ibtl_srq)
1019 {
1020 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_srq_async_check(%p)",
1021 	    ibtl_srq);
1022 
1023 	mutex_enter(&ibtl_async_mutex);
1024 
1025 	/* if there is an active async, mark this object to be freed */
1026 	if (ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) {
1027 		ibtl_srq->srq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1028 		mutex_exit(&ibtl_async_mutex);
1029 	} else {	/* free the object now */
1030 		mutex_exit(&ibtl_async_mutex);
1031 		kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
1032 	}
1033 }
1034 
1035 void
1036 ibtl_free_eec_async_check(ibtl_eec_t *ibtl_eec)
1037 {
1038 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_eec_async_check(%p)",
1039 	    ibtl_eec);
1040 
1041 	mutex_enter(&ibtl_async_mutex);
1042 
1043 	/* if there is an active async, mark this object to be freed */
1044 	if (ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) {
1045 		ibtl_eec->eec_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1046 		mutex_exit(&ibtl_async_mutex);
1047 	} else {	/* free the object now */
1048 		mutex_exit(&ibtl_async_mutex);
1049 		kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
1050 	}
1051 }
1052 
1053 /*
1054  * This function differs from above in that we assume this is called
1055  * from non-interrupt context, and never called from the async_thread.
1056  */
1057 
1058 void
1059 ibtl_free_hca_async_check(ibtl_hca_t *ibt_hca)
1060 {
1061 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_hca_async_check(%p)",
1062 	    ibt_hca);
1063 
1064 	mutex_enter(&ibtl_async_mutex);
1065 
1066 	/* if there is an active async, mark this object to be freed */
1067 	if (ibt_hca->ha_async_cnt > 0) {
1068 		ibt_hca->ha_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1069 		mutex_exit(&ibtl_async_mutex);
1070 	} else {	/* free the object now */
1071 		mutex_exit(&ibtl_async_mutex);
1072 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
1073 	}
1074 }
1075 
1076 /*
1077  * Completion Queue Handling.
1078  *
1079  *	A completion queue can be handled through a simple callback
1080  *	at interrupt level, or it may be queued for an ibtl_cq_thread
1081  *	to handle.  The latter is chosen during ibt_alloc_cq when the
1082  *	IBTF_CQ_HANDLER_IN_THREAD is specified.
1083  */
1084 
1085 static void
1086 ibtl_cq_handler_call(ibtl_cq_t *ibtl_cq)
1087 {
1088 	ibt_cq_handler_t	cq_handler;
1089 	void			*arg;
1090 
1091 	IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_cq_handler_call(%p)", ibtl_cq);
1092 
1093 	mutex_enter(&ibtl_cq->cq_mutex);
1094 	cq_handler = ibtl_cq->cq_comp_handler;
1095 	arg = ibtl_cq->cq_arg;
1096 	if (cq_handler != NULL)
1097 		cq_handler(ibtl_cq, arg);
1098 	else
1099 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_cq_handler_call: "
1100 		    "no cq_handler for cq %p", ibtl_cq);
1101 	mutex_exit(&ibtl_cq->cq_mutex);
1102 }
1103 
1104 /*
1105  * Before ibt_free_cq can continue, we need to ensure no more cq_handler
1106  * callbacks can occur.  When we get the mutex, we know there are no
1107  * outstanding cq_handler callbacks.  We set the cq_handler to NULL to
1108  * prohibit future callbacks.
1109  */
void
ibtl_free_cq_check(ibtl_cq_t *ibtl_cq)
{
	/* disable all future completion callbacks for this CQ */
	mutex_enter(&ibtl_cq->cq_mutex);
	ibtl_cq->cq_comp_handler = NULL;
	mutex_exit(&ibtl_cq->cq_mutex);
	if (ibtl_cq->cq_in_thread) {
		mutex_enter(&ibtl_cq_mutex);
		--ibtl_cqs_using_threads;
		/*
		 * If a cq_thread still has this CQ queued, tell it not to
		 * call the client (clear CALL_CLIENT, set FREE) and wait;
		 * ibtl_cq_thread() broadcasts ibtl_cq_cv when it is done
		 * with a CQ marked IBTL_CQ_FREE.
		 */
		while (ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) {
			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
			ibtl_cq->cq_impl_flags |= IBTL_CQ_FREE;
			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
		}
		mutex_exit(&ibtl_cq_mutex);
	}
}
1127 
1128 /*
1129  * Loop forever, calling cq_handlers until the cq list
1130  * is empty.
1131  */
1132 
static void
ibtl_cq_thread(void)
{
#ifndef __lock_lint
	kmutex_t cpr_mutex;
#endif
	callb_cpr_t	cprinfo;

	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
	_NOTE(NO_COMPETING_THREADS_NOW)
	/* register with CPR (suspend/resume) using a thread-private mutex */
	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
	    "ibtl_cq_thread");
	_NOTE(COMPETING_THREADS_NOW)

	mutex_enter(&ibtl_cq_mutex);

	for (;;) {
		if (ibtl_cq_list_start) {
			ibtl_cq_t *ibtl_cq;

			/* dequeue the first CQ awaiting service */
			ibtl_cq = ibtl_cq_list_start;
			ibtl_cq_list_start = ibtl_cq->cq_link;
			ibtl_cq->cq_link = NULL;
			if (ibtl_cq == ibtl_cq_list_end)
				ibtl_cq_list_end = NULL;

			/*
			 * Keep calling the client while notifications
			 * arrive; ibc_cq_handler() may re-set
			 * IBTL_CQ_CALL_CLIENT while we run unlocked.
			 */
			while (ibtl_cq->cq_impl_flags & IBTL_CQ_CALL_CLIENT) {
				ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
				mutex_exit(&ibtl_cq_mutex);
				ibtl_cq_handler_call(ibtl_cq);
				mutex_enter(&ibtl_cq_mutex);
			}
			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_PENDING;
			/* wake ibtl_free_cq_check() if it is waiting on us */
			if (ibtl_cq->cq_impl_flags & IBTL_CQ_FREE)
				cv_broadcast(&ibtl_cq_cv);
		} else {
			if (ibtl_cq_thread_exit == IBTL_THREAD_EXIT)
				break;
			/* mark ourselves CPR-safe before sleeping */
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			mutex_exit(&cpr_mutex);

			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);

			/* cpr_mutex is only taken with ibtl_cq_mutex dropped */
			mutex_exit(&ibtl_cq_mutex);
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
			mutex_exit(&cpr_mutex);
			mutex_enter(&ibtl_cq_mutex);
		}
	}

	mutex_exit(&ibtl_cq_mutex);
#ifndef __lock_lint
	mutex_enter(&cpr_mutex);
	CALLB_CPR_EXIT(&cprinfo);	/* releases cpr_mutex */
#endif
	mutex_destroy(&cpr_mutex);
}
1193 
1194 
1195 /*
1196  * ibc_cq_handler()
1197  *
1198  *    Completion Queue Notification Handler.
1199  *
1200  */
1201 /*ARGSUSED*/
1202 void
1203 ibc_cq_handler(ibc_clnt_hdl_t ibc_hdl, ibt_cq_hdl_t ibtl_cq)
1204 {
1205 	IBTF_DPRINTF_L4(ibtf_handlers, "ibc_cq_handler(%p, %p)",
1206 	    ibc_hdl, ibtl_cq);
1207 
1208 	if (ibtl_cq->cq_in_thread) {
1209 		mutex_enter(&ibtl_cq_mutex);
1210 		ibtl_cq->cq_impl_flags |= IBTL_CQ_CALL_CLIENT;
1211 		if ((ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) == 0) {
1212 			ibtl_cq->cq_impl_flags |= IBTL_CQ_PENDING;
1213 			ibtl_cq->cq_link = NULL;
1214 			if (ibtl_cq_list_end == NULL)
1215 				ibtl_cq_list_start = ibtl_cq;
1216 			else
1217 				ibtl_cq_list_end->cq_link = ibtl_cq;
1218 			ibtl_cq_list_end = ibtl_cq;
1219 			cv_signal(&ibtl_cq_cv);
1220 		}
1221 		mutex_exit(&ibtl_cq_mutex);
1222 		return;
1223 	} else
1224 		ibtl_cq_handler_call(ibtl_cq);
1225 }
1226 
1227 
1228 /*
1229  * ibt_enable_cq_notify()
1230  *      Enable Notification requests on the specified CQ.
1231  *
1232  *      ibt_cq          The CQ handle.
1233  *
1234  *      notify_type     Enable notifications for all (IBT_NEXT_COMPLETION)
1235  *                      completions, or the next Solicited completion
1236  *                      (IBT_NEXT_SOLICITED) only.
1237  *
1238  *	Completion notifications are disabled by setting the completion
1239  *	handler to NULL by calling ibt_set_cq_handler().
1240  */
1241 ibt_status_t
1242 ibt_enable_cq_notify(ibt_cq_hdl_t ibtl_cq, ibt_cq_notify_flags_t notify_type)
1243 {
1244 	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_enable_cq_notify(%p, %d)",
1245 	    ibtl_cq, notify_type);
1246 
1247 	return (IBTL_CQ2CIHCAOPS_P(ibtl_cq)->ibc_notify_cq(
1248 	    IBTL_CQ2CIHCA(ibtl_cq), ibtl_cq->cq_ibc_cq_hdl, notify_type));
1249 }
1250 
1251 
1252 /*
1253  * ibt_set_cq_handler()
1254  *      Register a work request completion handler with the IBTF.
1255  *
1256  *      ibt_cq                  The CQ handle.
1257  *
1258  *      completion_handler      The completion handler.
1259  *
1260  *      arg                     The IBTF client private argument to be passed
1261  *                              back to the client when calling the CQ
1262  *                              completion handler.
1263  *
1264  *	Completion notifications are disabled by setting the completion
1265  *	handler to NULL.  When setting the handler to NULL, no additional
1266  *	calls to the previous CQ handler will be initiated, but there may
1267  *	be one in progress.
1268  *
1269  *      This function does not otherwise change the state of previous
1270  *      calls to ibt_enable_cq_notify().
1271  */
void
ibt_set_cq_handler(ibt_cq_hdl_t ibtl_cq, ibt_cq_handler_t completion_handler,
    void *arg)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_set_cq_handler(%p, %p, %p)",
	    ibtl_cq, completion_handler, arg);

	/*
	 * cq_mutex is also held across the callback in
	 * ibtl_cq_handler_call(), so a NULL handler takes effect as soon
	 * as any in-progress callback returns.
	 */
	mutex_enter(&ibtl_cq->cq_mutex);
	ibtl_cq->cq_comp_handler = completion_handler;
	ibtl_cq->cq_arg = arg;
	mutex_exit(&ibtl_cq->cq_mutex);
}
1284 
1285 
1286 /*
1287  * Inform IBT clients about New HCAs.
1288  *
1289  *	We use taskqs to allow simultaneous notification, with sleeping.
1290  *	Since taskqs only allow one argument, we define a structure
1291  *	because we need to pass in two arguments.
1292  */
1293 
/* Single taskq argument: which client to tell about which HCA, and how. */
struct ibtl_new_hca_s {
	ibtl_clnt_t		*nh_clntp;	/* client to notify */
	ibtl_hca_devinfo_t	*nh_hca_devp;	/* HCA being announced */
	ibt_async_code_t	nh_code;	/* e.g. IBT_HCA_ATTACH_EVENT */
};
1299 
/*
 * Taskq callback: deliver the HCA attach event to one client, then
 * drop the per-HCA and per-client outstanding-task counters that the
 * dispatcher bumped, waking any waiters when they hit zero.
 */
static void
ibtl_tell_client_about_new_hca(void *arg)
{
	struct ibtl_new_hca_s	*new_hcap = (struct ibtl_new_hca_s *)arg;
	ibtl_clnt_t		*clntp = new_hcap->nh_clntp;
	ibt_async_event_t	async_event;
	ibtl_hca_devinfo_t	*hca_devp = new_hcap->nh_hca_devp;

	bzero(&async_event, sizeof (async_event));
	async_event.ev_hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
	clntp->clnt_modinfop->mi_async_handler(
	    clntp->clnt_private, NULL, new_hcap->nh_code, &async_event);
	kmem_free(new_hcap, sizeof (*new_hcap));
#ifdef __lock_lint
	{
		ibt_hca_hdl_t hca_hdl;
		(void) ibt_open_hca(clntp, 0ULL, &hca_hdl);
	}
#endif
	mutex_enter(&ibtl_clnt_list_mutex);
	/* last task for this HCA wakes ibtl_announce_new_hca() */
	if (--hca_devp->hd_async_task_cnt == 0)
		cv_signal(&hca_devp->hd_async_task_cv);
	/* last task for this client wakes ibtl_free_clnt_async_check() */
	if (--clntp->clnt_async_cnt == 0)
		cv_broadcast(&ibtl_clnt_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}
1326 
1327 /*
1328  * ibtl_announce_new_hca:
1329  *
1330  *	o First attach these clients in the given order
1331  *		IBMA
1332  *		IBCM
1333  *
1334  *	o Next attach all other clients in parallel.
1335  *
1336  * NOTE: Use the taskq to simultaneously notify all clients of the new HCA.
1337  * Retval from clients is ignored.
1338  */
1339 void
1340 ibtl_announce_new_hca(ibtl_hca_devinfo_t *hca_devp)
1341 {
1342 	ibtl_clnt_t		*clntp;
1343 	struct ibtl_new_hca_s	*new_hcap;
1344 
1345 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_announce_new_hca(%p, %llX)",
1346 	    hca_devp, hca_devp->hd_hca_attr->hca_node_guid);
1347 
1348 	mutex_enter(&ibtl_clnt_list_mutex);
1349 
1350 	clntp = ibtl_clnt_list;
1351 	while (clntp != NULL) {
1352 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
1353 			IBTF_DPRINTF_L4(ibtf_handlers,
1354 			    "ibtl_announce_new_hca: calling IBMF");
1355 			if (clntp->clnt_modinfop->mi_async_handler) {
1356 				_NOTE(NO_COMPETING_THREADS_NOW)
1357 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1358 				    KM_SLEEP);
1359 				new_hcap->nh_clntp = clntp;
1360 				new_hcap->nh_hca_devp = hca_devp;
1361 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1362 				_NOTE(COMPETING_THREADS_NOW)
1363 				clntp->clnt_async_cnt++;
1364 				hca_devp->hd_async_task_cnt++;
1365 
1366 				(void) taskq_dispatch(ibtl_async_taskq,
1367 				    ibtl_tell_client_about_new_hca, new_hcap,
1368 				    TQ_SLEEP);
1369 			}
1370 			break;
1371 		}
1372 		clntp = clntp->clnt_list_link;
1373 	}
1374 	if (clntp != NULL)
1375 		while (clntp->clnt_async_cnt > 0)
1376 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1377 	clntp = ibtl_clnt_list;
1378 	while (clntp != NULL) {
1379 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
1380 			IBTF_DPRINTF_L4(ibtf_handlers,
1381 			    "ibtl_announce_new_hca: calling IBDM");
1382 			if (clntp->clnt_modinfop->mi_async_handler) {
1383 				_NOTE(NO_COMPETING_THREADS_NOW)
1384 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1385 				    KM_SLEEP);
1386 				new_hcap->nh_clntp = clntp;
1387 				new_hcap->nh_hca_devp = hca_devp;
1388 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1389 				_NOTE(COMPETING_THREADS_NOW)
1390 				clntp->clnt_async_cnt++;
1391 				hca_devp->hd_async_task_cnt++;
1392 
1393 				(void) taskq_dispatch(ibtl_async_taskq,
1394 				    ibtl_tell_client_about_new_hca, new_hcap,
1395 				    TQ_SLEEP);
1396 			}
1397 			break;
1398 		}
1399 		clntp = clntp->clnt_list_link;
1400 	}
1401 	if (clntp != NULL)
1402 		while (clntp->clnt_async_cnt > 0)
1403 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1404 	clntp = ibtl_clnt_list;
1405 	while (clntp != NULL) {
1406 		if ((clntp->clnt_modinfop->mi_clnt_class != IBT_DM) &&
1407 		    (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA)) {
1408 			IBTF_DPRINTF_L4(ibtf_handlers,
1409 			    "ibtl_announce_new_hca: Calling %s ",
1410 			    clntp->clnt_modinfop->mi_clnt_name);
1411 			if (clntp->clnt_modinfop->mi_async_handler) {
1412 				_NOTE(NO_COMPETING_THREADS_NOW)
1413 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1414 				    KM_SLEEP);
1415 				new_hcap->nh_clntp = clntp;
1416 				new_hcap->nh_hca_devp = hca_devp;
1417 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1418 				_NOTE(COMPETING_THREADS_NOW)
1419 				clntp->clnt_async_cnt++;
1420 				hca_devp->hd_async_task_cnt++;
1421 
1422 				(void) taskq_dispatch(ibtl_async_taskq,
1423 				    ibtl_tell_client_about_new_hca, new_hcap,
1424 				    TQ_SLEEP);
1425 			}
1426 		}
1427 		clntp = clntp->clnt_list_link;
1428 	}
1429 
1430 	/* wait for all tasks to complete */
1431 	while (hca_devp->hd_async_task_cnt != 0)
1432 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1433 
1434 	/* wakeup thread that may be waiting to send an HCA async */
1435 	ASSERT(hca_devp->hd_async_busy == 1);
1436 	hca_devp->hd_async_busy = 0;
1437 	cv_broadcast(&hca_devp->hd_async_busy_cv);
1438 	mutex_exit(&ibtl_clnt_list_mutex);
1439 }
1440 
1441 /*
1442  * ibtl_detach_all_clients:
1443  *
1444  *	Return value - 0 for Success, 1 for Failure
1445  *
1446  *	o First detach general clients.
1447  *
1448  *	o Next detach these clients
1449  *		IBCM
1450  *		IBDM
1451  *
1452  *	o Finally, detach this client
1453  *		IBMA
1454  */
int
ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp)
{
	ib_guid_t		hcaguid = hca_devp->hd_hca_attr->hca_node_guid;
	ibtl_hca_t		*ibt_hca;
	ibtl_clnt_t		*clntp;
	int			retval;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_detach_all_clients(%llX)",
	    hcaguid);

	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));

	/* serialize with any in-progress HCA async delivery */
	while (hca_devp->hd_async_busy)
		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
	hca_devp->hd_async_busy = 1;

	/* First inform general clients asynchronously */
	hca_devp->hd_async_event.ev_hca_guid = hcaguid;
	hca_devp->hd_async_event.ev_fma_ena = 0;
	hca_devp->hd_async_event.ev_chan_hdl = NULL;
	hca_devp->hd_async_event.ev_cq_hdl = NULL;
	hca_devp->hd_async_code = IBT_HCA_DETACH_EVENT;

	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (IBTL_GENERIC_CLIENT(clntp)) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/* wait for all clients to complete */
	while (hca_devp->hd_async_task_cnt != 0) {
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
	}
	/* Go thru the clients and check if any have not closed this HCA. */
	retval = 0;
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (IBTL_GENERIC_CLIENT(clntp)) {
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' failed to close the HCA.",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
			retval = 1;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}
	if (retval == 1)
		goto bailout;

	/* Next inform IBDM asynchronously */
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}
	/* wait for IBDM to complete */
	while (hca_devp->hd_async_task_cnt != 0) {
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
	}

	/*
	 * Next inform IBCM.
	 * As IBCM doesn't perform ibt_open_hca(), IBCM will not be
	 * accessible via hca_devp->hd_clnt_list.
	 * ibtl_cm_async_handler will NOT be NULL, if IBCM is registered.
	 */
	if (ibtl_cm_async_handler) {
		ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
		    ibtl_cm_clnt_private);

		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);
	}

	/* Go thru the clients and check if any have not closed this HCA. */
	retval = 0;
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA) {
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' failed to close the HCA.",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
			retval = 1;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}
	if (retval == 1)
		goto bailout;

	/* Finally, inform IBMA */
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		} else
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' is unexpectedly on the client list",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/* wait for IBMA to complete */
	while (hca_devp->hd_async_task_cnt != 0) {
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
	}

	/* Check if this HCA's client list is empty. */
	ibt_hca = hca_devp->hd_clnt_list;
	if (ibt_hca != NULL) {
		IBTF_DPRINTF_L2(ibtf_handlers,
		    "ibtl_detach_all_clients: "
		    "client '%s' failed to close the HCA.",
		    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
		retval = 1;
	} else
		retval = 0;

bailout:
	/* release the async-busy latch taken at entry */
	hca_devp->hd_async_busy = 0;
	cv_broadcast(&hca_devp->hd_async_busy_cv);
	return (retval);
}
1612 
1613 void
1614 ibtl_free_clnt_async_check(ibtl_clnt_t *clntp)
1615 {
1616 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_clnt_async_check(%p)", clntp);
1617 
1618 	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));
1619 
1620 	/* wait for all asyncs based on "ibtl_clnt_list" to complete */
1621 	while (clntp->clnt_async_cnt != 0) {
1622 		cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1623 	}
1624 }
1625 
1626 static void
1627 ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp)
1628 {
1629 	mutex_enter(&ibtl_clnt_list_mutex);
1630 	if (--clntp->clnt_async_cnt == 0) {
1631 		cv_broadcast(&ibtl_clnt_cv);
1632 	}
1633 	mutex_exit(&ibtl_clnt_list_mutex);
1634 }
1635 
1636 static void
1637 ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp)
1638 {
1639 	mutex_enter(&ibtl_clnt_list_mutex);
1640 	++clntp->clnt_async_cnt;
1641 	mutex_exit(&ibtl_clnt_list_mutex);
1642 }
1643 
1644 
1645 /*
1646  * Functions and data structures to inform clients that a notification
1647  * has occurred about Multicast Groups that might interest them.
1648  */
/* Taskq argument describing one SM notice destined for one client. */
struct ibtl_sm_notice {
	ibt_clnt_hdl_t		np_ibt_hdl;	/* client to notify */
	ib_gid_t		np_sgid;	/* source GID of the notice */
	ibt_subnet_event_code_t	np_code;	/* subnet event code */
	ibt_subnet_event_t	np_event;	/* copy of the event payload */
};
1655 
1656 static void
1657 ibtl_sm_notice_task(void *arg)
1658 {
1659 	struct ibtl_sm_notice *noticep = (struct ibtl_sm_notice *)arg;
1660 	ibt_clnt_hdl_t ibt_hdl = noticep->np_ibt_hdl;
1661 	ibt_sm_notice_handler_t sm_notice_handler;
1662 
1663 	sm_notice_handler = ibt_hdl->clnt_sm_trap_handler;
1664 	if (sm_notice_handler != NULL)
1665 		sm_notice_handler(ibt_hdl->clnt_sm_trap_handler_arg,
1666 		    noticep->np_sgid, noticep->np_code, &noticep->np_event);
1667 	kmem_free(noticep, sizeof (*noticep));
1668 	ibtl_dec_clnt_async_cnt(ibt_hdl);
1669 }
1670 
1671 /*
1672  * Inform the client that MCG notices are not working at this time.
1673  */
1674 void
1675 ibtl_cm_sm_notice_init_failure(ibtl_cm_sm_init_fail_t *ifail)
1676 {
1677 	ibt_clnt_hdl_t ibt_hdl = ifail->smf_ibt_hdl;
1678 	struct ibtl_sm_notice *noticep;
1679 	ib_gid_t *sgidp = &ifail->smf_sgid[0];
1680 	int i;
1681 
1682 	for (i = 0; i < ifail->smf_num_sgids; i++) {
1683 		_NOTE(NO_COMPETING_THREADS_NOW)
1684 		noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1685 		noticep->np_ibt_hdl = ibt_hdl;
1686 		noticep->np_sgid = *sgidp++;
1687 		noticep->np_code = IBT_SM_EVENT_UNAVAILABLE;
1688 		_NOTE(COMPETING_THREADS_NOW)
1689 		ibtl_inc_clnt_async_cnt(ibt_hdl);
1690 		(void) taskq_dispatch(ibtl_async_taskq,
1691 		    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1692 	}
1693 }
1694 
1695 /*
1696  * Inform all clients of the event.
1697  */
1698 void
1699 ibtl_cm_sm_notice_handler(ib_gid_t sgid, ibt_subnet_event_code_t code,
1700     ibt_subnet_event_t *event)
1701 {
1702 	_NOTE(NO_COMPETING_THREADS_NOW)
1703 	struct ibtl_sm_notice	*noticep;
1704 	ibtl_clnt_t		*clntp;
1705 
1706 	mutex_enter(&ibtl_clnt_list_mutex);
1707 	clntp = ibtl_clnt_list;
1708 	while (clntp != NULL) {
1709 		if (clntp->clnt_sm_trap_handler) {
1710 			noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1711 			noticep->np_ibt_hdl = clntp;
1712 			noticep->np_sgid = sgid;
1713 			noticep->np_code = code;
1714 			noticep->np_event = *event;
1715 			++clntp->clnt_async_cnt;
1716 			(void) taskq_dispatch(ibtl_async_taskq,
1717 			    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1718 		}
1719 		clntp = clntp->clnt_list_link;
1720 	}
1721 	mutex_exit(&ibtl_clnt_list_mutex);
1722 	_NOTE(COMPETING_THREADS_NOW)
1723 }
1724 
1725 /*
1726  * Record the handler for this client.
1727  */
void
ibtl_cm_set_sm_notice_handler(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	/* record the handler and its callback argument on the client */
	_NOTE(NO_COMPETING_THREADS_NOW)
	ibt_hdl->clnt_sm_trap_handler = sm_notice_handler;
	ibt_hdl->clnt_sm_trap_handler_arg = private;
	_NOTE(COMPETING_THREADS_NOW)
}
1737 
1738 
1739 /*
1740  * ibtl_another_cq_handler_in_thread()
1741  *
1742  * Conditionally increase the number of cq_threads.
1743  * The number of threads grows, based on the number of cqs using threads.
1744  *
1745  * The table below controls the number of threads as follows:
1746  *
1747  *	Number of CQs	Number of cq_threads
1748  *		0		0
1749  *		1		1
1750  *		2-3		2
1751  *		4-5		3
1752  *		6-9		4
1753  *		10-15		5
1754  *		16-23		6
1755  *		24-31		7
1756  *		32+		8
1757  */
1758 
#define	IBTL_CQ_MAXTHREADS 8
/*
 * ibtl_cq_scaling[i] is the number of thread-serviced CQs required
 * before thread i+1 is created; see ibtl_another_cq_handler_in_thread()
 * and the table in the comment above.
 */
static uint8_t ibtl_cq_scaling[IBTL_CQ_MAXTHREADS] = {
	1, 2, 4, 6, 10, 16, 24, 32
};

/* thread ids of the cq_threads, saved for thread_join() at fini time */
static kt_did_t ibtl_cq_did[IBTL_CQ_MAXTHREADS];
1765 
1766 void
1767 ibtl_another_cq_handler_in_thread(void)
1768 {
1769 	kthread_t *t;
1770 	int my_idx;
1771 
1772 	mutex_enter(&ibtl_cq_mutex);
1773 	if ((ibtl_cq_threads == IBTL_CQ_MAXTHREADS) ||
1774 	    (++ibtl_cqs_using_threads < ibtl_cq_scaling[ibtl_cq_threads])) {
1775 		mutex_exit(&ibtl_cq_mutex);
1776 		return;
1777 	}
1778 	my_idx = ibtl_cq_threads++;
1779 	mutex_exit(&ibtl_cq_mutex);
1780 	t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0, TS_RUN,
1781 	    ibtl_pri - 1);
1782 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1783 	ibtl_cq_did[my_idx] = t->t_did;	/* save for thread_join() */
1784 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1785 }
1786 
1787 void
1788 ibtl_thread_init(void)
1789 {
1790 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init()");
1791 
1792 	mutex_init(&ibtl_async_mutex, NULL, MUTEX_DEFAULT, NULL);
1793 	cv_init(&ibtl_async_cv, NULL, CV_DEFAULT, NULL);
1794 	cv_init(&ibtl_clnt_cv, NULL, CV_DEFAULT, NULL);
1795 
1796 	mutex_init(&ibtl_cq_mutex, NULL, MUTEX_DEFAULT, NULL);
1797 	cv_init(&ibtl_cq_cv, NULL, CV_DEFAULT, NULL);
1798 }
1799 
void
ibtl_thread_init2(void)
{
	int i;
	static int initted = 0;
	kthread_t *t;

	/* run-once guard, protected by ibtl_async_mutex */
	mutex_enter(&ibtl_async_mutex);
	if (initted == 1) {
		mutex_exit(&ibtl_async_mutex);
		return;
	}
	initted = 1;
	mutex_exit(&ibtl_async_mutex);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_async_did))
	/* thread ids are kept so ibtl_thread_fini() can thread_join() them */
	ibtl_async_did = kmem_zalloc(ibtl_async_thread_init * sizeof (kt_did_t),
	    KM_SLEEP);

	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init2()");

	/* spawn the async-event service threads */
	for (i = 0; i < ibtl_async_thread_init; i++) {
		t = thread_create(NULL, 0, ibtl_async_thread, NULL, 0, &p0,
		    TS_RUN, ibtl_pri - 1);
		ibtl_async_did[i] = t->t_did; /* thread_join() */
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_async_did))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
	/* spawn any CQ service threads already requested */
	for (i = 0; i < ibtl_cq_threads; i++) {
		t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0,
		    TS_RUN, ibtl_pri - 1);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
		ibtl_cq_did[i] = t->t_did; /* save for thread_join() */
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
}
1836 
void
ibtl_thread_fini(void)
{
	int i;

	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_fini()");

	/* undo the work done by ibtl_thread_init() */

	/* ask the CQ threads, then the async threads, to exit */
	mutex_enter(&ibtl_cq_mutex);
	ibtl_cq_thread_exit = IBTL_THREAD_EXIT;
	cv_broadcast(&ibtl_cq_cv);
	mutex_exit(&ibtl_cq_mutex);

	mutex_enter(&ibtl_async_mutex);
	ibtl_async_thread_exit = IBTL_THREAD_EXIT;
	cv_broadcast(&ibtl_async_cv);
	mutex_exit(&ibtl_async_mutex);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
	for (i = 0; i < ibtl_cq_threads; i++)
		thread_join(ibtl_cq_did[i]);
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))

	/* ibtl_async_did is NULL if ibtl_thread_init2() never ran */
	if (ibtl_async_did) {
		for (i = 0; i < ibtl_async_thread_init; i++)
			thread_join(ibtl_async_did[i]);

		kmem_free(ibtl_async_did,
		    ibtl_async_thread_init * sizeof (kt_did_t));
	}
	/* all threads are gone; tear down the synchronization objects */
	mutex_destroy(&ibtl_cq_mutex);
	cv_destroy(&ibtl_cq_cv);

	mutex_destroy(&ibtl_async_mutex);
	cv_destroy(&ibtl_async_cv);
	cv_destroy(&ibtl_clnt_cv);
}
1875