xref: /illumos-gate/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c (revision 48215d30bccaf4a9d58050835b3eb6ed630a2fde)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/ib/ibtl/impl/ibtl.h>
27 #include <sys/ib/ibtl/impl/ibtl_cm.h>
28 #include <sys/taskq.h>
29 #include <sys/disp.h>
30 #include <sys/callb.h>
31 #include <sys/proc.h>
32 
33 /*
34  * ibtl_handlers.c
35  */
36 
37 /*
38  * What's in this file?
39  *
40  *   This file started as an implementation of Asynchronous Event/Error
41  *   handling and Completion Queue handling.  As the implementation
42  *   evolved, code has been added for other ibc_* interfaces (resume,
43  *   predetach, etc.) that use the same mechanisms as used for asyncs.
44  *
45  * Async and CQ handling at interrupt level.
46  *
47  *   CQ handling is normally done at interrupt level using the CQ callback
48  *   handler to call the appropriate IBT Client (owner of the CQ).  For
49  *   clients that would prefer a fully flexible non-interrupt context to
50  *   do their CQ handling, a CQ can be created so that its handler is
51  *   called from a non-interrupt thread.  CQ handling is done frequently
52  *   whereas Async handling is expected to occur very infrequently.
53  *
54  *   Async handling is done by marking (or'ing in of an async_code of) the
55  *   pertinent IBTL data structure, and then notifying the async_thread(s)
56  *   that the data structure has async work to be done.  The notification
57  *   occurs by linking the data structure through its async_link onto a
58  *   list of like data structures and waking up an async_thread.  This
59  *   list append is not done if there is already async work pending on
60  *   this data structure (IBTL_ASYNC_PENDING).
61  *
62  * Async Mutex and CQ Mutex
63  *
64  *   The global ibtl_async_mutex is "the" mutex used to control access
65  *   to all the data needed by ibc_async_handler.  All the threads that
66  *   use this mutex are written so that the mutex is held for very short
67  *   periods of time, and never held while making calls to functions
68  *   that may block.
69  *
70  *   The global ibtl_cq_mutex is used similarly by ibc_cq_handler and
71  *   the ibtl_cq_thread(s).
72  *
73  * Mutex hierarchy
74  *
75  *   The ibtl_clnt_list_mutex is above the ibtl_async_mutex.
76  *   ibtl_clnt_list_mutex protects all of the various lists.
77  *   The ibtl_async_mutex is below this in the hierarchy.
78  *
79  *   The ibtl_cq_mutex is independent of the above mutexes.
80  *
81  * Threads
82  *
83  *   There are "ibtl_cq_threads" number of threads created for handling
84  *   Completion Queues in threads.  If this feature really gets used,
85  *   then we will want to do some suitable tuning.  Similarly, we may
86  *   want to tune the number of "ibtl_async_thread_init".
87  *
88  *   The function ibtl_cq_thread is the main loop for handling a CQ in a
89  *   thread.  There can be multiple threads executing this same code.
90  *   The code sleeps when there is no work to be done (list is empty),
91  *   otherwise it pulls the first CQ structure off the list and performs
92  *   the CQ handler callback to the client.  After that returns, a check
93  *   is made, and if another ibc_cq_handler call was made for this CQ,
94  *   the client is called again.
95  *
96  *   The function ibtl_async_thread is the main loop for handling async
97  *   events/errors.  There can be multiple threads executing this same code.
98  *   The code sleeps when there is no work to be done (lists are empty),
99  *   otherwise it pulls the first structure off one of the lists and
100  *   performs the async callback(s) to the client(s).  Note that HCA
101  *   async handling is done by calling each of the clients using the HCA.
102  *   When the async handling completes, the data structure having the async
103  *   event/error is checked for more work before it's considered "done".
104  *
105  * Taskq
106  *
107  *   The async_taskq is used here for allowing async handler callbacks to
108  *   occur simultaneously to multiple clients of an HCA.  This taskq could
109  *   be used for other purposes, e.g., if all the async_threads are in
110  *   use, but this is deemed as overkill since asyncs should occur rarely.
111  */
112 
113 /* Globals */
114 static char ibtf_handlers[] = "ibtl_handlers";
115 
116 /* priority for IBTL threads (async, cq, and taskq) */
117 static pri_t ibtl_pri = MAXCLSYSPRI - 1; /* maybe override in /etc/system */
118 
119 /* taskq used for HCA asyncs */
120 #define	ibtl_async_taskq system_taskq
121 
122 /* data for async handling by threads */
123 static kmutex_t ibtl_async_mutex;	/* protects most *_async_* data */
124 static kcondvar_t ibtl_async_cv;	/* async_threads wait on this */
125 static kcondvar_t ibtl_clnt_cv;		/* ibt_detach might wait on this */
126 static void ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp);
127 static void ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp);
128 
129 static kt_did_t *ibtl_async_did;	/* for thread_join() */
130 int ibtl_async_thread_init = 4;	/* total # of async_threads to create */
131 static int ibtl_async_thread_exit = 0;	/* set if/when thread(s) should exit */
132 
133 /* async lists for various structures */
134 static ibtl_hca_devinfo_t *ibtl_async_hca_list_start, *ibtl_async_hca_list_end;
135 static ibtl_eec_t *ibtl_async_eec_list_start, *ibtl_async_eec_list_end;
136 static ibtl_qp_t *ibtl_async_qp_list_start, *ibtl_async_qp_list_end;
137 static ibtl_cq_t *ibtl_async_cq_list_start, *ibtl_async_cq_list_end;
138 static ibtl_srq_t *ibtl_async_srq_list_start, *ibtl_async_srq_list_end;
139 
140 /* data for CQ completion handling by threads */
141 static kmutex_t ibtl_cq_mutex;	/* protects the cv and the list below */
142 static kcondvar_t ibtl_cq_cv;
143 static ibtl_cq_t *ibtl_cq_list_start, *ibtl_cq_list_end;
144 
145 static int ibtl_cq_threads = 0;		/* total # of cq threads */
146 static int ibtl_cqs_using_threads = 0;	/* total # of cqs using threads */
147 static int ibtl_cq_thread_exit = 0;	/* set if/when thread(s) should exit */
148 
149 /* value used to tell IBTL threads to exit */
150 #define	IBTL_THREAD_EXIT 0x1b7fdead	/* IBTF DEAD */
151 
152 int ibtl_eec_not_supported = 1;
153 
154 char *ibtl_last_client_name;	/* may help debugging */
155 
156 _NOTE(LOCK_ORDER(ibtl_clnt_list_mutex ibtl_async_mutex))
157 
158 /*
159  * ibc_async_handler()
160  *
161  * Asynchronous Event/Error Handler.
162  *
163  *	This is the function called HCA drivers to post various async
164  *	event and errors mention in the IB architecture spec.  See
165  *	ibtl_types.h for additional details of this.
166  *
167  *	This function marks the pertinent IBTF object with the async_code,
168  *	and queues the object for handling by an ibtl_async_thread.  If
169  *	the object is NOT already marked for async processing, it is added
170  *	to the associated list for that type of object, and an
171  *	ibtl_async_thread is signaled to finish the async work.
172  */
173 void
174 ibc_async_handler(ibc_clnt_hdl_t hca_devp, ibt_async_code_t code,
175     ibc_async_event_t *event_p)
176 {
177 	ibtl_qp_t	*ibtl_qp;
178 	ibtl_cq_t	*ibtl_cq;
179 	ibtl_srq_t	*ibtl_srq;
180 	ibtl_eec_t	*ibtl_eec;
181 	uint8_t		port_minus1;
182 
183 	IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler(%p, 0x%x, %p)",
184 	    hca_devp, code, event_p);
185 
186 	mutex_enter(&ibtl_async_mutex);
187 
188 	switch (code) {
189 	case IBT_EVENT_PATH_MIGRATED_QP:
190 	case IBT_EVENT_SQD:
191 	case IBT_ERROR_CATASTROPHIC_QP:
192 	case IBT_ERROR_PATH_MIGRATE_REQ_QP:
193 	case IBT_EVENT_COM_EST_QP:
194 	case IBT_ERROR_INVALID_REQUEST_QP:
195 	case IBT_ERROR_ACCESS_VIOLATION_QP:
196 	case IBT_EVENT_EMPTY_QP:
197 		ibtl_qp = event_p->ev_qp_hdl;
198 		if (ibtl_qp == NULL) {
199 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
200 			    "bad qp handle");
201 			break;
202 		}
203 		switch (code) {
204 		case IBT_ERROR_CATASTROPHIC_QP:
205 			ibtl_qp->qp_cat_fma_ena = event_p->ev_fma_ena; break;
206 		case IBT_ERROR_PATH_MIGRATE_REQ_QP:
207 			ibtl_qp->qp_pth_fma_ena = event_p->ev_fma_ena; break;
208 		case IBT_ERROR_INVALID_REQUEST_QP:
209 			ibtl_qp->qp_inv_fma_ena = event_p->ev_fma_ena; break;
210 		case IBT_ERROR_ACCESS_VIOLATION_QP:
211 			ibtl_qp->qp_acc_fma_ena = event_p->ev_fma_ena; break;
212 		}
213 
214 		ibtl_qp->qp_async_codes |= code;
215 		if ((ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) == 0) {
216 			ibtl_qp->qp_async_flags |= IBTL_ASYNC_PENDING;
217 			ibtl_qp->qp_async_link = NULL;
218 			if (ibtl_async_qp_list_end == NULL)
219 				ibtl_async_qp_list_start = ibtl_qp;
220 			else
221 				ibtl_async_qp_list_end->qp_async_link = ibtl_qp;
222 			ibtl_async_qp_list_end = ibtl_qp;
223 			cv_signal(&ibtl_async_cv);
224 		}
225 		break;
226 
227 	case IBT_ERROR_CQ:
228 		ibtl_cq = event_p->ev_cq_hdl;
229 		if (ibtl_cq == NULL) {
230 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
231 			    "bad cq handle");
232 			break;
233 		}
234 		ibtl_cq->cq_async_codes |= code;
235 		ibtl_cq->cq_fma_ena = event_p->ev_fma_ena;
236 		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) == 0) {
237 			ibtl_cq->cq_async_flags |= IBTL_ASYNC_PENDING;
238 			ibtl_cq->cq_async_link = NULL;
239 			if (ibtl_async_cq_list_end == NULL)
240 				ibtl_async_cq_list_start = ibtl_cq;
241 			else
242 				ibtl_async_cq_list_end->cq_async_link = ibtl_cq;
243 			ibtl_async_cq_list_end = ibtl_cq;
244 			cv_signal(&ibtl_async_cv);
245 		}
246 		break;
247 
248 	case IBT_ERROR_CATASTROPHIC_SRQ:
249 	case IBT_EVENT_LIMIT_REACHED_SRQ:
250 		ibtl_srq = event_p->ev_srq_hdl;
251 		if (ibtl_srq == NULL) {
252 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
253 			    "bad srq handle");
254 			break;
255 		}
256 		ibtl_srq->srq_async_codes |= code;
257 		ibtl_srq->srq_fma_ena = event_p->ev_fma_ena;
258 		if ((ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) == 0) {
259 			ibtl_srq->srq_async_flags |= IBTL_ASYNC_PENDING;
260 			ibtl_srq->srq_async_link = NULL;
261 			if (ibtl_async_srq_list_end == NULL)
262 				ibtl_async_srq_list_start = ibtl_srq;
263 			else
264 				ibtl_async_srq_list_end->srq_async_link =
265 				    ibtl_srq;
266 			ibtl_async_srq_list_end = ibtl_srq;
267 			cv_signal(&ibtl_async_cv);
268 		}
269 		break;
270 
271 	case IBT_EVENT_PATH_MIGRATED_EEC:
272 	case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
273 	case IBT_ERROR_CATASTROPHIC_EEC:
274 	case IBT_EVENT_COM_EST_EEC:
275 		if (ibtl_eec_not_supported) {
276 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
277 			    "EEC events are disabled.");
278 			break;
279 		}
280 		ibtl_eec = event_p->ev_eec_hdl;
281 		if (ibtl_eec == NULL) {
282 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
283 			    "bad eec handle");
284 			break;
285 		}
286 		switch (code) {
287 		case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
288 			ibtl_eec->eec_pth_fma_ena = event_p->ev_fma_ena; break;
289 		case IBT_ERROR_CATASTROPHIC_EEC:
290 			ibtl_eec->eec_cat_fma_ena = event_p->ev_fma_ena; break;
291 		}
292 		ibtl_eec->eec_async_codes |= code;
293 		if ((ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) == 0) {
294 			ibtl_eec->eec_async_flags |= IBTL_ASYNC_PENDING;
295 			ibtl_eec->eec_async_link = NULL;
296 			if (ibtl_async_eec_list_end == NULL)
297 				ibtl_async_eec_list_start = ibtl_eec;
298 			else
299 				ibtl_async_eec_list_end->eec_async_link =
300 				    ibtl_eec;
301 			ibtl_async_eec_list_end = ibtl_eec;
302 			cv_signal(&ibtl_async_cv);
303 		}
304 		break;
305 
306 	case IBT_ERROR_LOCAL_CATASTROPHIC:
307 		hca_devp->hd_async_codes |= code;
308 		hca_devp->hd_fma_ena = event_p->ev_fma_ena;
309 		/* FALLTHROUGH */
310 
311 	case IBT_EVENT_PORT_UP:
312 	case IBT_ERROR_PORT_DOWN:
313 		if ((code == IBT_EVENT_PORT_UP) ||
314 		    (code == IBT_ERROR_PORT_DOWN)) {
315 			if ((port_minus1 = event_p->ev_port - 1) >=
316 			    hca_devp->hd_hca_attr->hca_nports) {
317 				IBTF_DPRINTF_L2(ibtf_handlers,
318 				    "ibc_async_handler: bad port #: %d",
319 				    event_p->ev_port);
320 				break;
321 			}
322 			hca_devp->hd_async_port[port_minus1] =
323 			    ((code == IBT_EVENT_PORT_UP) ? IBTL_HCA_PORT_UP :
324 			    IBTL_HCA_PORT_DOWN) | IBTL_HCA_PORT_CHANGED;
325 			hca_devp->hd_async_codes |= code;
326 		}
327 
328 		if ((hca_devp->hd_async_flags & IBTL_ASYNC_PENDING) == 0) {
329 			hca_devp->hd_async_flags |= IBTL_ASYNC_PENDING;
330 			hca_devp->hd_async_link = NULL;
331 			if (ibtl_async_hca_list_end == NULL)
332 				ibtl_async_hca_list_start = hca_devp;
333 			else
334 				ibtl_async_hca_list_end->hd_async_link =
335 				    hca_devp;
336 			ibtl_async_hca_list_end = hca_devp;
337 			cv_signal(&ibtl_async_cv);
338 		}
339 
340 		break;
341 
342 	default:
343 		IBTF_DPRINTF_L1(ibtf_handlers, "ibc_async_handler: "
344 		    "invalid code (0x%x)", code);
345 	}
346 
347 	mutex_exit(&ibtl_async_mutex);
348 }
349 
350 
351 /* Finally, make the async call to the client. */
352 
353 static void
354 ibtl_async_client_call(ibtl_hca_t *ibt_hca, ibt_async_code_t code,
355     ibt_async_event_t *event_p)
356 {
357 	ibtl_clnt_t		*clntp;
358 	void			*client_private;
359 	ibt_async_handler_t	async_handler;
360 	char			*client_name;
361 
362 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call(%p, 0x%x, %p)",
363 	    ibt_hca, code, event_p);
364 
365 	clntp = ibt_hca->ha_clnt_devp;
366 
367 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
368 	/* Record who is being called (just a debugging aid) */
369 	ibtl_last_client_name = client_name = clntp->clnt_name;
370 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
371 
372 	client_private = clntp->clnt_private;
373 	async_handler = clntp->clnt_modinfop->mi_async_handler;
374 
375 	if (code & (IBT_EVENT_COM_EST_QP | IBT_EVENT_COM_EST_EEC)) {
376 		mutex_enter(&ibtl_clnt_list_mutex);
377 		async_handler = ibtl_cm_async_handler;
378 		client_private = ibtl_cm_clnt_private;
379 		mutex_exit(&ibtl_clnt_list_mutex);
380 		ibt_hca = NULL;
381 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
382 		    "calling CM for COM_EST");
383 	} else {
384 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
385 		    "calling client '%s'", client_name);
386 	}
387 	if (async_handler != NULL)
388 		async_handler(client_private, ibt_hca, code, event_p);
389 	else
390 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
391 		    "client '%s' has no async handler", client_name);
392 }
393 
394 /*
395  * Inform CM or DM about HCA events.
396  *
397  *	We use taskqs to allow simultaneous notification, with sleeping.
398  *	Since taskqs only allow one argument, we define a structure
399  *	because we need to pass in more than one argument.
400  */
401 
402 struct ibtl_mgr_s {
403 	ibtl_hca_devinfo_t	*mgr_hca_devp;
404 	ibt_async_handler_t	mgr_async_handler;
405 	void			*mgr_clnt_private;
406 };
407 
408 /*
409  * Asyncs of HCA level events for CM and DM.  Call CM or DM and tell them
410  * about the HCA for the event recorded in the ibtl_hca_devinfo_t.
411  */
412 static void
413 ibtl_do_mgr_async_task(void *arg)
414 {
415 	struct ibtl_mgr_s	*mgrp = (struct ibtl_mgr_s *)arg;
416 	ibtl_hca_devinfo_t	*hca_devp = mgrp->mgr_hca_devp;
417 
418 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_mgr_async_task(0x%x)",
419 	    hca_devp->hd_async_code);
420 
421 	mgrp->mgr_async_handler(mgrp->mgr_clnt_private, NULL,
422 	    hca_devp->hd_async_code, &hca_devp->hd_async_event);
423 	kmem_free(mgrp, sizeof (*mgrp));
424 
425 	mutex_enter(&ibtl_clnt_list_mutex);
426 	if (--hca_devp->hd_async_task_cnt == 0)
427 		cv_signal(&hca_devp->hd_async_task_cv);
428 	mutex_exit(&ibtl_clnt_list_mutex);
429 }
430 
431 static void
432 ibtl_tell_mgr(ibtl_hca_devinfo_t *hca_devp, ibt_async_handler_t async_handler,
433     void *clnt_private)
434 {
435 	struct ibtl_mgr_s *mgrp;
436 
437 	if (async_handler == NULL)
438 		return;
439 
440 	_NOTE(NO_COMPETING_THREADS_NOW)
441 	mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP);
442 	mgrp->mgr_hca_devp = hca_devp;
443 	mgrp->mgr_async_handler = async_handler;
444 	mgrp->mgr_clnt_private = clnt_private;
445 	hca_devp->hd_async_task_cnt++;
446 
447 	(void) taskq_dispatch(ibtl_async_taskq, ibtl_do_mgr_async_task, mgrp,
448 	    TQ_SLEEP);
449 #ifndef lint
450 	_NOTE(COMPETING_THREADS_NOW)
451 #endif
452 }
453 
454 /*
455  * Per client-device asyncs for HCA level events.  Call each client that is
456  * using the HCA for the event recorded in the ibtl_hca_devinfo_t.
457  */
458 static void
459 ibtl_hca_client_async_task(void *arg)
460 {
461 	ibtl_hca_t		*ibt_hca = (ibtl_hca_t *)arg;
462 	ibtl_hca_devinfo_t	*hca_devp = ibt_hca->ha_hca_devp;
463 	ibtl_clnt_t		*clntp = ibt_hca->ha_clnt_devp;
464 	ibt_async_event_t	async_event;
465 
466 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_hca_client_async_task(%p, 0x%x)",
467 	    ibt_hca, hca_devp->hd_async_code);
468 
469 	bcopy(&hca_devp->hd_async_event, &async_event, sizeof (async_event));
470 	ibtl_async_client_call(ibt_hca, hca_devp->hd_async_code, &async_event);
471 
472 	mutex_enter(&ibtl_async_mutex);
473 	if (--ibt_hca->ha_async_cnt == 0 &&
474 	    (ibt_hca->ha_async_flags & IBTL_ASYNC_FREE_OBJECT)) {
475 		mutex_exit(&ibtl_async_mutex);
476 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
477 	} else
478 		mutex_exit(&ibtl_async_mutex);
479 
480 	mutex_enter(&ibtl_clnt_list_mutex);
481 	if (--hca_devp->hd_async_task_cnt == 0)
482 		cv_signal(&hca_devp->hd_async_task_cv);
483 	if (--clntp->clnt_async_cnt == 0)
484 		cv_broadcast(&ibtl_clnt_cv);
485 
486 	mutex_exit(&ibtl_clnt_list_mutex);
487 }
488 
489 /*
490  * Asyncs for HCA level events.
491  *
492  * The function continues to run until there are no more async
493  * events/errors for this HCA.  An event is chosen for dispatch
494  * to all clients of this HCA.  This thread dispatches them via
495  * the ibtl_async_taskq, then sleeps until all tasks are done.
496  *
497  * This thread records the async_code and async_event in the
498  * ibtl_hca_devinfo_t for all client taskq threads to reference.
499  *
500  * This is called from an async or taskq thread with ibtl_async_mutex held.
501  */
502 static void
503 ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp)
504 {
505 	ibtl_hca_t			*ibt_hca;
506 	ibt_async_code_t		code;
507 	ibtl_async_port_status_t  	temp;
508 	uint8_t				nports;
509 	uint8_t				port_minus1;
510 	ibtl_async_port_status_t	*portp;
511 
512 	mutex_exit(&ibtl_async_mutex);
513 
514 	mutex_enter(&ibtl_clnt_list_mutex);
515 	while (hca_devp->hd_async_busy)
516 		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
517 	hca_devp->hd_async_busy = 1;
518 	mutex_enter(&ibtl_async_mutex);
519 
520 	bzero(&hca_devp->hd_async_event, sizeof (hca_devp->hd_async_event));
521 	for (;;) {
522 
523 		hca_devp->hd_async_event.ev_fma_ena = 0;
524 
525 		code = hca_devp->hd_async_codes;
526 		if (code & IBT_ERROR_LOCAL_CATASTROPHIC) {
527 			code = IBT_ERROR_LOCAL_CATASTROPHIC;
528 			hca_devp->hd_async_event.ev_fma_ena =
529 			    hca_devp->hd_fma_ena;
530 		} else if (code & IBT_ERROR_PORT_DOWN)
531 			code = IBT_ERROR_PORT_DOWN;
532 		else if (code & IBT_EVENT_PORT_UP)
533 			code = IBT_EVENT_PORT_UP;
534 		else {
535 			hca_devp->hd_async_codes = 0;
536 			code = 0;
537 		}
538 
539 		if (code == 0) {
540 			hca_devp->hd_async_flags &= ~IBTL_ASYNC_PENDING;
541 			break;
542 		}
543 		hca_devp->hd_async_codes &= ~code;
544 
545 		if ((code == IBT_EVENT_PORT_UP) ||
546 		    (code == IBT_ERROR_PORT_DOWN)) {
547 			/* PORT_UP or PORT_DOWN */
548 			portp = hca_devp->hd_async_port;
549 			nports = hca_devp->hd_hca_attr->hca_nports;
550 			for (port_minus1 = 0; port_minus1 < nports;
551 			    port_minus1++) {
552 				temp = ((code == IBT_EVENT_PORT_UP) ?
553 				    IBTL_HCA_PORT_UP : IBTL_HCA_PORT_DOWN) |
554 				    IBTL_HCA_PORT_CHANGED;
555 				if (portp[port_minus1] == temp)
556 					break;
557 			}
558 			if (port_minus1 >= nports) {
559 				/* we checked again, but found nothing */
560 				continue;
561 			}
562 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_do_hca_asyncs: "
563 			    "async: port# %x code %x", port_minus1 + 1, code);
564 			/* mark it to check for other ports after we're done */
565 			hca_devp->hd_async_codes |= code;
566 
567 			hca_devp->hd_async_event.ev_port = port_minus1 + 1;
568 			hca_devp->hd_async_port[port_minus1] &=
569 			    ~IBTL_HCA_PORT_CHANGED;
570 
571 			mutex_exit(&ibtl_async_mutex);
572 			ibtl_reinit_hca_portinfo(hca_devp, port_minus1 + 1);
573 			mutex_enter(&ibtl_async_mutex);
574 		}
575 
576 		hca_devp->hd_async_code = code;
577 		hca_devp->hd_async_event.ev_hca_guid =
578 		    hca_devp->hd_hca_attr->hca_node_guid;
579 		mutex_exit(&ibtl_async_mutex);
580 
581 		/*
582 		 * Make sure to inform CM, DM, and IBMA if we know of them.
583 		 * Also, make sure not to inform them a second time, which
584 		 * would occur if they have the HCA open.
585 		 */
586 
587 		if (ibtl_ibma_async_handler)
588 			ibtl_tell_mgr(hca_devp, ibtl_ibma_async_handler,
589 			    ibtl_ibma_clnt_private);
590 		/* wait for all tasks to complete */
591 		while (hca_devp->hd_async_task_cnt != 0)
592 			cv_wait(&hca_devp->hd_async_task_cv,
593 			    &ibtl_clnt_list_mutex);
594 
595 		if (ibtl_dm_async_handler)
596 			ibtl_tell_mgr(hca_devp, ibtl_dm_async_handler,
597 			    ibtl_dm_clnt_private);
598 		if (ibtl_cm_async_handler)
599 			ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
600 			    ibtl_cm_clnt_private);
601 		/* wait for all tasks to complete */
602 		while (hca_devp->hd_async_task_cnt != 0)
603 			cv_wait(&hca_devp->hd_async_task_cv,
604 			    &ibtl_clnt_list_mutex);
605 
606 		for (ibt_hca = hca_devp->hd_clnt_list;
607 		    ibt_hca != NULL;
608 		    ibt_hca = ibt_hca->ha_clnt_link) {
609 
610 			/* Managers are handled above */
611 			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
612 			    ibtl_cm_async_handler)
613 				continue;
614 			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
615 			    ibtl_dm_async_handler)
616 				continue;
617 			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
618 			    ibtl_ibma_async_handler)
619 				continue;
620 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
621 
622 			mutex_enter(&ibtl_async_mutex);
623 			ibt_hca->ha_async_cnt++;
624 			mutex_exit(&ibtl_async_mutex);
625 			hca_devp->hd_async_task_cnt++;
626 			(void) taskq_dispatch(ibtl_async_taskq,
627 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
628 		}
629 
630 		/* wait for all tasks to complete */
631 		while (hca_devp->hd_async_task_cnt != 0)
632 			cv_wait(&hca_devp->hd_async_task_cv,
633 			    &ibtl_clnt_list_mutex);
634 
635 		mutex_enter(&ibtl_async_mutex);
636 	}
637 	hca_devp->hd_async_code = 0;
638 	hca_devp->hd_async_busy = 0;
639 	cv_broadcast(&hca_devp->hd_async_busy_cv);
640 	mutex_exit(&ibtl_clnt_list_mutex);
641 }
642 
643 /*
644  * Asyncs for QP objects.
645  *
646  * The function continues to run until there are no more async
647  * events/errors for this object.
648  */
649 static void
650 ibtl_do_qp_asyncs(ibtl_qp_t *ibtl_qp)
651 {
652 	ibt_async_code_t	code;
653 	ibt_async_event_t	async_event;
654 
655 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
656 	bzero(&async_event, sizeof (async_event));
657 	async_event.ev_chan_hdl = IBTL_QP2CHAN(ibtl_qp);
658 
659 	while ((code = ibtl_qp->qp_async_codes) != 0) {
660 		async_event.ev_fma_ena = 0;
661 		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT)
662 			code = 0;	/* fallthrough to "kmem_free" */
663 		else if (code & IBT_ERROR_CATASTROPHIC_QP) {
664 			code = IBT_ERROR_CATASTROPHIC_QP;
665 			async_event.ev_fma_ena = ibtl_qp->qp_cat_fma_ena;
666 		} else if (code & IBT_ERROR_INVALID_REQUEST_QP) {
667 			code = IBT_ERROR_INVALID_REQUEST_QP;
668 			async_event.ev_fma_ena = ibtl_qp->qp_inv_fma_ena;
669 		} else if (code & IBT_ERROR_ACCESS_VIOLATION_QP) {
670 			code = IBT_ERROR_ACCESS_VIOLATION_QP;
671 			async_event.ev_fma_ena = ibtl_qp->qp_acc_fma_ena;
672 		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_QP) {
673 			code = IBT_ERROR_PATH_MIGRATE_REQ_QP;
674 			async_event.ev_fma_ena = ibtl_qp->qp_pth_fma_ena;
675 		} else if (code & IBT_EVENT_PATH_MIGRATED_QP)
676 			code = IBT_EVENT_PATH_MIGRATED_QP;
677 		else if (code & IBT_EVENT_SQD)
678 			code = IBT_EVENT_SQD;
679 		else if (code & IBT_EVENT_COM_EST_QP)
680 			code = IBT_EVENT_COM_EST_QP;
681 		else if (code & IBT_EVENT_EMPTY_QP)
682 			code = IBT_EVENT_EMPTY_QP;
683 		else {
684 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_qp_asyncs: "
685 			    "async: unexpected QP async code 0x%x", code);
686 			ibtl_qp->qp_async_codes = 0;
687 			code = 0;
688 		}
689 		ibtl_qp->qp_async_codes &= ~code;
690 
691 		if (code) {
692 			mutex_exit(&ibtl_async_mutex);
693 			ibtl_async_client_call(ibtl_qp->qp_hca,
694 			    code, &async_event);
695 			mutex_enter(&ibtl_async_mutex);
696 		}
697 
698 		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT) {
699 			mutex_exit(&ibtl_async_mutex);
700 			cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
701 			mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
702 			kmem_free(IBTL_QP2CHAN(ibtl_qp),
703 			    sizeof (ibtl_channel_t));
704 			mutex_enter(&ibtl_async_mutex);
705 			return;
706 		}
707 	}
708 	ibtl_qp->qp_async_flags &= ~IBTL_ASYNC_PENDING;
709 }
710 
711 /*
712  * Asyncs for SRQ objects.
713  *
714  * The function continues to run until there are no more async
715  * events/errors for this object.
716  */
717 static void
718 ibtl_do_srq_asyncs(ibtl_srq_t *ibtl_srq)
719 {
720 	ibt_async_code_t	code;
721 	ibt_async_event_t	async_event;
722 
723 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
724 	bzero(&async_event, sizeof (async_event));
725 	async_event.ev_srq_hdl = ibtl_srq;
726 	async_event.ev_fma_ena = ibtl_srq->srq_fma_ena;
727 
728 	while ((code = ibtl_srq->srq_async_codes) != 0) {
729 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT)
730 			code = 0;	/* fallthrough to "kmem_free" */
731 		else if (code & IBT_ERROR_CATASTROPHIC_SRQ)
732 			code = IBT_ERROR_CATASTROPHIC_SRQ;
733 		else if (code & IBT_EVENT_LIMIT_REACHED_SRQ)
734 			code = IBT_EVENT_LIMIT_REACHED_SRQ;
735 		else {
736 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_srq_asyncs: "
737 			    "async: unexpected SRQ async code 0x%x", code);
738 			ibtl_srq->srq_async_codes = 0;
739 			code = 0;
740 		}
741 		ibtl_srq->srq_async_codes &= ~code;
742 
743 		if (code) {
744 			mutex_exit(&ibtl_async_mutex);
745 			ibtl_async_client_call(ibtl_srq->srq_hca,
746 			    code, &async_event);
747 			mutex_enter(&ibtl_async_mutex);
748 		}
749 
750 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
751 			mutex_exit(&ibtl_async_mutex);
752 			kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
753 			mutex_enter(&ibtl_async_mutex);
754 			return;
755 		}
756 	}
757 	ibtl_srq->srq_async_flags &= ~IBTL_ASYNC_PENDING;
758 }
759 
760 /*
761  * Asyncs for CQ objects.
762  *
763  * The function continues to run until there are no more async
764  * events/errors for this object.
765  */
766 static void
767 ibtl_do_cq_asyncs(ibtl_cq_t *ibtl_cq)
768 {
769 	ibt_async_code_t	code;
770 	ibt_async_event_t	async_event;
771 
772 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
773 	bzero(&async_event, sizeof (async_event));
774 	async_event.ev_cq_hdl = ibtl_cq;
775 	async_event.ev_fma_ena = ibtl_cq->cq_fma_ena;
776 
777 	while ((code = ibtl_cq->cq_async_codes) != 0) {
778 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT)
779 			code = 0;	/* fallthrough to "kmem_free" */
780 		else if (code & IBT_ERROR_CQ)
781 			code = IBT_ERROR_CQ;
782 		else {
783 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_cq_asyncs: "
784 			    "async: unexpected CQ async code 0x%x", code);
785 			ibtl_cq->cq_async_codes = 0;
786 			code = 0;
787 		}
788 		ibtl_cq->cq_async_codes &= ~code;
789 
790 		if (code) {
791 			mutex_exit(&ibtl_async_mutex);
792 			ibtl_async_client_call(ibtl_cq->cq_hca,
793 			    code, &async_event);
794 			mutex_enter(&ibtl_async_mutex);
795 		}
796 
797 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
798 			mutex_exit(&ibtl_async_mutex);
799 			mutex_destroy(&ibtl_cq->cq_mutex);
800 			kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
801 			mutex_enter(&ibtl_async_mutex);
802 			return;
803 		}
804 	}
805 	ibtl_cq->cq_async_flags &= ~IBTL_ASYNC_PENDING;
806 }
807 
808 /*
809  * Asyncs for EEC objects.
810  *
811  * The function continues to run until there are no more async
812  * events/errors for this object.
813  */
814 static void
815 ibtl_do_eec_asyncs(ibtl_eec_t *ibtl_eec)
816 {
817 	ibt_async_code_t	code;
818 	ibt_async_event_t	async_event;
819 
820 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
821 	bzero(&async_event, sizeof (async_event));
822 	async_event.ev_chan_hdl = ibtl_eec->eec_channel;
823 
824 	while ((code = ibtl_eec->eec_async_codes) != 0) {
825 		async_event.ev_fma_ena = 0;
826 		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT)
827 			code = 0;	/* fallthrough to "kmem_free" */
828 		else if (code & IBT_ERROR_CATASTROPHIC_EEC) {
829 			code = IBT_ERROR_CATASTROPHIC_CHAN;
830 			async_event.ev_fma_ena = ibtl_eec->eec_cat_fma_ena;
831 		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_EEC) {
832 			code = IBT_ERROR_PATH_MIGRATE_REQ;
833 			async_event.ev_fma_ena = ibtl_eec->eec_pth_fma_ena;
834 		} else if (code & IBT_EVENT_PATH_MIGRATED_EEC)
835 			code = IBT_EVENT_PATH_MIGRATED;
836 		else if (code & IBT_EVENT_COM_EST_EEC)
837 			code = IBT_EVENT_COM_EST;
838 		else {
839 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_eec_asyncs: "
840 			    "async: unexpected code 0x%x", code);
841 			ibtl_eec->eec_async_codes = 0;
842 			code = 0;
843 		}
844 		ibtl_eec->eec_async_codes &= ~code;
845 
846 		if (code) {
847 			mutex_exit(&ibtl_async_mutex);
848 			ibtl_async_client_call(ibtl_eec->eec_hca,
849 			    code, &async_event);
850 			mutex_enter(&ibtl_async_mutex);
851 		}
852 
853 		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT) {
854 			mutex_exit(&ibtl_async_mutex);
855 			kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
856 			mutex_enter(&ibtl_async_mutex);
857 			return;
858 		}
859 	}
860 	ibtl_eec->eec_async_flags &= ~IBTL_ASYNC_PENDING;
861 }
862 
/*
 * Under lock_lint, cpr_mutex is declared at file scope; otherwise
 * ibtl_async_thread() declares its own local cpr_mutex.
 */
#if defined(__lock_lint)
kmutex_t cpr_mutex;
#endif	/* __lock_lint */
866 
867 /*
868  * Loop forever, calling async_handlers until all of the async lists
869  * are empty.
870  */
871 
872 static void
873 ibtl_async_thread(void)
874 {
875 #ifndef __lock_lint
876 	kmutex_t cpr_mutex;
877 #endif
878 	callb_cpr_t	cprinfo;
879 
880 	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
881 	_NOTE(NO_COMPETING_THREADS_NOW)
882 	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
883 	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
884 	    "ibtl_async_thread");
885 #ifndef lint
886 	_NOTE(COMPETING_THREADS_NOW)
887 #endif
888 
889 	mutex_enter(&ibtl_async_mutex);
890 
891 	for (;;) {
892 		if (ibtl_async_hca_list_start) {
893 			ibtl_hca_devinfo_t *hca_devp;
894 
895 			/* remove first entry from list */
896 			hca_devp = ibtl_async_hca_list_start;
897 			ibtl_async_hca_list_start = hca_devp->hd_async_link;
898 			hca_devp->hd_async_link = NULL;
899 			if (ibtl_async_hca_list_start == NULL)
900 				ibtl_async_hca_list_end = NULL;
901 
902 			ibtl_do_hca_asyncs(hca_devp);
903 
904 		} else if (ibtl_async_qp_list_start) {
905 			ibtl_qp_t *ibtl_qp;
906 
907 			/* remove from list */
908 			ibtl_qp = ibtl_async_qp_list_start;
909 			ibtl_async_qp_list_start = ibtl_qp->qp_async_link;
910 			ibtl_qp->qp_async_link = NULL;
911 			if (ibtl_async_qp_list_start == NULL)
912 				ibtl_async_qp_list_end = NULL;
913 
914 			ibtl_do_qp_asyncs(ibtl_qp);
915 
916 		} else if (ibtl_async_srq_list_start) {
917 			ibtl_srq_t *ibtl_srq;
918 
919 			/* remove from list */
920 			ibtl_srq = ibtl_async_srq_list_start;
921 			ibtl_async_srq_list_start = ibtl_srq->srq_async_link;
922 			ibtl_srq->srq_async_link = NULL;
923 			if (ibtl_async_srq_list_start == NULL)
924 				ibtl_async_srq_list_end = NULL;
925 
926 			ibtl_do_srq_asyncs(ibtl_srq);
927 
928 		} else if (ibtl_async_eec_list_start) {
929 			ibtl_eec_t *ibtl_eec;
930 
931 			/* remove from list */
932 			ibtl_eec = ibtl_async_eec_list_start;
933 			ibtl_async_eec_list_start = ibtl_eec->eec_async_link;
934 			ibtl_eec->eec_async_link = NULL;
935 			if (ibtl_async_eec_list_start == NULL)
936 				ibtl_async_eec_list_end = NULL;
937 
938 			ibtl_do_eec_asyncs(ibtl_eec);
939 
940 		} else if (ibtl_async_cq_list_start) {
941 			ibtl_cq_t *ibtl_cq;
942 
943 			/* remove from list */
944 			ibtl_cq = ibtl_async_cq_list_start;
945 			ibtl_async_cq_list_start = ibtl_cq->cq_async_link;
946 			ibtl_cq->cq_async_link = NULL;
947 			if (ibtl_async_cq_list_start == NULL)
948 				ibtl_async_cq_list_end = NULL;
949 
950 			ibtl_do_cq_asyncs(ibtl_cq);
951 
952 		} else {
953 			if (ibtl_async_thread_exit == IBTL_THREAD_EXIT)
954 				break;
955 			mutex_enter(&cpr_mutex);
956 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
957 			mutex_exit(&cpr_mutex);
958 
959 			cv_wait(&ibtl_async_cv, &ibtl_async_mutex);
960 
961 			mutex_exit(&ibtl_async_mutex);
962 			mutex_enter(&cpr_mutex);
963 			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
964 			mutex_exit(&cpr_mutex);
965 			mutex_enter(&ibtl_async_mutex);
966 		}
967 	}
968 
969 	mutex_exit(&ibtl_async_mutex);
970 
971 #ifndef __lock_lint
972 	mutex_enter(&cpr_mutex);
973 	CALLB_CPR_EXIT(&cprinfo);
974 #endif
975 	mutex_destroy(&cpr_mutex);
976 }
977 
978 
979 void
980 ibtl_free_qp_async_check(ibtl_qp_t *ibtl_qp)
981 {
982 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_qp_async_check(%p)", ibtl_qp);
983 
984 	mutex_enter(&ibtl_async_mutex);
985 
986 	/*
987 	 * If there is an active async, mark this object to be freed
988 	 * by the async_thread when it's done.
989 	 */
990 	if (ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) {
991 		ibtl_qp->qp_async_flags |= IBTL_ASYNC_FREE_OBJECT;
992 		mutex_exit(&ibtl_async_mutex);
993 	} else {	/* free the object now */
994 		mutex_exit(&ibtl_async_mutex);
995 		cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
996 		mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
997 		kmem_free(IBTL_QP2CHAN(ibtl_qp), sizeof (ibtl_channel_t));
998 	}
999 }
1000 
1001 void
1002 ibtl_free_cq_async_check(ibtl_cq_t *ibtl_cq)
1003 {
1004 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_cq_async_check(%p)", ibtl_cq);
1005 
1006 	mutex_enter(&ibtl_async_mutex);
1007 
1008 	/* if there is an active async, mark this object to be freed */
1009 	if (ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) {
1010 		ibtl_cq->cq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1011 		mutex_exit(&ibtl_async_mutex);
1012 	} else {	/* free the object now */
1013 		mutex_exit(&ibtl_async_mutex);
1014 		mutex_destroy(&ibtl_cq->cq_mutex);
1015 		kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
1016 	}
1017 }
1018 
1019 void
1020 ibtl_free_srq_async_check(ibtl_srq_t *ibtl_srq)
1021 {
1022 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_srq_async_check(%p)",
1023 	    ibtl_srq);
1024 
1025 	mutex_enter(&ibtl_async_mutex);
1026 
1027 	/* if there is an active async, mark this object to be freed */
1028 	if (ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) {
1029 		ibtl_srq->srq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1030 		mutex_exit(&ibtl_async_mutex);
1031 	} else {	/* free the object now */
1032 		mutex_exit(&ibtl_async_mutex);
1033 		kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
1034 	}
1035 }
1036 
1037 void
1038 ibtl_free_eec_async_check(ibtl_eec_t *ibtl_eec)
1039 {
1040 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_eec_async_check(%p)",
1041 	    ibtl_eec);
1042 
1043 	mutex_enter(&ibtl_async_mutex);
1044 
1045 	/* if there is an active async, mark this object to be freed */
1046 	if (ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) {
1047 		ibtl_eec->eec_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1048 		mutex_exit(&ibtl_async_mutex);
1049 	} else {	/* free the object now */
1050 		mutex_exit(&ibtl_async_mutex);
1051 		kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
1052 	}
1053 }
1054 
1055 /*
1056  * This function differs from above in that we assume this is called
1057  * from non-interrupt context, and never called from the async_thread.
1058  */
1059 
1060 void
1061 ibtl_free_hca_async_check(ibtl_hca_t *ibt_hca)
1062 {
1063 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_hca_async_check(%p)",
1064 	    ibt_hca);
1065 
1066 	mutex_enter(&ibtl_async_mutex);
1067 
1068 	/* if there is an active async, mark this object to be freed */
1069 	if (ibt_hca->ha_async_cnt > 0) {
1070 		ibt_hca->ha_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1071 		mutex_exit(&ibtl_async_mutex);
1072 	} else {	/* free the object now */
1073 		mutex_exit(&ibtl_async_mutex);
1074 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
1075 	}
1076 }
1077 
1078 /*
1079  * Completion Queue Handling.
1080  *
1081  *	A completion queue can be handled through a simple callback
1082  *	at interrupt level, or it may be queued for an ibtl_cq_thread
1083  *	to handle.  The latter is chosen during ibt_alloc_cq when the
1084  *	IBTF_CQ_HANDLER_IN_THREAD is specified.
1085  */
1086 
1087 static void
1088 ibtl_cq_handler_call(ibtl_cq_t *ibtl_cq)
1089 {
1090 	ibt_cq_handler_t	cq_handler;
1091 	void			*arg;
1092 
1093 	IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_cq_handler_call(%p)", ibtl_cq);
1094 
1095 	mutex_enter(&ibtl_cq->cq_mutex);
1096 	cq_handler = ibtl_cq->cq_comp_handler;
1097 	arg = ibtl_cq->cq_arg;
1098 	if (cq_handler != NULL)
1099 		cq_handler(ibtl_cq, arg);
1100 	else
1101 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_cq_handler_call: "
1102 		    "no cq_handler for cq %p", ibtl_cq);
1103 	mutex_exit(&ibtl_cq->cq_mutex);
1104 }
1105 
1106 /*
1107  * Before ibt_free_cq can continue, we need to ensure no more cq_handler
1108  * callbacks can occur.  When we get the mutex, we know there are no
1109  * outstanding cq_handler callbacks.  We set the cq_handler to NULL to
1110  * prohibit future callbacks.
1111  */
1112 void
1113 ibtl_free_cq_check(ibtl_cq_t *ibtl_cq)
1114 {
1115 	mutex_enter(&ibtl_cq->cq_mutex);
1116 	ibtl_cq->cq_comp_handler = NULL;
1117 	mutex_exit(&ibtl_cq->cq_mutex);
1118 	if (ibtl_cq->cq_in_thread) {
1119 		mutex_enter(&ibtl_cq_mutex);
1120 		--ibtl_cqs_using_threads;
1121 		while (ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) {
1122 			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
1123 			ibtl_cq->cq_impl_flags |= IBTL_CQ_FREE;
1124 			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
1125 		}
1126 		mutex_exit(&ibtl_cq_mutex);
1127 	}
1128 }
1129 
1130 /*
1131  * Loop forever, calling cq_handlers until the cq list
1132  * is empty.
1133  */
1134 
1135 static void
1136 ibtl_cq_thread(void)
1137 {
1138 #ifndef __lock_lint
1139 	kmutex_t cpr_mutex;
1140 #endif
1141 	callb_cpr_t	cprinfo;
1142 
1143 	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
1144 	_NOTE(NO_COMPETING_THREADS_NOW)
1145 	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
1146 	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
1147 	    "ibtl_cq_thread");
1148 #ifndef lint
1149 	_NOTE(COMPETING_THREADS_NOW)
1150 #endif
1151 
1152 	mutex_enter(&ibtl_cq_mutex);
1153 
1154 	for (;;) {
1155 		if (ibtl_cq_list_start) {
1156 			ibtl_cq_t *ibtl_cq;
1157 
1158 			ibtl_cq = ibtl_cq_list_start;
1159 			ibtl_cq_list_start = ibtl_cq->cq_link;
1160 			ibtl_cq->cq_link = NULL;
1161 			if (ibtl_cq == ibtl_cq_list_end)
1162 				ibtl_cq_list_end = NULL;
1163 
1164 			while (ibtl_cq->cq_impl_flags & IBTL_CQ_CALL_CLIENT) {
1165 				ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
1166 				mutex_exit(&ibtl_cq_mutex);
1167 				ibtl_cq_handler_call(ibtl_cq);
1168 				mutex_enter(&ibtl_cq_mutex);
1169 			}
1170 			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_PENDING;
1171 			if (ibtl_cq->cq_impl_flags & IBTL_CQ_FREE)
1172 				cv_broadcast(&ibtl_cq_cv);
1173 		} else {
1174 			if (ibtl_cq_thread_exit == IBTL_THREAD_EXIT)
1175 				break;
1176 			mutex_enter(&cpr_mutex);
1177 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1178 			mutex_exit(&cpr_mutex);
1179 
1180 			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
1181 
1182 			mutex_exit(&ibtl_cq_mutex);
1183 			mutex_enter(&cpr_mutex);
1184 			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
1185 			mutex_exit(&cpr_mutex);
1186 			mutex_enter(&ibtl_cq_mutex);
1187 		}
1188 	}
1189 
1190 	mutex_exit(&ibtl_cq_mutex);
1191 #ifndef __lock_lint
1192 	mutex_enter(&cpr_mutex);
1193 	CALLB_CPR_EXIT(&cprinfo);
1194 #endif
1195 	mutex_destroy(&cpr_mutex);
1196 }
1197 
1198 
1199 /*
1200  * ibc_cq_handler()
1201  *
1202  *    Completion Queue Notification Handler.
1203  *
1204  */
1205 /*ARGSUSED*/
1206 void
1207 ibc_cq_handler(ibc_clnt_hdl_t ibc_hdl, ibt_cq_hdl_t ibtl_cq)
1208 {
1209 	IBTF_DPRINTF_L4(ibtf_handlers, "ibc_cq_handler(%p, %p)",
1210 	    ibc_hdl, ibtl_cq);
1211 
1212 	if (ibtl_cq->cq_in_thread) {
1213 		mutex_enter(&ibtl_cq_mutex);
1214 		ibtl_cq->cq_impl_flags |= IBTL_CQ_CALL_CLIENT;
1215 		if ((ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) == 0) {
1216 			ibtl_cq->cq_impl_flags |= IBTL_CQ_PENDING;
1217 			ibtl_cq->cq_link = NULL;
1218 			if (ibtl_cq_list_end == NULL)
1219 				ibtl_cq_list_start = ibtl_cq;
1220 			else
1221 				ibtl_cq_list_end->cq_link = ibtl_cq;
1222 			ibtl_cq_list_end = ibtl_cq;
1223 			cv_signal(&ibtl_cq_cv);
1224 		}
1225 		mutex_exit(&ibtl_cq_mutex);
1226 		return;
1227 	} else
1228 		ibtl_cq_handler_call(ibtl_cq);
1229 }
1230 
1231 
1232 /*
1233  * ibt_enable_cq_notify()
1234  *      Enable Notification requests on the specified CQ.
1235  *
1236  *      ibt_cq          The CQ handle.
1237  *
1238  *      notify_type     Enable notifications for all (IBT_NEXT_COMPLETION)
1239  *                      completions, or the next Solicited completion
1240  *                      (IBT_NEXT_SOLICITED) only.
1241  *
1242  *	Completion notifications are disabled by setting the completion
1243  *	handler to NULL by calling ibt_set_cq_handler().
1244  */
1245 ibt_status_t
1246 ibt_enable_cq_notify(ibt_cq_hdl_t ibtl_cq, ibt_cq_notify_flags_t notify_type)
1247 {
1248 	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_enable_cq_notify(%p, %d)",
1249 	    ibtl_cq, notify_type);
1250 
1251 	return (IBTL_CQ2CIHCAOPS_P(ibtl_cq)->ibc_notify_cq(
1252 	    IBTL_CQ2CIHCA(ibtl_cq), ibtl_cq->cq_ibc_cq_hdl, notify_type));
1253 }
1254 
1255 
1256 /*
1257  * ibt_set_cq_handler()
1258  *      Register a work request completion handler with the IBTF.
1259  *
1260  *      ibt_cq                  The CQ handle.
1261  *
1262  *      completion_handler      The completion handler.
1263  *
1264  *      arg                     The IBTF client private argument to be passed
1265  *                              back to the client when calling the CQ
1266  *                              completion handler.
1267  *
1268  *	Completion notifications are disabled by setting the completion
1269  *	handler to NULL.  When setting the handler to NULL, no additional
1270  *	calls to the previous CQ handler will be initiated, but there may
1271  *	be one in progress.
1272  *
1273  *      This function does not otherwise change the state of previous
1274  *      calls to ibt_enable_cq_notify().
1275  */
1276 void
1277 ibt_set_cq_handler(ibt_cq_hdl_t ibtl_cq, ibt_cq_handler_t completion_handler,
1278     void *arg)
1279 {
1280 	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_set_cq_handler(%p, %p, %p)",
1281 	    ibtl_cq, completion_handler, arg);
1282 
1283 	mutex_enter(&ibtl_cq->cq_mutex);
1284 	ibtl_cq->cq_comp_handler = completion_handler;
1285 	ibtl_cq->cq_arg = arg;
1286 	mutex_exit(&ibtl_cq->cq_mutex);
1287 }
1288 
1289 
1290 /*
1291  * Inform IBT clients about New HCAs.
1292  *
1293  *	We use taskqs to allow simultaneous notification, with sleeping.
1294  *	Since taskqs only allow one argument, we define a structure
1295  *	because we need to pass in two arguments.
1296  */
1297 
1298 struct ibtl_new_hca_s {
1299 	ibtl_clnt_t		*nh_clntp;
1300 	ibtl_hca_devinfo_t	*nh_hca_devp;
1301 	ibt_async_code_t	nh_code;
1302 };
1303 
1304 static void
1305 ibtl_tell_client_about_new_hca(void *arg)
1306 {
1307 	struct ibtl_new_hca_s	*new_hcap = (struct ibtl_new_hca_s *)arg;
1308 	ibtl_clnt_t		*clntp = new_hcap->nh_clntp;
1309 	ibt_async_event_t	async_event;
1310 	ibtl_hca_devinfo_t	*hca_devp = new_hcap->nh_hca_devp;
1311 
1312 	bzero(&async_event, sizeof (async_event));
1313 	async_event.ev_hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
1314 	clntp->clnt_modinfop->mi_async_handler(
1315 	    clntp->clnt_private, NULL, new_hcap->nh_code, &async_event);
1316 	kmem_free(new_hcap, sizeof (*new_hcap));
1317 #ifdef __lock_lint
1318 	{
1319 		ibt_hca_hdl_t hca_hdl;
1320 		(void) ibt_open_hca(clntp, 0ULL, &hca_hdl);
1321 	}
1322 #endif
1323 	mutex_enter(&ibtl_clnt_list_mutex);
1324 	if (--hca_devp->hd_async_task_cnt == 0)
1325 		cv_signal(&hca_devp->hd_async_task_cv);
1326 	if (--clntp->clnt_async_cnt == 0)
1327 		cv_broadcast(&ibtl_clnt_cv);
1328 	mutex_exit(&ibtl_clnt_list_mutex);
1329 }
1330 
1331 /*
1332  * ibtl_announce_new_hca:
1333  *
1334  *	o First attach these clients in the given order
1335  *		IBMA
1336  *		IBCM
1337  *
1338  *	o Next attach all other clients in parallel.
1339  *
1340  * NOTE: Use the taskq to simultaneously notify all clients of the new HCA.
1341  * Retval from clients is ignored.
1342  */
1343 void
1344 ibtl_announce_new_hca(ibtl_hca_devinfo_t *hca_devp)
1345 {
1346 	ibtl_clnt_t		*clntp;
1347 	struct ibtl_new_hca_s	*new_hcap;
1348 
1349 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_announce_new_hca(%p, %llX)",
1350 	    hca_devp, hca_devp->hd_hca_attr->hca_node_guid);
1351 
1352 	mutex_enter(&ibtl_clnt_list_mutex);
1353 
1354 	clntp = ibtl_clnt_list;
1355 	while (clntp != NULL) {
1356 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
1357 			IBTF_DPRINTF_L4(ibtf_handlers,
1358 			    "ibtl_announce_new_hca: calling IBMF");
1359 			if (clntp->clnt_modinfop->mi_async_handler) {
1360 				_NOTE(NO_COMPETING_THREADS_NOW)
1361 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1362 				    KM_SLEEP);
1363 				new_hcap->nh_clntp = clntp;
1364 				new_hcap->nh_hca_devp = hca_devp;
1365 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1366 #ifndef lint
1367 				_NOTE(COMPETING_THREADS_NOW)
1368 #endif
1369 				clntp->clnt_async_cnt++;
1370 				hca_devp->hd_async_task_cnt++;
1371 
1372 				(void) taskq_dispatch(ibtl_async_taskq,
1373 				    ibtl_tell_client_about_new_hca, new_hcap,
1374 				    TQ_SLEEP);
1375 			}
1376 			break;
1377 		}
1378 		clntp = clntp->clnt_list_link;
1379 	}
1380 	if (clntp != NULL)
1381 		while (clntp->clnt_async_cnt > 0)
1382 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1383 	clntp = ibtl_clnt_list;
1384 	while (clntp != NULL) {
1385 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
1386 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
1387 			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
1388 			if (clntp->clnt_modinfop->mi_async_handler) {
1389 				_NOTE(NO_COMPETING_THREADS_NOW)
1390 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1391 				    KM_SLEEP);
1392 				new_hcap->nh_clntp = clntp;
1393 				new_hcap->nh_hca_devp = hca_devp;
1394 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1395 #ifndef lint
1396 				_NOTE(COMPETING_THREADS_NOW)
1397 #endif
1398 				clntp->clnt_async_cnt++;
1399 				hca_devp->hd_async_task_cnt++;
1400 
1401 				mutex_exit(&ibtl_clnt_list_mutex);
1402 				(void) ibtl_tell_client_about_new_hca(
1403 				    new_hcap);
1404 				mutex_enter(&ibtl_clnt_list_mutex);
1405 			}
1406 			break;
1407 		}
1408 		clntp = clntp->clnt_list_link;
1409 	}
1410 
1411 	clntp = ibtl_clnt_list;
1412 	while (clntp != NULL) {
1413 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_CM) {
1414 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
1415 			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
1416 			if (clntp->clnt_modinfop->mi_async_handler) {
1417 				_NOTE(NO_COMPETING_THREADS_NOW)
1418 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1419 				    KM_SLEEP);
1420 				new_hcap->nh_clntp = clntp;
1421 				new_hcap->nh_hca_devp = hca_devp;
1422 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1423 #ifndef lint
1424 				_NOTE(COMPETING_THREADS_NOW)
1425 #endif
1426 				clntp->clnt_async_cnt++;
1427 				hca_devp->hd_async_task_cnt++;
1428 
1429 				(void) taskq_dispatch(ibtl_async_taskq,
1430 				    ibtl_tell_client_about_new_hca, new_hcap,
1431 				    TQ_SLEEP);
1432 			}
1433 			break;
1434 		}
1435 		clntp = clntp->clnt_list_link;
1436 	}
1437 	if (clntp != NULL)
1438 		while (clntp->clnt_async_cnt > 0)
1439 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1440 	clntp = ibtl_clnt_list;
1441 	while (clntp != NULL) {
1442 		if ((clntp->clnt_modinfop->mi_clnt_class != IBT_DM) &&
1443 		    (clntp->clnt_modinfop->mi_clnt_class != IBT_CM) &&
1444 		    (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA)) {
1445 			IBTF_DPRINTF_L4(ibtf_handlers,
1446 			    "ibtl_announce_new_hca: Calling %s ",
1447 			    clntp->clnt_modinfop->mi_clnt_name);
1448 			if (clntp->clnt_modinfop->mi_async_handler) {
1449 				_NOTE(NO_COMPETING_THREADS_NOW)
1450 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1451 				    KM_SLEEP);
1452 				new_hcap->nh_clntp = clntp;
1453 				new_hcap->nh_hca_devp = hca_devp;
1454 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1455 #ifndef lint
1456 				_NOTE(COMPETING_THREADS_NOW)
1457 #endif
1458 				clntp->clnt_async_cnt++;
1459 				hca_devp->hd_async_task_cnt++;
1460 
1461 				(void) taskq_dispatch(ibtl_async_taskq,
1462 				    ibtl_tell_client_about_new_hca, new_hcap,
1463 				    TQ_SLEEP);
1464 			}
1465 		}
1466 		clntp = clntp->clnt_list_link;
1467 	}
1468 
1469 	/* wait for all tasks to complete */
1470 	while (hca_devp->hd_async_task_cnt != 0)
1471 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1472 
1473 	/* wakeup thread that may be waiting to send an HCA async */
1474 	ASSERT(hca_devp->hd_async_busy == 1);
1475 	hca_devp->hd_async_busy = 0;
1476 	cv_broadcast(&hca_devp->hd_async_busy_cv);
1477 	mutex_exit(&ibtl_clnt_list_mutex);
1478 }
1479 
1480 /*
1481  * ibtl_detach_all_clients:
1482  *
1483  *	Return value - 0 for Success, 1 for Failure
1484  *
1485  *	o First detach general clients.
1486  *
1487  *	o Next detach these clients
1488  *		IBCM
1489  *		IBDM
1490  *
1491  *	o Finally, detach this client
1492  *		IBMA
1493  */
1494 int
1495 ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp)
1496 {
1497 	ib_guid_t		hcaguid = hca_devp->hd_hca_attr->hca_node_guid;
1498 	ibtl_hca_t		*ibt_hca;
1499 	ibtl_clnt_t		*clntp;
1500 	int			retval;
1501 
1502 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_detach_all_clients(%llX)",
1503 	    hcaguid);
1504 
1505 	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));
1506 
1507 	while (hca_devp->hd_async_busy)
1508 		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
1509 	hca_devp->hd_async_busy = 1;
1510 
1511 	/* First inform general clients asynchronously */
1512 	hca_devp->hd_async_event.ev_hca_guid = hcaguid;
1513 	hca_devp->hd_async_event.ev_fma_ena = 0;
1514 	hca_devp->hd_async_event.ev_chan_hdl = NULL;
1515 	hca_devp->hd_async_event.ev_cq_hdl = NULL;
1516 	hca_devp->hd_async_code = IBT_HCA_DETACH_EVENT;
1517 
1518 	ibt_hca = hca_devp->hd_clnt_list;
1519 	while (ibt_hca != NULL) {
1520 		clntp = ibt_hca->ha_clnt_devp;
1521 		if (IBTL_GENERIC_CLIENT(clntp)) {
1522 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1523 			mutex_enter(&ibtl_async_mutex);
1524 			ibt_hca->ha_async_cnt++;
1525 			mutex_exit(&ibtl_async_mutex);
1526 			hca_devp->hd_async_task_cnt++;
1527 
1528 			(void) taskq_dispatch(ibtl_async_taskq,
1529 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
1530 		}
1531 		ibt_hca = ibt_hca->ha_clnt_link;
1532 	}
1533 
1534 	/* wait for all clients to complete */
1535 	while (hca_devp->hd_async_task_cnt != 0) {
1536 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1537 	}
1538 	/* Go thru the clients and check if any have not closed this HCA. */
1539 	retval = 0;
1540 	ibt_hca = hca_devp->hd_clnt_list;
1541 	while (ibt_hca != NULL) {
1542 		clntp = ibt_hca->ha_clnt_devp;
1543 		if (IBTL_GENERIC_CLIENT(clntp)) {
1544 			IBTF_DPRINTF_L2(ibtf_handlers,
1545 			    "ibtl_detach_all_clients: "
1546 			    "client '%s' failed to close the HCA.",
1547 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1548 			retval = 1;
1549 		}
1550 		ibt_hca = ibt_hca->ha_clnt_link;
1551 	}
1552 	if (retval == 1)
1553 		goto bailout;
1554 
1555 	/* Next inform IBDM asynchronously */
1556 	ibt_hca = hca_devp->hd_clnt_list;
1557 	while (ibt_hca != NULL) {
1558 		clntp = ibt_hca->ha_clnt_devp;
1559 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
1560 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1561 			mutex_enter(&ibtl_async_mutex);
1562 			ibt_hca->ha_async_cnt++;
1563 			mutex_exit(&ibtl_async_mutex);
1564 			hca_devp->hd_async_task_cnt++;
1565 
1566 			mutex_exit(&ibtl_clnt_list_mutex);
1567 			ibtl_hca_client_async_task(ibt_hca);
1568 			mutex_enter(&ibtl_clnt_list_mutex);
1569 			break;
1570 		}
1571 		ibt_hca = ibt_hca->ha_clnt_link;
1572 	}
1573 
1574 	/*
1575 	 * Next inform IBCM.
1576 	 * As IBCM doesn't perform ibt_open_hca(), IBCM will not be
1577 	 * accessible via hca_devp->hd_clnt_list.
1578 	 * ibtl_cm_async_handler will NOT be NULL, if IBCM is registered.
1579 	 */
1580 	if (ibtl_cm_async_handler) {
1581 		ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
1582 		    ibtl_cm_clnt_private);
1583 
1584 		/* wait for all tasks to complete */
1585 		while (hca_devp->hd_async_task_cnt != 0)
1586 			cv_wait(&hca_devp->hd_async_task_cv,
1587 			    &ibtl_clnt_list_mutex);
1588 	}
1589 
1590 	/* Go thru the clients and check if any have not closed this HCA. */
1591 	retval = 0;
1592 	ibt_hca = hca_devp->hd_clnt_list;
1593 	while (ibt_hca != NULL) {
1594 		clntp = ibt_hca->ha_clnt_devp;
1595 		if (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA) {
1596 			IBTF_DPRINTF_L2(ibtf_handlers,
1597 			    "ibtl_detach_all_clients: "
1598 			    "client '%s' failed to close the HCA.",
1599 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1600 			retval = 1;
1601 		}
1602 		ibt_hca = ibt_hca->ha_clnt_link;
1603 	}
1604 	if (retval == 1)
1605 		goto bailout;
1606 
1607 	/* Finally, inform IBMA */
1608 	ibt_hca = hca_devp->hd_clnt_list;
1609 	while (ibt_hca != NULL) {
1610 		clntp = ibt_hca->ha_clnt_devp;
1611 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
1612 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1613 			mutex_enter(&ibtl_async_mutex);
1614 			ibt_hca->ha_async_cnt++;
1615 			mutex_exit(&ibtl_async_mutex);
1616 			hca_devp->hd_async_task_cnt++;
1617 
1618 			(void) taskq_dispatch(ibtl_async_taskq,
1619 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
1620 		} else
1621 			IBTF_DPRINTF_L2(ibtf_handlers,
1622 			    "ibtl_detach_all_clients: "
1623 			    "client '%s' is unexpectedly on the client list",
1624 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1625 		ibt_hca = ibt_hca->ha_clnt_link;
1626 	}
1627 
1628 	/* wait for IBMA to complete */
1629 	while (hca_devp->hd_async_task_cnt != 0) {
1630 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1631 	}
1632 
1633 	/* Check if this HCA's client list is empty. */
1634 	ibt_hca = hca_devp->hd_clnt_list;
1635 	if (ibt_hca != NULL) {
1636 		IBTF_DPRINTF_L2(ibtf_handlers,
1637 		    "ibtl_detach_all_clients: "
1638 		    "client '%s' failed to close the HCA.",
1639 		    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1640 		retval = 1;
1641 	} else
1642 		retval = 0;
1643 
1644 bailout:
1645 	if (retval) {
1646 		hca_devp->hd_state = IBTL_HCA_DEV_ATTACHED; /* fix hd_state */
1647 		mutex_exit(&ibtl_clnt_list_mutex);
1648 		ibtl_announce_new_hca(hca_devp);
1649 		mutex_enter(&ibtl_clnt_list_mutex);
1650 	} else {
1651 		hca_devp->hd_async_busy = 0;
1652 		cv_broadcast(&hca_devp->hd_async_busy_cv);
1653 	}
1654 
1655 	return (retval);
1656 }
1657 
1658 void
1659 ibtl_free_clnt_async_check(ibtl_clnt_t *clntp)
1660 {
1661 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_clnt_async_check(%p)", clntp);
1662 
1663 	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));
1664 
1665 	/* wait for all asyncs based on "ibtl_clnt_list" to complete */
1666 	while (clntp->clnt_async_cnt != 0) {
1667 		cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1668 	}
1669 }
1670 
1671 static void
1672 ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp)
1673 {
1674 	mutex_enter(&ibtl_clnt_list_mutex);
1675 	if (--clntp->clnt_async_cnt == 0) {
1676 		cv_broadcast(&ibtl_clnt_cv);
1677 	}
1678 	mutex_exit(&ibtl_clnt_list_mutex);
1679 }
1680 
1681 static void
1682 ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp)
1683 {
1684 	mutex_enter(&ibtl_clnt_list_mutex);
1685 	++clntp->clnt_async_cnt;
1686 	mutex_exit(&ibtl_clnt_list_mutex);
1687 }
1688 
1689 
1690 /*
1691  * Functions and data structures to inform clients that a notification
1692  * has occurred about Multicast Groups that might interest them.
1693  */
1694 struct ibtl_sm_notice {
1695 	ibt_clnt_hdl_t		np_ibt_hdl;
1696 	ib_gid_t		np_sgid;
1697 	ibt_subnet_event_code_t	np_code;
1698 	ibt_subnet_event_t	np_event;
1699 };
1700 
1701 static void
1702 ibtl_sm_notice_task(void *arg)
1703 {
1704 	struct ibtl_sm_notice *noticep = (struct ibtl_sm_notice *)arg;
1705 	ibt_clnt_hdl_t ibt_hdl = noticep->np_ibt_hdl;
1706 	ibt_sm_notice_handler_t sm_notice_handler;
1707 
1708 	sm_notice_handler = ibt_hdl->clnt_sm_trap_handler;
1709 	if (sm_notice_handler != NULL)
1710 		sm_notice_handler(ibt_hdl->clnt_sm_trap_handler_arg,
1711 		    noticep->np_sgid, noticep->np_code, &noticep->np_event);
1712 	kmem_free(noticep, sizeof (*noticep));
1713 	ibtl_dec_clnt_async_cnt(ibt_hdl);
1714 }
1715 
1716 /*
1717  * Inform the client that MCG notices are not working at this time.
1718  */
1719 void
1720 ibtl_cm_sm_notice_init_failure(ibtl_cm_sm_init_fail_t *ifail)
1721 {
1722 	ibt_clnt_hdl_t ibt_hdl = ifail->smf_ibt_hdl;
1723 	struct ibtl_sm_notice *noticep;
1724 	ib_gid_t *sgidp = &ifail->smf_sgid[0];
1725 	int i;
1726 
1727 	for (i = 0; i < ifail->smf_num_sgids; i++) {
1728 		_NOTE(NO_COMPETING_THREADS_NOW)
1729 		noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1730 		noticep->np_ibt_hdl = ibt_hdl;
1731 		noticep->np_sgid = *sgidp++;
1732 		noticep->np_code = IBT_SM_EVENT_UNAVAILABLE;
1733 #ifndef lint
1734 		_NOTE(COMPETING_THREADS_NOW)
1735 #endif
1736 		ibtl_inc_clnt_async_cnt(ibt_hdl);
1737 		(void) taskq_dispatch(ibtl_async_taskq,
1738 		    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1739 	}
1740 }
1741 
1742 /*
1743  * Inform all clients of the event.
1744  */
1745 void
1746 ibtl_cm_sm_notice_handler(ib_gid_t sgid, ibt_subnet_event_code_t code,
1747     ibt_subnet_event_t *event)
1748 {
1749 	_NOTE(NO_COMPETING_THREADS_NOW)
1750 	struct ibtl_sm_notice	*noticep;
1751 	ibtl_clnt_t		*clntp;
1752 
1753 	mutex_enter(&ibtl_clnt_list_mutex);
1754 	clntp = ibtl_clnt_list;
1755 	while (clntp != NULL) {
1756 		if (clntp->clnt_sm_trap_handler) {
1757 			noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1758 			noticep->np_ibt_hdl = clntp;
1759 			noticep->np_sgid = sgid;
1760 			noticep->np_code = code;
1761 			noticep->np_event = *event;
1762 			++clntp->clnt_async_cnt;
1763 			(void) taskq_dispatch(ibtl_async_taskq,
1764 			    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1765 		}
1766 		clntp = clntp->clnt_list_link;
1767 	}
1768 	mutex_exit(&ibtl_clnt_list_mutex);
1769 #ifndef lint
1770 	_NOTE(COMPETING_THREADS_NOW)
1771 #endif
1772 }
1773 
1774 /*
1775  * Record the handler for this client.
1776  */
1777 void
1778 ibtl_cm_set_sm_notice_handler(ibt_clnt_hdl_t ibt_hdl,
1779     ibt_sm_notice_handler_t sm_notice_handler, void *private)
1780 {
1781 	_NOTE(NO_COMPETING_THREADS_NOW)
1782 	ibt_hdl->clnt_sm_trap_handler = sm_notice_handler;
1783 	ibt_hdl->clnt_sm_trap_handler_arg = private;
1784 #ifndef lint
1785 	_NOTE(COMPETING_THREADS_NOW)
1786 #endif
1787 }
1788 
1789 
1790 /*
1791  * ibtl_another_cq_handler_in_thread()
1792  *
1793  * Conditionally increase the number of cq_threads.
1794  * The number of threads grows, based on the number of cqs using threads.
1795  *
1796  * The table below controls the number of threads as follows:
1797  *
1798  *	Number of CQs	Number of cq_threads
1799  *		0		0
1800  *		1		1
1801  *		2-3		2
1802  *		4-5		3
1803  *		6-9		4
1804  *		10-15		5
1805  *		16-23		6
1806  *		24-31		7
1807  *		32+		8
1808  */
1809 
1810 #define	IBTL_CQ_MAXTHREADS 8
1811 static uint8_t ibtl_cq_scaling[IBTL_CQ_MAXTHREADS] = {
1812 	1, 2, 4, 6, 10, 16, 24, 32
1813 };
1814 
1815 static kt_did_t ibtl_cq_did[IBTL_CQ_MAXTHREADS];
1816 
1817 void
1818 ibtl_another_cq_handler_in_thread(void)
1819 {
1820 	kthread_t *t;
1821 	int my_idx;
1822 
1823 	mutex_enter(&ibtl_cq_mutex);
1824 	if ((ibtl_cq_threads == IBTL_CQ_MAXTHREADS) ||
1825 	    (++ibtl_cqs_using_threads < ibtl_cq_scaling[ibtl_cq_threads])) {
1826 		mutex_exit(&ibtl_cq_mutex);
1827 		return;
1828 	}
1829 	my_idx = ibtl_cq_threads++;
1830 	mutex_exit(&ibtl_cq_mutex);
1831 	t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0, TS_RUN,
1832 	    ibtl_pri - 1);
1833 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1834 	ibtl_cq_did[my_idx] = t->t_did;	/* save for thread_join() */
1835 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1836 }
1837 
1838 void
1839 ibtl_thread_init(void)
1840 {
1841 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init()");
1842 
1843 	mutex_init(&ibtl_async_mutex, NULL, MUTEX_DEFAULT, NULL);
1844 	cv_init(&ibtl_async_cv, NULL, CV_DEFAULT, NULL);
1845 	cv_init(&ibtl_clnt_cv, NULL, CV_DEFAULT, NULL);
1846 
1847 	mutex_init(&ibtl_cq_mutex, NULL, MUTEX_DEFAULT, NULL);
1848 	cv_init(&ibtl_cq_cv, NULL, CV_DEFAULT, NULL);
1849 }
1850 
1851 void
1852 ibtl_thread_init2(void)
1853 {
1854 	int i;
1855 	static int initted = 0;
1856 	kthread_t *t;
1857 
1858 	mutex_enter(&ibtl_async_mutex);
1859 	if (initted == 1) {
1860 		mutex_exit(&ibtl_async_mutex);
1861 		return;
1862 	}
1863 	initted = 1;
1864 	mutex_exit(&ibtl_async_mutex);
1865 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_async_did))
1866 	ibtl_async_did = kmem_zalloc(ibtl_async_thread_init * sizeof (kt_did_t),
1867 	    KM_SLEEP);
1868 
1869 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init2()");
1870 
1871 	for (i = 0; i < ibtl_async_thread_init; i++) {
1872 		t = thread_create(NULL, 0, ibtl_async_thread, NULL, 0, &p0,
1873 		    TS_RUN, ibtl_pri - 1);
1874 		ibtl_async_did[i] = t->t_did; /* thread_join() */
1875 	}
1876 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_async_did))
1877 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1878 	for (i = 0; i < ibtl_cq_threads; i++) {
1879 		t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0,
1880 		    TS_RUN, ibtl_pri - 1);
1881 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1882 		ibtl_cq_did[i] = t->t_did; /* save for thread_join() */
1883 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1884 	}
1885 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1886 }
1887 
1888 void
1889 ibtl_thread_fini(void)
1890 {
1891 	int i;
1892 
1893 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_fini()");
1894 
1895 	/* undo the work done by ibtl_thread_init() */
1896 
1897 	mutex_enter(&ibtl_cq_mutex);
1898 	ibtl_cq_thread_exit = IBTL_THREAD_EXIT;
1899 	cv_broadcast(&ibtl_cq_cv);
1900 	mutex_exit(&ibtl_cq_mutex);
1901 
1902 	mutex_enter(&ibtl_async_mutex);
1903 	ibtl_async_thread_exit = IBTL_THREAD_EXIT;
1904 	cv_broadcast(&ibtl_async_cv);
1905 	mutex_exit(&ibtl_async_mutex);
1906 
1907 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1908 	for (i = 0; i < ibtl_cq_threads; i++)
1909 		thread_join(ibtl_cq_did[i]);
1910 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1911 
1912 	if (ibtl_async_did) {
1913 		for (i = 0; i < ibtl_async_thread_init; i++)
1914 			thread_join(ibtl_async_did[i]);
1915 
1916 		kmem_free(ibtl_async_did,
1917 		    ibtl_async_thread_init * sizeof (kt_did_t));
1918 	}
1919 	mutex_destroy(&ibtl_cq_mutex);
1920 	cv_destroy(&ibtl_cq_cv);
1921 
1922 	mutex_destroy(&ibtl_async_mutex);
1923 	cv_destroy(&ibtl_async_cv);
1924 	cv_destroy(&ibtl_clnt_cv);
1925 }
1926