xref: /illumos-gate/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c (revision 3a7782fe8269426104107f8b4144794a995733f0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/ib/ibtl/impl/ibtl.h>
27 #include <sys/ib/ibtl/impl/ibtl_cm.h>
28 #include <sys/taskq.h>
29 #include <sys/disp.h>
30 #include <sys/callb.h>
31 #include <sys/proc.h>
32 
33 /*
34  * ibtl_handlers.c
35  */
36 
37 /*
38  * What's in this file?
39  *
40  *   This file started as an implementation of Asynchronous Event/Error
41  *   handling and Completion Queue handling.  As the implementation
42  *   evolved, code has been added for other ibc_* interfaces (resume,
43  *   predetach, etc.) that use the same mechanisms as used for asyncs.
44  *
45  * Async and CQ handling at interrupt level.
46  *
47  *   CQ handling is normally done at interrupt level using the CQ callback
48  *   handler to call the appropriate IBT Client (owner of the CQ).  For
49  *   clients that would prefer a fully flexible non-interrupt context to
50  *   do their CQ handling, a CQ can be created so that its handler is
51  *   called from a non-interrupt thread.  CQ handling is done frequently
52  *   whereas Async handling is expected to occur very infrequently.
53  *
54  *   Async handling is done by marking (or'ing in of an async_code of) the
55  *   pertinent IBTL data structure, and then notifying the async_thread(s)
56  *   that the data structure has async work to be done.  The notification
57  *   occurs by linking the data structure through its async_link onto a
58  *   list of like data structures and waking up an async_thread.  This
59  *   list append is not done if there is already async work pending on
60  *   this data structure (IBTL_ASYNC_PENDING).
61  *
62  * Async Mutex and CQ Mutex
63  *
64  *   The global ibtl_async_mutex is "the" mutex used to control access
65  *   to all the data needed by ibc_async_handler.  All the threads that
66  *   use this mutex are written so that the mutex is held for very short
67  *   periods of time, and never held while making calls to functions
68  *   that may block.
69  *
70  *   The global ibtl_cq_mutex is used similarly by ibc_cq_handler and
71  *   the ibtl_cq_thread(s).
72  *
73  * Mutex hierarchy
74  *
75  *   The ibtl_clnt_list_mutex is above the ibtl_async_mutex.
76  *   ibtl_clnt_list_mutex protects all of the various lists.
77  *   The ibtl_async_mutex is below this in the hierarchy.
78  *
79  *   The ibtl_cq_mutex is independent of the above mutexes.
80  *
81  * Threads
82  *
83  *   There are "ibtl_cq_threads" number of threads created for handling
84  *   Completion Queues in threads.  If this feature really gets used,
85  *   then we will want to do some suitable tuning.  Similarly, we may
86  *   want to tune the number of "ibtl_async_thread_init".
87  *
88  *   The function ibtl_cq_thread is the main loop for handling a CQ in a
89  *   thread.  There can be multiple threads executing this same code.
90  *   The code sleeps when there is no work to be done (list is empty),
91  *   otherwise it pulls the first CQ structure off the list and performs
92  *   the CQ handler callback to the client.  After that returns, a check
93  *   is made, and if another ibc_cq_handler call was made for this CQ,
94  *   the client is called again.
95  *
96  *   The function ibtl_async_thread is the main loop for handling async
97  *   events/errors.  There can be multiple threads executing this same code.
98  *   The code sleeps when there is no work to be done (lists are empty),
99  *   otherwise it pulls the first structure off one of the lists and
100  *   performs the async callback(s) to the client(s).  Note that HCA
101  *   async handling is done by calling each of the clients using the HCA.
102  *   When the async handling completes, the data structure having the async
103  *   event/error is checked for more work before it's considered "done".
104  *
105  * Taskq
106  *
107  *   The async_taskq is used here for allowing async handler callbacks to
108  *   occur simultaneously to multiple clients of an HCA.  This taskq could
109  *   be used for other purposes, e.g., if all the async_threads are in
110  *   use, but this is deemed as overkill since asyncs should occur rarely.
111  */
112 
113 /* Globals */
114 static char ibtf_handlers[] = "ibtl_handlers";
115 
116 /* priority for IBTL threads (async, cq, and taskq) */
117 static pri_t ibtl_pri = MAXCLSYSPRI - 1; /* maybe override in /etc/system */
118 
119 /* taskq used for HCA asyncs */
120 #define	ibtl_async_taskq system_taskq
121 
122 /* data for async handling by threads */
123 static kmutex_t ibtl_async_mutex;	/* protects most *_async_* data */
124 static kcondvar_t ibtl_async_cv;	/* async_threads wait on this */
125 static kcondvar_t ibtl_clnt_cv;		/* ibt_detach might wait on this */
126 static void ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp);
127 static void ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp);
128 
129 static kt_did_t *ibtl_async_did;	/* for thread_join() */
130 int ibtl_async_thread_init = 4;	/* total # of async_threads to create */
131 static int ibtl_async_thread_exit = 0;	/* set if/when thread(s) should exit */
132 
133 /* async lists for various structures */
134 static ibtl_hca_devinfo_t *ibtl_async_hca_list_start, *ibtl_async_hca_list_end;
135 static ibtl_eec_t *ibtl_async_eec_list_start, *ibtl_async_eec_list_end;
136 static ibtl_qp_t *ibtl_async_qp_list_start, *ibtl_async_qp_list_end;
137 static ibtl_cq_t *ibtl_async_cq_list_start, *ibtl_async_cq_list_end;
138 static ibtl_srq_t *ibtl_async_srq_list_start, *ibtl_async_srq_list_end;
139 
140 /* data for CQ completion handling by threads */
141 static kmutex_t ibtl_cq_mutex;	/* protects the cv and the list below */
142 static kcondvar_t ibtl_cq_cv;
143 static ibtl_cq_t *ibtl_cq_list_start, *ibtl_cq_list_end;
144 
145 static int ibtl_cq_threads = 0;		/* total # of cq threads */
146 static int ibtl_cqs_using_threads = 0;	/* total # of cqs using threads */
147 static int ibtl_cq_thread_exit = 0;	/* set if/when thread(s) should exit */
148 
149 /* value used to tell IBTL threads to exit */
150 #define	IBTL_THREAD_EXIT 0x1b7fdead	/* IBTF DEAD */
151 
152 int ibtl_eec_not_supported = 1;
153 
154 char *ibtl_last_client_name;	/* may help debugging */
155 
156 _NOTE(LOCK_ORDER(ibtl_clnt_list_mutex ibtl_async_mutex))
157 
158 /*
159  * ibc_async_handler()
160  *
161  * Asynchronous Event/Error Handler.
162  *
163  *	This is the function called HCA drivers to post various async
164  *	event and errors mention in the IB architecture spec.  See
165  *	ibtl_types.h for additional details of this.
166  *
167  *	This function marks the pertinent IBTF object with the async_code,
168  *	and queues the object for handling by an ibtl_async_thread.  If
169  *	the object is NOT already marked for async processing, it is added
170  *	to the associated list for that type of object, and an
171  *	ibtl_async_thread is signaled to finish the async work.
172  */
173 void
174 ibc_async_handler(ibc_clnt_hdl_t hca_devp, ibt_async_code_t code,
175     ibc_async_event_t *event_p)
176 {
177 	ibtl_qp_t	*ibtl_qp;
178 	ibtl_cq_t	*ibtl_cq;
179 	ibtl_srq_t	*ibtl_srq;
180 	ibtl_eec_t	*ibtl_eec;
181 	uint8_t		port_minus1;
182 
183 	IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler(%p, 0x%x, %p)",
184 	    hca_devp, code, event_p);
185 
186 	mutex_enter(&ibtl_async_mutex);
187 
188 	switch (code) {
189 	case IBT_EVENT_PATH_MIGRATED_QP:
190 	case IBT_EVENT_SQD:
191 	case IBT_ERROR_CATASTROPHIC_QP:
192 	case IBT_ERROR_PATH_MIGRATE_REQ_QP:
193 	case IBT_EVENT_COM_EST_QP:
194 	case IBT_ERROR_INVALID_REQUEST_QP:
195 	case IBT_ERROR_ACCESS_VIOLATION_QP:
196 	case IBT_EVENT_EMPTY_QP:
197 		ibtl_qp = event_p->ev_qp_hdl;
198 		if (ibtl_qp == NULL) {
199 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
200 			    "bad qp handle");
201 			break;
202 		}
203 		switch (code) {
204 		case IBT_ERROR_CATASTROPHIC_QP:
205 			ibtl_qp->qp_cat_fma_ena = event_p->ev_fma_ena; break;
206 		case IBT_ERROR_PATH_MIGRATE_REQ_QP:
207 			ibtl_qp->qp_pth_fma_ena = event_p->ev_fma_ena; break;
208 		case IBT_ERROR_INVALID_REQUEST_QP:
209 			ibtl_qp->qp_inv_fma_ena = event_p->ev_fma_ena; break;
210 		case IBT_ERROR_ACCESS_VIOLATION_QP:
211 			ibtl_qp->qp_acc_fma_ena = event_p->ev_fma_ena; break;
212 		}
213 
214 		ibtl_qp->qp_async_codes |= code;
215 		if ((ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) == 0) {
216 			ibtl_qp->qp_async_flags |= IBTL_ASYNC_PENDING;
217 			ibtl_qp->qp_async_link = NULL;
218 			if (ibtl_async_qp_list_end == NULL)
219 				ibtl_async_qp_list_start = ibtl_qp;
220 			else
221 				ibtl_async_qp_list_end->qp_async_link = ibtl_qp;
222 			ibtl_async_qp_list_end = ibtl_qp;
223 			cv_signal(&ibtl_async_cv);
224 		}
225 		break;
226 
227 	case IBT_ERROR_CQ:
228 		ibtl_cq = event_p->ev_cq_hdl;
229 		if (ibtl_cq == NULL) {
230 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
231 			    "bad cq handle");
232 			break;
233 		}
234 		ibtl_cq->cq_async_codes |= code;
235 		ibtl_cq->cq_fma_ena = event_p->ev_fma_ena;
236 		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) == 0) {
237 			ibtl_cq->cq_async_flags |= IBTL_ASYNC_PENDING;
238 			ibtl_cq->cq_async_link = NULL;
239 			if (ibtl_async_cq_list_end == NULL)
240 				ibtl_async_cq_list_start = ibtl_cq;
241 			else
242 				ibtl_async_cq_list_end->cq_async_link = ibtl_cq;
243 			ibtl_async_cq_list_end = ibtl_cq;
244 			cv_signal(&ibtl_async_cv);
245 		}
246 		break;
247 
248 	case IBT_ERROR_CATASTROPHIC_SRQ:
249 	case IBT_EVENT_LIMIT_REACHED_SRQ:
250 		ibtl_srq = event_p->ev_srq_hdl;
251 		if (ibtl_srq == NULL) {
252 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
253 			    "bad srq handle");
254 			break;
255 		}
256 		ibtl_srq->srq_async_codes |= code;
257 		ibtl_srq->srq_fma_ena = event_p->ev_fma_ena;
258 		if ((ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) == 0) {
259 			ibtl_srq->srq_async_flags |= IBTL_ASYNC_PENDING;
260 			ibtl_srq->srq_async_link = NULL;
261 			if (ibtl_async_srq_list_end == NULL)
262 				ibtl_async_srq_list_start = ibtl_srq;
263 			else
264 				ibtl_async_srq_list_end->srq_async_link =
265 				    ibtl_srq;
266 			ibtl_async_srq_list_end = ibtl_srq;
267 			cv_signal(&ibtl_async_cv);
268 		}
269 		break;
270 
271 	case IBT_EVENT_PATH_MIGRATED_EEC:
272 	case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
273 	case IBT_ERROR_CATASTROPHIC_EEC:
274 	case IBT_EVENT_COM_EST_EEC:
275 		if (ibtl_eec_not_supported) {
276 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
277 			    "EEC events are disabled.");
278 			break;
279 		}
280 		ibtl_eec = event_p->ev_eec_hdl;
281 		if (ibtl_eec == NULL) {
282 			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
283 			    "bad eec handle");
284 			break;
285 		}
286 		switch (code) {
287 		case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
288 			ibtl_eec->eec_pth_fma_ena = event_p->ev_fma_ena; break;
289 		case IBT_ERROR_CATASTROPHIC_EEC:
290 			ibtl_eec->eec_cat_fma_ena = event_p->ev_fma_ena; break;
291 		}
292 		ibtl_eec->eec_async_codes |= code;
293 		if ((ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) == 0) {
294 			ibtl_eec->eec_async_flags |= IBTL_ASYNC_PENDING;
295 			ibtl_eec->eec_async_link = NULL;
296 			if (ibtl_async_eec_list_end == NULL)
297 				ibtl_async_eec_list_start = ibtl_eec;
298 			else
299 				ibtl_async_eec_list_end->eec_async_link =
300 				    ibtl_eec;
301 			ibtl_async_eec_list_end = ibtl_eec;
302 			cv_signal(&ibtl_async_cv);
303 		}
304 		break;
305 
306 	case IBT_ERROR_LOCAL_CATASTROPHIC:
307 		hca_devp->hd_async_codes |= code;
308 		hca_devp->hd_fma_ena = event_p->ev_fma_ena;
309 		/* FALLTHROUGH */
310 
311 	case IBT_EVENT_PORT_UP:
312 	case IBT_ERROR_PORT_DOWN:
313 		if ((code == IBT_EVENT_PORT_UP) ||
314 		    (code == IBT_ERROR_PORT_DOWN)) {
315 			if ((port_minus1 = event_p->ev_port - 1) >=
316 			    hca_devp->hd_hca_attr->hca_nports) {
317 				IBTF_DPRINTF_L2(ibtf_handlers,
318 				    "ibc_async_handler: bad port #: %d",
319 				    event_p->ev_port);
320 				break;
321 			}
322 			hca_devp->hd_async_port[port_minus1] =
323 			    ((code == IBT_EVENT_PORT_UP) ? IBTL_HCA_PORT_UP :
324 			    IBTL_HCA_PORT_DOWN) | IBTL_HCA_PORT_CHANGED;
325 			hca_devp->hd_async_codes |= code;
326 		}
327 
328 		if ((hca_devp->hd_async_flags & IBTL_ASYNC_PENDING) == 0) {
329 			hca_devp->hd_async_flags |= IBTL_ASYNC_PENDING;
330 			hca_devp->hd_async_link = NULL;
331 			if (ibtl_async_hca_list_end == NULL)
332 				ibtl_async_hca_list_start = hca_devp;
333 			else
334 				ibtl_async_hca_list_end->hd_async_link =
335 				    hca_devp;
336 			ibtl_async_hca_list_end = hca_devp;
337 			cv_signal(&ibtl_async_cv);
338 		}
339 
340 		break;
341 
342 	default:
343 		IBTF_DPRINTF_L1(ibtf_handlers, "ibc_async_handler: "
344 		    "invalid code (0x%x)", code);
345 	}
346 
347 	mutex_exit(&ibtl_async_mutex);
348 }
349 
350 
351 /* Finally, make the async call to the client. */
352 
353 static void
354 ibtl_async_client_call(ibtl_hca_t *ibt_hca, ibt_async_code_t code,
355     ibt_async_event_t *event_p)
356 {
357 	ibtl_clnt_t		*clntp;
358 	void			*client_private;
359 	ibt_async_handler_t	async_handler;
360 	char			*client_name;
361 
362 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call(%p, 0x%x, %p)",
363 	    ibt_hca, code, event_p);
364 
365 	clntp = ibt_hca->ha_clnt_devp;
366 
367 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
368 	/* Record who is being called (just a debugging aid) */
369 	ibtl_last_client_name = client_name = clntp->clnt_name;
370 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
371 
372 	client_private = clntp->clnt_private;
373 	async_handler = clntp->clnt_modinfop->mi_async_handler;
374 
375 	if (code & (IBT_EVENT_COM_EST_QP | IBT_EVENT_COM_EST_EEC)) {
376 		mutex_enter(&ibtl_clnt_list_mutex);
377 		async_handler = ibtl_cm_async_handler;
378 		client_private = ibtl_cm_clnt_private;
379 		mutex_exit(&ibtl_clnt_list_mutex);
380 		ibt_hca = NULL;
381 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
382 		    "calling CM for COM_EST");
383 	} else {
384 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
385 		    "calling client '%s'", client_name);
386 	}
387 	if (async_handler != NULL)
388 		async_handler(client_private, ibt_hca, code, event_p);
389 	else
390 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
391 		    "client '%s' has no async handler", client_name);
392 }
393 
/*
 * Inform CM or DM about HCA events.
 *
 *	We use taskqs to allow simultaneous notification, with sleeping.
 *	Since taskqs only allow one argument, we define a structure
 *	because we need to pass in more than one argument.
 *
 *	One of these is allocated per dispatch by ibtl_tell_mgr() and
 *	freed by ibtl_do_mgr_async_task() after the handler has returned.
 */

struct ibtl_mgr_s {
	ibtl_hca_devinfo_t	*mgr_hca_devp;	/* HCA the event is for */
	ibt_async_handler_t	mgr_async_handler; /* manager's handler */
	void			*mgr_clnt_private; /* 1st arg for handler */
};
407 
408 /*
409  * Asyncs of HCA level events for CM and DM.  Call CM or DM and tell them
410  * about the HCA for the event recorded in the ibtl_hca_devinfo_t.
411  */
412 static void
413 ibtl_do_mgr_async_task(void *arg)
414 {
415 	struct ibtl_mgr_s	*mgrp = (struct ibtl_mgr_s *)arg;
416 	ibtl_hca_devinfo_t	*hca_devp = mgrp->mgr_hca_devp;
417 
418 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_mgr_async_task(0x%x)",
419 	    hca_devp->hd_async_code);
420 
421 	mgrp->mgr_async_handler(mgrp->mgr_clnt_private, NULL,
422 	    hca_devp->hd_async_code, &hca_devp->hd_async_event);
423 	kmem_free(mgrp, sizeof (*mgrp));
424 
425 	mutex_enter(&ibtl_clnt_list_mutex);
426 	if (--hca_devp->hd_async_task_cnt == 0)
427 		cv_signal(&hca_devp->hd_async_task_cv);
428 	mutex_exit(&ibtl_clnt_list_mutex);
429 }
430 
431 static void
432 ibtl_tell_mgr(ibtl_hca_devinfo_t *hca_devp, ibt_async_handler_t async_handler,
433     void *clnt_private)
434 {
435 	struct ibtl_mgr_s *mgrp;
436 
437 	if (async_handler == NULL)
438 		return;
439 
440 	_NOTE(NO_COMPETING_THREADS_NOW)
441 	mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP);
442 	mgrp->mgr_hca_devp = hca_devp;
443 	mgrp->mgr_async_handler = async_handler;
444 	mgrp->mgr_clnt_private = clnt_private;
445 	hca_devp->hd_async_task_cnt++;
446 
447 	(void) taskq_dispatch(ibtl_async_taskq, ibtl_do_mgr_async_task, mgrp,
448 	    TQ_SLEEP);
449 #ifndef lint
450 	_NOTE(COMPETING_THREADS_NOW)
451 #endif
452 }
453 
454 /*
455  * Per client-device asyncs for HCA level events.  Call each client that is
456  * using the HCA for the event recorded in the ibtl_hca_devinfo_t.
457  */
458 static void
459 ibtl_hca_client_async_task(void *arg)
460 {
461 	ibtl_hca_t		*ibt_hca = (ibtl_hca_t *)arg;
462 	ibtl_hca_devinfo_t	*hca_devp = ibt_hca->ha_hca_devp;
463 	ibtl_clnt_t		*clntp = ibt_hca->ha_clnt_devp;
464 	ibt_async_event_t	async_event;
465 
466 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_hca_client_async_task(%p, 0x%x)",
467 	    ibt_hca, hca_devp->hd_async_code);
468 
469 	bcopy(&hca_devp->hd_async_event, &async_event, sizeof (async_event));
470 	ibtl_async_client_call(ibt_hca, hca_devp->hd_async_code, &async_event);
471 
472 	mutex_enter(&ibtl_async_mutex);
473 	if (--ibt_hca->ha_async_cnt == 0 &&
474 	    (ibt_hca->ha_async_flags & IBTL_ASYNC_FREE_OBJECT)) {
475 		mutex_exit(&ibtl_async_mutex);
476 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
477 	} else
478 		mutex_exit(&ibtl_async_mutex);
479 
480 	mutex_enter(&ibtl_clnt_list_mutex);
481 	if (--hca_devp->hd_async_task_cnt == 0)
482 		cv_signal(&hca_devp->hd_async_task_cv);
483 	if (--clntp->clnt_async_cnt == 0)
484 		cv_broadcast(&ibtl_clnt_cv);
485 
486 	mutex_exit(&ibtl_clnt_list_mutex);
487 }
488 
489 /*
490  * Asyncs for HCA level events.
491  *
492  * The function continues to run until there are no more async
493  * events/errors for this HCA.  An event is chosen for dispatch
494  * to all clients of this HCA.  This thread dispatches them via
495  * the ibtl_async_taskq, then sleeps until all tasks are done.
496  *
497  * This thread records the async_code and async_event in the
498  * ibtl_hca_devinfo_t for all client taskq threads to reference.
499  *
500  * This is called from an async or taskq thread with ibtl_async_mutex held.
501  */
502 static void
503 ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp)
504 {
505 	ibtl_hca_t			*ibt_hca;
506 	ibt_async_code_t		code;
507 	ibtl_async_port_status_t  	temp;
508 	uint8_t				nports;
509 	uint8_t				port_minus1;
510 	ibtl_async_port_status_t	*portp;
511 
512 	mutex_exit(&ibtl_async_mutex);
513 
514 	mutex_enter(&ibtl_clnt_list_mutex);
515 	while (hca_devp->hd_async_busy)
516 		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
517 	hca_devp->hd_async_busy = 1;
518 	mutex_enter(&ibtl_async_mutex);
519 
520 	bzero(&hca_devp->hd_async_event, sizeof (hca_devp->hd_async_event));
521 	for (;;) {
522 
523 		hca_devp->hd_async_event.ev_fma_ena = 0;
524 
525 		code = hca_devp->hd_async_codes;
526 		if (code & IBT_ERROR_LOCAL_CATASTROPHIC) {
527 			code = IBT_ERROR_LOCAL_CATASTROPHIC;
528 			hca_devp->hd_async_event.ev_fma_ena =
529 			    hca_devp->hd_fma_ena;
530 		} else if (code & IBT_ERROR_PORT_DOWN)
531 			code = IBT_ERROR_PORT_DOWN;
532 		else if (code & IBT_EVENT_PORT_UP)
533 			code = IBT_EVENT_PORT_UP;
534 		else {
535 			hca_devp->hd_async_codes = 0;
536 			code = 0;
537 		}
538 
539 		if (code == 0) {
540 			hca_devp->hd_async_flags &= ~IBTL_ASYNC_PENDING;
541 			break;
542 		}
543 		hca_devp->hd_async_codes &= ~code;
544 
545 		if ((code == IBT_EVENT_PORT_UP) ||
546 		    (code == IBT_ERROR_PORT_DOWN)) {
547 			/* PORT_UP or PORT_DOWN */
548 			portp = hca_devp->hd_async_port;
549 			nports = hca_devp->hd_hca_attr->hca_nports;
550 			for (port_minus1 = 0; port_minus1 < nports;
551 			    port_minus1++) {
552 				temp = ((code == IBT_EVENT_PORT_UP) ?
553 				    IBTL_HCA_PORT_UP : IBTL_HCA_PORT_DOWN) |
554 				    IBTL_HCA_PORT_CHANGED;
555 				if (portp[port_minus1] == temp)
556 					break;
557 			}
558 			if (port_minus1 >= nports) {
559 				/* we checked again, but found nothing */
560 				continue;
561 			}
562 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_do_hca_asyncs: "
563 			    "async: port# %x code %x", port_minus1 + 1, code);
564 			/* mark it to check for other ports after we're done */
565 			hca_devp->hd_async_codes |= code;
566 
567 			hca_devp->hd_async_event.ev_port = port_minus1 + 1;
568 			hca_devp->hd_async_port[port_minus1] &=
569 			    ~IBTL_HCA_PORT_CHANGED;
570 
571 			mutex_exit(&ibtl_async_mutex);
572 			ibtl_reinit_hca_portinfo(hca_devp, port_minus1 + 1);
573 			mutex_enter(&ibtl_async_mutex);
574 		}
575 
576 		hca_devp->hd_async_code = code;
577 		hca_devp->hd_async_event.ev_hca_guid =
578 		    hca_devp->hd_hca_attr->hca_node_guid;
579 		mutex_exit(&ibtl_async_mutex);
580 
581 		/*
582 		 * Make sure to inform CM, DM, and IBMA if we know of them.
583 		 * Also, make sure not to inform them a second time, which
584 		 * would occur if they have the HCA open.
585 		 */
586 
587 		if (ibtl_ibma_async_handler)
588 			ibtl_tell_mgr(hca_devp, ibtl_ibma_async_handler,
589 			    ibtl_ibma_clnt_private);
590 		/* wait for all tasks to complete */
591 		while (hca_devp->hd_async_task_cnt != 0)
592 			cv_wait(&hca_devp->hd_async_task_cv,
593 			    &ibtl_clnt_list_mutex);
594 
595 		if (ibtl_dm_async_handler)
596 			ibtl_tell_mgr(hca_devp, ibtl_dm_async_handler,
597 			    ibtl_dm_clnt_private);
598 		if (ibtl_cm_async_handler)
599 			ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
600 			    ibtl_cm_clnt_private);
601 		/* wait for all tasks to complete */
602 		while (hca_devp->hd_async_task_cnt != 0)
603 			cv_wait(&hca_devp->hd_async_task_cv,
604 			    &ibtl_clnt_list_mutex);
605 
606 		for (ibt_hca = hca_devp->hd_clnt_list;
607 		    ibt_hca != NULL;
608 		    ibt_hca = ibt_hca->ha_clnt_link) {
609 
610 			/* Managers are handled above */
611 			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
612 			    ibtl_cm_async_handler)
613 				continue;
614 			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
615 			    ibtl_dm_async_handler)
616 				continue;
617 			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
618 			    ibtl_ibma_async_handler)
619 				continue;
620 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
621 
622 			mutex_enter(&ibtl_async_mutex);
623 			ibt_hca->ha_async_cnt++;
624 			mutex_exit(&ibtl_async_mutex);
625 			hca_devp->hd_async_task_cnt++;
626 			(void) taskq_dispatch(ibtl_async_taskq,
627 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
628 		}
629 
630 		/* wait for all tasks to complete */
631 		while (hca_devp->hd_async_task_cnt != 0)
632 			cv_wait(&hca_devp->hd_async_task_cv,
633 			    &ibtl_clnt_list_mutex);
634 
635 		mutex_enter(&ibtl_async_mutex);
636 	}
637 	hca_devp->hd_async_code = 0;
638 	hca_devp->hd_async_busy = 0;
639 	cv_broadcast(&hca_devp->hd_async_busy_cv);
640 	mutex_exit(&ibtl_clnt_list_mutex);
641 }
642 
643 /*
644  * Asyncs for QP objects.
645  *
646  * The function continues to run until there are no more async
647  * events/errors for this object.
648  */
649 static void
650 ibtl_do_qp_asyncs(ibtl_qp_t *ibtl_qp)
651 {
652 	ibt_async_code_t	code;
653 	ibt_async_event_t	async_event;
654 
655 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
656 	bzero(&async_event, sizeof (async_event));
657 	async_event.ev_chan_hdl = IBTL_QP2CHAN(ibtl_qp);
658 
659 	while ((code = ibtl_qp->qp_async_codes) != 0) {
660 		async_event.ev_fma_ena = 0;
661 		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT)
662 			code = 0;	/* fallthrough to "kmem_free" */
663 		else if (code & IBT_ERROR_CATASTROPHIC_QP) {
664 			code = IBT_ERROR_CATASTROPHIC_QP;
665 			async_event.ev_fma_ena = ibtl_qp->qp_cat_fma_ena;
666 		} else if (code & IBT_ERROR_INVALID_REQUEST_QP) {
667 			code = IBT_ERROR_INVALID_REQUEST_QP;
668 			async_event.ev_fma_ena = ibtl_qp->qp_inv_fma_ena;
669 		} else if (code & IBT_ERROR_ACCESS_VIOLATION_QP) {
670 			code = IBT_ERROR_ACCESS_VIOLATION_QP;
671 			async_event.ev_fma_ena = ibtl_qp->qp_acc_fma_ena;
672 		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_QP) {
673 			code = IBT_ERROR_PATH_MIGRATE_REQ_QP;
674 			async_event.ev_fma_ena = ibtl_qp->qp_pth_fma_ena;
675 		} else if (code & IBT_EVENT_PATH_MIGRATED_QP)
676 			code = IBT_EVENT_PATH_MIGRATED_QP;
677 		else if (code & IBT_EVENT_SQD)
678 			code = IBT_EVENT_SQD;
679 		else if (code & IBT_EVENT_COM_EST_QP)
680 			code = IBT_EVENT_COM_EST_QP;
681 		else if (code & IBT_EVENT_EMPTY_QP)
682 			code = IBT_EVENT_EMPTY_QP;
683 		else {
684 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_qp_asyncs: "
685 			    "async: unexpected QP async code 0x%x", code);
686 			ibtl_qp->qp_async_codes = 0;
687 			code = 0;
688 		}
689 		ibtl_qp->qp_async_codes &= ~code;
690 
691 		if (code) {
692 			mutex_exit(&ibtl_async_mutex);
693 			ibtl_async_client_call(ibtl_qp->qp_hca,
694 			    code, &async_event);
695 			mutex_enter(&ibtl_async_mutex);
696 		}
697 
698 		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT) {
699 			mutex_exit(&ibtl_async_mutex);
700 			cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
701 			mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
702 			kmem_free(IBTL_QP2CHAN(ibtl_qp),
703 			    sizeof (ibtl_channel_t));
704 			mutex_enter(&ibtl_async_mutex);
705 			return;
706 		}
707 	}
708 	ibtl_qp->qp_async_flags &= ~IBTL_ASYNC_PENDING;
709 }
710 
711 /*
712  * Asyncs for SRQ objects.
713  *
714  * The function continues to run until there are no more async
715  * events/errors for this object.
716  */
717 static void
718 ibtl_do_srq_asyncs(ibtl_srq_t *ibtl_srq)
719 {
720 	ibt_async_code_t	code;
721 	ibt_async_event_t	async_event;
722 
723 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
724 	bzero(&async_event, sizeof (async_event));
725 	async_event.ev_srq_hdl = ibtl_srq;
726 	async_event.ev_fma_ena = ibtl_srq->srq_fma_ena;
727 
728 	while ((code = ibtl_srq->srq_async_codes) != 0) {
729 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT)
730 			code = 0;	/* fallthrough to "kmem_free" */
731 		else if (code & IBT_ERROR_CATASTROPHIC_SRQ)
732 			code = IBT_ERROR_CATASTROPHIC_SRQ;
733 		else if (code & IBT_EVENT_LIMIT_REACHED_SRQ)
734 			code = IBT_EVENT_LIMIT_REACHED_SRQ;
735 		else {
736 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_srq_asyncs: "
737 			    "async: unexpected SRQ async code 0x%x", code);
738 			ibtl_srq->srq_async_codes = 0;
739 			code = 0;
740 		}
741 		ibtl_srq->srq_async_codes &= ~code;
742 
743 		if (code) {
744 			mutex_exit(&ibtl_async_mutex);
745 			ibtl_async_client_call(ibtl_srq->srq_hca,
746 			    code, &async_event);
747 			mutex_enter(&ibtl_async_mutex);
748 		}
749 
750 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
751 			mutex_exit(&ibtl_async_mutex);
752 			kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
753 			mutex_enter(&ibtl_async_mutex);
754 			return;
755 		}
756 	}
757 	ibtl_srq->srq_async_flags &= ~IBTL_ASYNC_PENDING;
758 }
759 
760 /*
761  * Asyncs for CQ objects.
762  *
763  * The function continues to run until there are no more async
764  * events/errors for this object.
765  */
766 static void
767 ibtl_do_cq_asyncs(ibtl_cq_t *ibtl_cq)
768 {
769 	ibt_async_code_t	code;
770 	ibt_async_event_t	async_event;
771 
772 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
773 	bzero(&async_event, sizeof (async_event));
774 	async_event.ev_cq_hdl = ibtl_cq;
775 	async_event.ev_fma_ena = ibtl_cq->cq_fma_ena;
776 
777 	while ((code = ibtl_cq->cq_async_codes) != 0) {
778 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT)
779 			code = 0;	/* fallthrough to "kmem_free" */
780 		else if (code & IBT_ERROR_CQ)
781 			code = IBT_ERROR_CQ;
782 		else {
783 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_cq_asyncs: "
784 			    "async: unexpected CQ async code 0x%x", code);
785 			ibtl_cq->cq_async_codes = 0;
786 			code = 0;
787 		}
788 		ibtl_cq->cq_async_codes &= ~code;
789 
790 		if (code) {
791 			mutex_exit(&ibtl_async_mutex);
792 			ibtl_async_client_call(ibtl_cq->cq_hca,
793 			    code, &async_event);
794 			mutex_enter(&ibtl_async_mutex);
795 		}
796 
797 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
798 			mutex_exit(&ibtl_async_mutex);
799 			mutex_destroy(&ibtl_cq->cq_mutex);
800 			kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
801 			mutex_enter(&ibtl_async_mutex);
802 			return;
803 		}
804 	}
805 	ibtl_cq->cq_async_flags &= ~IBTL_ASYNC_PENDING;
806 }
807 
808 /*
809  * Asyncs for EEC objects.
810  *
811  * The function continues to run until there are no more async
812  * events/errors for this object.
813  */
814 static void
815 ibtl_do_eec_asyncs(ibtl_eec_t *ibtl_eec)
816 {
817 	ibt_async_code_t	code;
818 	ibt_async_event_t	async_event;
819 
820 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
821 	bzero(&async_event, sizeof (async_event));
822 	async_event.ev_chan_hdl = ibtl_eec->eec_channel;
823 
824 	while ((code = ibtl_eec->eec_async_codes) != 0) {
825 		async_event.ev_fma_ena = 0;
826 		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT)
827 			code = 0;	/* fallthrough to "kmem_free" */
828 		else if (code & IBT_ERROR_CATASTROPHIC_EEC) {
829 			code = IBT_ERROR_CATASTROPHIC_CHAN;
830 			async_event.ev_fma_ena = ibtl_eec->eec_cat_fma_ena;
831 		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_EEC) {
832 			code = IBT_ERROR_PATH_MIGRATE_REQ;
833 			async_event.ev_fma_ena = ibtl_eec->eec_pth_fma_ena;
834 		} else if (code & IBT_EVENT_PATH_MIGRATED_EEC)
835 			code = IBT_EVENT_PATH_MIGRATED;
836 		else if (code & IBT_EVENT_COM_EST_EEC)
837 			code = IBT_EVENT_COM_EST;
838 		else {
839 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_eec_asyncs: "
840 			    "async: unexpected code 0x%x", code);
841 			ibtl_eec->eec_async_codes = 0;
842 			code = 0;
843 		}
844 		ibtl_eec->eec_async_codes &= ~code;
845 
846 		if (code) {
847 			mutex_exit(&ibtl_async_mutex);
848 			ibtl_async_client_call(ibtl_eec->eec_hca,
849 			    code, &async_event);
850 			mutex_enter(&ibtl_async_mutex);
851 		}
852 
853 		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT) {
854 			mutex_exit(&ibtl_async_mutex);
855 			kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
856 			mutex_enter(&ibtl_async_mutex);
857 			return;
858 		}
859 	}
860 	ibtl_eec->eec_async_flags &= ~IBTL_ASYNC_PENDING;
861 }
862 
863 #ifdef __lock_lint
864 kmutex_t cpr_mutex;
865 #endif
866 
867 /*
868  * Loop forever, calling async_handlers until all of the async lists
869  * are empty.
870  */
871 
872 static void
873 ibtl_async_thread(void)
874 {
875 #ifndef __lock_lint
876 	kmutex_t cpr_mutex;
877 #endif
878 	callb_cpr_t	cprinfo;
879 
880 	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
881 	_NOTE(NO_COMPETING_THREADS_NOW)
882 	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
883 	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
884 	    "ibtl_async_thread");
885 #ifndef lint
886 	_NOTE(COMPETING_THREADS_NOW)
887 #endif
888 
889 	mutex_enter(&ibtl_async_mutex);
890 
891 	for (;;) {
892 		if (ibtl_async_hca_list_start) {
893 			ibtl_hca_devinfo_t *hca_devp;
894 
895 			/* remove first entry from list */
896 			hca_devp = ibtl_async_hca_list_start;
897 			ibtl_async_hca_list_start = hca_devp->hd_async_link;
898 			hca_devp->hd_async_link = NULL;
899 			if (ibtl_async_hca_list_start == NULL)
900 				ibtl_async_hca_list_end = NULL;
901 
902 			ibtl_do_hca_asyncs(hca_devp);
903 
904 		} else if (ibtl_async_qp_list_start) {
905 			ibtl_qp_t *ibtl_qp;
906 
907 			/* remove from list */
908 			ibtl_qp = ibtl_async_qp_list_start;
909 			ibtl_async_qp_list_start = ibtl_qp->qp_async_link;
910 			ibtl_qp->qp_async_link = NULL;
911 			if (ibtl_async_qp_list_start == NULL)
912 				ibtl_async_qp_list_end = NULL;
913 
914 			ibtl_do_qp_asyncs(ibtl_qp);
915 
916 		} else if (ibtl_async_srq_list_start) {
917 			ibtl_srq_t *ibtl_srq;
918 
919 			/* remove from list */
920 			ibtl_srq = ibtl_async_srq_list_start;
921 			ibtl_async_srq_list_start = ibtl_srq->srq_async_link;
922 			ibtl_srq->srq_async_link = NULL;
923 			if (ibtl_async_srq_list_start == NULL)
924 				ibtl_async_srq_list_end = NULL;
925 
926 			ibtl_do_srq_asyncs(ibtl_srq);
927 
928 		} else if (ibtl_async_eec_list_start) {
929 			ibtl_eec_t *ibtl_eec;
930 
931 			/* remove from list */
932 			ibtl_eec = ibtl_async_eec_list_start;
933 			ibtl_async_eec_list_start = ibtl_eec->eec_async_link;
934 			ibtl_eec->eec_async_link = NULL;
935 			if (ibtl_async_eec_list_start == NULL)
936 				ibtl_async_eec_list_end = NULL;
937 
938 			ibtl_do_eec_asyncs(ibtl_eec);
939 
940 		} else if (ibtl_async_cq_list_start) {
941 			ibtl_cq_t *ibtl_cq;
942 
943 			/* remove from list */
944 			ibtl_cq = ibtl_async_cq_list_start;
945 			ibtl_async_cq_list_start = ibtl_cq->cq_async_link;
946 			ibtl_cq->cq_async_link = NULL;
947 			if (ibtl_async_cq_list_start == NULL)
948 				ibtl_async_cq_list_end = NULL;
949 
950 			ibtl_do_cq_asyncs(ibtl_cq);
951 
952 		} else {
953 			if (ibtl_async_thread_exit == IBTL_THREAD_EXIT)
954 				break;
955 			mutex_enter(&cpr_mutex);
956 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
957 			mutex_exit(&cpr_mutex);
958 
959 			cv_wait(&ibtl_async_cv, &ibtl_async_mutex);
960 
961 			mutex_exit(&ibtl_async_mutex);
962 			mutex_enter(&cpr_mutex);
963 			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
964 			mutex_exit(&cpr_mutex);
965 			mutex_enter(&ibtl_async_mutex);
966 		}
967 	}
968 
969 	mutex_exit(&ibtl_async_mutex);
970 
971 #ifndef __lock_lint
972 	mutex_enter(&cpr_mutex);
973 	CALLB_CPR_EXIT(&cprinfo);
974 #endif
975 	mutex_destroy(&cpr_mutex);
976 }
977 
978 
979 void
980 ibtl_free_qp_async_check(ibtl_qp_t *ibtl_qp)
981 {
982 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_qp_async_check(%p)", ibtl_qp);
983 
984 	mutex_enter(&ibtl_async_mutex);
985 
986 	/*
987 	 * If there is an active async, mark this object to be freed
988 	 * by the async_thread when it's done.
989 	 */
990 	if (ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) {
991 		ibtl_qp->qp_async_flags |= IBTL_ASYNC_FREE_OBJECT;
992 		mutex_exit(&ibtl_async_mutex);
993 	} else {	/* free the object now */
994 		mutex_exit(&ibtl_async_mutex);
995 		cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
996 		mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
997 		kmem_free(IBTL_QP2CHAN(ibtl_qp), sizeof (ibtl_channel_t));
998 	}
999 }
1000 
1001 void
1002 ibtl_free_cq_async_check(ibtl_cq_t *ibtl_cq)
1003 {
1004 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_cq_async_check(%p)", ibtl_cq);
1005 
1006 	mutex_enter(&ibtl_async_mutex);
1007 
1008 	/* if there is an active async, mark this object to be freed */
1009 	if (ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) {
1010 		ibtl_cq->cq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1011 		mutex_exit(&ibtl_async_mutex);
1012 	} else {	/* free the object now */
1013 		mutex_exit(&ibtl_async_mutex);
1014 		mutex_destroy(&ibtl_cq->cq_mutex);
1015 		kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
1016 	}
1017 }
1018 
1019 void
1020 ibtl_free_srq_async_check(ibtl_srq_t *ibtl_srq)
1021 {
1022 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_srq_async_check(%p)",
1023 	    ibtl_srq);
1024 
1025 	mutex_enter(&ibtl_async_mutex);
1026 
1027 	/* if there is an active async, mark this object to be freed */
1028 	if (ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) {
1029 		ibtl_srq->srq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1030 		mutex_exit(&ibtl_async_mutex);
1031 	} else {	/* free the object now */
1032 		mutex_exit(&ibtl_async_mutex);
1033 		kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
1034 	}
1035 }
1036 
1037 void
1038 ibtl_free_eec_async_check(ibtl_eec_t *ibtl_eec)
1039 {
1040 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_eec_async_check(%p)",
1041 	    ibtl_eec);
1042 
1043 	mutex_enter(&ibtl_async_mutex);
1044 
1045 	/* if there is an active async, mark this object to be freed */
1046 	if (ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) {
1047 		ibtl_eec->eec_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1048 		mutex_exit(&ibtl_async_mutex);
1049 	} else {	/* free the object now */
1050 		mutex_exit(&ibtl_async_mutex);
1051 		kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
1052 	}
1053 }
1054 
1055 /*
1056  * This function differs from above in that we assume this is called
1057  * from non-interrupt context, and never called from the async_thread.
1058  */
1059 
1060 void
1061 ibtl_free_hca_async_check(ibtl_hca_t *ibt_hca)
1062 {
1063 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_hca_async_check(%p)",
1064 	    ibt_hca);
1065 
1066 	mutex_enter(&ibtl_async_mutex);
1067 
1068 	/* if there is an active async, mark this object to be freed */
1069 	if (ibt_hca->ha_async_cnt > 0) {
1070 		ibt_hca->ha_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1071 		mutex_exit(&ibtl_async_mutex);
1072 	} else {	/* free the object now */
1073 		mutex_exit(&ibtl_async_mutex);
1074 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
1075 	}
1076 }
1077 
1078 /*
1079  * Completion Queue Handling.
1080  *
1081  *	A completion queue can be handled through a simple callback
1082  *	at interrupt level, or it may be queued for an ibtl_cq_thread
1083  *	to handle.  The latter is chosen during ibt_alloc_cq when the
1084  *	IBTF_CQ_HANDLER_IN_THREAD is specified.
1085  */
1086 
1087 static void
1088 ibtl_cq_handler_call(ibtl_cq_t *ibtl_cq)
1089 {
1090 	ibt_cq_handler_t	cq_handler;
1091 	void			*arg;
1092 
1093 	IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_cq_handler_call(%p)", ibtl_cq);
1094 
1095 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq))
1096 	cq_handler = ibtl_cq->cq_comp_handler;
1097 	arg = ibtl_cq->cq_arg;
1098 	if (cq_handler != NULL)
1099 		cq_handler(ibtl_cq, arg);
1100 	else
1101 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_cq_handler_call: "
1102 		    "no cq_handler for cq %p", ibtl_cq);
1103 }
1104 
1105 /*
1106  * Before ibt_free_cq can continue, we need to ensure no more cq_handler
1107  * callbacks can occur.  When we get the mutex, we know there are no
1108  * outstanding cq_handler callbacks.  We set the cq_handler to NULL to
1109  * prohibit future callbacks.
1110  */
1111 void
1112 ibtl_free_cq_check(ibtl_cq_t *ibtl_cq)
1113 {
1114 	mutex_enter(&ibtl_cq->cq_mutex);
1115 	ibtl_cq->cq_comp_handler = NULL;
1116 	mutex_exit(&ibtl_cq->cq_mutex);
1117 	if (ibtl_cq->cq_in_thread) {
1118 		mutex_enter(&ibtl_cq_mutex);
1119 		--ibtl_cqs_using_threads;
1120 		while (ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) {
1121 			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
1122 			ibtl_cq->cq_impl_flags |= IBTL_CQ_FREE;
1123 			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
1124 		}
1125 		mutex_exit(&ibtl_cq_mutex);
1126 	}
1127 }
1128 
1129 /*
1130  * Loop forever, calling cq_handlers until the cq list
1131  * is empty.
1132  */
1133 
1134 static void
1135 ibtl_cq_thread(void)
1136 {
1137 #ifndef __lock_lint
1138 	kmutex_t cpr_mutex;
1139 #endif
1140 	callb_cpr_t	cprinfo;
1141 
1142 	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
1143 	_NOTE(NO_COMPETING_THREADS_NOW)
1144 	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
1145 	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
1146 	    "ibtl_cq_thread");
1147 #ifndef lint
1148 	_NOTE(COMPETING_THREADS_NOW)
1149 #endif
1150 
1151 	mutex_enter(&ibtl_cq_mutex);
1152 
1153 	for (;;) {
1154 		if (ibtl_cq_list_start) {
1155 			ibtl_cq_t *ibtl_cq;
1156 
1157 			ibtl_cq = ibtl_cq_list_start;
1158 			ibtl_cq_list_start = ibtl_cq->cq_link;
1159 			ibtl_cq->cq_link = NULL;
1160 			if (ibtl_cq == ibtl_cq_list_end)
1161 				ibtl_cq_list_end = NULL;
1162 
1163 			while (ibtl_cq->cq_impl_flags & IBTL_CQ_CALL_CLIENT) {
1164 				ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
1165 				mutex_exit(&ibtl_cq_mutex);
1166 				ibtl_cq_handler_call(ibtl_cq);
1167 				mutex_enter(&ibtl_cq_mutex);
1168 			}
1169 			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_PENDING;
1170 			if (ibtl_cq->cq_impl_flags & IBTL_CQ_FREE)
1171 				cv_broadcast(&ibtl_cq_cv);
1172 		} else {
1173 			if (ibtl_cq_thread_exit == IBTL_THREAD_EXIT)
1174 				break;
1175 			mutex_enter(&cpr_mutex);
1176 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1177 			mutex_exit(&cpr_mutex);
1178 
1179 			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
1180 
1181 			mutex_exit(&ibtl_cq_mutex);
1182 			mutex_enter(&cpr_mutex);
1183 			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
1184 			mutex_exit(&cpr_mutex);
1185 			mutex_enter(&ibtl_cq_mutex);
1186 		}
1187 	}
1188 
1189 	mutex_exit(&ibtl_cq_mutex);
1190 #ifndef __lock_lint
1191 	mutex_enter(&cpr_mutex);
1192 	CALLB_CPR_EXIT(&cprinfo);
1193 #endif
1194 	mutex_destroy(&cpr_mutex);
1195 }
1196 
1197 
1198 /*
1199  * ibc_cq_handler()
1200  *
1201  *    Completion Queue Notification Handler.
1202  *
1203  */
1204 /*ARGSUSED*/
1205 void
1206 ibc_cq_handler(ibc_clnt_hdl_t ibc_hdl, ibt_cq_hdl_t ibtl_cq)
1207 {
1208 	IBTF_DPRINTF_L4(ibtf_handlers, "ibc_cq_handler(%p, %p)",
1209 	    ibc_hdl, ibtl_cq);
1210 
1211 	if (ibtl_cq->cq_in_thread) {
1212 		mutex_enter(&ibtl_cq_mutex);
1213 		ibtl_cq->cq_impl_flags |= IBTL_CQ_CALL_CLIENT;
1214 		if ((ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) == 0) {
1215 			ibtl_cq->cq_impl_flags |= IBTL_CQ_PENDING;
1216 			ibtl_cq->cq_link = NULL;
1217 			if (ibtl_cq_list_end == NULL)
1218 				ibtl_cq_list_start = ibtl_cq;
1219 			else
1220 				ibtl_cq_list_end->cq_link = ibtl_cq;
1221 			ibtl_cq_list_end = ibtl_cq;
1222 			cv_signal(&ibtl_cq_cv);
1223 		}
1224 		mutex_exit(&ibtl_cq_mutex);
1225 		return;
1226 	} else
1227 		ibtl_cq_handler_call(ibtl_cq);
1228 }
1229 
1230 
1231 /*
1232  * ibt_enable_cq_notify()
1233  *      Enable Notification requests on the specified CQ.
1234  *
1235  *      ibt_cq          The CQ handle.
1236  *
1237  *      notify_type     Enable notifications for all (IBT_NEXT_COMPLETION)
1238  *                      completions, or the next Solicited completion
1239  *                      (IBT_NEXT_SOLICITED) only.
1240  *
1241  *	Completion notifications are disabled by setting the completion
1242  *	handler to NULL by calling ibt_set_cq_handler().
1243  */
1244 ibt_status_t
1245 ibt_enable_cq_notify(ibt_cq_hdl_t ibtl_cq, ibt_cq_notify_flags_t notify_type)
1246 {
1247 	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_enable_cq_notify(%p, %d)",
1248 	    ibtl_cq, notify_type);
1249 
1250 	return (IBTL_CQ2CIHCAOPS_P(ibtl_cq)->ibc_notify_cq(
1251 	    IBTL_CQ2CIHCA(ibtl_cq), ibtl_cq->cq_ibc_cq_hdl, notify_type));
1252 }
1253 
1254 
1255 /*
1256  * ibt_set_cq_handler()
1257  *      Register a work request completion handler with the IBTF.
1258  *
1259  *      ibt_cq                  The CQ handle.
1260  *
1261  *      completion_handler      The completion handler.
1262  *
1263  *      arg                     The IBTF client private argument to be passed
1264  *                              back to the client when calling the CQ
1265  *                              completion handler.
1266  *
1267  *	Completion notifications are disabled by setting the completion
1268  *	handler to NULL.  When setting the handler to NULL, no additional
1269  *	calls to the previous CQ handler will be initiated, but there may
1270  *	be one in progress.
1271  *
1272  *      This function does not otherwise change the state of previous
1273  *      calls to ibt_enable_cq_notify().
1274  */
void
ibt_set_cq_handler(ibt_cq_hdl_t ibtl_cq, ibt_cq_handler_t completion_handler,
    void *arg)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_set_cq_handler(%p, %p, %p)",
	    ibtl_cq, completion_handler, arg);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq))
	/*
	 * Both fields are stored without taking any lock here;
	 * ibtl_cq_handler_call() reads them in the same order
	 * (handler, then argument).
	 */
	ibtl_cq->cq_comp_handler = completion_handler;
	ibtl_cq->cq_arg = arg;
}
1286 
1287 
/*
 * Inform IBT clients about New HCAs.
 *
 *	We use taskqs to allow simultaneous notification, with sleeping.
 *	Since taskqs only allow one argument, we define a structure
 *	to carry the client, the HCA device and the async code together.
 */
1295 
struct ibtl_new_hca_s {
	ibtl_clnt_t		*nh_clntp;	/* client to be called */
	ibtl_hca_devinfo_t	*nh_hca_devp;	/* HCA the event is about */
	ibt_async_code_t	nh_code;	/* async code to deliver */
};
1301 
1302 static void
1303 ibtl_tell_client_about_new_hca(void *arg)
1304 {
1305 	struct ibtl_new_hca_s	*new_hcap = (struct ibtl_new_hca_s *)arg;
1306 	ibtl_clnt_t		*clntp = new_hcap->nh_clntp;
1307 	ibt_async_event_t	async_event;
1308 	ibtl_hca_devinfo_t	*hca_devp = new_hcap->nh_hca_devp;
1309 
1310 	bzero(&async_event, sizeof (async_event));
1311 	async_event.ev_hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
1312 	clntp->clnt_modinfop->mi_async_handler(
1313 	    clntp->clnt_private, NULL, new_hcap->nh_code, &async_event);
1314 	kmem_free(new_hcap, sizeof (*new_hcap));
1315 #ifdef __lock_lint
1316 	{
1317 		ibt_hca_hdl_t hca_hdl;
1318 		(void) ibt_open_hca(clntp, 0ULL, &hca_hdl);
1319 	}
1320 #endif
1321 	mutex_enter(&ibtl_clnt_list_mutex);
1322 	if (--hca_devp->hd_async_task_cnt == 0)
1323 		cv_signal(&hca_devp->hd_async_task_cv);
1324 	if (--clntp->clnt_async_cnt == 0)
1325 		cv_broadcast(&ibtl_clnt_cv);
1326 	mutex_exit(&ibtl_clnt_list_mutex);
1327 }
1328 
1329 /*
1330  * ibtl_announce_new_hca:
1331  *
1332  *	o First attach these clients in the given order
1333  *		IBMA
1334  *		IBCM
1335  *
1336  *	o Next attach all other clients in parallel.
1337  *
1338  * NOTE: Use the taskq to simultaneously notify all clients of the new HCA.
1339  * Retval from clients is ignored.
1340  */
1341 void
1342 ibtl_announce_new_hca(ibtl_hca_devinfo_t *hca_devp)
1343 {
1344 	ibtl_clnt_t		*clntp;
1345 	struct ibtl_new_hca_s	*new_hcap;
1346 
1347 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_announce_new_hca(%p, %llX)",
1348 	    hca_devp, hca_devp->hd_hca_attr->hca_node_guid);
1349 
1350 	mutex_enter(&ibtl_clnt_list_mutex);
1351 
1352 	clntp = ibtl_clnt_list;
1353 	while (clntp != NULL) {
1354 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
1355 			IBTF_DPRINTF_L4(ibtf_handlers,
1356 			    "ibtl_announce_new_hca: calling IBMF");
1357 			if (clntp->clnt_modinfop->mi_async_handler) {
1358 				_NOTE(NO_COMPETING_THREADS_NOW)
1359 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1360 				    KM_SLEEP);
1361 				new_hcap->nh_clntp = clntp;
1362 				new_hcap->nh_hca_devp = hca_devp;
1363 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1364 #ifndef lint
1365 				_NOTE(COMPETING_THREADS_NOW)
1366 #endif
1367 				clntp->clnt_async_cnt++;
1368 				hca_devp->hd_async_task_cnt++;
1369 
1370 				(void) taskq_dispatch(ibtl_async_taskq,
1371 				    ibtl_tell_client_about_new_hca, new_hcap,
1372 				    TQ_SLEEP);
1373 			}
1374 			break;
1375 		}
1376 		clntp = clntp->clnt_list_link;
1377 	}
1378 	if (clntp != NULL)
1379 		while (clntp->clnt_async_cnt > 0)
1380 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1381 	clntp = ibtl_clnt_list;
1382 	while (clntp != NULL) {
1383 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
1384 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
1385 			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
1386 			if (clntp->clnt_modinfop->mi_async_handler) {
1387 				_NOTE(NO_COMPETING_THREADS_NOW)
1388 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1389 				    KM_SLEEP);
1390 				new_hcap->nh_clntp = clntp;
1391 				new_hcap->nh_hca_devp = hca_devp;
1392 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1393 #ifndef lint
1394 				_NOTE(COMPETING_THREADS_NOW)
1395 #endif
1396 				clntp->clnt_async_cnt++;
1397 				hca_devp->hd_async_task_cnt++;
1398 
1399 				mutex_exit(&ibtl_clnt_list_mutex);
1400 				(void) ibtl_tell_client_about_new_hca(
1401 				    new_hcap);
1402 				mutex_enter(&ibtl_clnt_list_mutex);
1403 			}
1404 			break;
1405 		}
1406 		clntp = clntp->clnt_list_link;
1407 	}
1408 
1409 	clntp = ibtl_clnt_list;
1410 	while (clntp != NULL) {
1411 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_CM) {
1412 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
1413 			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
1414 			if (clntp->clnt_modinfop->mi_async_handler) {
1415 				_NOTE(NO_COMPETING_THREADS_NOW)
1416 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1417 				    KM_SLEEP);
1418 				new_hcap->nh_clntp = clntp;
1419 				new_hcap->nh_hca_devp = hca_devp;
1420 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1421 #ifndef lint
1422 				_NOTE(COMPETING_THREADS_NOW)
1423 #endif
1424 				clntp->clnt_async_cnt++;
1425 				hca_devp->hd_async_task_cnt++;
1426 
1427 				(void) taskq_dispatch(ibtl_async_taskq,
1428 				    ibtl_tell_client_about_new_hca, new_hcap,
1429 				    TQ_SLEEP);
1430 			}
1431 			break;
1432 		}
1433 		clntp = clntp->clnt_list_link;
1434 	}
1435 	if (clntp != NULL)
1436 		while (clntp->clnt_async_cnt > 0)
1437 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1438 	clntp = ibtl_clnt_list;
1439 	while (clntp != NULL) {
1440 		if ((clntp->clnt_modinfop->mi_clnt_class != IBT_DM) &&
1441 		    (clntp->clnt_modinfop->mi_clnt_class != IBT_CM) &&
1442 		    (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA)) {
1443 			IBTF_DPRINTF_L4(ibtf_handlers,
1444 			    "ibtl_announce_new_hca: Calling %s ",
1445 			    clntp->clnt_modinfop->mi_clnt_name);
1446 			if (clntp->clnt_modinfop->mi_async_handler) {
1447 				_NOTE(NO_COMPETING_THREADS_NOW)
1448 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1449 				    KM_SLEEP);
1450 				new_hcap->nh_clntp = clntp;
1451 				new_hcap->nh_hca_devp = hca_devp;
1452 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1453 #ifndef lint
1454 				_NOTE(COMPETING_THREADS_NOW)
1455 #endif
1456 				clntp->clnt_async_cnt++;
1457 				hca_devp->hd_async_task_cnt++;
1458 
1459 				(void) taskq_dispatch(ibtl_async_taskq,
1460 				    ibtl_tell_client_about_new_hca, new_hcap,
1461 				    TQ_SLEEP);
1462 			}
1463 		}
1464 		clntp = clntp->clnt_list_link;
1465 	}
1466 
1467 	/* wait for all tasks to complete */
1468 	while (hca_devp->hd_async_task_cnt != 0)
1469 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1470 
1471 	/* wakeup thread that may be waiting to send an HCA async */
1472 	ASSERT(hca_devp->hd_async_busy == 1);
1473 	hca_devp->hd_async_busy = 0;
1474 	cv_broadcast(&hca_devp->hd_async_busy_cv);
1475 	mutex_exit(&ibtl_clnt_list_mutex);
1476 }
1477 
1478 /*
1479  * ibtl_detach_all_clients:
1480  *
1481  *	Return value - 0 for Success, 1 for Failure
1482  *
1483  *	o First detach general clients.
1484  *
1485  *	o Next detach these clients
1486  *		IBCM
1487  *		IBDM
1488  *
1489  *	o Finally, detach this client
1490  *		IBMA
1491  */
1492 int
1493 ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp)
1494 {
1495 	ib_guid_t		hcaguid = hca_devp->hd_hca_attr->hca_node_guid;
1496 	ibtl_hca_t		*ibt_hca;
1497 	ibtl_clnt_t		*clntp;
1498 	int			retval;
1499 
1500 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_detach_all_clients(%llX)",
1501 	    hcaguid);
1502 
1503 	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));
1504 
1505 	while (hca_devp->hd_async_busy)
1506 		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
1507 	hca_devp->hd_async_busy = 1;
1508 
1509 	/* First inform general clients asynchronously */
1510 	hca_devp->hd_async_event.ev_hca_guid = hcaguid;
1511 	hca_devp->hd_async_event.ev_fma_ena = 0;
1512 	hca_devp->hd_async_event.ev_chan_hdl = NULL;
1513 	hca_devp->hd_async_event.ev_cq_hdl = NULL;
1514 	hca_devp->hd_async_code = IBT_HCA_DETACH_EVENT;
1515 
1516 	ibt_hca = hca_devp->hd_clnt_list;
1517 	while (ibt_hca != NULL) {
1518 		clntp = ibt_hca->ha_clnt_devp;
1519 		if (IBTL_GENERIC_CLIENT(clntp)) {
1520 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1521 			mutex_enter(&ibtl_async_mutex);
1522 			ibt_hca->ha_async_cnt++;
1523 			mutex_exit(&ibtl_async_mutex);
1524 			hca_devp->hd_async_task_cnt++;
1525 
1526 			(void) taskq_dispatch(ibtl_async_taskq,
1527 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
1528 		}
1529 		ibt_hca = ibt_hca->ha_clnt_link;
1530 	}
1531 
1532 	/* wait for all clients to complete */
1533 	while (hca_devp->hd_async_task_cnt != 0) {
1534 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1535 	}
1536 	/* Go thru the clients and check if any have not closed this HCA. */
1537 	retval = 0;
1538 	ibt_hca = hca_devp->hd_clnt_list;
1539 	while (ibt_hca != NULL) {
1540 		clntp = ibt_hca->ha_clnt_devp;
1541 		if (IBTL_GENERIC_CLIENT(clntp)) {
1542 			IBTF_DPRINTF_L2(ibtf_handlers,
1543 			    "ibtl_detach_all_clients: "
1544 			    "client '%s' failed to close the HCA.",
1545 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1546 			retval = 1;
1547 		}
1548 		ibt_hca = ibt_hca->ha_clnt_link;
1549 	}
1550 	if (retval == 1)
1551 		goto bailout;
1552 
1553 	/* Next inform IBDM asynchronously */
1554 	ibt_hca = hca_devp->hd_clnt_list;
1555 	while (ibt_hca != NULL) {
1556 		clntp = ibt_hca->ha_clnt_devp;
1557 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
1558 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1559 			mutex_enter(&ibtl_async_mutex);
1560 			ibt_hca->ha_async_cnt++;
1561 			mutex_exit(&ibtl_async_mutex);
1562 			hca_devp->hd_async_task_cnt++;
1563 
1564 			mutex_exit(&ibtl_clnt_list_mutex);
1565 			ibtl_hca_client_async_task(ibt_hca);
1566 			mutex_enter(&ibtl_clnt_list_mutex);
1567 			break;
1568 		}
1569 		ibt_hca = ibt_hca->ha_clnt_link;
1570 	}
1571 
1572 	/*
1573 	 * Next inform IBCM.
1574 	 * As IBCM doesn't perform ibt_open_hca(), IBCM will not be
1575 	 * accessible via hca_devp->hd_clnt_list.
1576 	 * ibtl_cm_async_handler will NOT be NULL, if IBCM is registered.
1577 	 */
1578 	if (ibtl_cm_async_handler) {
1579 		ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
1580 		    ibtl_cm_clnt_private);
1581 
1582 		/* wait for all tasks to complete */
1583 		while (hca_devp->hd_async_task_cnt != 0)
1584 			cv_wait(&hca_devp->hd_async_task_cv,
1585 			    &ibtl_clnt_list_mutex);
1586 	}
1587 
1588 	/* Go thru the clients and check if any have not closed this HCA. */
1589 	retval = 0;
1590 	ibt_hca = hca_devp->hd_clnt_list;
1591 	while (ibt_hca != NULL) {
1592 		clntp = ibt_hca->ha_clnt_devp;
1593 		if (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA) {
1594 			IBTF_DPRINTF_L2(ibtf_handlers,
1595 			    "ibtl_detach_all_clients: "
1596 			    "client '%s' failed to close the HCA.",
1597 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1598 			retval = 1;
1599 		}
1600 		ibt_hca = ibt_hca->ha_clnt_link;
1601 	}
1602 	if (retval == 1)
1603 		goto bailout;
1604 
1605 	/* Finally, inform IBMA */
1606 	ibt_hca = hca_devp->hd_clnt_list;
1607 	while (ibt_hca != NULL) {
1608 		clntp = ibt_hca->ha_clnt_devp;
1609 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
1610 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1611 			mutex_enter(&ibtl_async_mutex);
1612 			ibt_hca->ha_async_cnt++;
1613 			mutex_exit(&ibtl_async_mutex);
1614 			hca_devp->hd_async_task_cnt++;
1615 
1616 			(void) taskq_dispatch(ibtl_async_taskq,
1617 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
1618 		} else
1619 			IBTF_DPRINTF_L2(ibtf_handlers,
1620 			    "ibtl_detach_all_clients: "
1621 			    "client '%s' is unexpectedly on the client list",
1622 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1623 		ibt_hca = ibt_hca->ha_clnt_link;
1624 	}
1625 
1626 	/* wait for IBMA to complete */
1627 	while (hca_devp->hd_async_task_cnt != 0) {
1628 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1629 	}
1630 
1631 	/* Check if this HCA's client list is empty. */
1632 	ibt_hca = hca_devp->hd_clnt_list;
1633 	if (ibt_hca != NULL) {
1634 		IBTF_DPRINTF_L2(ibtf_handlers,
1635 		    "ibtl_detach_all_clients: "
1636 		    "client '%s' failed to close the HCA.",
1637 		    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1638 		retval = 1;
1639 	} else
1640 		retval = 0;
1641 
1642 bailout:
1643 	if (retval) {
1644 		hca_devp->hd_state = IBTL_HCA_DEV_ATTACHED; /* fix hd_state */
1645 		mutex_exit(&ibtl_clnt_list_mutex);
1646 		ibtl_announce_new_hca(hca_devp);
1647 		mutex_enter(&ibtl_clnt_list_mutex);
1648 	} else {
1649 		hca_devp->hd_async_busy = 0;
1650 		cv_broadcast(&hca_devp->hd_async_busy_cv);
1651 	}
1652 
1653 	return (retval);
1654 }
1655 
1656 void
1657 ibtl_free_clnt_async_check(ibtl_clnt_t *clntp)
1658 {
1659 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_clnt_async_check(%p)", clntp);
1660 
1661 	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));
1662 
1663 	/* wait for all asyncs based on "ibtl_clnt_list" to complete */
1664 	while (clntp->clnt_async_cnt != 0) {
1665 		cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1666 	}
1667 }
1668 
1669 static void
1670 ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp)
1671 {
1672 	mutex_enter(&ibtl_clnt_list_mutex);
1673 	if (--clntp->clnt_async_cnt == 0) {
1674 		cv_broadcast(&ibtl_clnt_cv);
1675 	}
1676 	mutex_exit(&ibtl_clnt_list_mutex);
1677 }
1678 
1679 static void
1680 ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp)
1681 {
1682 	mutex_enter(&ibtl_clnt_list_mutex);
1683 	++clntp->clnt_async_cnt;
1684 	mutex_exit(&ibtl_clnt_list_mutex);
1685 }
1686 
1687 
1688 /*
1689  * Functions and data structures to inform clients that a notification
1690  * has occurred about Multicast Groups that might interest them.
1691  */
/* Argument handed to ibtl_sm_notice_task() through the taskq. */
struct ibtl_sm_notice {
	ibt_clnt_hdl_t		np_ibt_hdl;	/* client to be notified */
	ib_gid_t		np_sgid;	/* sgid passed to the handler */
	ibt_subnet_event_code_t	np_code;	/* event code to deliver */
	ibt_subnet_event_t	np_event;	/* copy of the event data */
};
1698 
1699 static void
1700 ibtl_sm_notice_task(void *arg)
1701 {
1702 	struct ibtl_sm_notice *noticep = (struct ibtl_sm_notice *)arg;
1703 	ibt_clnt_hdl_t ibt_hdl = noticep->np_ibt_hdl;
1704 	ibt_sm_notice_handler_t sm_notice_handler;
1705 
1706 	sm_notice_handler = ibt_hdl->clnt_sm_trap_handler;
1707 	if (sm_notice_handler != NULL)
1708 		sm_notice_handler(ibt_hdl->clnt_sm_trap_handler_arg,
1709 		    noticep->np_sgid, noticep->np_code, &noticep->np_event);
1710 	kmem_free(noticep, sizeof (*noticep));
1711 	ibtl_dec_clnt_async_cnt(ibt_hdl);
1712 }
1713 
1714 /*
1715  * Inform the client that MCG notices are not working at this time.
1716  */
1717 void
1718 ibtl_cm_sm_notice_init_failure(ibtl_cm_sm_init_fail_t *ifail)
1719 {
1720 	ibt_clnt_hdl_t ibt_hdl = ifail->smf_ibt_hdl;
1721 	struct ibtl_sm_notice *noticep;
1722 	ib_gid_t *sgidp = &ifail->smf_sgid[0];
1723 	int i;
1724 
1725 	for (i = 0; i < ifail->smf_num_sgids; i++) {
1726 		_NOTE(NO_COMPETING_THREADS_NOW)
1727 		noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1728 		noticep->np_ibt_hdl = ibt_hdl;
1729 		noticep->np_sgid = *sgidp++;
1730 		noticep->np_code = IBT_SM_EVENT_UNAVAILABLE;
1731 #ifndef lint
1732 		_NOTE(COMPETING_THREADS_NOW)
1733 #endif
1734 		ibtl_inc_clnt_async_cnt(ibt_hdl);
1735 		(void) taskq_dispatch(ibtl_async_taskq,
1736 		    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1737 	}
1738 }
1739 
1740 /*
1741  * Inform all clients of the event.
1742  */
1743 void
1744 ibtl_cm_sm_notice_handler(ib_gid_t sgid, ibt_subnet_event_code_t code,
1745     ibt_subnet_event_t *event)
1746 {
1747 	_NOTE(NO_COMPETING_THREADS_NOW)
1748 	struct ibtl_sm_notice	*noticep;
1749 	ibtl_clnt_t		*clntp;
1750 
1751 	mutex_enter(&ibtl_clnt_list_mutex);
1752 	clntp = ibtl_clnt_list;
1753 	while (clntp != NULL) {
1754 		if (clntp->clnt_sm_trap_handler) {
1755 			noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1756 			noticep->np_ibt_hdl = clntp;
1757 			noticep->np_sgid = sgid;
1758 			noticep->np_code = code;
1759 			noticep->np_event = *event;
1760 			++clntp->clnt_async_cnt;
1761 			(void) taskq_dispatch(ibtl_async_taskq,
1762 			    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1763 		}
1764 		clntp = clntp->clnt_list_link;
1765 	}
1766 	mutex_exit(&ibtl_clnt_list_mutex);
1767 #ifndef lint
1768 	_NOTE(COMPETING_THREADS_NOW)
1769 #endif
1770 }
1771 
1772 /*
1773  * Record the handler for this client.
1774  */
void
ibtl_cm_set_sm_notice_handler(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	_NOTE(NO_COMPETING_THREADS_NOW)
	/*
	 * Stored without taking any lock here.  These are the fields that
	 * ibtl_sm_notice_task() reads when it delivers a notice.
	 */
	ibt_hdl->clnt_sm_trap_handler = sm_notice_handler;
	ibt_hdl->clnt_sm_trap_handler_arg = private;
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif
}
1786 
1787 
1788 /*
1789  * ibtl_another_cq_handler_in_thread()
1790  *
1791  * Conditionally increase the number of cq_threads.
1792  * The number of threads grows, based on the number of cqs using threads.
1793  *
1794  * The table below controls the number of threads as follows:
1795  *
1796  *	Number of CQs	Number of cq_threads
1797  *		0		0
1798  *		1		1
1799  *		2-3		2
1800  *		4-5		3
1801  *		6-9		4
1802  *		10-15		5
1803  *		16-23		6
1804  *		24-31		7
1805  *		32+		8
1806  */
1807 
/* upper bound on the number of cq_threads */
#define	IBTL_CQ_MAXTHREADS 8
/*
 * ibtl_cq_scaling[k] is the number of CQs using threads at which
 * ibtl_another_cq_handler_in_thread() adds a cq_thread when k of them
 * already exist.
 */
static uint8_t ibtl_cq_scaling[IBTL_CQ_MAXTHREADS] = {
	1, 2, 4, 6, 10, 16, 24, 32
};

/* t_did of each cq_thread, saved so ibtl_thread_fini() can thread_join() */
static kt_did_t ibtl_cq_did[IBTL_CQ_MAXTHREADS];
1814 
1815 void
1816 ibtl_another_cq_handler_in_thread(void)
1817 {
1818 	kthread_t *t;
1819 	int my_idx;
1820 
1821 	mutex_enter(&ibtl_cq_mutex);
1822 	if ((ibtl_cq_threads == IBTL_CQ_MAXTHREADS) ||
1823 	    (++ibtl_cqs_using_threads < ibtl_cq_scaling[ibtl_cq_threads])) {
1824 		mutex_exit(&ibtl_cq_mutex);
1825 		return;
1826 	}
1827 	my_idx = ibtl_cq_threads++;
1828 	mutex_exit(&ibtl_cq_mutex);
1829 	t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0, TS_RUN,
1830 	    ibtl_pri - 1);
1831 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1832 	ibtl_cq_did[my_idx] = t->t_did;	/* save for thread_join() */
1833 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1834 }
1835 
1836 void
1837 ibtl_thread_init(void)
1838 {
1839 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init()");
1840 
1841 	mutex_init(&ibtl_async_mutex, NULL, MUTEX_DEFAULT, NULL);
1842 	cv_init(&ibtl_async_cv, NULL, CV_DEFAULT, NULL);
1843 	cv_init(&ibtl_clnt_cv, NULL, CV_DEFAULT, NULL);
1844 
1845 	mutex_init(&ibtl_cq_mutex, NULL, MUTEX_DEFAULT, NULL);
1846 	cv_init(&ibtl_cq_cv, NULL, CV_DEFAULT, NULL);
1847 }
1848 
1849 void
1850 ibtl_thread_init2(void)
1851 {
1852 	int i;
1853 	static int initted = 0;
1854 	kthread_t *t;
1855 
1856 	mutex_enter(&ibtl_async_mutex);
1857 	if (initted == 1) {
1858 		mutex_exit(&ibtl_async_mutex);
1859 		return;
1860 	}
1861 	initted = 1;
1862 	mutex_exit(&ibtl_async_mutex);
1863 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_async_did))
1864 	ibtl_async_did = kmem_zalloc(ibtl_async_thread_init * sizeof (kt_did_t),
1865 	    KM_SLEEP);
1866 
1867 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init2()");
1868 
1869 	for (i = 0; i < ibtl_async_thread_init; i++) {
1870 		t = thread_create(NULL, 0, ibtl_async_thread, NULL, 0, &p0,
1871 		    TS_RUN, ibtl_pri - 1);
1872 		ibtl_async_did[i] = t->t_did; /* thread_join() */
1873 	}
1874 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_async_did))
1875 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1876 	for (i = 0; i < ibtl_cq_threads; i++) {
1877 		t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0,
1878 		    TS_RUN, ibtl_pri - 1);
1879 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1880 		ibtl_cq_did[i] = t->t_did; /* save for thread_join() */
1881 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1882 	}
1883 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1884 }
1885 
1886 void
1887 ibtl_thread_fini(void)
1888 {
1889 	int i;
1890 
1891 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_fini()");
1892 
1893 	/* undo the work done by ibtl_thread_init() */
1894 
1895 	mutex_enter(&ibtl_cq_mutex);
1896 	ibtl_cq_thread_exit = IBTL_THREAD_EXIT;
1897 	cv_broadcast(&ibtl_cq_cv);
1898 	mutex_exit(&ibtl_cq_mutex);
1899 
1900 	mutex_enter(&ibtl_async_mutex);
1901 	ibtl_async_thread_exit = IBTL_THREAD_EXIT;
1902 	cv_broadcast(&ibtl_async_cv);
1903 	mutex_exit(&ibtl_async_mutex);
1904 
1905 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1906 	for (i = 0; i < ibtl_cq_threads; i++)
1907 		thread_join(ibtl_cq_did[i]);
1908 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
1909 
1910 	if (ibtl_async_did) {
1911 		for (i = 0; i < ibtl_async_thread_init; i++)
1912 			thread_join(ibtl_async_did[i]);
1913 
1914 		kmem_free(ibtl_async_did,
1915 		    ibtl_async_thread_init * sizeof (kt_did_t));
1916 	}
1917 	mutex_destroy(&ibtl_cq_mutex);
1918 	cv_destroy(&ibtl_cq_cv);
1919 
1920 	mutex_destroy(&ibtl_async_mutex);
1921 	cv_destroy(&ibtl_async_cv);
1922 	cv_destroy(&ibtl_clnt_cv);
1923 }
1924