xref: /illumos-gate/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c (revision 46b592853d0f4f11781b6b0a7533f267c6aee132)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/ib/ibtl/impl/ibtl.h>
27 #include <sys/ib/ibtl/impl/ibtl_cm.h>
28 #include <sys/taskq.h>
29 #include <sys/disp.h>
30 #include <sys/callb.h>
31 #include <sys/proc.h>
32 
33 /*
34  * ibtl_handlers.c
35  */
36 
37 /*
38  * What's in this file?
39  *
40  *   This file started as an implementation of Asynchronous Event/Error
41  *   handling and Completion Queue handling.  As the implementation
42  *   evolved, code has been added for other ibc_* interfaces (resume,
43  *   predetach, etc.) that use the same mechanisms as used for asyncs.
44  *
45  * Async and CQ handling at interrupt level.
46  *
47  *   CQ handling is normally done at interrupt level using the CQ callback
48  *   handler to call the appropriate IBT Client (owner of the CQ).  For
49  *   clients that would prefer a fully flexible non-interrupt context to
50  *   do their CQ handling, a CQ can be created so that its handler is
51  *   called from a non-interrupt thread.  CQ handling is done frequently
52  *   whereas Async handling is expected to occur very infrequently.
53  *
54  *   Async handling is done by marking (or'ing in of an async_code of) the
55  *   pertinent IBTL data structure, and then notifying the async_thread(s)
56  *   that the data structure has async work to be done.  The notification
57  *   occurs by linking the data structure through its async_link onto a
58  *   list of like data structures and waking up an async_thread.  This
59  *   list append is not done if there is already async work pending on
60  *   this data structure (IBTL_ASYNC_PENDING).
61  *
62  * Async Mutex and CQ Mutex
63  *
64  *   The global ibtl_async_mutex is "the" mutex used to control access
65  *   to all the data needed by ibc_async_handler.  All the threads that
66  *   use this mutex are written so that the mutex is held for very short
67  *   periods of time, and never held while making calls to functions
68  *   that may block.
69  *
70  *   The global ibtl_cq_mutex is used similarly by ibc_cq_handler and
71  *   the ibtl_cq_thread(s).
72  *
73  * Mutex hierarchy
74  *
75  *   The ibtl_clnt_list_mutex is above the ibtl_async_mutex.
76  *   ibtl_clnt_list_mutex protects all of the various lists.
77  *   The ibtl_async_mutex is below this in the hierarchy.
78  *
79  *   The ibtl_cq_mutex is independent of the above mutexes.
80  *
81  * Threads
82  *
83  *   There are "ibtl_cq_threads" number of threads created for handling
84  *   Completion Queues in threads.  If this feature really gets used,
85  *   then we will want to do some suitable tuning.  Similarly, we may
86  *   want to tune the number of "ibtl_async_thread_init".
87  *
88  *   The function ibtl_cq_thread is the main loop for handling a CQ in a
89  *   thread.  There can be multiple threads executing this same code.
90  *   The code sleeps when there is no work to be done (list is empty),
91  *   otherwise it pulls the first CQ structure off the list and performs
92  *   the CQ handler callback to the client.  After that returns, a check
93  *   is made, and if another ibc_cq_handler call was made for this CQ,
94  *   the client is called again.
95  *
96  *   The function ibtl_async_thread is the main loop for handling async
97  *   events/errors.  There can be multiple threads executing this same code.
98  *   The code sleeps when there is no work to be done (lists are empty),
99  *   otherwise it pulls the first structure off one of the lists and
100  *   performs the async callback(s) to the client(s).  Note that HCA
101  *   async handling is done by calling each of the clients using the HCA.
102  *   When the async handling completes, the data structure having the async
103  *   event/error is checked for more work before it's considered "done".
104  *
105  * Taskq
106  *
107  *   The async_taskq is used here for allowing async handler callbacks to
108  *   occur simultaneously to multiple clients of an HCA.  This taskq could
109  *   be used for other purposes, e.g., if all the async_threads are in
110  *   use, but this is deemed as overkill since asyncs should occur rarely.
111  */
112 
/* Globals */
/* module tag passed as the first argument of the IBTF_DPRINTF_*() calls */
static char ibtf_handlers[] = "ibtl_handlers";

/* priority for IBTL threads (async, cq, and taskq) */
static pri_t ibtl_pri = MAXCLSYSPRI - 1; /* maybe override in /etc/system */

/* taskq used for HCA asyncs */
#define	ibtl_async_taskq system_taskq

/* data for async handling by threads */
static kmutex_t ibtl_async_mutex;	/* protects most *_async_* data */
static kcondvar_t ibtl_async_cv;	/* async_threads wait on this */
/*
 * ibt_detach might wait on this; it is broadcast when a client's
 * clnt_async_cnt drops to zero.
 */
static kcondvar_t ibtl_clnt_cv;		/* ibt_detach might wait on this */
static void ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp);
static void ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp);

static kt_did_t *ibtl_async_did;	/* for thread_join() */
int ibtl_async_thread_init = 4;	/* total # of async_threads to create */
static int ibtl_async_thread_exit = 0;	/* set if/when thread(s) should exit */

/*
 * async lists for various structures
 *
 * Each pair is the head and tail of a singly linked list threaded through
 * the object's *_async_link field.  ibc_async_handler() appends at the
 * tail while holding ibtl_async_mutex.
 */
static ibtl_hca_devinfo_t *ibtl_async_hca_list_start, *ibtl_async_hca_list_end;
static ibtl_eec_t *ibtl_async_eec_list_start, *ibtl_async_eec_list_end;
static ibtl_qp_t *ibtl_async_qp_list_start, *ibtl_async_qp_list_end;
static ibtl_cq_t *ibtl_async_cq_list_start, *ibtl_async_cq_list_end;
static ibtl_srq_t *ibtl_async_srq_list_start, *ibtl_async_srq_list_end;

/* data for CQ completion handling by threads */
static kmutex_t ibtl_cq_mutex;	/* protects the cv and the list below */
static kcondvar_t ibtl_cq_cv;
static ibtl_cq_t *ibtl_cq_list_start, *ibtl_cq_list_end;

static int ibtl_cq_threads = 0;		/* total # of cq threads */
static int ibtl_cqs_using_threads = 0;	/* total # of cqs using threads */
static int ibtl_cq_thread_exit = 0;	/* set if/when thread(s) should exit */

/* value used to tell IBTL threads to exit */
#define	IBTL_THREAD_EXIT 0x1b7fdead	/* IBTF DEAD */
/* Cisco Topspin Vendor ID for Rereg hack */
#define	IBT_VENDOR_CISCO 0x05ad

/* when nonzero, ibc_async_handler() logs and drops all EEC async codes */
int ibtl_eec_not_supported = 1;

/* name of the client most recently called by ibtl_async_client_call() */
char *ibtl_last_client_name;	/* may help debugging */
/* signature of the node-info lookup used by the Cisco rereg hack */
typedef ibt_status_t (*ibtl_node_info_cb_t)(ib_guid_t, uint8_t, ib_lid_t,
    ibt_node_info_t *);

/* set by ibtl_cm_set_node_info_cb() under ibtl_clnt_list_mutex */
ibtl_node_info_cb_t ibtl_node_info_cb;

_NOTE(LOCK_ORDER(ibtl_clnt_list_mutex ibtl_async_mutex))
163 
/*
 * ibtl_cm_set_node_info_cb()
 *
 *	Record the node-info lookup callback in the global
 *	ibtl_node_info_cb.  The store is done while holding
 *	ibtl_clnt_list_mutex.
 *
 *	node_info_cb	Callback to record.  A NULL value is stored as-is;
 *			ibtl_cm_get_node_info() returns without dispatching
 *			anything when handed a NULL callback.
 */
void
ibtl_cm_set_node_info_cb(ibt_status_t (*node_info_cb)(ib_guid_t, uint8_t,
    ib_lid_t, ibt_node_info_t *))
{
	mutex_enter(&ibtl_clnt_list_mutex);
	ibtl_node_info_cb = node_info_cb;
	mutex_exit(&ibtl_clnt_list_mutex);
}
172 
/*
 * ibc_async_handler()
 *
 * Asynchronous Event/Error Handler.
 *
 *	This is the function called by HCA drivers to post the various async
 *	events and errors mentioned in the IB architecture spec.  See
 *	ibtl_types.h for additional details of this.
 *
 *	This function marks the pertinent IBTF object with the async_code,
 *	and queues the object for handling by an ibtl_async_thread.  If
 *	the object is NOT already marked for async processing, it is added
 *	to the associated list for that type of object, and an
 *	ibtl_async_thread is signaled to finish the async work.
 *
 *	hca_devp	HCA the event was raised on.  It is only dereferenced
 *			for the HCA-level codes (local catastrophic and the
 *			port events).
 *	code		The async event/error code being posted.
 *	event_p		Event details: the affected object's handle
 *			(ev_qp_hdl, ev_cq_hdl, ev_srq_hdl or ev_eec_hdl),
 *			ev_fma_ena, and for port events ev_port and
 *			ev_port_flags.
 *
 *	All of the work is done with ibtl_async_mutex held.  An event with a
 *	NULL object handle, an out-of-range port number, or an unrecognized
 *	code is logged and dropped.
 */
void
ibc_async_handler(ibc_clnt_hdl_t hca_devp, ibt_async_code_t code,
    ibc_async_event_t *event_p)
{
	ibtl_qp_t	*ibtl_qp;
	ibtl_cq_t	*ibtl_cq;
	ibtl_srq_t	*ibtl_srq;
	ibtl_eec_t	*ibtl_eec;
	uint8_t		port_minus1;

	ibtl_async_port_event_t	*portp;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler(%p, 0x%x, %p)",
	    hca_devp, code, event_p);

	mutex_enter(&ibtl_async_mutex);

	switch (code) {
	case IBT_EVENT_PATH_MIGRATED_QP:
	case IBT_EVENT_SQD:
	case IBT_ERROR_CATASTROPHIC_QP:
	case IBT_ERROR_PATH_MIGRATE_REQ_QP:
	case IBT_EVENT_COM_EST_QP:
	case IBT_ERROR_INVALID_REQUEST_QP:
	case IBT_ERROR_ACCESS_VIOLATION_QP:
	case IBT_EVENT_EMPTY_QP:
		ibtl_qp = event_p->ev_qp_hdl;
		if (ibtl_qp == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad qp handle");
			break;
		}
		/* each QP error code keeps its own FMA ena on the QP */
		switch (code) {
		case IBT_ERROR_CATASTROPHIC_QP:
			ibtl_qp->qp_cat_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_PATH_MIGRATE_REQ_QP:
			ibtl_qp->qp_pth_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_INVALID_REQUEST_QP:
			ibtl_qp->qp_inv_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_ACCESS_VIOLATION_QP:
			ibtl_qp->qp_acc_fma_ena = event_p->ev_fma_ena; break;
		}

		ibtl_qp->qp_async_codes |= code;
		/* append to the QP list only if it is not already queued */
		if ((ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_qp->qp_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_qp->qp_async_link = NULL;
			if (ibtl_async_qp_list_end == NULL)
				ibtl_async_qp_list_start = ibtl_qp;
			else
				ibtl_async_qp_list_end->qp_async_link = ibtl_qp;
			ibtl_async_qp_list_end = ibtl_qp;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_ERROR_CQ:
		ibtl_cq = event_p->ev_cq_hdl;
		if (ibtl_cq == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad cq handle");
			break;
		}
		ibtl_cq->cq_async_codes |= code;
		ibtl_cq->cq_fma_ena = event_p->ev_fma_ena;
		/* append to the CQ list only if it is not already queued */
		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_cq->cq_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_cq->cq_async_link = NULL;
			if (ibtl_async_cq_list_end == NULL)
				ibtl_async_cq_list_start = ibtl_cq;
			else
				ibtl_async_cq_list_end->cq_async_link = ibtl_cq;
			ibtl_async_cq_list_end = ibtl_cq;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_ERROR_CATASTROPHIC_SRQ:
	case IBT_EVENT_LIMIT_REACHED_SRQ:
		ibtl_srq = event_p->ev_srq_hdl;
		if (ibtl_srq == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad srq handle");
			break;
		}
		ibtl_srq->srq_async_codes |= code;
		ibtl_srq->srq_fma_ena = event_p->ev_fma_ena;
		/* append to the SRQ list only if it is not already queued */
		if ((ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_srq->srq_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_srq->srq_async_link = NULL;
			if (ibtl_async_srq_list_end == NULL)
				ibtl_async_srq_list_start = ibtl_srq;
			else
				ibtl_async_srq_list_end->srq_async_link =
				    ibtl_srq;
			ibtl_async_srq_list_end = ibtl_srq;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_EVENT_PATH_MIGRATED_EEC:
	case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
	case IBT_ERROR_CATASTROPHIC_EEC:
	case IBT_EVENT_COM_EST_EEC:
		/* EEC events are dropped unless the global switch is cleared */
		if (ibtl_eec_not_supported) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "EEC events are disabled.");
			break;
		}
		ibtl_eec = event_p->ev_eec_hdl;
		if (ibtl_eec == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad eec handle");
			break;
		}
		switch (code) {
		case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
			ibtl_eec->eec_pth_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_CATASTROPHIC_EEC:
			ibtl_eec->eec_cat_fma_ena = event_p->ev_fma_ena; break;
		}
		ibtl_eec->eec_async_codes |= code;
		/* append to the EEC list only if it is not already queued */
		if ((ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_eec->eec_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_eec->eec_async_link = NULL;
			if (ibtl_async_eec_list_end == NULL)
				ibtl_async_eec_list_start = ibtl_eec;
			else
				ibtl_async_eec_list_end->eec_async_link =
				    ibtl_eec;
			ibtl_async_eec_list_end = ibtl_eec;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_ERROR_LOCAL_CATASTROPHIC:
		hca_devp->hd_async_codes |= code;
		hca_devp->hd_fma_ena = event_p->ev_fma_ena;
		/* FALLTHROUGH */

	case IBT_EVENT_PORT_UP:
	case IBT_PORT_CHANGE_EVENT:
	case IBT_CLNT_REREG_EVENT:
	case IBT_ERROR_PORT_DOWN:
		/*
		 * Per-port bookkeeping applies only to the port codes; the
		 * local catastrophic code that fell through above goes
		 * straight to the queueing step below.
		 */
		if ((code & IBT_PORT_EVENTS) != 0) {
			/*
			 * port_minus1 is a uint8_t, so a port number of 0
			 * wraps to 255 here -- presumably always rejected
			 * by this range check (assumes hca_nports < 255).
			 */
			if ((port_minus1 = event_p->ev_port - 1) >=
			    hca_devp->hd_hca_attr->hca_nports) {
				IBTF_DPRINTF_L2(ibtf_handlers,
				    "ibc_async_handler: bad port #: %d",
				    event_p->ev_port);
				break;
			}
			portp = &hca_devp->hd_async_port[port_minus1];
			if (code == IBT_EVENT_PORT_UP) {
				/*
				 * The port is just coming UP we can't have any
				 * valid older events.
				 */
				portp->status = IBTL_HCA_PORT_UP;
			} else if (code == IBT_ERROR_PORT_DOWN) {
				/*
				 * The port is going DOWN older events don't
				 * count.
				 */
				portp->status = IBTL_HCA_PORT_DOWN;
			} else if (code == IBT_PORT_CHANGE_EVENT) {
				/*
				 * For port UP and DOWN events only the latest
				 * event counts. If we get a UP after DOWN it
				 * is sufficient to send just UP and vice versa.
				 * In the case of port CHANGE event it is valid
				 * only when the port is UP already but if we
				 * receive it after UP but before UP is
				 * delivered we still need to deliver CHANGE
				 * after we deliver UP event.
				 *
				 * We will not get a CHANGE event when the port
				 * is down or DOWN event is pending.
				 */
				portp->flags |= event_p->ev_port_flags;
				portp->status |= IBTL_HCA_PORT_CHG;
			} else if (code == IBT_CLNT_REREG_EVENT) {
				/*
				 * SM has requested a re-register of
				 * subscription to SM events notification.
				 */
				portp->status |= IBTL_HCA_PORT_ASYNC_CLNT_REREG;
			}

			hca_devp->hd_async_codes |= code;
		}

		/* append to the HCA list only if it is not already queued */
		if ((hca_devp->hd_async_flags & IBTL_ASYNC_PENDING) == 0) {
			hca_devp->hd_async_flags |= IBTL_ASYNC_PENDING;
			hca_devp->hd_async_link = NULL;
			if (ibtl_async_hca_list_end == NULL)
				ibtl_async_hca_list_start = hca_devp;
			else
				ibtl_async_hca_list_end->hd_async_link =
				    hca_devp;
			ibtl_async_hca_list_end = hca_devp;
			cv_signal(&ibtl_async_cv);
		}

		break;

	default:
		IBTF_DPRINTF_L1(ibtf_handlers, "ibc_async_handler: "
		    "invalid code (0x%x)", code);
	}

	mutex_exit(&ibtl_async_mutex);
}
401 
402 
403 /* Finally, make the async call to the client. */
404 
405 static void
406 ibtl_async_client_call(ibtl_hca_t *ibt_hca, ibt_async_code_t code,
407     ibt_async_event_t *event_p)
408 {
409 	ibtl_clnt_t		*clntp;
410 	void			*client_private;
411 	ibt_async_handler_t	async_handler;
412 	char			*client_name;
413 
414 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call(%p, 0x%x, %p)",
415 	    ibt_hca, code, event_p);
416 
417 	clntp = ibt_hca->ha_clnt_devp;
418 
419 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
420 	/* Record who is being called (just a debugging aid) */
421 	ibtl_last_client_name = client_name = clntp->clnt_name;
422 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
423 
424 	client_private = clntp->clnt_private;
425 	async_handler = clntp->clnt_modinfop->mi_async_handler;
426 
427 	if (code & (IBT_EVENT_COM_EST_QP | IBT_EVENT_COM_EST_EEC)) {
428 		mutex_enter(&ibtl_clnt_list_mutex);
429 		async_handler = ibtl_cm_async_handler;
430 		client_private = ibtl_cm_clnt_private;
431 		mutex_exit(&ibtl_clnt_list_mutex);
432 		ibt_hca = NULL;
433 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
434 		    "calling CM for COM_EST");
435 	} else {
436 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
437 		    "calling client '%s'", client_name);
438 	}
439 	if (async_handler != NULL)
440 		async_handler(client_private, ibt_hca, code, event_p);
441 	else
442 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
443 		    "client '%s' has no async handler", client_name);
444 }
445 
/*
 * Inform CM or DM about HCA events.
 *
 *	We use taskqs to allow simultaneous notification, with sleeping.
 *	Since taskqs only allow one argument, we define a structure
 *	because we need to pass in more than one argument.
 *
 *	The structure is allocated by the dispatching function and freed
 *	by the task function that receives it.
 */

struct ibtl_mgr_s {
	/* HCA whose hd_async_code and hd_async_event are to be delivered */
	ibtl_hca_devinfo_t	*mgr_hca_devp;
	/*
	 * Handler the task calls.  ibtl_cm_get_node_info() stores a
	 * node-info callback here, which its task casts back to
	 * ibtl_node_info_cb_t before calling.
	 */
	ibt_async_handler_t	mgr_async_handler;
	/* first argument passed to mgr_async_handler */
	void			*mgr_clnt_private;
};
459 
460 /*
461  * Asyncs of HCA level events for CM and DM.  Call CM or DM and tell them
462  * about the HCA for the event recorded in the ibtl_hca_devinfo_t.
463  */
464 static void
465 ibtl_do_mgr_async_task(void *arg)
466 {
467 	struct ibtl_mgr_s	*mgrp = (struct ibtl_mgr_s *)arg;
468 	ibtl_hca_devinfo_t	*hca_devp = mgrp->mgr_hca_devp;
469 
470 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_mgr_async_task(0x%x)",
471 	    hca_devp->hd_async_code);
472 
473 	mgrp->mgr_async_handler(mgrp->mgr_clnt_private, NULL,
474 	    hca_devp->hd_async_code, &hca_devp->hd_async_event);
475 	kmem_free(mgrp, sizeof (*mgrp));
476 
477 	mutex_enter(&ibtl_clnt_list_mutex);
478 	if (--hca_devp->hd_async_task_cnt == 0)
479 		cv_signal(&hca_devp->hd_async_task_cv);
480 	mutex_exit(&ibtl_clnt_list_mutex);
481 }
482 
483 static void
484 ibt_cisco_embedded_sm_rereg_fix(void *arg)
485 {
486 	struct ibtl_mgr_s *mgrp = arg;
487 	ibtl_hca_devinfo_t *hca_devp;
488 	ibt_node_info_t node_info;
489 	ibt_status_t ibt_status;
490 	ibtl_async_port_event_t *portp;
491 	ib_lid_t sm_lid;
492 	ib_guid_t hca_guid;
493 	ibt_async_event_t *event_p;
494 	ibt_hca_portinfo_t *pinfop;
495 	uint8_t	port;
496 
497 	hca_devp = mgrp->mgr_hca_devp;
498 
499 	mutex_enter(&ibtl_clnt_list_mutex);
500 	event_p = &hca_devp->hd_async_event;
501 	port = event_p->ev_port;
502 	portp = &hca_devp->hd_async_port[port - 1];
503 	pinfop = &hca_devp->hd_portinfop[port - 1];
504 	sm_lid = pinfop->p_sm_lid;
505 	hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
506 	mutex_exit(&ibtl_clnt_list_mutex);
507 
508 	ibt_status = ((ibtl_node_info_cb_t)mgrp->mgr_async_handler)(hca_guid,
509 	    port, sm_lid, &node_info);
510 	if (ibt_status == IBT_SUCCESS) {
511 		if ((node_info.n_vendor_id == IBT_VENDOR_CISCO) &&
512 		    (node_info.n_node_type == IBT_NODE_TYPE_SWITCH)) {
513 			mutex_enter(&ibtl_async_mutex);
514 			portp->status |= IBTL_HCA_PORT_ASYNC_CLNT_REREG;
515 			hca_devp->hd_async_codes |= IBT_CLNT_REREG_EVENT;
516 			mutex_exit(&ibtl_async_mutex);
517 		}
518 	}
519 	kmem_free(mgrp, sizeof (*mgrp));
520 
521 	mutex_enter(&ibtl_clnt_list_mutex);
522 	if (--hca_devp->hd_async_task_cnt == 0)
523 		cv_signal(&hca_devp->hd_async_task_cv);
524 	mutex_exit(&ibtl_clnt_list_mutex);
525 }
526 
527 static void
528 ibtl_cm_get_node_info(ibtl_hca_devinfo_t *hca_devp,
529     ibt_async_handler_t async_handler)
530 {
531 	struct ibtl_mgr_s *mgrp;
532 
533 	if (async_handler == NULL)
534 		return;
535 
536 	_NOTE(NO_COMPETING_THREADS_NOW)
537 	mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP);
538 	mgrp->mgr_hca_devp = hca_devp;
539 	mgrp->mgr_async_handler = async_handler;
540 	mgrp->mgr_clnt_private = NULL;
541 	hca_devp->hd_async_task_cnt++;
542 
543 	(void) taskq_dispatch(ibtl_async_taskq,
544 	    ibt_cisco_embedded_sm_rereg_fix, mgrp, TQ_SLEEP);
545 #ifndef lint
546 	_NOTE(COMPETING_THREADS_NOW)
547 #endif
548 }
549 
550 static void
551 ibtl_tell_mgr(ibtl_hca_devinfo_t *hca_devp, ibt_async_handler_t async_handler,
552     void *clnt_private)
553 {
554 	struct ibtl_mgr_s *mgrp;
555 
556 	if (async_handler == NULL)
557 		return;
558 
559 	_NOTE(NO_COMPETING_THREADS_NOW)
560 	mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP);
561 	mgrp->mgr_hca_devp = hca_devp;
562 	mgrp->mgr_async_handler = async_handler;
563 	mgrp->mgr_clnt_private = clnt_private;
564 	hca_devp->hd_async_task_cnt++;
565 
566 	(void) taskq_dispatch(ibtl_async_taskq, ibtl_do_mgr_async_task, mgrp,
567 	    TQ_SLEEP);
568 #ifndef lint
569 	_NOTE(COMPETING_THREADS_NOW)
570 #endif
571 }
572 
573 /*
574  * Per client-device asyncs for HCA level events.  Call each client that is
575  * using the HCA for the event recorded in the ibtl_hca_devinfo_t.
576  */
577 static void
578 ibtl_hca_client_async_task(void *arg)
579 {
580 	ibtl_hca_t		*ibt_hca = (ibtl_hca_t *)arg;
581 	ibtl_hca_devinfo_t	*hca_devp = ibt_hca->ha_hca_devp;
582 	ibtl_clnt_t		*clntp = ibt_hca->ha_clnt_devp;
583 	ibt_async_event_t	async_event;
584 
585 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_hca_client_async_task(%p, 0x%x)",
586 	    ibt_hca, hca_devp->hd_async_code);
587 
588 	bcopy(&hca_devp->hd_async_event, &async_event, sizeof (async_event));
589 	ibtl_async_client_call(ibt_hca, hca_devp->hd_async_code, &async_event);
590 
591 	mutex_enter(&ibtl_async_mutex);
592 	if (--ibt_hca->ha_async_cnt == 0 &&
593 	    (ibt_hca->ha_async_flags & IBTL_ASYNC_FREE_OBJECT)) {
594 		mutex_exit(&ibtl_async_mutex);
595 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
596 	} else
597 		mutex_exit(&ibtl_async_mutex);
598 
599 	mutex_enter(&ibtl_clnt_list_mutex);
600 	if (--hca_devp->hd_async_task_cnt == 0)
601 		cv_signal(&hca_devp->hd_async_task_cv);
602 	if (--clntp->clnt_async_cnt == 0)
603 		cv_broadcast(&ibtl_clnt_cv);
604 
605 	mutex_exit(&ibtl_clnt_list_mutex);
606 }
607 
/*
 * Asyncs for HCA level events.
 *
 * The function continues to run until there are no more async
 * events/errors for this HCA.  An event is chosen for dispatch
 * to all clients of this HCA.  This thread dispatches them via
 * the ibtl_async_taskq, then sleeps until all tasks are done.
 *
 * This thread records the async_code and async_event in the
 * ibtl_hca_devinfo_t for all client taskq threads to reference.
 *
 * This is called from an async or taskq thread with ibtl_async_mutex held.
 *
 * Locking, as implemented below: ibtl_async_mutex is dropped on entry so
 * that ibtl_clnt_list_mutex can be taken first, then ibtl_async_mutex is
 * re-taken.  hd_async_busy (guarded by ibtl_clnt_list_mutex) keeps more
 * than one thread from running this loop for the same HCA.  While tasks
 * are being dispatched and waited for, only ibtl_clnt_list_mutex is held.
 * On return ibtl_async_mutex is held again and ibtl_clnt_list_mutex has
 * been released.
 */
static void
ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp)
{
	ibtl_hca_t			*ibt_hca;
	ibt_async_event_t		*eventp;
	ibt_async_code_t		code;
	ibtl_async_port_status_t  	temp;
	uint8_t				nports;
	uint8_t				port_minus1;
	ibtl_async_port_event_t		*portp;

	mutex_exit(&ibtl_async_mutex);

	mutex_enter(&ibtl_clnt_list_mutex);
	/* wait our turn: one thread at a time per HCA */
	while (hca_devp->hd_async_busy)
		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
	hca_devp->hd_async_busy = 1;
	mutex_enter(&ibtl_async_mutex);

	bzero(&hca_devp->hd_async_event, sizeof (hca_devp->hd_async_event));
	for (;;) {

		hca_devp->hd_async_event.ev_fma_ena = 0;

		/*
		 * Pick exactly one pending code, in the fixed order tested
		 * here.  For port codes, temp is the matching per-port
		 * status bit to search for below; it is left unset (and
		 * unused) for the local catastrophic code.
		 */
		code = hca_devp->hd_async_codes;
		if (code & IBT_ERROR_LOCAL_CATASTROPHIC) {
			code = IBT_ERROR_LOCAL_CATASTROPHIC;
			hca_devp->hd_async_event.ev_fma_ena =
			    hca_devp->hd_fma_ena;
		} else if (code & IBT_ERROR_PORT_DOWN) {
			code = IBT_ERROR_PORT_DOWN;
			temp = IBTL_HCA_PORT_DOWN;
		} else if (code & IBT_EVENT_PORT_UP) {
			code = IBT_EVENT_PORT_UP;
			temp = IBTL_HCA_PORT_UP;
		} else if (code & IBT_PORT_CHANGE_EVENT) {
			code = IBT_PORT_CHANGE_EVENT;
			temp = IBTL_HCA_PORT_CHG;
		} else if (code & IBT_CLNT_REREG_EVENT) {
			code = IBT_CLNT_REREG_EVENT;
			temp = IBTL_HCA_PORT_ASYNC_CLNT_REREG;
		} else {
			/* nothing recognized is pending; discard the rest */
			hca_devp->hd_async_codes = 0;
			code = 0;
		}

		if (code == 0) {
			/* all done; leave the loop holding ibtl_async_mutex */
			hca_devp->hd_async_flags &= ~IBTL_ASYNC_PENDING;
			break;
		}
		hca_devp->hd_async_codes &= ~code;

		/* PORT_UP, PORT_CHANGE, PORT_DOWN or ASYNC_REREG */
		if ((code & IBT_PORT_EVENTS) != 0) {
			portp = hca_devp->hd_async_port;
			nports = hca_devp->hd_hca_attr->hca_nports;
			for (port_minus1 = 0; port_minus1 < nports;
			    port_minus1++) {
				/*
				 * Matching event in this port, let's go handle
				 * it.
				 */
				if ((portp[port_minus1].status & temp) != 0)
					break;
			}
			if (port_minus1 >= nports) {
				/* we checked again, but found nothing */
				continue;
			}
			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_do_hca_asyncs: "
			    "async: port# %x code %x", port_minus1 + 1, code);
			/* mark it to check for other ports after we're done */
			hca_devp->hd_async_codes |= code;

			/*
			 * Copy the event information into hca_devp and clear
			 * event information from the per port data.
			 */
			hca_devp->hd_async_event.ev_port = port_minus1 + 1;
			if (temp == IBTL_HCA_PORT_CHG) {
				hca_devp->hd_async_event.ev_port_flags =
				    hca_devp->hd_async_port[port_minus1].flags;
				hca_devp->hd_async_port[port_minus1].flags = 0;
			}
			hca_devp->hd_async_port[port_minus1].status &= ~temp;

			/* port info is refreshed without ibtl_async_mutex */
			mutex_exit(&ibtl_async_mutex);
			ibtl_reinit_hca_portinfo(hca_devp, port_minus1 + 1);
			mutex_enter(&ibtl_async_mutex);
			/*
			 * NOTE: eventp is assigned only on this port-event
			 * path.  Its one use below is guarded by
			 * code == IBT_PORT_CHANGE_EVENT, which presumably
			 * always passes through here (assumes that code is
			 * part of IBT_PORT_EVENTS -- confirm).
			 */
			eventp = &hca_devp->hd_async_event;
			eventp->ev_hca_guid =
			    hca_devp->hd_hca_attr->hca_node_guid;
		}

		/* publish the chosen code for the taskq threads to read */
		hca_devp->hd_async_code = code;
		hca_devp->hd_async_event.ev_hca_guid =
		    hca_devp->hd_hca_attr->hca_node_guid;
		mutex_exit(&ibtl_async_mutex);

		/*
		 * Make sure to inform CM, DM, and IBMA if we know of them.
		 * Also, make sure not to inform them a second time, which
		 * would occur if they have the HCA open.
		 */

		/* IBMA is told first, on its own, and waited for */
		if (ibtl_ibma_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_ibma_async_handler,
			    ibtl_ibma_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/*
		 * Hack Alert:
		 * The ibmf handler would have updated the Master SM LID if it
		 * was SM LID change event. Now lets check if the new Master SM
		 * is a Embedded Cisco Topspin SM.
		 */
		if ((code == IBT_PORT_CHANGE_EVENT) &&
		    eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID)
			ibtl_cm_get_node_info(hca_devp,
			    (ibt_async_handler_t)ibtl_node_info_cb);
		/* wait for node info task to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/* DM and CM are told next; their tasks may run concurrently */
		if (ibtl_dm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_dm_async_handler,
			    ibtl_dm_clnt_private);
		if (ibtl_cm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
			    ibtl_cm_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/* finally, one task per remaining client of this HCA */
		for (ibt_hca = hca_devp->hd_clnt_list;
		    ibt_hca != NULL;
		    ibt_hca = ibt_hca->ha_clnt_link) {

			/* Managers are handled above */
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_cm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_dm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_ibma_async_handler)
				continue;
			/*
			 * Take the counts that ibtl_hca_client_async_task()
			 * drops again when it finishes.
			 */
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;

			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;
			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}

		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/* re-take the async mutex before looking for more work */
		mutex_enter(&ibtl_async_mutex);
	}
	hca_devp->hd_async_code = 0;
	/* let the next thread waiting on this HCA proceed */
	hca_devp->hd_async_busy = 0;
	cv_broadcast(&hca_devp->hd_async_busy_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}
796 
797 /*
798  * Asyncs for QP objects.
799  *
800  * The function continues to run until there are no more async
801  * events/errors for this object.
802  */
803 static void
804 ibtl_do_qp_asyncs(ibtl_qp_t *ibtl_qp)
805 {
806 	ibt_async_code_t	code;
807 	ibt_async_event_t	async_event;
808 
809 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
810 	bzero(&async_event, sizeof (async_event));
811 	async_event.ev_chan_hdl = IBTL_QP2CHAN(ibtl_qp);
812 
813 	while ((code = ibtl_qp->qp_async_codes) != 0) {
814 		async_event.ev_fma_ena = 0;
815 		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT)
816 			code = 0;	/* fallthrough to "kmem_free" */
817 		else if (code & IBT_ERROR_CATASTROPHIC_QP) {
818 			code = IBT_ERROR_CATASTROPHIC_QP;
819 			async_event.ev_fma_ena = ibtl_qp->qp_cat_fma_ena;
820 		} else if (code & IBT_ERROR_INVALID_REQUEST_QP) {
821 			code = IBT_ERROR_INVALID_REQUEST_QP;
822 			async_event.ev_fma_ena = ibtl_qp->qp_inv_fma_ena;
823 		} else if (code & IBT_ERROR_ACCESS_VIOLATION_QP) {
824 			code = IBT_ERROR_ACCESS_VIOLATION_QP;
825 			async_event.ev_fma_ena = ibtl_qp->qp_acc_fma_ena;
826 		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_QP) {
827 			code = IBT_ERROR_PATH_MIGRATE_REQ_QP;
828 			async_event.ev_fma_ena = ibtl_qp->qp_pth_fma_ena;
829 		} else if (code & IBT_EVENT_PATH_MIGRATED_QP)
830 			code = IBT_EVENT_PATH_MIGRATED_QP;
831 		else if (code & IBT_EVENT_SQD)
832 			code = IBT_EVENT_SQD;
833 		else if (code & IBT_EVENT_COM_EST_QP)
834 			code = IBT_EVENT_COM_EST_QP;
835 		else if (code & IBT_EVENT_EMPTY_QP)
836 			code = IBT_EVENT_EMPTY_QP;
837 		else {
838 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_qp_asyncs: "
839 			    "async: unexpected QP async code 0x%x", code);
840 			ibtl_qp->qp_async_codes = 0;
841 			code = 0;
842 		}
843 		ibtl_qp->qp_async_codes &= ~code;
844 
845 		if (code) {
846 			mutex_exit(&ibtl_async_mutex);
847 			ibtl_async_client_call(ibtl_qp->qp_hca,
848 			    code, &async_event);
849 			mutex_enter(&ibtl_async_mutex);
850 		}
851 
852 		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT) {
853 			mutex_exit(&ibtl_async_mutex);
854 			cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
855 			mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
856 			kmem_free(IBTL_QP2CHAN(ibtl_qp),
857 			    sizeof (ibtl_channel_t));
858 			mutex_enter(&ibtl_async_mutex);
859 			return;
860 		}
861 	}
862 	ibtl_qp->qp_async_flags &= ~IBTL_ASYNC_PENDING;
863 }
864 
865 /*
866  * Asyncs for SRQ objects.
867  *
868  * The function continues to run until there are no more async
869  * events/errors for this object.
870  */
871 static void
872 ibtl_do_srq_asyncs(ibtl_srq_t *ibtl_srq)
873 {
874 	ibt_async_code_t	code;
875 	ibt_async_event_t	async_event;
876 
877 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
878 	bzero(&async_event, sizeof (async_event));
879 	async_event.ev_srq_hdl = ibtl_srq;
880 	async_event.ev_fma_ena = ibtl_srq->srq_fma_ena;
881 
882 	while ((code = ibtl_srq->srq_async_codes) != 0) {
883 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT)
884 			code = 0;	/* fallthrough to "kmem_free" */
885 		else if (code & IBT_ERROR_CATASTROPHIC_SRQ)
886 			code = IBT_ERROR_CATASTROPHIC_SRQ;
887 		else if (code & IBT_EVENT_LIMIT_REACHED_SRQ)
888 			code = IBT_EVENT_LIMIT_REACHED_SRQ;
889 		else {
890 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_srq_asyncs: "
891 			    "async: unexpected SRQ async code 0x%x", code);
892 			ibtl_srq->srq_async_codes = 0;
893 			code = 0;
894 		}
895 		ibtl_srq->srq_async_codes &= ~code;
896 
897 		if (code) {
898 			mutex_exit(&ibtl_async_mutex);
899 			ibtl_async_client_call(ibtl_srq->srq_hca,
900 			    code, &async_event);
901 			mutex_enter(&ibtl_async_mutex);
902 		}
903 
904 		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
905 			mutex_exit(&ibtl_async_mutex);
906 			kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
907 			mutex_enter(&ibtl_async_mutex);
908 			return;
909 		}
910 	}
911 	ibtl_srq->srq_async_flags &= ~IBTL_ASYNC_PENDING;
912 }
913 
914 /*
915  * Asyncs for CQ objects.
916  *
917  * The function continues to run until there are no more async
918  * events/errors for this object.
919  */
920 static void
921 ibtl_do_cq_asyncs(ibtl_cq_t *ibtl_cq)
922 {
923 	ibt_async_code_t	code;
924 	ibt_async_event_t	async_event;
925 
926 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
927 	bzero(&async_event, sizeof (async_event));
928 	async_event.ev_cq_hdl = ibtl_cq;
929 	async_event.ev_fma_ena = ibtl_cq->cq_fma_ena;
930 
931 	while ((code = ibtl_cq->cq_async_codes) != 0) {
932 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT)
933 			code = 0;	/* fallthrough to "kmem_free" */
934 		else if (code & IBT_ERROR_CQ)
935 			code = IBT_ERROR_CQ;
936 		else {
937 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_cq_asyncs: "
938 			    "async: unexpected CQ async code 0x%x", code);
939 			ibtl_cq->cq_async_codes = 0;
940 			code = 0;
941 		}
942 		ibtl_cq->cq_async_codes &= ~code;
943 
944 		if (code) {
945 			mutex_exit(&ibtl_async_mutex);
946 			ibtl_async_client_call(ibtl_cq->cq_hca,
947 			    code, &async_event);
948 			mutex_enter(&ibtl_async_mutex);
949 		}
950 
951 		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
952 			mutex_exit(&ibtl_async_mutex);
953 			mutex_destroy(&ibtl_cq->cq_mutex);
954 			kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
955 			mutex_enter(&ibtl_async_mutex);
956 			return;
957 		}
958 	}
959 	ibtl_cq->cq_async_flags &= ~IBTL_ASYNC_PENDING;
960 }
961 
962 /*
963  * Asyncs for EEC objects.
964  *
965  * The function continues to run until there are no more async
966  * events/errors for this object.
967  */
968 static void
969 ibtl_do_eec_asyncs(ibtl_eec_t *ibtl_eec)
970 {
971 	ibt_async_code_t	code;
972 	ibt_async_event_t	async_event;
973 
974 	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
975 	bzero(&async_event, sizeof (async_event));
976 	async_event.ev_chan_hdl = ibtl_eec->eec_channel;
977 
978 	while ((code = ibtl_eec->eec_async_codes) != 0) {
979 		async_event.ev_fma_ena = 0;
980 		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT)
981 			code = 0;	/* fallthrough to "kmem_free" */
982 		else if (code & IBT_ERROR_CATASTROPHIC_EEC) {
983 			code = IBT_ERROR_CATASTROPHIC_CHAN;
984 			async_event.ev_fma_ena = ibtl_eec->eec_cat_fma_ena;
985 		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_EEC) {
986 			code = IBT_ERROR_PATH_MIGRATE_REQ;
987 			async_event.ev_fma_ena = ibtl_eec->eec_pth_fma_ena;
988 		} else if (code & IBT_EVENT_PATH_MIGRATED_EEC)
989 			code = IBT_EVENT_PATH_MIGRATED;
990 		else if (code & IBT_EVENT_COM_EST_EEC)
991 			code = IBT_EVENT_COM_EST;
992 		else {
993 			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_eec_asyncs: "
994 			    "async: unexpected code 0x%x", code);
995 			ibtl_eec->eec_async_codes = 0;
996 			code = 0;
997 		}
998 		ibtl_eec->eec_async_codes &= ~code;
999 
1000 		if (code) {
1001 			mutex_exit(&ibtl_async_mutex);
1002 			ibtl_async_client_call(ibtl_eec->eec_hca,
1003 			    code, &async_event);
1004 			mutex_enter(&ibtl_async_mutex);
1005 		}
1006 
1007 		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT) {
1008 			mutex_exit(&ibtl_async_mutex);
1009 			kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
1010 			mutex_enter(&ibtl_async_mutex);
1011 			return;
1012 		}
1013 	}
1014 	ibtl_eec->eec_async_flags &= ~IBTL_ASYNC_PENDING;
1015 }
1016 
1017 #ifdef __lock_lint
1018 kmutex_t cpr_mutex;
1019 #endif
1020 
1021 /*
1022  * Loop forever, calling async_handlers until all of the async lists
1023  * are empty.
1024  */
1025 
1026 static void
1027 ibtl_async_thread(void)
1028 {
1029 #ifndef __lock_lint
1030 	kmutex_t cpr_mutex;
1031 #endif
1032 	callb_cpr_t	cprinfo;
1033 
1034 	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
1035 	_NOTE(NO_COMPETING_THREADS_NOW)
1036 	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
1037 	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
1038 	    "ibtl_async_thread");
1039 #ifndef lint
1040 	_NOTE(COMPETING_THREADS_NOW)
1041 #endif
1042 
1043 	mutex_enter(&ibtl_async_mutex);
1044 
1045 	for (;;) {
1046 		if (ibtl_async_hca_list_start) {
1047 			ibtl_hca_devinfo_t *hca_devp;
1048 
1049 			/* remove first entry from list */
1050 			hca_devp = ibtl_async_hca_list_start;
1051 			ibtl_async_hca_list_start = hca_devp->hd_async_link;
1052 			hca_devp->hd_async_link = NULL;
1053 			if (ibtl_async_hca_list_start == NULL)
1054 				ibtl_async_hca_list_end = NULL;
1055 
1056 			ibtl_do_hca_asyncs(hca_devp);
1057 
1058 		} else if (ibtl_async_qp_list_start) {
1059 			ibtl_qp_t *ibtl_qp;
1060 
1061 			/* remove from list */
1062 			ibtl_qp = ibtl_async_qp_list_start;
1063 			ibtl_async_qp_list_start = ibtl_qp->qp_async_link;
1064 			ibtl_qp->qp_async_link = NULL;
1065 			if (ibtl_async_qp_list_start == NULL)
1066 				ibtl_async_qp_list_end = NULL;
1067 
1068 			ibtl_do_qp_asyncs(ibtl_qp);
1069 
1070 		} else if (ibtl_async_srq_list_start) {
1071 			ibtl_srq_t *ibtl_srq;
1072 
1073 			/* remove from list */
1074 			ibtl_srq = ibtl_async_srq_list_start;
1075 			ibtl_async_srq_list_start = ibtl_srq->srq_async_link;
1076 			ibtl_srq->srq_async_link = NULL;
1077 			if (ibtl_async_srq_list_start == NULL)
1078 				ibtl_async_srq_list_end = NULL;
1079 
1080 			ibtl_do_srq_asyncs(ibtl_srq);
1081 
1082 		} else if (ibtl_async_eec_list_start) {
1083 			ibtl_eec_t *ibtl_eec;
1084 
1085 			/* remove from list */
1086 			ibtl_eec = ibtl_async_eec_list_start;
1087 			ibtl_async_eec_list_start = ibtl_eec->eec_async_link;
1088 			ibtl_eec->eec_async_link = NULL;
1089 			if (ibtl_async_eec_list_start == NULL)
1090 				ibtl_async_eec_list_end = NULL;
1091 
1092 			ibtl_do_eec_asyncs(ibtl_eec);
1093 
1094 		} else if (ibtl_async_cq_list_start) {
1095 			ibtl_cq_t *ibtl_cq;
1096 
1097 			/* remove from list */
1098 			ibtl_cq = ibtl_async_cq_list_start;
1099 			ibtl_async_cq_list_start = ibtl_cq->cq_async_link;
1100 			ibtl_cq->cq_async_link = NULL;
1101 			if (ibtl_async_cq_list_start == NULL)
1102 				ibtl_async_cq_list_end = NULL;
1103 
1104 			ibtl_do_cq_asyncs(ibtl_cq);
1105 
1106 		} else {
1107 			if (ibtl_async_thread_exit == IBTL_THREAD_EXIT)
1108 				break;
1109 			mutex_enter(&cpr_mutex);
1110 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1111 			mutex_exit(&cpr_mutex);
1112 
1113 			cv_wait(&ibtl_async_cv, &ibtl_async_mutex);
1114 
1115 			mutex_exit(&ibtl_async_mutex);
1116 			mutex_enter(&cpr_mutex);
1117 			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
1118 			mutex_exit(&cpr_mutex);
1119 			mutex_enter(&ibtl_async_mutex);
1120 		}
1121 	}
1122 
1123 	mutex_exit(&ibtl_async_mutex);
1124 
1125 #ifndef __lock_lint
1126 	mutex_enter(&cpr_mutex);
1127 	CALLB_CPR_EXIT(&cprinfo);
1128 #endif
1129 	mutex_destroy(&cpr_mutex);
1130 }
1131 
1132 
1133 void
1134 ibtl_free_qp_async_check(ibtl_qp_t *ibtl_qp)
1135 {
1136 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_qp_async_check(%p)", ibtl_qp);
1137 
1138 	mutex_enter(&ibtl_async_mutex);
1139 
1140 	/*
1141 	 * If there is an active async, mark this object to be freed
1142 	 * by the async_thread when it's done.
1143 	 */
1144 	if (ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) {
1145 		ibtl_qp->qp_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1146 		mutex_exit(&ibtl_async_mutex);
1147 	} else {	/* free the object now */
1148 		mutex_exit(&ibtl_async_mutex);
1149 		cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
1150 		mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
1151 		kmem_free(IBTL_QP2CHAN(ibtl_qp), sizeof (ibtl_channel_t));
1152 	}
1153 }
1154 
1155 void
1156 ibtl_free_cq_async_check(ibtl_cq_t *ibtl_cq)
1157 {
1158 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_cq_async_check(%p)", ibtl_cq);
1159 
1160 	mutex_enter(&ibtl_async_mutex);
1161 
1162 	/* if there is an active async, mark this object to be freed */
1163 	if (ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) {
1164 		ibtl_cq->cq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1165 		mutex_exit(&ibtl_async_mutex);
1166 	} else {	/* free the object now */
1167 		mutex_exit(&ibtl_async_mutex);
1168 		mutex_destroy(&ibtl_cq->cq_mutex);
1169 		kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
1170 	}
1171 }
1172 
1173 void
1174 ibtl_free_srq_async_check(ibtl_srq_t *ibtl_srq)
1175 {
1176 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_srq_async_check(%p)",
1177 	    ibtl_srq);
1178 
1179 	mutex_enter(&ibtl_async_mutex);
1180 
1181 	/* if there is an active async, mark this object to be freed */
1182 	if (ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) {
1183 		ibtl_srq->srq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1184 		mutex_exit(&ibtl_async_mutex);
1185 	} else {	/* free the object now */
1186 		mutex_exit(&ibtl_async_mutex);
1187 		kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
1188 	}
1189 }
1190 
1191 void
1192 ibtl_free_eec_async_check(ibtl_eec_t *ibtl_eec)
1193 {
1194 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_eec_async_check(%p)",
1195 	    ibtl_eec);
1196 
1197 	mutex_enter(&ibtl_async_mutex);
1198 
1199 	/* if there is an active async, mark this object to be freed */
1200 	if (ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) {
1201 		ibtl_eec->eec_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1202 		mutex_exit(&ibtl_async_mutex);
1203 	} else {	/* free the object now */
1204 		mutex_exit(&ibtl_async_mutex);
1205 		kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
1206 	}
1207 }
1208 
1209 /*
1210  * This function differs from above in that we assume this is called
1211  * from non-interrupt context, and never called from the async_thread.
1212  */
1213 
1214 void
1215 ibtl_free_hca_async_check(ibtl_hca_t *ibt_hca)
1216 {
1217 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_hca_async_check(%p)",
1218 	    ibt_hca);
1219 
1220 	mutex_enter(&ibtl_async_mutex);
1221 
1222 	/* if there is an active async, mark this object to be freed */
1223 	if (ibt_hca->ha_async_cnt > 0) {
1224 		ibt_hca->ha_async_flags |= IBTL_ASYNC_FREE_OBJECT;
1225 		mutex_exit(&ibtl_async_mutex);
1226 	} else {	/* free the object now */
1227 		mutex_exit(&ibtl_async_mutex);
1228 		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
1229 	}
1230 }
1231 
1232 /*
1233  * Completion Queue Handling.
1234  *
1235  *	A completion queue can be handled through a simple callback
1236  *	at interrupt level, or it may be queued for an ibtl_cq_thread
1237  *	to handle.  The latter is chosen during ibt_alloc_cq when the
1238  *	IBTF_CQ_HANDLER_IN_THREAD is specified.
1239  */
1240 
1241 static void
1242 ibtl_cq_handler_call(ibtl_cq_t *ibtl_cq)
1243 {
1244 	ibt_cq_handler_t	cq_handler;
1245 	void			*arg;
1246 
1247 	IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_cq_handler_call(%p)", ibtl_cq);
1248 
1249 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq))
1250 	cq_handler = ibtl_cq->cq_comp_handler;
1251 	arg = ibtl_cq->cq_arg;
1252 	if (cq_handler != NULL)
1253 		cq_handler(ibtl_cq, arg);
1254 	else
1255 		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_cq_handler_call: "
1256 		    "no cq_handler for cq %p", ibtl_cq);
1257 }
1258 
1259 /*
1260  * Before ibt_free_cq can continue, we need to ensure no more cq_handler
1261  * callbacks can occur.  When we get the mutex, we know there are no
1262  * outstanding cq_handler callbacks.  We set the cq_handler to NULL to
1263  * prohibit future callbacks.
1264  */
1265 void
1266 ibtl_free_cq_check(ibtl_cq_t *ibtl_cq)
1267 {
1268 	mutex_enter(&ibtl_cq->cq_mutex);
1269 	ibtl_cq->cq_comp_handler = NULL;
1270 	mutex_exit(&ibtl_cq->cq_mutex);
1271 	if (ibtl_cq->cq_in_thread) {
1272 		mutex_enter(&ibtl_cq_mutex);
1273 		--ibtl_cqs_using_threads;
1274 		while (ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) {
1275 			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
1276 			ibtl_cq->cq_impl_flags |= IBTL_CQ_FREE;
1277 			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
1278 		}
1279 		mutex_exit(&ibtl_cq_mutex);
1280 	}
1281 }
1282 
1283 /*
1284  * Loop forever, calling cq_handlers until the cq list
1285  * is empty.
1286  */
1287 
1288 static void
1289 ibtl_cq_thread(void)
1290 {
1291 #ifndef __lock_lint
1292 	kmutex_t cpr_mutex;
1293 #endif
1294 	callb_cpr_t	cprinfo;
1295 
1296 	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
1297 	_NOTE(NO_COMPETING_THREADS_NOW)
1298 	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
1299 	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
1300 	    "ibtl_cq_thread");
1301 #ifndef lint
1302 	_NOTE(COMPETING_THREADS_NOW)
1303 #endif
1304 
1305 	mutex_enter(&ibtl_cq_mutex);
1306 
1307 	for (;;) {
1308 		if (ibtl_cq_list_start) {
1309 			ibtl_cq_t *ibtl_cq;
1310 
1311 			ibtl_cq = ibtl_cq_list_start;
1312 			ibtl_cq_list_start = ibtl_cq->cq_link;
1313 			ibtl_cq->cq_link = NULL;
1314 			if (ibtl_cq == ibtl_cq_list_end)
1315 				ibtl_cq_list_end = NULL;
1316 
1317 			while (ibtl_cq->cq_impl_flags & IBTL_CQ_CALL_CLIENT) {
1318 				ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
1319 				mutex_exit(&ibtl_cq_mutex);
1320 				ibtl_cq_handler_call(ibtl_cq);
1321 				mutex_enter(&ibtl_cq_mutex);
1322 			}
1323 			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_PENDING;
1324 			if (ibtl_cq->cq_impl_flags & IBTL_CQ_FREE)
1325 				cv_broadcast(&ibtl_cq_cv);
1326 		} else {
1327 			if (ibtl_cq_thread_exit == IBTL_THREAD_EXIT)
1328 				break;
1329 			mutex_enter(&cpr_mutex);
1330 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1331 			mutex_exit(&cpr_mutex);
1332 
1333 			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
1334 
1335 			mutex_exit(&ibtl_cq_mutex);
1336 			mutex_enter(&cpr_mutex);
1337 			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
1338 			mutex_exit(&cpr_mutex);
1339 			mutex_enter(&ibtl_cq_mutex);
1340 		}
1341 	}
1342 
1343 	mutex_exit(&ibtl_cq_mutex);
1344 #ifndef __lock_lint
1345 	mutex_enter(&cpr_mutex);
1346 	CALLB_CPR_EXIT(&cprinfo);
1347 #endif
1348 	mutex_destroy(&cpr_mutex);
1349 }
1350 
1351 
1352 /*
1353  * ibc_cq_handler()
1354  *
1355  *    Completion Queue Notification Handler.
1356  *
1357  */
1358 /*ARGSUSED*/
1359 void
1360 ibc_cq_handler(ibc_clnt_hdl_t ibc_hdl, ibt_cq_hdl_t ibtl_cq)
1361 {
1362 	IBTF_DPRINTF_L4(ibtf_handlers, "ibc_cq_handler(%p, %p)",
1363 	    ibc_hdl, ibtl_cq);
1364 
1365 	if (ibtl_cq->cq_in_thread) {
1366 		mutex_enter(&ibtl_cq_mutex);
1367 		ibtl_cq->cq_impl_flags |= IBTL_CQ_CALL_CLIENT;
1368 		if ((ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) == 0) {
1369 			ibtl_cq->cq_impl_flags |= IBTL_CQ_PENDING;
1370 			ibtl_cq->cq_link = NULL;
1371 			if (ibtl_cq_list_end == NULL)
1372 				ibtl_cq_list_start = ibtl_cq;
1373 			else
1374 				ibtl_cq_list_end->cq_link = ibtl_cq;
1375 			ibtl_cq_list_end = ibtl_cq;
1376 			cv_signal(&ibtl_cq_cv);
1377 		}
1378 		mutex_exit(&ibtl_cq_mutex);
1379 		return;
1380 	} else
1381 		ibtl_cq_handler_call(ibtl_cq);
1382 }
1383 
1384 
1385 /*
1386  * ibt_enable_cq_notify()
1387  *      Enable Notification requests on the specified CQ.
1388  *
1389  *      ibt_cq          The CQ handle.
1390  *
1391  *      notify_type     Enable notifications for all (IBT_NEXT_COMPLETION)
1392  *                      completions, or the next Solicited completion
1393  *                      (IBT_NEXT_SOLICITED) only.
1394  *
1395  *	Completion notifications are disabled by setting the completion
1396  *	handler to NULL by calling ibt_set_cq_handler().
1397  */
1398 ibt_status_t
1399 ibt_enable_cq_notify(ibt_cq_hdl_t ibtl_cq, ibt_cq_notify_flags_t notify_type)
1400 {
1401 	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_enable_cq_notify(%p, %d)",
1402 	    ibtl_cq, notify_type);
1403 
1404 	return (IBTL_CQ2CIHCAOPS_P(ibtl_cq)->ibc_notify_cq(
1405 	    IBTL_CQ2CIHCA(ibtl_cq), ibtl_cq->cq_ibc_cq_hdl, notify_type));
1406 }
1407 
1408 
1409 /*
1410  * ibt_set_cq_handler()
1411  *      Register a work request completion handler with the IBTF.
1412  *
1413  *      ibt_cq                  The CQ handle.
1414  *
1415  *      completion_handler      The completion handler.
1416  *
1417  *      arg                     The IBTF client private argument to be passed
1418  *                              back to the client when calling the CQ
1419  *                              completion handler.
1420  *
1421  *	Completion notifications are disabled by setting the completion
1422  *	handler to NULL.  When setting the handler to NULL, no additional
1423  *	calls to the previous CQ handler will be initiated, but there may
1424  *	be one in progress.
1425  *
1426  *      This function does not otherwise change the state of previous
1427  *      calls to ibt_enable_cq_notify().
1428  */
1429 void
1430 ibt_set_cq_handler(ibt_cq_hdl_t ibtl_cq, ibt_cq_handler_t completion_handler,
1431     void *arg)
1432 {
1433 	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_set_cq_handler(%p, %p, %p)",
1434 	    ibtl_cq, completion_handler, arg);
1435 
1436 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq))
1437 	ibtl_cq->cq_comp_handler = completion_handler;
1438 	ibtl_cq->cq_arg = arg;
1439 }
1440 
1441 
1442 /*
1443  * Inform IBT clients about New HCAs.
1444  *
1445  *	We use taskqs to allow simultaneous notification, with sleeping.
1446  *	Since taskqs only allow one argument, we define a structure
1447  *	because we need to pass in two arguments.
1448  */
1449 
1450 struct ibtl_new_hca_s {
1451 	ibtl_clnt_t		*nh_clntp;
1452 	ibtl_hca_devinfo_t	*nh_hca_devp;
1453 	ibt_async_code_t	nh_code;
1454 };
1455 
1456 static void
1457 ibtl_tell_client_about_new_hca(void *arg)
1458 {
1459 	struct ibtl_new_hca_s	*new_hcap = (struct ibtl_new_hca_s *)arg;
1460 	ibtl_clnt_t		*clntp = new_hcap->nh_clntp;
1461 	ibt_async_event_t	async_event;
1462 	ibtl_hca_devinfo_t	*hca_devp = new_hcap->nh_hca_devp;
1463 
1464 	bzero(&async_event, sizeof (async_event));
1465 	async_event.ev_hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
1466 	clntp->clnt_modinfop->mi_async_handler(
1467 	    clntp->clnt_private, NULL, new_hcap->nh_code, &async_event);
1468 	kmem_free(new_hcap, sizeof (*new_hcap));
1469 #ifdef __lock_lint
1470 	{
1471 		ibt_hca_hdl_t hca_hdl;
1472 		(void) ibt_open_hca(clntp, 0ULL, &hca_hdl);
1473 	}
1474 #endif
1475 	mutex_enter(&ibtl_clnt_list_mutex);
1476 	if (--hca_devp->hd_async_task_cnt == 0)
1477 		cv_signal(&hca_devp->hd_async_task_cv);
1478 	if (--clntp->clnt_async_cnt == 0)
1479 		cv_broadcast(&ibtl_clnt_cv);
1480 	mutex_exit(&ibtl_clnt_list_mutex);
1481 }
1482 
1483 /*
1484  * ibtl_announce_new_hca:
1485  *
1486  *	o First attach these clients in the given order
1487  *		IBMA
1488  *		IBCM
1489  *
1490  *	o Next attach all other clients in parallel.
1491  *
1492  * NOTE: Use the taskq to simultaneously notify all clients of the new HCA.
1493  * Retval from clients is ignored.
1494  */
1495 void
1496 ibtl_announce_new_hca(ibtl_hca_devinfo_t *hca_devp)
1497 {
1498 	ibtl_clnt_t		*clntp;
1499 	struct ibtl_new_hca_s	*new_hcap;
1500 
1501 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_announce_new_hca(%p, %llX)",
1502 	    hca_devp, hca_devp->hd_hca_attr->hca_node_guid);
1503 
1504 	mutex_enter(&ibtl_clnt_list_mutex);
1505 
1506 	clntp = ibtl_clnt_list;
1507 	while (clntp != NULL) {
1508 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
1509 			IBTF_DPRINTF_L4(ibtf_handlers,
1510 			    "ibtl_announce_new_hca: calling IBMF");
1511 			if (clntp->clnt_modinfop->mi_async_handler) {
1512 				_NOTE(NO_COMPETING_THREADS_NOW)
1513 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1514 				    KM_SLEEP);
1515 				new_hcap->nh_clntp = clntp;
1516 				new_hcap->nh_hca_devp = hca_devp;
1517 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1518 #ifndef lint
1519 				_NOTE(COMPETING_THREADS_NOW)
1520 #endif
1521 				clntp->clnt_async_cnt++;
1522 				hca_devp->hd_async_task_cnt++;
1523 
1524 				(void) taskq_dispatch(ibtl_async_taskq,
1525 				    ibtl_tell_client_about_new_hca, new_hcap,
1526 				    TQ_SLEEP);
1527 			}
1528 			break;
1529 		}
1530 		clntp = clntp->clnt_list_link;
1531 	}
1532 	if (clntp != NULL)
1533 		while (clntp->clnt_async_cnt > 0)
1534 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1535 	clntp = ibtl_clnt_list;
1536 	while (clntp != NULL) {
1537 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
1538 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
1539 			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
1540 			if (clntp->clnt_modinfop->mi_async_handler) {
1541 				_NOTE(NO_COMPETING_THREADS_NOW)
1542 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1543 				    KM_SLEEP);
1544 				new_hcap->nh_clntp = clntp;
1545 				new_hcap->nh_hca_devp = hca_devp;
1546 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1547 #ifndef lint
1548 				_NOTE(COMPETING_THREADS_NOW)
1549 #endif
1550 				clntp->clnt_async_cnt++;
1551 				hca_devp->hd_async_task_cnt++;
1552 
1553 				mutex_exit(&ibtl_clnt_list_mutex);
1554 				(void) ibtl_tell_client_about_new_hca(
1555 				    new_hcap);
1556 				mutex_enter(&ibtl_clnt_list_mutex);
1557 			}
1558 			break;
1559 		}
1560 		clntp = clntp->clnt_list_link;
1561 	}
1562 
1563 	clntp = ibtl_clnt_list;
1564 	while (clntp != NULL) {
1565 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_CM) {
1566 			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
1567 			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
1568 			if (clntp->clnt_modinfop->mi_async_handler) {
1569 				_NOTE(NO_COMPETING_THREADS_NOW)
1570 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1571 				    KM_SLEEP);
1572 				new_hcap->nh_clntp = clntp;
1573 				new_hcap->nh_hca_devp = hca_devp;
1574 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1575 #ifndef lint
1576 				_NOTE(COMPETING_THREADS_NOW)
1577 #endif
1578 				clntp->clnt_async_cnt++;
1579 				hca_devp->hd_async_task_cnt++;
1580 
1581 				(void) taskq_dispatch(ibtl_async_taskq,
1582 				    ibtl_tell_client_about_new_hca, new_hcap,
1583 				    TQ_SLEEP);
1584 			}
1585 			break;
1586 		}
1587 		clntp = clntp->clnt_list_link;
1588 	}
1589 	if (clntp != NULL)
1590 		while (clntp->clnt_async_cnt > 0)
1591 			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1592 	clntp = ibtl_clnt_list;
1593 	while (clntp != NULL) {
1594 		if ((clntp->clnt_modinfop->mi_clnt_class != IBT_DM) &&
1595 		    (clntp->clnt_modinfop->mi_clnt_class != IBT_CM) &&
1596 		    (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA)) {
1597 			IBTF_DPRINTF_L4(ibtf_handlers,
1598 			    "ibtl_announce_new_hca: Calling %s ",
1599 			    clntp->clnt_modinfop->mi_clnt_name);
1600 			if (clntp->clnt_modinfop->mi_async_handler) {
1601 				_NOTE(NO_COMPETING_THREADS_NOW)
1602 				new_hcap = kmem_alloc(sizeof (*new_hcap),
1603 				    KM_SLEEP);
1604 				new_hcap->nh_clntp = clntp;
1605 				new_hcap->nh_hca_devp = hca_devp;
1606 				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
1607 #ifndef lint
1608 				_NOTE(COMPETING_THREADS_NOW)
1609 #endif
1610 				clntp->clnt_async_cnt++;
1611 				hca_devp->hd_async_task_cnt++;
1612 
1613 				(void) taskq_dispatch(ibtl_async_taskq,
1614 				    ibtl_tell_client_about_new_hca, new_hcap,
1615 				    TQ_SLEEP);
1616 			}
1617 		}
1618 		clntp = clntp->clnt_list_link;
1619 	}
1620 
1621 	/* wait for all tasks to complete */
1622 	while (hca_devp->hd_async_task_cnt != 0)
1623 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1624 
1625 	/* wakeup thread that may be waiting to send an HCA async */
1626 	ASSERT(hca_devp->hd_async_busy == 1);
1627 	hca_devp->hd_async_busy = 0;
1628 	cv_broadcast(&hca_devp->hd_async_busy_cv);
1629 	mutex_exit(&ibtl_clnt_list_mutex);
1630 }
1631 
1632 /*
1633  * ibtl_detach_all_clients:
1634  *
1635  *	Return value - 0 for Success, 1 for Failure
1636  *
1637  *	o First detach general clients.
1638  *
1639  *	o Next detach these clients
1640  *		IBCM
1641  *		IBDM
1642  *
1643  *	o Finally, detach this client
1644  *		IBMA
1645  */
1646 int
1647 ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp)
1648 {
1649 	ib_guid_t		hcaguid = hca_devp->hd_hca_attr->hca_node_guid;
1650 	ibtl_hca_t		*ibt_hca;
1651 	ibtl_clnt_t		*clntp;
1652 	int			retval;
1653 
1654 	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_detach_all_clients(%llX)",
1655 	    hcaguid);
1656 
1657 	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));
1658 
1659 	while (hca_devp->hd_async_busy)
1660 		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
1661 	hca_devp->hd_async_busy = 1;
1662 
1663 	/* First inform general clients asynchronously */
1664 	hca_devp->hd_async_event.ev_hca_guid = hcaguid;
1665 	hca_devp->hd_async_event.ev_fma_ena = 0;
1666 	hca_devp->hd_async_event.ev_chan_hdl = NULL;
1667 	hca_devp->hd_async_event.ev_cq_hdl = NULL;
1668 	hca_devp->hd_async_code = IBT_HCA_DETACH_EVENT;
1669 
1670 	ibt_hca = hca_devp->hd_clnt_list;
1671 	while (ibt_hca != NULL) {
1672 		clntp = ibt_hca->ha_clnt_devp;
1673 		if (IBTL_GENERIC_CLIENT(clntp)) {
1674 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1675 			mutex_enter(&ibtl_async_mutex);
1676 			ibt_hca->ha_async_cnt++;
1677 			mutex_exit(&ibtl_async_mutex);
1678 			hca_devp->hd_async_task_cnt++;
1679 
1680 			(void) taskq_dispatch(ibtl_async_taskq,
1681 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
1682 		}
1683 		ibt_hca = ibt_hca->ha_clnt_link;
1684 	}
1685 
1686 	/* wait for all clients to complete */
1687 	while (hca_devp->hd_async_task_cnt != 0) {
1688 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1689 	}
1690 	/* Go thru the clients and check if any have not closed this HCA. */
1691 	retval = 0;
1692 	ibt_hca = hca_devp->hd_clnt_list;
1693 	while (ibt_hca != NULL) {
1694 		clntp = ibt_hca->ha_clnt_devp;
1695 		if (IBTL_GENERIC_CLIENT(clntp)) {
1696 			IBTF_DPRINTF_L2(ibtf_handlers,
1697 			    "ibtl_detach_all_clients: "
1698 			    "client '%s' failed to close the HCA.",
1699 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1700 			retval = 1;
1701 		}
1702 		ibt_hca = ibt_hca->ha_clnt_link;
1703 	}
1704 	if (retval == 1)
1705 		goto bailout;
1706 
1707 	/* Next inform IBDM asynchronously */
1708 	ibt_hca = hca_devp->hd_clnt_list;
1709 	while (ibt_hca != NULL) {
1710 		clntp = ibt_hca->ha_clnt_devp;
1711 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
1712 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1713 			mutex_enter(&ibtl_async_mutex);
1714 			ibt_hca->ha_async_cnt++;
1715 			mutex_exit(&ibtl_async_mutex);
1716 			hca_devp->hd_async_task_cnt++;
1717 
1718 			mutex_exit(&ibtl_clnt_list_mutex);
1719 			ibtl_hca_client_async_task(ibt_hca);
1720 			mutex_enter(&ibtl_clnt_list_mutex);
1721 			break;
1722 		}
1723 		ibt_hca = ibt_hca->ha_clnt_link;
1724 	}
1725 
1726 	/*
1727 	 * Next inform IBCM.
1728 	 * As IBCM doesn't perform ibt_open_hca(), IBCM will not be
1729 	 * accessible via hca_devp->hd_clnt_list.
1730 	 * ibtl_cm_async_handler will NOT be NULL, if IBCM is registered.
1731 	 */
1732 	if (ibtl_cm_async_handler) {
1733 		ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
1734 		    ibtl_cm_clnt_private);
1735 
1736 		/* wait for all tasks to complete */
1737 		while (hca_devp->hd_async_task_cnt != 0)
1738 			cv_wait(&hca_devp->hd_async_task_cv,
1739 			    &ibtl_clnt_list_mutex);
1740 	}
1741 
1742 	/* Go thru the clients and check if any have not closed this HCA. */
1743 	retval = 0;
1744 	ibt_hca = hca_devp->hd_clnt_list;
1745 	while (ibt_hca != NULL) {
1746 		clntp = ibt_hca->ha_clnt_devp;
1747 		if (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA) {
1748 			IBTF_DPRINTF_L2(ibtf_handlers,
1749 			    "ibtl_detach_all_clients: "
1750 			    "client '%s' failed to close the HCA.",
1751 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1752 			retval = 1;
1753 		}
1754 		ibt_hca = ibt_hca->ha_clnt_link;
1755 	}
1756 	if (retval == 1)
1757 		goto bailout;
1758 
1759 	/* Finally, inform IBMA */
1760 	ibt_hca = hca_devp->hd_clnt_list;
1761 	while (ibt_hca != NULL) {
1762 		clntp = ibt_hca->ha_clnt_devp;
1763 		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
1764 			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
1765 			mutex_enter(&ibtl_async_mutex);
1766 			ibt_hca->ha_async_cnt++;
1767 			mutex_exit(&ibtl_async_mutex);
1768 			hca_devp->hd_async_task_cnt++;
1769 
1770 			(void) taskq_dispatch(ibtl_async_taskq,
1771 			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
1772 		} else
1773 			IBTF_DPRINTF_L2(ibtf_handlers,
1774 			    "ibtl_detach_all_clients: "
1775 			    "client '%s' is unexpectedly on the client list",
1776 			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1777 		ibt_hca = ibt_hca->ha_clnt_link;
1778 	}
1779 
1780 	/* wait for IBMA to complete */
1781 	while (hca_devp->hd_async_task_cnt != 0) {
1782 		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
1783 	}
1784 
1785 	/* Check if this HCA's client list is empty. */
1786 	ibt_hca = hca_devp->hd_clnt_list;
1787 	if (ibt_hca != NULL) {
1788 		IBTF_DPRINTF_L2(ibtf_handlers,
1789 		    "ibtl_detach_all_clients: "
1790 		    "client '%s' failed to close the HCA.",
1791 		    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
1792 		retval = 1;
1793 	} else
1794 		retval = 0;
1795 
1796 bailout:
1797 	if (retval) {
1798 		hca_devp->hd_state = IBTL_HCA_DEV_ATTACHED; /* fix hd_state */
1799 		mutex_exit(&ibtl_clnt_list_mutex);
1800 		ibtl_announce_new_hca(hca_devp);
1801 		mutex_enter(&ibtl_clnt_list_mutex);
1802 	} else {
1803 		hca_devp->hd_async_busy = 0;
1804 		cv_broadcast(&hca_devp->hd_async_busy_cv);
1805 	}
1806 
1807 	return (retval);
1808 }
1809 
1810 void
1811 ibtl_free_clnt_async_check(ibtl_clnt_t *clntp)
1812 {
1813 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_clnt_async_check(%p)", clntp);
1814 
1815 	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));
1816 
1817 	/* wait for all asyncs based on "ibtl_clnt_list" to complete */
1818 	while (clntp->clnt_async_cnt != 0) {
1819 		cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
1820 	}
1821 }
1822 
1823 static void
1824 ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp)
1825 {
1826 	mutex_enter(&ibtl_clnt_list_mutex);
1827 	if (--clntp->clnt_async_cnt == 0) {
1828 		cv_broadcast(&ibtl_clnt_cv);
1829 	}
1830 	mutex_exit(&ibtl_clnt_list_mutex);
1831 }
1832 
1833 static void
1834 ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp)
1835 {
1836 	mutex_enter(&ibtl_clnt_list_mutex);
1837 	++clntp->clnt_async_cnt;
1838 	mutex_exit(&ibtl_clnt_list_mutex);
1839 }
1840 
1841 
1842 /*
1843  * Functions and data structures to inform clients that a notification
1844  * has occurred about Multicast Groups that might interest them.
1845  */
1846 struct ibtl_sm_notice {
1847 	ibt_clnt_hdl_t		np_ibt_hdl;
1848 	ib_gid_t		np_sgid;
1849 	ibt_subnet_event_code_t	np_code;
1850 	ibt_subnet_event_t	np_event;
1851 };
1852 
1853 static void
1854 ibtl_sm_notice_task(void *arg)
1855 {
1856 	struct ibtl_sm_notice *noticep = (struct ibtl_sm_notice *)arg;
1857 	ibt_clnt_hdl_t ibt_hdl = noticep->np_ibt_hdl;
1858 	ibt_sm_notice_handler_t sm_notice_handler;
1859 
1860 	sm_notice_handler = ibt_hdl->clnt_sm_trap_handler;
1861 	if (sm_notice_handler != NULL)
1862 		sm_notice_handler(ibt_hdl->clnt_sm_trap_handler_arg,
1863 		    noticep->np_sgid, noticep->np_code, &noticep->np_event);
1864 	kmem_free(noticep, sizeof (*noticep));
1865 	ibtl_dec_clnt_async_cnt(ibt_hdl);
1866 }
1867 
1868 /*
1869  * Inform the client that MCG notices are not working at this time.
1870  */
1871 void
1872 ibtl_cm_sm_notice_init_failure(ibtl_cm_sm_init_fail_t *ifail)
1873 {
1874 	ibt_clnt_hdl_t ibt_hdl = ifail->smf_ibt_hdl;
1875 	struct ibtl_sm_notice *noticep;
1876 	ib_gid_t *sgidp = &ifail->smf_sgid[0];
1877 	int i;
1878 
1879 	for (i = 0; i < ifail->smf_num_sgids; i++) {
1880 		_NOTE(NO_COMPETING_THREADS_NOW)
1881 		noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1882 		noticep->np_ibt_hdl = ibt_hdl;
1883 		noticep->np_sgid = *sgidp++;
1884 		noticep->np_code = IBT_SM_EVENT_UNAVAILABLE;
1885 #ifndef lint
1886 		_NOTE(COMPETING_THREADS_NOW)
1887 #endif
1888 		ibtl_inc_clnt_async_cnt(ibt_hdl);
1889 		(void) taskq_dispatch(ibtl_async_taskq,
1890 		    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1891 	}
1892 }
1893 
1894 /*
1895  * Inform all clients of the event.
1896  */
1897 void
1898 ibtl_cm_sm_notice_handler(ib_gid_t sgid, ibt_subnet_event_code_t code,
1899     ibt_subnet_event_t *event)
1900 {
1901 	_NOTE(NO_COMPETING_THREADS_NOW)
1902 	struct ibtl_sm_notice	*noticep;
1903 	ibtl_clnt_t		*clntp;
1904 
1905 	mutex_enter(&ibtl_clnt_list_mutex);
1906 	clntp = ibtl_clnt_list;
1907 	while (clntp != NULL) {
1908 		if (clntp->clnt_sm_trap_handler) {
1909 			noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
1910 			noticep->np_ibt_hdl = clntp;
1911 			noticep->np_sgid = sgid;
1912 			noticep->np_code = code;
1913 			noticep->np_event = *event;
1914 			++clntp->clnt_async_cnt;
1915 			(void) taskq_dispatch(ibtl_async_taskq,
1916 			    ibtl_sm_notice_task, noticep, TQ_SLEEP);
1917 		}
1918 		clntp = clntp->clnt_list_link;
1919 	}
1920 	mutex_exit(&ibtl_clnt_list_mutex);
1921 #ifndef lint
1922 	_NOTE(COMPETING_THREADS_NOW)
1923 #endif
1924 }
1925 
/*
 * Record the handler for this client.
 *
 *	ibt_hdl			client whose SM notice handler is being set
 *	sm_notice_handler	stored in clnt_sm_trap_handler; this is the
 *				function ibtl_sm_notice_task() calls, and a
 *				NULL value makes ibtl_cm_sm_notice_handler()
 *				skip the client
 *	private			stored in clnt_sm_trap_handler_arg and passed
 *				to the handler as its first argument
 *
 * No lock is taken in this function.
 */
void
ibtl_cm_set_sm_notice_handler(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	_NOTE(NO_COMPETING_THREADS_NOW)
	ibt_hdl->clnt_sm_trap_handler = sm_notice_handler;
	ibt_hdl->clnt_sm_trap_handler_arg = private;
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif
}
1940 
1941 
1942 /*
1943  * ibtl_another_cq_handler_in_thread()
1944  *
1945  * Conditionally increase the number of cq_threads.
1946  * The number of threads grows, based on the number of cqs using threads.
1947  *
1948  * The table below controls the number of threads as follows:
1949  *
1950  *	Number of CQs	Number of cq_threads
1951  *		0		0
1952  *		1		1
1953  *		2-3		2
1954  *		4-5		3
1955  *		6-9		4
1956  *		10-15		5
1957  *		16-23		6
1958  *		24-31		7
1959  *		32+		8
1960  */
1961 
/* Upper bound on ibtl_cq_threads; also the size of the two tables below. */
#define	IBTL_CQ_MAXTHREADS 8
/*
 * Thresholds for growing the cq_thread pool: with k cq_threads running,
 * the next one is created once the number of CQs using threads reaches
 * ibtl_cq_scaling[k].
 */
static uint8_t ibtl_cq_scaling[IBTL_CQ_MAXTHREADS] = {
	1, 2, 4, 6, 10, 16, 24, 32
};

/* t_did of each cq_thread created, saved for thread_join() */
static kt_did_t ibtl_cq_did[IBTL_CQ_MAXTHREADS];
1968 
1969 void
1970 ibtl_another_cq_handler_in_thread(void)
1971 {
1972 	kthread_t *t;
1973 	int my_idx;
1974 
1975 	mutex_enter(&ibtl_cq_mutex);
1976 	if ((ibtl_cq_threads == IBTL_CQ_MAXTHREADS) ||
1977 	    (++ibtl_cqs_using_threads < ibtl_cq_scaling[ibtl_cq_threads])) {
1978 		mutex_exit(&ibtl_cq_mutex);
1979 		return;
1980 	}
1981 	my_idx = ibtl_cq_threads++;
1982 	mutex_exit(&ibtl_cq_mutex);
1983 	t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0, TS_RUN,
1984 	    ibtl_pri - 1);
1985 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1986 	ibtl_cq_did[my_idx] = t->t_did;	/* save for thread_join() */
1987 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
1988 }
1989 
1990 void
1991 ibtl_thread_init(void)
1992 {
1993 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init()");
1994 
1995 	mutex_init(&ibtl_async_mutex, NULL, MUTEX_DEFAULT, NULL);
1996 	cv_init(&ibtl_async_cv, NULL, CV_DEFAULT, NULL);
1997 	cv_init(&ibtl_clnt_cv, NULL, CV_DEFAULT, NULL);
1998 
1999 	mutex_init(&ibtl_cq_mutex, NULL, MUTEX_DEFAULT, NULL);
2000 	cv_init(&ibtl_cq_cv, NULL, CV_DEFAULT, NULL);
2001 }
2002 
2003 void
2004 ibtl_thread_init2(void)
2005 {
2006 	int i;
2007 	static int initted = 0;
2008 	kthread_t *t;
2009 
2010 	mutex_enter(&ibtl_async_mutex);
2011 	if (initted == 1) {
2012 		mutex_exit(&ibtl_async_mutex);
2013 		return;
2014 	}
2015 	initted = 1;
2016 	mutex_exit(&ibtl_async_mutex);
2017 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_async_did))
2018 	ibtl_async_did = kmem_zalloc(ibtl_async_thread_init * sizeof (kt_did_t),
2019 	    KM_SLEEP);
2020 
2021 	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init2()");
2022 
2023 	for (i = 0; i < ibtl_async_thread_init; i++) {
2024 		t = thread_create(NULL, 0, ibtl_async_thread, NULL, 0, &p0,
2025 		    TS_RUN, ibtl_pri - 1);
2026 		ibtl_async_did[i] = t->t_did; /* thread_join() */
2027 	}
2028 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_async_did))
2029 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
2030 	for (i = 0; i < ibtl_cq_threads; i++) {
2031 		t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0,
2032 		    TS_RUN, ibtl_pri - 1);
2033 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
2034 		ibtl_cq_did[i] = t->t_did; /* save for thread_join() */
2035 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
2036 	}
2037 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
2038 }
2039 
/*
 * Tear down the thread state: set both exit flags, join the recorded cq
 * and async threads, free ibtl_async_did, and destroy the mutexes and
 * condition variables.  The order matters: the flags are set and the
 * waiters woken before joining, and the locks are destroyed only after
 * all joins have returned.
 */
void
ibtl_thread_fini(void)
{
	int i;

	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_fini()");

	/* undo the work done by ibtl_thread_init() */

	/* set the cq exit flag and wake everything waiting on ibtl_cq_cv */
	mutex_enter(&ibtl_cq_mutex);
	ibtl_cq_thread_exit = IBTL_THREAD_EXIT;
	cv_broadcast(&ibtl_cq_cv);
	mutex_exit(&ibtl_cq_mutex);

	/* same for the async side, using ibtl_async_cv */
	mutex_enter(&ibtl_async_mutex);
	ibtl_async_thread_exit = IBTL_THREAD_EXIT;
	cv_broadcast(&ibtl_async_cv);
	mutex_exit(&ibtl_async_mutex);

	/* wait for each cq thread whose id was saved in ibtl_cq_did[] */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
	for (i = 0; i < ibtl_cq_threads; i++)
		thread_join(ibtl_cq_did[i]);
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))

	/*
	 * ibtl_async_did is allocated by ibtl_thread_init2(); if it is
	 * not set there is nothing to join or free.
	 */
	if (ibtl_async_did) {
		for (i = 0; i < ibtl_async_thread_init; i++)
			thread_join(ibtl_async_did[i]);

		kmem_free(ibtl_async_did,
		    ibtl_async_thread_init * sizeof (kt_did_t));
	}
	/* all joins have returned; release the synchronization objects */
	mutex_destroy(&ibtl_cq_mutex);
	cv_destroy(&ibtl_cq_cv);

	mutex_destroy(&ibtl_async_mutex);
	cv_destroy(&ibtl_async_cv);
	cv_destroy(&ibtl_clnt_cv);
}
2078 
2079 /* ARGSUSED */
2080 ibt_status_t ibtl_dummy_node_info_cb(ib_guid_t hca_guid, uint8_t port,
2081     ib_lid_t lid, ibt_node_info_t *node_info)
2082 {
2083 	return (IBT_SUCCESS);
2084 }
2085