/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/ib/ibtl/impl/ibtl.h>
#include <sys/ib/ibtl/impl/ibtl_cm.h>
#include <sys/taskq.h>
#include <sys/disp.h>
#include <sys/callb.h>
#include <sys/proc.h>

/*
 * ibtl_handlers.c
 */

/*
 * What's in this file?
 *
 *   This file started as an implementation of Asynchronous Event/Error
 *   handling and Completion Queue handling.  As the implementation
 *   evolved, code has been added for other ibc_* interfaces (resume,
 *   predetach, etc.) that use the same mechanisms as used for asyncs.
 *
 * Async and CQ handling at interrupt level.
 *
 *   CQ handling is normally done at interrupt level using the CQ callback
 *   handler to call the appropriate IBT Client (owner of the CQ).  For
 *   clients that would prefer a fully flexible non-interrupt context to
 *   do their CQ handling, a CQ can be created so that its handler is
 *   called from a non-interrupt thread.  CQ handling is done frequently
 *   whereas Async handling is expected to occur very infrequently.
 *
 *   Async handling is done by marking (or'ing an async_code into) the
 *   pertinent IBTL data structure, and then notifying the async_thread(s)
 *   that the data structure has async work to be done.  The notification
 *   occurs by linking the data structure through its async_link onto a
 *   list of like data structures and waking up an async_thread.  This
 *   list append is not done if there is already async work pending on
 *   this data structure (IBTL_ASYNC_PENDING).
 *
 * Async Mutex and CQ Mutex
 *
 *   The global ibtl_async_mutex is "the" mutex used to control access
 *   to all the data needed by ibc_async_handler.  All the threads that
 *   use this mutex are written so that the mutex is held for very short
 *   periods of time, and never held while making calls to functions
 *   that may block.
 *
 *   The global ibtl_cq_mutex is used similarly by ibc_cq_handler and
 *   the ibtl_cq_thread(s).
 *
 * Mutex hierarchy
 *
 *   The ibtl_clnt_list_mutex is above the ibtl_async_mutex in the
 *   hierarchy; ibtl_clnt_list_mutex protects all of the various lists.
 *
 *   The ibtl_cq_mutex is independent of the above mutexes.
 *
 * Threads
 *
 *   "ibtl_cq_threads" is the number of threads created for handling
 *   Completion Queues in thread context.  If this feature really gets
 *   used, then we will want to do some suitable tuning.  Similarly, we
 *   may want to tune "ibtl_async_thread_init", the number of async threads.
 *
 *   The function ibtl_cq_thread is the main loop for handling a CQ in a
 *   thread.  There can be multiple threads executing this same code.
 *   The code sleeps when there is no work to be done (list is empty),
 *   otherwise it pulls the first CQ structure off the list and performs
 *   the CQ handler callback to the client.  After that returns, a check
 *   is made, and if another ibc_cq_handler call was made for this CQ,
 *   the client is called again.
 *
 *   The function ibtl_async_thread is the main loop for handling async
 *   events/errors.  There can be multiple threads executing this same code.
 *   The code sleeps when there is no work to be done (lists are empty),
 *   otherwise it pulls the first structure off one of the lists and
 *   performs the async callback(s) to the client(s).  Note that HCA
 *   async handling is done by calling each of the clients using the HCA.
 *   When the async handling completes, the data structure having the async
 *   event/error is checked for more work before it's considered "done".
 *
 * Taskq
 *
 *   The async_taskq is used here to allow async handler callbacks to
 *   occur simultaneously to multiple clients of an HCA.  This taskq could
 *   be used for other purposes, e.g., if all the async_threads are in
 *   use, but this is deemed overkill since asyncs should occur rarely.
 */
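
/*
 * For reference, a client's async handler (supplied in its
 * ibt_clnt_modinfo_t and invoked from ibtl_async_client_call() below) is
 * called with the arguments (clnt_private, hca_hdl, code, event_p).  The
 * following is a purely illustrative sketch; the "my_" names are
 * hypothetical and not part of this file:
 *
 *	static void
 *	my_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
 *	    ibt_async_code_t code, ibt_async_event_t *event_p)
 *	{
 *		if (code == IBT_EVENT_PORT_UP)
 *			my_handle_port_up(clnt_private, event_p->ev_port);
 *	}
 */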

/* Globals */
static char ibtf_handlers[] = "ibtl_handlers";

/* priority for IBTL threads (async, cq, and taskq) */
static pri_t ibtl_pri = MAXCLSYSPRI - 1; /* maybe override in /etc/system */

/* taskq used for HCA asyncs */
#define	ibtl_async_taskq system_taskq

/* data for async handling by threads */
static kmutex_t ibtl_async_mutex;	/* protects most *_async_* data */
static kcondvar_t ibtl_async_cv;	/* async_threads wait on this */
static kcondvar_t ibtl_clnt_cv;		/* ibt_detach might wait on this */
static void ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp);
static void ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp);

static kt_did_t *ibtl_async_did;	/* for thread_join() */
int ibtl_async_thread_init = 4;	/* total # of async_threads to create */
static int ibtl_async_thread_exit = 0;	/* set if/when thread(s) should exit */

/* async lists for various structures */
static ibtl_hca_devinfo_t *ibtl_async_hca_list_start, *ibtl_async_hca_list_end;
static ibtl_eec_t *ibtl_async_eec_list_start, *ibtl_async_eec_list_end;
static ibtl_qp_t *ibtl_async_qp_list_start, *ibtl_async_qp_list_end;
static ibtl_cq_t *ibtl_async_cq_list_start, *ibtl_async_cq_list_end;
static ibtl_srq_t *ibtl_async_srq_list_start, *ibtl_async_srq_list_end;

/* data for CQ completion handling by threads */
static kmutex_t ibtl_cq_mutex;	/* protects the cv and the list below */
static kcondvar_t ibtl_cq_cv;
static ibtl_cq_t *ibtl_cq_list_start, *ibtl_cq_list_end;

static int ibtl_cq_threads = 0;		/* total # of cq threads */
static int ibtl_cqs_using_threads = 0;	/* total # of cqs using threads */
static int ibtl_cq_thread_exit = 0;	/* set if/when thread(s) should exit */

/* value used to tell IBTL threads to exit */
#define	IBTL_THREAD_EXIT 0x1b7fdead	/* IBTF DEAD */
/* Cisco Topspin Vendor ID for Rereg hack */
#define	IBT_VENDOR_CISCO 0x05ad

int ibtl_eec_not_supported = 1;

char *ibtl_last_client_name;	/* may help debugging */
typedef ibt_status_t (*ibtl_node_info_cb_t)(ib_guid_t, uint8_t, ib_lid_t,
    ibt_node_info_t *);

ibtl_node_info_cb_t ibtl_node_info_cb;

_NOTE(LOCK_ORDER(ibtl_clnt_list_mutex ibtl_async_mutex))

void
ibtl_cm_set_node_info_cb(ibt_status_t (*node_info_cb)(ib_guid_t, uint8_t,
    ib_lid_t, ibt_node_info_t *))
{
	mutex_enter(&ibtl_clnt_list_mutex);
	ibtl_node_info_cb = node_info_cb;
	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * ibc_async_handler()
 *
 * Asynchronous Event/Error Handler.
 *
 *	This is the function called by HCA drivers to post the various async
 *	events and errors mentioned in the IB architecture spec.  See
 *	ibtl_types.h for additional details.
 *
 *	This function marks the pertinent IBTF object with the async_code,
 *	and queues the object for handling by an ibtl_async_thread.  If
 *	the object is NOT already marked for async processing, it is added
 *	to the associated list for that type of object, and an
 *	ibtl_async_thread is signaled to finish the async work.
 */
void
ibc_async_handler(ibc_clnt_hdl_t hca_devp, ibt_async_code_t code,
    ibc_async_event_t *event_p)
{
	ibtl_qp_t	*ibtl_qp;
	ibtl_cq_t	*ibtl_cq;
	ibtl_srq_t	*ibtl_srq;
	ibtl_eec_t	*ibtl_eec;
	uint8_t		port_minus1;

	ibtl_async_port_event_t	*portp;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler(%p, 0x%x, %p)",
	    hca_devp, code, event_p);

	mutex_enter(&ibtl_async_mutex);

	switch (code) {
	case IBT_EVENT_PATH_MIGRATED_QP:
	case IBT_EVENT_SQD:
	case IBT_ERROR_CATASTROPHIC_QP:
	case IBT_ERROR_PATH_MIGRATE_REQ_QP:
	case IBT_EVENT_COM_EST_QP:
	case IBT_ERROR_INVALID_REQUEST_QP:
	case IBT_ERROR_ACCESS_VIOLATION_QP:
	case IBT_EVENT_EMPTY_QP:
	case IBT_FEXCH_ERROR:
		ibtl_qp = event_p->ev_qp_hdl;
		if (ibtl_qp == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad qp handle");
			break;
		}
		switch (code) {
		case IBT_ERROR_CATASTROPHIC_QP:
			ibtl_qp->qp_cat_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_PATH_MIGRATE_REQ_QP:
			ibtl_qp->qp_pth_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_INVALID_REQUEST_QP:
			ibtl_qp->qp_inv_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_ACCESS_VIOLATION_QP:
			ibtl_qp->qp_acc_fma_ena = event_p->ev_fma_ena; break;
		}

		ibtl_qp->qp_async_codes |= code;
		if ((ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_qp->qp_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_qp->qp_async_link = NULL;
			if (ibtl_async_qp_list_end == NULL)
				ibtl_async_qp_list_start = ibtl_qp;
			else
				ibtl_async_qp_list_end->qp_async_link = ibtl_qp;
			ibtl_async_qp_list_end = ibtl_qp;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_ERROR_CQ:
		ibtl_cq = event_p->ev_cq_hdl;
		if (ibtl_cq == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad cq handle");
			break;
		}
		ibtl_cq->cq_async_codes |= code;
		ibtl_cq->cq_fma_ena = event_p->ev_fma_ena;
		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_cq->cq_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_cq->cq_async_link = NULL;
			if (ibtl_async_cq_list_end == NULL)
				ibtl_async_cq_list_start = ibtl_cq;
			else
				ibtl_async_cq_list_end->cq_async_link = ibtl_cq;
			ibtl_async_cq_list_end = ibtl_cq;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_ERROR_CATASTROPHIC_SRQ:
	case IBT_EVENT_LIMIT_REACHED_SRQ:
		ibtl_srq = event_p->ev_srq_hdl;
		if (ibtl_srq == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad srq handle");
			break;
		}
		ibtl_srq->srq_async_codes |= code;
		ibtl_srq->srq_fma_ena = event_p->ev_fma_ena;
		if ((ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_srq->srq_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_srq->srq_async_link = NULL;
			if (ibtl_async_srq_list_end == NULL)
				ibtl_async_srq_list_start = ibtl_srq;
			else
				ibtl_async_srq_list_end->srq_async_link =
				    ibtl_srq;
			ibtl_async_srq_list_end = ibtl_srq;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_EVENT_PATH_MIGRATED_EEC:
	case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
	case IBT_ERROR_CATASTROPHIC_EEC:
	case IBT_EVENT_COM_EST_EEC:
		if (ibtl_eec_not_supported) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "EEC events are disabled.");
			break;
		}
		ibtl_eec = event_p->ev_eec_hdl;
		if (ibtl_eec == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad eec handle");
			break;
		}
		switch (code) {
		case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
			ibtl_eec->eec_pth_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_CATASTROPHIC_EEC:
			ibtl_eec->eec_cat_fma_ena = event_p->ev_fma_ena; break;
		}
		ibtl_eec->eec_async_codes |= code;
		if ((ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_eec->eec_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_eec->eec_async_link = NULL;
			if (ibtl_async_eec_list_end == NULL)
				ibtl_async_eec_list_start = ibtl_eec;
			else
				ibtl_async_eec_list_end->eec_async_link =
				    ibtl_eec;
			ibtl_async_eec_list_end = ibtl_eec;
			cv_signal(&ibtl_async_cv);
		}
		break;

	case IBT_ERROR_LOCAL_CATASTROPHIC:
		hca_devp->hd_async_codes |= code;
		hca_devp->hd_fma_ena = event_p->ev_fma_ena;
		/* FALLTHROUGH */

	case IBT_EVENT_PORT_UP:
	case IBT_PORT_CHANGE_EVENT:
	case IBT_CLNT_REREG_EVENT:
	case IBT_ERROR_PORT_DOWN:
		if ((code & IBT_PORT_EVENTS) != 0) {
			if ((port_minus1 = event_p->ev_port - 1) >=
			    hca_devp->hd_hca_attr->hca_nports) {
				IBTF_DPRINTF_L2(ibtf_handlers,
				    "ibc_async_handler: bad port #: %d",
				    event_p->ev_port);
				break;
			}
			portp = &hca_devp->hd_async_port[port_minus1];
			if (code == IBT_EVENT_PORT_UP) {
				/*
				 * The port is just coming UP; we can't have
				 * any valid older events.
				 */
				portp->status = IBTL_HCA_PORT_UP;
			} else if (code == IBT_ERROR_PORT_DOWN) {
				/*
				 * The port is going DOWN; older events don't
				 * count.
				 */
				portp->status = IBTL_HCA_PORT_DOWN;
			} else if (code == IBT_PORT_CHANGE_EVENT) {
				/*
				 * For port UP and DOWN events, only the
				 * latest event counts.  If we get an UP
				 * after a DOWN, it is sufficient to send
				 * just the UP, and vice versa.  A port
				 * CHANGE event is valid only when the
				 * port is already UP, but if we receive
				 * it after an UP that has not yet been
				 * delivered, we still need to deliver the
				 * CHANGE after we deliver the UP event.
				 *
				 * We will not get a CHANGE event while
				 * the port is down or a DOWN event is
				 * pending.
				 */
				portp->flags |= event_p->ev_port_flags;
				portp->status |= IBTL_HCA_PORT_CHG;
			} else if (code == IBT_CLNT_REREG_EVENT) {
				/*
				 * The SM has requested that clients
				 * re-register their subscriptions to SM
				 * event notifications.
				 */
				portp->status |= IBTL_HCA_PORT_ASYNC_CLNT_REREG;
			}

			hca_devp->hd_async_codes |= code;
		}

		if ((hca_devp->hd_async_flags & IBTL_ASYNC_PENDING) == 0) {
			hca_devp->hd_async_flags |= IBTL_ASYNC_PENDING;
			hca_devp->hd_async_link = NULL;
			if (ibtl_async_hca_list_end == NULL)
				ibtl_async_hca_list_start = hca_devp;
			else
				ibtl_async_hca_list_end->hd_async_link =
				    hca_devp;
			ibtl_async_hca_list_end = hca_devp;
			cv_signal(&ibtl_async_cv);
		}

		break;

	default:
		IBTF_DPRINTF_L1(ibtf_handlers, "ibc_async_handler: "
		    "invalid code (0x%x)", code);
	}

	mutex_exit(&ibtl_async_mutex);
}
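
/*
 * For illustration only: an HCA (CI) driver posts an async by filling in
 * an ibc_async_event_t and calling ibc_async_handler() above.  This is a
 * hypothetical sketch; the "ci_" names stand in for CI driver state and
 * are not real identifiers:
 *
 *	ibc_async_event_t	event;
 *
 *	bzero(&event, sizeof (event));
 *	event.ev_qp_hdl = ci_qp_ibtl_hdl;
 *	event.ev_fma_ena = ci_fma_ena;
 *	ibc_async_handler(ci_ibc_clnt_hdl, IBT_ERROR_CATASTROPHIC_QP, &event);
 *
 * The call marks the QP's qp_async_codes and wakes an ibtl_async_thread;
 * the owning client's async handler is then called from that thread.
 */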


/* Finally, make the async call to the client. */

static void
ibtl_async_client_call(ibtl_hca_t *ibt_hca, ibt_async_code_t code,
    ibt_async_event_t *event_p)
{
	ibtl_clnt_t		*clntp;
	void			*client_private;
	ibt_async_handler_t	async_handler;
	char			*client_name;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call(%p, 0x%x, %p)",
	    ibt_hca, code, event_p);

	clntp = ibt_hca->ha_clnt_devp;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))
	/* Record who is being called (just a debugging aid) */
	ibtl_last_client_name = client_name = clntp->clnt_name;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_last_client_name))

	client_private = clntp->clnt_private;
	async_handler = clntp->clnt_modinfop->mi_async_handler;

	if (code & (IBT_EVENT_COM_EST_QP | IBT_EVENT_COM_EST_EEC)) {
		mutex_enter(&ibtl_clnt_list_mutex);
		async_handler = ibtl_cm_async_handler;
		client_private = ibtl_cm_clnt_private;
		mutex_exit(&ibtl_clnt_list_mutex);
		ibt_hca = NULL;
		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
		    "calling CM for COM_EST");
	} else {
		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
		    "calling client '%s'", client_name);
	}
	if (async_handler != NULL)
		async_handler(client_private, ibt_hca, code, event_p);
	else
		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: "
		    "client '%s' has no async handler", client_name);
}

/*
 * Inform CM or DM about HCA events.
 *
 *	We use taskqs to allow simultaneous notification, with sleeping.
 *	Since taskqs only allow one argument, we define a structure
 *	because we need to pass in more than one argument.
 */

struct ibtl_mgr_s {
	ibtl_hca_devinfo_t	*mgr_hca_devp;
	ibt_async_handler_t	mgr_async_handler;
	void			*mgr_clnt_private;
};

/*
 * Asyncs of HCA level events for CM and DM.  Call CM or DM and tell them
 * about the HCA for the event recorded in the ibtl_hca_devinfo_t.
 */
static void
ibtl_do_mgr_async_task(void *arg)
{
	struct ibtl_mgr_s	*mgrp = (struct ibtl_mgr_s *)arg;
	ibtl_hca_devinfo_t	*hca_devp = mgrp->mgr_hca_devp;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_mgr_async_task(0x%x)",
	    hca_devp->hd_async_code);

	mgrp->mgr_async_handler(mgrp->mgr_clnt_private, NULL,
	    hca_devp->hd_async_code, &hca_devp->hd_async_event);
	kmem_free(mgrp, sizeof (*mgrp));

	mutex_enter(&ibtl_clnt_list_mutex);
	if (--hca_devp->hd_async_task_cnt == 0)
		cv_signal(&hca_devp->hd_async_task_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}

static void
ibt_cisco_embedded_sm_rereg_fix(void *arg)
{
	struct ibtl_mgr_s *mgrp = arg;
	ibtl_hca_devinfo_t *hca_devp;
	ibt_node_info_t node_info;
	ibt_status_t ibt_status;
	ibtl_async_port_event_t *portp;
	ib_lid_t sm_lid;
	ib_guid_t hca_guid;
	ibt_async_event_t *event_p;
	ibt_hca_portinfo_t *pinfop;
	uint8_t	port;

	hca_devp = mgrp->mgr_hca_devp;

	mutex_enter(&ibtl_clnt_list_mutex);
	event_p = &hca_devp->hd_async_event;
	port = event_p->ev_port;
	portp = &hca_devp->hd_async_port[port - 1];
	pinfop = &hca_devp->hd_portinfop[port - 1];
	sm_lid = pinfop->p_sm_lid;
	hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
	mutex_exit(&ibtl_clnt_list_mutex);

	ibt_status = ((ibtl_node_info_cb_t)(uintptr_t)
	    mgrp->mgr_async_handler)(hca_guid, port, sm_lid, &node_info);
	if (ibt_status == IBT_SUCCESS) {
		if ((node_info.n_vendor_id == IBT_VENDOR_CISCO) &&
		    (node_info.n_node_type == IBT_NODE_TYPE_SWITCH)) {
			mutex_enter(&ibtl_async_mutex);
			portp->status |= IBTL_HCA_PORT_ASYNC_CLNT_REREG;
			hca_devp->hd_async_codes |= IBT_CLNT_REREG_EVENT;
			mutex_exit(&ibtl_async_mutex);
		}
	}
	kmem_free(mgrp, sizeof (*mgrp));

	mutex_enter(&ibtl_clnt_list_mutex);
	if (--hca_devp->hd_async_task_cnt == 0)
		cv_signal(&hca_devp->hd_async_task_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}

static void
ibtl_cm_get_node_info(ibtl_hca_devinfo_t *hca_devp,
    ibt_async_handler_t async_handler)
{
	struct ibtl_mgr_s *mgrp;

	if (async_handler == NULL)
		return;

	_NOTE(NO_COMPETING_THREADS_NOW)
	mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP);
	mgrp->mgr_hca_devp = hca_devp;
	mgrp->mgr_async_handler = async_handler;
	mgrp->mgr_clnt_private = NULL;
	hca_devp->hd_async_task_cnt++;

	(void) taskq_dispatch(ibtl_async_taskq,
	    ibt_cisco_embedded_sm_rereg_fix, mgrp, TQ_SLEEP);
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif
}

static void
ibtl_tell_mgr(ibtl_hca_devinfo_t *hca_devp, ibt_async_handler_t async_handler,
    void *clnt_private)
{
	struct ibtl_mgr_s *mgrp;

	if (async_handler == NULL)
		return;

	_NOTE(NO_COMPETING_THREADS_NOW)
	mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP);
	mgrp->mgr_hca_devp = hca_devp;
	mgrp->mgr_async_handler = async_handler;
	mgrp->mgr_clnt_private = clnt_private;
	hca_devp->hd_async_task_cnt++;

	(void) taskq_dispatch(ibtl_async_taskq, ibtl_do_mgr_async_task, mgrp,
	    TQ_SLEEP);
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif
}

/*
 * Per client-device asyncs for HCA level events.  Call each client that is
 * using the HCA for the event recorded in the ibtl_hca_devinfo_t.
 */
static void
ibtl_hca_client_async_task(void *arg)
{
	ibtl_hca_t		*ibt_hca = (ibtl_hca_t *)arg;
	ibtl_hca_devinfo_t	*hca_devp = ibt_hca->ha_hca_devp;
	ibtl_clnt_t		*clntp = ibt_hca->ha_clnt_devp;
	ibt_async_event_t	async_event;

	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_hca_client_async_task(%p, 0x%x)",
	    ibt_hca, hca_devp->hd_async_code);

	bcopy(&hca_devp->hd_async_event, &async_event, sizeof (async_event));
	ibtl_async_client_call(ibt_hca, hca_devp->hd_async_code, &async_event);

	mutex_enter(&ibtl_async_mutex);
	if (--ibt_hca->ha_async_cnt == 0 &&
	    (ibt_hca->ha_async_flags & IBTL_ASYNC_FREE_OBJECT)) {
		mutex_exit(&ibtl_async_mutex);
		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
	} else
		mutex_exit(&ibtl_async_mutex);

	mutex_enter(&ibtl_clnt_list_mutex);
	if (--hca_devp->hd_async_task_cnt == 0)
		cv_signal(&hca_devp->hd_async_task_cv);
	if (--clntp->clnt_async_cnt == 0)
		cv_broadcast(&ibtl_clnt_cv);

	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * Asyncs for HCA level events.
 *
 * The function continues to run until there are no more async
 * events/errors for this HCA.  An event is chosen for dispatch
 * to all clients of this HCA.  This thread dispatches them via
 * the ibtl_async_taskq, then sleeps until all tasks are done.
 *
 * This thread records the async_code and async_event in the
 * ibtl_hca_devinfo_t for all client taskq threads to reference.
 *
 * This is called from an async or taskq thread with ibtl_async_mutex held.
 */
static void
ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp)
{
	ibtl_hca_t			*ibt_hca;
	ibt_async_event_t		*eventp;
	ibt_async_code_t		code;
	ibtl_async_port_status_t  	temp;
	uint8_t				nports;
	uint8_t				port_minus1;
	ibtl_async_port_event_t		*portp;

	mutex_exit(&ibtl_async_mutex);

	mutex_enter(&ibtl_clnt_list_mutex);
	while (hca_devp->hd_async_busy)
		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
	hca_devp->hd_async_busy = 1;
	mutex_enter(&ibtl_async_mutex);

	bzero(&hca_devp->hd_async_event, sizeof (hca_devp->hd_async_event));
	for (;;) {

		hca_devp->hd_async_event.ev_fma_ena = 0;

		code = hca_devp->hd_async_codes;
		if (code & IBT_ERROR_LOCAL_CATASTROPHIC) {
			code = IBT_ERROR_LOCAL_CATASTROPHIC;
			hca_devp->hd_async_event.ev_fma_ena =
			    hca_devp->hd_fma_ena;
		} else if (code & IBT_ERROR_PORT_DOWN) {
			code = IBT_ERROR_PORT_DOWN;
			temp = IBTL_HCA_PORT_DOWN;
		} else if (code & IBT_EVENT_PORT_UP) {
			code = IBT_EVENT_PORT_UP;
			temp = IBTL_HCA_PORT_UP;
		} else if (code & IBT_PORT_CHANGE_EVENT) {
			code = IBT_PORT_CHANGE_EVENT;
			temp = IBTL_HCA_PORT_CHG;
		} else if (code & IBT_CLNT_REREG_EVENT) {
			code = IBT_CLNT_REREG_EVENT;
			temp = IBTL_HCA_PORT_ASYNC_CLNT_REREG;
		} else {
			hca_devp->hd_async_codes = 0;
			code = 0;
		}

		if (code == 0) {
			hca_devp->hd_async_flags &= ~IBTL_ASYNC_PENDING;
			break;
		}
		hca_devp->hd_async_codes &= ~code;

		/* PORT_UP, PORT_CHANGE, PORT_DOWN or ASYNC_REREG */
		if ((code & IBT_PORT_EVENTS) != 0) {
			portp = hca_devp->hd_async_port;
			nports = hca_devp->hd_hca_attr->hca_nports;
			for (port_minus1 = 0; port_minus1 < nports;
			    port_minus1++) {
				/*
				 * Matching event in this port, let's go handle
				 * it.
				 */
				if ((portp[port_minus1].status & temp) != 0)
					break;
			}
			if (port_minus1 >= nports) {
				/* we checked again, but found nothing */
				continue;
			}
			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_do_hca_asyncs: "
			    "async: port# %x code %x", port_minus1 + 1, code);
			/* mark it to check for other ports after we're done */
			hca_devp->hd_async_codes |= code;

			/*
			 * Copy the event information into hca_devp and clear
			 * event information from the per port data.
			 */
			hca_devp->hd_async_event.ev_port = port_minus1 + 1;
			if (temp == IBTL_HCA_PORT_CHG) {
				hca_devp->hd_async_event.ev_port_flags =
				    hca_devp->hd_async_port[port_minus1].flags;
				hca_devp->hd_async_port[port_minus1].flags = 0;
			}
			hca_devp->hd_async_port[port_minus1].status &= ~temp;

			mutex_exit(&ibtl_async_mutex);
			ibtl_reinit_hca_portinfo(hca_devp, port_minus1 + 1);
			mutex_enter(&ibtl_async_mutex);
			eventp = &hca_devp->hd_async_event;
			eventp->ev_hca_guid =
			    hca_devp->hd_hca_attr->hca_node_guid;
		}

		hca_devp->hd_async_code = code;
		hca_devp->hd_async_event.ev_hca_guid =
		    hca_devp->hd_hca_attr->hca_node_guid;
		mutex_exit(&ibtl_async_mutex);

		/*
		 * Make sure to inform CM, DM, and IBMA if we know of them.
		 * Also, make sure not to inform them a second time, which
		 * would occur if they have the HCA open.
		 */

		if (ibtl_ibma_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_ibma_async_handler,
			    ibtl_ibma_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/*
		 * Hack Alert:
		 * The ibmf handler would have updated the Master SM LID if
		 * this was an SM LID change event.  Now let's check whether
		 * the new Master SM is an embedded Cisco Topspin SM.
		 */
		if ((code == IBT_PORT_CHANGE_EVENT) &&
		    eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID)
			ibtl_cm_get_node_info(hca_devp,
			    (ibt_async_handler_t)(uintptr_t)ibtl_node_info_cb);
		/* wait for node info task to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		if (ibtl_dm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_dm_async_handler,
			    ibtl_dm_clnt_private);
		if (ibtl_cm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
			    ibtl_cm_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		for (ibt_hca = hca_devp->hd_clnt_list;
		    ibt_hca != NULL;
		    ibt_hca = ibt_hca->ha_clnt_link) {

			/* Managers are handled above */
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_cm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_dm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_ibma_async_handler)
				continue;
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;

			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;
			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}

		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		mutex_enter(&ibtl_async_mutex);
	}
	hca_devp->hd_async_code = 0;
	hca_devp->hd_async_busy = 0;
	cv_broadcast(&hca_devp->hd_async_busy_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * Asyncs for QP objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
 */
static void
ibtl_do_qp_asyncs(ibtl_qp_t *ibtl_qp)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	async_event.ev_chan_hdl = IBTL_QP2CHAN(ibtl_qp);

	while ((code = ibtl_qp->qp_async_codes) != 0) {
		async_event.ev_fma_ena = 0;
		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CATASTROPHIC_QP) {
			code = IBT_ERROR_CATASTROPHIC_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_cat_fma_ena;
		} else if (code & IBT_ERROR_INVALID_REQUEST_QP) {
			code = IBT_ERROR_INVALID_REQUEST_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_inv_fma_ena;
		} else if (code & IBT_ERROR_ACCESS_VIOLATION_QP) {
			code = IBT_ERROR_ACCESS_VIOLATION_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_acc_fma_ena;
		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_QP) {
			code = IBT_ERROR_PATH_MIGRATE_REQ_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_pth_fma_ena;
		} else if (code & IBT_EVENT_PATH_MIGRATED_QP)
			code = IBT_EVENT_PATH_MIGRATED_QP;
		else if (code & IBT_EVENT_SQD)
			code = IBT_EVENT_SQD;
		else if (code & IBT_EVENT_COM_EST_QP)
			code = IBT_EVENT_COM_EST_QP;
		else if (code & IBT_EVENT_EMPTY_QP)
			code = IBT_EVENT_EMPTY_QP;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_qp_asyncs: "
			    "async: unexpected QP async code 0x%x", code);
			ibtl_qp->qp_async_codes = 0;
			code = 0;
		}
		ibtl_qp->qp_async_codes &= ~code;

		if (code) {
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_qp->qp_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			mutex_exit(&ibtl_async_mutex);
			cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
			mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
			kmem_free(IBTL_QP2CHAN(ibtl_qp),
			    sizeof (ibtl_channel_t));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_qp->qp_async_flags &= ~IBTL_ASYNC_PENDING;
}

/*
 * Asyncs for SRQ objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
 */
static void
ibtl_do_srq_asyncs(ibtl_srq_t *ibtl_srq)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	async_event.ev_srq_hdl = ibtl_srq;
	async_event.ev_fma_ena = ibtl_srq->srq_fma_ena;

	while ((code = ibtl_srq->srq_async_codes) != 0) {
		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CATASTROPHIC_SRQ)
			code = IBT_ERROR_CATASTROPHIC_SRQ;
		else if (code & IBT_EVENT_LIMIT_REACHED_SRQ)
			code = IBT_EVENT_LIMIT_REACHED_SRQ;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_srq_asyncs: "
			    "async: unexpected SRQ async code 0x%x", code);
			ibtl_srq->srq_async_codes = 0;
			code = 0;
		}
		ibtl_srq->srq_async_codes &= ~code;

		if (code) {
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_srq->srq_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			mutex_exit(&ibtl_async_mutex);
			kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_srq->srq_async_flags &= ~IBTL_ASYNC_PENDING;
}

/*
 * Asyncs for CQ objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
 */
static void
ibtl_do_cq_asyncs(ibtl_cq_t *ibtl_cq)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	async_event.ev_cq_hdl = ibtl_cq;
	async_event.ev_fma_ena = ibtl_cq->cq_fma_ena;

	while ((code = ibtl_cq->cq_async_codes) != 0) {
		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CQ)
			code = IBT_ERROR_CQ;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_cq_asyncs: "
			    "async: unexpected CQ async code 0x%x", code);
			ibtl_cq->cq_async_codes = 0;
			code = 0;
		}
		ibtl_cq->cq_async_codes &= ~code;

		if (code) {
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_cq->cq_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		if (ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			mutex_exit(&ibtl_async_mutex);
			mutex_destroy(&ibtl_cq->cq_mutex);
			kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_cq->cq_async_flags &= ~IBTL_ASYNC_PENDING;
}

/*
 * Asyncs for EEC objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
 */
static void
ibtl_do_eec_asyncs(ibtl_eec_t *ibtl_eec)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	async_event.ev_chan_hdl = ibtl_eec->eec_channel;

	while ((code = ibtl_eec->eec_async_codes) != 0) {
		async_event.ev_fma_ena = 0;
		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CATASTROPHIC_EEC) {
			code = IBT_ERROR_CATASTROPHIC_CHAN;
			async_event.ev_fma_ena = ibtl_eec->eec_cat_fma_ena;
		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_EEC) {
			code = IBT_ERROR_PATH_MIGRATE_REQ;
			async_event.ev_fma_ena = ibtl_eec->eec_pth_fma_ena;
		} else if (code & IBT_EVENT_PATH_MIGRATED_EEC)
			code = IBT_EVENT_PATH_MIGRATED;
		else if (code & IBT_EVENT_COM_EST_EEC)
			code = IBT_EVENT_COM_EST;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_eec_asyncs: "
			    "async: unexpected code 0x%x", code);
			ibtl_eec->eec_async_codes = 0;
			code = 0;
		}
		ibtl_eec->eec_async_codes &= ~code;

		if (code) {
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_eec->eec_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			mutex_exit(&ibtl_async_mutex);
			kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_eec->eec_async_flags &= ~IBTL_ASYNC_PENDING;
}

#ifdef __lock_lint
kmutex_t cpr_mutex;
#endif

/*
 * Loop forever, calling async_handlers until all of the async lists
 * are empty.
 */

static void
ibtl_async_thread(void)
{
#ifndef __lock_lint
	kmutex_t cpr_mutex;
#endif
	callb_cpr_t	cprinfo;

	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
	_NOTE(NO_COMPETING_THREADS_NOW)
	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
	    "ibtl_async_thread");
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif

	mutex_enter(&ibtl_async_mutex);

	for (;;) {
		if (ibtl_async_hca_list_start) {
			ibtl_hca_devinfo_t *hca_devp;

			/* remove first entry from list */
			hca_devp = ibtl_async_hca_list_start;
			ibtl_async_hca_list_start = hca_devp->hd_async_link;
			hca_devp->hd_async_link = NULL;
			if (ibtl_async_hca_list_start == NULL)
				ibtl_async_hca_list_end = NULL;

			ibtl_do_hca_asyncs(hca_devp);

		} else if (ibtl_async_qp_list_start) {
			ibtl_qp_t *ibtl_qp;

			/* remove from list */
			ibtl_qp = ibtl_async_qp_list_start;
			ibtl_async_qp_list_start = ibtl_qp->qp_async_link;
			ibtl_qp->qp_async_link = NULL;
			if (ibtl_async_qp_list_start == NULL)
				ibtl_async_qp_list_end = NULL;

			ibtl_do_qp_asyncs(ibtl_qp);

		} else if (ibtl_async_srq_list_start) {
			ibtl_srq_t *ibtl_srq;

			/* remove from list */
			ibtl_srq = ibtl_async_srq_list_start;
			ibtl_async_srq_list_start = ibtl_srq->srq_async_link;
			ibtl_srq->srq_async_link = NULL;
			if (ibtl_async_srq_list_start == NULL)
				ibtl_async_srq_list_end = NULL;

			ibtl_do_srq_asyncs(ibtl_srq);

		} else if (ibtl_async_eec_list_start) {
			ibtl_eec_t *ibtl_eec;

			/* remove from list */
			ibtl_eec = ibtl_async_eec_list_start;
			ibtl_async_eec_list_start = ibtl_eec->eec_async_link;
			ibtl_eec->eec_async_link = NULL;
			if (ibtl_async_eec_list_start == NULL)
				ibtl_async_eec_list_end = NULL;

			ibtl_do_eec_asyncs(ibtl_eec);

		} else if (ibtl_async_cq_list_start) {
			ibtl_cq_t *ibtl_cq;

			/* remove from list */
			ibtl_cq = ibtl_async_cq_list_start;
			ibtl_async_cq_list_start = ibtl_cq->cq_async_link;
			ibtl_cq->cq_async_link = NULL;
			if (ibtl_async_cq_list_start == NULL)
				ibtl_async_cq_list_end = NULL;

			ibtl_do_cq_asyncs(ibtl_cq);

		} else {
			if (ibtl_async_thread_exit == IBTL_THREAD_EXIT)
				break;
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			mutex_exit(&cpr_mutex);

			cv_wait(&ibtl_async_cv, &ibtl_async_mutex);

			mutex_exit(&ibtl_async_mutex);
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
			mutex_exit(&cpr_mutex);
			mutex_enter(&ibtl_async_mutex);
		}
	}

	mutex_exit(&ibtl_async_mutex);

#ifndef __lock_lint
	mutex_enter(&cpr_mutex);
	CALLB_CPR_EXIT(&cprinfo);
#endif
	mutex_destroy(&cpr_mutex);
}


void
ibtl_free_qp_async_check(ibtl_qp_t *ibtl_qp)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_qp_async_check(%p)", ibtl_qp);

	mutex_enter(&ibtl_async_mutex);

	/*
	 * If there is an active async, mark this object to be freed
	 * by the async_thread when it's done.
	 */
	if (ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) {
		ibtl_qp->qp_async_flags |= IBTL_ASYNC_FREE_OBJECT;
		mutex_exit(&ibtl_async_mutex);
	} else {	/* free the object now */
		mutex_exit(&ibtl_async_mutex);
		cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
		mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
		kmem_free(IBTL_QP2CHAN(ibtl_qp), sizeof (ibtl_channel_t));
	}
}

void
ibtl_free_cq_async_check(ibtl_cq_t *ibtl_cq)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_cq_async_check(%p)", ibtl_cq);

	mutex_enter(&ibtl_async_mutex);

	/* if there is an active async, mark this object to be freed */
	if (ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) {
		ibtl_cq->cq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
		mutex_exit(&ibtl_async_mutex);
	} else {	/* free the object now */
		mutex_exit(&ibtl_async_mutex);
		mutex_destroy(&ibtl_cq->cq_mutex);
		kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
	}
}

void
ibtl_free_srq_async_check(ibtl_srq_t *ibtl_srq)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_srq_async_check(%p)",
	    ibtl_srq);

	mutex_enter(&ibtl_async_mutex);

	/* if there is an active async, mark this object to be freed */
	if (ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) {
		ibtl_srq->srq_async_flags |= IBTL_ASYNC_FREE_OBJECT;
		mutex_exit(&ibtl_async_mutex);
	} else {	/* free the object now */
		mutex_exit(&ibtl_async_mutex);
		kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
	}
}

void
ibtl_free_eec_async_check(ibtl_eec_t *ibtl_eec)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_eec_async_check(%p)",
	    ibtl_eec);

	mutex_enter(&ibtl_async_mutex);

	/* if there is an active async, mark this object to be freed */
	if (ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) {
		ibtl_eec->eec_async_flags |= IBTL_ASYNC_FREE_OBJECT;
		mutex_exit(&ibtl_async_mutex);
	} else {	/* free the object now */
		mutex_exit(&ibtl_async_mutex);
		kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s));
	}
}

/*
 * This function differs from above in that we assume this is called
 * from non-interrupt context, and never called from the async_thread.
 */

void
ibtl_free_hca_async_check(ibtl_hca_t *ibt_hca)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_hca_async_check(%p)",
	    ibt_hca);

	mutex_enter(&ibtl_async_mutex);

	/* if there is an active async, mark this object to be freed */
	if (ibt_hca->ha_async_cnt > 0) {
		ibt_hca->ha_async_flags |= IBTL_ASYNC_FREE_OBJECT;
		mutex_exit(&ibtl_async_mutex);
	} else {	/* free the object now */
		mutex_exit(&ibtl_async_mutex);
		kmem_free(ibt_hca, sizeof (ibtl_hca_t));
	}
}

/*
 * Completion Queue Handling.
 *
 *	A completion queue can be handled through a simple callback
 *	at interrupt level, or it may be queued for an ibtl_cq_thread
 *	to handle.  The latter is chosen during ibt_alloc_cq when the
 *	IBT_CQ_HANDLER_IN_THREAD flag is specified.
 */

static void
ibtl_cq_handler_call(ibtl_cq_t *ibtl_cq)
{
	ibt_cq_handler_t	cq_handler;
	void			*arg;

	IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_cq_handler_call(%p)", ibtl_cq);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq))
	cq_handler = ibtl_cq->cq_comp_handler;
	arg = ibtl_cq->cq_arg;
	if (cq_handler != NULL)
		cq_handler(ibtl_cq, arg);
	else
		IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_cq_handler_call: "
		    "no cq_handler for cq %p", ibtl_cq);
}

/*
 * Before ibt_free_cq can continue, we need to ensure no more cq_handler
 * callbacks can occur.  When we get the mutex, we know there are no
 * outstanding cq_handler callbacks.  We set the cq_handler to NULL to
 * prohibit future callbacks.
 */
void
ibtl_free_cq_check(ibtl_cq_t *ibtl_cq)
{
	mutex_enter(&ibtl_cq->cq_mutex);
	ibtl_cq->cq_comp_handler = NULL;
	mutex_exit(&ibtl_cq->cq_mutex);
	if (ibtl_cq->cq_in_thread) {
		mutex_enter(&ibtl_cq_mutex);
		--ibtl_cqs_using_threads;
		while (ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) {
			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
			ibtl_cq->cq_impl_flags |= IBTL_CQ_FREE;
			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
		}
		mutex_exit(&ibtl_cq_mutex);
	}
}

/*
 * Loop forever, calling cq_handlers until the cq list
 * is empty.
 */

static void
ibtl_cq_thread(void)
{
#ifndef __lock_lint
	kmutex_t cpr_mutex;
#endif
	callb_cpr_t	cprinfo;

	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
	_NOTE(NO_COMPETING_THREADS_NOW)
	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
	    "ibtl_cq_thread");
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif

	mutex_enter(&ibtl_cq_mutex);

	for (;;) {
		if (ibtl_cq_list_start) {
			ibtl_cq_t *ibtl_cq;

			ibtl_cq = ibtl_cq_list_start;
			ibtl_cq_list_start = ibtl_cq->cq_link;
			ibtl_cq->cq_link = NULL;
			if (ibtl_cq == ibtl_cq_list_end)
				ibtl_cq_list_end = NULL;

			while (ibtl_cq->cq_impl_flags & IBTL_CQ_CALL_CLIENT) {
				ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
				mutex_exit(&ibtl_cq_mutex);
				ibtl_cq_handler_call(ibtl_cq);
				mutex_enter(&ibtl_cq_mutex);
			}
			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_PENDING;
			if (ibtl_cq->cq_impl_flags & IBTL_CQ_FREE)
				cv_broadcast(&ibtl_cq_cv);
		} else {
			if (ibtl_cq_thread_exit == IBTL_THREAD_EXIT)
				break;
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			mutex_exit(&cpr_mutex);

			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);

			mutex_exit(&ibtl_cq_mutex);
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
			mutex_exit(&cpr_mutex);
			mutex_enter(&ibtl_cq_mutex);
		}
	}

	mutex_exit(&ibtl_cq_mutex);
#ifndef __lock_lint
	mutex_enter(&cpr_mutex);
	CALLB_CPR_EXIT(&cprinfo);
#endif
	mutex_destroy(&cpr_mutex);
}


/*
 * ibc_cq_handler()
 *
 *    Completion Queue Notification Handler.
 *
 */
/*ARGSUSED*/
void
ibc_cq_handler(ibc_clnt_hdl_t ibc_hdl, ibt_cq_hdl_t ibtl_cq)
{
	IBTF_DPRINTF_L4(ibtf_handlers, "ibc_cq_handler(%p, %p)",
	    ibc_hdl, ibtl_cq);

	if (ibtl_cq->cq_in_thread) {
		mutex_enter(&ibtl_cq_mutex);
		ibtl_cq->cq_impl_flags |= IBTL_CQ_CALL_CLIENT;
		if ((ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) == 0) {
			ibtl_cq->cq_impl_flags |= IBTL_CQ_PENDING;
			ibtl_cq->cq_link = NULL;
			if (ibtl_cq_list_end == NULL)
				ibtl_cq_list_start = ibtl_cq;
			else
				ibtl_cq_list_end->cq_link = ibtl_cq;
			ibtl_cq_list_end = ibtl_cq;
			cv_signal(&ibtl_cq_cv);
		}
		mutex_exit(&ibtl_cq_mutex);
		return;
	} else
		ibtl_cq_handler_call(ibtl_cq);
}


/*
 * ibt_enable_cq_notify()
 *      Enable Notification requests on the specified CQ.
 *
 *      ibt_cq          The CQ handle.
 *
 *      notify_type     Enable notifications for all (IBT_NEXT_COMPLETION)
 *                      completions, or the next Solicited completion
 *                      (IBT_NEXT_SOLICITED) only.
 *
 *	Completion notifications are disabled by setting the completion
 *	handler to NULL via ibt_set_cq_handler().
 */
ibt_status_t
ibt_enable_cq_notify(ibt_cq_hdl_t ibtl_cq, ibt_cq_notify_flags_t notify_type)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_enable_cq_notify(%p, %d)",
	    ibtl_cq, notify_type);

	return (IBTL_CQ2CIHCAOPS_P(ibtl_cq)->ibc_notify_cq(
	    IBTL_CQ2CIHCA(ibtl_cq), ibtl_cq->cq_ibc_cq_hdl, notify_type));
}


/*
 * ibt_set_cq_handler()
 *      Register a work request completion handler with the IBTF.
 *
 *      ibt_cq                  The CQ handle.
 *
 *      completion_handler      The completion handler.
 *
 *      arg                     The IBTF client private argument to be passed
 *                              back to the client when calling the CQ
 *                              completion handler.
 *
 *	Completion notifications are disabled by setting the completion
 *	handler to NULL.  When setting the handler to NULL, no additional
 *	calls to the previous CQ handler will be initiated, but there may
 *	be one in progress.
 *
 *      This function does not otherwise change the state of previous
 *      calls to ibt_enable_cq_notify().
 */
void
ibt_set_cq_handler(ibt_cq_hdl_t ibtl_cq, ibt_cq_handler_t completion_handler,
    void *arg)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_set_cq_handler(%p, %p, %p)",
	    ibtl_cq, completion_handler, arg);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq))
	ibtl_cq->cq_comp_handler = completion_handler;
	ibtl_cq->cq_arg = arg;
}
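
/*
 * Typical client usage of ibt_set_cq_handler() and ibt_enable_cq_notify()
 * is sketched below.  This is illustrative only; "my_cq_handler" and
 * "my_state" are hypothetical client names:
 *
 *	static void
 *	my_cq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
 *	{
 *		...poll and process completions, then re-arm...
 *		(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
 *	}
 *
 *	ibt_set_cq_handler(cq_hdl, my_cq_handler, my_state);
 *	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
 *
 * The handler's signature matches the way ibtl_cq_handler_call() invokes
 * cq_comp_handler above.
 */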


/*
 * Inform IBT clients about New HCAs.
 *
 *	We use taskqs to allow simultaneous notification, with sleeping.
 *	Since taskqs only allow one argument, we define a structure
 *	because we need to pass in two arguments.
 */

struct ibtl_new_hca_s {
	ibtl_clnt_t		*nh_clntp;
	ibtl_hca_devinfo_t	*nh_hca_devp;
	ibt_async_code_t	nh_code;
};

static void
ibtl_tell_client_about_new_hca(void *arg)
{
	struct ibtl_new_hca_s	*new_hcap = (struct ibtl_new_hca_s *)arg;
	ibtl_clnt_t		*clntp = new_hcap->nh_clntp;
	ibt_async_event_t	async_event;
	ibtl_hca_devinfo_t	*hca_devp = new_hcap->nh_hca_devp;

	bzero(&async_event, sizeof (async_event));
	async_event.ev_hca_guid = hca_devp->hd_hca_attr->hca_node_guid;
	clntp->clnt_modinfop->mi_async_handler(
	    clntp->clnt_private, NULL, new_hcap->nh_code, &async_event);
	kmem_free(new_hcap, sizeof (*new_hcap));
#ifdef __lock_lint
	{
		ibt_hca_hdl_t hca_hdl;
		(void) ibt_open_hca(clntp, 0ULL, &hca_hdl);
	}
#endif
	mutex_enter(&ibtl_clnt_list_mutex);
	if (--hca_devp->hd_async_task_cnt == 0)
		cv_signal(&hca_devp->hd_async_task_cv);
	if (--clntp->clnt_async_cnt == 0)
		cv_broadcast(&ibtl_clnt_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * ibtl_announce_new_hca:
 *
 *	o First attach these clients in the given order
 *		IBMA
 *		IBDM
 *		IBCM
 *
 *	o Next attach all other clients in parallel.
 *
 * NOTE: Use the taskq to simultaneously notify all clients of the new HCA.
 * Retval from clients is ignored.
 */
void
ibtl_announce_new_hca(ibtl_hca_devinfo_t *hca_devp)
{
	ibtl_clnt_t		*clntp;
	struct ibtl_new_hca_s	*new_hcap;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_announce_new_hca(%p, %llX)",
	    hca_devp, hca_devp->hd_hca_attr->hca_node_guid);

	mutex_enter(&ibtl_clnt_list_mutex);

	clntp = ibtl_clnt_list;
	while (clntp != NULL) {
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
			IBTF_DPRINTF_L4(ibtf_handlers,
			    "ibtl_announce_new_hca: calling IBMF");
			if (clntp->clnt_modinfop->mi_async_handler) {
				_NOTE(NO_COMPETING_THREADS_NOW)
				new_hcap = kmem_alloc(sizeof (*new_hcap),
				    KM_SLEEP);
				new_hcap->nh_clntp = clntp;
				new_hcap->nh_hca_devp = hca_devp;
				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
#ifndef lint
				_NOTE(COMPETING_THREADS_NOW)
#endif
				clntp->clnt_async_cnt++;
				hca_devp->hd_async_task_cnt++;

				(void) taskq_dispatch(ibtl_async_taskq,
				    ibtl_tell_client_about_new_hca, new_hcap,
				    TQ_SLEEP);
			}
			break;
		}
		clntp = clntp->clnt_list_link;
	}
	if (clntp != NULL)
		while (clntp->clnt_async_cnt > 0)
			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
	clntp = ibtl_clnt_list;
	while (clntp != NULL) {
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
			if (clntp->clnt_modinfop->mi_async_handler) {
				_NOTE(NO_COMPETING_THREADS_NOW)
				new_hcap = kmem_alloc(sizeof (*new_hcap),
				    KM_SLEEP);
				new_hcap->nh_clntp = clntp;
				new_hcap->nh_hca_devp = hca_devp;
				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
#ifndef lint
				_NOTE(COMPETING_THREADS_NOW)
#endif
				clntp->clnt_async_cnt++;
				hca_devp->hd_async_task_cnt++;

				mutex_exit(&ibtl_clnt_list_mutex);
				(void) ibtl_tell_client_about_new_hca(
				    new_hcap);
				mutex_enter(&ibtl_clnt_list_mutex);
			}
			break;
		}
		clntp = clntp->clnt_list_link;
	}

	clntp = ibtl_clnt_list;
	while (clntp != NULL) {
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_CM) {
			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: "
			    "calling  %s", clntp->clnt_modinfop->mi_clnt_name);
			if (clntp->clnt_modinfop->mi_async_handler) {
				_NOTE(NO_COMPETING_THREADS_NOW)
				new_hcap = kmem_alloc(sizeof (*new_hcap),
				    KM_SLEEP);
				new_hcap->nh_clntp = clntp;
				new_hcap->nh_hca_devp = hca_devp;
				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
#ifndef lint
				_NOTE(COMPETING_THREADS_NOW)
#endif
				clntp->clnt_async_cnt++;
				hca_devp->hd_async_task_cnt++;

				(void) taskq_dispatch(ibtl_async_taskq,
				    ibtl_tell_client_about_new_hca, new_hcap,
				    TQ_SLEEP);
			}
			break;
		}
		clntp = clntp->clnt_list_link;
	}
	if (clntp != NULL)
		while (clntp->clnt_async_cnt > 0)
			cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
	clntp = ibtl_clnt_list;
	while (clntp != NULL) {
		if ((clntp->clnt_modinfop->mi_clnt_class != IBT_DM) &&
		    (clntp->clnt_modinfop->mi_clnt_class != IBT_CM) &&
		    (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA)) {
			IBTF_DPRINTF_L4(ibtf_handlers,
			    "ibtl_announce_new_hca: Calling %s ",
			    clntp->clnt_modinfop->mi_clnt_name);
			if (clntp->clnt_modinfop->mi_async_handler) {
				_NOTE(NO_COMPETING_THREADS_NOW)
				new_hcap = kmem_alloc(sizeof (*new_hcap),
				    KM_SLEEP);
				new_hcap->nh_clntp = clntp;
				new_hcap->nh_hca_devp = hca_devp;
				new_hcap->nh_code = IBT_HCA_ATTACH_EVENT;
#ifndef lint
				_NOTE(COMPETING_THREADS_NOW)
#endif
				clntp->clnt_async_cnt++;
				hca_devp->hd_async_task_cnt++;

				(void) taskq_dispatch(ibtl_async_taskq,
				    ibtl_tell_client_about_new_hca, new_hcap,
				    TQ_SLEEP);
			}
		}
		clntp = clntp->clnt_list_link;
	}

	/* wait for all tasks to complete */
	while (hca_devp->hd_async_task_cnt != 0)
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);

	/* wakeup thread that may be waiting to send an HCA async */
	ASSERT(hca_devp->hd_async_busy == 1);
	hca_devp->hd_async_busy = 0;
	cv_broadcast(&hca_devp->hd_async_busy_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * ibtl_detach_all_clients:
 *
 *	Return value - 0 for Success, 1 for Failure
 *
 *	o First detach general clients.
 *
 *	o Next detach these clients
 *		IBDM
 *		IBCM
 *
 *	o Finally, detach this client
 *		IBMA
 */
int
ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp)
{
	ib_guid_t		hcaguid = hca_devp->hd_hca_attr->hca_node_guid;
	ibtl_hca_t		*ibt_hca;
	ibtl_clnt_t		*clntp;
	int			retval;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_detach_all_clients(%llX)",
	    hcaguid);

	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));

	while (hca_devp->hd_async_busy)
		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
	hca_devp->hd_async_busy = 1;

	/* First inform general clients asynchronously */
	hca_devp->hd_async_event.ev_hca_guid = hcaguid;
	hca_devp->hd_async_event.ev_fma_ena = 0;
	hca_devp->hd_async_event.ev_chan_hdl = NULL;
	hca_devp->hd_async_event.ev_cq_hdl = NULL;
	hca_devp->hd_async_code = IBT_HCA_DETACH_EVENT;

	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (IBTL_GENERIC_CLIENT(clntp)) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/* wait for all clients to complete */
	while (hca_devp->hd_async_task_cnt != 0) {
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
	}
	/* Go thru the clients and check if any have not closed this HCA. */
	retval = 0;
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (IBTL_GENERIC_CLIENT(clntp)) {
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' failed to close the HCA.",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
			retval = 1;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}
	if (retval == 1)
		goto bailout;

	/* Next inform IBDM asynchronously */
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			mutex_exit(&ibtl_clnt_list_mutex);
			ibtl_hca_client_async_task(ibt_hca);
			mutex_enter(&ibtl_clnt_list_mutex);
			break;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/*
	 * Next inform IBCM.
	 * As IBCM doesn't perform ibt_open_hca(), IBCM will not be
	 * accessible via hca_devp->hd_clnt_list.
	 * ibtl_cm_async_handler will NOT be NULL, if IBCM is registered.
	 */
	if (ibtl_cm_async_handler) {
		ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
		    ibtl_cm_clnt_private);

		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);
	}

	/* Go thru the clients and check if any have not closed this HCA. */
	retval = 0;
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA) {
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' failed to close the HCA.",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
			retval = 1;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}
	if (retval == 1)
		goto bailout;

	/* Finally, inform IBMA */
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		} else
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' is unexpectedly on the client list",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/* wait for IBMA to complete */
	while (hca_devp->hd_async_task_cnt != 0) {
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
	}

	/* Check if this HCA's client list is empty. */
	ibt_hca = hca_devp->hd_clnt_list;
	if (ibt_hca != NULL) {
		IBTF_DPRINTF_L2(ibtf_handlers,
		    "ibtl_detach_all_clients: "
		    "client '%s' failed to close the HCA.",
		    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
		retval = 1;
	} else
		retval = 0;

bailout:
	if (retval) {
		hca_devp->hd_state = IBTL_HCA_DEV_ATTACHED; /* fix hd_state */
		mutex_exit(&ibtl_clnt_list_mutex);
		ibtl_announce_new_hca(hca_devp);
		mutex_enter(&ibtl_clnt_list_mutex);
	} else {
		hca_devp->hd_async_busy = 0;
		cv_broadcast(&hca_devp->hd_async_busy_cv);
	}

	return (retval);
}

void
ibtl_free_clnt_async_check(ibtl_clnt_t *clntp)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_clnt_async_check(%p)", clntp);

	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));

	/* wait for all asyncs based on "ibtl_clnt_list" to complete */
	while (clntp->clnt_async_cnt != 0) {
		cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
	}
}

static void
ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp)
{
	mutex_enter(&ibtl_clnt_list_mutex);
	if (--clntp->clnt_async_cnt == 0) {
		cv_broadcast(&ibtl_clnt_cv);
	}
	mutex_exit(&ibtl_clnt_list_mutex);
}

static void
ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp)
{
	mutex_enter(&ibtl_clnt_list_mutex);
	++clntp->clnt_async_cnt;
	mutex_exit(&ibtl_clnt_list_mutex);
}


/*
 * Functions and data structures used to inform clients of notifications
 * about Multicast Groups that might interest them.
 */
struct ibtl_sm_notice {
	ibt_clnt_hdl_t		np_ibt_hdl;
	ib_gid_t		np_sgid;
	ibt_subnet_event_code_t	np_code;
	ibt_subnet_event_t	np_event;
};

static void
ibtl_sm_notice_task(void *arg)
{
	struct ibtl_sm_notice *noticep = (struct ibtl_sm_notice *)arg;
	ibt_clnt_hdl_t ibt_hdl = noticep->np_ibt_hdl;
	ibt_sm_notice_handler_t sm_notice_handler;

	sm_notice_handler = ibt_hdl->clnt_sm_trap_handler;
	if (sm_notice_handler != NULL)
		sm_notice_handler(ibt_hdl->clnt_sm_trap_handler_arg,
		    noticep->np_sgid, noticep->np_code, &noticep->np_event);
	kmem_free(noticep, sizeof (*noticep));
	ibtl_dec_clnt_async_cnt(ibt_hdl);
}

/*
 * Inform the client that MCG notices are not working at this time.
 */
void
ibtl_cm_sm_notice_init_failure(ibtl_cm_sm_init_fail_t *ifail)
{
	ibt_clnt_hdl_t ibt_hdl = ifail->smf_ibt_hdl;
	struct ibtl_sm_notice *noticep;
	ib_gid_t *sgidp = &ifail->smf_sgid[0];
	int i;

	for (i = 0; i < ifail->smf_num_sgids; i++) {
		_NOTE(NO_COMPETING_THREADS_NOW)
		noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
		noticep->np_ibt_hdl = ibt_hdl;
		noticep->np_sgid = *sgidp++;
		noticep->np_code = IBT_SM_EVENT_UNAVAILABLE;
#ifndef lint
		_NOTE(COMPETING_THREADS_NOW)
#endif
		ibtl_inc_clnt_async_cnt(ibt_hdl);
		(void) taskq_dispatch(ibtl_async_taskq,
		    ibtl_sm_notice_task, noticep, TQ_SLEEP);
	}
}

/*
 * Inform all clients of the event.
 */
void
ibtl_cm_sm_notice_handler(ib_gid_t sgid, ibt_subnet_event_code_t code,
    ibt_subnet_event_t *event)
{
	_NOTE(NO_COMPETING_THREADS_NOW)
	struct ibtl_sm_notice	*noticep;
	ibtl_clnt_t		*clntp;

	mutex_enter(&ibtl_clnt_list_mutex);
	clntp = ibtl_clnt_list;
	while (clntp != NULL) {
		if (clntp->clnt_sm_trap_handler) {
			noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP);
			noticep->np_ibt_hdl = clntp;
			noticep->np_sgid = sgid;
			noticep->np_code = code;
			noticep->np_event = *event;
			++clntp->clnt_async_cnt;
			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_sm_notice_task, noticep, TQ_SLEEP);
		}
		clntp = clntp->clnt_list_link;
	}
	mutex_exit(&ibtl_clnt_list_mutex);
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif
}

/*
 * Record the handler for this client.
 */
void
ibtl_cm_set_sm_notice_handler(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	_NOTE(NO_COMPETING_THREADS_NOW)
	ibt_hdl->clnt_sm_trap_handler = sm_notice_handler;
	ibt_hdl->clnt_sm_trap_handler_arg = private;
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif
}
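
/*
 * For reference, the handler recorded above is invoked (via the taskq in
 * ibtl_sm_notice_task()) with the arguments shown in this hypothetical
 * sketch; "my_sm_notice_handler" is illustrative only:
 *
 *	static void
 *	my_sm_notice_handler(void *arg, ib_gid_t sgid,
 *	    ibt_subnet_event_code_t code, ibt_subnet_event_t *event)
 *	{
 *		if (code == IBT_SM_EVENT_UNAVAILABLE)
 *			...MCG notices are not available at this time...
 *	}
 */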


/*
 * ibtl_another_cq_handler_in_thread()
 *
 * Conditionally increase the number of cq_threads.
 * The number of threads grows based on the number of CQs using threads.
 *
 * The table below controls the number of threads as follows:
 *
 *	Number of CQs	Number of cq_threads
 *		0		0
 *		1		1
 *		2-3		2
 *		4-5		3
 *		6-9		4
 *		10-15		5
 *		16-23		6
 *		24-31		7
 *		32+		8
 */

#define	IBTL_CQ_MAXTHREADS 8
static uint8_t ibtl_cq_scaling[IBTL_CQ_MAXTHREADS] = {
	1, 2, 4, 6, 10, 16, 24, 32
};
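
/*
 * For example, with ibtl_cq_threads == 2, ibtl_cq_scaling[2] is 4, so a
 * third thread is created only once ibtl_cqs_using_threads reaches 4 in
 * ibtl_another_cq_handler_in_thread() below, matching the "4-5 CQs ==>
 * 3 threads" row of the table above.
 */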

static kt_did_t ibtl_cq_did[IBTL_CQ_MAXTHREADS];

void
ibtl_another_cq_handler_in_thread(void)
{
	kthread_t *t;
	int my_idx;

	mutex_enter(&ibtl_cq_mutex);
	if ((ibtl_cq_threads == IBTL_CQ_MAXTHREADS) ||
	    (++ibtl_cqs_using_threads < ibtl_cq_scaling[ibtl_cq_threads])) {
		mutex_exit(&ibtl_cq_mutex);
		return;
	}
	my_idx = ibtl_cq_threads++;
	mutex_exit(&ibtl_cq_mutex);
	t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0, TS_RUN,
	    ibtl_pri - 1);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
	ibtl_cq_did[my_idx] = t->t_did;	/* save for thread_join() */
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
}

void
ibtl_thread_init(void)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init()");

	mutex_init(&ibtl_async_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ibtl_async_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&ibtl_clnt_cv, NULL, CV_DEFAULT, NULL);

	mutex_init(&ibtl_cq_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ibtl_cq_cv, NULL, CV_DEFAULT, NULL);
}

void
ibtl_thread_init2(void)
{
	int i;
	static int initted = 0;
	kthread_t *t;

	mutex_enter(&ibtl_async_mutex);
	if (initted == 1) {
		mutex_exit(&ibtl_async_mutex);
		return;
	}
	initted = 1;
	mutex_exit(&ibtl_async_mutex);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_async_did))
	ibtl_async_did = kmem_zalloc(ibtl_async_thread_init * sizeof (kt_did_t),
	    KM_SLEEP);

	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init2()");

	for (i = 0; i < ibtl_async_thread_init; i++) {
		t = thread_create(NULL, 0, ibtl_async_thread, NULL, 0, &p0,
		    TS_RUN, ibtl_pri - 1);
		ibtl_async_did[i] = t->t_did; /* thread_join() */
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_async_did))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
	for (i = 0; i < ibtl_cq_threads; i++) {
		t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0,
		    TS_RUN, ibtl_pri - 1);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
		ibtl_cq_did[i] = t->t_did; /* save for thread_join() */
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did))
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
}

void
ibtl_thread_fini(void)
{
	int i;

	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_fini()");

	/* undo the work done by ibtl_thread_init() */

	mutex_enter(&ibtl_cq_mutex);
	ibtl_cq_thread_exit = IBTL_THREAD_EXIT;
	cv_broadcast(&ibtl_cq_cv);
	mutex_exit(&ibtl_cq_mutex);

	mutex_enter(&ibtl_async_mutex);
	ibtl_async_thread_exit = IBTL_THREAD_EXIT;
	cv_broadcast(&ibtl_async_cv);
	mutex_exit(&ibtl_async_mutex);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))
	for (i = 0; i < ibtl_cq_threads; i++)
		thread_join(ibtl_cq_did[i]);
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads))

	if (ibtl_async_did) {
		for (i = 0; i < ibtl_async_thread_init; i++)
			thread_join(ibtl_async_did[i]);

		kmem_free(ibtl_async_did,
		    ibtl_async_thread_init * sizeof (kt_did_t));
	}
	mutex_destroy(&ibtl_cq_mutex);
	cv_destroy(&ibtl_cq_cv);

	mutex_destroy(&ibtl_async_mutex);
	cv_destroy(&ibtl_async_cv);
	cv_destroy(&ibtl_clnt_cv);
}

/* ARGSUSED */
ibt_status_t ibtl_dummy_node_info_cb(ib_guid_t hca_guid, uint8_t port,
    ib_lid_t lid, ibt_node_info_t *node_info)
{
	return (IBT_SUCCESS);
}