/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * UDAPL kernel agent
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/kstat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/taskq.h>
#include <sys/open.h>
#include <sys/uio.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/esunddi.h>
#include <sys/avl.h>
#include <sys/cred.h>
#include <sys/note.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <daplt_if.h>
#include <daplt.h>

/*
 * The following variables support the debug log buffer scheme.
 */
#ifdef	DEBUG
static char daplka_dbgbuf[0x80000];
#else /* DEBUG */
static char daplka_dbgbuf[0x4000];
#endif /* DEBUG */
static int daplka_dbgsize = sizeof (daplka_dbgbuf);
static size_t daplka_dbgnext;
static int daplka_dbginit = 0;
static kmutex_t daplka_dbglock;
_NOTE(MUTEX_PROTECTS_DATA(daplka_dbglock,
    daplka_dbgbuf
    daplka_dbgnext))

static int daplka_dbg = 0x0103;
static void daplka_console(const char *, ...);
static void daplka_debug(const char *, ...);
static int daplka_apm = 0x1;			/* default enable */
static int daplka_failback = 0x1;		/* default enable */
static int daplka_query_aft_setaltpath = 10;

#define	DERR				\
	if (daplka_dbg & 0x100) 	\
	    daplka_debug

#ifdef DEBUG

#define	DINFO				\
	daplka_console

#define	D1				\
	if (daplka_dbg & 0x01)		\
	    daplka_debug
#define	D2				\
	if (daplka_dbg & 0x02) 		\
	    daplka_debug
#define	D3				\
	if (daplka_dbg & 0x04) 		\
	    daplka_debug
#define	D4				\
	if (daplka_dbg & 0x08) 		\
	    daplka_debug

#else /* DEBUG */

#define	DINFO	if (0) printf
#define	D1	if (0) printf
#define	D2	if (0) printf
#define	D3	if (0) printf
#define	D4	if (0) printf

#endif /* DEBUG */
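
/*
 * usage sketch (patterned after call sites in this file): the debug
 * macros are used like printf. DERR is compiled in unconditionally
 * and gated by daplka_dbg & 0x100; D1 through D4 become no-ops in
 * non-DEBUG builds.
 *
 *	D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
 *	DERR("attach: ibt_attach failed: error = %d\n", retval);
 */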

/*
 * driver entry points
 */
static int daplka_open(dev_t *, int, int, struct cred *);
static int daplka_close(dev_t, int, int, struct cred *);
static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * types of ioctls
 */
static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * common ioctls and supporting functions
 */
static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
static int daplka_ia_destroy(daplka_resource_t *);

/*
 * EP ioctls and supporting functions
 */
static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_destroy(daplka_resource_t *);
static void daplka_hash_ep_free(void *);
static int daplka_ep_failback(void *objp, void *arg);
static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);

static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
static void daplka_timer_info_free(daplka_timer_info_t *);
static void daplka_timer_handler(void *);
static void daplka_timer_dispatch(void *);
static void daplka_timer_thread(void *);
static int daplka_cancel_timer(daplka_ep_resource_t *);
static void daplka_hash_timer_free(void *);

/*
 * EVD ioctls and supporting functions
 */
static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_destroy(daplka_resource_t *);
static void daplka_cq_handler(ibt_cq_hdl_t, void *);
static void daplka_evd_wakeup(daplka_evd_resource_t *,
    daplka_evd_event_list_t *, daplka_evd_event_t *);
static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
    daplka_evd_event_t *);
static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
static void daplka_hash_evd_free(void *);


/*
 * SRQ ioctls and supporting functions
 */
static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_destroy(daplka_resource_t *);
static void daplka_hash_srq_free(void *);

/*
 * Miscellaneous ioctls
 */
static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * PD ioctls and supporting functions
 */
static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_destroy(daplka_resource_t *);
static void daplka_hash_pd_free(void *);

/*
 * SP ioctls and supporting functions
 */
static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_destroy(daplka_resource_t *);
static void daplka_hash_sp_free(void *);
static void daplka_hash_sp_unref(void *);

/*
 * MR ioctls and supporting functions
 */
static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_destroy(daplka_resource_t *);
static void daplka_hash_mr_free(void *);
static void daplka_shared_mr_free(daplka_mr_resource_t *);

/*
 * MW ioctls and supporting functions
 */
static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_destroy(daplka_resource_t *);
static void daplka_hash_mw_free(void *);

/*
 * CNO ioctls and supporting functions
 */
static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_destroy(daplka_resource_t *);
static void daplka_hash_cno_free(void *);

/*
 * CM handlers
 */
static  ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static  ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

/*
 * resource management routines
 */
static int daplka_resource_reserve(minor_t *);
static int daplka_resource_insert(minor_t, daplka_resource_t *);
static daplka_resource_t *daplka_resource_remove(minor_t rnum);
static daplka_resource_t *daplka_resource_lookup(minor_t);
static void daplka_resource_init(void);
static void daplka_resource_fini(void);
static struct daplka_resource_table daplka_resource;

/*
 * hash table routines
 */
static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
    void *, krw_t);
static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
static int daplka_hash_create(daplka_hash_table_t *, uint_t,
    void (*)(void *), void (*)(void *));
static void daplka_hash_destroy(daplka_hash_table_t *);
static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
static void daplka_hash_generic_lookup(void *);

static uint32_t daplka_timer_hkey_gen();
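
/*
 * typical hash table usage, as seen at the ep_create/ep_free call
 * sites below (a sketch, not a complete example). insert stores an
 * object and returns a new hkey. for tables created with
 * daplka_hash_generic_lookup, lookup returns a held reference that
 * the caller must drop with DAPLKA_RS_UNREF when done.
 *
 *	uint64_t hkey = 0;
 *	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl, &hkey, ep_rp);
 *	...
 *	ep_rp = daplka_hash_lookup(&ia_rp->ia_ep_htbl, hkey);
 *	...
 *	(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, hkey,
 *	    (void **)&free_rp);
 */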

/*
 * async event handlers
 */
static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
    uint64_t, daplka_ia_resource_t *);
static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
    ibt_subnet_event_t *event);
static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);

/*
 * IBTF wrappers and default limits used for resource accounting
 */
static boolean_t	daplka_accounting_enabled = B_TRUE;
static uint32_t		daplka_max_qp_percent = 100;
static uint32_t		daplka_max_cq_percent = 100;
static uint32_t		daplka_max_pd_percent = 100;
static uint32_t		daplka_max_mw_percent = 100;
static uint32_t		daplka_max_mr_percent = 100;
static uint32_t		daplka_max_srq_percent = 100;

static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
    ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
    ibt_channel_hdl_t *, ibt_chan_sizes_t *);

static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);

static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
    ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);

static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);

static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
    ibt_pd_flags_t, ibt_pd_hdl_t *);

static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);

static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);

static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);

static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
    ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
    ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);

static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
    ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);

static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);

/*
 * macros for manipulating resource objects.
 * these macros can be used on objects that begin with a
 * daplka_resource_t header.
 */
#define	DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)

#define	DAPLKA_RS_REF(rp) {			\
	mutex_enter(&(rp)->header.rs_reflock);	\
	(rp)->header.rs_refcnt++;		\
	ASSERT((rp)->header.rs_refcnt != 0);	\
	mutex_exit(&(rp)->header.rs_reflock);	\
}

#define	DAPLKA_RS_UNREF(rp) {					\
	mutex_enter(&(rp)->header.rs_reflock);			\
	ASSERT((rp)->header.rs_refcnt != 0);			\
	if (--(rp)->header.rs_refcnt == 0) {			\
		ASSERT((rp)->header.rs_free != NULL);		\
		mutex_exit(&(rp)->header.rs_reflock);		\
		(rp)->header.rs_free((daplka_resource_t *)rp);	\
	} else {						\
		mutex_exit(&(rp)->header.rs_reflock);		\
	}							\
}

#define	DAPLKA_RS_INIT(rp, type, rnum, free_func) {	\
	(rp)->header.rs_refcnt = 1;			\
	(rp)->header.rs_type = (type);			\
	(rp)->header.rs_rnum = (rnum); 			\
	(rp)->header.rs_charged = 0;			\
	(rp)->header.rs_free = (free_func);		\
	mutex_init(&(rp)->header.rs_reflock, NULL,	\
	    MUTEX_DRIVER, NULL);			\
}

#define	DAPLKA_RS_FINI(rp) {				\
	mutex_destroy(&(rp)->header.rs_reflock);	\
}

#define	DAPLKA_RS_ACCT_INC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, (cnt));		\
}
#define	DAPLKA_RS_ACCT_DEC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, -(cnt));	\
}
#define	DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)

#define	DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
#define	DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
#define	DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)
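
/*
 * reference counting sketch: DAPLKA_RS_INIT leaves the resource with
 * one reference; each holder adds one with DAPLKA_RS_REF and drops it
 * with DAPLKA_RS_UNREF; the UNREF that takes rs_refcnt to zero calls
 * the rs_free function registered at init time.
 *
 *	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
 *	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);
 *	DAPLKA_RS_REF(ep_rp);		additional holder
 *	...
 *	DAPLKA_RS_UNREF(ep_rp);		holder's reference
 *	DAPLKA_RS_UNREF(ep_rp);		last ref; calls daplka_ep_destroy
 */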

/*
 * does a cv_wait_sig or a cv_timedwait_sig depending on the timeout value
 */
#define	DAPLKA_EVD_WAIT(cvp, mp, timeout)			\
	((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :	\
	cv_timedwait_sig((cvp), (mp), (timeout))
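
/*
 * usage sketch (field names here are illustrative): the caller must
 * hold the mutex; a timeout of LONG_MAX waits indefinitely.
 *
 *	mutex_enter(&evd_rp->evd_lock);
 *	rval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv, &evd_rp->evd_lock, tmo);
 *	mutex_exit(&evd_rp->evd_lock);
 */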

#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

#define	DAPLKA_HOLD_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_RELE_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_HCA_BUSY(hca)				\
	((hca)->hca_ref_cnt != 0 ||			\
	(hca)->hca_qp_count != 0 ||			\
	(hca)->hca_cq_count != 0 ||			\
	(hca)->hca_pd_count != 0 ||			\
	(hca)->hca_mw_count != 0 ||			\
	(hca)->hca_mr_count != 0)
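
/*
 * holding the HCA keeps daplka_fini_hcas from tearing it down, since
 * DAPLKA_HCA_BUSY stays true while any reference or IB object count
 * is nonzero. usage sketch:
 *
 *	DAPLKA_HOLD_HCA(daplka_dev, hca);
 *	... use hca->hca_hdl ...
 *	DAPLKA_RELE_HCA(daplka_dev, hca);
 */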


static struct cb_ops daplka_cb_ops = {
	daplka_open,		/* cb_open */
	daplka_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	daplka_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP,		/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

static struct dev_ops daplka_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	daplka_info,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	daplka_attach,		/* devo_attach */
	daplka_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&daplka_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	nulldev,		/* power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"uDAPL Service Driver",
	&daplka_ops,
};

static struct modlinkage modlinkage = {
#ifdef _LP64
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
#else
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
#endif
};

/*
 * daplka_dev holds global driver state and a list of HCAs
 */
static daplka_t *daplka_dev = NULL;
static void *daplka_state = NULL;

/*
 * global SP hash table
 */
static daplka_hash_table_t daplka_global_sp_htbl;

/*
 * timer_info hash table
 */
static daplka_hash_table_t daplka_timer_info_htbl;
static uint32_t daplka_timer_hkey = 0;

/*
 * shared MR avl tree
 */
static avl_tree_t daplka_shared_mr_tree;
static kmutex_t daplka_shared_mr_lock;
static int daplka_shared_mr_cmp(const void *, const void *);
_NOTE(MUTEX_PROTECTS_DATA(daplka_shared_mr_lock,
    daplka_shared_mr_tree))

/*
 * default kmem flags used by this driver
 */
static int daplka_km_flags = KM_SLEEP;

/*
 * taskq used for handling background tasks
 */
static taskq_t *daplka_taskq = NULL;

/*
 * daplka_cm_delay is the length of time the active
 * side needs to wait before timing out on the REP message.
 */
static clock_t daplka_cm_delay = 60000000;

/*
 * modunload will fail if pending_close is non-zero
 */
static uint32_t daplka_pending_close = 0;

static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_USER,
	daplka_async_handler,
	NULL,
	DAPLKA_DRV_NAME
};

/*
 * Module Installation
 */
int
_init(void)
{
	int status;

	status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
	if (status != 0) {
		return (status);
	}

	mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
	bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
	daplka_dbgnext = 0;
	daplka_dbginit = 1;

	daplka_resource_init();

	status = mod_install(&modlinkage);
	if (status != DDI_SUCCESS) {
		/* undo inits done before mod_install */
		daplka_resource_fini();
		mutex_destroy(&daplka_dbglock);
		ddi_soft_state_fini(&daplka_state);
	}
	return (status);
}

/*
 * Module Removal
 */
int
_fini(void)
{
	int	status;

	/*
	 * mod_remove causes detach to be called
	 */
	if ((status = mod_remove(&modlinkage)) != 0) {
		DERR("fini: mod_remove failed: 0x%x\n", status);
		return (status);
	}

	daplka_resource_fini();
	mutex_destroy(&daplka_dbglock);
	ddi_soft_state_fini(&daplka_state);

	return (status);
}

/*
 * Return Module Info.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static void
daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == NULL) {
		dp->daplka_hca_list_head = hca;
	} else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != NULL)
			h = h->hca_next;

		h->hca_next = hca;
	}
}

static void
daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == hca)
		dp->daplka_hca_list_head = hca->hca_next;
	else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != hca)
			h = h->hca_next;
		h->hca_next = hca->hca_next;
	}
}

static int
daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
{
	daplka_hca_t		*hca;
	ibt_hca_portinfo_t	*pinfop;
	uint_t			size;
	int			j;
	ibt_status_t		status;

	hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);

	hca->hca_guid = hca_guid;

	/*
	 * open the HCA for use
	 */
	status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		if (status == IBT_HCA_IN_USE) {
			DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
		} else {
			DERR("ibt_open_hca() returned %d\n", status);
		}
		kmem_free(hca, sizeof (daplka_hca_t));
		return (status);
	}

	/*
	 * query HCA to get its info
	 */
	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
		    status, (longlong_t)hca_guid);
		goto out;
	}

	/*
	 * query HCA to get info of all ports
	 */
	status = ibt_query_hca_ports(hca->hca_hdl,
	    0, &pinfop, &hca->hca_nports, &size);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_all_ports returned %d "
		    "(hca_guid 0x%llx)\n", status,
		    (longlong_t)hca_guid);
		goto out;
	}
	hca->hca_ports = pinfop;
	hca->hca_pinfosz = size;

	DERR("hca guid 0x%llx, nports %d\n",
	    (longlong_t)hca_guid, hca->hca_nports);
	for (j = 0; j < hca->hca_nports; j++) {
		DERR("port %d: state %d prefix 0x%016llx "
		    "guid %016llx\n",
		    pinfop[j].p_port_num, pinfop[j].p_linkstate,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
	}

	mutex_enter(&dp->daplka_mutex);
	daplka_enqueue_hca(dp, hca);
	mutex_exit(&dp->daplka_mutex);

	return (IBT_SUCCESS);

out:
	(void) ibt_close_hca(hca->hca_hdl);
	kmem_free(hca, sizeof (daplka_hca_t));
	return (status);
}

/*
 * this function obtains the list of HCAs from IBTF.
 * the HCAs are then opened and the returned handles
 * and attributes are stored into the global daplka_dev
 * structure.
 */
static int
daplka_init_hcas(daplka_t *dp)
{
	int		i;
	ib_guid_t	*hca_guids;
	uint32_t	hca_count;

	/*
	 * get the num & list of HCAs present
	 */
	hca_count = ibt_get_hca_list(&hca_guids);
	DERR("No. of HCAs present %d\n", hca_count);

	if (hca_count != 0) {
		/*
		 * get the info for each available HCA
		 */
		for (i = 0; i < hca_count; i++)
			(void) daplka_init_hca(dp, hca_guids[i]);

		ibt_free_hca_list(hca_guids, hca_count);
	}

	if (dp->daplka_hca_list_head != NULL)
		return (IBT_SUCCESS);
	else
		return (IBT_FAILURE);
}

static int
daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
{
	ibt_status_t	status;

	if (hca->hca_hdl != NULL) {
		status = ibt_close_hca(hca->hca_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ibt_close_hca returned %d"
			    " (hca_guid 0x%llx)\n", status,
			    (longlong_t)hca->hca_guid);

			mutex_enter(&dp->daplka_mutex);
			daplka_enqueue_hca(dp, hca);
			mutex_exit(&dp->daplka_mutex);

			return (status);
		}
	}

	if (hca->hca_ports != NULL)
		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);

	kmem_free(hca, sizeof (daplka_hca_t));
	return (IBT_SUCCESS);
}

/*
 * closes all HCAs and frees up the HCA list
 */
static int
daplka_fini_hcas(daplka_t *dp)
{
	ibt_status_t	status;
	daplka_hca_t	*hca;

	mutex_enter(&daplka_dev->daplka_mutex);
	while ((hca = dp->daplka_hca_list_head) != NULL) {
		if (DAPLKA_HCA_BUSY(hca)) {
			mutex_exit(&daplka_dev->daplka_mutex);
			return (IBT_HCA_RESOURCES_NOT_FREED);
		}
		daplka_dequeue_hca(daplka_dev, hca);
		mutex_exit(&daplka_dev->daplka_mutex);

		if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
			return (status);

		mutex_enter(&daplka_dev->daplka_mutex);
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	DERR("dapl kernel agent unloaded\n");
	return (IBT_SUCCESS);
}


/*
 * Attach the device, create and fill in daplka_dev
 */
static int
daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	daplka_t	*dp;
	int		instance, retval, err;
	boolean_t	sp_htbl_allocated = B_FALSE;
	boolean_t	timer_htbl_allocated = B_FALSE;
	boolean_t	shared_mr_tree_allocated = B_FALSE;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	/*
	 * Allocate soft data structure
	 */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
		DERR("attach: bad state zalloc\n");
		return (DDI_FAILURE);
	}

	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		ddi_soft_state_free(daplka_state, instance);
		DERR("attach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	/*
	 * Stuff private info into dip.
	 */
	dp->daplka_dip = dip;
	ddi_set_driver_private(dip, dp);
	daplka_dev = dp;
	mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Register driver with IBTF
	 */
	retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
	    &dp->daplka_clnt_hdl);
	if (retval != IBT_SUCCESS) {
		DERR("attach: ibt_attach failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/* Register to receive SM events */
	ibt_register_subnet_notices(dp->daplka_clnt_hdl,
	    daplka_sm_notice_handler, NULL);

	retval = daplka_init_hcas(dp);
	if (retval != IBT_SUCCESS) {
		DERR("attach: hca_init failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/*
	 * this table is used by cr_handoff
	 */
	retval = daplka_hash_create(&daplka_global_sp_htbl,
	    DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
	    daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("attach: cannot create sp hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	sp_htbl_allocated = B_TRUE;

	/*
	 * this table stores per EP timer information.
	 * timer_info_t objects are inserted into this table whenever
	 * an EP timer is set. timers get removed when they expire
	 * or when they get cancelled.
	 */
	retval = daplka_hash_create(&daplka_timer_info_htbl,
	    DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
	if (retval != 0) {
		DERR("attach: cannot create timer hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	timer_htbl_allocated = B_TRUE;

	/*
	 * this taskq is currently only used for processing timers.
	 * other processing may also use this taskq in the future.
	 */
	daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
	    maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
	if (daplka_taskq == NULL) {
		DERR("attach: cannot create daplka_taskq\n");
		retval = DDI_FAILURE;
		goto error;
	}

	/*
	 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
	 * get retrieved or created when daplka_mr_register_shared is
	 * called.
	 */
	mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);

	avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
	    sizeof (daplka_shared_mr_t),
	    offsetof(daplka_shared_mr_t, smr_node));
	shared_mr_tree_allocated = B_TRUE;

	/*
	 * Create the filesystem device node.
	 */
	if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
	    0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
		DERR("attach: bad create_minor_node\n");
		retval = DDI_FAILURE;
		goto error;
	}
	dp->daplka_status = DAPLKA_STATE_ATTACHED;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);

error:
	if (shared_mr_tree_allocated) {
		avl_destroy(&daplka_shared_mr_tree);
		mutex_destroy(&daplka_shared_mr_lock);
	}

	if (daplka_taskq) {
		taskq_destroy(daplka_taskq);
		daplka_taskq = NULL;
	}

	if (timer_htbl_allocated) {
		daplka_hash_destroy(&daplka_timer_info_htbl);
	}

	if (sp_htbl_allocated) {
		daplka_hash_destroy(&daplka_global_sp_htbl);
	}

	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("attach: hca_fini returned %d\n", err);
	}

	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);

		if (err != IBT_SUCCESS) {
			DERR("attach: ibt_detach returned %d\n", err);
		}
	}
	mutex_destroy(&dp->daplka_mutex);

	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	ddi_soft_state_free(daplka_state, instance);
	return (retval);
}

/*
 * Detach - Free resources allocated in attach
 */
/* ARGSUSED */
static int
daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		instance, err;
	void		*cookie = NULL;
	daplka_t	*dp;

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	if (daplka_resource.daplka_rc_cnt > 0 ||
	    daplka_pending_close > 0) {
		DERR("detach: driver in use\n");
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		DERR("detach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("detach: hca_fini returned %d\n", err);
		return (DDI_FAILURE);
	}
	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);
		if (err != IBT_SUCCESS) {
			DERR("detach: ibt_detach returned %d\n", err);
			return (DDI_FAILURE);
		}
		dp->daplka_clnt_hdl = NULL;
	}
	mutex_destroy(&dp->daplka_mutex);
	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	dp->daplka_status = DAPLKA_STATE_DETACHED;
	ddi_soft_state_free(daplka_state, instance);
	daplka_dev = NULL;

	/*
	 * by the time we get here, all clients of dapl should
	 * have exited and completed their cleanup properly.
	 * we can assert that all global data structures are now
	 * empty.
	 */
	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
	avl_destroy(&daplka_shared_mr_tree);
	mutex_destroy(&daplka_shared_mr_lock);

	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
	daplka_hash_destroy(&daplka_timer_info_htbl);

	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
	daplka_hash_destroy(&daplka_global_sp_htbl);

	taskq_destroy(daplka_taskq);

	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (daplka_dev !=  NULL) {
			*result = daplka_dev->daplka_dip;
			return (DDI_SUCCESS);
		} else {
			return (DDI_FAILURE);
		}

	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * creates an EP resource.
 * An EP resource contains an RC channel. An EP resource holds a
 * reference to a send_evd (for the send CQ), a recv_evd (for the
 * recv CQ), a connection evd and a PD. These references ensure
 * that the referenced resources are not freed until the EP itself
 * gets freed.
 */
/* ARGSUSED */
static int
daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp;
	daplka_pd_resource_t		*pd_rp;
	dapl_ep_create_t		args;
	ibt_rc_chan_alloc_args_t	chan_args;
	ibt_chan_alloc_flags_t		achan_flags;
	ibt_chan_sizes_t		chan_real_sizes;
	ibt_hca_attr_t			*hca_attrp;
	uint64_t			ep_hkey = 0;
	boolean_t			inserted = B_FALSE;
	uint32_t			old_state, new_state;
	int				retval;
	ibt_status_t			status;

	D3("ep_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
	if (ep_rp == NULL) {
		DERR("ep_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);

	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
	ep_rp->ep_hca = ia_rp->ia_hca;
	ep_rp->ep_cookie = args.ep_cookie;
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we don't have to use ep_get_state here because ep_rp is not in
	 * ep_htbl yet. refer to the description of daplka_ep_set_state
	 * for details about the EP state machine.
	 */
	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	new_state = old_state = DAPLKA_EP_STATE_CLOSED;

	/* get reference to send evd and get cq handle */
	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
	if (ep_rp->ep_snd_evd == NULL) {
		DERR("ep_create: ep_snd_evd %llx not found\n",
		    args.ep_snd_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
	if (chan_args.rc_scq == NULL) {
		DERR("ep_create: ep_snd_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to recv evd and get cq handle */
	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
	if (ep_rp->ep_rcv_evd == NULL) {
		DERR("ep_create: ep_rcv_evd %llx not found\n",
		    args.ep_rcv_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
	if (chan_args.rc_rcq == NULL) {
		DERR("ep_create: ep_rcv_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to conn evd */
	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
	if (ep_rp->ep_conn_evd == NULL) {
		DERR("ep_create: ep_conn_evd %llx not found\n",
		    args.ep_conn_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to SRQ if needed */
	if (args.ep_srq_attached) {
		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
		if (ep_rp->ep_srq_res == NULL) {
			DERR("ep_create: ep_srq %llx not found\n",
			    (longlong_t)args.ep_srq_hkey);
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
		    (longlong_t)args.ep_srq_hkey);
	} else {
		ep_rp->ep_srq_res = NULL;
	}

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
	if (pd_rp == NULL) {
		DERR("ep_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	ep_rp->ep_pd_res = pd_rp;
	chan_args.rc_pd = pd_rp->pd_hdl;


	/*
	 * these checks ensure that the requested channel sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_sq_sgl %d\n",
		    args.ep_ch_sizes.dcs_sq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_rq_sgl %d\n",
		    args.ep_ch_sizes.dcs_rq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_sq %d\n",
		    args.ep_ch_sizes.dcs_sq);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_rq %d\n",
		    args.ep_ch_sizes.dcs_rq);
		retval = EINVAL;
		goto cleanup;
	}

	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
	chan_args.rc_flags = IBT_WR_SIGNALED;
	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
	chan_args.rc_clone_chan = NULL;
	if (args.ep_srq_attached) {
		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
	} else {
		chan_args.rc_srq = NULL;
	}

	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
	    chan_args.rc_flags, chan_args.rc_control,
	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);

	if (args.ep_srq_attached) {
		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
	} else {
		achan_flags = IBT_ACHAN_USER_MAP;
	}
	/* create rc channel */
	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
	    &chan_real_sizes);
	if (status != IBT_SUCCESS) {
		DERR("ep_create: alloc_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;

	/*
	 * store ep ptr with chan_hdl.
	 * this ep_ptr is used by the CM handlers (both active and
	 * passive)
	 * the mutex is only needed to guard the race between "destroy"
	 * and "async"
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));

	if (status != IBT_SUCCESS) {
		DERR("ep_create: ibt_ci_data_out error(%d)\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into ep hash table */
	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
	    &ep_hkey, (void *)ep_rp);
	if (retval != 0) {
		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/*
	 * at this point, the ep_rp can be looked up by other threads
	 * if they manage to guess the correct hkey. but they are not
	 * permitted to operate on ep_rp until we transition to the
	 * CLOSED state.
	 */

	/* return hkey to library */
	args.ep_hkey = ep_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	daplka_ep_set_state(ep_rp, old_state, new_state);
	D3("ep_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_ep_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
		    (void **)&free_rp);
		if (free_rp != ep_rp) {
			/*
			 * this case is impossible because ep_free will
			 * wait until our state transition is complete.
			 */
			DERR("ep_create: cannot remove ep from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * daplka_ep_get_state retrieves the current state of the EP and
 * sets the state to TRANSITIONING. if the current state is already
 * TRANSITIONING, this function will wait until the state becomes one
 * of the other EP states. Most of the EP related ioctls follow the
 * call sequence:
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	...
 *	...some code that affects the EP
 *	...
 *	new_state = <NEW_STATE>;
 *	daplka_ep_set_state(ep_rp, old_state, new_state);
 *
 * this call sequence ensures that only one thread may access the EP
 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
 * transitions ep_state to new_state and wakes up any waiters blocking
 * on ep_cv.
 *
 */
static uint32_t
daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
{
	uint32_t	old_state = 0;

	mutex_enter(&ep_rp->ep_lock);
	while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
		D2("get_state: wait for state transition to complete\n");
		cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
	}
	ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
	old_state = ep_rp->ep_state;

	/*
	 * an ep that is in the FREED state cannot transition
	 * back to any of the regular states
	 */
	if (old_state != DAPLKA_EP_STATE_FREED) {
		ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	}
	mutex_exit(&ep_rp->ep_lock);
	return (old_state);
}

/*
 * EP state transition diagram
 *
 *              CLOSED<-------------------
 *                |                      |
 *                |                      |
 *     ------------------------          |
 *     |                      |          |
 *     |                      |          |
 *     v                      v          |
 *   CONNECTING       ACCEPTING          |
 *     |  |   |       |       |          |
 *     |  |   |       |       |          |
 *     |  |   |       |       |          |
 *     |  |   |_______|_______|          |
 *     |  |           |   |   |          |
 *     |  |___________|   |   |          |
 *     |        |         |   |          |
 *     |        v         |   |---->DISCONNECTED
 *     |     CONNECTED    |              ^
 *     v        |         |              |
 *    ABORTING  |---------|--------------|
 *     |        |         |              |
 *     |        |         v              |
 *     |        |-------->DISCONNECTING--|
 *     |                                 |
 *     |---------------------------------|
 *
 *	*not shown in this diagram:
 *	    -loopback transitions
 *	    -transitions to the FREED state
 */
static boolean_t
daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
{
	boolean_t valid = B_FALSE;

	/*
	 * resetting to the same state is a no-op and is always
	 * permitted. transitioning to the FREED state indicates
	 * that the ep is about to be freed and no further operation
	 * is allowed on it. to support abrupt close, the ep is
	 * permitted to transition to the FREED state from any state.
	 */
	if (old_state == new_state ||
	    new_state == DAPLKA_EP_STATE_FREED) {
		return (B_TRUE);
	}

	switch (old_state) {
	case DAPLKA_EP_STATE_CLOSED:
		/*
		 * this is the initial ep_state.
		 * a transition to CONNECTING or ACCEPTING may occur
		 * upon calling daplka_ep_connect or daplka_cr_accept,
		 * respectively.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTING ||
		    new_state == DAPLKA_EP_STATE_ACCEPTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTING:
		/*
		 * we transition to this state if daplka_ep_connect
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_rc_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
		 * daplka_cm_rc_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition
		 * to DISCONNECTING. If a timer was set at ep_connect
		 * time and if the timer expires prior to any of the
		 * CM callbacks, we transition to ABORTING and then
		 * to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED ||
		    new_state == DAPLKA_EP_STATE_ABORTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ACCEPTING:
		/*
		 * we transition to this state if daplka_cr_accept
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_service_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_service_conn_closed or
		 * daplka_cm_service_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition to
		 * DISCONNECTING.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTED:
		/*
		 * we transition to this state if a active or passive
		 * connection gets established. if the client calls
		 * daplka_ep_disconnect, we transition to the
		 * DISCONNECTING state. subsequent CM callbacks will
		 * cause ep_state to be set to DISCONNECTED. If the
		 * remote peer terminates the connection before we do,
		 * it is possible for us to transition directly from
		 * CONNECTED to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTING:
		/*
		 * we transition to this state if the client calls
		 * daplka_ep_disconnect.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ABORTING:
		/*
		 * we transition to this state if the active side
		 * EP timer has expired. this is only a transient
		 * state that is set during timer processing. when
		 * timer processing completes, ep_state will become
		 * DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTED:
		/*
		 * we transition to this state if we get a closed
		 * or event_failure CM callback. an expired timer
		 * can also cause us to be in this state. this
		 * is the only state in which we permit the
		 * ep_reinit operation.
		 */
		if (new_state == DAPLKA_EP_STATE_CLOSED) {
			valid = B_TRUE;
		}
		break;
	default:
		break;
	}

	if (!valid) {
		DERR("ep_transition: invalid state change %d -> %d\n",
		    old_state, new_state);
	}
	return (valid);
}

/*
 * first check if the transition is valid. then set ep_state
 * to new_state and wake up all waiters.
 */
static void
daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
	uint32_t new_state)
{
	boolean_t	valid;

	ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);

	valid = daplka_ep_transition_is_valid(old_state, new_state);
	mutex_enter(&ep_rp->ep_lock);
	if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
		if (valid) {
			ep_rp->ep_state = new_state;
		} else {
			/*
			 * this case is impossible.
			 * we have a serious problem if we get here.
			 * instead of panicking, we reset the state to
			 * old_state. doing this would at least prevent
			 * threads from hanging due to ep_state being
			 * stuck in TRANSITIONING.
			 */
			ep_rp->ep_state = old_state;
			ASSERT(B_FALSE);
		}
	}
	cv_broadcast(&ep_rp->ep_cv);
	mutex_exit(&ep_rp->ep_lock);
}

/*
 * modifies RC channel attributes.
 * currently, only the rdma_in and rdma_out attributes may
 * be modified. the channel must be in a quiescent state when
 * this function is called.
 */
/* ARGSUSED */
static int
daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp = NULL;
	ibt_cep_modify_flags_t		good_flags;
	ibt_rc_chan_modify_attr_t	rcm_attr;
	ibt_hca_attr_t			*hca_attrp;
	dapl_ep_modify_t		args;
	ibt_status_t			status;
	uint32_t			old_state, new_state;
	int				retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
	    mode);
	if (retval != 0) {
		DERR("ep_modify: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
	if (ep_rp == NULL) {
		DERR("ep_modify: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_modify: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
	if ((args.epm_flags & ~good_flags) != 0) {
		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
		retval = EINVAL;
		goto cleanup;
	}

	hca_attrp = &ia_rp->ia_hca->hca_attr;

	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
			    args.epm_rdma_ra_out);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
	}
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
			DERR("ep_modify: epm_rdma_ra_in %d\n",
			    args.epm_rdma_ra_in);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
	}
	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
	    &rcm_attr, NULL);
	if (status != IBT_SUCCESS) {
		DERR("ep_modify: modify_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * ep_modify does not change ep_state
	 */
cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * Frees an EP resource.
 * An EP may only be freed when it is in the CLOSED or
 * DISCONNECTED state.
 */
/* ARGSUSED */
static int
daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_free_t		args;
	uint32_t		old_state, new_state;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
	if (retval != 0) {
		DERR("ep_free: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
	if (ep_rp == NULL) {
		DERR("ep_free: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	/*
	 * ep cannot be freed if it is in an invalid state.
	 */
	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_free: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	ep_rp = NULL;
	retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
	    args.epf_hkey, (void **)&ep_rp);
	if (retval != 0 || ep_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling ep_free in parallel.
		 */
		DERR("ep_free: cannot find ep resource\n");
		goto cleanup;
	}
	/* there should not be any outstanding timers */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(ep_rp);
	return (0);

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * The following routines support the timeout feature of ep_connect.
 * Refer to the description of ep_connect for details.
 */
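
/*
 * the timer pipeline, roughly, in order of execution (a summary of
 * the functions below):
 *
 *	ep_connect:	timerp = daplka_timer_info_alloc(ep_rp);
 *			ti_tmo_id = timeout(daplka_timer_handler, ...);
 *	callout thread:	daplka_timer_handler removes the timer from
 *			daplka_timer_info_htbl and calls
 *			daplka_timer_dispatch.
 *	taskq dispatch:	daplka_timer_dispatch retries
 *			taskq_dispatch(daplka_taskq, daplka_timer_thread,
 *			    timerp, TQ_NOSLEEP) until it succeeds.
 *	taskq thread:	daplka_timer_thread aborts the connection and
 *			delivers a DAPL_IB_CME_TIMED_OUT event.
 */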

/*
 * this is the timer processing thread.
 */
static void
daplka_timer_thread(void *arg)
{
	daplka_timer_info_t	*timerp = (daplka_timer_info_t *)arg;
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*disc_ev = NULL;
	ibt_status_t		status;
	int			old_state, new_state;

	ep_rp = timerp->ti_ep_res;
	ASSERT(ep_rp != NULL);
	ASSERT(timerp->ti_tmo_id != 0);
	timerp->ti_tmo_id = 0;

	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/* unblock hash_ep_free */
		mutex_enter(&ep_rp->ep_lock);
		ASSERT(ep_rp->ep_timer_hkey != 0);
		ep_rp->ep_timer_hkey = 0;
		cv_broadcast(&ep_rp->ep_cv);
		mutex_exit(&ep_rp->ep_lock);

		/* reset state to original state */
		daplka_ep_set_state(ep_rp, old_state, new_state);

		/* this function will also unref ep_rp */
		daplka_timer_info_free(timerp);
		return;
	}

	ASSERT(ep_rp->ep_timer_hkey != 0);
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we cannot keep ep_state in TRANSITIONING if we call
	 * ibt_close_rc_channel in blocking mode. this would cause
	 * a deadlock because the cm callbacks will be blocked and
	 * will not be able to wake us up.
	 */
	new_state = DAPLKA_EP_STATE_ABORTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * when we return from close_rc_channel, all callbacks should have
	 * completed. we can also be certain that these callbacks did not
	 * enqueue any events to conn_evd.
	 */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		DERR("timer_thread: ibt_close_rc_channel returned %d\n",
		    status);
	}
	old_state = daplka_ep_get_state(ep_rp);

	/*
	 * this is the only thread that can transition ep_state out
	 * of ABORTING. all other ep operations would fail when
	 * ep_state is in ABORTING.
	 */
	ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);

	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
	ASSERT(disc_ev != NULL);

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("timer_thread: enqueue event(%p) evdp(%p)\n",
	    disc_ev, ep_rp->ep_conn_evd);

	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	/* this function will also unref ep_rp */
	daplka_timer_info_free(timerp);
}

/*
 * dispatches a thread to continue with timer processing.
 */
static void
daplka_timer_dispatch(void *arg)
{
	/*
	 * keep rescheduling this function until
	 * taskq_dispatch succeeds.
	 */
	if (taskq_dispatch(daplka_taskq,
	    daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
		DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
		(void) timeout(daplka_timer_dispatch, arg, 10);
	}
}

/*
 * this function is called by the kernel's callout thread.
 * we first attempt to remove the timer object from the
 * global timer table. if it is found, we dispatch a thread
 * to continue processing the timer object. if it is not
 * found, that means the timer has been cancelled by someone
 * else.
 */
static void
daplka_timer_handler(void *arg)
{
	uint64_t		timer_hkey = (uintptr_t)arg;
	daplka_timer_info_t	*timerp = NULL;

	D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);

	(void) daplka_hash_remove(&daplka_timer_info_htbl,
	    timer_hkey, (void **)&timerp);
	if (timerp == NULL) {
		D2("timer_handler: timer already cancelled\n");
		return;
	}
	daplka_timer_dispatch((void *)timerp);
}

/*
 * allocates a timer_info object.
 * a reference to an EP is held by this object. this ensures
 * that the EP stays valid when a timer is outstanding.
 */
static daplka_timer_info_t *
daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
{
	daplka_timer_info_t	*timerp;

	timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
	if (timerp == NULL) {
		DERR("timer_info_alloc: cannot allocate timer info\n");
		return (NULL);
	}
	timerp->ti_ep_res = ep_rp;
	timerp->ti_tmo_id = 0;

	return (timerp);
}

/*
 * Frees the timer_info object.
 * we release the EP reference before freeing the object.
 */
static void
daplka_timer_info_free(daplka_timer_info_t *timerp)
{
	ASSERT(timerp->ti_ep_res != NULL);
	DAPLKA_RS_UNREF(timerp->ti_ep_res);
	timerp->ti_ep_res = NULL;
	ASSERT(timerp->ti_tmo_id == 0);
	kmem_free(timerp, sizeof (*timerp));
}

/*
 * cancels the timer set by ep_connect.
 * returns -1 if timer handling is in progress
 * and 0 otherwise.
 */
static int
daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
{
	/*
	 * this function can only be called when ep_state
	 * is frozen.
	 */
	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
	if (ep_rp->ep_timer_hkey != 0) {
		daplka_timer_info_t	*timerp = NULL;

		(void) daplka_hash_remove(&daplka_timer_info_htbl,
		    ep_rp->ep_timer_hkey, (void **)&timerp);
		if (timerp == NULL) {
			/*
			 * this is possible if the timer_handler has
			 * removed the timerp but the taskq thread has
			 * not transitioned the ep_state to DISCONNECTED.
			 * we need to reset the ep_state to allow the
			 * taskq thread to continue with its work. the
			 * taskq thread will set the ep_timer_hkey to 0
			 * so we don't have to do it here.
			 */
			DERR("cancel_timer: timer is being processed\n");
			return (-1);
		}
		/*
		 * we got the timer object. if the handler fires at
		 * this point, it will not be able to find the object
		 * and will return immediately. normally, ti_tmo_id gets
		 * cleared when the handler fires.
		 */
		ASSERT(timerp->ti_tmo_id != 0);

		/*
		 * note that untimeout can possibly call the handler.
		 * we are safe because the handler will be a no-op.
		 */
		(void) untimeout(timerp->ti_tmo_id);
		timerp->ti_tmo_id = 0;
		daplka_timer_info_free(timerp);
		ep_rp->ep_timer_hkey = 0;
	}
	return (0);
}

/*
 * this function is called by daplka_hash_destroy for
 * freeing timer_info objects
 */
static void
daplka_hash_timer_free(void *obj)
{
	daplka_timer_info_free((daplka_timer_info_t *)obj);
}

/* ARGSUSED */
static uint16_t
daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
{
	uint8_t *bp;
	int i;
	uint16_t cksum = 0;

	bp = (uint8_t *)dp;
	for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
		cksum += bp[i];
	}
	return (cksum);
}
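
/*
 * note on daplka_hellomsg_cksum: it is a simple byte-wise sum over the
 * whole DAPL_PRIVATE struct. the sender (ep_connect below) computes it
 * with hi_checksum zeroed and stores it in network byte order; the
 * receiver (daplka_crevent_privdata_post) verifies it by zeroing the
 * field and re-summing.
 */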

/*
 * ep_connect is called by the client to initiate a connection to a
 * remote service point. It is a non-blocking call. If a non-zero
 * timeout is specified by the client, a timer will be set just before
 * returning from ep_connect. Upon a successful return from ep_connect,
 * the client will call evd_wait to wait for the connection to complete.
 * If the connection is rejected or has failed due to an error, the
 * client will be notified with an event containing the appropriate error
 * code. If the connection is accepted, the client will be notified with
 * the CONN_ESTABLISHED event. If the timer expires before either of the
 * above events (error or established), a TIMED_OUT event will be delivered
 * to the client.
 *
 * the complicated part of the timer logic is the handling of race
 * conditions with CM callbacks. we need to ensure that either the CM or
 * the timer thread gets to deliver an event, but not both. when the
 * CM callback is about to deliver an event, it always tries to cancel
 * the outstanding timer. if cancel_timer indicates that the timer is
 * already being processed, the CM callback will simply return without
 * delivering an event. when the timer thread executes, it checks
 * whether the EP is still in the CONNECTING state (timers only work on
 * the active side). if the EP is not in this state, the timer thread
 * will return without delivering an event.
 */
/* ARGSUSED */
static int
daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_connect_t	args;
	daplka_timer_info_t	*timerp = NULL;
	uint32_t		old_state, new_state;
	boolean_t		timer_inserted = B_FALSE;
	uint64_t		timer_hkey = 0;
	ibt_path_info_t		path_info;
	ibt_path_attr_t		path_attr;
	ibt_hca_attr_t		*hca_attrp;
	ibt_chan_open_args_t	chan_args;
	ibt_status_t		status = IBT_SUCCESS;
	uint8_t			num_paths;
	void			*priv_data;
	DAPL_PRIVATE		*dp;
	int			retval = 0;
	ib_gid_t		*sgid;
	ib_gid_t		*dgid;
	uint64_t		dgid_ored;
	ibt_ar_t		ar_query_s;
	ibt_ar_t		ar_result_s;
	ibt_path_flags_t	pathflags;

	D3("ep_connect: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_connect: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
	if (ep_rp == NULL) {
		DERR("ep_connect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_connect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("ep_connect: private data len (%d) exceeded "
		    "max size %d\n", args.epc_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		retval = EINVAL;
		goto cleanup;
	}

	/*
	 * a zero dgid means the library passed a raw IP address;
	 * resolving it to a dgid requires an ATS query.
	 */
	dgid = &args.epc_dgid;
	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	dgid_ored = 0ULL;
#endif /* DAPLKA_DEBUG_FORCE_ATS */
	/* check for unidentified dgid */
	if (dgid_ored == 0ULL) {
		/*
		 * setup for ibt_query_ar()
		 */
		sgid = &ia_rp->ia_hca_sgid;
		ar_query_s.ar_gid.gid_guid = 0ULL;
		ar_query_s.ar_gid.gid_prefix = 0ULL;
		ar_query_s.ar_pkey = 0;
		bcopy(args.epc_raddr_sadata.iad_sadata,
		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
#define	UR(b) ar_query_s.ar_data[(b)]
		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
		    UR(8), UR(9), UR(10), UR(11));
		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
		    UR(12), UR(13), UR(14), UR(15));
		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
		if (status != IBT_SUCCESS) {
			DERR("ep_connect: ibt_query_ar returned %d\n", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
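		/*
		 * note the error convention used throughout this
		 * driver: on an IBTF failure the ioctl itself succeeds
		 * (retval 0) and the ibt_status_t is passed back via
		 * *rvalp, so the library can distinguish IBTF errors
		 * from syscall errors.
		 */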
		/*
		 * dgid identified from SA record
		 */
		dgid = &ar_result_s.ar_gid;
		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
	}

	bzero(&path_info, sizeof (ibt_path_info_t));
	bzero(&path_attr, sizeof (ibt_path_attr_t));
	bzero(&chan_args, sizeof (ibt_chan_open_args_t));

	path_attr.pa_dgids = dgid;
	path_attr.pa_num_dgids = 1;
	/*
	 * not setting the sid in path_attr saves one SA query and
	 * also keeps the server side from writing the service record
	 */
	path_attr.pa_sgid = ia_rp->ia_hca_sgid;
	path_attr.pa_pkey = ia_rp->ia_port_pkey;

	/* save the connection ep  - struct copy */
	ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
	ep_rp->ep_dgid = *dgid;

	num_paths = 0;
	pathflags = IBT_PATH_PKEY;
	/* enable APM on remote port but not on loopback case */
	if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
	    (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
		pathflags |= IBT_PATH_APM;
	}
	status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
	    pathflags, &path_attr, 1, &path_info, &num_paths);

	if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
		DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
		    status, num_paths);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/* fill in the sid directly to path_info */
	path_info.pi_sid = args.epc_sid;
	hca_attrp = &ia_rp->ia_hca->hca_attr;

	/* fill in open channel args */
	chan_args.oc_path = &path_info;
	chan_args.oc_cm_handler = daplka_cm_rc_handler;
	chan_args.oc_cm_clnt_private = (void *)ep_rp;
	chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
	chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
	chan_args.oc_path_retry_cnt = 7;	/* 3-bit field */
	chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;

	ASSERT(args.epc_priv_sz > 0);
	priv_data = (void *)args.epc_priv;

	chan_args.oc_priv_data_len = args.epc_priv_sz;
	chan_args.oc_priv_data = priv_data;

	/*
	 * calculate checksum value of hello message and
	 * put hello message in networking byte order
	 */
	dp = (DAPL_PRIVATE *)priv_data;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dp))
	dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
	dp->hello_msg.hi_checksum = 0;
	dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dp))

	if (args.epc_timeout > 0) {
		/*
		 * increment refcnt before passing reference to
		 * timer_info_alloc.
		 */
		DAPLKA_RS_REF(ep_rp);
		timerp = daplka_timer_info_alloc(ep_rp);
		if (timerp == NULL) {
			DERR("ep_connect: cannot allocate timer\n");
			/*
			 * we need to remove the reference if
			 * allocation failed.
			 */
			DAPLKA_RS_UNREF(ep_rp);
			retval = ENOMEM;
			goto cleanup;
		}
		/*
		 * We generate our own hkeys so that timer_hkey can fit
		 * into a pointer and be passed as an arg to timeout()
		 */
		timer_hkey = (uint64_t)daplka_timer_hkey_gen();
		retval = daplka_hash_insert(&daplka_timer_info_htbl,
		    &timer_hkey, (void *)timerp);
		if (retval != 0) {
			DERR("ep_connect: cannot insert timer info\n");
			goto cleanup;
		}
		ASSERT(ep_rp->ep_timer_hkey == 0);
		ep_rp->ep_timer_hkey = timer_hkey;
		timer_inserted = B_TRUE;
		D2("ep_connect: timer_hkey = 0x%llx\n",
		    (longlong_t)timer_hkey);
	}
	status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
	    IBT_NONBLOCKING, &chan_args, NULL);

	if (status != IBT_SUCCESS) {
		DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/*
	 * if a cm callback gets called at this point, it'll have to wait until
	 * ep_state becomes connecting (or some other state if another thread
	 * manages to get ahead of the callback). this guarantees that the
	 * callback will not touch the timer until it gets set.
	 */
	if (timerp != NULL) {
		clock_t		tmo;

		tmo = drv_usectohz((clock_t)args.epc_timeout);
		/*
		 * We generate our own 32-bit timer_hkey so that it can fit
		 * into a pointer
		 */
		ASSERT(timer_hkey != 0);
		timerp->ti_tmo_id = timeout(daplka_timer_handler,
		    (void *)(uintptr_t)timer_hkey, tmo);
	}
	new_state = DAPLKA_EP_STATE_CONNECTING;

cleanup:;
	if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
		/*
		 * if ibt_open_rc_channel failed, the timerp must still
		 * be in daplka_timer_info_htbl because neither the cm
		 * callback nor the timer_handler will be called.
		 */
		if (timer_inserted) {
			daplka_timer_info_t	*new_timerp = NULL;

			ASSERT(timer_hkey != 0);
			(void) daplka_hash_remove(&daplka_timer_info_htbl,
			    timer_hkey, (void **)&new_timerp);
			ASSERT(new_timerp == timerp);
			ep_rp->ep_timer_hkey = 0;
		}
		daplka_timer_info_free(timerp);
	}
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	D3("ep_connect: exit\n");
	return (retval);
}
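
/*
 * an illustrative library-side sequence for ep_connect (a sketch; the
 * ioctl command name is hypothetical, the fields are those of
 * dapl_ep_connect_t used above):
 *
 *	args.epc_hkey = ep_hkey;	   EP from a prior ep_create
 *	args.epc_sid = service_id;	   remote service point
 *	args.epc_timeout = tmo_usec;	   0 means no timer is set
 *	ioctl(fd, DAPL_EP_CONNECT, &args);
 *
 * the library then blocks in evd_wait for CONN_ESTABLISHED, an error
 * event, or TIMED_OUT if the timer fires first.
 */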

/*
 * ep_disconnect closes a connection with a remote peer.
 * if a connection has not been established, ep_disconnect
 * will instead flush all recv bufs posted to this channel.
 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
 * entry to ep_disconnect, the EP state will transition to
 * DISCONNECTING upon exit. the CM callbacks triggered by
 * ibt_close_rc_channel will cause EP state to become
 * DISCONNECTED. This function is a no-op if EP state is
 * DISCONNECTED.
 */
/* ARGSUSED */
static int
daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_disconnect_t	args;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_disconnect: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
	if (ep_rp == NULL) {
		DERR("ep_disconnect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_ACCEPTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING &&
	    old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_disconnect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
	    (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
		D2("ep_disconnect: ep already disconnected\n");
		retval = 0;
		/* we leave the state as DISCONNECTED */
		goto cleanup;
	}
	if (old_state == DAPLKA_EP_STATE_CONNECTING ||
	    old_state == DAPLKA_EP_STATE_ACCEPTING) {
		D2("ep_disconnect: aborting, old_state = %d\n", old_state);
	}

	/*
	 * according to the udapl spec, ep_disconnect should
	 * flush the channel if the channel is not CONNECTED.
	 */
	if (old_state == DAPLKA_EP_STATE_CLOSED) {
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ep_disconnect: ibt_flush_channel failed %d\n",
			    status);
			*rvalp = (int)status;
		}
		retval = 0;
		/* we leave the state as CLOSED */
		goto cleanup;
	}

	new_state = DAPLKA_EP_STATE_DISCONNECTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, NULL);

	if (status == IBT_SUCCESS) {
		DAPLKA_RS_UNREF(ep_rp);
		return (retval);
	} else {
		DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		new_state = old_state;
	}

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * this function resets the EP to a usable state (i.e. from
 * DISCONNECTED to CLOSED). it is implemented with the
 * ibt_recycle_rc interface, which returns the existing channel
 * to a reusable state without reallocating it.
 */
/* ARGSUSED */
static int
daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp = NULL;
	dapl_ep_reinit_t		args;
	ibt_status_t			status;
	uint32_t			old_state, new_state;
	int				retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
	    mode);
	if (retval != 0) {
		DERR("reinit: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
	if (ep_rp == NULL) {
		DERR("reinit: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
	    (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
		DERR("reinit: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
	    IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
	    ia_rp->ia_port_num, NULL, NULL);
	if (status != IBT_SUCCESS) {
		DERR("reinit: ibt_recycle_rc returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	new_state = DAPLKA_EP_STATE_CLOSED;

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * destroys an EP resource.
 * called when refcnt drops to zero.
 */
static int
daplka_ep_destroy(daplka_resource_t *gen_rp)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)gen_rp;
	ibt_status_t		status;

	ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);

	/*
	 * by the time we get here, we can be sure that
	 * there is no outstanding timer.
	 */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
	    ep_rp, DAPLKA_RS_RNUM(ep_rp));
	/*
	 * free rc channel
	 */
	if (ep_rp->ep_chan_hdl != NULL) {
		mutex_enter(&daplka_dev->daplka_mutex);
		ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
		mutex_exit(&daplka_dev->daplka_mutex);
		status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ep_free: ibt_free_channel returned %d\n",
			    status);
		}
		ep_rp->ep_chan_hdl = NULL;
		D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
	}
	/*
	 * release all references. this is done after the channel is
	 * freed, since the QP holds references to the CQs and the PD.
	 */
	if (ep_rp->ep_snd_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
		ep_rp->ep_snd_evd = NULL;
	}
	if (ep_rp->ep_rcv_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
		ep_rp->ep_rcv_evd = NULL;
	}
	if (ep_rp->ep_conn_evd != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
		ep_rp->ep_conn_evd = NULL;
	}
	if (ep_rp->ep_srq_res != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
		ep_rp->ep_srq_res = NULL;
	}
	if (ep_rp->ep_pd_res != NULL) {
		DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
		ep_rp->ep_pd_res = NULL;
	}
	cv_destroy(&ep_rp->ep_cv);
	mutex_destroy(&ep_rp->ep_lock);

	DAPLKA_RS_FINI(ep_rp);
	kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
	D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
	return (0);
}

/*
 * this function is called by daplka_hash_destroy for
 * freeing EP resource objects
 */
static void
daplka_hash_ep_free(void *obj)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)obj;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval;

	old_state = daplka_ep_get_state(ep_rp);
	retval = daplka_cancel_timer(ep_rp);
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	if (retval != 0) {
		D2("hash_ep_free: ep_rp 0x%p "
		    "timer is still being processed\n", ep_rp);
		mutex_enter(&ep_rp->ep_lock);
		if (ep_rp->ep_timer_hkey != 0) {
			D2("hash_ep_free: ep_rp 0x%p "
			    "waiting for timer_hkey to be 0\n", ep_rp);
			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		}
		mutex_exit(&ep_rp->ep_lock);
	}

	/* call ibt_close_rc_channel regardless of what state we are in */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		if (old_state == DAPLKA_EP_STATE_CONNECTED ||
		    old_state == DAPLKA_EP_STATE_CONNECTING ||
		    old_state == DAPLKA_EP_STATE_ACCEPTING) {
			DERR("hash_ep_free: ep_rp 0x%p state %d "
			    "unexpected error %d from close_rc_channel\n",
			    ep_rp, old_state, status);
		}
		D2("hash_ep_free: close_rc_channel, status %d\n", status);
	}
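	/*
	 * the blocking close above is what makes the final UNREF below
	 * safe: by the time ibt_close_rc_channel returns, close
	 * processing for this channel has completed.
	 */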

	DAPLKA_RS_UNREF(ep_rp);
}

/*
 * creates an EVD resource.
 * an EVD is used by the client to wait for events from one
 * or more sources.
 */
/* ARGSUSED */
static int
daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t		*evd_rp = NULL;
	daplka_async_evd_hkey_t		*async_evd;
	ibt_hca_attr_t			*hca_attrp;
	ibt_cq_attr_t			cq_attr;
	dapl_evd_create_t		args;
	uint64_t			evd_hkey = 0;
	boolean_t			inserted = B_FALSE;
	int				retval = 0;
	ibt_status_t			status;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
	    mode);
	if (retval != 0) {
		DERR("evd_create: copyin error %d", retval);
		return (EFAULT);
	}
	if ((args.evd_flags &
	    ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
		DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
		return (EINVAL);
	}

	evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
	DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
	    DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);

	mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
	evd_rp->evd_hca = ia_rp->ia_hca;
	evd_rp->evd_flags = args.evd_flags;
	evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
	evd_rp->evd_cookie = args.evd_cookie;
	evd_rp->evd_cno_res = NULL;
	evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
	evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
	evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;

	/*
	 * if the client specified a non-zero cno_hkey, we
	 * lookup the cno and save the reference for later use.
	 */
	if (args.evd_cno_hkey > 0) {
		daplka_cno_resource_t *cno_rp;

		cno_rp = (daplka_cno_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
		    args.evd_cno_hkey);
		if (cno_rp == NULL) {
			DERR("evd_create: cannot find cno resource\n");
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
		evd_rp->evd_cno_res = cno_rp;
	}
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if ((evd_rp->evd_flags &
	    (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
		if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
			DERR("evd_create: invalid cq size %d",
			    args.evd_cq_size);
			retval = EINVAL;
			goto cleanup;
		}
		cq_attr.cq_size = args.evd_cq_size;
		cq_attr.cq_sched = NULL;
		cq_attr.cq_flags = IBT_CQ_USER_MAP;

		status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
		    &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);

		if (status != IBT_SUCCESS) {
			DERR("evd_create: ibt_alloc_cq returned %d", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}

		/*
		 * store evd ptr with cq_hdl
		 * mutex is only needed for race of "destroy" and "async"
		 */
		mutex_enter(&daplka_dev->daplka_mutex);
		ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
		mutex_exit(&daplka_dev->daplka_mutex);

		/* Get HCA-specific data_out info */
		status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
		    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
		    &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));

		if (status != IBT_SUCCESS) {
			DERR("evd_create: ibt_ci_data_out error(%d)", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}

		args.evd_cq_real_size = evd_rp->evd_cq_real_size;

		ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
		    (void *)evd_rp);
	}
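	/*
	 * at this point the library has what it needs to mmap the CQ:
	 * the HCA-specific mapping info in evd_cq_data_out and the
	 * actual size in evd_cq_real_size. completions are then reaped
	 * entirely in userland; the kernel is re-entered only to block
	 * in daplka_event_poll when the CQ is empty.
	 */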

	retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
	    &evd_hkey, (void *)evd_rp);
	if (retval != 0) {
		DERR("evd_ceate: cannot insert evd %d\n", retval);
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp))

	/*
	 * If this evd handles async events need to add to the IA resource
	 * async evd list
	 */
	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
		async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
		    daplka_km_flags);
		/* add the evd to the head of the list */
		mutex_enter(&ia_rp->ia_lock);
		async_evd->aeh_evd_hkey = evd_hkey;
		async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
		ia_rp->ia_async_evd_hkeys = async_evd;
		mutex_exit(&ia_rp->ia_lock);
	}

	args.evd_hkey = evd_hkey;
	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_evd_create_t),
	    mode);
	if (retval != 0) {
		DERR("evd_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_evd_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
		    (void **)&free_rp);
		if (free_rp != evd_rp) {
			DERR("evd_create: cannot remove evd\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in evd_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(evd_rp);
	return (retval);
}
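
/*
 * note on lifetime: like every daplka resource, the evd created above
 * is reference counted. the ia_evd_htbl entry holds a reference;
 * daplka_evd_free removes that entry and drops the reference, and
 * daplka_evd_destroy runs only when the count reaches zero.
 */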

/*
 * resizes CQ and returns new mapping info to library.
 */
/* ARGSUSED */
static int
daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t		*evd_rp = NULL;
	ibt_hca_attr_t			*hca_attrp;
	dapl_cq_resize_t		args;
	ibt_status_t			status;
	int				retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("cq_resize: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
	if (evd_rp == NULL) {
		DERR("cq_resize: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
		DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * If ibt_resize_cq fails, it is primarily due to resource
	 * shortage. Per the IB spec, a resize will never lose events and
	 * a resize error leaves the CQ intact. Therefore even if the
	 * resize request fails we proceed and get the mapping data
	 * from the CQ so that the library can mmap it.
	 */
	status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
	    &args.cqr_cq_real_size);
	if (status != IBT_SUCCESS) {
		/* we return the size of the old CQ if resize fails */
		args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
		ASSERT(status != IBT_CQ_HDL_INVALID);
		DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
	} else {
		mutex_enter(&evd_rp->evd_lock);
		evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
		mutex_exit(&evd_rp->evd_lock);
	}

	D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
	    DAPLKA_RS_RNUM(evd_rp),
	    args.cqr_cq_new_size, args.cqr_cq_real_size);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
	    &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
	if (status != IBT_SUCCESS) {
		DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
		/* return ibt_ci_data_out status */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("cq_resize: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (evd_rp != NULL) {
		DAPLKA_RS_UNREF(evd_rp);
	}
	return (retval);
}

/*
 * Routine to copyin the event poll message so that 32 bit libraries
 * can be safely supported
 */
int
daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
{
	int	retval;

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		dapl_event_poll32_t	args32;

		retval = ddi_copyin((void *)inarg, &args32,
		    sizeof (dapl_event_poll32_t), mode);
		if (retval != 0) {
			DERR("event_poll_copyin: 32bit error %d\n", retval);
			return (EFAULT);
		}

		outarg->evp_evd_hkey = args32.evp_evd_hkey;
		outarg->evp_threshold = args32.evp_threshold;
		outarg->evp_timeout = args32.evp_timeout;
		outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
		outarg->evp_num_ev = args32.evp_num_ev;
		outarg->evp_num_polled = args32.evp_num_polled;
		return (0);
	}
#endif
	retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
	    mode);
	if (retval != 0) {
		DERR("event_poll: copyin error %d\n", retval);
		return (EFAULT);
	}

	return (0);
}

/*
 * Routine to copyout the event poll message so that 32 bit libraries
 * can be safely supported
 */
int
daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
{
	int	retval;

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		dapl_event_poll32_t	args32;

		args32.evp_evd_hkey = inarg->evp_evd_hkey;
		args32.evp_threshold = inarg->evp_threshold;
		args32.evp_timeout = inarg->evp_timeout;
		args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
		args32.evp_num_ev = inarg->evp_num_ev;
		args32.evp_num_polled = inarg->evp_num_polled;

		retval = ddi_copyout((void *)&args32, (void *)outarg,
		    sizeof (dapl_event_poll32_t), mode);
		if (retval != 0) {
			DERR("event_poll_copyout: 32bit error %d\n", retval);
			return (EFAULT);
		}
		return (0);
	}
#endif
	retval = ddi_copyout((void *)inarg, (void *)outarg,
	    sizeof (dapl_event_poll_t), mode);
	if (retval != 0) {
		DERR("event_poll_copyout: error %d\n", retval);
		return (EFAULT);
	}

	return (0);
}
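
/*
 * note: under _MULTI_DATAMODEL a 64-bit kernel serves both 32-bit and
 * 64-bit libraries. dapl_event_poll32_t differs from dapl_event_poll_t
 * only in that evp_ep is a caddr32_t, hence the field-by-field
 * translation in the two routines above.
 */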

/*
 * function to handle CM REQ RCV private data from Solaris or third parties
 */
/* ARGSUSED */
static void
daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
	dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
{
	DAPL_PRIVATE	*dp;
	ib_gid_t	*lgid;
	ibt_ar_t	ar_query_s;
	ibt_ar_t	ar_result_s;
	DAPL_HELLO_MSG	*hip;
	uint32_t	ipaddr_ord;
	ibt_priv_data_len_t clen;
	ibt_priv_data_len_t olen;
	ibt_status_t	status;
	uint16_t	cksum;

	/*
	 * get private data and len
	 */
	dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
	clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	/* skip the DAPL_PRIVATE checksum check */
#else
	/* for remote connects */
	/* look up hello message in the CM private data area */
	if (clen >= sizeof (DAPL_PRIVATE) &&
	    (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
		cksum = ntohs(dp->hello_msg.hi_checksum);
		dp->hello_msg.hi_checksum = 0;
		if (daplka_hellomsg_cksum(dp) == cksum) {
			D2("daplka_crevent_privdata_post: Solaris msg\n");
			evd_rp->ibe_ce.ibce_priv_data_size = clen;
			dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
			dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
			bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
			kmem_free(dp, clen);
			return;
		}
	}
#endif /* DAPLKA_DEBUG_FORCE_ATS */

	D2("daplka_crevent_privdata_post: 3rd party msg\n");
	/* transpose CM private data into hello message */
	if (clen) {
		olen = clen;
		if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
			clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
		}
		bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
		kmem_free(dp, olen);
	} else {
		bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
		    DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
	}
	evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
	dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
	/*
	 * fill in hello message
	 */
	hip = &dp->hello_msg;
	hip->hi_checksum = DAPL_CHECKSUM;
	hip->hi_clen = clen;
	hip->hi_mid = 0;
	hip->hi_vers = DAPL_HELLO_MSG_VERS;
	hip->hi_port = 0;

	/* assign sgid and dgid */
	lgid = &ia_rp->ia_hca_sgid;
	ar_query_s.ar_gid.gid_prefix =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
	ar_query_s.ar_gid.gid_guid =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
	ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
	bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);

	/* reverse ip address lookup through ATS */
	status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
	if (status == IBT_SUCCESS) {
		bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
		/* determine the address families */
		ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
		    hip->hi_v4pad[2];
		if (ipaddr_ord == 0) {
			hip->hi_ipv = AF_INET;
		} else {
			hip->hi_ipv = AF_INET6;
		}

#define	UL(b) ar_result_s.ar_data[(b)]
		D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
		    hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
		D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
		    UL(12), UL(13), UL(14), UL(15));
	} else {
		/* non-conformed third parties */
		hip->hi_ipv = AF_UNSPEC;
		bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
	}
}

/*
 * this function is called by evd_wait and evd_dequeue to wait for
 * connection events and CQ notifications. typically this function
 * is called when the userland CQ is empty and the client has
 * specified a non-zero timeout to evd_wait. if the client is
 * interested in CQ events, the CQ must be armed in userland prior
 * to calling this function.
 */
/* ARGSUSED */
static int
daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	dapl_event_poll_t	args;
	daplka_evd_event_t	*head;
	dapl_ib_event_t		evp_arr[NUM_EVENTS_PER_POLL];
	dapl_ib_event_t		*evp;
	dapl_ib_event_t		*evp_start;
	size_t			evp_size;
	int			threshold;
	clock_t			timeout;
	uint32_t		max_events;
	uint32_t		num_events = 0;
	void			*pd;
	ibt_priv_data_len_t	n;
	int			retval = 0;
	int			rc;

	retval = daplka_event_poll_copyin(arg, &args, mode);
	if (retval != 0) {
		return (EFAULT);
	}

	if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
		DERR("event_poll: evp_ep cannot be NULL if num_wc=%d",
		    args.evp_num_ev);
		return (EINVAL);
	}
	/*
	 * Note: dequeue requests have a threshold = 0, timeout = 0
	 */
	threshold = args.evp_threshold;

	max_events = args.evp_num_ev;
	/* ensure library is passing sensible values */
	if (max_events < threshold) {
		DERR("event_poll: max_events(%d) < threshold(%d)\n",
		    max_events, threshold);
		return (EINVAL);
	}
	/* Do a sanity check to avoid excessive memory allocation */
	if (max_events > DAPL_EVD_MAX_EVENTS) {
		DERR("event_poll: max_events(%d) > %d",
		    max_events, DAPL_EVD_MAX_EVENTS);
		return (EINVAL);
	}
	D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
	    threshold, (longlong_t)args.evp_timeout, max_events);

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
	if (evd_rp == NULL) {
		DERR("event_poll: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	/*
	 * Use event array on the stack if possible
	 */
	if (max_events <= NUM_EVENTS_PER_POLL) {
		evp_start = evp = &evp_arr[0];
	} else {
		evp_size = max_events * sizeof (dapl_ib_event_t);
		evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
		if (evp == NULL) {
			DERR("event_poll: kmem_zalloc failed, evp_size %d",
			    evp_size);
			retval = ENOMEM;
			goto cleanup;
		}
	}

	/*
	 * The event poll algorithm is as follows -
	 * The library passes a buffer big enough to hold "max_events"
	 * events. max_events is >= threshold. If at any stage we get
	 * max_events events, we bail. The events are polled in the
	 * following order -
	 * 1) Check for CR events in the evd_cr_events list
	 * 2) Check for Connection events in the evd_conn_events list
	 * 3) Check for Async events in the evd_async_events list
	 *
	 * If after the above steps we don't have enough (>= threshold)
	 * events, we block for CQ notification and sleep. Upon being
	 * woken up we start at step 1 again.
	 */

	/*
	 * Note: this could be 0 or INFINITE or any other value in
	 * microseconds
	 */
	if (args.evp_timeout > 0) {
		if (args.evp_timeout >= LONG_MAX) {
			timeout = LONG_MAX;
		} else {
			clock_t	curr_time = ddi_get_lbolt();

			timeout = curr_time +
			    drv_usectohz((clock_t)args.evp_timeout);
			/*
			 * use the max value if we wrapped around
			 */
			if (timeout <= curr_time) {
				timeout = LONG_MAX;
			}
		}
	} else {
		timeout = 0;
	}
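
	/*
	 * worked example, assuming hz=100: an evp_timeout of 1000000
	 * usec yields a deadline of lbolt+100 ticks. judging from the
	 * return-value handling below, DAPLKA_EVD_WAIT follows the
	 * cv_timedwait_sig convention: 0 means signalled (EINTR) and
	 * -1 means the deadline passed (ETIME).
	 */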

	mutex_enter(&evd_rp->evd_lock);
	for (;;) {
		/*
		 * If this evd is waiting for CM events check that now.
		 */
		if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
		    (evd_rp->evd_cr_events.eel_num_elements > 0)) {
			/* dequeue events from evd_cr_events list */
			while ((head = daplka_evd_event_dequeue(
			    &evd_rp->evd_cr_events)) != NULL) {
				/*
				 * populate the evp array
				 */
				evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
				evp[num_events].ibe_ce.ibce_event =
				    head->ee_cmev.ec_cm_ev_type;
				evp[num_events].ibe_ce.ibce_cookie =
				    (uint64_t)head->ee_cmev.ec_cm_cookie;
				evp[num_events].ibe_ce.ibce_psep_cookie =
				    head->ee_cmev.ec_cm_psep_cookie;
				daplka_crevent_privdata_post(ia_rp,
				    &evp[num_events], head);
				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					mutex_exit(&evd_rp->evd_lock);
					goto maxevent_reached;
				}
			}
		}

		if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
		    (evd_rp->evd_conn_events.eel_num_elements > 0)) {
			/* dequeue events from evd_connection_events list */
			while ((head = daplka_evd_event_dequeue(
			    &evd_rp->evd_conn_events)) != NULL) {
				/*
				 * populate the evp array
				 */
				if (head->ee_cmev.ec_cm_is_passive) {
					evp[num_events].ibe_ev_family =
					    DAPL_PASSIVE_CONNECTION_EVENTS;
				} else {
					evp[num_events].ibe_ev_family =
					    DAPL_ACTIVE_CONNECTION_EVENTS;
				}
				evp[num_events].ibe_ce.ibce_event =
				    head->ee_cmev.ec_cm_ev_type;
				evp[num_events].ibe_ce.ibce_cookie =
				    (uint64_t)head->ee_cmev.ec_cm_cookie;
				evp[num_events].ibe_ce.ibce_psep_cookie =
				    head->ee_cmev.ec_cm_psep_cookie;

				if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
					pd = head->ee_cmev.ec_cm_ev_priv_data;
					n = head->
					    ee_cmev.ec_cm_ev_priv_data_len;
					bcopy(pd, (void *)evp[num_events].
					    ibe_ce.ibce_priv_data_ptr, n);
					evp[num_events].ibe_ce.
					    ibce_priv_data_size = n;
					kmem_free(pd, n);
				}

				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					mutex_exit(&evd_rp->evd_lock);
					goto maxevent_reached;
				}
			}
		}

		if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
		    (evd_rp->evd_async_events.eel_num_elements > 0)) {
			/* dequeue events from evd_async_events list */
			while ((head = daplka_evd_event_dequeue(
			    &evd_rp->evd_async_events)) != NULL) {
				/*
				 * populate the evp array
				 */
				evp[num_events].ibe_ev_family =
				    DAPL_ASYNC_EVENTS;
				evp[num_events].ibe_async.ibae_type =
				    head->ee_aev.ibae_type;
				evp[num_events].ibe_async.ibae_hca_guid =
				    head->ee_aev.ibae_hca_guid;
				evp[num_events].ibe_async.ibae_cookie =
				    head->ee_aev.ibae_cookie;
				evp[num_events].ibe_async.ibae_port =
				    head->ee_aev.ibae_port;

				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					break;
				}
			}
		}

		/*
		 * We have sufficient events for this call so no need to wait
		 */
		if ((threshold > 0) && (num_events >= threshold)) {
			mutex_exit(&evd_rp->evd_lock);
			break;
		}

		evd_rp->evd_waiters++;
		/*
		 * There are no new events and a timeout was specified.
		 * Note: for CQ events threshold is 0 but timeout is
		 * not necessarily 0.
		 */
		while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
		    timeout) {
			retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
			    &evd_rp->evd_lock, timeout);
			if (retval == 0) {
				retval = EINTR;
				break;
			} else if (retval == -1) {
				retval = ETIME;
				break;
			} else {
				retval = 0;
				continue;
			}
		}
		evd_rp->evd_waiters--;
		if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
			/*
			 * we were woken up either by the CQ handler
			 * because new completions arrived (we must go
			 * back to userland to poll the CQ) or by S/W
			 * events being enqueued.
			 */

			/* check for userland events only */
			if (!(evd_rp->evd_newevents &
			    ~DAPLKA_EVD_ULAND_EVENTS)) {
				evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
				mutex_exit(&evd_rp->evd_lock);
				break;
			}
			/*
			 * Clear newevents since we are going to loop
			 * back and check for both CM and CQ events
			 */
			evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
		} else { /* error */
			mutex_exit(&evd_rp->evd_lock);
			break;
		}
	}

maxevent_reached:
	args.evp_num_polled = num_events;

	/*
	 * At this point retval might have a value that we want to return
	 * back to the user. So the copyouts shouldn't clobber retval.
	 */
	if (args.evp_num_polled > 0) { /* copyout the events */
		rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
		    sizeof (dapl_ib_event_t), mode);
		if (rc != 0) { /* XXX: we are losing events here */
			DERR("event_poll: event array copyout error %d", rc);
			retval = EFAULT;
			goto cleanup;
		}
		rc = daplka_event_poll_copyout(&args, arg, mode);
		if (rc != 0) {  /* XXX: we are losing events here */
			DERR("event_poll: copyout error %d\n", rc);
			retval = EFAULT;
			goto cleanup;
		}
	}

cleanup:;
	if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
		kmem_free(evp_start, evp_size);
	}

	if (evd_rp != NULL) {
		DAPLKA_RS_UNREF(evd_rp);
	}
	return (retval);
}

/* ARGSUSED */
static int
daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_event_wakeup_t	args;
	daplka_evd_resource_t	*evd_rp;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
	    mode);
	if (retval != 0) {
		DERR("event_wakeup: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
	if (evd_rp == NULL) {
		DERR("event_wakeup: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	daplka_evd_wakeup(evd_rp, NULL, NULL);

	DAPLKA_RS_UNREF(evd_rp);

	return (retval);
}

/* ARGSUSED */
static int
daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_evd_modify_cno_t	args;
	daplka_evd_resource_t	*evd_rp;
	daplka_cno_resource_t	*cno_rp;
	daplka_cno_resource_t	*old_cno_rp;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
	    mode);
	if (retval != 0) {
		DERR("evd_modify_cno: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
	if (evd_rp == NULL) {
		DERR("evd_modify_cno: cannot find evd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	if (args.evmc_cno_hkey > 0) {
		/* get cno resource corresponding to the new CNO */
		cno_rp = (daplka_cno_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
		    args.evmc_cno_hkey);
		if (cno_rp == NULL) {
			DERR("evd_modify_cno: cannot find CNO resource\n");
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	} else {
		cno_rp = NULL;
	}

	mutex_enter(&evd_rp->evd_lock);
	old_cno_rp = evd_rp->evd_cno_res;
	evd_rp->evd_cno_res = cno_rp;
	mutex_exit(&evd_rp->evd_lock);

	/*
	 * drop the refcnt on the old CNO, the refcnt on the new CNO is
	 * retained since the evd holds a reference to it.
	 */
	if (old_cno_rp) {
		DAPLKA_RS_UNREF(old_cno_rp);
	}

cleanup:
	if (evd_rp) {
		DAPLKA_RS_UNREF(evd_rp);
	}

	return (retval);
}

/*
 * Frees the EVD and associated resources.
 * If there are other threads still using this EVD, the destruction
 * is deferred until the EVD's refcnt drops to zero.
 */
/* ARGSUSED */
static int
daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	daplka_async_evd_hkey_t	*curr;
	daplka_async_evd_hkey_t	*prev;
	dapl_evd_free_t		args;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
	if (retval != 0) {
		DERR("evd_free: copyin error %d\n", retval);
		return (EFAULT);
	}
	retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
	    (void **)&evd_rp);
	if (retval != 0 || evd_rp == NULL) {
		DERR("evd_free: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	/* If this is an async evd remove it from the IA's async evd list */
	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
		mutex_enter(&ia_rp->ia_lock);
		curr = prev = ia_rp->ia_async_evd_hkeys;
		while (curr != NULL) {
			if (curr->aeh_evd_hkey == args.evf_hkey) {
				/* unlink curr from the list */
				if (curr == prev) {
					/*
					 * if first element in the list update
					 * the list head
					 */
					ia_rp->ia_async_evd_hkeys =
					    curr->aeh_next;
				} else {
					prev->aeh_next = curr->aeh_next;
				}
				break;
			}
			prev = curr;
			curr = curr->aeh_next;
		}
		mutex_exit(&ia_rp->ia_lock);
		/*
		 * free the curr entry; it must be on the list since it
		 * was added in evd_create
		 */
		kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
	}

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(evd_rp);
	return (0);
}

/*
 * destroys EVD resource.
 * called when refcnt drops to zero.
 */
static int
daplka_evd_destroy(daplka_resource_t *gen_rp)
{
	daplka_evd_resource_t	*evd_rp = (daplka_evd_resource_t *)gen_rp;
	ibt_status_t		status;
	daplka_evd_event_t	*evt;
	ibt_priv_data_len_t	len;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
	D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
	    evd_rp, DAPLKA_RS_RNUM(evd_rp));
	/*
	 * free CQ
	 */
	if (evd_rp->evd_cq_hdl) {
		ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
		mutex_enter(&daplka_dev->daplka_mutex);
		ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
		mutex_exit(&daplka_dev->daplka_mutex);

		status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
		if (status != IBT_SUCCESS) {
			DERR("evd_destroy: ibt_free_cq returned %d\n", status);
		}
		evd_rp->evd_cq_hdl = NULL;
		D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
	}

	/*
	 * release reference on CNO
	 */
	if (evd_rp->evd_cno_res != NULL) {
		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
		if (evd_rp->evd_cno_res->cno_evd_cookie ==
		    evd_rp->evd_cookie) {
			evd_rp->evd_cno_res->cno_evd_cookie = 0;
		}
		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
		DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
		evd_rp->evd_cno_res = NULL;
	}

	/*
	 * discard all remaining events
	 */
	mutex_enter(&evd_rp->evd_lock);
	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
		D2("evd_destroy: discarding CR event: %d\n",
		    evt->ee_cmev.ec_cm_ev_type);
		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
		if (len > 0) {
			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(evt, sizeof (*evt));
	}
	ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);

	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
		D2("evd_destroy: discarding CONN event: %d\n",
		    evt->ee_cmev.ec_cm_ev_type);
		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
		if (len > 0) {
			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(evt, sizeof (*evt));
	}
	ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);

	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
		DERR("evd_destroy: discarding ASYNC event: %d\n",
		    evt->ee_aev.ibae_type);
		kmem_free(evt, sizeof (*evt));
	}
	ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
	mutex_exit(&evd_rp->evd_lock);

	mutex_destroy(&evd_rp->evd_lock);
	DAPLKA_RS_FINI(evd_rp);
	kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
	D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
	return (0);
}

static void
daplka_hash_evd_free(void *obj)
{
	daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
	DAPLKA_RS_UNREF(evd_rp);
}

/*
 * this handler fires when new completions arrive.
 */
/* ARGSUSED */
static void
daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
{
	D3("cq_handler: fired setting evd_newevents\n");
	daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
}
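
/*
 * note: the handler above runs in the IBTF completion-handler context
 * registered via ibt_set_cq_handler in evd_create. it only flags
 * DAPLKA_EVD_ULAND_EVENTS and broadcasts the cv, which is safe from
 * that context.
 */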

/*
 * this routine wakes up a client from evd_wait. if evtq and evt
 * are non-null, the event evt will be enqueued prior to waking
 * up the client. if the evd is associated with a CNO and if there
 * are no waiters on the evd, the CNO will be notified.
 */
static void
daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
	daplka_evd_event_t *evt)
{
	uint32_t waiters = 0;

	mutex_enter(&evd_rp->evd_lock);
	if (evtq != NULL && evt != NULL) {
		ASSERT(evtq == &evd_rp->evd_cr_events ||
		    evtq == &evd_rp->evd_conn_events ||
		    evtq == &evd_rp->evd_async_events);
		daplka_evd_event_enqueue(evtq, evt);
		ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
		    (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
		evd_rp->evd_newevents |= evtq->eel_event_type;
	} else {
		evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
	}
	waiters = evd_rp->evd_waiters;
	cv_broadcast(&evd_rp->evd_cv);
	mutex_exit(&evd_rp->evd_lock);

	/*
	 * only wakeup the CNO if there are no waiters on this evd.
	 * a waiter blocked in evd_wait will consume the event directly,
	 * so the CNO only needs to fire when nobody is waiting here.
	 */
	if (evd_rp->evd_cno_res != NULL && waiters == 0) {
		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
		evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
		cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
	}
}

/*
 * daplka_evd_event_enqueue adds elem to the end of the event list
 * The caller is expected to acquire appropriate locks before
 * calling enqueue
 */
static void
daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
    daplka_evd_event_t *elem)
{
	if (evlist->eel_tail) {
		evlist->eel_tail->ee_next = elem;
		evlist->eel_tail = elem;
	} else {
		/* list is empty */
		ASSERT(evlist->eel_head == NULL);
		evlist->eel_head = elem;
		evlist->eel_tail = elem;
	}
	evlist->eel_num_elements++;
}

/*
 * daplka_evd_event_dequeue removes and returns the first element of the
 * event list. NULL is returned if the list is empty. The caller is
 * expected to acquire appropriate locks before calling dequeue.
 */
static daplka_evd_event_t *
daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
{
	daplka_evd_event_t *head;

	head = evlist->eel_head;
	if (head == NULL) {
		return (NULL);
	}

	evlist->eel_head = head->ee_next;
	evlist->eel_num_elements--;
	/* if it was the last element update the tail pointer too */
	if (evlist->eel_head == NULL) {
		ASSERT(evlist->eel_num_elements == 0);
		evlist->eel_tail = NULL;
	}
	return (head);
}

/*
 * A CNO allows the client to wait for notifications from multiple EVDs.
 * To use a CNO, the client needs to follow the procedure below:
 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
 * 2. create one or more EVDs using the returned cno_hkey.
 * 3. call cno_wait. when one of the associated EVDs get notified, the
 *    CNO will also get notified. cno_wait will then return with a
 *    evd_cookie identifying the EVD that triggered the event.
 *
 * A note about cno_wait:
 * -unlike an EVD, a CNO does not maintain a queue of notifications. For
 *  example, suppose multiple EVDs triggered a CNO before the client calls
 *  cno_wait; when the client calls cno_wait, it will return with the
 *  evd_cookie that identifies the *last* EVD that triggered the CNO. It
 *  is the responsibility of the client, upon returning from cno_wait, to
 *  check on all EVDs that can potentially trigger the CNO. the returned
 *  evd_cookie is only meant to be a hint. there is no guarantee that the
 *  EVD identified by the evd_cookie still contains an event or still
 *  exists by the time cno_wait returns.
 */
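
/*
 * an illustrative library-side sequence (a sketch; the ioctl command
 * names are hypothetical):
 *
 *	ioctl(fd, DAPL_CNO_ALLOC, &cno_args);	   returns cno_hkey
 *	evd_args.evd_cno_hkey = cno_args.cno_hkey;
 *	ioctl(fd, DAPL_EVD_CREATE, &evd_args);	   one or more EVDs
 *	ioctl(fd, DAPL_CNO_WAIT, &wait_args);	   blocks, then returns
 *						   an evd_cookie hint
 */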

/*
 * allocates a CNO.
 * the returned cno_hkey may subsequently be used in evd_create.
 */
/* ARGSUSED */
static int
daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_cno_alloc_t	args;
	daplka_cno_resource_t	*cno_rp = NULL;
	uint64_t		cno_hkey = 0;
	boolean_t		inserted = B_FALSE;
	int			retval = 0;

	cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
	if (cno_rp == NULL) {
		DERR("cno_alloc: cannot allocate cno resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp))
	DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
	    DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);

	mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
	cno_rp->cno_evd_cookie = 0;

	/* insert into cno hash table */
	retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
	    &cno_hkey, (void *)cno_rp);
	if (retval != 0) {
		DERR("cno_alloc: cannot insert cno resource\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp))

	/* return hkey to library */
	args.cno_hkey = cno_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
	    mode);
	if (retval != 0) {
		DERR("cno_alloc: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_cno_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
		    (void **)&free_rp);
		if (free_rp != cno_rp) {
			DERR("cno_alloc: cannot remove cno\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in cno_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(cno_rp);
	return (retval);
}

/*
 * destroys a CNO.
 * this gets called when a CNO resource's refcnt drops to zero.
 */
static int
daplka_cno_destroy(daplka_resource_t *gen_rp)
{
	daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;

	ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
	D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
	    cno_rp, DAPLKA_RS_RNUM(cno_rp));

	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	cv_destroy(&cno_rp->cno_cv);
	mutex_destroy(&cno_rp->cno_lock);

	DAPLKA_RS_FINI(cno_rp);
	kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
	D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
	return (0);
}

static void
daplka_hash_cno_free(void *obj)
{
	daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	DAPLKA_RS_UNREF(cno_rp);
}

/*
 * removes the CNO from the cno hash table and frees the CNO
 * if there are no references to it. if there are references to
 * it, the CNO will be destroyed when the last of the references
 * is released. once the CNO is removed from the cno hash table,
 * the client will no longer be able to call cno_wait on the CNO.
 */
/* ARGSUSED */
static int
daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_cno_resource_t	*cno_rp = NULL;
	dapl_cno_free_t		args;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
	if (retval != 0) {
		DERR("cno_free: copyin error %d\n", retval);
		return (EFAULT);
	}

	retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
	    args.cnf_hkey, (void **)&cno_rp);
	if (retval != 0 || cno_rp == NULL) {
		DERR("cno_free: cannot find cno resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(cno_rp);
	return (0);
}

/*
 * wait for a notification from one of the associated EVDs.
 */
/* ARGSUSED */
static int
daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_cno_resource_t	*cno_rp = NULL;
	dapl_cno_wait_t		args;
	int			retval = 0;
	uint64_t		evd_cookie = 0;
	clock_t			timeout, curr_time;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
	if (retval != 0) {
		DERR("cno_wait: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* get cno resource */
	cno_rp = (daplka_cno_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
	if (cno_rp == NULL) {
		DERR("cno_wait: cannot find cno resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);

	curr_time = ddi_get_lbolt();
	timeout = curr_time + drv_usectohz(args.cnw_timeout);

	/*
	 * use the max value if we wrapped around
	 */
	if (args.cnw_timeout > 0 && timeout <= curr_time) {
		/*
		 * clock_t (size long) changes between 32 and 64-bit kernels
		 */
		timeout = LONG_MAX >> 4;
	}
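	/*
	 * worked example of the wrap check, assuming a 32-bit kernel
	 * with hz=100: a cnw_timeout large enough that curr_time +
	 * drv_usectohz(cnw_timeout) exceeds LONG_MAX makes the sum go
	 * negative, so we clamp to LONG_MAX >> 4, which still leaves
	 * headroom for further lbolt arithmetic.
	 */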
	mutex_enter(&cno_rp->cno_lock);
	while (cno_rp->cno_evd_cookie == 0) {
		int rval = 0;

		rval = cv_timedwait_sig(&cno_rp->cno_cv,
		    &cno_rp->cno_lock, timeout);
		if (rval == 0) {
			DERR("cno_wait: interrupted\n");
			mutex_exit(&cno_rp->cno_lock);
			retval = EINTR;
			goto cleanup;
		} else if (rval == -1) {
			DERR("cno_wait: timed out\n");
			mutex_exit(&cno_rp->cno_lock);
			retval = ETIME;
			goto cleanup;
		}
	}
	evd_cookie = cno_rp->cno_evd_cookie;
	cno_rp->cno_evd_cookie = 0;
	mutex_exit(&cno_rp->cno_lock);

	ASSERT(evd_cookie != 0);
	D2("cno_wait: returning evd_cookie 0x%p\n",
	    (void *)(uintptr_t)evd_cookie);
	args.cnw_evd_cookie = evd_cookie;
	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_cno_wait_t), mode);
	if (retval != 0) {
		DERR("cno_wait: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (cno_rp != NULL) {
		DAPLKA_RS_UNREF(cno_rp);
	}
	return (retval);
}

/*
 * this function is called by the client when it decides to
 * accept a connection request. a connection request is generated
 * when the active side generates a REQ MAD to a service point on
 * the destination node. this causes the CM service handler
 * (daplka_cm_service_req) on the passive side to be called. This
 * handler will then enqueue this connection request to the backlog
 * array of the service point. A connection event containing the
 * backlog array index and connection request private data is passed
 * to the client's service point EVD (sp_evd_res). once the event
 * is passed up to the userland, the client may examine the request
 * to decide whether to call daplka_cr_accept or daplka_cr_reject.
 */
/* ARGSUSED */
static int
daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp = NULL;
	daplka_sp_resource_t		*sp_rp = NULL;
	dapl_cr_accept_t		args;
	daplka_sp_conn_pend_t		*conn;
	ibt_cm_proceed_reply_t		proc_reply;
	ibt_status_t			status;
	uint16_t			bkl_index;
	uint32_t			old_state, new_state;
	int				retval = 0;
	void				*priv_data = NULL, *sid;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
	    mode);
	if (retval != 0) {
		DERR("cr_accept: copyin error %d\n", retval);
		return (EFAULT);
	}
	if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("cr_accept: private data len (%d) exceeded "
		    "max size %d\n", args.cra_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		return (EINVAL);
	}
	priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;

	D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
	    args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);

	/* get sp resource */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.cra_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_accept: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	/* get ep resource */
	ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
	    args.cra_ep_hkey);
	if (ep_rp == NULL) {
		DERR("cr_accept: cannot find ep resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/*
	 * accept is only allowed if ep_state is CLOSED.
	 * note that after this point, the ep_state is frozen
	 * (i.e. TRANSITIONING) until we transition ep_state
	 * to ACCEPTING or back to CLOSED if we get an error.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("cr_accept: invalid ep state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_accept: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.cra_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_accept: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_accept: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (ep_rp->ep_chan_hdl == NULL) {
		/*
		 * an ep_rp with a NULL chan_hdl is impossible.
		 */
		DERR("cr_accept: ep_chan_hdl == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		ASSERT(B_FALSE);
		retval = EINVAL;
		goto cleanup;
	}
	proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
	proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
	proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
	proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
	sid = conn->spcp_sid;

	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;
	mutex_exit(&sp_rp->sp_lock);

	/*
	 * Set the unique cookie corresponding to the CR to this EP
	 * so that it can be used in passive side CM callbacks
	 */
	ep_rp->ep_psep_cookie = args.cra_bkl_cookie;

	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
	    &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);

	if (status != IBT_SUCCESS) {
		DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
	}
	/*
	 * note that the CM handler may actually be called at this
	 * point. but since ep_state is still in TRANSITIONING, the
	 * handler will wait until we transition to ACCEPTING. this
	 * prevents the case where we set ep_state to ACCEPTING after
	 * daplka_service_conn_est sets ep_state to CONNECTED.
	 */
	new_state = DAPLKA_EP_STATE_ACCEPTING;

cleanup:;
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	if (ep_rp != NULL) {
		daplka_ep_set_state(ep_rp, old_state, new_state);
		DAPLKA_RS_UNREF(ep_rp);
	}
	return (retval);
}
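
/*
 * the backlog-slot checks in daplka_cr_accept also appear in
 * daplka_cr_reject and daplka_cr_handoff below. a minimal sketch of
 * a helper that could factor out the claim step (hypothetical, not
 * part of this driver; the caller must hold sp_lock and map a NULL
 * return to EINVAL):
 *
 *	static void *
 *	daplka_sp_claim_pending(daplka_sp_resource_t *sp_rp,
 *	    uint16_t bkl_index)
 *	{
 *		daplka_sp_conn_pend_t	*conn;
 *		void			*sid;
 *
 *		ASSERT(MUTEX_HELD(&sp_rp->sp_lock));
 *		if (bkl_index >= sp_rp->sp_backlog_size)
 *			return (NULL);
 *		conn = &sp_rp->sp_backlog[bkl_index];
 *		if (conn->spcp_state != DAPLKA_SPCP_PENDING ||
 *		    conn->spcp_sid == NULL)
 *			return (NULL);
 *		sid = conn->spcp_sid;
 *		conn->spcp_sid = NULL;
 *		conn->spcp_state = DAPLKA_SPCP_INIT;
 *		conn->spcp_req_len = 0;
 *		return (sid);
 *	}
 */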

/*
 * this function is called by the client to reject a
 * connection request.
 */
/* ARGSUSED */
static int
daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_cr_reject_t	args;
	daplka_sp_resource_t	*sp_rp = NULL;
	daplka_sp_conn_pend_t	*conn;
	ibt_cm_proceed_reply_t	proc_reply;
	ibt_cm_status_t		proc_status;
	ibt_status_t		status;
	uint16_t		bkl_index;
	int			retval = 0;
	void			*sid;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
	    mode);
	if (retval != 0) {
		DERR("cr_reject: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* get sp resource */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.crr_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_reject: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_reject: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.crr_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_reject: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_reject: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	bzero(&proc_reply, sizeof (proc_reply));
	sid = conn->spcp_sid;

	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;

	switch (args.crr_reason) {
	case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
		/* results in IBT_CM_CONSUMER as the reason for reject */
		proc_status = IBT_CM_REJECT;
		break;
	case DAPL_IB_CME_LOCAL_FAILURE:
		/*FALLTHRU*/
	case DAPL_IB_CME_DESTINATION_UNREACHABLE:
		/* results in IBT_CM_NO_RESC as the reason for reject */
		proc_status = IBT_CM_NO_RESOURCE;
		break;
	default:
		/* unexpected reason code */
		ASSERT(!"unexpected reject reason code");
		proc_status = IBT_CM_NO_RESOURCE;
		break;
	}

	mutex_exit(&sp_rp->sp_lock);

	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
	    &proc_reply, NULL, 0);

	if (status != IBT_SUCCESS) {
		DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
	}

cleanup:;
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	return (retval);
}


/*
 * daplka_sp_match is used by daplka_hash_walk for finding SPs
 */
typedef struct daplka_sp_match_s {
	uint64_t		spm_conn_qual;
	daplka_sp_resource_t	*spm_sp_rp;
} daplka_sp_match_t;
_NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp))

static int
daplka_sp_match(void *objp, void *arg)
{
	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)objp;

	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
	if (sp_rp->sp_conn_qual ==
	    ((daplka_sp_match_t *)arg)->spm_conn_qual) {
		((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
		D2("daplka_sp_match: found sp, conn_qual %016llu\n",
		    (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
		DAPLKA_RS_REF(sp_rp);
		return (1);
	}
	return (0);
}

/*
 * cr_handoff allows the client to hand off a connection request from
 * one service point to another.
 */
/* ARGSUSED */
static int
daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_cr_handoff_t		args;
	daplka_sp_resource_t		*sp_rp = NULL, *new_sp_rp = NULL;
	daplka_sp_conn_pend_t		*conn;
	daplka_sp_match_t		sp_match;
	ibt_cm_event_t			fake_event;
	ibt_cm_status_t			cm_status;
	ibt_status_t			status;
	uint16_t			bkl_index;
	void				*sid, *priv = NULL;
	int				retval = 0, priv_len = 0;

	D3("cr_handoff: entering\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
	    mode);
	if (retval != 0) {
		DERR("cr_handoff: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* get sp resource */
	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
	    args.crh_sp_hkey);
	if (sp_rp == NULL) {
		DERR("cr_handoff: cannot find sp resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	/*
	 * find the destination service point.
	 */
	sp_match.spm_conn_qual = args.crh_conn_qual;
	sp_match.spm_sp_rp = NULL;
	daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
	    (void *)&sp_match, RW_READER);

	/*
	 * return if we cannot find the service point
	 */
	if (sp_match.spm_sp_rp == NULL) {
		DERR("cr_handoff: new sp not found, conn qual = %llu\n",
		    (longlong_t)args.crh_conn_qual);
		retval = EINVAL;
		goto cleanup;
	}
	new_sp_rp = sp_match.spm_sp_rp;

	/*
	 * the spec does not discuss the security implications of this
	 * function. to be safe, we currently only allow processes
	 * owned by the same user to hand off connection requests
	 * to each other.
	 */
	if (crgetruid(cred) != new_sp_rp->sp_ruid) {
		DERR("cr_handoff: permission denied\n");
		retval = EPERM;
		goto cleanup;
	}

	D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);

	mutex_enter(&sp_rp->sp_lock);
	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
	/*
	 * make sure the backlog index is not bogus.
	 */
	if (bkl_index >= sp_rp->sp_backlog_size) {
		DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
		    (longlong_t)args.crh_bkl_cookie, bkl_index);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * make sure the backlog index indeed refers
	 * to a pending connection.
	 */
	conn = &sp_rp->sp_backlog[bkl_index];
	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
		DERR("cr_handoff: invalid conn state %d\n",
		    conn->spcp_state);
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	if (conn->spcp_sid == NULL) {
		DERR("cr_handoff: sid == NULL\n");
		mutex_exit(&sp_rp->sp_lock);
		retval = EINVAL;
		goto cleanup;
	}
	sid = conn->spcp_sid;
	priv = NULL;
	priv_len = conn->spcp_req_len;
	if (priv_len > 0) {
		priv = kmem_zalloc(priv_len, daplka_km_flags);
		if (priv == NULL) {
			mutex_exit(&sp_rp->sp_lock);
			retval = ENOMEM;
			goto cleanup;
		}
		bcopy(conn->spcp_req_data, priv, priv_len);
	}
	/*
	 * this clears our slot in the backlog array.
	 * this slot may now be used by other pending connections.
	 */
	conn->spcp_sid = NULL;
	conn->spcp_state = DAPLKA_SPCP_INIT;
	conn->spcp_req_len = 0;
	mutex_exit(&sp_rp->sp_lock);

	/* fill fake_event and call service_req handler */
	bzero(&fake_event, sizeof (fake_event));
	fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
	fake_event.cm_session_id = sid;
	fake_event.cm_priv_data_len = priv_len;
	fake_event.cm_priv_data = priv;

	cm_status = daplka_cm_service_req(new_sp_rp,
	    &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
	if (cm_status != IBT_CM_DEFER) {
		ibt_cm_proceed_reply_t	proc_reply;

		DERR("cr_handoff: service_req returned %d\n", cm_status);
		/*
		 * if for some reason cm_service_req failed, we
		 * reject the connection.
		 */
		bzero(&proc_reply, sizeof (proc_reply));

		status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
		    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
		if (status != IBT_SUCCESS) {
			DERR("cr_handoff: ibt_cm_proceed returned %d\n",
			    status);
		}
		*rvalp = (int)status;
		retval = 0;
	}

cleanup:;
	if (priv_len > 0 && priv != NULL) {
		kmem_free(priv, priv_len);
	}
	if (new_sp_rp != NULL) {
		DAPLKA_RS_UNREF(new_sp_rp);
	}
	if (sp_rp != NULL) {
		DAPLKA_RS_UNREF(sp_rp);
	}
	D3("cr_handoff: exiting\n");
	return (retval);
}

/*
 * returns the attributes of the hca associated with this IA
 */
/* ARGSUSED */
static int
daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_ia_query_t		args;
	int			retval;
	ibt_hca_attr_t		*hcap;

	hcap = &ia_rp->ia_hca->hca_attr;

	/*
	 * Take the ibt_hca_attr_t fields and stuff them into the
	 * dapl_hca_attr_t
	 */
	args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
	args.hca_attr.dhca_device_id = hcap->hca_device_id;
	args.hca_attr.dhca_version_id = hcap->hca_version_id;
	args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
	args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
	args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
	args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
	args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
	args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
	args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
	args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
	args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
	args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
	args.hca_attr.dhca_max_partitions  = hcap->hca_max_partitions;
	args.hca_attr.dhca_nports  = hcap->hca_nports;
	args.hca_attr.dhca_node_guid  = hcap->hca_node_guid;
	args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
	args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
	args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
	args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
	    mode);
	if (retval != 0) {
		DERR("ia_query: copyout error %d\n", retval);
		return (EFAULT);
	}
	return (0);
}
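
/*
 * a minimal userland sketch of driving this query (hypothetical;
 * the ioctl command name DAPL_IA_QUERY and the open ia_fd are
 * assumptions, the dapl_ia_query_t layout is the one copied out
 * above):
 *
 *	dapl_ia_query_t	q;
 *
 *	if (ioctl(ia_fd, DAPL_IA_QUERY, &q) == 0) {
 *		(void) printf("nports %u node guid 0x%llx\n",
 *		    q.hca_attr.dhca_nports,
 *		    (u_longlong_t)q.hca_attr.dhca_node_guid);
 *	}
 */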

/*
 * This routine is passed to the hash walk in daplka_pre_mr_cleanup_callback;
 * it frees the mw embedded in the mw resource object.
 */
/* ARGSUSED */
static int
daplka_mr_cb_freemw(void *objp, void *arg)
{
	daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)objp;
	ibt_mw_hdl_t		mw_hdl;
	ibt_status_t		status;

	D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
	DAPLKA_RS_REF(mw_rp);

	mutex_enter(&mw_rp->mw_lock);
	mw_hdl = mw_rp->mw_hdl;
	/*
	 * we set mw_hdl to NULL so it won't get freed again
	 */
	mw_rp->mw_hdl = NULL;
	mutex_exit(&mw_rp->mw_lock);

	if (mw_hdl != NULL) {
		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
		if (status != IBT_SUCCESS) {
			DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
		}
		D3("mr_cb_freemw: mw freed\n");
	}

	DAPLKA_RS_UNREF(mw_rp);
	return (0);
}
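
/*
 * the take-and-null step in mr_cb_freemw is the usual pattern for
 * making a one-shot teardown idempotent: the handle is claimed under
 * the lock, cleared so that a racing caller sees NULL, and the
 * expensive free runs outside the lock. a generic sketch (obj and
 * hdl_t are hypothetical):
 *
 *	hdl_t	h;
 *
 *	mutex_enter(&obj->lock);
 *	h = obj->hdl;
 *	obj->hdl = NULL;
 *	mutex_exit(&obj->lock);
 *	if (h != NULL)
 *		(void) free_hdl(h);
 */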

/*
 * This routine is called from the HCA driver's umem lock undo callback
 * when the memory associated with an MR is being unmapped. In this callback
 * we free all the MWs associated with the IA and post an unaffiliated
 * async event to tell the app that there was a catastrophic event.
 * This allows the HCA to deregister the MR in its callback processing.
 */
static void
daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
{
	daplka_mr_resource_t	*mr_rp;
	daplka_ia_resource_t	*ia_rp;
#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
	ibt_async_event_t	event;
	ibt_hca_attr_t		*hca_attrp;
#endif
	minor_t			rnum;

	mr_rp = (daplka_mr_resource_t *)arg1;
	rnum = DAPLKA_RS_RNUM(mr_rp);
	daplka_shared_mr_free(mr_rp);

	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
	if (ia_rp == NULL) {
		DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
		    rnum);
		return;
	}

	DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);

	mutex_enter(&ia_rp->ia_lock);
	/*
	 * An MW is being allocated OR an MW freeze has already begun.
	 * In both cases we wait for that to complete before
	 * continuing.
	 */
	while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
	    (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
	}

	switch (ia_rp->ia_state) {
	case DAPLKA_IA_INIT:
		ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
		mutex_exit(&ia_rp->ia_lock);
		break;
	case DAPLKA_IA_MW_FROZEN:
		/* the mw on this ia have been freed */
		D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
		    ia_rp->ia_state);
		mutex_exit(&ia_rp->ia_lock);
		goto cleanup;
	default:
		ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
		DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
		    ia_rp->ia_state);
		mutex_exit(&ia_rp->ia_lock);
		goto cleanup;
	}

	/*
	 * Walk the mw hash table and free the mws. Acquire a writer
	 * lock since we don't want anyone else traversing this tree
	 * while we are freeing the MWs.
	 */
	daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
	    RW_WRITER);

	mutex_enter(&ia_rp->ia_lock);
	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
	ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
	cv_broadcast(&ia_rp->ia_cv);
	mutex_exit(&ia_rp->ia_lock);

	/*
	 * Currently commented out because Oracle skgxp is incapable
	 * of handling async events correctly.
	 */
#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
	/*
	 * Enqueue an unaffiliated async error event to indicate that
	 * this IA has encountered a problem that caused the MWs to be
	 * freed up
	 */

	/* Create a fake event, only relevant field is the hca_guid */
	bzero(&event, sizeof (ibt_async_event_t));
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	event.ev_hca_guid = hca_attrp->hca_node_guid;

	daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
	    ia_rp);
#endif	/* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */

cleanup:;
	D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
	DAPLKA_RS_UNREF(ia_rp);
}
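
/*
 * the ia_state handshake above follows the standard condition
 * variable pattern: a waiter blocks while a transition is in
 * progress, and the finishing thread broadcasts after restoring a
 * stable state. condensed to its core:
 *
 *	mutex_enter(&ia_rp->ia_lock);
 *	while (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)
 *		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
 *	... act on the now-stable ia_state ...
 *	mutex_exit(&ia_rp->ia_lock);
 */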

/*
 * registers a memory region.
 * memory locking will be done by the HCA driver.
 */
/* ARGSUSED */
static int
daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	boolean_t			inserted = B_FALSE;
	daplka_mr_resource_t		*mr_rp;
	daplka_pd_resource_t		*pd_rp;
	dapl_mr_register_t		args;
	ibt_mr_data_in_t		mr_cb_data_in;
	uint64_t			mr_hkey = 0;
	ibt_status_t			status;
	int				retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
	    mode);
	if (retval != 0) {
		DERR("mr_register: copyin error %d\n", retval);
		return (EINVAL);
	}
	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
	if (mr_rp == NULL) {
		DERR("mr_register: cannot allocate mr resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
	mr_rp->mr_hca = ia_rp->ia_hca;
	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
	mr_rp->mr_next = NULL;
	mr_rp->mr_shared_mr = NULL;

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
	if (pd_rp == NULL) {
		DERR("mr_register: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	mr_rp->mr_pd_res = pd_rp;

	mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
	mr_rp->mr_attr.mr_len = args.mr_len;
	mr_rp->mr_attr.mr_as = curproc->p_as;
	mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;

	D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
	    (longlong_t)mr_rp->mr_attr.mr_len,
	    mr_rp->mr_attr.mr_flags);

	status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
	    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
	    &mr_rp->mr_desc);

	if (status != IBT_SUCCESS) {
		DERR("mr_register: ibt_register_mr error %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
	mr_cb_data_in.mr_arg2 = NULL;

	/* Pass the service driver mr cleanup handler to the hca driver */
	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
	    &mr_cb_data_in, sizeof (mr_cb_data_in));

	if (status != IBT_SUCCESS) {
		DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
		    status, mr_cb_data_in.mr_rev);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into mr hash table */
	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
	    &mr_hkey, (void *)mr_rp);
	if (retval != 0) {
		DERR("mr_register: cannot insert mr resource into mr_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))

	args.mr_lkey = mr_rp->mr_desc.md_lkey;
	args.mr_rkey = mr_rp->mr_desc.md_rkey;
	args.mr_hkey = mr_hkey;

	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_mr_register_t), mode);
	if (retval != 0) {
		DERR("mr_register: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_mr_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
		    (void **)&free_rp);
		if (free_rp != mr_rp) {
			DERR("mr_register: cannot remove mr from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in mr_deregister
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(mr_rp);
	return (retval);
}

/*
 * registers a shared memory region.
 * the client calls this function with the intention to share the memory
 * region with other clients. it is assumed that, prior to calling this
 * function, the client(s) are already sharing parts of their address
 * space using a mechanism such as SYSV shared memory. the first client
 * that calls this function will create and insert a daplka_shared_mr_t
 * object into the global daplka_shared_mr_tree. this shared mr object
 * will be identified by a unique 40-byte key and will maintain a list
 * of mr resources. every time this function gets called with the same
 * 40-byte key, a new mr resource (containing a new mr handle generated
 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
 * into this list. similarly, every time a shared mr gets deregistered
 * or invalidated by a callback, the mr resource gets removed from this
 * list. the shared mr object has a reference count. when it drops to
 * zero, the shared mr object will be removed from the global avl tree
 * and be freed.
 */
/* ARGSUSED */
static int
daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_mr_register_shared_t	args;
	daplka_shared_mr_t		*smrp = NULL;
	daplka_shared_mr_t		tmp_smr;
	ibt_mr_data_in_t		mr_cb_data_in;
	avl_index_t			where;
	boolean_t			inserted = B_FALSE;
	daplka_mr_resource_t		*mr_rp = NULL;
	daplka_pd_resource_t		*pd_rp;
	uint64_t			mr_hkey = 0;
	ibt_status_t			status;
	int				retval;

	retval = ddi_copyin((void *)arg, &args,
	    sizeof (dapl_mr_register_shared_t), mode);
	if (retval != 0) {
		DERR("mr_register_shared: copyin error %d\n", retval);
		return (EINVAL);
	}

	mutex_enter(&daplka_shared_mr_lock);
	/*
	 * find smrp from the global avl tree.
	 * the 40-byte key is used as the lookup key.
	 */
	tmp_smr.smr_cookie = args.mrs_shm_cookie;
	smrp = (daplka_shared_mr_t *)
	    avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
	if (smrp != NULL) {
		D2("mr_register_shared: smrp 0x%p, found cookie:\n"
		    "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);

		/*
		 * if the smrp exists, other threads could still be
		 * accessing it. we wait until they are done before
		 * we continue.
		 */
		smrp->smr_refcnt++;
		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
			D2("mr_register_shared: smrp 0x%p, "
			    "waiting in transitioning state, refcnt %d\n",
			    smrp, smrp->smr_refcnt);
			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
		}
		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
		D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
		    smrp, smrp->smr_refcnt);

		/*
		 * we set smr_state to TRANSITIONING to temporarily
		 * prevent other threads from trying to access smrp.
		 */
		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
	} else {
		D2("mr_register_shared: cannot find cookie:\n"
		    "0x%016llx%016llx%016llx%016llx%016llx\n",
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);

		/*
		 * if we cannot find smrp, we need to create and
		 * insert one into daplka_shared_mr_tree
		 */
		smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
		    daplka_km_flags);
		if (smrp == NULL) {
			retval = ENOMEM;
			mutex_exit(&daplka_shared_mr_lock);
			goto cleanup;
		}
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
		smrp->smr_refcnt = 1;
		smrp->smr_cookie = args.mrs_shm_cookie;
		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
		smrp->smr_mr_list = NULL;
		cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
		avl_insert(&daplka_shared_mr_tree, smrp, where);
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp))
	}
	mutex_exit(&daplka_shared_mr_lock);

	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
	if (mr_rp == NULL) {
		DERR("mr_register_shared: cannot allocate mr resource\n");
		retval = ENOMEM;
		goto cleanup;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
	mr_rp->mr_hca = ia_rp->ia_hca;
	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
	mr_rp->mr_next = NULL;
	mr_rp->mr_shared_mr = NULL;

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
	if (pd_rp == NULL) {
		DERR("mr_register_shared: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	mr_rp->mr_pd_res = pd_rp;

	mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
	mr_rp->mr_attr.mr_len = args.mrs_len;
	mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
	mr_rp->mr_attr.mr_as = curproc->p_as;

	D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
	    "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
	    (longlong_t)mr_rp->mr_attr.mr_len,
	    mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
	    (int)(smrp->smr_mr_list != NULL), smrp);

	/*
	 * since we are in TRANSITIONING state, we are guaranteed
	 * that we have exclusive access to smr_mr_list.
	 */
	if (smrp->smr_mr_list != NULL) {
		ibt_smr_attr_t	mem_sattr;

		/*
		 * a non-null smr_mr_list indicates that someone
		 * else has already inserted an mr_resource into
		 * smr_mr_list. we use the mr_handle from the first
		 * element as an arg to ibt_register_shared_mr.
		 */
		mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
		mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;

		D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
		    (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
		status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
		    smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
		    &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);

		if (status != IBT_SUCCESS) {
			DERR("mr_register_shared: "
			    "ibt_register_shared_mr error %d\n", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
	} else {
		/*
		 * an mr does not exist yet. we need to create one
		 * using ibt_register_mr.
		 */
		status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
		    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
		    &mr_rp->mr_hdl, &mr_rp->mr_desc);

		if (status != IBT_SUCCESS) {
			DERR("mr_register_shared: "
			    "ibt_register_mr error %d\n", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
	}

	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
	mr_cb_data_in.mr_arg2 = NULL;

	/* Pass the service driver mr cleanup handler to the hca driver */
	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
	    &mr_cb_data_in, sizeof (mr_cb_data_in));

	if (status != IBT_SUCCESS) {
		DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
		    status, mr_cb_data_in.mr_rev);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * we bump the refcnt on mr_rp and enqueue it onto smrp.
	 */
	DAPLKA_RS_REF(mr_rp);
	mr_rp->mr_next = smrp->smr_mr_list;
	smrp->smr_mr_list = mr_rp;
	mr_rp->mr_shared_mr = smrp;

	/* insert into mr hash table */
	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
	    &mr_hkey, (void *)mr_rp);
	if (retval != 0) {
		DERR("mr_register_shared: cannot insert mr resource\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))

	/*
	 * at this point, there are two references to our mr resource.
	 * one is kept in ia_mr_htbl. the other is kept in the list
	 * within this shared mr object (smrp). when we deregister this
	 * mr or when a callback invalidates this mr, the reference kept
	 * by this shared mr object will be removed.
	 */

	args.mrs_lkey = mr_rp->mr_desc.md_lkey;
	args.mrs_rkey = mr_rp->mr_desc.md_rkey;
	args.mrs_hkey = mr_hkey;

	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_mr_register_shared_t), mode);
	if (retval != 0) {
		DERR("mr_register_shared: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	/*
	 * set the state to READY to allow others to continue
	 */
	mutex_enter(&daplka_shared_mr_lock);
	smrp->smr_state = DAPLKA_SMR_READY;
	cv_broadcast(&smrp->smr_cv);
	mutex_exit(&daplka_shared_mr_lock);
	return (0);

cleanup:;
	if (inserted) {
		daplka_mr_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
		    (void **)&free_rp);
		if (free_rp != mr_rp) {
			DERR("mr_register_shared: "
			    "cannot remove mr from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in mr_deregister
			 */
			return (retval);
		}
	}
	if (smrp != NULL) {
		mutex_enter(&daplka_shared_mr_lock);
		ASSERT(smrp->smr_refcnt > 0);
		smrp->smr_refcnt--;

		if (smrp->smr_refcnt == 0) {
			DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
			avl_remove(&daplka_shared_mr_tree, smrp);
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
			if (smrp->smr_mr_list != NULL) {
				/*
				 * the refcnt is 0. if there is anything
				 * left on the list, it must be ours.
				 */
				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
				ASSERT(smrp->smr_mr_list == mr_rp);
				DAPLKA_RS_UNREF(mr_rp);
				smrp->smr_mr_list = NULL;
				ASSERT(mr_rp->mr_shared_mr == smrp);
				mr_rp->mr_shared_mr = NULL;
				ASSERT(mr_rp->mr_next == NULL);
			}
			smrp->smr_state = DAPLKA_SMR_FREED;
			cv_destroy(&smrp->smr_cv);
			kmem_free(smrp, sizeof (daplka_shared_mr_t));
		} else {
			DERR("mr_register_shared: resetting smr_state "
			    "smrp 0x%p, %d waiters remain\n", smrp,
			    smrp->smr_refcnt);
			ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
			if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
				daplka_mr_resource_t	**mpp;

				/*
				 * search and remove mr_rp from smr_mr_list
				 */
				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
				mpp = &smrp->smr_mr_list;
				while (*mpp != NULL) {
					if (*mpp == mr_rp) {
						*mpp = (*mpp)->mr_next;
						DAPLKA_RS_UNREF(mr_rp);
						ASSERT(mr_rp->mr_shared_mr ==
						    smrp);
						mr_rp->mr_shared_mr = NULL;
						mr_rp->mr_next = NULL;
						break;
					}
					mpp = &(*mpp)->mr_next;
				}
			}
			/*
			 * note that smr_state == READY does not necessarily
			 * mean that smr_mr_list is non-empty. in this case,
			 * we are doing cleanup because of a failure; we set
			 * the state to READY to allow other threads to
			 * continue.
			 */
			smrp->smr_state = DAPLKA_SMR_READY;
			cv_broadcast(&smrp->smr_cv);
		}
		mutex_exit(&daplka_shared_mr_lock);
	}
	if (mr_rp != NULL) {
		DAPLKA_RS_UNREF(mr_rp);
	}
	return (retval);
}
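
/*
 * the lookup-or-insert step above relies on the avl_find/avl_insert
 * pairing: on a miss, avl_find records the insertion point in
 * "where", which avl_insert then consumes without a second search.
 * a minimal sketch of the idiom (foo_t and f_id are hypothetical;
 * the tree lock must be held across both calls, as
 * daplka_shared_mr_lock is above):
 *
 *	avl_index_t	where;
 *	foo_t		*fp, key;
 *
 *	key.f_id = id;
 *	fp = avl_find(&tree, &key, &where);
 *	if (fp == NULL) {
 *		fp = kmem_zalloc(sizeof (*fp), KM_SLEEP);
 *		fp->f_id = id;
 *		avl_insert(&tree, fp, where);
 *	}
 */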

/*
 * registers a memory region using the attributes of an
 * existing region.
 */
/* ARGSUSED */
static int
daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	boolean_t 			inserted = B_FALSE;
	dapl_mr_register_lmr_t		args;
	ibt_mr_data_in_t		mr_cb_data_in;
	daplka_mr_resource_t		*orig_mr_rp = NULL;
	daplka_mr_resource_t		*mr_rp;
	ibt_smr_attr_t			mem_sattr;
	uint64_t			mr_hkey = 0;
	ibt_status_t			status;
	int				retval;

	retval = ddi_copyin((void *)arg, &args,
	    sizeof (dapl_mr_register_lmr_t), mode);
	if (retval != 0) {
		DERR("mr_register_lmr: copyin error %d\n", retval);
		return (EINVAL);
	}
	orig_mr_rp = (daplka_mr_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
	if (orig_mr_rp == NULL) {
		DERR("mr_register_lmr: cannot find mr resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);

	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
	if (mr_rp == NULL) {
		DERR("mr_register_lmr: cannot allocate mr resource\n");
		retval = ENOMEM;
		goto cleanup;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
	mr_rp->mr_hca = ia_rp->ia_hca;
	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
	mr_rp->mr_next = NULL;
	mr_rp->mr_shared_mr = NULL;

	DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
	mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
	mr_rp->mr_attr = orig_mr_rp->mr_attr;

	/* Pass the IO addr that was returned while allocating the orig MR */
	mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
	mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;

	status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
	    orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
	    &mr_rp->mr_hdl, &mr_rp->mr_desc);

	if (status != IBT_SUCCESS) {
		DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
	mr_cb_data_in.mr_arg2 = NULL;

	/* Pass the service driver mr cleanup handler to the hca driver */
	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
	    &mr_cb_data_in, sizeof (mr_cb_data_in));

	if (status != IBT_SUCCESS) {
		DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
		    status, mr_cb_data_in.mr_rev);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
	mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;

	/* insert into mr hash table */
	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
	    (void *)mr_rp);
	if (retval != 0) {
		DERR("mr_register: cannot insert mr resource into mr_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))

	args.mrl_lkey = mr_rp->mr_desc.md_lkey;
	args.mrl_rkey = mr_rp->mr_desc.md_rkey;
	args.mrl_hkey = mr_hkey;

	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_mr_register_lmr_t), mode);
	if (retval != 0) {
		DERR("mr_register_lmr: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	if (orig_mr_rp != NULL) {
		DAPLKA_RS_UNREF(orig_mr_rp);
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_mr_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
		    (void **)&free_rp);
		if (free_rp != mr_rp) {
			DERR("mr_register: cannot remove mr from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in mr_deregister
			 */
			return (retval);
		}
	}
	if (orig_mr_rp != NULL) {
		DAPLKA_RS_UNREF(orig_mr_rp);
	}
	if (mr_rp != NULL) {
		DAPLKA_RS_UNREF(mr_rp);
	}
	return (retval);
}

/*
 * this function is called by mr_deregister and mr_cleanup_callback to
 * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
 * if mr_shared_mr is already NULL, that means the region being
 * deregistered or invalidated is not a shared mr region and we can
 * return immediately.
 */
static void
daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
{
	daplka_shared_mr_t	*smrp;

	/*
	 * we need a lock because mr_callback also checks this field.
	 * for the rare case that mr_deregister and mr_cleanup_callback
	 * get called simultaneously, we are guaranteed that smrp won't
	 * be dereferenced twice because either function will find
	 * mr_shared_mr to be NULL.
	 */
	mutex_enter(&mr_rp->mr_lock);
	smrp = mr_rp->mr_shared_mr;
	mr_rp->mr_shared_mr = NULL;
	mutex_exit(&mr_rp->mr_lock);

	if (smrp != NULL) {
		daplka_mr_resource_t	**mpp;
		boolean_t		mr_found = B_FALSE;

		mutex_enter(&daplka_shared_mr_lock);
		ASSERT(smrp->smr_refcnt > 0);
		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
		}
		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
		smrp->smr_refcnt--;

		/*
		 * search and remove mr_rp from smr_mr_list.
		 * also UNREF mr_rp because it is no longer
		 * on the list.
		 */
		mpp = &smrp->smr_mr_list;
		while (*mpp != NULL) {
			if (*mpp == mr_rp) {
				*mpp = (*mpp)->mr_next;
				DAPLKA_RS_UNREF(mr_rp);
				mr_rp->mr_next = NULL;
				mr_found = B_TRUE;
				break;
			}
			mpp = &(*mpp)->mr_next;
		}
		/*
		 * since mr_cleanup_callback may not touch smr_mr_list
		 * at this time (due to smr_state), we can be sure
		 * that we can find and remove mr_rp from smr_mr_list
		 */
		ASSERT(mr_found);
		if (smrp->smr_refcnt == 0) {
			D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
			avl_remove(&daplka_shared_mr_tree, smrp);
			ASSERT(smrp->smr_mr_list == NULL);
			smrp->smr_state = DAPLKA_SMR_FREED;
			cv_destroy(&smrp->smr_cv);
			kmem_free(smrp, sizeof (daplka_shared_mr_t));
		} else {
			D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
			    smrp, smrp->smr_refcnt);
			smrp->smr_state = DAPLKA_SMR_READY;
			cv_broadcast(&smrp->smr_cv);
		}
		mutex_exit(&daplka_shared_mr_lock);
	}
}
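
/*
 * the unlink loop above uses the pointer-to-pointer idiom, which
 * removes a node from a singly linked list without special-casing
 * the list head. a generic sketch (node_t and n_next are
 * hypothetical):
 *
 *	node_t	**npp = &head;
 *
 *	while (*npp != NULL) {
 *		if (*npp == target) {
 *			*npp = target->n_next;
 *			break;
 *		}
 *		npp = &(*npp)->n_next;
 *	}
 */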

/*
 * deregisters a memory region.
 * if mr is shared, remove reference from global shared mr object.
 * release the initial reference to the mr. if the mr's refcnt is
 * zero, call mr_destroy to free mr.
 */
/* ARGSUSED */
static int
daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_mr_resource_t	*mr_rp;
	dapl_mr_deregister_t	args;
	int 			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
	    mode);
	if (retval != 0) {
		DERR("mr_deregister: copyin error %d\n", retval);
		return (EINVAL);
	}
	retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
	    args.mrd_hkey, (void **)&mr_rp);
	if (retval != 0 || mr_rp == NULL) {
		DERR("mr_deregister: cannot find mr resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);

	daplka_shared_mr_free(mr_rp);
	DAPLKA_RS_UNREF(mr_rp);
	return (0);
}

/*
 * sync local memory regions on RDMA read or write.
 */
/* ARGSUSED */
static int
daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_mr_sync_t	args;
	daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
	ibt_mr_sync_t	mrs[DAPL_MR_PER_SYNC];
	uint32_t	sync_direction_flags;
	ibt_status_t	status;
	int		i, j;
	int		retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
	if (retval != 0) {
		DERR("mr_sync: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* bounds check the number of segments */
	if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
		DERR("mr_sync: number of segments too large\n");
		return (EINVAL);
	}

	/* translate MR sync direction flag */
	if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
		sync_direction_flags = IBT_SYNC_READ;
	} else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
		sync_direction_flags = IBT_SYNC_WRITE;
	} else {
		DERR("mr_sync: unknown flags\n");
		return (EINVAL);
	}

	/*
	 * all the segments are going to be sync'd by ibtl together
	 */
	for (i = 0; i < args.mrs_numseg; i++) {
		mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
		if (mr_rp[i] == NULL) {
			for (j = 0; j < i; j++) {
				DAPLKA_RS_UNREF(mr_rp[j]);
			}
			DERR("mr_sync: lookup error\n");
			return (EINVAL);
		}
		ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
		mrs[i].ms_handle = mr_rp[i]->mr_hdl;
		mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
		mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
		mrs[i].ms_flags = sync_direction_flags;
	}

	status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
	if (status != IBT_SUCCESS) {
		DERR("mr_sync: ibt_sync_mr error %d\n", status);
		*rvalp = (int)status;
	}
	for (i = 0; i < args.mrs_numseg; i++) {
		DAPLKA_RS_UNREF(mr_rp[i]);
	}
	return (0);
}
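
/*
 * a minimal userland sketch of batching segments for this ioctl
 * (hypothetical; the DAPL_MR_SYNC command name and the open ia_fd
 * are assumptions, the dapl_mr_sync_t fields are the ones consumed
 * above):
 *
 *	dapl_mr_sync_t	s;
 *
 *	s.mrs_flags = DAPL_MR_SYNC_RDMA_RD;
 *	s.mrs_numseg = 1;
 *	s.mrs_vec[0].mrsv_hkey = mr_hkey;
 *	s.mrs_vec[0].mrsv_va = (uint64_t)(uintptr_t)buf;
 *	s.mrs_vec[0].mrsv_len = len;
 *	(void) ioctl(ia_fd, DAPL_MR_SYNC, &s);
 */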

/*
 * destroys a memory region.
 * called when refcnt drops to zero.
 */
static int
daplka_mr_destroy(daplka_resource_t *gen_rp)
{
	daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)gen_rp;
	ibt_status_t		status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
	ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
	ASSERT(mr_rp->mr_shared_mr == NULL);
	D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
	    mr_rp, DAPLKA_RS_RNUM(mr_rp));

	/*
	 * deregister mr
	 */
	if (mr_rp->mr_hdl) {
		status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
		    mr_rp->mr_hdl);
		if (status != IBT_SUCCESS) {
			DERR("mr_destroy: ibt_deregister_mr returned %d\n",
			    status);
		}
		mr_rp->mr_hdl = NULL;
		D3("mr_destroy: mr deregistered\n");
	}
	mr_rp->mr_attr.mr_vaddr = NULL;

	/*
	 * release reference on PD
	 */
	if (mr_rp->mr_pd_res != NULL) {
		DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
		mr_rp->mr_pd_res = NULL;
	}
	mutex_destroy(&mr_rp->mr_lock);
	DAPLKA_RS_FINI(mr_rp);
	kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
	D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
	return (0);
}

/*
 * this function is called by daplka_hash_destroy for
 * freeing MR resource objects
 */
static void
daplka_hash_mr_free(void *obj)
{
	daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)obj;

	daplka_shared_mr_free(mr_rp);
	DAPLKA_RS_UNREF(mr_rp);
}

/*
 * comparison function used for finding a shared mr object
 * from the global shared mr avl tree.
 */
static int
daplka_shared_mr_cmp(const void *smr1, const void *smr2)
{
	daplka_shared_mr_t	*s1 = (daplka_shared_mr_t *)smr1;
	daplka_shared_mr_t	*s2 = (daplka_shared_mr_t *)smr2;
	int i;

	for (i = 4; i >= 0; i--) {
		if (s1->smr_cookie.mc_uint_arr[i] <
		    s2->smr_cookie.mc_uint_arr[i]) {
			return (-1);
		}
		if (s1->smr_cookie.mc_uint_arr[i] >
		    s2->smr_cookie.mc_uint_arr[i]) {
			return (1);
		}
	}
	return (0);
}
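
/*
 * a comparator of this shape (returning -1, 0 or 1) is what
 * avl_create expects. a sketch of how the global tree would be set
 * up with it (the smr_node link field name is an assumption; the
 * real initialization lives in the attach path):
 *
 *	avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
 *	    sizeof (daplka_shared_mr_t),
 *	    offsetof(daplka_shared_mr_t, smr_node));
 */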

/*
 * allocates a protection domain.
 */
/* ARGSUSED */
static int
daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_pd_alloc_t		args;
	daplka_pd_resource_t	*pd_rp;
	ibt_status_t		status;
	uint64_t		pd_hkey = 0;
	boolean_t		inserted = B_FALSE;
	int			retval;

	pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
	if (pd_rp == NULL) {
		DERR("pd_alloc: cannot allocate pd resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
	DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
	    DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);

	pd_rp->pd_hca = ia_rp->ia_hca;
	pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
	status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
	    IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
	if (status != IBT_SUCCESS) {
		DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into pd hash table */
	retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
	    &pd_hkey, (void *)pd_rp);
	if (retval != 0) {
		DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pd_rp))

	/* return hkey to library */
	args.pda_hkey = pd_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
	    mode);
	if (retval != 0) {
		DERR("pd_alloc: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_pd_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
		    (void **)&free_rp);
		if (free_rp != pd_rp) {
			DERR("pd_alloc: cannot remove pd from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in pd_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(pd_rp);
	return (retval);
}

/*
 * destroys a protection domain.
 * called when refcnt drops to zero.
 */
static int
daplka_pd_destroy(daplka_resource_t *gen_rp)
{
	daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
	ibt_status_t status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
	ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
	D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
	    pd_rp, DAPLKA_RS_RNUM(pd_rp));

	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	if (pd_rp->pd_hdl != NULL) {
		status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
		    pd_rp->pd_hdl);
		if (status != IBT_SUCCESS) {
			DERR("pd_destroy: ibt_free_pd returned %d\n", status);
		}
	}
	DAPLKA_RS_FINI(pd_rp);
	kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
	D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
	return (0);
}

static void
daplka_hash_pd_free(void *obj)
{
	daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	DAPLKA_RS_UNREF(pd_rp);
}

/*
 * removes the pd reference from ia_pd_htbl and releases the
 * initial reference to the pd. also destroys the pd if the refcnt
 * is zero.
 */
/* ARGSUSED */
static int
daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_pd_resource_t *pd_rp;
	dapl_pd_free_t args;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
	if (retval != 0) {
		DERR("pd_free: copyin error %d\n", retval);
		return (EINVAL);
	}

	retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
	    args.pdf_hkey, (void **)&pd_rp);
	if (retval != 0 || pd_rp == NULL) {
		DERR("pd_free: cannot find pd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(pd_rp);
	return (0);
}

/*
 * allocates a memory window
 */
/* ARGSUSED */
static int
daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_pd_resource_t	*pd_rp;
	daplka_mw_resource_t	*mw_rp;
	dapl_mw_alloc_t		args;
	ibt_status_t		status;
	boolean_t		inserted = B_FALSE;
	uint64_t		mw_hkey;
	ibt_rkey_t		mw_rkey;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
	if (retval != 0) {
		DERR("mw_alloc: copyin error %d\n", retval);
		return (EFAULT);
	}

	/*
	 * Allocate and initialize an MW resource
	 */
	mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
	if (mw_rp == NULL) {
		DERR("mw_alloc: cannot allocate mw resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
	DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
	    DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);

	mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
	mw_rp->mw_hca = ia_rp->ia_hca;
	mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
	if (pd_rp == NULL) {
		DERR("mw_alloc: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);

	mw_rp->mw_pd_res = pd_rp;

	status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
	    pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);

	if (status != IBT_SUCCESS) {
		DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	mutex_enter(&ia_rp->ia_lock);
	switch (ia_rp->ia_state) {
	case DAPLKA_IA_INIT:
		ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
		ia_rp->ia_mw_alloccnt++;
		retval = 0;
		break;
	case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
		/* another mw_alloc is already in progress; increase cnt */
		ia_rp->ia_mw_alloccnt++;
		retval = 0;
		break;
	case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
		/* FALLTHRU */
	case DAPLKA_IA_MW_FROZEN:
		/*
		 * the IA is being frozen or is already frozen; don't
		 * allow more MWs to be allocated.
		 */
		DERR("mw_alloc:	IA is freezing MWs (state=%d)\n",
		    ia_rp->ia_state);
		retval = EINVAL;
		break;
	default:
		ASSERT(!"Invalid IA state in mw_alloc");
		DERR("mw_alloc:	IA state=%d invalid\n", ia_rp->ia_state);
		retval = EINVAL;
		break;
	}
	mutex_exit(&ia_rp->ia_lock);
	/* retval is 0 when ia_mw_alloccnt is incremented */
	if (retval != 0) {
		goto cleanup;
	}

	/* insert into mw hash table */
	mw_hkey = 0;
	retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
	    (void *)mw_rp);
	if (retval != 0) {
		DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
		mutex_enter(&ia_rp->ia_lock);
		ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
		ia_rp->ia_mw_alloccnt--;
		if (ia_rp->ia_mw_alloccnt == 0) {
			ia_rp->ia_state = DAPLKA_IA_INIT;
			cv_broadcast(&ia_rp->ia_cv);
		}
		mutex_exit(&ia_rp->ia_lock);
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp))

	D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
	    mw_rp->mw_hdl, (longlong_t)mw_rkey);

	mutex_enter(&ia_rp->ia_lock);
	/*
	 * We are done with mw_alloc. If this was the last outstanding
	 * mw_alloc, change state back to DAPLKA_IA_INIT and wake up
	 * the waiters, specifically the unlock callback.
	 */
	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
	ia_rp->ia_mw_alloccnt--;
	if (ia_rp->ia_mw_alloccnt == 0) {
		ia_rp->ia_state = DAPLKA_IA_INIT;
		cv_broadcast(&ia_rp->ia_cv);
	}
	mutex_exit(&ia_rp->ia_lock);

	args.mw_hkey = mw_hkey;
	args.mw_rkey = mw_rkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
	    mode);
	if (retval != 0) {
		DERR("mw_alloc: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_mw_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
		    (void **)&free_rp);
		if (free_rp != mw_rp) {
			DERR("mw_alloc: cannot remove mw from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in mw_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(mw_rp);
	return (retval);
}
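
/*
 * ia_mw_alloccnt and ia_state together act as a gate against the
 * umem-unlock freeze callback: each allocator bumps the count on
 * entry (moving the state to MW_ALLOC_IN_PROGRESS) and the last one
 * out restores INIT and wakes any waiters. the logic above,
 * condensed (all under ia_lock):
 *
 *	entry:	ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
 *		ia_rp->ia_mw_alloccnt++;
 *
 *	exit:	if (--ia_rp->ia_mw_alloccnt == 0) {
 *			ia_rp->ia_state = DAPLKA_IA_INIT;
 *			cv_broadcast(&ia_rp->ia_cv);
 *		}
 */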

/*
 * removes the mw reference from ia_mw_htbl and releases the
 * initial reference to the mw. also destroys the mw if the refcnt
 * is zero.
 */
/* ARGSUSED */
static int
daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_mw_resource_t	*mw_rp = NULL;
	dapl_mw_free_t		args;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
	if (retval != 0) {
		DERR("mw_free: copyin error %d\n", retval);
		return (EFAULT);
	}

	retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
	    (void **)&mw_rp);
	if (retval != 0 || mw_rp == NULL) {
		DERR("mw_free: cannot find mw resrc (0x%llx)\n",
		    (longlong_t)args.mw_hkey);
		return (EINVAL);
	}

	ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(mw_rp);
	return (retval);
}

/*
 * destroys the memory window.
 * called when refcnt drops to zero.
 */
static int
daplka_mw_destroy(daplka_resource_t *gen_rp)
{
	daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)gen_rp;
	ibt_status_t		status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
	ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
	D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
	    mw_rp, DAPLKA_RS_RNUM(mw_rp));

	/*
	 * free memory window
	 */
	if (mw_rp->mw_hdl) {
		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
		    mw_rp->mw_hdl);
		if (status != IBT_SUCCESS) {
			DERR("mw_destroy: ibt_free_mw returned %d\n", status);
		}
		mw_rp->mw_hdl = NULL;
		D3("mw_destroy: mw freed\n");
	}

	/*
	 * release reference on PD
	 */
	if (mw_rp->mw_pd_res != NULL) {
		DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
		mw_rp->mw_pd_res = NULL;
	}
	mutex_destroy(&mw_rp->mw_lock);
	DAPLKA_RS_FINI(mw_rp);
	kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
	D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
	return (0);
}

static void
daplka_hash_mw_free(void *obj)
{
	daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
	DAPLKA_RS_UNREF(mw_rp);
}

/*
 * SRQ ioctls and supporting functions
 */
/* ARGSUSED */
static int
daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t		*srq_rp;
	daplka_pd_resource_t		*pd_rp;
	dapl_srq_create_t		args;
	ibt_srq_sizes_t			srq_sizes;
	ibt_srq_sizes_t			srq_real_sizes;
	ibt_hca_attr_t			*hca_attrp;
	uint64_t			srq_hkey = 0;
	boolean_t			inserted = B_FALSE;
	int				retval;
	ibt_status_t			status;

	D3("srq_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
	    mode);
	if (retval != 0) {
		DERR("srq_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
	if (srq_rp == NULL) {
		DERR("srq_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
	    DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);

	srq_rp->srq_hca = ia_rp->ia_hca;
	srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
	mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
	if (pd_rp == NULL) {
		DERR("srq_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	srq_rp->srq_pd_res = pd_rp;

	/*
	 * these checks ensure that the requested SRQ sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
		DERR("srq_create: invalid srqs_sz %d\n",
		    args.srqc_sizes.srqs_sz);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
		DERR("srq_create: invalid srqs_sgl %d\n",
		    args.srqc_sizes.srqs_sgl);
		retval = EINVAL;
		goto cleanup;
	}

	D3("srq_create: srq_sgl %d, srq_sz %d\n",
	    args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);

	srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
	srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;

	/* create srq */
	status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
	    IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
	    &srq_real_sizes);
	if (status != IBT_SUCCESS) {
		DERR("srq_create: alloc_srq returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
	args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
	    &args.srqc_data_out, sizeof (args.srqc_data_out));

	if (status != IBT_SUCCESS) {
		DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;

	/* preparing to copyout map_data back to the library */
	args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
	args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;

	/* insert into srq hash table */
	retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
	    &srq_hkey, (void *)srq_rp);
	if (retval != 0) {
		DERR("srq_create: cannot insert srq resource into srq_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/* return hkey to library */
	args.srqc_hkey = srq_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
	    mode);
	if (retval != 0) {
		DERR("srq_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
	D3("	sz(%d) sgl(%d)\n",
	    args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
	D3("srq_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_srq_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
		    (void **)&free_rp);
		if (free_rp != srq_rp) {
			/*
			 * this case is impossible because srq_free will
			 * wait until our state transition is complete.
			 */
			DERR("srq_create: cannot remove srq from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(srq_rp);
	return (retval);
}

/*
 * Resize an existing SRQ
 */
/* ARGSUSED */
static int
daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t		*srq_rp = NULL;
	ibt_hca_attr_t			*hca_attrp;
	dapl_srq_resize_t		args;
	ibt_status_t			status;
	int				retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get srq resource */
	srq_rp = (daplka_srq_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
	if (srq_rp == NULL) {
		DERR("srq_resize: cannot find srq resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);

	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
		DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
		retval = EINVAL;
		goto cleanup;
	}

	mutex_enter(&srq_rp->srq_lock);
	/*
	 * If the resize fails, it is primarily due to resource
	 * shortage. Per the IB spec, a resize will never lose events
	 * and a resize error leaves the SRQ intact. Therefore, even if
	 * the resize request fails, we proceed and get the mapping
	 * data from the SRQ so that the library can mmap it.
	 */
	status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
	    args.srqr_new_size, 0, &args.srqr_real_size);
	if (status != IBT_SUCCESS) {
		/* we return the size of the old SRQ if the resize fails */
		args.srqr_real_size = srq_rp->srq_real_size;
		ASSERT(status != IBT_SRQ_HDL_INVALID);
		DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
	} else {
		srq_rp->srq_real_size = args.srqr_real_size;
	}
	mutex_exit(&srq_rp->srq_lock);


	D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
	    DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
	    &args.srqr_data_out, sizeof (args.srqr_data_out));
	if (status != IBT_SUCCESS) {
		DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
		/* return ibt_ci_data_out status */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (srq_rp != NULL) {
		DAPLKA_RS_UNREF(srq_rp);
	}
	return (retval);
}
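
/*
 * Illustrative sketch of the userland side (not compiled into this
 * driver): because a failed ibt_modify_srq leaves the SRQ intact, the
 * resize ioctl above can return 0 with the old size echoed back. A
 * library caller can detect that case by comparing sizes. The ioctl
 * command name DAPL_SRQ_RESIZE and the descriptor ia_fd are
 * hypothetical; the field names match dapl_srq_resize_t as used above.
 *
 *	dapl_srq_resize_t args;
 *
 *	args.srqr_hkey = srq_hkey;
 *	args.srqr_new_size = new_sz;
 *	if (ioctl(ia_fd, DAPL_SRQ_RESIZE, &args) == 0 &&
 *	    args.srqr_real_size < new_sz)
 *		... the resize did not take effect; the SRQ keeps
 *		... its old size
 */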

/*
 * Frees an SRQ resource.
 */
/* ARGSUSED */
static int
daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t	*srq_rp = NULL;
	dapl_srq_free_t		args;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
	if (retval != 0) {
		DERR("srq_free: copyin error %d\n", retval);
		return (EFAULT);
	}

	retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
	    args.srqf_hkey, (void **)&srq_rp);
	if (retval != 0 || srq_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling srq_free in parallel.
		 */
		DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
		    retval, args.srqf_hkey);
		return (EINVAL);
	}

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(srq_rp);
	return (0);
}

/*
 * destroys an SRQ resource.
 * called when refcnt drops to zero.
 */
static int
daplka_srq_destroy(daplka_resource_t *gen_rp)
{
	daplka_srq_resource_t	*srq_rp = (daplka_srq_resource_t *)gen_rp;
	ibt_status_t		status;

	ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);

	D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
	    srq_rp, DAPLKA_RS_RNUM(srq_rp));
	/*
	 * destroy the srq
	 */
	if (srq_rp->srq_hdl != NULL) {
		status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
		if (status != IBT_SUCCESS) {
			DERR("srq_destroy: ibt_free_srq returned %d\n",
			    status);
		}
		srq_rp->srq_hdl = NULL;
		D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
	}
	/*
	 * release all references
	 */
	if (srq_rp->srq_pd_res != NULL) {
		DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
		srq_rp->srq_pd_res = NULL;
	}

	mutex_destroy(&srq_rp->srq_lock);
	DAPLKA_RS_FINI(srq_rp);
	kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
	D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
	return (0);
}

static void
daplka_hash_srq_free(void *obj)
{
	daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
	DAPLKA_RS_UNREF(srq_rp);
}

/*
 * This function tells the CM to start listening on a service id.
 * It must be called by the passive side client before the client
 * can receive connection requests from remote endpoints. If the
 * client specifies a non-zero service id (connection qualifier in
 * dapl terms), this function will attempt to bind to this service
 * id and return an error if the id is already in use. If the client
 * specifies zero as the service id, this function will try to find
 * the next available service id and return it back to the client.
 * To support the cr_handoff function, this function will, in addition
 * to creating and inserting an SP resource into the per-IA SP hash
 * table, insert the SP resource into a global SP table. This table
 * maintains all active service points created by all dapl clients.
 * CR handoff locates the target SP by iterating through this global
 * table.
 */
/* ARGSUSED */
static int
daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	daplka_sp_resource_t	*sp_rp = NULL;
	dapl_service_register_t	args;
	ibt_srv_desc_t		sd_args;
	ibt_srv_bind_t		sb_args;
	ibt_status_t		status;
	ib_svc_id_t		retsid = 0;
	uint64_t		sp_hkey = 0;
	boolean_t		bumped = B_FALSE;
	int			backlog_size;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args,
	    sizeof (dapl_service_register_t), mode);
	if (retval != 0) {
		DERR("service_register: copyin error %d\n", retval);
		return (EFAULT);
	}

	sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
	if (sp_rp == NULL) {
		DERR("service_register: cannot allocate sp resource\n");
		return (ENOMEM);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
	DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);

	/* check if evd exists */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
	if (evd_rp == NULL) {
		DERR("service_register: evd resource not found\n");
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * initialize backlog size
	 */
	if (evd_rp && evd_rp->evd_cq_real_size > 0) {
		backlog_size = evd_rp->evd_cq_real_size + 1;
	} else {
		backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
	}
	D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);

	/* save the userland sp ptr */
	sp_rp->sp_cookie = args.sr_sp_cookie;
	sp_rp->sp_backlog_size = backlog_size;
	D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
	sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
	    sizeof (daplka_sp_conn_pend_t), daplka_km_flags);

	/* save evd resource pointer */
	sp_rp->sp_evd_res = evd_rp;

	/*
	 * save ruid here so that we can do a comparison later
	 * when someone does cr_handoff. the check will prevent
	 * a malicious app from passing a CR to us.
	 */
	sp_rp->sp_ruid = crgetruid(cred);

	/* fill in args for register_service */
	sd_args.sd_ud_handler = NULL;
	sd_args.sd_handler = daplka_cm_service_handler;
	sd_args.sd_flags = IBT_SRV_NO_FLAGS;

	status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
	    &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);

	if (status != IBT_SUCCESS) {
		DERR("service_register: ibt_register_service returned %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/* save returned sid */
	sp_rp->sp_conn_qual = retsid;
	args.sr_retsid = retsid;

	/* fill in args for bind_service */
	sb_args.sb_pkey = ia_rp->ia_port_pkey;
	sb_args.sb_lease = 0xffffffff;
	sb_args.sb_key[0] = 0x1234;
	sb_args.sb_key[1] = 0x5678;
	sb_args.sb_name = DAPLKA_DRV_NAME;

	D2("service_register: bind(0x%llx:0x%llx)\n",
	    (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
	    (longlong_t)ia_rp->ia_hca_sgid.gid_guid);

	status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
	    &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
	if (status != IBT_SUCCESS) {
		DERR("service_register: ibt_bind_service returned %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * need to bump refcnt because the global hash table will
	 * have a reference to sp_rp
	 */
	DAPLKA_RS_REF(sp_rp);
	bumped = B_TRUE;

	/* insert into global sp hash table */
	sp_rp->sp_global_hkey = 0;
	retval = daplka_hash_insert(&daplka_global_sp_htbl,
	    &sp_rp->sp_global_hkey, (void *)sp_rp);
	if (retval != 0) {
		DERR("service_register: cannot insert sp resource\n");
		goto cleanup;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp))

	/* insert into per-IA sp hash table */
	retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
	    &sp_hkey, (void *)sp_rp);
	if (retval != 0) {
		DERR("service_register: cannot insert sp resource\n");
		goto cleanup;
	}

	/* pass index to application */
	args.sr_sp_hkey = sp_hkey;
	retval = ddi_copyout(&args, (void *)arg,
	    sizeof (dapl_service_register_t), mode);
	if (retval != 0) {
		DERR("service_register: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	ASSERT(sp_rp != NULL);
	/* remove from ia table */
	if (sp_hkey != 0) {
		daplka_sp_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
		    sp_hkey, (void **)&free_rp);
		if (free_rp != sp_rp) {
			DERR("service_register: cannot remove sp\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in svc_deregister
			 */
			return (retval);
		}
	}

	/* remove from global table */
	if (sp_rp->sp_global_hkey != 0) {
		daplka_sp_resource_t *free_rp = NULL;

		/*
		 * we get here if either the hash_insert into
		 * ia_sp_htbl failed or the ddi_copyout failed.
		 * a hash_insert failure implies that we are the
		 * only thread with a reference to the sp. a
		 * ddi_copyout failure implies that svc_deregister
		 * could have picked up the sp and destroyed it.
		 * but since we got to this point, we must have
		 * removed the sp ourselves in the hash_remove
		 * above, so the sp can safely be destroyed by us.
		 */
		(void) daplka_hash_remove(&daplka_global_sp_htbl,
		    sp_rp->sp_global_hkey, (void **)&free_rp);
		if (free_rp != sp_rp) {
			DERR("service_register: cannot remove sp\n");
			/*
			 * this case is impossible. see explanation above.
			 */
			ASSERT(B_FALSE);
			return (retval);
		}
		sp_rp->sp_global_hkey = 0;
	}
	/* unreference sp */
	if (bumped) {
		DAPLKA_RS_UNREF(sp_rp);
	}

	/* destroy sp resource */
	DAPLKA_RS_UNREF(sp_rp);
	return (retval);
}
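
/*
 * Illustrative sketch of the userland side (not compiled into this
 * driver): a passive-side client would drive the register path with
 * an ioctl roughly as follows. The ioctl command name
 * DAPL_SERVICE_REGISTER and the descriptor ia_fd are hypothetical;
 * the field names match dapl_service_register_t as used above.
 *
 *	dapl_service_register_t args;
 *
 *	bzero(&args, sizeof (args));
 *	args.sr_sid = 0;		... 0 = pick next free service id
 *	args.sr_evd_hkey = cr_evd_hkey;	... EVD that receives CR events
 *	args.sr_sp_cookie = (uint64_t)(uintptr_t)my_sp;
 *	if (ioctl(ia_fd, DAPL_SERVICE_REGISTER, &args) == 0) {
 *		... args.sr_retsid is the service id actually bound;
 *		... args.sr_sp_hkey is later passed to service_deregister
 *	}
 */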

/*
 * deregisters the service and removes SP from the global table.
 */
/* ARGSUSED */
static int
daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_service_deregister_t	args;
	daplka_sp_resource_t		*sp_rp = NULL, *g_sp_rp = NULL;
	int				retval;

	retval = ddi_copyin((void *)arg, &args,
	    sizeof (dapl_service_deregister_t), mode);

	if (retval != 0) {
		DERR("service_deregister: copyin error %d\n", retval);
		return (EFAULT);
	}

	retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
	    args.sdr_sp_hkey, (void **)&sp_rp);
	if (retval != 0 || sp_rp == NULL) {
		DERR("service_deregister: cannot find sp resource\n");
		return (EINVAL);
	}

	retval = daplka_hash_remove(&daplka_global_sp_htbl,
	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
	if (retval != 0 || g_sp_rp == NULL) {
		DERR("service_deregister: cannot find sp resource\n");
	}

	/* remove the global reference */
	if (g_sp_rp == sp_rp) {
		DAPLKA_RS_UNREF(g_sp_rp);
	}

	DAPLKA_RS_UNREF(sp_rp);
	return (0);
}

/*
 * destroys a service point.
 * called when the refcnt drops to zero.
 */
static int
daplka_sp_destroy(daplka_resource_t *gen_rp)
{
	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
	ibt_status_t status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
	ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
	D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
	    sp_rp, DAPLKA_RS_RNUM(sp_rp));

	/*
	 * it is possible for pending connections to remain
	 * on an SP. We need to clean them up here.
	 */
	if (sp_rp->sp_backlog != NULL) {
		ibt_cm_proceed_reply_t proc_reply;
		int i, cnt = 0;
		void *spcp_sidp;

		for (i = 0; i < sp_rp->sp_backlog_size; i++) {
			if (sp_rp->sp_backlog[i].spcp_state ==
			    DAPLKA_SPCP_PENDING) {
				cnt++;
				if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
					DERR("sp_destroy: "
					    "spcp_sid == NULL!\n");
					continue;
				}
				mutex_enter(&sp_rp->sp_lock);
				spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
				sp_rp->sp_backlog[i].spcp_state =
				    DAPLKA_SPCP_INIT;
				sp_rp->sp_backlog[i].spcp_sid = NULL;
				sp_rp->sp_backlog[i].spcp_req_len = 0;
				mutex_exit(&sp_rp->sp_lock);
				status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
				    spcp_sidp,
				    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
				if (status != IBT_SUCCESS) {
					DERR("sp_destroy: proceed failed %d\n",
					    status);
				}
			}
		}
		if (cnt > 0) {
			DERR("sp_destroy: found %d pending "
			    "connections\n", cnt);
		}
	}

	if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
		status = ibt_unbind_service(sp_rp->sp_srv_hdl,
		    sp_rp->sp_bind_hdl);
		if (status != IBT_SUCCESS) {
			DERR("sp_destroy: ibt_unbind_service "
			    "failed: %d\n", status);
		}
	}

	if (sp_rp->sp_srv_hdl != NULL) {
		status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
		    sp_rp->sp_srv_hdl);
		if (status != IBT_SUCCESS) {
			DERR("sp_destroy: ibt_deregister_service "
			    "failed: %d\n", status);
		}
	}
	if (sp_rp->sp_backlog != NULL) {
		kmem_free(sp_rp->sp_backlog,
		    sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
		sp_rp->sp_backlog = NULL;
		sp_rp->sp_backlog_size = 0;
	}

	/*
	 * release reference to evd
	 */
	if (sp_rp->sp_evd_res != NULL) {
		DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
	}
	sp_rp->sp_bind_hdl = NULL;
	sp_rp->sp_srv_hdl = NULL;
	DAPLKA_RS_FINI(sp_rp);
	kmem_free(sp_rp, sizeof (*sp_rp));
	D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
	return (0);
}

/*
 * this function is called by daplka_hash_destroy for
 * freeing SP resource objects
 */
static void
daplka_hash_sp_free(void *obj)
{
	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
	daplka_sp_resource_t *g_sp_rp = NULL;
	int retval;

	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

	retval = daplka_hash_remove(&daplka_global_sp_htbl,
	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
	if (retval != 0 || g_sp_rp == NULL) {
		DERR("sp_free: cannot find sp resource\n");
	}
	if (g_sp_rp == sp_rp) {
		DAPLKA_RS_UNREF(g_sp_rp);
	}

	DAPLKA_RS_UNREF(sp_rp);
}

static void
daplka_hash_sp_unref(void *obj)
{
	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
	DAPLKA_RS_UNREF(sp_rp);
}

/*
 * Passive side CM handlers
 */

/*
 * processes the REQ_RCV event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
{
	daplka_sp_conn_pend_t	*conn = NULL;
	daplka_evd_event_t	*cr_ev = NULL;
	ibt_cm_status_t		cm_status = IBT_CM_DEFAULT;
	uint16_t		bkl_index;
	ibt_status_t		status;

	/*
	 * acquire a slot in the connection backlog of this service point
	 */
	mutex_enter(&spp->sp_lock);
	for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
		if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
			conn = &spp->sp_backlog[bkl_index];
			ASSERT(conn->spcp_sid == NULL);
			conn->spcp_state = DAPLKA_SPCP_PENDING;
			conn->spcp_sid = event->cm_session_id;
			break;
		}
	}
	mutex_exit(&spp->sp_lock);

	/*
	 * too many pending connections
	 */
	if (bkl_index == spp->sp_backlog_size) {
		DERR("service_req: connection pending exceeded %d limit\n",
		    spp->sp_backlog_size);
		return (IBT_CM_NO_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn))

	/*
	 * save data for cr_handoff
	 */
	if (pr_data != NULL && pr_len > 0) {
		int trunc_len = pr_len;

		if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
			DERR("service_req: private data truncated\n");
			trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
		}
		conn->spcp_req_len = trunc_len;
		bcopy(pr_data, conn->spcp_req_data, trunc_len);
	} else {
		conn->spcp_req_len = 0;
	}
	conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
	conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;

	/*
	 * create a CR event
	 */
	cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (cr_ev == NULL) {
		DERR("service_req: could not alloc cr_ev\n");
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	cr_ev->ee_next = NULL;
	cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
	cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
	/*
	 * save the requestor gid.
	 * daplka_event_poll needs this if this is a third party REQ_RCV.
	 */
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_guid;

	/*
	 * set event type
	 */
	if (pr_len == 0) {
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
	} else {
		cr_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_req: could not alloc priv\n");
			cm_status = IBT_CM_NO_RESOURCE;
			goto cleanup;
		}
		bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
	}
	cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * tell the active side to expect the processing time to be
	 * at most equal to daplka_cm_delay
	 */
	status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
	    daplka_cm_delay, NULL, 0);
	if (status != IBT_SUCCESS) {
		DERR("service_req: ibt_cm_delay failed %d\n", status);
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	/*
	 * enqueue cr_ev onto the cr_events list of the EVD
	 * corresponding to the SP
	 */
	D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
	    "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
	    cr_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
	    (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);

	daplka_evd_wakeup(spp->sp_evd_res,
	    &spp->sp_evd_res->evd_cr_events, cr_ev);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn))
	return (IBT_CM_DEFER);

cleanup:;
	/*
	 * free the cr event
	 */
	if (cr_ev != NULL) {
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
			kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
			cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
			cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(cr_ev, sizeof (daplka_evd_event_t));
	}
	/*
	 * release our slot in the backlog array
	 */
	if (conn != NULL) {
		mutex_enter(&spp->sp_lock);
		ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
		ASSERT(conn->spcp_sid == event->cm_session_id);
		conn->spcp_state = DAPLKA_SPCP_INIT;
		conn->spcp_req_len = 0;
		conn->spcp_sid = NULL;
		mutex_exit(&spp->sp_lock);
	}
	return (cm_status);
}
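
/*
 * Backlog slot life cycle, summarizing the logic above (the accept
 * and reject ioctls that normally consume a slot are implemented
 * elsewhere in this driver and are not shown in this section):
 *
 *	DAPLKA_SPCP_INIT
 *	    | REQ_RCV finds a free slot (under sp_lock)
 *	    v
 *	DAPLKA_SPCP_PENDING	spcp_sid = cm_session_id
 *	    | a later accept/reject (not shown here) consumes the slot;
 *	    | the error path above resets it and returns a CM status,
 *	    | while sp_destroy releases stale slots via ibt_cm_proceed
 *	    v
 *	DAPLKA_SPCP_INIT	spcp_sid = NULL, spcp_req_len = 0
 */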

/*
 * processes the CONN_CLOSED event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
    void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*disc_ev;
	uint32_t		old_state, new_state;

	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_closed: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that the ep_state is either CONNECTED or
	 * DISCONNECTING. if it is not in either state, return
	 * without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		D2("service_conn_closed: conn aborted, state = %d, "
		    "closed = %d\n", old_state, (int)event->cm_event.closed);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_DISCONNECTED event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("service_conn_closed: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
	    disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}

/*
 * processes the CONN_EST event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*conn_ev;
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_est: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING. if it is not in this
	 * state, return without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("service_conn_est: conn aborted, state = %d\n",
		    old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("service_conn_est: conn_ev alloc failed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_conn_est: pr_data alloc failed\n");
			daplka_ep_set_state(ep_rp, old_state, new_state);
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
	    conn_ev, ep_rp->ep_conn_evd);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}

/*
 * processes the FAILURE event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
    ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	daplka_ep_resource_t	*ep_rp;
	uint32_t		old_state, new_state;
	ibt_rc_chan_query_attr_t chan_attrs;
	ibt_status_t		status;

	/*
	 * check that we still have a valid cm_channel before continuing
	 */
	if (event->cm_channel == NULL) {
		DERR("serice_event_failure: event->cm_channel == NULL\n");
		return (IBT_CM_ACCEPT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_event_failure: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
	 * is not in either state, return without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("service_event_failure: conn aborted, state = %d, "
		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
		    (int)event->cm_event.failed.cf_code,
		    (int)event->cm_event.failed.cf_msg,
		    (int)event->cm_event.failed.cf_reason);

		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);

	if ((status == IBT_SUCCESS) &&
	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
		DERR("service_event_failure: conn abort qpn %d state %d\n",
		    chan_attrs.rc_qpn, chan_attrs.rc_state);

		/* explicitly transition the QP to the ERROR state */
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
	}

	/*
	 * create an event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("service_event_failure: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * fill in the appropriate event type
	 */
	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
		switch (event->cm_event.failed.cf_reason) {
		case IBT_CM_INVALID_CID:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		default:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_LOCAL_FAILURE;
			break;
		}
	} else {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
	}
	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;

	D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
	    "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
	    ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
	    (int)event->cm_event.failed.cf_msg,
	    (int)event->cm_event.failed.cf_reason,
	    (longlong_t)ep_rp->ep_psep_cookie);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}

/*
 * this is the passive side CM handler. it gets registered
 * when an SP resource is created in daplka_service_register.
 */
static ibt_cm_status_t
daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)cm_private;

	if (sp_rp == NULL) {
		DERR("service_handler: sp_rp == NULL\n");
		return (IBT_CM_NO_RESOURCE);
	}
	/*
	 * default is not to return priv data
	 */
	if (ret_args != NULL) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
		ret_args->cm_ret_len = 0;
	}

	switch (event->cm_type) {
	case IBT_CM_EVENT_REQ_RCV:
		D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
		return (daplka_cm_service_req(sp_rp, event, ret_args,
		    event->cm_priv_data, event->cm_priv_data_len));

	case IBT_CM_EVENT_REP_RCV:
		/* passive side should not receive this event */
		D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
		return (IBT_CM_DEFAULT);

	case IBT_CM_EVENT_CONN_CLOSED:
		D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
		    event->cm_event.closed);
		return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_MRA_RCV:
		/* passive side does default processing of the MRA event */
		D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
		return (IBT_CM_DEFAULT);

	case IBT_CM_EVENT_CONN_EST:
		D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
		return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_FAILURE:
		D2("service_handler: IBT_CM_EVENT_FAILURE\n");
		return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
		    priv_data, len));
	case IBT_CM_EVENT_LAP_RCV:
		/* active side has initiated a path migration operation */
		D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
		return (IBT_CM_ACCEPT);
	default:
		DERR("service_handler: invalid event %d\n", event->cm_type);
		break;
	}
	return (IBT_CM_DEFAULT);
}

/*
 * Active side CM handlers
 */

/*
 * Processes the REP_RCV event. When the passive side accepts the
 * connection, this handler is called. We make a copy of the private
 * data into the ep so that it can be passed back to userland when
 * the CONN_EST event occurs.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
	    (int)pr_len);

	ASSERT(ep_rp != NULL);
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_NO_CHANNEL);
	}

	/*
	 * we do not cancel the timer here because the connection
	 * handshake is still in progress.
	 */

	/*
	 * save the private data. it will be passed up when
	 * the connection is established.
	 */
	if (pr_len > 0) {
		ep_rp->ep_priv_len = pr_len;
		bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
	}

	/*
	 * we do not actually transition to a different state.
	 * the state will change when we get a conn_est, failure,
	 * closed, or timeout event.
	 */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	return (IBT_CM_ACCEPT);
}

/*
 * Processes the CONN_CLOSED event. This gets called when either
 * the active or passive side closes the rc channel.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		D2("rc_conn_closed: conn aborted, state = %d, "
		    "closed = %d\n", old_state, (int)event->cm_event.closed);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_closed: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_DISCONNECTED event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("rc_conn_closed: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
	    disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}

/*
 * processes the CONN_EST event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*conn_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_est: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("rc_conn_est: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = 0;

	/*
	 * The private data passed back in the connection established
	 * event is what was received in the daplka_cm_rc_rep_rcv
	 * handler and saved in the ep resource structure.
	 */
	if (ep_rp->ep_priv_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);

		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("rc_conn_est: could not alloc pr_data\n");
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
		    ep_rp->ep_priv_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;

	D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
	    "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
	    conn_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}

/*
 * processes the FAILURE event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	void			*pr_data = event->cm_priv_data;
	uint32_t		old_state, new_state;
	ibt_rc_chan_query_attr_t chan_attrs;
	ibt_status_t		status;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_event_failure: conn aborted, state = %d, "
		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
		    (int)event->cm_event.failed.cf_code,
		    (int)event->cm_event.failed.cf_msg,
		    (int)event->cm_event.failed.cf_reason);

		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_event_failure: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);

	if ((status == IBT_SUCCESS) &&
	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
		DERR("rc_event_failure: conn abort qpn %d state %d\n",
		    chan_attrs.rc_qpn, chan_attrs.rc_state);

		/* explicitly transition the QP to the ERROR state */
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
	}

	/*
	 * create an event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("rc_event_failure: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		disc_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);

		if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("rc_event_failure: cannot alloc pr data\n");
			kmem_free(disc_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * fill in the appropriate event type
	 */
	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
		switch (event->cm_event.failed.cf_reason) {
		case IBT_CM_CONSUMER:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
			break;
		case IBT_CM_NO_CHAN:
		case IBT_CM_NO_RESC:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		default:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		}
	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	} else {
		/* others we'll mark as local failure */
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
	}
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;

	D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
	    "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
	    (int)event->cm_event.failed.cf_code,
	    (int)event->cm_event.failed.cf_msg,
	    (int)event->cm_event.failed.cf_reason);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}

/*
 * This is the active side CM handler. It gets registered when
 * ibt_open_rc_channel is called.
 */
static ibt_cm_status_t
daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;

	if (ep_rp == NULL) {
		DERR("rc_handler: ep_rp == NULL\n");
		return (IBT_CM_NO_CHANNEL);
	}
	/*
	 * default is not to return priv data
	 */
	if (ret_args != NULL) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
		ret_args->cm_ret_len = 0;
	}

	switch (event->cm_type) {
	case IBT_CM_EVENT_REQ_RCV:
		/* active side should not receive this event */
		D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
		break;

	case IBT_CM_EVENT_REP_RCV:
		/* connection accepted by passive side */
		D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
		return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_CONN_CLOSED:
		D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
		    event->cm_event.closed);
		return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_MRA_RCV:
		/* active side does default processing of the MRA event */
		D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
		return (IBT_CM_DEFAULT);

	case IBT_CM_EVENT_CONN_EST:
		D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
		return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
		    priv_data, len));

	case IBT_CM_EVENT_FAILURE:
		D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
		return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
		    priv_data, len));

	default:
		D2("rc_handler: invalid event %d\n", event->cm_type);
		break;
	}
	return (IBT_CM_DEFAULT);
}
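
/*
 * Summary of CM event routing, derived from the two handlers above
 * (a descriptive aid only; no new driver logic):
 *
 *	cm_type		service_handler (passive)	rc_handler (active)
 *	REQ_RCV		daplka_cm_service_req		IBT_CM_DEFAULT
 *	REP_RCV		IBT_CM_DEFAULT			daplka_cm_rc_rep_rcv
 *	MRA_RCV		IBT_CM_DEFAULT			IBT_CM_DEFAULT
 *	CONN_CLOSED	..._service_conn_closed		..._rc_conn_closed
 *	CONN_EST	..._service_conn_est		..._rc_conn_est
 *	FAILURE		..._service_event_failure	..._rc_event_failure
 *	LAP_RCV		IBT_CM_ACCEPT			IBT_CM_DEFAULT
 */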

/*
 * creates an IA resource and inserts it into the global resource table.
 */
/* ARGSUSED */
static int
daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ia_resource_t	*ia_rp, *tmp_rp;
	boolean_t		inserted = B_FALSE;
	dapl_ia_create_t	args;
	ibt_hca_hdl_t		hca_hdl;
	ibt_status_t		status;
	ib_gid_t		sgid;
	int			retval;
	ibt_hca_portinfo_t	*pinfop;
	uint_t			pinfon;
	uint_t			size;
	ibt_ar_t		ar_s;
	daplka_hca_t		*hca;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
	    mode);
	if (retval != 0) {
		DERR("ia_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	if (args.ia_version != DAPL_IF_VERSION) {
		DERR("ia_create: invalid version %d, expected version %d\n",
		    args.ia_version, DAPL_IF_VERSION);
		return (EINVAL);
	}

	/*
	 * find the hca with the matching guid
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
	    hca = hca->hca_next) {
		if (hca->hca_guid == args.ia_guid) {
			DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
			break;
		}
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	if (hca == NULL) {
		DERR("ia_create: guid 0x%016llx not found\n",
		    (longlong_t)args.ia_guid);
		return (EINVAL);
	}

	/*
	 * check whether the port number is valid
	 */
	if (args.ia_port > hca->hca_nports) {
		DERR("ia_create: invalid hca_port %d\n", args.ia_port);
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (EINVAL);
	}
	hca_hdl = hca->hca_hdl;
	if (hca_hdl == NULL) {
		DERR("ia_create: hca_hdl == NULL\n");
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (EINVAL);
	}
	status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
	    &pinfop, &pinfon, &size);
	if (status != IBT_SUCCESS) {
		DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
		*rvalp = (int)status;
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (0);
	}
	sgid = pinfop->p_sgid_tbl[0];
	ibt_free_portinfo(pinfop, size);

	ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
	DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);

	mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
	ia_rp->ia_hca_hdl = hca_hdl;
	ia_rp->ia_hca_sgid = sgid;
	ia_rp->ia_hca = hca;
	ia_rp->ia_port_num = args.ia_port;
	ia_rp->ia_port_pkey = args.ia_pkey;
	ia_rp->ia_pid = ddi_get_pid();
	ia_rp->ia_async_evd_hkeys = NULL;
	ia_rp->ia_ar_registered = B_FALSE;
	bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);

	/* register Address Record */
	ar_s.ar_gid = ia_rp->ia_hca_sgid;
	ar_s.ar_pkey = ia_rp->ia_port_pkey;
	bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
#define	UC(b) ar_s.ar_data[(b)]
	D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
	    UC(8), UC(9), UC(10), UC(11));
	D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
	    UC(12), UC(13), UC(14), UC(15));
	retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
	if (retval != IBT_SUCCESS) {
		DERR("ia_create: failed to register Address Record.\n");
		retval = EINVAL;
		goto cleanup;
	}
	ia_rp->ia_ar_registered = B_TRUE;

	/*
	 * create hash tables for all object types
	 */
	retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
	    daplka_hash_ep_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create ep hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
	    daplka_hash_mr_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create mr hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
	    daplka_hash_mw_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create mw hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
	    daplka_hash_pd_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create pd hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
	    daplka_hash_evd_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create evd hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
	    daplka_hash_cno_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create cno hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
	    daplka_hash_sp_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create sp hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
	    daplka_hash_srq_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create srq hash table\n");
		goto cleanup;
	}
	/*
	 * insert ia_rp into the global resource table
	 */
	retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
	if (retval != 0) {
		DERR("ia_create: cannot insert resource\n");
		goto cleanup;
	}
	inserted = B_TRUE;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ia_rp))

	args.ia_resnum = rnum;
	retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
	if (retval != 0) {
		DERR("ia_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
		if (tmp_rp != ia_rp) {
			/*
			 * we can return here because another thread must
			 * have freed up the resource
			 */
			DERR("ia_create: cannot remove resource\n");
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(ia_rp);
	return (retval);
}
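
/*
 * Illustrative sketch of the userland side (not compiled into this
 * driver): the ioctl command name DAPL_IA_CREATE and the descriptor
 * ia_fd are hypothetical; the field names match dapl_ia_create_t as
 * used above.
 *
 *	dapl_ia_create_t args;
 *
 *	bzero(&args, sizeof (args));
 *	args.ia_version = DAPL_IF_VERSION;
 *	args.ia_guid = hca_guid;	... from HCA enumeration
 *	args.ia_port = 1;
 *	args.ia_pkey = pkey;
 *	... fill args.ia_sadata with the DAPL_ATS_NBYTES address record
 *	if (ioctl(ia_fd, DAPL_IA_CREATE, &args) == 0)
 *		... args.ia_resnum identifies this IA in later calls
 */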

/*
 * destroys an IA resource
 */
static int
daplka_ia_destroy(daplka_resource_t *gen_rp)
{
	daplka_ia_resource_t	*ia_rp = (daplka_ia_resource_t *)gen_rp;
	daplka_async_evd_hkey_t *hkp;
	int			cnt;
	ibt_ar_t		ar_s;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
	D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);

	/* deregister Address Record */
	if (ia_rp->ia_ar_registered) {
		ar_s.ar_gid = ia_rp->ia_hca_sgid;
		ar_s.ar_pkey = ia_rp->ia_port_pkey;
		bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
		(void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
		ia_rp->ia_ar_registered = B_FALSE;
	}

	/*
	 * destroy hash tables. make sure resources are
	 * destroyed in the correct order.
	 */
	daplka_hash_destroy(&ia_rp->ia_mw_htbl);
	daplka_hash_destroy(&ia_rp->ia_mr_htbl);
	daplka_hash_destroy(&ia_rp->ia_ep_htbl);
	daplka_hash_destroy(&ia_rp->ia_srq_htbl);
	daplka_hash_destroy(&ia_rp->ia_evd_htbl);
	daplka_hash_destroy(&ia_rp->ia_cno_htbl);
	daplka_hash_destroy(&ia_rp->ia_pd_htbl);
	daplka_hash_destroy(&ia_rp->ia_sp_htbl);

	/*
	 * free the async evd list
	 */
	cnt = 0;
	hkp = ia_rp->ia_async_evd_hkeys;
	while (hkp != NULL) {
		daplka_async_evd_hkey_t	*free_hkp;

		cnt++;
		free_hkp = hkp;
		hkp = hkp->aeh_next;
		kmem_free(free_hkp, sizeof (*free_hkp));
	}
	if (cnt > 0) {
		D3("ia_destroy: freed %d hkeys\n", cnt);
	}
	mutex_destroy(&ia_rp->ia_lock);
	cv_destroy(&ia_rp->ia_cv);
	ia_rp->ia_hca_hdl = NULL;

	DAPLKA_RS_FINI(ia_rp);

	if (ia_rp->ia_hca)
		DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);

	kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
	D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
	return (0);
}

static void
daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
    uint64_t cookie, daplka_ia_resource_t *ia_rp)
{
	daplka_evd_event_t	*evp;
	daplka_evd_resource_t	*async_evd;
	daplka_async_evd_hkey_t	*curr;

	mutex_enter(&ia_rp->ia_lock);
	curr = ia_rp->ia_async_evd_hkeys;
	while (curr != NULL) {
		/*
		 * Note: this allocation does not zero out the buffer
		 * since we init all the fields.
		 */
		evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
		if (evp == NULL) {
			DERR("async_event_enqueue: event alloc failed"
			    "!found\n", ia_rp, curr->aeh_evd_hkey);
			curr = curr->aeh_next;
			continue;
		}
		evp->ee_next = NULL;
		evp->ee_aev.ibae_type = code;
		evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
		evp->ee_aev.ibae_cookie = cookie;
		evp->ee_aev.ibae_port = event->ev_port;

		/*
		 * Lookup the async evd corresponding to this ia and enqueue
		 * evp and wakeup any waiter.
		 */
		async_evd = (daplka_evd_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
		if (async_evd == NULL) { /* async evd is being freed */
			DERR("async_event_enqueue: ia_rp(%p) asycn_evd %llx "
			    "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
			kmem_free(evp, sizeof (daplka_evd_event_t));
			curr = curr->aeh_next;
			continue;
		}
		daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);

		/* decrement refcnt on async_evd */
		DAPLKA_RS_UNREF(async_evd);
		curr = curr->aeh_next;
	}
	mutex_exit(&ia_rp->ia_lock);
}
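
/*
 * Fan-out summary (descriptive only): a single IBT async event is
 * replicated once for every async EVD registered on the IA:
 *
 *	for each hkey on ia_rp->ia_async_evd_hkeys:
 *		allocate a daplka_evd_event_t and fill in the
 *		ibae_type/guid/cookie/port fields
 *		look up the async EVD by hkey (skip if being freed)
 *		daplka_evd_wakeup(evd, &evd->evd_async_events, evp)
 */
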
/*
 * This routine is called in kernel context
 */

/* ARGSUSED */
static void
daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	daplka_ep_resource_t		*epp;
	daplka_ia_resource_t		*ia_rp;
	minor_t				ia_rnum;

	if (event->ev_chan_hdl == NULL) {
		DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
		return;
	}

	mutex_enter(&daplka_dev->daplka_mutex);
	epp = ibt_get_chan_private(event->ev_chan_hdl);
	if (epp == NULL) {
		mutex_exit(&daplka_dev->daplka_mutex);
		DERR("daplka_rc_async_handler: chan_private is NULL\n");
		return;
	}

	/* grab a reference to this ep */
	DAPLKA_RS_REF(epp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/*
	 * The endpoint resource has the resource number corresponding to
	 * the IA resource. Use that to look up the ia resource entry.
	 */
	ia_rnum = DAPLKA_RS_RNUM(epp);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
		D2("daplka_rc_async_handler: resource (%d) not found\n",
		    ia_rnum);
		DAPLKA_RS_UNREF(epp);
		return;
	}

	/*
	 * Create an async event and chain it to the async evd
	 */
	daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);

	DAPLKA_RS_UNREF(ia_rp);
	DAPLKA_RS_UNREF(epp);
}

/*
 * This routine is called in kernel context
 */

/* ARGSUSED */
static void
daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	daplka_evd_resource_t		*evdp;
	daplka_ia_resource_t		*ia_rp;
	minor_t				ia_rnum;

	if (event->ev_cq_hdl == NULL)
		return;

	mutex_enter(&daplka_dev->daplka_mutex);
	evdp = ibt_get_cq_private(event->ev_cq_hdl);
	if (evdp == NULL) {
		mutex_exit(&daplka_dev->daplka_mutex);
		DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
		    event->ev_cq_hdl);
		return;
	}
	/* grab a reference to this evd resource */
	DAPLKA_RS_REF(evdp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/*
	 * The EVD resource has the resource number corresponding to
	 * the IA resource. Use that to look up the ia resource entry.
	 */
	ia_rnum = DAPLKA_RS_RNUM(evdp);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
		DERR("daplka_cq_async_handler: resource (%d) not found\n",
		    ia_rnum);
		DAPLKA_RS_UNREF(evdp);
		return;
	}

	/*
	 * Create an async event and chain it to the async evd
	 */
	daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);

	/* release all the refcount that were acquired */
	DAPLKA_RS_UNREF(ia_rp);
	DAPLKA_RS_UNREF(evdp);
}

/*
 * This routine is called in kernel context. It handles unaffiliated
 * async errors.
 */

/* ARGSUSED */
static void
daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	int			i, j;
	daplka_resource_blk_t	*blk;
	daplka_resource_t	*rp;
	daplka_ia_resource_t	*ia_rp;

	/*
	 * Walk the resource table looking for an ia that matches the
	 * hca_hdl.
	 */
	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL)
			continue;
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			rp = blk->daplka_rcblk_blks[j];
			if ((rp == NULL) ||
			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
			    (rp->rs_type != DAPL_TYPE_IA)) {
				continue;
			}
			/*
			 * rp is an IA resource. check if it belongs
			 * to the hca/port for which we got the event.
			 */
			ia_rp = (daplka_ia_resource_t *)rp;
			DAPLKA_RS_REF(ia_rp);
			if ((hca_hdl == ia_rp->ia_hca_hdl) &&
			    (event->ev_port == ia_rp->ia_port_num)) {
				/*
				 * walk the ep hash table. Acquire a
				 * reader lock. NULL dgid indicates
				 * local port up event.
				 */
				daplka_hash_walk(&ia_rp->ia_ep_htbl,
				    daplka_ep_failback, NULL, RW_READER);
			}
			DAPLKA_RS_UNREF(ia_rp);
		}
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
}

static int
daplka_handle_hca_detach_event(ibt_async_event_t *event)
{
	daplka_hca_t	*hca;

	/*
	 * find the hca with the matching guid
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
	    hca = hca->hca_next) {
		if (hca->hca_guid == event->ev_hca_guid) {
			if (DAPLKA_HCA_BUSY(hca)) {
				mutex_exit(&daplka_dev->daplka_mutex);
				return (IBT_HCA_RESOURCES_NOT_FREED);
			}
			daplka_dequeue_hca(daplka_dev, hca);
			break;
		}
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	if (hca == NULL)
		return (IBT_FAILURE);

	return (daplka_fini_hca(daplka_dev, hca));
}

/*
 * This routine is called in kernel context
 */
static void
daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	switch (code) {
	case IBT_ERROR_CATASTROPHIC_CHAN:
	case IBT_ERROR_INVALID_REQUEST_CHAN:
	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
	case IBT_ERROR_PATH_MIGRATE_REQ:
		D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
		/* These events are affiliated with the RC channel */
		daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
		break;
	case IBT_ERROR_CQ:
		/* This event is affiliated with the CQ */
		D2("daplka_async_handler(): IBT_ERROR_CQ\n");
		daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
		break;
	case IBT_ERROR_PORT_DOWN:
		D2("daplka_async_handler(): IBT_PORT_DOWN\n");
		break;
	case IBT_EVENT_PORT_UP:
		D2("daplka_async_handler(): IBT_PORT_UP\n");
		if (daplka_apm) {
			daplka_un_async_handler(clnt_private, hca_hdl, code,
			    event);
		}
		break;
	case IBT_HCA_ATTACH_EVENT:
		/*
		 * NOTE: In some error recovery paths, it is possible to
		 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
		 */
		D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
		(void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
		break;
	case IBT_HCA_DETACH_EVENT:
		D2("daplka_async_handler(): IBT_HCA_DETACH\n");
		/* Free all hca resources and close the HCA. */
		(void) daplka_handle_hca_detach_event(event);
		break;
	case IBT_EVENT_PATH_MIGRATED:
		/* This event is affiliated with APM */
		D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
		break;
	default:
		D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
		break;
	}
}

/*
 * This routine is called in kernel context related to Subnet events
 */
/*ARGSUSED*/
static void
daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
    ibt_subnet_event_t *event)
{
	ib_gid_t *sgid = &gid;
	ib_gid_t *dgid;

	dgid = &event->sm_notice_gid;
	switch (code) {
	case IBT_SM_EVENT_GID_AVAIL:
		/* This event is affiliated with remote port up */
		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
		if (daplka_apm)
			daplka_sm_gid_avail(sgid, dgid);
		return;
	case IBT_SM_EVENT_GID_UNAVAIL:
		/* This event is affiliated with remote port down */
		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
		return;
	default:
		D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
		    code);
		return;
	}
}

/*
 * This routine is called in kernel context. It handles subnet GID
 * avail events, which correspond to a remote port coming up. Setting
 * up an alternate path or path migration (failback) has to be
 * initiated from the active side of the original connection.
 */
static void
daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
{
	int			i, j;
	daplka_resource_blk_t	*blk;
	daplka_resource_t	*rp;
	daplka_ia_resource_t	*ia_rp;

	D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
	    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);

	/*
	 * Walk the resource table looking for an ia that matches the sgid
	 */
	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL)
			continue;
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			rp = blk->daplka_rcblk_blks[j];
			if ((rp == NULL) ||
			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
			    (rp->rs_type != DAPL_TYPE_IA)) {
				continue;
			}
			/*
			 * rp is an IA resource; check whether its gid
			 * matches the caller's sgid
			 */
			ia_rp = (daplka_ia_resource_t *)rp;
			DAPLKA_RS_REF(ia_rp);
			if ((sgid->gid_prefix ==
			    ia_rp->ia_hca_sgid.gid_prefix) &&
			    (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
				/*
				 * walk the ep hash table. Acquire a
				 * reader lock.
				 */
				daplka_hash_walk(&ia_rp->ia_ep_htbl,
				    daplka_ep_failback,
				    (void *)dgid, RW_READER);
			}
			DAPLKA_RS_UNREF(ia_rp);
		}
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
}

/*
 * This routine is called in kernel context to get and set an alternate path
 */
static int
daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
{
	ibt_alt_path_info_t path_info;
	ibt_alt_path_attr_t path_attr;
	ibt_ap_returns_t ap_rets;
	ibt_status_t status;

	D2("daplka_ep_altpath : ibt_get_alt_path()\n");
	bzero(&path_info, sizeof (ibt_alt_path_info_t));
	bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
	if (dgid != NULL) {
		path_attr.apa_sgid = ep_rp->ep_sgid;
		path_attr.apa_dgid = *dgid;
	}
	status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
	    &path_attr, &path_info);
	if (status != IBT_SUCCESS) {
		DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
		    status);
		return (1);
	}

	D2("daplka_ep_altpath : ibt_set_alt_path()\n");
	bzero(&ap_rets, sizeof (ibt_ap_returns_t));
	status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    &path_info, NULL, 0, &ap_rets);
	if ((status != IBT_SUCCESS) ||
	    (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
		DERR("daplka_ep_altpath : ibt_set_alt_path failed "
		    "status %d ap_status %d\n", status, ap_rets.ap_status);
		return (1);
	}
	return (0);
}

/*
 * This routine is called in kernel context to fail back to the
 * original path
 */
static int
daplka_ep_failback(void *objp, void *arg)
{
	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
	ib_gid_t *dgid;
	ibt_status_t status;
	ibt_rc_chan_query_attr_t chan_attrs;
	int i;

	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)ep_rp->ep_sgid.gid_prefix,
	    (longlong_t)ep_rp->ep_sgid.gid_guid,
	    (longlong_t)ep_rp->ep_dgid.gid_prefix,
	    (longlong_t)ep_rp->ep_dgid.gid_guid);

	/*
	 * daplka_ep_failback is called from daplka_hash_walk,
	 * which holds the read lock on the hash table to protect
	 * the endpoint resource from removal
	 */
	mutex_enter(&ep_rp->ep_lock);
	/* first check: skip endpoints that are not connected */
	if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : endpoints not connected\n");
		return (0);
	}

	/* second check: skip endpoints whose gids are not set */
	if (((ep_rp->ep_sgid.gid_prefix == 0) &&
	    (ep_rp->ep_sgid.gid_guid == 0)) ||
	    ((ep_rp->ep_dgid.gid_prefix == 0) &&
	    (ep_rp->ep_dgid.gid_guid == 0))) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : skip unconnected endpoints\n");
		return (0);
	}

	/*
	 * match the destination ep. when dgid is NULL, the async
	 * event is a local port up and dgid acts as a wild card,
	 * i.e. all endpoints match
	 */
	dgid = (ib_gid_t *)arg;
	if (dgid == NULL) {
		/* ignore loopback ep */
		if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
		    (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : skip loopback endpoints\n");
			return (0);
		}
	} else {
		/* matching remote ep */
		if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
		    (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : unrelated endpoints\n");
			return (0);
		}
	}

	/* call get and set altpath with original dgid used in ep_connect */
	if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
		mutex_exit(&ep_rp->ep_lock);
		return (0);
	}

	/*
	 * wait for the migration state to become ARMED;
	 * e.g. a post_send msg transitions mig_state from REARM to ARM
	 */
	for (i = 0; i < daplka_query_aft_setaltpath; i++) {
		bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
		status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
		if (status != IBT_SUCCESS) {
			mutex_exit(&ep_rp->ep_lock);
			DERR("daplka_ep_failback : ibt_query_rc_channel err\n");
			return (0);
		}
		if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
			break;
	}

	D2("daplka_ep_failback : query[%d] mig_st=%d\n",
	    i, chan_attrs.rc_mig_state);
	D2("daplka_ep_failback : P sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
	D2("daplka_ep_failback : A sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);

	/* skip failback if the ARMED state was not reached or if disabled */
	if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_failback : ARMED state not reached\n");
		return (0);
	}

	D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
	status = ibt_migrate_path(ep_rp->ep_chan_hdl);
	if (status != IBT_SUCCESS) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_failback : migration failed "
		    "status %d\n", status);
		return (0);
	}

	/* get and set altpath with NULL dgid to indicate it is unspecified */
	(void) daplka_ep_altpath(ep_rp, NULL);
	mutex_exit(&ep_rp->ep_lock);
	return (0);
}

/*
 * IBTF wrappers used for resource accounting
 */
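
/*
 * All of the alloc/register wrappers below follow the same accounting
 * pattern (a sketch, not literal code; the counter, tunable and limit
 * names vary per resource type):
 *
 *	limit = daplka_max_<res>_percent * hca_attr.hca_max_<res> / 100;
 *	if (accounting is enabled && limit != 0 && count >= limit)
 *		return (IBT_INSUFF_RESOURCE);
 *	charge the resource (DAPLKA_RS_ACCT_INC plus an atomic bump of
 *	the per-HCA count);
 *	status = ibt_<alloc call>(...);
 *	if (status != IBT_SUCCESS)
 *		undo the charge;
 *	return (status);
 *
 * The free/deregister wrappers refund the charge only after the
 * underlying ibt call succeeds, and only if the resource was actually
 * charged (DAPLKA_RS_ACCT_CHARGED > 0).
 */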
static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
    ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
    ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_qps;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = ep_rp->ep_hca;
	max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;

	if (acct_enabled) {
		if (daplka_max_qp_percent != 0 &&
		    max_qps <= hca_p->hca_qp_count) {
			DERR("ibt_alloc_rc_channel: resource limit exceeded "
			    "(limit %d, count %d)\n", max_qps,
			    hca_p->hca_qp_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(ep_rp, 1);
		atomic_add_32(&hca_p->hca_qp_count, 1);
	}
	status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
		atomic_add_32(&hca_p->hca_qp_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = ep_rp->ep_hca;

	status = ibt_free_channel(chan_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
		atomic_add_32(&hca_p->hca_qp_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
    ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_cqs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = evd_rp->evd_hca;
	max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;

	if (acct_enabled) {
		if (daplka_max_cq_percent != 0 &&
		    max_cqs <= hca_p->hca_cq_count) {
			DERR("ibt_alloc_cq: resource limit exceeded "
			    "(limit %d, count %d)\n", max_cqs,
			    hca_p->hca_cq_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(evd_rp, 1);
		atomic_add_32(&hca_p->hca_cq_count, 1);
	}
	status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
		atomic_add_32(&hca_p->hca_cq_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = evd_rp->evd_hca;

	status = ibt_free_cq(cq_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
		atomic_add_32(&hca_p->hca_cq_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_pds;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = pd_rp->pd_hca;
	max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;

	if (acct_enabled) {
		if (daplka_max_pd_percent != 0 &&
		    max_pds <= hca_p->hca_pd_count) {
			DERR("ibt_alloc_pd: resource limit exceeded "
			    "(limit %d, count %d)\n", max_pds,
			    hca_p->hca_pd_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(pd_rp, 1);
		atomic_add_32(&hca_p->hca_pd_count, 1);
	}
	status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
		atomic_add_32(&hca_p->hca_pd_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_hdl_t pd_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = pd_rp->pd_hca;

	status = ibt_free_pd(hca_hdl, pd_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
		atomic_add_32(&hca_p->hca_pd_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
    ibt_rkey_t *rkey_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_mws;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = mw_rp->mw_hca;
	max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;

	if (acct_enabled) {
		if (daplka_max_mw_percent != 0 &&
		    max_mws <= hca_p->hca_mw_count) {
			DERR("ibt_alloc_mw: resource limit exceeded "
			    "(limit %d, count %d)\n", max_mws,
			    hca_p->hca_mw_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(mw_rp, 1);
		atomic_add_32(&hca_p->hca_mw_count, 1);
	}
	status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
		atomic_add_32(&hca_p->hca_mw_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
    ibt_mw_hdl_t mw_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = mw_rp->mw_hca;

	status = ibt_free_mw(hca_hdl, mw_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
		atomic_add_32(&hca_p->hca_mw_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
    ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
    ibt_mr_desc_t *mr_desc_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_mrs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = mr_rp->mr_hca;
	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;

	if (acct_enabled) {
		if (daplka_max_mr_percent != 0 &&
		    max_mrs <= hca_p->hca_mr_count) {
			DERR("ibt_register_mr: resource limit exceeded "
			    "(limit %d, count %d)\n", max_mrs,
			    hca_p->hca_mr_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(mr_rp, 1);
		atomic_add_32(&hca_p->hca_mr_count, 1);
	}
	status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
		atomic_add_32(&hca_p->hca_mr_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
    ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
    ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
    ibt_mr_desc_t *mr_desc_p)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_mrs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = mr_rp->mr_hca;
	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;

	if (acct_enabled) {
		if (daplka_max_mr_percent != 0 &&
		    max_mrs <= hca_p->hca_mr_count) {
			DERR("ibt_register_shared_mr: resource limit exceeded "
			    "(limit %d, count %d)\n", max_mrs,
			    hca_p->hca_mr_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(mr_rp, 1);
		atomic_add_32(&hca_p->hca_mr_count, 1);
	}
	status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
	    smr_attr_p, mr_hdl_p, mr_desc_p);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
		atomic_add_32(&hca_p->hca_mr_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
    ibt_mr_hdl_t mr_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = mr_rp->mr_hca;

	status = ibt_deregister_mr(hca_hdl, mr_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
		atomic_add_32(&hca_p->hca_mr_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
    ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
    ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
{
	daplka_hca_t	*hca_p;
	uint32_t	max_srqs;
	boolean_t	acct_enabled;
	ibt_status_t	status;

	acct_enabled = daplka_accounting_enabled;
	hca_p = srq_rp->srq_hca;
	max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;

	if (acct_enabled) {
		if (daplka_max_srq_percent != 0 &&
		    max_srqs <= hca_p->hca_srq_count) {
			DERR("ibt_alloc_srq: resource limit exceeded "
			    "(limit %d, count %d)\n", max_srqs,
			    hca_p->hca_srq_count);
			return (IBT_INSUFF_RESOURCE);
		}
		DAPLKA_RS_ACCT_INC(srq_rp, 1);
		atomic_add_32(&hca_p->hca_srq_count, 1);
	}
	status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);

	if (status != IBT_SUCCESS && acct_enabled) {
		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
		atomic_add_32(&hca_p->hca_srq_count, -1);
	}
	return (status);
}

static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
{
	daplka_hca_t	*hca_p;
	ibt_status_t	status;

	hca_p = srq_rp->srq_hca;

	D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);

	status = ibt_free_srq(srq_hdl);
	if (status != IBT_SUCCESS) {
		return (status);
	}
	if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
		atomic_add_32(&hca_p->hca_srq_count, -1);
	}
	return (status);
}


static int
daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_IA_CREATE:
		error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
		break;

	/* can potentially add other commands here */

	default:
		DERR("daplka_common_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_EVD_CREATE:
		error = daplka_evd_create(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CQ_RESIZE:
		error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVENT_POLL:
		error = daplka_event_poll(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVENT_WAKEUP:
		error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVD_MODIFY_CNO:
		error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EVD_FREE:
		error = daplka_evd_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_evd_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_EP_MODIFY:
		error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_FREE:
		error = daplka_ep_free(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_CONNECT:
		error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_DISCONNECT:
		error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_REINIT:
		error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_EP_CREATE:
		error = daplka_ep_create(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_ep_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_MR_REGISTER:
		error = daplka_mr_register(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_REGISTER_LMR:
		error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_REGISTER_SHARED:
		error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_DEREGISTER:
		error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MR_SYNC:
		error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_mr_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_MW_ALLOC:
		error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_MW_FREE:
		error = daplka_mw_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_mw_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_CNO_ALLOC:
		error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CNO_FREE:
		error = daplka_cno_free(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CNO_WAIT:
		error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_cno_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_PD_ALLOC:
		error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_PD_FREE:
		error = daplka_pd_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_pd_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_SERVICE_REGISTER:
		error = daplka_service_register(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_SERVICE_DEREGISTER:
		error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_sp_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

static int
daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_SRQ_CREATE:
		error = daplka_srq_create(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_SRQ_RESIZE:
		error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_SRQ_FREE:
		error = daplka_srq_free(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
		error = DDI_FAILURE;
		break;
	}
	return (error);
}

static int
daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	int error;

	switch (cmd) {
	case DAPL_CR_ACCEPT:
		error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CR_REJECT:
		error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_IA_QUERY:
		error = daplka_ia_query(rp, arg, mode, cred, rvalp);
		break;

	case DAPL_CR_HANDOFF:
		error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("daplka_misc_ioctl: cmd not supported\n");
		error = DDI_FAILURE;
	}
	return (error);
}

/*ARGSUSED*/
static int
daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
	int *rvalp)
{
	daplka_ia_resource_t	*ia_rp;
	minor_t			rnum;
	int			error = 0;

	rnum = getminor(dev);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
	if (ia_rp == NULL) {
		DERR("ioctl: resource not found, rnum %d\n", rnum);
		return (ENXIO);
	}

	D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
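	/*
	 * a reserved slot holds the DAPLKA_RC_RESERVED sentinel rather
	 * than a real resource; no reference was taken on it by
	 * daplka_resource_lookup, so we return without DAPLKA_RS_UNREF
	 */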
	if (DAPLKA_RS_RESERVED(ia_rp)) {
		error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
		return (error);
	}
	if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
		DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = EINVAL;
		goto cleanup;
	}
	if (ia_rp->ia_pid != ddi_get_pid()) {
		DERR("ioctl: ia_pid %d != pid %d\n",
		    ia_rp->ia_pid, ddi_get_pid());
		error = EINVAL;
		goto cleanup;
	}

	switch (cmd & DAPL_TYPE_MASK) {
	case DAPL_TYPE_EVD:
		error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_EP:
		error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MR:
		error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MW:
		error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_PD:
		error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SP:
		error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_CNO:
		error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MISC:
		error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SRQ:
		error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = DDI_FAILURE;
	}

cleanup:;
	DAPLKA_RS_UNREF(ia_rp);
	return (error);
}

/* ARGSUSED */
static int
daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
{
	minor_t rnum;

	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}

	/*
	 * Only the base minor (DAPLKA_DRIVER_MINOR) can be opened;
	 * clone minors are used for resources.
	 */
	if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
		DERR("daplka_open: bad minor %d\n", getminor(*devp));
		return (ENODEV);
	}

	/*
	 * - allocate new minor number
	 * - update devp argument to new device
	 */
	if (daplka_resource_reserve(&rnum) == 0) {
		*devp = makedevice(getmajor(*devp), rnum);
	} else {
		return (ENOMEM);
	}

	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
{
	daplka_ia_resource_t	*ia_rp;
	minor_t			rnum = getminor(dev);

	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}
	D2("daplka_close: closing rnum = %d\n", rnum);
	atomic_add_32(&daplka_pending_close, 1);

	/*
	 * remove from resource table.
	 */
	ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);

	/*
	 * remove the initial reference
	 */
	if (ia_rp != NULL) {
		DAPLKA_RS_UNREF(ia_rp);
	}
	atomic_add_32(&daplka_pending_close, -1);
	return (DDI_SUCCESS);
}


/*
 * Resource management routines
 *
 * We start with no resource array. Each time we run out of slots, we
 * reallocate a larger array, copy the existing blk pointers into it,
 * and allocate a new resource blk to hold the next range of slots.
 *
 * The resource control block contains:
 *      root    - array of pointer of resource blks
 *      sz      - current size of array.
 *      len     - last valid entry in array.
 *
 * A search operation based on a resource number is as follows:
 *      index = rnum / RESOURCE_BLKSZ;
 *      ASSERT(index < resource_block.len);
 *      ASSERT(index < resource_block.sz);
 *      offset = rnum % RESOURCE_BLKSZ;
 *      ASSERT(offset >= resource_block.root[index]->base);
 *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
 *      return resource_block.root[index]->blks[offset];
 *
 * A resource blk is freed when its used count reaches zero.
 */
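
/*
 * For illustration, with a hypothetical DAPLKA_RC_BLKSZ of 16, resource
 * number 35 would map to:
 *
 *	index  = 35 / 16 = 2;	(third blk pointer in the root array)
 *	offset = 35 % 16 = 3;	(fourth slot within that blk)
 *
 * The reserve, insert, remove and lookup routines below all use this
 * same index/offset computation.
 */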

/*
 * initializes the global resource table
 */
static void
daplka_resource_init(void)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource))
	rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
	daplka_resource.daplka_rc_len = 0;
	daplka_resource.daplka_rc_sz = 0;
	daplka_resource.daplka_rc_cnt = 0;
	daplka_resource.daplka_rc_flag = 0;
	daplka_resource.daplka_rc_root = NULL;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource))
}

/*
 * destroys the global resource table
 */
static void
daplka_resource_fini(void)
{
	int	i;

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		daplka_resource_blk_t	*blk;
		int			j;

		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL) {
			continue;
		}
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			if (blk->daplka_rcblk_blks[j] != NULL) {
				DERR("resource_fini: non-null slot %d, %p\n",
				    j, blk->daplka_rcblk_blks[j]);
			}
		}
		kmem_free(blk, sizeof (*blk));
		daplka_resource.daplka_rc_root[i] = NULL;
	}
	if (daplka_resource.daplka_rc_root != NULL) {
		uint_t	sz;

		sz = daplka_resource.daplka_rc_sz *
		    sizeof (daplka_resource_blk_t *);
		kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
		daplka_resource.daplka_rc_root = NULL;
		daplka_resource.daplka_rc_len = 0;
		daplka_resource.daplka_rc_sz = 0;
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	rw_destroy(&daplka_resource.daplka_rct_lock);
}

/*
 * reserves a slot in the global resource table.
 * this is called by the open() syscall. it is needed because
 * at open() time, we do not have sufficient information to
 * create an IA resource. the library needs to subsequently
 * call daplka_ia_create to insert an IA resource into this
 * reserved slot.
 */
static int
daplka_resource_reserve(minor_t *rnum)
{
	int i, j, empty = -1;
	daplka_resource_blk_t *blk;

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	/*
	 * Try to find an empty slot
	 */
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk != NULL && blk->daplka_rcblk_avail > 0) {

			D3("resource_reserve: available slots %d\n",
			    blk->daplka_rcblk_avail);

			/*
			 * this blk has at least one empty slot; find it
			 */
			for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
				if (blk->daplka_rcblk_blks[j] == NULL) {
					*rnum = (minor_t)
					    (j + (i * DAPLKA_RC_BLKSZ));
					blk->daplka_rcblk_blks[j] =
					    (daplka_resource_t *)
					    DAPLKA_RC_RESERVED;
					blk->daplka_rcblk_avail--;
					daplka_resource.daplka_rc_cnt++;
					rw_exit(&daplka_resource.
					    daplka_rct_lock);
					return (0);
				}
			}
		} else if (blk == NULL && empty < 0) {
			/*
			 * remember first empty slot
			 */
			empty = i;
		}
	}

	/*
	 * Couldn't find an empty slot; allocate a new blk.
	 * First check whether the root array needs to be grown.
	 */
	if (empty < 0) {
		if (daplka_resource.daplka_rc_len ==
		    daplka_resource.daplka_rc_sz) {
			/*
			 * Allocate a new array and copy the current
			 * blk pointers into it
			 */
			daplka_resource_blk_t	**p;
			uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
			    DAPLKA_RC_BLKSZ;

			D3("resource_reserve: increasing no. of buckets to %d\n",
			    newsz);

			p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);

			if (daplka_resource.daplka_rc_root) {
				uint_t oldsz;

				oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
				    (int)sizeof (*p));

				/*
				 * Copy old data into new space and
				 * free old stuff
				 */
				bcopy(daplka_resource.daplka_rc_root, p, oldsz);
				kmem_free(daplka_resource.daplka_rc_root,
				    oldsz);
			}

			daplka_resource.daplka_rc_root = p;
			daplka_resource.daplka_rc_sz = (int)newsz;
		}

		empty = daplka_resource.daplka_rc_len;
		daplka_resource.daplka_rc_len++;

		D3("resource_reserve: daplka_rc_len %d\n",
		    daplka_resource.daplka_rc_len);
	}

	/*
	 * Allocate a new blk
	 */
	blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
	ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
	daplka_resource.daplka_rc_root[empty] = blk;
	blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;

	/*
	 * Allocate slot
	 */
	*rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
	blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
	daplka_resource.daplka_rc_cnt++;
	rw_exit(&daplka_resource.daplka_rct_lock);

	return (0);
}
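
/*
 * Sketch of a slot's typical life cycle: daplka_open() reserves a slot
 * via daplka_resource_reserve() and encodes the returned rnum in the
 * clone device's minor number; the library's DAPL_IA_CREATE ioctl then
 * reaches daplka_ia_create(), which replaces the DAPLKA_RC_RESERVED
 * sentinel with a real IA resource via daplka_resource_insert();
 * daplka_close() finally vacates the slot with daplka_resource_remove().
 */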

/*
 * removes resource from global resource table
 */
static daplka_resource_t *
daplka_resource_remove(minor_t rnum)
{
	int i, j;
	daplka_resource_blk_t *blk;
	daplka_resource_t *p;

	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: invalid rnum %d\n", rnum);
		return (NULL);
	}

	ASSERT(daplka_resource.daplka_rc_root);
	ASSERT(i < daplka_resource.daplka_rc_len);
	ASSERT(i < daplka_resource.daplka_rc_sz);
	blk = daplka_resource.daplka_rc_root[i];
	if (blk == NULL) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: invalid rnum %d\n", rnum);
		return (NULL);
	}

	if (blk->daplka_rcblk_blks[j] == NULL) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
		return (NULL);
	}
	p = blk->daplka_rcblk_blks[j];
	blk->daplka_rcblk_blks[j] = NULL;
	blk->daplka_rcblk_avail++;
	if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
		/*
		 * free this blk
		 */
		kmem_free(blk, sizeof (*blk));
		daplka_resource.daplka_rc_root[i] = NULL;
	}
	daplka_resource.daplka_rc_cnt--;
	rw_exit(&daplka_resource.daplka_rct_lock);

	if ((intptr_t)p == DAPLKA_RC_RESERVED) {
		return (NULL);
	} else {
		return (p);
	}
}

/*
 * inserts resource into the slot designated by rnum
 */
static int
daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
{
	int i, j, error = -1;
	daplka_resource_blk_t *blk;

	/*
	 * Compute the slot location and lock the table in WRITER
	 * mode before filling in the reserved slot
	 */

	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_insert: resource %d not found\n", rnum);
		return (-1);
	}

	blk = daplka_resource.daplka_rc_root[i];
	if (blk != NULL) {
		ASSERT(i < daplka_resource.daplka_rc_len);
		ASSERT(i < daplka_resource.daplka_rc_sz);

		if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
			blk->daplka_rcblk_blks[j] = rp;
			error = 0;
		} else {
			DERR("resource_insert: %d not reserved, blk = %p\n",
			    rnum, blk->daplka_rcblk_blks[j]);
		}
	} else {
		DERR("resource_insert: resource %d not found\n", rnum);
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	return (error);
}

/*
 * finds resource using minor device number
 */
static daplka_resource_t *
daplka_resource_lookup(minor_t rnum)
{
	int i, j;
	daplka_resource_blk_t *blk;
	daplka_resource_t *rp;

	/*
	 * Compute the slot location and lock the table in READER
	 * mode before looking up the resource
	 */

	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_lookup: resource %d not found\n", rnum);
		return (NULL);
	}

	blk = daplka_resource.daplka_rc_root[i];
	if (blk != NULL) {
		ASSERT(i < daplka_resource.daplka_rc_len);
		ASSERT(i < daplka_resource.daplka_rc_sz);

		rp = blk->daplka_rcblk_blks[j];
		if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
			D3("resource_lookup: %d not found, blk = %p\n",
			    rnum, blk->daplka_rcblk_blks[j]);
		} else {
			DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
		}
	} else {
		DERR("resource_lookup: resource %d not found\n", rnum);
		rp = NULL;
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	return (rp);
}
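
/*
 * Note: a successful lookup of a non-reserved slot returns the resource
 * with an additional reference taken (DAPLKA_RS_REF); the caller must
 * drop it with DAPLKA_RS_UNREF when done, as daplka_ioctl() does in its
 * cleanup path.
 */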

/*
 * generic hash table implementation
 */

/*
 * daplka_hash_create:
 *	initializes a hash table with the specified parameters
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	nbuckets		number of buckets (must be power of 2)
 *
 *	free_func		this function is called on each hash
 *				table element when daplka_hash_destroy
 *				is called
 *
 *	lookup_func		if daplka_hash_lookup is able to find
 *				the desired object, this function is
 *				applied on the object before
 *				daplka_hash_lookup returns
 * output:
 *	none
 *
 * return value(s):
 *	EINVAL			nbuckets is not a power of 2
 *	ENOMEM			cannot allocate buckets
 *	0			success
 */
static int
daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
	void (*free_func)(void *), void (*lookup_func)(void *))
{
	int i;

	if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
		DERR("hash_create: nbuckets not power of 2\n");
		return (EINVAL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp))

	htblp->ht_buckets =
	    kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
	    daplka_km_flags);
	if (htblp->ht_buckets == NULL) {
		DERR("hash_create: cannot allocate buckets\n");
		return (ENOMEM);
	}
	for (i = 0; i < nbuckets; i++) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
		htblp->ht_buckets[i].hb_count = 0;
		htblp->ht_buckets[i].hb_entries = NULL;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
	}
	rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);

	htblp->ht_count = 0;
	htblp->ht_next_hkey = (uint64_t)gethrtime();
	htblp->ht_nbuckets = nbuckets;
	htblp->ht_free_func = free_func;
	htblp->ht_lookup_func = lookup_func;
	htblp->ht_initialized = B_TRUE;
	D3("hash_create: done, buckets = %d\n", nbuckets);
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp))
	return (0);
}
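
/*
 * A minimal usage sketch of this hash table (illustration only; real
 * callers must check the return codes, and my_free_func/my_objp are
 * hypothetical):
 *
 *	daplka_hash_table_t tbl;
 *	uint64_t hkey = 0;
 *	void *objp;
 *
 *	(void) daplka_hash_create(&tbl, 64, my_free_func, NULL);
 *	(void) daplka_hash_insert(&tbl, &hkey, my_objp);
 *	objp = daplka_hash_lookup(&tbl, hkey);
 *	(void) daplka_hash_remove(&tbl, hkey, &objp);
 *	daplka_hash_destroy(&tbl);
 *
 * Because *hkeyp is 0 at insert time, daplka_hash_insert generates the
 * key and returns it through hkeyp; 64 satisfies the power-of-2 bucket
 * requirement.
 */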

/*
 * daplka_hash_insert:
 *	inserts an object into a hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkeyp			pointer to hash key.
 *				*hkeyp being non-zero means that the caller
 *				has generated its own hkey. if *hkeyp is zero,
 *				this function will generate an hkey for the
 *				caller. it is recommended that the caller
 *				leave the hkey generation to this function
 *				because the hkey is more likely to be evenly
 *				distributed.
 *
 *	objp			pointer to object to be inserted into
 *				hash table
 *
 * output:
 *	hkeyp			the generated hkey is returned via this pointer
 *
 * return value(s):
 *	EINVAL			invalid parameter
 *	ENOMEM			cannot allocate hash entry
 *	0			successful
 */
static int
daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
{
	daplka_hash_entry_t *hep, *curr_hep;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket;
	uint64_t hkey;

	if (hkeyp == NULL) {
		DERR("hash_insert: hkeyp == NULL\n");
		return (EINVAL);
	}
	hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
	if (hep == NULL) {
		DERR("hash_insert: cannot alloc hash_entry\n");
		return (ENOMEM);
	}
	if (*hkeyp == 0) {
		/* generate a new key */
		mutex_enter(&htblp->ht_key_lock);
		hkey = ++htblp->ht_next_hkey;
		if (hkey == 0) {
			hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
		}
		mutex_exit(&htblp->ht_key_lock);
	} else {
		/* use user generated key */
		hkey = *hkeyp;
	}

	/* only works if ht_nbuckets is a power of 2 */
	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
	ASSERT(objp != NULL);
	ASSERT(bucket < htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	hep->he_hkey = hkey;
	hep->he_objp = objp;

	/* look for duplicate entries */
	hbp = &htblp->ht_buckets[bucket];
	curr_hep = hbp->hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hep->he_hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep != NULL) {
		DERR("hash_insert: found duplicate hash entry: "
		    "bucket %d, hkey 0x%016llx\n",
		    bucket, (longlong_t)hep->he_hkey);
		kmem_free(hep, sizeof (*hep));
		rw_exit(&htblp->ht_table_lock);
		return (EINVAL);
	}
	hep->he_next = hbp->hb_entries;
	hbp->hb_entries = hep;
	hbp->hb_count++;
	htblp->ht_count++;
	rw_exit(&htblp->ht_table_lock);

	if (*hkeyp == 0) {
		*hkeyp = hkey;
		ASSERT(*hkeyp != 0);
	}
	D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
	    htblp, (longlong_t)*hkeyp, bucket);
	return (0);
}

/*
 * daplka_hash_remove:
 *	removes object identified by hkey from hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkey			hkey that identifies the object to be removed
 *
 * output:
 *	objpp			pointer to pointer to object.
 *				if remove is successful, the removed object
 *				will be returned via *objpp.
 *
 * return value(s):
 *	EINVAL			cannot find hash entry
 *	0			successful
 */
static int
daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
{
	daplka_hash_entry_t	*free_hep, **curr_hepp;
	daplka_hash_bucket_t	*hbp;
	uint32_t		bucket;

	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	hbp = &htblp->ht_buckets[bucket];

	curr_hepp = &hbp->hb_entries;
	while (*curr_hepp != NULL) {
		if ((*curr_hepp)->he_hkey == hkey) {
			break;
		}
		curr_hepp = &(*curr_hepp)->he_next;
	}
	if (*curr_hepp == NULL) {
		DERR("hash_remove: cannot find hash entry: "
		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
		rw_exit(&htblp->ht_table_lock);
		return (EINVAL);
	} else {
		if (objpp != NULL) {
			*objpp = (*curr_hepp)->he_objp;
		}
		free_hep = *curr_hepp;
		*curr_hepp = (*curr_hepp)->he_next;
		kmem_free(free_hep, sizeof (*free_hep));
	}
	hbp->hb_count--;
	htblp->ht_count--;
	D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
	    "hb_count %d, ht_count %d\n",
	    (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
	rw_exit(&htblp->ht_table_lock);
	return (0);
}

/*
 * daplka_hash_walk:
 *	walks through the entire hash table, applying func to each of
 *	the inserted objects; stops walking if func returns non-zero.
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	func			function to be applied on each object
 *
 *	farg			second argument to func
 *
 *	lockmode		can be RW_WRITER or RW_READER. this
 *				allows the caller to choose what type
 *				of lock to acquire before walking the
 *				table.
 *
 * output:
 *	none
 *
 * return value(s):
 *	none
 */
static void
daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
	void *farg, krw_t lockmode)
{
	daplka_hash_entry_t *curr_hep;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket, retval = 0;

	ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);

	/* needed for warlock */
	if (lockmode == RW_WRITER) {
		rw_enter(&htblp->ht_table_lock, RW_WRITER);
	} else {
		rw_enter(&htblp->ht_table_lock, RW_READER);
	}
	for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
		hbp = &htblp->ht_buckets[bucket];
		curr_hep = hbp->hb_entries;
		while (curr_hep != NULL) {
			retval = (*func)(curr_hep->he_objp, farg);
			if (retval != 0) {
				break;
			}
			curr_hep = curr_hep->he_next;
		}
	}
	rw_exit(&htblp->ht_table_lock);
}
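
/*
 * For a concrete walk in this file, see daplka_sm_gid_avail(), which
 * walks an IA's ia_ep_htbl with daplka_ep_failback() as func, the
 * destination gid as farg, and RW_READER as the lockmode.
 */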

/*
 * daplka_hash_lookup:
 *	finds object from hkey
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkey			hkey that identifies the object to be looked up
 *
 * output:
 *	none
 *
 * return value(s):
 *	NULL			if not found
 *	object pointer		if found
 */
static void *
daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
{
	daplka_hash_entry_t *curr_hep;
	uint32_t bucket;
	void *objp;

	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));

	rw_enter(&htblp->ht_table_lock, RW_READER);
	curr_hep = htblp->ht_buckets[bucket].hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep == NULL) {
		DERR("hash_lookup: cannot find hash entry: "
		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
		rw_exit(&htblp->ht_table_lock);
		return (NULL);
	}
	objp = curr_hep->he_objp;
	ASSERT(objp != NULL);
	if (htblp->ht_lookup_func != NULL) {
		(*htblp->ht_lookup_func)(objp);
	}
	rw_exit(&htblp->ht_table_lock);
	return (objp);
}
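
/*
 * When ht_lookup_func is daplka_hash_generic_lookup (defined below), a
 * successful lookup also raises the object's reference count, so the
 * caller is responsible for releasing that reference.
 */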

/*
 * daplka_hash_destroy:
 *	destroys hash table. applies free_func on all inserted objects.
 *
 * input:
 *	htblp			pointer to hash table
 *
 * output:
 *	none
 *
 * return value(s):
 *	none
 */
static void
daplka_hash_destroy(daplka_hash_table_t *htblp)
{
	daplka_hash_entry_t *curr_hep, *free_hep;
	daplka_hash_entry_t *free_list = NULL;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket, cnt, total = 0;

	if (!htblp->ht_initialized) {
		DERR("hash_destroy: not initialized\n");
		return;
	}
	/* free all elements from hash table */
	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
		hbp = &htblp->ht_buckets[bucket];

		/* build list of elements to be freed */
		curr_hep = hbp->hb_entries;
		cnt = 0;
		while (curr_hep != NULL) {
			cnt++;
			free_hep = curr_hep;
			curr_hep = curr_hep->he_next;

			free_hep->he_next = free_list;
			free_list = free_hep;
		}
		ASSERT(cnt == hbp->hb_count);
		total += cnt;
		hbp->hb_count = 0;
		hbp->hb_entries = NULL;
	}
	ASSERT(total == htblp->ht_count);
	D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
	    htblp, htblp->ht_nbuckets, total);
	rw_exit(&htblp->ht_table_lock);

	/* free all objects, now without holding the hash table lock */
	cnt = 0;
	while (free_list != NULL) {
		cnt++;
		free_hep = free_list;
		free_list = free_list->he_next;
		if (htblp->ht_free_func != NULL) {
			(*htblp->ht_free_func)(free_hep->he_objp);
		}
		kmem_free(free_hep, sizeof (*free_hep));
	}
	ASSERT(total == cnt);

	/* free hash buckets and destroy locks */
	kmem_free(htblp->ht_buckets,
	    sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	htblp->ht_buckets = NULL;
	htblp->ht_count = 0;
	htblp->ht_nbuckets = 0;
	htblp->ht_free_func = NULL;
	htblp->ht_lookup_func = NULL;
	htblp->ht_initialized = B_FALSE;
	rw_exit(&htblp->ht_table_lock);

	mutex_destroy(&htblp->ht_key_lock);
	rw_destroy(&htblp->ht_table_lock);
}

/*
 * daplka_hash_getsize:
 *	return the number of objects in hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 * output:
 *	none
 *
 * return value(s):
 *	number of objects in hash table
 */
static uint32_t
daplka_hash_getsize(daplka_hash_table_t *htblp)
{
	uint32_t sz;

	rw_enter(&htblp->ht_table_lock, RW_READER);
	sz = htblp->ht_count;
	rw_exit(&htblp->ht_table_lock);

	return (sz);
}

/*
 * this function is used as the ht_lookup_func when daplka_hash_lookup
 * is called. other types of objects may use a more elaborate
 * lookup_func.
 */
static void
daplka_hash_generic_lookup(void *obj)
{
	daplka_resource_t	*rp = (daplka_resource_t *)obj;

	mutex_enter(&rp->rs_reflock);
	rp->rs_refcnt++;
	ASSERT(rp->rs_refcnt != 0);
	mutex_exit(&rp->rs_reflock);
}

/*
 * Generates a non-zero 32-bit hash key used for the timer hash table.
 */
static uint32_t
daplka_timer_hkey_gen()
{
	uint32_t new_hkey;

	do {
		new_hkey = atomic_add_32_nv(&daplka_timer_hkey, 1);
	} while (new_hkey == 0);

	return (new_hkey);
}


/*
 * The DAPL KA debug logging routines
 */

/*
 * Add the string str to the end of the debug log, followed by a newline.
 */
static void
daplka_dbglog(char *str)
{
	size_t	length;
	size_t	remlen;

	/*
	 * If the log has not been initialized yet, drop the message.
	 */
	if (!daplka_dbginit) {
		return;
	}
	mutex_enter(&daplka_dbglock);
	/*
	 * Note the log is circular; if this string would run over the end,
	 * we copy the first piece to the end and then the last piece to
	 * the beginning of the log.
	 */
	length = strlen(str);

	remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;

	if (length > remlen) {
		if (remlen)
			bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
		daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = '\0';
		str += remlen;
		length -= remlen;
		daplka_dbgnext = 0;
	}
	bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
	daplka_dbgnext += length;

	if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
		daplka_dbgnext = 0;
	mutex_exit(&daplka_dbglock);
}
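
/*
 * Wraparound example (hypothetical numbers): with a 0x4000-byte buffer
 * and daplka_dbgnext at 0x3ffb, remlen is 4. An 11-byte string has its
 * first 4 bytes copied to offsets 0x3ffb through 0x3ffe, the last
 * buffer byte is kept as a NUL terminator, and the remaining 7 bytes
 * are copied to the beginning of the buffer, leaving daplka_dbgnext
 * at 7.
 */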


/*
 * Add a printf-style message to whichever debug logs we're currently using.
 */
static void
daplka_debug(const char *fmt, ...)
{
	char	buff[512];
	va_list	ap;
	/*
	 * Prepend the thread id and a high resolution timestamp to the
	 * specified string. The timestamp unit is 10 microseconds
	 * (gethrtime() = X ns = X/1000 us = X/10000 units of 10 us);
	 * the upper digits are dropped, so the value wraps around
	 * every 10000 seconds.
	 */
	int	micro_time = (int)((gethrtime() / 10000) % 1000000000);
	(void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);

	va_start(ap, fmt);
	(void) vsnprintf(buff + strlen(buff), sizeof (buff) - strlen(buff),
	    fmt, ap);
	va_end(ap);

	daplka_dbglog(buff);
}

static void
daplka_console(const char *fmt, ...)
{
	char buff[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(buff, sizeof (buff), fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "%s", buff);
}