xref: /titanic_53/usr/src/uts/common/rpc/rpcib.c (revision 065714dcbd54a8548637c20c4291ec9fbb3b68d9)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50a701b1eSRobert Gordon  * Common Development and Distribution License (the "License").
60a701b1eSRobert Gordon  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22e11c3f44Smeem  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
260a701b1eSRobert Gordon /*
270a701b1eSRobert Gordon  * Copyright (c) 2007, The Ohio State University. All rights reserved.
280a701b1eSRobert Gordon  *
290a701b1eSRobert Gordon  * Portions of this source code is developed by the team members of
300a701b1eSRobert Gordon  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
310a701b1eSRobert Gordon  * headed by Professor Dhabaleswar K. (DK) Panda.
320a701b1eSRobert Gordon  *
330a701b1eSRobert Gordon  * Acknowledgements to contributions from developors:
340a701b1eSRobert Gordon  *   Ranjit Noronha: noronha@cse.ohio-state.edu
350a701b1eSRobert Gordon  *   Lei Chai      : chail@cse.ohio-state.edu
360a701b1eSRobert Gordon  *   Weikuan Yu    : yuw@cse.ohio-state.edu
370a701b1eSRobert Gordon  *
380a701b1eSRobert Gordon  */
397c478bd9Sstevel@tonic-gate 
407c478bd9Sstevel@tonic-gate /*
417c478bd9Sstevel@tonic-gate  * The rpcib plugin. Implements the interface for RDMATF's
427c478bd9Sstevel@tonic-gate  * interaction with IBTF.
437c478bd9Sstevel@tonic-gate  */
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate #include <sys/param.h>
467c478bd9Sstevel@tonic-gate #include <sys/types.h>
477c478bd9Sstevel@tonic-gate #include <sys/user.h>
487c478bd9Sstevel@tonic-gate #include <sys/systm.h>
497c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
507c478bd9Sstevel@tonic-gate #include <sys/proc.h>
517c478bd9Sstevel@tonic-gate #include <sys/socket.h>
527c478bd9Sstevel@tonic-gate #include <sys/file.h>
537c478bd9Sstevel@tonic-gate #include <sys/stream.h>
547c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
557c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
567c478bd9Sstevel@tonic-gate #include <sys/errno.h>
577c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
587c478bd9Sstevel@tonic-gate #include <sys/debug.h>
597c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
607c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
617c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
627c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
637c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
647c478bd9Sstevel@tonic-gate #include <sys/time.h>
657c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
667c478bd9Sstevel@tonic-gate #include <sys/callb.h>
677c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
687c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
690a701b1eSRobert Gordon #include <sys/sdt.h>
707c478bd9Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h>
717c478bd9Sstevel@tonic-gate #include <rpc/rpc.h>
727c478bd9Sstevel@tonic-gate #include <rpc/ib.h>
737c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
747c478bd9Sstevel@tonic-gate #include <sys/kstr.h>
757c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
767c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
777c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
787c478bd9Sstevel@tonic-gate #include <net/if.h>
79e11c3f44Smeem #include <net/if_types.h>
807c478bd9Sstevel@tonic-gate #include <sys/cred.h>
810a701b1eSRobert Gordon #include <rpc/rpc_rdma.h>
820a701b1eSRobert Gordon #include <nfs/nfs.h>
830a701b1eSRobert Gordon #include <sys/atomic.h>
840a701b1eSRobert Gordon 
85f837ee4aSSiddheshwar Mahesh #define	NFS_RDMA_PORT	20049
86f837ee4aSSiddheshwar Mahesh 
877c478bd9Sstevel@tonic-gate 
88e11c3f44Smeem /*
89214ae7d0SSiddheshwar Mahesh  * Convenience structures for connection management
90e11c3f44Smeem  */
91e11c3f44Smeem typedef struct rpcib_ipaddrs {
92e11c3f44Smeem 	void	*ri_list;	/* pointer to list of addresses */
93e11c3f44Smeem 	uint_t	ri_count;	/* number of addresses in list */
94e11c3f44Smeem 	uint_t	ri_size;	/* size of ri_list in bytes */
95e11c3f44Smeem } rpcib_ipaddrs_t;
967c478bd9Sstevel@tonic-gate 
97214ae7d0SSiddheshwar Mahesh 
98214ae7d0SSiddheshwar Mahesh typedef struct rpcib_ping {
99214ae7d0SSiddheshwar Mahesh 	rib_hca_t  *hca;
100214ae7d0SSiddheshwar Mahesh 	ibt_path_info_t path;
101214ae7d0SSiddheshwar Mahesh 	ibt_ip_addr_t srcip;
102214ae7d0SSiddheshwar Mahesh 	ibt_ip_addr_t dstip;
103214ae7d0SSiddheshwar Mahesh } rpcib_ping_t;
104214ae7d0SSiddheshwar Mahesh 
1057c478bd9Sstevel@tonic-gate /*
1067c478bd9Sstevel@tonic-gate  * Prototype declarations for driver ops
1077c478bd9Sstevel@tonic-gate  */
1087c478bd9Sstevel@tonic-gate static int	rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
1097c478bd9Sstevel@tonic-gate static int	rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
1107c478bd9Sstevel@tonic-gate 				void *, void **);
1117c478bd9Sstevel@tonic-gate static int	rpcib_detach(dev_info_t *, ddi_detach_cmd_t);
112e11c3f44Smeem static boolean_t rpcib_rdma_capable_interface(struct lifreq *);
113e11c3f44Smeem static int	rpcib_do_ip_ioctl(int, int, void *);
114e11c3f44Smeem static boolean_t rpcib_get_ib_addresses(rpcib_ipaddrs_t *, rpcib_ipaddrs_t *);
1150a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *, int);
1160a701b1eSRobert Gordon static void rib_force_cleanup(void *);
1177c478bd9Sstevel@tonic-gate 
1180a701b1eSRobert Gordon struct {
1190a701b1eSRobert Gordon 	kstat_named_t cache_limit;
1200a701b1eSRobert Gordon 	kstat_named_t cache_allocation;
1210a701b1eSRobert Gordon 	kstat_named_t cache_hits;
1220a701b1eSRobert Gordon 	kstat_named_t cache_misses;
1230a701b1eSRobert Gordon 	kstat_named_t cache_misses_above_the_limit;
1240a701b1eSRobert Gordon } rpcib_kstat = {
1250a701b1eSRobert Gordon 	{"cache_limit",			KSTAT_DATA_UINT64 },
1260a701b1eSRobert Gordon 	{"cache_allocation",		KSTAT_DATA_UINT64 },
1270a701b1eSRobert Gordon 	{"cache_hits",			KSTAT_DATA_UINT64 },
1280a701b1eSRobert Gordon 	{"cache_misses",		KSTAT_DATA_UINT64 },
1290a701b1eSRobert Gordon 	{"cache_misses_above_the_limit", KSTAT_DATA_UINT64 },
1300a701b1eSRobert Gordon };
1317c478bd9Sstevel@tonic-gate 
1327c478bd9Sstevel@tonic-gate /* rpcib cb_ops */
1337c478bd9Sstevel@tonic-gate static struct cb_ops rpcib_cbops = {
1347c478bd9Sstevel@tonic-gate 	nulldev,		/* open */
1357c478bd9Sstevel@tonic-gate 	nulldev,		/* close */
1367c478bd9Sstevel@tonic-gate 	nodev,			/* strategy */
1377c478bd9Sstevel@tonic-gate 	nodev,			/* print */
1387c478bd9Sstevel@tonic-gate 	nodev,			/* dump */
1397c478bd9Sstevel@tonic-gate 	nodev,			/* read */
1407c478bd9Sstevel@tonic-gate 	nodev,			/* write */
1417c478bd9Sstevel@tonic-gate 	nodev,			/* ioctl */
1427c478bd9Sstevel@tonic-gate 	nodev,			/* devmap */
1437c478bd9Sstevel@tonic-gate 	nodev,			/* mmap */
1447c478bd9Sstevel@tonic-gate 	nodev,			/* segmap */
1457c478bd9Sstevel@tonic-gate 	nochpoll,		/* poll */
1467c478bd9Sstevel@tonic-gate 	ddi_prop_op,		/* prop_op */
1477c478bd9Sstevel@tonic-gate 	NULL,			/* stream */
1487c478bd9Sstevel@tonic-gate 	D_MP,			/* cb_flag */
1497c478bd9Sstevel@tonic-gate 	CB_REV,			/* rev */
1507c478bd9Sstevel@tonic-gate 	nodev,			/* int (*cb_aread)() */
1517c478bd9Sstevel@tonic-gate 	nodev			/* int (*cb_awrite)() */
1527c478bd9Sstevel@tonic-gate };
1537c478bd9Sstevel@tonic-gate 
1547c478bd9Sstevel@tonic-gate /*
1557c478bd9Sstevel@tonic-gate  * Device options
1567c478bd9Sstevel@tonic-gate  */
1577c478bd9Sstevel@tonic-gate static struct dev_ops rpcib_ops = {
1587c478bd9Sstevel@tonic-gate 	DEVO_REV,		/* devo_rev, */
1597c478bd9Sstevel@tonic-gate 	0,			/* refcnt  */
1607c478bd9Sstevel@tonic-gate 	rpcib_getinfo,		/* info */
1617c478bd9Sstevel@tonic-gate 	nulldev,		/* identify */
1627c478bd9Sstevel@tonic-gate 	nulldev,		/* probe */
1637c478bd9Sstevel@tonic-gate 	rpcib_attach,		/* attach */
1647c478bd9Sstevel@tonic-gate 	rpcib_detach,		/* detach */
1657c478bd9Sstevel@tonic-gate 	nodev,			/* reset */
1667c478bd9Sstevel@tonic-gate 	&rpcib_cbops,		    /* driver ops - devctl interfaces */
1677c478bd9Sstevel@tonic-gate 	NULL,			/* bus operations */
16819397407SSherry Moore 	NULL,			/* power */
16919397407SSherry Moore 	ddi_quiesce_not_needed,		/* quiesce */
1707c478bd9Sstevel@tonic-gate };
1717c478bd9Sstevel@tonic-gate 
1727c478bd9Sstevel@tonic-gate /*
1737c478bd9Sstevel@tonic-gate  * Module linkage information.
1747c478bd9Sstevel@tonic-gate  */
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate static struct modldrv rib_modldrv = {
1777c478bd9Sstevel@tonic-gate 	&mod_driverops,		/* Driver module */
17819397407SSherry Moore 	"RPCIB plugin driver",	/* Driver name and version */
1797c478bd9Sstevel@tonic-gate 	&rpcib_ops,		/* Driver ops */
1807c478bd9Sstevel@tonic-gate };
1817c478bd9Sstevel@tonic-gate 
1827c478bd9Sstevel@tonic-gate static struct modlinkage rib_modlinkage = {
1837c478bd9Sstevel@tonic-gate 	MODREV_1,
1847c478bd9Sstevel@tonic-gate 	(void *)&rib_modldrv,
1857c478bd9Sstevel@tonic-gate 	NULL
1867c478bd9Sstevel@tonic-gate };
1877c478bd9Sstevel@tonic-gate 
1880a701b1eSRobert Gordon typedef struct rib_lrc_entry {
1890a701b1eSRobert Gordon 	struct rib_lrc_entry *forw;
1900a701b1eSRobert Gordon 	struct rib_lrc_entry *back;
1910a701b1eSRobert Gordon 	char *lrc_buf;
1920a701b1eSRobert Gordon 
1930a701b1eSRobert Gordon 	uint32_t lrc_len;
1940a701b1eSRobert Gordon 	void  *avl_node;
1950a701b1eSRobert Gordon 	bool_t registered;
1960a701b1eSRobert Gordon 
1970a701b1eSRobert Gordon 	struct mrc lrc_mhandle;
1980a701b1eSRobert Gordon 	bool_t lrc_on_freed_list;
1990a701b1eSRobert Gordon } rib_lrc_entry_t;
2000a701b1eSRobert Gordon 
2010a701b1eSRobert Gordon typedef	struct cache_struct	{
2020a701b1eSRobert Gordon 	rib_lrc_entry_t		r;
2030a701b1eSRobert Gordon 	uint32_t		len;
2040a701b1eSRobert Gordon 	uint32_t		elements;
2050a701b1eSRobert Gordon 	kmutex_t		node_lock;
2060a701b1eSRobert Gordon 	avl_node_t		avl_link;
2070a701b1eSRobert Gordon } cache_avl_struct_t;
2080a701b1eSRobert Gordon 
2090a701b1eSRobert Gordon static uint64_t	rib_total_buffers = 0;
2100a701b1eSRobert Gordon uint64_t	cache_limit = 100 * 1024 * 1024;
2110a701b1eSRobert Gordon static volatile uint64_t	cache_allocation = 0;
2120a701b1eSRobert Gordon static uint64_t	cache_watermark = 80 * 1024 * 1024;
2130a701b1eSRobert Gordon static uint64_t	cache_hits = 0;
2140a701b1eSRobert Gordon static uint64_t	cache_misses = 0;
2150a701b1eSRobert Gordon static uint64_t	cache_cold_misses = 0;
2160a701b1eSRobert Gordon static uint64_t	cache_hot_misses = 0;
2170a701b1eSRobert Gordon static uint64_t	cache_misses_above_the_limit = 0;
2180a701b1eSRobert Gordon static bool_t	stats_enabled = FALSE;
2190a701b1eSRobert Gordon 
2200a701b1eSRobert Gordon static uint64_t max_unsignaled_rws = 5;
221f837ee4aSSiddheshwar Mahesh int nfs_rdma_port = NFS_RDMA_PORT;
2220a701b1eSRobert Gordon 
2237c478bd9Sstevel@tonic-gate /*
2247c478bd9Sstevel@tonic-gate  * rib_stat: private data pointer used when registering
2257c478bd9Sstevel@tonic-gate  *	with the IBTF.  It is returned to the consumer
2267c478bd9Sstevel@tonic-gate  *	in all callbacks.
2277c478bd9Sstevel@tonic-gate  */
2287c478bd9Sstevel@tonic-gate static rpcib_state_t *rib_stat = NULL;
2297c478bd9Sstevel@tonic-gate 
#define	RNR_RETRIES	IBT_RNR_RETRY_1
#define	MAX_PORTS	2
/*
 * The dummy work-request id needs 48 bits.  The ULL suffix makes it an
 * explicitly 64-bit unsigned constant, so its type does not depend on the
 * data model or language level of the build (this file is also built
 * 32-bit, see the rib_max_rbufs tunable).
 */
#define	RDMA_DUMMY_WRID	0x4D3A1D4D3A1DULL
#define	RDMA_CONN_REAP_RETRY	10	/* 10 secs */
2347c478bd9Sstevel@tonic-gate 
2350a701b1eSRobert Gordon int preposted_rbufs = RDMA_BUFS_GRANT;
2367c478bd9Sstevel@tonic-gate int send_threshold = 1;
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate /*
239*065714dcSSiddheshwar Mahesh  * Old cards with Tavor driver have limited memory footprint
240*065714dcSSiddheshwar Mahesh  * when booted in 32bit. The rib_max_rbufs tunable can be
241*065714dcSSiddheshwar Mahesh  * tuned for more buffers if needed.
242*065714dcSSiddheshwar Mahesh  */
243*065714dcSSiddheshwar Mahesh 
244*065714dcSSiddheshwar Mahesh #if !defined(_ELF64) && !defined(__sparc)
245*065714dcSSiddheshwar Mahesh int rib_max_rbufs = MAX_BUFS;
246*065714dcSSiddheshwar Mahesh #else
247*065714dcSSiddheshwar Mahesh int rib_max_rbufs = 10 * MAX_BUFS;
248*065714dcSSiddheshwar Mahesh #endif	/* !(_ELF64) && !(__sparc) */
249*065714dcSSiddheshwar Mahesh 
250*065714dcSSiddheshwar Mahesh int rib_conn_timeout = 60 * 12;		/* 12 minutes */
251*065714dcSSiddheshwar Mahesh 
252*065714dcSSiddheshwar Mahesh /*
2537c478bd9Sstevel@tonic-gate  * State of the plugin.
2547c478bd9Sstevel@tonic-gate  * ACCEPT = accepting new connections and requests.
2557c478bd9Sstevel@tonic-gate  * NO_ACCEPT = not accepting new connection and requests.
2567c478bd9Sstevel@tonic-gate  * This should eventually move to rpcib_state_t structure, since this
2577c478bd9Sstevel@tonic-gate  * will tell in which state the plugin is for a particular type of service
2587c478bd9Sstevel@tonic-gate  * like NFS, NLM or v4 Callback deamon. The plugin might be in accept
2597c478bd9Sstevel@tonic-gate  * state for one and in no_accept state for the other.
2607c478bd9Sstevel@tonic-gate  */
2617c478bd9Sstevel@tonic-gate int		plugin_state;
2627c478bd9Sstevel@tonic-gate kmutex_t	plugin_state_lock;
2637c478bd9Sstevel@tonic-gate 
2640a701b1eSRobert Gordon ldi_ident_t rpcib_li;
2657c478bd9Sstevel@tonic-gate 
2667c478bd9Sstevel@tonic-gate /*
2677c478bd9Sstevel@tonic-gate  * RPCIB RDMATF operations
2687c478bd9Sstevel@tonic-gate  */
2697c478bd9Sstevel@tonic-gate static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
2707c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect(CONN *conn);
2717c478bd9Sstevel@tonic-gate static void rib_listen(struct rdma_svc_data *rd);
2727c478bd9Sstevel@tonic-gate static void rib_listen_stop(struct rdma_svc_data *rd);
2730a701b1eSRobert Gordon static rdma_stat rib_registermem(CONN *conn, caddr_t  adsp, caddr_t buf,
2740a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle);
2757c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
2767c478bd9Sstevel@tonic-gate 	struct mrc buf_handle);
2770a701b1eSRobert Gordon static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
2780a701b1eSRobert Gordon 		caddr_t buf, uint_t buflen, struct mrc *buf_handle);
2790a701b1eSRobert Gordon static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
2800a701b1eSRobert Gordon 		struct mrc buf_handle);
2810a701b1eSRobert Gordon static rdma_stat rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf,
2820a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle,
2830a701b1eSRobert Gordon 	void *lrc);
2847c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
2850a701b1eSRobert Gordon 	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
2867c478bd9Sstevel@tonic-gate static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
2877c478bd9Sstevel@tonic-gate 	caddr_t buf, int len, int cpu);
2887c478bd9Sstevel@tonic-gate 
2897c478bd9Sstevel@tonic-gate static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
2907c478bd9Sstevel@tonic-gate 
2917c478bd9Sstevel@tonic-gate static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf);
2927c478bd9Sstevel@tonic-gate static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
2937c478bd9Sstevel@tonic-gate 
2947c478bd9Sstevel@tonic-gate static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
2977c478bd9Sstevel@tonic-gate static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
2987c478bd9Sstevel@tonic-gate static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
2990a701b1eSRobert Gordon static rdma_stat rib_post_resp_remove(CONN *conn, uint32_t msgid);
3007c478bd9Sstevel@tonic-gate static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
3017c478bd9Sstevel@tonic-gate static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
3027c478bd9Sstevel@tonic-gate static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
3037c478bd9Sstevel@tonic-gate static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
304214ae7d0SSiddheshwar Mahesh static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rpcib_ping_t *);
3057c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
3067c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_release(CONN *conn);
3077c478bd9Sstevel@tonic-gate static rdma_stat rib_getinfo(rdma_info_t *info);
3080a701b1eSRobert Gordon 
3090a701b1eSRobert Gordon static rib_lrc_entry_t *rib_get_cache_buf(CONN *conn, uint32_t len);
3100a701b1eSRobert Gordon static void rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
3110a701b1eSRobert Gordon static void rib_destroy_cache(rib_hca_t *hca);
3120a701b1eSRobert Gordon static	void	rib_server_side_cache_reclaim(void *argp);
3130a701b1eSRobert Gordon static int avl_compare(const void *t1, const void *t2);
3140a701b1eSRobert Gordon 
3157c478bd9Sstevel@tonic-gate static void rib_stop_services(rib_hca_t *);
3160a701b1eSRobert Gordon static void rib_close_channels(rib_conn_list_t *);
317*065714dcSSiddheshwar Mahesh static void rib_conn_close(void *);
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate /*
3207c478bd9Sstevel@tonic-gate  * RPCIB addressing operations
3217c478bd9Sstevel@tonic-gate  */
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate /*
3247c478bd9Sstevel@tonic-gate  * RDMA operations the RPCIB module exports
3257c478bd9Sstevel@tonic-gate  */
3267c478bd9Sstevel@tonic-gate static rdmaops_t rib_ops = {
3277c478bd9Sstevel@tonic-gate 	rib_reachable,
3287c478bd9Sstevel@tonic-gate 	rib_conn_get,
3297c478bd9Sstevel@tonic-gate 	rib_conn_release,
3307c478bd9Sstevel@tonic-gate 	rib_listen,
3317c478bd9Sstevel@tonic-gate 	rib_listen_stop,
3327c478bd9Sstevel@tonic-gate 	rib_registermem,
3337c478bd9Sstevel@tonic-gate 	rib_deregistermem,
3347c478bd9Sstevel@tonic-gate 	rib_registermemsync,
3357c478bd9Sstevel@tonic-gate 	rib_deregistermemsync,
3367c478bd9Sstevel@tonic-gate 	rib_syncmem,
3377c478bd9Sstevel@tonic-gate 	rib_reg_buf_alloc,
3387c478bd9Sstevel@tonic-gate 	rib_reg_buf_free,
3397c478bd9Sstevel@tonic-gate 	rib_send,
3407c478bd9Sstevel@tonic-gate 	rib_send_resp,
3417c478bd9Sstevel@tonic-gate 	rib_post_resp,
3420a701b1eSRobert Gordon 	rib_post_resp_remove,
3437c478bd9Sstevel@tonic-gate 	rib_post_recv,
3447c478bd9Sstevel@tonic-gate 	rib_recv,
3457c478bd9Sstevel@tonic-gate 	rib_read,
3467c478bd9Sstevel@tonic-gate 	rib_write,
3470a701b1eSRobert Gordon 	rib_getinfo,
3487c478bd9Sstevel@tonic-gate };
3497c478bd9Sstevel@tonic-gate 
3507c478bd9Sstevel@tonic-gate /*
3517c478bd9Sstevel@tonic-gate  * RDMATF RPCIB plugin details
3527c478bd9Sstevel@tonic-gate  */
3537c478bd9Sstevel@tonic-gate static rdma_mod_t rib_mod = {
3547c478bd9Sstevel@tonic-gate 	"ibtf",		/* api name */
3557c478bd9Sstevel@tonic-gate 	RDMATF_VERS_1,
3567c478bd9Sstevel@tonic-gate 	0,
3577c478bd9Sstevel@tonic-gate 	&rib_ops,	/* rdma op vector for ibtf */
3587c478bd9Sstevel@tonic-gate };
3597c478bd9Sstevel@tonic-gate 
3607c478bd9Sstevel@tonic-gate static rdma_stat open_hcas(rpcib_state_t *);
3617c478bd9Sstevel@tonic-gate static rdma_stat rib_qp_init(rib_qp_t *, int);
3627c478bd9Sstevel@tonic-gate static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
3637c478bd9Sstevel@tonic-gate static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
3647c478bd9Sstevel@tonic-gate static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
3657c478bd9Sstevel@tonic-gate static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
3667c478bd9Sstevel@tonic-gate static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
3670a701b1eSRobert Gordon static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t,
3680a701b1eSRobert Gordon 	ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_mr_desc_t *);
3690a701b1eSRobert Gordon static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
3700a701b1eSRobert Gordon 	ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
371214ae7d0SSiddheshwar Mahesh static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, rpcib_ping_t *);
3727c478bd9Sstevel@tonic-gate static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
3737c478bd9Sstevel@tonic-gate 	rib_qp_t **);
3747c478bd9Sstevel@tonic-gate static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
3757c478bd9Sstevel@tonic-gate 	rib_qp_t **);
3767c478bd9Sstevel@tonic-gate static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *);
3777c478bd9Sstevel@tonic-gate static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *);
3787c478bd9Sstevel@tonic-gate static int rib_free_sendwait(struct send_wid *);
3797c478bd9Sstevel@tonic-gate static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid);
3807c478bd9Sstevel@tonic-gate static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd);
3817c478bd9Sstevel@tonic-gate static void rdma_done_rem_list(rib_qp_t *);
3827c478bd9Sstevel@tonic-gate static void rdma_done_notify(rib_qp_t *qp, uint32_t xid);
3837c478bd9Sstevel@tonic-gate 
3847c478bd9Sstevel@tonic-gate static void rib_async_handler(void *,
3857c478bd9Sstevel@tonic-gate 	ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *);
3867c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *);
3877c478bd9Sstevel@tonic-gate static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *);
3887c478bd9Sstevel@tonic-gate static int rib_free_svc_recv(struct svc_recv *);
3897c478bd9Sstevel@tonic-gate static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
3907c478bd9Sstevel@tonic-gate static void rib_free_wid(struct recv_wid *);
3917c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
3927c478bd9Sstevel@tonic-gate static void rib_detach_hca(rib_hca_t *);
393*065714dcSSiddheshwar Mahesh static void rib_close_a_channel(CONN *);
394*065714dcSSiddheshwar Mahesh static void rib_send_hold(rib_qp_t *);
395*065714dcSSiddheshwar Mahesh static void rib_send_rele(rib_qp_t *);
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate /*
3987c478bd9Sstevel@tonic-gate  * Registration with IBTF as a consumer
3997c478bd9Sstevel@tonic-gate  */
4007c478bd9Sstevel@tonic-gate static struct ibt_clnt_modinfo_s rib_modinfo = {
40103494a98SBill Taylor 	IBTI_V_CURR,
4027c478bd9Sstevel@tonic-gate 	IBT_GENERIC,
4037c478bd9Sstevel@tonic-gate 	rib_async_handler,	/* async event handler */
4047c478bd9Sstevel@tonic-gate 	NULL,			/* Memory Region Handler */
4057c478bd9Sstevel@tonic-gate 	"nfs/ib"
4067c478bd9Sstevel@tonic-gate };
4077c478bd9Sstevel@tonic-gate 
4087c478bd9Sstevel@tonic-gate /*
4097c478bd9Sstevel@tonic-gate  * Global strucuture
4107c478bd9Sstevel@tonic-gate  */
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate typedef struct rpcib_s {
4137c478bd9Sstevel@tonic-gate 	dev_info_t	*rpcib_dip;
4147c478bd9Sstevel@tonic-gate 	kmutex_t	rpcib_mutex;
4157c478bd9Sstevel@tonic-gate } rpcib_t;
4167c478bd9Sstevel@tonic-gate 
4177c478bd9Sstevel@tonic-gate rpcib_t rpcib;
4187c478bd9Sstevel@tonic-gate 
/*
 * Debug level for the rpcib kernel module, settable from /etc/system.
 * Set it to values greater than 1 to control the amount of debugging
 * messages required.
 */
int rib_debug = 0;
4267c478bd9Sstevel@tonic-gate 
4277c478bd9Sstevel@tonic-gate int
4287c478bd9Sstevel@tonic-gate _init(void)
4297c478bd9Sstevel@tonic-gate {
4307c478bd9Sstevel@tonic-gate 	int error;
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 	error = mod_install((struct modlinkage *)&rib_modlinkage);
4337c478bd9Sstevel@tonic-gate 	if (error != 0) {
4347c478bd9Sstevel@tonic-gate 		/*
4357c478bd9Sstevel@tonic-gate 		 * Could not load module
4367c478bd9Sstevel@tonic-gate 		 */
4377c478bd9Sstevel@tonic-gate 		return (error);
4387c478bd9Sstevel@tonic-gate 	}
4397c478bd9Sstevel@tonic-gate 	mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);
4407c478bd9Sstevel@tonic-gate 	return (0);
4417c478bd9Sstevel@tonic-gate }
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate int
4447c478bd9Sstevel@tonic-gate _fini()
4457c478bd9Sstevel@tonic-gate {
4467c478bd9Sstevel@tonic-gate 	int status;
4477c478bd9Sstevel@tonic-gate 
4487c478bd9Sstevel@tonic-gate 	/*
4497c478bd9Sstevel@tonic-gate 	 * Remove module
4507c478bd9Sstevel@tonic-gate 	 */
4517c478bd9Sstevel@tonic-gate 	if ((status = mod_remove(&rib_modlinkage)) != 0) {
4527c478bd9Sstevel@tonic-gate 		return (status);
4537c478bd9Sstevel@tonic-gate 	}
4547c478bd9Sstevel@tonic-gate 	mutex_destroy(&plugin_state_lock);
4557c478bd9Sstevel@tonic-gate 	return (0);
4567c478bd9Sstevel@tonic-gate }
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate int
4597c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
4607c478bd9Sstevel@tonic-gate {
4617c478bd9Sstevel@tonic-gate 	return (mod_info(&rib_modlinkage, modinfop));
4627c478bd9Sstevel@tonic-gate }
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate /*
4657c478bd9Sstevel@tonic-gate  * rpcib_getinfo()
4667c478bd9Sstevel@tonic-gate  * Given the device number, return the devinfo pointer or the
4677c478bd9Sstevel@tonic-gate  * instance number.
4687c478bd9Sstevel@tonic-gate  * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
4697c478bd9Sstevel@tonic-gate  */
4707c478bd9Sstevel@tonic-gate 
4717c478bd9Sstevel@tonic-gate /*ARGSUSED*/
4727c478bd9Sstevel@tonic-gate static int
4737c478bd9Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
4747c478bd9Sstevel@tonic-gate {
4757c478bd9Sstevel@tonic-gate 	int ret = DDI_SUCCESS;
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate 	switch (cmd) {
4787c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
4797c478bd9Sstevel@tonic-gate 		if (rpcib.rpcib_dip != NULL)
4807c478bd9Sstevel@tonic-gate 			*result = rpcib.rpcib_dip;
4817c478bd9Sstevel@tonic-gate 		else {
4827c478bd9Sstevel@tonic-gate 			*result = NULL;
4837c478bd9Sstevel@tonic-gate 			ret = DDI_FAILURE;
4847c478bd9Sstevel@tonic-gate 		}
4857c478bd9Sstevel@tonic-gate 		break;
4867c478bd9Sstevel@tonic-gate 
4877c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
4887c478bd9Sstevel@tonic-gate 		*result = NULL;
4897c478bd9Sstevel@tonic-gate 		break;
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 	default:
4927c478bd9Sstevel@tonic-gate 		ret = DDI_FAILURE;
4937c478bd9Sstevel@tonic-gate 	}
4947c478bd9Sstevel@tonic-gate 	return (ret);
4957c478bd9Sstevel@tonic-gate }
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate static int
4987c478bd9Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4997c478bd9Sstevel@tonic-gate {
5007c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
5017c478bd9Sstevel@tonic-gate 	rdma_stat	r_status;
5027c478bd9Sstevel@tonic-gate 
5037c478bd9Sstevel@tonic-gate 	switch (cmd) {
5047c478bd9Sstevel@tonic-gate 	case DDI_ATTACH:
5057c478bd9Sstevel@tonic-gate 		break;
5067c478bd9Sstevel@tonic-gate 	case DDI_RESUME:
5077c478bd9Sstevel@tonic-gate 		return (DDI_SUCCESS);
5087c478bd9Sstevel@tonic-gate 	default:
5097c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5107c478bd9Sstevel@tonic-gate 	}
5117c478bd9Sstevel@tonic-gate 
5127c478bd9Sstevel@tonic-gate 	mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL);
5137c478bd9Sstevel@tonic-gate 
5147c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
5157c478bd9Sstevel@tonic-gate 	if (rpcib.rpcib_dip != NULL) {
5167c478bd9Sstevel@tonic-gate 		mutex_exit(&rpcib.rpcib_mutex);
5177c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5187c478bd9Sstevel@tonic-gate 	}
5197c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = dip;
5207c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
5217c478bd9Sstevel@tonic-gate 	/*
5227c478bd9Sstevel@tonic-gate 	 * Create the "rpcib" minor-node.
5237c478bd9Sstevel@tonic-gate 	 */
5247c478bd9Sstevel@tonic-gate 	if (ddi_create_minor_node(dip,
5257c478bd9Sstevel@tonic-gate 	    "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
5267c478bd9Sstevel@tonic-gate 		/* Error message, no cmn_err as they print on console */
5277c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5287c478bd9Sstevel@tonic-gate 	}
5297c478bd9Sstevel@tonic-gate 
5307c478bd9Sstevel@tonic-gate 	if (rib_stat == NULL) {
5317c478bd9Sstevel@tonic-gate 		rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP);
5327c478bd9Sstevel@tonic-gate 		mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL);
5337c478bd9Sstevel@tonic-gate 	}
5347c478bd9Sstevel@tonic-gate 
5357c478bd9Sstevel@tonic-gate 	rib_stat->hca_count = ibt_get_hca_list(&rib_stat->hca_guids);
5367c478bd9Sstevel@tonic-gate 	if (rib_stat->hca_count < 1) {
5377c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
5387c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
5397c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
5407c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5417c478bd9Sstevel@tonic-gate 	}
5427c478bd9Sstevel@tonic-gate 
5437c478bd9Sstevel@tonic-gate 	ibt_status = ibt_attach(&rib_modinfo, dip,
5447c478bd9Sstevel@tonic-gate 	    (void *)rib_stat, &rib_stat->ibt_clnt_hdl);
5450a701b1eSRobert Gordon 
5467c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
5477c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
5487c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
5497c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
5507c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
5517c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5527c478bd9Sstevel@tonic-gate 	}
5537c478bd9Sstevel@tonic-gate 
5547c478bd9Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
5557c478bd9Sstevel@tonic-gate 	if (open_hcas(rib_stat) != RDMA_SUCCESS) {
5567c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
55751f34d4bSRajkumar Sivaprakasam 		goto open_fail;
5587c478bd9Sstevel@tonic-gate 	}
5597c478bd9Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
5607c478bd9Sstevel@tonic-gate 
56151f34d4bSRajkumar Sivaprakasam 	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) !=
56251f34d4bSRajkumar Sivaprakasam 	    DDI_PROP_SUCCESS) {
56351f34d4bSRajkumar Sivaprakasam 		cmn_err(CE_WARN, "rpcib_attach: ddi-no-autodetach prop update "
56451f34d4bSRajkumar Sivaprakasam 		    "failed.");
56551f34d4bSRajkumar Sivaprakasam 		goto register_fail;
56651f34d4bSRajkumar Sivaprakasam 	}
56751f34d4bSRajkumar Sivaprakasam 
5687c478bd9Sstevel@tonic-gate 	/*
5697c478bd9Sstevel@tonic-gate 	 * Register with rdmatf
5707c478bd9Sstevel@tonic-gate 	 */
57151f34d4bSRajkumar Sivaprakasam 	rib_mod.rdma_count = rib_stat->nhca_inited;
5727c478bd9Sstevel@tonic-gate 	r_status = rdma_register_mod(&rib_mod);
5737c478bd9Sstevel@tonic-gate 	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
57451f34d4bSRajkumar Sivaprakasam 		cmn_err(CE_WARN, "rpcib_attach:rdma_register_mod failed, "
57551f34d4bSRajkumar Sivaprakasam 		    "status = %d", r_status);
57651f34d4bSRajkumar Sivaprakasam 		goto register_fail;
57751f34d4bSRajkumar Sivaprakasam 	}
57851f34d4bSRajkumar Sivaprakasam 
57951f34d4bSRajkumar Sivaprakasam 	return (DDI_SUCCESS);
58051f34d4bSRajkumar Sivaprakasam 
58151f34d4bSRajkumar Sivaprakasam register_fail:
5827c478bd9Sstevel@tonic-gate 	rib_detach_hca(rib_stat->hca);
58351f34d4bSRajkumar Sivaprakasam open_fail:
5847c478bd9Sstevel@tonic-gate 	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
5857c478bd9Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
5867c478bd9Sstevel@tonic-gate 	mutex_destroy(&rib_stat->open_hca_lock);
5877c478bd9Sstevel@tonic-gate 	kmem_free(rib_stat, sizeof (*rib_stat));
5887c478bd9Sstevel@tonic-gate 	rib_stat = NULL;
5897c478bd9Sstevel@tonic-gate 	return (DDI_FAILURE);
5907c478bd9Sstevel@tonic-gate }
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate /*ARGSUSED*/
5937c478bd9Sstevel@tonic-gate static int
5947c478bd9Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5957c478bd9Sstevel@tonic-gate {
5967c478bd9Sstevel@tonic-gate 	switch (cmd) {
5977c478bd9Sstevel@tonic-gate 
5987c478bd9Sstevel@tonic-gate 	case DDI_DETACH:
5997c478bd9Sstevel@tonic-gate 		break;
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate 	case DDI_SUSPEND:
6027c478bd9Sstevel@tonic-gate 	default:
6037c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
6047c478bd9Sstevel@tonic-gate 	}
6057c478bd9Sstevel@tonic-gate 
6067c478bd9Sstevel@tonic-gate 	/*
6077c478bd9Sstevel@tonic-gate 	 * Detach the hca and free resources
6087c478bd9Sstevel@tonic-gate 	 */
6097c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
6107c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
6117c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
6127c478bd9Sstevel@tonic-gate 	rib_detach_hca(rib_stat->hca);
6137c478bd9Sstevel@tonic-gate 	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
6147c478bd9Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
61551f34d4bSRajkumar Sivaprakasam 	mutex_destroy(&rib_stat->open_hca_lock);
61651f34d4bSRajkumar Sivaprakasam 	if (rib_stat->hcas) {
61751f34d4bSRajkumar Sivaprakasam 		kmem_free(rib_stat->hcas, rib_stat->hca_count *
61851f34d4bSRajkumar Sivaprakasam 		    sizeof (rib_hca_t));
61951f34d4bSRajkumar Sivaprakasam 		rib_stat->hcas = NULL;
62051f34d4bSRajkumar Sivaprakasam 	}
62151f34d4bSRajkumar Sivaprakasam 	kmem_free(rib_stat, sizeof (*rib_stat));
62251f34d4bSRajkumar Sivaprakasam 	rib_stat = NULL;
6237c478bd9Sstevel@tonic-gate 
6247c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
6257c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = NULL;
6267c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
6277c478bd9Sstevel@tonic-gate 	mutex_destroy(&rpcib.rpcib_mutex);
6287c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
6297c478bd9Sstevel@tonic-gate }
6307c478bd9Sstevel@tonic-gate 
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int);
6337c478bd9Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int);
6347c478bd9Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype);
6357c478bd9Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t);
6367c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *);
6377c478bd9Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *);
6387c478bd9Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
6397c478bd9Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
6407c478bd9Sstevel@tonic-gate 
6410a701b1eSRobert Gordon 
6427c478bd9Sstevel@tonic-gate /*
6437c478bd9Sstevel@tonic-gate  * One CQ pair per HCA
6447c478bd9Sstevel@tonic-gate  */
6457c478bd9Sstevel@tonic-gate static rdma_stat
6467c478bd9Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
6477c478bd9Sstevel@tonic-gate 	rib_cq_t **cqp, rpcib_state_t *ribstat)
6487c478bd9Sstevel@tonic-gate {
6497c478bd9Sstevel@tonic-gate 	rib_cq_t	*cq;
6507c478bd9Sstevel@tonic-gate 	ibt_cq_attr_t	cq_attr;
6517c478bd9Sstevel@tonic-gate 	uint32_t	real_size;
6527c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
6537c478bd9Sstevel@tonic-gate 	rdma_stat	error = RDMA_SUCCESS;
6547c478bd9Sstevel@tonic-gate 
6557c478bd9Sstevel@tonic-gate 	cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP);
6567c478bd9Sstevel@tonic-gate 	cq->rib_hca = hca;
6577c478bd9Sstevel@tonic-gate 	cq_attr.cq_size = cq_size;
6587c478bd9Sstevel@tonic-gate 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
6597c478bd9Sstevel@tonic-gate 	status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl,
6607c478bd9Sstevel@tonic-gate 	    &real_size);
6617c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
6627c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed,"
6637c478bd9Sstevel@tonic-gate 		    " status=%d", status);
6647c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
6657c478bd9Sstevel@tonic-gate 		goto fail;
6667c478bd9Sstevel@tonic-gate 	}
6677c478bd9Sstevel@tonic-gate 	ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, ribstat);
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate 	/*
6707c478bd9Sstevel@tonic-gate 	 * Enable CQ callbacks. CQ Callbacks are single shot
6717c478bd9Sstevel@tonic-gate 	 * (e.g. you have to call ibt_enable_cq_notify()
6727c478bd9Sstevel@tonic-gate 	 * after each callback to get another one).
6737c478bd9Sstevel@tonic-gate 	 */
6747c478bd9Sstevel@tonic-gate 	status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION);
6757c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
6767c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: "
6777c478bd9Sstevel@tonic-gate 		    "enable_cq_notify failed, status %d", status);
6787c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
6797c478bd9Sstevel@tonic-gate 		goto fail;
6807c478bd9Sstevel@tonic-gate 	}
6817c478bd9Sstevel@tonic-gate 	*cqp = cq;
6827c478bd9Sstevel@tonic-gate 
6837c478bd9Sstevel@tonic-gate 	return (error);
6847c478bd9Sstevel@tonic-gate fail:
6857c478bd9Sstevel@tonic-gate 	if (cq->rib_cq_hdl)
6867c478bd9Sstevel@tonic-gate 		(void) ibt_free_cq(cq->rib_cq_hdl);
6877c478bd9Sstevel@tonic-gate 	if (cq)
6887c478bd9Sstevel@tonic-gate 		kmem_free(cq, sizeof (rib_cq_t));
6897c478bd9Sstevel@tonic-gate 	return (error);
6907c478bd9Sstevel@tonic-gate }
6917c478bd9Sstevel@tonic-gate 
6927c478bd9Sstevel@tonic-gate static rdma_stat
6937c478bd9Sstevel@tonic-gate open_hcas(rpcib_state_t *ribstat)
6947c478bd9Sstevel@tonic-gate {
6957c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
6967c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
6977c478bd9Sstevel@tonic-gate 	rdma_stat		status;
6987c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*pinfop;
6997c478bd9Sstevel@tonic-gate 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
7007c478bd9Sstevel@tonic-gate 	uint_t			size, cq_size;
7017c478bd9Sstevel@tonic-gate 	int			i;
7020a701b1eSRobert Gordon 	kstat_t *ksp;
7030a701b1eSRobert Gordon 	cache_avl_struct_t example_avl_node;
7040a701b1eSRobert Gordon 	char rssc_name[32];
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
7070a701b1eSRobert Gordon 
7087c478bd9Sstevel@tonic-gate 	if (ribstat->hcas == NULL)
7097c478bd9Sstevel@tonic-gate 		ribstat->hcas = kmem_zalloc(ribstat->hca_count *
7107c478bd9Sstevel@tonic-gate 		    sizeof (rib_hca_t), KM_SLEEP);
7117c478bd9Sstevel@tonic-gate 
7127c478bd9Sstevel@tonic-gate 	/*
7137c478bd9Sstevel@tonic-gate 	 * Open a hca and setup for RDMA
7147c478bd9Sstevel@tonic-gate 	 */
7157c478bd9Sstevel@tonic-gate 	for (i = 0; i < ribstat->hca_count; i++) {
7167c478bd9Sstevel@tonic-gate 		ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl,
7177c478bd9Sstevel@tonic-gate 		    ribstat->hca_guids[i],
7187c478bd9Sstevel@tonic-gate 		    &ribstat->hcas[i].hca_hdl);
7197c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7207c478bd9Sstevel@tonic-gate 			continue;
7217c478bd9Sstevel@tonic-gate 		}
7227c478bd9Sstevel@tonic-gate 		ribstat->hcas[i].hca_guid = ribstat->hca_guids[i];
7237c478bd9Sstevel@tonic-gate 		hca = &(ribstat->hcas[i]);
7247c478bd9Sstevel@tonic-gate 		hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl;
7257c478bd9Sstevel@tonic-gate 		hca->state = HCA_INITED;
7267c478bd9Sstevel@tonic-gate 
7277c478bd9Sstevel@tonic-gate 		/*
7287c478bd9Sstevel@tonic-gate 		 * query HCA info
7297c478bd9Sstevel@tonic-gate 		 */
7307c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs);
7317c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7327c478bd9Sstevel@tonic-gate 			goto fail1;
7337c478bd9Sstevel@tonic-gate 		}
7347c478bd9Sstevel@tonic-gate 
7357c478bd9Sstevel@tonic-gate 		/*
7367c478bd9Sstevel@tonic-gate 		 * One PD (Protection Domain) per HCA.
7377c478bd9Sstevel@tonic-gate 		 * A qp is allowed to access a memory region
7387c478bd9Sstevel@tonic-gate 		 * only when it's in the same PD as that of
7397c478bd9Sstevel@tonic-gate 		 * the memory region.
7407c478bd9Sstevel@tonic-gate 		 */
7417c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl);
7427c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7437c478bd9Sstevel@tonic-gate 			goto fail1;
7447c478bd9Sstevel@tonic-gate 		}
7457c478bd9Sstevel@tonic-gate 
7467c478bd9Sstevel@tonic-gate 		/*
7477c478bd9Sstevel@tonic-gate 		 * query HCA ports
7487c478bd9Sstevel@tonic-gate 		 */
7497c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl,
7507c478bd9Sstevel@tonic-gate 		    0, &pinfop, &hca->hca_nports, &size);
7517c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7527c478bd9Sstevel@tonic-gate 			goto fail2;
7537c478bd9Sstevel@tonic-gate 		}
7547c478bd9Sstevel@tonic-gate 		hca->hca_ports = pinfop;
7557c478bd9Sstevel@tonic-gate 		hca->hca_pinfosz = size;
7567c478bd9Sstevel@tonic-gate 		pinfop = NULL;
7577c478bd9Sstevel@tonic-gate 
7587c478bd9Sstevel@tonic-gate 		cq_size = DEF_CQ_SIZE; /* default cq size */
7597c478bd9Sstevel@tonic-gate 		/*
7607c478bd9Sstevel@tonic-gate 		 * Create 2 pairs of cq's (1 pair for client
7617c478bd9Sstevel@tonic-gate 		 * and the other pair for server) on this hca.
7627c478bd9Sstevel@tonic-gate 		 * If number of qp's gets too large, then several
7637c478bd9Sstevel@tonic-gate 		 * cq's will be needed.
7647c478bd9Sstevel@tonic-gate 		 */
7657c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler,
7667c478bd9Sstevel@tonic-gate 		    &hca->svc_rcq, ribstat);
7677c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7687c478bd9Sstevel@tonic-gate 			goto fail3;
7697c478bd9Sstevel@tonic-gate 		}
7707c478bd9Sstevel@tonic-gate 
7717c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_scq_handler,
7727c478bd9Sstevel@tonic-gate 		    &hca->svc_scq, ribstat);
7737c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7747c478bd9Sstevel@tonic-gate 			goto fail3;
7757c478bd9Sstevel@tonic-gate 		}
7767c478bd9Sstevel@tonic-gate 
7777c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler,
7787c478bd9Sstevel@tonic-gate 		    &hca->clnt_rcq, ribstat);
7797c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7807c478bd9Sstevel@tonic-gate 			goto fail3;
7817c478bd9Sstevel@tonic-gate 		}
7827c478bd9Sstevel@tonic-gate 
7837c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler,
7847c478bd9Sstevel@tonic-gate 		    &hca->clnt_scq, ribstat);
7857c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7867c478bd9Sstevel@tonic-gate 			goto fail3;
7877c478bd9Sstevel@tonic-gate 		}
7887c478bd9Sstevel@tonic-gate 
7897c478bd9Sstevel@tonic-gate 		/*
7907c478bd9Sstevel@tonic-gate 		 * Create buffer pools.
7917c478bd9Sstevel@tonic-gate 		 * Note rib_rbuf_create also allocates memory windows.
7927c478bd9Sstevel@tonic-gate 		 */
7937c478bd9Sstevel@tonic-gate 		hca->recv_pool = rib_rbufpool_create(hca,
794*065714dcSSiddheshwar Mahesh 		    RECV_BUFFER, rib_max_rbufs);
7957c478bd9Sstevel@tonic-gate 		if (hca->recv_pool == NULL) {
7967c478bd9Sstevel@tonic-gate 			goto fail3;
7977c478bd9Sstevel@tonic-gate 		}
7987c478bd9Sstevel@tonic-gate 
7997c478bd9Sstevel@tonic-gate 		hca->send_pool = rib_rbufpool_create(hca,
800*065714dcSSiddheshwar Mahesh 		    SEND_BUFFER, rib_max_rbufs);
8017c478bd9Sstevel@tonic-gate 		if (hca->send_pool == NULL) {
8027c478bd9Sstevel@tonic-gate 			rib_rbufpool_destroy(hca, RECV_BUFFER);
8037c478bd9Sstevel@tonic-gate 			goto fail3;
8047c478bd9Sstevel@tonic-gate 		}
8057c478bd9Sstevel@tonic-gate 
8060a701b1eSRobert Gordon 		if (hca->server_side_cache == NULL) {
8070a701b1eSRobert Gordon 			(void) sprintf(rssc_name,
8080a701b1eSRobert Gordon 			    "rib_server_side_cache_%04d", i);
8090a701b1eSRobert Gordon 			hca->server_side_cache = kmem_cache_create(
8100a701b1eSRobert Gordon 			    rssc_name,
8110a701b1eSRobert Gordon 			    sizeof (cache_avl_struct_t), 0,
8120a701b1eSRobert Gordon 			    NULL,
8130a701b1eSRobert Gordon 			    NULL,
8140a701b1eSRobert Gordon 			    rib_server_side_cache_reclaim,
8150a701b1eSRobert Gordon 			    hca, NULL, 0);
8160a701b1eSRobert Gordon 		}
8170a701b1eSRobert Gordon 
8180a701b1eSRobert Gordon 		avl_create(&hca->avl_tree,
8190a701b1eSRobert Gordon 		    avl_compare,
8200a701b1eSRobert Gordon 		    sizeof (cache_avl_struct_t),
8210a701b1eSRobert Gordon 		    (uint_t)(uintptr_t)&example_avl_node.avl_link-
8220a701b1eSRobert Gordon 		    (uint_t)(uintptr_t)&example_avl_node);
8230a701b1eSRobert Gordon 
8240a701b1eSRobert Gordon 		rw_init(&hca->avl_rw_lock,
8250a701b1eSRobert Gordon 		    NULL, RW_DRIVER, hca->iblock);
8260a701b1eSRobert Gordon 		mutex_init(&hca->cache_allocation,
8270a701b1eSRobert Gordon 		    NULL, MUTEX_DRIVER, NULL);
8280a701b1eSRobert Gordon 		hca->avl_init = TRUE;
8290a701b1eSRobert Gordon 
8300a701b1eSRobert Gordon 		/* Create kstats for the cache */
8310a701b1eSRobert Gordon 		ASSERT(INGLOBALZONE(curproc));
8320a701b1eSRobert Gordon 
8330a701b1eSRobert Gordon 		if (!stats_enabled) {
8340a701b1eSRobert Gordon 			ksp = kstat_create_zone("unix", 0, "rpcib_cache", "rpc",
8350a701b1eSRobert Gordon 			    KSTAT_TYPE_NAMED,
8360a701b1eSRobert Gordon 			    sizeof (rpcib_kstat) / sizeof (kstat_named_t),
8370a701b1eSRobert Gordon 			    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
8380a701b1eSRobert Gordon 			    GLOBAL_ZONEID);
8390a701b1eSRobert Gordon 			if (ksp) {
8400a701b1eSRobert Gordon 				ksp->ks_data = (void *) &rpcib_kstat;
8410a701b1eSRobert Gordon 				ksp->ks_update = rpcib_cache_kstat_update;
8420a701b1eSRobert Gordon 				kstat_install(ksp);
8430a701b1eSRobert Gordon 				stats_enabled = TRUE;
8440a701b1eSRobert Gordon 			}
8450a701b1eSRobert Gordon 		}
846*065714dcSSiddheshwar Mahesh 		if (hca->cleanup_helper == NULL) {
847*065714dcSSiddheshwar Mahesh 			hca->cleanup_helper = ddi_taskq_create(NULL,
848*065714dcSSiddheshwar Mahesh 			    "CLEANUP_HELPER", 1, TASKQ_DEFAULTPRI, 0);
8490a701b1eSRobert Gordon 		}
8500a701b1eSRobert Gordon 
8517c478bd9Sstevel@tonic-gate 		/*
8527c478bd9Sstevel@tonic-gate 		 * Initialize the registered service list and
8537c478bd9Sstevel@tonic-gate 		 * the lock
8547c478bd9Sstevel@tonic-gate 		 */
8557c478bd9Sstevel@tonic-gate 		hca->service_list = NULL;
8567c478bd9Sstevel@tonic-gate 		rw_init(&hca->service_list_lock, NULL, RW_DRIVER, hca->iblock);
8577c478bd9Sstevel@tonic-gate 
8587c478bd9Sstevel@tonic-gate 		mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
8597c478bd9Sstevel@tonic-gate 		cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL);
8607c478bd9Sstevel@tonic-gate 		rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER,
8617c478bd9Sstevel@tonic-gate 		    hca->iblock);
8627c478bd9Sstevel@tonic-gate 		rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER,
8637c478bd9Sstevel@tonic-gate 		    hca->iblock);
8647c478bd9Sstevel@tonic-gate 		rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock);
8657c478bd9Sstevel@tonic-gate 		mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock);
8667c478bd9Sstevel@tonic-gate 		hca->inuse = TRUE;
8677c478bd9Sstevel@tonic-gate 		/*
8687c478bd9Sstevel@tonic-gate 		 * XXX One hca only. Add multi-hca functionality if needed
8697c478bd9Sstevel@tonic-gate 		 * later.
8707c478bd9Sstevel@tonic-gate 		 */
8717c478bd9Sstevel@tonic-gate 		ribstat->hca = hca;
8727c478bd9Sstevel@tonic-gate 		ribstat->nhca_inited++;
8737c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
8747c478bd9Sstevel@tonic-gate 		break;
8757c478bd9Sstevel@tonic-gate 
8767c478bd9Sstevel@tonic-gate fail3:
8777c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
8787c478bd9Sstevel@tonic-gate fail2:
8797c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
8807c478bd9Sstevel@tonic-gate fail1:
8817c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate 	}
8847c478bd9Sstevel@tonic-gate 	if (ribstat->hca != NULL)
8857c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
8867c478bd9Sstevel@tonic-gate 	else
8877c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
8887c478bd9Sstevel@tonic-gate }
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate /*
8917c478bd9Sstevel@tonic-gate  * Callback routines
8927c478bd9Sstevel@tonic-gate  */
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate /*
8957c478bd9Sstevel@tonic-gate  * SCQ handlers
8967c478bd9Sstevel@tonic-gate  */
8977c478bd9Sstevel@tonic-gate /* ARGSUSED */
8987c478bd9Sstevel@tonic-gate static void
8997c478bd9Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
9007c478bd9Sstevel@tonic-gate {
9017c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
9027c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
903*065714dcSSiddheshwar Mahesh 	struct send_wid	*wd;
904*065714dcSSiddheshwar Mahesh 	CONN		*conn;
905*065714dcSSiddheshwar Mahesh 	rib_qp_t	*qp;
9067c478bd9Sstevel@tonic-gate 	int		i;
9077c478bd9Sstevel@tonic-gate 
9087c478bd9Sstevel@tonic-gate 	/*
9097c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
9107c478bd9Sstevel@tonic-gate 	 * completion queue notification.
9117c478bd9Sstevel@tonic-gate 	 */
9127c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
9137c478bd9Sstevel@tonic-gate 
9147c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
9157c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
9167c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
9177c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
9187c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
9197c478bd9Sstevel@tonic-gate 			return;
9207c478bd9Sstevel@tonic-gate 
9217c478bd9Sstevel@tonic-gate 		/*
9227c478bd9Sstevel@tonic-gate 		 * Got a send completion
9237c478bd9Sstevel@tonic-gate 		 */
924*065714dcSSiddheshwar Mahesh 		if (wc.wc_id != RDMA_DUMMY_WRID) {
925*065714dcSSiddheshwar Mahesh 			wd = (struct send_wid *)(uintptr_t)wc.wc_id;
926*065714dcSSiddheshwar Mahesh 			qp = wd->qp;
927*065714dcSSiddheshwar Mahesh 			conn = qptoc(qp);
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate 			mutex_enter(&wd->sendwait_lock);
9307c478bd9Sstevel@tonic-gate 			switch (wc.wc_status) {
9317c478bd9Sstevel@tonic-gate 			case IBT_WC_SUCCESS:
9327c478bd9Sstevel@tonic-gate 				wd->status = RDMA_SUCCESS;
9337c478bd9Sstevel@tonic-gate 				break;
9347c478bd9Sstevel@tonic-gate 			default:
9357c478bd9Sstevel@tonic-gate /*
9367c478bd9Sstevel@tonic-gate  *    RC Send Q Error Code		Local state     Remote State
9377c478bd9Sstevel@tonic-gate  *    ==================== 		===========     ============
9387c478bd9Sstevel@tonic-gate  *    IBT_WC_BAD_RESPONSE_ERR             ERROR           None
9397c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_LEN_ERR                ERROR           None
9407c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           None
9417c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_PROTECT_ERR            ERROR           None
9427c478bd9Sstevel@tonic-gate  *    IBT_WC_MEM_WIN_BIND_ERR             ERROR           None
9437c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR
9447c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_ACCESS_ERR            ERROR           ERROR
9457c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_OP_ERR                ERROR           ERROR
9467c478bd9Sstevel@tonic-gate  *    IBT_WC_RNR_NAK_TIMEOUT_ERR          ERROR           None
9477c478bd9Sstevel@tonic-gate  *    IBT_WC_TRANS_TIMEOUT_ERR            ERROR           None
948*065714dcSSiddheshwar Mahesh  *    IBT_WC_WR_FLUSHED_ERR               ERROR           None
9497c478bd9Sstevel@tonic-gate  */
9507c478bd9Sstevel@tonic-gate 				/*
9517c478bd9Sstevel@tonic-gate 				 * Channel in error state. Set connection to
9527c478bd9Sstevel@tonic-gate 				 * ERROR and cleanup will happen either from
9537c478bd9Sstevel@tonic-gate 				 * conn_release  or from rib_conn_get
9547c478bd9Sstevel@tonic-gate 				 */
9557c478bd9Sstevel@tonic-gate 				wd->status = RDMA_FAILED;
9567c478bd9Sstevel@tonic-gate 				mutex_enter(&conn->c_lock);
9577c478bd9Sstevel@tonic-gate 				if (conn->c_state != C_DISCONN_PEND)
9580a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
9597c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
9607c478bd9Sstevel@tonic-gate 				break;
9617c478bd9Sstevel@tonic-gate 			}
9620a701b1eSRobert Gordon 
9637c478bd9Sstevel@tonic-gate 			if (wd->cv_sig == 1) {
9647c478bd9Sstevel@tonic-gate 				/*
9657c478bd9Sstevel@tonic-gate 				 * Notify poster
9667c478bd9Sstevel@tonic-gate 				 */
9677c478bd9Sstevel@tonic-gate 				cv_signal(&wd->wait_cv);
9687c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
9697c478bd9Sstevel@tonic-gate 			} else {
9707c478bd9Sstevel@tonic-gate 				/*
9717c478bd9Sstevel@tonic-gate 				 * Poster not waiting for notification.
9727c478bd9Sstevel@tonic-gate 				 * Free the send buffers and send_wid
9737c478bd9Sstevel@tonic-gate 				 */
9747c478bd9Sstevel@tonic-gate 				for (i = 0; i < wd->nsbufs; i++) {
975*065714dcSSiddheshwar Mahesh 					rib_rbuf_free(qptoc(wd->qp),
976*065714dcSSiddheshwar Mahesh 					    SEND_BUFFER,
97711606941Sjwahlig 					    (void *)(uintptr_t)wd->sbufaddr[i]);
9787c478bd9Sstevel@tonic-gate 				}
979*065714dcSSiddheshwar Mahesh 
980*065714dcSSiddheshwar Mahesh 				/* decrement the send ref count */
981*065714dcSSiddheshwar Mahesh 				rib_send_rele(qp);
982*065714dcSSiddheshwar Mahesh 
9837c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
9847c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wd);
9857c478bd9Sstevel@tonic-gate 			}
9867c478bd9Sstevel@tonic-gate 		}
9877c478bd9Sstevel@tonic-gate 	}
9887c478bd9Sstevel@tonic-gate }
9897c478bd9Sstevel@tonic-gate 
9907c478bd9Sstevel@tonic-gate /* ARGSUSED */
9917c478bd9Sstevel@tonic-gate static void
9927c478bd9Sstevel@tonic-gate rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
9937c478bd9Sstevel@tonic-gate {
9947c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
9957c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
996*065714dcSSiddheshwar Mahesh 	struct send_wid	*wd;
997*065714dcSSiddheshwar Mahesh 	rib_qp_t	*qp;
998*065714dcSSiddheshwar Mahesh 	CONN		*conn;
9997c478bd9Sstevel@tonic-gate 	int		i;
10007c478bd9Sstevel@tonic-gate 
10017c478bd9Sstevel@tonic-gate 	/*
10027c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
10037c478bd9Sstevel@tonic-gate 	 * completion queue notification.
10047c478bd9Sstevel@tonic-gate 	 */
10057c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
10087c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
10097c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
10107c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
10117c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
10127c478bd9Sstevel@tonic-gate 			return;
10137c478bd9Sstevel@tonic-gate 
10147c478bd9Sstevel@tonic-gate 		/*
10157c478bd9Sstevel@tonic-gate 		 * Got a send completion
10167c478bd9Sstevel@tonic-gate 		 */
1017*065714dcSSiddheshwar Mahesh 		if (wc.wc_id != RDMA_DUMMY_WRID) {
1018*065714dcSSiddheshwar Mahesh 			wd = (struct send_wid *)(uintptr_t)wc.wc_id;
1019*065714dcSSiddheshwar Mahesh 			qp = wd->qp;
1020*065714dcSSiddheshwar Mahesh 			conn = qptoc(qp);
10217c478bd9Sstevel@tonic-gate 			mutex_enter(&wd->sendwait_lock);
1022*065714dcSSiddheshwar Mahesh 
1023*065714dcSSiddheshwar Mahesh 			switch (wc.wc_status) {
1024*065714dcSSiddheshwar Mahesh 			case IBT_WC_SUCCESS:
1025*065714dcSSiddheshwar Mahesh 				wd->status = RDMA_SUCCESS;
1026*065714dcSSiddheshwar Mahesh 				break;
1027*065714dcSSiddheshwar Mahesh 			default:
1028*065714dcSSiddheshwar Mahesh 				/*
1029*065714dcSSiddheshwar Mahesh 				 * Channel in error state. Set connection to
1030*065714dcSSiddheshwar Mahesh 				 * ERROR and cleanup will happen either from
1031*065714dcSSiddheshwar Mahesh 				 * conn_release  or conn timeout.
1032*065714dcSSiddheshwar Mahesh 				 */
1033*065714dcSSiddheshwar Mahesh 				wd->status = RDMA_FAILED;
1034*065714dcSSiddheshwar Mahesh 				mutex_enter(&conn->c_lock);
1035*065714dcSSiddheshwar Mahesh 				if (conn->c_state != C_DISCONN_PEND)
1036*065714dcSSiddheshwar Mahesh 					conn->c_state = C_ERROR_CONN;
1037*065714dcSSiddheshwar Mahesh 				mutex_exit(&conn->c_lock);
1038*065714dcSSiddheshwar Mahesh 				break;
1039*065714dcSSiddheshwar Mahesh 			}
1040*065714dcSSiddheshwar Mahesh 
10417c478bd9Sstevel@tonic-gate 			if (wd->cv_sig == 1) {
10427c478bd9Sstevel@tonic-gate 				/*
10437c478bd9Sstevel@tonic-gate 				 * Update completion status and notify poster
10447c478bd9Sstevel@tonic-gate 				 */
10457c478bd9Sstevel@tonic-gate 				cv_signal(&wd->wait_cv);
10467c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
10477c478bd9Sstevel@tonic-gate 			} else {
10487c478bd9Sstevel@tonic-gate 				/*
10497c478bd9Sstevel@tonic-gate 				 * Poster not waiting for notification.
10507c478bd9Sstevel@tonic-gate 				 * Free the send buffers and send_wid
10517c478bd9Sstevel@tonic-gate 				 */
10527c478bd9Sstevel@tonic-gate 				for (i = 0; i < wd->nsbufs; i++) {
10530a701b1eSRobert Gordon 					rib_rbuf_free(qptoc(wd->qp),
10540a701b1eSRobert Gordon 					    SEND_BUFFER,
105511606941Sjwahlig 					    (void *)(uintptr_t)wd->sbufaddr[i]);
10567c478bd9Sstevel@tonic-gate 				}
1057*065714dcSSiddheshwar Mahesh 
1058*065714dcSSiddheshwar Mahesh 				/* decrement the send ref count */
1059*065714dcSSiddheshwar Mahesh 				rib_send_rele(qp);
1060*065714dcSSiddheshwar Mahesh 
10617c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
10627c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wd);
10637c478bd9Sstevel@tonic-gate 			}
10647c478bd9Sstevel@tonic-gate 		}
10657c478bd9Sstevel@tonic-gate 	}
10667c478bd9Sstevel@tonic-gate }
10677c478bd9Sstevel@tonic-gate 
10687c478bd9Sstevel@tonic-gate /*
10697c478bd9Sstevel@tonic-gate  * RCQ handler
10707c478bd9Sstevel@tonic-gate  */
10717c478bd9Sstevel@tonic-gate /* ARGSUSED */
10727c478bd9Sstevel@tonic-gate static void
10737c478bd9Sstevel@tonic-gate rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
10747c478bd9Sstevel@tonic-gate {
10757c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
10767c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
10777c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
10787c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
10797c478bd9Sstevel@tonic-gate 
10807c478bd9Sstevel@tonic-gate 	/*
10817c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
10827c478bd9Sstevel@tonic-gate 	 * completion queue notification.
10837c478bd9Sstevel@tonic-gate 	 */
10847c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
10857c478bd9Sstevel@tonic-gate 
10867c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
10877c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
10887c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
10897c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
10907c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
10917c478bd9Sstevel@tonic-gate 			return;
10927c478bd9Sstevel@tonic-gate 
109311606941Sjwahlig 		rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
10947c478bd9Sstevel@tonic-gate 		qp = rwid->qp;
10957c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
10967c478bd9Sstevel@tonic-gate 			XDR	inxdrs, *xdrs;
10977c478bd9Sstevel@tonic-gate 			uint_t	xid, vers, op, find_xid = 0;
10987c478bd9Sstevel@tonic-gate 			struct reply	*r;
10997c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
11000a701b1eSRobert Gordon 			uint32_t rdma_credit = 0;
11017c478bd9Sstevel@tonic-gate 
11027c478bd9Sstevel@tonic-gate 			xdrs = &inxdrs;
110311606941Sjwahlig 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
11047c478bd9Sstevel@tonic-gate 			    wc.wc_bytes_xfer, XDR_DECODE);
11057c478bd9Sstevel@tonic-gate 			/*
11067c478bd9Sstevel@tonic-gate 			 * Treat xid as opaque (xid is the first entity
11077c478bd9Sstevel@tonic-gate 			 * in the rpc rdma message).
11087c478bd9Sstevel@tonic-gate 			 */
110911606941Sjwahlig 			xid = *(uint32_t *)(uintptr_t)rwid->addr;
11100a701b1eSRobert Gordon 
11117c478bd9Sstevel@tonic-gate 			/* Skip xid and set the xdr position accordingly. */
11127c478bd9Sstevel@tonic-gate 			XDR_SETPOS(xdrs, sizeof (uint32_t));
11137c478bd9Sstevel@tonic-gate 			(void) xdr_u_int(xdrs, &vers);
11140a701b1eSRobert Gordon 			(void) xdr_u_int(xdrs, &rdma_credit);
11157c478bd9Sstevel@tonic-gate 			(void) xdr_u_int(xdrs, &op);
11167c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
11170a701b1eSRobert Gordon 
11187c478bd9Sstevel@tonic-gate 			if (vers != RPCRDMA_VERS) {
11197c478bd9Sstevel@tonic-gate 				/*
11200a701b1eSRobert Gordon 				 * Invalid RPC/RDMA version. Cannot
11210a701b1eSRobert Gordon 				 * interoperate.  Set connection to
11220a701b1eSRobert Gordon 				 * ERROR state and bail out.
11237c478bd9Sstevel@tonic-gate 				 */
11247c478bd9Sstevel@tonic-gate 				mutex_enter(&conn->c_lock);
11257c478bd9Sstevel@tonic-gate 				if (conn->c_state != C_DISCONN_PEND)
11260a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
11277c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
112811606941Sjwahlig 				rib_rbuf_free(conn, RECV_BUFFER,
112911606941Sjwahlig 				    (void *)(uintptr_t)rwid->addr);
11307c478bd9Sstevel@tonic-gate 				rib_free_wid(rwid);
11317c478bd9Sstevel@tonic-gate 				continue;
11327c478bd9Sstevel@tonic-gate 			}
11337c478bd9Sstevel@tonic-gate 
11347c478bd9Sstevel@tonic-gate 			mutex_enter(&qp->replylist_lock);
11357c478bd9Sstevel@tonic-gate 			for (r = qp->replylist; r != NULL; r = r->next) {
11367c478bd9Sstevel@tonic-gate 				if (r->xid == xid) {
11377c478bd9Sstevel@tonic-gate 					find_xid = 1;
11387c478bd9Sstevel@tonic-gate 					switch (op) {
11397c478bd9Sstevel@tonic-gate 					case RDMA_MSG:
11407c478bd9Sstevel@tonic-gate 					case RDMA_NOMSG:
11417c478bd9Sstevel@tonic-gate 					case RDMA_MSGP:
11427c478bd9Sstevel@tonic-gate 						r->status = RDMA_SUCCESS;
11437c478bd9Sstevel@tonic-gate 						r->vaddr_cq = rwid->addr;
11440a701b1eSRobert Gordon 						r->bytes_xfer =
11450a701b1eSRobert Gordon 						    wc.wc_bytes_xfer;
11467c478bd9Sstevel@tonic-gate 						cv_signal(&r->wait_cv);
11477c478bd9Sstevel@tonic-gate 						break;
11487c478bd9Sstevel@tonic-gate 					default:
11490a701b1eSRobert Gordon 						rib_rbuf_free(qptoc(qp),
11500a701b1eSRobert Gordon 						    RECV_BUFFER,
11510a701b1eSRobert Gordon 						    (void *)(uintptr_t)
11520a701b1eSRobert Gordon 						    rwid->addr);
11537c478bd9Sstevel@tonic-gate 						break;
11547c478bd9Sstevel@tonic-gate 					}
11557c478bd9Sstevel@tonic-gate 					break;
11567c478bd9Sstevel@tonic-gate 				}
11577c478bd9Sstevel@tonic-gate 			}
11587c478bd9Sstevel@tonic-gate 			mutex_exit(&qp->replylist_lock);
11597c478bd9Sstevel@tonic-gate 			if (find_xid == 0) {
11607c478bd9Sstevel@tonic-gate 				/* RPC caller not waiting for reply */
11610a701b1eSRobert Gordon 
11620a701b1eSRobert Gordon 				DTRACE_PROBE1(rpcib__i__nomatchxid1,
11630a701b1eSRobert Gordon 				    int, xid);
11640a701b1eSRobert Gordon 
11657c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(qp), RECV_BUFFER,
116611606941Sjwahlig 				    (void *)(uintptr_t)rwid->addr);
11677c478bd9Sstevel@tonic-gate 			}
11687c478bd9Sstevel@tonic-gate 		} else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) {
11697c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
11707c478bd9Sstevel@tonic-gate 
11717c478bd9Sstevel@tonic-gate 			/*
11727c478bd9Sstevel@tonic-gate 			 * Connection being flushed. Just free
11737c478bd9Sstevel@tonic-gate 			 * the posted buffer
11747c478bd9Sstevel@tonic-gate 			 */
117511606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
117611606941Sjwahlig 			    (void *)(uintptr_t)rwid->addr);
11777c478bd9Sstevel@tonic-gate 		} else {
11787c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
11797c478bd9Sstevel@tonic-gate /*
11807c478bd9Sstevel@tonic-gate  *  RC Recv Q Error Code		Local state     Remote State
11817c478bd9Sstevel@tonic-gate  *  ====================		===========     ============
11827c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_ACCESS_ERR             ERROR           ERROR when NAK recvd
11837c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_LEN_ERR                ERROR           ERROR when NAK recvd
11847c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_PROTECT_ERR            ERROR           ERROR when NAK recvd
11857c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           ERROR when NAK recvd
11867c478bd9Sstevel@tonic-gate  *  IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR when NAK recvd
11877c478bd9Sstevel@tonic-gate  *  IBT_WC_WR_FLUSHED_ERR               None            None
11887c478bd9Sstevel@tonic-gate  */
11897c478bd9Sstevel@tonic-gate 			/*
11907c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection
11917c478bd9Sstevel@tonic-gate 			 * in ERROR state.
11927c478bd9Sstevel@tonic-gate 			 */
11937c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
11947c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
11950a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
11967c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
119711606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
119811606941Sjwahlig 			    (void *)(uintptr_t)rwid->addr);
11997c478bd9Sstevel@tonic-gate 		}
12007c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
12017c478bd9Sstevel@tonic-gate 	}
12027c478bd9Sstevel@tonic-gate }
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate /* Server side */
12057c478bd9Sstevel@tonic-gate /* ARGSUSED */
12067c478bd9Sstevel@tonic-gate static void
12077c478bd9Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
12087c478bd9Sstevel@tonic-gate {
12090a701b1eSRobert Gordon 	rdma_recv_data_t *rdp;
12107c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
12117c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
12127c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
12137c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
12147c478bd9Sstevel@tonic-gate 	CONN		*conn;
12157c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
12167c478bd9Sstevel@tonic-gate 
12177c478bd9Sstevel@tonic-gate 	/*
12187c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
12197c478bd9Sstevel@tonic-gate 	 * completion queue notification.
12207c478bd9Sstevel@tonic-gate 	 */
12217c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
12227c478bd9Sstevel@tonic-gate 
12237c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
12247c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
12257c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
12267c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
12277c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
12287c478bd9Sstevel@tonic-gate 			return;
12297c478bd9Sstevel@tonic-gate 
123011606941Sjwahlig 		s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
12317c478bd9Sstevel@tonic-gate 		qp = s_recvp->qp;
12327c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
12337c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
12347c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs--;
12357c478bd9Sstevel@tonic-gate 		if (qp->n_posted_rbufs == 0)
12367c478bd9Sstevel@tonic-gate 			cv_signal(&qp->posted_rbufs_cv);
12377c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
12407c478bd9Sstevel@tonic-gate 			XDR	inxdrs, *xdrs;
12417c478bd9Sstevel@tonic-gate 			uint_t	xid, vers, op;
12420a701b1eSRobert Gordon 			uint32_t rdma_credit;
12437c478bd9Sstevel@tonic-gate 
12447c478bd9Sstevel@tonic-gate 			xdrs = &inxdrs;
12457c478bd9Sstevel@tonic-gate 			/* s_recvp->vaddr stores data */
124611606941Sjwahlig 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
12477c478bd9Sstevel@tonic-gate 			    wc.wc_bytes_xfer, XDR_DECODE);
12487c478bd9Sstevel@tonic-gate 
12497c478bd9Sstevel@tonic-gate 			/*
12507c478bd9Sstevel@tonic-gate 			 * Treat xid as opaque (xid is the first entity
12517c478bd9Sstevel@tonic-gate 			 * in the rpc rdma message).
12527c478bd9Sstevel@tonic-gate 			 */
125311606941Sjwahlig 			xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
12547c478bd9Sstevel@tonic-gate 			/* Skip xid and set the xdr position accordingly. */
12557c478bd9Sstevel@tonic-gate 			XDR_SETPOS(xdrs, sizeof (uint32_t));
12567c478bd9Sstevel@tonic-gate 			if (!xdr_u_int(xdrs, &vers) ||
12570a701b1eSRobert Gordon 			    !xdr_u_int(xdrs, &rdma_credit) ||
12587c478bd9Sstevel@tonic-gate 			    !xdr_u_int(xdrs, &op)) {
12597c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
126011606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
12617c478bd9Sstevel@tonic-gate 				XDR_DESTROY(xdrs);
12627c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
12637c478bd9Sstevel@tonic-gate 				continue;
12647c478bd9Sstevel@tonic-gate 			}
12657c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
12667c478bd9Sstevel@tonic-gate 
12677c478bd9Sstevel@tonic-gate 			if (vers != RPCRDMA_VERS) {
12687c478bd9Sstevel@tonic-gate 				/*
12690a701b1eSRobert Gordon 				 * Invalid RPC/RDMA version.
12700a701b1eSRobert Gordon 				 * Drop rpc rdma message.
12717c478bd9Sstevel@tonic-gate 				 */
12727c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
127311606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
12747c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
12757c478bd9Sstevel@tonic-gate 				continue;
12767c478bd9Sstevel@tonic-gate 			}
12777c478bd9Sstevel@tonic-gate 			/*
12787c478bd9Sstevel@tonic-gate 			 * Is this for RDMA_DONE?
12797c478bd9Sstevel@tonic-gate 			 */
12807c478bd9Sstevel@tonic-gate 			if (op == RDMA_DONE) {
12817c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
128211606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
12837c478bd9Sstevel@tonic-gate 				/*
12847c478bd9Sstevel@tonic-gate 				 * Wake up the thread waiting on
12857c478bd9Sstevel@tonic-gate 				 * a RDMA_DONE for xid
12867c478bd9Sstevel@tonic-gate 				 */
12877c478bd9Sstevel@tonic-gate 				mutex_enter(&qp->rdlist_lock);
12887c478bd9Sstevel@tonic-gate 				rdma_done_notify(qp, xid);
12897c478bd9Sstevel@tonic-gate 				mutex_exit(&qp->rdlist_lock);
12907c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
12917c478bd9Sstevel@tonic-gate 				continue;
12927c478bd9Sstevel@tonic-gate 			}
12937c478bd9Sstevel@tonic-gate 
12947c478bd9Sstevel@tonic-gate 			mutex_enter(&plugin_state_lock);
12957c478bd9Sstevel@tonic-gate 			if (plugin_state == ACCEPT) {
12960a701b1eSRobert Gordon 				while ((mp = allocb(sizeof (*rdp), BPRI_LO))
12970a701b1eSRobert Gordon 				    == NULL)
12980a701b1eSRobert Gordon 					(void) strwaitbuf(
12990a701b1eSRobert Gordon 					    sizeof (*rdp), BPRI_LO);
13007c478bd9Sstevel@tonic-gate 				/*
13017c478bd9Sstevel@tonic-gate 				 * Plugin is in accept state, hence the master
13027c478bd9Sstevel@tonic-gate 				 * transport queue for this is still accepting
13037c478bd9Sstevel@tonic-gate 				 * requests. Hence we can call svc_queuereq to
13047c478bd9Sstevel@tonic-gate 				 * queue this recieved msg.
13057c478bd9Sstevel@tonic-gate 				 */
13060a701b1eSRobert Gordon 				rdp = (rdma_recv_data_t *)mp->b_rptr;
13070a701b1eSRobert Gordon 				rdp->conn = conn;
13080a701b1eSRobert Gordon 				rdp->rpcmsg.addr =
13090a701b1eSRobert Gordon 				    (caddr_t)(uintptr_t)s_recvp->vaddr;
13100a701b1eSRobert Gordon 				rdp->rpcmsg.type = RECV_BUFFER;
13110a701b1eSRobert Gordon 				rdp->rpcmsg.len = wc.wc_bytes_xfer;
13120a701b1eSRobert Gordon 				rdp->status = wc.wc_status;
13137c478bd9Sstevel@tonic-gate 				mutex_enter(&conn->c_lock);
13147c478bd9Sstevel@tonic-gate 				conn->c_ref++;
13157c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
13160a701b1eSRobert Gordon 				mp->b_wptr += sizeof (*rdp);
13177c478bd9Sstevel@tonic-gate 				svc_queuereq((queue_t *)rib_stat->q, mp);
13187c478bd9Sstevel@tonic-gate 				mutex_exit(&plugin_state_lock);
13197c478bd9Sstevel@tonic-gate 			} else {
13207c478bd9Sstevel@tonic-gate 				/*
13217c478bd9Sstevel@tonic-gate 				 * The master transport for this is going
13227c478bd9Sstevel@tonic-gate 				 * away and the queue is not accepting anymore
13237c478bd9Sstevel@tonic-gate 				 * requests for krpc, so don't do anything, just
13247c478bd9Sstevel@tonic-gate 				 * free the msg.
13257c478bd9Sstevel@tonic-gate 				 */
13267c478bd9Sstevel@tonic-gate 				mutex_exit(&plugin_state_lock);
13277c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
132811606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
13297c478bd9Sstevel@tonic-gate 			}
13307c478bd9Sstevel@tonic-gate 		} else {
13317c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
133211606941Sjwahlig 			    (void *)(uintptr_t)s_recvp->vaddr);
13337c478bd9Sstevel@tonic-gate 		}
13347c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
13357c478bd9Sstevel@tonic-gate 	}
13367c478bd9Sstevel@tonic-gate }
13377c478bd9Sstevel@tonic-gate 
13387c478bd9Sstevel@tonic-gate /*
13397c478bd9Sstevel@tonic-gate  * Handles DR event of IBT_HCA_DETACH_EVENT.
13407c478bd9Sstevel@tonic-gate  */
13417c478bd9Sstevel@tonic-gate /* ARGSUSED */
13427c478bd9Sstevel@tonic-gate static void
13437c478bd9Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
13447c478bd9Sstevel@tonic-gate 	ibt_async_code_t code, ibt_async_event_t *event)
13457c478bd9Sstevel@tonic-gate {
13467c478bd9Sstevel@tonic-gate 
13477c478bd9Sstevel@tonic-gate 	switch (code) {
13487c478bd9Sstevel@tonic-gate 	case IBT_HCA_ATTACH_EVENT:
13497c478bd9Sstevel@tonic-gate 		/* ignore */
13507c478bd9Sstevel@tonic-gate 		break;
13517c478bd9Sstevel@tonic-gate 	case IBT_HCA_DETACH_EVENT:
13527c478bd9Sstevel@tonic-gate 	{
13537c478bd9Sstevel@tonic-gate 		ASSERT(rib_stat->hca->hca_hdl == hca_hdl);
13547c478bd9Sstevel@tonic-gate 		rib_detach_hca(rib_stat->hca);
13557c478bd9Sstevel@tonic-gate #ifdef DEBUG
13567c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n");
13577c478bd9Sstevel@tonic-gate #endif
13587c478bd9Sstevel@tonic-gate 		break;
13597c478bd9Sstevel@tonic-gate 	}
13607c478bd9Sstevel@tonic-gate #ifdef DEBUG
13617c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PATH_MIGRATED:
13620a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
13630a701b1eSRobert Gordon 		    "IBT_EVENT_PATH_MIGRATED\n");
13647c478bd9Sstevel@tonic-gate 		break;
13657c478bd9Sstevel@tonic-gate 	case IBT_EVENT_SQD:
13667c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n");
13677c478bd9Sstevel@tonic-gate 		break;
13687c478bd9Sstevel@tonic-gate 	case IBT_EVENT_COM_EST:
13697c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n");
13707c478bd9Sstevel@tonic-gate 		break;
13717c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CATASTROPHIC_CHAN:
13720a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
13730a701b1eSRobert Gordon 		    "IBT_ERROR_CATASTROPHIC_CHAN\n");
13747c478bd9Sstevel@tonic-gate 		break;
13757c478bd9Sstevel@tonic-gate 	case IBT_ERROR_INVALID_REQUEST_CHAN:
13767c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): "
13777c478bd9Sstevel@tonic-gate 		    "IBT_ERROR_INVALID_REQUEST_CHAN\n");
13787c478bd9Sstevel@tonic-gate 		break;
13797c478bd9Sstevel@tonic-gate 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
13807c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): "
13817c478bd9Sstevel@tonic-gate 		    "IBT_ERROR_ACCESS_VIOLATION_CHAN\n");
13827c478bd9Sstevel@tonic-gate 		break;
13837c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PATH_MIGRATE_REQ:
13840a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
13850a701b1eSRobert Gordon 		    "IBT_ERROR_PATH_MIGRATE_REQ\n");
13867c478bd9Sstevel@tonic-gate 		break;
13877c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CQ:
13887c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n");
13897c478bd9Sstevel@tonic-gate 		break;
13907c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PORT_DOWN:
13917c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n");
13927c478bd9Sstevel@tonic-gate 		break;
13937c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PORT_UP:
13947c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n");
13957c478bd9Sstevel@tonic-gate 		break;
13967c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE1:
13977c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n");
13987c478bd9Sstevel@tonic-gate 		break;
13997c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE2:
14007c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n");
14017c478bd9Sstevel@tonic-gate 		break;
14027c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE3:
14037c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n");
14047c478bd9Sstevel@tonic-gate 		break;
14057c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE4:
14067c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n");
14077c478bd9Sstevel@tonic-gate 		break;
14087c478bd9Sstevel@tonic-gate #endif
14097c478bd9Sstevel@tonic-gate 	default:
14107c478bd9Sstevel@tonic-gate 		break;
14117c478bd9Sstevel@tonic-gate 	}
14127c478bd9Sstevel@tonic-gate }
14137c478bd9Sstevel@tonic-gate 
14147c478bd9Sstevel@tonic-gate /*
14157c478bd9Sstevel@tonic-gate  * Client's reachable function.
14167c478bd9Sstevel@tonic-gate  */
14177c478bd9Sstevel@tonic-gate static rdma_stat
14187c478bd9Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle)
14197c478bd9Sstevel@tonic-gate {
14207c478bd9Sstevel@tonic-gate 	rdma_stat	status;
1421214ae7d0SSiddheshwar Mahesh 	rpcib_ping_t	rpt;
14227c478bd9Sstevel@tonic-gate 
14237c478bd9Sstevel@tonic-gate 	/*
14247c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
14257c478bd9Sstevel@tonic-gate 	 */
14267c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
14277c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
14287c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
14297c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
14307c478bd9Sstevel@tonic-gate 	}
1431214ae7d0SSiddheshwar Mahesh 
1432214ae7d0SSiddheshwar Mahesh 	bzero(&rpt, sizeof (rpcib_ping_t));
1433214ae7d0SSiddheshwar Mahesh 	status = rib_ping_srv(addr_type, raddr, &rpt);
14347c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
14357c478bd9Sstevel@tonic-gate 
14367c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
1437214ae7d0SSiddheshwar Mahesh 		*handle = (void *)rpt.hca;
14387c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
14397c478bd9Sstevel@tonic-gate 	} else {
14407c478bd9Sstevel@tonic-gate 		*handle = NULL;
14410a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__pingfailed);
14427c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
14437c478bd9Sstevel@tonic-gate 	}
14447c478bd9Sstevel@tonic-gate }
14457c478bd9Sstevel@tonic-gate 
14467c478bd9Sstevel@tonic-gate /* Client side qp creation */
14477c478bd9Sstevel@tonic-gate static rdma_stat
14487c478bd9Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
14497c478bd9Sstevel@tonic-gate {
14507c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
14517c478bd9Sstevel@tonic-gate 	CONN		*conn;
14520a701b1eSRobert Gordon 	rdma_clnt_cred_ctrl_t *cc_info;
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 	ASSERT(qp != NULL);
14557c478bd9Sstevel@tonic-gate 	*qp = NULL;
14567c478bd9Sstevel@tonic-gate 
14577c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
14587c478bd9Sstevel@tonic-gate 	conn = qptoc(kqp);
14597c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
14607c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
14617c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
14627c478bd9Sstevel@tonic-gate 
14637c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_CLIENT;
14647c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
14657c478bd9Sstevel@tonic-gate 	conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP);
14667c478bd9Sstevel@tonic-gate 	bcopy(raddr->buf, conn->c_raddr.buf, raddr->len);
14677c478bd9Sstevel@tonic-gate 	conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len;
14687c478bd9Sstevel@tonic-gate 	/*
14697c478bd9Sstevel@tonic-gate 	 * Initialize
14707c478bd9Sstevel@tonic-gate 	 */
14717c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
14727c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
14737c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
1474*065714dcSSiddheshwar Mahesh 	cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL);
1475*065714dcSSiddheshwar Mahesh 	mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
14767c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
14777c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
14787c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
14797c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
14807c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
14810a701b1eSRobert Gordon 	/*
14820a701b1eSRobert Gordon 	 * Initialize the client credit control
14830a701b1eSRobert Gordon 	 * portion of the rdmaconn struct.
14840a701b1eSRobert Gordon 	 */
14850a701b1eSRobert Gordon 	kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT;
14860a701b1eSRobert Gordon 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
14870a701b1eSRobert Gordon 	cc_info->clnt_cc_granted_ops = 0;
14880a701b1eSRobert Gordon 	cc_info->clnt_cc_in_flight_ops = 0;
14890a701b1eSRobert Gordon 	cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL);
14907c478bd9Sstevel@tonic-gate 
14917c478bd9Sstevel@tonic-gate 	*qp = kqp;
14927c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
14937c478bd9Sstevel@tonic-gate }
14947c478bd9Sstevel@tonic-gate 
14957c478bd9Sstevel@tonic-gate /* Server side qp creation */
14967c478bd9Sstevel@tonic-gate static rdma_stat
14977c478bd9Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp)
14987c478bd9Sstevel@tonic-gate {
14997c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
15007c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
15017c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
15027c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
15030a701b1eSRobert Gordon 	rdma_srv_cred_ctrl_t *cc_info;
15047c478bd9Sstevel@tonic-gate 
15057c478bd9Sstevel@tonic-gate 	*qp = NULL;
15067c478bd9Sstevel@tonic-gate 
15077c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
15087c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
15097c478bd9Sstevel@tonic-gate 	kqp->port_num = port;
15107c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
15117c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
15127c478bd9Sstevel@tonic-gate 
15137c478bd9Sstevel@tonic-gate 	/*
15147c478bd9Sstevel@tonic-gate 	 * Create the qp handle
15157c478bd9Sstevel@tonic-gate 	 */
15167c478bd9Sstevel@tonic-gate 	bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
15177c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl;
15187c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl;
15197c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
15207c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = port;
15217c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
15227c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
15237c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
15247c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
15257c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
15267c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
15277c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
15287c478bd9Sstevel@tonic-gate 
15297c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
15307c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
15317c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
15327c478bd9Sstevel@tonic-gate 		    IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl,
15337c478bd9Sstevel@tonic-gate 		    &chan_sizes);
15347c478bd9Sstevel@tonic-gate 	} else {
15357c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
15367c478bd9Sstevel@tonic-gate 		goto fail;
15377c478bd9Sstevel@tonic-gate 	}
15387c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
15397c478bd9Sstevel@tonic-gate 
15407c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
15410a701b1eSRobert Gordon 		DTRACE_PROBE1(rpcib__i_svccreatechanfail,
15420a701b1eSRobert Gordon 		    int, ibt_status);
15437c478bd9Sstevel@tonic-gate 		goto fail;
15447c478bd9Sstevel@tonic-gate 	}
15457c478bd9Sstevel@tonic-gate 
15467c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_SERVER;
15477c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
15487c478bd9Sstevel@tonic-gate 	kqp->q = q;	/* server ONLY */
15497c478bd9Sstevel@tonic-gate 
15507c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
15517c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
15527c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
15537c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
1554*065714dcSSiddheshwar Mahesh 	cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL);
1555*065714dcSSiddheshwar Mahesh 	mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
15567c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
15577c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
15587c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
15597c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
15607c478bd9Sstevel@tonic-gate 	/*
15617c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
15627c478bd9Sstevel@tonic-gate 	 */
15637c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
15647c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_state = C_CONNECTED;
15650a701b1eSRobert Gordon 
15660a701b1eSRobert Gordon 	/*
15670a701b1eSRobert Gordon 	 * Initialize the server credit control
15680a701b1eSRobert Gordon 	 * portion of the rdmaconn struct.
15690a701b1eSRobert Gordon 	 */
15700a701b1eSRobert Gordon 	kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
15710a701b1eSRobert Gordon 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
15720a701b1eSRobert Gordon 	cc_info->srv_cc_buffers_granted = preposted_rbufs;
15730a701b1eSRobert Gordon 	cc_info->srv_cc_cur_buffers_used = 0;
15740a701b1eSRobert Gordon 	cc_info->srv_cc_posted = preposted_rbufs;
15750a701b1eSRobert Gordon 
15767c478bd9Sstevel@tonic-gate 	*qp = kqp;
15770a701b1eSRobert Gordon 
15787c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
15797c478bd9Sstevel@tonic-gate fail:
15807c478bd9Sstevel@tonic-gate 	if (kqp)
15817c478bd9Sstevel@tonic-gate 		kmem_free(kqp, sizeof (rib_qp_t));
15827c478bd9Sstevel@tonic-gate 
15837c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
15847c478bd9Sstevel@tonic-gate }
15857c478bd9Sstevel@tonic-gate 
15867c478bd9Sstevel@tonic-gate /* ARGSUSED */
15877c478bd9Sstevel@tonic-gate ibt_cm_status_t
15887c478bd9Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event,
15897c478bd9Sstevel@tonic-gate     ibt_cm_return_args_t *ret_args, void *priv_data,
15907c478bd9Sstevel@tonic-gate     ibt_priv_data_len_t len)
15917c478bd9Sstevel@tonic-gate {
15927c478bd9Sstevel@tonic-gate 	rpcib_state_t   *ribstat;
15937c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
15947c478bd9Sstevel@tonic-gate 
15957c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)clnt_hdl;
15967c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
15997c478bd9Sstevel@tonic-gate 
16007c478bd9Sstevel@tonic-gate 	/* got a connection close event */
16017c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
16027c478bd9Sstevel@tonic-gate 	{
16037c478bd9Sstevel@tonic-gate 		CONN	*conn;
16047c478bd9Sstevel@tonic-gate 		rib_qp_t *qp;
16057c478bd9Sstevel@tonic-gate 
16067c478bd9Sstevel@tonic-gate 		/* check reason why connection was closed */
16077c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
16087c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
16097c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
16107c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
16117c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
16127c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
16137c478bd9Sstevel@tonic-gate 			/*
16147c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
16157c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
16167c478bd9Sstevel@tonic-gate 			 */
16177c478bd9Sstevel@tonic-gate 			break;
16187c478bd9Sstevel@tonic-gate 		default:
16197c478bd9Sstevel@tonic-gate 			/*
16207c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
16217c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
16227c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
16237c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
16247c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
16257c478bd9Sstevel@tonic-gate 			 * state
16267c478bd9Sstevel@tonic-gate 			 */
16277c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
16287c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
16297c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
16307c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
16317c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
16327c478bd9Sstevel@tonic-gate 				break;
16337c478bd9Sstevel@tonic-gate 			}
16347c478bd9Sstevel@tonic-gate 
16350a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
16367c478bd9Sstevel@tonic-gate 
16377c478bd9Sstevel@tonic-gate 			/*
16387c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref is down to 0 already
16397c478bd9Sstevel@tonic-gate 			 */
16407c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
16417c478bd9Sstevel@tonic-gate 				/*
16427c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
16437c478bd9Sstevel@tonic-gate 				 */
16447c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
16457c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
16467c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
16477c478bd9Sstevel@tonic-gate 				    &hca->cl_conn_list);
16487c478bd9Sstevel@tonic-gate 			} else {
1649*065714dcSSiddheshwar Mahesh 				/*
1650*065714dcSSiddheshwar Mahesh 				 * conn will be freed when c_ref goes to 0.
1651*065714dcSSiddheshwar Mahesh 				 * Indicate to cleaning thread not to close
1652*065714dcSSiddheshwar Mahesh 				 * the connection, but just free the channel.
1653*065714dcSSiddheshwar Mahesh 				 */
1654*065714dcSSiddheshwar Mahesh 				conn->c_flags |= C_CLOSE_NOTNEEDED;
16557c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
16567c478bd9Sstevel@tonic-gate 			}
16577c478bd9Sstevel@tonic-gate #ifdef DEBUG
16587c478bd9Sstevel@tonic-gate 			if (rib_debug)
16597c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_clnt_cm_handler: "
16607c478bd9Sstevel@tonic-gate 				    "(CONN_CLOSED) channel disconnected");
16617c478bd9Sstevel@tonic-gate #endif
16627c478bd9Sstevel@tonic-gate 			break;
16637c478bd9Sstevel@tonic-gate 		}
16647c478bd9Sstevel@tonic-gate 		break;
16657c478bd9Sstevel@tonic-gate 	}
16667c478bd9Sstevel@tonic-gate 	default:
16677c478bd9Sstevel@tonic-gate 		break;
16687c478bd9Sstevel@tonic-gate 	}
16697c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
16707c478bd9Sstevel@tonic-gate }
16717c478bd9Sstevel@tonic-gate 
16727c478bd9Sstevel@tonic-gate /*
16737c478bd9Sstevel@tonic-gate  * Connect to the server.
16747c478bd9Sstevel@tonic-gate  */
16757c478bd9Sstevel@tonic-gate rdma_stat
1676214ae7d0SSiddheshwar Mahesh rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, rpcib_ping_t *rptp)
16777c478bd9Sstevel@tonic-gate {
16787c478bd9Sstevel@tonic-gate 	ibt_chan_open_args_t	chan_args;	/* channel args */
16797c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
16807c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
16817c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
16827c478bd9Sstevel@tonic-gate 	ibt_rc_returns_t	ret_args;   	/* conn reject info */
16837c478bd9Sstevel@tonic-gate 	int refresh = REFRESH_ATTEMPTS;	/* refresh if IBT_CM_CONN_STALE */
16840a701b1eSRobert Gordon 	ibt_ip_cm_info_t	ipcm_info;
16850a701b1eSRobert Gordon 	uint8_t cmp_ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ];
16860a701b1eSRobert Gordon 
16877c478bd9Sstevel@tonic-gate 
16887c478bd9Sstevel@tonic-gate 	(void) bzero(&chan_args, sizeof (chan_args));
16897c478bd9Sstevel@tonic-gate 	(void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
16900a701b1eSRobert Gordon 	(void) bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
16910a701b1eSRobert Gordon 
1692214ae7d0SSiddheshwar Mahesh 	ipcm_info.src_addr.family = rptp->srcip.family;
1693214ae7d0SSiddheshwar Mahesh 	switch (ipcm_info.src_addr.family) {
16940a701b1eSRobert Gordon 	case AF_INET:
1695214ae7d0SSiddheshwar Mahesh 		ipcm_info.src_addr.un.ip4addr = rptp->srcip.un.ip4addr;
16960a701b1eSRobert Gordon 		break;
16970a701b1eSRobert Gordon 	case AF_INET6:
1698214ae7d0SSiddheshwar Mahesh 		ipcm_info.src_addr.un.ip6addr = rptp->srcip.un.ip6addr;
16990a701b1eSRobert Gordon 		break;
17000a701b1eSRobert Gordon 	}
17010a701b1eSRobert Gordon 
1702214ae7d0SSiddheshwar Mahesh 	ipcm_info.dst_addr.family = rptp->srcip.family;
1703214ae7d0SSiddheshwar Mahesh 	switch (ipcm_info.dst_addr.family) {
17040a701b1eSRobert Gordon 	case AF_INET:
1705214ae7d0SSiddheshwar Mahesh 		ipcm_info.dst_addr.un.ip4addr = rptp->dstip.un.ip4addr;
17060a701b1eSRobert Gordon 		break;
17070a701b1eSRobert Gordon 	case AF_INET6:
1708214ae7d0SSiddheshwar Mahesh 		ipcm_info.dst_addr.un.ip6addr = rptp->dstip.un.ip6addr;
17090a701b1eSRobert Gordon 		break;
17100a701b1eSRobert Gordon 	}
17110a701b1eSRobert Gordon 
1712f837ee4aSSiddheshwar Mahesh 	ipcm_info.src_port = (in_port_t)nfs_rdma_port;
17130a701b1eSRobert Gordon 
17140a701b1eSRobert Gordon 	ibt_status = ibt_format_ip_private_data(&ipcm_info,
17150a701b1eSRobert Gordon 	    IBT_IP_HDR_PRIV_DATA_SZ, cmp_ip_pvt);
17160a701b1eSRobert Gordon 
17170a701b1eSRobert Gordon 	if (ibt_status != IBT_SUCCESS) {
17180a701b1eSRobert Gordon 		cmn_err(CE_WARN, "ibt_format_ip_private_data failed\n");
17190a701b1eSRobert Gordon 		return (-1);
17200a701b1eSRobert Gordon 	}
17217c478bd9Sstevel@tonic-gate 
1722214ae7d0SSiddheshwar Mahesh 	qp_attr.rc_hca_port_num = rptp->path.pi_prim_cep_path.cep_hca_port_num;
17237c478bd9Sstevel@tonic-gate 	/* Alloc a RC channel */
17247c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl;
17257c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl;
17267c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
17277c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
17287c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
17297c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
17307c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
17317c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
17327c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
17337c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
17347c478bd9Sstevel@tonic-gate 
1735f837ee4aSSiddheshwar Mahesh 	rptp->path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, nfs_rdma_port);
1736214ae7d0SSiddheshwar Mahesh 	chan_args.oc_path = &rptp->path;
1737f837ee4aSSiddheshwar Mahesh 
17387c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_handler = rib_clnt_cm_handler;
17397c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_clnt_private = (void *)rib_stat;
17400a701b1eSRobert Gordon 	chan_args.oc_rdma_ra_out = 4;
17410a701b1eSRobert Gordon 	chan_args.oc_rdma_ra_in = 4;
17427c478bd9Sstevel@tonic-gate 	chan_args.oc_path_retry_cnt = 2;
17437c478bd9Sstevel@tonic-gate 	chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
17440a701b1eSRobert Gordon 	chan_args.oc_priv_data = cmp_ip_pvt;
17450a701b1eSRobert Gordon 	chan_args.oc_priv_data_len = IBT_IP_HDR_PRIV_DATA_SZ;
17467c478bd9Sstevel@tonic-gate 
17477c478bd9Sstevel@tonic-gate refresh:
17487c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
17497c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
17507c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
17510a701b1eSRobert Gordon 		    IBT_ACHAN_NO_FLAGS,
17520a701b1eSRobert Gordon 		    &qp_attr, &qp->qp_hdl,
17537c478bd9Sstevel@tonic-gate 		    &chan_sizes);
17547c478bd9Sstevel@tonic-gate 	} else {
17557c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
17567c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17577c478bd9Sstevel@tonic-gate 	}
17587c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
17597c478bd9Sstevel@tonic-gate 
17607c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
17610a701b1eSRobert Gordon 		DTRACE_PROBE1(rpcib__i_conntosrv,
17620a701b1eSRobert Gordon 		    int, ibt_status);
17637c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17647c478bd9Sstevel@tonic-gate 	}
17657c478bd9Sstevel@tonic-gate 
17667c478bd9Sstevel@tonic-gate 	/* Connect to the Server */
17677c478bd9Sstevel@tonic-gate 	(void) bzero(&ret_args, sizeof (ret_args));
17687c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->cb_lock);
17697c478bd9Sstevel@tonic-gate 	ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS,
17707c478bd9Sstevel@tonic-gate 	    IBT_BLOCKING, &chan_args, &ret_args);
17717c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
17720a701b1eSRobert Gordon 		DTRACE_PROBE2(rpcib__i_openrctosrv,
17730a701b1eSRobert Gordon 		    int, ibt_status, int, ret_args.rc_status);
17740a701b1eSRobert Gordon 
17757c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
17767c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
17777c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->cb_lock);
17787c478bd9Sstevel@tonic-gate 		if (refresh-- && ibt_status == IBT_CM_FAILURE &&
17797c478bd9Sstevel@tonic-gate 		    ret_args.rc_status == IBT_CM_CONN_STALE) {
17807c478bd9Sstevel@tonic-gate 			/*
17817c478bd9Sstevel@tonic-gate 			 * Got IBT_CM_CONN_STALE probably because of stale
17827c478bd9Sstevel@tonic-gate 			 * data on the passive end of a channel that existed
17837c478bd9Sstevel@tonic-gate 			 * prior to reboot. Retry establishing a channel
17847c478bd9Sstevel@tonic-gate 			 * REFRESH_ATTEMPTS times, during which time the
17857c478bd9Sstevel@tonic-gate 			 * stale conditions on the server might clear up.
17867c478bd9Sstevel@tonic-gate 			 */
17877c478bd9Sstevel@tonic-gate 			goto refresh;
17887c478bd9Sstevel@tonic-gate 		}
17897c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17907c478bd9Sstevel@tonic-gate 	}
17917c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->cb_lock);
17927c478bd9Sstevel@tonic-gate 	/*
17937c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
17947c478bd9Sstevel@tonic-gate 	 */
17957c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(qp->qp_hdl, (void *)qp);
17967c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
17977c478bd9Sstevel@tonic-gate }
17987c478bd9Sstevel@tonic-gate 
17997c478bd9Sstevel@tonic-gate rdma_stat
1800214ae7d0SSiddheshwar Mahesh rib_ping_srv(int addr_type, struct netbuf *raddr, rpcib_ping_t *rptp)
18017c478bd9Sstevel@tonic-gate {
1802e11c3f44Smeem 	uint_t			i;
18037c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
18040a701b1eSRobert Gordon 	uint8_t			num_paths_p;
18050a701b1eSRobert Gordon 	ibt_ip_path_attr_t	ipattr;
18060a701b1eSRobert Gordon 	ibt_path_ip_src_t	srcip;
1807e11c3f44Smeem 	rpcib_ipaddrs_t		addrs4;
1808e11c3f44Smeem 	rpcib_ipaddrs_t		addrs6;
1809e11c3f44Smeem 	struct sockaddr_in	*sinp;
1810e11c3f44Smeem 	struct sockaddr_in6	*sin6p;
1811e11c3f44Smeem 	rdma_stat		retval = RDMA_SUCCESS;
18120a701b1eSRobert Gordon 
18137c478bd9Sstevel@tonic-gate 	ASSERT(raddr->buf != NULL);
18147c478bd9Sstevel@tonic-gate 
18150a701b1eSRobert Gordon 	bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
18167c478bd9Sstevel@tonic-gate 
1817e11c3f44Smeem 	if (!rpcib_get_ib_addresses(&addrs4, &addrs6) ||
1818e11c3f44Smeem 	    (addrs4.ri_count == 0 && addrs6.ri_count == 0)) {
1819e11c3f44Smeem 		retval = RDMA_FAILED;
1820e11c3f44Smeem 		goto done;
18210a701b1eSRobert Gordon 	}
18220a701b1eSRobert Gordon 
18230a701b1eSRobert Gordon 	/* Prep the destination address */
18247c478bd9Sstevel@tonic-gate 	switch (addr_type) {
18257c478bd9Sstevel@tonic-gate 	case AF_INET:
1826e11c3f44Smeem 		sinp = (struct sockaddr_in *)raddr->buf;
1827214ae7d0SSiddheshwar Mahesh 		rptp->dstip.family = AF_INET;
1828214ae7d0SSiddheshwar Mahesh 		rptp->dstip.un.ip4addr = sinp->sin_addr.s_addr;
1829e11c3f44Smeem 		sinp = addrs4.ri_list;
18300a701b1eSRobert Gordon 
1831214ae7d0SSiddheshwar Mahesh 		ipattr.ipa_dst_ip 	= &rptp->dstip;
18320a701b1eSRobert Gordon 		ipattr.ipa_hca_guid	= rib_stat->hca->hca_guid;
18330a701b1eSRobert Gordon 		ipattr.ipa_ndst		= 1;
18340a701b1eSRobert Gordon 		ipattr.ipa_max_paths	= 1;
1835214ae7d0SSiddheshwar Mahesh 		ipattr.ipa_src_ip.family = rptp->dstip.family;
1836214ae7d0SSiddheshwar Mahesh 		for (i = 0; i < addrs4.ri_count; i++) {
1837214ae7d0SSiddheshwar Mahesh 			num_paths_p = 0;
1838e11c3f44Smeem 			ipattr.ipa_src_ip.un.ip4addr = sinp[i].sin_addr.s_addr;
1839214ae7d0SSiddheshwar Mahesh 			bzero(&srcip, sizeof (ibt_path_ip_src_t));
18400a701b1eSRobert Gordon 
18410a701b1eSRobert Gordon 			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
1842214ae7d0SSiddheshwar Mahesh 			    IBT_PATH_NO_FLAGS, &ipattr, &rptp->path,
1843214ae7d0SSiddheshwar Mahesh 			    &num_paths_p, &srcip);
18440a701b1eSRobert Gordon 			if (ibt_status == IBT_SUCCESS &&
18450a701b1eSRobert Gordon 			    num_paths_p != 0 &&
1846214ae7d0SSiddheshwar Mahesh 			    rptp->path.pi_hca_guid == rib_stat->hca->hca_guid) {
1847214ae7d0SSiddheshwar Mahesh 				rptp->hca = rib_stat->hca;
1848214ae7d0SSiddheshwar Mahesh 				rptp->srcip.family = AF_INET;
1849214ae7d0SSiddheshwar Mahesh 				rptp->srcip.un.ip4addr =
1850214ae7d0SSiddheshwar Mahesh 				    srcip.ip_primary.un.ip4addr;
1851e11c3f44Smeem 				goto done;
18520a701b1eSRobert Gordon 			}
18530a701b1eSRobert Gordon 		}
1854e11c3f44Smeem 		retval = RDMA_FAILED;
18557c478bd9Sstevel@tonic-gate 		break;
18567c478bd9Sstevel@tonic-gate 
18577c478bd9Sstevel@tonic-gate 	case AF_INET6:
1858e11c3f44Smeem 		sin6p = (struct sockaddr_in6 *)raddr->buf;
1859214ae7d0SSiddheshwar Mahesh 		rptp->dstip.family = AF_INET6;
1860214ae7d0SSiddheshwar Mahesh 		rptp->dstip.un.ip6addr = sin6p->sin6_addr;
1861e11c3f44Smeem 		sin6p = addrs6.ri_list;
18620a701b1eSRobert Gordon 
1863214ae7d0SSiddheshwar Mahesh 		ipattr.ipa_dst_ip 	= &rptp->dstip;
18640a701b1eSRobert Gordon 		ipattr.ipa_hca_guid	= rib_stat->hca->hca_guid;
18650a701b1eSRobert Gordon 		ipattr.ipa_ndst		= 1;
18660a701b1eSRobert Gordon 		ipattr.ipa_max_paths	= 1;
1867214ae7d0SSiddheshwar Mahesh 		ipattr.ipa_src_ip.family = rptp->dstip.family;
1868214ae7d0SSiddheshwar Mahesh 		for (i = 0; i < addrs6.ri_count; i++) {
1869214ae7d0SSiddheshwar Mahesh 			num_paths_p = 0;
1870e11c3f44Smeem 			ipattr.ipa_src_ip.un.ip6addr = sin6p[i].sin6_addr;
1871214ae7d0SSiddheshwar Mahesh 			bzero(&srcip, sizeof (ibt_path_ip_src_t));
18720a701b1eSRobert Gordon 
18730a701b1eSRobert Gordon 			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
1874214ae7d0SSiddheshwar Mahesh 			    IBT_PATH_NO_FLAGS, &ipattr, &rptp->path,
1875214ae7d0SSiddheshwar Mahesh 			    &num_paths_p, &srcip);
18760a701b1eSRobert Gordon 			if (ibt_status == IBT_SUCCESS &&
18770a701b1eSRobert Gordon 			    num_paths_p != 0 &&
1878214ae7d0SSiddheshwar Mahesh 			    rptp->path.pi_hca_guid == rib_stat->hca->hca_guid) {
1879214ae7d0SSiddheshwar Mahesh 				rptp->hca = rib_stat->hca;
1880214ae7d0SSiddheshwar Mahesh 				rptp->srcip.family = AF_INET6;
1881214ae7d0SSiddheshwar Mahesh 				rptp->srcip.un.ip6addr =
1882214ae7d0SSiddheshwar Mahesh 				    srcip.ip_primary.un.ip6addr;
1883e11c3f44Smeem 				goto done;
18840a701b1eSRobert Gordon 			}
18850a701b1eSRobert Gordon 		}
1886e11c3f44Smeem 		retval = RDMA_FAILED;
18877c478bd9Sstevel@tonic-gate 		break;
18887c478bd9Sstevel@tonic-gate 
18897c478bd9Sstevel@tonic-gate 	default:
1890e11c3f44Smeem 		retval = RDMA_INVAL;
1891e11c3f44Smeem 		break;
18927c478bd9Sstevel@tonic-gate 	}
1893e11c3f44Smeem done:
1894214ae7d0SSiddheshwar Mahesh 
1895e11c3f44Smeem 	if (addrs4.ri_size > 0)
1896e11c3f44Smeem 		kmem_free(addrs4.ri_list, addrs4.ri_size);
1897e11c3f44Smeem 	if (addrs6.ri_size > 0)
1898e11c3f44Smeem 		kmem_free(addrs6.ri_list, addrs6.ri_size);
1899e11c3f44Smeem 	return (retval);
19007c478bd9Sstevel@tonic-gate }
19017c478bd9Sstevel@tonic-gate 
19027c478bd9Sstevel@tonic-gate /*
19037c478bd9Sstevel@tonic-gate  * Close channel, remove from connection list and
19047c478bd9Sstevel@tonic-gate  * free up resources allocated for that channel.
19057c478bd9Sstevel@tonic-gate  */
19067c478bd9Sstevel@tonic-gate rdma_stat
19077c478bd9Sstevel@tonic-gate rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
19087c478bd9Sstevel@tonic-gate {
19097c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
19107c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
19117c478bd9Sstevel@tonic-gate 
1912*065714dcSSiddheshwar Mahesh 	mutex_enter(&conn->c_lock);
1913*065714dcSSiddheshwar Mahesh 	if (conn->c_timeout != NULL) {
1914*065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
1915*065714dcSSiddheshwar Mahesh 		(void) untimeout(conn->c_timeout);
1916*065714dcSSiddheshwar Mahesh 		mutex_enter(&conn->c_lock);
1917*065714dcSSiddheshwar Mahesh 	}
1918*065714dcSSiddheshwar Mahesh 
1919*065714dcSSiddheshwar Mahesh 	while (conn->c_flags & C_CLOSE_PENDING) {
1920*065714dcSSiddheshwar Mahesh 		cv_wait(&conn->c_cv, &conn->c_lock);
1921*065714dcSSiddheshwar Mahesh 	}
1922*065714dcSSiddheshwar Mahesh 	mutex_exit(&conn->c_lock);
1923*065714dcSSiddheshwar Mahesh 
19247c478bd9Sstevel@tonic-gate 	/*
19257c478bd9Sstevel@tonic-gate 	 * c_ref == 0 and connection is in C_DISCONN_PEND
19267c478bd9Sstevel@tonic-gate 	 */
19277c478bd9Sstevel@tonic-gate 	hca = qp->hca;
19287c478bd9Sstevel@tonic-gate 	if (conn_list != NULL)
19297c478bd9Sstevel@tonic-gate 		(void) rib_rm_conn(conn, conn_list);
19300a701b1eSRobert Gordon 
19317c478bd9Sstevel@tonic-gate 	/*
1932*065714dcSSiddheshwar Mahesh 	 * There is only one case where we get here with
1933*065714dcSSiddheshwar Mahesh 	 * qp_hdl = NULL, which is during connection setup on
1934*065714dcSSiddheshwar Mahesh 	 * the client. In such a case there are no posted
1935*065714dcSSiddheshwar Mahesh 	 * send/recv buffers.
19367c478bd9Sstevel@tonic-gate 	 */
1937*065714dcSSiddheshwar Mahesh 	if (qp->qp_hdl != NULL) {
19387c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
19397c478bd9Sstevel@tonic-gate 		while (qp->n_posted_rbufs)
19407c478bd9Sstevel@tonic-gate 			cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock);
19417c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
1942*065714dcSSiddheshwar Mahesh 
1943*065714dcSSiddheshwar Mahesh 		mutex_enter(&qp->send_rbufs_lock);
1944*065714dcSSiddheshwar Mahesh 		while (qp->n_send_rbufs)
1945*065714dcSSiddheshwar Mahesh 			cv_wait(&qp->send_rbufs_cv, &qp->send_rbufs_lock);
1946*065714dcSSiddheshwar Mahesh 		mutex_exit(&qp->send_rbufs_lock);
1947*065714dcSSiddheshwar Mahesh 
19487c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
19497c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
19507c478bd9Sstevel@tonic-gate 	}
19510a701b1eSRobert Gordon 
19527c478bd9Sstevel@tonic-gate 	ASSERT(qp->rdlist == NULL);
19530a701b1eSRobert Gordon 
19547c478bd9Sstevel@tonic-gate 	if (qp->replylist != NULL) {
19557c478bd9Sstevel@tonic-gate 		(void) rib_rem_replylist(qp);
19567c478bd9Sstevel@tonic-gate 	}
19577c478bd9Sstevel@tonic-gate 
19587c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->cb_conn_cv);
19597c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->posted_rbufs_cv);
1960*065714dcSSiddheshwar Mahesh 	cv_destroy(&qp->send_rbufs_cv);
19617c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->cb_lock);
19627c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->replylist_lock);
19637c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->posted_rbufs_lock);
1964*065714dcSSiddheshwar Mahesh 	mutex_destroy(&qp->send_rbufs_lock);
19657c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->rdlist_lock);
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate 	cv_destroy(&conn->c_cv);
19687c478bd9Sstevel@tonic-gate 	mutex_destroy(&conn->c_lock);
19697c478bd9Sstevel@tonic-gate 
19707c478bd9Sstevel@tonic-gate 	if (conn->c_raddr.buf != NULL) {
19717c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
19727c478bd9Sstevel@tonic-gate 	}
19737c478bd9Sstevel@tonic-gate 	if (conn->c_laddr.buf != NULL) {
19747c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
19757c478bd9Sstevel@tonic-gate 	}
19760a701b1eSRobert Gordon 
19770a701b1eSRobert Gordon 	/*
19780a701b1eSRobert Gordon 	 * Credit control cleanup.
19790a701b1eSRobert Gordon 	 */
19800a701b1eSRobert Gordon 	if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
19810a701b1eSRobert Gordon 		rdma_clnt_cred_ctrl_t *cc_info;
19820a701b1eSRobert Gordon 		cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
19830a701b1eSRobert Gordon 		cv_destroy(&cc_info->clnt_cc_cv);
19840a701b1eSRobert Gordon 	}
19850a701b1eSRobert Gordon 
19867c478bd9Sstevel@tonic-gate 	kmem_free(qp, sizeof (rib_qp_t));
19877c478bd9Sstevel@tonic-gate 
19887c478bd9Sstevel@tonic-gate 	/*
19897c478bd9Sstevel@tonic-gate 	 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
19907c478bd9Sstevel@tonic-gate 	 * then the hca is no longer being used.
19917c478bd9Sstevel@tonic-gate 	 */
19927c478bd9Sstevel@tonic-gate 	if (conn_list != NULL) {
19937c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
19947c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
19957c478bd9Sstevel@tonic-gate 			rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
19967c478bd9Sstevel@tonic-gate 			if (hca->srv_conn_list.conn_hd == NULL) {
19977c478bd9Sstevel@tonic-gate 				rw_enter(&hca->cl_conn_list.conn_lock,
19987c478bd9Sstevel@tonic-gate 				    RW_READER);
19990a701b1eSRobert Gordon 
20007c478bd9Sstevel@tonic-gate 				if (hca->cl_conn_list.conn_hd == NULL) {
20017c478bd9Sstevel@tonic-gate 					mutex_enter(&hca->inuse_lock);
20027c478bd9Sstevel@tonic-gate 					hca->inuse = FALSE;
20037c478bd9Sstevel@tonic-gate 					cv_signal(&hca->cb_cv);
20047c478bd9Sstevel@tonic-gate 					mutex_exit(&hca->inuse_lock);
20057c478bd9Sstevel@tonic-gate 				}
20067c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
20077c478bd9Sstevel@tonic-gate 			}
20087c478bd9Sstevel@tonic-gate 			rw_exit(&hca->srv_conn_list.conn_lock);
20097c478bd9Sstevel@tonic-gate 		}
20107c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
20117c478bd9Sstevel@tonic-gate 	}
20120a701b1eSRobert Gordon 
20137c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
20147c478bd9Sstevel@tonic-gate }
20157c478bd9Sstevel@tonic-gate 
20167c478bd9Sstevel@tonic-gate /*
2017*065714dcSSiddheshwar Mahesh  * All sends are done under the protection of
2018*065714dcSSiddheshwar Mahesh  * the wdesc->sendwait_lock. n_send_rbufs count
2019*065714dcSSiddheshwar Mahesh  * is protected using the send_rbufs_lock.
2020*065714dcSSiddheshwar Mahesh  * lock ordering is:
2021*065714dcSSiddheshwar Mahesh  * sendwait_lock -> send_rbufs_lock
2022*065714dcSSiddheshwar Mahesh  */
2023*065714dcSSiddheshwar Mahesh 
2024*065714dcSSiddheshwar Mahesh void
2025*065714dcSSiddheshwar Mahesh rib_send_hold(rib_qp_t *qp)
2026*065714dcSSiddheshwar Mahesh {
2027*065714dcSSiddheshwar Mahesh 	mutex_enter(&qp->send_rbufs_lock);
2028*065714dcSSiddheshwar Mahesh 	qp->n_send_rbufs++;
2029*065714dcSSiddheshwar Mahesh 	mutex_exit(&qp->send_rbufs_lock);
2030*065714dcSSiddheshwar Mahesh }
2031*065714dcSSiddheshwar Mahesh 
2032*065714dcSSiddheshwar Mahesh void
2033*065714dcSSiddheshwar Mahesh rib_send_rele(rib_qp_t *qp)
2034*065714dcSSiddheshwar Mahesh {
2035*065714dcSSiddheshwar Mahesh 	mutex_enter(&qp->send_rbufs_lock);
2036*065714dcSSiddheshwar Mahesh 	qp->n_send_rbufs--;
2037*065714dcSSiddheshwar Mahesh 	if (qp->n_send_rbufs == 0)
2038*065714dcSSiddheshwar Mahesh 		cv_signal(&qp->send_rbufs_cv);
2039*065714dcSSiddheshwar Mahesh 	mutex_exit(&qp->send_rbufs_lock);
2040*065714dcSSiddheshwar Mahesh }
2041*065714dcSSiddheshwar Mahesh 
2042*065714dcSSiddheshwar Mahesh /*
20437c478bd9Sstevel@tonic-gate  * Wait for send completion notification. Only on receiving a
20447c478bd9Sstevel@tonic-gate  * notification be it a successful or error completion, free the
20457c478bd9Sstevel@tonic-gate  * send_wid.
20467c478bd9Sstevel@tonic-gate  */
20477c478bd9Sstevel@tonic-gate static rdma_stat
20487c478bd9Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd)
20497c478bd9Sstevel@tonic-gate {
20507c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
20517c478bd9Sstevel@tonic-gate 	rdma_stat error = RDMA_SUCCESS;
20527c478bd9Sstevel@tonic-gate 	int	i;
20537c478bd9Sstevel@tonic-gate 
20547c478bd9Sstevel@tonic-gate 	/*
20557c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
20567c478bd9Sstevel@tonic-gate 	 */
20577c478bd9Sstevel@tonic-gate 	ASSERT(wd != NULL);
20587c478bd9Sstevel@tonic-gate 	mutex_enter(&wd->sendwait_lock);
20597c478bd9Sstevel@tonic-gate 	if (wd->status == (uint_t)SEND_WAIT) {
20607c478bd9Sstevel@tonic-gate 		timout = drv_usectohz(SEND_WAIT_TIME * 1000000) +
20617c478bd9Sstevel@tonic-gate 		    ddi_get_lbolt();
20620a701b1eSRobert Gordon 
20637c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER) {
20647c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait(&wd->wait_cv,
20657c478bd9Sstevel@tonic-gate 			    &wd->sendwait_lock, timout)) > 0 &&
20667c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
20677c478bd9Sstevel@tonic-gate 				;
20687c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
20697c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
20700a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__srvsendwait__timeout);
20710a701b1eSRobert Gordon 
20727c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
20737c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
20747c478bd9Sstevel@tonic-gate 				break;
20757c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
20767c478bd9Sstevel@tonic-gate 				break;
20777c478bd9Sstevel@tonic-gate 			}
20787c478bd9Sstevel@tonic-gate 		} else {
20797c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv,
20807c478bd9Sstevel@tonic-gate 			    &wd->sendwait_lock, timout)) > 0 &&
20817c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
20827c478bd9Sstevel@tonic-gate 				;
20837c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
20847c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
20850a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__clntsendwait__timeout);
20860a701b1eSRobert Gordon 
20877c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
20887c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
20897c478bd9Sstevel@tonic-gate 				break;
20907c478bd9Sstevel@tonic-gate 			case 0:		/* interrupted */
20910a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__clntsendwait__intr);
20920a701b1eSRobert Gordon 
20937c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
20947c478bd9Sstevel@tonic-gate 				error = RDMA_INTR;
20957c478bd9Sstevel@tonic-gate 				break;
20967c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
20977c478bd9Sstevel@tonic-gate 				break;
20987c478bd9Sstevel@tonic-gate 			}
20997c478bd9Sstevel@tonic-gate 		}
21007c478bd9Sstevel@tonic-gate 	}
21017c478bd9Sstevel@tonic-gate 
21027c478bd9Sstevel@tonic-gate 	if (wd->status != (uint_t)SEND_WAIT) {
21037c478bd9Sstevel@tonic-gate 		/* got send completion */
21047c478bd9Sstevel@tonic-gate 		if (wd->status != RDMA_SUCCESS) {
2105*065714dcSSiddheshwar Mahesh 			switch (wd->status) {
2106*065714dcSSiddheshwar Mahesh 			case RDMA_CONNLOST:
2107f837ee4aSSiddheshwar Mahesh 				error = RDMA_CONNLOST;
2108*065714dcSSiddheshwar Mahesh 				break;
2109*065714dcSSiddheshwar Mahesh 			default:
2110*065714dcSSiddheshwar Mahesh 				error = RDMA_FAILED;
2111*065714dcSSiddheshwar Mahesh 				break;
2112f837ee4aSSiddheshwar Mahesh 			}
21137c478bd9Sstevel@tonic-gate 		}
21147c478bd9Sstevel@tonic-gate 		for (i = 0; i < wd->nsbufs; i++) {
21157c478bd9Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), SEND_BUFFER,
211611606941Sjwahlig 			    (void *)(uintptr_t)wd->sbufaddr[i]);
21177c478bd9Sstevel@tonic-gate 		}
2118*065714dcSSiddheshwar Mahesh 
2119*065714dcSSiddheshwar Mahesh 		rib_send_rele(qp);
2120*065714dcSSiddheshwar Mahesh 
21217c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
21227c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wd);
2123*065714dcSSiddheshwar Mahesh 
21247c478bd9Sstevel@tonic-gate 	} else {
21257c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
21267c478bd9Sstevel@tonic-gate 	}
21277c478bd9Sstevel@tonic-gate 	return (error);
21287c478bd9Sstevel@tonic-gate }
21297c478bd9Sstevel@tonic-gate 
21307c478bd9Sstevel@tonic-gate static struct send_wid *
21317c478bd9Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp)
21327c478bd9Sstevel@tonic-gate {
21337c478bd9Sstevel@tonic-gate 	struct send_wid	*wd;
21347c478bd9Sstevel@tonic-gate 
21357c478bd9Sstevel@tonic-gate 	wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP);
21367c478bd9Sstevel@tonic-gate 	wd->xid = xid;
21377c478bd9Sstevel@tonic-gate 	wd->cv_sig = cv_sig;
21387c478bd9Sstevel@tonic-gate 	wd->qp = qp;
21397c478bd9Sstevel@tonic-gate 	cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL);
21407c478bd9Sstevel@tonic-gate 	mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL);
21417c478bd9Sstevel@tonic-gate 	wd->status = (uint_t)SEND_WAIT;
21427c478bd9Sstevel@tonic-gate 
21437c478bd9Sstevel@tonic-gate 	return (wd);
21447c478bd9Sstevel@tonic-gate }
21457c478bd9Sstevel@tonic-gate 
21467c478bd9Sstevel@tonic-gate static int
21477c478bd9Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc)
21487c478bd9Sstevel@tonic-gate {
21497c478bd9Sstevel@tonic-gate 	cv_destroy(&wdesc->wait_cv);
21507c478bd9Sstevel@tonic-gate 	mutex_destroy(&wdesc->sendwait_lock);
21517c478bd9Sstevel@tonic-gate 	kmem_free(wdesc, sizeof (*wdesc));
21527c478bd9Sstevel@tonic-gate 
21537c478bd9Sstevel@tonic-gate 	return (0);
21547c478bd9Sstevel@tonic-gate }
21557c478bd9Sstevel@tonic-gate 
21567c478bd9Sstevel@tonic-gate static rdma_stat
21577c478bd9Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep)
21587c478bd9Sstevel@tonic-gate {
21597c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
21607c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
21617c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
21627c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->replylist_lock);
21637c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
21647c478bd9Sstevel@tonic-gate 	}
21657c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
21667c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
21677c478bd9Sstevel@tonic-gate }
21687c478bd9Sstevel@tonic-gate 
21697c478bd9Sstevel@tonic-gate /*
21707c478bd9Sstevel@tonic-gate  * Send buffers are freed here only in case of error in posting
21717c478bd9Sstevel@tonic-gate  * on QP. If the post succeeded, the send buffers are freed upon
21727c478bd9Sstevel@tonic-gate  * send completion in rib_sendwait() or in the scq_handler.
21737c478bd9Sstevel@tonic-gate  */
21747c478bd9Sstevel@tonic-gate rdma_stat
21757c478bd9Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
21760a701b1eSRobert Gordon 	int send_sig, int cv_sig, caddr_t *swid)
21777c478bd9Sstevel@tonic-gate {
21787c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
21797c478bd9Sstevel@tonic-gate 	struct clist	*clp;
21807c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
21817c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
21827c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
21837c478bd9Sstevel@tonic-gate 	int		i, nds;
21847c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
21857c478bd9Sstevel@tonic-gate 	uint_t		total_msg_size;
21860a701b1eSRobert Gordon 	rib_qp_t	*qp;
21870a701b1eSRobert Gordon 
21880a701b1eSRobert Gordon 	qp = ctoqp(conn);
21897c478bd9Sstevel@tonic-gate 
21907c478bd9Sstevel@tonic-gate 	ASSERT(cl != NULL);
21917c478bd9Sstevel@tonic-gate 
21927c478bd9Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
21937c478bd9Sstevel@tonic-gate 
21947c478bd9Sstevel@tonic-gate 	nds = 0;
21957c478bd9Sstevel@tonic-gate 	total_msg_size = 0;
21967c478bd9Sstevel@tonic-gate 	clp = cl;
21977c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
21987c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
21990a701b1eSRobert Gordon 			DTRACE_PROBE(rpcib__i__sendandwait_dsegmax_exceeded);
22007c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
22017c478bd9Sstevel@tonic-gate 		}
22020a701b1eSRobert Gordon 		sgl[nds].ds_va = clp->w.c_saddr;
22037c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */
22047c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = clp->c_len;
22057c478bd9Sstevel@tonic-gate 		total_msg_size += clp->c_len;
22067c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
22077c478bd9Sstevel@tonic-gate 		nds++;
22087c478bd9Sstevel@tonic-gate 	}
22097c478bd9Sstevel@tonic-gate 
22107c478bd9Sstevel@tonic-gate 	if (send_sig) {
22117c478bd9Sstevel@tonic-gate 		/* Set SEND_SIGNAL flag. */
22127c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
22137c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, cv_sig, qp);
22140a701b1eSRobert Gordon 		*swid = (caddr_t)wdesc;
2215*065714dcSSiddheshwar Mahesh 		tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
2216*065714dcSSiddheshwar Mahesh 		mutex_enter(&wdesc->sendwait_lock);
22177c478bd9Sstevel@tonic-gate 		wdesc->nsbufs = nds;
22187c478bd9Sstevel@tonic-gate 		for (i = 0; i < nds; i++) {
22197c478bd9Sstevel@tonic-gate 			wdesc->sbufaddr[i] = sgl[i].ds_va;
22207c478bd9Sstevel@tonic-gate 		}
2221*065714dcSSiddheshwar Mahesh 	} else {
2222*065714dcSSiddheshwar Mahesh 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
2223*065714dcSSiddheshwar Mahesh 		*swid = NULL;
2224*065714dcSSiddheshwar Mahesh 		tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
2225*065714dcSSiddheshwar Mahesh 	}
22267c478bd9Sstevel@tonic-gate 
22277c478bd9Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_SEND;
22287c478bd9Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
22297c478bd9Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
22307c478bd9Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
22317c478bd9Sstevel@tonic-gate 
22327c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
22330a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
22347c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
22357c478bd9Sstevel@tonic-gate 	}
22360a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
22377c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
22380a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
22390a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
22407c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2241*065714dcSSiddheshwar Mahesh 		if (send_sig) {
22427c478bd9Sstevel@tonic-gate 			for (i = 0; i < nds; i++) {
22437c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, SEND_BUFFER,
224411606941Sjwahlig 				    (void *)(uintptr_t)wdesc->sbufaddr[i]);
22457c478bd9Sstevel@tonic-gate 			}
2246*065714dcSSiddheshwar Mahesh 			mutex_exit(&wdesc->sendwait_lock);
22477c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wdesc);
2248*065714dcSSiddheshwar Mahesh 		}
22490a701b1eSRobert Gordon 		return (RDMA_CONNLOST);
22507c478bd9Sstevel@tonic-gate 	}
2251*065714dcSSiddheshwar Mahesh 
22527c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
22537c478bd9Sstevel@tonic-gate 
22547c478bd9Sstevel@tonic-gate 	if (send_sig) {
2255*065714dcSSiddheshwar Mahesh 		rib_send_hold(qp);
2256*065714dcSSiddheshwar Mahesh 		mutex_exit(&wdesc->sendwait_lock);
22577c478bd9Sstevel@tonic-gate 		if (cv_sig) {
22587c478bd9Sstevel@tonic-gate 			/*
22597c478bd9Sstevel@tonic-gate 			 * cv_wait for send to complete.
22607c478bd9Sstevel@tonic-gate 			 * We can fail due to a timeout or signal or
22617c478bd9Sstevel@tonic-gate 			 * unsuccessful send.
22627c478bd9Sstevel@tonic-gate 			 */
22637c478bd9Sstevel@tonic-gate 			ret = rib_sendwait(qp, wdesc);
22640a701b1eSRobert Gordon 
22657c478bd9Sstevel@tonic-gate 			return (ret);
22667c478bd9Sstevel@tonic-gate 		}
22677c478bd9Sstevel@tonic-gate 	}
22687c478bd9Sstevel@tonic-gate 
22697c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
22707c478bd9Sstevel@tonic-gate }
22717c478bd9Sstevel@tonic-gate 
22720a701b1eSRobert Gordon 
22737c478bd9Sstevel@tonic-gate rdma_stat
22747c478bd9Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
22757c478bd9Sstevel@tonic-gate {
22767c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
22770a701b1eSRobert Gordon 	caddr_t		wd;
22787c478bd9Sstevel@tonic-gate 
22797c478bd9Sstevel@tonic-gate 	/* send-wait & cv_signal */
22800a701b1eSRobert Gordon 	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
22817c478bd9Sstevel@tonic-gate 	return (ret);
22827c478bd9Sstevel@tonic-gate }
22837c478bd9Sstevel@tonic-gate 
22847c478bd9Sstevel@tonic-gate /*
2285*065714dcSSiddheshwar Mahesh  * Deprecated/obsolete interface not used currently
2286*065714dcSSiddheshwar Mahesh  * but earlier used for READ-READ protocol.
22877c478bd9Sstevel@tonic-gate  * Send RPC reply and wait for RDMA_DONE.
22887c478bd9Sstevel@tonic-gate  */
22897c478bd9Sstevel@tonic-gate rdma_stat
22907c478bd9Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
22917c478bd9Sstevel@tonic-gate {
22927c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
22937c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
22947c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
22950a701b1eSRobert Gordon 	caddr_t *wid = NULL;
22967c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
22977c478bd9Sstevel@tonic-gate 
22987c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
22997c478bd9Sstevel@tonic-gate 	rd = rdma_done_add(qp, msgid);
23007c478bd9Sstevel@tonic-gate 
23017c478bd9Sstevel@tonic-gate 	/* No cv_signal (whether send-wait or no-send-wait) */
23020a701b1eSRobert Gordon 	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, wid);
23037c478bd9Sstevel@tonic-gate 
23040a701b1eSRobert Gordon 	if (ret != RDMA_SUCCESS) {
23050a701b1eSRobert Gordon 		rdma_done_rm(qp, rd);
23060a701b1eSRobert Gordon 	} else {
23077c478bd9Sstevel@tonic-gate 		/*
23087c478bd9Sstevel@tonic-gate 		 * Wait for RDMA_DONE from remote end
23097c478bd9Sstevel@tonic-gate 		 */
23100a701b1eSRobert Gordon 		timout =
23110a701b1eSRobert Gordon 		    drv_usectohz(REPLY_WAIT_TIME * 1000000) + ddi_get_lbolt();
23120a701b1eSRobert Gordon 		cv_wait_ret = cv_timedwait(&rd->rdma_done_cv,
23130a701b1eSRobert Gordon 		    &qp->rdlist_lock,
23147c478bd9Sstevel@tonic-gate 		    timout);
23150a701b1eSRobert Gordon 
23167c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, rd);
23170a701b1eSRobert Gordon 
23187c478bd9Sstevel@tonic-gate 		if (cv_wait_ret < 0) {
23197c478bd9Sstevel@tonic-gate 			ret = RDMA_TIMEDOUT;
23200a701b1eSRobert Gordon 		}
23217c478bd9Sstevel@tonic-gate 	}
23227c478bd9Sstevel@tonic-gate 
23237c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
23247c478bd9Sstevel@tonic-gate 	return (ret);
23257c478bd9Sstevel@tonic-gate }
23267c478bd9Sstevel@tonic-gate 
23277c478bd9Sstevel@tonic-gate static struct recv_wid *
23287c478bd9Sstevel@tonic-gate rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid)
23297c478bd9Sstevel@tonic-gate {
23307c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
23317c478bd9Sstevel@tonic-gate 
23327c478bd9Sstevel@tonic-gate 	rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP);
23337c478bd9Sstevel@tonic-gate 	rwid->xid = msgid;
23347c478bd9Sstevel@tonic-gate 	rwid->addr = sgl->ds_va;
23357c478bd9Sstevel@tonic-gate 	rwid->qp = qp;
23367c478bd9Sstevel@tonic-gate 
23377c478bd9Sstevel@tonic-gate 	return (rwid);
23387c478bd9Sstevel@tonic-gate }
23397c478bd9Sstevel@tonic-gate 
23407c478bd9Sstevel@tonic-gate static void
23417c478bd9Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid)
23427c478bd9Sstevel@tonic-gate {
23437c478bd9Sstevel@tonic-gate 	kmem_free(rwid, sizeof (struct recv_wid));
23447c478bd9Sstevel@tonic-gate }
23457c478bd9Sstevel@tonic-gate 
23467c478bd9Sstevel@tonic-gate rdma_stat
23477c478bd9Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid)
23487c478bd9Sstevel@tonic-gate {
23497c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
23507c478bd9Sstevel@tonic-gate 	struct clist	*clp = cl;
23517c478bd9Sstevel@tonic-gate 	struct reply	*rep;
23527c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
23537c478bd9Sstevel@tonic-gate 	int		nds;
23547c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
23557c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
23567c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
23577c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
23587c478bd9Sstevel@tonic-gate 
23597c478bd9Sstevel@tonic-gate 	/*
23607c478bd9Sstevel@tonic-gate 	 * rdma_clnt_postrecv uses RECV_BUFFER.
23617c478bd9Sstevel@tonic-gate 	 */
23627c478bd9Sstevel@tonic-gate 
23637c478bd9Sstevel@tonic-gate 	nds = 0;
23647c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
23657c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
23667c478bd9Sstevel@tonic-gate 			ret = RDMA_FAILED;
23677c478bd9Sstevel@tonic-gate 			goto done;
23687c478bd9Sstevel@tonic-gate 		}
23690a701b1eSRobert Gordon 		sgl[nds].ds_va = cl->w.c_saddr;
23707c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
23717c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
23727c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
23737c478bd9Sstevel@tonic-gate 		nds++;
23747c478bd9Sstevel@tonic-gate 	}
23757c478bd9Sstevel@tonic-gate 
23767c478bd9Sstevel@tonic-gate 	if (nds != 1) {
23777c478bd9Sstevel@tonic-gate 		ret = RDMA_FAILED;
23787c478bd9Sstevel@tonic-gate 		goto done;
23797c478bd9Sstevel@tonic-gate 	}
23800a701b1eSRobert Gordon 
23817c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
23827c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
23837c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
23847c478bd9Sstevel@tonic-gate 
23857c478bd9Sstevel@tonic-gate 	rwid = rib_create_wid(qp, &sgl[0], msgid);
23867c478bd9Sstevel@tonic-gate 	if (rwid) {
238711606941Sjwahlig 		recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid;
23887c478bd9Sstevel@tonic-gate 	} else {
23897c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
23907c478bd9Sstevel@tonic-gate 		goto done;
23917c478bd9Sstevel@tonic-gate 	}
23927c478bd9Sstevel@tonic-gate 	rep = rib_addreplylist(qp, msgid);
23937c478bd9Sstevel@tonic-gate 	if (!rep) {
23947c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
23957c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
23967c478bd9Sstevel@tonic-gate 		goto done;
23977c478bd9Sstevel@tonic-gate 	}
23987c478bd9Sstevel@tonic-gate 
23997c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
24000a701b1eSRobert Gordon 
24010a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
24027c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
24037c478bd9Sstevel@tonic-gate 	}
24040a701b1eSRobert Gordon 
24050a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
24067c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
24070a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
24080a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
24097c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
24107c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
24117c478bd9Sstevel@tonic-gate 		(void) rib_rem_rep(qp, rep);
24120a701b1eSRobert Gordon 		ret = RDMA_CONNLOST;
24137c478bd9Sstevel@tonic-gate 		goto done;
24147c478bd9Sstevel@tonic-gate 	}
24157c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
24167c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
24177c478bd9Sstevel@tonic-gate 
24187c478bd9Sstevel@tonic-gate done:
24197c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
24200a701b1eSRobert Gordon 		rib_rbuf_free(conn, RECV_BUFFER,
24210a701b1eSRobert Gordon 		    (void *)(uintptr_t)clp->w.c_saddr3);
24227c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
24237c478bd9Sstevel@tonic-gate 	}
24247c478bd9Sstevel@tonic-gate 	return (ret);
24257c478bd9Sstevel@tonic-gate }
24267c478bd9Sstevel@tonic-gate 
24277c478bd9Sstevel@tonic-gate rdma_stat
24287c478bd9Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl)
24297c478bd9Sstevel@tonic-gate {
24307c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
24317c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
24327c478bd9Sstevel@tonic-gate 	int		nds;
24337c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
24347c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
24357c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
24367c478bd9Sstevel@tonic-gate 
24377c478bd9Sstevel@tonic-gate 	nds = 0;
24387c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
24397c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
24407c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
24417c478bd9Sstevel@tonic-gate 		}
24420a701b1eSRobert Gordon 		sgl[nds].ds_va = cl->w.c_saddr;
24437c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
24447c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
24457c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
24467c478bd9Sstevel@tonic-gate 		nds++;
24477c478bd9Sstevel@tonic-gate 	}
24487c478bd9Sstevel@tonic-gate 
24497c478bd9Sstevel@tonic-gate 	if (nds != 1) {
24500a701b1eSRobert Gordon 		rib_rbuf_free(conn, RECV_BUFFER,
24510a701b1eSRobert Gordon 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
24520a701b1eSRobert Gordon 
24537c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
24547c478bd9Sstevel@tonic-gate 	}
24550a701b1eSRobert Gordon 
24567c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
24577c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
24587c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
24597c478bd9Sstevel@tonic-gate 
24607c478bd9Sstevel@tonic-gate 	s_recvp = rib_init_svc_recv(qp, &sgl[0]);
246111606941Sjwahlig 	/* Use s_recvp's addr as wr id */
246211606941Sjwahlig 	recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp;
24637c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
24640a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
24657c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
24667c478bd9Sstevel@tonic-gate 	}
24670a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
24687c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
24690a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
24700a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
24717c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
247211606941Sjwahlig 		rib_rbuf_free(conn, RECV_BUFFER,
247311606941Sjwahlig 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
24747c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
24750a701b1eSRobert Gordon 
24760a701b1eSRobert Gordon 		return (RDMA_CONNLOST);
24777c478bd9Sstevel@tonic-gate 	}
24787c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
24797c478bd9Sstevel@tonic-gate 
24807c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
24817c478bd9Sstevel@tonic-gate }
24827c478bd9Sstevel@tonic-gate 
24837c478bd9Sstevel@tonic-gate /* Client */
24847c478bd9Sstevel@tonic-gate rdma_stat
24857c478bd9Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid)
24867c478bd9Sstevel@tonic-gate {
24877c478bd9Sstevel@tonic-gate 
24887c478bd9Sstevel@tonic-gate 	return (rib_clnt_post(conn, cl, msgid));
24897c478bd9Sstevel@tonic-gate }
24907c478bd9Sstevel@tonic-gate 
24910a701b1eSRobert Gordon /* Client */
24920a701b1eSRobert Gordon rdma_stat
24930a701b1eSRobert Gordon rib_post_resp_remove(CONN* conn, uint32_t msgid)
24940a701b1eSRobert Gordon {
24950a701b1eSRobert Gordon 	rib_qp_t	*qp = ctoqp(conn);
24960a701b1eSRobert Gordon 	struct reply	*rep;
24970a701b1eSRobert Gordon 
24980a701b1eSRobert Gordon 	mutex_enter(&qp->replylist_lock);
24990a701b1eSRobert Gordon 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
25000a701b1eSRobert Gordon 		if (rep->xid == msgid) {
25010a701b1eSRobert Gordon 			if (rep->vaddr_cq) {
25020a701b1eSRobert Gordon 				rib_rbuf_free(conn, RECV_BUFFER,
25030a701b1eSRobert Gordon 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
25040a701b1eSRobert Gordon 			}
25050a701b1eSRobert Gordon 			(void) rib_remreply(qp, rep);
25060a701b1eSRobert Gordon 			break;
25070a701b1eSRobert Gordon 		}
25080a701b1eSRobert Gordon 	}
25090a701b1eSRobert Gordon 	mutex_exit(&qp->replylist_lock);
25100a701b1eSRobert Gordon 
25110a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
25120a701b1eSRobert Gordon }
25130a701b1eSRobert Gordon 
25147c478bd9Sstevel@tonic-gate /* Server */
25157c478bd9Sstevel@tonic-gate rdma_stat
25167c478bd9Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl)
25177c478bd9Sstevel@tonic-gate {
25187c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
25197c478bd9Sstevel@tonic-gate 
25207c478bd9Sstevel@tonic-gate 	if (rib_svc_post(conn, cl) == RDMA_SUCCESS) {
25217c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
25227c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs++;
25237c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
25247c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
25257c478bd9Sstevel@tonic-gate 	}
25267c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
25277c478bd9Sstevel@tonic-gate }
25287c478bd9Sstevel@tonic-gate 
25297c478bd9Sstevel@tonic-gate /*
25307c478bd9Sstevel@tonic-gate  * Client side only interface to "recv" the rpc reply buf
25317c478bd9Sstevel@tonic-gate  * posted earlier by rib_post_resp(conn, cl, msgid).
25327c478bd9Sstevel@tonic-gate  */
25337c478bd9Sstevel@tonic-gate rdma_stat
25347c478bd9Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid)
25357c478bd9Sstevel@tonic-gate {
25367c478bd9Sstevel@tonic-gate 	struct reply *rep = NULL;
25377c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
25387c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
25397c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
25407c478bd9Sstevel@tonic-gate 
25417c478bd9Sstevel@tonic-gate 	/*
25427c478bd9Sstevel@tonic-gate 	 * Find the reply structure for this msgid
25437c478bd9Sstevel@tonic-gate 	 */
25447c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
25457c478bd9Sstevel@tonic-gate 
25467c478bd9Sstevel@tonic-gate 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
25477c478bd9Sstevel@tonic-gate 		if (rep->xid == msgid)
25487c478bd9Sstevel@tonic-gate 			break;
25497c478bd9Sstevel@tonic-gate 	}
25500a701b1eSRobert Gordon 
25517c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
25527c478bd9Sstevel@tonic-gate 		/*
25537c478bd9Sstevel@tonic-gate 		 * If message not yet received, wait.
25547c478bd9Sstevel@tonic-gate 		 */
25557c478bd9Sstevel@tonic-gate 		if (rep->status == (uint_t)REPLY_WAIT) {
25567c478bd9Sstevel@tonic-gate 			timout = ddi_get_lbolt() +
25577c478bd9Sstevel@tonic-gate 			    drv_usectohz(REPLY_WAIT_TIME * 1000000);
25580a701b1eSRobert Gordon 
25597c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv,
25607c478bd9Sstevel@tonic-gate 			    &qp->replylist_lock, timout)) > 0 &&
25610a701b1eSRobert Gordon 			    rep->status == (uint_t)REPLY_WAIT)
25620a701b1eSRobert Gordon 				;
25637c478bd9Sstevel@tonic-gate 
25647c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
25657c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
25667c478bd9Sstevel@tonic-gate 				ret = RDMA_TIMEDOUT;
25677c478bd9Sstevel@tonic-gate 				break;
25687c478bd9Sstevel@tonic-gate 			case 0:
25697c478bd9Sstevel@tonic-gate 				ret = RDMA_INTR;
25707c478bd9Sstevel@tonic-gate 				break;
25717c478bd9Sstevel@tonic-gate 			default:
25727c478bd9Sstevel@tonic-gate 				break;
25737c478bd9Sstevel@tonic-gate 			}
25747c478bd9Sstevel@tonic-gate 		}
25757c478bd9Sstevel@tonic-gate 
25767c478bd9Sstevel@tonic-gate 		if (rep->status == RDMA_SUCCESS) {
25777c478bd9Sstevel@tonic-gate 			struct clist *cl = NULL;
25787c478bd9Sstevel@tonic-gate 
25797c478bd9Sstevel@tonic-gate 			/*
25807c478bd9Sstevel@tonic-gate 			 * Got message successfully
25817c478bd9Sstevel@tonic-gate 			 */
25827c478bd9Sstevel@tonic-gate 			clist_add(&cl, 0, rep->bytes_xfer, NULL,
258311606941Sjwahlig 			    (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL);
25847c478bd9Sstevel@tonic-gate 			*clp = cl;
25857c478bd9Sstevel@tonic-gate 		} else {
25867c478bd9Sstevel@tonic-gate 			if (rep->status != (uint_t)REPLY_WAIT) {
25877c478bd9Sstevel@tonic-gate 				/*
25887c478bd9Sstevel@tonic-gate 				 * Got error in reply message. Free
25897c478bd9Sstevel@tonic-gate 				 * recv buffer here.
25907c478bd9Sstevel@tonic-gate 				 */
25917c478bd9Sstevel@tonic-gate 				ret = rep->status;
25927c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
259311606941Sjwahlig 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
25947c478bd9Sstevel@tonic-gate 			}
25957c478bd9Sstevel@tonic-gate 		}
25967c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
25977c478bd9Sstevel@tonic-gate 	} else {
25987c478bd9Sstevel@tonic-gate 		/*
25997c478bd9Sstevel@tonic-gate 		 * No matching reply structure found for given msgid on the
26007c478bd9Sstevel@tonic-gate 		 * reply wait list.
26017c478bd9Sstevel@tonic-gate 		 */
26027c478bd9Sstevel@tonic-gate 		ret = RDMA_INVAL;
26030a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__nomatchxid2);
26047c478bd9Sstevel@tonic-gate 	}
26057c478bd9Sstevel@tonic-gate 
26067c478bd9Sstevel@tonic-gate 	/*
26077c478bd9Sstevel@tonic-gate 	 * Done.
26087c478bd9Sstevel@tonic-gate 	 */
26097c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
26107c478bd9Sstevel@tonic-gate 	return (ret);
26117c478bd9Sstevel@tonic-gate }
26127c478bd9Sstevel@tonic-gate 
26137c478bd9Sstevel@tonic-gate /*
26147c478bd9Sstevel@tonic-gate  * RDMA write a buffer to the remote address.
26157c478bd9Sstevel@tonic-gate  */
26167c478bd9Sstevel@tonic-gate rdma_stat
26177c478bd9Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait)
26187c478bd9Sstevel@tonic-gate {
26197c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
26207c478bd9Sstevel@tonic-gate 	int		cv_sig;
26217c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
26227c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
26237c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
26247c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
26257c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
26260a701b1eSRobert Gordon 	uint64_t	n_writes = 0;
26277c478bd9Sstevel@tonic-gate 
26287c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
26297c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
26307c478bd9Sstevel@tonic-gate 	}
26317c478bd9Sstevel@tonic-gate 
26320a701b1eSRobert Gordon 	while ((cl != NULL)) {
26330a701b1eSRobert Gordon 		if (cl->c_len > 0) {
26347c478bd9Sstevel@tonic-gate 			bzero(&tx_wr, sizeof (ibt_send_wr_t));
26350a701b1eSRobert Gordon 			tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->u.c_daddr;
26360a701b1eSRobert Gordon 			tx_wr.wr.rc.rcwr.rdma.rdma_rkey =
26370a701b1eSRobert Gordon 			    cl->c_dmemhandle.mrc_rmr; /* rkey */
26380a701b1eSRobert Gordon 			sgl[0].ds_va = cl->w.c_saddr;
26390a701b1eSRobert Gordon 			sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
26400a701b1eSRobert Gordon 			sgl[0].ds_len = cl->c_len;
26417c478bd9Sstevel@tonic-gate 
26427c478bd9Sstevel@tonic-gate 			if (wait) {
26437c478bd9Sstevel@tonic-gate 				cv_sig = 1;
26447c478bd9Sstevel@tonic-gate 			} else {
26450a701b1eSRobert Gordon 				if (n_writes > max_unsignaled_rws) {
26460a701b1eSRobert Gordon 					n_writes = 0;
26470a701b1eSRobert Gordon 					cv_sig = 1;
26480a701b1eSRobert Gordon 				} else {
26497c478bd9Sstevel@tonic-gate 					cv_sig = 0;
26507c478bd9Sstevel@tonic-gate 				}
26510a701b1eSRobert Gordon 			}
26527c478bd9Sstevel@tonic-gate 
2653*065714dcSSiddheshwar Mahesh 			if (cv_sig) {
2654*065714dcSSiddheshwar Mahesh 				tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
26557c478bd9Sstevel@tonic-gate 				wdesc = rib_init_sendwait(0, cv_sig, qp);
265611606941Sjwahlig 				tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
2657*065714dcSSiddheshwar Mahesh 				mutex_enter(&wdesc->sendwait_lock);
2658*065714dcSSiddheshwar Mahesh 			} else {
2659*065714dcSSiddheshwar Mahesh 				tx_wr.wr_flags = IBT_WR_NO_FLAGS;
2660*065714dcSSiddheshwar Mahesh 				tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
2661*065714dcSSiddheshwar Mahesh 			}
26627c478bd9Sstevel@tonic-gate 			tx_wr.wr_opcode = IBT_WRC_RDMAW;
26637c478bd9Sstevel@tonic-gate 			tx_wr.wr_trans = IBT_RC_SRV;
26640a701b1eSRobert Gordon 			tx_wr.wr_nds = 1;
26657c478bd9Sstevel@tonic-gate 			tx_wr.wr_sgl = sgl;
26667c478bd9Sstevel@tonic-gate 
26677c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
26680a701b1eSRobert Gordon 			if (conn->c_state == C_CONNECTED) {
26690a701b1eSRobert Gordon 				ibt_status =
26700a701b1eSRobert Gordon 				    ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
26717c478bd9Sstevel@tonic-gate 			}
26720a701b1eSRobert Gordon 			if (conn->c_state != C_CONNECTED ||
26737c478bd9Sstevel@tonic-gate 			    ibt_status != IBT_SUCCESS) {
26740a701b1eSRobert Gordon 				if (conn->c_state != C_DISCONN_PEND)
26750a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
26767c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
2677*065714dcSSiddheshwar Mahesh 				if (cv_sig) {
2678*065714dcSSiddheshwar Mahesh 					mutex_exit(&wdesc->sendwait_lock);
26797c478bd9Sstevel@tonic-gate 					(void) rib_free_sendwait(wdesc);
2680*065714dcSSiddheshwar Mahesh 				}
26810a701b1eSRobert Gordon 				return (RDMA_CONNLOST);
26827c478bd9Sstevel@tonic-gate 			}
2683*065714dcSSiddheshwar Mahesh 
26847c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
26857c478bd9Sstevel@tonic-gate 
26867c478bd9Sstevel@tonic-gate 			/*
26877c478bd9Sstevel@tonic-gate 			 * Wait for send to complete
26887c478bd9Sstevel@tonic-gate 			 */
2689*065714dcSSiddheshwar Mahesh 			if (cv_sig) {
2690*065714dcSSiddheshwar Mahesh 
2691*065714dcSSiddheshwar Mahesh 				rib_send_hold(qp);
26920a701b1eSRobert Gordon 				mutex_exit(&wdesc->sendwait_lock);
2693*065714dcSSiddheshwar Mahesh 
2694*065714dcSSiddheshwar Mahesh 				ret = rib_sendwait(qp, wdesc);
2695*065714dcSSiddheshwar Mahesh 				if (ret != 0)
2696*065714dcSSiddheshwar Mahesh 					return (ret);
26970a701b1eSRobert Gordon 			}
26980a701b1eSRobert Gordon 			n_writes ++;
26990a701b1eSRobert Gordon 		}
27000a701b1eSRobert Gordon 		cl = cl->c_next;
27017c478bd9Sstevel@tonic-gate 	}
27027c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
27037c478bd9Sstevel@tonic-gate }
27047c478bd9Sstevel@tonic-gate 
27057c478bd9Sstevel@tonic-gate /*
27067c478bd9Sstevel@tonic-gate  * RDMA Read a buffer from the remote address.
27077c478bd9Sstevel@tonic-gate  */
27087c478bd9Sstevel@tonic-gate rdma_stat
27097c478bd9Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait)
27107c478bd9Sstevel@tonic-gate {
27117c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	rx_wr;
2712*065714dcSSiddheshwar Mahesh 	int		cv_sig = 0;
27130a701b1eSRobert Gordon 	ibt_wr_ds_t	sgl;
27147c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
27157c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
27167c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
27177c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
27187c478bd9Sstevel@tonic-gate 
27197c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
27207c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
27217c478bd9Sstevel@tonic-gate 	}
27227c478bd9Sstevel@tonic-gate 
27230a701b1eSRobert Gordon 	while (cl != NULL) {
27247c478bd9Sstevel@tonic-gate 		bzero(&rx_wr, sizeof (ibt_send_wr_t));
27257c478bd9Sstevel@tonic-gate 		/*
27267c478bd9Sstevel@tonic-gate 		 * Remote address is at the head chunk item in list.
27277c478bd9Sstevel@tonic-gate 		 */
27280a701b1eSRobert Gordon 		rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->w.c_saddr;
27290a701b1eSRobert Gordon 		rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr;
27307c478bd9Sstevel@tonic-gate 
27310a701b1eSRobert Gordon 		sgl.ds_va = cl->u.c_daddr;
27320a701b1eSRobert Gordon 		sgl.ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */
27330a701b1eSRobert Gordon 		sgl.ds_len = cl->c_len;
27347c478bd9Sstevel@tonic-gate 
2735*065714dcSSiddheshwar Mahesh 		/*
2736*065714dcSSiddheshwar Mahesh 		 * If there are multiple chunks to be read, and
2737*065714dcSSiddheshwar Mahesh 		 * wait is set, ask for signal only for the last chunk
2738*065714dcSSiddheshwar Mahesh 		 * and wait only on the last chunk. The completion of
2739*065714dcSSiddheshwar Mahesh 		 * RDMA_READ on last chunk ensures that reads on all
2740*065714dcSSiddheshwar Mahesh 		 * previous chunks are also completed.
2741*065714dcSSiddheshwar Mahesh 		 */
2742*065714dcSSiddheshwar Mahesh 		if (wait && (cl->c_next == NULL)) {
27437c478bd9Sstevel@tonic-gate 			cv_sig = 1;
2744*065714dcSSiddheshwar Mahesh 			wdesc = rib_init_sendwait(0, cv_sig, qp);
2745*065714dcSSiddheshwar Mahesh 			rx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
2746*065714dcSSiddheshwar Mahesh 			rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
2747*065714dcSSiddheshwar Mahesh 			mutex_enter(&wdesc->sendwait_lock);
27487c478bd9Sstevel@tonic-gate 		} else {
27497c478bd9Sstevel@tonic-gate 			rx_wr.wr_flags = IBT_WR_NO_FLAGS;
2750*065714dcSSiddheshwar Mahesh 			rx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
27517c478bd9Sstevel@tonic-gate 		}
27527c478bd9Sstevel@tonic-gate 		rx_wr.wr_opcode = IBT_WRC_RDMAR;
27537c478bd9Sstevel@tonic-gate 		rx_wr.wr_trans = IBT_RC_SRV;
27540a701b1eSRobert Gordon 		rx_wr.wr_nds = 1;
27550a701b1eSRobert Gordon 		rx_wr.wr_sgl = &sgl;
27567c478bd9Sstevel@tonic-gate 
27577c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
27580a701b1eSRobert Gordon 		if (conn->c_state == C_CONNECTED) {
27597c478bd9Sstevel@tonic-gate 			ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL);
27607c478bd9Sstevel@tonic-gate 		}
27610a701b1eSRobert Gordon 		if (conn->c_state != C_CONNECTED ||
27627c478bd9Sstevel@tonic-gate 		    ibt_status != IBT_SUCCESS) {
27630a701b1eSRobert Gordon 			if (conn->c_state != C_DISCONN_PEND)
27640a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
27657c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
2766*065714dcSSiddheshwar Mahesh 			if (wait && (cl->c_next == NULL)) {
2767*065714dcSSiddheshwar Mahesh 				mutex_exit(&wdesc->sendwait_lock);
27687c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wdesc);
2769*065714dcSSiddheshwar Mahesh 			}
27700a701b1eSRobert Gordon 			return (RDMA_CONNLOST);
27717c478bd9Sstevel@tonic-gate 		}
2772*065714dcSSiddheshwar Mahesh 
27737c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
27747c478bd9Sstevel@tonic-gate 
27757c478bd9Sstevel@tonic-gate 		/*
27760a701b1eSRobert Gordon 		 * Wait for send to complete if this is the
27770a701b1eSRobert Gordon 		 * last item in the list.
27787c478bd9Sstevel@tonic-gate 		 */
27790a701b1eSRobert Gordon 		if (wait && cl->c_next == NULL) {
2780*065714dcSSiddheshwar Mahesh 			rib_send_hold(qp);
27810a701b1eSRobert Gordon 			mutex_exit(&wdesc->sendwait_lock);
2782*065714dcSSiddheshwar Mahesh 
2783*065714dcSSiddheshwar Mahesh 			ret = rib_sendwait(qp, wdesc);
2784*065714dcSSiddheshwar Mahesh 
2785*065714dcSSiddheshwar Mahesh 			if (ret != 0)
2786*065714dcSSiddheshwar Mahesh 				return (ret);
27870a701b1eSRobert Gordon 		}
27880a701b1eSRobert Gordon 		cl = cl->c_next;
27890a701b1eSRobert Gordon 	}
27907c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
27917c478bd9Sstevel@tonic-gate }
27927c478bd9Sstevel@tonic-gate 
27937c478bd9Sstevel@tonic-gate /*
27947c478bd9Sstevel@tonic-gate  * rib_srv_cm_handler()
27957c478bd9Sstevel@tonic-gate  *    Connection Manager callback to handle RC connection requests.
27967c478bd9Sstevel@tonic-gate  */
27977c478bd9Sstevel@tonic-gate /* ARGSUSED */
27987c478bd9Sstevel@tonic-gate static ibt_cm_status_t
27997c478bd9Sstevel@tonic-gate rib_srv_cm_handler(void *any, ibt_cm_event_t *event,
28007c478bd9Sstevel@tonic-gate 	ibt_cm_return_args_t *ret_args, void *priv_data,
28017c478bd9Sstevel@tonic-gate 	ibt_priv_data_len_t len)
28027c478bd9Sstevel@tonic-gate {
28037c478bd9Sstevel@tonic-gate 	queue_t		*q;
28047c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
28057c478bd9Sstevel@tonic-gate 	rpcib_state_t	*ribstat;
28067c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
28077c478bd9Sstevel@tonic-gate 	rdma_stat	status = RDMA_SUCCESS;
28087c478bd9Sstevel@tonic-gate 	int		i;
28097c478bd9Sstevel@tonic-gate 	struct clist	cl;
28100a701b1eSRobert Gordon 	rdma_buf_t	rdbuf = {0};
28117c478bd9Sstevel@tonic-gate 	void		*buf = NULL;
28127c478bd9Sstevel@tonic-gate 	CONN		*conn;
28130a701b1eSRobert Gordon 	ibt_ip_cm_info_t	ipinfo;
28140a701b1eSRobert Gordon 	struct sockaddr_in *s;
28150a701b1eSRobert Gordon 	struct sockaddr_in6 *s6;
28160a701b1eSRobert Gordon 	int sin_size = sizeof (struct sockaddr_in);
28170a701b1eSRobert Gordon 	int in_size = sizeof (struct in_addr);
28180a701b1eSRobert Gordon 	int sin6_size = sizeof (struct sockaddr_in6);
28197c478bd9Sstevel@tonic-gate 
28207c478bd9Sstevel@tonic-gate 	ASSERT(any != NULL);
28217c478bd9Sstevel@tonic-gate 	ASSERT(event != NULL);
28227c478bd9Sstevel@tonic-gate 
28237c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)any;
28247c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
28257c478bd9Sstevel@tonic-gate 	ASSERT(hca != NULL);
28267c478bd9Sstevel@tonic-gate 
28277c478bd9Sstevel@tonic-gate 	/* got a connection request */
28287c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
28297c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_REQ_RCV:
28307c478bd9Sstevel@tonic-gate 		/*
28317c478bd9Sstevel@tonic-gate 		 * If the plugin is in the NO_ACCEPT state, bail out.
28327c478bd9Sstevel@tonic-gate 		 */
28337c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
28347c478bd9Sstevel@tonic-gate 		if (plugin_state == NO_ACCEPT) {
28357c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
28367c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
28377c478bd9Sstevel@tonic-gate 		}
28387c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
28397c478bd9Sstevel@tonic-gate 
28407c478bd9Sstevel@tonic-gate 		/*
28417c478bd9Sstevel@tonic-gate 		 * Need to send a MRA MAD to CM so that it does not
28427c478bd9Sstevel@tonic-gate 		 * timeout on us.
28437c478bd9Sstevel@tonic-gate 		 */
28447c478bd9Sstevel@tonic-gate 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
28457c478bd9Sstevel@tonic-gate 		    event->cm_event.req.req_timeout * 8, NULL, 0);
28467c478bd9Sstevel@tonic-gate 
28477c478bd9Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
28487c478bd9Sstevel@tonic-gate 		q = rib_stat->q;
28497c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
28500a701b1eSRobert Gordon 
28517c478bd9Sstevel@tonic-gate 		status = rib_svc_create_chan(hca, (caddr_t)q,
28527c478bd9Sstevel@tonic-gate 		    event->cm_event.req.req_prim_hca_port, &qp);
28530a701b1eSRobert Gordon 
28547c478bd9Sstevel@tonic-gate 		if (status) {
28557c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
28567c478bd9Sstevel@tonic-gate 		}
28577c478bd9Sstevel@tonic-gate 
28587c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
28590a701b1eSRobert Gordon 		ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
28600a701b1eSRobert Gordon 		ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
28617c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
28627c478bd9Sstevel@tonic-gate 
28637c478bd9Sstevel@tonic-gate 		/*
28647c478bd9Sstevel@tonic-gate 		 * Pre-posts RECV buffers
28657c478bd9Sstevel@tonic-gate 		 */
28667c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
28677c478bd9Sstevel@tonic-gate 		for (i = 0; i < preposted_rbufs; i++) {
28687c478bd9Sstevel@tonic-gate 			bzero(&rdbuf, sizeof (rdbuf));
28697c478bd9Sstevel@tonic-gate 			rdbuf.type = RECV_BUFFER;
28707c478bd9Sstevel@tonic-gate 			buf = rib_rbuf_alloc(conn, &rdbuf);
28717c478bd9Sstevel@tonic-gate 			if (buf == NULL) {
2872*065714dcSSiddheshwar Mahesh 				/*
2873*065714dcSSiddheshwar Mahesh 				 * A connection is not established yet.
2874*065714dcSSiddheshwar Mahesh 				 * Just flush the channel. Buffers
2875*065714dcSSiddheshwar Mahesh 				 * posted till now will error out with
2876*065714dcSSiddheshwar Mahesh 				 * IBT_WC_WR_FLUSHED_ERR.
2877*065714dcSSiddheshwar Mahesh 				 */
2878*065714dcSSiddheshwar Mahesh 				(void) ibt_flush_channel(qp->qp_hdl);
28797c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn, NULL);
28807c478bd9Sstevel@tonic-gate 				return (IBT_CM_REJECT);
28817c478bd9Sstevel@tonic-gate 			}
28827c478bd9Sstevel@tonic-gate 
28837c478bd9Sstevel@tonic-gate 			bzero(&cl, sizeof (cl));
28840a701b1eSRobert Gordon 			cl.w.c_saddr3 = (caddr_t)rdbuf.addr;
28857c478bd9Sstevel@tonic-gate 			cl.c_len = rdbuf.len;
28860a701b1eSRobert Gordon 			cl.c_smemhandle.mrc_lmr =
28870a701b1eSRobert Gordon 			    rdbuf.handle.mrc_lmr; /* lkey */
28887c478bd9Sstevel@tonic-gate 			cl.c_next = NULL;
28897c478bd9Sstevel@tonic-gate 			status = rib_post_recv(conn, &cl);
28907c478bd9Sstevel@tonic-gate 			if (status != RDMA_SUCCESS) {
2891*065714dcSSiddheshwar Mahesh 				/*
2892*065714dcSSiddheshwar Mahesh 				 * A connection is not established yet.
2893*065714dcSSiddheshwar Mahesh 				 * Just flush the channel. Buffers
2894*065714dcSSiddheshwar Mahesh 				 * posted till now will error out with
2895*065714dcSSiddheshwar Mahesh 				 * IBT_WC_WR_FLUSHED_ERR.
2896*065714dcSSiddheshwar Mahesh 				 */
2897*065714dcSSiddheshwar Mahesh 				(void) ibt_flush_channel(qp->qp_hdl);
28987c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn, NULL);
28997c478bd9Sstevel@tonic-gate 				return (IBT_CM_REJECT);
29007c478bd9Sstevel@tonic-gate 			}
29017c478bd9Sstevel@tonic-gate 		}
29027c478bd9Sstevel@tonic-gate 		(void) rib_add_connlist(conn, &hca->srv_conn_list);
29037c478bd9Sstevel@tonic-gate 
29047c478bd9Sstevel@tonic-gate 		/*
29050a701b1eSRobert Gordon 		 * Get the address translation
29067c478bd9Sstevel@tonic-gate 		 */
29077c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
29087c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
29097c478bd9Sstevel@tonic-gate 			rw_exit(&hca->state_lock);
29107c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
29117c478bd9Sstevel@tonic-gate 		}
29127c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
29137c478bd9Sstevel@tonic-gate 
29140a701b1eSRobert Gordon 		bzero(&ipinfo, sizeof (ibt_ip_cm_info_t));
29157c478bd9Sstevel@tonic-gate 
29160a701b1eSRobert Gordon 		if (ibt_get_ip_data(event->cm_priv_data_len,
29170a701b1eSRobert Gordon 		    event->cm_priv_data,
29180a701b1eSRobert Gordon 		    &ipinfo) != IBT_SUCCESS) {
29190a701b1eSRobert Gordon 
29200a701b1eSRobert Gordon 			return (IBT_CM_REJECT);
29210a701b1eSRobert Gordon 		}
29220a701b1eSRobert Gordon 
29230a701b1eSRobert Gordon 		switch (ipinfo.src_addr.family) {
29240a701b1eSRobert Gordon 		case AF_INET:
29257c478bd9Sstevel@tonic-gate 
29267c478bd9Sstevel@tonic-gate 			conn->c_raddr.maxlen =
29277c478bd9Sstevel@tonic-gate 			    conn->c_raddr.len = sin_size;
29280a701b1eSRobert Gordon 			conn->c_raddr.buf = kmem_zalloc(sin_size, KM_SLEEP);
29290a701b1eSRobert Gordon 
29307c478bd9Sstevel@tonic-gate 			s = (struct sockaddr_in *)conn->c_raddr.buf;
29317c478bd9Sstevel@tonic-gate 			s->sin_family = AF_INET;
29327c478bd9Sstevel@tonic-gate 
29330a701b1eSRobert Gordon 			bcopy((void *)&ipinfo.src_addr.un.ip4addr,
29340a701b1eSRobert Gordon 			    &s->sin_addr, in_size);
29350a701b1eSRobert Gordon 
29360a701b1eSRobert Gordon 			break;
29370a701b1eSRobert Gordon 
29380a701b1eSRobert Gordon 		case AF_INET6:
29397c478bd9Sstevel@tonic-gate 
29407c478bd9Sstevel@tonic-gate 			conn->c_raddr.maxlen =
29417c478bd9Sstevel@tonic-gate 			    conn->c_raddr.len = sin6_size;
29420a701b1eSRobert Gordon 			conn->c_raddr.buf = kmem_zalloc(sin6_size, KM_SLEEP);
29437c478bd9Sstevel@tonic-gate 
29447c478bd9Sstevel@tonic-gate 			s6 = (struct sockaddr_in6 *)conn->c_raddr.buf;
29457c478bd9Sstevel@tonic-gate 			s6->sin6_family = AF_INET6;
29460a701b1eSRobert Gordon 			bcopy((void *)&ipinfo.src_addr.un.ip6addr,
29470a701b1eSRobert Gordon 			    &s6->sin6_addr,
29487c478bd9Sstevel@tonic-gate 			    sizeof (struct in6_addr));
29497c478bd9Sstevel@tonic-gate 
29500a701b1eSRobert Gordon 			break;
29510a701b1eSRobert Gordon 
29520a701b1eSRobert Gordon 		default:
29530a701b1eSRobert Gordon 			return (IBT_CM_REJECT);
29547c478bd9Sstevel@tonic-gate 		}
29550a701b1eSRobert Gordon 
29567c478bd9Sstevel@tonic-gate 		break;
29577c478bd9Sstevel@tonic-gate 
29587c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
29597c478bd9Sstevel@tonic-gate 	{
29607c478bd9Sstevel@tonic-gate 		CONN		*conn;
29617c478bd9Sstevel@tonic-gate 		rib_qp_t	*qp;
29627c478bd9Sstevel@tonic-gate 
29637c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
29647c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
29657c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
29667c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
29677c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
29687c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
29697c478bd9Sstevel@tonic-gate 			/*
29707c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
29717c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
29727c478bd9Sstevel@tonic-gate 			 */
29737c478bd9Sstevel@tonic-gate 			break;
29747c478bd9Sstevel@tonic-gate 		default:
29757c478bd9Sstevel@tonic-gate 			/*
29767c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
29777c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
29787c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
29797c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
29807c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
29817c478bd9Sstevel@tonic-gate 			 * state
29827c478bd9Sstevel@tonic-gate 			 */
29837c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
29847c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
29857c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
29867c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
29877c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
29887c478bd9Sstevel@tonic-gate 				break;
29897c478bd9Sstevel@tonic-gate 			}
29900a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
29917c478bd9Sstevel@tonic-gate 
29927c478bd9Sstevel@tonic-gate 			/*
29937c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref goes down to 0
29947c478bd9Sstevel@tonic-gate 			 */
29957c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
29967c478bd9Sstevel@tonic-gate 				/*
29977c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
29987c478bd9Sstevel@tonic-gate 				 */
29997c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
30007c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
30017c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
30027c478bd9Sstevel@tonic-gate 				    &hca->srv_conn_list);
30037c478bd9Sstevel@tonic-gate 			} else {
3004*065714dcSSiddheshwar Mahesh 				/*
3005*065714dcSSiddheshwar Mahesh 				 * conn will be freed when c_ref goes to 0.
3006*065714dcSSiddheshwar Mahesh 				 * Indicate to cleaning thread not to close
3007*065714dcSSiddheshwar Mahesh 				 * the connection, but just free the channel.
3008*065714dcSSiddheshwar Mahesh 				 */
3009*065714dcSSiddheshwar Mahesh 				conn->c_flags |= C_CLOSE_NOTNEEDED;
30107c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
30117c478bd9Sstevel@tonic-gate 			}
30120a701b1eSRobert Gordon 			DTRACE_PROBE(rpcib__i__srvcm_chandisconnect);
30137c478bd9Sstevel@tonic-gate 			break;
30147c478bd9Sstevel@tonic-gate 		}
30157c478bd9Sstevel@tonic-gate 		break;
30167c478bd9Sstevel@tonic-gate 	}
30177c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_EST:
30187c478bd9Sstevel@tonic-gate 		/*
30197c478bd9Sstevel@tonic-gate 		 * RTU received, hence connection established.
30207c478bd9Sstevel@tonic-gate 		 */
30217c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
30227c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30237c478bd9Sstevel@tonic-gate 			    "(CONN_EST) channel established");
30247c478bd9Sstevel@tonic-gate 		break;
30257c478bd9Sstevel@tonic-gate 
30267c478bd9Sstevel@tonic-gate 	default:
30277c478bd9Sstevel@tonic-gate 		if (rib_debug > 2) {
30287c478bd9Sstevel@tonic-gate 			/* Let CM handle the following events. */
30297c478bd9Sstevel@tonic-gate 			if (event->cm_type == IBT_CM_EVENT_REP_RCV) {
30307c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30317c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_REP_RCV\n");
30327c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) {
30337c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30347c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_LAP_RCV\n");
30357c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) {
30367c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30377c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_MRA_RCV\n");
30387c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_APR_RCV) {
30397c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30407c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_APR_RCV\n");
30417c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_FAILURE) {
30427c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30437c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_FAILURE\n");
30447c478bd9Sstevel@tonic-gate 			}
30457c478bd9Sstevel@tonic-gate 		}
30460a701b1eSRobert Gordon 		return (IBT_CM_DEFAULT);
30477c478bd9Sstevel@tonic-gate 	}
30487c478bd9Sstevel@tonic-gate 
30497c478bd9Sstevel@tonic-gate 	/* accept all other CM messages (i.e. let the CM handle them) */
30507c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
30517c478bd9Sstevel@tonic-gate }
30527c478bd9Sstevel@tonic-gate 
30537c478bd9Sstevel@tonic-gate static rdma_stat
30547c478bd9Sstevel@tonic-gate rib_register_service(rib_hca_t *hca, int service_type)
30557c478bd9Sstevel@tonic-gate {
30567c478bd9Sstevel@tonic-gate 	ibt_srv_desc_t		sdesc;
30577c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
30587c478bd9Sstevel@tonic-gate 	ib_svc_id_t		srv_id;
30597c478bd9Sstevel@tonic-gate 	ibt_srv_hdl_t		srv_hdl;
30607c478bd9Sstevel@tonic-gate 	uint_t			port_size;
30610a701b1eSRobert Gordon 	uint_t			pki, i, num_ports, nbinds;
30627c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
30630a701b1eSRobert Gordon 	rib_service_t		*new_service;
30647c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
30657c478bd9Sstevel@tonic-gate 
30667c478bd9Sstevel@tonic-gate 	/*
30677c478bd9Sstevel@tonic-gate 	 * Query all ports for the given HCA
30687c478bd9Sstevel@tonic-gate 	 */
30697c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
30707c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
30717c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
30727c478bd9Sstevel@tonic-gate 		    &num_ports, &port_size);
30737c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30747c478bd9Sstevel@tonic-gate 	} else {
30757c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30767c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
30777c478bd9Sstevel@tonic-gate 	}
30787c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
30797c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
30807c478bd9Sstevel@tonic-gate 	}
30817c478bd9Sstevel@tonic-gate 
30820a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__regservice_numports,
30830a701b1eSRobert Gordon 	    int, num_ports);
30847c478bd9Sstevel@tonic-gate 
30857c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
30867c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
30870a701b1eSRobert Gordon 			DTRACE_PROBE1(rpcib__i__regservice__portinactive,
30880a701b1eSRobert Gordon 			    int, i+1);
30890a701b1eSRobert Gordon 		} else if (port_infop[i].p_linkstate == IBT_PORT_ACTIVE) {
30900a701b1eSRobert Gordon 			DTRACE_PROBE1(rpcib__i__regservice__portactive,
30910a701b1eSRobert Gordon 			    int, i+1);
30927c478bd9Sstevel@tonic-gate 		}
30937c478bd9Sstevel@tonic-gate 	}
30940a701b1eSRobert Gordon 
30957c478bd9Sstevel@tonic-gate 	/*
30967c478bd9Sstevel@tonic-gate 	 * Get all the IP addresses on this system to register the
30977c478bd9Sstevel@tonic-gate 	 * given "service type" on all DNS recognized IP addrs.
30987c478bd9Sstevel@tonic-gate 	 * Each service type such as NFS will have all the systems
30997c478bd9Sstevel@tonic-gate 	 * IP addresses as its different names. For now the only
31007c478bd9Sstevel@tonic-gate 	 * type of service we support in RPCIB is NFS.
31017c478bd9Sstevel@tonic-gate 	 */
31027c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
31037c478bd9Sstevel@tonic-gate 	/*
31047c478bd9Sstevel@tonic-gate 	 * Start registering and binding service to active
31057c478bd9Sstevel@tonic-gate 	 * on active ports on this HCA.
31067c478bd9Sstevel@tonic-gate 	 */
31077c478bd9Sstevel@tonic-gate 	nbinds = 0;
31087c478bd9Sstevel@tonic-gate 	new_service = NULL;
31097c478bd9Sstevel@tonic-gate 
31107c478bd9Sstevel@tonic-gate 	/*
31117c478bd9Sstevel@tonic-gate 	 * We use IP addresses as the service names for
31127c478bd9Sstevel@tonic-gate 	 * service registration.  Register each of them
31137c478bd9Sstevel@tonic-gate 	 * with CM to obtain a svc_id and svc_hdl.  We do not
31147c478bd9Sstevel@tonic-gate 	 * register the service with machine's loopback address.
31157c478bd9Sstevel@tonic-gate 	 */
31167c478bd9Sstevel@tonic-gate 	(void) bzero(&srv_id, sizeof (ib_svc_id_t));
31177c478bd9Sstevel@tonic-gate 	(void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t));
31187c478bd9Sstevel@tonic-gate 	(void) bzero(&sdesc, sizeof (ibt_srv_desc_t));
31197c478bd9Sstevel@tonic-gate 
31207c478bd9Sstevel@tonic-gate 	sdesc.sd_handler = rib_srv_cm_handler;
31217c478bd9Sstevel@tonic-gate 	sdesc.sd_flags = 0;
31227c478bd9Sstevel@tonic-gate 	ibt_status = ibt_register_service(hca->ibt_clnt_hdl,
3123f837ee4aSSiddheshwar Mahesh 	    &sdesc, ibt_get_ip_sid(IPPROTO_TCP, nfs_rdma_port),
31240a701b1eSRobert Gordon 	    1, &srv_hdl, &srv_id);
31250a701b1eSRobert Gordon 
31267c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
31277c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
31287c478bd9Sstevel@tonic-gate 			continue;
31297c478bd9Sstevel@tonic-gate 
31307c478bd9Sstevel@tonic-gate 		for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
31317c478bd9Sstevel@tonic-gate 			pkey = port_infop[i].p_pkey_tbl[pki];
31320a701b1eSRobert Gordon 			if ((pkey & IBSRM_HB) &&
31330a701b1eSRobert Gordon 			    (pkey != IB_PKEY_INVALID_FULL)) {
31347c478bd9Sstevel@tonic-gate 
31357c478bd9Sstevel@tonic-gate 				/*
31367c478bd9Sstevel@tonic-gate 				 * Allocate and prepare a service entry
31377c478bd9Sstevel@tonic-gate 				 */
31380a701b1eSRobert Gordon 				new_service =
31390a701b1eSRobert Gordon 				    kmem_zalloc(1 * sizeof (rib_service_t),
31407c478bd9Sstevel@tonic-gate 				    KM_SLEEP);
31417c478bd9Sstevel@tonic-gate 
31420a701b1eSRobert Gordon 				new_service->srv_type = service_type;
31430a701b1eSRobert Gordon 				new_service->srv_hdl = srv_hdl;
31447c478bd9Sstevel@tonic-gate 				new_service->srv_next = NULL;
31457c478bd9Sstevel@tonic-gate 
31467c478bd9Sstevel@tonic-gate 				ibt_status = ibt_bind_service(srv_hdl,
31470a701b1eSRobert Gordon 				    port_infop[i].p_sgid_tbl[0],
31480a701b1eSRobert Gordon 				    NULL, rib_stat, NULL);
31490a701b1eSRobert Gordon 
31500a701b1eSRobert Gordon 				DTRACE_PROBE1(rpcib__i__regservice__bindres,
31510a701b1eSRobert Gordon 				    int, ibt_status);
31520a701b1eSRobert Gordon 
31537c478bd9Sstevel@tonic-gate 				if (ibt_status != IBT_SUCCESS) {
31547c478bd9Sstevel@tonic-gate 					kmem_free(new_service,
31557c478bd9Sstevel@tonic-gate 					    sizeof (rib_service_t));
31567c478bd9Sstevel@tonic-gate 					new_service = NULL;
31577c478bd9Sstevel@tonic-gate 					continue;
31587c478bd9Sstevel@tonic-gate 				}
31590a701b1eSRobert Gordon 
31607c478bd9Sstevel@tonic-gate 				/*
31617c478bd9Sstevel@tonic-gate 				 * Add to the service list for this HCA
31627c478bd9Sstevel@tonic-gate 				 */
31637c478bd9Sstevel@tonic-gate 				new_service->srv_next = hca->service_list;
31647c478bd9Sstevel@tonic-gate 				hca->service_list = new_service;
31657c478bd9Sstevel@tonic-gate 				new_service = NULL;
31667c478bd9Sstevel@tonic-gate 				nbinds++;
31677c478bd9Sstevel@tonic-gate 			}
31687c478bd9Sstevel@tonic-gate 		}
31697c478bd9Sstevel@tonic-gate 	}
31707c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
31717c478bd9Sstevel@tonic-gate 
31727c478bd9Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
31737c478bd9Sstevel@tonic-gate 
31747c478bd9Sstevel@tonic-gate 	if (nbinds == 0) {
31757c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
31767c478bd9Sstevel@tonic-gate 	} else {
31777c478bd9Sstevel@tonic-gate 		/*
31787c478bd9Sstevel@tonic-gate 		 * Put this plugin into accept state, since atleast
31797c478bd9Sstevel@tonic-gate 		 * one registration was successful.
31807c478bd9Sstevel@tonic-gate 		 */
31817c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
31827c478bd9Sstevel@tonic-gate 		plugin_state = ACCEPT;
31837c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
31847c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
31857c478bd9Sstevel@tonic-gate 	}
31867c478bd9Sstevel@tonic-gate }
31877c478bd9Sstevel@tonic-gate 
31887c478bd9Sstevel@tonic-gate void
31897c478bd9Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd)
31907c478bd9Sstevel@tonic-gate {
31917c478bd9Sstevel@tonic-gate 	rdma_stat status = RDMA_SUCCESS;
31927c478bd9Sstevel@tonic-gate 
31937c478bd9Sstevel@tonic-gate 	rd->active = 0;
31947c478bd9Sstevel@tonic-gate 	rd->err_code = RDMA_FAILED;
31957c478bd9Sstevel@tonic-gate 
31967c478bd9Sstevel@tonic-gate 	/*
31977c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
31987c478bd9Sstevel@tonic-gate 	 */
31997c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
32007c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
32017c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
32027c478bd9Sstevel@tonic-gate 		return;
32037c478bd9Sstevel@tonic-gate 	}
32047c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
32057c478bd9Sstevel@tonic-gate 
32067c478bd9Sstevel@tonic-gate 	rib_stat->q = &rd->q;
32077c478bd9Sstevel@tonic-gate 	/*
32087c478bd9Sstevel@tonic-gate 	 * Right now the only service type is NFS. Hence force feed this
32097c478bd9Sstevel@tonic-gate 	 * value. Ideally to communicate the service type it should be
32107c478bd9Sstevel@tonic-gate 	 * passed down in rdma_svc_data.
32117c478bd9Sstevel@tonic-gate 	 */
32127c478bd9Sstevel@tonic-gate 	rib_stat->service_type = NFS;
32137c478bd9Sstevel@tonic-gate 	status = rib_register_service(rib_stat->hca, NFS);
32147c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
32157c478bd9Sstevel@tonic-gate 		rd->err_code = status;
32167c478bd9Sstevel@tonic-gate 		return;
32177c478bd9Sstevel@tonic-gate 	}
32187c478bd9Sstevel@tonic-gate 	/*
32197c478bd9Sstevel@tonic-gate 	 * Service active on an HCA, check rd->err_code for more
32207c478bd9Sstevel@tonic-gate 	 * explainable errors.
32217c478bd9Sstevel@tonic-gate 	 */
32227c478bd9Sstevel@tonic-gate 	rd->active = 1;
32237c478bd9Sstevel@tonic-gate 	rd->err_code = status;
32247c478bd9Sstevel@tonic-gate }
32257c478bd9Sstevel@tonic-gate 
32267c478bd9Sstevel@tonic-gate /* XXXX */
32277c478bd9Sstevel@tonic-gate /* ARGSUSED */
32287c478bd9Sstevel@tonic-gate static void
32297c478bd9Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata)
32307c478bd9Sstevel@tonic-gate {
32317c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
32327c478bd9Sstevel@tonic-gate 
32337c478bd9Sstevel@tonic-gate 	/*
32347c478bd9Sstevel@tonic-gate 	 * KRPC called the RDMATF to stop the listeners, this means
32357c478bd9Sstevel@tonic-gate 	 * stop sending incomming or recieved requests to KRPC master
32367c478bd9Sstevel@tonic-gate 	 * transport handle for RDMA-IB. This is also means that the
32377c478bd9Sstevel@tonic-gate 	 * master transport handle, responsible for us, is going away.
32387c478bd9Sstevel@tonic-gate 	 */
32397c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
32407c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
32417c478bd9Sstevel@tonic-gate 	if (svcdata != NULL)
32427c478bd9Sstevel@tonic-gate 		svcdata->active = 0;
32437c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
32447c478bd9Sstevel@tonic-gate 
32457c478bd9Sstevel@tonic-gate 	/*
32467c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
32477c478bd9Sstevel@tonic-gate 	 */
32487c478bd9Sstevel@tonic-gate 	hca = rib_stat->hca;
32497c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
32507c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
32517c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32527c478bd9Sstevel@tonic-gate 		return;
32537c478bd9Sstevel@tonic-gate 	}
32540a701b1eSRobert Gordon 	rib_close_channels(&hca->srv_conn_list);
32557c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
32567c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
32577c478bd9Sstevel@tonic-gate }
32587c478bd9Sstevel@tonic-gate 
32597c478bd9Sstevel@tonic-gate /*
32607c478bd9Sstevel@tonic-gate  * Traverse the HCA's service list to unbind and deregister services.
32617c478bd9Sstevel@tonic-gate  * Instead of unbinding the service for a service handle by
32627c478bd9Sstevel@tonic-gate  * calling ibt_unbind_service() for each port/pkey, we unbind
32637c478bd9Sstevel@tonic-gate  * all the services for the service handle by making only one
32647c478bd9Sstevel@tonic-gate  * call to ibt_unbind_all_services().  Then, we deregister the
32657c478bd9Sstevel@tonic-gate  * service for the service handle.
32667c478bd9Sstevel@tonic-gate  *
32677c478bd9Sstevel@tonic-gate  * When traversing the entries in service_list, we compare the
32687c478bd9Sstevel@tonic-gate  * srv_hdl of the current entry with that of the next.  If they
32697c478bd9Sstevel@tonic-gate  * are different or if the next entry is NULL, the current entry
32707c478bd9Sstevel@tonic-gate  * marks the last binding of the service handle.  In this case,
32717c478bd9Sstevel@tonic-gate  * call ibt_unbind_all_services() and deregister the service for
32727c478bd9Sstevel@tonic-gate  * the service handle.  If they are the same, the current and the
32737c478bd9Sstevel@tonic-gate  * next entries are bound to the same service handle.  In this
32747c478bd9Sstevel@tonic-gate  * case, move on to the next entry.
32757c478bd9Sstevel@tonic-gate  */
32767c478bd9Sstevel@tonic-gate static void
32777c478bd9Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca)
32787c478bd9Sstevel@tonic-gate {
32797c478bd9Sstevel@tonic-gate 	rib_service_t		*srv_list, *to_remove;
32807c478bd9Sstevel@tonic-gate 
32817c478bd9Sstevel@tonic-gate 	/*
32827c478bd9Sstevel@tonic-gate 	 * unbind and deregister the services for this service type.
32837c478bd9Sstevel@tonic-gate 	 * Right now there is only one service type. In future it will
32847c478bd9Sstevel@tonic-gate 	 * be passed down to this function.
32857c478bd9Sstevel@tonic-gate 	 */
32867c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
32877c478bd9Sstevel@tonic-gate 	srv_list = hca->service_list;
32887c478bd9Sstevel@tonic-gate 	while (srv_list != NULL) {
32897c478bd9Sstevel@tonic-gate 		to_remove = srv_list;
32907c478bd9Sstevel@tonic-gate 		srv_list = to_remove->srv_next;
32917c478bd9Sstevel@tonic-gate 		if (srv_list == NULL || bcmp(to_remove->srv_hdl,
32927c478bd9Sstevel@tonic-gate 		    srv_list->srv_hdl, sizeof (ibt_srv_hdl_t))) {
32937c478bd9Sstevel@tonic-gate 
32940a701b1eSRobert Gordon 			(void) ibt_unbind_all_services(to_remove->srv_hdl);
32950a701b1eSRobert Gordon 			(void) ibt_deregister_service(hca->ibt_clnt_hdl,
32967c478bd9Sstevel@tonic-gate 			    to_remove->srv_hdl);
32977c478bd9Sstevel@tonic-gate 		}
32987c478bd9Sstevel@tonic-gate 
32997c478bd9Sstevel@tonic-gate 		kmem_free(to_remove, sizeof (rib_service_t));
33007c478bd9Sstevel@tonic-gate 	}
33017c478bd9Sstevel@tonic-gate 	hca->service_list = NULL;
33027c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
33037c478bd9Sstevel@tonic-gate }
33047c478bd9Sstevel@tonic-gate 
33057c478bd9Sstevel@tonic-gate static struct svc_recv *
33067c478bd9Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl)
33077c478bd9Sstevel@tonic-gate {
33087c478bd9Sstevel@tonic-gate 	struct svc_recv	*recvp;
33097c478bd9Sstevel@tonic-gate 
33107c478bd9Sstevel@tonic-gate 	recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP);
33117c478bd9Sstevel@tonic-gate 	recvp->vaddr = sgl->ds_va;
33127c478bd9Sstevel@tonic-gate 	recvp->qp = qp;
33137c478bd9Sstevel@tonic-gate 	recvp->bytes_xfer = 0;
33147c478bd9Sstevel@tonic-gate 	return (recvp);
33157c478bd9Sstevel@tonic-gate }
33167c478bd9Sstevel@tonic-gate 
33177c478bd9Sstevel@tonic-gate static int
33187c478bd9Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp)
33197c478bd9Sstevel@tonic-gate {
33207c478bd9Sstevel@tonic-gate 	kmem_free(recvp, sizeof (*recvp));
33217c478bd9Sstevel@tonic-gate 
33227c478bd9Sstevel@tonic-gate 	return (0);
33237c478bd9Sstevel@tonic-gate }
33247c478bd9Sstevel@tonic-gate 
33257c478bd9Sstevel@tonic-gate static struct reply *
33267c478bd9Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid)
33277c478bd9Sstevel@tonic-gate {
33287c478bd9Sstevel@tonic-gate 	struct reply	*rep;
33297c478bd9Sstevel@tonic-gate 
33307c478bd9Sstevel@tonic-gate 
33317c478bd9Sstevel@tonic-gate 	rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP);
33327c478bd9Sstevel@tonic-gate 	if (rep == NULL) {
33330a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__addrreply__nomem);
33347c478bd9Sstevel@tonic-gate 		return (NULL);
33357c478bd9Sstevel@tonic-gate 	}
33367c478bd9Sstevel@tonic-gate 	rep->xid = msgid;
33377c478bd9Sstevel@tonic-gate 	rep->vaddr_cq = NULL;
33387c478bd9Sstevel@tonic-gate 	rep->bytes_xfer = 0;
33397c478bd9Sstevel@tonic-gate 	rep->status = (uint_t)REPLY_WAIT;
33407c478bd9Sstevel@tonic-gate 	rep->prev = NULL;
33417c478bd9Sstevel@tonic-gate 	cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL);
33427c478bd9Sstevel@tonic-gate 
33437c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
33447c478bd9Sstevel@tonic-gate 	if (qp->replylist) {
33457c478bd9Sstevel@tonic-gate 		rep->next = qp->replylist;
33467c478bd9Sstevel@tonic-gate 		qp->replylist->prev = rep;
33477c478bd9Sstevel@tonic-gate 	}
33487c478bd9Sstevel@tonic-gate 	qp->rep_list_size++;
33490a701b1eSRobert Gordon 
33500a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__addrreply__listsize,
33510a701b1eSRobert Gordon 	    int, qp->rep_list_size);
33520a701b1eSRobert Gordon 
33537c478bd9Sstevel@tonic-gate 	qp->replylist = rep;
33547c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
33557c478bd9Sstevel@tonic-gate 
33567c478bd9Sstevel@tonic-gate 	return (rep);
33577c478bd9Sstevel@tonic-gate }
33587c478bd9Sstevel@tonic-gate 
33597c478bd9Sstevel@tonic-gate static rdma_stat
33607c478bd9Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp)
33617c478bd9Sstevel@tonic-gate {
33627c478bd9Sstevel@tonic-gate 	struct reply	*r, *n;
33637c478bd9Sstevel@tonic-gate 
33647c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
33657c478bd9Sstevel@tonic-gate 	for (r = qp->replylist; r != NULL; r = n) {
33667c478bd9Sstevel@tonic-gate 		n = r->next;
33677c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, r);
33687c478bd9Sstevel@tonic-gate 	}
33697c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
33707c478bd9Sstevel@tonic-gate 
33717c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
33727c478bd9Sstevel@tonic-gate }
33737c478bd9Sstevel@tonic-gate 
33747c478bd9Sstevel@tonic-gate static int
33757c478bd9Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep)
33767c478bd9Sstevel@tonic-gate {
33777c478bd9Sstevel@tonic-gate 
33787c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->replylist_lock));
33797c478bd9Sstevel@tonic-gate 	if (rep->prev) {
33807c478bd9Sstevel@tonic-gate 		rep->prev->next = rep->next;
33817c478bd9Sstevel@tonic-gate 	}
33827c478bd9Sstevel@tonic-gate 	if (rep->next) {
33837c478bd9Sstevel@tonic-gate 		rep->next->prev = rep->prev;
33847c478bd9Sstevel@tonic-gate 	}
33857c478bd9Sstevel@tonic-gate 	if (qp->replylist == rep)
33867c478bd9Sstevel@tonic-gate 		qp->replylist = rep->next;
33877c478bd9Sstevel@tonic-gate 
33887c478bd9Sstevel@tonic-gate 	cv_destroy(&rep->wait_cv);
33897c478bd9Sstevel@tonic-gate 	qp->rep_list_size--;
33900a701b1eSRobert Gordon 
33910a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__remreply__listsize,
33920a701b1eSRobert Gordon 	    int, qp->rep_list_size);
33937c478bd9Sstevel@tonic-gate 
33947c478bd9Sstevel@tonic-gate 	kmem_free(rep, sizeof (*rep));
33957c478bd9Sstevel@tonic-gate 
33967c478bd9Sstevel@tonic-gate 	return (0);
33977c478bd9Sstevel@tonic-gate }
33987c478bd9Sstevel@tonic-gate 
33997c478bd9Sstevel@tonic-gate rdma_stat
34000a701b1eSRobert Gordon rib_registermem(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
34017c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle)
34027c478bd9Sstevel@tonic-gate {
34037c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
34047c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
34057c478bd9Sstevel@tonic-gate 	rdma_stat	status;
34067c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
34077c478bd9Sstevel@tonic-gate 
34087c478bd9Sstevel@tonic-gate 	/*
34097c478bd9Sstevel@tonic-gate 	 * Note: ALL buffer pools use the same memory type RDMARW.
34107c478bd9Sstevel@tonic-gate 	 */
34110a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
34127c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
341311606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
34147c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
34157c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
34167c478bd9Sstevel@tonic-gate 	} else {
34177c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
34187c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
34197c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
34207c478bd9Sstevel@tonic-gate 	}
34217c478bd9Sstevel@tonic-gate 	return (status);
34227c478bd9Sstevel@tonic-gate }
34237c478bd9Sstevel@tonic-gate 
34247c478bd9Sstevel@tonic-gate static rdma_stat
34250a701b1eSRobert Gordon rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size,
34260a701b1eSRobert Gordon 	ibt_mr_flags_t spec,
34277c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
34287c478bd9Sstevel@tonic-gate {
34297c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
34307c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
343111606941Sjwahlig 	mem_attr.mr_vaddr = (uintptr_t)buf;
34327c478bd9Sstevel@tonic-gate 	mem_attr.mr_len = (ib_msglen_t)size;
34330a701b1eSRobert Gordon 	mem_attr.mr_as = (struct as *)(caddr_t)adsp;
34347c478bd9Sstevel@tonic-gate 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
34357c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
34367c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_WINDOW_BIND | spec;
34377c478bd9Sstevel@tonic-gate 
34387c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
34397c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
34407c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl,
34417c478bd9Sstevel@tonic-gate 		    &mem_attr, mr_hdlp, mr_descp);
34427c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
34437c478bd9Sstevel@tonic-gate 	} else {
34447c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
34457c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
34467c478bd9Sstevel@tonic-gate 	}
34477c478bd9Sstevel@tonic-gate 
34487c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
34497c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
34507c478bd9Sstevel@tonic-gate 	}
34517c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
34527c478bd9Sstevel@tonic-gate }
34537c478bd9Sstevel@tonic-gate 
34547c478bd9Sstevel@tonic-gate rdma_stat
34550a701b1eSRobert Gordon rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
34560a701b1eSRobert Gordon 	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc)
34577c478bd9Sstevel@tonic-gate {
34587c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
34590a701b1eSRobert Gordon 	rib_lrc_entry_t *l;
34607c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
34617c478bd9Sstevel@tonic-gate 	rdma_stat	status;
34627c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
34637c478bd9Sstevel@tonic-gate 
34647c478bd9Sstevel@tonic-gate 	/*
34657c478bd9Sstevel@tonic-gate 	 * Non-coherent memory registration.
34667c478bd9Sstevel@tonic-gate 	 */
34670a701b1eSRobert Gordon 	l = (rib_lrc_entry_t *)lrc;
34680a701b1eSRobert Gordon 	if (l) {
34690a701b1eSRobert Gordon 		if (l->registered) {
34700a701b1eSRobert Gordon 			buf_handle->mrc_linfo =
34710a701b1eSRobert Gordon 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
34720a701b1eSRobert Gordon 			buf_handle->mrc_lmr =
34730a701b1eSRobert Gordon 			    (uint32_t)l->lrc_mhandle.mrc_lmr;
34740a701b1eSRobert Gordon 			buf_handle->mrc_rmr =
34750a701b1eSRobert Gordon 			    (uint32_t)l->lrc_mhandle.mrc_rmr;
34760a701b1eSRobert Gordon 			*sync_handle = (RIB_SYNCMEM_HANDLE)
34770a701b1eSRobert Gordon 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
34780a701b1eSRobert Gordon 			return (RDMA_SUCCESS);
34790a701b1eSRobert Gordon 		} else {
34800a701b1eSRobert Gordon 			/* Always register the whole buffer */
34810a701b1eSRobert Gordon 			buf = (caddr_t)l->lrc_buf;
34820a701b1eSRobert Gordon 			buflen = l->lrc_len;
34830a701b1eSRobert Gordon 		}
34840a701b1eSRobert Gordon 	}
34850a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
34860a701b1eSRobert Gordon 
34877c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
34880a701b1eSRobert Gordon 		if (l) {
34890a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
34900a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_lmr   = (uint32_t)mr_desc.md_lkey;
34910a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_rmr   = (uint32_t)mr_desc.md_rkey;
34920a701b1eSRobert Gordon 			l->registered		 = TRUE;
34930a701b1eSRobert Gordon 		}
349411606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
34957c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
34967c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
34977c478bd9Sstevel@tonic-gate 		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
34987c478bd9Sstevel@tonic-gate 	} else {
34997c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
35007c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
35017c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
35027c478bd9Sstevel@tonic-gate 	}
35037c478bd9Sstevel@tonic-gate 	return (status);
35047c478bd9Sstevel@tonic-gate }
35057c478bd9Sstevel@tonic-gate 
35067c478bd9Sstevel@tonic-gate /* ARGSUSED */
35077c478bd9Sstevel@tonic-gate rdma_stat
35087c478bd9Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
35097c478bd9Sstevel@tonic-gate {
35107c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
35117c478bd9Sstevel@tonic-gate 	/*
35127c478bd9Sstevel@tonic-gate 	 * Allow memory deregistration even if HCA is
35137c478bd9Sstevel@tonic-gate 	 * getting detached. Need all outstanding
35147c478bd9Sstevel@tonic-gate 	 * memory registrations to be deregistered
35157c478bd9Sstevel@tonic-gate 	 * before HCA_DETACH_EVENT can be accepted.
35167c478bd9Sstevel@tonic-gate 	 */
35177c478bd9Sstevel@tonic-gate 	(void) ibt_deregister_mr(hca->hca_hdl,
351811606941Sjwahlig 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
35197c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
35207c478bd9Sstevel@tonic-gate }
35217c478bd9Sstevel@tonic-gate 
35227c478bd9Sstevel@tonic-gate /* ARGSUSED */
35237c478bd9Sstevel@tonic-gate rdma_stat
35247c478bd9Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
35250a701b1eSRobert Gordon 		RIB_SYNCMEM_HANDLE sync_handle, void *lrc)
35267c478bd9Sstevel@tonic-gate {
35270a701b1eSRobert Gordon 	rib_lrc_entry_t *l;
35280a701b1eSRobert Gordon 	l = (rib_lrc_entry_t *)lrc;
35290a701b1eSRobert Gordon 	if (l)
35300a701b1eSRobert Gordon 		if (l->registered)
35310a701b1eSRobert Gordon 			return (RDMA_SUCCESS);
35320a701b1eSRobert Gordon 
35337c478bd9Sstevel@tonic-gate 	(void) rib_deregistermem(conn, buf, buf_handle);
35347c478bd9Sstevel@tonic-gate 
35357c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
35367c478bd9Sstevel@tonic-gate }
35377c478bd9Sstevel@tonic-gate 
35387c478bd9Sstevel@tonic-gate /* ARGSUSED */
35397c478bd9Sstevel@tonic-gate rdma_stat
35407c478bd9Sstevel@tonic-gate rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf,
35417c478bd9Sstevel@tonic-gate 		int len, int cpu)
35427c478bd9Sstevel@tonic-gate {
35437c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
35447c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
35457c478bd9Sstevel@tonic-gate 	ibt_mr_sync_t	mr_segment;
35467c478bd9Sstevel@tonic-gate 
35477c478bd9Sstevel@tonic-gate 	mr_segment.ms_handle = (ibt_mr_hdl_t)shandle;
354811606941Sjwahlig 	mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf;
35497c478bd9Sstevel@tonic-gate 	mr_segment.ms_len = (ib_memlen_t)len;
35507c478bd9Sstevel@tonic-gate 	if (cpu) {
35517c478bd9Sstevel@tonic-gate 		/* make incoming data visible to memory */
35527c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_WRITE;
35537c478bd9Sstevel@tonic-gate 	} else {
35547c478bd9Sstevel@tonic-gate 		/* make memory changes visible to IO */
35557c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_READ;
35567c478bd9Sstevel@tonic-gate 	}
35577c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
35587c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
35597c478bd9Sstevel@tonic-gate 		status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1);
35607c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
35617c478bd9Sstevel@tonic-gate 	} else {
35627c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
35637c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
35647c478bd9Sstevel@tonic-gate 	}
35657c478bd9Sstevel@tonic-gate 
35667c478bd9Sstevel@tonic-gate 	if (status == IBT_SUCCESS)
35677c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
35687c478bd9Sstevel@tonic-gate 	else {
35697c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
35707c478bd9Sstevel@tonic-gate 	}
35717c478bd9Sstevel@tonic-gate }
35727c478bd9Sstevel@tonic-gate 
35737c478bd9Sstevel@tonic-gate /*
35747c478bd9Sstevel@tonic-gate  * XXXX	????
35757c478bd9Sstevel@tonic-gate  */
35767c478bd9Sstevel@tonic-gate static rdma_stat
35777c478bd9Sstevel@tonic-gate rib_getinfo(rdma_info_t *info)
35787c478bd9Sstevel@tonic-gate {
35797c478bd9Sstevel@tonic-gate 	/*
35807c478bd9Sstevel@tonic-gate 	 * XXXX	Hack!
35817c478bd9Sstevel@tonic-gate 	 */
35827c478bd9Sstevel@tonic-gate 	info->addrlen = 16;
35837c478bd9Sstevel@tonic-gate 	info->mts = 1000000;
35847c478bd9Sstevel@tonic-gate 	info->mtu = 1000000;
35857c478bd9Sstevel@tonic-gate 
35867c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
35877c478bd9Sstevel@tonic-gate }
35887c478bd9Sstevel@tonic-gate 
35897c478bd9Sstevel@tonic-gate rib_bufpool_t *
35907c478bd9Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num)
35917c478bd9Sstevel@tonic-gate {
35927c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
35937c478bd9Sstevel@tonic-gate 	bufpool_t	*bp = NULL;
35947c478bd9Sstevel@tonic-gate 	caddr_t		buf;
35957c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
35967c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
35977c478bd9Sstevel@tonic-gate 	int		i, j;
35987c478bd9Sstevel@tonic-gate 
35997c478bd9Sstevel@tonic-gate 	rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP);
36007c478bd9Sstevel@tonic-gate 
36017c478bd9Sstevel@tonic-gate 	bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) +
36027c478bd9Sstevel@tonic-gate 	    num * sizeof (void *), KM_SLEEP);
36037c478bd9Sstevel@tonic-gate 
36047c478bd9Sstevel@tonic-gate 	mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
36057c478bd9Sstevel@tonic-gate 	bp->numelems = num;
36067c478bd9Sstevel@tonic-gate 
36070a701b1eSRobert Gordon 
36087c478bd9Sstevel@tonic-gate 	switch (ptype) {
36097c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
36107c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
36117c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_MSG_SZ;
36127c478bd9Sstevel@tonic-gate 		break;
36137c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
36147c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
36157c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_BUF_SIZE;
36167c478bd9Sstevel@tonic-gate 		break;
36177c478bd9Sstevel@tonic-gate 	default:
36187c478bd9Sstevel@tonic-gate 		goto fail;
36197c478bd9Sstevel@tonic-gate 	}
36207c478bd9Sstevel@tonic-gate 
36217c478bd9Sstevel@tonic-gate 	/*
36227c478bd9Sstevel@tonic-gate 	 * Register the pool.
36237c478bd9Sstevel@tonic-gate 	 */
36247c478bd9Sstevel@tonic-gate 	bp->bufsize = num * bp->rsize;
36257c478bd9Sstevel@tonic-gate 	bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
36267c478bd9Sstevel@tonic-gate 	rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
36277c478bd9Sstevel@tonic-gate 	    sizeof (ibt_mr_hdl_t), KM_SLEEP);
36287c478bd9Sstevel@tonic-gate 	rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
36297c478bd9Sstevel@tonic-gate 	    sizeof (ibt_mr_desc_t), KM_SLEEP);
36307c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
36310a701b1eSRobert Gordon 
36327c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
36337c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
36347c478bd9Sstevel@tonic-gate 		goto fail;
36357c478bd9Sstevel@tonic-gate 	}
36360a701b1eSRobert Gordon 
36377c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
36387c478bd9Sstevel@tonic-gate 		bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
363911606941Sjwahlig 		mem_attr.mr_vaddr = (uintptr_t)buf;
36407c478bd9Sstevel@tonic-gate 		mem_attr.mr_len = (ib_msglen_t)bp->rsize;
36417c478bd9Sstevel@tonic-gate 		mem_attr.mr_as = NULL;
36427c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl,
36430a701b1eSRobert Gordon 		    hca->pd_hdl, &mem_attr,
36440a701b1eSRobert Gordon 		    &rbp->mr_hdl[i],
36457c478bd9Sstevel@tonic-gate 		    &rbp->mr_desc[i]);
36467c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
36477c478bd9Sstevel@tonic-gate 			for (j = 0; j < i; j++) {
36480a701b1eSRobert Gordon 				(void) ibt_deregister_mr(hca->hca_hdl,
36490a701b1eSRobert Gordon 				    rbp->mr_hdl[j]);
36507c478bd9Sstevel@tonic-gate 			}
36517c478bd9Sstevel@tonic-gate 			rw_exit(&hca->state_lock);
36527c478bd9Sstevel@tonic-gate 			goto fail;
36537c478bd9Sstevel@tonic-gate 		}
36547c478bd9Sstevel@tonic-gate 	}
36557c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
36567c478bd9Sstevel@tonic-gate 	buf = (caddr_t)bp->buf;
36577c478bd9Sstevel@tonic-gate 	for (i = 0; i < num; i++, buf += bp->rsize) {
36587c478bd9Sstevel@tonic-gate 		bp->buflist[i] = (void *)buf;
36597c478bd9Sstevel@tonic-gate 	}
36607c478bd9Sstevel@tonic-gate 	bp->buffree = num - 1;	/* no. of free buffers */
36617c478bd9Sstevel@tonic-gate 	rbp->bpool = bp;
36627c478bd9Sstevel@tonic-gate 
36637c478bd9Sstevel@tonic-gate 	return (rbp);
36647c478bd9Sstevel@tonic-gate fail:
36657c478bd9Sstevel@tonic-gate 	if (bp) {
36667c478bd9Sstevel@tonic-gate 		if (bp->buf)
36677c478bd9Sstevel@tonic-gate 			kmem_free(bp->buf, bp->bufsize);
36687c478bd9Sstevel@tonic-gate 		kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *));
36697c478bd9Sstevel@tonic-gate 	}
36707c478bd9Sstevel@tonic-gate 	if (rbp) {
36717c478bd9Sstevel@tonic-gate 		if (rbp->mr_hdl)
36727c478bd9Sstevel@tonic-gate 			kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t));
36737c478bd9Sstevel@tonic-gate 		if (rbp->mr_desc)
36747c478bd9Sstevel@tonic-gate 			kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t));
36757c478bd9Sstevel@tonic-gate 		kmem_free(rbp, sizeof (rib_bufpool_t));
36767c478bd9Sstevel@tonic-gate 	}
36777c478bd9Sstevel@tonic-gate 	return (NULL);
36787c478bd9Sstevel@tonic-gate }
36797c478bd9Sstevel@tonic-gate 
36807c478bd9Sstevel@tonic-gate static void
36817c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype)
36827c478bd9Sstevel@tonic-gate {
36837c478bd9Sstevel@tonic-gate 	int i;
36847c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
36857c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
36867c478bd9Sstevel@tonic-gate 
36877c478bd9Sstevel@tonic-gate 	/*
36887c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
36897c478bd9Sstevel@tonic-gate 	 */
36907c478bd9Sstevel@tonic-gate 	switch (ptype) {
36917c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
36927c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
36937c478bd9Sstevel@tonic-gate 			break;
36947c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
36957c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
36967c478bd9Sstevel@tonic-gate 			break;
36977c478bd9Sstevel@tonic-gate 		default:
36987c478bd9Sstevel@tonic-gate 			return;
36997c478bd9Sstevel@tonic-gate 	}
37007c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
37017c478bd9Sstevel@tonic-gate 		return;
37027c478bd9Sstevel@tonic-gate 
37037c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
37047c478bd9Sstevel@tonic-gate 
37057c478bd9Sstevel@tonic-gate 	/*
37067c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
37077c478bd9Sstevel@tonic-gate 	 */
37087c478bd9Sstevel@tonic-gate 	for (i = 0; i < bp->numelems; i++) {
37097c478bd9Sstevel@tonic-gate 		(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]);
37107c478bd9Sstevel@tonic-gate 	}
37117c478bd9Sstevel@tonic-gate }
37127c478bd9Sstevel@tonic-gate 
37137c478bd9Sstevel@tonic-gate static void
37147c478bd9Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype)
37157c478bd9Sstevel@tonic-gate {
37167c478bd9Sstevel@tonic-gate 
37177c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
37187c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
37197c478bd9Sstevel@tonic-gate 
37207c478bd9Sstevel@tonic-gate 	/*
37217c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
37227c478bd9Sstevel@tonic-gate 	 */
37237c478bd9Sstevel@tonic-gate 	switch (ptype) {
37247c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
37257c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
37267c478bd9Sstevel@tonic-gate 			break;
37277c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
37287c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
37297c478bd9Sstevel@tonic-gate 			break;
37307c478bd9Sstevel@tonic-gate 		default:
37317c478bd9Sstevel@tonic-gate 			return;
37327c478bd9Sstevel@tonic-gate 	}
37337c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
37347c478bd9Sstevel@tonic-gate 		return;
37357c478bd9Sstevel@tonic-gate 
37367c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
37377c478bd9Sstevel@tonic-gate 
37387c478bd9Sstevel@tonic-gate 	/*
37397c478bd9Sstevel@tonic-gate 	 * Free the pool memory.
37407c478bd9Sstevel@tonic-gate 	 */
37417c478bd9Sstevel@tonic-gate 	if (rbp->mr_hdl)
37427c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
37437c478bd9Sstevel@tonic-gate 
37447c478bd9Sstevel@tonic-gate 	if (rbp->mr_desc)
37457c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
37467c478bd9Sstevel@tonic-gate 	if (bp->buf)
37477c478bd9Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
37487c478bd9Sstevel@tonic-gate 	mutex_destroy(&bp->buflock);
37497c478bd9Sstevel@tonic-gate 	kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
37507c478bd9Sstevel@tonic-gate 	kmem_free(rbp, sizeof (rib_bufpool_t));
37517c478bd9Sstevel@tonic-gate }
37527c478bd9Sstevel@tonic-gate 
37537c478bd9Sstevel@tonic-gate void
37547c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype)
37557c478bd9Sstevel@tonic-gate {
37567c478bd9Sstevel@tonic-gate 	/*
37577c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
37587c478bd9Sstevel@tonic-gate 	 */
37597c478bd9Sstevel@tonic-gate 	rib_rbufpool_deregister(hca, ptype);
37607c478bd9Sstevel@tonic-gate 	rib_rbufpool_free(hca, ptype);
37617c478bd9Sstevel@tonic-gate }
37627c478bd9Sstevel@tonic-gate 
37637c478bd9Sstevel@tonic-gate /*
37647c478bd9Sstevel@tonic-gate  * Fetch a buffer from the pool of type specified in rdbuf->type.
37657c478bd9Sstevel@tonic-gate  */
37667c478bd9Sstevel@tonic-gate static rdma_stat
37677c478bd9Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf)
37687c478bd9Sstevel@tonic-gate {
37690a701b1eSRobert Gordon 	rib_lrc_entry_t *rlep;
37700a701b1eSRobert Gordon 
37710a701b1eSRobert Gordon 	if (rdbuf->type ==  RDMA_LONG_BUFFER) {
37720a701b1eSRobert Gordon 		rlep = rib_get_cache_buf(conn, rdbuf->len);
37730a701b1eSRobert Gordon 		rdbuf->rb_private =  (caddr_t)rlep;
37740a701b1eSRobert Gordon 		rdbuf->addr = rlep->lrc_buf;
37750a701b1eSRobert Gordon 		rdbuf->handle = rlep->lrc_mhandle;
37760a701b1eSRobert Gordon 		return (RDMA_SUCCESS);
37770a701b1eSRobert Gordon 	}
37787c478bd9Sstevel@tonic-gate 
37797c478bd9Sstevel@tonic-gate 	rdbuf->addr = rib_rbuf_alloc(conn, rdbuf);
37807c478bd9Sstevel@tonic-gate 	if (rdbuf->addr) {
37817c478bd9Sstevel@tonic-gate 		switch (rdbuf->type) {
37827c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
37837c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_MSG_SZ;	/* 1K */
37847c478bd9Sstevel@tonic-gate 			break;
37857c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
37867c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_BUF_SIZE; /* 2K */
37877c478bd9Sstevel@tonic-gate 			break;
37887c478bd9Sstevel@tonic-gate 		default:
37897c478bd9Sstevel@tonic-gate 			rdbuf->len = 0;
37907c478bd9Sstevel@tonic-gate 		}
37917c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
37927c478bd9Sstevel@tonic-gate 	} else
37937c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
37947c478bd9Sstevel@tonic-gate }
37957c478bd9Sstevel@tonic-gate 
37967c478bd9Sstevel@tonic-gate /*
37977c478bd9Sstevel@tonic-gate  * Fetch a buffer of specified type.
37987c478bd9Sstevel@tonic-gate  * Note that rdbuf->handle is mw's rkey.
37997c478bd9Sstevel@tonic-gate  */
38007c478bd9Sstevel@tonic-gate static void *
38017c478bd9Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf)
38027c478bd9Sstevel@tonic-gate {
38037c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
38047c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = qp->hca;
38057c478bd9Sstevel@tonic-gate 	rdma_btype	ptype = rdbuf->type;
38067c478bd9Sstevel@tonic-gate 	void		*buf;
38077c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
38087c478bd9Sstevel@tonic-gate 	bufpool_t	*bp;
38097c478bd9Sstevel@tonic-gate 	int		i;
38107c478bd9Sstevel@tonic-gate 
38117c478bd9Sstevel@tonic-gate 	/*
38127c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
38137c478bd9Sstevel@tonic-gate 	 */
38147c478bd9Sstevel@tonic-gate 	switch (ptype) {
38157c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
38167c478bd9Sstevel@tonic-gate 		rbp = hca->send_pool;
38177c478bd9Sstevel@tonic-gate 		break;
38187c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
38197c478bd9Sstevel@tonic-gate 		rbp = hca->recv_pool;
38207c478bd9Sstevel@tonic-gate 		break;
38217c478bd9Sstevel@tonic-gate 	default:
38227c478bd9Sstevel@tonic-gate 		return (NULL);
38237c478bd9Sstevel@tonic-gate 	}
38247c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
38257c478bd9Sstevel@tonic-gate 		return (NULL);
38267c478bd9Sstevel@tonic-gate 
38277c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
38287c478bd9Sstevel@tonic-gate 
38297c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
38307c478bd9Sstevel@tonic-gate 	if (bp->buffree < 0) {
38317c478bd9Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
38327c478bd9Sstevel@tonic-gate 		return (NULL);
38337c478bd9Sstevel@tonic-gate 	}
38347c478bd9Sstevel@tonic-gate 
38357c478bd9Sstevel@tonic-gate 	/* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */
38367c478bd9Sstevel@tonic-gate 	buf = bp->buflist[bp->buffree];
38377c478bd9Sstevel@tonic-gate 	rdbuf->addr = buf;
38387c478bd9Sstevel@tonic-gate 	rdbuf->len = bp->rsize;
38397c478bd9Sstevel@tonic-gate 	for (i = bp->numelems - 1; i >= 0; i--) {
384011606941Sjwahlig 		if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
38410a701b1eSRobert Gordon 			rdbuf->handle.mrc_rmr =
38420a701b1eSRobert Gordon 			    (uint32_t)rbp->mr_desc[i].md_rkey;
38430a701b1eSRobert Gordon 			rdbuf->handle.mrc_linfo =
38440a701b1eSRobert Gordon 			    (uintptr_t)rbp->mr_hdl[i];
38450a701b1eSRobert Gordon 			rdbuf->handle.mrc_lmr =
38460a701b1eSRobert Gordon 			    (uint32_t)rbp->mr_desc[i].md_lkey;
38477c478bd9Sstevel@tonic-gate 			bp->buffree--;
38487c478bd9Sstevel@tonic-gate 
38497c478bd9Sstevel@tonic-gate 			mutex_exit(&bp->buflock);
38507c478bd9Sstevel@tonic-gate 
38517c478bd9Sstevel@tonic-gate 			return (buf);
38527c478bd9Sstevel@tonic-gate 		}
38537c478bd9Sstevel@tonic-gate 	}
38540a701b1eSRobert Gordon 
38557c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
38567c478bd9Sstevel@tonic-gate 
38577c478bd9Sstevel@tonic-gate 	return (NULL);
38587c478bd9Sstevel@tonic-gate }
38597c478bd9Sstevel@tonic-gate 
38607c478bd9Sstevel@tonic-gate static void
38617c478bd9Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf)
38627c478bd9Sstevel@tonic-gate {
38637c478bd9Sstevel@tonic-gate 
38640a701b1eSRobert Gordon 	if (rdbuf->type == RDMA_LONG_BUFFER) {
38650a701b1eSRobert Gordon 		rib_free_cache_buf(conn, (rib_lrc_entry_t *)rdbuf->rb_private);
38660a701b1eSRobert Gordon 		rdbuf->rb_private = NULL;
38670a701b1eSRobert Gordon 		return;
38680a701b1eSRobert Gordon 	}
38697c478bd9Sstevel@tonic-gate 	rib_rbuf_free(conn, rdbuf->type, rdbuf->addr);
38707c478bd9Sstevel@tonic-gate }
38717c478bd9Sstevel@tonic-gate 
38727c478bd9Sstevel@tonic-gate static void
38737c478bd9Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf)
38747c478bd9Sstevel@tonic-gate {
38757c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
38767c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = qp->hca;
38777c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
38787c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
38797c478bd9Sstevel@tonic-gate 
38807c478bd9Sstevel@tonic-gate 	/*
38817c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
38827c478bd9Sstevel@tonic-gate 	 */
38837c478bd9Sstevel@tonic-gate 	switch (ptype) {
38847c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
38857c478bd9Sstevel@tonic-gate 		rbp = hca->send_pool;
38867c478bd9Sstevel@tonic-gate 		break;
38877c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
38887c478bd9Sstevel@tonic-gate 		rbp = hca->recv_pool;
38897c478bd9Sstevel@tonic-gate 		break;
38907c478bd9Sstevel@tonic-gate 	default:
38917c478bd9Sstevel@tonic-gate 		return;
38927c478bd9Sstevel@tonic-gate 	}
38937c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
38947c478bd9Sstevel@tonic-gate 		return;
38957c478bd9Sstevel@tonic-gate 
38967c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
38977c478bd9Sstevel@tonic-gate 
38987c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
38997c478bd9Sstevel@tonic-gate 	if (++bp->buffree >= bp->numelems) {
39007c478bd9Sstevel@tonic-gate 		/*
39017c478bd9Sstevel@tonic-gate 		 * Should never happen
39027c478bd9Sstevel@tonic-gate 		 */
39037c478bd9Sstevel@tonic-gate 		bp->buffree--;
39047c478bd9Sstevel@tonic-gate 	} else {
39057c478bd9Sstevel@tonic-gate 		bp->buflist[bp->buffree] = buf;
39067c478bd9Sstevel@tonic-gate 	}
39077c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
39087c478bd9Sstevel@tonic-gate }
39097c478bd9Sstevel@tonic-gate 
39107c478bd9Sstevel@tonic-gate static rdma_stat
39117c478bd9Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t *connlist)
39127c478bd9Sstevel@tonic-gate {
39137c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
39147c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd) {
39157c478bd9Sstevel@tonic-gate 		cn->c_next = connlist->conn_hd;
39167c478bd9Sstevel@tonic-gate 		connlist->conn_hd->c_prev = cn;
39177c478bd9Sstevel@tonic-gate 	}
39187c478bd9Sstevel@tonic-gate 	connlist->conn_hd = cn;
39197c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
39207c478bd9Sstevel@tonic-gate 
39217c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
39227c478bd9Sstevel@tonic-gate }
39237c478bd9Sstevel@tonic-gate 
39247c478bd9Sstevel@tonic-gate static rdma_stat
39257c478bd9Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist)
39267c478bd9Sstevel@tonic-gate {
39277c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
39287c478bd9Sstevel@tonic-gate 	if (cn->c_prev) {
39297c478bd9Sstevel@tonic-gate 		cn->c_prev->c_next = cn->c_next;
39307c478bd9Sstevel@tonic-gate 	}
39317c478bd9Sstevel@tonic-gate 	if (cn->c_next) {
39327c478bd9Sstevel@tonic-gate 		cn->c_next->c_prev = cn->c_prev;
39337c478bd9Sstevel@tonic-gate 	}
39347c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd == cn)
39357c478bd9Sstevel@tonic-gate 		connlist->conn_hd = cn->c_next;
39367c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
39377c478bd9Sstevel@tonic-gate 
39387c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
39397c478bd9Sstevel@tonic-gate }
39407c478bd9Sstevel@tonic-gate 
39417c478bd9Sstevel@tonic-gate /*
39427c478bd9Sstevel@tonic-gate  * Connection management.
39437c478bd9Sstevel@tonic-gate  * IBTF does not support recycling of channels. So connections are only
39440a701b1eSRobert Gordon  * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR_CONN or
39457c478bd9Sstevel@tonic-gate  * C_DISCONN_PEND state. No C_IDLE state.
39467c478bd9Sstevel@tonic-gate  * C_CONN_PEND state: Connection establishment in progress to the server.
39477c478bd9Sstevel@tonic-gate  * C_CONNECTED state: A connection when created is in C_CONNECTED state.
39487c478bd9Sstevel@tonic-gate  * It has an RC channel associated with it. ibt_post_send/recv are allowed
39497c478bd9Sstevel@tonic-gate  * only in this state.
39500a701b1eSRobert Gordon  * C_ERROR_CONN state: A connection transitions to this state when WRs on the
39517c478bd9Sstevel@tonic-gate  * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event
39527c478bd9Sstevel@tonic-gate  * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA.
39530a701b1eSRobert Gordon  * C_DISCONN_PEND state: When a connection is in C_ERROR_CONN state and when
39547c478bd9Sstevel@tonic-gate  * c_ref drops to 0 (this indicates that RPC has no more references to this
39557c478bd9Sstevel@tonic-gate  * connection), the connection should be destroyed. A connection transitions
39567c478bd9Sstevel@tonic-gate  * into this state when it is being destroyed.
39577c478bd9Sstevel@tonic-gate  */
395851f34d4bSRajkumar Sivaprakasam /* ARGSUSED */
39597c478bd9Sstevel@tonic-gate static rdma_stat
39607c478bd9Sstevel@tonic-gate rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn)
39617c478bd9Sstevel@tonic-gate {
39627c478bd9Sstevel@tonic-gate 	CONN *cn;
39637c478bd9Sstevel@tonic-gate 	int status = RDMA_SUCCESS;
396451f34d4bSRajkumar Sivaprakasam 	rib_hca_t *hca = rib_stat->hca;
39657c478bd9Sstevel@tonic-gate 	rib_qp_t *qp;
39667c478bd9Sstevel@tonic-gate 	clock_t cv_stat, timout;
3967214ae7d0SSiddheshwar Mahesh 	rpcib_ping_t rpt;
39687c478bd9Sstevel@tonic-gate 
396951f34d4bSRajkumar Sivaprakasam 	if (hca == NULL)
397051f34d4bSRajkumar Sivaprakasam 		return (RDMA_FAILED);
397151f34d4bSRajkumar Sivaprakasam 
397251f34d4bSRajkumar Sivaprakasam 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
397351f34d4bSRajkumar Sivaprakasam 	if (hca->state == HCA_DETACHED) {
397451f34d4bSRajkumar Sivaprakasam 		rw_exit(&rib_stat->hca->state_lock);
397551f34d4bSRajkumar Sivaprakasam 		return (RDMA_FAILED);
397651f34d4bSRajkumar Sivaprakasam 	}
397751f34d4bSRajkumar Sivaprakasam 	rw_exit(&rib_stat->hca->state_lock);
397851f34d4bSRajkumar Sivaprakasam 
39797c478bd9Sstevel@tonic-gate again:
39807c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
39817c478bd9Sstevel@tonic-gate 	cn = hca->cl_conn_list.conn_hd;
39827c478bd9Sstevel@tonic-gate 	while (cn != NULL) {
39837c478bd9Sstevel@tonic-gate 		/*
39847c478bd9Sstevel@tonic-gate 		 * First, clear up any connection in the ERROR state
39857c478bd9Sstevel@tonic-gate 		 */
39867c478bd9Sstevel@tonic-gate 		mutex_enter(&cn->c_lock);
39870a701b1eSRobert Gordon 		if (cn->c_state == C_ERROR_CONN) {
39887c478bd9Sstevel@tonic-gate 			if (cn->c_ref == 0) {
39897c478bd9Sstevel@tonic-gate 				/*
39907c478bd9Sstevel@tonic-gate 				 * Remove connection from list and destroy it.
39917c478bd9Sstevel@tonic-gate 				 */
39927c478bd9Sstevel@tonic-gate 				cn->c_state = C_DISCONN_PEND;
39937c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
39947c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
3995*065714dcSSiddheshwar Mahesh 				rib_conn_close((void *)cn);
39967c478bd9Sstevel@tonic-gate 				goto again;
39977c478bd9Sstevel@tonic-gate 			}
39987c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
39997c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
40007c478bd9Sstevel@tonic-gate 			continue;
40010a701b1eSRobert Gordon 		}
40020a701b1eSRobert Gordon 		if (cn->c_state == C_DISCONN_PEND) {
40037c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
40047c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
40057c478bd9Sstevel@tonic-gate 			continue;
40067c478bd9Sstevel@tonic-gate 		}
40077c478bd9Sstevel@tonic-gate 		if ((cn->c_raddr.len == svcaddr->len) &&
40087c478bd9Sstevel@tonic-gate 		    bcmp(svcaddr->buf, cn->c_raddr.buf, svcaddr->len) == 0) {
40097c478bd9Sstevel@tonic-gate 			/*
40107c478bd9Sstevel@tonic-gate 			 * Our connection. Give up conn list lock
40117c478bd9Sstevel@tonic-gate 			 * as we are done traversing the list.
40127c478bd9Sstevel@tonic-gate 			 */
40137c478bd9Sstevel@tonic-gate 			rw_exit(&hca->cl_conn_list.conn_lock);
40147c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONNECTED) {
40157c478bd9Sstevel@tonic-gate 				cn->c_ref++;	/* sharing a conn */
40167c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
40177c478bd9Sstevel@tonic-gate 				*conn = cn;
40187c478bd9Sstevel@tonic-gate 				return (status);
40197c478bd9Sstevel@tonic-gate 			}
40207c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONN_PEND) {
40217c478bd9Sstevel@tonic-gate 				/*
40227c478bd9Sstevel@tonic-gate 				 * Hold a reference to this conn before
40237c478bd9Sstevel@tonic-gate 				 * we give up the lock.
40247c478bd9Sstevel@tonic-gate 				 */
40257c478bd9Sstevel@tonic-gate 				cn->c_ref++;
40267c478bd9Sstevel@tonic-gate 				timout =  ddi_get_lbolt() +
40277c478bd9Sstevel@tonic-gate 				    drv_usectohz(CONN_WAIT_TIME * 1000000);
40287c478bd9Sstevel@tonic-gate 				while ((cv_stat = cv_timedwait_sig(&cn->c_cv,
40297c478bd9Sstevel@tonic-gate 				    &cn->c_lock, timout)) > 0 &&
40307c478bd9Sstevel@tonic-gate 				    cn->c_state == C_CONN_PEND)
40317c478bd9Sstevel@tonic-gate 					;
40327c478bd9Sstevel@tonic-gate 				if (cv_stat == 0) {
40337c478bd9Sstevel@tonic-gate 					cn->c_ref--;
40347c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40357c478bd9Sstevel@tonic-gate 					return (RDMA_INTR);
40367c478bd9Sstevel@tonic-gate 				}
40377c478bd9Sstevel@tonic-gate 				if (cv_stat < 0) {
40387c478bd9Sstevel@tonic-gate 					cn->c_ref--;
40397c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40407c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
40417c478bd9Sstevel@tonic-gate 				}
40427c478bd9Sstevel@tonic-gate 				if (cn->c_state == C_CONNECTED) {
40437c478bd9Sstevel@tonic-gate 					*conn = cn;
40447c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40457c478bd9Sstevel@tonic-gate 					return (status);
40467c478bd9Sstevel@tonic-gate 				} else {
40477c478bd9Sstevel@tonic-gate 					cn->c_ref--;
40487c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40497c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
40507c478bd9Sstevel@tonic-gate 				}
40517c478bd9Sstevel@tonic-gate 			}
40527c478bd9Sstevel@tonic-gate 		}
40537c478bd9Sstevel@tonic-gate 		mutex_exit(&cn->c_lock);
40547c478bd9Sstevel@tonic-gate 		cn = cn->c_next;
40557c478bd9Sstevel@tonic-gate 	}
40567c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
40577c478bd9Sstevel@tonic-gate 
4058214ae7d0SSiddheshwar Mahesh 	bzero(&rpt, sizeof (rpcib_ping_t));
40590a701b1eSRobert Gordon 
4060214ae7d0SSiddheshwar Mahesh 	status = rib_ping_srv(addr_type, svcaddr, &rpt);
40617c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
40627c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
40637c478bd9Sstevel@tonic-gate 	}
40647c478bd9Sstevel@tonic-gate 
40657c478bd9Sstevel@tonic-gate 	/*
40667c478bd9Sstevel@tonic-gate 	 * Channel to server doesn't exist yet, create one.
40677c478bd9Sstevel@tonic-gate 	 */
40687c478bd9Sstevel@tonic-gate 	if (rib_clnt_create_chan(hca, svcaddr, &qp) != RDMA_SUCCESS) {
40697c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
40707c478bd9Sstevel@tonic-gate 	}
40717c478bd9Sstevel@tonic-gate 	cn = qptoc(qp);
40727c478bd9Sstevel@tonic-gate 	cn->c_state = C_CONN_PEND;
40737c478bd9Sstevel@tonic-gate 	cn->c_ref = 1;
40747c478bd9Sstevel@tonic-gate 
40757c478bd9Sstevel@tonic-gate 	/*
40767c478bd9Sstevel@tonic-gate 	 * Add to conn list.
40777c478bd9Sstevel@tonic-gate 	 * We had given up the READER lock. In the time since then,
40787c478bd9Sstevel@tonic-gate 	 * another thread might have created the connection we are
40797c478bd9Sstevel@tonic-gate 	 * trying here. But for now, that is quite all right - there
40807c478bd9Sstevel@tonic-gate 	 * might be two connections between a pair of hosts instead
40817c478bd9Sstevel@tonic-gate 	 * of one. If we really want to close that window,
40827c478bd9Sstevel@tonic-gate 	 * then need to check the list after acquiring the
40837c478bd9Sstevel@tonic-gate 	 * WRITER lock.
40847c478bd9Sstevel@tonic-gate 	 */
40857c478bd9Sstevel@tonic-gate 	(void) rib_add_connlist(cn, &hca->cl_conn_list);
4086214ae7d0SSiddheshwar Mahesh 	status = rib_conn_to_srv(hca, qp, &rpt);
40877c478bd9Sstevel@tonic-gate 	mutex_enter(&cn->c_lock);
40887c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
40897c478bd9Sstevel@tonic-gate 		cn->c_state = C_CONNECTED;
40907c478bd9Sstevel@tonic-gate 		*conn = cn;
40917c478bd9Sstevel@tonic-gate 	} else {
40920a701b1eSRobert Gordon 		cn->c_state = C_ERROR_CONN;
40937c478bd9Sstevel@tonic-gate 		cn->c_ref--;
40947c478bd9Sstevel@tonic-gate 	}
40957c478bd9Sstevel@tonic-gate 	cv_broadcast(&cn->c_cv);
40967c478bd9Sstevel@tonic-gate 	mutex_exit(&cn->c_lock);
40977c478bd9Sstevel@tonic-gate 	return (status);
40987c478bd9Sstevel@tonic-gate }
40997c478bd9Sstevel@tonic-gate 
4100*065714dcSSiddheshwar Mahesh static void
4101*065714dcSSiddheshwar Mahesh rib_conn_close(void *rarg)
41027c478bd9Sstevel@tonic-gate {
4103*065714dcSSiddheshwar Mahesh 	CONN *conn = (CONN *)rarg;
41047c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
41057c478bd9Sstevel@tonic-gate 
41067c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
4107*065714dcSSiddheshwar Mahesh 	if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
41087c478bd9Sstevel@tonic-gate 
4109*065714dcSSiddheshwar Mahesh 		conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
41107c478bd9Sstevel@tonic-gate 		/*
4111*065714dcSSiddheshwar Mahesh 		 * Live connection in CONNECTED state.
41127c478bd9Sstevel@tonic-gate 		 */
4113*065714dcSSiddheshwar Mahesh 		if (conn->c_state == C_CONNECTED) {
4114*065714dcSSiddheshwar Mahesh 			conn->c_state = C_ERROR_CONN;
4115*065714dcSSiddheshwar Mahesh 		}
41167c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
4117*065714dcSSiddheshwar Mahesh 
4118*065714dcSSiddheshwar Mahesh 		rib_close_a_channel(conn);
4119*065714dcSSiddheshwar Mahesh 
4120*065714dcSSiddheshwar Mahesh 		mutex_enter(&conn->c_lock);
4121*065714dcSSiddheshwar Mahesh 		conn->c_flags &= ~C_CLOSE_PENDING;
4122*065714dcSSiddheshwar Mahesh 		cv_signal(&conn->c_cv);
4123*065714dcSSiddheshwar Mahesh 	}
4124*065714dcSSiddheshwar Mahesh 
4125*065714dcSSiddheshwar Mahesh 	mutex_exit(&conn->c_lock);
4126*065714dcSSiddheshwar Mahesh 
41277c478bd9Sstevel@tonic-gate 	if (qp->mode == RIB_SERVER)
41287c478bd9Sstevel@tonic-gate 		(void) rib_disconnect_channel(conn,
41297c478bd9Sstevel@tonic-gate 		    &qp->hca->srv_conn_list);
41307c478bd9Sstevel@tonic-gate 	else
41317c478bd9Sstevel@tonic-gate 		(void) rib_disconnect_channel(conn,
41327c478bd9Sstevel@tonic-gate 		    &qp->hca->cl_conn_list);
4133*065714dcSSiddheshwar Mahesh }
4134*065714dcSSiddheshwar Mahesh 
4135*065714dcSSiddheshwar Mahesh static void
4136*065714dcSSiddheshwar Mahesh rib_conn_timeout_call(void *carg)
4137*065714dcSSiddheshwar Mahesh {
4138*065714dcSSiddheshwar Mahesh 	time_t idle_time;
4139*065714dcSSiddheshwar Mahesh 	CONN *conn = (CONN *)carg;
4140*065714dcSSiddheshwar Mahesh 	rib_hca_t *hca = ctoqp(conn)->hca;
4141*065714dcSSiddheshwar Mahesh 	int error;
4142*065714dcSSiddheshwar Mahesh 
4143*065714dcSSiddheshwar Mahesh 	mutex_enter(&conn->c_lock);
4144*065714dcSSiddheshwar Mahesh 	if ((conn->c_ref > 0) ||
4145*065714dcSSiddheshwar Mahesh 	    (conn->c_state == C_DISCONN_PEND)) {
4146*065714dcSSiddheshwar Mahesh 		conn->c_timeout = NULL;
4147*065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4148*065714dcSSiddheshwar Mahesh 		return;
4149*065714dcSSiddheshwar Mahesh 	}
4150*065714dcSSiddheshwar Mahesh 
4151*065714dcSSiddheshwar Mahesh 	idle_time = (gethrestime_sec() - conn->c_last_used);
4152*065714dcSSiddheshwar Mahesh 
4153*065714dcSSiddheshwar Mahesh 	if ((idle_time <= rib_conn_timeout) &&
4154*065714dcSSiddheshwar Mahesh 	    (conn->c_state != C_ERROR_CONN)) {
4155*065714dcSSiddheshwar Mahesh 		/*
4156*065714dcSSiddheshwar Mahesh 		 * There was activity after the last timeout.
4157*065714dcSSiddheshwar Mahesh 		 * Extend the conn life. Unless the conn is
4158*065714dcSSiddheshwar Mahesh 		 * already in error state.
4159*065714dcSSiddheshwar Mahesh 		 */
4160*065714dcSSiddheshwar Mahesh 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
4161*065714dcSSiddheshwar Mahesh 		    SEC_TO_TICK(rib_conn_timeout - idle_time));
4162*065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4163*065714dcSSiddheshwar Mahesh 		return;
4164*065714dcSSiddheshwar Mahesh 	}
4165*065714dcSSiddheshwar Mahesh 
4166*065714dcSSiddheshwar Mahesh 	error = ddi_taskq_dispatch(hca->cleanup_helper, rib_conn_close,
4167*065714dcSSiddheshwar Mahesh 	    (void *)conn, DDI_NOSLEEP);
4168*065714dcSSiddheshwar Mahesh 
4169*065714dcSSiddheshwar Mahesh 	/*
4170*065714dcSSiddheshwar Mahesh 	 * If taskq dispatch fails above, then reset the timeout
4171*065714dcSSiddheshwar Mahesh 	 * to try again after 10 secs.
4172*065714dcSSiddheshwar Mahesh 	 */
4173*065714dcSSiddheshwar Mahesh 
4174*065714dcSSiddheshwar Mahesh 	if (error != DDI_SUCCESS) {
4175*065714dcSSiddheshwar Mahesh 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
4176*065714dcSSiddheshwar Mahesh 		    SEC_TO_TICK(RDMA_CONN_REAP_RETRY));
4177*065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4178*065714dcSSiddheshwar Mahesh 		return;
4179*065714dcSSiddheshwar Mahesh 	}
4180*065714dcSSiddheshwar Mahesh 
4181*065714dcSSiddheshwar Mahesh 	conn->c_state = C_DISCONN_PEND;
4182*065714dcSSiddheshwar Mahesh 	mutex_exit(&conn->c_lock);
4183*065714dcSSiddheshwar Mahesh }
4184*065714dcSSiddheshwar Mahesh 
4185*065714dcSSiddheshwar Mahesh static rdma_stat
4186*065714dcSSiddheshwar Mahesh rib_conn_release(CONN *conn)
4187*065714dcSSiddheshwar Mahesh {
4188*065714dcSSiddheshwar Mahesh 
4189*065714dcSSiddheshwar Mahesh 	mutex_enter(&conn->c_lock);
4190*065714dcSSiddheshwar Mahesh 	conn->c_ref--;
4191*065714dcSSiddheshwar Mahesh 
4192*065714dcSSiddheshwar Mahesh 	conn->c_last_used = gethrestime_sec();
4193*065714dcSSiddheshwar Mahesh 	if (conn->c_ref > 0) {
4194*065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
41957c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
41967c478bd9Sstevel@tonic-gate 	}
4197*065714dcSSiddheshwar Mahesh 
4198*065714dcSSiddheshwar Mahesh 	/*
4199*065714dcSSiddheshwar Mahesh 	 * If a conn is C_ERROR_CONN, close the channel.
4200*065714dcSSiddheshwar Mahesh 	 */
4201*065714dcSSiddheshwar Mahesh 	if (conn->c_ref == 0 && conn->c_state == C_ERROR_CONN) {
4202*065714dcSSiddheshwar Mahesh 		conn->c_state = C_DISCONN_PEND;
4203*065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4204*065714dcSSiddheshwar Mahesh 		rib_conn_close((void *)conn);
4205*065714dcSSiddheshwar Mahesh 		return (RDMA_SUCCESS);
4206*065714dcSSiddheshwar Mahesh 	}
4207*065714dcSSiddheshwar Mahesh 
4208*065714dcSSiddheshwar Mahesh 	/*
4209*065714dcSSiddheshwar Mahesh 	 * c_ref == 0, set a timeout for conn release
4210*065714dcSSiddheshwar Mahesh 	 */
4211*065714dcSSiddheshwar Mahesh 
4212*065714dcSSiddheshwar Mahesh 	if (conn->c_timeout == NULL) {
4213*065714dcSSiddheshwar Mahesh 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
4214*065714dcSSiddheshwar Mahesh 		    SEC_TO_TICK(rib_conn_timeout));
4215*065714dcSSiddheshwar Mahesh 	}
4216*065714dcSSiddheshwar Mahesh 
42177c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
42187c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
42197c478bd9Sstevel@tonic-gate }
42207c478bd9Sstevel@tonic-gate 
42217c478bd9Sstevel@tonic-gate /*
42227c478bd9Sstevel@tonic-gate  * Add at front of list
42237c478bd9Sstevel@tonic-gate  */
42247c478bd9Sstevel@tonic-gate static struct rdma_done_list *
42257c478bd9Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid)
42267c478bd9Sstevel@tonic-gate {
42277c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
42287c478bd9Sstevel@tonic-gate 
42297c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
42307c478bd9Sstevel@tonic-gate 
42317c478bd9Sstevel@tonic-gate 	rd = kmem_alloc(sizeof (*rd), KM_SLEEP);
42327c478bd9Sstevel@tonic-gate 	rd->xid = xid;
42337c478bd9Sstevel@tonic-gate 	cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL);
42347c478bd9Sstevel@tonic-gate 
42357c478bd9Sstevel@tonic-gate 	rd->prev = NULL;
42367c478bd9Sstevel@tonic-gate 	rd->next = qp->rdlist;
42377c478bd9Sstevel@tonic-gate 	if (qp->rdlist != NULL)
42387c478bd9Sstevel@tonic-gate 		qp->rdlist->prev = rd;
42397c478bd9Sstevel@tonic-gate 	qp->rdlist = rd;
42407c478bd9Sstevel@tonic-gate 
42417c478bd9Sstevel@tonic-gate 	return (rd);
42427c478bd9Sstevel@tonic-gate }
42437c478bd9Sstevel@tonic-gate 
42447c478bd9Sstevel@tonic-gate static void
42457c478bd9Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd)
42467c478bd9Sstevel@tonic-gate {
42477c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r;
42487c478bd9Sstevel@tonic-gate 
42497c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
42507c478bd9Sstevel@tonic-gate 
42517c478bd9Sstevel@tonic-gate 	r = rd->next;
42527c478bd9Sstevel@tonic-gate 	if (r != NULL) {
42537c478bd9Sstevel@tonic-gate 		r->prev = rd->prev;
42547c478bd9Sstevel@tonic-gate 	}
42557c478bd9Sstevel@tonic-gate 
42567c478bd9Sstevel@tonic-gate 	r = rd->prev;
42577c478bd9Sstevel@tonic-gate 	if (r != NULL) {
42587c478bd9Sstevel@tonic-gate 		r->next = rd->next;
42597c478bd9Sstevel@tonic-gate 	} else {
42607c478bd9Sstevel@tonic-gate 		qp->rdlist = rd->next;
42617c478bd9Sstevel@tonic-gate 	}
42627c478bd9Sstevel@tonic-gate 
42637c478bd9Sstevel@tonic-gate 	cv_destroy(&rd->rdma_done_cv);
42647c478bd9Sstevel@tonic-gate 	kmem_free(rd, sizeof (*rd));
42657c478bd9Sstevel@tonic-gate }
42667c478bd9Sstevel@tonic-gate 
42677c478bd9Sstevel@tonic-gate static void
42687c478bd9Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp)
42697c478bd9Sstevel@tonic-gate {
42707c478bd9Sstevel@tonic-gate 	struct rdma_done_list	*r, *n;
42717c478bd9Sstevel@tonic-gate 
42727c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
42737c478bd9Sstevel@tonic-gate 	for (r = qp->rdlist; r != NULL; r = n) {
42747c478bd9Sstevel@tonic-gate 		n = r->next;
42757c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, r);
42767c478bd9Sstevel@tonic-gate 	}
42777c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
42787c478bd9Sstevel@tonic-gate }
42797c478bd9Sstevel@tonic-gate 
42807c478bd9Sstevel@tonic-gate static void
42817c478bd9Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid)
42827c478bd9Sstevel@tonic-gate {
42837c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r = qp->rdlist;
42847c478bd9Sstevel@tonic-gate 
42857c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
42867c478bd9Sstevel@tonic-gate 
42877c478bd9Sstevel@tonic-gate 	while (r) {
42887c478bd9Sstevel@tonic-gate 		if (r->xid == xid) {
42897c478bd9Sstevel@tonic-gate 			cv_signal(&r->rdma_done_cv);
42907c478bd9Sstevel@tonic-gate 			return;
42917c478bd9Sstevel@tonic-gate 		} else {
42927c478bd9Sstevel@tonic-gate 			r = r->next;
42937c478bd9Sstevel@tonic-gate 		}
42947c478bd9Sstevel@tonic-gate 	}
42950a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__donenotify__nomatchxid,
42960a701b1eSRobert Gordon 	    int, xid);
42977c478bd9Sstevel@tonic-gate }
42987c478bd9Sstevel@tonic-gate 
4299*065714dcSSiddheshwar Mahesh /*
4300*065714dcSSiddheshwar Mahesh  * Expects conn->c_lock to be held by the caller.
4301*065714dcSSiddheshwar Mahesh  */
4302*065714dcSSiddheshwar Mahesh 
4303*065714dcSSiddheshwar Mahesh static void
4304*065714dcSSiddheshwar Mahesh rib_close_a_channel(CONN *conn)
4305*065714dcSSiddheshwar Mahesh {
4306*065714dcSSiddheshwar Mahesh 	rib_qp_t	*qp;
4307*065714dcSSiddheshwar Mahesh 	qp = ctoqp(conn);
4308*065714dcSSiddheshwar Mahesh 
4309*065714dcSSiddheshwar Mahesh 	if (qp->qp_hdl == NULL) {
4310*065714dcSSiddheshwar Mahesh 		/* channel already freed */
4311*065714dcSSiddheshwar Mahesh 		return;
4312*065714dcSSiddheshwar Mahesh 	}
4313*065714dcSSiddheshwar Mahesh 
4314*065714dcSSiddheshwar Mahesh 	/*
4315*065714dcSSiddheshwar Mahesh 	 * Call ibt_close_rc_channel in blocking mode
4316*065714dcSSiddheshwar Mahesh 	 * with no callbacks.
4317*065714dcSSiddheshwar Mahesh 	 */
4318*065714dcSSiddheshwar Mahesh 	(void) ibt_close_rc_channel(qp->qp_hdl, IBT_NOCALLBACKS,
4319*065714dcSSiddheshwar Mahesh 	    NULL, 0, NULL, NULL, 0);
4320*065714dcSSiddheshwar Mahesh }
43217c478bd9Sstevel@tonic-gate 
43227c478bd9Sstevel@tonic-gate /*
43237c478bd9Sstevel@tonic-gate  * Goes through all connections and closes the channel
43247c478bd9Sstevel@tonic-gate  * This will cause all the WRs on those channels to be
43257c478bd9Sstevel@tonic-gate  * flushed.
43267c478bd9Sstevel@tonic-gate  */
43277c478bd9Sstevel@tonic-gate static void
43287c478bd9Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist)
43297c478bd9Sstevel@tonic-gate {
4330*065714dcSSiddheshwar Mahesh 	CONN 		*conn, *tmp;
43317c478bd9Sstevel@tonic-gate 
43327c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
43337c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
43347c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
43357c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
4336*065714dcSSiddheshwar Mahesh 		tmp = conn->c_next;
4337*065714dcSSiddheshwar Mahesh 		if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
4338*065714dcSSiddheshwar Mahesh 
4339*065714dcSSiddheshwar Mahesh 			conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
4340*065714dcSSiddheshwar Mahesh 
43417c478bd9Sstevel@tonic-gate 			/*
43427c478bd9Sstevel@tonic-gate 			 * Live connection in CONNECTED state.
43437c478bd9Sstevel@tonic-gate 			 */
4344*065714dcSSiddheshwar Mahesh 			if (conn->c_state == C_CONNECTED)
43450a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
4346*065714dcSSiddheshwar Mahesh 			mutex_exit(&conn->c_lock);
4347*065714dcSSiddheshwar Mahesh 
4348*065714dcSSiddheshwar Mahesh 			rib_close_a_channel(conn);
4349*065714dcSSiddheshwar Mahesh 
4350*065714dcSSiddheshwar Mahesh 			mutex_enter(&conn->c_lock);
4351*065714dcSSiddheshwar Mahesh 			conn->c_flags &= ~C_CLOSE_PENDING;
4352*065714dcSSiddheshwar Mahesh 			/* Signal a pending rib_disconnect_channel() */
4353*065714dcSSiddheshwar Mahesh 			cv_signal(&conn->c_cv);
43547c478bd9Sstevel@tonic-gate 		}
43557c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
4356*065714dcSSiddheshwar Mahesh 		conn = tmp;
43577c478bd9Sstevel@tonic-gate 	}
43587c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
43597c478bd9Sstevel@tonic-gate }
43607c478bd9Sstevel@tonic-gate 
43617c478bd9Sstevel@tonic-gate /*
43627c478bd9Sstevel@tonic-gate  * Frees up all connections that are no longer being referenced
43637c478bd9Sstevel@tonic-gate  */
43647c478bd9Sstevel@tonic-gate static void
43657c478bd9Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist)
43667c478bd9Sstevel@tonic-gate {
43677c478bd9Sstevel@tonic-gate 	CONN 		*conn;
43687c478bd9Sstevel@tonic-gate 
43697c478bd9Sstevel@tonic-gate top:
43707c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
43717c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
43727c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
43737c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
43747c478bd9Sstevel@tonic-gate 
43757c478bd9Sstevel@tonic-gate 		/*
43767c478bd9Sstevel@tonic-gate 		 * At this point connection is either in ERROR
43777c478bd9Sstevel@tonic-gate 		 * or DISCONN_PEND state. If in DISCONN_PEND state
43787c478bd9Sstevel@tonic-gate 		 * then some other thread is culling that connection.
43797c478bd9Sstevel@tonic-gate 		 * If not and if c_ref is 0, then destroy the connection.
43807c478bd9Sstevel@tonic-gate 		 */
43817c478bd9Sstevel@tonic-gate 		if (conn->c_ref == 0 &&
43827c478bd9Sstevel@tonic-gate 		    conn->c_state != C_DISCONN_PEND) {
43837c478bd9Sstevel@tonic-gate 			/*
43847c478bd9Sstevel@tonic-gate 			 * Cull the connection
43857c478bd9Sstevel@tonic-gate 			 */
43867c478bd9Sstevel@tonic-gate 			conn->c_state = C_DISCONN_PEND;
43877c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
43887c478bd9Sstevel@tonic-gate 			rw_exit(&connlist->conn_lock);
43897c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, connlist);
43907c478bd9Sstevel@tonic-gate 			goto top;
43917c478bd9Sstevel@tonic-gate 		} else {
43927c478bd9Sstevel@tonic-gate 			/*
43937c478bd9Sstevel@tonic-gate 			 * conn disconnect already scheduled or will
43947c478bd9Sstevel@tonic-gate 			 * happen from conn_release when c_ref drops to 0.
43957c478bd9Sstevel@tonic-gate 			 */
43967c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
43977c478bd9Sstevel@tonic-gate 		}
43987c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
43997c478bd9Sstevel@tonic-gate 	}
44007c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
44017c478bd9Sstevel@tonic-gate 
44027c478bd9Sstevel@tonic-gate 	/*
44037c478bd9Sstevel@tonic-gate 	 * At this point, only connections with c_ref != 0 are on the list
44047c478bd9Sstevel@tonic-gate 	 */
44057c478bd9Sstevel@tonic-gate }
44067c478bd9Sstevel@tonic-gate 
44077c478bd9Sstevel@tonic-gate /*
4408*065714dcSSiddheshwar Mahesh  * Free all the HCA resources and close
4409*065714dcSSiddheshwar Mahesh  * the hca.
4410*065714dcSSiddheshwar Mahesh  */
4411*065714dcSSiddheshwar Mahesh 
4412*065714dcSSiddheshwar Mahesh static void
4413*065714dcSSiddheshwar Mahesh rib_free_hca(rib_hca_t *hca)
4414*065714dcSSiddheshwar Mahesh {
4415*065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl);
4416*065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl);
4417*065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl);
4418*065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->svc_scq->rib_cq_hdl);
4419*065714dcSSiddheshwar Mahesh 
4420*065714dcSSiddheshwar Mahesh 	kmem_free(hca->clnt_rcq, sizeof (rib_cq_t));
4421*065714dcSSiddheshwar Mahesh 	kmem_free(hca->clnt_scq, sizeof (rib_cq_t));
4422*065714dcSSiddheshwar Mahesh 	kmem_free(hca->svc_rcq, sizeof (rib_cq_t));
4423*065714dcSSiddheshwar Mahesh 	kmem_free(hca->svc_scq, sizeof (rib_cq_t));
4424*065714dcSSiddheshwar Mahesh 
4425*065714dcSSiddheshwar Mahesh 	rib_rbufpool_destroy(hca, RECV_BUFFER);
4426*065714dcSSiddheshwar Mahesh 	rib_rbufpool_destroy(hca, SEND_BUFFER);
4427*065714dcSSiddheshwar Mahesh 	rib_destroy_cache(hca);
4428*065714dcSSiddheshwar Mahesh 	if (rib_mod.rdma_count == 0)
4429*065714dcSSiddheshwar Mahesh 		rdma_unregister_mod(&rib_mod);
4430*065714dcSSiddheshwar Mahesh 	(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
4431*065714dcSSiddheshwar Mahesh 	(void) ibt_close_hca(hca->hca_hdl);
4432*065714dcSSiddheshwar Mahesh 	hca->hca_hdl = NULL;
4433*065714dcSSiddheshwar Mahesh }
4434*065714dcSSiddheshwar Mahesh 
4435*065714dcSSiddheshwar Mahesh /*
44367c478bd9Sstevel@tonic-gate  * Cleans and closes up all uses of the HCA
44377c478bd9Sstevel@tonic-gate  */
44387c478bd9Sstevel@tonic-gate static void
44397c478bd9Sstevel@tonic-gate rib_detach_hca(rib_hca_t *hca)
44407c478bd9Sstevel@tonic-gate {
44417c478bd9Sstevel@tonic-gate 
44427c478bd9Sstevel@tonic-gate 	/*
44437c478bd9Sstevel@tonic-gate 	 * Stop all services on the HCA
44447c478bd9Sstevel@tonic-gate 	 * Go through cl_conn_list and close all rc_channels
44457c478bd9Sstevel@tonic-gate 	 * Go through svr_conn_list and close all rc_channels
44467c478bd9Sstevel@tonic-gate 	 * Free connections whose c_ref has dropped to 0
44477c478bd9Sstevel@tonic-gate 	 * Destroy all CQs
44487c478bd9Sstevel@tonic-gate 	 * Deregister and released all buffer pool memory after all
44497c478bd9Sstevel@tonic-gate 	 * connections are destroyed
44507c478bd9Sstevel@tonic-gate 	 * Free the protection domain
44517c478bd9Sstevel@tonic-gate 	 * ibt_close_hca()
44527c478bd9Sstevel@tonic-gate 	 */
44537c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_WRITER);
44547c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_DETACHED) {
44557c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
44567c478bd9Sstevel@tonic-gate 		return;
44577c478bd9Sstevel@tonic-gate 	}
44587c478bd9Sstevel@tonic-gate 
44597c478bd9Sstevel@tonic-gate 	hca->state = HCA_DETACHED;
44607c478bd9Sstevel@tonic-gate 	rib_stat->nhca_inited--;
44617c478bd9Sstevel@tonic-gate 
44627c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
44637c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->cl_conn_list);
44647c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->srv_conn_list);
446551f34d4bSRajkumar Sivaprakasam 
446651f34d4bSRajkumar Sivaprakasam 	rib_mod.rdma_count--;
446751f34d4bSRajkumar Sivaprakasam 
44687c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
44697c478bd9Sstevel@tonic-gate 
447051f34d4bSRajkumar Sivaprakasam 	rib_purge_connlist(&hca->cl_conn_list);
447151f34d4bSRajkumar Sivaprakasam 	rib_purge_connlist(&hca->srv_conn_list);
447251f34d4bSRajkumar Sivaprakasam 
447351f34d4bSRajkumar Sivaprakasam 	if (stats_enabled) {
447451f34d4bSRajkumar Sivaprakasam 		kstat_delete_byname_zone("unix", 0, "rpcib_cache",
447551f34d4bSRajkumar Sivaprakasam 		    GLOBAL_ZONEID);
447651f34d4bSRajkumar Sivaprakasam 	}
44777c478bd9Sstevel@tonic-gate 
44787c478bd9Sstevel@tonic-gate 	rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
44797c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
44807c478bd9Sstevel@tonic-gate 	if (hca->srv_conn_list.conn_hd == NULL &&
44817c478bd9Sstevel@tonic-gate 	    hca->cl_conn_list.conn_hd == NULL) {
44827c478bd9Sstevel@tonic-gate 		/*
44837c478bd9Sstevel@tonic-gate 		 * conn_lists are NULL, so destroy
44847c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
44857c478bd9Sstevel@tonic-gate 		 */
4486*065714dcSSiddheshwar Mahesh 		rib_free_hca(hca);
44877c478bd9Sstevel@tonic-gate 	}
44887c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
44897c478bd9Sstevel@tonic-gate 	rw_exit(&hca->srv_conn_list.conn_lock);
44907c478bd9Sstevel@tonic-gate 
44917c478bd9Sstevel@tonic-gate 	if (hca->hca_hdl != NULL) {
44927c478bd9Sstevel@tonic-gate 		mutex_enter(&hca->inuse_lock);
44937c478bd9Sstevel@tonic-gate 		while (hca->inuse)
44947c478bd9Sstevel@tonic-gate 			cv_wait(&hca->cb_cv, &hca->inuse_lock);
44957c478bd9Sstevel@tonic-gate 		mutex_exit(&hca->inuse_lock);
449651f34d4bSRajkumar Sivaprakasam 
4497*065714dcSSiddheshwar Mahesh 		rib_free_hca(hca);
4498*065714dcSSiddheshwar Mahesh 	}
449951f34d4bSRajkumar Sivaprakasam 
4500*065714dcSSiddheshwar Mahesh 	if (hca->cleanup_helper != NULL) {
4501*065714dcSSiddheshwar Mahesh 		ddi_taskq_destroy(hca->cleanup_helper);
4502*065714dcSSiddheshwar Mahesh 		hca->cleanup_helper = NULL;
45037c478bd9Sstevel@tonic-gate 	}
45047c478bd9Sstevel@tonic-gate }
45050a701b1eSRobert Gordon 
45060a701b1eSRobert Gordon static void
45070a701b1eSRobert Gordon rib_server_side_cache_reclaim(void *argp)
45080a701b1eSRobert Gordon {
45090a701b1eSRobert Gordon 	cache_avl_struct_t    *rcas;
45100a701b1eSRobert Gordon 	rib_lrc_entry_t		*rb;
45110a701b1eSRobert Gordon 	rib_hca_t *hca = (rib_hca_t *)argp;
45120a701b1eSRobert Gordon 
45130a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
45140a701b1eSRobert Gordon 	rcas = avl_first(&hca->avl_tree);
45150a701b1eSRobert Gordon 	if (rcas != NULL)
45160a701b1eSRobert Gordon 		avl_remove(&hca->avl_tree, rcas);
45170a701b1eSRobert Gordon 
45180a701b1eSRobert Gordon 	while (rcas != NULL) {
45190a701b1eSRobert Gordon 		while (rcas->r.forw != &rcas->r) {
45200a701b1eSRobert Gordon 			rcas->elements--;
45210a701b1eSRobert Gordon 			rib_total_buffers --;
45220a701b1eSRobert Gordon 			rb = rcas->r.forw;
45230a701b1eSRobert Gordon 			remque(rb);
45240a701b1eSRobert Gordon 			if (rb->registered)
45250a701b1eSRobert Gordon 				(void) rib_deregistermem_via_hca(hca,
45260a701b1eSRobert Gordon 				    rb->lrc_buf, rb->lrc_mhandle);
45270a701b1eSRobert Gordon 			cache_allocation -= rb->lrc_len;
45280a701b1eSRobert Gordon 			kmem_free(rb->lrc_buf, rb->lrc_len);
45290a701b1eSRobert Gordon 			kmem_free(rb, sizeof (rib_lrc_entry_t));
45300a701b1eSRobert Gordon 		}
45310a701b1eSRobert Gordon 		mutex_destroy(&rcas->node_lock);
45320a701b1eSRobert Gordon 		kmem_cache_free(hca->server_side_cache, rcas);
45330a701b1eSRobert Gordon 		rcas = avl_first(&hca->avl_tree);
45340a701b1eSRobert Gordon 		if (rcas != NULL)
45350a701b1eSRobert Gordon 			avl_remove(&hca->avl_tree, rcas);
45360a701b1eSRobert Gordon 	}
45370a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
45380a701b1eSRobert Gordon }
45390a701b1eSRobert Gordon 
45400a701b1eSRobert Gordon static void
45410a701b1eSRobert Gordon rib_server_side_cache_cleanup(void *argp)
45420a701b1eSRobert Gordon {
45430a701b1eSRobert Gordon 	cache_avl_struct_t    *rcas;
45440a701b1eSRobert Gordon 	rib_lrc_entry_t		*rb;
45450a701b1eSRobert Gordon 	rib_hca_t *hca = (rib_hca_t *)argp;
45460a701b1eSRobert Gordon 
45470a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
45480a701b1eSRobert Gordon 	if (cache_allocation < cache_limit) {
45490a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
45500a701b1eSRobert Gordon 		return;
45510a701b1eSRobert Gordon 	}
45520a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
45530a701b1eSRobert Gordon 
45540a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
45550a701b1eSRobert Gordon 	rcas = avl_last(&hca->avl_tree);
45560a701b1eSRobert Gordon 	if (rcas != NULL)
45570a701b1eSRobert Gordon 		avl_remove(&hca->avl_tree, rcas);
45580a701b1eSRobert Gordon 
45590a701b1eSRobert Gordon 	while (rcas != NULL) {
45600a701b1eSRobert Gordon 		while (rcas->r.forw != &rcas->r) {
45610a701b1eSRobert Gordon 			rcas->elements--;
45620a701b1eSRobert Gordon 			rib_total_buffers --;
45630a701b1eSRobert Gordon 			rb = rcas->r.forw;
45640a701b1eSRobert Gordon 			remque(rb);
45650a701b1eSRobert Gordon 			if (rb->registered)
45660a701b1eSRobert Gordon 				(void) rib_deregistermem_via_hca(hca,
45670a701b1eSRobert Gordon 				    rb->lrc_buf, rb->lrc_mhandle);
45680a701b1eSRobert Gordon 			cache_allocation -= rb->lrc_len;
45690a701b1eSRobert Gordon 			kmem_free(rb->lrc_buf, rb->lrc_len);
45700a701b1eSRobert Gordon 			kmem_free(rb, sizeof (rib_lrc_entry_t));
45710a701b1eSRobert Gordon 		}
45720a701b1eSRobert Gordon 		mutex_destroy(&rcas->node_lock);
457351f34d4bSRajkumar Sivaprakasam 		if (hca->server_side_cache) {
45740a701b1eSRobert Gordon 			kmem_cache_free(hca->server_side_cache, rcas);
457551f34d4bSRajkumar Sivaprakasam 		}
45760a701b1eSRobert Gordon 		if ((cache_allocation) < cache_limit) {
45770a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
45780a701b1eSRobert Gordon 			return;
45790a701b1eSRobert Gordon 		}
45800a701b1eSRobert Gordon 
45810a701b1eSRobert Gordon 		rcas = avl_last(&hca->avl_tree);
45820a701b1eSRobert Gordon 		if (rcas != NULL)
45830a701b1eSRobert Gordon 			avl_remove(&hca->avl_tree, rcas);
45840a701b1eSRobert Gordon 	}
45850a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
45860a701b1eSRobert Gordon }
45870a701b1eSRobert Gordon 
45880a701b1eSRobert Gordon static int
45890a701b1eSRobert Gordon avl_compare(const void *t1, const void *t2)
45900a701b1eSRobert Gordon {
45910a701b1eSRobert Gordon 	if (((cache_avl_struct_t *)t1)->len == ((cache_avl_struct_t *)t2)->len)
45920a701b1eSRobert Gordon 		return (0);
45930a701b1eSRobert Gordon 
45940a701b1eSRobert Gordon 	if (((cache_avl_struct_t *)t1)->len < ((cache_avl_struct_t *)t2)->len)
45950a701b1eSRobert Gordon 		return (-1);
45960a701b1eSRobert Gordon 
45970a701b1eSRobert Gordon 	return (1);
45980a701b1eSRobert Gordon }
45990a701b1eSRobert Gordon 
46000a701b1eSRobert Gordon static void
46010a701b1eSRobert Gordon rib_destroy_cache(rib_hca_t *hca)
46020a701b1eSRobert Gordon {
460351f34d4bSRajkumar Sivaprakasam 	if (hca->avl_init) {
460451f34d4bSRajkumar Sivaprakasam 		rib_server_side_cache_reclaim((void *)hca);
460551f34d4bSRajkumar Sivaprakasam 		if (hca->server_side_cache) {
46060a701b1eSRobert Gordon 			kmem_cache_destroy(hca->server_side_cache);
460751f34d4bSRajkumar Sivaprakasam 			hca->server_side_cache = NULL;
460851f34d4bSRajkumar Sivaprakasam 		}
46090a701b1eSRobert Gordon 		avl_destroy(&hca->avl_tree);
46100a701b1eSRobert Gordon 		mutex_destroy(&hca->cache_allocation);
46110a701b1eSRobert Gordon 		rw_destroy(&hca->avl_rw_lock);
46120a701b1eSRobert Gordon 	}
46130a701b1eSRobert Gordon 	hca->avl_init = FALSE;
46140a701b1eSRobert Gordon }
46150a701b1eSRobert Gordon 
46160a701b1eSRobert Gordon static void
46170a701b1eSRobert Gordon rib_force_cleanup(void *hca)
46180a701b1eSRobert Gordon {
4619*065714dcSSiddheshwar Mahesh 	if (((rib_hca_t *)hca)->cleanup_helper != NULL)
46200a701b1eSRobert Gordon 		(void) ddi_taskq_dispatch(
4621*065714dcSSiddheshwar Mahesh 		    ((rib_hca_t *)hca)->cleanup_helper,
46220a701b1eSRobert Gordon 		    rib_server_side_cache_cleanup,
46230a701b1eSRobert Gordon 		    (void *)hca, DDI_NOSLEEP);
46240a701b1eSRobert Gordon }
46250a701b1eSRobert Gordon 
46260a701b1eSRobert Gordon static rib_lrc_entry_t *
46270a701b1eSRobert Gordon rib_get_cache_buf(CONN *conn, uint32_t len)
46280a701b1eSRobert Gordon {
46290a701b1eSRobert Gordon 	cache_avl_struct_t	cas, *rcas;
46300a701b1eSRobert Gordon 	rib_hca_t	*hca = (ctoqp(conn))->hca;
46310a701b1eSRobert Gordon 	rib_lrc_entry_t *reply_buf;
46320a701b1eSRobert Gordon 	avl_index_t where = NULL;
46330a701b1eSRobert Gordon 	uint64_t c_alloc = 0;
46340a701b1eSRobert Gordon 
46350a701b1eSRobert Gordon 	if (!hca->avl_init)
46360a701b1eSRobert Gordon 		goto  error_alloc;
46370a701b1eSRobert Gordon 
46380a701b1eSRobert Gordon 	cas.len = len;
46390a701b1eSRobert Gordon 
46400a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
46410a701b1eSRobert Gordon 
46420a701b1eSRobert Gordon 	mutex_enter(&hca->cache_allocation);
46430a701b1eSRobert Gordon 	c_alloc = cache_allocation;
46440a701b1eSRobert Gordon 	mutex_exit(&hca->cache_allocation);
46450a701b1eSRobert Gordon 
46460a701b1eSRobert Gordon 	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas,
46470a701b1eSRobert Gordon 	    &where)) == NULL) {
46480a701b1eSRobert Gordon 		/* Am I above the cache limit */
46490a701b1eSRobert Gordon 		if ((c_alloc + len) >= cache_limit) {
46500a701b1eSRobert Gordon 			rib_force_cleanup((void *)hca);
46510a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
46520a701b1eSRobert Gordon 			cache_misses_above_the_limit ++;
46530a701b1eSRobert Gordon 
46540a701b1eSRobert Gordon 			/* Allocate and register the buffer directly */
46550a701b1eSRobert Gordon 			goto error_alloc;
46560a701b1eSRobert Gordon 		}
46570a701b1eSRobert Gordon 
46580a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
46590a701b1eSRobert Gordon 		rw_enter(&hca->avl_rw_lock, RW_WRITER);
46600a701b1eSRobert Gordon 
46610a701b1eSRobert Gordon 		/* Recheck to make sure no other thread added the entry in */
46620a701b1eSRobert Gordon 		if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
46630a701b1eSRobert Gordon 		    &cas, &where)) == NULL) {
46640a701b1eSRobert Gordon 			/* Allocate an avl tree entry */
46650a701b1eSRobert Gordon 			rcas = (cache_avl_struct_t *)
46660a701b1eSRobert Gordon 			    kmem_cache_alloc(hca->server_side_cache, KM_SLEEP);
46670a701b1eSRobert Gordon 
46680a701b1eSRobert Gordon 			bzero(rcas, sizeof (cache_avl_struct_t));
46690a701b1eSRobert Gordon 			rcas->elements = 0;
46700a701b1eSRobert Gordon 			rcas->r.forw = &rcas->r;
46710a701b1eSRobert Gordon 			rcas->r.back = &rcas->r;
46720a701b1eSRobert Gordon 			rcas->len = len;
46730a701b1eSRobert Gordon 			mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL);
46740a701b1eSRobert Gordon 			avl_insert(&hca->avl_tree, rcas, where);
46750a701b1eSRobert Gordon 		}
46760a701b1eSRobert Gordon 	}
46770a701b1eSRobert Gordon 
46780a701b1eSRobert Gordon 	mutex_enter(&rcas->node_lock);
46790a701b1eSRobert Gordon 
46800a701b1eSRobert Gordon 	if (rcas->r.forw != &rcas->r && rcas->elements > 0) {
46810a701b1eSRobert Gordon 		rib_total_buffers--;
46820a701b1eSRobert Gordon 		cache_hits++;
46830a701b1eSRobert Gordon 		reply_buf = rcas->r.forw;
46840a701b1eSRobert Gordon 		remque(reply_buf);
46850a701b1eSRobert Gordon 		rcas->elements--;
46860a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
46870a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
46880a701b1eSRobert Gordon 		mutex_enter(&hca->cache_allocation);
46890a701b1eSRobert Gordon 		cache_allocation -= len;
46900a701b1eSRobert Gordon 		mutex_exit(&hca->cache_allocation);
46910a701b1eSRobert Gordon 	} else {
46920a701b1eSRobert Gordon 		/* Am I above the cache limit */
46930a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
46940a701b1eSRobert Gordon 		if ((c_alloc + len) >= cache_limit) {
46950a701b1eSRobert Gordon 			rib_force_cleanup((void *)hca);
46960a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
46970a701b1eSRobert Gordon 			cache_misses_above_the_limit ++;
46980a701b1eSRobert Gordon 			/* Allocate and register the buffer directly */
46990a701b1eSRobert Gordon 			goto error_alloc;
47000a701b1eSRobert Gordon 		}
47010a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
47020a701b1eSRobert Gordon 		cache_misses ++;
47030a701b1eSRobert Gordon 		/* Allocate a reply_buf entry */
47040a701b1eSRobert Gordon 		reply_buf = (rib_lrc_entry_t *)
47050a701b1eSRobert Gordon 		    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
47060a701b1eSRobert Gordon 		bzero(reply_buf, sizeof (rib_lrc_entry_t));
47070a701b1eSRobert Gordon 		reply_buf->lrc_buf  = kmem_alloc(len, KM_SLEEP);
47080a701b1eSRobert Gordon 		reply_buf->lrc_len  = len;
47090a701b1eSRobert Gordon 		reply_buf->registered = FALSE;
47100a701b1eSRobert Gordon 		reply_buf->avl_node = (void *)rcas;
47110a701b1eSRobert Gordon 	}
47120a701b1eSRobert Gordon 
47130a701b1eSRobert Gordon 	return (reply_buf);
47140a701b1eSRobert Gordon 
47150a701b1eSRobert Gordon error_alloc:
47160a701b1eSRobert Gordon 	reply_buf = (rib_lrc_entry_t *)
47170a701b1eSRobert Gordon 	    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
47180a701b1eSRobert Gordon 	bzero(reply_buf, sizeof (rib_lrc_entry_t));
47190a701b1eSRobert Gordon 	reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
47200a701b1eSRobert Gordon 	reply_buf->lrc_len = len;
47210a701b1eSRobert Gordon 	reply_buf->registered = FALSE;
47220a701b1eSRobert Gordon 	reply_buf->avl_node = NULL;
47230a701b1eSRobert Gordon 
47240a701b1eSRobert Gordon 	return (reply_buf);
47250a701b1eSRobert Gordon }
47260a701b1eSRobert Gordon 
47270a701b1eSRobert Gordon /*
47280a701b1eSRobert Gordon  * Return a pre-registered back to the cache (without
47290a701b1eSRobert Gordon  * unregistering the buffer)..
47300a701b1eSRobert Gordon  */
47310a701b1eSRobert Gordon 
47320a701b1eSRobert Gordon static void
47330a701b1eSRobert Gordon rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
47340a701b1eSRobert Gordon {
47350a701b1eSRobert Gordon 	cache_avl_struct_t    cas, *rcas;
47360a701b1eSRobert Gordon 	avl_index_t where = NULL;
47370a701b1eSRobert Gordon 	rib_hca_t	*hca = (ctoqp(conn))->hca;
47380a701b1eSRobert Gordon 
47390a701b1eSRobert Gordon 	if (!hca->avl_init)
47400a701b1eSRobert Gordon 		goto  error_free;
47410a701b1eSRobert Gordon 
47420a701b1eSRobert Gordon 	cas.len = reg_buf->lrc_len;
47430a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
47440a701b1eSRobert Gordon 	if ((rcas = (cache_avl_struct_t *)
47450a701b1eSRobert Gordon 	    avl_find(&hca->avl_tree, &cas, &where)) == NULL) {
47460a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
47470a701b1eSRobert Gordon 		goto error_free;
47480a701b1eSRobert Gordon 	} else {
47490a701b1eSRobert Gordon 		rib_total_buffers ++;
47500a701b1eSRobert Gordon 		cas.len = reg_buf->lrc_len;
47510a701b1eSRobert Gordon 		mutex_enter(&rcas->node_lock);
47520a701b1eSRobert Gordon 		insque(reg_buf, &rcas->r);
47530a701b1eSRobert Gordon 		rcas->elements ++;
47540a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
47550a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
47560a701b1eSRobert Gordon 		mutex_enter(&hca->cache_allocation);
47570a701b1eSRobert Gordon 		cache_allocation += cas.len;
47580a701b1eSRobert Gordon 		mutex_exit(&hca->cache_allocation);
47590a701b1eSRobert Gordon 	}
47600a701b1eSRobert Gordon 
47610a701b1eSRobert Gordon 	return;
47620a701b1eSRobert Gordon 
47630a701b1eSRobert Gordon error_free:
47640a701b1eSRobert Gordon 
47650a701b1eSRobert Gordon 	if (reg_buf->registered)
47660a701b1eSRobert Gordon 		(void) rib_deregistermem_via_hca(hca,
47670a701b1eSRobert Gordon 		    reg_buf->lrc_buf, reg_buf->lrc_mhandle);
47680a701b1eSRobert Gordon 	kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
47690a701b1eSRobert Gordon 	kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
47700a701b1eSRobert Gordon }
47710a701b1eSRobert Gordon 
47720a701b1eSRobert Gordon static rdma_stat
47730a701b1eSRobert Gordon rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
47740a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle)
47750a701b1eSRobert Gordon {
47760a701b1eSRobert Gordon 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
47770a701b1eSRobert Gordon 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
47780a701b1eSRobert Gordon 	rdma_stat	status;
47790a701b1eSRobert Gordon 
47800a701b1eSRobert Gordon 
47810a701b1eSRobert Gordon 	/*
47820a701b1eSRobert Gordon 	 * Note: ALL buffer pools use the same memory type RDMARW.
47830a701b1eSRobert Gordon 	 */
47840a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
47850a701b1eSRobert Gordon 	if (status == RDMA_SUCCESS) {
47860a701b1eSRobert Gordon 		buf_handle->mrc_linfo = (uint64_t)(uintptr_t)mr_hdl;
47870a701b1eSRobert Gordon 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
47880a701b1eSRobert Gordon 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
47890a701b1eSRobert Gordon 	} else {
47900a701b1eSRobert Gordon 		buf_handle->mrc_linfo = NULL;
47910a701b1eSRobert Gordon 		buf_handle->mrc_lmr = 0;
47920a701b1eSRobert Gordon 		buf_handle->mrc_rmr = 0;
47930a701b1eSRobert Gordon 	}
47940a701b1eSRobert Gordon 	return (status);
47950a701b1eSRobert Gordon }
47960a701b1eSRobert Gordon 
47970a701b1eSRobert Gordon /* ARGSUSED */
47980a701b1eSRobert Gordon static rdma_stat
47990a701b1eSRobert Gordon rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf,
48000a701b1eSRobert Gordon     struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle)
48010a701b1eSRobert Gordon {
48020a701b1eSRobert Gordon 
48030a701b1eSRobert Gordon 	(void) rib_deregistermem_via_hca(hca, buf, buf_handle);
48040a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
48050a701b1eSRobert Gordon }
48060a701b1eSRobert Gordon 
48070a701b1eSRobert Gordon /* ARGSUSED */
48080a701b1eSRobert Gordon static rdma_stat
48090a701b1eSRobert Gordon rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
48100a701b1eSRobert Gordon {
48110a701b1eSRobert Gordon 
48120a701b1eSRobert Gordon 	(void) ibt_deregister_mr(hca->hca_hdl,
48130a701b1eSRobert Gordon 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
48140a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
48150a701b1eSRobert Gordon }
48160a701b1eSRobert Gordon 
48170a701b1eSRobert Gordon /*
4818e11c3f44Smeem  * Check if the IP interface named by `lifrp' is RDMA-capable.
48190a701b1eSRobert Gordon  */
4820e11c3f44Smeem static boolean_t
4821e11c3f44Smeem rpcib_rdma_capable_interface(struct lifreq *lifrp)
48220a701b1eSRobert Gordon {
4823e11c3f44Smeem 	char ifname[LIFNAMSIZ];
4824e11c3f44Smeem 	char *cp;
48250a701b1eSRobert Gordon 
4826e11c3f44Smeem 	if (lifrp->lifr_type == IFT_IB)
4827e11c3f44Smeem 		return (B_TRUE);
48280a701b1eSRobert Gordon 
48290a701b1eSRobert Gordon 	/*
4830e11c3f44Smeem 	 * Strip off the logical interface portion before getting
4831e11c3f44Smeem 	 * intimate with the name.
48320a701b1eSRobert Gordon 	 */
4833e11c3f44Smeem 	(void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
4834e11c3f44Smeem 	if ((cp = strchr(ifname, ':')) != NULL)
4835e11c3f44Smeem 		*cp = '\0';
48360a701b1eSRobert Gordon 
4837e11c3f44Smeem 	return (strcmp("lo0", ifname) == 0);
48380a701b1eSRobert Gordon }
48390a701b1eSRobert Gordon 
48400a701b1eSRobert Gordon static int
4841e11c3f44Smeem rpcib_do_ip_ioctl(int cmd, int len, void *arg)
48420a701b1eSRobert Gordon {
48430a701b1eSRobert Gordon 	vnode_t *kvp, *vp;
48440a701b1eSRobert Gordon 	TIUSER  *tiptr;
48450a701b1eSRobert Gordon 	struct  strioctl iocb;
48460a701b1eSRobert Gordon 	k_sigset_t smask;
48470a701b1eSRobert Gordon 	int	err = 0;
48480a701b1eSRobert Gordon 
4849e11c3f44Smeem 	if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kvp) == 0) {
4850e11c3f44Smeem 		if (t_kopen(NULL, kvp->v_rdev, FREAD|FWRITE,
48510a701b1eSRobert Gordon 		    &tiptr, CRED()) == 0) {
48520a701b1eSRobert Gordon 			vp = tiptr->fp->f_vnode;
48530a701b1eSRobert Gordon 		} else {
48540a701b1eSRobert Gordon 			VN_RELE(kvp);
48550a701b1eSRobert Gordon 			return (EPROTO);
48560a701b1eSRobert Gordon 		}
48570a701b1eSRobert Gordon 	} else {
48580a701b1eSRobert Gordon 		return (EPROTO);
48590a701b1eSRobert Gordon 	}
48600a701b1eSRobert Gordon 
48610a701b1eSRobert Gordon 	iocb.ic_cmd = cmd;
48620a701b1eSRobert Gordon 	iocb.ic_timout = 0;
48630a701b1eSRobert Gordon 	iocb.ic_len = len;
4864e11c3f44Smeem 	iocb.ic_dp = (caddr_t)arg;
48650a701b1eSRobert Gordon 	sigintr(&smask, 0);
48660a701b1eSRobert Gordon 	err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
48670a701b1eSRobert Gordon 	sigunintr(&smask);
48680a701b1eSRobert Gordon 	(void) t_kclose(tiptr, 0);
48690a701b1eSRobert Gordon 	VN_RELE(kvp);
48700a701b1eSRobert Gordon 	return (err);
48710a701b1eSRobert Gordon }
48720a701b1eSRobert Gordon 
4873e11c3f44Smeem /*
4874e11c3f44Smeem  * Issue an SIOCGLIFCONF down to IP and return the result in `lifcp'.
4875e11c3f44Smeem  * lifcp->lifc_buf is dynamically allocated to be *bufsizep bytes.
4876e11c3f44Smeem  */
4877e11c3f44Smeem static int
4878e11c3f44Smeem rpcib_do_lifconf(struct lifconf *lifcp, uint_t *bufsizep)
4879e11c3f44Smeem {
4880e11c3f44Smeem 	int err;
4881e11c3f44Smeem 	struct lifnum lifn;
4882e11c3f44Smeem 
4883e11c3f44Smeem 	bzero(&lifn, sizeof (struct lifnum));
4884e11c3f44Smeem 	lifn.lifn_family = AF_UNSPEC;
4885e11c3f44Smeem 
4886e11c3f44Smeem 	err = rpcib_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn);
4887e11c3f44Smeem 	if (err != 0)
4888e11c3f44Smeem 		return (err);
4889e11c3f44Smeem 
4890e11c3f44Smeem 	/*
4891e11c3f44Smeem 	 * Pad the interface count to account for additional interfaces that
4892e11c3f44Smeem 	 * may have been configured between the SIOCGLIFNUM and SIOCGLIFCONF.
4893e11c3f44Smeem 	 */
4894e11c3f44Smeem 	lifn.lifn_count += 4;
4895e11c3f44Smeem 
4896e11c3f44Smeem 	bzero(lifcp, sizeof (struct lifconf));
4897e11c3f44Smeem 	lifcp->lifc_family = AF_UNSPEC;
4898e11c3f44Smeem 	lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq);
4899e11c3f44Smeem 	lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP);
4900e11c3f44Smeem 
4901e11c3f44Smeem 	err = rpcib_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp);
4902e11c3f44Smeem 	if (err != 0) {
4903e11c3f44Smeem 		kmem_free(lifcp->lifc_buf, *bufsizep);
4904e11c3f44Smeem 		return (err);
49050a701b1eSRobert Gordon 	}
4906e11c3f44Smeem 	return (0);
49070a701b1eSRobert Gordon }
49080a701b1eSRobert Gordon 
49090a701b1eSRobert Gordon static boolean_t
4910e11c3f44Smeem rpcib_get_ib_addresses(rpcib_ipaddrs_t *addrs4, rpcib_ipaddrs_t *addrs6)
49110a701b1eSRobert Gordon {
4912e11c3f44Smeem 	uint_t i, nifs;
4913e11c3f44Smeem 	uint_t bufsize;
4914e11c3f44Smeem 	struct lifconf lifc;
4915e11c3f44Smeem 	struct lifreq *lifrp;
4916e11c3f44Smeem 	struct sockaddr_in *sinp;
4917e11c3f44Smeem 	struct sockaddr_in6 *sin6p;
49180a701b1eSRobert Gordon 
4919e11c3f44Smeem 	bzero(addrs4, sizeof (rpcib_ipaddrs_t));
4920e11c3f44Smeem 	bzero(addrs6, sizeof (rpcib_ipaddrs_t));
49210a701b1eSRobert Gordon 
4922e11c3f44Smeem 	if (rpcib_do_lifconf(&lifc, &bufsize) != 0)
4923e11c3f44Smeem 		return (B_FALSE);
4924e11c3f44Smeem 
4925e11c3f44Smeem 	if ((nifs = lifc.lifc_len / sizeof (struct lifreq)) == 0) {
4926e11c3f44Smeem 		kmem_free(lifc.lifc_buf, bufsize);
4927e11c3f44Smeem 		return (B_FALSE);
49280a701b1eSRobert Gordon 	}
49290a701b1eSRobert Gordon 
4930e11c3f44Smeem 	/*
4931e11c3f44Smeem 	 * Worst case is that all of the addresses are IB-capable and have
4932e11c3f44Smeem 	 * the same address family, so size our buffers accordingly.
4933e11c3f44Smeem 	 */
4934e11c3f44Smeem 	addrs4->ri_size = nifs * sizeof (struct sockaddr_in);
4935e11c3f44Smeem 	addrs4->ri_list = kmem_zalloc(addrs4->ri_size, KM_SLEEP);
4936e11c3f44Smeem 	addrs6->ri_size = nifs * sizeof (struct sockaddr_in6);
4937e11c3f44Smeem 	addrs6->ri_list = kmem_zalloc(addrs6->ri_size, KM_SLEEP);
49380a701b1eSRobert Gordon 
4939e11c3f44Smeem 	for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
4940e11c3f44Smeem 		if (!rpcib_rdma_capable_interface(lifrp))
4941e11c3f44Smeem 			continue;
4942e11c3f44Smeem 
4943e11c3f44Smeem 		if (lifrp->lifr_addr.ss_family == AF_INET) {
4944e11c3f44Smeem 			sinp = addrs4->ri_list;
4945e11c3f44Smeem 			bcopy(&lifrp->lifr_addr, &sinp[addrs4->ri_count++],
4946e11c3f44Smeem 			    sizeof (struct sockaddr_in));
4947e11c3f44Smeem 		} else if (lifrp->lifr_addr.ss_family == AF_INET6) {
4948e11c3f44Smeem 			sin6p = addrs6->ri_list;
4949e11c3f44Smeem 			bcopy(&lifrp->lifr_addr, &sin6p[addrs6->ri_count++],
4950e11c3f44Smeem 			    sizeof (struct sockaddr_in6));
4951e11c3f44Smeem 		}
49520a701b1eSRobert Gordon 	}
49530a701b1eSRobert Gordon 
4954e11c3f44Smeem 	kmem_free(lifc.lifc_buf, bufsize);
4955e11c3f44Smeem 	return (B_TRUE);
49560a701b1eSRobert Gordon }
49570a701b1eSRobert Gordon 
49580a701b1eSRobert Gordon /* ARGSUSED */
49590a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *ksp, int rw) {
49600a701b1eSRobert Gordon 
49610a701b1eSRobert Gordon 	if (KSTAT_WRITE == rw) {
49620a701b1eSRobert Gordon 		return (EACCES);
49630a701b1eSRobert Gordon 	}
49640a701b1eSRobert Gordon 	rpcib_kstat.cache_limit.value.ui64 =
49650a701b1eSRobert Gordon 	    (uint64_t)cache_limit;
49660a701b1eSRobert Gordon 	rpcib_kstat.cache_allocation.value.ui64 =
49670a701b1eSRobert Gordon 	    (uint64_t)cache_allocation;
49680a701b1eSRobert Gordon 	rpcib_kstat.cache_hits.value.ui64 =
49690a701b1eSRobert Gordon 	    (uint64_t)cache_hits;
49700a701b1eSRobert Gordon 	rpcib_kstat.cache_misses.value.ui64 =
49710a701b1eSRobert Gordon 	    (uint64_t)cache_misses;
49720a701b1eSRobert Gordon 	rpcib_kstat.cache_misses_above_the_limit.value.ui64 =
49730a701b1eSRobert Gordon 	    (uint64_t)cache_misses_above_the_limit;
49740a701b1eSRobert Gordon 	return (0);
49750a701b1eSRobert Gordon }
4976