xref: /titanic_53/usr/src/uts/common/rpc/rpcib.c (revision 0a4b08109ed53cd50c90d18c62198d62eec19da0)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50a701b1eSRobert Gordon  * Common Development and Distribution License (the "License").
60a701b1eSRobert Gordon  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*0a4b0810SKaren Rochford  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
/*
 * Copyright (c) 2007, The Ohio State University. All rights reserved.
 *
 * Portions of this source code were developed by the team members of
 * The Ohio State University's Network-Based Computing Laboratory (NBCL),
 * headed by Professor Dhabaleswar K. (DK) Panda.
 *
 * Acknowledgements to contributions from developers:
 *   Ranjit Noronha: noronha@cse.ohio-state.edu
 *   Lei Chai      : chail@cse.ohio-state.edu
 *   Weikuan Yu    : yuw@cse.ohio-state.edu
 *
 */
387c478bd9Sstevel@tonic-gate 
397c478bd9Sstevel@tonic-gate /*
407c478bd9Sstevel@tonic-gate  * The rpcib plugin. Implements the interface for RDMATF's
417c478bd9Sstevel@tonic-gate  * interaction with IBTF.
427c478bd9Sstevel@tonic-gate  */
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate #include <sys/param.h>
457c478bd9Sstevel@tonic-gate #include <sys/types.h>
467c478bd9Sstevel@tonic-gate #include <sys/user.h>
477c478bd9Sstevel@tonic-gate #include <sys/systm.h>
487c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
497c478bd9Sstevel@tonic-gate #include <sys/proc.h>
507c478bd9Sstevel@tonic-gate #include <sys/socket.h>
517c478bd9Sstevel@tonic-gate #include <sys/file.h>
527c478bd9Sstevel@tonic-gate #include <sys/stream.h>
537c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
547c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
557c478bd9Sstevel@tonic-gate #include <sys/errno.h>
567c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
577c478bd9Sstevel@tonic-gate #include <sys/debug.h>
587c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
597c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
607c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
617c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
627c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
637c478bd9Sstevel@tonic-gate #include <sys/time.h>
647c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
657c478bd9Sstevel@tonic-gate #include <sys/callb.h>
667c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
677c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
680a701b1eSRobert Gordon #include <sys/sdt.h>
697c478bd9Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h>
707c478bd9Sstevel@tonic-gate #include <rpc/rpc.h>
717c478bd9Sstevel@tonic-gate #include <rpc/ib.h>
727c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
737c478bd9Sstevel@tonic-gate #include <sys/kstr.h>
747c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
757c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
767c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
777c478bd9Sstevel@tonic-gate #include <net/if.h>
78e11c3f44Smeem #include <net/if_types.h>
797c478bd9Sstevel@tonic-gate #include <sys/cred.h>
800a701b1eSRobert Gordon #include <rpc/rpc_rdma.h>
810a701b1eSRobert Gordon #include <nfs/nfs.h>
820a701b1eSRobert Gordon #include <sys/atomic.h>
830a701b1eSRobert Gordon 
84f837ee4aSSiddheshwar Mahesh #define	NFS_RDMA_PORT	20049
85f837ee4aSSiddheshwar Mahesh 
867c478bd9Sstevel@tonic-gate 
87e11c3f44Smeem /*
88214ae7d0SSiddheshwar Mahesh  * Convenience structures for connection management
89e11c3f44Smeem  */
90e11c3f44Smeem typedef struct rpcib_ipaddrs {
91e11c3f44Smeem 	void	*ri_list;	/* pointer to list of addresses */
92e11c3f44Smeem 	uint_t	ri_count;	/* number of addresses in list */
93e11c3f44Smeem 	uint_t	ri_size;	/* size of ri_list in bytes */
94e11c3f44Smeem } rpcib_ipaddrs_t;
957c478bd9Sstevel@tonic-gate 
96214ae7d0SSiddheshwar Mahesh 
97214ae7d0SSiddheshwar Mahesh typedef struct rpcib_ping {
98214ae7d0SSiddheshwar Mahesh 	rib_hca_t  *hca;
99214ae7d0SSiddheshwar Mahesh 	ibt_path_info_t path;
100214ae7d0SSiddheshwar Mahesh 	ibt_ip_addr_t srcip;
101214ae7d0SSiddheshwar Mahesh 	ibt_ip_addr_t dstip;
102214ae7d0SSiddheshwar Mahesh } rpcib_ping_t;
103214ae7d0SSiddheshwar Mahesh 
1047c478bd9Sstevel@tonic-gate /*
1057c478bd9Sstevel@tonic-gate  * Prototype declarations for driver ops
1067c478bd9Sstevel@tonic-gate  */
1077c478bd9Sstevel@tonic-gate static int	rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
1087c478bd9Sstevel@tonic-gate static int	rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
1097c478bd9Sstevel@tonic-gate 				void *, void **);
1107c478bd9Sstevel@tonic-gate static int	rpcib_detach(dev_info_t *, ddi_detach_cmd_t);
111e11c3f44Smeem static boolean_t rpcib_rdma_capable_interface(struct lifreq *);
112e11c3f44Smeem static int	rpcib_do_ip_ioctl(int, int, void *);
113e11c3f44Smeem static boolean_t rpcib_get_ib_addresses(rpcib_ipaddrs_t *, rpcib_ipaddrs_t *);
1140a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *, int);
1150a701b1eSRobert Gordon static void rib_force_cleanup(void *);
1167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static void rib_stop_hca_services(rib_hca_t *);
1177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static void rib_attach_hca(void);
1187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static int rib_find_hca_connection(rib_hca_t *hca, struct netbuf *s_svcaddr,
1197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		struct netbuf *d_svcaddr, CONN **conn);
1207c478bd9Sstevel@tonic-gate 
1210a701b1eSRobert Gordon struct {
1220a701b1eSRobert Gordon 	kstat_named_t cache_limit;
1230a701b1eSRobert Gordon 	kstat_named_t cache_allocation;
1240a701b1eSRobert Gordon 	kstat_named_t cache_hits;
1250a701b1eSRobert Gordon 	kstat_named_t cache_misses;
1260a701b1eSRobert Gordon 	kstat_named_t cache_misses_above_the_limit;
1270a701b1eSRobert Gordon } rpcib_kstat = {
1280a701b1eSRobert Gordon 	{"cache_limit",			KSTAT_DATA_UINT64 },
1290a701b1eSRobert Gordon 	{"cache_allocation",		KSTAT_DATA_UINT64 },
1300a701b1eSRobert Gordon 	{"cache_hits",			KSTAT_DATA_UINT64 },
1310a701b1eSRobert Gordon 	{"cache_misses",		KSTAT_DATA_UINT64 },
1320a701b1eSRobert Gordon 	{"cache_misses_above_the_limit", KSTAT_DATA_UINT64 },
1330a701b1eSRobert Gordon };
1347c478bd9Sstevel@tonic-gate 
1357c478bd9Sstevel@tonic-gate /* rpcib cb_ops */
1367c478bd9Sstevel@tonic-gate static struct cb_ops rpcib_cbops = {
1377c478bd9Sstevel@tonic-gate 	nulldev,		/* open */
1387c478bd9Sstevel@tonic-gate 	nulldev,		/* close */
1397c478bd9Sstevel@tonic-gate 	nodev,			/* strategy */
1407c478bd9Sstevel@tonic-gate 	nodev,			/* print */
1417c478bd9Sstevel@tonic-gate 	nodev,			/* dump */
1427c478bd9Sstevel@tonic-gate 	nodev,			/* read */
1437c478bd9Sstevel@tonic-gate 	nodev,			/* write */
1447c478bd9Sstevel@tonic-gate 	nodev,			/* ioctl */
1457c478bd9Sstevel@tonic-gate 	nodev,			/* devmap */
1467c478bd9Sstevel@tonic-gate 	nodev,			/* mmap */
1477c478bd9Sstevel@tonic-gate 	nodev,			/* segmap */
1487c478bd9Sstevel@tonic-gate 	nochpoll,		/* poll */
1497c478bd9Sstevel@tonic-gate 	ddi_prop_op,		/* prop_op */
1507c478bd9Sstevel@tonic-gate 	NULL,			/* stream */
1517c478bd9Sstevel@tonic-gate 	D_MP,			/* cb_flag */
1527c478bd9Sstevel@tonic-gate 	CB_REV,			/* rev */
1537c478bd9Sstevel@tonic-gate 	nodev,			/* int (*cb_aread)() */
1547c478bd9Sstevel@tonic-gate 	nodev			/* int (*cb_awrite)() */
1557c478bd9Sstevel@tonic-gate };
1567c478bd9Sstevel@tonic-gate 
1577c478bd9Sstevel@tonic-gate /*
1587c478bd9Sstevel@tonic-gate  * Device options
1597c478bd9Sstevel@tonic-gate  */
1607c478bd9Sstevel@tonic-gate static struct dev_ops rpcib_ops = {
1617c478bd9Sstevel@tonic-gate 	DEVO_REV,		/* devo_rev, */
1627c478bd9Sstevel@tonic-gate 	0,			/* refcnt  */
1637c478bd9Sstevel@tonic-gate 	rpcib_getinfo,		/* info */
1647c478bd9Sstevel@tonic-gate 	nulldev,		/* identify */
1657c478bd9Sstevel@tonic-gate 	nulldev,		/* probe */
1667c478bd9Sstevel@tonic-gate 	rpcib_attach,		/* attach */
1677c478bd9Sstevel@tonic-gate 	rpcib_detach,		/* detach */
1687c478bd9Sstevel@tonic-gate 	nodev,			/* reset */
1697c478bd9Sstevel@tonic-gate 	&rpcib_cbops,		    /* driver ops - devctl interfaces */
1707c478bd9Sstevel@tonic-gate 	NULL,			/* bus operations */
17119397407SSherry Moore 	NULL,			/* power */
17219397407SSherry Moore 	ddi_quiesce_not_needed,		/* quiesce */
1737c478bd9Sstevel@tonic-gate };
1747c478bd9Sstevel@tonic-gate 
1757c478bd9Sstevel@tonic-gate /*
1767c478bd9Sstevel@tonic-gate  * Module linkage information.
1777c478bd9Sstevel@tonic-gate  */
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate static struct modldrv rib_modldrv = {
1807c478bd9Sstevel@tonic-gate 	&mod_driverops,		/* Driver module */
18119397407SSherry Moore 	"RPCIB plugin driver",	/* Driver name and version */
1827c478bd9Sstevel@tonic-gate 	&rpcib_ops,		/* Driver ops */
1837c478bd9Sstevel@tonic-gate };
1847c478bd9Sstevel@tonic-gate 
1857c478bd9Sstevel@tonic-gate static struct modlinkage rib_modlinkage = {
1867c478bd9Sstevel@tonic-gate 	MODREV_1,
1877c478bd9Sstevel@tonic-gate 	(void *)&rib_modldrv,
1887c478bd9Sstevel@tonic-gate 	NULL
1897c478bd9Sstevel@tonic-gate };
1907c478bd9Sstevel@tonic-gate 
1910a701b1eSRobert Gordon typedef struct rib_lrc_entry {
1920a701b1eSRobert Gordon 	struct rib_lrc_entry *forw;
1930a701b1eSRobert Gordon 	struct rib_lrc_entry *back;
1940a701b1eSRobert Gordon 	char *lrc_buf;
1950a701b1eSRobert Gordon 
1960a701b1eSRobert Gordon 	uint32_t lrc_len;
1970a701b1eSRobert Gordon 	void  *avl_node;
1980a701b1eSRobert Gordon 	bool_t registered;
1990a701b1eSRobert Gordon 
2000a701b1eSRobert Gordon 	struct mrc lrc_mhandle;
2010a701b1eSRobert Gordon 	bool_t lrc_on_freed_list;
2020a701b1eSRobert Gordon } rib_lrc_entry_t;
2030a701b1eSRobert Gordon 
2040a701b1eSRobert Gordon typedef	struct cache_struct	{
2050a701b1eSRobert Gordon 	rib_lrc_entry_t		r;
2060a701b1eSRobert Gordon 	uint32_t		len;
2070a701b1eSRobert Gordon 	uint32_t		elements;
2080a701b1eSRobert Gordon 	kmutex_t		node_lock;
2090a701b1eSRobert Gordon 	avl_node_t		avl_link;
2100a701b1eSRobert Gordon } cache_avl_struct_t;
2110a701b1eSRobert Gordon 
2120a701b1eSRobert Gordon uint64_t	cache_limit = 100 * 1024 * 1024;
2130a701b1eSRobert Gordon static uint64_t	cache_watermark = 80 * 1024 * 1024;
2140a701b1eSRobert Gordon static bool_t	stats_enabled = FALSE;
2150a701b1eSRobert Gordon 
2160a701b1eSRobert Gordon static uint64_t max_unsignaled_rws = 5;
217f837ee4aSSiddheshwar Mahesh int nfs_rdma_port = NFS_RDMA_PORT;
2180a701b1eSRobert Gordon 
2197523bef8SSiddheshwar Mahesh #define	RIBNETID_TCP	"tcp"
2207523bef8SSiddheshwar Mahesh #define	RIBNETID_TCP6	"tcp6"
2217523bef8SSiddheshwar Mahesh 
2227c478bd9Sstevel@tonic-gate /*
2237c478bd9Sstevel@tonic-gate  * rib_stat: private data pointer used when registering
2247c478bd9Sstevel@tonic-gate  *	with the IBTF.  It is returned to the consumer
2257c478bd9Sstevel@tonic-gate  *	in all callbacks.
2267c478bd9Sstevel@tonic-gate  */
2277c478bd9Sstevel@tonic-gate static rpcib_state_t *rib_stat = NULL;
2287c478bd9Sstevel@tonic-gate 
2290a701b1eSRobert Gordon #define	RNR_RETRIES	IBT_RNR_RETRY_1
2307c478bd9Sstevel@tonic-gate #define	MAX_PORTS	2
231065714dcSSiddheshwar Mahesh #define	RDMA_DUMMY_WRID	0x4D3A1D4D3A1D
232065714dcSSiddheshwar Mahesh #define	RDMA_CONN_REAP_RETRY	10	/* 10 secs */
2337c478bd9Sstevel@tonic-gate 
2340a701b1eSRobert Gordon int preposted_rbufs = RDMA_BUFS_GRANT;
2357c478bd9Sstevel@tonic-gate int send_threshold = 1;
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate /*
238065714dcSSiddheshwar Mahesh  * Old cards with Tavor driver have limited memory footprint
239065714dcSSiddheshwar Mahesh  * when booted in 32bit. The rib_max_rbufs tunable can be
240065714dcSSiddheshwar Mahesh  * tuned for more buffers if needed.
241065714dcSSiddheshwar Mahesh  */
242065714dcSSiddheshwar Mahesh 
243065714dcSSiddheshwar Mahesh #if !defined(_ELF64) && !defined(__sparc)
244065714dcSSiddheshwar Mahesh int rib_max_rbufs = MAX_BUFS;
245065714dcSSiddheshwar Mahesh #else
246065714dcSSiddheshwar Mahesh int rib_max_rbufs = 10 * MAX_BUFS;
247065714dcSSiddheshwar Mahesh #endif	/* !(_ELF64) && !(__sparc) */
248065714dcSSiddheshwar Mahesh 
249065714dcSSiddheshwar Mahesh int rib_conn_timeout = 60 * 12;		/* 12 minutes */
250065714dcSSiddheshwar Mahesh 
251065714dcSSiddheshwar Mahesh /*
2527c478bd9Sstevel@tonic-gate  * State of the plugin.
2537c478bd9Sstevel@tonic-gate  * ACCEPT = accepting new connections and requests.
2547c478bd9Sstevel@tonic-gate  * NO_ACCEPT = not accepting new connection and requests.
2557c478bd9Sstevel@tonic-gate  * This should eventually move to rpcib_state_t structure, since this
2567c478bd9Sstevel@tonic-gate  * will tell in which state the plugin is for a particular type of service
2577c478bd9Sstevel@tonic-gate  * like NFS, NLM or v4 Callback deamon. The plugin might be in accept
2587c478bd9Sstevel@tonic-gate  * state for one and in no_accept state for the other.
2597c478bd9Sstevel@tonic-gate  */
2607c478bd9Sstevel@tonic-gate int		plugin_state;
2617c478bd9Sstevel@tonic-gate kmutex_t	plugin_state_lock;
2627c478bd9Sstevel@tonic-gate 
2630a701b1eSRobert Gordon ldi_ident_t rpcib_li;
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate /*
2667c478bd9Sstevel@tonic-gate  * RPCIB RDMATF operations
2677c478bd9Sstevel@tonic-gate  */
2687c478bd9Sstevel@tonic-gate static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
2697c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect(CONN *conn);
2707c478bd9Sstevel@tonic-gate static void rib_listen(struct rdma_svc_data *rd);
2717c478bd9Sstevel@tonic-gate static void rib_listen_stop(struct rdma_svc_data *rd);
2720a701b1eSRobert Gordon static rdma_stat rib_registermem(CONN *conn, caddr_t  adsp, caddr_t buf,
2730a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle);
2747c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
2757c478bd9Sstevel@tonic-gate 	struct mrc buf_handle);
2760a701b1eSRobert Gordon static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
2770a701b1eSRobert Gordon 		caddr_t buf, uint_t buflen, struct mrc *buf_handle);
2780a701b1eSRobert Gordon static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
2790a701b1eSRobert Gordon 		struct mrc buf_handle);
2800a701b1eSRobert Gordon static rdma_stat rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf,
2810a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle,
2820a701b1eSRobert Gordon 	void *lrc);
2837c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
2840a701b1eSRobert Gordon 	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
2857c478bd9Sstevel@tonic-gate static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
2867c478bd9Sstevel@tonic-gate 	caddr_t buf, int len, int cpu);
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf);
2917c478bd9Sstevel@tonic-gate static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
2947c478bd9Sstevel@tonic-gate 
2957c478bd9Sstevel@tonic-gate static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
2967c478bd9Sstevel@tonic-gate static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
2977c478bd9Sstevel@tonic-gate static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
2980a701b1eSRobert Gordon static rdma_stat rib_post_resp_remove(CONN *conn, uint32_t msgid);
2997c478bd9Sstevel@tonic-gate static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
3007c478bd9Sstevel@tonic-gate static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
3017c478bd9Sstevel@tonic-gate static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
3027c478bd9Sstevel@tonic-gate static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
303214ae7d0SSiddheshwar Mahesh static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rpcib_ping_t *);
3047f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static rdma_stat rib_conn_get(struct netbuf *, struct netbuf *,
3057f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	int addr_type, void *, CONN **);
3067c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_release(CONN *conn);
307ed629aefSSiddheshwar Mahesh static rdma_stat rib_connect(struct netbuf *, struct netbuf *, int,
308ed629aefSSiddheshwar Mahesh 	rpcib_ping_t *, CONN **);
3097c478bd9Sstevel@tonic-gate static rdma_stat rib_getinfo(rdma_info_t *info);
3100a701b1eSRobert Gordon 
3110a701b1eSRobert Gordon static rib_lrc_entry_t *rib_get_cache_buf(CONN *conn, uint32_t len);
3120a701b1eSRobert Gordon static void rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
3130a701b1eSRobert Gordon static void rib_destroy_cache(rib_hca_t *hca);
3140a701b1eSRobert Gordon static	void	rib_server_side_cache_reclaim(void *argp);
3150a701b1eSRobert Gordon static int avl_compare(const void *t1, const void *t2);
3160a701b1eSRobert Gordon 
3177c478bd9Sstevel@tonic-gate static void rib_stop_services(rib_hca_t *);
3180a701b1eSRobert Gordon static void rib_close_channels(rib_conn_list_t *);
319065714dcSSiddheshwar Mahesh static void rib_conn_close(void *);
3209c86cdcdSSiddheshwar Mahesh static void rib_recv_rele(rib_qp_t *);
3219c86cdcdSSiddheshwar Mahesh static rdma_stat rib_conn_release_locked(CONN *conn);
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate /*
3247c478bd9Sstevel@tonic-gate  * RPCIB addressing operations
3257c478bd9Sstevel@tonic-gate  */
3267c478bd9Sstevel@tonic-gate 
3277c478bd9Sstevel@tonic-gate /*
3287c478bd9Sstevel@tonic-gate  * RDMA operations the RPCIB module exports
3297c478bd9Sstevel@tonic-gate  */
3307c478bd9Sstevel@tonic-gate static rdmaops_t rib_ops = {
3317c478bd9Sstevel@tonic-gate 	rib_reachable,
3327c478bd9Sstevel@tonic-gate 	rib_conn_get,
3337c478bd9Sstevel@tonic-gate 	rib_conn_release,
3347c478bd9Sstevel@tonic-gate 	rib_listen,
3357c478bd9Sstevel@tonic-gate 	rib_listen_stop,
3367c478bd9Sstevel@tonic-gate 	rib_registermem,
3377c478bd9Sstevel@tonic-gate 	rib_deregistermem,
3387c478bd9Sstevel@tonic-gate 	rib_registermemsync,
3397c478bd9Sstevel@tonic-gate 	rib_deregistermemsync,
3407c478bd9Sstevel@tonic-gate 	rib_syncmem,
3417c478bd9Sstevel@tonic-gate 	rib_reg_buf_alloc,
3427c478bd9Sstevel@tonic-gate 	rib_reg_buf_free,
3437c478bd9Sstevel@tonic-gate 	rib_send,
3447c478bd9Sstevel@tonic-gate 	rib_send_resp,
3457c478bd9Sstevel@tonic-gate 	rib_post_resp,
3460a701b1eSRobert Gordon 	rib_post_resp_remove,
3477c478bd9Sstevel@tonic-gate 	rib_post_recv,
3487c478bd9Sstevel@tonic-gate 	rib_recv,
3497c478bd9Sstevel@tonic-gate 	rib_read,
3507c478bd9Sstevel@tonic-gate 	rib_write,
3510a701b1eSRobert Gordon 	rib_getinfo,
3527c478bd9Sstevel@tonic-gate };
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate /*
3557c478bd9Sstevel@tonic-gate  * RDMATF RPCIB plugin details
3567c478bd9Sstevel@tonic-gate  */
3577c478bd9Sstevel@tonic-gate static rdma_mod_t rib_mod = {
3587c478bd9Sstevel@tonic-gate 	"ibtf",		/* api name */
3597c478bd9Sstevel@tonic-gate 	RDMATF_VERS_1,
3607c478bd9Sstevel@tonic-gate 	0,
3617c478bd9Sstevel@tonic-gate 	&rib_ops,	/* rdma op vector for ibtf */
3627c478bd9Sstevel@tonic-gate };
3637c478bd9Sstevel@tonic-gate 
3647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static rdma_stat rpcib_open_hcas(rpcib_state_t *);
3657c478bd9Sstevel@tonic-gate static rdma_stat rib_qp_init(rib_qp_t *, int);
3667c478bd9Sstevel@tonic-gate static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
3677c478bd9Sstevel@tonic-gate static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
3687c478bd9Sstevel@tonic-gate static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
3697c478bd9Sstevel@tonic-gate static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
3707c478bd9Sstevel@tonic-gate static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
3710a701b1eSRobert Gordon static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t,
3720a701b1eSRobert Gordon 	ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_mr_desc_t *);
3730a701b1eSRobert Gordon static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
3740a701b1eSRobert Gordon 	ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
375214ae7d0SSiddheshwar Mahesh static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, rpcib_ping_t *);
3767c478bd9Sstevel@tonic-gate static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
3777c478bd9Sstevel@tonic-gate 	rib_qp_t **);
3787c478bd9Sstevel@tonic-gate static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
3797c478bd9Sstevel@tonic-gate 	rib_qp_t **);
3807c478bd9Sstevel@tonic-gate static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *);
3817c478bd9Sstevel@tonic-gate static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *);
3827c478bd9Sstevel@tonic-gate static int rib_free_sendwait(struct send_wid *);
3837c478bd9Sstevel@tonic-gate static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid);
3847c478bd9Sstevel@tonic-gate static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd);
3857c478bd9Sstevel@tonic-gate static void rdma_done_rem_list(rib_qp_t *);
3867c478bd9Sstevel@tonic-gate static void rdma_done_notify(rib_qp_t *qp, uint32_t xid);
3877c478bd9Sstevel@tonic-gate 
3887c478bd9Sstevel@tonic-gate static void rib_async_handler(void *,
3897c478bd9Sstevel@tonic-gate 	ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *);
3907c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *);
3917c478bd9Sstevel@tonic-gate static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *);
3927c478bd9Sstevel@tonic-gate static int rib_free_svc_recv(struct svc_recv *);
3937c478bd9Sstevel@tonic-gate static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
3947c478bd9Sstevel@tonic-gate static void rib_free_wid(struct recv_wid *);
3957c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
3969c86cdcdSSiddheshwar Mahesh static void rib_detach_hca(ibt_hca_hdl_t);
397065714dcSSiddheshwar Mahesh static void rib_close_a_channel(CONN *);
398065714dcSSiddheshwar Mahesh static void rib_send_hold(rib_qp_t *);
399065714dcSSiddheshwar Mahesh static void rib_send_rele(rib_qp_t *);
4007c478bd9Sstevel@tonic-gate 
4017c478bd9Sstevel@tonic-gate /*
4027c478bd9Sstevel@tonic-gate  * Registration with IBTF as a consumer
4037c478bd9Sstevel@tonic-gate  */
4047c478bd9Sstevel@tonic-gate static struct ibt_clnt_modinfo_s rib_modinfo = {
40503494a98SBill Taylor 	IBTI_V_CURR,
4067c478bd9Sstevel@tonic-gate 	IBT_GENERIC,
4077c478bd9Sstevel@tonic-gate 	rib_async_handler,	/* async event handler */
4087c478bd9Sstevel@tonic-gate 	NULL,			/* Memory Region Handler */
4097c478bd9Sstevel@tonic-gate 	"nfs/ib"
4107c478bd9Sstevel@tonic-gate };
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate /*
4137c478bd9Sstevel@tonic-gate  * Global strucuture
4147c478bd9Sstevel@tonic-gate  */
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate typedef struct rpcib_s {
4177c478bd9Sstevel@tonic-gate 	dev_info_t	*rpcib_dip;
4187c478bd9Sstevel@tonic-gate 	kmutex_t	rpcib_mutex;
4197c478bd9Sstevel@tonic-gate } rpcib_t;
4207c478bd9Sstevel@tonic-gate 
4217c478bd9Sstevel@tonic-gate rpcib_t rpcib;
4227c478bd9Sstevel@tonic-gate 
/*
 * /etc/system controlled variable to control debugging in the rpcib
 * kernel module.  Set it to values greater than 1 to control the
 * amount of debugging messages required.
 */
int	rib_debug = 0;
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate int
4327c478bd9Sstevel@tonic-gate _init(void)
4337c478bd9Sstevel@tonic-gate {
4347c478bd9Sstevel@tonic-gate 	int error;
4357c478bd9Sstevel@tonic-gate 
4367c478bd9Sstevel@tonic-gate 	error = mod_install((struct modlinkage *)&rib_modlinkage);
4377c478bd9Sstevel@tonic-gate 	if (error != 0) {
4387c478bd9Sstevel@tonic-gate 		/*
4397c478bd9Sstevel@tonic-gate 		 * Could not load module
4407c478bd9Sstevel@tonic-gate 		 */
4417c478bd9Sstevel@tonic-gate 		return (error);
4427c478bd9Sstevel@tonic-gate 	}
4437c478bd9Sstevel@tonic-gate 	mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);
4447c478bd9Sstevel@tonic-gate 	return (0);
4457c478bd9Sstevel@tonic-gate }
4467c478bd9Sstevel@tonic-gate 
4477c478bd9Sstevel@tonic-gate int
4487c478bd9Sstevel@tonic-gate _fini()
4497c478bd9Sstevel@tonic-gate {
4507c478bd9Sstevel@tonic-gate 	int status;
4517c478bd9Sstevel@tonic-gate 
4527c478bd9Sstevel@tonic-gate 	/*
4537c478bd9Sstevel@tonic-gate 	 * Remove module
4547c478bd9Sstevel@tonic-gate 	 */
4557c478bd9Sstevel@tonic-gate 	if ((status = mod_remove(&rib_modlinkage)) != 0) {
4567c478bd9Sstevel@tonic-gate 		return (status);
4577c478bd9Sstevel@tonic-gate 	}
4587c478bd9Sstevel@tonic-gate 	mutex_destroy(&plugin_state_lock);
4597c478bd9Sstevel@tonic-gate 	return (0);
4607c478bd9Sstevel@tonic-gate }
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate int
4637c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
4647c478bd9Sstevel@tonic-gate {
4657c478bd9Sstevel@tonic-gate 	return (mod_info(&rib_modlinkage, modinfop));
4667c478bd9Sstevel@tonic-gate }
4677c478bd9Sstevel@tonic-gate 
4687c478bd9Sstevel@tonic-gate /*
4697c478bd9Sstevel@tonic-gate  * rpcib_getinfo()
4707c478bd9Sstevel@tonic-gate  * Given the device number, return the devinfo pointer or the
4717c478bd9Sstevel@tonic-gate  * instance number.
4727c478bd9Sstevel@tonic-gate  * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
4737c478bd9Sstevel@tonic-gate  */
4747c478bd9Sstevel@tonic-gate 
4757c478bd9Sstevel@tonic-gate /*ARGSUSED*/
4767c478bd9Sstevel@tonic-gate static int
4777c478bd9Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
4787c478bd9Sstevel@tonic-gate {
4797c478bd9Sstevel@tonic-gate 	int ret = DDI_SUCCESS;
4807c478bd9Sstevel@tonic-gate 
4817c478bd9Sstevel@tonic-gate 	switch (cmd) {
4827c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
4837c478bd9Sstevel@tonic-gate 		if (rpcib.rpcib_dip != NULL)
4847c478bd9Sstevel@tonic-gate 			*result = rpcib.rpcib_dip;
4857c478bd9Sstevel@tonic-gate 		else {
4867c478bd9Sstevel@tonic-gate 			*result = NULL;
4877c478bd9Sstevel@tonic-gate 			ret = DDI_FAILURE;
4887c478bd9Sstevel@tonic-gate 		}
4897c478bd9Sstevel@tonic-gate 		break;
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
4927c478bd9Sstevel@tonic-gate 		*result = NULL;
4937c478bd9Sstevel@tonic-gate 		break;
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate 	default:
4967c478bd9Sstevel@tonic-gate 		ret = DDI_FAILURE;
4977c478bd9Sstevel@tonic-gate 	}
4987c478bd9Sstevel@tonic-gate 	return (ret);
4997c478bd9Sstevel@tonic-gate }
5007c478bd9Sstevel@tonic-gate 
5017f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static void
5027f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rpcib_free_hca_list()
5037f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States {
5047f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_t *hca, *hcap;
5057f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
5067f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->hcas_list_lock, RW_WRITER);
5077f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	hca = rib_stat->hcas_list;
5087f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_stat->hcas_list = NULL;
5097f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->hcas_list_lock);
5107f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	while (hca != NULL) {
5117f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_enter(&hca->state_lock, RW_WRITER);
5127f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hcap = hca;
5137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca = hca->next;
5147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_stat->nhca_inited--;
5157f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_mod.rdma_count--;
5167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hcap->state = HCA_DETACHED;
5177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_exit(&hcap->state_lock);
5187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_stop_hca_services(hcap);
5197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
5207f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		kmem_free(hcap, sizeof (*hcap));
5217f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
5227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States }
5237f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
5247f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static rdma_stat
5257f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rpcib_free_service_list()
5267f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States {
5277f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_service_t *service;
5287f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	ibt_status_t ret;
5297f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
5307f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->service_list_lock, RW_WRITER);
5317f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	while (rib_stat->service_list != NULL) {
5327f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		service = rib_stat->service_list;
5337f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		ret = ibt_unbind_all_services(service->srv_hdl);
5347f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (ret != IBT_SUCCESS) {
5357f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&rib_stat->service_list_lock);
5367f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States #ifdef DEBUG
5377f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			cmn_err(CE_NOTE, "rpcib_free_service_list: "
5387f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    "ibt_unbind_all_services failed (%d)\n", (int)ret);
5397f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States #endif
5407f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			return (RDMA_FAILED);
5417f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
5427f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		ret = ibt_deregister_service(rib_stat->ibt_clnt_hdl,
5437f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    service->srv_hdl);
5447f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (ret != IBT_SUCCESS) {
5457f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&rib_stat->service_list_lock);
5467f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States #ifdef DEBUG
5477f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			cmn_err(CE_NOTE, "rpcib_free_service_list: "
5487f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    "ibt_deregister_service failed (%d)\n", (int)ret);
5497f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States #endif
5507f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			return (RDMA_FAILED);
5517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
5527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_stat->service_list = service->next;
5537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		kmem_free(service, sizeof (rib_service_t));
5547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
5557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->service_list_lock);
5567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
5577f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	return (RDMA_SUCCESS);
5587f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States }
5597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
5607c478bd9Sstevel@tonic-gate static int
5617c478bd9Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5627c478bd9Sstevel@tonic-gate {
5637c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
5647c478bd9Sstevel@tonic-gate 	rdma_stat	r_status;
5657c478bd9Sstevel@tonic-gate 
5667c478bd9Sstevel@tonic-gate 	switch (cmd) {
5677c478bd9Sstevel@tonic-gate 	case DDI_ATTACH:
5687c478bd9Sstevel@tonic-gate 		break;
5697c478bd9Sstevel@tonic-gate 	case DDI_RESUME:
5707c478bd9Sstevel@tonic-gate 		return (DDI_SUCCESS);
5717c478bd9Sstevel@tonic-gate 	default:
5727c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5737c478bd9Sstevel@tonic-gate 	}
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate 	mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL);
5767c478bd9Sstevel@tonic-gate 
5777c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
5787c478bd9Sstevel@tonic-gate 	if (rpcib.rpcib_dip != NULL) {
5797c478bd9Sstevel@tonic-gate 		mutex_exit(&rpcib.rpcib_mutex);
5807c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5817c478bd9Sstevel@tonic-gate 	}
5827c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = dip;
5837c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
5847c478bd9Sstevel@tonic-gate 	/*
5857c478bd9Sstevel@tonic-gate 	 * Create the "rpcib" minor-node.
5867c478bd9Sstevel@tonic-gate 	 */
5877c478bd9Sstevel@tonic-gate 	if (ddi_create_minor_node(dip,
5887c478bd9Sstevel@tonic-gate 	    "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
5897c478bd9Sstevel@tonic-gate 		/* Error message, no cmn_err as they print on console */
5907c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5917c478bd9Sstevel@tonic-gate 	}
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate 	if (rib_stat == NULL) {
5947c478bd9Sstevel@tonic-gate 		rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP);
5957c478bd9Sstevel@tonic-gate 		mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL);
5967f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_init(&rib_stat->hcas_list_lock, NULL, RW_DRIVER, NULL);
5977f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_init(&rib_stat->listen_lock, NULL, MUTEX_DRIVER, NULL);
5987c478bd9Sstevel@tonic-gate 	}
5997c478bd9Sstevel@tonic-gate 
6007f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_stat->hca_count = ibt_get_hca_list(NULL);
6017c478bd9Sstevel@tonic-gate 	if (rib_stat->hca_count < 1) {
6027f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_destroy(&rib_stat->listen_lock);
6037f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_destroy(&rib_stat->hcas_list_lock);
6047c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
6057c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
6067c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
6077c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
6087c478bd9Sstevel@tonic-gate 	}
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate 	ibt_status = ibt_attach(&rib_modinfo, dip,
6117c478bd9Sstevel@tonic-gate 	    (void *)rib_stat, &rib_stat->ibt_clnt_hdl);
6120a701b1eSRobert Gordon 
6137c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
6147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_destroy(&rib_stat->listen_lock);
6157f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_destroy(&rib_stat->hcas_list_lock);
6167c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
6177c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
6187c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
6197c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
6207c478bd9Sstevel@tonic-gate 	}
6217c478bd9Sstevel@tonic-gate 
6227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_stat->service_list = NULL;
6237f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_init(&rib_stat->service_list_lock, NULL, RW_DRIVER, NULL);
6247c478bd9Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
6257f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (rpcib_open_hcas(rib_stat) != RDMA_SUCCESS) {
6267c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
62751f34d4bSRajkumar Sivaprakasam 		goto open_fail;
6287c478bd9Sstevel@tonic-gate 	}
6297c478bd9Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
6307c478bd9Sstevel@tonic-gate 
63151f34d4bSRajkumar Sivaprakasam 	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) !=
63251f34d4bSRajkumar Sivaprakasam 	    DDI_PROP_SUCCESS) {
63351f34d4bSRajkumar Sivaprakasam 		cmn_err(CE_WARN, "rpcib_attach: ddi-no-autodetach prop update "
63451f34d4bSRajkumar Sivaprakasam 		    "failed.");
63551f34d4bSRajkumar Sivaprakasam 		goto register_fail;
63651f34d4bSRajkumar Sivaprakasam 	}
63751f34d4bSRajkumar Sivaprakasam 
6387c478bd9Sstevel@tonic-gate 	/*
6397c478bd9Sstevel@tonic-gate 	 * Register with rdmatf
6407c478bd9Sstevel@tonic-gate 	 */
6417c478bd9Sstevel@tonic-gate 	r_status = rdma_register_mod(&rib_mod);
6427c478bd9Sstevel@tonic-gate 	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
64351f34d4bSRajkumar Sivaprakasam 		cmn_err(CE_WARN, "rpcib_attach:rdma_register_mod failed, "
64451f34d4bSRajkumar Sivaprakasam 		    "status = %d", r_status);
64551f34d4bSRajkumar Sivaprakasam 		goto register_fail;
64651f34d4bSRajkumar Sivaprakasam 	}
64751f34d4bSRajkumar Sivaprakasam 
64851f34d4bSRajkumar Sivaprakasam 	return (DDI_SUCCESS);
64951f34d4bSRajkumar Sivaprakasam 
65051f34d4bSRajkumar Sivaprakasam register_fail:
6517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
65251f34d4bSRajkumar Sivaprakasam open_fail:
6537c478bd9Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
6547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rpcib_free_hca_list();
6557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	(void) rpcib_free_service_list();
6567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_destroy(&rib_stat->listen_lock);
6577f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_destroy(&rib_stat->hcas_list_lock);
6587c478bd9Sstevel@tonic-gate 	mutex_destroy(&rib_stat->open_hca_lock);
6597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_destroy(&rib_stat->service_list_lock);
6607c478bd9Sstevel@tonic-gate 	kmem_free(rib_stat, sizeof (*rib_stat));
6617c478bd9Sstevel@tonic-gate 	rib_stat = NULL;
6627c478bd9Sstevel@tonic-gate 	return (DDI_FAILURE);
6637c478bd9Sstevel@tonic-gate }
6647c478bd9Sstevel@tonic-gate 
6657c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6667c478bd9Sstevel@tonic-gate static int
6677c478bd9Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6687c478bd9Sstevel@tonic-gate {
6697c478bd9Sstevel@tonic-gate 	switch (cmd) {
6707c478bd9Sstevel@tonic-gate 
6717c478bd9Sstevel@tonic-gate 	case DDI_DETACH:
6727c478bd9Sstevel@tonic-gate 		break;
6737c478bd9Sstevel@tonic-gate 
6747c478bd9Sstevel@tonic-gate 	case DDI_SUSPEND:
6757c478bd9Sstevel@tonic-gate 	default:
6767c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
6777c478bd9Sstevel@tonic-gate 	}
6787c478bd9Sstevel@tonic-gate 
6797c478bd9Sstevel@tonic-gate 	/*
6807c478bd9Sstevel@tonic-gate 	 * Detach the hca and free resources
6817c478bd9Sstevel@tonic-gate 	 */
6827c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
6837c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
6847c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
6857f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
6867f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (rpcib_free_service_list() != RDMA_SUCCESS)
6877f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		return (DDI_FAILURE);
6887f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rpcib_free_hca_list();
6897f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
6907c478bd9Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
6917f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_destroy(&rib_stat->listen_lock);
6927f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_destroy(&rib_stat->hcas_list_lock);
69351f34d4bSRajkumar Sivaprakasam 	mutex_destroy(&rib_stat->open_hca_lock);
6947f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_destroy(&rib_stat->service_list_lock);
6957f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
69651f34d4bSRajkumar Sivaprakasam 	kmem_free(rib_stat, sizeof (*rib_stat));
69751f34d4bSRajkumar Sivaprakasam 	rib_stat = NULL;
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
7007c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = NULL;
7017c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
7027c478bd9Sstevel@tonic-gate 	mutex_destroy(&rpcib.rpcib_mutex);
7037c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
7047c478bd9Sstevel@tonic-gate }
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 
7077c478bd9Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int);
7087c478bd9Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int);
7097c478bd9Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype);
7107c478bd9Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t);
7117c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *);
7127c478bd9Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *);
7137c478bd9Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
7147c478bd9Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
7157c478bd9Sstevel@tonic-gate 
7160a701b1eSRobert Gordon 
7177c478bd9Sstevel@tonic-gate /*
7187c478bd9Sstevel@tonic-gate  * One CQ pair per HCA
7197c478bd9Sstevel@tonic-gate  */
7207c478bd9Sstevel@tonic-gate static rdma_stat
7217c478bd9Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
7227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_cq_t **cqp)
7237c478bd9Sstevel@tonic-gate {
7247c478bd9Sstevel@tonic-gate 	rib_cq_t	*cq;
7257c478bd9Sstevel@tonic-gate 	ibt_cq_attr_t	cq_attr;
7267c478bd9Sstevel@tonic-gate 	uint32_t	real_size;
7277c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
7287c478bd9Sstevel@tonic-gate 	rdma_stat	error = RDMA_SUCCESS;
7297c478bd9Sstevel@tonic-gate 
7307c478bd9Sstevel@tonic-gate 	cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP);
7317c478bd9Sstevel@tonic-gate 	cq->rib_hca = hca;
7327c478bd9Sstevel@tonic-gate 	cq_attr.cq_size = cq_size;
7337c478bd9Sstevel@tonic-gate 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
7347c478bd9Sstevel@tonic-gate 	status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl,
7357c478bd9Sstevel@tonic-gate 	    &real_size);
7367c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
7377c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed,"
7387c478bd9Sstevel@tonic-gate 		    " status=%d", status);
7397c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
7407c478bd9Sstevel@tonic-gate 		goto fail;
7417c478bd9Sstevel@tonic-gate 	}
7427f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, hca);
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate 	/*
7457c478bd9Sstevel@tonic-gate 	 * Enable CQ callbacks. CQ Callbacks are single shot
7467c478bd9Sstevel@tonic-gate 	 * (e.g. you have to call ibt_enable_cq_notify()
7477c478bd9Sstevel@tonic-gate 	 * after each callback to get another one).
7487c478bd9Sstevel@tonic-gate 	 */
7497c478bd9Sstevel@tonic-gate 	status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION);
7507c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
7517c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: "
7527c478bd9Sstevel@tonic-gate 		    "enable_cq_notify failed, status %d", status);
7537c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
7547c478bd9Sstevel@tonic-gate 		goto fail;
7557c478bd9Sstevel@tonic-gate 	}
7567c478bd9Sstevel@tonic-gate 	*cqp = cq;
7577c478bd9Sstevel@tonic-gate 
7587c478bd9Sstevel@tonic-gate 	return (error);
7597c478bd9Sstevel@tonic-gate fail:
7607c478bd9Sstevel@tonic-gate 	if (cq->rib_cq_hdl)
7617c478bd9Sstevel@tonic-gate 		(void) ibt_free_cq(cq->rib_cq_hdl);
7627c478bd9Sstevel@tonic-gate 	if (cq)
7637c478bd9Sstevel@tonic-gate 		kmem_free(cq, sizeof (rib_cq_t));
7647c478bd9Sstevel@tonic-gate 	return (error);
7657c478bd9Sstevel@tonic-gate }
7667c478bd9Sstevel@tonic-gate 
7677f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States /*
7687f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * rpcib_find_hca
7697f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  *
7707f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * Caller should have already locked the hcas_lock before calling
7717f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * this function.
7727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  */
7737f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static rib_hca_t *
7747f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rpcib_find_hca(rpcib_state_t *ribstat, ib_guid_t guid)
7757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States {
7767f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_t *hca = ribstat->hcas_list;
7777f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
7787f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	while (hca && hca->hca_guid != guid)
7797f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca = hca->next;
7807f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
7817f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	return (hca);
7827f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States }
7837f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
7847c478bd9Sstevel@tonic-gate static rdma_stat
7857f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rpcib_open_hcas(rpcib_state_t *ribstat)
7867c478bd9Sstevel@tonic-gate {
7877c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
7887c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
7897c478bd9Sstevel@tonic-gate 	rdma_stat		status;
7907c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*pinfop;
7917c478bd9Sstevel@tonic-gate 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
7927c478bd9Sstevel@tonic-gate 	uint_t			size, cq_size;
7937c478bd9Sstevel@tonic-gate 	int			i;
7940a701b1eSRobert Gordon 	kstat_t *ksp;
7950a701b1eSRobert Gordon 	cache_avl_struct_t example_avl_node;
7960a701b1eSRobert Gordon 	char rssc_name[32];
7977f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	int old_nhca_inited = ribstat->nhca_inited;
7987f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	ib_guid_t		*hca_guids;
7997c478bd9Sstevel@tonic-gate 
8007c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
8010a701b1eSRobert Gordon 
8027f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	ribstat->hca_count = ibt_get_hca_list(&hca_guids);
8037f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (ribstat->hca_count == 0)
8047f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		return (RDMA_FAILED);
8057c478bd9Sstevel@tonic-gate 
8067f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&ribstat->hcas_list_lock, RW_WRITER);
8077c478bd9Sstevel@tonic-gate 	/*
8087c478bd9Sstevel@tonic-gate 	 * Open a hca and setup for RDMA
8097c478bd9Sstevel@tonic-gate 	 */
8107c478bd9Sstevel@tonic-gate 	for (i = 0; i < ribstat->hca_count; i++) {
8117f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (rpcib_find_hca(ribstat, hca_guids[i]))
8127f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			continue;
8137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca = kmem_zalloc(sizeof (rib_hca_t), KM_SLEEP);
8147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
8157c478bd9Sstevel@tonic-gate 		ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl,
8167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    hca_guids[i], &hca->hca_hdl);
8177c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			kmem_free(hca, sizeof (rib_hca_t));
8197c478bd9Sstevel@tonic-gate 			continue;
8207c478bd9Sstevel@tonic-gate 		}
8217f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca->hca_guid = hca_guids[i];
8227c478bd9Sstevel@tonic-gate 		hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl;
8237c478bd9Sstevel@tonic-gate 		hca->state = HCA_INITED;
8247c478bd9Sstevel@tonic-gate 
8257c478bd9Sstevel@tonic-gate 		/*
8267c478bd9Sstevel@tonic-gate 		 * query HCA info
8277c478bd9Sstevel@tonic-gate 		 */
8287c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs);
8297c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8307c478bd9Sstevel@tonic-gate 			goto fail1;
8317c478bd9Sstevel@tonic-gate 		}
8327c478bd9Sstevel@tonic-gate 
8337c478bd9Sstevel@tonic-gate 		/*
8347c478bd9Sstevel@tonic-gate 		 * One PD (Protection Domain) per HCA.
8357c478bd9Sstevel@tonic-gate 		 * A qp is allowed to access a memory region
8367c478bd9Sstevel@tonic-gate 		 * only when it's in the same PD as that of
8377c478bd9Sstevel@tonic-gate 		 * the memory region.
8387c478bd9Sstevel@tonic-gate 		 */
8397c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl);
8407c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8417c478bd9Sstevel@tonic-gate 			goto fail1;
8427c478bd9Sstevel@tonic-gate 		}
8437c478bd9Sstevel@tonic-gate 
8447c478bd9Sstevel@tonic-gate 		/*
8457c478bd9Sstevel@tonic-gate 		 * query HCA ports
8467c478bd9Sstevel@tonic-gate 		 */
8477c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl,
8487c478bd9Sstevel@tonic-gate 		    0, &pinfop, &hca->hca_nports, &size);
8497c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8507c478bd9Sstevel@tonic-gate 			goto fail2;
8517c478bd9Sstevel@tonic-gate 		}
8527c478bd9Sstevel@tonic-gate 		hca->hca_ports = pinfop;
8537c478bd9Sstevel@tonic-gate 		hca->hca_pinfosz = size;
8547c478bd9Sstevel@tonic-gate 		pinfop = NULL;
8557c478bd9Sstevel@tonic-gate 
8567c478bd9Sstevel@tonic-gate 		cq_size = DEF_CQ_SIZE; /* default cq size */
8577c478bd9Sstevel@tonic-gate 		/*
8587c478bd9Sstevel@tonic-gate 		 * Create 2 pairs of cq's (1 pair for client
8597c478bd9Sstevel@tonic-gate 		 * and the other pair for server) on this hca.
8607c478bd9Sstevel@tonic-gate 		 * If number of qp's gets too large, then several
8617c478bd9Sstevel@tonic-gate 		 * cq's will be needed.
8627c478bd9Sstevel@tonic-gate 		 */
8637c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler,
8647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    &hca->svc_rcq);
8657c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8667c478bd9Sstevel@tonic-gate 			goto fail3;
8677c478bd9Sstevel@tonic-gate 		}
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_scq_handler,
8707f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    &hca->svc_scq);
8717c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8727c478bd9Sstevel@tonic-gate 			goto fail3;
8737c478bd9Sstevel@tonic-gate 		}
8747c478bd9Sstevel@tonic-gate 
8757c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler,
8767f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    &hca->clnt_rcq);
8777c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8787c478bd9Sstevel@tonic-gate 			goto fail3;
8797c478bd9Sstevel@tonic-gate 		}
8807c478bd9Sstevel@tonic-gate 
8817c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler,
8827f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    &hca->clnt_scq);
8837c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8847c478bd9Sstevel@tonic-gate 			goto fail3;
8857c478bd9Sstevel@tonic-gate 		}
8867c478bd9Sstevel@tonic-gate 
8877c478bd9Sstevel@tonic-gate 		/*
8887c478bd9Sstevel@tonic-gate 		 * Create buffer pools.
8897c478bd9Sstevel@tonic-gate 		 * Note rib_rbuf_create also allocates memory windows.
8907c478bd9Sstevel@tonic-gate 		 */
8917c478bd9Sstevel@tonic-gate 		hca->recv_pool = rib_rbufpool_create(hca,
892065714dcSSiddheshwar Mahesh 		    RECV_BUFFER, rib_max_rbufs);
8937c478bd9Sstevel@tonic-gate 		if (hca->recv_pool == NULL) {
8947c478bd9Sstevel@tonic-gate 			goto fail3;
8957c478bd9Sstevel@tonic-gate 		}
8967c478bd9Sstevel@tonic-gate 
8977c478bd9Sstevel@tonic-gate 		hca->send_pool = rib_rbufpool_create(hca,
898065714dcSSiddheshwar Mahesh 		    SEND_BUFFER, rib_max_rbufs);
8997c478bd9Sstevel@tonic-gate 		if (hca->send_pool == NULL) {
9007c478bd9Sstevel@tonic-gate 			rib_rbufpool_destroy(hca, RECV_BUFFER);
9017c478bd9Sstevel@tonic-gate 			goto fail3;
9027c478bd9Sstevel@tonic-gate 		}
9037c478bd9Sstevel@tonic-gate 
9040a701b1eSRobert Gordon 		if (hca->server_side_cache == NULL) {
9050a701b1eSRobert Gordon 			(void) sprintf(rssc_name,
9067f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    "rib_srvr_cache_%llx",
9077f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    (long long unsigned int) hca->hca_guid);
9080a701b1eSRobert Gordon 			hca->server_side_cache = kmem_cache_create(
9090a701b1eSRobert Gordon 			    rssc_name,
9100a701b1eSRobert Gordon 			    sizeof (cache_avl_struct_t), 0,
9110a701b1eSRobert Gordon 			    NULL,
9120a701b1eSRobert Gordon 			    NULL,
9130a701b1eSRobert Gordon 			    rib_server_side_cache_reclaim,
9140a701b1eSRobert Gordon 			    hca, NULL, 0);
9150a701b1eSRobert Gordon 		}
9160a701b1eSRobert Gordon 
9170a701b1eSRobert Gordon 		avl_create(&hca->avl_tree,
9180a701b1eSRobert Gordon 		    avl_compare,
9190a701b1eSRobert Gordon 		    sizeof (cache_avl_struct_t),
9200a701b1eSRobert Gordon 		    (uint_t)(uintptr_t)&example_avl_node.avl_link-
9210a701b1eSRobert Gordon 		    (uint_t)(uintptr_t)&example_avl_node);
9220a701b1eSRobert Gordon 
9237f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_init(&hca->bound_services_lock, NULL, RW_DRIVER,
9247f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    hca->iblock);
9257f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock);
9260a701b1eSRobert Gordon 		rw_init(&hca->avl_rw_lock,
9270a701b1eSRobert Gordon 		    NULL, RW_DRIVER, hca->iblock);
9287f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_init(&hca->cache_allocation_lock,
9290a701b1eSRobert Gordon 		    NULL, MUTEX_DRIVER, NULL);
9300a701b1eSRobert Gordon 		hca->avl_init = TRUE;
9310a701b1eSRobert Gordon 
9320a701b1eSRobert Gordon 		/* Create kstats for the cache */
9330a701b1eSRobert Gordon 		ASSERT(INGLOBALZONE(curproc));
9340a701b1eSRobert Gordon 
9350a701b1eSRobert Gordon 		if (!stats_enabled) {
9360a701b1eSRobert Gordon 			ksp = kstat_create_zone("unix", 0, "rpcib_cache", "rpc",
9370a701b1eSRobert Gordon 			    KSTAT_TYPE_NAMED,
9380a701b1eSRobert Gordon 			    sizeof (rpcib_kstat) / sizeof (kstat_named_t),
9390a701b1eSRobert Gordon 			    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
9400a701b1eSRobert Gordon 			    GLOBAL_ZONEID);
9410a701b1eSRobert Gordon 			if (ksp) {
9420a701b1eSRobert Gordon 				ksp->ks_data = (void *) &rpcib_kstat;
9430a701b1eSRobert Gordon 				ksp->ks_update = rpcib_cache_kstat_update;
9440a701b1eSRobert Gordon 				kstat_install(ksp);
9450a701b1eSRobert Gordon 				stats_enabled = TRUE;
9460a701b1eSRobert Gordon 			}
9470a701b1eSRobert Gordon 		}
948065714dcSSiddheshwar Mahesh 		if (hca->cleanup_helper == NULL) {
9497f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			char tq_name[sizeof (hca->hca_guid) * 2 + 1];
9500a701b1eSRobert Gordon 
9517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			(void) snprintf(tq_name, sizeof (tq_name), "%llX",
9527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    (unsigned long long int) hca->hca_guid);
9537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			hca->cleanup_helper = ddi_taskq_create(NULL,
9547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    tq_name, 1, TASKQ_DEFAULTPRI, 0);
9557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
9567c478bd9Sstevel@tonic-gate 
9577c478bd9Sstevel@tonic-gate 		mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
9587c478bd9Sstevel@tonic-gate 		cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL);
9597c478bd9Sstevel@tonic-gate 		rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER,
9607c478bd9Sstevel@tonic-gate 		    hca->iblock);
9617c478bd9Sstevel@tonic-gate 		rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER,
9627c478bd9Sstevel@tonic-gate 		    hca->iblock);
9637c478bd9Sstevel@tonic-gate 		mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock);
9647c478bd9Sstevel@tonic-gate 		hca->inuse = TRUE;
9657f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
9667f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca->next = ribstat->hcas_list;
9677f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		ribstat->hcas_list = hca;
9687c478bd9Sstevel@tonic-gate 		ribstat->nhca_inited++;
9697c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
9707f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		continue;
9717c478bd9Sstevel@tonic-gate 
9727c478bd9Sstevel@tonic-gate fail3:
9737c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
9747c478bd9Sstevel@tonic-gate fail2:
9757c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
9767c478bd9Sstevel@tonic-gate fail1:
9777c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
9787f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		kmem_free(hca, sizeof (rib_hca_t));
9797c478bd9Sstevel@tonic-gate 	}
9807f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&ribstat->hcas_list_lock);
9817f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	ibt_free_hca_list(hca_guids, ribstat->hca_count);
9827f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_mod.rdma_count = rib_stat->nhca_inited;
9837f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
9847f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	/*
9857f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * return success if at least one new hca has been configured.
9867f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 */
9877f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (ribstat->nhca_inited != old_nhca_inited)
9887c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
9897c478bd9Sstevel@tonic-gate 	else
9907c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
9917c478bd9Sstevel@tonic-gate }
9927c478bd9Sstevel@tonic-gate 
9937c478bd9Sstevel@tonic-gate /*
9947c478bd9Sstevel@tonic-gate  * Callback routines
9957c478bd9Sstevel@tonic-gate  */
9967c478bd9Sstevel@tonic-gate 
9977c478bd9Sstevel@tonic-gate /*
9987c478bd9Sstevel@tonic-gate  * SCQ handlers
9997c478bd9Sstevel@tonic-gate  */
10007c478bd9Sstevel@tonic-gate /* ARGSUSED */
10017c478bd9Sstevel@tonic-gate static void
10027c478bd9Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
10037c478bd9Sstevel@tonic-gate {
10047c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
10057c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
1006065714dcSSiddheshwar Mahesh 	struct send_wid	*wd;
1007065714dcSSiddheshwar Mahesh 	CONN		*conn;
1008065714dcSSiddheshwar Mahesh 	rib_qp_t	*qp;
10097c478bd9Sstevel@tonic-gate 	int		i;
10107c478bd9Sstevel@tonic-gate 
10117c478bd9Sstevel@tonic-gate 	/*
10127c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
10137c478bd9Sstevel@tonic-gate 	 * completion queue notification.
10147c478bd9Sstevel@tonic-gate 	 */
10157c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
10187c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
10197c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
10207c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
10217c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
10227c478bd9Sstevel@tonic-gate 			return;
10237c478bd9Sstevel@tonic-gate 
10247c478bd9Sstevel@tonic-gate 		/*
10257c478bd9Sstevel@tonic-gate 		 * Got a send completion
10267c478bd9Sstevel@tonic-gate 		 */
1027065714dcSSiddheshwar Mahesh 		if (wc.wc_id != RDMA_DUMMY_WRID) {
1028065714dcSSiddheshwar Mahesh 			wd = (struct send_wid *)(uintptr_t)wc.wc_id;
1029065714dcSSiddheshwar Mahesh 			qp = wd->qp;
1030065714dcSSiddheshwar Mahesh 			conn = qptoc(qp);
10317c478bd9Sstevel@tonic-gate 
10327c478bd9Sstevel@tonic-gate 			mutex_enter(&wd->sendwait_lock);
10337c478bd9Sstevel@tonic-gate 			switch (wc.wc_status) {
10347c478bd9Sstevel@tonic-gate 			case IBT_WC_SUCCESS:
10357c478bd9Sstevel@tonic-gate 				wd->status = RDMA_SUCCESS;
10367c478bd9Sstevel@tonic-gate 				break;
10377c478bd9Sstevel@tonic-gate 			default:
10387c478bd9Sstevel@tonic-gate /*
10397c478bd9Sstevel@tonic-gate  *    RC Send Q Error Code		Local state     Remote State
10407c478bd9Sstevel@tonic-gate  *    ==================== 		===========     ============
10417c478bd9Sstevel@tonic-gate  *    IBT_WC_BAD_RESPONSE_ERR             ERROR           None
10427c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_LEN_ERR                ERROR           None
10437c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           None
10447c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_PROTECT_ERR            ERROR           None
10457c478bd9Sstevel@tonic-gate  *    IBT_WC_MEM_WIN_BIND_ERR             ERROR           None
10467c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR
10477c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_ACCESS_ERR            ERROR           ERROR
10487c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_OP_ERR                ERROR           ERROR
10497c478bd9Sstevel@tonic-gate  *    IBT_WC_RNR_NAK_TIMEOUT_ERR          ERROR           None
10507c478bd9Sstevel@tonic-gate  *    IBT_WC_TRANS_TIMEOUT_ERR            ERROR           None
1051065714dcSSiddheshwar Mahesh  *    IBT_WC_WR_FLUSHED_ERR               ERROR           None
10527c478bd9Sstevel@tonic-gate  */
10537c478bd9Sstevel@tonic-gate 				/*
10547c478bd9Sstevel@tonic-gate 				 * Channel in error state. Set connection to
10557c478bd9Sstevel@tonic-gate 				 * ERROR and cleanup will happen either from
10567c478bd9Sstevel@tonic-gate 				 * conn_release  or from rib_conn_get
10577c478bd9Sstevel@tonic-gate 				 */
10587c478bd9Sstevel@tonic-gate 				wd->status = RDMA_FAILED;
10597c478bd9Sstevel@tonic-gate 				mutex_enter(&conn->c_lock);
10607c478bd9Sstevel@tonic-gate 				if (conn->c_state != C_DISCONN_PEND)
10610a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
10627c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
10637c478bd9Sstevel@tonic-gate 				break;
10647c478bd9Sstevel@tonic-gate 			}
10650a701b1eSRobert Gordon 
10667c478bd9Sstevel@tonic-gate 			if (wd->cv_sig == 1) {
10677c478bd9Sstevel@tonic-gate 				/*
10687c478bd9Sstevel@tonic-gate 				 * Notify poster
10697c478bd9Sstevel@tonic-gate 				 */
10707c478bd9Sstevel@tonic-gate 				cv_signal(&wd->wait_cv);
10717c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
10727c478bd9Sstevel@tonic-gate 			} else {
10737c478bd9Sstevel@tonic-gate 				/*
10747c478bd9Sstevel@tonic-gate 				 * Poster not waiting for notification.
10757c478bd9Sstevel@tonic-gate 				 * Free the send buffers and send_wid
10767c478bd9Sstevel@tonic-gate 				 */
10777c478bd9Sstevel@tonic-gate 				for (i = 0; i < wd->nsbufs; i++) {
1078065714dcSSiddheshwar Mahesh 					rib_rbuf_free(qptoc(wd->qp),
1079065714dcSSiddheshwar Mahesh 					    SEND_BUFFER,
108011606941Sjwahlig 					    (void *)(uintptr_t)wd->sbufaddr[i]);
10817c478bd9Sstevel@tonic-gate 				}
1082065714dcSSiddheshwar Mahesh 
1083065714dcSSiddheshwar Mahesh 				/* decrement the send ref count */
1084065714dcSSiddheshwar Mahesh 				rib_send_rele(qp);
1085065714dcSSiddheshwar Mahesh 
10867c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
10877c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wd);
10887c478bd9Sstevel@tonic-gate 			}
10897c478bd9Sstevel@tonic-gate 		}
10907c478bd9Sstevel@tonic-gate 	}
10917c478bd9Sstevel@tonic-gate }
10927c478bd9Sstevel@tonic-gate 
10937c478bd9Sstevel@tonic-gate /* ARGSUSED */
10947c478bd9Sstevel@tonic-gate static void
10957c478bd9Sstevel@tonic-gate rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
10967c478bd9Sstevel@tonic-gate {
10977c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
10987c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
1099065714dcSSiddheshwar Mahesh 	struct send_wid	*wd;
1100065714dcSSiddheshwar Mahesh 	rib_qp_t	*qp;
1101065714dcSSiddheshwar Mahesh 	CONN		*conn;
11027c478bd9Sstevel@tonic-gate 	int		i;
11037c478bd9Sstevel@tonic-gate 
11047c478bd9Sstevel@tonic-gate 	/*
11057c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
11067c478bd9Sstevel@tonic-gate 	 * completion queue notification.
11077c478bd9Sstevel@tonic-gate 	 */
11087c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
11117c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
11127c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
11137c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
11147c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
11157c478bd9Sstevel@tonic-gate 			return;
11167c478bd9Sstevel@tonic-gate 
11177c478bd9Sstevel@tonic-gate 		/*
11187c478bd9Sstevel@tonic-gate 		 * Got a send completion
11197c478bd9Sstevel@tonic-gate 		 */
1120065714dcSSiddheshwar Mahesh 		if (wc.wc_id != RDMA_DUMMY_WRID) {
1121065714dcSSiddheshwar Mahesh 			wd = (struct send_wid *)(uintptr_t)wc.wc_id;
1122065714dcSSiddheshwar Mahesh 			qp = wd->qp;
1123065714dcSSiddheshwar Mahesh 			conn = qptoc(qp);
11247c478bd9Sstevel@tonic-gate 			mutex_enter(&wd->sendwait_lock);
1125065714dcSSiddheshwar Mahesh 
1126065714dcSSiddheshwar Mahesh 			switch (wc.wc_status) {
1127065714dcSSiddheshwar Mahesh 			case IBT_WC_SUCCESS:
1128065714dcSSiddheshwar Mahesh 				wd->status = RDMA_SUCCESS;
1129065714dcSSiddheshwar Mahesh 				break;
1130065714dcSSiddheshwar Mahesh 			default:
1131065714dcSSiddheshwar Mahesh 				/*
1132065714dcSSiddheshwar Mahesh 				 * Channel in error state. Set connection to
1133065714dcSSiddheshwar Mahesh 				 * ERROR and cleanup will happen either from
1134065714dcSSiddheshwar Mahesh 				 * conn_release  or conn timeout.
1135065714dcSSiddheshwar Mahesh 				 */
1136065714dcSSiddheshwar Mahesh 				wd->status = RDMA_FAILED;
1137065714dcSSiddheshwar Mahesh 				mutex_enter(&conn->c_lock);
1138065714dcSSiddheshwar Mahesh 				if (conn->c_state != C_DISCONN_PEND)
1139065714dcSSiddheshwar Mahesh 					conn->c_state = C_ERROR_CONN;
1140065714dcSSiddheshwar Mahesh 				mutex_exit(&conn->c_lock);
1141065714dcSSiddheshwar Mahesh 				break;
1142065714dcSSiddheshwar Mahesh 			}
1143065714dcSSiddheshwar Mahesh 
11447c478bd9Sstevel@tonic-gate 			if (wd->cv_sig == 1) {
11457c478bd9Sstevel@tonic-gate 				/*
11467c478bd9Sstevel@tonic-gate 				 * Update completion status and notify poster
11477c478bd9Sstevel@tonic-gate 				 */
11487c478bd9Sstevel@tonic-gate 				cv_signal(&wd->wait_cv);
11497c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
11507c478bd9Sstevel@tonic-gate 			} else {
11517c478bd9Sstevel@tonic-gate 				/*
11527c478bd9Sstevel@tonic-gate 				 * Poster not waiting for notification.
11537c478bd9Sstevel@tonic-gate 				 * Free the send buffers and send_wid
11547c478bd9Sstevel@tonic-gate 				 */
11557c478bd9Sstevel@tonic-gate 				for (i = 0; i < wd->nsbufs; i++) {
11560a701b1eSRobert Gordon 					rib_rbuf_free(qptoc(wd->qp),
11570a701b1eSRobert Gordon 					    SEND_BUFFER,
115811606941Sjwahlig 					    (void *)(uintptr_t)wd->sbufaddr[i]);
11597c478bd9Sstevel@tonic-gate 				}
1160065714dcSSiddheshwar Mahesh 
1161065714dcSSiddheshwar Mahesh 				/* decrement the send ref count */
1162065714dcSSiddheshwar Mahesh 				rib_send_rele(qp);
1163065714dcSSiddheshwar Mahesh 
11647c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
11657c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wd);
11667c478bd9Sstevel@tonic-gate 			}
11677c478bd9Sstevel@tonic-gate 		}
11687c478bd9Sstevel@tonic-gate 	}
11697c478bd9Sstevel@tonic-gate }
11707c478bd9Sstevel@tonic-gate 
11717c478bd9Sstevel@tonic-gate /*
11727c478bd9Sstevel@tonic-gate  * RCQ handler
11737c478bd9Sstevel@tonic-gate  */
11747c478bd9Sstevel@tonic-gate /* ARGSUSED */
11757c478bd9Sstevel@tonic-gate static void
11767c478bd9Sstevel@tonic-gate rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
11777c478bd9Sstevel@tonic-gate {
11787c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
11797c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
11807c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
11817c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
11827c478bd9Sstevel@tonic-gate 
11837c478bd9Sstevel@tonic-gate 	/*
11847c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
11857c478bd9Sstevel@tonic-gate 	 * completion queue notification.
11867c478bd9Sstevel@tonic-gate 	 */
11877c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
11887c478bd9Sstevel@tonic-gate 
11897c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
11907c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
11917c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
11927c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
11937c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
11947c478bd9Sstevel@tonic-gate 			return;
11957c478bd9Sstevel@tonic-gate 
119611606941Sjwahlig 		rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
11977c478bd9Sstevel@tonic-gate 		qp = rwid->qp;
11989c86cdcdSSiddheshwar Mahesh 
11997c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
12007c478bd9Sstevel@tonic-gate 			XDR	inxdrs, *xdrs;
12017c478bd9Sstevel@tonic-gate 			uint_t	xid, vers, op, find_xid = 0;
12027c478bd9Sstevel@tonic-gate 			struct reply	*r;
12037c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
12040a701b1eSRobert Gordon 			uint32_t rdma_credit = 0;
12057c478bd9Sstevel@tonic-gate 
12067c478bd9Sstevel@tonic-gate 			xdrs = &inxdrs;
120711606941Sjwahlig 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
12087c478bd9Sstevel@tonic-gate 			    wc.wc_bytes_xfer, XDR_DECODE);
12097c478bd9Sstevel@tonic-gate 			/*
12107c478bd9Sstevel@tonic-gate 			 * Treat xid as opaque (xid is the first entity
12117c478bd9Sstevel@tonic-gate 			 * in the rpc rdma message).
12127c478bd9Sstevel@tonic-gate 			 */
121311606941Sjwahlig 			xid = *(uint32_t *)(uintptr_t)rwid->addr;
12140a701b1eSRobert Gordon 
12157c478bd9Sstevel@tonic-gate 			/* Skip xid and set the xdr position accordingly. */
12167c478bd9Sstevel@tonic-gate 			XDR_SETPOS(xdrs, sizeof (uint32_t));
12177c478bd9Sstevel@tonic-gate 			(void) xdr_u_int(xdrs, &vers);
12180a701b1eSRobert Gordon 			(void) xdr_u_int(xdrs, &rdma_credit);
12197c478bd9Sstevel@tonic-gate 			(void) xdr_u_int(xdrs, &op);
12207c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
12210a701b1eSRobert Gordon 
12227c478bd9Sstevel@tonic-gate 			if (vers != RPCRDMA_VERS) {
12237c478bd9Sstevel@tonic-gate 				/*
12240a701b1eSRobert Gordon 				 * Invalid RPC/RDMA version. Cannot
12250a701b1eSRobert Gordon 				 * interoperate.  Set connection to
12260a701b1eSRobert Gordon 				 * ERROR state and bail out.
12277c478bd9Sstevel@tonic-gate 				 */
12287c478bd9Sstevel@tonic-gate 				mutex_enter(&conn->c_lock);
12297c478bd9Sstevel@tonic-gate 				if (conn->c_state != C_DISCONN_PEND)
12300a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
12317c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
123211606941Sjwahlig 				rib_rbuf_free(conn, RECV_BUFFER,
123311606941Sjwahlig 				    (void *)(uintptr_t)rwid->addr);
12347c478bd9Sstevel@tonic-gate 				rib_free_wid(rwid);
12359c86cdcdSSiddheshwar Mahesh 				rib_recv_rele(qp);
12367c478bd9Sstevel@tonic-gate 				continue;
12377c478bd9Sstevel@tonic-gate 			}
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 			mutex_enter(&qp->replylist_lock);
12407c478bd9Sstevel@tonic-gate 			for (r = qp->replylist; r != NULL; r = r->next) {
12417c478bd9Sstevel@tonic-gate 				if (r->xid == xid) {
12427c478bd9Sstevel@tonic-gate 					find_xid = 1;
12437c478bd9Sstevel@tonic-gate 					switch (op) {
12447c478bd9Sstevel@tonic-gate 					case RDMA_MSG:
12457c478bd9Sstevel@tonic-gate 					case RDMA_NOMSG:
12467c478bd9Sstevel@tonic-gate 					case RDMA_MSGP:
12477c478bd9Sstevel@tonic-gate 						r->status = RDMA_SUCCESS;
12487c478bd9Sstevel@tonic-gate 						r->vaddr_cq = rwid->addr;
12490a701b1eSRobert Gordon 						r->bytes_xfer =
12500a701b1eSRobert Gordon 						    wc.wc_bytes_xfer;
12517c478bd9Sstevel@tonic-gate 						cv_signal(&r->wait_cv);
12527c478bd9Sstevel@tonic-gate 						break;
12537c478bd9Sstevel@tonic-gate 					default:
12540a701b1eSRobert Gordon 						rib_rbuf_free(qptoc(qp),
12550a701b1eSRobert Gordon 						    RECV_BUFFER,
12560a701b1eSRobert Gordon 						    (void *)(uintptr_t)
12570a701b1eSRobert Gordon 						    rwid->addr);
12587c478bd9Sstevel@tonic-gate 						break;
12597c478bd9Sstevel@tonic-gate 					}
12607c478bd9Sstevel@tonic-gate 					break;
12617c478bd9Sstevel@tonic-gate 				}
12627c478bd9Sstevel@tonic-gate 			}
12637c478bd9Sstevel@tonic-gate 			mutex_exit(&qp->replylist_lock);
12647c478bd9Sstevel@tonic-gate 			if (find_xid == 0) {
12657c478bd9Sstevel@tonic-gate 				/* RPC caller not waiting for reply */
12660a701b1eSRobert Gordon 
12670a701b1eSRobert Gordon 				DTRACE_PROBE1(rpcib__i__nomatchxid1,
12680a701b1eSRobert Gordon 				    int, xid);
12690a701b1eSRobert Gordon 
12707c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(qp), RECV_BUFFER,
127111606941Sjwahlig 				    (void *)(uintptr_t)rwid->addr);
12727c478bd9Sstevel@tonic-gate 			}
12737c478bd9Sstevel@tonic-gate 		} else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) {
12747c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
12757c478bd9Sstevel@tonic-gate 
12767c478bd9Sstevel@tonic-gate 			/*
12777c478bd9Sstevel@tonic-gate 			 * Connection being flushed. Just free
12787c478bd9Sstevel@tonic-gate 			 * the posted buffer
12797c478bd9Sstevel@tonic-gate 			 */
128011606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
128111606941Sjwahlig 			    (void *)(uintptr_t)rwid->addr);
12827c478bd9Sstevel@tonic-gate 		} else {
12837c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
12847c478bd9Sstevel@tonic-gate /*
12857c478bd9Sstevel@tonic-gate  *  RC Recv Q Error Code		Local state     Remote State
12867c478bd9Sstevel@tonic-gate  *  ====================		===========     ============
12877c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_ACCESS_ERR             ERROR           ERROR when NAK recvd
12887c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_LEN_ERR                ERROR           ERROR when NAK recvd
12897c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_PROTECT_ERR            ERROR           ERROR when NAK recvd
12907c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           ERROR when NAK recvd
12917c478bd9Sstevel@tonic-gate  *  IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR when NAK recvd
12927c478bd9Sstevel@tonic-gate  *  IBT_WC_WR_FLUSHED_ERR               None            None
12937c478bd9Sstevel@tonic-gate  */
12947c478bd9Sstevel@tonic-gate 			/*
12957c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection
12967c478bd9Sstevel@tonic-gate 			 * in ERROR state.
12977c478bd9Sstevel@tonic-gate 			 */
12987c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
12997c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
13000a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
13017c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
130211606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
130311606941Sjwahlig 			    (void *)(uintptr_t)rwid->addr);
13047c478bd9Sstevel@tonic-gate 		}
13057c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
13069c86cdcdSSiddheshwar Mahesh 		rib_recv_rele(qp);
13077c478bd9Sstevel@tonic-gate 	}
13087c478bd9Sstevel@tonic-gate }
13097c478bd9Sstevel@tonic-gate 
13107c478bd9Sstevel@tonic-gate /* Server side */
13117c478bd9Sstevel@tonic-gate /* ARGSUSED */
13127c478bd9Sstevel@tonic-gate static void
13137c478bd9Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
13147c478bd9Sstevel@tonic-gate {
13150a701b1eSRobert Gordon 	rdma_recv_data_t *rdp;
13167c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
13177c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
13187c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
13197c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
13207c478bd9Sstevel@tonic-gate 	CONN		*conn;
13217c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
13227c478bd9Sstevel@tonic-gate 
13237c478bd9Sstevel@tonic-gate 	/*
13247c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
13257c478bd9Sstevel@tonic-gate 	 * completion queue notification.
13267c478bd9Sstevel@tonic-gate 	 */
13277c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
13287c478bd9Sstevel@tonic-gate 
13297c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
13307c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
13317c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
13327c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
13337c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
13347c478bd9Sstevel@tonic-gate 			return;
13357c478bd9Sstevel@tonic-gate 
133611606941Sjwahlig 		s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
13377c478bd9Sstevel@tonic-gate 		qp = s_recvp->qp;
13387c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
13397c478bd9Sstevel@tonic-gate 
13407c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
13417c478bd9Sstevel@tonic-gate 			XDR	inxdrs, *xdrs;
13427c478bd9Sstevel@tonic-gate 			uint_t	xid, vers, op;
13430a701b1eSRobert Gordon 			uint32_t rdma_credit;
13447c478bd9Sstevel@tonic-gate 
13457c478bd9Sstevel@tonic-gate 			xdrs = &inxdrs;
13467c478bd9Sstevel@tonic-gate 			/* s_recvp->vaddr stores data */
134711606941Sjwahlig 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
13487c478bd9Sstevel@tonic-gate 			    wc.wc_bytes_xfer, XDR_DECODE);
13497c478bd9Sstevel@tonic-gate 
13507c478bd9Sstevel@tonic-gate 			/*
13517c478bd9Sstevel@tonic-gate 			 * Treat xid as opaque (xid is the first entity
13527c478bd9Sstevel@tonic-gate 			 * in the rpc rdma message).
13537c478bd9Sstevel@tonic-gate 			 */
135411606941Sjwahlig 			xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
13557c478bd9Sstevel@tonic-gate 			/* Skip xid and set the xdr position accordingly. */
13567c478bd9Sstevel@tonic-gate 			XDR_SETPOS(xdrs, sizeof (uint32_t));
13577c478bd9Sstevel@tonic-gate 			if (!xdr_u_int(xdrs, &vers) ||
13580a701b1eSRobert Gordon 			    !xdr_u_int(xdrs, &rdma_credit) ||
13597c478bd9Sstevel@tonic-gate 			    !xdr_u_int(xdrs, &op)) {
13607c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
136111606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
13627c478bd9Sstevel@tonic-gate 				XDR_DESTROY(xdrs);
13639c86cdcdSSiddheshwar Mahesh 				rib_recv_rele(qp);
13647c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
13657c478bd9Sstevel@tonic-gate 				continue;
13667c478bd9Sstevel@tonic-gate 			}
13677c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
13687c478bd9Sstevel@tonic-gate 
13697c478bd9Sstevel@tonic-gate 			if (vers != RPCRDMA_VERS) {
13707c478bd9Sstevel@tonic-gate 				/*
13710a701b1eSRobert Gordon 				 * Invalid RPC/RDMA version.
13720a701b1eSRobert Gordon 				 * Drop rpc rdma message.
13737c478bd9Sstevel@tonic-gate 				 */
13747c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
137511606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
13769c86cdcdSSiddheshwar Mahesh 				rib_recv_rele(qp);
13777c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
13787c478bd9Sstevel@tonic-gate 				continue;
13797c478bd9Sstevel@tonic-gate 			}
13807c478bd9Sstevel@tonic-gate 			/*
13817c478bd9Sstevel@tonic-gate 			 * Is this for RDMA_DONE?
13827c478bd9Sstevel@tonic-gate 			 */
13837c478bd9Sstevel@tonic-gate 			if (op == RDMA_DONE) {
13847c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
138511606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
13867c478bd9Sstevel@tonic-gate 				/*
13877c478bd9Sstevel@tonic-gate 				 * Wake up the thread waiting on
13887c478bd9Sstevel@tonic-gate 				 * a RDMA_DONE for xid
13897c478bd9Sstevel@tonic-gate 				 */
13907c478bd9Sstevel@tonic-gate 				mutex_enter(&qp->rdlist_lock);
13917c478bd9Sstevel@tonic-gate 				rdma_done_notify(qp, xid);
13927c478bd9Sstevel@tonic-gate 				mutex_exit(&qp->rdlist_lock);
13939c86cdcdSSiddheshwar Mahesh 				rib_recv_rele(qp);
13947c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
13957c478bd9Sstevel@tonic-gate 				continue;
13967c478bd9Sstevel@tonic-gate 			}
13977c478bd9Sstevel@tonic-gate 
13987c478bd9Sstevel@tonic-gate 			mutex_enter(&plugin_state_lock);
139959c5abfeSFaramarz Jalalian - Sun Microsystems - Irvine United States 			mutex_enter(&conn->c_lock);
140059c5abfeSFaramarz Jalalian - Sun Microsystems - Irvine United States 			if ((plugin_state == ACCEPT) &&
140159c5abfeSFaramarz Jalalian - Sun Microsystems - Irvine United States 			    (conn->c_state == C_CONNECTED)) {
140259c5abfeSFaramarz Jalalian - Sun Microsystems - Irvine United States 				conn->c_ref++;
140359c5abfeSFaramarz Jalalian - Sun Microsystems - Irvine United States 				mutex_exit(&conn->c_lock);
14040a701b1eSRobert Gordon 				while ((mp = allocb(sizeof (*rdp), BPRI_LO))
14050a701b1eSRobert Gordon 				    == NULL)
14060a701b1eSRobert Gordon 					(void) strwaitbuf(
14070a701b1eSRobert Gordon 					    sizeof (*rdp), BPRI_LO);
14087c478bd9Sstevel@tonic-gate 				/*
14097c478bd9Sstevel@tonic-gate 				 * Plugin is in accept state, hence the master
14107c478bd9Sstevel@tonic-gate 				 * transport queue for this is still accepting
14117c478bd9Sstevel@tonic-gate 				 * requests. Hence we can call svc_queuereq to
14127c478bd9Sstevel@tonic-gate 				 * queue this recieved msg.
14137c478bd9Sstevel@tonic-gate 				 */
14140a701b1eSRobert Gordon 				rdp = (rdma_recv_data_t *)mp->b_rptr;
14150a701b1eSRobert Gordon 				rdp->conn = conn;
14160a701b1eSRobert Gordon 				rdp->rpcmsg.addr =
14170a701b1eSRobert Gordon 				    (caddr_t)(uintptr_t)s_recvp->vaddr;
14180a701b1eSRobert Gordon 				rdp->rpcmsg.type = RECV_BUFFER;
14190a701b1eSRobert Gordon 				rdp->rpcmsg.len = wc.wc_bytes_xfer;
14200a701b1eSRobert Gordon 				rdp->status = wc.wc_status;
14210a701b1eSRobert Gordon 				mp->b_wptr += sizeof (*rdp);
14227c478bd9Sstevel@tonic-gate 				svc_queuereq((queue_t *)rib_stat->q, mp);
14237c478bd9Sstevel@tonic-gate 				mutex_exit(&plugin_state_lock);
14247c478bd9Sstevel@tonic-gate 			} else {
14257c478bd9Sstevel@tonic-gate 				/*
14267c478bd9Sstevel@tonic-gate 				 * The master transport for this is going
14277c478bd9Sstevel@tonic-gate 				 * away and the queue is not accepting anymore
14287c478bd9Sstevel@tonic-gate 				 * requests for krpc, so don't do anything, just
14297c478bd9Sstevel@tonic-gate 				 * free the msg.
14307c478bd9Sstevel@tonic-gate 				 */
143159c5abfeSFaramarz Jalalian - Sun Microsystems - Irvine United States 				mutex_exit(&conn->c_lock);
14327c478bd9Sstevel@tonic-gate 				mutex_exit(&plugin_state_lock);
14337c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
143411606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
14357c478bd9Sstevel@tonic-gate 			}
14367c478bd9Sstevel@tonic-gate 		} else {
14377c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
143811606941Sjwahlig 			    (void *)(uintptr_t)s_recvp->vaddr);
14397c478bd9Sstevel@tonic-gate 		}
14409c86cdcdSSiddheshwar Mahesh 		rib_recv_rele(qp);
14417c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
14427c478bd9Sstevel@tonic-gate 	}
14437c478bd9Sstevel@tonic-gate }
14447c478bd9Sstevel@tonic-gate 
14457f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static void
14467f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rib_attach_hca()
14477f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States {
14487f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_enter(&rib_stat->open_hca_lock);
1449ed629aefSSiddheshwar Mahesh 	(void) rpcib_open_hcas(rib_stat);
14507f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_listen(NULL);
14517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_exit(&rib_stat->open_hca_lock);
14527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States }
14537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
14547c478bd9Sstevel@tonic-gate /*
14557c478bd9Sstevel@tonic-gate  * Handles DR event of IBT_HCA_DETACH_EVENT.
14567c478bd9Sstevel@tonic-gate  */
14577c478bd9Sstevel@tonic-gate /* ARGSUSED */
14587c478bd9Sstevel@tonic-gate static void
14597c478bd9Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
14607c478bd9Sstevel@tonic-gate 	ibt_async_code_t code, ibt_async_event_t *event)
14617c478bd9Sstevel@tonic-gate {
14627c478bd9Sstevel@tonic-gate 	switch (code) {
14637c478bd9Sstevel@tonic-gate 	case IBT_HCA_ATTACH_EVENT:
14647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_attach_hca();
14657c478bd9Sstevel@tonic-gate 		break;
14667c478bd9Sstevel@tonic-gate 	case IBT_HCA_DETACH_EVENT:
14679c86cdcdSSiddheshwar Mahesh 		rib_detach_hca(hca_hdl);
14687c478bd9Sstevel@tonic-gate #ifdef DEBUG
14697c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n");
14707c478bd9Sstevel@tonic-gate #endif
14717c478bd9Sstevel@tonic-gate 		break;
14727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	case IBT_EVENT_PORT_UP:
14737f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		/*
14747f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * A port is up. We should call rib_listen() since there is
14757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * a chance that rib_listen() may have failed during
14767f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * rib_attach_hca() because the port had not been up yet.
14777f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 */
14787f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_listen(NULL);
14797f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States #ifdef DEBUG
14807f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n");
14817f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States #endif
14827f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		break;
14837c478bd9Sstevel@tonic-gate #ifdef DEBUG
14847c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PATH_MIGRATED:
14850a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
14860a701b1eSRobert Gordon 		    "IBT_EVENT_PATH_MIGRATED\n");
14877c478bd9Sstevel@tonic-gate 		break;
14887c478bd9Sstevel@tonic-gate 	case IBT_EVENT_SQD:
14897c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n");
14907c478bd9Sstevel@tonic-gate 		break;
14917c478bd9Sstevel@tonic-gate 	case IBT_EVENT_COM_EST:
14927c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n");
14937c478bd9Sstevel@tonic-gate 		break;
14947c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CATASTROPHIC_CHAN:
14950a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
14960a701b1eSRobert Gordon 		    "IBT_ERROR_CATASTROPHIC_CHAN\n");
14977c478bd9Sstevel@tonic-gate 		break;
14987c478bd9Sstevel@tonic-gate 	case IBT_ERROR_INVALID_REQUEST_CHAN:
14997c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): "
15007c478bd9Sstevel@tonic-gate 		    "IBT_ERROR_INVALID_REQUEST_CHAN\n");
15017c478bd9Sstevel@tonic-gate 		break;
15027c478bd9Sstevel@tonic-gate 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
15037c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): "
15047c478bd9Sstevel@tonic-gate 		    "IBT_ERROR_ACCESS_VIOLATION_CHAN\n");
15057c478bd9Sstevel@tonic-gate 		break;
15067c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PATH_MIGRATE_REQ:
15070a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
15080a701b1eSRobert Gordon 		    "IBT_ERROR_PATH_MIGRATE_REQ\n");
15097c478bd9Sstevel@tonic-gate 		break;
15107c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CQ:
15117c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n");
15127c478bd9Sstevel@tonic-gate 		break;
15137c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PORT_DOWN:
15147c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n");
15157c478bd9Sstevel@tonic-gate 		break;
15167c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE1:
15177c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n");
15187c478bd9Sstevel@tonic-gate 		break;
15197c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE2:
15207c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n");
15217c478bd9Sstevel@tonic-gate 		break;
15227c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE3:
15237c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n");
15247c478bd9Sstevel@tonic-gate 		break;
15257c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE4:
15267c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n");
15277c478bd9Sstevel@tonic-gate 		break;
15287c478bd9Sstevel@tonic-gate #endif
15297c478bd9Sstevel@tonic-gate 	default:
15307c478bd9Sstevel@tonic-gate 		break;
15317c478bd9Sstevel@tonic-gate 	}
15327c478bd9Sstevel@tonic-gate }
15337c478bd9Sstevel@tonic-gate 
15347c478bd9Sstevel@tonic-gate /*
15357c478bd9Sstevel@tonic-gate  * Client's reachable function.
15367c478bd9Sstevel@tonic-gate  */
15377c478bd9Sstevel@tonic-gate static rdma_stat
15387c478bd9Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle)
15397c478bd9Sstevel@tonic-gate {
15407c478bd9Sstevel@tonic-gate 	rdma_stat	status;
1541214ae7d0SSiddheshwar Mahesh 	rpcib_ping_t	rpt;
1542ed629aefSSiddheshwar Mahesh 	struct netbuf	saddr;
1543ed629aefSSiddheshwar Mahesh 	CONN		*conn;
15447c478bd9Sstevel@tonic-gate 
1545ed629aefSSiddheshwar Mahesh 	bzero(&saddr, sizeof (struct netbuf));
1546ed629aefSSiddheshwar Mahesh 	status = rib_connect(&saddr, raddr, addr_type, &rpt, &conn);
15477c478bd9Sstevel@tonic-gate 
15487c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
1549214ae7d0SSiddheshwar Mahesh 		*handle = (void *)rpt.hca;
1550ed629aefSSiddheshwar Mahesh 		/* release the reference */
1551ed629aefSSiddheshwar Mahesh 		(void) rib_conn_release(conn);
15527c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
15537c478bd9Sstevel@tonic-gate 	} else {
15547c478bd9Sstevel@tonic-gate 		*handle = NULL;
15550a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__pingfailed);
15567c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
15577c478bd9Sstevel@tonic-gate 	}
15587c478bd9Sstevel@tonic-gate }
15597c478bd9Sstevel@tonic-gate 
15607c478bd9Sstevel@tonic-gate /* Client side qp creation */
15617c478bd9Sstevel@tonic-gate static rdma_stat
15627c478bd9Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
15637c478bd9Sstevel@tonic-gate {
15647c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
15657c478bd9Sstevel@tonic-gate 	CONN		*conn;
15660a701b1eSRobert Gordon 	rdma_clnt_cred_ctrl_t *cc_info;
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate 	ASSERT(qp != NULL);
15697c478bd9Sstevel@tonic-gate 	*qp = NULL;
15707c478bd9Sstevel@tonic-gate 
15717c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
15727c478bd9Sstevel@tonic-gate 	conn = qptoc(kqp);
15737c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
15747c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
15757c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
15767c478bd9Sstevel@tonic-gate 
15777c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_CLIENT;
15787c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
15797c478bd9Sstevel@tonic-gate 	conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP);
15807c478bd9Sstevel@tonic-gate 	bcopy(raddr->buf, conn->c_raddr.buf, raddr->len);
15817c478bd9Sstevel@tonic-gate 	conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len;
15827c478bd9Sstevel@tonic-gate 	/*
15837c478bd9Sstevel@tonic-gate 	 * Initialize
15847c478bd9Sstevel@tonic-gate 	 */
15857c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
15867c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
15877c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
1588065714dcSSiddheshwar Mahesh 	cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL);
1589065714dcSSiddheshwar Mahesh 	mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
15907c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
15917c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
15927c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
15937c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
15947c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
15950a701b1eSRobert Gordon 	/*
15960a701b1eSRobert Gordon 	 * Initialize the client credit control
15970a701b1eSRobert Gordon 	 * portion of the rdmaconn struct.
15980a701b1eSRobert Gordon 	 */
15990a701b1eSRobert Gordon 	kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT;
16000a701b1eSRobert Gordon 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
16010a701b1eSRobert Gordon 	cc_info->clnt_cc_granted_ops = 0;
16020a701b1eSRobert Gordon 	cc_info->clnt_cc_in_flight_ops = 0;
16030a701b1eSRobert Gordon 	cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL);
16047c478bd9Sstevel@tonic-gate 
16057c478bd9Sstevel@tonic-gate 	*qp = kqp;
16067c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
16077c478bd9Sstevel@tonic-gate }
16087c478bd9Sstevel@tonic-gate 
16097c478bd9Sstevel@tonic-gate /* Server side qp creation */
16107c478bd9Sstevel@tonic-gate static rdma_stat
16117c478bd9Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp)
16127c478bd9Sstevel@tonic-gate {
16137c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
16147c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
16157c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
16167c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
16170a701b1eSRobert Gordon 	rdma_srv_cred_ctrl_t *cc_info;
16187c478bd9Sstevel@tonic-gate 
16197c478bd9Sstevel@tonic-gate 	*qp = NULL;
16207c478bd9Sstevel@tonic-gate 
16217c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
16227c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
16237c478bd9Sstevel@tonic-gate 	kqp->port_num = port;
16247c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
16257c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
16267c478bd9Sstevel@tonic-gate 
16277c478bd9Sstevel@tonic-gate 	/*
16287c478bd9Sstevel@tonic-gate 	 * Create the qp handle
16297c478bd9Sstevel@tonic-gate 	 */
16307c478bd9Sstevel@tonic-gate 	bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
16317c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl;
16327c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl;
16337c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
16347c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = port;
16357c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
16367c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
16377c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
16387c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
16397c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
16407c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
16417c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
16427c478bd9Sstevel@tonic-gate 
16437c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
16447c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
16457c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
16467c478bd9Sstevel@tonic-gate 		    IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl,
16477c478bd9Sstevel@tonic-gate 		    &chan_sizes);
16487c478bd9Sstevel@tonic-gate 	} else {
16497c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
16507c478bd9Sstevel@tonic-gate 		goto fail;
16517c478bd9Sstevel@tonic-gate 	}
16527c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
16537c478bd9Sstevel@tonic-gate 
16547c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
16550a701b1eSRobert Gordon 		DTRACE_PROBE1(rpcib__i_svccreatechanfail,
16560a701b1eSRobert Gordon 		    int, ibt_status);
16577c478bd9Sstevel@tonic-gate 		goto fail;
16587c478bd9Sstevel@tonic-gate 	}
16597c478bd9Sstevel@tonic-gate 
16607c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_SERVER;
16617c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
16627c478bd9Sstevel@tonic-gate 	kqp->q = q;	/* server ONLY */
16637c478bd9Sstevel@tonic-gate 
16647c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
16657c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
16667c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
16677c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
1668065714dcSSiddheshwar Mahesh 	cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL);
1669065714dcSSiddheshwar Mahesh 	mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
16707c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
16717c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
16727c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
16737c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
16747c478bd9Sstevel@tonic-gate 	/*
16757c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
16767c478bd9Sstevel@tonic-gate 	 */
16777c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
16787c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_state = C_CONNECTED;
16790a701b1eSRobert Gordon 
16800a701b1eSRobert Gordon 	/*
16810a701b1eSRobert Gordon 	 * Initialize the server credit control
16820a701b1eSRobert Gordon 	 * portion of the rdmaconn struct.
16830a701b1eSRobert Gordon 	 */
16840a701b1eSRobert Gordon 	kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
16850a701b1eSRobert Gordon 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
16860a701b1eSRobert Gordon 	cc_info->srv_cc_buffers_granted = preposted_rbufs;
16870a701b1eSRobert Gordon 	cc_info->srv_cc_cur_buffers_used = 0;
16880a701b1eSRobert Gordon 	cc_info->srv_cc_posted = preposted_rbufs;
16890a701b1eSRobert Gordon 
16907c478bd9Sstevel@tonic-gate 	*qp = kqp;
16910a701b1eSRobert Gordon 
16927c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
16937c478bd9Sstevel@tonic-gate fail:
16947c478bd9Sstevel@tonic-gate 	if (kqp)
16957c478bd9Sstevel@tonic-gate 		kmem_free(kqp, sizeof (rib_qp_t));
16967c478bd9Sstevel@tonic-gate 
16977c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
16987c478bd9Sstevel@tonic-gate }
16997c478bd9Sstevel@tonic-gate 
17007c478bd9Sstevel@tonic-gate /* ARGSUSED */
17017c478bd9Sstevel@tonic-gate ibt_cm_status_t
17027c478bd9Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event,
17037c478bd9Sstevel@tonic-gate     ibt_cm_return_args_t *ret_args, void *priv_data,
17047c478bd9Sstevel@tonic-gate     ibt_priv_data_len_t len)
17057c478bd9Sstevel@tonic-gate {
17067c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
17077c478bd9Sstevel@tonic-gate 
17087f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	hca = (rib_hca_t *)clnt_hdl;
17097c478bd9Sstevel@tonic-gate 
17107c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
17117c478bd9Sstevel@tonic-gate 
17127c478bd9Sstevel@tonic-gate 	/* got a connection close event */
17137c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
17147c478bd9Sstevel@tonic-gate 	{
17157c478bd9Sstevel@tonic-gate 		CONN	*conn;
17167c478bd9Sstevel@tonic-gate 		rib_qp_t *qp;
17177c478bd9Sstevel@tonic-gate 
17187c478bd9Sstevel@tonic-gate 		/* check reason why connection was closed */
17197c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
17207c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
17217c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
17227c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
17237c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
17247c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
17257c478bd9Sstevel@tonic-gate 			/*
17267c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
17277c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
17287c478bd9Sstevel@tonic-gate 			 */
17297c478bd9Sstevel@tonic-gate 			break;
17307c478bd9Sstevel@tonic-gate 		default:
17317c478bd9Sstevel@tonic-gate 			/*
17327c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
17337c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
17347c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
17357c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
17367c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
17377c478bd9Sstevel@tonic-gate 			 * state
17387c478bd9Sstevel@tonic-gate 			 */
17397c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
17407c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
17417c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
17427c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
17437c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
17447c478bd9Sstevel@tonic-gate 				break;
17457c478bd9Sstevel@tonic-gate 			}
17467c478bd9Sstevel@tonic-gate 
17470a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
17487c478bd9Sstevel@tonic-gate 
17497c478bd9Sstevel@tonic-gate 			/*
17507c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref is down to 0 already
17517c478bd9Sstevel@tonic-gate 			 */
17527c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
17537c478bd9Sstevel@tonic-gate 				/*
17547c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
17557c478bd9Sstevel@tonic-gate 				 */
17567c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
17577c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
17587f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				rw_enter(&hca->state_lock, RW_READER);
17597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				if (hca->state != HCA_DETACHED)
17607c478bd9Sstevel@tonic-gate 					(void) rib_disconnect_channel(conn,
17617c478bd9Sstevel@tonic-gate 					    &hca->cl_conn_list);
17627f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				rw_exit(&hca->state_lock);
17637c478bd9Sstevel@tonic-gate 			} else {
1764065714dcSSiddheshwar Mahesh 				/*
1765065714dcSSiddheshwar Mahesh 				 * conn will be freed when c_ref goes to 0.
1766065714dcSSiddheshwar Mahesh 				 * Indicate to cleaning thread not to close
1767065714dcSSiddheshwar Mahesh 				 * the connection, but just free the channel.
1768065714dcSSiddheshwar Mahesh 				 */
1769065714dcSSiddheshwar Mahesh 				conn->c_flags |= C_CLOSE_NOTNEEDED;
17707c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
17717c478bd9Sstevel@tonic-gate 			}
17727c478bd9Sstevel@tonic-gate #ifdef DEBUG
17737c478bd9Sstevel@tonic-gate 			if (rib_debug)
17747c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_clnt_cm_handler: "
17757c478bd9Sstevel@tonic-gate 				    "(CONN_CLOSED) channel disconnected");
17767c478bd9Sstevel@tonic-gate #endif
17777c478bd9Sstevel@tonic-gate 			break;
17787c478bd9Sstevel@tonic-gate 		}
17797c478bd9Sstevel@tonic-gate 		break;
17807c478bd9Sstevel@tonic-gate 	}
17817c478bd9Sstevel@tonic-gate 	default:
17827c478bd9Sstevel@tonic-gate 		break;
17837c478bd9Sstevel@tonic-gate 	}
17847c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
17857c478bd9Sstevel@tonic-gate }
17867c478bd9Sstevel@tonic-gate 
17877c478bd9Sstevel@tonic-gate /*
17887c478bd9Sstevel@tonic-gate  * Connect to the server.
17897c478bd9Sstevel@tonic-gate  */
17907c478bd9Sstevel@tonic-gate rdma_stat
1791214ae7d0SSiddheshwar Mahesh rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, rpcib_ping_t *rptp)
17927c478bd9Sstevel@tonic-gate {
17937c478bd9Sstevel@tonic-gate 	ibt_chan_open_args_t	chan_args;	/* channel args */
17947c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
17957c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
17967c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
17977c478bd9Sstevel@tonic-gate 	ibt_rc_returns_t	ret_args;   	/* conn reject info */
17987c478bd9Sstevel@tonic-gate 	int refresh = REFRESH_ATTEMPTS;	/* refresh if IBT_CM_CONN_STALE */
17990a701b1eSRobert Gordon 	ibt_ip_cm_info_t	ipcm_info;
18000a701b1eSRobert Gordon 	uint8_t cmp_ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ];
18010a701b1eSRobert Gordon 
18027c478bd9Sstevel@tonic-gate 
18037c478bd9Sstevel@tonic-gate 	(void) bzero(&chan_args, sizeof (chan_args));
18047c478bd9Sstevel@tonic-gate 	(void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
18050a701b1eSRobert Gordon 	(void) bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
18060a701b1eSRobert Gordon 
1807214ae7d0SSiddheshwar Mahesh 	ipcm_info.src_addr.family = rptp->srcip.family;
1808214ae7d0SSiddheshwar Mahesh 	switch (ipcm_info.src_addr.family) {
18090a701b1eSRobert Gordon 	case AF_INET:
1810214ae7d0SSiddheshwar Mahesh 		ipcm_info.src_addr.un.ip4addr = rptp->srcip.un.ip4addr;
18110a701b1eSRobert Gordon 		break;
18120a701b1eSRobert Gordon 	case AF_INET6:
1813214ae7d0SSiddheshwar Mahesh 		ipcm_info.src_addr.un.ip6addr = rptp->srcip.un.ip6addr;
18140a701b1eSRobert Gordon 		break;
18150a701b1eSRobert Gordon 	}
18160a701b1eSRobert Gordon 
1817214ae7d0SSiddheshwar Mahesh 	ipcm_info.dst_addr.family = rptp->srcip.family;
1818214ae7d0SSiddheshwar Mahesh 	switch (ipcm_info.dst_addr.family) {
18190a701b1eSRobert Gordon 	case AF_INET:
1820214ae7d0SSiddheshwar Mahesh 		ipcm_info.dst_addr.un.ip4addr = rptp->dstip.un.ip4addr;
18210a701b1eSRobert Gordon 		break;
18220a701b1eSRobert Gordon 	case AF_INET6:
1823214ae7d0SSiddheshwar Mahesh 		ipcm_info.dst_addr.un.ip6addr = rptp->dstip.un.ip6addr;
18240a701b1eSRobert Gordon 		break;
18250a701b1eSRobert Gordon 	}
18260a701b1eSRobert Gordon 
1827f837ee4aSSiddheshwar Mahesh 	ipcm_info.src_port = (in_port_t)nfs_rdma_port;
18280a701b1eSRobert Gordon 
18290a701b1eSRobert Gordon 	ibt_status = ibt_format_ip_private_data(&ipcm_info,
18300a701b1eSRobert Gordon 	    IBT_IP_HDR_PRIV_DATA_SZ, cmp_ip_pvt);
18310a701b1eSRobert Gordon 
18320a701b1eSRobert Gordon 	if (ibt_status != IBT_SUCCESS) {
18330a701b1eSRobert Gordon 		cmn_err(CE_WARN, "ibt_format_ip_private_data failed\n");
18340a701b1eSRobert Gordon 		return (-1);
18350a701b1eSRobert Gordon 	}
18367c478bd9Sstevel@tonic-gate 
1837214ae7d0SSiddheshwar Mahesh 	qp_attr.rc_hca_port_num = rptp->path.pi_prim_cep_path.cep_hca_port_num;
18387c478bd9Sstevel@tonic-gate 	/* Alloc a RC channel */
18397c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl;
18407c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl;
18417c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
18427c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
18437c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
18447c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
18457c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
18467c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
18477c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
18487c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
18497c478bd9Sstevel@tonic-gate 
1850f837ee4aSSiddheshwar Mahesh 	rptp->path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, nfs_rdma_port);
1851214ae7d0SSiddheshwar Mahesh 	chan_args.oc_path = &rptp->path;
1852f837ee4aSSiddheshwar Mahesh 
18537c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_handler = rib_clnt_cm_handler;
18547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	chan_args.oc_cm_clnt_private = (void *)hca;
18550a701b1eSRobert Gordon 	chan_args.oc_rdma_ra_out = 4;
18560a701b1eSRobert Gordon 	chan_args.oc_rdma_ra_in = 4;
18577c478bd9Sstevel@tonic-gate 	chan_args.oc_path_retry_cnt = 2;
18587c478bd9Sstevel@tonic-gate 	chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
18590a701b1eSRobert Gordon 	chan_args.oc_priv_data = cmp_ip_pvt;
18600a701b1eSRobert Gordon 	chan_args.oc_priv_data_len = IBT_IP_HDR_PRIV_DATA_SZ;
18617c478bd9Sstevel@tonic-gate 
18627c478bd9Sstevel@tonic-gate refresh:
18637c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
18647c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
18657c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
18660a701b1eSRobert Gordon 		    IBT_ACHAN_NO_FLAGS,
18670a701b1eSRobert Gordon 		    &qp_attr, &qp->qp_hdl,
18687c478bd9Sstevel@tonic-gate 		    &chan_sizes);
18697c478bd9Sstevel@tonic-gate 	} else {
18707c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
18717c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
18727c478bd9Sstevel@tonic-gate 	}
18737c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
18747c478bd9Sstevel@tonic-gate 
18757c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
18760a701b1eSRobert Gordon 		DTRACE_PROBE1(rpcib__i_conntosrv,
18770a701b1eSRobert Gordon 		    int, ibt_status);
18787c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
18797c478bd9Sstevel@tonic-gate 	}
18807c478bd9Sstevel@tonic-gate 
18817c478bd9Sstevel@tonic-gate 	/* Connect to the Server */
18827c478bd9Sstevel@tonic-gate 	(void) bzero(&ret_args, sizeof (ret_args));
18837c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->cb_lock);
18847c478bd9Sstevel@tonic-gate 	ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS,
18857c478bd9Sstevel@tonic-gate 	    IBT_BLOCKING, &chan_args, &ret_args);
18867c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
18870a701b1eSRobert Gordon 		DTRACE_PROBE2(rpcib__i_openrctosrv,
18880a701b1eSRobert Gordon 		    int, ibt_status, int, ret_args.rc_status);
18890a701b1eSRobert Gordon 
18907c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
18917c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
18927c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->cb_lock);
18937c478bd9Sstevel@tonic-gate 		if (refresh-- && ibt_status == IBT_CM_FAILURE &&
18947c478bd9Sstevel@tonic-gate 		    ret_args.rc_status == IBT_CM_CONN_STALE) {
18957c478bd9Sstevel@tonic-gate 			/*
18967c478bd9Sstevel@tonic-gate 			 * Got IBT_CM_CONN_STALE probably because of stale
18977c478bd9Sstevel@tonic-gate 			 * data on the passive end of a channel that existed
18987c478bd9Sstevel@tonic-gate 			 * prior to reboot. Retry establishing a channel
18997c478bd9Sstevel@tonic-gate 			 * REFRESH_ATTEMPTS times, during which time the
19007c478bd9Sstevel@tonic-gate 			 * stale conditions on the server might clear up.
19017c478bd9Sstevel@tonic-gate 			 */
19027c478bd9Sstevel@tonic-gate 			goto refresh;
19037c478bd9Sstevel@tonic-gate 		}
19047c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
19057c478bd9Sstevel@tonic-gate 	}
19067c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->cb_lock);
19077c478bd9Sstevel@tonic-gate 	/*
19087c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
19097c478bd9Sstevel@tonic-gate 	 */
19107c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(qp->qp_hdl, (void *)qp);
19117c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
19127c478bd9Sstevel@tonic-gate }
19137c478bd9Sstevel@tonic-gate 
19147c478bd9Sstevel@tonic-gate rdma_stat
1915214ae7d0SSiddheshwar Mahesh rib_ping_srv(int addr_type, struct netbuf *raddr, rpcib_ping_t *rptp)
19167c478bd9Sstevel@tonic-gate {
19177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	uint_t			i, addr_count;
19187c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
19190a701b1eSRobert Gordon 	uint8_t			num_paths_p;
19200a701b1eSRobert Gordon 	ibt_ip_path_attr_t	ipattr;
19210a701b1eSRobert Gordon 	ibt_path_ip_src_t	srcip;
1922e11c3f44Smeem 	rpcib_ipaddrs_t		addrs4;
1923e11c3f44Smeem 	rpcib_ipaddrs_t		addrs6;
1924e11c3f44Smeem 	struct sockaddr_in	*sinp;
1925e11c3f44Smeem 	struct sockaddr_in6	*sin6p;
19267f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rdma_stat		retval = RDMA_FAILED;
19277f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_t *hca;
19280a701b1eSRobert Gordon 
19297f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if ((addr_type != AF_INET) && (addr_type != AF_INET6))
19307f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		return (RDMA_INVAL);
19317c478bd9Sstevel@tonic-gate 	ASSERT(raddr->buf != NULL);
19327c478bd9Sstevel@tonic-gate 
19330a701b1eSRobert Gordon 	bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
19347c478bd9Sstevel@tonic-gate 
1935e11c3f44Smeem 	if (!rpcib_get_ib_addresses(&addrs4, &addrs6) ||
1936e11c3f44Smeem 	    (addrs4.ri_count == 0 && addrs6.ri_count == 0)) {
1937e11c3f44Smeem 		retval = RDMA_FAILED;
19387f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		goto done2;
19390a701b1eSRobert Gordon 	}
19400a701b1eSRobert Gordon 
19417f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (addr_type == AF_INET) {
19427f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		addr_count = addrs4.ri_count;
1943e11c3f44Smeem 		sinp = (struct sockaddr_in *)raddr->buf;
1944214ae7d0SSiddheshwar Mahesh 		rptp->dstip.family = AF_INET;
1945214ae7d0SSiddheshwar Mahesh 		rptp->dstip.un.ip4addr = sinp->sin_addr.s_addr;
1946e11c3f44Smeem 		sinp = addrs4.ri_list;
19477f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	} else {
19487f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		addr_count = addrs6.ri_count;
1949e11c3f44Smeem 		sin6p = (struct sockaddr_in6 *)raddr->buf;
1950214ae7d0SSiddheshwar Mahesh 		rptp->dstip.family = AF_INET6;
1951214ae7d0SSiddheshwar Mahesh 		rptp->dstip.un.ip6addr = sin6p->sin6_addr;
1952e11c3f44Smeem 		sin6p = addrs6.ri_list;
19537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
19547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
19557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
19567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
19577f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_enter(&hca->state_lock, RW_READER);
19587f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (hca->state == HCA_DETACHED) {
19597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&hca->state_lock);
19607f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			continue;
19617f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
19620a701b1eSRobert Gordon 
1963214ae7d0SSiddheshwar Mahesh 		ipattr.ipa_dst_ip 	= &rptp->dstip;
19647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		ipattr.ipa_hca_guid	= hca->hca_guid;
19650a701b1eSRobert Gordon 		ipattr.ipa_ndst		= 1;
19660a701b1eSRobert Gordon 		ipattr.ipa_max_paths	= 1;
1967214ae7d0SSiddheshwar Mahesh 		ipattr.ipa_src_ip.family = rptp->dstip.family;
19687f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		for (i = 0; i < addr_count; i++) {
1969214ae7d0SSiddheshwar Mahesh 			num_paths_p = 0;
19707f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			if (addr_type == AF_INET) {
19717f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				ipattr.ipa_src_ip.un.ip4addr =
19727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    sinp[i].sin_addr.s_addr;
19737f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			} else {
19747f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				ipattr.ipa_src_ip.un.ip6addr =
19757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    sin6p[i].sin6_addr;
19767f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			}
1977214ae7d0SSiddheshwar Mahesh 			bzero(&srcip, sizeof (ibt_path_ip_src_t));
19780a701b1eSRobert Gordon 
19790a701b1eSRobert Gordon 			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
1980214ae7d0SSiddheshwar Mahesh 			    IBT_PATH_NO_FLAGS, &ipattr, &rptp->path,
1981214ae7d0SSiddheshwar Mahesh 			    &num_paths_p, &srcip);
19820a701b1eSRobert Gordon 			if (ibt_status == IBT_SUCCESS &&
19830a701b1eSRobert Gordon 			    num_paths_p != 0 &&
19847f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    rptp->path.pi_hca_guid == hca->hca_guid) {
19857f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				rptp->hca = hca;
19867f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				rw_exit(&hca->state_lock);
19877f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				if (addr_type == AF_INET) {
19887f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					rptp->srcip.family = AF_INET;
19897f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					rptp->srcip.un.ip4addr =
19907f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					    srcip.ip_primary.un.ip4addr;
19917f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				} else {
1992214ae7d0SSiddheshwar Mahesh 					rptp->srcip.family = AF_INET6;
1993214ae7d0SSiddheshwar Mahesh 					rptp->srcip.un.ip6addr =
1994214ae7d0SSiddheshwar Mahesh 					    srcip.ip_primary.un.ip6addr;
19957c478bd9Sstevel@tonic-gate 
19967c478bd9Sstevel@tonic-gate 				}
19977f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				retval = RDMA_SUCCESS;
19987f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				goto done1;
19997f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			}
20007f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
20017f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_exit(&hca->state_lock);
20027f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
20037f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States done1:
20047f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->hcas_list_lock);
20057f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States done2:
2006e11c3f44Smeem 	if (addrs4.ri_size > 0)
2007e11c3f44Smeem 		kmem_free(addrs4.ri_list, addrs4.ri_size);
2008e11c3f44Smeem 	if (addrs6.ri_size > 0)
2009e11c3f44Smeem 		kmem_free(addrs6.ri_list, addrs6.ri_size);
2010e11c3f44Smeem 	return (retval);
20117c478bd9Sstevel@tonic-gate }
20127c478bd9Sstevel@tonic-gate 
20137c478bd9Sstevel@tonic-gate /*
20147c478bd9Sstevel@tonic-gate  * Close channel, remove from connection list and
20157c478bd9Sstevel@tonic-gate  * free up resources allocated for that channel.
20167c478bd9Sstevel@tonic-gate  */
20177c478bd9Sstevel@tonic-gate rdma_stat
20187c478bd9Sstevel@tonic-gate rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
20197c478bd9Sstevel@tonic-gate {
20207c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
20217c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
20227c478bd9Sstevel@tonic-gate 
2023065714dcSSiddheshwar Mahesh 	mutex_enter(&conn->c_lock);
2024065714dcSSiddheshwar Mahesh 	if (conn->c_timeout != NULL) {
2025065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
2026065714dcSSiddheshwar Mahesh 		(void) untimeout(conn->c_timeout);
2027065714dcSSiddheshwar Mahesh 		mutex_enter(&conn->c_lock);
2028065714dcSSiddheshwar Mahesh 	}
2029065714dcSSiddheshwar Mahesh 
2030065714dcSSiddheshwar Mahesh 	while (conn->c_flags & C_CLOSE_PENDING) {
2031065714dcSSiddheshwar Mahesh 		cv_wait(&conn->c_cv, &conn->c_lock);
2032065714dcSSiddheshwar Mahesh 	}
2033065714dcSSiddheshwar Mahesh 	mutex_exit(&conn->c_lock);
2034065714dcSSiddheshwar Mahesh 
20357c478bd9Sstevel@tonic-gate 	/*
20367c478bd9Sstevel@tonic-gate 	 * c_ref == 0 and connection is in C_DISCONN_PEND
20377c478bd9Sstevel@tonic-gate 	 */
20387c478bd9Sstevel@tonic-gate 	hca = qp->hca;
20397c478bd9Sstevel@tonic-gate 	if (conn_list != NULL)
20407c478bd9Sstevel@tonic-gate 		(void) rib_rm_conn(conn, conn_list);
20410a701b1eSRobert Gordon 
20427c478bd9Sstevel@tonic-gate 	/*
2043065714dcSSiddheshwar Mahesh 	 * There is only one case where we get here with
2044065714dcSSiddheshwar Mahesh 	 * qp_hdl = NULL, which is during connection setup on
2045065714dcSSiddheshwar Mahesh 	 * the client. In such a case there are no posted
2046065714dcSSiddheshwar Mahesh 	 * send/recv buffers.
20477c478bd9Sstevel@tonic-gate 	 */
2048065714dcSSiddheshwar Mahesh 	if (qp->qp_hdl != NULL) {
20497c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
20507c478bd9Sstevel@tonic-gate 		while (qp->n_posted_rbufs)
20517c478bd9Sstevel@tonic-gate 			cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock);
20527c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
2053065714dcSSiddheshwar Mahesh 
2054065714dcSSiddheshwar Mahesh 		mutex_enter(&qp->send_rbufs_lock);
2055065714dcSSiddheshwar Mahesh 		while (qp->n_send_rbufs)
2056065714dcSSiddheshwar Mahesh 			cv_wait(&qp->send_rbufs_cv, &qp->send_rbufs_lock);
2057065714dcSSiddheshwar Mahesh 			mutex_exit(&qp->send_rbufs_lock);
2058065714dcSSiddheshwar Mahesh 
20597c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
20607c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
20617c478bd9Sstevel@tonic-gate 	}
20620a701b1eSRobert Gordon 
20637c478bd9Sstevel@tonic-gate 	ASSERT(qp->rdlist == NULL);
20640a701b1eSRobert Gordon 
20657c478bd9Sstevel@tonic-gate 	if (qp->replylist != NULL) {
20667c478bd9Sstevel@tonic-gate 		(void) rib_rem_replylist(qp);
20677c478bd9Sstevel@tonic-gate 	}
20687c478bd9Sstevel@tonic-gate 
20697c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->cb_conn_cv);
20707c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->posted_rbufs_cv);
2071065714dcSSiddheshwar Mahesh 	cv_destroy(&qp->send_rbufs_cv);
20727c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->cb_lock);
20737c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->replylist_lock);
20747c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->posted_rbufs_lock);
2075065714dcSSiddheshwar Mahesh 	mutex_destroy(&qp->send_rbufs_lock);
20767c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->rdlist_lock);
20777c478bd9Sstevel@tonic-gate 
20787c478bd9Sstevel@tonic-gate 	cv_destroy(&conn->c_cv);
20797c478bd9Sstevel@tonic-gate 	mutex_destroy(&conn->c_lock);
20807c478bd9Sstevel@tonic-gate 
20817c478bd9Sstevel@tonic-gate 	if (conn->c_raddr.buf != NULL) {
20827c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
20837c478bd9Sstevel@tonic-gate 	}
20847c478bd9Sstevel@tonic-gate 	if (conn->c_laddr.buf != NULL) {
20857c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
20867c478bd9Sstevel@tonic-gate 	}
20877523bef8SSiddheshwar Mahesh 	if (conn->c_netid != NULL) {
20887523bef8SSiddheshwar Mahesh 		kmem_free(conn->c_netid, (strlen(conn->c_netid) + 1));
20897523bef8SSiddheshwar Mahesh 	}
2090*0a4b0810SKaren Rochford 	if (conn->c_addrmask.buf != NULL) {
2091*0a4b0810SKaren Rochford 		kmem_free(conn->c_addrmask.buf, conn->c_addrmask.len);
2092*0a4b0810SKaren Rochford 	}
20930a701b1eSRobert Gordon 
20940a701b1eSRobert Gordon 	/*
20950a701b1eSRobert Gordon 	 * Credit control cleanup.
20960a701b1eSRobert Gordon 	 */
20970a701b1eSRobert Gordon 	if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
20980a701b1eSRobert Gordon 		rdma_clnt_cred_ctrl_t *cc_info;
20990a701b1eSRobert Gordon 		cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
21000a701b1eSRobert Gordon 		cv_destroy(&cc_info->clnt_cc_cv);
21010a701b1eSRobert Gordon 	}
21020a701b1eSRobert Gordon 
21037c478bd9Sstevel@tonic-gate 	kmem_free(qp, sizeof (rib_qp_t));
21047c478bd9Sstevel@tonic-gate 
21057c478bd9Sstevel@tonic-gate 	/*
21067c478bd9Sstevel@tonic-gate 	 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
21077c478bd9Sstevel@tonic-gate 	 * then the hca is no longer being used.
21087c478bd9Sstevel@tonic-gate 	 */
21097c478bd9Sstevel@tonic-gate 	if (conn_list != NULL) {
21107c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
21117c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
21127c478bd9Sstevel@tonic-gate 			rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
21137c478bd9Sstevel@tonic-gate 			if (hca->srv_conn_list.conn_hd == NULL) {
21147c478bd9Sstevel@tonic-gate 				rw_enter(&hca->cl_conn_list.conn_lock,
21157c478bd9Sstevel@tonic-gate 				    RW_READER);
21160a701b1eSRobert Gordon 
21177c478bd9Sstevel@tonic-gate 				if (hca->cl_conn_list.conn_hd == NULL) {
21187c478bd9Sstevel@tonic-gate 					mutex_enter(&hca->inuse_lock);
21197c478bd9Sstevel@tonic-gate 					hca->inuse = FALSE;
21207c478bd9Sstevel@tonic-gate 					cv_signal(&hca->cb_cv);
21217c478bd9Sstevel@tonic-gate 					mutex_exit(&hca->inuse_lock);
21227c478bd9Sstevel@tonic-gate 				}
21237c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
21247c478bd9Sstevel@tonic-gate 			}
21257c478bd9Sstevel@tonic-gate 			rw_exit(&hca->srv_conn_list.conn_lock);
21267c478bd9Sstevel@tonic-gate 		}
21277c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
21287c478bd9Sstevel@tonic-gate 	}
21290a701b1eSRobert Gordon 
21307c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
21317c478bd9Sstevel@tonic-gate }
21327c478bd9Sstevel@tonic-gate 
21337c478bd9Sstevel@tonic-gate /*
2134065714dcSSiddheshwar Mahesh  * All sends are done under the protection of
2135065714dcSSiddheshwar Mahesh  * the wdesc->sendwait_lock. n_send_rbufs count
2136065714dcSSiddheshwar Mahesh  * is protected using the send_rbufs_lock.
2137065714dcSSiddheshwar Mahesh  * lock ordering is:
2138065714dcSSiddheshwar Mahesh  * sendwait_lock -> send_rbufs_lock
2139065714dcSSiddheshwar Mahesh  */
2140065714dcSSiddheshwar Mahesh 
2141065714dcSSiddheshwar Mahesh void
2142065714dcSSiddheshwar Mahesh rib_send_hold(rib_qp_t *qp)
2143065714dcSSiddheshwar Mahesh {
2144065714dcSSiddheshwar Mahesh 	mutex_enter(&qp->send_rbufs_lock);
2145065714dcSSiddheshwar Mahesh 	qp->n_send_rbufs++;
2146065714dcSSiddheshwar Mahesh 	mutex_exit(&qp->send_rbufs_lock);
2147065714dcSSiddheshwar Mahesh }
2148065714dcSSiddheshwar Mahesh 
2149065714dcSSiddheshwar Mahesh void
2150065714dcSSiddheshwar Mahesh rib_send_rele(rib_qp_t *qp)
2151065714dcSSiddheshwar Mahesh {
2152065714dcSSiddheshwar Mahesh 	mutex_enter(&qp->send_rbufs_lock);
2153065714dcSSiddheshwar Mahesh 	qp->n_send_rbufs--;
2154065714dcSSiddheshwar Mahesh 	if (qp->n_send_rbufs == 0)
2155065714dcSSiddheshwar Mahesh 		cv_signal(&qp->send_rbufs_cv);
2156065714dcSSiddheshwar Mahesh 	mutex_exit(&qp->send_rbufs_lock);
2157065714dcSSiddheshwar Mahesh }
2158065714dcSSiddheshwar Mahesh 
21599c86cdcdSSiddheshwar Mahesh void
21609c86cdcdSSiddheshwar Mahesh rib_recv_rele(rib_qp_t *qp)
21619c86cdcdSSiddheshwar Mahesh {
21629c86cdcdSSiddheshwar Mahesh 	mutex_enter(&qp->posted_rbufs_lock);
21639c86cdcdSSiddheshwar Mahesh 	qp->n_posted_rbufs--;
21649c86cdcdSSiddheshwar Mahesh 	if (qp->n_posted_rbufs == 0)
21659c86cdcdSSiddheshwar Mahesh 		cv_signal(&qp->posted_rbufs_cv);
21669c86cdcdSSiddheshwar Mahesh 	mutex_exit(&qp->posted_rbufs_lock);
21679c86cdcdSSiddheshwar Mahesh }
21689c86cdcdSSiddheshwar Mahesh 
2169065714dcSSiddheshwar Mahesh /*
21707c478bd9Sstevel@tonic-gate  * Wait for send completion notification. Only on receiving a
21717c478bd9Sstevel@tonic-gate  * notification be it a successful or error completion, free the
21727c478bd9Sstevel@tonic-gate  * send_wid.
21737c478bd9Sstevel@tonic-gate  */
21747c478bd9Sstevel@tonic-gate static rdma_stat
21757c478bd9Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd)
21767c478bd9Sstevel@tonic-gate {
21777c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
21787c478bd9Sstevel@tonic-gate 	rdma_stat error = RDMA_SUCCESS;
21797c478bd9Sstevel@tonic-gate 	int	i;
21807c478bd9Sstevel@tonic-gate 
21817c478bd9Sstevel@tonic-gate 	/*
21827c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
21837c478bd9Sstevel@tonic-gate 	 */
21847c478bd9Sstevel@tonic-gate 	ASSERT(wd != NULL);
21857c478bd9Sstevel@tonic-gate 	mutex_enter(&wd->sendwait_lock);
21867c478bd9Sstevel@tonic-gate 	if (wd->status == (uint_t)SEND_WAIT) {
21877c478bd9Sstevel@tonic-gate 		timout = drv_usectohz(SEND_WAIT_TIME * 1000000) +
21887c478bd9Sstevel@tonic-gate 		    ddi_get_lbolt();
21890a701b1eSRobert Gordon 
21907c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER) {
21917c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait(&wd->wait_cv,
21927c478bd9Sstevel@tonic-gate 			    &wd->sendwait_lock, timout)) > 0 &&
21937c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
21947c478bd9Sstevel@tonic-gate 				;
21957c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
21967c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
21970a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__srvsendwait__timeout);
21980a701b1eSRobert Gordon 
21997c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
22007c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
22017c478bd9Sstevel@tonic-gate 				break;
22027c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
22037c478bd9Sstevel@tonic-gate 				break;
22047c478bd9Sstevel@tonic-gate 			}
22057c478bd9Sstevel@tonic-gate 		} else {
22067c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv,
22077c478bd9Sstevel@tonic-gate 			    &wd->sendwait_lock, timout)) > 0 &&
22087c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
22097c478bd9Sstevel@tonic-gate 				;
22107c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
22117c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
22120a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__clntsendwait__timeout);
22130a701b1eSRobert Gordon 
22147c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
22157c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
22167c478bd9Sstevel@tonic-gate 				break;
22177c478bd9Sstevel@tonic-gate 			case 0:		/* interrupted */
22180a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__clntsendwait__intr);
22190a701b1eSRobert Gordon 
22207c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
22217c478bd9Sstevel@tonic-gate 				error = RDMA_INTR;
22227c478bd9Sstevel@tonic-gate 				break;
22237c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
22247c478bd9Sstevel@tonic-gate 				break;
22257c478bd9Sstevel@tonic-gate 			}
22267c478bd9Sstevel@tonic-gate 		}
22277c478bd9Sstevel@tonic-gate 	}
22287c478bd9Sstevel@tonic-gate 
22297c478bd9Sstevel@tonic-gate 	if (wd->status != (uint_t)SEND_WAIT) {
22307c478bd9Sstevel@tonic-gate 		/* got send completion */
22317c478bd9Sstevel@tonic-gate 		if (wd->status != RDMA_SUCCESS) {
2232065714dcSSiddheshwar Mahesh 			switch (wd->status) {
2233065714dcSSiddheshwar Mahesh 			case RDMA_CONNLOST:
2234f837ee4aSSiddheshwar Mahesh 				error = RDMA_CONNLOST;
2235065714dcSSiddheshwar Mahesh 				break;
2236065714dcSSiddheshwar Mahesh 			default:
2237065714dcSSiddheshwar Mahesh 				error = RDMA_FAILED;
2238065714dcSSiddheshwar Mahesh 				break;
2239f837ee4aSSiddheshwar Mahesh 			}
22407c478bd9Sstevel@tonic-gate 		}
22417c478bd9Sstevel@tonic-gate 		for (i = 0; i < wd->nsbufs; i++) {
22427c478bd9Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), SEND_BUFFER,
224311606941Sjwahlig 			    (void *)(uintptr_t)wd->sbufaddr[i]);
22447c478bd9Sstevel@tonic-gate 		}
2245065714dcSSiddheshwar Mahesh 
2246065714dcSSiddheshwar Mahesh 		rib_send_rele(qp);
2247065714dcSSiddheshwar Mahesh 
22487c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
22497c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wd);
2250065714dcSSiddheshwar Mahesh 
22517c478bd9Sstevel@tonic-gate 	} else {
22527c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
22537c478bd9Sstevel@tonic-gate 	}
22547c478bd9Sstevel@tonic-gate 	return (error);
22557c478bd9Sstevel@tonic-gate }
22567c478bd9Sstevel@tonic-gate 
22577c478bd9Sstevel@tonic-gate static struct send_wid *
22587c478bd9Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp)
22597c478bd9Sstevel@tonic-gate {
22607c478bd9Sstevel@tonic-gate 	struct send_wid	*wd;
22617c478bd9Sstevel@tonic-gate 
22627c478bd9Sstevel@tonic-gate 	wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP);
22637c478bd9Sstevel@tonic-gate 	wd->xid = xid;
22647c478bd9Sstevel@tonic-gate 	wd->cv_sig = cv_sig;
22657c478bd9Sstevel@tonic-gate 	wd->qp = qp;
22667c478bd9Sstevel@tonic-gate 	cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL);
22677c478bd9Sstevel@tonic-gate 	mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL);
22687c478bd9Sstevel@tonic-gate 	wd->status = (uint_t)SEND_WAIT;
22697c478bd9Sstevel@tonic-gate 
22707c478bd9Sstevel@tonic-gate 	return (wd);
22717c478bd9Sstevel@tonic-gate }
22727c478bd9Sstevel@tonic-gate 
22737c478bd9Sstevel@tonic-gate static int
22747c478bd9Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc)
22757c478bd9Sstevel@tonic-gate {
22767c478bd9Sstevel@tonic-gate 	cv_destroy(&wdesc->wait_cv);
22777c478bd9Sstevel@tonic-gate 	mutex_destroy(&wdesc->sendwait_lock);
22787c478bd9Sstevel@tonic-gate 	kmem_free(wdesc, sizeof (*wdesc));
22797c478bd9Sstevel@tonic-gate 
22807c478bd9Sstevel@tonic-gate 	return (0);
22817c478bd9Sstevel@tonic-gate }
22827c478bd9Sstevel@tonic-gate 
22837c478bd9Sstevel@tonic-gate static rdma_stat
22847c478bd9Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep)
22857c478bd9Sstevel@tonic-gate {
22867c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
22877c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
22887c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
22897c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->replylist_lock);
22907c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
22917c478bd9Sstevel@tonic-gate 	}
22927c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
22937c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
22947c478bd9Sstevel@tonic-gate }
22957c478bd9Sstevel@tonic-gate 
22967c478bd9Sstevel@tonic-gate /*
22977c478bd9Sstevel@tonic-gate  * Send buffers are freed here only in case of error in posting
22987c478bd9Sstevel@tonic-gate  * on QP. If the post succeeded, the send buffers are freed upon
22997c478bd9Sstevel@tonic-gate  * send completion in rib_sendwait() or in the scq_handler.
23007c478bd9Sstevel@tonic-gate  */
23017c478bd9Sstevel@tonic-gate rdma_stat
23027c478bd9Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
23030a701b1eSRobert Gordon 	int send_sig, int cv_sig, caddr_t *swid)
23047c478bd9Sstevel@tonic-gate {
23057c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
23067c478bd9Sstevel@tonic-gate 	struct clist	*clp;
23077c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
23087c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
23097c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
23107c478bd9Sstevel@tonic-gate 	int		i, nds;
23117c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
23127c478bd9Sstevel@tonic-gate 	uint_t		total_msg_size;
23130a701b1eSRobert Gordon 	rib_qp_t	*qp;
23140a701b1eSRobert Gordon 
23150a701b1eSRobert Gordon 	qp = ctoqp(conn);
23167c478bd9Sstevel@tonic-gate 
23177c478bd9Sstevel@tonic-gate 	ASSERT(cl != NULL);
23187c478bd9Sstevel@tonic-gate 
23197c478bd9Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
23207c478bd9Sstevel@tonic-gate 
23217c478bd9Sstevel@tonic-gate 	nds = 0;
23227c478bd9Sstevel@tonic-gate 	total_msg_size = 0;
23237c478bd9Sstevel@tonic-gate 	clp = cl;
23247c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
23257c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
23260a701b1eSRobert Gordon 			DTRACE_PROBE(rpcib__i__sendandwait_dsegmax_exceeded);
23277c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
23287c478bd9Sstevel@tonic-gate 		}
23290a701b1eSRobert Gordon 		sgl[nds].ds_va = clp->w.c_saddr;
23307c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */
23317c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = clp->c_len;
23327c478bd9Sstevel@tonic-gate 		total_msg_size += clp->c_len;
23337c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
23347c478bd9Sstevel@tonic-gate 		nds++;
23357c478bd9Sstevel@tonic-gate 	}
23367c478bd9Sstevel@tonic-gate 
23377c478bd9Sstevel@tonic-gate 	if (send_sig) {
23387c478bd9Sstevel@tonic-gate 		/* Set SEND_SIGNAL flag. */
23397c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
23407c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, cv_sig, qp);
23410a701b1eSRobert Gordon 		*swid = (caddr_t)wdesc;
2342065714dcSSiddheshwar Mahesh 		tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
2343065714dcSSiddheshwar Mahesh 		mutex_enter(&wdesc->sendwait_lock);
23447c478bd9Sstevel@tonic-gate 		wdesc->nsbufs = nds;
23457c478bd9Sstevel@tonic-gate 		for (i = 0; i < nds; i++) {
23467c478bd9Sstevel@tonic-gate 			wdesc->sbufaddr[i] = sgl[i].ds_va;
23477c478bd9Sstevel@tonic-gate 		}
2348065714dcSSiddheshwar Mahesh 	} else {
2349065714dcSSiddheshwar Mahesh 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
2350065714dcSSiddheshwar Mahesh 		*swid = NULL;
2351065714dcSSiddheshwar Mahesh 		tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
2352065714dcSSiddheshwar Mahesh 	}
23537c478bd9Sstevel@tonic-gate 
23547c478bd9Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_SEND;
23557c478bd9Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
23567c478bd9Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
23577c478bd9Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
23587c478bd9Sstevel@tonic-gate 
23597c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
23600a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
23617c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
23627c478bd9Sstevel@tonic-gate 	}
23630a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
23647c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
23650a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
23660a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
23677c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2368065714dcSSiddheshwar Mahesh 		if (send_sig) {
23697c478bd9Sstevel@tonic-gate 			for (i = 0; i < nds; i++) {
23707c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, SEND_BUFFER,
237111606941Sjwahlig 				    (void *)(uintptr_t)wdesc->sbufaddr[i]);
23727c478bd9Sstevel@tonic-gate 			}
2373065714dcSSiddheshwar Mahesh 			mutex_exit(&wdesc->sendwait_lock);
23747c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wdesc);
2375065714dcSSiddheshwar Mahesh 		}
23760a701b1eSRobert Gordon 		return (RDMA_CONNLOST);
23777c478bd9Sstevel@tonic-gate 	}
2378065714dcSSiddheshwar Mahesh 
23797c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
23807c478bd9Sstevel@tonic-gate 
23817c478bd9Sstevel@tonic-gate 	if (send_sig) {
2382065714dcSSiddheshwar Mahesh 		rib_send_hold(qp);
2383065714dcSSiddheshwar Mahesh 		mutex_exit(&wdesc->sendwait_lock);
23847c478bd9Sstevel@tonic-gate 		if (cv_sig) {
23857c478bd9Sstevel@tonic-gate 			/*
23867c478bd9Sstevel@tonic-gate 			 * cv_wait for send to complete.
23877c478bd9Sstevel@tonic-gate 			 * We can fail due to a timeout or signal or
23887c478bd9Sstevel@tonic-gate 			 * unsuccessful send.
23897c478bd9Sstevel@tonic-gate 			 */
23907c478bd9Sstevel@tonic-gate 			ret = rib_sendwait(qp, wdesc);
23910a701b1eSRobert Gordon 
23927c478bd9Sstevel@tonic-gate 			return (ret);
23937c478bd9Sstevel@tonic-gate 		}
23947c478bd9Sstevel@tonic-gate 	}
23957c478bd9Sstevel@tonic-gate 
23967c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
23977c478bd9Sstevel@tonic-gate }
23987c478bd9Sstevel@tonic-gate 
23990a701b1eSRobert Gordon 
24007c478bd9Sstevel@tonic-gate rdma_stat
24017c478bd9Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
24027c478bd9Sstevel@tonic-gate {
24037c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
24040a701b1eSRobert Gordon 	caddr_t		wd;
24057c478bd9Sstevel@tonic-gate 
24067c478bd9Sstevel@tonic-gate 	/* send-wait & cv_signal */
24070a701b1eSRobert Gordon 	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
24087c478bd9Sstevel@tonic-gate 	return (ret);
24097c478bd9Sstevel@tonic-gate }
24107c478bd9Sstevel@tonic-gate 
24117c478bd9Sstevel@tonic-gate /*
2412065714dcSSiddheshwar Mahesh  * Deprecated/obsolete interface not used currently
2413065714dcSSiddheshwar Mahesh  * but earlier used for READ-READ protocol.
24147c478bd9Sstevel@tonic-gate  * Send RPC reply and wait for RDMA_DONE.
24157c478bd9Sstevel@tonic-gate  */
24167c478bd9Sstevel@tonic-gate rdma_stat
24177c478bd9Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
24187c478bd9Sstevel@tonic-gate {
24197c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
24207c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
2421d3d50737SRafael Vanoni 	clock_t cv_wait_ret;
24220a701b1eSRobert Gordon 	caddr_t *wid = NULL;
24237c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
24247c478bd9Sstevel@tonic-gate 
24257c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
24267c478bd9Sstevel@tonic-gate 	rd = rdma_done_add(qp, msgid);
24277c478bd9Sstevel@tonic-gate 
24287c478bd9Sstevel@tonic-gate 	/* No cv_signal (whether send-wait or no-send-wait) */
24290a701b1eSRobert Gordon 	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, wid);
24307c478bd9Sstevel@tonic-gate 
24310a701b1eSRobert Gordon 	if (ret != RDMA_SUCCESS) {
24320a701b1eSRobert Gordon 		rdma_done_rm(qp, rd);
24330a701b1eSRobert Gordon 	} else {
24347c478bd9Sstevel@tonic-gate 		/*
24357c478bd9Sstevel@tonic-gate 		 * Wait for RDMA_DONE from remote end
24367c478bd9Sstevel@tonic-gate 		 */
2437d3d50737SRafael Vanoni 		cv_wait_ret = cv_reltimedwait(&rd->rdma_done_cv,
2438d3d50737SRafael Vanoni 		    &qp->rdlist_lock, drv_usectohz(REPLY_WAIT_TIME * 1000000),
2439d3d50737SRafael Vanoni 		    TR_CLOCK_TICK);
24400a701b1eSRobert Gordon 
24417c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, rd);
24420a701b1eSRobert Gordon 
24437c478bd9Sstevel@tonic-gate 		if (cv_wait_ret < 0) {
24447c478bd9Sstevel@tonic-gate 			ret = RDMA_TIMEDOUT;
24450a701b1eSRobert Gordon 		}
24467c478bd9Sstevel@tonic-gate 	}
24477c478bd9Sstevel@tonic-gate 
24487c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
24497c478bd9Sstevel@tonic-gate 	return (ret);
24507c478bd9Sstevel@tonic-gate }
24517c478bd9Sstevel@tonic-gate 
24527c478bd9Sstevel@tonic-gate static struct recv_wid *
24537c478bd9Sstevel@tonic-gate rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid)
24547c478bd9Sstevel@tonic-gate {
24557c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate 	rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP);
24587c478bd9Sstevel@tonic-gate 	rwid->xid = msgid;
24597c478bd9Sstevel@tonic-gate 	rwid->addr = sgl->ds_va;
24607c478bd9Sstevel@tonic-gate 	rwid->qp = qp;
24617c478bd9Sstevel@tonic-gate 
24627c478bd9Sstevel@tonic-gate 	return (rwid);
24637c478bd9Sstevel@tonic-gate }
24647c478bd9Sstevel@tonic-gate 
24657c478bd9Sstevel@tonic-gate static void
24667c478bd9Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid)
24677c478bd9Sstevel@tonic-gate {
24687c478bd9Sstevel@tonic-gate 	kmem_free(rwid, sizeof (struct recv_wid));
24697c478bd9Sstevel@tonic-gate }
24707c478bd9Sstevel@tonic-gate 
24717c478bd9Sstevel@tonic-gate rdma_stat
24727c478bd9Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid)
24737c478bd9Sstevel@tonic-gate {
24747c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
24757c478bd9Sstevel@tonic-gate 	struct clist	*clp = cl;
24767c478bd9Sstevel@tonic-gate 	struct reply	*rep;
24777c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
24787c478bd9Sstevel@tonic-gate 	int		nds;
24797c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
24807c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
24817c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
24827c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
24837c478bd9Sstevel@tonic-gate 
24847c478bd9Sstevel@tonic-gate 	/*
24857c478bd9Sstevel@tonic-gate 	 * rdma_clnt_postrecv uses RECV_BUFFER.
24867c478bd9Sstevel@tonic-gate 	 */
24877c478bd9Sstevel@tonic-gate 
24887c478bd9Sstevel@tonic-gate 	nds = 0;
24897c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
24907c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
24917c478bd9Sstevel@tonic-gate 			ret = RDMA_FAILED;
24927c478bd9Sstevel@tonic-gate 			goto done;
24937c478bd9Sstevel@tonic-gate 		}
24940a701b1eSRobert Gordon 		sgl[nds].ds_va = cl->w.c_saddr;
24957c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
24967c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
24977c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
24987c478bd9Sstevel@tonic-gate 		nds++;
24997c478bd9Sstevel@tonic-gate 	}
25007c478bd9Sstevel@tonic-gate 
25017c478bd9Sstevel@tonic-gate 	if (nds != 1) {
25027c478bd9Sstevel@tonic-gate 		ret = RDMA_FAILED;
25037c478bd9Sstevel@tonic-gate 		goto done;
25047c478bd9Sstevel@tonic-gate 	}
25050a701b1eSRobert Gordon 
25067c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
25077c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
25087c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
25097c478bd9Sstevel@tonic-gate 
25107c478bd9Sstevel@tonic-gate 	rwid = rib_create_wid(qp, &sgl[0], msgid);
25117c478bd9Sstevel@tonic-gate 	if (rwid) {
251211606941Sjwahlig 		recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid;
25137c478bd9Sstevel@tonic-gate 	} else {
25147c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
25157c478bd9Sstevel@tonic-gate 		goto done;
25167c478bd9Sstevel@tonic-gate 	}
25177c478bd9Sstevel@tonic-gate 	rep = rib_addreplylist(qp, msgid);
25187c478bd9Sstevel@tonic-gate 	if (!rep) {
25197c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
25207c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
25217c478bd9Sstevel@tonic-gate 		goto done;
25227c478bd9Sstevel@tonic-gate 	}
25237c478bd9Sstevel@tonic-gate 
25247c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
25250a701b1eSRobert Gordon 
25260a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
25277c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
25287c478bd9Sstevel@tonic-gate 	}
25290a701b1eSRobert Gordon 
25300a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
25317c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
25320a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
25330a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
25347c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
25357c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
25367c478bd9Sstevel@tonic-gate 		(void) rib_rem_rep(qp, rep);
25370a701b1eSRobert Gordon 		ret = RDMA_CONNLOST;
25387c478bd9Sstevel@tonic-gate 		goto done;
25397c478bd9Sstevel@tonic-gate 	}
25409c86cdcdSSiddheshwar Mahesh 
25419c86cdcdSSiddheshwar Mahesh 	mutex_enter(&qp->posted_rbufs_lock);
25429c86cdcdSSiddheshwar Mahesh 	qp->n_posted_rbufs++;
25439c86cdcdSSiddheshwar Mahesh 	mutex_exit(&qp->posted_rbufs_lock);
25449c86cdcdSSiddheshwar Mahesh 
25457c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
25467c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
25477c478bd9Sstevel@tonic-gate 
25487c478bd9Sstevel@tonic-gate done:
25497c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
25500a701b1eSRobert Gordon 		rib_rbuf_free(conn, RECV_BUFFER,
25510a701b1eSRobert Gordon 		    (void *)(uintptr_t)clp->w.c_saddr3);
25527c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
25537c478bd9Sstevel@tonic-gate 	}
25547c478bd9Sstevel@tonic-gate 	return (ret);
25557c478bd9Sstevel@tonic-gate }
25567c478bd9Sstevel@tonic-gate 
25577c478bd9Sstevel@tonic-gate rdma_stat
25587c478bd9Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl)
25597c478bd9Sstevel@tonic-gate {
25607c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
25617c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
25627c478bd9Sstevel@tonic-gate 	int		nds;
25637c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
25647c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
25657c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
25667c478bd9Sstevel@tonic-gate 
25677c478bd9Sstevel@tonic-gate 	nds = 0;
25687c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
25697c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
25707c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
25717c478bd9Sstevel@tonic-gate 		}
25720a701b1eSRobert Gordon 		sgl[nds].ds_va = cl->w.c_saddr;
25737c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
25747c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
25757c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
25767c478bd9Sstevel@tonic-gate 		nds++;
25777c478bd9Sstevel@tonic-gate 	}
25787c478bd9Sstevel@tonic-gate 
25797c478bd9Sstevel@tonic-gate 	if (nds != 1) {
25800a701b1eSRobert Gordon 		rib_rbuf_free(conn, RECV_BUFFER,
25810a701b1eSRobert Gordon 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
25820a701b1eSRobert Gordon 
25837c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
25847c478bd9Sstevel@tonic-gate 	}
25850a701b1eSRobert Gordon 
25867c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
25877c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
25887c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
25897c478bd9Sstevel@tonic-gate 
25907c478bd9Sstevel@tonic-gate 	s_recvp = rib_init_svc_recv(qp, &sgl[0]);
259111606941Sjwahlig 	/* Use s_recvp's addr as wr id */
259211606941Sjwahlig 	recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp;
25937c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
25940a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
25957c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
25967c478bd9Sstevel@tonic-gate 	}
25970a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
25987c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
25990a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
26000a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
26017c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
260211606941Sjwahlig 		rib_rbuf_free(conn, RECV_BUFFER,
260311606941Sjwahlig 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
26047c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
26050a701b1eSRobert Gordon 
26060a701b1eSRobert Gordon 		return (RDMA_CONNLOST);
26077c478bd9Sstevel@tonic-gate 	}
26087c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
26097c478bd9Sstevel@tonic-gate 
26107c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
26117c478bd9Sstevel@tonic-gate }
26127c478bd9Sstevel@tonic-gate 
26137c478bd9Sstevel@tonic-gate /* Client */
26147c478bd9Sstevel@tonic-gate rdma_stat
26157c478bd9Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid)
26167c478bd9Sstevel@tonic-gate {
26177c478bd9Sstevel@tonic-gate 	return (rib_clnt_post(conn, cl, msgid));
26187c478bd9Sstevel@tonic-gate }
26197c478bd9Sstevel@tonic-gate 
26200a701b1eSRobert Gordon /* Client */
26210a701b1eSRobert Gordon rdma_stat
26220a701b1eSRobert Gordon rib_post_resp_remove(CONN* conn, uint32_t msgid)
26230a701b1eSRobert Gordon {
26240a701b1eSRobert Gordon 	rib_qp_t	*qp = ctoqp(conn);
26250a701b1eSRobert Gordon 	struct reply	*rep;
26260a701b1eSRobert Gordon 
26270a701b1eSRobert Gordon 	mutex_enter(&qp->replylist_lock);
26280a701b1eSRobert Gordon 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
26290a701b1eSRobert Gordon 		if (rep->xid == msgid) {
26300a701b1eSRobert Gordon 			if (rep->vaddr_cq) {
26310a701b1eSRobert Gordon 				rib_rbuf_free(conn, RECV_BUFFER,
26320a701b1eSRobert Gordon 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
26330a701b1eSRobert Gordon 			}
26340a701b1eSRobert Gordon 			(void) rib_remreply(qp, rep);
26350a701b1eSRobert Gordon 			break;
26360a701b1eSRobert Gordon 		}
26370a701b1eSRobert Gordon 	}
26380a701b1eSRobert Gordon 	mutex_exit(&qp->replylist_lock);
26390a701b1eSRobert Gordon 
26400a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
26410a701b1eSRobert Gordon }
26420a701b1eSRobert Gordon 
26437c478bd9Sstevel@tonic-gate /* Server */
26447c478bd9Sstevel@tonic-gate rdma_stat
26457c478bd9Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl)
26467c478bd9Sstevel@tonic-gate {
26477c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
26487c478bd9Sstevel@tonic-gate 
26497c478bd9Sstevel@tonic-gate 	if (rib_svc_post(conn, cl) == RDMA_SUCCESS) {
26507c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
26517c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs++;
26527c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
26537c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
26547c478bd9Sstevel@tonic-gate 	}
26557c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
26567c478bd9Sstevel@tonic-gate }
26577c478bd9Sstevel@tonic-gate 
26587c478bd9Sstevel@tonic-gate /*
26597c478bd9Sstevel@tonic-gate  * Client side only interface to "recv" the rpc reply buf
26607c478bd9Sstevel@tonic-gate  * posted earlier by rib_post_resp(conn, cl, msgid).
26617c478bd9Sstevel@tonic-gate  */
26627c478bd9Sstevel@tonic-gate rdma_stat
26637c478bd9Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid)
26647c478bd9Sstevel@tonic-gate {
26657c478bd9Sstevel@tonic-gate 	struct reply *rep = NULL;
26667c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
26677c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
26687c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
26697c478bd9Sstevel@tonic-gate 
26707c478bd9Sstevel@tonic-gate 	/*
26717c478bd9Sstevel@tonic-gate 	 * Find the reply structure for this msgid
26727c478bd9Sstevel@tonic-gate 	 */
26737c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
26747c478bd9Sstevel@tonic-gate 
26757c478bd9Sstevel@tonic-gate 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
26767c478bd9Sstevel@tonic-gate 		if (rep->xid == msgid)
26777c478bd9Sstevel@tonic-gate 			break;
26787c478bd9Sstevel@tonic-gate 	}
26790a701b1eSRobert Gordon 
26807c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
26817c478bd9Sstevel@tonic-gate 		/*
26827c478bd9Sstevel@tonic-gate 		 * If message not yet received, wait.
26837c478bd9Sstevel@tonic-gate 		 */
26847c478bd9Sstevel@tonic-gate 		if (rep->status == (uint_t)REPLY_WAIT) {
26857c478bd9Sstevel@tonic-gate 			timout = ddi_get_lbolt() +
26867c478bd9Sstevel@tonic-gate 			    drv_usectohz(REPLY_WAIT_TIME * 1000000);
26870a701b1eSRobert Gordon 
26887c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv,
26897c478bd9Sstevel@tonic-gate 			    &qp->replylist_lock, timout)) > 0 &&
26900a701b1eSRobert Gordon 			    rep->status == (uint_t)REPLY_WAIT)
26910a701b1eSRobert Gordon 				;
26927c478bd9Sstevel@tonic-gate 
26937c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
26947c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
26957c478bd9Sstevel@tonic-gate 				ret = RDMA_TIMEDOUT;
26967c478bd9Sstevel@tonic-gate 				break;
26977c478bd9Sstevel@tonic-gate 			case 0:
26987c478bd9Sstevel@tonic-gate 				ret = RDMA_INTR;
26997c478bd9Sstevel@tonic-gate 				break;
27007c478bd9Sstevel@tonic-gate 			default:
27017c478bd9Sstevel@tonic-gate 				break;
27027c478bd9Sstevel@tonic-gate 			}
27037c478bd9Sstevel@tonic-gate 		}
27047c478bd9Sstevel@tonic-gate 
27057c478bd9Sstevel@tonic-gate 		if (rep->status == RDMA_SUCCESS) {
27067c478bd9Sstevel@tonic-gate 			struct clist *cl = NULL;
27077c478bd9Sstevel@tonic-gate 
27087c478bd9Sstevel@tonic-gate 			/*
27097c478bd9Sstevel@tonic-gate 			 * Got message successfully
27107c478bd9Sstevel@tonic-gate 			 */
27117c478bd9Sstevel@tonic-gate 			clist_add(&cl, 0, rep->bytes_xfer, NULL,
271211606941Sjwahlig 			    (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL);
27137c478bd9Sstevel@tonic-gate 			*clp = cl;
27147c478bd9Sstevel@tonic-gate 		} else {
27157c478bd9Sstevel@tonic-gate 			if (rep->status != (uint_t)REPLY_WAIT) {
27167c478bd9Sstevel@tonic-gate 				/*
27177c478bd9Sstevel@tonic-gate 				 * Got error in reply message. Free
27187c478bd9Sstevel@tonic-gate 				 * recv buffer here.
27197c478bd9Sstevel@tonic-gate 				 */
27207c478bd9Sstevel@tonic-gate 				ret = rep->status;
27217c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
272211606941Sjwahlig 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
27237c478bd9Sstevel@tonic-gate 			}
27247c478bd9Sstevel@tonic-gate 		}
27257c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
27267c478bd9Sstevel@tonic-gate 	} else {
27277c478bd9Sstevel@tonic-gate 		/*
27287c478bd9Sstevel@tonic-gate 		 * No matching reply structure found for given msgid on the
27297c478bd9Sstevel@tonic-gate 		 * reply wait list.
27307c478bd9Sstevel@tonic-gate 		 */
27317c478bd9Sstevel@tonic-gate 		ret = RDMA_INVAL;
27320a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__nomatchxid2);
27337c478bd9Sstevel@tonic-gate 	}
27347c478bd9Sstevel@tonic-gate 
27357c478bd9Sstevel@tonic-gate 	/*
27367c478bd9Sstevel@tonic-gate 	 * Done.
27377c478bd9Sstevel@tonic-gate 	 */
27387c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
27397c478bd9Sstevel@tonic-gate 	return (ret);
27407c478bd9Sstevel@tonic-gate }
27417c478bd9Sstevel@tonic-gate 
27427c478bd9Sstevel@tonic-gate /*
27437c478bd9Sstevel@tonic-gate  * RDMA write a buffer to the remote address.
27447c478bd9Sstevel@tonic-gate  */
27457c478bd9Sstevel@tonic-gate rdma_stat
27467c478bd9Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait)
27477c478bd9Sstevel@tonic-gate {
27487c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
27497c478bd9Sstevel@tonic-gate 	int		cv_sig;
27507c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
27517c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
27527c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
27537c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
27547c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
27550a701b1eSRobert Gordon 	uint64_t	n_writes = 0;
27567c478bd9Sstevel@tonic-gate 
27577c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
27587c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
27597c478bd9Sstevel@tonic-gate 	}
27607c478bd9Sstevel@tonic-gate 
27610a701b1eSRobert Gordon 	while ((cl != NULL)) {
27620a701b1eSRobert Gordon 		if (cl->c_len > 0) {
27637c478bd9Sstevel@tonic-gate 			bzero(&tx_wr, sizeof (ibt_send_wr_t));
27640a701b1eSRobert Gordon 			tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->u.c_daddr;
27650a701b1eSRobert Gordon 			tx_wr.wr.rc.rcwr.rdma.rdma_rkey =
27660a701b1eSRobert Gordon 			    cl->c_dmemhandle.mrc_rmr; /* rkey */
27670a701b1eSRobert Gordon 			sgl[0].ds_va = cl->w.c_saddr;
27680a701b1eSRobert Gordon 			sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
27690a701b1eSRobert Gordon 			sgl[0].ds_len = cl->c_len;
27707c478bd9Sstevel@tonic-gate 
27717c478bd9Sstevel@tonic-gate 			if (wait) {
27727c478bd9Sstevel@tonic-gate 				cv_sig = 1;
27737c478bd9Sstevel@tonic-gate 			} else {
27740a701b1eSRobert Gordon 				if (n_writes > max_unsignaled_rws) {
27750a701b1eSRobert Gordon 					n_writes = 0;
27760a701b1eSRobert Gordon 					cv_sig = 1;
27770a701b1eSRobert Gordon 				} else {
27787c478bd9Sstevel@tonic-gate 					cv_sig = 0;
27797c478bd9Sstevel@tonic-gate 				}
27800a701b1eSRobert Gordon 			}
27817c478bd9Sstevel@tonic-gate 
2782065714dcSSiddheshwar Mahesh 			if (cv_sig) {
2783065714dcSSiddheshwar Mahesh 				tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
27847c478bd9Sstevel@tonic-gate 				wdesc = rib_init_sendwait(0, cv_sig, qp);
278511606941Sjwahlig 				tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
2786065714dcSSiddheshwar Mahesh 				mutex_enter(&wdesc->sendwait_lock);
2787065714dcSSiddheshwar Mahesh 			} else {
2788065714dcSSiddheshwar Mahesh 				tx_wr.wr_flags = IBT_WR_NO_FLAGS;
2789065714dcSSiddheshwar Mahesh 				tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
2790065714dcSSiddheshwar Mahesh 			}
27917c478bd9Sstevel@tonic-gate 			tx_wr.wr_opcode = IBT_WRC_RDMAW;
27927c478bd9Sstevel@tonic-gate 			tx_wr.wr_trans = IBT_RC_SRV;
27930a701b1eSRobert Gordon 			tx_wr.wr_nds = 1;
27947c478bd9Sstevel@tonic-gate 			tx_wr.wr_sgl = sgl;
27957c478bd9Sstevel@tonic-gate 
27967c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
27970a701b1eSRobert Gordon 			if (conn->c_state == C_CONNECTED) {
27980a701b1eSRobert Gordon 				ibt_status =
27990a701b1eSRobert Gordon 				    ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
28007c478bd9Sstevel@tonic-gate 			}
28010a701b1eSRobert Gordon 			if (conn->c_state != C_CONNECTED ||
28027c478bd9Sstevel@tonic-gate 			    ibt_status != IBT_SUCCESS) {
28030a701b1eSRobert Gordon 				if (conn->c_state != C_DISCONN_PEND)
28040a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
28057c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
2806065714dcSSiddheshwar Mahesh 				if (cv_sig) {
2807065714dcSSiddheshwar Mahesh 					mutex_exit(&wdesc->sendwait_lock);
28087c478bd9Sstevel@tonic-gate 					(void) rib_free_sendwait(wdesc);
2809065714dcSSiddheshwar Mahesh 				}
28100a701b1eSRobert Gordon 				return (RDMA_CONNLOST);
28117c478bd9Sstevel@tonic-gate 			}
2812065714dcSSiddheshwar Mahesh 
28137c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
28147c478bd9Sstevel@tonic-gate 
28157c478bd9Sstevel@tonic-gate 			/*
28167c478bd9Sstevel@tonic-gate 			 * Wait for send to complete
28177c478bd9Sstevel@tonic-gate 			 */
2818065714dcSSiddheshwar Mahesh 			if (cv_sig) {
2819065714dcSSiddheshwar Mahesh 
2820065714dcSSiddheshwar Mahesh 				rib_send_hold(qp);
28210a701b1eSRobert Gordon 				mutex_exit(&wdesc->sendwait_lock);
2822065714dcSSiddheshwar Mahesh 
2823065714dcSSiddheshwar Mahesh 				ret = rib_sendwait(qp, wdesc);
2824065714dcSSiddheshwar Mahesh 				if (ret != 0)
2825065714dcSSiddheshwar Mahesh 					return (ret);
28260a701b1eSRobert Gordon 			}
28270a701b1eSRobert Gordon 			n_writes ++;
28280a701b1eSRobert Gordon 		}
28290a701b1eSRobert Gordon 		cl = cl->c_next;
28307c478bd9Sstevel@tonic-gate 	}
28317c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
28327c478bd9Sstevel@tonic-gate }
28337c478bd9Sstevel@tonic-gate 
28347c478bd9Sstevel@tonic-gate /*
28357c478bd9Sstevel@tonic-gate  * RDMA Read a buffer from the remote address.
28367c478bd9Sstevel@tonic-gate  */
28377c478bd9Sstevel@tonic-gate rdma_stat
28387c478bd9Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait)
28397c478bd9Sstevel@tonic-gate {
28407c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	rx_wr;
2841065714dcSSiddheshwar Mahesh 	int		cv_sig = 0;
28420a701b1eSRobert Gordon 	ibt_wr_ds_t	sgl;
28437c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
28447c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
28457c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
28467c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
28477c478bd9Sstevel@tonic-gate 
28487c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
28497c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
28507c478bd9Sstevel@tonic-gate 	}
28517c478bd9Sstevel@tonic-gate 
28520a701b1eSRobert Gordon 	while (cl != NULL) {
28537c478bd9Sstevel@tonic-gate 		bzero(&rx_wr, sizeof (ibt_send_wr_t));
28547c478bd9Sstevel@tonic-gate 		/*
28557c478bd9Sstevel@tonic-gate 		 * Remote address is at the head chunk item in list.
28567c478bd9Sstevel@tonic-gate 		 */
28570a701b1eSRobert Gordon 		rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->w.c_saddr;
28580a701b1eSRobert Gordon 		rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr;
28597c478bd9Sstevel@tonic-gate 
28600a701b1eSRobert Gordon 		sgl.ds_va = cl->u.c_daddr;
28610a701b1eSRobert Gordon 		sgl.ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */
28620a701b1eSRobert Gordon 		sgl.ds_len = cl->c_len;
28637c478bd9Sstevel@tonic-gate 
2864065714dcSSiddheshwar Mahesh 		/*
2865065714dcSSiddheshwar Mahesh 		 * If there are multiple chunks to be read, and
2866065714dcSSiddheshwar Mahesh 		 * wait is set, ask for signal only for the last chunk
2867065714dcSSiddheshwar Mahesh 		 * and wait only on the last chunk. The completion of
2868065714dcSSiddheshwar Mahesh 		 * RDMA_READ on last chunk ensures that reads on all
2869065714dcSSiddheshwar Mahesh 		 * previous chunks are also completed.
2870065714dcSSiddheshwar Mahesh 		 */
2871065714dcSSiddheshwar Mahesh 		if (wait && (cl->c_next == NULL)) {
28727c478bd9Sstevel@tonic-gate 			cv_sig = 1;
2873065714dcSSiddheshwar Mahesh 			wdesc = rib_init_sendwait(0, cv_sig, qp);
2874065714dcSSiddheshwar Mahesh 			rx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
2875065714dcSSiddheshwar Mahesh 			rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
2876065714dcSSiddheshwar Mahesh 			mutex_enter(&wdesc->sendwait_lock);
28777c478bd9Sstevel@tonic-gate 		} else {
28787c478bd9Sstevel@tonic-gate 			rx_wr.wr_flags = IBT_WR_NO_FLAGS;
2879065714dcSSiddheshwar Mahesh 			rx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
28807c478bd9Sstevel@tonic-gate 		}
28817c478bd9Sstevel@tonic-gate 		rx_wr.wr_opcode = IBT_WRC_RDMAR;
28827c478bd9Sstevel@tonic-gate 		rx_wr.wr_trans = IBT_RC_SRV;
28830a701b1eSRobert Gordon 		rx_wr.wr_nds = 1;
28840a701b1eSRobert Gordon 		rx_wr.wr_sgl = &sgl;
28857c478bd9Sstevel@tonic-gate 
28867c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
28870a701b1eSRobert Gordon 		if (conn->c_state == C_CONNECTED) {
28887c478bd9Sstevel@tonic-gate 			ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL);
28897c478bd9Sstevel@tonic-gate 		}
28900a701b1eSRobert Gordon 		if (conn->c_state != C_CONNECTED ||
28917c478bd9Sstevel@tonic-gate 		    ibt_status != IBT_SUCCESS) {
28920a701b1eSRobert Gordon 			if (conn->c_state != C_DISCONN_PEND)
28930a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
28947c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
2895065714dcSSiddheshwar Mahesh 			if (wait && (cl->c_next == NULL)) {
2896065714dcSSiddheshwar Mahesh 				mutex_exit(&wdesc->sendwait_lock);
28977c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wdesc);
2898065714dcSSiddheshwar Mahesh 			}
28990a701b1eSRobert Gordon 			return (RDMA_CONNLOST);
29007c478bd9Sstevel@tonic-gate 		}
2901065714dcSSiddheshwar Mahesh 
29027c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
29037c478bd9Sstevel@tonic-gate 
29047c478bd9Sstevel@tonic-gate 		/*
29050a701b1eSRobert Gordon 		 * Wait for send to complete if this is the
29060a701b1eSRobert Gordon 		 * last item in the list.
29077c478bd9Sstevel@tonic-gate 		 */
29080a701b1eSRobert Gordon 		if (wait && cl->c_next == NULL) {
2909065714dcSSiddheshwar Mahesh 			rib_send_hold(qp);
29100a701b1eSRobert Gordon 			mutex_exit(&wdesc->sendwait_lock);
2911065714dcSSiddheshwar Mahesh 
2912065714dcSSiddheshwar Mahesh 			ret = rib_sendwait(qp, wdesc);
2913065714dcSSiddheshwar Mahesh 
2914065714dcSSiddheshwar Mahesh 			if (ret != 0)
2915065714dcSSiddheshwar Mahesh 				return (ret);
29160a701b1eSRobert Gordon 		}
29170a701b1eSRobert Gordon 		cl = cl->c_next;
29180a701b1eSRobert Gordon 	}
29197c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
29207c478bd9Sstevel@tonic-gate }
29217c478bd9Sstevel@tonic-gate 
29227c478bd9Sstevel@tonic-gate /*
29237c478bd9Sstevel@tonic-gate  * rib_srv_cm_handler()
29247c478bd9Sstevel@tonic-gate  *    Connection Manager callback to handle RC connection requests.
29257c478bd9Sstevel@tonic-gate  */
29267c478bd9Sstevel@tonic-gate /* ARGSUSED */
29277c478bd9Sstevel@tonic-gate static ibt_cm_status_t
29287c478bd9Sstevel@tonic-gate rib_srv_cm_handler(void *any, ibt_cm_event_t *event,
29297c478bd9Sstevel@tonic-gate 	ibt_cm_return_args_t *ret_args, void *priv_data,
29307c478bd9Sstevel@tonic-gate 	ibt_priv_data_len_t len)
29317c478bd9Sstevel@tonic-gate {
29327c478bd9Sstevel@tonic-gate 	queue_t		*q;
29337c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
29347c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
29357c478bd9Sstevel@tonic-gate 	rdma_stat	status = RDMA_SUCCESS;
29367c478bd9Sstevel@tonic-gate 	int		i;
29377c478bd9Sstevel@tonic-gate 	struct clist	cl;
29380a701b1eSRobert Gordon 	rdma_buf_t	rdbuf = {0};
29397c478bd9Sstevel@tonic-gate 	void		*buf = NULL;
29407c478bd9Sstevel@tonic-gate 	CONN		*conn;
29410a701b1eSRobert Gordon 	ibt_ip_cm_info_t	ipinfo;
29420a701b1eSRobert Gordon 	struct sockaddr_in *s;
29430a701b1eSRobert Gordon 	struct sockaddr_in6 *s6;
29440a701b1eSRobert Gordon 	int sin_size = sizeof (struct sockaddr_in);
29450a701b1eSRobert Gordon 	int in_size = sizeof (struct in_addr);
29460a701b1eSRobert Gordon 	int sin6_size = sizeof (struct sockaddr_in6);
29477c478bd9Sstevel@tonic-gate 
29487c478bd9Sstevel@tonic-gate 	ASSERT(any != NULL);
29497c478bd9Sstevel@tonic-gate 	ASSERT(event != NULL);
29507c478bd9Sstevel@tonic-gate 
29517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	hca = (rib_hca_t *)any;
29527c478bd9Sstevel@tonic-gate 
29537c478bd9Sstevel@tonic-gate 	/* got a connection request */
29547c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
29557c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_REQ_RCV:
29567c478bd9Sstevel@tonic-gate 		/*
29577c478bd9Sstevel@tonic-gate 		 * If the plugin is in the NO_ACCEPT state, bail out.
29587c478bd9Sstevel@tonic-gate 		 */
29597c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
29607c478bd9Sstevel@tonic-gate 		if (plugin_state == NO_ACCEPT) {
29617c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
29627c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
29637c478bd9Sstevel@tonic-gate 		}
29647c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
29657c478bd9Sstevel@tonic-gate 
29667c478bd9Sstevel@tonic-gate 		/*
29677c478bd9Sstevel@tonic-gate 		 * Need to send a MRA MAD to CM so that it does not
29687c478bd9Sstevel@tonic-gate 		 * timeout on us.
29697c478bd9Sstevel@tonic-gate 		 */
29707c478bd9Sstevel@tonic-gate 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
29717c478bd9Sstevel@tonic-gate 		    event->cm_event.req.req_timeout * 8, NULL, 0);
29727c478bd9Sstevel@tonic-gate 
29737c478bd9Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
29747c478bd9Sstevel@tonic-gate 		q = rib_stat->q;
29757c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
29760a701b1eSRobert Gordon 
29777c478bd9Sstevel@tonic-gate 		status = rib_svc_create_chan(hca, (caddr_t)q,
29787c478bd9Sstevel@tonic-gate 		    event->cm_event.req.req_prim_hca_port, &qp);
29790a701b1eSRobert Gordon 
29807c478bd9Sstevel@tonic-gate 		if (status) {
29817c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
29827c478bd9Sstevel@tonic-gate 		}
29837c478bd9Sstevel@tonic-gate 
29847c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
29850a701b1eSRobert Gordon 		ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
29860a701b1eSRobert Gordon 		ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
29877c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
29887c478bd9Sstevel@tonic-gate 
29897c478bd9Sstevel@tonic-gate 		/*
29907c478bd9Sstevel@tonic-gate 		 * Pre-posts RECV buffers
29917c478bd9Sstevel@tonic-gate 		 */
29927c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
29937c478bd9Sstevel@tonic-gate 		for (i = 0; i < preposted_rbufs; i++) {
29947c478bd9Sstevel@tonic-gate 			bzero(&rdbuf, sizeof (rdbuf));
29957c478bd9Sstevel@tonic-gate 			rdbuf.type = RECV_BUFFER;
29967c478bd9Sstevel@tonic-gate 			buf = rib_rbuf_alloc(conn, &rdbuf);
29977c478bd9Sstevel@tonic-gate 			if (buf == NULL) {
2998065714dcSSiddheshwar Mahesh 				/*
2999065714dcSSiddheshwar Mahesh 				 * A connection is not established yet.
3000065714dcSSiddheshwar Mahesh 				 * Just flush the channel. Buffers
3001065714dcSSiddheshwar Mahesh 				 * posted till now will error out with
3002065714dcSSiddheshwar Mahesh 				 * IBT_WC_WR_FLUSHED_ERR.
3003065714dcSSiddheshwar Mahesh 				 */
3004065714dcSSiddheshwar Mahesh 				(void) ibt_flush_channel(qp->qp_hdl);
30057c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn, NULL);
30067c478bd9Sstevel@tonic-gate 				return (IBT_CM_REJECT);
30077c478bd9Sstevel@tonic-gate 			}
30087c478bd9Sstevel@tonic-gate 
30097c478bd9Sstevel@tonic-gate 			bzero(&cl, sizeof (cl));
30100a701b1eSRobert Gordon 			cl.w.c_saddr3 = (caddr_t)rdbuf.addr;
30117c478bd9Sstevel@tonic-gate 			cl.c_len = rdbuf.len;
30120a701b1eSRobert Gordon 			cl.c_smemhandle.mrc_lmr =
30130a701b1eSRobert Gordon 			    rdbuf.handle.mrc_lmr; /* lkey */
30147c478bd9Sstevel@tonic-gate 			cl.c_next = NULL;
30157c478bd9Sstevel@tonic-gate 			status = rib_post_recv(conn, &cl);
30167c478bd9Sstevel@tonic-gate 			if (status != RDMA_SUCCESS) {
3017065714dcSSiddheshwar Mahesh 				/*
3018065714dcSSiddheshwar Mahesh 				 * A connection is not established yet.
3019065714dcSSiddheshwar Mahesh 				 * Just flush the channel. Buffers
3020065714dcSSiddheshwar Mahesh 				 * posted till now will error out with
3021065714dcSSiddheshwar Mahesh 				 * IBT_WC_WR_FLUSHED_ERR.
3022065714dcSSiddheshwar Mahesh 				 */
3023065714dcSSiddheshwar Mahesh 				(void) ibt_flush_channel(qp->qp_hdl);
30247c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn, NULL);
30257c478bd9Sstevel@tonic-gate 				return (IBT_CM_REJECT);
30267c478bd9Sstevel@tonic-gate 			}
30277c478bd9Sstevel@tonic-gate 		}
30287c478bd9Sstevel@tonic-gate 		(void) rib_add_connlist(conn, &hca->srv_conn_list);
30297c478bd9Sstevel@tonic-gate 
30307c478bd9Sstevel@tonic-gate 		/*
30310a701b1eSRobert Gordon 		 * Get the address translation
30327c478bd9Sstevel@tonic-gate 		 */
30337c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
30347c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
30357c478bd9Sstevel@tonic-gate 			rw_exit(&hca->state_lock);
30367c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
30377c478bd9Sstevel@tonic-gate 		}
30387c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30397c478bd9Sstevel@tonic-gate 
30400a701b1eSRobert Gordon 		bzero(&ipinfo, sizeof (ibt_ip_cm_info_t));
30417c478bd9Sstevel@tonic-gate 
30420a701b1eSRobert Gordon 		if (ibt_get_ip_data(event->cm_priv_data_len,
30430a701b1eSRobert Gordon 		    event->cm_priv_data,
30440a701b1eSRobert Gordon 		    &ipinfo) != IBT_SUCCESS) {
30450a701b1eSRobert Gordon 
30460a701b1eSRobert Gordon 			return (IBT_CM_REJECT);
30470a701b1eSRobert Gordon 		}
30480a701b1eSRobert Gordon 
30490a701b1eSRobert Gordon 		switch (ipinfo.src_addr.family) {
30500a701b1eSRobert Gordon 		case AF_INET:
30517c478bd9Sstevel@tonic-gate 
30527523bef8SSiddheshwar Mahesh 			conn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP) + 1,
30537523bef8SSiddheshwar Mahesh 			    KM_SLEEP);
30547523bef8SSiddheshwar Mahesh 			(void) strcpy(conn->c_netid, RIBNETID_TCP);
30557523bef8SSiddheshwar Mahesh 
30567c478bd9Sstevel@tonic-gate 			conn->c_raddr.maxlen =
30577c478bd9Sstevel@tonic-gate 			    conn->c_raddr.len = sin_size;
30580a701b1eSRobert Gordon 			conn->c_raddr.buf = kmem_zalloc(sin_size, KM_SLEEP);
30590a701b1eSRobert Gordon 
30607c478bd9Sstevel@tonic-gate 			s = (struct sockaddr_in *)conn->c_raddr.buf;
30617c478bd9Sstevel@tonic-gate 			s->sin_family = AF_INET;
30620a701b1eSRobert Gordon 			bcopy((void *)&ipinfo.src_addr.un.ip4addr,
30630a701b1eSRobert Gordon 			    &s->sin_addr, in_size);
30640a701b1eSRobert Gordon 
30657523bef8SSiddheshwar Mahesh 			conn->c_laddr.maxlen =
30667523bef8SSiddheshwar Mahesh 			    conn->c_laddr.len = sin_size;
30677523bef8SSiddheshwar Mahesh 			conn->c_laddr.buf = kmem_zalloc(sin_size, KM_SLEEP);
30687523bef8SSiddheshwar Mahesh 
30697523bef8SSiddheshwar Mahesh 			s = (struct sockaddr_in *)conn->c_laddr.buf;
30707523bef8SSiddheshwar Mahesh 			s->sin_family = AF_INET;
30717523bef8SSiddheshwar Mahesh 			bcopy((void *)&ipinfo.dst_addr.un.ip4addr,
30727523bef8SSiddheshwar Mahesh 			    &s->sin_addr, in_size);
30737523bef8SSiddheshwar Mahesh 
3074*0a4b0810SKaren Rochford 			conn->c_addrmask.maxlen = conn->c_addrmask.len =
3075*0a4b0810SKaren Rochford 			    sizeof (struct sockaddr_in);
3076*0a4b0810SKaren Rochford 			conn->c_addrmask.buf =
3077*0a4b0810SKaren Rochford 			    kmem_zalloc(conn->c_addrmask.len, KM_SLEEP);
3078*0a4b0810SKaren Rochford 			((struct sockaddr_in *)
3079*0a4b0810SKaren Rochford 			    conn->c_addrmask.buf)->sin_addr.s_addr =
3080*0a4b0810SKaren Rochford 			    (uint32_t)~0;
3081*0a4b0810SKaren Rochford 			((struct sockaddr_in *)
3082*0a4b0810SKaren Rochford 			    conn->c_addrmask.buf)->sin_family =
3083*0a4b0810SKaren Rochford 			    (sa_family_t)~0;
30840a701b1eSRobert Gordon 			break;
30850a701b1eSRobert Gordon 
30860a701b1eSRobert Gordon 		case AF_INET6:
30877c478bd9Sstevel@tonic-gate 
30887523bef8SSiddheshwar Mahesh 			conn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP6) + 1,
30897523bef8SSiddheshwar Mahesh 			    KM_SLEEP);
30907523bef8SSiddheshwar Mahesh 			(void) strcpy(conn->c_netid, RIBNETID_TCP6);
30917523bef8SSiddheshwar Mahesh 
30927c478bd9Sstevel@tonic-gate 			conn->c_raddr.maxlen =
30937c478bd9Sstevel@tonic-gate 			    conn->c_raddr.len = sin6_size;
30940a701b1eSRobert Gordon 			conn->c_raddr.buf = kmem_zalloc(sin6_size, KM_SLEEP);
30957c478bd9Sstevel@tonic-gate 
30967c478bd9Sstevel@tonic-gate 			s6 = (struct sockaddr_in6 *)conn->c_raddr.buf;
30977c478bd9Sstevel@tonic-gate 			s6->sin6_family = AF_INET6;
30980a701b1eSRobert Gordon 			bcopy((void *)&ipinfo.src_addr.un.ip6addr,
30990a701b1eSRobert Gordon 			    &s6->sin6_addr,
31007c478bd9Sstevel@tonic-gate 			    sizeof (struct in6_addr));
31017c478bd9Sstevel@tonic-gate 
31027523bef8SSiddheshwar Mahesh 			conn->c_laddr.maxlen =
31037523bef8SSiddheshwar Mahesh 			    conn->c_laddr.len = sin6_size;
31047523bef8SSiddheshwar Mahesh 			conn->c_laddr.buf = kmem_zalloc(sin6_size, KM_SLEEP);
31057523bef8SSiddheshwar Mahesh 
31067523bef8SSiddheshwar Mahesh 			s6 = (struct sockaddr_in6 *)conn->c_laddr.buf;
31077523bef8SSiddheshwar Mahesh 			s6->sin6_family = AF_INET6;
31087523bef8SSiddheshwar Mahesh 			bcopy((void *)&ipinfo.dst_addr.un.ip6addr,
31097523bef8SSiddheshwar Mahesh 			    &s6->sin6_addr,
31107523bef8SSiddheshwar Mahesh 			    sizeof (struct in6_addr));
31117523bef8SSiddheshwar Mahesh 
3112*0a4b0810SKaren Rochford 			conn->c_addrmask.maxlen = conn->c_addrmask.len =
3113*0a4b0810SKaren Rochford 			    sizeof (struct sockaddr_in6);
3114*0a4b0810SKaren Rochford 			conn->c_addrmask.buf =
3115*0a4b0810SKaren Rochford 			    kmem_zalloc(conn->c_addrmask.len, KM_SLEEP);
3116*0a4b0810SKaren Rochford 			(void) memset(&((struct sockaddr_in6 *)
3117*0a4b0810SKaren Rochford 			    conn->c_addrmask.buf)->sin6_addr, (uchar_t)~0,
3118*0a4b0810SKaren Rochford 			    sizeof (struct in6_addr));
3119*0a4b0810SKaren Rochford 			((struct sockaddr_in6 *)
3120*0a4b0810SKaren Rochford 			    conn->c_addrmask.buf)->sin6_family =
3121*0a4b0810SKaren Rochford 			    (sa_family_t)~0;
31220a701b1eSRobert Gordon 			break;
31230a701b1eSRobert Gordon 
31240a701b1eSRobert Gordon 		default:
31250a701b1eSRobert Gordon 			return (IBT_CM_REJECT);
31267c478bd9Sstevel@tonic-gate 		}
31270a701b1eSRobert Gordon 
31287c478bd9Sstevel@tonic-gate 		break;
31297c478bd9Sstevel@tonic-gate 
31307c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
31317c478bd9Sstevel@tonic-gate 	{
31327c478bd9Sstevel@tonic-gate 		CONN		*conn;
31337c478bd9Sstevel@tonic-gate 		rib_qp_t	*qp;
31347c478bd9Sstevel@tonic-gate 
31357c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
31367c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
31377c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
31387c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
31397c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
31407c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
31417c478bd9Sstevel@tonic-gate 			/*
31427c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
31437c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
31447c478bd9Sstevel@tonic-gate 			 */
31457c478bd9Sstevel@tonic-gate 			break;
31467c478bd9Sstevel@tonic-gate 		default:
31477c478bd9Sstevel@tonic-gate 			/*
31487c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
31497c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
31507c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
31517c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
31527c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
31537c478bd9Sstevel@tonic-gate 			 * state
31547c478bd9Sstevel@tonic-gate 			 */
31557c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
31567c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
31577c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
31587c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
31597c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
31607c478bd9Sstevel@tonic-gate 				break;
31617c478bd9Sstevel@tonic-gate 			}
31620a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
31637c478bd9Sstevel@tonic-gate 
31647c478bd9Sstevel@tonic-gate 			/*
31657c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref goes down to 0
31667c478bd9Sstevel@tonic-gate 			 */
31677c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
31687c478bd9Sstevel@tonic-gate 				/*
31697c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
31707c478bd9Sstevel@tonic-gate 				 */
31717c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
31727c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
31737c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
31747c478bd9Sstevel@tonic-gate 				    &hca->srv_conn_list);
31757c478bd9Sstevel@tonic-gate 			} else {
3176065714dcSSiddheshwar Mahesh 				/*
3177065714dcSSiddheshwar Mahesh 				 * conn will be freed when c_ref goes to 0.
3178065714dcSSiddheshwar Mahesh 				 * Indicate to cleaning thread not to close
3179065714dcSSiddheshwar Mahesh 				 * the connection, but just free the channel.
3180065714dcSSiddheshwar Mahesh 				 */
3181065714dcSSiddheshwar Mahesh 				conn->c_flags |= C_CLOSE_NOTNEEDED;
31827c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
31837c478bd9Sstevel@tonic-gate 			}
31840a701b1eSRobert Gordon 			DTRACE_PROBE(rpcib__i__srvcm_chandisconnect);
31857c478bd9Sstevel@tonic-gate 			break;
31867c478bd9Sstevel@tonic-gate 		}
31877c478bd9Sstevel@tonic-gate 		break;
31887c478bd9Sstevel@tonic-gate 	}
31897c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_EST:
31907c478bd9Sstevel@tonic-gate 		/*
31917c478bd9Sstevel@tonic-gate 		 * RTU received, hence connection established.
31927c478bd9Sstevel@tonic-gate 		 */
31937c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
31947c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
31957c478bd9Sstevel@tonic-gate 			    "(CONN_EST) channel established");
31967c478bd9Sstevel@tonic-gate 		break;
31977c478bd9Sstevel@tonic-gate 
31987c478bd9Sstevel@tonic-gate 	default:
31997c478bd9Sstevel@tonic-gate 		if (rib_debug > 2) {
32007c478bd9Sstevel@tonic-gate 			/* Let CM handle the following events. */
32017c478bd9Sstevel@tonic-gate 			if (event->cm_type == IBT_CM_EVENT_REP_RCV) {
32027c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32037c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_REP_RCV\n");
32047c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) {
32057c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32067c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_LAP_RCV\n");
32077c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) {
32087c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32097c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_MRA_RCV\n");
32107c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_APR_RCV) {
32117c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32127c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_APR_RCV\n");
32137c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_FAILURE) {
32147c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32157c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_FAILURE\n");
32167c478bd9Sstevel@tonic-gate 			}
32177c478bd9Sstevel@tonic-gate 		}
32180a701b1eSRobert Gordon 		return (IBT_CM_DEFAULT);
32197c478bd9Sstevel@tonic-gate 	}
32207c478bd9Sstevel@tonic-gate 
32217c478bd9Sstevel@tonic-gate 	/* accept all other CM messages (i.e. let the CM handle them) */
32227c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
32237c478bd9Sstevel@tonic-gate }
32247c478bd9Sstevel@tonic-gate 
32257c478bd9Sstevel@tonic-gate static rdma_stat
32267f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rib_register_service(rib_hca_t *hca, int service_type,
32277f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	uint8_t protocol_num, in_port_t dst_port)
32287c478bd9Sstevel@tonic-gate {
32297c478bd9Sstevel@tonic-gate 	ibt_srv_desc_t		sdesc;
32307c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
32317c478bd9Sstevel@tonic-gate 	ib_svc_id_t		srv_id;
32327c478bd9Sstevel@tonic-gate 	ibt_srv_hdl_t		srv_hdl;
32337c478bd9Sstevel@tonic-gate 	uint_t			port_size;
32340a701b1eSRobert Gordon 	uint_t			pki, i, num_ports, nbinds;
32357c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
32367f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_service_t		*service;
32377c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
32387c478bd9Sstevel@tonic-gate 
32397c478bd9Sstevel@tonic-gate 	/*
32407c478bd9Sstevel@tonic-gate 	 * Query all ports for the given HCA
32417c478bd9Sstevel@tonic-gate 	 */
32427c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
32437c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
32447c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
32457c478bd9Sstevel@tonic-gate 		    &num_ports, &port_size);
32467c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32477c478bd9Sstevel@tonic-gate 	} else {
32487c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32497c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
32507c478bd9Sstevel@tonic-gate 	}
32517c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
32527c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
32537c478bd9Sstevel@tonic-gate 	}
32547c478bd9Sstevel@tonic-gate 
32550a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__regservice_numports,
32560a701b1eSRobert Gordon 	    int, num_ports);
32577c478bd9Sstevel@tonic-gate 
32587c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
32597c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
32600a701b1eSRobert Gordon 			DTRACE_PROBE1(rpcib__i__regservice__portinactive,
32610a701b1eSRobert Gordon 			    int, i+1);
32620a701b1eSRobert Gordon 		} else if (port_infop[i].p_linkstate == IBT_PORT_ACTIVE) {
32630a701b1eSRobert Gordon 			DTRACE_PROBE1(rpcib__i__regservice__portactive,
32640a701b1eSRobert Gordon 			    int, i+1);
32657c478bd9Sstevel@tonic-gate 		}
32667c478bd9Sstevel@tonic-gate 	}
32670a701b1eSRobert Gordon 
32687c478bd9Sstevel@tonic-gate 	/*
32697c478bd9Sstevel@tonic-gate 	 * Get all the IP addresses on this system to register the
32707c478bd9Sstevel@tonic-gate 	 * given "service type" on all DNS recognized IP addrs.
32717c478bd9Sstevel@tonic-gate 	 * Each service type such as NFS will have all the systems
32727c478bd9Sstevel@tonic-gate 	 * IP addresses as its different names. For now the only
32737c478bd9Sstevel@tonic-gate 	 * type of service we support in RPCIB is NFS.
32747c478bd9Sstevel@tonic-gate 	 */
32757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->service_list_lock, RW_WRITER);
32767c478bd9Sstevel@tonic-gate 	/*
32777c478bd9Sstevel@tonic-gate 	 * Start registering and binding service to active
32787c478bd9Sstevel@tonic-gate 	 * on active ports on this HCA.
32797c478bd9Sstevel@tonic-gate 	 */
32807c478bd9Sstevel@tonic-gate 	nbinds = 0;
32817f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	for (service = rib_stat->service_list;
32827f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	    service && (service->srv_type != service_type);
32837f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	    service = service->next)
32847f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		;
32857c478bd9Sstevel@tonic-gate 
32867f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (service == NULL) {
32877c478bd9Sstevel@tonic-gate 		/*
32887c478bd9Sstevel@tonic-gate 		 * We use IP addresses as the service names for
32897c478bd9Sstevel@tonic-gate 		 * service registration.  Register each of them
32907c478bd9Sstevel@tonic-gate 		 * with CM to obtain a svc_id and svc_hdl.  We do not
32917c478bd9Sstevel@tonic-gate 		 * register the service with machine's loopback address.
32927c478bd9Sstevel@tonic-gate 		 */
32937c478bd9Sstevel@tonic-gate 		(void) bzero(&srv_id, sizeof (ib_svc_id_t));
32947c478bd9Sstevel@tonic-gate 		(void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t));
32957c478bd9Sstevel@tonic-gate 		(void) bzero(&sdesc, sizeof (ibt_srv_desc_t));
32967c478bd9Sstevel@tonic-gate 		sdesc.sd_handler = rib_srv_cm_handler;
32977c478bd9Sstevel@tonic-gate 		sdesc.sd_flags = 0;
32987c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_service(hca->ibt_clnt_hdl,
32997f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    &sdesc, ibt_get_ip_sid(protocol_num, dst_port),
33000a701b1eSRobert Gordon 		    1, &srv_hdl, &srv_id);
33017f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if ((ibt_status != IBT_SUCCESS) &&
33027f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (ibt_status != IBT_CM_SERVICE_EXISTS)) {
33037f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&rib_stat->service_list_lock);
33047f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			DTRACE_PROBE1(rpcib__i__regservice__ibtres,
33057f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    int, ibt_status);
33067f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			ibt_free_portinfo(port_infop, port_size);
33077f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			return (RDMA_FAILED);
33087f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
33097f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33107f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		/*
33117f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * Allocate and prepare a service entry
33127f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 */
33137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		service = kmem_zalloc(sizeof (rib_service_t), KM_SLEEP);
33147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33157f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		service->srv_type = service_type;
33167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		service->srv_hdl = srv_hdl;
33177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		service->srv_id = srv_id;
33187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		service->next = rib_stat->service_list;
33207f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_stat->service_list = service;
33217f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		DTRACE_PROBE1(rpcib__i__regservice__new__service,
33227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    int, service->srv_type);
33237f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	} else {
33247f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		srv_hdl = service->srv_hdl;
33257f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		srv_id = service->srv_id;
33267f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		DTRACE_PROBE1(rpcib__i__regservice__existing__service,
33277f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    int, service->srv_type);
33287f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
33290a701b1eSRobert Gordon 
33307c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
33317f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		ibt_sbind_hdl_t		sbp;
33327f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_hca_service_t	*hca_srv;
33337f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		ib_gid_t		gid;
33347f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33357c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
33367c478bd9Sstevel@tonic-gate 			continue;
33377c478bd9Sstevel@tonic-gate 
33387c478bd9Sstevel@tonic-gate 		for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
33397c478bd9Sstevel@tonic-gate 			pkey = port_infop[i].p_pkey_tbl[pki];
33407c478bd9Sstevel@tonic-gate 
33417f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_enter(&hca->bound_services_lock, RW_READER);
33427f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			gid = port_infop[i].p_sgid_tbl[0];
33437f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			for (hca_srv = hca->bound_services; hca_srv;
33447f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    hca_srv = hca_srv->next) {
33457f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				if ((hca_srv->srv_id == service->srv_id) &&
33467f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    (hca_srv->gid.gid_prefix ==
33477f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    gid.gid_prefix) &&
33487f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    (hca_srv->gid.gid_guid == gid.gid_guid))
33497f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					break;
33507f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			}
33517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&hca->bound_services_lock);
33527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			if (hca_srv != NULL) {
33537c478bd9Sstevel@tonic-gate 				/*
33547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				 * port is already bound to the service
33557c478bd9Sstevel@tonic-gate 				 */
33567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				DTRACE_PROBE1(
33577f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    rpcib__i__regservice__already__bound,
33587f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    int, i+1);
33597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				nbinds++;
33607c478bd9Sstevel@tonic-gate 				continue;
33617c478bd9Sstevel@tonic-gate 			}
33620a701b1eSRobert Gordon 
33637f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			if ((pkey & IBSRM_HB) &&
33647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    (pkey != IB_PKEY_INVALID_FULL)) {
33657f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33667f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				sbp = NULL;
33677f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				ibt_status = ibt_bind_service(srv_hdl,
33687f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    gid, NULL, hca, &sbp);
33697f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33707f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				if (ibt_status == IBT_SUCCESS) {
33717f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					hca_srv = kmem_zalloc(
33727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					    sizeof (rib_hca_service_t),
33737f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					    KM_SLEEP);
33747f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					hca_srv->srv_id = srv_id;
33757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					hca_srv->gid = gid;
33767f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					hca_srv->sbind_hdl = sbp;
33777f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33787f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					rw_enter(&hca->bound_services_lock,
33797f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					    RW_WRITER);
33807f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					hca_srv->next = hca->bound_services;
33817f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					hca->bound_services = hca_srv;
33827f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					rw_exit(&hca->bound_services_lock);
33837c478bd9Sstevel@tonic-gate 					nbinds++;
33847c478bd9Sstevel@tonic-gate 				}
33857f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
33867f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				DTRACE_PROBE1(rpcib__i__regservice__bindres,
33877f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				    int, ibt_status);
33887c478bd9Sstevel@tonic-gate 			}
33897c478bd9Sstevel@tonic-gate 		}
33907f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
33917f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->service_list_lock);
33927c478bd9Sstevel@tonic-gate 
33937c478bd9Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
33947c478bd9Sstevel@tonic-gate 
33957c478bd9Sstevel@tonic-gate 	if (nbinds == 0) {
33967c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
33977c478bd9Sstevel@tonic-gate 	} else {
33987c478bd9Sstevel@tonic-gate 		/*
33997c478bd9Sstevel@tonic-gate 		 * Put this plugin into accept state, since at least
34007c478bd9Sstevel@tonic-gate 		 * one registration was successful.
34017c478bd9Sstevel@tonic-gate 		 */
34027c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
34037c478bd9Sstevel@tonic-gate 		plugin_state = ACCEPT;
34047c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
34057c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
34067c478bd9Sstevel@tonic-gate 	}
34077c478bd9Sstevel@tonic-gate }
34087c478bd9Sstevel@tonic-gate 
34097c478bd9Sstevel@tonic-gate void
34107c478bd9Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd)
34117c478bd9Sstevel@tonic-gate {
34127f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rdma_stat status;
34137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	int n_listening = 0;
34147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_t *hca;
34157c478bd9Sstevel@tonic-gate 
34167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_enter(&rib_stat->listen_lock);
34177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	/*
34187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * if rd parameter is NULL then it means that rib_stat->q is
34197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * already initialized by a call from RDMA and we just want to
34207f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * add a newly attached HCA to the same listening state as other
34217f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * HCAs.
34227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 */
34237f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (rd == NULL) {
34247f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (rib_stat->q == NULL) {
34257f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			mutex_exit(&rib_stat->listen_lock);
34267f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			return;
34277f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
34287f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	} else {
34297f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_stat->q = &rd->q;
34307f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
34317f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
34327f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
34337c478bd9Sstevel@tonic-gate 		/*
34347c478bd9Sstevel@tonic-gate 		 * First check if a hca is still attached
34357c478bd9Sstevel@tonic-gate 		 */
34367f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_enter(&hca->state_lock, RW_READER);
34377f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (hca->state != HCA_INITED) {
34387f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&hca->state_lock);
34397f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			continue;
34407c478bd9Sstevel@tonic-gate 		}
34417f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_exit(&hca->state_lock);
34427c478bd9Sstevel@tonic-gate 
34437c478bd9Sstevel@tonic-gate 		/*
34447f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * Right now the only service type is NFS. Hence
34457f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * force feed this value. Ideally to communicate
34467f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * the service type it should be passed down in
34477f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * rdma_svc_data.
34487c478bd9Sstevel@tonic-gate 		 */
34497f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		status = rib_register_service(hca, NFS,
34507f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    IPPROTO_TCP, nfs_rdma_port);
34517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (status == RDMA_SUCCESS)
34527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			n_listening++;
34537c478bd9Sstevel@tonic-gate 	}
34547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->hcas_list_lock);
34557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
34567c478bd9Sstevel@tonic-gate 	/*
34577c478bd9Sstevel@tonic-gate 	 * Service active on an HCA, check rd->err_code for more
34587c478bd9Sstevel@tonic-gate 	 * explainable errors.
34597c478bd9Sstevel@tonic-gate 	 */
34607f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (rd) {
34617f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (n_listening > 0) {
34627c478bd9Sstevel@tonic-gate 			rd->active = 1;
34637f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rd->err_code = RDMA_SUCCESS;
34647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		} else {
34657f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rd->active = 0;
34667f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rd->err_code = RDMA_FAILED;
34677f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		}
34687f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
34697f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_exit(&rib_stat->listen_lock);
34707c478bd9Sstevel@tonic-gate }
34717c478bd9Sstevel@tonic-gate 
34727c478bd9Sstevel@tonic-gate /* XXXX */
34737c478bd9Sstevel@tonic-gate /* ARGSUSED */
34747c478bd9Sstevel@tonic-gate static void
34757c478bd9Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata)
34767c478bd9Sstevel@tonic-gate {
34777c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
34787c478bd9Sstevel@tonic-gate 
34797f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_enter(&rib_stat->listen_lock);
34807c478bd9Sstevel@tonic-gate 	/*
34817c478bd9Sstevel@tonic-gate 	 * KRPC called the RDMATF to stop the listeners, this means
34827c478bd9Sstevel@tonic-gate 	 * stop sending incomming or recieved requests to KRPC master
34837c478bd9Sstevel@tonic-gate 	 * transport handle for RDMA-IB. This is also means that the
34847c478bd9Sstevel@tonic-gate 	 * master transport handle, responsible for us, is going away.
34857c478bd9Sstevel@tonic-gate 	 */
34867c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
34877c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
34887c478bd9Sstevel@tonic-gate 	if (svcdata != NULL)
34897c478bd9Sstevel@tonic-gate 		svcdata->active = 0;
34907c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
34917c478bd9Sstevel@tonic-gate 
34927f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
34937f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
34947c478bd9Sstevel@tonic-gate 		/*
34957c478bd9Sstevel@tonic-gate 		 * First check if a hca is still attached
34967c478bd9Sstevel@tonic-gate 		 */
34977c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
34987f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (hca->state == HCA_DETACHED) {
34997c478bd9Sstevel@tonic-gate 			rw_exit(&hca->state_lock);
35007f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			continue;
35017c478bd9Sstevel@tonic-gate 		}
35020a701b1eSRobert Gordon 		rib_close_channels(&hca->srv_conn_list);
35037c478bd9Sstevel@tonic-gate 		rib_stop_services(hca);
35047c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
35057c478bd9Sstevel@tonic-gate 	}
35067f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->hcas_list_lock);
35077f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
35087f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	/*
35097f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Avoid rib_listen() using the stale q field.
35107f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * This could happen if a port goes up after all services
35117f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * are already unregistered.
35127f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 */
35137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_stat->q = NULL;
35147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_exit(&rib_stat->listen_lock);
35157f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States }
35167c478bd9Sstevel@tonic-gate 
35177c478bd9Sstevel@tonic-gate /*
35187c478bd9Sstevel@tonic-gate  * Traverse the HCA's service list to unbind and deregister services.
35197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * For each bound service of HCA to be removed, first find the corresponding
35207f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * service handle (srv_hdl) and then unbind the service by calling
35217f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * ibt_unbind_service().
35227c478bd9Sstevel@tonic-gate  */
35237c478bd9Sstevel@tonic-gate static void
35247c478bd9Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca)
35257c478bd9Sstevel@tonic-gate {
35267f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_service_t *srv_list, *to_remove;
35277c478bd9Sstevel@tonic-gate 
35287c478bd9Sstevel@tonic-gate 	/*
35297c478bd9Sstevel@tonic-gate 	 * unbind and deregister the services for this service type.
35307c478bd9Sstevel@tonic-gate 	 * Right now there is only one service type. In future it will
35317c478bd9Sstevel@tonic-gate 	 * be passed down to this function.
35327c478bd9Sstevel@tonic-gate 	 */
35337f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&hca->bound_services_lock, RW_READER);
35347f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	srv_list = hca->bound_services;
35357f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	hca->bound_services = NULL;
35367f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&hca->bound_services_lock);
35377f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
35387c478bd9Sstevel@tonic-gate 	while (srv_list != NULL) {
35397f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rib_service_t *sc;
35407f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
35417c478bd9Sstevel@tonic-gate 		to_remove = srv_list;
35427f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		srv_list = to_remove->next;
35437f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_enter(&rib_stat->service_list_lock, RW_READER);
35447f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		for (sc = rib_stat->service_list;
35457f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    sc && (sc->srv_id != to_remove->srv_id);
35467f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    sc = sc->next)
35477f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			;
35487f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		/*
35497f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * if sc is NULL then the service doesn't exist anymore,
35507f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * probably just removed completely through rib_stat.
35517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 */
35527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (sc != NULL)
35537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			(void) ibt_unbind_service(sc->srv_hdl,
35547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    to_remove->sbind_hdl);
35557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_exit(&rib_stat->service_list_lock);
35567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		kmem_free(to_remove, sizeof (rib_hca_service_t));
35577c478bd9Sstevel@tonic-gate 	}
35587c478bd9Sstevel@tonic-gate }
35597c478bd9Sstevel@tonic-gate 
35607c478bd9Sstevel@tonic-gate static struct svc_recv *
35617c478bd9Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl)
35627c478bd9Sstevel@tonic-gate {
35637c478bd9Sstevel@tonic-gate 	struct svc_recv	*recvp;
35647c478bd9Sstevel@tonic-gate 
35657c478bd9Sstevel@tonic-gate 	recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP);
35667c478bd9Sstevel@tonic-gate 	recvp->vaddr = sgl->ds_va;
35677c478bd9Sstevel@tonic-gate 	recvp->qp = qp;
35687c478bd9Sstevel@tonic-gate 	recvp->bytes_xfer = 0;
35697c478bd9Sstevel@tonic-gate 	return (recvp);
35707c478bd9Sstevel@tonic-gate }
35717c478bd9Sstevel@tonic-gate 
35727c478bd9Sstevel@tonic-gate static int
35737c478bd9Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp)
35747c478bd9Sstevel@tonic-gate {
35757c478bd9Sstevel@tonic-gate 	kmem_free(recvp, sizeof (*recvp));
35767c478bd9Sstevel@tonic-gate 
35777c478bd9Sstevel@tonic-gate 	return (0);
35787c478bd9Sstevel@tonic-gate }
35797c478bd9Sstevel@tonic-gate 
35807c478bd9Sstevel@tonic-gate static struct reply *
35817c478bd9Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid)
35827c478bd9Sstevel@tonic-gate {
35837c478bd9Sstevel@tonic-gate 	struct reply	*rep;
35847c478bd9Sstevel@tonic-gate 
35857c478bd9Sstevel@tonic-gate 
35867c478bd9Sstevel@tonic-gate 	rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP);
35877c478bd9Sstevel@tonic-gate 	if (rep == NULL) {
35880a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__addrreply__nomem);
35897c478bd9Sstevel@tonic-gate 		return (NULL);
35907c478bd9Sstevel@tonic-gate 	}
35917c478bd9Sstevel@tonic-gate 	rep->xid = msgid;
35927c478bd9Sstevel@tonic-gate 	rep->vaddr_cq = NULL;
35937c478bd9Sstevel@tonic-gate 	rep->bytes_xfer = 0;
35947c478bd9Sstevel@tonic-gate 	rep->status = (uint_t)REPLY_WAIT;
35957c478bd9Sstevel@tonic-gate 	rep->prev = NULL;
35967c478bd9Sstevel@tonic-gate 	cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL);
35977c478bd9Sstevel@tonic-gate 
35987c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
35997c478bd9Sstevel@tonic-gate 	if (qp->replylist) {
36007c478bd9Sstevel@tonic-gate 		rep->next = qp->replylist;
36017c478bd9Sstevel@tonic-gate 		qp->replylist->prev = rep;
36027c478bd9Sstevel@tonic-gate 	}
36037c478bd9Sstevel@tonic-gate 	qp->rep_list_size++;
36040a701b1eSRobert Gordon 
36050a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__addrreply__listsize,
36060a701b1eSRobert Gordon 	    int, qp->rep_list_size);
36070a701b1eSRobert Gordon 
36087c478bd9Sstevel@tonic-gate 	qp->replylist = rep;
36097c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
36107c478bd9Sstevel@tonic-gate 
36117c478bd9Sstevel@tonic-gate 	return (rep);
36127c478bd9Sstevel@tonic-gate }
36137c478bd9Sstevel@tonic-gate 
36147c478bd9Sstevel@tonic-gate static rdma_stat
36157c478bd9Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp)
36167c478bd9Sstevel@tonic-gate {
36177c478bd9Sstevel@tonic-gate 	struct reply	*r, *n;
36187c478bd9Sstevel@tonic-gate 
36197c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
36207c478bd9Sstevel@tonic-gate 	for (r = qp->replylist; r != NULL; r = n) {
36217c478bd9Sstevel@tonic-gate 		n = r->next;
36227c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, r);
36237c478bd9Sstevel@tonic-gate 	}
36247c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
36257c478bd9Sstevel@tonic-gate 
36267c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
36277c478bd9Sstevel@tonic-gate }
36287c478bd9Sstevel@tonic-gate 
36297c478bd9Sstevel@tonic-gate static int
36307c478bd9Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep)
36317c478bd9Sstevel@tonic-gate {
36327c478bd9Sstevel@tonic-gate 
36337c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->replylist_lock));
36347c478bd9Sstevel@tonic-gate 	if (rep->prev) {
36357c478bd9Sstevel@tonic-gate 		rep->prev->next = rep->next;
36367c478bd9Sstevel@tonic-gate 	}
36377c478bd9Sstevel@tonic-gate 	if (rep->next) {
36387c478bd9Sstevel@tonic-gate 		rep->next->prev = rep->prev;
36397c478bd9Sstevel@tonic-gate 	}
36407c478bd9Sstevel@tonic-gate 	if (qp->replylist == rep)
36417c478bd9Sstevel@tonic-gate 		qp->replylist = rep->next;
36427c478bd9Sstevel@tonic-gate 
36437c478bd9Sstevel@tonic-gate 	cv_destroy(&rep->wait_cv);
36447c478bd9Sstevel@tonic-gate 	qp->rep_list_size--;
36450a701b1eSRobert Gordon 
36460a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__remreply__listsize,
36470a701b1eSRobert Gordon 	    int, qp->rep_list_size);
36487c478bd9Sstevel@tonic-gate 
36497c478bd9Sstevel@tonic-gate 	kmem_free(rep, sizeof (*rep));
36507c478bd9Sstevel@tonic-gate 
36517c478bd9Sstevel@tonic-gate 	return (0);
36527c478bd9Sstevel@tonic-gate }
36537c478bd9Sstevel@tonic-gate 
36547c478bd9Sstevel@tonic-gate rdma_stat
36550a701b1eSRobert Gordon rib_registermem(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
36567c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle)
36577c478bd9Sstevel@tonic-gate {
36587c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
36597c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
36607c478bd9Sstevel@tonic-gate 	rdma_stat	status;
36617c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
36627c478bd9Sstevel@tonic-gate 
36637c478bd9Sstevel@tonic-gate 	/*
36647c478bd9Sstevel@tonic-gate 	 * Note: ALL buffer pools use the same memory type RDMARW.
36657c478bd9Sstevel@tonic-gate 	 */
36660a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
36677c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
366811606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
36697c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
36707c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
36717c478bd9Sstevel@tonic-gate 	} else {
36727c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
36737c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
36747c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
36757c478bd9Sstevel@tonic-gate 	}
36767c478bd9Sstevel@tonic-gate 	return (status);
36777c478bd9Sstevel@tonic-gate }
36787c478bd9Sstevel@tonic-gate 
36797c478bd9Sstevel@tonic-gate static rdma_stat
36800a701b1eSRobert Gordon rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size,
36810a701b1eSRobert Gordon 	ibt_mr_flags_t spec,
36827c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
36837c478bd9Sstevel@tonic-gate {
36847c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
36857c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
368611606941Sjwahlig 	mem_attr.mr_vaddr = (uintptr_t)buf;
36877c478bd9Sstevel@tonic-gate 	mem_attr.mr_len = (ib_msglen_t)size;
36880a701b1eSRobert Gordon 	mem_attr.mr_as = (struct as *)(caddr_t)adsp;
36897c478bd9Sstevel@tonic-gate 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
36907c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
36917c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_WINDOW_BIND | spec;
36927c478bd9Sstevel@tonic-gate 
36937c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
36947f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (hca->state != HCA_DETACHED) {
36957c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl,
36967c478bd9Sstevel@tonic-gate 		    &mem_attr, mr_hdlp, mr_descp);
36977c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
36987c478bd9Sstevel@tonic-gate 	} else {
36997c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
37007c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
37017c478bd9Sstevel@tonic-gate 	}
37027c478bd9Sstevel@tonic-gate 
37037c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
37047c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
37057c478bd9Sstevel@tonic-gate 	}
37067c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37077c478bd9Sstevel@tonic-gate }
37087c478bd9Sstevel@tonic-gate 
37097c478bd9Sstevel@tonic-gate rdma_stat
37100a701b1eSRobert Gordon rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
37110a701b1eSRobert Gordon 	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc)
37127c478bd9Sstevel@tonic-gate {
37137c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
37140a701b1eSRobert Gordon 	rib_lrc_entry_t *l;
37157c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
37167c478bd9Sstevel@tonic-gate 	rdma_stat	status;
37177c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
37187c478bd9Sstevel@tonic-gate 
37197c478bd9Sstevel@tonic-gate 	/*
37207c478bd9Sstevel@tonic-gate 	 * Non-coherent memory registration.
37217c478bd9Sstevel@tonic-gate 	 */
37220a701b1eSRobert Gordon 	l = (rib_lrc_entry_t *)lrc;
37230a701b1eSRobert Gordon 	if (l) {
37240a701b1eSRobert Gordon 		if (l->registered) {
37250a701b1eSRobert Gordon 			buf_handle->mrc_linfo =
37260a701b1eSRobert Gordon 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
37270a701b1eSRobert Gordon 			buf_handle->mrc_lmr =
37280a701b1eSRobert Gordon 			    (uint32_t)l->lrc_mhandle.mrc_lmr;
37290a701b1eSRobert Gordon 			buf_handle->mrc_rmr =
37300a701b1eSRobert Gordon 			    (uint32_t)l->lrc_mhandle.mrc_rmr;
37310a701b1eSRobert Gordon 			*sync_handle = (RIB_SYNCMEM_HANDLE)
37320a701b1eSRobert Gordon 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
37330a701b1eSRobert Gordon 			return (RDMA_SUCCESS);
37340a701b1eSRobert Gordon 		} else {
37350a701b1eSRobert Gordon 			/* Always register the whole buffer */
37360a701b1eSRobert Gordon 			buf = (caddr_t)l->lrc_buf;
37370a701b1eSRobert Gordon 			buflen = l->lrc_len;
37380a701b1eSRobert Gordon 		}
37390a701b1eSRobert Gordon 	}
37400a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
37410a701b1eSRobert Gordon 
37427c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
37430a701b1eSRobert Gordon 		if (l) {
37440a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
37450a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_lmr   = (uint32_t)mr_desc.md_lkey;
37460a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_rmr   = (uint32_t)mr_desc.md_rkey;
37470a701b1eSRobert Gordon 			l->registered		 = TRUE;
37480a701b1eSRobert Gordon 		}
374911606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
37507c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
37517c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
37527c478bd9Sstevel@tonic-gate 		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
37537c478bd9Sstevel@tonic-gate 	} else {
37547c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
37557c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
37567c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
37577c478bd9Sstevel@tonic-gate 	}
37587c478bd9Sstevel@tonic-gate 	return (status);
37597c478bd9Sstevel@tonic-gate }
37607c478bd9Sstevel@tonic-gate 
37617c478bd9Sstevel@tonic-gate /* ARGSUSED */
37627c478bd9Sstevel@tonic-gate rdma_stat
37637c478bd9Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
37647c478bd9Sstevel@tonic-gate {
37657c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
37667c478bd9Sstevel@tonic-gate 	/*
37677c478bd9Sstevel@tonic-gate 	 * Allow memory deregistration even if HCA is
37687c478bd9Sstevel@tonic-gate 	 * getting detached. Need all outstanding
37697c478bd9Sstevel@tonic-gate 	 * memory registrations to be deregistered
37707c478bd9Sstevel@tonic-gate 	 * before HCA_DETACH_EVENT can be accepted.
37717c478bd9Sstevel@tonic-gate 	 */
37727c478bd9Sstevel@tonic-gate 	(void) ibt_deregister_mr(hca->hca_hdl,
377311606941Sjwahlig 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
37747c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37757c478bd9Sstevel@tonic-gate }
37767c478bd9Sstevel@tonic-gate 
37777c478bd9Sstevel@tonic-gate /* ARGSUSED */
37787c478bd9Sstevel@tonic-gate rdma_stat
37797c478bd9Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
37800a701b1eSRobert Gordon 		RIB_SYNCMEM_HANDLE sync_handle, void *lrc)
37817c478bd9Sstevel@tonic-gate {
37820a701b1eSRobert Gordon 	rib_lrc_entry_t *l;
37830a701b1eSRobert Gordon 	l = (rib_lrc_entry_t *)lrc;
37840a701b1eSRobert Gordon 	if (l)
37850a701b1eSRobert Gordon 		if (l->registered)
37860a701b1eSRobert Gordon 			return (RDMA_SUCCESS);
37870a701b1eSRobert Gordon 
37887c478bd9Sstevel@tonic-gate 	(void) rib_deregistermem(conn, buf, buf_handle);
37897c478bd9Sstevel@tonic-gate 
37907c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37917c478bd9Sstevel@tonic-gate }
37927c478bd9Sstevel@tonic-gate 
37937c478bd9Sstevel@tonic-gate /* ARGSUSED */
37947c478bd9Sstevel@tonic-gate rdma_stat
37957c478bd9Sstevel@tonic-gate rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf,
37967c478bd9Sstevel@tonic-gate 		int len, int cpu)
37977c478bd9Sstevel@tonic-gate {
37987c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
37997c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
38007c478bd9Sstevel@tonic-gate 	ibt_mr_sync_t	mr_segment;
38017c478bd9Sstevel@tonic-gate 
38027c478bd9Sstevel@tonic-gate 	mr_segment.ms_handle = (ibt_mr_hdl_t)shandle;
380311606941Sjwahlig 	mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf;
38047c478bd9Sstevel@tonic-gate 	mr_segment.ms_len = (ib_memlen_t)len;
38057c478bd9Sstevel@tonic-gate 	if (cpu) {
38067c478bd9Sstevel@tonic-gate 		/* make incoming data visible to memory */
38077c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_WRITE;
38087c478bd9Sstevel@tonic-gate 	} else {
38097c478bd9Sstevel@tonic-gate 		/* make memory changes visible to IO */
38107c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_READ;
38117c478bd9Sstevel@tonic-gate 	}
38127c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
38137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (hca->state != HCA_DETACHED) {
38147c478bd9Sstevel@tonic-gate 		status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1);
38157c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38167c478bd9Sstevel@tonic-gate 	} else {
38177c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38187c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
38197c478bd9Sstevel@tonic-gate 	}
38207c478bd9Sstevel@tonic-gate 
38217c478bd9Sstevel@tonic-gate 	if (status == IBT_SUCCESS)
38227c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
38237c478bd9Sstevel@tonic-gate 	else {
38247c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
38257c478bd9Sstevel@tonic-gate 	}
38267c478bd9Sstevel@tonic-gate }
38277c478bd9Sstevel@tonic-gate 
38287c478bd9Sstevel@tonic-gate /*
38297c478bd9Sstevel@tonic-gate  * XXXX	????
38307c478bd9Sstevel@tonic-gate  */
38317c478bd9Sstevel@tonic-gate static rdma_stat
38327c478bd9Sstevel@tonic-gate rib_getinfo(rdma_info_t *info)
38337c478bd9Sstevel@tonic-gate {
38347c478bd9Sstevel@tonic-gate 	/*
38357c478bd9Sstevel@tonic-gate 	 * XXXX	Hack!
38367c478bd9Sstevel@tonic-gate 	 */
38377c478bd9Sstevel@tonic-gate 	info->addrlen = 16;
38387c478bd9Sstevel@tonic-gate 	info->mts = 1000000;
38397c478bd9Sstevel@tonic-gate 	info->mtu = 1000000;
38407c478bd9Sstevel@tonic-gate 
38417c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
38427c478bd9Sstevel@tonic-gate }
38437c478bd9Sstevel@tonic-gate 
38447c478bd9Sstevel@tonic-gate rib_bufpool_t *
38457c478bd9Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num)
38467c478bd9Sstevel@tonic-gate {
38477c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
38487c478bd9Sstevel@tonic-gate 	bufpool_t	*bp = NULL;
38497c478bd9Sstevel@tonic-gate 	caddr_t		buf;
38507c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
38517c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
38527c478bd9Sstevel@tonic-gate 	int		i, j;
38537c478bd9Sstevel@tonic-gate 
38547c478bd9Sstevel@tonic-gate 	rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP);
38557c478bd9Sstevel@tonic-gate 
38567c478bd9Sstevel@tonic-gate 	bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) +
38577c478bd9Sstevel@tonic-gate 	    num * sizeof (void *), KM_SLEEP);
38587c478bd9Sstevel@tonic-gate 
38597c478bd9Sstevel@tonic-gate 	mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
38607c478bd9Sstevel@tonic-gate 	bp->numelems = num;
38617c478bd9Sstevel@tonic-gate 
38620a701b1eSRobert Gordon 
38637c478bd9Sstevel@tonic-gate 	switch (ptype) {
38647c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
38657c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
38667c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_MSG_SZ;
38677c478bd9Sstevel@tonic-gate 		break;
38687c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
38697c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
38707c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_BUF_SIZE;
38717c478bd9Sstevel@tonic-gate 		break;
38727c478bd9Sstevel@tonic-gate 	default:
38737c478bd9Sstevel@tonic-gate 		goto fail;
38747c478bd9Sstevel@tonic-gate 	}
38757c478bd9Sstevel@tonic-gate 
38767c478bd9Sstevel@tonic-gate 	/*
38777c478bd9Sstevel@tonic-gate 	 * Register the pool.
38787c478bd9Sstevel@tonic-gate 	 */
38797c478bd9Sstevel@tonic-gate 	bp->bufsize = num * bp->rsize;
38807c478bd9Sstevel@tonic-gate 	bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
38817c478bd9Sstevel@tonic-gate 	rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
38827c478bd9Sstevel@tonic-gate 	    sizeof (ibt_mr_hdl_t), KM_SLEEP);
38837c478bd9Sstevel@tonic-gate 	rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
38847c478bd9Sstevel@tonic-gate 	    sizeof (ibt_mr_desc_t), KM_SLEEP);
38857c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
38860a701b1eSRobert Gordon 
38877f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (hca->state == HCA_DETACHED) {
38887c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38897c478bd9Sstevel@tonic-gate 		goto fail;
38907c478bd9Sstevel@tonic-gate 	}
38910a701b1eSRobert Gordon 
38927c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
38937c478bd9Sstevel@tonic-gate 		bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
389411606941Sjwahlig 		mem_attr.mr_vaddr = (uintptr_t)buf;
38957c478bd9Sstevel@tonic-gate 		mem_attr.mr_len = (ib_msglen_t)bp->rsize;
38967c478bd9Sstevel@tonic-gate 		mem_attr.mr_as = NULL;
38977c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl,
38980a701b1eSRobert Gordon 		    hca->pd_hdl, &mem_attr,
38990a701b1eSRobert Gordon 		    &rbp->mr_hdl[i],
39007c478bd9Sstevel@tonic-gate 		    &rbp->mr_desc[i]);
39017c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
39027c478bd9Sstevel@tonic-gate 			for (j = 0; j < i; j++) {
39030a701b1eSRobert Gordon 				(void) ibt_deregister_mr(hca->hca_hdl,
39040a701b1eSRobert Gordon 				    rbp->mr_hdl[j]);
39057c478bd9Sstevel@tonic-gate 			}
39067c478bd9Sstevel@tonic-gate 			rw_exit(&hca->state_lock);
39077c478bd9Sstevel@tonic-gate 			goto fail;
39087c478bd9Sstevel@tonic-gate 		}
39097c478bd9Sstevel@tonic-gate 	}
39107c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
39117c478bd9Sstevel@tonic-gate 	buf = (caddr_t)bp->buf;
39127c478bd9Sstevel@tonic-gate 	for (i = 0; i < num; i++, buf += bp->rsize) {
39137c478bd9Sstevel@tonic-gate 		bp->buflist[i] = (void *)buf;
39147c478bd9Sstevel@tonic-gate 	}
39157c478bd9Sstevel@tonic-gate 	bp->buffree = num - 1;	/* no. of free buffers */
39167c478bd9Sstevel@tonic-gate 	rbp->bpool = bp;
39177c478bd9Sstevel@tonic-gate 
39187c478bd9Sstevel@tonic-gate 	return (rbp);
39197c478bd9Sstevel@tonic-gate fail:
39207c478bd9Sstevel@tonic-gate 	if (bp) {
39217c478bd9Sstevel@tonic-gate 		if (bp->buf)
39227c478bd9Sstevel@tonic-gate 			kmem_free(bp->buf, bp->bufsize);
39237c478bd9Sstevel@tonic-gate 		kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *));
39247c478bd9Sstevel@tonic-gate 	}
39257c478bd9Sstevel@tonic-gate 	if (rbp) {
39267c478bd9Sstevel@tonic-gate 		if (rbp->mr_hdl)
39277c478bd9Sstevel@tonic-gate 			kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t));
39287c478bd9Sstevel@tonic-gate 		if (rbp->mr_desc)
39297c478bd9Sstevel@tonic-gate 			kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t));
39307c478bd9Sstevel@tonic-gate 		kmem_free(rbp, sizeof (rib_bufpool_t));
39317c478bd9Sstevel@tonic-gate 	}
39327c478bd9Sstevel@tonic-gate 	return (NULL);
39337c478bd9Sstevel@tonic-gate }
39347c478bd9Sstevel@tonic-gate 
39357c478bd9Sstevel@tonic-gate static void
39367c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype)
39377c478bd9Sstevel@tonic-gate {
39387c478bd9Sstevel@tonic-gate 	int i;
39397c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
39407c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
39417c478bd9Sstevel@tonic-gate 
39427c478bd9Sstevel@tonic-gate 	/*
39437c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
39447c478bd9Sstevel@tonic-gate 	 */
39457c478bd9Sstevel@tonic-gate 	switch (ptype) {
39467c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
39477c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
39487c478bd9Sstevel@tonic-gate 			break;
39497c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
39507c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
39517c478bd9Sstevel@tonic-gate 			break;
39527c478bd9Sstevel@tonic-gate 		default:
39537c478bd9Sstevel@tonic-gate 			return;
39547c478bd9Sstevel@tonic-gate 	}
39557c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
39567c478bd9Sstevel@tonic-gate 		return;
39577c478bd9Sstevel@tonic-gate 
39587c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
39597c478bd9Sstevel@tonic-gate 
39607c478bd9Sstevel@tonic-gate 	/*
39617c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
39627c478bd9Sstevel@tonic-gate 	 */
39637c478bd9Sstevel@tonic-gate 	for (i = 0; i < bp->numelems; i++) {
39647c478bd9Sstevel@tonic-gate 		(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]);
39657c478bd9Sstevel@tonic-gate 	}
39667c478bd9Sstevel@tonic-gate }
39677c478bd9Sstevel@tonic-gate 
39687c478bd9Sstevel@tonic-gate static void
39697c478bd9Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype)
39707c478bd9Sstevel@tonic-gate {
39717c478bd9Sstevel@tonic-gate 
39727c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
39737c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
39747c478bd9Sstevel@tonic-gate 
39757c478bd9Sstevel@tonic-gate 	/*
39767c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
39777c478bd9Sstevel@tonic-gate 	 */
39787c478bd9Sstevel@tonic-gate 	switch (ptype) {
39797c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
39807c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
39817c478bd9Sstevel@tonic-gate 			break;
39827c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
39837c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
39847c478bd9Sstevel@tonic-gate 			break;
39857c478bd9Sstevel@tonic-gate 		default:
39867c478bd9Sstevel@tonic-gate 			return;
39877c478bd9Sstevel@tonic-gate 	}
39887c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
39897c478bd9Sstevel@tonic-gate 		return;
39907c478bd9Sstevel@tonic-gate 
39917c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
39927c478bd9Sstevel@tonic-gate 
39937c478bd9Sstevel@tonic-gate 	/*
39947c478bd9Sstevel@tonic-gate 	 * Free the pool memory.
39957c478bd9Sstevel@tonic-gate 	 */
39967c478bd9Sstevel@tonic-gate 	if (rbp->mr_hdl)
39977c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
39987c478bd9Sstevel@tonic-gate 
39997c478bd9Sstevel@tonic-gate 	if (rbp->mr_desc)
40007c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
40017c478bd9Sstevel@tonic-gate 	if (bp->buf)
40027c478bd9Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
40037c478bd9Sstevel@tonic-gate 	mutex_destroy(&bp->buflock);
40047c478bd9Sstevel@tonic-gate 	kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
40057c478bd9Sstevel@tonic-gate 	kmem_free(rbp, sizeof (rib_bufpool_t));
40067c478bd9Sstevel@tonic-gate }
40077c478bd9Sstevel@tonic-gate 
40087c478bd9Sstevel@tonic-gate void
40097c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype)
40107c478bd9Sstevel@tonic-gate {
40117c478bd9Sstevel@tonic-gate 	/*
40127c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
40137c478bd9Sstevel@tonic-gate 	 */
40147c478bd9Sstevel@tonic-gate 	rib_rbufpool_deregister(hca, ptype);
40157c478bd9Sstevel@tonic-gate 	rib_rbufpool_free(hca, ptype);
40167c478bd9Sstevel@tonic-gate }
40177c478bd9Sstevel@tonic-gate 
40187c478bd9Sstevel@tonic-gate /*
40197c478bd9Sstevel@tonic-gate  * Fetch a buffer from the pool of type specified in rdbuf->type.
40207c478bd9Sstevel@tonic-gate  */
40217c478bd9Sstevel@tonic-gate static rdma_stat
40227c478bd9Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf)
40237c478bd9Sstevel@tonic-gate {
40240a701b1eSRobert Gordon 	rib_lrc_entry_t *rlep;
40250a701b1eSRobert Gordon 
40260a701b1eSRobert Gordon 	if (rdbuf->type ==  RDMA_LONG_BUFFER) {
40270a701b1eSRobert Gordon 		rlep = rib_get_cache_buf(conn, rdbuf->len);
40280a701b1eSRobert Gordon 		rdbuf->rb_private =  (caddr_t)rlep;
40290a701b1eSRobert Gordon 		rdbuf->addr = rlep->lrc_buf;
40300a701b1eSRobert Gordon 		rdbuf->handle = rlep->lrc_mhandle;
40310a701b1eSRobert Gordon 		return (RDMA_SUCCESS);
40320a701b1eSRobert Gordon 	}
40337c478bd9Sstevel@tonic-gate 
40347c478bd9Sstevel@tonic-gate 	rdbuf->addr = rib_rbuf_alloc(conn, rdbuf);
40357c478bd9Sstevel@tonic-gate 	if (rdbuf->addr) {
40367c478bd9Sstevel@tonic-gate 		switch (rdbuf->type) {
40377c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
40387c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_MSG_SZ;	/* 1K */
40397c478bd9Sstevel@tonic-gate 			break;
40407c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
40417c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_BUF_SIZE; /* 2K */
40427c478bd9Sstevel@tonic-gate 			break;
40437c478bd9Sstevel@tonic-gate 		default:
40447c478bd9Sstevel@tonic-gate 			rdbuf->len = 0;
40457c478bd9Sstevel@tonic-gate 		}
40467c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
40477c478bd9Sstevel@tonic-gate 	} else
40487c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
40497c478bd9Sstevel@tonic-gate }
40507c478bd9Sstevel@tonic-gate 
40517c478bd9Sstevel@tonic-gate /*
40527c478bd9Sstevel@tonic-gate  * Fetch a buffer of specified type.
40537c478bd9Sstevel@tonic-gate  * Note that rdbuf->handle is mw's rkey.
40547c478bd9Sstevel@tonic-gate  */
40557c478bd9Sstevel@tonic-gate static void *
40567c478bd9Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf)
40577c478bd9Sstevel@tonic-gate {
40587c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
40597c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = qp->hca;
40607c478bd9Sstevel@tonic-gate 	rdma_btype	ptype = rdbuf->type;
40617c478bd9Sstevel@tonic-gate 	void		*buf;
40627c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
40637c478bd9Sstevel@tonic-gate 	bufpool_t	*bp;
40647c478bd9Sstevel@tonic-gate 	int		i;
40657c478bd9Sstevel@tonic-gate 
40667c478bd9Sstevel@tonic-gate 	/*
40677c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
40687c478bd9Sstevel@tonic-gate 	 */
40697c478bd9Sstevel@tonic-gate 	switch (ptype) {
40707c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
40717c478bd9Sstevel@tonic-gate 		rbp = hca->send_pool;
40727c478bd9Sstevel@tonic-gate 		break;
40737c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
40747c478bd9Sstevel@tonic-gate 		rbp = hca->recv_pool;
40757c478bd9Sstevel@tonic-gate 		break;
40767c478bd9Sstevel@tonic-gate 	default:
40777c478bd9Sstevel@tonic-gate 		return (NULL);
40787c478bd9Sstevel@tonic-gate 	}
40797c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
40807c478bd9Sstevel@tonic-gate 		return (NULL);
40817c478bd9Sstevel@tonic-gate 
40827c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
40837c478bd9Sstevel@tonic-gate 
40847c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
40857c478bd9Sstevel@tonic-gate 	if (bp->buffree < 0) {
40867c478bd9Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
40877c478bd9Sstevel@tonic-gate 		return (NULL);
40887c478bd9Sstevel@tonic-gate 	}
40897c478bd9Sstevel@tonic-gate 
40907c478bd9Sstevel@tonic-gate 	/* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */
40917c478bd9Sstevel@tonic-gate 	buf = bp->buflist[bp->buffree];
40927c478bd9Sstevel@tonic-gate 	rdbuf->addr = buf;
40937c478bd9Sstevel@tonic-gate 	rdbuf->len = bp->rsize;
40947c478bd9Sstevel@tonic-gate 	for (i = bp->numelems - 1; i >= 0; i--) {
409511606941Sjwahlig 		if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
40960a701b1eSRobert Gordon 			rdbuf->handle.mrc_rmr =
40970a701b1eSRobert Gordon 			    (uint32_t)rbp->mr_desc[i].md_rkey;
40980a701b1eSRobert Gordon 			rdbuf->handle.mrc_linfo =
40990a701b1eSRobert Gordon 			    (uintptr_t)rbp->mr_hdl[i];
41000a701b1eSRobert Gordon 			rdbuf->handle.mrc_lmr =
41010a701b1eSRobert Gordon 			    (uint32_t)rbp->mr_desc[i].md_lkey;
41027c478bd9Sstevel@tonic-gate 			bp->buffree--;
41037c478bd9Sstevel@tonic-gate 
41047c478bd9Sstevel@tonic-gate 			mutex_exit(&bp->buflock);
41057c478bd9Sstevel@tonic-gate 
41067c478bd9Sstevel@tonic-gate 			return (buf);
41077c478bd9Sstevel@tonic-gate 		}
41087c478bd9Sstevel@tonic-gate 	}
41090a701b1eSRobert Gordon 
41107c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
41117c478bd9Sstevel@tonic-gate 
41127c478bd9Sstevel@tonic-gate 	return (NULL);
41137c478bd9Sstevel@tonic-gate }
41147c478bd9Sstevel@tonic-gate 
41157c478bd9Sstevel@tonic-gate static void
41167c478bd9Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf)
41177c478bd9Sstevel@tonic-gate {
41187c478bd9Sstevel@tonic-gate 
41190a701b1eSRobert Gordon 	if (rdbuf->type == RDMA_LONG_BUFFER) {
41200a701b1eSRobert Gordon 		rib_free_cache_buf(conn, (rib_lrc_entry_t *)rdbuf->rb_private);
41210a701b1eSRobert Gordon 		rdbuf->rb_private = NULL;
41220a701b1eSRobert Gordon 		return;
41230a701b1eSRobert Gordon 	}
41247c478bd9Sstevel@tonic-gate 	rib_rbuf_free(conn, rdbuf->type, rdbuf->addr);
41257c478bd9Sstevel@tonic-gate }
41267c478bd9Sstevel@tonic-gate 
41277c478bd9Sstevel@tonic-gate static void
41287c478bd9Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf)
41297c478bd9Sstevel@tonic-gate {
41307c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
41317c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = qp->hca;
41327c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
41337c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
41347c478bd9Sstevel@tonic-gate 
41357c478bd9Sstevel@tonic-gate 	/*
41367c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
41377c478bd9Sstevel@tonic-gate 	 */
41387c478bd9Sstevel@tonic-gate 	switch (ptype) {
41397c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
41407c478bd9Sstevel@tonic-gate 		rbp = hca->send_pool;
41417c478bd9Sstevel@tonic-gate 		break;
41427c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
41437c478bd9Sstevel@tonic-gate 		rbp = hca->recv_pool;
41447c478bd9Sstevel@tonic-gate 		break;
41457c478bd9Sstevel@tonic-gate 	default:
41467c478bd9Sstevel@tonic-gate 		return;
41477c478bd9Sstevel@tonic-gate 	}
41487c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
41497c478bd9Sstevel@tonic-gate 		return;
41507c478bd9Sstevel@tonic-gate 
41517c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
41527c478bd9Sstevel@tonic-gate 
41537c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
41547c478bd9Sstevel@tonic-gate 	if (++bp->buffree >= bp->numelems) {
41557c478bd9Sstevel@tonic-gate 		/*
41567c478bd9Sstevel@tonic-gate 		 * Should never happen
41577c478bd9Sstevel@tonic-gate 		 */
41587c478bd9Sstevel@tonic-gate 		bp->buffree--;
41597c478bd9Sstevel@tonic-gate 	} else {
41607c478bd9Sstevel@tonic-gate 		bp->buflist[bp->buffree] = buf;
41617c478bd9Sstevel@tonic-gate 	}
41627c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
41637c478bd9Sstevel@tonic-gate }
41647c478bd9Sstevel@tonic-gate 
41657c478bd9Sstevel@tonic-gate static rdma_stat
41667c478bd9Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t *connlist)
41677c478bd9Sstevel@tonic-gate {
41687c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
41697c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd) {
41707c478bd9Sstevel@tonic-gate 		cn->c_next = connlist->conn_hd;
41717c478bd9Sstevel@tonic-gate 		connlist->conn_hd->c_prev = cn;
41727c478bd9Sstevel@tonic-gate 	}
41737c478bd9Sstevel@tonic-gate 	connlist->conn_hd = cn;
41747c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
41757c478bd9Sstevel@tonic-gate 
41767c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
41777c478bd9Sstevel@tonic-gate }
41787c478bd9Sstevel@tonic-gate 
41797c478bd9Sstevel@tonic-gate static rdma_stat
41807c478bd9Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist)
41817c478bd9Sstevel@tonic-gate {
41827c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
41837c478bd9Sstevel@tonic-gate 	if (cn->c_prev) {
41847c478bd9Sstevel@tonic-gate 		cn->c_prev->c_next = cn->c_next;
41857c478bd9Sstevel@tonic-gate 	}
41867c478bd9Sstevel@tonic-gate 	if (cn->c_next) {
41877c478bd9Sstevel@tonic-gate 		cn->c_next->c_prev = cn->c_prev;
41887c478bd9Sstevel@tonic-gate 	}
41897c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd == cn)
41907c478bd9Sstevel@tonic-gate 		connlist->conn_hd = cn->c_next;
41917c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
41927c478bd9Sstevel@tonic-gate 
41937c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
41947c478bd9Sstevel@tonic-gate }
41957c478bd9Sstevel@tonic-gate 
4196ed629aefSSiddheshwar Mahesh /* ARGSUSED */
4197ed629aefSSiddheshwar Mahesh static rdma_stat
4198ed629aefSSiddheshwar Mahesh rib_conn_get(struct netbuf *s_svcaddr, struct netbuf *d_svcaddr,
4199ed629aefSSiddheshwar Mahesh     int addr_type, void *handle, CONN **conn)
4200ed629aefSSiddheshwar Mahesh {
4201ed629aefSSiddheshwar Mahesh 	rdma_stat status;
4202ed629aefSSiddheshwar Mahesh 	rpcib_ping_t rpt;
4203ed629aefSSiddheshwar Mahesh 
4204ed629aefSSiddheshwar Mahesh 	status = rib_connect(s_svcaddr, d_svcaddr, addr_type, &rpt, conn);
4205ed629aefSSiddheshwar Mahesh 	return (status);
4206ed629aefSSiddheshwar Mahesh }
4207ed629aefSSiddheshwar Mahesh 
42087c478bd9Sstevel@tonic-gate /*
42097f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * rib_find_hca_connection
42107f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  *
42117f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * if there is an existing connection to the specified address then
42127f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * it will be returned in conn, otherwise conn will be set to NULL.
42137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * Also cleans up any connection that is in error state.
42147c478bd9Sstevel@tonic-gate  */
42157f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static int
42167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rib_find_hca_connection(rib_hca_t *hca, struct netbuf *s_svcaddr,
42177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States     struct netbuf *d_svcaddr, CONN **conn)
42187c478bd9Sstevel@tonic-gate {
42197c478bd9Sstevel@tonic-gate 	CONN *cn;
42207c478bd9Sstevel@tonic-gate 	clock_t cv_stat, timout;
42217c478bd9Sstevel@tonic-gate 
42227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	*conn = NULL;
42237c478bd9Sstevel@tonic-gate again:
42247c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
42257c478bd9Sstevel@tonic-gate 	cn = hca->cl_conn_list.conn_hd;
42267c478bd9Sstevel@tonic-gate 	while (cn != NULL) {
42277c478bd9Sstevel@tonic-gate 		/*
42287c478bd9Sstevel@tonic-gate 		 * First, clear up any connection in the ERROR state
42297c478bd9Sstevel@tonic-gate 		 */
42307c478bd9Sstevel@tonic-gate 		mutex_enter(&cn->c_lock);
42310a701b1eSRobert Gordon 		if (cn->c_state == C_ERROR_CONN) {
42327c478bd9Sstevel@tonic-gate 			if (cn->c_ref == 0) {
42337c478bd9Sstevel@tonic-gate 				/*
42347c478bd9Sstevel@tonic-gate 				 * Remove connection from list and destroy it.
42357c478bd9Sstevel@tonic-gate 				 */
42367c478bd9Sstevel@tonic-gate 				cn->c_state = C_DISCONN_PEND;
42377c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
42387c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
4239065714dcSSiddheshwar Mahesh 				rib_conn_close((void *)cn);
42407c478bd9Sstevel@tonic-gate 				goto again;
42417c478bd9Sstevel@tonic-gate 			}
42427c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
42437c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
42447c478bd9Sstevel@tonic-gate 			continue;
42450a701b1eSRobert Gordon 		}
42460a701b1eSRobert Gordon 		if (cn->c_state == C_DISCONN_PEND) {
42477c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
42487c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
42497c478bd9Sstevel@tonic-gate 			continue;
42507c478bd9Sstevel@tonic-gate 		}
42517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
42527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		/*
42537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * source address is only checked for if there is one,
42547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 * this is the case for retries.
42557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		 */
42567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if ((cn->c_raddr.len == d_svcaddr->len) &&
42577f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (bcmp(d_svcaddr->buf, cn->c_raddr.buf,
42587f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    d_svcaddr->len) == 0) &&
42597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    ((s_svcaddr->len == 0) ||
42607f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    ((cn->c_laddr.len == s_svcaddr->len) &&
42617f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (bcmp(s_svcaddr->buf, cn->c_laddr.buf,
42627f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    s_svcaddr->len) == 0)))) {
42637c478bd9Sstevel@tonic-gate 			/*
42647c478bd9Sstevel@tonic-gate 			 * Our connection. Give up conn list lock
42657c478bd9Sstevel@tonic-gate 			 * as we are done traversing the list.
42667c478bd9Sstevel@tonic-gate 			 */
42677c478bd9Sstevel@tonic-gate 			rw_exit(&hca->cl_conn_list.conn_lock);
42687c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONNECTED) {
42697c478bd9Sstevel@tonic-gate 				cn->c_ref++;	/* sharing a conn */
42707c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
42717c478bd9Sstevel@tonic-gate 				*conn = cn;
42727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				return (RDMA_SUCCESS);
42737c478bd9Sstevel@tonic-gate 			}
42747c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONN_PEND) {
42757c478bd9Sstevel@tonic-gate 				/*
42767c478bd9Sstevel@tonic-gate 				 * Hold a reference to this conn before
42777c478bd9Sstevel@tonic-gate 				 * we give up the lock.
42787c478bd9Sstevel@tonic-gate 				 */
42797c478bd9Sstevel@tonic-gate 				cn->c_ref++;
42807c478bd9Sstevel@tonic-gate 				timout =  ddi_get_lbolt() +
42817c478bd9Sstevel@tonic-gate 				    drv_usectohz(CONN_WAIT_TIME * 1000000);
42827c478bd9Sstevel@tonic-gate 				while ((cv_stat = cv_timedwait_sig(&cn->c_cv,
42837c478bd9Sstevel@tonic-gate 				    &cn->c_lock, timout)) > 0 &&
42847c478bd9Sstevel@tonic-gate 				    cn->c_state == C_CONN_PEND)
42857c478bd9Sstevel@tonic-gate 					;
42867c478bd9Sstevel@tonic-gate 				if (cv_stat == 0) {
42879c86cdcdSSiddheshwar Mahesh 					(void) rib_conn_release_locked(cn);
42887c478bd9Sstevel@tonic-gate 					return (RDMA_INTR);
42897c478bd9Sstevel@tonic-gate 				}
42907c478bd9Sstevel@tonic-gate 				if (cv_stat < 0) {
42919c86cdcdSSiddheshwar Mahesh 					(void) rib_conn_release_locked(cn);
42927c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
42937c478bd9Sstevel@tonic-gate 				}
42947c478bd9Sstevel@tonic-gate 				if (cn->c_state == C_CONNECTED) {
42957c478bd9Sstevel@tonic-gate 					*conn = cn;
42967c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
42977f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 					return (RDMA_SUCCESS);
42987c478bd9Sstevel@tonic-gate 				} else {
42999c86cdcdSSiddheshwar Mahesh 					(void) rib_conn_release_locked(cn);
43007c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
43017c478bd9Sstevel@tonic-gate 				}
43027c478bd9Sstevel@tonic-gate 			}
43037c478bd9Sstevel@tonic-gate 		}
43047c478bd9Sstevel@tonic-gate 		mutex_exit(&cn->c_lock);
43057c478bd9Sstevel@tonic-gate 		cn = cn->c_next;
43067c478bd9Sstevel@tonic-gate 	}
43077c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
43087f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	*conn = NULL;
43097c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
43107c478bd9Sstevel@tonic-gate }
43117c478bd9Sstevel@tonic-gate 
43127c478bd9Sstevel@tonic-gate /*
43137f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * Connection management.
43147f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * IBTF does not support recycling of channels. So connections are only
43157f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR_CONN or
43167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * C_DISCONN_PEND state. No C_IDLE state.
43177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * C_CONN_PEND state: Connection establishment in progress to the server.
43187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * C_CONNECTED state: A connection when created is in C_CONNECTED state.
43197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * It has an RC channel associated with it. ibt_post_send/recv are allowed
43207f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * only in this state.
43217f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * C_ERROR_CONN state: A connection transitions to this state when WRs on the
43227f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event
43237f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA.
43247f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * C_DISCONN_PEND state: When a connection is in C_ERROR_CONN state and when
43257f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * c_ref drops to 0 (this indicates that RPC has no more references to this
43267f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * connection), the connection should be destroyed. A connection transitions
43277f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * into this state when it is being destroyed.
43287f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  */
43297f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States /* ARGSUSED */
43307f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static rdma_stat
4331ed629aefSSiddheshwar Mahesh rib_connect(struct netbuf *s_svcaddr, struct netbuf *d_svcaddr,
4332ed629aefSSiddheshwar Mahesh     int addr_type, rpcib_ping_t *rpt, CONN **conn)
43337f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States {
43347f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	CONN *cn;
43357f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	int status;
43367f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_t *hca;
43377f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_qp_t *qp;
43387f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	int s_addr_len;
43397f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	char *s_addr_buf;
43407f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
43417f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
43427f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
43437f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rw_enter(&hca->state_lock, RW_READER);
43447f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (hca->state != HCA_DETACHED) {
43457f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			status = rib_find_hca_connection(hca, s_svcaddr,
43467f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			    d_svcaddr, conn);
43477f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&hca->state_lock);
43487f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			if ((status == RDMA_INTR) || (status == RDMA_SUCCESS)) {
43497f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				rw_exit(&rib_stat->hcas_list_lock);
43507f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 				return (status);
43517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			}
43527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		} else
43537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			rw_exit(&hca->state_lock);
43547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
43557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->hcas_list_lock);
43567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
43577f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	/*
43587f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * No existing connection found, establish a new connection.
43597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 */
4360ed629aefSSiddheshwar Mahesh 	bzero(rpt, sizeof (rpcib_ping_t));
43617f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
4362ed629aefSSiddheshwar Mahesh 	status = rib_ping_srv(addr_type, d_svcaddr, rpt);
43637f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (status != RDMA_SUCCESS) {
43647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		return (RDMA_FAILED);
43657f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
4366ed629aefSSiddheshwar Mahesh 	hca = rpt->hca;
43677f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
4368ed629aefSSiddheshwar Mahesh 	if (rpt->srcip.family == AF_INET) {
4369ed629aefSSiddheshwar Mahesh 		s_addr_len = sizeof (rpt->srcip.un.ip4addr);
4370ed629aefSSiddheshwar Mahesh 		s_addr_buf = (char *)&rpt->srcip.un.ip4addr;
4371ed629aefSSiddheshwar Mahesh 	} else if (rpt->srcip.family == AF_INET6) {
4372ed629aefSSiddheshwar Mahesh 		s_addr_len = sizeof (rpt->srcip.un.ip6addr);
4373ed629aefSSiddheshwar Mahesh 		s_addr_buf = (char *)&rpt->srcip.un.ip6addr;
4374ed629aefSSiddheshwar Mahesh 	} else {
43757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		return (RDMA_FAILED);
4376ed629aefSSiddheshwar Mahesh 	}
43777f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
43787f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	/*
43797c478bd9Sstevel@tonic-gate 	 * Channel to server doesn't exist yet, create one.
43807c478bd9Sstevel@tonic-gate 	 */
43817f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (rib_clnt_create_chan(hca, d_svcaddr, &qp) != RDMA_SUCCESS) {
43827c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
43837c478bd9Sstevel@tonic-gate 	}
43847c478bd9Sstevel@tonic-gate 	cn = qptoc(qp);
43857c478bd9Sstevel@tonic-gate 	cn->c_state = C_CONN_PEND;
43867c478bd9Sstevel@tonic-gate 	cn->c_ref = 1;
43877c478bd9Sstevel@tonic-gate 
43887f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	cn->c_laddr.buf = kmem_alloc(s_addr_len, KM_SLEEP);
43897f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	bcopy(s_addr_buf, cn->c_laddr.buf, s_addr_len);
43907f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	cn->c_laddr.len = cn->c_laddr.maxlen = s_addr_len;
43917f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
43927523bef8SSiddheshwar Mahesh 	if (rpt->srcip.family == AF_INET) {
43937523bef8SSiddheshwar Mahesh 		cn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP) + 1, KM_SLEEP);
43947523bef8SSiddheshwar Mahesh 		(void) strcpy(cn->c_netid, RIBNETID_TCP);
4395*0a4b0810SKaren Rochford 
4396*0a4b0810SKaren Rochford 		cn->c_addrmask.len = cn->c_addrmask.maxlen =
4397*0a4b0810SKaren Rochford 		    sizeof (struct sockaddr_in);
4398*0a4b0810SKaren Rochford 		cn->c_addrmask.buf = kmem_zalloc(cn->c_addrmask.len, KM_SLEEP);
4399*0a4b0810SKaren Rochford 
4400*0a4b0810SKaren Rochford 		((struct sockaddr_in *)cn->c_addrmask.buf)->sin_addr.s_addr =
4401*0a4b0810SKaren Rochford 		    (uint32_t)~0;
4402*0a4b0810SKaren Rochford 		((struct sockaddr_in *)cn->c_addrmask.buf)->sin_family =
4403*0a4b0810SKaren Rochford 		    (ushort_t)~0;
4404*0a4b0810SKaren Rochford 
44057523bef8SSiddheshwar Mahesh 	} else {
44067523bef8SSiddheshwar Mahesh 		cn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP6) + 1, KM_SLEEP);
44077523bef8SSiddheshwar Mahesh 		(void) strcpy(cn->c_netid, RIBNETID_TCP6);
4408*0a4b0810SKaren Rochford 
4409*0a4b0810SKaren Rochford 		cn->c_addrmask.len = cn->c_addrmask.maxlen =
4410*0a4b0810SKaren Rochford 		    sizeof (struct sockaddr_in6);
4411*0a4b0810SKaren Rochford 		cn->c_addrmask.buf = kmem_zalloc(cn->c_addrmask.len, KM_SLEEP);
4412*0a4b0810SKaren Rochford 
4413*0a4b0810SKaren Rochford 		(void) memset(
4414*0a4b0810SKaren Rochford 		    &((struct sockaddr_in6 *)cn->c_addrmask.buf)->sin6_addr,
4415*0a4b0810SKaren Rochford 		    (uchar_t)~0, sizeof (struct in6_addr));
4416*0a4b0810SKaren Rochford 		((struct sockaddr_in6 *)cn->c_addrmask.buf)->sin6_family =
4417*0a4b0810SKaren Rochford 		    (sa_family_t)~0;
44187523bef8SSiddheshwar Mahesh 	}
44197523bef8SSiddheshwar Mahesh 
44207c478bd9Sstevel@tonic-gate 	/*
44217c478bd9Sstevel@tonic-gate 	 * Add to conn list.
44227c478bd9Sstevel@tonic-gate 	 * We had given up the READER lock. In the time since then,
44237c478bd9Sstevel@tonic-gate 	 * another thread might have created the connection we are
44247c478bd9Sstevel@tonic-gate 	 * trying here. But for now, that is quiet alright - there
44257c478bd9Sstevel@tonic-gate 	 * might be two connections between a pair of hosts instead
44267c478bd9Sstevel@tonic-gate 	 * of one. If we really want to close that window,
44277c478bd9Sstevel@tonic-gate 	 * then need to check the list after acquiring the
44287c478bd9Sstevel@tonic-gate 	 * WRITER lock.
44297c478bd9Sstevel@tonic-gate 	 */
44307c478bd9Sstevel@tonic-gate 	(void) rib_add_connlist(cn, &hca->cl_conn_list);
4431ed629aefSSiddheshwar Mahesh 	status = rib_conn_to_srv(hca, qp, rpt);
44327c478bd9Sstevel@tonic-gate 	mutex_enter(&cn->c_lock);
44339c86cdcdSSiddheshwar Mahesh 
44349c86cdcdSSiddheshwar Mahesh 	if (cn->c_flags & C_CLOSE_PENDING) {
44359c86cdcdSSiddheshwar Mahesh 		/*
44369c86cdcdSSiddheshwar Mahesh 		 * This handles a case where the module or
44379c86cdcdSSiddheshwar Mahesh 		 * HCA detached in the time a connection is
44389c86cdcdSSiddheshwar Mahesh 		 * established. In such a case close the
44399c86cdcdSSiddheshwar Mahesh 		 * connection immediately if this is the
44409c86cdcdSSiddheshwar Mahesh 		 * only reference.
44419c86cdcdSSiddheshwar Mahesh 		 */
44429c86cdcdSSiddheshwar Mahesh 		if (cn->c_ref == 1) {
44439c86cdcdSSiddheshwar Mahesh 			cn->c_ref--;
44449c86cdcdSSiddheshwar Mahesh 			cn->c_state = C_DISCONN_PEND;
44459c86cdcdSSiddheshwar Mahesh 			mutex_exit(&cn->c_lock);
44469c86cdcdSSiddheshwar Mahesh 			rib_conn_close((void *)cn);
44479c86cdcdSSiddheshwar Mahesh 			return (RDMA_FAILED);
44489c86cdcdSSiddheshwar Mahesh 		}
44499c86cdcdSSiddheshwar Mahesh 
44509c86cdcdSSiddheshwar Mahesh 		/*
44519c86cdcdSSiddheshwar Mahesh 		 * Connection to be closed later when c_ref = 0
44529c86cdcdSSiddheshwar Mahesh 		 */
44539c86cdcdSSiddheshwar Mahesh 		status = RDMA_FAILED;
44549c86cdcdSSiddheshwar Mahesh 	}
44559c86cdcdSSiddheshwar Mahesh 
44567c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
44577c478bd9Sstevel@tonic-gate 		cn->c_state = C_CONNECTED;
44587c478bd9Sstevel@tonic-gate 		*conn = cn;
44597c478bd9Sstevel@tonic-gate 	} else {
44600a701b1eSRobert Gordon 		cn->c_state = C_ERROR_CONN;
44617c478bd9Sstevel@tonic-gate 		cn->c_ref--;
44627c478bd9Sstevel@tonic-gate 	}
44639c86cdcdSSiddheshwar Mahesh 	cv_signal(&cn->c_cv);
44647c478bd9Sstevel@tonic-gate 	mutex_exit(&cn->c_lock);
44657c478bd9Sstevel@tonic-gate 	return (status);
44667c478bd9Sstevel@tonic-gate }
44677c478bd9Sstevel@tonic-gate 
4468065714dcSSiddheshwar Mahesh static void
4469065714dcSSiddheshwar Mahesh rib_conn_close(void *rarg)
44707c478bd9Sstevel@tonic-gate {
4471065714dcSSiddheshwar Mahesh 	CONN *conn = (CONN *)rarg;
44727c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
44737c478bd9Sstevel@tonic-gate 
44747c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
4475065714dcSSiddheshwar Mahesh 	if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
44767c478bd9Sstevel@tonic-gate 
4477065714dcSSiddheshwar Mahesh 		conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
44789c86cdcdSSiddheshwar Mahesh 
44797c478bd9Sstevel@tonic-gate 		/*
4480065714dcSSiddheshwar Mahesh 		 * Live connection in CONNECTED state.
44817c478bd9Sstevel@tonic-gate 		 */
4482065714dcSSiddheshwar Mahesh 		if (conn->c_state == C_CONNECTED) {
4483065714dcSSiddheshwar Mahesh 			conn->c_state = C_ERROR_CONN;
4484065714dcSSiddheshwar Mahesh 		}
44857c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
4486065714dcSSiddheshwar Mahesh 
4487065714dcSSiddheshwar Mahesh 		rib_close_a_channel(conn);
4488065714dcSSiddheshwar Mahesh 
4489065714dcSSiddheshwar Mahesh 		mutex_enter(&conn->c_lock);
4490065714dcSSiddheshwar Mahesh 		conn->c_flags &= ~C_CLOSE_PENDING;
4491065714dcSSiddheshwar Mahesh 	}
4492065714dcSSiddheshwar Mahesh 
4493065714dcSSiddheshwar Mahesh 	mutex_exit(&conn->c_lock);
4494065714dcSSiddheshwar Mahesh 
44957c478bd9Sstevel@tonic-gate 	if (qp->mode == RIB_SERVER)
44967c478bd9Sstevel@tonic-gate 		(void) rib_disconnect_channel(conn,
44977c478bd9Sstevel@tonic-gate 		    &qp->hca->srv_conn_list);
44987c478bd9Sstevel@tonic-gate 	else
44997c478bd9Sstevel@tonic-gate 		(void) rib_disconnect_channel(conn,
45007c478bd9Sstevel@tonic-gate 		    &qp->hca->cl_conn_list);
4501065714dcSSiddheshwar Mahesh }
4502065714dcSSiddheshwar Mahesh 
4503065714dcSSiddheshwar Mahesh static void
4504065714dcSSiddheshwar Mahesh rib_conn_timeout_call(void *carg)
4505065714dcSSiddheshwar Mahesh {
4506065714dcSSiddheshwar Mahesh 	time_t idle_time;
4507065714dcSSiddheshwar Mahesh 	CONN *conn = (CONN *)carg;
4508065714dcSSiddheshwar Mahesh 	rib_hca_t *hca = ctoqp(conn)->hca;
4509065714dcSSiddheshwar Mahesh 	int error;
4510065714dcSSiddheshwar Mahesh 
4511065714dcSSiddheshwar Mahesh 	mutex_enter(&conn->c_lock);
4512065714dcSSiddheshwar Mahesh 	if ((conn->c_ref > 0) ||
4513065714dcSSiddheshwar Mahesh 	    (conn->c_state == C_DISCONN_PEND)) {
4514065714dcSSiddheshwar Mahesh 		conn->c_timeout = NULL;
4515065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4516065714dcSSiddheshwar Mahesh 		return;
4517065714dcSSiddheshwar Mahesh 	}
4518065714dcSSiddheshwar Mahesh 
4519065714dcSSiddheshwar Mahesh 	idle_time = (gethrestime_sec() - conn->c_last_used);
4520065714dcSSiddheshwar Mahesh 
4521065714dcSSiddheshwar Mahesh 	if ((idle_time <= rib_conn_timeout) &&
4522065714dcSSiddheshwar Mahesh 	    (conn->c_state != C_ERROR_CONN)) {
4523065714dcSSiddheshwar Mahesh 		/*
4524065714dcSSiddheshwar Mahesh 		 * There was activity after the last timeout.
4525065714dcSSiddheshwar Mahesh 		 * Extend the conn life. Unless the conn is
4526065714dcSSiddheshwar Mahesh 		 * already in error state.
4527065714dcSSiddheshwar Mahesh 		 */
4528065714dcSSiddheshwar Mahesh 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
4529065714dcSSiddheshwar Mahesh 		    SEC_TO_TICK(rib_conn_timeout - idle_time));
4530065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4531065714dcSSiddheshwar Mahesh 		return;
4532065714dcSSiddheshwar Mahesh 	}
4533065714dcSSiddheshwar Mahesh 
4534065714dcSSiddheshwar Mahesh 	error = ddi_taskq_dispatch(hca->cleanup_helper, rib_conn_close,
4535065714dcSSiddheshwar Mahesh 	    (void *)conn, DDI_NOSLEEP);
4536065714dcSSiddheshwar Mahesh 
4537065714dcSSiddheshwar Mahesh 	/*
4538065714dcSSiddheshwar Mahesh 	 * If taskq dispatch fails above, then reset the timeout
4539065714dcSSiddheshwar Mahesh 	 * to try again after 10 secs.
4540065714dcSSiddheshwar Mahesh 	 */
4541065714dcSSiddheshwar Mahesh 
4542065714dcSSiddheshwar Mahesh 	if (error != DDI_SUCCESS) {
4543065714dcSSiddheshwar Mahesh 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
4544065714dcSSiddheshwar Mahesh 		    SEC_TO_TICK(RDMA_CONN_REAP_RETRY));
4545065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4546065714dcSSiddheshwar Mahesh 		return;
4547065714dcSSiddheshwar Mahesh 	}
4548065714dcSSiddheshwar Mahesh 
4549065714dcSSiddheshwar Mahesh 	conn->c_state = C_DISCONN_PEND;
4550065714dcSSiddheshwar Mahesh 	mutex_exit(&conn->c_lock);
4551065714dcSSiddheshwar Mahesh }
4552065714dcSSiddheshwar Mahesh 
4553065714dcSSiddheshwar Mahesh static rdma_stat
4554065714dcSSiddheshwar Mahesh rib_conn_release(CONN *conn)
4555065714dcSSiddheshwar Mahesh {
4556065714dcSSiddheshwar Mahesh 	mutex_enter(&conn->c_lock);
45579c86cdcdSSiddheshwar Mahesh 	return (rib_conn_release_locked(conn));
45589c86cdcdSSiddheshwar Mahesh }
45599c86cdcdSSiddheshwar Mahesh 
45609c86cdcdSSiddheshwar Mahesh /*
45619c86cdcdSSiddheshwar Mahesh  * Expects conn->c_lock to be held on entry.
45629c86cdcdSSiddheshwar Mahesh  * c_lock released on return
45639c86cdcdSSiddheshwar Mahesh  */
45649c86cdcdSSiddheshwar Mahesh static rdma_stat
45659c86cdcdSSiddheshwar Mahesh rib_conn_release_locked(CONN *conn)
45669c86cdcdSSiddheshwar Mahesh {
4567065714dcSSiddheshwar Mahesh 	conn->c_ref--;
4568065714dcSSiddheshwar Mahesh 
4569065714dcSSiddheshwar Mahesh 	conn->c_last_used = gethrestime_sec();
4570065714dcSSiddheshwar Mahesh 	if (conn->c_ref > 0) {
4571065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
45727c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
45737c478bd9Sstevel@tonic-gate 	}
4574065714dcSSiddheshwar Mahesh 
4575065714dcSSiddheshwar Mahesh 	/*
4576065714dcSSiddheshwar Mahesh 	 * If a conn is C_ERROR_CONN, close the channel.
4577065714dcSSiddheshwar Mahesh 	 */
4578065714dcSSiddheshwar Mahesh 	if (conn->c_ref == 0 && conn->c_state == C_ERROR_CONN) {
4579065714dcSSiddheshwar Mahesh 		conn->c_state = C_DISCONN_PEND;
4580065714dcSSiddheshwar Mahesh 		mutex_exit(&conn->c_lock);
4581065714dcSSiddheshwar Mahesh 		rib_conn_close((void *)conn);
4582065714dcSSiddheshwar Mahesh 		return (RDMA_SUCCESS);
4583065714dcSSiddheshwar Mahesh 	}
4584065714dcSSiddheshwar Mahesh 
4585065714dcSSiddheshwar Mahesh 	/*
4586065714dcSSiddheshwar Mahesh 	 * c_ref == 0, set a timeout for conn release
4587065714dcSSiddheshwar Mahesh 	 */
4588065714dcSSiddheshwar Mahesh 
4589065714dcSSiddheshwar Mahesh 	if (conn->c_timeout == NULL) {
4590065714dcSSiddheshwar Mahesh 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
4591065714dcSSiddheshwar Mahesh 		    SEC_TO_TICK(rib_conn_timeout));
4592065714dcSSiddheshwar Mahesh 	}
4593065714dcSSiddheshwar Mahesh 
45947c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
45957c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
45967c478bd9Sstevel@tonic-gate }
45977c478bd9Sstevel@tonic-gate 
45987c478bd9Sstevel@tonic-gate /*
45997c478bd9Sstevel@tonic-gate  * Add at front of list
46007c478bd9Sstevel@tonic-gate  */
46017c478bd9Sstevel@tonic-gate static struct rdma_done_list *
46027c478bd9Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid)
46037c478bd9Sstevel@tonic-gate {
46047c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
46057c478bd9Sstevel@tonic-gate 
46067c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
46077c478bd9Sstevel@tonic-gate 
46087c478bd9Sstevel@tonic-gate 	rd = kmem_alloc(sizeof (*rd), KM_SLEEP);
46097c478bd9Sstevel@tonic-gate 	rd->xid = xid;
46107c478bd9Sstevel@tonic-gate 	cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL);
46117c478bd9Sstevel@tonic-gate 
46127c478bd9Sstevel@tonic-gate 	rd->prev = NULL;
46137c478bd9Sstevel@tonic-gate 	rd->next = qp->rdlist;
46147c478bd9Sstevel@tonic-gate 	if (qp->rdlist != NULL)
46157c478bd9Sstevel@tonic-gate 		qp->rdlist->prev = rd;
46167c478bd9Sstevel@tonic-gate 	qp->rdlist = rd;
46177c478bd9Sstevel@tonic-gate 
46187c478bd9Sstevel@tonic-gate 	return (rd);
46197c478bd9Sstevel@tonic-gate }
46207c478bd9Sstevel@tonic-gate 
46217c478bd9Sstevel@tonic-gate static void
46227c478bd9Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd)
46237c478bd9Sstevel@tonic-gate {
46247c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r;
46257c478bd9Sstevel@tonic-gate 
46267c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
46277c478bd9Sstevel@tonic-gate 
46287c478bd9Sstevel@tonic-gate 	r = rd->next;
46297c478bd9Sstevel@tonic-gate 	if (r != NULL) {
46307c478bd9Sstevel@tonic-gate 		r->prev = rd->prev;
46317c478bd9Sstevel@tonic-gate 	}
46327c478bd9Sstevel@tonic-gate 
46337c478bd9Sstevel@tonic-gate 	r = rd->prev;
46347c478bd9Sstevel@tonic-gate 	if (r != NULL) {
46357c478bd9Sstevel@tonic-gate 		r->next = rd->next;
46367c478bd9Sstevel@tonic-gate 	} else {
46377c478bd9Sstevel@tonic-gate 		qp->rdlist = rd->next;
46387c478bd9Sstevel@tonic-gate 	}
46397c478bd9Sstevel@tonic-gate 
46407c478bd9Sstevel@tonic-gate 	cv_destroy(&rd->rdma_done_cv);
46417c478bd9Sstevel@tonic-gate 	kmem_free(rd, sizeof (*rd));
46427c478bd9Sstevel@tonic-gate }
46437c478bd9Sstevel@tonic-gate 
46447c478bd9Sstevel@tonic-gate static void
46457c478bd9Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp)
46467c478bd9Sstevel@tonic-gate {
46477c478bd9Sstevel@tonic-gate 	struct rdma_done_list	*r, *n;
46487c478bd9Sstevel@tonic-gate 
46497c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
46507c478bd9Sstevel@tonic-gate 	for (r = qp->rdlist; r != NULL; r = n) {
46517c478bd9Sstevel@tonic-gate 		n = r->next;
46527c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, r);
46537c478bd9Sstevel@tonic-gate 	}
46547c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
46557c478bd9Sstevel@tonic-gate }
46567c478bd9Sstevel@tonic-gate 
46577c478bd9Sstevel@tonic-gate static void
46587c478bd9Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid)
46597c478bd9Sstevel@tonic-gate {
46607c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r = qp->rdlist;
46617c478bd9Sstevel@tonic-gate 
46627c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
46637c478bd9Sstevel@tonic-gate 
46647c478bd9Sstevel@tonic-gate 	while (r) {
46657c478bd9Sstevel@tonic-gate 		if (r->xid == xid) {
46667c478bd9Sstevel@tonic-gate 			cv_signal(&r->rdma_done_cv);
46677c478bd9Sstevel@tonic-gate 			return;
46687c478bd9Sstevel@tonic-gate 		} else {
46697c478bd9Sstevel@tonic-gate 			r = r->next;
46707c478bd9Sstevel@tonic-gate 		}
46717c478bd9Sstevel@tonic-gate 	}
46720a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__donenotify__nomatchxid,
46730a701b1eSRobert Gordon 	    int, xid);
46747c478bd9Sstevel@tonic-gate }
46757c478bd9Sstevel@tonic-gate 
/*
 * Close the RC channel of a connection, if it still has one.
 * The callers in this file drop conn->c_lock before calling, and the
 * close is issued in blocking mode.
 */
4679065714dcSSiddheshwar Mahesh 
4680065714dcSSiddheshwar Mahesh static void
4681065714dcSSiddheshwar Mahesh rib_close_a_channel(CONN *conn)
4682065714dcSSiddheshwar Mahesh {
4683065714dcSSiddheshwar Mahesh 	rib_qp_t	*qp;
4684065714dcSSiddheshwar Mahesh 	qp = ctoqp(conn);
4685065714dcSSiddheshwar Mahesh 
4686065714dcSSiddheshwar Mahesh 	if (qp->qp_hdl == NULL) {
4687065714dcSSiddheshwar Mahesh 		/* channel already freed */
4688065714dcSSiddheshwar Mahesh 		return;
4689065714dcSSiddheshwar Mahesh 	}
4690065714dcSSiddheshwar Mahesh 
4691065714dcSSiddheshwar Mahesh 	/*
4692065714dcSSiddheshwar Mahesh 	 * Call ibt_close_rc_channel in blocking mode
4693065714dcSSiddheshwar Mahesh 	 * with no callbacks.
4694065714dcSSiddheshwar Mahesh 	 */
4695065714dcSSiddheshwar Mahesh 	(void) ibt_close_rc_channel(qp->qp_hdl, IBT_NOCALLBACKS,
4696065714dcSSiddheshwar Mahesh 	    NULL, 0, NULL, NULL, 0);
4697065714dcSSiddheshwar Mahesh }
46987c478bd9Sstevel@tonic-gate 
46997c478bd9Sstevel@tonic-gate /*
47007c478bd9Sstevel@tonic-gate  * Goes through all connections and closes the channel
47017c478bd9Sstevel@tonic-gate  * This will cause all the WRs on those channels to be
47027c478bd9Sstevel@tonic-gate  * flushed.
47037c478bd9Sstevel@tonic-gate  */
47047c478bd9Sstevel@tonic-gate static void
47057c478bd9Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist)
47067c478bd9Sstevel@tonic-gate {
4707065714dcSSiddheshwar Mahesh 	CONN 		*conn, *tmp;
47087c478bd9Sstevel@tonic-gate 
47097c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
47107c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
47117c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
47127c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
4713065714dcSSiddheshwar Mahesh 		tmp = conn->c_next;
4714065714dcSSiddheshwar Mahesh 		if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
4715065714dcSSiddheshwar Mahesh 
47169c86cdcdSSiddheshwar Mahesh 			if (conn->c_state == C_CONN_PEND) {
47179c86cdcdSSiddheshwar Mahesh 				conn->c_flags |= C_CLOSE_PENDING;
47189c86cdcdSSiddheshwar Mahesh 				goto next;
47199c86cdcdSSiddheshwar Mahesh 			}
47209c86cdcdSSiddheshwar Mahesh 
4721065714dcSSiddheshwar Mahesh 			conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
4722065714dcSSiddheshwar Mahesh 
47237c478bd9Sstevel@tonic-gate 			/*
47247c478bd9Sstevel@tonic-gate 			 * Live connection in CONNECTED state.
47257c478bd9Sstevel@tonic-gate 			 */
4726065714dcSSiddheshwar Mahesh 			if (conn->c_state == C_CONNECTED)
47270a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
4728065714dcSSiddheshwar Mahesh 			mutex_exit(&conn->c_lock);
4729065714dcSSiddheshwar Mahesh 
4730065714dcSSiddheshwar Mahesh 			rib_close_a_channel(conn);
4731065714dcSSiddheshwar Mahesh 
4732065714dcSSiddheshwar Mahesh 			mutex_enter(&conn->c_lock);
4733065714dcSSiddheshwar Mahesh 			conn->c_flags &= ~C_CLOSE_PENDING;
4734065714dcSSiddheshwar Mahesh 			/* Signal a pending rib_disconnect_channel() */
4735065714dcSSiddheshwar Mahesh 			cv_signal(&conn->c_cv);
47367c478bd9Sstevel@tonic-gate 		}
47379c86cdcdSSiddheshwar Mahesh next:
47387c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
4739065714dcSSiddheshwar Mahesh 		conn = tmp;
47407c478bd9Sstevel@tonic-gate 	}
47417c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
47427c478bd9Sstevel@tonic-gate }
47437c478bd9Sstevel@tonic-gate 
47447c478bd9Sstevel@tonic-gate /*
47457c478bd9Sstevel@tonic-gate  * Frees up all connections that are no longer being referenced
47467c478bd9Sstevel@tonic-gate  */
47477c478bd9Sstevel@tonic-gate static void
47487c478bd9Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist)
47497c478bd9Sstevel@tonic-gate {
47507c478bd9Sstevel@tonic-gate 	CONN 		*conn;
47517c478bd9Sstevel@tonic-gate 
47527c478bd9Sstevel@tonic-gate top:
47537c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
47547c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
47557c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
47567c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
47577c478bd9Sstevel@tonic-gate 
47587c478bd9Sstevel@tonic-gate 		/*
47597c478bd9Sstevel@tonic-gate 		 * At this point connection is either in ERROR
47607c478bd9Sstevel@tonic-gate 		 * or DISCONN_PEND state. If in DISCONN_PEND state
47617c478bd9Sstevel@tonic-gate 		 * then some other thread is culling that connection.
47627c478bd9Sstevel@tonic-gate 		 * If not and if c_ref is 0, then destroy the connection.
47637c478bd9Sstevel@tonic-gate 		 */
47647c478bd9Sstevel@tonic-gate 		if (conn->c_ref == 0 &&
47657c478bd9Sstevel@tonic-gate 		    conn->c_state != C_DISCONN_PEND) {
47667c478bd9Sstevel@tonic-gate 			/*
47677c478bd9Sstevel@tonic-gate 			 * Cull the connection
47687c478bd9Sstevel@tonic-gate 			 */
47697c478bd9Sstevel@tonic-gate 			conn->c_state = C_DISCONN_PEND;
47707c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
47717c478bd9Sstevel@tonic-gate 			rw_exit(&connlist->conn_lock);
47727c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, connlist);
47737c478bd9Sstevel@tonic-gate 			goto top;
47747c478bd9Sstevel@tonic-gate 		} else {
47757c478bd9Sstevel@tonic-gate 			/*
47767c478bd9Sstevel@tonic-gate 			 * conn disconnect already scheduled or will
47777c478bd9Sstevel@tonic-gate 			 * happen from conn_release when c_ref drops to 0.
47787c478bd9Sstevel@tonic-gate 			 */
47797c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
47807c478bd9Sstevel@tonic-gate 		}
47817c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
47827c478bd9Sstevel@tonic-gate 	}
47837c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
47847c478bd9Sstevel@tonic-gate 
47857c478bd9Sstevel@tonic-gate 	/*
47867c478bd9Sstevel@tonic-gate 	 * At this point, only connections with c_ref != 0 are on the list
47877c478bd9Sstevel@tonic-gate 	 */
47887c478bd9Sstevel@tonic-gate }
47897c478bd9Sstevel@tonic-gate 
47907c478bd9Sstevel@tonic-gate /*
4791065714dcSSiddheshwar Mahesh  * Free all the HCA resources and close
4792065714dcSSiddheshwar Mahesh  * the hca.
4793065714dcSSiddheshwar Mahesh  */
4794065714dcSSiddheshwar Mahesh 
4795065714dcSSiddheshwar Mahesh static void
4796065714dcSSiddheshwar Mahesh rib_free_hca(rib_hca_t *hca)
4797065714dcSSiddheshwar Mahesh {
4798065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl);
4799065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl);
4800065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl);
4801065714dcSSiddheshwar Mahesh 	(void) ibt_free_cq(hca->svc_scq->rib_cq_hdl);
4802065714dcSSiddheshwar Mahesh 
4803065714dcSSiddheshwar Mahesh 	kmem_free(hca->clnt_rcq, sizeof (rib_cq_t));
4804065714dcSSiddheshwar Mahesh 	kmem_free(hca->clnt_scq, sizeof (rib_cq_t));
4805065714dcSSiddheshwar Mahesh 	kmem_free(hca->svc_rcq, sizeof (rib_cq_t));
4806065714dcSSiddheshwar Mahesh 	kmem_free(hca->svc_scq, sizeof (rib_cq_t));
4807065714dcSSiddheshwar Mahesh 
4808065714dcSSiddheshwar Mahesh 	rib_rbufpool_destroy(hca, RECV_BUFFER);
4809065714dcSSiddheshwar Mahesh 	rib_rbufpool_destroy(hca, SEND_BUFFER);
4810065714dcSSiddheshwar Mahesh 	rib_destroy_cache(hca);
4811065714dcSSiddheshwar Mahesh 	if (rib_mod.rdma_count == 0)
4812c1374a13SSurya Prakki 		(void) rdma_unregister_mod(&rib_mod);
4813065714dcSSiddheshwar Mahesh 	(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
4814065714dcSSiddheshwar Mahesh 	(void) ibt_close_hca(hca->hca_hdl);
4815065714dcSSiddheshwar Mahesh 	hca->hca_hdl = NULL;
4816065714dcSSiddheshwar Mahesh }
4817065714dcSSiddheshwar Mahesh 
48187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
48197c478bd9Sstevel@tonic-gate static void
48207f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rib_stop_hca_services(rib_hca_t *hca)
48217c478bd9Sstevel@tonic-gate {
48227c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
48237c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->cl_conn_list);
48247c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->srv_conn_list);
482551f34d4bSRajkumar Sivaprakasam 
482651f34d4bSRajkumar Sivaprakasam 	rib_purge_connlist(&hca->cl_conn_list);
482751f34d4bSRajkumar Sivaprakasam 	rib_purge_connlist(&hca->srv_conn_list);
482851f34d4bSRajkumar Sivaprakasam 
48297f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if ((rib_stat->hcas_list == NULL) && stats_enabled) {
483051f34d4bSRajkumar Sivaprakasam 		kstat_delete_byname_zone("unix", 0, "rpcib_cache",
483151f34d4bSRajkumar Sivaprakasam 		    GLOBAL_ZONEID);
48327f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		stats_enabled = FALSE;
483351f34d4bSRajkumar Sivaprakasam 	}
48347c478bd9Sstevel@tonic-gate 
48357c478bd9Sstevel@tonic-gate 	rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
48367c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
48377c478bd9Sstevel@tonic-gate 	if (hca->srv_conn_list.conn_hd == NULL &&
48387c478bd9Sstevel@tonic-gate 	    hca->cl_conn_list.conn_hd == NULL) {
48397c478bd9Sstevel@tonic-gate 		/*
48407c478bd9Sstevel@tonic-gate 		 * conn_lists are NULL, so destroy
48417c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
48427c478bd9Sstevel@tonic-gate 		 */
4843065714dcSSiddheshwar Mahesh 		rib_free_hca(hca);
48447c478bd9Sstevel@tonic-gate 	}
48457c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
48467c478bd9Sstevel@tonic-gate 	rw_exit(&hca->srv_conn_list.conn_lock);
48477c478bd9Sstevel@tonic-gate 
48487c478bd9Sstevel@tonic-gate 	if (hca->hca_hdl != NULL) {
48497c478bd9Sstevel@tonic-gate 		mutex_enter(&hca->inuse_lock);
48507c478bd9Sstevel@tonic-gate 		while (hca->inuse)
48517c478bd9Sstevel@tonic-gate 			cv_wait(&hca->cb_cv, &hca->inuse_lock);
48527c478bd9Sstevel@tonic-gate 		mutex_exit(&hca->inuse_lock);
485351f34d4bSRajkumar Sivaprakasam 
4854065714dcSSiddheshwar Mahesh 		rib_free_hca(hca);
4855065714dcSSiddheshwar Mahesh 	}
48567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_destroy(&hca->bound_services_lock);
485751f34d4bSRajkumar Sivaprakasam 
4858065714dcSSiddheshwar Mahesh 	if (hca->cleanup_helper != NULL) {
4859065714dcSSiddheshwar Mahesh 		ddi_taskq_destroy(hca->cleanup_helper);
4860065714dcSSiddheshwar Mahesh 		hca->cleanup_helper = NULL;
48617c478bd9Sstevel@tonic-gate 	}
48627c478bd9Sstevel@tonic-gate }
48630a701b1eSRobert Gordon 
48647f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States /*
48657f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  * Cleans and closes up all uses of the HCA
48667f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States  */
48677f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static void
48689c86cdcdSSiddheshwar Mahesh rib_detach_hca(ibt_hca_hdl_t hca_hdl)
48697f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States {
48709c86cdcdSSiddheshwar Mahesh 	rib_hca_t *hca = NULL;
48717f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_t **hcap;
48727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
48739c86cdcdSSiddheshwar Mahesh 	rw_enter(&rib_stat->hcas_list_lock, RW_WRITER);
48749c86cdcdSSiddheshwar Mahesh 	for (hcap = &rib_stat->hcas_list; *hcap; hcap = &(*hcap)->next) {
48759c86cdcdSSiddheshwar Mahesh 		hca = *hcap;
48769c86cdcdSSiddheshwar Mahesh 		rw_enter(&hca->state_lock, RW_WRITER);
48779c86cdcdSSiddheshwar Mahesh 		if (hca->hca_hdl == hca_hdl) {
48789c86cdcdSSiddheshwar Mahesh 			/*
48799c86cdcdSSiddheshwar Mahesh 			 * Mark as detached and remove from
48809c86cdcdSSiddheshwar Mahesh 			 * hca list.
48819c86cdcdSSiddheshwar Mahesh 			 */
48829c86cdcdSSiddheshwar Mahesh 			hca->state = HCA_DETACHED;
48839c86cdcdSSiddheshwar Mahesh 			*hcap = hca->next;
48849c86cdcdSSiddheshwar Mahesh 			rib_stat->nhca_inited--;
48859c86cdcdSSiddheshwar Mahesh 			rib_mod.rdma_count--;
48869c86cdcdSSiddheshwar Mahesh 			rw_exit(&hca->state_lock);
48879c86cdcdSSiddheshwar Mahesh 			break;
48889c86cdcdSSiddheshwar Mahesh 		}
48899c86cdcdSSiddheshwar Mahesh 		rw_exit(&hca->state_lock);
48909c86cdcdSSiddheshwar Mahesh 	}
48919c86cdcdSSiddheshwar Mahesh 	rw_exit(&rib_stat->hcas_list_lock);
48929c86cdcdSSiddheshwar Mahesh 
48939c86cdcdSSiddheshwar Mahesh 	if (hca == NULL)
48949c86cdcdSSiddheshwar Mahesh 		return;
48959c86cdcdSSiddheshwar Mahesh 	ASSERT(hca->hca_hdl == hca_hdl);
48969c86cdcdSSiddheshwar Mahesh 
48977f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	/*
48987f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Stop all services on the HCA
48997f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Go through cl_conn_list and close all rc_channels
49007f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Go through svr_conn_list and close all rc_channels
49017f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Free connections whose c_ref has dropped to 0
49027f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Destroy all CQs
49037f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Deregister and released all buffer pool memory after all
49047f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * connections are destroyed
49057f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * Free the protection domain
49067f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 * ibt_close_hca()
49077f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	 */
49087f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_stop_hca_services(hca);
49097f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
49107f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	kmem_free(hca, sizeof (*hca));
49117f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States }
49127f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
49130a701b1eSRobert Gordon static void
49140a701b1eSRobert Gordon rib_server_side_cache_reclaim(void *argp)
49150a701b1eSRobert Gordon {
49160a701b1eSRobert Gordon 	cache_avl_struct_t    *rcas;
49170a701b1eSRobert Gordon 	rib_lrc_entry_t		*rb;
49180a701b1eSRobert Gordon 	rib_hca_t *hca = (rib_hca_t *)argp;
49190a701b1eSRobert Gordon 
49200a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
49210a701b1eSRobert Gordon 	rcas = avl_first(&hca->avl_tree);
49220a701b1eSRobert Gordon 	if (rcas != NULL)
49230a701b1eSRobert Gordon 		avl_remove(&hca->avl_tree, rcas);
49240a701b1eSRobert Gordon 
49250a701b1eSRobert Gordon 	while (rcas != NULL) {
49260a701b1eSRobert Gordon 		while (rcas->r.forw != &rcas->r) {
49270a701b1eSRobert Gordon 			rcas->elements--;
49280a701b1eSRobert Gordon 			rb = rcas->r.forw;
49290a701b1eSRobert Gordon 			remque(rb);
49300a701b1eSRobert Gordon 			if (rb->registered)
49310a701b1eSRobert Gordon 				(void) rib_deregistermem_via_hca(hca,
49320a701b1eSRobert Gordon 				    rb->lrc_buf, rb->lrc_mhandle);
49337f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
49347f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			hca->cache_allocation -= rb->lrc_len;
49350a701b1eSRobert Gordon 			kmem_free(rb->lrc_buf, rb->lrc_len);
49360a701b1eSRobert Gordon 			kmem_free(rb, sizeof (rib_lrc_entry_t));
49370a701b1eSRobert Gordon 		}
49380a701b1eSRobert Gordon 		mutex_destroy(&rcas->node_lock);
49390a701b1eSRobert Gordon 		kmem_cache_free(hca->server_side_cache, rcas);
49400a701b1eSRobert Gordon 		rcas = avl_first(&hca->avl_tree);
49410a701b1eSRobert Gordon 		if (rcas != NULL)
49420a701b1eSRobert Gordon 			avl_remove(&hca->avl_tree, rcas);
49430a701b1eSRobert Gordon 	}
49440a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
49450a701b1eSRobert Gordon }
49460a701b1eSRobert Gordon 
49470a701b1eSRobert Gordon static void
49480a701b1eSRobert Gordon rib_server_side_cache_cleanup(void *argp)
49490a701b1eSRobert Gordon {
49500a701b1eSRobert Gordon 	cache_avl_struct_t    *rcas;
49510a701b1eSRobert Gordon 	rib_lrc_entry_t		*rb;
49520a701b1eSRobert Gordon 	rib_hca_t *hca = (rib_hca_t *)argp;
49530a701b1eSRobert Gordon 
49547f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_enter(&hca->cache_allocation_lock);
49557f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	if (hca->cache_allocation < cache_limit) {
49567f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_exit(&hca->cache_allocation_lock);
49570a701b1eSRobert Gordon 		return;
49580a701b1eSRobert Gordon 	}
49597f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_exit(&hca->cache_allocation_lock);
49600a701b1eSRobert Gordon 
49610a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
49620a701b1eSRobert Gordon 	rcas = avl_last(&hca->avl_tree);
49630a701b1eSRobert Gordon 	if (rcas != NULL)
49640a701b1eSRobert Gordon 		avl_remove(&hca->avl_tree, rcas);
49650a701b1eSRobert Gordon 
49660a701b1eSRobert Gordon 	while (rcas != NULL) {
49670a701b1eSRobert Gordon 		while (rcas->r.forw != &rcas->r) {
49680a701b1eSRobert Gordon 			rcas->elements--;
49690a701b1eSRobert Gordon 			rb = rcas->r.forw;
49700a701b1eSRobert Gordon 			remque(rb);
49710a701b1eSRobert Gordon 			if (rb->registered)
49720a701b1eSRobert Gordon 				(void) rib_deregistermem_via_hca(hca,
49730a701b1eSRobert Gordon 				    rb->lrc_buf, rb->lrc_mhandle);
49747f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
49757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			hca->cache_allocation -= rb->lrc_len;
49767f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
49770a701b1eSRobert Gordon 			kmem_free(rb->lrc_buf, rb->lrc_len);
49780a701b1eSRobert Gordon 			kmem_free(rb, sizeof (rib_lrc_entry_t));
49790a701b1eSRobert Gordon 		}
49800a701b1eSRobert Gordon 		mutex_destroy(&rcas->node_lock);
498151f34d4bSRajkumar Sivaprakasam 		if (hca->server_side_cache) {
49820a701b1eSRobert Gordon 			kmem_cache_free(hca->server_side_cache, rcas);
498351f34d4bSRajkumar Sivaprakasam 		}
49847f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
49857f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		if (hca->cache_allocation < cache_limit) {
49860a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
49870a701b1eSRobert Gordon 			return;
49880a701b1eSRobert Gordon 		}
49890a701b1eSRobert Gordon 
49900a701b1eSRobert Gordon 		rcas = avl_last(&hca->avl_tree);
49910a701b1eSRobert Gordon 		if (rcas != NULL)
49920a701b1eSRobert Gordon 			avl_remove(&hca->avl_tree, rcas);
49930a701b1eSRobert Gordon 	}
49940a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
49950a701b1eSRobert Gordon }
49960a701b1eSRobert Gordon 
49970a701b1eSRobert Gordon static int
49980a701b1eSRobert Gordon avl_compare(const void *t1, const void *t2)
49990a701b1eSRobert Gordon {
50000a701b1eSRobert Gordon 	if (((cache_avl_struct_t *)t1)->len == ((cache_avl_struct_t *)t2)->len)
50010a701b1eSRobert Gordon 		return (0);
50020a701b1eSRobert Gordon 
50030a701b1eSRobert Gordon 	if (((cache_avl_struct_t *)t1)->len < ((cache_avl_struct_t *)t2)->len)
50040a701b1eSRobert Gordon 		return (-1);
50050a701b1eSRobert Gordon 
50060a701b1eSRobert Gordon 	return (1);
50070a701b1eSRobert Gordon }
50080a701b1eSRobert Gordon 
50090a701b1eSRobert Gordon static void
50100a701b1eSRobert Gordon rib_destroy_cache(rib_hca_t *hca)
50110a701b1eSRobert Gordon {
501251f34d4bSRajkumar Sivaprakasam 	if (hca->avl_init) {
501351f34d4bSRajkumar Sivaprakasam 		rib_server_side_cache_reclaim((void *)hca);
501451f34d4bSRajkumar Sivaprakasam 		if (hca->server_side_cache) {
50150a701b1eSRobert Gordon 			kmem_cache_destroy(hca->server_side_cache);
501651f34d4bSRajkumar Sivaprakasam 			hca->server_side_cache = NULL;
501751f34d4bSRajkumar Sivaprakasam 		}
50180a701b1eSRobert Gordon 		avl_destroy(&hca->avl_tree);
50197f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_destroy(&hca->cache_allocation_lock);
50200a701b1eSRobert Gordon 		rw_destroy(&hca->avl_rw_lock);
50210a701b1eSRobert Gordon 	}
50220a701b1eSRobert Gordon 	hca->avl_init = FALSE;
50230a701b1eSRobert Gordon }
50240a701b1eSRobert Gordon 
50250a701b1eSRobert Gordon static void
50260a701b1eSRobert Gordon rib_force_cleanup(void *hca)
50270a701b1eSRobert Gordon {
5028065714dcSSiddheshwar Mahesh 	if (((rib_hca_t *)hca)->cleanup_helper != NULL)
50290a701b1eSRobert Gordon 		(void) ddi_taskq_dispatch(
5030065714dcSSiddheshwar Mahesh 		    ((rib_hca_t *)hca)->cleanup_helper,
50310a701b1eSRobert Gordon 		    rib_server_side_cache_cleanup,
50320a701b1eSRobert Gordon 		    (void *)hca, DDI_NOSLEEP);
50330a701b1eSRobert Gordon }
50340a701b1eSRobert Gordon 
50350a701b1eSRobert Gordon static rib_lrc_entry_t *
50360a701b1eSRobert Gordon rib_get_cache_buf(CONN *conn, uint32_t len)
50370a701b1eSRobert Gordon {
50380a701b1eSRobert Gordon 	cache_avl_struct_t	cas, *rcas;
50390a701b1eSRobert Gordon 	rib_hca_t	*hca = (ctoqp(conn))->hca;
50400a701b1eSRobert Gordon 	rib_lrc_entry_t *reply_buf;
50410a701b1eSRobert Gordon 	avl_index_t where = NULL;
50420a701b1eSRobert Gordon 	uint64_t c_alloc = 0;
50430a701b1eSRobert Gordon 
50440a701b1eSRobert Gordon 	if (!hca->avl_init)
50450a701b1eSRobert Gordon 		goto  error_alloc;
50460a701b1eSRobert Gordon 
50470a701b1eSRobert Gordon 	cas.len = len;
50480a701b1eSRobert Gordon 
50490a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
50500a701b1eSRobert Gordon 
50517f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_enter(&hca->cache_allocation_lock);
50527f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	c_alloc = hca->cache_allocation;
50537f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	mutex_exit(&hca->cache_allocation_lock);
50540a701b1eSRobert Gordon 
50550a701b1eSRobert Gordon 	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas,
50560a701b1eSRobert Gordon 	    &where)) == NULL) {
50570a701b1eSRobert Gordon 		/* Am I above the cache limit */
50580a701b1eSRobert Gordon 		if ((c_alloc + len) >= cache_limit) {
50590a701b1eSRobert Gordon 			rib_force_cleanup((void *)hca);
50600a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
50617f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			mutex_enter(&hca->cache_allocation_lock);
50627f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			hca->cache_misses_above_the_limit ++;
50637f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			mutex_exit(&hca->cache_allocation_lock);
50640a701b1eSRobert Gordon 
50650a701b1eSRobert Gordon 			/* Allocate and register the buffer directly */
50660a701b1eSRobert Gordon 			goto error_alloc;
50670a701b1eSRobert Gordon 		}
50680a701b1eSRobert Gordon 
50690a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
50700a701b1eSRobert Gordon 		rw_enter(&hca->avl_rw_lock, RW_WRITER);
50710a701b1eSRobert Gordon 
50720a701b1eSRobert Gordon 		/* Recheck to make sure no other thread added the entry in */
50730a701b1eSRobert Gordon 		if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
50740a701b1eSRobert Gordon 		    &cas, &where)) == NULL) {
50750a701b1eSRobert Gordon 			/* Allocate an avl tree entry */
50760a701b1eSRobert Gordon 			rcas = (cache_avl_struct_t *)
50770a701b1eSRobert Gordon 			    kmem_cache_alloc(hca->server_side_cache, KM_SLEEP);
50780a701b1eSRobert Gordon 
50790a701b1eSRobert Gordon 			bzero(rcas, sizeof (cache_avl_struct_t));
50800a701b1eSRobert Gordon 			rcas->elements = 0;
50810a701b1eSRobert Gordon 			rcas->r.forw = &rcas->r;
50820a701b1eSRobert Gordon 			rcas->r.back = &rcas->r;
50830a701b1eSRobert Gordon 			rcas->len = len;
50840a701b1eSRobert Gordon 			mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL);
50850a701b1eSRobert Gordon 			avl_insert(&hca->avl_tree, rcas, where);
50860a701b1eSRobert Gordon 		}
50870a701b1eSRobert Gordon 	}
50880a701b1eSRobert Gordon 
50890a701b1eSRobert Gordon 	mutex_enter(&rcas->node_lock);
50900a701b1eSRobert Gordon 
50910a701b1eSRobert Gordon 	if (rcas->r.forw != &rcas->r && rcas->elements > 0) {
50920a701b1eSRobert Gordon 		reply_buf = rcas->r.forw;
50930a701b1eSRobert Gordon 		remque(reply_buf);
50940a701b1eSRobert Gordon 		rcas->elements--;
50950a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
50960a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
50977f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
50987f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_enter(&hca->cache_allocation_lock);
50997f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca->cache_hits++;
51007f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca->cache_allocation -= len;
51017f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_exit(&hca->cache_allocation_lock);
51020a701b1eSRobert Gordon 	} else {
51030a701b1eSRobert Gordon 		/* Am I above the cache limit */
51040a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
51050a701b1eSRobert Gordon 		if ((c_alloc + len) >= cache_limit) {
51060a701b1eSRobert Gordon 			rib_force_cleanup((void *)hca);
51070a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
51087f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
51097f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			mutex_enter(&hca->cache_allocation_lock);
51107f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			hca->cache_misses_above_the_limit++;
51117f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 			mutex_exit(&hca->cache_allocation_lock);
51120a701b1eSRobert Gordon 			/* Allocate and register the buffer directly */
51130a701b1eSRobert Gordon 			goto error_alloc;
51140a701b1eSRobert Gordon 		}
51150a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
51167f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_enter(&hca->cache_allocation_lock);
51177f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca->cache_misses++;
51187f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_exit(&hca->cache_allocation_lock);
51190a701b1eSRobert Gordon 		/* Allocate a reply_buf entry */
51200a701b1eSRobert Gordon 		reply_buf = (rib_lrc_entry_t *)
51210a701b1eSRobert Gordon 		    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
51220a701b1eSRobert Gordon 		bzero(reply_buf, sizeof (rib_lrc_entry_t));
51230a701b1eSRobert Gordon 		reply_buf->lrc_buf  = kmem_alloc(len, KM_SLEEP);
51240a701b1eSRobert Gordon 		reply_buf->lrc_len  = len;
51250a701b1eSRobert Gordon 		reply_buf->registered = FALSE;
51260a701b1eSRobert Gordon 		reply_buf->avl_node = (void *)rcas;
51270a701b1eSRobert Gordon 	}
51280a701b1eSRobert Gordon 
51290a701b1eSRobert Gordon 	return (reply_buf);
51300a701b1eSRobert Gordon 
51310a701b1eSRobert Gordon error_alloc:
51320a701b1eSRobert Gordon 	reply_buf = (rib_lrc_entry_t *)
51330a701b1eSRobert Gordon 	    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
51340a701b1eSRobert Gordon 	bzero(reply_buf, sizeof (rib_lrc_entry_t));
51350a701b1eSRobert Gordon 	reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
51360a701b1eSRobert Gordon 	reply_buf->lrc_len = len;
51370a701b1eSRobert Gordon 	reply_buf->registered = FALSE;
51380a701b1eSRobert Gordon 	reply_buf->avl_node = NULL;
51390a701b1eSRobert Gordon 
51400a701b1eSRobert Gordon 	return (reply_buf);
51410a701b1eSRobert Gordon }
51420a701b1eSRobert Gordon 
51430a701b1eSRobert Gordon /*
51440a701b1eSRobert Gordon  * Return a pre-registered back to the cache (without
51450a701b1eSRobert Gordon  * unregistering the buffer)..
51460a701b1eSRobert Gordon  */
51470a701b1eSRobert Gordon 
51480a701b1eSRobert Gordon static void
51490a701b1eSRobert Gordon rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
51500a701b1eSRobert Gordon {
51510a701b1eSRobert Gordon 	cache_avl_struct_t    cas, *rcas;
51520a701b1eSRobert Gordon 	avl_index_t where = NULL;
51530a701b1eSRobert Gordon 	rib_hca_t	*hca = (ctoqp(conn))->hca;
51540a701b1eSRobert Gordon 
51550a701b1eSRobert Gordon 	if (!hca->avl_init)
51560a701b1eSRobert Gordon 		goto  error_free;
51570a701b1eSRobert Gordon 
51580a701b1eSRobert Gordon 	cas.len = reg_buf->lrc_len;
51590a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
51600a701b1eSRobert Gordon 	if ((rcas = (cache_avl_struct_t *)
51610a701b1eSRobert Gordon 	    avl_find(&hca->avl_tree, &cas, &where)) == NULL) {
51620a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
51630a701b1eSRobert Gordon 		goto error_free;
51640a701b1eSRobert Gordon 	} else {
51650a701b1eSRobert Gordon 		cas.len = reg_buf->lrc_len;
51660a701b1eSRobert Gordon 		mutex_enter(&rcas->node_lock);
51670a701b1eSRobert Gordon 		insque(reg_buf, &rcas->r);
51680a701b1eSRobert Gordon 		rcas->elements ++;
51690a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
51700a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
51717f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_enter(&hca->cache_allocation_lock);
51727f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		hca->cache_allocation += cas.len;
51737f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		mutex_exit(&hca->cache_allocation_lock);
51740a701b1eSRobert Gordon 	}
51750a701b1eSRobert Gordon 
51760a701b1eSRobert Gordon 	return;
51770a701b1eSRobert Gordon 
51780a701b1eSRobert Gordon error_free:
51790a701b1eSRobert Gordon 
51800a701b1eSRobert Gordon 	if (reg_buf->registered)
51810a701b1eSRobert Gordon 		(void) rib_deregistermem_via_hca(hca,
51820a701b1eSRobert Gordon 		    reg_buf->lrc_buf, reg_buf->lrc_mhandle);
51830a701b1eSRobert Gordon 	kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
51840a701b1eSRobert Gordon 	kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
51850a701b1eSRobert Gordon }
51860a701b1eSRobert Gordon 
51870a701b1eSRobert Gordon static rdma_stat
51880a701b1eSRobert Gordon rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
51890a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle)
51900a701b1eSRobert Gordon {
51910a701b1eSRobert Gordon 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
51920a701b1eSRobert Gordon 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
51930a701b1eSRobert Gordon 	rdma_stat	status;
51940a701b1eSRobert Gordon 
51950a701b1eSRobert Gordon 
51960a701b1eSRobert Gordon 	/*
51970a701b1eSRobert Gordon 	 * Note: ALL buffer pools use the same memory type RDMARW.
51980a701b1eSRobert Gordon 	 */
51990a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
52000a701b1eSRobert Gordon 	if (status == RDMA_SUCCESS) {
52010a701b1eSRobert Gordon 		buf_handle->mrc_linfo = (uint64_t)(uintptr_t)mr_hdl;
52020a701b1eSRobert Gordon 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
52030a701b1eSRobert Gordon 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
52040a701b1eSRobert Gordon 	} else {
52050a701b1eSRobert Gordon 		buf_handle->mrc_linfo = NULL;
52060a701b1eSRobert Gordon 		buf_handle->mrc_lmr = 0;
52070a701b1eSRobert Gordon 		buf_handle->mrc_rmr = 0;
52080a701b1eSRobert Gordon 	}
52090a701b1eSRobert Gordon 	return (status);
52100a701b1eSRobert Gordon }
52110a701b1eSRobert Gordon 
52120a701b1eSRobert Gordon /* ARGSUSED */
52130a701b1eSRobert Gordon static rdma_stat
52140a701b1eSRobert Gordon rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf,
52150a701b1eSRobert Gordon     struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle)
52160a701b1eSRobert Gordon {
52170a701b1eSRobert Gordon 
52180a701b1eSRobert Gordon 	(void) rib_deregistermem_via_hca(hca, buf, buf_handle);
52190a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
52200a701b1eSRobert Gordon }
52210a701b1eSRobert Gordon 
52220a701b1eSRobert Gordon /* ARGSUSED */
52230a701b1eSRobert Gordon static rdma_stat
52240a701b1eSRobert Gordon rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
52250a701b1eSRobert Gordon {
52260a701b1eSRobert Gordon 
52270a701b1eSRobert Gordon 	(void) ibt_deregister_mr(hca->hca_hdl,
52280a701b1eSRobert Gordon 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
52290a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
52300a701b1eSRobert Gordon }
52310a701b1eSRobert Gordon 
52320a701b1eSRobert Gordon /*
5233e11c3f44Smeem  * Check if the IP interface named by `lifrp' is RDMA-capable.
52340a701b1eSRobert Gordon  */
5235e11c3f44Smeem static boolean_t
5236e11c3f44Smeem rpcib_rdma_capable_interface(struct lifreq *lifrp)
52370a701b1eSRobert Gordon {
5238e11c3f44Smeem 	char ifname[LIFNAMSIZ];
5239e11c3f44Smeem 	char *cp;
52400a701b1eSRobert Gordon 
5241e11c3f44Smeem 	if (lifrp->lifr_type == IFT_IB)
5242e11c3f44Smeem 		return (B_TRUE);
52430a701b1eSRobert Gordon 
52440a701b1eSRobert Gordon 	/*
5245e11c3f44Smeem 	 * Strip off the logical interface portion before getting
5246e11c3f44Smeem 	 * intimate with the name.
52470a701b1eSRobert Gordon 	 */
5248e11c3f44Smeem 	(void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
5249e11c3f44Smeem 	if ((cp = strchr(ifname, ':')) != NULL)
5250e11c3f44Smeem 		*cp = '\0';
52510a701b1eSRobert Gordon 
5252e11c3f44Smeem 	return (strcmp("lo0", ifname) == 0);
52530a701b1eSRobert Gordon }
52540a701b1eSRobert Gordon 
52550a701b1eSRobert Gordon static int
5256e11c3f44Smeem rpcib_do_ip_ioctl(int cmd, int len, void *arg)
52570a701b1eSRobert Gordon {
5258af4c679fSSean McEnroe 	vnode_t *kkvp, *vp;
52590a701b1eSRobert Gordon 	TIUSER  *tiptr;
52600a701b1eSRobert Gordon 	struct  strioctl iocb;
52610a701b1eSRobert Gordon 	k_sigset_t smask;
52620a701b1eSRobert Gordon 	int	err = 0;
52630a701b1eSRobert Gordon 
5264af4c679fSSean McEnroe 	if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kkvp) == 0) {
5265af4c679fSSean McEnroe 		if (t_kopen(NULL, kkvp->v_rdev, FREAD|FWRITE,
52660a701b1eSRobert Gordon 		    &tiptr, CRED()) == 0) {
52670a701b1eSRobert Gordon 			vp = tiptr->fp->f_vnode;
52680a701b1eSRobert Gordon 		} else {
5269af4c679fSSean McEnroe 			VN_RELE(kkvp);
52700a701b1eSRobert Gordon 			return (EPROTO);
52710a701b1eSRobert Gordon 		}
52720a701b1eSRobert Gordon 	} else {
52730a701b1eSRobert Gordon 		return (EPROTO);
52740a701b1eSRobert Gordon 	}
52750a701b1eSRobert Gordon 
52760a701b1eSRobert Gordon 	iocb.ic_cmd = cmd;
52770a701b1eSRobert Gordon 	iocb.ic_timout = 0;
52780a701b1eSRobert Gordon 	iocb.ic_len = len;
5279e11c3f44Smeem 	iocb.ic_dp = (caddr_t)arg;
52800a701b1eSRobert Gordon 	sigintr(&smask, 0);
52810a701b1eSRobert Gordon 	err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
52820a701b1eSRobert Gordon 	sigunintr(&smask);
52830a701b1eSRobert Gordon 	(void) t_kclose(tiptr, 0);
5284af4c679fSSean McEnroe 	VN_RELE(kkvp);
52850a701b1eSRobert Gordon 	return (err);
52860a701b1eSRobert Gordon }
52870a701b1eSRobert Gordon 
5288e11c3f44Smeem /*
5289e11c3f44Smeem  * Issue an SIOCGLIFCONF down to IP and return the result in `lifcp'.
5290e11c3f44Smeem  * lifcp->lifc_buf is dynamically allocated to be *bufsizep bytes.
5291e11c3f44Smeem  */
5292e11c3f44Smeem static int
5293e11c3f44Smeem rpcib_do_lifconf(struct lifconf *lifcp, uint_t *bufsizep)
5294e11c3f44Smeem {
5295e11c3f44Smeem 	int err;
5296e11c3f44Smeem 	struct lifnum lifn;
5297e11c3f44Smeem 
5298e11c3f44Smeem 	bzero(&lifn, sizeof (struct lifnum));
5299e11c3f44Smeem 	lifn.lifn_family = AF_UNSPEC;
5300e11c3f44Smeem 
5301e11c3f44Smeem 	err = rpcib_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn);
5302e11c3f44Smeem 	if (err != 0)
5303e11c3f44Smeem 		return (err);
5304e11c3f44Smeem 
5305e11c3f44Smeem 	/*
5306e11c3f44Smeem 	 * Pad the interface count to account for additional interfaces that
5307e11c3f44Smeem 	 * may have been configured between the SIOCGLIFNUM and SIOCGLIFCONF.
5308e11c3f44Smeem 	 */
5309e11c3f44Smeem 	lifn.lifn_count += 4;
5310e11c3f44Smeem 
5311e11c3f44Smeem 	bzero(lifcp, sizeof (struct lifconf));
5312e11c3f44Smeem 	lifcp->lifc_family = AF_UNSPEC;
5313e11c3f44Smeem 	lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq);
5314e11c3f44Smeem 	lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP);
5315e11c3f44Smeem 
5316e11c3f44Smeem 	err = rpcib_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp);
5317e11c3f44Smeem 	if (err != 0) {
5318e11c3f44Smeem 		kmem_free(lifcp->lifc_buf, *bufsizep);
5319e11c3f44Smeem 		return (err);
53200a701b1eSRobert Gordon 	}
5321e11c3f44Smeem 	return (0);
53220a701b1eSRobert Gordon }
53230a701b1eSRobert Gordon 
53240a701b1eSRobert Gordon static boolean_t
5325e11c3f44Smeem rpcib_get_ib_addresses(rpcib_ipaddrs_t *addrs4, rpcib_ipaddrs_t *addrs6)
53260a701b1eSRobert Gordon {
5327e11c3f44Smeem 	uint_t i, nifs;
5328e11c3f44Smeem 	uint_t bufsize;
5329e11c3f44Smeem 	struct lifconf lifc;
5330e11c3f44Smeem 	struct lifreq *lifrp;
5331e11c3f44Smeem 	struct sockaddr_in *sinp;
5332e11c3f44Smeem 	struct sockaddr_in6 *sin6p;
53330a701b1eSRobert Gordon 
5334e11c3f44Smeem 	bzero(addrs4, sizeof (rpcib_ipaddrs_t));
5335e11c3f44Smeem 	bzero(addrs6, sizeof (rpcib_ipaddrs_t));
53360a701b1eSRobert Gordon 
5337e11c3f44Smeem 	if (rpcib_do_lifconf(&lifc, &bufsize) != 0)
5338e11c3f44Smeem 		return (B_FALSE);
5339e11c3f44Smeem 
5340e11c3f44Smeem 	if ((nifs = lifc.lifc_len / sizeof (struct lifreq)) == 0) {
5341e11c3f44Smeem 		kmem_free(lifc.lifc_buf, bufsize);
5342e11c3f44Smeem 		return (B_FALSE);
53430a701b1eSRobert Gordon 	}
53440a701b1eSRobert Gordon 
5345e11c3f44Smeem 	/*
5346e11c3f44Smeem 	 * Worst case is that all of the addresses are IB-capable and have
5347e11c3f44Smeem 	 * the same address family, so size our buffers accordingly.
5348e11c3f44Smeem 	 */
5349e11c3f44Smeem 	addrs4->ri_size = nifs * sizeof (struct sockaddr_in);
5350e11c3f44Smeem 	addrs4->ri_list = kmem_zalloc(addrs4->ri_size, KM_SLEEP);
5351e11c3f44Smeem 	addrs6->ri_size = nifs * sizeof (struct sockaddr_in6);
5352e11c3f44Smeem 	addrs6->ri_list = kmem_zalloc(addrs6->ri_size, KM_SLEEP);
53530a701b1eSRobert Gordon 
5354e11c3f44Smeem 	for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
5355e11c3f44Smeem 		if (!rpcib_rdma_capable_interface(lifrp))
5356e11c3f44Smeem 			continue;
5357e11c3f44Smeem 
5358e11c3f44Smeem 		if (lifrp->lifr_addr.ss_family == AF_INET) {
5359e11c3f44Smeem 			sinp = addrs4->ri_list;
5360e11c3f44Smeem 			bcopy(&lifrp->lifr_addr, &sinp[addrs4->ri_count++],
5361e11c3f44Smeem 			    sizeof (struct sockaddr_in));
5362e11c3f44Smeem 		} else if (lifrp->lifr_addr.ss_family == AF_INET6) {
5363e11c3f44Smeem 			sin6p = addrs6->ri_list;
5364e11c3f44Smeem 			bcopy(&lifrp->lifr_addr, &sin6p[addrs6->ri_count++],
5365e11c3f44Smeem 			    sizeof (struct sockaddr_in6));
5366e11c3f44Smeem 		}
53670a701b1eSRobert Gordon 	}
53680a701b1eSRobert Gordon 
5369e11c3f44Smeem 	kmem_free(lifc.lifc_buf, bufsize);
5370e11c3f44Smeem 	return (B_TRUE);
53710a701b1eSRobert Gordon }
53720a701b1eSRobert Gordon 
53730a701b1eSRobert Gordon /* ARGSUSED */
53747f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States static int
53757f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States rpcib_cache_kstat_update(kstat_t *ksp, int rw)
53767f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States {
53777f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rib_hca_t *hca;
53780a701b1eSRobert Gordon 
53790a701b1eSRobert Gordon 	if (KSTAT_WRITE == rw) {
53800a701b1eSRobert Gordon 		return (EACCES);
53810a701b1eSRobert Gordon 	}
53827f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 
53830a701b1eSRobert Gordon 	rpcib_kstat.cache_limit.value.ui64 =
53840a701b1eSRobert Gordon 	    (uint64_t)cache_limit;
53857f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
53867f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
53877f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rpcib_kstat.cache_allocation.value.ui64 +=
53887f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (uint64_t)hca->cache_allocation;
53897f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rpcib_kstat.cache_hits.value.ui64 +=
53907f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (uint64_t)hca->cache_hits;
53917f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rpcib_kstat.cache_misses.value.ui64 +=
53927f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (uint64_t)hca->cache_misses;
53937f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		rpcib_kstat.cache_misses_above_the_limit.value.ui64 +=
53947f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 		    (uint64_t)hca->cache_misses_above_the_limit;
53957f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	}
53967f379ad1SFaramarz Jalalian - Sun Microsystems - Irvine United States 	rw_exit(&rib_stat->hcas_list_lock);
53970a701b1eSRobert Gordon 	return (0);
53980a701b1eSRobert Gordon }
5399