xref: /titanic_53/usr/src/uts/common/rpc/rpcib.c (revision 0a701b1ec2b55bddc48b62124df936152ff820f7)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0a701b1eSRobert Gordon  * Common Development and Distribution License (the "License").
6*0a701b1eSRobert Gordon  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*0a701b1eSRobert Gordon  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
26*0a701b1eSRobert Gordon 
27*0a701b1eSRobert Gordon /*
28*0a701b1eSRobert Gordon  * Copyright (c) 2007, The Ohio State University. All rights reserved.
29*0a701b1eSRobert Gordon  *
30*0a701b1eSRobert Gordon  * Portions of this source code is developed by the team members of
31*0a701b1eSRobert Gordon  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
32*0a701b1eSRobert Gordon  * headed by Professor Dhabaleswar K. (DK) Panda.
33*0a701b1eSRobert Gordon  *
34*0a701b1eSRobert Gordon  * Acknowledgements to contributions from developors:
35*0a701b1eSRobert Gordon  *   Ranjit Noronha: noronha@cse.ohio-state.edu
36*0a701b1eSRobert Gordon  *   Lei Chai      : chail@cse.ohio-state.edu
37*0a701b1eSRobert Gordon  *   Weikuan Yu    : yuw@cse.ohio-state.edu
38*0a701b1eSRobert Gordon  *
39*0a701b1eSRobert Gordon  */
407c478bd9Sstevel@tonic-gate 
417c478bd9Sstevel@tonic-gate /*
427c478bd9Sstevel@tonic-gate  * The rpcib plugin. Implements the interface for RDMATF's
437c478bd9Sstevel@tonic-gate  * interaction with IBTF.
447c478bd9Sstevel@tonic-gate  */
457c478bd9Sstevel@tonic-gate 
467c478bd9Sstevel@tonic-gate #include <sys/param.h>
477c478bd9Sstevel@tonic-gate #include <sys/types.h>
487c478bd9Sstevel@tonic-gate #include <sys/user.h>
497c478bd9Sstevel@tonic-gate #include <sys/systm.h>
507c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
517c478bd9Sstevel@tonic-gate #include <sys/proc.h>
527c478bd9Sstevel@tonic-gate #include <sys/socket.h>
537c478bd9Sstevel@tonic-gate #include <sys/file.h>
547c478bd9Sstevel@tonic-gate #include <sys/stream.h>
557c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
567c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
577c478bd9Sstevel@tonic-gate #include <sys/errno.h>
587c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
597c478bd9Sstevel@tonic-gate #include <sys/debug.h>
607c478bd9Sstevel@tonic-gate #include <sys/systm.h>
617c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
627c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
637c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
647c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
657c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
667c478bd9Sstevel@tonic-gate #include <sys/time.h>
677c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
687c478bd9Sstevel@tonic-gate #include <sys/callb.h>
697c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
707c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
71*0a701b1eSRobert Gordon #include <sys/sunldi.h>
72*0a701b1eSRobert Gordon #include <sys/sdt.h>
73*0a701b1eSRobert Gordon #include <sys/dlpi.h>
747c478bd9Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h>
757c478bd9Sstevel@tonic-gate #include <rpc/rpc.h>
767c478bd9Sstevel@tonic-gate #include <rpc/ib.h>
777c478bd9Sstevel@tonic-gate 
787c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
797c478bd9Sstevel@tonic-gate 
807c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
817c478bd9Sstevel@tonic-gate #include <sys/kstr.h>
827c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
837c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
847c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
857c478bd9Sstevel@tonic-gate #include <net/if.h>
867c478bd9Sstevel@tonic-gate #include <sys/cred.h>
87*0a701b1eSRobert Gordon #include <rpc/rpc_rdma.h>
887c478bd9Sstevel@tonic-gate 
89*0a701b1eSRobert Gordon #include <nfs/nfs.h>
90*0a701b1eSRobert Gordon #include <sys/kstat.h>
91*0a701b1eSRobert Gordon #include <sys/atomic.h>
92*0a701b1eSRobert Gordon 
93*0a701b1eSRobert Gordon #define	NFS_RDMA_PORT	2050
947c478bd9Sstevel@tonic-gate 
957c478bd9Sstevel@tonic-gate extern char *inet_ntop(int, const void *, char *, int);
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate /*
997c478bd9Sstevel@tonic-gate  * Prototype declarations for driver ops
1007c478bd9Sstevel@tonic-gate  */
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate static int	rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
1037c478bd9Sstevel@tonic-gate static int	rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
1047c478bd9Sstevel@tonic-gate 				void *, void **);
1057c478bd9Sstevel@tonic-gate static int	rpcib_detach(dev_info_t *, ddi_detach_cmd_t);
106*0a701b1eSRobert Gordon static int	rpcib_is_ib_interface(char *);
107*0a701b1eSRobert Gordon static int	rpcib_dl_info(ldi_handle_t, dl_info_ack_t *);
108*0a701b1eSRobert Gordon static int	rpcib_do_ip_ioctl(int, int, caddr_t);
109*0a701b1eSRobert Gordon static boolean_t	rpcib_get_ib_addresses(struct sockaddr_in *,
110*0a701b1eSRobert Gordon 			struct sockaddr_in6 *, uint_t *, uint_t *);
111*0a701b1eSRobert Gordon static	uint_t rpcib_get_number_interfaces(void);
112*0a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *, int);
113*0a701b1eSRobert Gordon static void rib_force_cleanup(void *);
1147c478bd9Sstevel@tonic-gate 
115*0a701b1eSRobert Gordon struct {
116*0a701b1eSRobert Gordon 	kstat_named_t cache_limit;
117*0a701b1eSRobert Gordon 	kstat_named_t cache_allocation;
118*0a701b1eSRobert Gordon 	kstat_named_t cache_hits;
119*0a701b1eSRobert Gordon 	kstat_named_t cache_misses;
120*0a701b1eSRobert Gordon 	kstat_named_t cache_misses_above_the_limit;
121*0a701b1eSRobert Gordon } rpcib_kstat = {
122*0a701b1eSRobert Gordon 	{"cache_limit",			KSTAT_DATA_UINT64 },
123*0a701b1eSRobert Gordon 	{"cache_allocation",		KSTAT_DATA_UINT64 },
124*0a701b1eSRobert Gordon 	{"cache_hits",			KSTAT_DATA_UINT64 },
125*0a701b1eSRobert Gordon 	{"cache_misses",		KSTAT_DATA_UINT64 },
126*0a701b1eSRobert Gordon 	{"cache_misses_above_the_limit", KSTAT_DATA_UINT64 },
127*0a701b1eSRobert Gordon };
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate /* rpcib cb_ops */
1307c478bd9Sstevel@tonic-gate static struct cb_ops rpcib_cbops = {
1317c478bd9Sstevel@tonic-gate 	nulldev,		/* open */
1327c478bd9Sstevel@tonic-gate 	nulldev,		/* close */
1337c478bd9Sstevel@tonic-gate 	nodev,			/* strategy */
1347c478bd9Sstevel@tonic-gate 	nodev,			/* print */
1357c478bd9Sstevel@tonic-gate 	nodev,			/* dump */
1367c478bd9Sstevel@tonic-gate 	nodev,			/* read */
1377c478bd9Sstevel@tonic-gate 	nodev,			/* write */
1387c478bd9Sstevel@tonic-gate 	nodev,			/* ioctl */
1397c478bd9Sstevel@tonic-gate 	nodev,			/* devmap */
1407c478bd9Sstevel@tonic-gate 	nodev,			/* mmap */
1417c478bd9Sstevel@tonic-gate 	nodev,			/* segmap */
1427c478bd9Sstevel@tonic-gate 	nochpoll,		/* poll */
1437c478bd9Sstevel@tonic-gate 	ddi_prop_op,		/* prop_op */
1447c478bd9Sstevel@tonic-gate 	NULL,			/* stream */
1457c478bd9Sstevel@tonic-gate 	D_MP,			/* cb_flag */
1467c478bd9Sstevel@tonic-gate 	CB_REV,			/* rev */
1477c478bd9Sstevel@tonic-gate 	nodev,			/* int (*cb_aread)() */
1487c478bd9Sstevel@tonic-gate 	nodev			/* int (*cb_awrite)() */
1497c478bd9Sstevel@tonic-gate };
1507c478bd9Sstevel@tonic-gate 
151*0a701b1eSRobert Gordon 
152*0a701b1eSRobert Gordon 
153*0a701b1eSRobert Gordon 
1547c478bd9Sstevel@tonic-gate /*
1557c478bd9Sstevel@tonic-gate  * Device options
1567c478bd9Sstevel@tonic-gate  */
1577c478bd9Sstevel@tonic-gate static struct dev_ops rpcib_ops = {
1587c478bd9Sstevel@tonic-gate 	DEVO_REV,		/* devo_rev, */
1597c478bd9Sstevel@tonic-gate 	0,			/* refcnt  */
1607c478bd9Sstevel@tonic-gate 	rpcib_getinfo,		/* info */
1617c478bd9Sstevel@tonic-gate 	nulldev,		/* identify */
1627c478bd9Sstevel@tonic-gate 	nulldev,		/* probe */
1637c478bd9Sstevel@tonic-gate 	rpcib_attach,		/* attach */
1647c478bd9Sstevel@tonic-gate 	rpcib_detach,		/* detach */
1657c478bd9Sstevel@tonic-gate 	nodev,			/* reset */
1667c478bd9Sstevel@tonic-gate 	&rpcib_cbops,		    /* driver ops - devctl interfaces */
1677c478bd9Sstevel@tonic-gate 	NULL,			/* bus operations */
1687c478bd9Sstevel@tonic-gate 	NULL			/* power */
1697c478bd9Sstevel@tonic-gate };
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate /*
1727c478bd9Sstevel@tonic-gate  * Module linkage information.
1737c478bd9Sstevel@tonic-gate  */
1747c478bd9Sstevel@tonic-gate 
1757c478bd9Sstevel@tonic-gate static struct modldrv rib_modldrv = {
1767c478bd9Sstevel@tonic-gate 	&mod_driverops,			    /* Driver module */
177*0a701b1eSRobert Gordon 	"RPCIB plugin driver, ver 1.30", /* Driver name and version */
1787c478bd9Sstevel@tonic-gate 	&rpcib_ops,		    /* Driver ops */
1797c478bd9Sstevel@tonic-gate };
1807c478bd9Sstevel@tonic-gate 
1817c478bd9Sstevel@tonic-gate static struct modlinkage rib_modlinkage = {
1827c478bd9Sstevel@tonic-gate 	MODREV_1,
1837c478bd9Sstevel@tonic-gate 	(void *)&rib_modldrv,
1847c478bd9Sstevel@tonic-gate 	NULL
1857c478bd9Sstevel@tonic-gate };
1867c478bd9Sstevel@tonic-gate 
187*0a701b1eSRobert Gordon typedef struct rib_lrc_entry {
188*0a701b1eSRobert Gordon 	struct rib_lrc_entry *forw;
189*0a701b1eSRobert Gordon 	struct rib_lrc_entry *back;
190*0a701b1eSRobert Gordon 	char *lrc_buf;
191*0a701b1eSRobert Gordon 
192*0a701b1eSRobert Gordon 	uint32_t lrc_len;
193*0a701b1eSRobert Gordon 	void  *avl_node;
194*0a701b1eSRobert Gordon 	bool_t registered;
195*0a701b1eSRobert Gordon 
196*0a701b1eSRobert Gordon 	struct mrc lrc_mhandle;
197*0a701b1eSRobert Gordon 	bool_t lrc_on_freed_list;
198*0a701b1eSRobert Gordon } rib_lrc_entry_t;
199*0a701b1eSRobert Gordon 
200*0a701b1eSRobert Gordon typedef	struct cache_struct	{
201*0a701b1eSRobert Gordon 	rib_lrc_entry_t		r;
202*0a701b1eSRobert Gordon 	uint32_t		len;
203*0a701b1eSRobert Gordon 	uint32_t		elements;
204*0a701b1eSRobert Gordon 	kmutex_t		node_lock;
205*0a701b1eSRobert Gordon 	avl_node_t		avl_link;
206*0a701b1eSRobert Gordon } cache_avl_struct_t;
207*0a701b1eSRobert Gordon 
208*0a701b1eSRobert Gordon 
209*0a701b1eSRobert Gordon static uint64_t 	rib_total_buffers = 0;
210*0a701b1eSRobert Gordon uint64_t	cache_limit = 100 * 1024 * 1024;
211*0a701b1eSRobert Gordon static volatile uint64_t	cache_allocation = 0;
212*0a701b1eSRobert Gordon static uint64_t	cache_watermark = 80 * 1024 * 1024;
213*0a701b1eSRobert Gordon static uint64_t	cache_hits = 0;
214*0a701b1eSRobert Gordon static uint64_t	cache_misses = 0;
215*0a701b1eSRobert Gordon static uint64_t	cache_cold_misses = 0;
216*0a701b1eSRobert Gordon static uint64_t	cache_hot_misses = 0;
217*0a701b1eSRobert Gordon static uint64_t	cache_misses_above_the_limit = 0;
218*0a701b1eSRobert Gordon static bool_t	stats_enabled = FALSE;
219*0a701b1eSRobert Gordon 
220*0a701b1eSRobert Gordon static uint64_t max_unsignaled_rws = 5;
221*0a701b1eSRobert Gordon 
2227c478bd9Sstevel@tonic-gate /*
2237c478bd9Sstevel@tonic-gate  * rib_stat: private data pointer used when registering
2247c478bd9Sstevel@tonic-gate  *	with the IBTF.  It is returned to the consumer
2257c478bd9Sstevel@tonic-gate  *	in all callbacks.
2267c478bd9Sstevel@tonic-gate  */
2277c478bd9Sstevel@tonic-gate static rpcib_state_t *rib_stat = NULL;
2287c478bd9Sstevel@tonic-gate 
229*0a701b1eSRobert Gordon #define	RNR_RETRIES	IBT_RNR_RETRY_1
2307c478bd9Sstevel@tonic-gate #define	MAX_PORTS	2
2317c478bd9Sstevel@tonic-gate 
232*0a701b1eSRobert Gordon int preposted_rbufs = RDMA_BUFS_GRANT;
2337c478bd9Sstevel@tonic-gate int send_threshold = 1;
2347c478bd9Sstevel@tonic-gate 
/*
 * State of the plugin.
 * ACCEPT = accepting new connections and requests.
 * NO_ACCEPT = not accepting new connections and requests.
 * This should eventually move to the rpcib_state_t structure, since it
 * tells which state the plugin is in for a particular type of service
 * such as NFS, NLM or the v4 callback daemon. The plugin might be in the
 * accept state for one service and in the no_accept state for another.
 */
2447c478bd9Sstevel@tonic-gate int		plugin_state;
2457c478bd9Sstevel@tonic-gate kmutex_t	plugin_state_lock;
2467c478bd9Sstevel@tonic-gate 
247*0a701b1eSRobert Gordon ldi_ident_t rpcib_li;
2487c478bd9Sstevel@tonic-gate 
2497c478bd9Sstevel@tonic-gate /*
2507c478bd9Sstevel@tonic-gate  * RPCIB RDMATF operations
2517c478bd9Sstevel@tonic-gate  */
252*0a701b1eSRobert Gordon #if defined(MEASURE_POOL_DEPTH)
253*0a701b1eSRobert Gordon static void rib_posted_rbufs(uint32_t x) { return; }
254*0a701b1eSRobert Gordon #endif
2557c478bd9Sstevel@tonic-gate static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
2567c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect(CONN *conn);
2577c478bd9Sstevel@tonic-gate static void rib_listen(struct rdma_svc_data *rd);
2587c478bd9Sstevel@tonic-gate static void rib_listen_stop(struct rdma_svc_data *rd);
259*0a701b1eSRobert Gordon static rdma_stat rib_registermem(CONN *conn, caddr_t  adsp, caddr_t buf,
260*0a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle);
2617c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
2627c478bd9Sstevel@tonic-gate 	struct mrc buf_handle);
263*0a701b1eSRobert Gordon static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
264*0a701b1eSRobert Gordon 		caddr_t buf, uint_t buflen, struct mrc *buf_handle);
265*0a701b1eSRobert Gordon static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
266*0a701b1eSRobert Gordon 		struct mrc buf_handle);
267*0a701b1eSRobert Gordon static rdma_stat rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf,
268*0a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle,
269*0a701b1eSRobert Gordon 	void *lrc);
2707c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
271*0a701b1eSRobert Gordon 	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
2727c478bd9Sstevel@tonic-gate static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
2737c478bd9Sstevel@tonic-gate 	caddr_t buf, int len, int cpu);
2747c478bd9Sstevel@tonic-gate 
2757c478bd9Sstevel@tonic-gate static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
2767c478bd9Sstevel@tonic-gate 
2777c478bd9Sstevel@tonic-gate static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf);
2787c478bd9Sstevel@tonic-gate static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
2797c478bd9Sstevel@tonic-gate 
2807c478bd9Sstevel@tonic-gate static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
2837c478bd9Sstevel@tonic-gate static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
2847c478bd9Sstevel@tonic-gate static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
285*0a701b1eSRobert Gordon static rdma_stat rib_post_resp_remove(CONN *conn, uint32_t msgid);
2867c478bd9Sstevel@tonic-gate static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
2877c478bd9Sstevel@tonic-gate static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
2887c478bd9Sstevel@tonic-gate static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
2897c478bd9Sstevel@tonic-gate static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
2907c478bd9Sstevel@tonic-gate static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **);
2917c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
2927c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_release(CONN *conn);
2937c478bd9Sstevel@tonic-gate static rdma_stat rib_getinfo(rdma_info_t *info);
294*0a701b1eSRobert Gordon 
295*0a701b1eSRobert Gordon static rib_lrc_entry_t *rib_get_cache_buf(CONN *conn, uint32_t len);
296*0a701b1eSRobert Gordon static void rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
297*0a701b1eSRobert Gordon static void rib_destroy_cache(rib_hca_t *hca);
298*0a701b1eSRobert Gordon static	void	rib_server_side_cache_reclaim(void *argp);
299*0a701b1eSRobert Gordon static int avl_compare(const void *t1, const void *t2);
300*0a701b1eSRobert Gordon 
3017c478bd9Sstevel@tonic-gate static void rib_stop_services(rib_hca_t *);
302*0a701b1eSRobert Gordon static void rib_close_channels(rib_conn_list_t *);
3037c478bd9Sstevel@tonic-gate 
3047c478bd9Sstevel@tonic-gate /*
3057c478bd9Sstevel@tonic-gate  * RPCIB addressing operations
3067c478bd9Sstevel@tonic-gate  */
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate /*
3097c478bd9Sstevel@tonic-gate  * RDMA operations the RPCIB module exports
3107c478bd9Sstevel@tonic-gate  */
3117c478bd9Sstevel@tonic-gate static rdmaops_t rib_ops = {
3127c478bd9Sstevel@tonic-gate 	rib_reachable,
3137c478bd9Sstevel@tonic-gate 	rib_conn_get,
3147c478bd9Sstevel@tonic-gate 	rib_conn_release,
3157c478bd9Sstevel@tonic-gate 	rib_listen,
3167c478bd9Sstevel@tonic-gate 	rib_listen_stop,
3177c478bd9Sstevel@tonic-gate 	rib_registermem,
3187c478bd9Sstevel@tonic-gate 	rib_deregistermem,
3197c478bd9Sstevel@tonic-gate 	rib_registermemsync,
3207c478bd9Sstevel@tonic-gate 	rib_deregistermemsync,
3217c478bd9Sstevel@tonic-gate 	rib_syncmem,
3227c478bd9Sstevel@tonic-gate 	rib_reg_buf_alloc,
3237c478bd9Sstevel@tonic-gate 	rib_reg_buf_free,
3247c478bd9Sstevel@tonic-gate 	rib_send,
3257c478bd9Sstevel@tonic-gate 	rib_send_resp,
3267c478bd9Sstevel@tonic-gate 	rib_post_resp,
327*0a701b1eSRobert Gordon 	rib_post_resp_remove,
3287c478bd9Sstevel@tonic-gate 	rib_post_recv,
3297c478bd9Sstevel@tonic-gate 	rib_recv,
3307c478bd9Sstevel@tonic-gate 	rib_read,
3317c478bd9Sstevel@tonic-gate 	rib_write,
332*0a701b1eSRobert Gordon 	rib_getinfo,
3337c478bd9Sstevel@tonic-gate };
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate /*
3367c478bd9Sstevel@tonic-gate  * RDMATF RPCIB plugin details
3377c478bd9Sstevel@tonic-gate  */
3387c478bd9Sstevel@tonic-gate static rdma_mod_t rib_mod = {
3397c478bd9Sstevel@tonic-gate 	"ibtf",		/* api name */
3407c478bd9Sstevel@tonic-gate 	RDMATF_VERS_1,
3417c478bd9Sstevel@tonic-gate 	0,
3427c478bd9Sstevel@tonic-gate 	&rib_ops,	/* rdma op vector for ibtf */
3437c478bd9Sstevel@tonic-gate };
3447c478bd9Sstevel@tonic-gate 
3457c478bd9Sstevel@tonic-gate static rdma_stat open_hcas(rpcib_state_t *);
3467c478bd9Sstevel@tonic-gate static rdma_stat rib_qp_init(rib_qp_t *, int);
3477c478bd9Sstevel@tonic-gate static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
3487c478bd9Sstevel@tonic-gate static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
3497c478bd9Sstevel@tonic-gate static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
3507c478bd9Sstevel@tonic-gate static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
3517c478bd9Sstevel@tonic-gate static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
352*0a701b1eSRobert Gordon static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t,
353*0a701b1eSRobert Gordon 	ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_mr_desc_t *);
354*0a701b1eSRobert Gordon static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
355*0a701b1eSRobert Gordon 	ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
356*0a701b1eSRobert Gordon static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *,
357*0a701b1eSRobert Gordon 	ibt_ip_addr_t *, ibt_ip_addr_t *);
3587c478bd9Sstevel@tonic-gate static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
3597c478bd9Sstevel@tonic-gate 	rib_qp_t **);
3607c478bd9Sstevel@tonic-gate static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
3617c478bd9Sstevel@tonic-gate 	rib_qp_t **);
3627c478bd9Sstevel@tonic-gate static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *);
3637c478bd9Sstevel@tonic-gate static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *);
3647c478bd9Sstevel@tonic-gate static int rib_free_sendwait(struct send_wid *);
3657c478bd9Sstevel@tonic-gate static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid);
3667c478bd9Sstevel@tonic-gate static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd);
3677c478bd9Sstevel@tonic-gate static void rdma_done_rem_list(rib_qp_t *);
3687c478bd9Sstevel@tonic-gate static void rdma_done_notify(rib_qp_t *qp, uint32_t xid);
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate static void rib_async_handler(void *,
3717c478bd9Sstevel@tonic-gate 	ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *);
3727c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *);
3737c478bd9Sstevel@tonic-gate static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *);
3747c478bd9Sstevel@tonic-gate static int rib_free_svc_recv(struct svc_recv *);
3757c478bd9Sstevel@tonic-gate static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
3767c478bd9Sstevel@tonic-gate static void rib_free_wid(struct recv_wid *);
3777c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
3787c478bd9Sstevel@tonic-gate static void rib_detach_hca(rib_hca_t *);
379*0a701b1eSRobert Gordon static rdma_stat rib_chk_srv_ibaddr(struct netbuf *, int,
380*0a701b1eSRobert Gordon 	ibt_path_info_t *, ibt_ip_addr_t *, ibt_ip_addr_t *);
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate /*
3837c478bd9Sstevel@tonic-gate  * Registration with IBTF as a consumer
3847c478bd9Sstevel@tonic-gate  */
3857c478bd9Sstevel@tonic-gate static struct ibt_clnt_modinfo_s rib_modinfo = {
38643ed929aSsrust 	IBTI_V2,
3877c478bd9Sstevel@tonic-gate 	IBT_GENERIC,
3887c478bd9Sstevel@tonic-gate 	rib_async_handler,	/* async event handler */
3897c478bd9Sstevel@tonic-gate 	NULL,			/* Memory Region Handler */
3907c478bd9Sstevel@tonic-gate 	"nfs/ib"
3917c478bd9Sstevel@tonic-gate };
3927c478bd9Sstevel@tonic-gate 
/*
 * Global structure
 */
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate typedef struct rpcib_s {
3987c478bd9Sstevel@tonic-gate 	dev_info_t	*rpcib_dip;
3997c478bd9Sstevel@tonic-gate 	kmutex_t	rpcib_mutex;
4007c478bd9Sstevel@tonic-gate } rpcib_t;
4017c478bd9Sstevel@tonic-gate 
4027c478bd9Sstevel@tonic-gate rpcib_t rpcib;
4037c478bd9Sstevel@tonic-gate 
/*
 * Debug level for the rpcib kernel module, settable from /etc/system.
 * Set it to a value greater than 1 to control the amount of debugging
 * messages produced.  The default of 0 leaves debugging off.
 */
int	rib_debug = 0;
4117c478bd9Sstevel@tonic-gate 
412*0a701b1eSRobert Gordon 
4137c478bd9Sstevel@tonic-gate int
4147c478bd9Sstevel@tonic-gate _init(void)
4157c478bd9Sstevel@tonic-gate {
4167c478bd9Sstevel@tonic-gate 	int		error;
417*0a701b1eSRobert Gordon 	int ret;
4187c478bd9Sstevel@tonic-gate 
4197c478bd9Sstevel@tonic-gate 	error = mod_install((struct modlinkage *)&rib_modlinkage);
4207c478bd9Sstevel@tonic-gate 	if (error != 0) {
4217c478bd9Sstevel@tonic-gate 		/*
4227c478bd9Sstevel@tonic-gate 		 * Could not load module
4237c478bd9Sstevel@tonic-gate 		 */
4247c478bd9Sstevel@tonic-gate 		return (error);
4257c478bd9Sstevel@tonic-gate 	}
426*0a701b1eSRobert Gordon 	ret = ldi_ident_from_mod(&rib_modlinkage, &rpcib_li);
427*0a701b1eSRobert Gordon 	if (ret != 0)
428*0a701b1eSRobert Gordon 		rpcib_li = NULL;
4297c478bd9Sstevel@tonic-gate 	mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate 	return (0);
4327c478bd9Sstevel@tonic-gate }
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate int
4357c478bd9Sstevel@tonic-gate _fini()
4367c478bd9Sstevel@tonic-gate {
4377c478bd9Sstevel@tonic-gate 	int status;
4387c478bd9Sstevel@tonic-gate 
4397c478bd9Sstevel@tonic-gate 	if ((status = rdma_unregister_mod(&rib_mod)) != RDMA_SUCCESS) {
4407c478bd9Sstevel@tonic-gate 		return (EBUSY);
4417c478bd9Sstevel@tonic-gate 	}
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate 	/*
4447c478bd9Sstevel@tonic-gate 	 * Remove module
4457c478bd9Sstevel@tonic-gate 	 */
4467c478bd9Sstevel@tonic-gate 	if ((status = mod_remove(&rib_modlinkage)) != 0) {
4477c478bd9Sstevel@tonic-gate 		(void) rdma_register_mod(&rib_mod);
4487c478bd9Sstevel@tonic-gate 		return (status);
4497c478bd9Sstevel@tonic-gate 	}
4507c478bd9Sstevel@tonic-gate 	mutex_destroy(&plugin_state_lock);
451*0a701b1eSRobert Gordon 	ldi_ident_release(rpcib_li);
4527c478bd9Sstevel@tonic-gate 	return (0);
4537c478bd9Sstevel@tonic-gate }
4547c478bd9Sstevel@tonic-gate 
4557c478bd9Sstevel@tonic-gate int
4567c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
4577c478bd9Sstevel@tonic-gate {
4587c478bd9Sstevel@tonic-gate 	return (mod_info(&rib_modlinkage, modinfop));
4597c478bd9Sstevel@tonic-gate }
4607c478bd9Sstevel@tonic-gate 
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate /*
4637c478bd9Sstevel@tonic-gate  * rpcib_getinfo()
4647c478bd9Sstevel@tonic-gate  * Given the device number, return the devinfo pointer or the
4657c478bd9Sstevel@tonic-gate  * instance number.
4667c478bd9Sstevel@tonic-gate  * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
4677c478bd9Sstevel@tonic-gate  */
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate /*ARGSUSED*/
4707c478bd9Sstevel@tonic-gate static int
4717c478bd9Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
4727c478bd9Sstevel@tonic-gate {
4737c478bd9Sstevel@tonic-gate 	int ret = DDI_SUCCESS;
4747c478bd9Sstevel@tonic-gate 
4757c478bd9Sstevel@tonic-gate 	switch (cmd) {
4767c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
4777c478bd9Sstevel@tonic-gate 		if (rpcib.rpcib_dip != NULL)
4787c478bd9Sstevel@tonic-gate 			*result = rpcib.rpcib_dip;
4797c478bd9Sstevel@tonic-gate 		else {
4807c478bd9Sstevel@tonic-gate 			*result = NULL;
4817c478bd9Sstevel@tonic-gate 			ret = DDI_FAILURE;
4827c478bd9Sstevel@tonic-gate 		}
4837c478bd9Sstevel@tonic-gate 		break;
4847c478bd9Sstevel@tonic-gate 
4857c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
4867c478bd9Sstevel@tonic-gate 		*result = NULL;
4877c478bd9Sstevel@tonic-gate 		break;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 	default:
4907c478bd9Sstevel@tonic-gate 		ret = DDI_FAILURE;
4917c478bd9Sstevel@tonic-gate 	}
4927c478bd9Sstevel@tonic-gate 	return (ret);
4937c478bd9Sstevel@tonic-gate }
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate static int
4967c478bd9Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4977c478bd9Sstevel@tonic-gate {
4987c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
4997c478bd9Sstevel@tonic-gate 	rdma_stat	r_status;
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 	switch (cmd) {
5027c478bd9Sstevel@tonic-gate 	case DDI_ATTACH:
5037c478bd9Sstevel@tonic-gate 		break;
5047c478bd9Sstevel@tonic-gate 	case DDI_RESUME:
5057c478bd9Sstevel@tonic-gate 		return (DDI_SUCCESS);
5067c478bd9Sstevel@tonic-gate 	default:
5077c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5087c478bd9Sstevel@tonic-gate 	}
5097c478bd9Sstevel@tonic-gate 
5107c478bd9Sstevel@tonic-gate 	mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL);
5117c478bd9Sstevel@tonic-gate 
5127c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
5137c478bd9Sstevel@tonic-gate 	if (rpcib.rpcib_dip != NULL) {
5147c478bd9Sstevel@tonic-gate 		mutex_exit(&rpcib.rpcib_mutex);
5157c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5167c478bd9Sstevel@tonic-gate 	}
5177c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = dip;
5187c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
5197c478bd9Sstevel@tonic-gate 	/*
5207c478bd9Sstevel@tonic-gate 	 * Create the "rpcib" minor-node.
5217c478bd9Sstevel@tonic-gate 	 */
5227c478bd9Sstevel@tonic-gate 	if (ddi_create_minor_node(dip,
5237c478bd9Sstevel@tonic-gate 	    "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
5247c478bd9Sstevel@tonic-gate 		/* Error message, no cmn_err as they print on console */
5257c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5267c478bd9Sstevel@tonic-gate 	}
5277c478bd9Sstevel@tonic-gate 
5287c478bd9Sstevel@tonic-gate 	if (rib_stat == NULL) {
5297c478bd9Sstevel@tonic-gate 		rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP);
5307c478bd9Sstevel@tonic-gate 		mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL);
5317c478bd9Sstevel@tonic-gate 	}
5327c478bd9Sstevel@tonic-gate 
5337c478bd9Sstevel@tonic-gate 	rib_stat->hca_count = ibt_get_hca_list(&rib_stat->hca_guids);
5347c478bd9Sstevel@tonic-gate 	if (rib_stat->hca_count < 1) {
5357c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
5367c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
5377c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
5387c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5397c478bd9Sstevel@tonic-gate 	}
5407c478bd9Sstevel@tonic-gate 
5417c478bd9Sstevel@tonic-gate 	ibt_status = ibt_attach(&rib_modinfo, dip,
5427c478bd9Sstevel@tonic-gate 	    (void *)rib_stat, &rib_stat->ibt_clnt_hdl);
543*0a701b1eSRobert Gordon 
5447c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
5457c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
5467c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
5477c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
5487c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
5497c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5507c478bd9Sstevel@tonic-gate 	}
5517c478bd9Sstevel@tonic-gate 
5527c478bd9Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
5537c478bd9Sstevel@tonic-gate 	if (open_hcas(rib_stat) != RDMA_SUCCESS) {
5547c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
5557c478bd9Sstevel@tonic-gate 		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
5567c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
5577c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
5587c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
5597c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
5607c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5617c478bd9Sstevel@tonic-gate 	}
5627c478bd9Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
5637c478bd9Sstevel@tonic-gate 
5647c478bd9Sstevel@tonic-gate 	/*
5657c478bd9Sstevel@tonic-gate 	 * Register with rdmatf
5667c478bd9Sstevel@tonic-gate 	 */
5677c478bd9Sstevel@tonic-gate 	rib_mod.rdma_count = rib_stat->hca_count;
5687c478bd9Sstevel@tonic-gate 	r_status = rdma_register_mod(&rib_mod);
5697c478bd9Sstevel@tonic-gate 	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
5707c478bd9Sstevel@tonic-gate 		rib_detach_hca(rib_stat->hca);
5717c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
5727c478bd9Sstevel@tonic-gate 		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
5737c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
5747c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
5757c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
5767c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5777c478bd9Sstevel@tonic-gate 	}
5787c478bd9Sstevel@tonic-gate 
5797c478bd9Sstevel@tonic-gate 
5807c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
5817c478bd9Sstevel@tonic-gate }
5827c478bd9Sstevel@tonic-gate 
5837c478bd9Sstevel@tonic-gate /*ARGSUSED*/
5847c478bd9Sstevel@tonic-gate static int
5857c478bd9Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5867c478bd9Sstevel@tonic-gate {
5877c478bd9Sstevel@tonic-gate 	switch (cmd) {
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	case DDI_DETACH:
5907c478bd9Sstevel@tonic-gate 		break;
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 	case DDI_SUSPEND:
5937c478bd9Sstevel@tonic-gate 	default:
5947c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5957c478bd9Sstevel@tonic-gate 	}
5967c478bd9Sstevel@tonic-gate 
5977c478bd9Sstevel@tonic-gate 	/*
5987c478bd9Sstevel@tonic-gate 	 * Detach the hca and free resources
5997c478bd9Sstevel@tonic-gate 	 */
6007c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
6017c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
6027c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
6037c478bd9Sstevel@tonic-gate 	rib_detach_hca(rib_stat->hca);
6047c478bd9Sstevel@tonic-gate 	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
6057c478bd9Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
6067c478bd9Sstevel@tonic-gate 
6077c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
6087c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = NULL;
6097c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
6107c478bd9Sstevel@tonic-gate 
6117c478bd9Sstevel@tonic-gate 	mutex_destroy(&rpcib.rpcib_mutex);
6127c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
6137c478bd9Sstevel@tonic-gate }
6147c478bd9Sstevel@tonic-gate 
6157c478bd9Sstevel@tonic-gate 
6167c478bd9Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int);
6177c478bd9Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int);
6187c478bd9Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype);
6197c478bd9Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t);
6207c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *);
6217c478bd9Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *);
6227c478bd9Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
6237c478bd9Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
6247c478bd9Sstevel@tonic-gate 
625*0a701b1eSRobert Gordon 
6267c478bd9Sstevel@tonic-gate /*
6277c478bd9Sstevel@tonic-gate  * One CQ pair per HCA
6287c478bd9Sstevel@tonic-gate  */
6297c478bd9Sstevel@tonic-gate static rdma_stat
6307c478bd9Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
6317c478bd9Sstevel@tonic-gate 	rib_cq_t **cqp, rpcib_state_t *ribstat)
6327c478bd9Sstevel@tonic-gate {
6337c478bd9Sstevel@tonic-gate 	rib_cq_t	*cq;
6347c478bd9Sstevel@tonic-gate 	ibt_cq_attr_t	cq_attr;
6357c478bd9Sstevel@tonic-gate 	uint32_t	real_size;
6367c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
6377c478bd9Sstevel@tonic-gate 	rdma_stat	error = RDMA_SUCCESS;
6387c478bd9Sstevel@tonic-gate 
6397c478bd9Sstevel@tonic-gate 	cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP);
6407c478bd9Sstevel@tonic-gate 	cq->rib_hca = hca;
6417c478bd9Sstevel@tonic-gate 	cq_attr.cq_size = cq_size;
6427c478bd9Sstevel@tonic-gate 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
6437c478bd9Sstevel@tonic-gate 	status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl,
6447c478bd9Sstevel@tonic-gate 	    &real_size);
6457c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
6467c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed,"
6477c478bd9Sstevel@tonic-gate 		    " status=%d", status);
6487c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
6497c478bd9Sstevel@tonic-gate 		goto fail;
6507c478bd9Sstevel@tonic-gate 	}
6517c478bd9Sstevel@tonic-gate 	ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, ribstat);
6527c478bd9Sstevel@tonic-gate 
6537c478bd9Sstevel@tonic-gate 	/*
6547c478bd9Sstevel@tonic-gate 	 * Enable CQ callbacks. CQ Callbacks are single shot
6557c478bd9Sstevel@tonic-gate 	 * (e.g. you have to call ibt_enable_cq_notify()
6567c478bd9Sstevel@tonic-gate 	 * after each callback to get another one).
6577c478bd9Sstevel@tonic-gate 	 */
6587c478bd9Sstevel@tonic-gate 	status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION);
6597c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
6607c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: "
6617c478bd9Sstevel@tonic-gate 		    "enable_cq_notify failed, status %d", status);
6627c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
6637c478bd9Sstevel@tonic-gate 		goto fail;
6647c478bd9Sstevel@tonic-gate 	}
6657c478bd9Sstevel@tonic-gate 	*cqp = cq;
6667c478bd9Sstevel@tonic-gate 
6677c478bd9Sstevel@tonic-gate 	return (error);
6687c478bd9Sstevel@tonic-gate fail:
6697c478bd9Sstevel@tonic-gate 	if (cq->rib_cq_hdl)
6707c478bd9Sstevel@tonic-gate 		(void) ibt_free_cq(cq->rib_cq_hdl);
6717c478bd9Sstevel@tonic-gate 	if (cq)
6727c478bd9Sstevel@tonic-gate 		kmem_free(cq, sizeof (rib_cq_t));
6737c478bd9Sstevel@tonic-gate 	return (error);
6747c478bd9Sstevel@tonic-gate }
6757c478bd9Sstevel@tonic-gate 
6767c478bd9Sstevel@tonic-gate static rdma_stat
6777c478bd9Sstevel@tonic-gate open_hcas(rpcib_state_t *ribstat)
6787c478bd9Sstevel@tonic-gate {
6797c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
6807c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
6817c478bd9Sstevel@tonic-gate 	rdma_stat		status;
6827c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*pinfop;
6837c478bd9Sstevel@tonic-gate 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
6847c478bd9Sstevel@tonic-gate 	uint_t			size, cq_size;
6857c478bd9Sstevel@tonic-gate 	int			i;
686*0a701b1eSRobert Gordon 	kstat_t *ksp;
687*0a701b1eSRobert Gordon 	cache_avl_struct_t example_avl_node;
688*0a701b1eSRobert Gordon 	char rssc_name[32];
6897c478bd9Sstevel@tonic-gate 
6907c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
691*0a701b1eSRobert Gordon 
6927c478bd9Sstevel@tonic-gate 	if (ribstat->hcas == NULL)
6937c478bd9Sstevel@tonic-gate 		ribstat->hcas = kmem_zalloc(ribstat->hca_count *
6947c478bd9Sstevel@tonic-gate 		    sizeof (rib_hca_t), KM_SLEEP);
6957c478bd9Sstevel@tonic-gate 
6967c478bd9Sstevel@tonic-gate 	/*
6977c478bd9Sstevel@tonic-gate 	 * Open a hca and setup for RDMA
6987c478bd9Sstevel@tonic-gate 	 */
6997c478bd9Sstevel@tonic-gate 	for (i = 0; i < ribstat->hca_count; i++) {
7007c478bd9Sstevel@tonic-gate 		ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl,
7017c478bd9Sstevel@tonic-gate 		    ribstat->hca_guids[i],
7027c478bd9Sstevel@tonic-gate 		    &ribstat->hcas[i].hca_hdl);
7037c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7047c478bd9Sstevel@tonic-gate 			continue;
7057c478bd9Sstevel@tonic-gate 		}
7067c478bd9Sstevel@tonic-gate 		ribstat->hcas[i].hca_guid = ribstat->hca_guids[i];
7077c478bd9Sstevel@tonic-gate 		hca = &(ribstat->hcas[i]);
7087c478bd9Sstevel@tonic-gate 		hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl;
7097c478bd9Sstevel@tonic-gate 		hca->state = HCA_INITED;
7107c478bd9Sstevel@tonic-gate 
7117c478bd9Sstevel@tonic-gate 		/*
7127c478bd9Sstevel@tonic-gate 		 * query HCA info
7137c478bd9Sstevel@tonic-gate 		 */
7147c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs);
7157c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7167c478bd9Sstevel@tonic-gate 			goto fail1;
7177c478bd9Sstevel@tonic-gate 		}
7187c478bd9Sstevel@tonic-gate 
7197c478bd9Sstevel@tonic-gate 		/*
7207c478bd9Sstevel@tonic-gate 		 * One PD (Protection Domain) per HCA.
7217c478bd9Sstevel@tonic-gate 		 * A qp is allowed to access a memory region
7227c478bd9Sstevel@tonic-gate 		 * only when it's in the same PD as that of
7237c478bd9Sstevel@tonic-gate 		 * the memory region.
7247c478bd9Sstevel@tonic-gate 		 */
7257c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl);
7267c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7277c478bd9Sstevel@tonic-gate 			goto fail1;
7287c478bd9Sstevel@tonic-gate 		}
7297c478bd9Sstevel@tonic-gate 
7307c478bd9Sstevel@tonic-gate 		/*
7317c478bd9Sstevel@tonic-gate 		 * query HCA ports
7327c478bd9Sstevel@tonic-gate 		 */
7337c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl,
7347c478bd9Sstevel@tonic-gate 		    0, &pinfop, &hca->hca_nports, &size);
7357c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
7367c478bd9Sstevel@tonic-gate 			goto fail2;
7377c478bd9Sstevel@tonic-gate 		}
7387c478bd9Sstevel@tonic-gate 		hca->hca_ports = pinfop;
7397c478bd9Sstevel@tonic-gate 		hca->hca_pinfosz = size;
7407c478bd9Sstevel@tonic-gate 		pinfop = NULL;
7417c478bd9Sstevel@tonic-gate 
7427c478bd9Sstevel@tonic-gate 		cq_size = DEF_CQ_SIZE; /* default cq size */
7437c478bd9Sstevel@tonic-gate 		/*
7447c478bd9Sstevel@tonic-gate 		 * Create 2 pairs of cq's (1 pair for client
7457c478bd9Sstevel@tonic-gate 		 * and the other pair for server) on this hca.
7467c478bd9Sstevel@tonic-gate 		 * If number of qp's gets too large, then several
7477c478bd9Sstevel@tonic-gate 		 * cq's will be needed.
7487c478bd9Sstevel@tonic-gate 		 */
7497c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler,
7507c478bd9Sstevel@tonic-gate 		    &hca->svc_rcq, ribstat);
7517c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7527c478bd9Sstevel@tonic-gate 			goto fail3;
7537c478bd9Sstevel@tonic-gate 		}
7547c478bd9Sstevel@tonic-gate 
7557c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_scq_handler,
7567c478bd9Sstevel@tonic-gate 		    &hca->svc_scq, ribstat);
7577c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7587c478bd9Sstevel@tonic-gate 			goto fail3;
7597c478bd9Sstevel@tonic-gate 		}
7607c478bd9Sstevel@tonic-gate 
7617c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler,
7627c478bd9Sstevel@tonic-gate 		    &hca->clnt_rcq, ribstat);
7637c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7647c478bd9Sstevel@tonic-gate 			goto fail3;
7657c478bd9Sstevel@tonic-gate 		}
7667c478bd9Sstevel@tonic-gate 
7677c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler,
7687c478bd9Sstevel@tonic-gate 		    &hca->clnt_scq, ribstat);
7697c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7707c478bd9Sstevel@tonic-gate 			goto fail3;
7717c478bd9Sstevel@tonic-gate 		}
7727c478bd9Sstevel@tonic-gate 
7737c478bd9Sstevel@tonic-gate 		/*
7747c478bd9Sstevel@tonic-gate 		 * Create buffer pools.
7757c478bd9Sstevel@tonic-gate 		 * Note rib_rbuf_create also allocates memory windows.
7767c478bd9Sstevel@tonic-gate 		 */
7777c478bd9Sstevel@tonic-gate 		hca->recv_pool = rib_rbufpool_create(hca,
7787c478bd9Sstevel@tonic-gate 		    RECV_BUFFER, MAX_BUFS);
7797c478bd9Sstevel@tonic-gate 		if (hca->recv_pool == NULL) {
7807c478bd9Sstevel@tonic-gate 			goto fail3;
7817c478bd9Sstevel@tonic-gate 		}
7827c478bd9Sstevel@tonic-gate 
7837c478bd9Sstevel@tonic-gate 		hca->send_pool = rib_rbufpool_create(hca,
7847c478bd9Sstevel@tonic-gate 		    SEND_BUFFER, MAX_BUFS);
7857c478bd9Sstevel@tonic-gate 		if (hca->send_pool == NULL) {
7867c478bd9Sstevel@tonic-gate 			rib_rbufpool_destroy(hca, RECV_BUFFER);
7877c478bd9Sstevel@tonic-gate 			goto fail3;
7887c478bd9Sstevel@tonic-gate 		}
7897c478bd9Sstevel@tonic-gate 
790*0a701b1eSRobert Gordon 		if (hca->server_side_cache == NULL) {
791*0a701b1eSRobert Gordon 			(void) sprintf(rssc_name,
792*0a701b1eSRobert Gordon 			    "rib_server_side_cache_%04d", i);
793*0a701b1eSRobert Gordon 			hca->server_side_cache = kmem_cache_create(
794*0a701b1eSRobert Gordon 			    rssc_name,
795*0a701b1eSRobert Gordon 			    sizeof (cache_avl_struct_t), 0,
796*0a701b1eSRobert Gordon 			    NULL,
797*0a701b1eSRobert Gordon 			    NULL,
798*0a701b1eSRobert Gordon 			    rib_server_side_cache_reclaim,
799*0a701b1eSRobert Gordon 			    hca, NULL, 0);
800*0a701b1eSRobert Gordon 		}
801*0a701b1eSRobert Gordon 
802*0a701b1eSRobert Gordon 		avl_create(&hca->avl_tree,
803*0a701b1eSRobert Gordon 		    avl_compare,
804*0a701b1eSRobert Gordon 		    sizeof (cache_avl_struct_t),
805*0a701b1eSRobert Gordon 		    (uint_t)(uintptr_t)&example_avl_node.avl_link-
806*0a701b1eSRobert Gordon 		    (uint_t)(uintptr_t)&example_avl_node);
807*0a701b1eSRobert Gordon 
808*0a701b1eSRobert Gordon 		rw_init(&hca->avl_rw_lock,
809*0a701b1eSRobert Gordon 		    NULL, RW_DRIVER, hca->iblock);
810*0a701b1eSRobert Gordon 		mutex_init(&hca->cache_allocation,
811*0a701b1eSRobert Gordon 		    NULL, MUTEX_DRIVER, NULL);
812*0a701b1eSRobert Gordon 		hca->avl_init = TRUE;
813*0a701b1eSRobert Gordon 
814*0a701b1eSRobert Gordon 		/* Create kstats for the cache */
815*0a701b1eSRobert Gordon 		ASSERT(INGLOBALZONE(curproc));
816*0a701b1eSRobert Gordon 
817*0a701b1eSRobert Gordon 		if (!stats_enabled) {
818*0a701b1eSRobert Gordon 			ksp = kstat_create_zone("unix", 0, "rpcib_cache", "rpc",
819*0a701b1eSRobert Gordon 			    KSTAT_TYPE_NAMED,
820*0a701b1eSRobert Gordon 			    sizeof (rpcib_kstat) / sizeof (kstat_named_t),
821*0a701b1eSRobert Gordon 			    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
822*0a701b1eSRobert Gordon 			    GLOBAL_ZONEID);
823*0a701b1eSRobert Gordon 			if (ksp) {
824*0a701b1eSRobert Gordon 				ksp->ks_data = (void *) &rpcib_kstat;
825*0a701b1eSRobert Gordon 				ksp->ks_update = rpcib_cache_kstat_update;
826*0a701b1eSRobert Gordon 				kstat_install(ksp);
827*0a701b1eSRobert Gordon 				stats_enabled = TRUE;
828*0a701b1eSRobert Gordon 			}
829*0a701b1eSRobert Gordon 		}
830*0a701b1eSRobert Gordon 		if (NULL == hca->reg_cache_clean_up) {
831*0a701b1eSRobert Gordon 			hca->reg_cache_clean_up = ddi_taskq_create(NULL,
832*0a701b1eSRobert Gordon 			    "REG_CACHE_CLEANUP", 1, TASKQ_DEFAULTPRI, 0);
833*0a701b1eSRobert Gordon 		}
834*0a701b1eSRobert Gordon 
8357c478bd9Sstevel@tonic-gate 		/*
8367c478bd9Sstevel@tonic-gate 		 * Initialize the registered service list and
8377c478bd9Sstevel@tonic-gate 		 * the lock
8387c478bd9Sstevel@tonic-gate 		 */
8397c478bd9Sstevel@tonic-gate 		hca->service_list = NULL;
8407c478bd9Sstevel@tonic-gate 		rw_init(&hca->service_list_lock, NULL, RW_DRIVER, hca->iblock);
8417c478bd9Sstevel@tonic-gate 
8427c478bd9Sstevel@tonic-gate 		mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
8437c478bd9Sstevel@tonic-gate 		cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL);
8447c478bd9Sstevel@tonic-gate 		rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER,
8457c478bd9Sstevel@tonic-gate 		    hca->iblock);
8467c478bd9Sstevel@tonic-gate 		rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER,
8477c478bd9Sstevel@tonic-gate 		    hca->iblock);
8487c478bd9Sstevel@tonic-gate 		rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock);
8497c478bd9Sstevel@tonic-gate 		mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock);
8507c478bd9Sstevel@tonic-gate 		hca->inuse = TRUE;
8517c478bd9Sstevel@tonic-gate 		/*
8527c478bd9Sstevel@tonic-gate 		 * XXX One hca only. Add multi-hca functionality if needed
8537c478bd9Sstevel@tonic-gate 		 * later.
8547c478bd9Sstevel@tonic-gate 		 */
8557c478bd9Sstevel@tonic-gate 		ribstat->hca = hca;
8567c478bd9Sstevel@tonic-gate 		ribstat->nhca_inited++;
8577c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
8587c478bd9Sstevel@tonic-gate 		break;
8597c478bd9Sstevel@tonic-gate 
8607c478bd9Sstevel@tonic-gate fail3:
8617c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
8627c478bd9Sstevel@tonic-gate fail2:
8637c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
8647c478bd9Sstevel@tonic-gate fail1:
8657c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
8667c478bd9Sstevel@tonic-gate 
8677c478bd9Sstevel@tonic-gate 	}
8687c478bd9Sstevel@tonic-gate 	if (ribstat->hca != NULL)
8697c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
8707c478bd9Sstevel@tonic-gate 	else
8717c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
8727c478bd9Sstevel@tonic-gate }
8737c478bd9Sstevel@tonic-gate 
8747c478bd9Sstevel@tonic-gate /*
8757c478bd9Sstevel@tonic-gate  * Callback routines
8767c478bd9Sstevel@tonic-gate  */
8777c478bd9Sstevel@tonic-gate 
8787c478bd9Sstevel@tonic-gate /*
8797c478bd9Sstevel@tonic-gate  * SCQ handlers
8807c478bd9Sstevel@tonic-gate  */
8817c478bd9Sstevel@tonic-gate /* ARGSUSED */
8827c478bd9Sstevel@tonic-gate static void
8837c478bd9Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
8847c478bd9Sstevel@tonic-gate {
8857c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
8867c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
8877c478bd9Sstevel@tonic-gate 	int		i;
8887c478bd9Sstevel@tonic-gate 
8897c478bd9Sstevel@tonic-gate 	/*
8907c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
8917c478bd9Sstevel@tonic-gate 	 * completion queue notification.
8927c478bd9Sstevel@tonic-gate 	 */
8937c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
8947c478bd9Sstevel@tonic-gate 
8957c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
8967c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
8977c478bd9Sstevel@tonic-gate 	bzero(&wc, sizeof (wc));
8987c478bd9Sstevel@tonic-gate 	ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
8997c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS)
9007c478bd9Sstevel@tonic-gate 		return;
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate 	/*
9037c478bd9Sstevel@tonic-gate 	 * Got a send completion
9047c478bd9Sstevel@tonic-gate 	 */
9057c478bd9Sstevel@tonic-gate 	if (wc.wc_id != NULL) {	/* XXX can it be otherwise ???? */
90611606941Sjwahlig 		struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id;
9077c478bd9Sstevel@tonic-gate 		CONN	*conn = qptoc(wd->qp);
9087c478bd9Sstevel@tonic-gate 
9097c478bd9Sstevel@tonic-gate 		mutex_enter(&wd->sendwait_lock);
9107c478bd9Sstevel@tonic-gate 		switch (wc.wc_status) {
9117c478bd9Sstevel@tonic-gate 		case IBT_WC_SUCCESS:
9127c478bd9Sstevel@tonic-gate 			wd->status = RDMA_SUCCESS;
9137c478bd9Sstevel@tonic-gate 			break;
9147c478bd9Sstevel@tonic-gate 		case IBT_WC_WR_FLUSHED_ERR:
9157c478bd9Sstevel@tonic-gate 			wd->status = RDMA_FAILED;
9167c478bd9Sstevel@tonic-gate 			break;
9177c478bd9Sstevel@tonic-gate 		default:
9187c478bd9Sstevel@tonic-gate /*
9197c478bd9Sstevel@tonic-gate  *    RC Send Q Error Code		Local state     Remote State
9207c478bd9Sstevel@tonic-gate  *    ==================== 		===========     ============
9217c478bd9Sstevel@tonic-gate  *    IBT_WC_BAD_RESPONSE_ERR             ERROR           None
9227c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_LEN_ERR                ERROR           None
9237c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           None
9247c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_PROTECT_ERR            ERROR           None
9257c478bd9Sstevel@tonic-gate  *    IBT_WC_MEM_WIN_BIND_ERR             ERROR           None
9267c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR
9277c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_ACCESS_ERR            ERROR           ERROR
9287c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_OP_ERR                ERROR           ERROR
9297c478bd9Sstevel@tonic-gate  *    IBT_WC_RNR_NAK_TIMEOUT_ERR          ERROR           None
9307c478bd9Sstevel@tonic-gate  *    IBT_WC_TRANS_TIMEOUT_ERR            ERROR           None
9317c478bd9Sstevel@tonic-gate  *    IBT_WC_WR_FLUSHED_ERR               None            None
9327c478bd9Sstevel@tonic-gate  */
9337c478bd9Sstevel@tonic-gate 			/*
9347c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection to
9357c478bd9Sstevel@tonic-gate 			 * ERROR and cleanup will happen either from
9367c478bd9Sstevel@tonic-gate 			 * conn_release  or from rib_conn_get
9377c478bd9Sstevel@tonic-gate 			 */
9387c478bd9Sstevel@tonic-gate 			wd->status = RDMA_FAILED;
9397c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
9407c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
941*0a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
9427c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
9437c478bd9Sstevel@tonic-gate 			break;
9447c478bd9Sstevel@tonic-gate 		}
945*0a701b1eSRobert Gordon 
9467c478bd9Sstevel@tonic-gate 		if (wd->cv_sig == 1) {
9477c478bd9Sstevel@tonic-gate 			/*
9487c478bd9Sstevel@tonic-gate 			 * Notify poster
9497c478bd9Sstevel@tonic-gate 			 */
9507c478bd9Sstevel@tonic-gate 			cv_signal(&wd->wait_cv);
9517c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
9527c478bd9Sstevel@tonic-gate 		} else {
9537c478bd9Sstevel@tonic-gate 			/*
9547c478bd9Sstevel@tonic-gate 			 * Poster not waiting for notification.
9557c478bd9Sstevel@tonic-gate 			 * Free the send buffers and send_wid
9567c478bd9Sstevel@tonic-gate 			 */
9577c478bd9Sstevel@tonic-gate 			for (i = 0; i < wd->nsbufs; i++) {
9587c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
95911606941Sjwahlig 				    (void *)(uintptr_t)wd->sbufaddr[i]);
9607c478bd9Sstevel@tonic-gate 				}
9617c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
9627c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wd);
9637c478bd9Sstevel@tonic-gate 			}
9647c478bd9Sstevel@tonic-gate 		}
9657c478bd9Sstevel@tonic-gate 	}
9667c478bd9Sstevel@tonic-gate }
9677c478bd9Sstevel@tonic-gate 
9687c478bd9Sstevel@tonic-gate /* ARGSUSED */
9697c478bd9Sstevel@tonic-gate static void
9707c478bd9Sstevel@tonic-gate rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
9717c478bd9Sstevel@tonic-gate {
9727c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
9737c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
9747c478bd9Sstevel@tonic-gate 	int		i;
9757c478bd9Sstevel@tonic-gate 
9767c478bd9Sstevel@tonic-gate 	/*
9777c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
9787c478bd9Sstevel@tonic-gate 	 * completion queue notification.
9797c478bd9Sstevel@tonic-gate 	 */
9807c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
9817c478bd9Sstevel@tonic-gate 
9827c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
9837c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
9847c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
9857c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
9867c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
9877c478bd9Sstevel@tonic-gate 			return;
9887c478bd9Sstevel@tonic-gate 
9897c478bd9Sstevel@tonic-gate 		/*
9907c478bd9Sstevel@tonic-gate 		 * Got a send completion
9917c478bd9Sstevel@tonic-gate 		 */
9927c478bd9Sstevel@tonic-gate 		if (wc.wc_id != NULL) { /* XXX NULL possible ???? */
993*0a701b1eSRobert Gordon 			struct send_wid *wd =
994*0a701b1eSRobert Gordon 			    (struct send_wid *)(uintptr_t)wc.wc_id;
9957c478bd9Sstevel@tonic-gate 			mutex_enter(&wd->sendwait_lock);
9967c478bd9Sstevel@tonic-gate 			if (wd->cv_sig == 1) {
9977c478bd9Sstevel@tonic-gate 				/*
9987c478bd9Sstevel@tonic-gate 				 * Update completion status and notify poster
9997c478bd9Sstevel@tonic-gate 				 */
10007c478bd9Sstevel@tonic-gate 				if (wc.wc_status == IBT_WC_SUCCESS)
10017c478bd9Sstevel@tonic-gate 					wd->status = RDMA_SUCCESS;
10027c478bd9Sstevel@tonic-gate 				else
10037c478bd9Sstevel@tonic-gate 					wd->status = RDMA_FAILED;
10047c478bd9Sstevel@tonic-gate 				cv_signal(&wd->wait_cv);
10057c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
10067c478bd9Sstevel@tonic-gate 			} else {
10077c478bd9Sstevel@tonic-gate 				/*
10087c478bd9Sstevel@tonic-gate 				 * Poster not waiting for notification.
10097c478bd9Sstevel@tonic-gate 				 * Free the send buffers and send_wid
10107c478bd9Sstevel@tonic-gate 				 */
10117c478bd9Sstevel@tonic-gate 				for (i = 0; i < wd->nsbufs; i++) {
1012*0a701b1eSRobert Gordon 					rib_rbuf_free(qptoc(wd->qp),
1013*0a701b1eSRobert Gordon 					    SEND_BUFFER,
101411606941Sjwahlig 					    (void *)(uintptr_t)wd->sbufaddr[i]);
10157c478bd9Sstevel@tonic-gate 				}
10167c478bd9Sstevel@tonic-gate 				mutex_exit(&wd->sendwait_lock);
10177c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wd);
10187c478bd9Sstevel@tonic-gate 			}
10197c478bd9Sstevel@tonic-gate 		}
10207c478bd9Sstevel@tonic-gate 	}
10217c478bd9Sstevel@tonic-gate }
10227c478bd9Sstevel@tonic-gate 
10237c478bd9Sstevel@tonic-gate /*
10247c478bd9Sstevel@tonic-gate  * RCQ handler
10257c478bd9Sstevel@tonic-gate  */
10267c478bd9Sstevel@tonic-gate /* ARGSUSED */
10277c478bd9Sstevel@tonic-gate static void
10287c478bd9Sstevel@tonic-gate rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
10297c478bd9Sstevel@tonic-gate {
10307c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
10317c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
10327c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
10337c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 	/*
10367c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
10377c478bd9Sstevel@tonic-gate 	 * completion queue notification.
10387c478bd9Sstevel@tonic-gate 	 */
10397c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
10427c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
10437c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
10447c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
10457c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
10467c478bd9Sstevel@tonic-gate 			return;
10477c478bd9Sstevel@tonic-gate 
104811606941Sjwahlig 		rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
10497c478bd9Sstevel@tonic-gate 		qp = rwid->qp;
10507c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
10517c478bd9Sstevel@tonic-gate 			XDR	inxdrs, *xdrs;
10527c478bd9Sstevel@tonic-gate 			uint_t	xid, vers, op, find_xid = 0;
10537c478bd9Sstevel@tonic-gate 			struct reply	*r;
10547c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
1055*0a701b1eSRobert Gordon 			uint32_t rdma_credit = 0;
10567c478bd9Sstevel@tonic-gate 
10577c478bd9Sstevel@tonic-gate 			xdrs = &inxdrs;
105811606941Sjwahlig 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
10597c478bd9Sstevel@tonic-gate 			    wc.wc_bytes_xfer, XDR_DECODE);
10607c478bd9Sstevel@tonic-gate 			/*
10617c478bd9Sstevel@tonic-gate 			 * Treat xid as opaque (xid is the first entity
10627c478bd9Sstevel@tonic-gate 			 * in the rpc rdma message).
10637c478bd9Sstevel@tonic-gate 			 */
106411606941Sjwahlig 			xid = *(uint32_t *)(uintptr_t)rwid->addr;
1065*0a701b1eSRobert Gordon 
10667c478bd9Sstevel@tonic-gate 			/* Skip xid and set the xdr position accordingly. */
10677c478bd9Sstevel@tonic-gate 			XDR_SETPOS(xdrs, sizeof (uint32_t));
10687c478bd9Sstevel@tonic-gate 			(void) xdr_u_int(xdrs, &vers);
1069*0a701b1eSRobert Gordon 			(void) xdr_u_int(xdrs, &rdma_credit);
10707c478bd9Sstevel@tonic-gate 			(void) xdr_u_int(xdrs, &op);
10717c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
1072*0a701b1eSRobert Gordon 
10737c478bd9Sstevel@tonic-gate 			if (vers != RPCRDMA_VERS) {
10747c478bd9Sstevel@tonic-gate 				/*
1075*0a701b1eSRobert Gordon 				 * Invalid RPC/RDMA version. Cannot
1076*0a701b1eSRobert Gordon 				 * interoperate.  Set connection to
1077*0a701b1eSRobert Gordon 				 * ERROR state and bail out.
10787c478bd9Sstevel@tonic-gate 				 */
10797c478bd9Sstevel@tonic-gate 				mutex_enter(&conn->c_lock);
10807c478bd9Sstevel@tonic-gate 				if (conn->c_state != C_DISCONN_PEND)
1081*0a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
10827c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
108311606941Sjwahlig 				rib_rbuf_free(conn, RECV_BUFFER,
108411606941Sjwahlig 				    (void *)(uintptr_t)rwid->addr);
10857c478bd9Sstevel@tonic-gate 				rib_free_wid(rwid);
10867c478bd9Sstevel@tonic-gate 				continue;
10877c478bd9Sstevel@tonic-gate 			}
10887c478bd9Sstevel@tonic-gate 
10897c478bd9Sstevel@tonic-gate 			mutex_enter(&qp->replylist_lock);
10907c478bd9Sstevel@tonic-gate 			for (r = qp->replylist; r != NULL; r = r->next) {
10917c478bd9Sstevel@tonic-gate 				if (r->xid == xid) {
10927c478bd9Sstevel@tonic-gate 					find_xid = 1;
10937c478bd9Sstevel@tonic-gate 					switch (op) {
10947c478bd9Sstevel@tonic-gate 					case RDMA_MSG:
10957c478bd9Sstevel@tonic-gate 					case RDMA_NOMSG:
10967c478bd9Sstevel@tonic-gate 					case RDMA_MSGP:
10977c478bd9Sstevel@tonic-gate 						r->status = RDMA_SUCCESS;
10987c478bd9Sstevel@tonic-gate 						r->vaddr_cq = rwid->addr;
1099*0a701b1eSRobert Gordon 						r->bytes_xfer =
1100*0a701b1eSRobert Gordon 						    wc.wc_bytes_xfer;
11017c478bd9Sstevel@tonic-gate 						cv_signal(&r->wait_cv);
11027c478bd9Sstevel@tonic-gate 						break;
11037c478bd9Sstevel@tonic-gate 					default:
1104*0a701b1eSRobert Gordon 						rib_rbuf_free(qptoc(qp),
1105*0a701b1eSRobert Gordon 						    RECV_BUFFER,
1106*0a701b1eSRobert Gordon 						    (void *)(uintptr_t)
1107*0a701b1eSRobert Gordon 						    rwid->addr);
11087c478bd9Sstevel@tonic-gate 						break;
11097c478bd9Sstevel@tonic-gate 					}
11107c478bd9Sstevel@tonic-gate 					break;
11117c478bd9Sstevel@tonic-gate 				}
11127c478bd9Sstevel@tonic-gate 			}
11137c478bd9Sstevel@tonic-gate 			mutex_exit(&qp->replylist_lock);
11147c478bd9Sstevel@tonic-gate 			if (find_xid == 0) {
11157c478bd9Sstevel@tonic-gate 				/* RPC caller not waiting for reply */
1116*0a701b1eSRobert Gordon 
1117*0a701b1eSRobert Gordon 				DTRACE_PROBE1(rpcib__i__nomatchxid1,
1118*0a701b1eSRobert Gordon 				    int, xid);
1119*0a701b1eSRobert Gordon 
11207c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(qp), RECV_BUFFER,
112111606941Sjwahlig 				    (void *)(uintptr_t)rwid->addr);
11227c478bd9Sstevel@tonic-gate 			}
11237c478bd9Sstevel@tonic-gate 		} else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) {
11247c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
11257c478bd9Sstevel@tonic-gate 
11267c478bd9Sstevel@tonic-gate 			/*
11277c478bd9Sstevel@tonic-gate 			 * Connection being flushed. Just free
11287c478bd9Sstevel@tonic-gate 			 * the posted buffer
11297c478bd9Sstevel@tonic-gate 			 */
113011606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
113111606941Sjwahlig 			    (void *)(uintptr_t)rwid->addr);
11327c478bd9Sstevel@tonic-gate 		} else {
11337c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
11347c478bd9Sstevel@tonic-gate /*
11357c478bd9Sstevel@tonic-gate  *  RC Recv Q Error Code		Local state     Remote State
11367c478bd9Sstevel@tonic-gate  *  ====================		===========     ============
11377c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_ACCESS_ERR             ERROR           ERROR when NAK recvd
11387c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_LEN_ERR                ERROR           ERROR when NAK recvd
11397c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_PROTECT_ERR            ERROR           ERROR when NAK recvd
11407c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           ERROR when NAK recvd
11417c478bd9Sstevel@tonic-gate  *  IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR when NAK recvd
11427c478bd9Sstevel@tonic-gate  *  IBT_WC_WR_FLUSHED_ERR               None            None
11437c478bd9Sstevel@tonic-gate  */
11447c478bd9Sstevel@tonic-gate 			/*
11457c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection
11467c478bd9Sstevel@tonic-gate 			 * in ERROR state.
11477c478bd9Sstevel@tonic-gate 			 */
11487c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
11497c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
1150*0a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
11517c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
115211606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
115311606941Sjwahlig 			    (void *)(uintptr_t)rwid->addr);
11547c478bd9Sstevel@tonic-gate 		}
11557c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
11567c478bd9Sstevel@tonic-gate 	}
11577c478bd9Sstevel@tonic-gate }
11587c478bd9Sstevel@tonic-gate 
11597c478bd9Sstevel@tonic-gate /* Server side */
11607c478bd9Sstevel@tonic-gate /* ARGSUSED */
11617c478bd9Sstevel@tonic-gate static void
11627c478bd9Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
11637c478bd9Sstevel@tonic-gate {
1164*0a701b1eSRobert Gordon 	rdma_recv_data_t *rdp;
11657c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
11667c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
11677c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
11687c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
11697c478bd9Sstevel@tonic-gate 	CONN		*conn;
11707c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
11717c478bd9Sstevel@tonic-gate 
11727c478bd9Sstevel@tonic-gate 	/*
11737c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
11747c478bd9Sstevel@tonic-gate 	 * completion queue notification.
11757c478bd9Sstevel@tonic-gate 	 */
11767c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
11777c478bd9Sstevel@tonic-gate 
11787c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
11797c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
11807c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
11817c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
11827c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
11837c478bd9Sstevel@tonic-gate 			return;
11847c478bd9Sstevel@tonic-gate 
118511606941Sjwahlig 		s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
11867c478bd9Sstevel@tonic-gate 		qp = s_recvp->qp;
11877c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
11887c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
11897c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs--;
1190*0a701b1eSRobert Gordon #if defined(MEASURE_POOL_DEPTH)
1191*0a701b1eSRobert Gordon 		rib_posted_rbufs(preposted_rbufs -  qp->n_posted_rbufs);
1192*0a701b1eSRobert Gordon #endif
11937c478bd9Sstevel@tonic-gate 		if (qp->n_posted_rbufs == 0)
11947c478bd9Sstevel@tonic-gate 			cv_signal(&qp->posted_rbufs_cv);
11957c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
11967c478bd9Sstevel@tonic-gate 
11977c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
11987c478bd9Sstevel@tonic-gate 			XDR	inxdrs, *xdrs;
11997c478bd9Sstevel@tonic-gate 			uint_t	xid, vers, op;
1200*0a701b1eSRobert Gordon 			uint32_t rdma_credit;
12017c478bd9Sstevel@tonic-gate 
12027c478bd9Sstevel@tonic-gate 			xdrs = &inxdrs;
12037c478bd9Sstevel@tonic-gate 			/* s_recvp->vaddr stores data */
120411606941Sjwahlig 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
12057c478bd9Sstevel@tonic-gate 			    wc.wc_bytes_xfer, XDR_DECODE);
12067c478bd9Sstevel@tonic-gate 
12077c478bd9Sstevel@tonic-gate 			/*
12087c478bd9Sstevel@tonic-gate 			 * Treat xid as opaque (xid is the first entity
12097c478bd9Sstevel@tonic-gate 			 * in the rpc rdma message).
12107c478bd9Sstevel@tonic-gate 			 */
121111606941Sjwahlig 			xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
12127c478bd9Sstevel@tonic-gate 			/* Skip xid and set the xdr position accordingly. */
12137c478bd9Sstevel@tonic-gate 			XDR_SETPOS(xdrs, sizeof (uint32_t));
12147c478bd9Sstevel@tonic-gate 			if (!xdr_u_int(xdrs, &vers) ||
1215*0a701b1eSRobert Gordon 			    !xdr_u_int(xdrs, &rdma_credit) ||
12167c478bd9Sstevel@tonic-gate 			    !xdr_u_int(xdrs, &op)) {
12177c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
121811606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
12197c478bd9Sstevel@tonic-gate 				XDR_DESTROY(xdrs);
12207c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
12217c478bd9Sstevel@tonic-gate 				continue;
12227c478bd9Sstevel@tonic-gate 			}
12237c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
12247c478bd9Sstevel@tonic-gate 
12257c478bd9Sstevel@tonic-gate 			if (vers != RPCRDMA_VERS) {
12267c478bd9Sstevel@tonic-gate 				/*
1227*0a701b1eSRobert Gordon 				 * Invalid RPC/RDMA version.
1228*0a701b1eSRobert Gordon 				 * Drop rpc rdma message.
12297c478bd9Sstevel@tonic-gate 				 */
12307c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
123111606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
12327c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
12337c478bd9Sstevel@tonic-gate 				continue;
12347c478bd9Sstevel@tonic-gate 			}
12357c478bd9Sstevel@tonic-gate 			/*
12367c478bd9Sstevel@tonic-gate 			 * Is this for RDMA_DONE?
12377c478bd9Sstevel@tonic-gate 			 */
12387c478bd9Sstevel@tonic-gate 			if (op == RDMA_DONE) {
12397c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
124011606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
12417c478bd9Sstevel@tonic-gate 				/*
12427c478bd9Sstevel@tonic-gate 				 * Wake up the thread waiting on
12437c478bd9Sstevel@tonic-gate 				 * a RDMA_DONE for xid
12447c478bd9Sstevel@tonic-gate 				 */
12457c478bd9Sstevel@tonic-gate 				mutex_enter(&qp->rdlist_lock);
12467c478bd9Sstevel@tonic-gate 				rdma_done_notify(qp, xid);
12477c478bd9Sstevel@tonic-gate 				mutex_exit(&qp->rdlist_lock);
12487c478bd9Sstevel@tonic-gate 				(void) rib_free_svc_recv(s_recvp);
12497c478bd9Sstevel@tonic-gate 				continue;
12507c478bd9Sstevel@tonic-gate 			}
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate 			mutex_enter(&plugin_state_lock);
12537c478bd9Sstevel@tonic-gate 			if (plugin_state == ACCEPT) {
1254*0a701b1eSRobert Gordon 				while ((mp = allocb(sizeof (*rdp), BPRI_LO))
1255*0a701b1eSRobert Gordon 				    == NULL)
1256*0a701b1eSRobert Gordon 					(void) strwaitbuf(
1257*0a701b1eSRobert Gordon 					    sizeof (*rdp), BPRI_LO);
12587c478bd9Sstevel@tonic-gate 				/*
12597c478bd9Sstevel@tonic-gate 				 * Plugin is in accept state, hence the master
12607c478bd9Sstevel@tonic-gate 				 * transport queue for this is still accepting
12617c478bd9Sstevel@tonic-gate 				 * requests. Hence we can call svc_queuereq to
12627c478bd9Sstevel@tonic-gate 				 * queue this recieved msg.
12637c478bd9Sstevel@tonic-gate 				 */
1264*0a701b1eSRobert Gordon 				rdp = (rdma_recv_data_t *)mp->b_rptr;
1265*0a701b1eSRobert Gordon 				rdp->conn = conn;
1266*0a701b1eSRobert Gordon 				rdp->rpcmsg.addr =
1267*0a701b1eSRobert Gordon 				    (caddr_t)(uintptr_t)s_recvp->vaddr;
1268*0a701b1eSRobert Gordon 				rdp->rpcmsg.type = RECV_BUFFER;
1269*0a701b1eSRobert Gordon 				rdp->rpcmsg.len = wc.wc_bytes_xfer;
1270*0a701b1eSRobert Gordon 				rdp->status = wc.wc_status;
12717c478bd9Sstevel@tonic-gate 				mutex_enter(&conn->c_lock);
12727c478bd9Sstevel@tonic-gate 				conn->c_ref++;
12737c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
1274*0a701b1eSRobert Gordon 				mp->b_wptr += sizeof (*rdp);
12757c478bd9Sstevel@tonic-gate 				svc_queuereq((queue_t *)rib_stat->q, mp);
12767c478bd9Sstevel@tonic-gate 				mutex_exit(&plugin_state_lock);
12777c478bd9Sstevel@tonic-gate 			} else {
12787c478bd9Sstevel@tonic-gate 				/*
12797c478bd9Sstevel@tonic-gate 				 * The master transport for this is going
12807c478bd9Sstevel@tonic-gate 				 * away and the queue is not accepting anymore
12817c478bd9Sstevel@tonic-gate 				 * requests for krpc, so don't do anything, just
12827c478bd9Sstevel@tonic-gate 				 * free the msg.
12837c478bd9Sstevel@tonic-gate 				 */
12847c478bd9Sstevel@tonic-gate 				mutex_exit(&plugin_state_lock);
12857c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
128611606941Sjwahlig 				    (void *)(uintptr_t)s_recvp->vaddr);
12877c478bd9Sstevel@tonic-gate 			}
12887c478bd9Sstevel@tonic-gate 		} else {
12897c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
129011606941Sjwahlig 			    (void *)(uintptr_t)s_recvp->vaddr);
12917c478bd9Sstevel@tonic-gate 		}
12927c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
12937c478bd9Sstevel@tonic-gate 	}
12947c478bd9Sstevel@tonic-gate }
12957c478bd9Sstevel@tonic-gate 
12967c478bd9Sstevel@tonic-gate /*
12977c478bd9Sstevel@tonic-gate  * Handles DR event of IBT_HCA_DETACH_EVENT.
12987c478bd9Sstevel@tonic-gate  */
12997c478bd9Sstevel@tonic-gate /* ARGSUSED */
13007c478bd9Sstevel@tonic-gate static void
13017c478bd9Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
13027c478bd9Sstevel@tonic-gate 	ibt_async_code_t code, ibt_async_event_t *event)
13037c478bd9Sstevel@tonic-gate {
13047c478bd9Sstevel@tonic-gate 
13057c478bd9Sstevel@tonic-gate 	switch (code) {
13067c478bd9Sstevel@tonic-gate 	case IBT_HCA_ATTACH_EVENT:
13077c478bd9Sstevel@tonic-gate 		/* ignore */
13087c478bd9Sstevel@tonic-gate 		break;
13097c478bd9Sstevel@tonic-gate 	case IBT_HCA_DETACH_EVENT:
13107c478bd9Sstevel@tonic-gate 	{
13117c478bd9Sstevel@tonic-gate 		ASSERT(rib_stat->hca->hca_hdl == hca_hdl);
13127c478bd9Sstevel@tonic-gate 		rib_detach_hca(rib_stat->hca);
13137c478bd9Sstevel@tonic-gate #ifdef DEBUG
13147c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n");
13157c478bd9Sstevel@tonic-gate #endif
13167c478bd9Sstevel@tonic-gate 		break;
13177c478bd9Sstevel@tonic-gate 	}
13187c478bd9Sstevel@tonic-gate #ifdef DEBUG
13197c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PATH_MIGRATED:
1320*0a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
1321*0a701b1eSRobert Gordon 		    "IBT_EVENT_PATH_MIGRATED\n");
13227c478bd9Sstevel@tonic-gate 		break;
13237c478bd9Sstevel@tonic-gate 	case IBT_EVENT_SQD:
13247c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n");
13257c478bd9Sstevel@tonic-gate 		break;
13267c478bd9Sstevel@tonic-gate 	case IBT_EVENT_COM_EST:
13277c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n");
13287c478bd9Sstevel@tonic-gate 		break;
13297c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CATASTROPHIC_CHAN:
1330*0a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
1331*0a701b1eSRobert Gordon 		    "IBT_ERROR_CATASTROPHIC_CHAN\n");
13327c478bd9Sstevel@tonic-gate 		break;
13337c478bd9Sstevel@tonic-gate 	case IBT_ERROR_INVALID_REQUEST_CHAN:
13347c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): "
13357c478bd9Sstevel@tonic-gate 		    "IBT_ERROR_INVALID_REQUEST_CHAN\n");
13367c478bd9Sstevel@tonic-gate 		break;
13377c478bd9Sstevel@tonic-gate 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
13387c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): "
13397c478bd9Sstevel@tonic-gate 		    "IBT_ERROR_ACCESS_VIOLATION_CHAN\n");
13407c478bd9Sstevel@tonic-gate 		break;
13417c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PATH_MIGRATE_REQ:
1342*0a701b1eSRobert Gordon 		cmn_err(CE_NOTE, "rib_async_handler(): "
1343*0a701b1eSRobert Gordon 		    "IBT_ERROR_PATH_MIGRATE_REQ\n");
13447c478bd9Sstevel@tonic-gate 		break;
13457c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CQ:
13467c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n");
13477c478bd9Sstevel@tonic-gate 		break;
13487c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PORT_DOWN:
13497c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n");
13507c478bd9Sstevel@tonic-gate 		break;
13517c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PORT_UP:
13527c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n");
13537c478bd9Sstevel@tonic-gate 		break;
13547c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE1:
13557c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n");
13567c478bd9Sstevel@tonic-gate 		break;
13577c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE2:
13587c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n");
13597c478bd9Sstevel@tonic-gate 		break;
13607c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE3:
13617c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n");
13627c478bd9Sstevel@tonic-gate 		break;
13637c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE4:
13647c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n");
13657c478bd9Sstevel@tonic-gate 		break;
13667c478bd9Sstevel@tonic-gate #endif
13677c478bd9Sstevel@tonic-gate 	default:
13687c478bd9Sstevel@tonic-gate 		break;
13697c478bd9Sstevel@tonic-gate 	}
13707c478bd9Sstevel@tonic-gate }
13717c478bd9Sstevel@tonic-gate 
13727c478bd9Sstevel@tonic-gate /*
13737c478bd9Sstevel@tonic-gate  * Client's reachable function.
13747c478bd9Sstevel@tonic-gate  */
13757c478bd9Sstevel@tonic-gate static rdma_stat
13767c478bd9Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle)
13777c478bd9Sstevel@tonic-gate {
13787c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
13797c478bd9Sstevel@tonic-gate 	rdma_stat	status;
13807c478bd9Sstevel@tonic-gate 
13817c478bd9Sstevel@tonic-gate 	/*
13827c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
13837c478bd9Sstevel@tonic-gate 	 */
13847c478bd9Sstevel@tonic-gate 	*handle = NULL;
13857c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
13867c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
13877c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
13887c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
13897c478bd9Sstevel@tonic-gate 	}
13907c478bd9Sstevel@tonic-gate 	status = rib_ping_srv(addr_type, raddr, &hca);
13917c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
13927c478bd9Sstevel@tonic-gate 
13937c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
13947c478bd9Sstevel@tonic-gate 		*handle = (void *)hca;
13957c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
13967c478bd9Sstevel@tonic-gate 	} else {
13977c478bd9Sstevel@tonic-gate 		*handle = NULL;
1398*0a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__pingfailed);
13997c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
14007c478bd9Sstevel@tonic-gate 	}
14017c478bd9Sstevel@tonic-gate }
14027c478bd9Sstevel@tonic-gate 
14037c478bd9Sstevel@tonic-gate /* Client side qp creation */
14047c478bd9Sstevel@tonic-gate static rdma_stat
14057c478bd9Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
14067c478bd9Sstevel@tonic-gate {
14077c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
14087c478bd9Sstevel@tonic-gate 	CONN		*conn;
1409*0a701b1eSRobert Gordon 	rdma_clnt_cred_ctrl_t *cc_info;
14107c478bd9Sstevel@tonic-gate 
14117c478bd9Sstevel@tonic-gate 	ASSERT(qp != NULL);
14127c478bd9Sstevel@tonic-gate 	*qp = NULL;
14137c478bd9Sstevel@tonic-gate 
14147c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
14157c478bd9Sstevel@tonic-gate 	conn = qptoc(kqp);
14167c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
14177c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
14187c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
14197c478bd9Sstevel@tonic-gate 
14207c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_CLIENT;
14217c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
14227c478bd9Sstevel@tonic-gate 	conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP);
14237c478bd9Sstevel@tonic-gate 	bcopy(raddr->buf, conn->c_raddr.buf, raddr->len);
14247c478bd9Sstevel@tonic-gate 	conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len;
14257c478bd9Sstevel@tonic-gate 	/*
14267c478bd9Sstevel@tonic-gate 	 * Initialize
14277c478bd9Sstevel@tonic-gate 	 */
14287c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
14297c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
14307c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
14317c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
14327c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
14337c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
14347c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
14357c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
1436*0a701b1eSRobert Gordon 	/*
1437*0a701b1eSRobert Gordon 	 * Initialize the client credit control
1438*0a701b1eSRobert Gordon 	 * portion of the rdmaconn struct.
1439*0a701b1eSRobert Gordon 	 */
1440*0a701b1eSRobert Gordon 	kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT;
1441*0a701b1eSRobert Gordon 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
1442*0a701b1eSRobert Gordon 	cc_info->clnt_cc_granted_ops = 0;
1443*0a701b1eSRobert Gordon 	cc_info->clnt_cc_in_flight_ops = 0;
1444*0a701b1eSRobert Gordon 	cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL);
14457c478bd9Sstevel@tonic-gate 
14467c478bd9Sstevel@tonic-gate 	*qp = kqp;
14477c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
14487c478bd9Sstevel@tonic-gate }
14497c478bd9Sstevel@tonic-gate 
14507c478bd9Sstevel@tonic-gate /* Server side qp creation */
14517c478bd9Sstevel@tonic-gate static rdma_stat
14527c478bd9Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp)
14537c478bd9Sstevel@tonic-gate {
14547c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
14557c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
14567c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
14577c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
1458*0a701b1eSRobert Gordon 	rdma_srv_cred_ctrl_t *cc_info;
14597c478bd9Sstevel@tonic-gate 
14607c478bd9Sstevel@tonic-gate 	*qp = NULL;
14617c478bd9Sstevel@tonic-gate 
14627c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
14637c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
14647c478bd9Sstevel@tonic-gate 	kqp->port_num = port;
14657c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
14667c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
14677c478bd9Sstevel@tonic-gate 
14687c478bd9Sstevel@tonic-gate 	/*
14697c478bd9Sstevel@tonic-gate 	 * Create the qp handle
14707c478bd9Sstevel@tonic-gate 	 */
14717c478bd9Sstevel@tonic-gate 	bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
14727c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl;
14737c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl;
14747c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
14757c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = port;
14767c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
14777c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
14787c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
14797c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
14807c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
14817c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
14827c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
14837c478bd9Sstevel@tonic-gate 
14847c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
14857c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
14867c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
14877c478bd9Sstevel@tonic-gate 		    IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl,
14887c478bd9Sstevel@tonic-gate 		    &chan_sizes);
14897c478bd9Sstevel@tonic-gate 	} else {
14907c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
14917c478bd9Sstevel@tonic-gate 		goto fail;
14927c478bd9Sstevel@tonic-gate 	}
14937c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
14947c478bd9Sstevel@tonic-gate 
14957c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
1496*0a701b1eSRobert Gordon 		DTRACE_PROBE1(rpcib__i_svccreatechanfail,
1497*0a701b1eSRobert Gordon 		    int, ibt_status);
14987c478bd9Sstevel@tonic-gate 		goto fail;
14997c478bd9Sstevel@tonic-gate 	}
15007c478bd9Sstevel@tonic-gate 
15017c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_SERVER;
15027c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
15037c478bd9Sstevel@tonic-gate 	kqp->q = q;	/* server ONLY */
15047c478bd9Sstevel@tonic-gate 
15057c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
15067c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
15077c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
15087c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
15097c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
15107c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
15117c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
15127c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
15137c478bd9Sstevel@tonic-gate 	/*
15147c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
15157c478bd9Sstevel@tonic-gate 	 */
15167c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
15177c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_state = C_CONNECTED;
1518*0a701b1eSRobert Gordon 
1519*0a701b1eSRobert Gordon 	/*
1520*0a701b1eSRobert Gordon 	 * Initialize the server credit control
1521*0a701b1eSRobert Gordon 	 * portion of the rdmaconn struct.
1522*0a701b1eSRobert Gordon 	 */
1523*0a701b1eSRobert Gordon 	kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
1524*0a701b1eSRobert Gordon 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
1525*0a701b1eSRobert Gordon 	cc_info->srv_cc_buffers_granted = preposted_rbufs;
1526*0a701b1eSRobert Gordon 	cc_info->srv_cc_cur_buffers_used = 0;
1527*0a701b1eSRobert Gordon 	cc_info->srv_cc_posted = preposted_rbufs;
1528*0a701b1eSRobert Gordon 
15297c478bd9Sstevel@tonic-gate 	*qp = kqp;
1530*0a701b1eSRobert Gordon 
15317c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
15327c478bd9Sstevel@tonic-gate fail:
15337c478bd9Sstevel@tonic-gate 	if (kqp)
15347c478bd9Sstevel@tonic-gate 		kmem_free(kqp, sizeof (rib_qp_t));
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
15377c478bd9Sstevel@tonic-gate }
15387c478bd9Sstevel@tonic-gate 
15397c478bd9Sstevel@tonic-gate /* ARGSUSED */
15407c478bd9Sstevel@tonic-gate ibt_cm_status_t
15417c478bd9Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event,
15427c478bd9Sstevel@tonic-gate     ibt_cm_return_args_t *ret_args, void *priv_data,
15437c478bd9Sstevel@tonic-gate     ibt_priv_data_len_t len)
15447c478bd9Sstevel@tonic-gate {
15457c478bd9Sstevel@tonic-gate 	rpcib_state_t   *ribstat;
15467c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
15477c478bd9Sstevel@tonic-gate 
15487c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)clnt_hdl;
15497c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
15507c478bd9Sstevel@tonic-gate 
15517c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
15527c478bd9Sstevel@tonic-gate 
15537c478bd9Sstevel@tonic-gate 	/* got a connection close event */
15547c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
15557c478bd9Sstevel@tonic-gate 	{
15567c478bd9Sstevel@tonic-gate 		CONN	*conn;
15577c478bd9Sstevel@tonic-gate 		rib_qp_t *qp;
15587c478bd9Sstevel@tonic-gate 
15597c478bd9Sstevel@tonic-gate 		/* check reason why connection was closed */
15607c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
15617c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
15627c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
15637c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
15647c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
15657c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
15667c478bd9Sstevel@tonic-gate 			/*
15677c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
15687c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
15697c478bd9Sstevel@tonic-gate 			 */
15707c478bd9Sstevel@tonic-gate 			break;
15717c478bd9Sstevel@tonic-gate 		default:
15727c478bd9Sstevel@tonic-gate 			/*
15737c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
15747c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
15757c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
15767c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
15777c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
15787c478bd9Sstevel@tonic-gate 			 * state
15797c478bd9Sstevel@tonic-gate 			 */
15807c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
15817c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
15827c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
15837c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
15847c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
15857c478bd9Sstevel@tonic-gate 				break;
15867c478bd9Sstevel@tonic-gate 			}
15877c478bd9Sstevel@tonic-gate 
1588*0a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
15897c478bd9Sstevel@tonic-gate 
15907c478bd9Sstevel@tonic-gate 			/*
15917c478bd9Sstevel@tonic-gate 			 * Free the rc_channel. Channel has already
15927c478bd9Sstevel@tonic-gate 			 * transitioned to ERROR state and WRs have been
15937c478bd9Sstevel@tonic-gate 			 * FLUSHED_ERR already.
15947c478bd9Sstevel@tonic-gate 			 */
15957c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
15967c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate 			/*
15997c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref is down to 0 already
16007c478bd9Sstevel@tonic-gate 			 */
16017c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
16027c478bd9Sstevel@tonic-gate 				/*
16037c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
16047c478bd9Sstevel@tonic-gate 				 */
16057c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
16067c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
16077c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
16087c478bd9Sstevel@tonic-gate 				    &hca->cl_conn_list);
16097c478bd9Sstevel@tonic-gate 			} else {
16107c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
16117c478bd9Sstevel@tonic-gate 			}
16127c478bd9Sstevel@tonic-gate #ifdef DEBUG
16137c478bd9Sstevel@tonic-gate 			if (rib_debug)
16147c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_clnt_cm_handler: "
16157c478bd9Sstevel@tonic-gate 				    "(CONN_CLOSED) channel disconnected");
16167c478bd9Sstevel@tonic-gate #endif
16177c478bd9Sstevel@tonic-gate 			break;
16187c478bd9Sstevel@tonic-gate 		}
16197c478bd9Sstevel@tonic-gate 		break;
16207c478bd9Sstevel@tonic-gate 	}
16217c478bd9Sstevel@tonic-gate 	default:
16227c478bd9Sstevel@tonic-gate 		break;
16237c478bd9Sstevel@tonic-gate 	}
16247c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
16257c478bd9Sstevel@tonic-gate }
16267c478bd9Sstevel@tonic-gate 
1627*0a701b1eSRobert Gordon /* Check server ib address */
16287c478bd9Sstevel@tonic-gate rdma_stat
1629*0a701b1eSRobert Gordon rib_chk_srv_ibaddr(struct netbuf *raddr,
1630*0a701b1eSRobert Gordon 	int addr_type, ibt_path_info_t *path, ibt_ip_addr_t *s_ip,
1631*0a701b1eSRobert Gordon 	ibt_ip_addr_t *d_ip)
16327c478bd9Sstevel@tonic-gate {
16337c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin4;
16347c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
16357c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
1636*0a701b1eSRobert Gordon 	ibt_ip_path_attr_t	ipattr;
1637*0a701b1eSRobert Gordon 	uint8_t npaths = 0;
1638*0a701b1eSRobert Gordon 	ibt_path_ip_src_t	srcip;
16397c478bd9Sstevel@tonic-gate 
1640*0a701b1eSRobert Gordon 	ASSERT(raddr->buf != NULL);
1641*0a701b1eSRobert Gordon 
16427c478bd9Sstevel@tonic-gate 	(void) bzero(path, sizeof (ibt_path_info_t));
16437c478bd9Sstevel@tonic-gate 
16447c478bd9Sstevel@tonic-gate 	switch (addr_type) {
16457c478bd9Sstevel@tonic-gate 	case AF_INET:
16467c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)raddr->buf;
1647*0a701b1eSRobert Gordon 		d_ip->family = AF_INET;
1648*0a701b1eSRobert Gordon 		d_ip->un.ip4addr = htonl(sin4->sin_addr.s_addr);
16497c478bd9Sstevel@tonic-gate 		break;
16507c478bd9Sstevel@tonic-gate 
16517c478bd9Sstevel@tonic-gate 	case AF_INET6:
16527c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)raddr->buf;
1653*0a701b1eSRobert Gordon 		d_ip->family = AF_INET6;
1654*0a701b1eSRobert Gordon 		d_ip->un.ip6addr = sin6->sin6_addr;
16557c478bd9Sstevel@tonic-gate 		break;
16567c478bd9Sstevel@tonic-gate 
16577c478bd9Sstevel@tonic-gate 	default:
16587c478bd9Sstevel@tonic-gate 		return (RDMA_INVAL);
16597c478bd9Sstevel@tonic-gate 	}
16607c478bd9Sstevel@tonic-gate 
1661*0a701b1eSRobert Gordon 	bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
1662*0a701b1eSRobert Gordon 	bzero(&srcip, sizeof (ibt_path_ip_src_t));
16637c478bd9Sstevel@tonic-gate 
1664*0a701b1eSRobert Gordon 	ipattr.ipa_dst_ip 	= d_ip;
1665*0a701b1eSRobert Gordon 	ipattr.ipa_hca_guid 	= rib_stat->hca->hca_guid;
1666*0a701b1eSRobert Gordon 	ipattr.ipa_ndst		= 1;
1667*0a701b1eSRobert Gordon 	ipattr.ipa_max_paths	= 1;
1668*0a701b1eSRobert Gordon 	npaths = 0;
16697c478bd9Sstevel@tonic-gate 
1670*0a701b1eSRobert Gordon 	ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
1671*0a701b1eSRobert Gordon 	    IBT_PATH_NO_FLAGS,
1672*0a701b1eSRobert Gordon 	    &ipattr,
1673*0a701b1eSRobert Gordon 	    path,
1674*0a701b1eSRobert Gordon 	    &npaths,
1675*0a701b1eSRobert Gordon 	    &srcip);
1676*0a701b1eSRobert Gordon 
1677*0a701b1eSRobert Gordon 	if (ibt_status != IBT_SUCCESS ||
1678*0a701b1eSRobert Gordon 	    npaths < 1 ||
1679*0a701b1eSRobert Gordon 	    path->pi_hca_guid != rib_stat->hca->hca_guid) {
1680*0a701b1eSRobert Gordon 
1681*0a701b1eSRobert Gordon 		bzero(s_ip, sizeof (ibt_path_ip_src_t));
16827c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
16837c478bd9Sstevel@tonic-gate 	}
16847c478bd9Sstevel@tonic-gate 
1685*0a701b1eSRobert Gordon 	if (srcip.ip_primary.family == AF_INET) {
1686*0a701b1eSRobert Gordon 		s_ip->family = AF_INET;
1687*0a701b1eSRobert Gordon 		s_ip->un.ip4addr = htonl(srcip.ip_primary.un.ip4addr);
1688*0a701b1eSRobert Gordon 	} else {
1689*0a701b1eSRobert Gordon 		s_ip->family = AF_INET6;
1690*0a701b1eSRobert Gordon 		s_ip->un.ip6addr = srcip.ip_primary.un.ip6addr;
1691*0a701b1eSRobert Gordon 	}
1692*0a701b1eSRobert Gordon 
1693*0a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
1694*0a701b1eSRobert Gordon }
1695*0a701b1eSRobert Gordon 
16967c478bd9Sstevel@tonic-gate 
16977c478bd9Sstevel@tonic-gate /*
16987c478bd9Sstevel@tonic-gate  * Connect to the server.
16997c478bd9Sstevel@tonic-gate  */
17007c478bd9Sstevel@tonic-gate rdma_stat
1701*0a701b1eSRobert Gordon rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, ibt_path_info_t *path,
1702*0a701b1eSRobert Gordon 		ibt_ip_addr_t *s_ip, ibt_ip_addr_t *d_ip)
17037c478bd9Sstevel@tonic-gate {
17047c478bd9Sstevel@tonic-gate 	ibt_chan_open_args_t	chan_args;	/* channel args */
17057c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
17067c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
17077c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
17087c478bd9Sstevel@tonic-gate 	ibt_rc_returns_t	ret_args;   	/* conn reject info */
17097c478bd9Sstevel@tonic-gate 	int refresh = REFRESH_ATTEMPTS;	/* refresh if IBT_CM_CONN_STALE */
1710*0a701b1eSRobert Gordon 	ibt_ip_cm_info_t	ipcm_info;
1711*0a701b1eSRobert Gordon 	uint8_t cmp_ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ];
1712*0a701b1eSRobert Gordon 
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 	(void) bzero(&chan_args, sizeof (chan_args));
17157c478bd9Sstevel@tonic-gate 	(void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
1716*0a701b1eSRobert Gordon 	(void) bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
1717*0a701b1eSRobert Gordon 
1718*0a701b1eSRobert Gordon 	switch (ipcm_info.src_addr.family = s_ip->family) {
1719*0a701b1eSRobert Gordon 	case AF_INET:
1720*0a701b1eSRobert Gordon 		ipcm_info.src_addr.un.ip4addr = s_ip->un.ip4addr;
1721*0a701b1eSRobert Gordon 		break;
1722*0a701b1eSRobert Gordon 	case AF_INET6:
1723*0a701b1eSRobert Gordon 		ipcm_info.src_addr.un.ip6addr = s_ip->un.ip6addr;
1724*0a701b1eSRobert Gordon 		break;
1725*0a701b1eSRobert Gordon 	}
1726*0a701b1eSRobert Gordon 
1727*0a701b1eSRobert Gordon 	switch (ipcm_info.dst_addr.family = d_ip->family) {
1728*0a701b1eSRobert Gordon 	case AF_INET:
1729*0a701b1eSRobert Gordon 		ipcm_info.dst_addr.un.ip4addr = d_ip->un.ip4addr;
1730*0a701b1eSRobert Gordon 		break;
1731*0a701b1eSRobert Gordon 	case AF_INET6:
1732*0a701b1eSRobert Gordon 		ipcm_info.dst_addr.un.ip6addr = d_ip->un.ip6addr;
1733*0a701b1eSRobert Gordon 		break;
1734*0a701b1eSRobert Gordon 	}
1735*0a701b1eSRobert Gordon 
1736*0a701b1eSRobert Gordon 	ipcm_info.src_port = NFS_RDMA_PORT;
1737*0a701b1eSRobert Gordon 
1738*0a701b1eSRobert Gordon 	ibt_status = ibt_format_ip_private_data(&ipcm_info,
1739*0a701b1eSRobert Gordon 	    IBT_IP_HDR_PRIV_DATA_SZ, cmp_ip_pvt);
1740*0a701b1eSRobert Gordon 
1741*0a701b1eSRobert Gordon 	if (ibt_status != IBT_SUCCESS) {
1742*0a701b1eSRobert Gordon 		cmn_err(CE_WARN, "ibt_format_ip_private_data failed\n");
1743*0a701b1eSRobert Gordon 		return (-1);
1744*0a701b1eSRobert Gordon 	}
17457c478bd9Sstevel@tonic-gate 
17467c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = path->pi_prim_cep_path.cep_hca_port_num;
17477c478bd9Sstevel@tonic-gate 	/* Alloc a RC channel */
17487c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl;
17497c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl;
17507c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
17517c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
17527c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
17537c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
17547c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
17557c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
17567c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
17577c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
17587c478bd9Sstevel@tonic-gate 
1759*0a701b1eSRobert Gordon 	path->pi_sid = ibt_get_ip_sid(IPPROTO_TCP, NFS_RDMA_PORT);
17607c478bd9Sstevel@tonic-gate 	chan_args.oc_path = path;
17617c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_handler = rib_clnt_cm_handler;
17627c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_clnt_private = (void *)rib_stat;
1763*0a701b1eSRobert Gordon 	chan_args.oc_rdma_ra_out = 4;
1764*0a701b1eSRobert Gordon 	chan_args.oc_rdma_ra_in = 4;
17657c478bd9Sstevel@tonic-gate 	chan_args.oc_path_retry_cnt = 2;
17667c478bd9Sstevel@tonic-gate 	chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
1767*0a701b1eSRobert Gordon 	chan_args.oc_priv_data = cmp_ip_pvt;
1768*0a701b1eSRobert Gordon 	chan_args.oc_priv_data_len = IBT_IP_HDR_PRIV_DATA_SZ;
17697c478bd9Sstevel@tonic-gate 
17707c478bd9Sstevel@tonic-gate refresh:
17717c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
17727c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
17737c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
1774*0a701b1eSRobert Gordon 		    IBT_ACHAN_NO_FLAGS,
1775*0a701b1eSRobert Gordon 		    &qp_attr, &qp->qp_hdl,
17767c478bd9Sstevel@tonic-gate 		    &chan_sizes);
17777c478bd9Sstevel@tonic-gate 	} else {
17787c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
17797c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17807c478bd9Sstevel@tonic-gate 	}
17817c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
17827c478bd9Sstevel@tonic-gate 
17837c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
1784*0a701b1eSRobert Gordon 		DTRACE_PROBE1(rpcib__i_conntosrv,
1785*0a701b1eSRobert Gordon 		    int, ibt_status);
17867c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17877c478bd9Sstevel@tonic-gate 	}
17887c478bd9Sstevel@tonic-gate 
17897c478bd9Sstevel@tonic-gate 	/* Connect to the Server */
17907c478bd9Sstevel@tonic-gate 	(void) bzero(&ret_args, sizeof (ret_args));
17917c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->cb_lock);
17927c478bd9Sstevel@tonic-gate 	ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS,
17937c478bd9Sstevel@tonic-gate 	    IBT_BLOCKING, &chan_args, &ret_args);
17947c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
1795*0a701b1eSRobert Gordon 		DTRACE_PROBE2(rpcib__i_openrctosrv,
1796*0a701b1eSRobert Gordon 		    int, ibt_status, int, ret_args.rc_status);
1797*0a701b1eSRobert Gordon 
17987c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
17997c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
18007c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->cb_lock);
18017c478bd9Sstevel@tonic-gate 		if (refresh-- && ibt_status == IBT_CM_FAILURE &&
18027c478bd9Sstevel@tonic-gate 		    ret_args.rc_status == IBT_CM_CONN_STALE) {
18037c478bd9Sstevel@tonic-gate 			/*
18047c478bd9Sstevel@tonic-gate 			 * Got IBT_CM_CONN_STALE probably because of stale
18057c478bd9Sstevel@tonic-gate 			 * data on the passive end of a channel that existed
18067c478bd9Sstevel@tonic-gate 			 * prior to reboot. Retry establishing a channel
18077c478bd9Sstevel@tonic-gate 			 * REFRESH_ATTEMPTS times, during which time the
18087c478bd9Sstevel@tonic-gate 			 * stale conditions on the server might clear up.
18097c478bd9Sstevel@tonic-gate 			 */
18107c478bd9Sstevel@tonic-gate 			goto refresh;
18117c478bd9Sstevel@tonic-gate 		}
18127c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
18137c478bd9Sstevel@tonic-gate 	}
18147c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->cb_lock);
18157c478bd9Sstevel@tonic-gate 	/*
18167c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
18177c478bd9Sstevel@tonic-gate 	 */
18187c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(qp->qp_hdl, (void *)qp);
18197c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
18207c478bd9Sstevel@tonic-gate }
18217c478bd9Sstevel@tonic-gate 
18227c478bd9Sstevel@tonic-gate rdma_stat
18237c478bd9Sstevel@tonic-gate rib_ping_srv(int addr_type, struct netbuf *raddr, rib_hca_t **hca)
18247c478bd9Sstevel@tonic-gate {
1825*0a701b1eSRobert Gordon 	struct sockaddr_in	*sin4, *sin4arr;
1826*0a701b1eSRobert Gordon 	struct sockaddr_in6	*sin6, *sin6arr;
1827*0a701b1eSRobert Gordon 	uint_t			nif, nif4, nif6, i;
18287c478bd9Sstevel@tonic-gate 	ibt_path_info_t		path;
18297c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
1830*0a701b1eSRobert Gordon 	uint8_t			num_paths_p;
1831*0a701b1eSRobert Gordon 	ibt_ip_path_attr_t	ipattr;
1832*0a701b1eSRobert Gordon 	ibt_ip_addr_t		dstip;
1833*0a701b1eSRobert Gordon 	ibt_path_ip_src_t	srcip;
1834*0a701b1eSRobert Gordon 
1835*0a701b1eSRobert Gordon 
1836*0a701b1eSRobert Gordon 	*hca = NULL;
18377c478bd9Sstevel@tonic-gate 
18387c478bd9Sstevel@tonic-gate 	ASSERT(raddr->buf != NULL);
18397c478bd9Sstevel@tonic-gate 
18407c478bd9Sstevel@tonic-gate 	bzero(&path, sizeof (ibt_path_info_t));
1841*0a701b1eSRobert Gordon 	bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
1842*0a701b1eSRobert Gordon 	bzero(&srcip, sizeof (ibt_path_ip_src_t));
18437c478bd9Sstevel@tonic-gate 
1844*0a701b1eSRobert Gordon 	/* Obtain the source IP addresses for the system */
1845*0a701b1eSRobert Gordon 	nif = rpcib_get_number_interfaces();
1846*0a701b1eSRobert Gordon 	sin4arr = (struct sockaddr_in *)
1847*0a701b1eSRobert Gordon 	    kmem_zalloc(sizeof (struct sockaddr_in) * nif, KM_SLEEP);
1848*0a701b1eSRobert Gordon 	sin6arr = (struct sockaddr_in6 *)
1849*0a701b1eSRobert Gordon 	    kmem_zalloc(sizeof (struct sockaddr_in6) * nif, KM_SLEEP);
1850*0a701b1eSRobert Gordon 
1851*0a701b1eSRobert Gordon 	(void) rpcib_get_ib_addresses(sin4arr, sin6arr, &nif4, &nif6);
1852*0a701b1eSRobert Gordon 
1853*0a701b1eSRobert Gordon 	/* Are there really any IB interfaces available */
1854*0a701b1eSRobert Gordon 	if (nif4 == 0 && nif6 == 0) {
1855*0a701b1eSRobert Gordon 		kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif);
1856*0a701b1eSRobert Gordon 		kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif);
1857*0a701b1eSRobert Gordon 		return (RDMA_FAILED);
1858*0a701b1eSRobert Gordon 	}
1859*0a701b1eSRobert Gordon 
1860*0a701b1eSRobert Gordon 	/* Prep the destination address */
18617c478bd9Sstevel@tonic-gate 	switch (addr_type) {
18627c478bd9Sstevel@tonic-gate 	case AF_INET:
18637c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)raddr->buf;
1864*0a701b1eSRobert Gordon 		dstip.family = AF_INET;
1865*0a701b1eSRobert Gordon 		dstip.un.ip4addr = htonl(sin4->sin_addr.s_addr);
1866*0a701b1eSRobert Gordon 
1867*0a701b1eSRobert Gordon 		for (i = 0; i < nif4; i++) {
1868*0a701b1eSRobert Gordon 			num_paths_p = 0;
1869*0a701b1eSRobert Gordon 			ipattr.ipa_dst_ip 	= &dstip;
1870*0a701b1eSRobert Gordon 			ipattr.ipa_hca_guid	= rib_stat->hca->hca_guid;
1871*0a701b1eSRobert Gordon 			ipattr.ipa_ndst		= 1;
1872*0a701b1eSRobert Gordon 			ipattr.ipa_max_paths	= 1;
1873*0a701b1eSRobert Gordon 			ipattr.ipa_src_ip.family = dstip.family;
1874*0a701b1eSRobert Gordon 			ipattr.ipa_src_ip.un.ip4addr =
1875*0a701b1eSRobert Gordon 			    htonl(sin4arr[i].sin_addr.s_addr);
1876*0a701b1eSRobert Gordon 
1877*0a701b1eSRobert Gordon 			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
1878*0a701b1eSRobert Gordon 			    IBT_PATH_NO_FLAGS,
1879*0a701b1eSRobert Gordon 			    &ipattr,
1880*0a701b1eSRobert Gordon 			    &path,
1881*0a701b1eSRobert Gordon 			    &num_paths_p,
1882*0a701b1eSRobert Gordon 			    &srcip);
1883*0a701b1eSRobert Gordon 			if (ibt_status == IBT_SUCCESS &&
1884*0a701b1eSRobert Gordon 			    num_paths_p != 0 &&
1885*0a701b1eSRobert Gordon 			    path.pi_hca_guid == rib_stat->hca->hca_guid) {
1886*0a701b1eSRobert Gordon 				*hca = rib_stat->hca;
1887*0a701b1eSRobert Gordon 
1888*0a701b1eSRobert Gordon 				kmem_free(sin4arr,
1889*0a701b1eSRobert Gordon 				    sizeof (struct sockaddr_in) * nif);
1890*0a701b1eSRobert Gordon 				kmem_free(sin6arr,
1891*0a701b1eSRobert Gordon 				    sizeof (struct sockaddr_in6) * nif);
1892*0a701b1eSRobert Gordon 
1893*0a701b1eSRobert Gordon 				return (RDMA_SUCCESS);
1894*0a701b1eSRobert Gordon 			}
1895*0a701b1eSRobert Gordon 		}
18967c478bd9Sstevel@tonic-gate 		break;
18977c478bd9Sstevel@tonic-gate 
18987c478bd9Sstevel@tonic-gate 	case AF_INET6:
18997c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)raddr->buf;
1900*0a701b1eSRobert Gordon 		dstip.family = AF_INET6;
1901*0a701b1eSRobert Gordon 		dstip.un.ip6addr = sin6->sin6_addr;
1902*0a701b1eSRobert Gordon 
1903*0a701b1eSRobert Gordon 		for (i = 0; i < nif6; i++) {
1904*0a701b1eSRobert Gordon 			num_paths_p = 0;
1905*0a701b1eSRobert Gordon 			ipattr.ipa_dst_ip 	= &dstip;
1906*0a701b1eSRobert Gordon 			ipattr.ipa_hca_guid	= rib_stat->hca->hca_guid;
1907*0a701b1eSRobert Gordon 			ipattr.ipa_ndst		= 1;
1908*0a701b1eSRobert Gordon 			ipattr.ipa_max_paths	= 1;
1909*0a701b1eSRobert Gordon 			ipattr.ipa_src_ip.family = dstip.family;
1910*0a701b1eSRobert Gordon 			ipattr.ipa_src_ip.un.ip6addr = sin6arr[i].sin6_addr;
1911*0a701b1eSRobert Gordon 
1912*0a701b1eSRobert Gordon 			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
1913*0a701b1eSRobert Gordon 			    IBT_PATH_NO_FLAGS,
1914*0a701b1eSRobert Gordon 			    &ipattr,
1915*0a701b1eSRobert Gordon 			    &path,
1916*0a701b1eSRobert Gordon 			    &num_paths_p,
1917*0a701b1eSRobert Gordon 			    &srcip);
1918*0a701b1eSRobert Gordon 			if (ibt_status == IBT_SUCCESS &&
1919*0a701b1eSRobert Gordon 			    num_paths_p != 0 &&
1920*0a701b1eSRobert Gordon 			    path.pi_hca_guid == rib_stat->hca->hca_guid) {
1921*0a701b1eSRobert Gordon 				*hca = rib_stat->hca;
1922*0a701b1eSRobert Gordon 
1923*0a701b1eSRobert Gordon 				kmem_free(sin4arr,
1924*0a701b1eSRobert Gordon 				    sizeof (struct sockaddr_in) * nif);
1925*0a701b1eSRobert Gordon 				kmem_free(sin6arr,
1926*0a701b1eSRobert Gordon 				    sizeof (struct sockaddr_in6) * nif);
1927*0a701b1eSRobert Gordon 
1928*0a701b1eSRobert Gordon 				return (RDMA_SUCCESS);
1929*0a701b1eSRobert Gordon 			}
1930*0a701b1eSRobert Gordon 		}
1931*0a701b1eSRobert Gordon 
19327c478bd9Sstevel@tonic-gate 		break;
19337c478bd9Sstevel@tonic-gate 
19347c478bd9Sstevel@tonic-gate 	default:
1935*0a701b1eSRobert Gordon 		kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif);
1936*0a701b1eSRobert Gordon 		kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif);
19377c478bd9Sstevel@tonic-gate 		return (RDMA_INVAL);
19387c478bd9Sstevel@tonic-gate 	}
19397c478bd9Sstevel@tonic-gate 
1940*0a701b1eSRobert Gordon 	kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif);
1941*0a701b1eSRobert Gordon 	kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif);
19427c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
19437c478bd9Sstevel@tonic-gate }
19447c478bd9Sstevel@tonic-gate 
19457c478bd9Sstevel@tonic-gate /*
19467c478bd9Sstevel@tonic-gate  * Close channel, remove from connection list and
19477c478bd9Sstevel@tonic-gate  * free up resources allocated for that channel.
19487c478bd9Sstevel@tonic-gate  */
19497c478bd9Sstevel@tonic-gate rdma_stat
19507c478bd9Sstevel@tonic-gate rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
19517c478bd9Sstevel@tonic-gate {
19527c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
19537c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
19547c478bd9Sstevel@tonic-gate 
19557c478bd9Sstevel@tonic-gate 	/*
19567c478bd9Sstevel@tonic-gate 	 * c_ref == 0 and connection is in C_DISCONN_PEND
19577c478bd9Sstevel@tonic-gate 	 */
19587c478bd9Sstevel@tonic-gate 	hca = qp->hca;
19597c478bd9Sstevel@tonic-gate 	if (conn_list != NULL)
19607c478bd9Sstevel@tonic-gate 		(void) rib_rm_conn(conn, conn_list);
1961*0a701b1eSRobert Gordon 
19627c478bd9Sstevel@tonic-gate 	if (qp->qp_hdl != NULL) {
19637c478bd9Sstevel@tonic-gate 		/*
19647c478bd9Sstevel@tonic-gate 		 * If the channel has not been establised,
19657c478bd9Sstevel@tonic-gate 		 * ibt_flush_channel is called to flush outstanding WRs
19667c478bd9Sstevel@tonic-gate 		 * on the Qs.  Otherwise, ibt_close_rc_channel() is
19677c478bd9Sstevel@tonic-gate 		 * called.  The channel is then freed.
19687c478bd9Sstevel@tonic-gate 		 */
19697c478bd9Sstevel@tonic-gate 		if (conn_list != NULL)
19707c478bd9Sstevel@tonic-gate 			(void) ibt_close_rc_channel(qp->qp_hdl,
19717c478bd9Sstevel@tonic-gate 			    IBT_BLOCKING, NULL, 0, NULL, NULL, 0);
19727c478bd9Sstevel@tonic-gate 		else
19737c478bd9Sstevel@tonic-gate 			(void) ibt_flush_channel(qp->qp_hdl);
19747c478bd9Sstevel@tonic-gate 
19757c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
19767c478bd9Sstevel@tonic-gate 		while (qp->n_posted_rbufs)
19777c478bd9Sstevel@tonic-gate 			cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock);
19787c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
19797c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
19807c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
19817c478bd9Sstevel@tonic-gate 	}
1982*0a701b1eSRobert Gordon 
19837c478bd9Sstevel@tonic-gate 	ASSERT(qp->rdlist == NULL);
1984*0a701b1eSRobert Gordon 
19857c478bd9Sstevel@tonic-gate 	if (qp->replylist != NULL) {
19867c478bd9Sstevel@tonic-gate 		(void) rib_rem_replylist(qp);
19877c478bd9Sstevel@tonic-gate 	}
19887c478bd9Sstevel@tonic-gate 
19897c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->cb_conn_cv);
19907c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->posted_rbufs_cv);
19917c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->cb_lock);
19927c478bd9Sstevel@tonic-gate 
19937c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->replylist_lock);
19947c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->posted_rbufs_lock);
19957c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->rdlist_lock);
19967c478bd9Sstevel@tonic-gate 
19977c478bd9Sstevel@tonic-gate 	cv_destroy(&conn->c_cv);
19987c478bd9Sstevel@tonic-gate 	mutex_destroy(&conn->c_lock);
19997c478bd9Sstevel@tonic-gate 
20007c478bd9Sstevel@tonic-gate 	if (conn->c_raddr.buf != NULL) {
20017c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
20027c478bd9Sstevel@tonic-gate 	}
20037c478bd9Sstevel@tonic-gate 	if (conn->c_laddr.buf != NULL) {
20047c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
20057c478bd9Sstevel@tonic-gate 	}
2006*0a701b1eSRobert Gordon 
2007*0a701b1eSRobert Gordon 	/*
2008*0a701b1eSRobert Gordon 	 * Credit control cleanup.
2009*0a701b1eSRobert Gordon 	 */
2010*0a701b1eSRobert Gordon 	if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
2011*0a701b1eSRobert Gordon 		rdma_clnt_cred_ctrl_t *cc_info;
2012*0a701b1eSRobert Gordon 		cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
2013*0a701b1eSRobert Gordon 		cv_destroy(&cc_info->clnt_cc_cv);
2014*0a701b1eSRobert Gordon 	}
2015*0a701b1eSRobert Gordon 
20167c478bd9Sstevel@tonic-gate 	kmem_free(qp, sizeof (rib_qp_t));
20177c478bd9Sstevel@tonic-gate 
20187c478bd9Sstevel@tonic-gate 	/*
20197c478bd9Sstevel@tonic-gate 	 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
20207c478bd9Sstevel@tonic-gate 	 * then the hca is no longer being used.
20217c478bd9Sstevel@tonic-gate 	 */
20227c478bd9Sstevel@tonic-gate 	if (conn_list != NULL) {
20237c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
20247c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
20257c478bd9Sstevel@tonic-gate 			rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
20267c478bd9Sstevel@tonic-gate 			if (hca->srv_conn_list.conn_hd == NULL) {
20277c478bd9Sstevel@tonic-gate 				rw_enter(&hca->cl_conn_list.conn_lock,
20287c478bd9Sstevel@tonic-gate 				    RW_READER);
2029*0a701b1eSRobert Gordon 
20307c478bd9Sstevel@tonic-gate 				if (hca->cl_conn_list.conn_hd == NULL) {
20317c478bd9Sstevel@tonic-gate 					mutex_enter(&hca->inuse_lock);
20327c478bd9Sstevel@tonic-gate 					hca->inuse = FALSE;
20337c478bd9Sstevel@tonic-gate 					cv_signal(&hca->cb_cv);
20347c478bd9Sstevel@tonic-gate 					mutex_exit(&hca->inuse_lock);
20357c478bd9Sstevel@tonic-gate 				}
20367c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
20377c478bd9Sstevel@tonic-gate 			}
20387c478bd9Sstevel@tonic-gate 			rw_exit(&hca->srv_conn_list.conn_lock);
20397c478bd9Sstevel@tonic-gate 		}
20407c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
20417c478bd9Sstevel@tonic-gate 	}
2042*0a701b1eSRobert Gordon 
20437c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
20447c478bd9Sstevel@tonic-gate }
20457c478bd9Sstevel@tonic-gate 
20467c478bd9Sstevel@tonic-gate /*
20477c478bd9Sstevel@tonic-gate  * Wait for send completion notification. Only on receiving a
20487c478bd9Sstevel@tonic-gate  * notification be it a successful or error completion, free the
20497c478bd9Sstevel@tonic-gate  * send_wid.
20507c478bd9Sstevel@tonic-gate  */
20517c478bd9Sstevel@tonic-gate static rdma_stat
20527c478bd9Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd)
20537c478bd9Sstevel@tonic-gate {
20547c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
20557c478bd9Sstevel@tonic-gate 	rdma_stat error = RDMA_SUCCESS;
20567c478bd9Sstevel@tonic-gate 	int	i;
20577c478bd9Sstevel@tonic-gate 
20587c478bd9Sstevel@tonic-gate 	/*
20597c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
20607c478bd9Sstevel@tonic-gate 	 */
20617c478bd9Sstevel@tonic-gate 	ASSERT(wd != NULL);
20627c478bd9Sstevel@tonic-gate 	mutex_enter(&wd->sendwait_lock);
20637c478bd9Sstevel@tonic-gate 	if (wd->status == (uint_t)SEND_WAIT) {
20647c478bd9Sstevel@tonic-gate 		timout = drv_usectohz(SEND_WAIT_TIME * 1000000) +
20657c478bd9Sstevel@tonic-gate 		    ddi_get_lbolt();
2066*0a701b1eSRobert Gordon 
20677c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER) {
20687c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait(&wd->wait_cv,
20697c478bd9Sstevel@tonic-gate 			    &wd->sendwait_lock, timout)) > 0 &&
20707c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
20717c478bd9Sstevel@tonic-gate 				;
20727c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
20737c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
2074*0a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__srvsendwait__timeout);
2075*0a701b1eSRobert Gordon 
20767c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
20777c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
20787c478bd9Sstevel@tonic-gate 				break;
20797c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
20807c478bd9Sstevel@tonic-gate 				break;
20817c478bd9Sstevel@tonic-gate 			}
20827c478bd9Sstevel@tonic-gate 		} else {
20837c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv,
20847c478bd9Sstevel@tonic-gate 			    &wd->sendwait_lock, timout)) > 0 &&
20857c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
20867c478bd9Sstevel@tonic-gate 				;
20877c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
20887c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
2089*0a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__clntsendwait__timeout);
2090*0a701b1eSRobert Gordon 
20917c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
20927c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
20937c478bd9Sstevel@tonic-gate 				break;
20947c478bd9Sstevel@tonic-gate 			case 0:		/* interrupted */
2095*0a701b1eSRobert Gordon 				DTRACE_PROBE(rpcib__i__clntsendwait__intr);
2096*0a701b1eSRobert Gordon 
20977c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
20987c478bd9Sstevel@tonic-gate 				error = RDMA_INTR;
20997c478bd9Sstevel@tonic-gate 				break;
21007c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
21017c478bd9Sstevel@tonic-gate 				break;
21027c478bd9Sstevel@tonic-gate 			}
21037c478bd9Sstevel@tonic-gate 		}
21047c478bd9Sstevel@tonic-gate 	}
21057c478bd9Sstevel@tonic-gate 
21067c478bd9Sstevel@tonic-gate 	if (wd->status != (uint_t)SEND_WAIT) {
21077c478bd9Sstevel@tonic-gate 		/* got send completion */
21087c478bd9Sstevel@tonic-gate 		if (wd->status != RDMA_SUCCESS) {
21097c478bd9Sstevel@tonic-gate 			error = wd->status;
21107c478bd9Sstevel@tonic-gate 		if (wd->status != RDMA_CONNLOST)
21117c478bd9Sstevel@tonic-gate 			error = RDMA_FAILED;
21127c478bd9Sstevel@tonic-gate 		}
21137c478bd9Sstevel@tonic-gate 		for (i = 0; i < wd->nsbufs; i++) {
21147c478bd9Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), SEND_BUFFER,
211511606941Sjwahlig 			    (void *)(uintptr_t)wd->sbufaddr[i]);
21167c478bd9Sstevel@tonic-gate 		}
21177c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
21187c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wd);
21197c478bd9Sstevel@tonic-gate 	} else {
21207c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
21217c478bd9Sstevel@tonic-gate 	}
21227c478bd9Sstevel@tonic-gate 	return (error);
21237c478bd9Sstevel@tonic-gate }
21247c478bd9Sstevel@tonic-gate 
21257c478bd9Sstevel@tonic-gate static struct send_wid *
21267c478bd9Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp)
21277c478bd9Sstevel@tonic-gate {
21287c478bd9Sstevel@tonic-gate 	struct send_wid	*wd;
21297c478bd9Sstevel@tonic-gate 
21307c478bd9Sstevel@tonic-gate 	wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP);
21317c478bd9Sstevel@tonic-gate 	wd->xid = xid;
21327c478bd9Sstevel@tonic-gate 	wd->cv_sig = cv_sig;
21337c478bd9Sstevel@tonic-gate 	wd->qp = qp;
21347c478bd9Sstevel@tonic-gate 	cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL);
21357c478bd9Sstevel@tonic-gate 	mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL);
21367c478bd9Sstevel@tonic-gate 	wd->status = (uint_t)SEND_WAIT;
21377c478bd9Sstevel@tonic-gate 
21387c478bd9Sstevel@tonic-gate 	return (wd);
21397c478bd9Sstevel@tonic-gate }
21407c478bd9Sstevel@tonic-gate 
21417c478bd9Sstevel@tonic-gate static int
21427c478bd9Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc)
21437c478bd9Sstevel@tonic-gate {
21447c478bd9Sstevel@tonic-gate 	cv_destroy(&wdesc->wait_cv);
21457c478bd9Sstevel@tonic-gate 	mutex_destroy(&wdesc->sendwait_lock);
21467c478bd9Sstevel@tonic-gate 	kmem_free(wdesc, sizeof (*wdesc));
21477c478bd9Sstevel@tonic-gate 
21487c478bd9Sstevel@tonic-gate 	return (0);
21497c478bd9Sstevel@tonic-gate }
21507c478bd9Sstevel@tonic-gate 
21517c478bd9Sstevel@tonic-gate static rdma_stat
21527c478bd9Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep)
21537c478bd9Sstevel@tonic-gate {
21547c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
21557c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
21567c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
21577c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->replylist_lock);
21587c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
21597c478bd9Sstevel@tonic-gate 	}
21607c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
21617c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
21627c478bd9Sstevel@tonic-gate }
21637c478bd9Sstevel@tonic-gate 
21647c478bd9Sstevel@tonic-gate /*
21657c478bd9Sstevel@tonic-gate  * Send buffers are freed here only in case of error in posting
21667c478bd9Sstevel@tonic-gate  * on QP. If the post succeeded, the send buffers are freed upon
21677c478bd9Sstevel@tonic-gate  * send completion in rib_sendwait() or in the scq_handler.
21687c478bd9Sstevel@tonic-gate  */
21697c478bd9Sstevel@tonic-gate rdma_stat
21707c478bd9Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
2171*0a701b1eSRobert Gordon 	int send_sig, int cv_sig, caddr_t *swid)
21727c478bd9Sstevel@tonic-gate {
21737c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
21747c478bd9Sstevel@tonic-gate 	struct clist	*clp;
21757c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
21767c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
21777c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
21787c478bd9Sstevel@tonic-gate 	int		i, nds;
21797c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
21807c478bd9Sstevel@tonic-gate 	uint_t		total_msg_size;
2181*0a701b1eSRobert Gordon 	rib_qp_t	*qp;
2182*0a701b1eSRobert Gordon 
2183*0a701b1eSRobert Gordon 	qp = ctoqp(conn);
21847c478bd9Sstevel@tonic-gate 
21857c478bd9Sstevel@tonic-gate 	ASSERT(cl != NULL);
21867c478bd9Sstevel@tonic-gate 
21877c478bd9Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
21887c478bd9Sstevel@tonic-gate 
21897c478bd9Sstevel@tonic-gate 	nds = 0;
21907c478bd9Sstevel@tonic-gate 	total_msg_size = 0;
21917c478bd9Sstevel@tonic-gate 	clp = cl;
21927c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
21937c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
2194*0a701b1eSRobert Gordon 			DTRACE_PROBE(rpcib__i__sendandwait_dsegmax_exceeded);
21957c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
21967c478bd9Sstevel@tonic-gate 		}
2197*0a701b1eSRobert Gordon 		sgl[nds].ds_va = clp->w.c_saddr;
21987c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */
21997c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = clp->c_len;
22007c478bd9Sstevel@tonic-gate 		total_msg_size += clp->c_len;
22017c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
22027c478bd9Sstevel@tonic-gate 		nds++;
22037c478bd9Sstevel@tonic-gate 	}
22047c478bd9Sstevel@tonic-gate 
22057c478bd9Sstevel@tonic-gate 	if (send_sig) {
22067c478bd9Sstevel@tonic-gate 		/* Set SEND_SIGNAL flag. */
22077c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
22087c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, cv_sig, qp);
2209*0a701b1eSRobert Gordon 		*swid = (caddr_t)wdesc;
22107c478bd9Sstevel@tonic-gate 	} else {
22117c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
22127c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, 0, qp);
2213*0a701b1eSRobert Gordon 		*swid = (caddr_t)wdesc;
22147c478bd9Sstevel@tonic-gate 	}
22157c478bd9Sstevel@tonic-gate 	wdesc->nsbufs = nds;
22167c478bd9Sstevel@tonic-gate 	for (i = 0; i < nds; i++) {
22177c478bd9Sstevel@tonic-gate 		wdesc->sbufaddr[i] = sgl[i].ds_va;
22187c478bd9Sstevel@tonic-gate 	}
22197c478bd9Sstevel@tonic-gate 
222011606941Sjwahlig 	tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
22217c478bd9Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_SEND;
22227c478bd9Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
22237c478bd9Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
22247c478bd9Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
22257c478bd9Sstevel@tonic-gate 
22267c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2227*0a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
22287c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
22297c478bd9Sstevel@tonic-gate 	}
2230*0a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
22317c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
2232*0a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
2233*0a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
22347c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
22357c478bd9Sstevel@tonic-gate 		for (i = 0; i < nds; i++) {
22367c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, SEND_BUFFER,
223711606941Sjwahlig 			    (void *)(uintptr_t)wdesc->sbufaddr[i]);
22387c478bd9Sstevel@tonic-gate 		}
2239*0a701b1eSRobert Gordon 
22407c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wdesc);
2241*0a701b1eSRobert Gordon 
2242*0a701b1eSRobert Gordon 		return (RDMA_CONNLOST);
22437c478bd9Sstevel@tonic-gate 	}
22447c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
22457c478bd9Sstevel@tonic-gate 
22467c478bd9Sstevel@tonic-gate 	if (send_sig) {
22477c478bd9Sstevel@tonic-gate 		if (cv_sig) {
22487c478bd9Sstevel@tonic-gate 			/*
22497c478bd9Sstevel@tonic-gate 			 * cv_wait for send to complete.
22507c478bd9Sstevel@tonic-gate 			 * We can fail due to a timeout or signal or
22517c478bd9Sstevel@tonic-gate 			 * unsuccessful send.
22527c478bd9Sstevel@tonic-gate 			 */
22537c478bd9Sstevel@tonic-gate 			ret = rib_sendwait(qp, wdesc);
2254*0a701b1eSRobert Gordon 
22557c478bd9Sstevel@tonic-gate 			return (ret);
22567c478bd9Sstevel@tonic-gate 		}
22577c478bd9Sstevel@tonic-gate 	}
22587c478bd9Sstevel@tonic-gate 
22597c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
22607c478bd9Sstevel@tonic-gate }
22617c478bd9Sstevel@tonic-gate 
2262*0a701b1eSRobert Gordon 
22637c478bd9Sstevel@tonic-gate rdma_stat
22647c478bd9Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
22657c478bd9Sstevel@tonic-gate {
22667c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
2267*0a701b1eSRobert Gordon 	caddr_t		wd;
22687c478bd9Sstevel@tonic-gate 
22697c478bd9Sstevel@tonic-gate 	/* send-wait & cv_signal */
2270*0a701b1eSRobert Gordon 	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
22717c478bd9Sstevel@tonic-gate 	return (ret);
22727c478bd9Sstevel@tonic-gate }
22737c478bd9Sstevel@tonic-gate 
22747c478bd9Sstevel@tonic-gate /*
22757c478bd9Sstevel@tonic-gate  * Server interface (svc_rdma_ksend).
22767c478bd9Sstevel@tonic-gate  * Send RPC reply and wait for RDMA_DONE.
22777c478bd9Sstevel@tonic-gate  */
22787c478bd9Sstevel@tonic-gate rdma_stat
22797c478bd9Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
22807c478bd9Sstevel@tonic-gate {
22817c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
22827c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
22837c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
2284*0a701b1eSRobert Gordon 	caddr_t *wid = NULL;
22857c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
22867c478bd9Sstevel@tonic-gate 
22877c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
22887c478bd9Sstevel@tonic-gate 	rd = rdma_done_add(qp, msgid);
22897c478bd9Sstevel@tonic-gate 
22907c478bd9Sstevel@tonic-gate 	/* No cv_signal (whether send-wait or no-send-wait) */
2291*0a701b1eSRobert Gordon 	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, wid);
22927c478bd9Sstevel@tonic-gate 
2293*0a701b1eSRobert Gordon 	if (ret != RDMA_SUCCESS) {
2294*0a701b1eSRobert Gordon 		rdma_done_rm(qp, rd);
2295*0a701b1eSRobert Gordon 	} else {
22967c478bd9Sstevel@tonic-gate 		/*
22977c478bd9Sstevel@tonic-gate 		 * Wait for RDMA_DONE from remote end
22987c478bd9Sstevel@tonic-gate 		 */
2299*0a701b1eSRobert Gordon 		timout =
2300*0a701b1eSRobert Gordon 		    drv_usectohz(REPLY_WAIT_TIME * 1000000) + ddi_get_lbolt();
2301*0a701b1eSRobert Gordon 		cv_wait_ret = cv_timedwait(&rd->rdma_done_cv,
2302*0a701b1eSRobert Gordon 		    &qp->rdlist_lock,
23037c478bd9Sstevel@tonic-gate 		    timout);
2304*0a701b1eSRobert Gordon 
23057c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, rd);
2306*0a701b1eSRobert Gordon 
23077c478bd9Sstevel@tonic-gate 		if (cv_wait_ret < 0) {
23087c478bd9Sstevel@tonic-gate 			ret = RDMA_TIMEDOUT;
2309*0a701b1eSRobert Gordon 		}
23107c478bd9Sstevel@tonic-gate 	}
23117c478bd9Sstevel@tonic-gate 
23127c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
23137c478bd9Sstevel@tonic-gate 	return (ret);
23147c478bd9Sstevel@tonic-gate }
23157c478bd9Sstevel@tonic-gate 
23167c478bd9Sstevel@tonic-gate static struct recv_wid *
23177c478bd9Sstevel@tonic-gate rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid)
23187c478bd9Sstevel@tonic-gate {
23197c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
23207c478bd9Sstevel@tonic-gate 
23217c478bd9Sstevel@tonic-gate 	rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP);
23227c478bd9Sstevel@tonic-gate 	rwid->xid = msgid;
23237c478bd9Sstevel@tonic-gate 	rwid->addr = sgl->ds_va;
23247c478bd9Sstevel@tonic-gate 	rwid->qp = qp;
23257c478bd9Sstevel@tonic-gate 
23267c478bd9Sstevel@tonic-gate 	return (rwid);
23277c478bd9Sstevel@tonic-gate }
23287c478bd9Sstevel@tonic-gate 
23297c478bd9Sstevel@tonic-gate static void
23307c478bd9Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid)
23317c478bd9Sstevel@tonic-gate {
23327c478bd9Sstevel@tonic-gate 	kmem_free(rwid, sizeof (struct recv_wid));
23337c478bd9Sstevel@tonic-gate }
23347c478bd9Sstevel@tonic-gate 
23357c478bd9Sstevel@tonic-gate rdma_stat
23367c478bd9Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid)
23377c478bd9Sstevel@tonic-gate {
23387c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
23397c478bd9Sstevel@tonic-gate 	struct clist	*clp = cl;
23407c478bd9Sstevel@tonic-gate 	struct reply	*rep;
23417c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
23427c478bd9Sstevel@tonic-gate 	int		nds;
23437c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
23447c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
23457c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
23467c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
23477c478bd9Sstevel@tonic-gate 
23487c478bd9Sstevel@tonic-gate 	/*
23497c478bd9Sstevel@tonic-gate 	 * rdma_clnt_postrecv uses RECV_BUFFER.
23507c478bd9Sstevel@tonic-gate 	 */
23517c478bd9Sstevel@tonic-gate 
23527c478bd9Sstevel@tonic-gate 	nds = 0;
23537c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
23547c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
23557c478bd9Sstevel@tonic-gate 			ret = RDMA_FAILED;
23567c478bd9Sstevel@tonic-gate 			goto done;
23577c478bd9Sstevel@tonic-gate 		}
2358*0a701b1eSRobert Gordon 		sgl[nds].ds_va = cl->w.c_saddr;
23597c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
23607c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
23617c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
23627c478bd9Sstevel@tonic-gate 		nds++;
23637c478bd9Sstevel@tonic-gate 	}
23647c478bd9Sstevel@tonic-gate 
23657c478bd9Sstevel@tonic-gate 	if (nds != 1) {
23667c478bd9Sstevel@tonic-gate 		ret = RDMA_FAILED;
23677c478bd9Sstevel@tonic-gate 		goto done;
23687c478bd9Sstevel@tonic-gate 	}
2369*0a701b1eSRobert Gordon 
23707c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
23717c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
23727c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
23737c478bd9Sstevel@tonic-gate 
23747c478bd9Sstevel@tonic-gate 	rwid = rib_create_wid(qp, &sgl[0], msgid);
23757c478bd9Sstevel@tonic-gate 	if (rwid) {
237611606941Sjwahlig 		recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid;
23777c478bd9Sstevel@tonic-gate 	} else {
23787c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
23797c478bd9Sstevel@tonic-gate 		goto done;
23807c478bd9Sstevel@tonic-gate 	}
23817c478bd9Sstevel@tonic-gate 	rep = rib_addreplylist(qp, msgid);
23827c478bd9Sstevel@tonic-gate 	if (!rep) {
23837c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
23847c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
23857c478bd9Sstevel@tonic-gate 		goto done;
23867c478bd9Sstevel@tonic-gate 	}
23877c478bd9Sstevel@tonic-gate 
23887c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2389*0a701b1eSRobert Gordon 
2390*0a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
23917c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
23927c478bd9Sstevel@tonic-gate 	}
2393*0a701b1eSRobert Gordon 
2394*0a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
23957c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
2396*0a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
2397*0a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
23987c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
23997c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
24007c478bd9Sstevel@tonic-gate 		(void) rib_rem_rep(qp, rep);
2401*0a701b1eSRobert Gordon 		ret = RDMA_CONNLOST;
24027c478bd9Sstevel@tonic-gate 		goto done;
24037c478bd9Sstevel@tonic-gate 	}
24047c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
24057c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
24067c478bd9Sstevel@tonic-gate 
24077c478bd9Sstevel@tonic-gate done:
24087c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
2409*0a701b1eSRobert Gordon 		rib_rbuf_free(conn, RECV_BUFFER,
2410*0a701b1eSRobert Gordon 		    (void *)(uintptr_t)clp->w.c_saddr3);
24117c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
24127c478bd9Sstevel@tonic-gate 	}
24137c478bd9Sstevel@tonic-gate 	return (ret);
24147c478bd9Sstevel@tonic-gate }
24157c478bd9Sstevel@tonic-gate 
24167c478bd9Sstevel@tonic-gate rdma_stat
24177c478bd9Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl)
24187c478bd9Sstevel@tonic-gate {
24197c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
24207c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
24217c478bd9Sstevel@tonic-gate 	int		nds;
24227c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
24237c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
24247c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
24257c478bd9Sstevel@tonic-gate 
24267c478bd9Sstevel@tonic-gate 	nds = 0;
24277c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
24287c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
24297c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
24307c478bd9Sstevel@tonic-gate 		}
2431*0a701b1eSRobert Gordon 		sgl[nds].ds_va = cl->w.c_saddr;
24327c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
24337c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
24347c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
24357c478bd9Sstevel@tonic-gate 		nds++;
24367c478bd9Sstevel@tonic-gate 	}
24377c478bd9Sstevel@tonic-gate 
24387c478bd9Sstevel@tonic-gate 	if (nds != 1) {
2439*0a701b1eSRobert Gordon 		rib_rbuf_free(conn, RECV_BUFFER,
2440*0a701b1eSRobert Gordon 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
2441*0a701b1eSRobert Gordon 
24427c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
24437c478bd9Sstevel@tonic-gate 	}
2444*0a701b1eSRobert Gordon 
24457c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
24467c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
24477c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
24487c478bd9Sstevel@tonic-gate 
24497c478bd9Sstevel@tonic-gate 	s_recvp = rib_init_svc_recv(qp, &sgl[0]);
245011606941Sjwahlig 	/* Use s_recvp's addr as wr id */
245111606941Sjwahlig 	recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp;
24527c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2453*0a701b1eSRobert Gordon 	if (conn->c_state == C_CONNECTED) {
24547c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
24557c478bd9Sstevel@tonic-gate 	}
2456*0a701b1eSRobert Gordon 	if (conn->c_state != C_CONNECTED ||
24577c478bd9Sstevel@tonic-gate 	    ibt_status != IBT_SUCCESS) {
2458*0a701b1eSRobert Gordon 		if (conn->c_state != C_DISCONN_PEND)
2459*0a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
24607c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
246111606941Sjwahlig 		rib_rbuf_free(conn, RECV_BUFFER,
246211606941Sjwahlig 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
24637c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
2464*0a701b1eSRobert Gordon 
2465*0a701b1eSRobert Gordon 		return (RDMA_CONNLOST);
24667c478bd9Sstevel@tonic-gate 	}
24677c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
24687c478bd9Sstevel@tonic-gate 
24697c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
24707c478bd9Sstevel@tonic-gate }
24717c478bd9Sstevel@tonic-gate 
24727c478bd9Sstevel@tonic-gate /* Client */
24737c478bd9Sstevel@tonic-gate rdma_stat
24747c478bd9Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid)
24757c478bd9Sstevel@tonic-gate {
24767c478bd9Sstevel@tonic-gate 
24777c478bd9Sstevel@tonic-gate 	return (rib_clnt_post(conn, cl, msgid));
24787c478bd9Sstevel@tonic-gate }
24797c478bd9Sstevel@tonic-gate 
2480*0a701b1eSRobert Gordon /* Client */
2481*0a701b1eSRobert Gordon rdma_stat
2482*0a701b1eSRobert Gordon rib_post_resp_remove(CONN* conn, uint32_t msgid)
2483*0a701b1eSRobert Gordon {
2484*0a701b1eSRobert Gordon 	rib_qp_t	*qp = ctoqp(conn);
2485*0a701b1eSRobert Gordon 	struct reply	*rep;
2486*0a701b1eSRobert Gordon 
2487*0a701b1eSRobert Gordon 	mutex_enter(&qp->replylist_lock);
2488*0a701b1eSRobert Gordon 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
2489*0a701b1eSRobert Gordon 		if (rep->xid == msgid) {
2490*0a701b1eSRobert Gordon 			if (rep->vaddr_cq) {
2491*0a701b1eSRobert Gordon 				rib_rbuf_free(conn, RECV_BUFFER,
2492*0a701b1eSRobert Gordon 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
2493*0a701b1eSRobert Gordon 			}
2494*0a701b1eSRobert Gordon 			(void) rib_remreply(qp, rep);
2495*0a701b1eSRobert Gordon 			break;
2496*0a701b1eSRobert Gordon 		}
2497*0a701b1eSRobert Gordon 	}
2498*0a701b1eSRobert Gordon 	mutex_exit(&qp->replylist_lock);
2499*0a701b1eSRobert Gordon 
2500*0a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
2501*0a701b1eSRobert Gordon }
2502*0a701b1eSRobert Gordon 
25037c478bd9Sstevel@tonic-gate /* Server */
25047c478bd9Sstevel@tonic-gate rdma_stat
25057c478bd9Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl)
25067c478bd9Sstevel@tonic-gate {
25077c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
25087c478bd9Sstevel@tonic-gate 
25097c478bd9Sstevel@tonic-gate 	if (rib_svc_post(conn, cl) == RDMA_SUCCESS) {
25107c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
25117c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs++;
25127c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
25137c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
25147c478bd9Sstevel@tonic-gate 	}
25157c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
25167c478bd9Sstevel@tonic-gate }
25177c478bd9Sstevel@tonic-gate 
25187c478bd9Sstevel@tonic-gate /*
25197c478bd9Sstevel@tonic-gate  * Client side only interface to "recv" the rpc reply buf
25207c478bd9Sstevel@tonic-gate  * posted earlier by rib_post_resp(conn, cl, msgid).
25217c478bd9Sstevel@tonic-gate  */
25227c478bd9Sstevel@tonic-gate rdma_stat
25237c478bd9Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid)
25247c478bd9Sstevel@tonic-gate {
25257c478bd9Sstevel@tonic-gate 	struct reply *rep = NULL;
25267c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
25277c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
25287c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
25297c478bd9Sstevel@tonic-gate 
25307c478bd9Sstevel@tonic-gate 	/*
25317c478bd9Sstevel@tonic-gate 	 * Find the reply structure for this msgid
25327c478bd9Sstevel@tonic-gate 	 */
25337c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
25347c478bd9Sstevel@tonic-gate 
25357c478bd9Sstevel@tonic-gate 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
25367c478bd9Sstevel@tonic-gate 		if (rep->xid == msgid)
25377c478bd9Sstevel@tonic-gate 			break;
25387c478bd9Sstevel@tonic-gate 	}
2539*0a701b1eSRobert Gordon 
25407c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
25417c478bd9Sstevel@tonic-gate 		/*
25427c478bd9Sstevel@tonic-gate 		 * If message not yet received, wait.
25437c478bd9Sstevel@tonic-gate 		 */
25447c478bd9Sstevel@tonic-gate 		if (rep->status == (uint_t)REPLY_WAIT) {
25457c478bd9Sstevel@tonic-gate 			timout = ddi_get_lbolt() +
25467c478bd9Sstevel@tonic-gate 			    drv_usectohz(REPLY_WAIT_TIME * 1000000);
2547*0a701b1eSRobert Gordon 
25487c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv,
25497c478bd9Sstevel@tonic-gate 			    &qp->replylist_lock, timout)) > 0 &&
2550*0a701b1eSRobert Gordon 			    rep->status == (uint_t)REPLY_WAIT)
2551*0a701b1eSRobert Gordon 				;
25527c478bd9Sstevel@tonic-gate 
25537c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
25547c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
25557c478bd9Sstevel@tonic-gate 				ret = RDMA_TIMEDOUT;
25567c478bd9Sstevel@tonic-gate 				break;
25577c478bd9Sstevel@tonic-gate 			case 0:
25587c478bd9Sstevel@tonic-gate 				ret = RDMA_INTR;
25597c478bd9Sstevel@tonic-gate 				break;
25607c478bd9Sstevel@tonic-gate 			default:
25617c478bd9Sstevel@tonic-gate 				break;
25627c478bd9Sstevel@tonic-gate 			}
25637c478bd9Sstevel@tonic-gate 		}
25647c478bd9Sstevel@tonic-gate 
25657c478bd9Sstevel@tonic-gate 		if (rep->status == RDMA_SUCCESS) {
25667c478bd9Sstevel@tonic-gate 			struct clist *cl = NULL;
25677c478bd9Sstevel@tonic-gate 
25687c478bd9Sstevel@tonic-gate 			/*
25697c478bd9Sstevel@tonic-gate 			 * Got message successfully
25707c478bd9Sstevel@tonic-gate 			 */
25717c478bd9Sstevel@tonic-gate 			clist_add(&cl, 0, rep->bytes_xfer, NULL,
257211606941Sjwahlig 			    (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL);
25737c478bd9Sstevel@tonic-gate 			*clp = cl;
25747c478bd9Sstevel@tonic-gate 		} else {
25757c478bd9Sstevel@tonic-gate 			if (rep->status != (uint_t)REPLY_WAIT) {
25767c478bd9Sstevel@tonic-gate 				/*
25777c478bd9Sstevel@tonic-gate 				 * Got error in reply message. Free
25787c478bd9Sstevel@tonic-gate 				 * recv buffer here.
25797c478bd9Sstevel@tonic-gate 				 */
25807c478bd9Sstevel@tonic-gate 				ret = rep->status;
25817c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
258211606941Sjwahlig 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
25837c478bd9Sstevel@tonic-gate 			}
25847c478bd9Sstevel@tonic-gate 		}
25857c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
25867c478bd9Sstevel@tonic-gate 	} else {
25877c478bd9Sstevel@tonic-gate 		/*
25887c478bd9Sstevel@tonic-gate 		 * No matching reply structure found for given msgid on the
25897c478bd9Sstevel@tonic-gate 		 * reply wait list.
25907c478bd9Sstevel@tonic-gate 		 */
25917c478bd9Sstevel@tonic-gate 		ret = RDMA_INVAL;
2592*0a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__nomatchxid2);
25937c478bd9Sstevel@tonic-gate 	}
25947c478bd9Sstevel@tonic-gate 
25957c478bd9Sstevel@tonic-gate 	/*
25967c478bd9Sstevel@tonic-gate 	 * Done.
25977c478bd9Sstevel@tonic-gate 	 */
25987c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
25997c478bd9Sstevel@tonic-gate 	return (ret);
26007c478bd9Sstevel@tonic-gate }
26017c478bd9Sstevel@tonic-gate 
26027c478bd9Sstevel@tonic-gate /*
26037c478bd9Sstevel@tonic-gate  * RDMA write a buffer to the remote address.
26047c478bd9Sstevel@tonic-gate  */
26057c478bd9Sstevel@tonic-gate rdma_stat
26067c478bd9Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait)
26077c478bd9Sstevel@tonic-gate {
26087c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
26097c478bd9Sstevel@tonic-gate 	int		cv_sig;
2610*0a701b1eSRobert Gordon 	int		i;
26117c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
26127c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
26137c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
26147c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
26157c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
2616*0a701b1eSRobert Gordon 	uint64_t	n_writes = 0;
2617*0a701b1eSRobert Gordon 	bool_t		force_wait = FALSE;
26187c478bd9Sstevel@tonic-gate 
26197c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
26207c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
26217c478bd9Sstevel@tonic-gate 	}
26227c478bd9Sstevel@tonic-gate 
2623*0a701b1eSRobert Gordon 
2624*0a701b1eSRobert Gordon 	while ((cl != NULL)) {
2625*0a701b1eSRobert Gordon 		if (cl->c_len > 0) {
26267c478bd9Sstevel@tonic-gate 			bzero(&tx_wr, sizeof (ibt_send_wr_t));
2627*0a701b1eSRobert Gordon 			tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->u.c_daddr;
2628*0a701b1eSRobert Gordon 			tx_wr.wr.rc.rcwr.rdma.rdma_rkey =
2629*0a701b1eSRobert Gordon 			    cl->c_dmemhandle.mrc_rmr; /* rkey */
2630*0a701b1eSRobert Gordon 			sgl[0].ds_va = cl->w.c_saddr;
2631*0a701b1eSRobert Gordon 			sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
2632*0a701b1eSRobert Gordon 			sgl[0].ds_len = cl->c_len;
26337c478bd9Sstevel@tonic-gate 
26347c478bd9Sstevel@tonic-gate 			if (wait) {
26357c478bd9Sstevel@tonic-gate 				tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
26367c478bd9Sstevel@tonic-gate 				cv_sig = 1;
26377c478bd9Sstevel@tonic-gate 			} else {
2638*0a701b1eSRobert Gordon 				if (n_writes > max_unsignaled_rws) {
2639*0a701b1eSRobert Gordon 					n_writes = 0;
2640*0a701b1eSRobert Gordon 					force_wait = TRUE;
2641*0a701b1eSRobert Gordon 					tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
2642*0a701b1eSRobert Gordon 					cv_sig = 1;
2643*0a701b1eSRobert Gordon 				} else {
26447c478bd9Sstevel@tonic-gate 					tx_wr.wr_flags = IBT_WR_NO_FLAGS;
26457c478bd9Sstevel@tonic-gate 					cv_sig = 0;
26467c478bd9Sstevel@tonic-gate 				}
2647*0a701b1eSRobert Gordon 			}
26487c478bd9Sstevel@tonic-gate 
26497c478bd9Sstevel@tonic-gate 			wdesc = rib_init_sendwait(0, cv_sig, qp);
265011606941Sjwahlig 			tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
26517c478bd9Sstevel@tonic-gate 			tx_wr.wr_opcode = IBT_WRC_RDMAW;
26527c478bd9Sstevel@tonic-gate 			tx_wr.wr_trans = IBT_RC_SRV;
2653*0a701b1eSRobert Gordon 			tx_wr.wr_nds = 1;
26547c478bd9Sstevel@tonic-gate 			tx_wr.wr_sgl = sgl;
26557c478bd9Sstevel@tonic-gate 
26567c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
2657*0a701b1eSRobert Gordon 			if (conn->c_state == C_CONNECTED) {
2658*0a701b1eSRobert Gordon 				ibt_status =
2659*0a701b1eSRobert Gordon 				    ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
26607c478bd9Sstevel@tonic-gate 			}
2661*0a701b1eSRobert Gordon 			if (conn->c_state != C_CONNECTED ||
26627c478bd9Sstevel@tonic-gate 			    ibt_status != IBT_SUCCESS) {
2663*0a701b1eSRobert Gordon 				if (conn->c_state != C_DISCONN_PEND)
2664*0a701b1eSRobert Gordon 					conn->c_state = C_ERROR_CONN;
26657c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
26667c478bd9Sstevel@tonic-gate 				(void) rib_free_sendwait(wdesc);
2667*0a701b1eSRobert Gordon 				return (RDMA_CONNLOST);
26687c478bd9Sstevel@tonic-gate 			}
26697c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
26707c478bd9Sstevel@tonic-gate 
26717c478bd9Sstevel@tonic-gate 			/*
26727c478bd9Sstevel@tonic-gate 			 * Wait for send to complete
26737c478bd9Sstevel@tonic-gate 			 */
2674*0a701b1eSRobert Gordon 			if (wait || force_wait) {
2675*0a701b1eSRobert Gordon 				force_wait = FALSE;
26767c478bd9Sstevel@tonic-gate 				ret = rib_sendwait(qp, wdesc);
26777c478bd9Sstevel@tonic-gate 				if (ret != 0) {
26787c478bd9Sstevel@tonic-gate 					return (ret);
26797c478bd9Sstevel@tonic-gate 				}
2680*0a701b1eSRobert Gordon 			} else {
2681*0a701b1eSRobert Gordon 				mutex_enter(&wdesc->sendwait_lock);
2682*0a701b1eSRobert Gordon 				for (i = 0; i < wdesc->nsbufs; i++) {
2683*0a701b1eSRobert Gordon 					rib_rbuf_free(qptoc(qp), SEND_BUFFER,
2684*0a701b1eSRobert Gordon 					    (void *)(uintptr_t)
2685*0a701b1eSRobert Gordon 					    wdesc->sbufaddr[i]);
2686*0a701b1eSRobert Gordon 				}
2687*0a701b1eSRobert Gordon 				mutex_exit(&wdesc->sendwait_lock);
2688*0a701b1eSRobert Gordon 				(void) rib_free_sendwait(wdesc);
2689*0a701b1eSRobert Gordon 			}
2690*0a701b1eSRobert Gordon 			n_writes ++;
2691*0a701b1eSRobert Gordon 		}
2692*0a701b1eSRobert Gordon 		cl = cl->c_next;
26937c478bd9Sstevel@tonic-gate 	}
26947c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
26957c478bd9Sstevel@tonic-gate }
26967c478bd9Sstevel@tonic-gate 
26977c478bd9Sstevel@tonic-gate /*
26987c478bd9Sstevel@tonic-gate  * RDMA Read a buffer from the remote address.
26997c478bd9Sstevel@tonic-gate  */
27007c478bd9Sstevel@tonic-gate rdma_stat
27017c478bd9Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait)
27027c478bd9Sstevel@tonic-gate {
27037c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	rx_wr;
27047c478bd9Sstevel@tonic-gate 	int		cv_sig;
2705*0a701b1eSRobert Gordon 	int		i;
2706*0a701b1eSRobert Gordon 	ibt_wr_ds_t	sgl;
27077c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
27087c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
27097c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
27107c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
27117c478bd9Sstevel@tonic-gate 
27127c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
27137c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
27147c478bd9Sstevel@tonic-gate 	}
27157c478bd9Sstevel@tonic-gate 
2716*0a701b1eSRobert Gordon 	while (cl != NULL) {
27177c478bd9Sstevel@tonic-gate 		bzero(&rx_wr, sizeof (ibt_send_wr_t));
27187c478bd9Sstevel@tonic-gate 		/*
27197c478bd9Sstevel@tonic-gate 		 * Remote address is at the head chunk item in list.
27207c478bd9Sstevel@tonic-gate 		 */
2721*0a701b1eSRobert Gordon 		rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->w.c_saddr;
2722*0a701b1eSRobert Gordon 		rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr;
27237c478bd9Sstevel@tonic-gate 
2724*0a701b1eSRobert Gordon 		sgl.ds_va = cl->u.c_daddr;
2725*0a701b1eSRobert Gordon 		sgl.ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */
2726*0a701b1eSRobert Gordon 		sgl.ds_len = cl->c_len;
27277c478bd9Sstevel@tonic-gate 
27287c478bd9Sstevel@tonic-gate 		if (wait) {
27297c478bd9Sstevel@tonic-gate 			rx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
27307c478bd9Sstevel@tonic-gate 			cv_sig = 1;
27317c478bd9Sstevel@tonic-gate 		} else {
27327c478bd9Sstevel@tonic-gate 			rx_wr.wr_flags = IBT_WR_NO_FLAGS;
27337c478bd9Sstevel@tonic-gate 			cv_sig = 0;
27347c478bd9Sstevel@tonic-gate 		}
27357c478bd9Sstevel@tonic-gate 
27367c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(0, cv_sig, qp);
273711606941Sjwahlig 		rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
27387c478bd9Sstevel@tonic-gate 		rx_wr.wr_opcode = IBT_WRC_RDMAR;
27397c478bd9Sstevel@tonic-gate 		rx_wr.wr_trans = IBT_RC_SRV;
2740*0a701b1eSRobert Gordon 		rx_wr.wr_nds = 1;
2741*0a701b1eSRobert Gordon 		rx_wr.wr_sgl = &sgl;
27427c478bd9Sstevel@tonic-gate 
27437c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
2744*0a701b1eSRobert Gordon 		if (conn->c_state == C_CONNECTED) {
27457c478bd9Sstevel@tonic-gate 			ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL);
27467c478bd9Sstevel@tonic-gate 		}
2747*0a701b1eSRobert Gordon 		if (conn->c_state != C_CONNECTED ||
27487c478bd9Sstevel@tonic-gate 		    ibt_status != IBT_SUCCESS) {
2749*0a701b1eSRobert Gordon 			if (conn->c_state != C_DISCONN_PEND)
2750*0a701b1eSRobert Gordon 				conn->c_state = C_ERROR_CONN;
27517c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
27527c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wdesc);
2753*0a701b1eSRobert Gordon 			return (RDMA_CONNLOST);
27547c478bd9Sstevel@tonic-gate 		}
27557c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
27567c478bd9Sstevel@tonic-gate 
27577c478bd9Sstevel@tonic-gate 		/*
2758*0a701b1eSRobert Gordon 		 * Wait for send to complete if this is the
2759*0a701b1eSRobert Gordon 		 * last item in the list.
27607c478bd9Sstevel@tonic-gate 		 */
2761*0a701b1eSRobert Gordon 		if (wait && cl->c_next == NULL) {
27627c478bd9Sstevel@tonic-gate 			ret = rib_sendwait(qp, wdesc);
27637c478bd9Sstevel@tonic-gate 			if (ret != 0) {
27647c478bd9Sstevel@tonic-gate 				return (ret);
27657c478bd9Sstevel@tonic-gate 			}
2766*0a701b1eSRobert Gordon 		} else {
2767*0a701b1eSRobert Gordon 			mutex_enter(&wdesc->sendwait_lock);
2768*0a701b1eSRobert Gordon 			for (i = 0; i < wdesc->nsbufs; i++) {
2769*0a701b1eSRobert Gordon 				rib_rbuf_free(qptoc(qp), SEND_BUFFER,
2770*0a701b1eSRobert Gordon 				    (void *)(uintptr_t)wdesc->sbufaddr[i]);
27717c478bd9Sstevel@tonic-gate 			}
2772*0a701b1eSRobert Gordon 			mutex_exit(&wdesc->sendwait_lock);
2773*0a701b1eSRobert Gordon 			(void) rib_free_sendwait(wdesc);
2774*0a701b1eSRobert Gordon 		}
2775*0a701b1eSRobert Gordon 		cl = cl->c_next;
2776*0a701b1eSRobert Gordon 	}
27777c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
27787c478bd9Sstevel@tonic-gate }
27797c478bd9Sstevel@tonic-gate 
27807c478bd9Sstevel@tonic-gate /*
27817c478bd9Sstevel@tonic-gate  * rib_srv_cm_handler()
27827c478bd9Sstevel@tonic-gate  *    Connection Manager callback to handle RC connection requests.
27837c478bd9Sstevel@tonic-gate  */
27847c478bd9Sstevel@tonic-gate /* ARGSUSED */
27857c478bd9Sstevel@tonic-gate static ibt_cm_status_t
27867c478bd9Sstevel@tonic-gate rib_srv_cm_handler(void *any, ibt_cm_event_t *event,
27877c478bd9Sstevel@tonic-gate 	ibt_cm_return_args_t *ret_args, void *priv_data,
27887c478bd9Sstevel@tonic-gate 	ibt_priv_data_len_t len)
27897c478bd9Sstevel@tonic-gate {
27907c478bd9Sstevel@tonic-gate 	queue_t		*q;
27917c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
27927c478bd9Sstevel@tonic-gate 	rpcib_state_t	*ribstat;
27937c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
27947c478bd9Sstevel@tonic-gate 	rdma_stat	status = RDMA_SUCCESS;
27957c478bd9Sstevel@tonic-gate 	int		i;
27967c478bd9Sstevel@tonic-gate 	struct clist	cl;
2797*0a701b1eSRobert Gordon 	rdma_buf_t	rdbuf = {0};
27987c478bd9Sstevel@tonic-gate 	void		*buf = NULL;
27997c478bd9Sstevel@tonic-gate 	CONN		*conn;
2800*0a701b1eSRobert Gordon 	ibt_ip_cm_info_t	ipinfo;
2801*0a701b1eSRobert Gordon 	struct sockaddr_in *s;
2802*0a701b1eSRobert Gordon 	struct sockaddr_in6 *s6;
2803*0a701b1eSRobert Gordon 	int sin_size = sizeof (struct sockaddr_in);
2804*0a701b1eSRobert Gordon 	int in_size = sizeof (struct in_addr);
2805*0a701b1eSRobert Gordon 	int sin6_size = sizeof (struct sockaddr_in6);
28067c478bd9Sstevel@tonic-gate 
28077c478bd9Sstevel@tonic-gate 	ASSERT(any != NULL);
28087c478bd9Sstevel@tonic-gate 	ASSERT(event != NULL);
28097c478bd9Sstevel@tonic-gate 
28107c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)any;
28117c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
28127c478bd9Sstevel@tonic-gate 	ASSERT(hca != NULL);
28137c478bd9Sstevel@tonic-gate 
28147c478bd9Sstevel@tonic-gate 	/* got a connection request */
28157c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
28167c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_REQ_RCV:
28177c478bd9Sstevel@tonic-gate 		/*
28187c478bd9Sstevel@tonic-gate 		 * If the plugin is in the NO_ACCEPT state, bail out.
28197c478bd9Sstevel@tonic-gate 		 */
28207c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
28217c478bd9Sstevel@tonic-gate 		if (plugin_state == NO_ACCEPT) {
28227c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
28237c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
28247c478bd9Sstevel@tonic-gate 		}
28257c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
28267c478bd9Sstevel@tonic-gate 
28277c478bd9Sstevel@tonic-gate 		/*
28287c478bd9Sstevel@tonic-gate 		 * Need to send a MRA MAD to CM so that it does not
28297c478bd9Sstevel@tonic-gate 		 * timeout on us.
28307c478bd9Sstevel@tonic-gate 		 */
28317c478bd9Sstevel@tonic-gate 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
28327c478bd9Sstevel@tonic-gate 		    event->cm_event.req.req_timeout * 8, NULL, 0);
28337c478bd9Sstevel@tonic-gate 
28347c478bd9Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
28357c478bd9Sstevel@tonic-gate 		q = rib_stat->q;
28367c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
2837*0a701b1eSRobert Gordon 
28387c478bd9Sstevel@tonic-gate 		status = rib_svc_create_chan(hca, (caddr_t)q,
28397c478bd9Sstevel@tonic-gate 		    event->cm_event.req.req_prim_hca_port, &qp);
2840*0a701b1eSRobert Gordon 
28417c478bd9Sstevel@tonic-gate 		if (status) {
28427c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
28437c478bd9Sstevel@tonic-gate 		}
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
2846*0a701b1eSRobert Gordon 		ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
2847*0a701b1eSRobert Gordon 		ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
28487c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
28497c478bd9Sstevel@tonic-gate 
28507c478bd9Sstevel@tonic-gate 		/*
28517c478bd9Sstevel@tonic-gate 		 * Pre-posts RECV buffers
28527c478bd9Sstevel@tonic-gate 		 */
28537c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
28547c478bd9Sstevel@tonic-gate 		for (i = 0; i < preposted_rbufs; i++) {
28557c478bd9Sstevel@tonic-gate 			bzero(&rdbuf, sizeof (rdbuf));
28567c478bd9Sstevel@tonic-gate 			rdbuf.type = RECV_BUFFER;
28577c478bd9Sstevel@tonic-gate 			buf = rib_rbuf_alloc(conn, &rdbuf);
28587c478bd9Sstevel@tonic-gate 			if (buf == NULL) {
28597c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn, NULL);
28607c478bd9Sstevel@tonic-gate 				return (IBT_CM_REJECT);
28617c478bd9Sstevel@tonic-gate 			}
28627c478bd9Sstevel@tonic-gate 
28637c478bd9Sstevel@tonic-gate 			bzero(&cl, sizeof (cl));
2864*0a701b1eSRobert Gordon 			cl.w.c_saddr3 = (caddr_t)rdbuf.addr;
28657c478bd9Sstevel@tonic-gate 			cl.c_len = rdbuf.len;
2866*0a701b1eSRobert Gordon 			cl.c_smemhandle.mrc_lmr =
2867*0a701b1eSRobert Gordon 			    rdbuf.handle.mrc_lmr; /* lkey */
28687c478bd9Sstevel@tonic-gate 			cl.c_next = NULL;
28697c478bd9Sstevel@tonic-gate 			status = rib_post_recv(conn, &cl);
28707c478bd9Sstevel@tonic-gate 			if (status != RDMA_SUCCESS) {
28717c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn, NULL);
28727c478bd9Sstevel@tonic-gate 				return (IBT_CM_REJECT);
28737c478bd9Sstevel@tonic-gate 			}
28747c478bd9Sstevel@tonic-gate 		}
28757c478bd9Sstevel@tonic-gate 		(void) rib_add_connlist(conn, &hca->srv_conn_list);
28767c478bd9Sstevel@tonic-gate 
28777c478bd9Sstevel@tonic-gate 		/*
2878*0a701b1eSRobert Gordon 		 * Get the address translation
28797c478bd9Sstevel@tonic-gate 		 */
28807c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
28817c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
28827c478bd9Sstevel@tonic-gate 			rw_exit(&hca->state_lock);
28837c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
28847c478bd9Sstevel@tonic-gate 		}
28857c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
28867c478bd9Sstevel@tonic-gate 
2887*0a701b1eSRobert Gordon 		bzero(&ipinfo, sizeof (ibt_ip_cm_info_t));
28887c478bd9Sstevel@tonic-gate 
2889*0a701b1eSRobert Gordon 		if (ibt_get_ip_data(event->cm_priv_data_len,
2890*0a701b1eSRobert Gordon 		    event->cm_priv_data,
2891*0a701b1eSRobert Gordon 		    &ipinfo) != IBT_SUCCESS) {
2892*0a701b1eSRobert Gordon 
2893*0a701b1eSRobert Gordon 			return (IBT_CM_REJECT);
2894*0a701b1eSRobert Gordon 		}
2895*0a701b1eSRobert Gordon 
2896*0a701b1eSRobert Gordon 		switch (ipinfo.src_addr.family) {
2897*0a701b1eSRobert Gordon 		case AF_INET:
28987c478bd9Sstevel@tonic-gate 
28997c478bd9Sstevel@tonic-gate 			conn->c_raddr.maxlen =
29007c478bd9Sstevel@tonic-gate 			    conn->c_raddr.len = sin_size;
2901*0a701b1eSRobert Gordon 			conn->c_raddr.buf = kmem_zalloc(sin_size, KM_SLEEP);
2902*0a701b1eSRobert Gordon 
29037c478bd9Sstevel@tonic-gate 			s = (struct sockaddr_in *)conn->c_raddr.buf;
29047c478bd9Sstevel@tonic-gate 			s->sin_family = AF_INET;
29057c478bd9Sstevel@tonic-gate 
2906*0a701b1eSRobert Gordon 			bcopy((void *)&ipinfo.src_addr.un.ip4addr,
2907*0a701b1eSRobert Gordon 			    &s->sin_addr, in_size);
2908*0a701b1eSRobert Gordon 
2909*0a701b1eSRobert Gordon 			break;
2910*0a701b1eSRobert Gordon 
2911*0a701b1eSRobert Gordon 		case AF_INET6:
29127c478bd9Sstevel@tonic-gate 
29137c478bd9Sstevel@tonic-gate 			conn->c_raddr.maxlen =
29147c478bd9Sstevel@tonic-gate 			    conn->c_raddr.len = sin6_size;
2915*0a701b1eSRobert Gordon 			conn->c_raddr.buf = kmem_zalloc(sin6_size, KM_SLEEP);
29167c478bd9Sstevel@tonic-gate 
29177c478bd9Sstevel@tonic-gate 			s6 = (struct sockaddr_in6 *)conn->c_raddr.buf;
29187c478bd9Sstevel@tonic-gate 			s6->sin6_family = AF_INET6;
2919*0a701b1eSRobert Gordon 			bcopy((void *)&ipinfo.src_addr.un.ip6addr,
2920*0a701b1eSRobert Gordon 			    &s6->sin6_addr,
29217c478bd9Sstevel@tonic-gate 			    sizeof (struct in6_addr));
29227c478bd9Sstevel@tonic-gate 
2923*0a701b1eSRobert Gordon 			break;
2924*0a701b1eSRobert Gordon 
2925*0a701b1eSRobert Gordon 		default:
2926*0a701b1eSRobert Gordon 			return (IBT_CM_REJECT);
29277c478bd9Sstevel@tonic-gate 		}
2928*0a701b1eSRobert Gordon 
29297c478bd9Sstevel@tonic-gate 		break;
29307c478bd9Sstevel@tonic-gate 
29317c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
29327c478bd9Sstevel@tonic-gate 	{
29337c478bd9Sstevel@tonic-gate 		CONN		*conn;
29347c478bd9Sstevel@tonic-gate 		rib_qp_t	*qp;
29357c478bd9Sstevel@tonic-gate 
29367c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
29377c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
29387c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
29397c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
29407c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
29417c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
29427c478bd9Sstevel@tonic-gate 			/*
29437c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
29447c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
29457c478bd9Sstevel@tonic-gate 			 */
29467c478bd9Sstevel@tonic-gate 			break;
29477c478bd9Sstevel@tonic-gate 		default:
29487c478bd9Sstevel@tonic-gate 			/*
29497c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
29507c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
29517c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
29527c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
29537c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
29547c478bd9Sstevel@tonic-gate 			 * state
29557c478bd9Sstevel@tonic-gate 			 */
29567c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
29577c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
29587c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
29597c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
29607c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
29617c478bd9Sstevel@tonic-gate 				break;
29627c478bd9Sstevel@tonic-gate 			}
2963*0a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
29647c478bd9Sstevel@tonic-gate 
29657c478bd9Sstevel@tonic-gate 			/*
29667c478bd9Sstevel@tonic-gate 			 * Free the rc_channel. Channel has already
29677c478bd9Sstevel@tonic-gate 			 * transitioned to ERROR state and WRs have been
29687c478bd9Sstevel@tonic-gate 			 * FLUSHED_ERR already.
29697c478bd9Sstevel@tonic-gate 			 */
29707c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
29717c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
29727c478bd9Sstevel@tonic-gate 
29737c478bd9Sstevel@tonic-gate 			/*
29747c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref goes down to 0
29757c478bd9Sstevel@tonic-gate 			 */
29767c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
29777c478bd9Sstevel@tonic-gate 				/*
29787c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
29797c478bd9Sstevel@tonic-gate 				 */
29807c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
29817c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
29827c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
29837c478bd9Sstevel@tonic-gate 				    &hca->srv_conn_list);
29847c478bd9Sstevel@tonic-gate 			} else {
29857c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
29867c478bd9Sstevel@tonic-gate 			}
2987*0a701b1eSRobert Gordon 			DTRACE_PROBE(rpcib__i__srvcm_chandisconnect);
29887c478bd9Sstevel@tonic-gate 			break;
29897c478bd9Sstevel@tonic-gate 		}
29907c478bd9Sstevel@tonic-gate 		break;
29917c478bd9Sstevel@tonic-gate 	}
29927c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_EST:
29937c478bd9Sstevel@tonic-gate 		/*
29947c478bd9Sstevel@tonic-gate 		 * RTU received, hence connection established.
29957c478bd9Sstevel@tonic-gate 		 */
29967c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
29977c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29987c478bd9Sstevel@tonic-gate 			    "(CONN_EST) channel established");
29997c478bd9Sstevel@tonic-gate 		break;
30007c478bd9Sstevel@tonic-gate 
30017c478bd9Sstevel@tonic-gate 	default:
30027c478bd9Sstevel@tonic-gate 		if (rib_debug > 2) {
30037c478bd9Sstevel@tonic-gate 			/* Let CM handle the following events. */
30047c478bd9Sstevel@tonic-gate 			if (event->cm_type == IBT_CM_EVENT_REP_RCV) {
30057c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30067c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_REP_RCV\n");
30077c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) {
30087c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30097c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_LAP_RCV\n");
30107c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) {
30117c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30127c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_MRA_RCV\n");
30137c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_APR_RCV) {
30147c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30157c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_APR_RCV\n");
30167c478bd9Sstevel@tonic-gate 			} else if (event->cm_type == IBT_CM_EVENT_FAILURE) {
30177c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
30187c478bd9Sstevel@tonic-gate 				    "server recv'ed IBT_CM_EVENT_FAILURE\n");
30197c478bd9Sstevel@tonic-gate 			}
30207c478bd9Sstevel@tonic-gate 		}
3021*0a701b1eSRobert Gordon 		return (IBT_CM_DEFAULT);
30227c478bd9Sstevel@tonic-gate 	}
30237c478bd9Sstevel@tonic-gate 
30247c478bd9Sstevel@tonic-gate 	/* accept all other CM messages (i.e. let the CM handle them) */
30257c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
30267c478bd9Sstevel@tonic-gate }
30277c478bd9Sstevel@tonic-gate 
30287c478bd9Sstevel@tonic-gate static rdma_stat
30297c478bd9Sstevel@tonic-gate rib_register_service(rib_hca_t *hca, int service_type)
30307c478bd9Sstevel@tonic-gate {
30317c478bd9Sstevel@tonic-gate 	ibt_srv_desc_t		sdesc;
30327c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
30337c478bd9Sstevel@tonic-gate 	ib_svc_id_t		srv_id;
30347c478bd9Sstevel@tonic-gate 	ibt_srv_hdl_t		srv_hdl;
30357c478bd9Sstevel@tonic-gate 	uint_t			port_size;
3036*0a701b1eSRobert Gordon 	uint_t			pki, i, num_ports, nbinds;
30377c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
3038*0a701b1eSRobert Gordon 	rib_service_t		*new_service;
30397c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
30407c478bd9Sstevel@tonic-gate 
30417c478bd9Sstevel@tonic-gate 	/*
30427c478bd9Sstevel@tonic-gate 	 * Query all ports for the given HCA
30437c478bd9Sstevel@tonic-gate 	 */
30447c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
30457c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
30467c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
30477c478bd9Sstevel@tonic-gate 		    &num_ports, &port_size);
30487c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30497c478bd9Sstevel@tonic-gate 	} else {
30507c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30517c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
30527c478bd9Sstevel@tonic-gate 	}
30537c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
30547c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
30557c478bd9Sstevel@tonic-gate 	}
30567c478bd9Sstevel@tonic-gate 
3057*0a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__regservice_numports,
3058*0a701b1eSRobert Gordon 	    int, num_ports);
30597c478bd9Sstevel@tonic-gate 
30607c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
30617c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
3062*0a701b1eSRobert Gordon 			DTRACE_PROBE1(rpcib__i__regservice__portinactive,
3063*0a701b1eSRobert Gordon 			    int, i+1);
3064*0a701b1eSRobert Gordon 		} else if (port_infop[i].p_linkstate == IBT_PORT_ACTIVE) {
3065*0a701b1eSRobert Gordon 			DTRACE_PROBE1(rpcib__i__regservice__portactive,
3066*0a701b1eSRobert Gordon 			    int, i+1);
30677c478bd9Sstevel@tonic-gate 		}
30687c478bd9Sstevel@tonic-gate 	}
3069*0a701b1eSRobert Gordon 
30707c478bd9Sstevel@tonic-gate 	/*
30717c478bd9Sstevel@tonic-gate 	 * Get all the IP addresses on this system to register the
30727c478bd9Sstevel@tonic-gate 	 * given "service type" on all DNS recognized IP addrs.
30737c478bd9Sstevel@tonic-gate 	 * Each service type such as NFS will have all the systems
30747c478bd9Sstevel@tonic-gate 	 * IP addresses as its different names. For now the only
30757c478bd9Sstevel@tonic-gate 	 * type of service we support in RPCIB is NFS.
30767c478bd9Sstevel@tonic-gate 	 */
30777c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
30787c478bd9Sstevel@tonic-gate 	/*
30797c478bd9Sstevel@tonic-gate 	 * Start registering and binding service to active
30807c478bd9Sstevel@tonic-gate 	 * on active ports on this HCA.
30817c478bd9Sstevel@tonic-gate 	 */
30827c478bd9Sstevel@tonic-gate 	nbinds = 0;
30837c478bd9Sstevel@tonic-gate 	new_service = NULL;
30847c478bd9Sstevel@tonic-gate 
30857c478bd9Sstevel@tonic-gate 	/*
30867c478bd9Sstevel@tonic-gate 	 * We use IP addresses as the service names for
30877c478bd9Sstevel@tonic-gate 	 * service registration.  Register each of them
30887c478bd9Sstevel@tonic-gate 	 * with CM to obtain a svc_id and svc_hdl.  We do not
30897c478bd9Sstevel@tonic-gate 	 * register the service with machine's loopback address.
30907c478bd9Sstevel@tonic-gate 	 */
30917c478bd9Sstevel@tonic-gate 	(void) bzero(&srv_id, sizeof (ib_svc_id_t));
30927c478bd9Sstevel@tonic-gate 	(void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t));
30937c478bd9Sstevel@tonic-gate 	(void) bzero(&sdesc, sizeof (ibt_srv_desc_t));
30947c478bd9Sstevel@tonic-gate 
30957c478bd9Sstevel@tonic-gate 	sdesc.sd_handler = rib_srv_cm_handler;
30967c478bd9Sstevel@tonic-gate 	sdesc.sd_flags = 0;
30977c478bd9Sstevel@tonic-gate 	ibt_status = ibt_register_service(hca->ibt_clnt_hdl,
3098*0a701b1eSRobert Gordon 	    &sdesc, ibt_get_ip_sid(IPPROTO_TCP, NFS_RDMA_PORT),
3099*0a701b1eSRobert Gordon 	    1, &srv_hdl, &srv_id);
3100*0a701b1eSRobert Gordon 
31017c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
31027c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
31037c478bd9Sstevel@tonic-gate 			continue;
31047c478bd9Sstevel@tonic-gate 
31057c478bd9Sstevel@tonic-gate 		for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
31067c478bd9Sstevel@tonic-gate 			pkey = port_infop[i].p_pkey_tbl[pki];
3107*0a701b1eSRobert Gordon 			if ((pkey & IBSRM_HB) &&
3108*0a701b1eSRobert Gordon 			    (pkey != IB_PKEY_INVALID_FULL)) {
31097c478bd9Sstevel@tonic-gate 
31107c478bd9Sstevel@tonic-gate 				/*
31117c478bd9Sstevel@tonic-gate 				 * Allocate and prepare a service entry
31127c478bd9Sstevel@tonic-gate 				 */
3113*0a701b1eSRobert Gordon 				new_service =
3114*0a701b1eSRobert Gordon 				    kmem_zalloc(1 * sizeof (rib_service_t),
31157c478bd9Sstevel@tonic-gate 				    KM_SLEEP);
31167c478bd9Sstevel@tonic-gate 
3117*0a701b1eSRobert Gordon 				new_service->srv_type = service_type;
3118*0a701b1eSRobert Gordon 				new_service->srv_hdl = srv_hdl;
31197c478bd9Sstevel@tonic-gate 				new_service->srv_next = NULL;
31207c478bd9Sstevel@tonic-gate 
31217c478bd9Sstevel@tonic-gate 				ibt_status = ibt_bind_service(srv_hdl,
3122*0a701b1eSRobert Gordon 				    port_infop[i].p_sgid_tbl[0],
3123*0a701b1eSRobert Gordon 				    NULL, rib_stat, NULL);
3124*0a701b1eSRobert Gordon 
3125*0a701b1eSRobert Gordon 				DTRACE_PROBE1(rpcib__i__regservice__bindres,
3126*0a701b1eSRobert Gordon 				    int, ibt_status);
3127*0a701b1eSRobert Gordon 
31287c478bd9Sstevel@tonic-gate 				if (ibt_status != IBT_SUCCESS) {
31297c478bd9Sstevel@tonic-gate 					kmem_free(new_service,
31307c478bd9Sstevel@tonic-gate 					    sizeof (rib_service_t));
31317c478bd9Sstevel@tonic-gate 					new_service = NULL;
31327c478bd9Sstevel@tonic-gate 					continue;
31337c478bd9Sstevel@tonic-gate 				}
3134*0a701b1eSRobert Gordon 
31357c478bd9Sstevel@tonic-gate 				/*
31367c478bd9Sstevel@tonic-gate 				 * Add to the service list for this HCA
31377c478bd9Sstevel@tonic-gate 				 */
31387c478bd9Sstevel@tonic-gate 				new_service->srv_next = hca->service_list;
31397c478bd9Sstevel@tonic-gate 				hca->service_list = new_service;
31407c478bd9Sstevel@tonic-gate 				new_service = NULL;
31417c478bd9Sstevel@tonic-gate 				nbinds++;
31427c478bd9Sstevel@tonic-gate 			}
31437c478bd9Sstevel@tonic-gate 		}
31447c478bd9Sstevel@tonic-gate 	}
31457c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
31467c478bd9Sstevel@tonic-gate 
31477c478bd9Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
31487c478bd9Sstevel@tonic-gate 
31497c478bd9Sstevel@tonic-gate 	if (nbinds == 0) {
31507c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
31517c478bd9Sstevel@tonic-gate 	} else {
31527c478bd9Sstevel@tonic-gate 		/*
31537c478bd9Sstevel@tonic-gate 		 * Put this plugin into accept state, since atleast
31547c478bd9Sstevel@tonic-gate 		 * one registration was successful.
31557c478bd9Sstevel@tonic-gate 		 */
31567c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
31577c478bd9Sstevel@tonic-gate 		plugin_state = ACCEPT;
31587c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
31597c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
31607c478bd9Sstevel@tonic-gate 	}
31617c478bd9Sstevel@tonic-gate }
31627c478bd9Sstevel@tonic-gate 
31637c478bd9Sstevel@tonic-gate void
31647c478bd9Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd)
31657c478bd9Sstevel@tonic-gate {
31667c478bd9Sstevel@tonic-gate 	rdma_stat status = RDMA_SUCCESS;
31677c478bd9Sstevel@tonic-gate 
31687c478bd9Sstevel@tonic-gate 	rd->active = 0;
31697c478bd9Sstevel@tonic-gate 	rd->err_code = RDMA_FAILED;
31707c478bd9Sstevel@tonic-gate 
31717c478bd9Sstevel@tonic-gate 	/*
31727c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
31737c478bd9Sstevel@tonic-gate 	 */
31747c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
31757c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
31767c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
31777c478bd9Sstevel@tonic-gate 		return;
31787c478bd9Sstevel@tonic-gate 	}
31797c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
31807c478bd9Sstevel@tonic-gate 
31817c478bd9Sstevel@tonic-gate 	rib_stat->q = &rd->q;
31827c478bd9Sstevel@tonic-gate 	/*
31837c478bd9Sstevel@tonic-gate 	 * Right now the only service type is NFS. Hence force feed this
31847c478bd9Sstevel@tonic-gate 	 * value. Ideally to communicate the service type it should be
31857c478bd9Sstevel@tonic-gate 	 * passed down in rdma_svc_data.
31867c478bd9Sstevel@tonic-gate 	 */
31877c478bd9Sstevel@tonic-gate 	rib_stat->service_type = NFS;
31887c478bd9Sstevel@tonic-gate 	status = rib_register_service(rib_stat->hca, NFS);
31897c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
31907c478bd9Sstevel@tonic-gate 		rd->err_code = status;
31917c478bd9Sstevel@tonic-gate 		return;
31927c478bd9Sstevel@tonic-gate 	}
31937c478bd9Sstevel@tonic-gate 	/*
31947c478bd9Sstevel@tonic-gate 	 * Service active on an HCA, check rd->err_code for more
31957c478bd9Sstevel@tonic-gate 	 * explainable errors.
31967c478bd9Sstevel@tonic-gate 	 */
31977c478bd9Sstevel@tonic-gate 	rd->active = 1;
31987c478bd9Sstevel@tonic-gate 	rd->err_code = status;
31997c478bd9Sstevel@tonic-gate }
32007c478bd9Sstevel@tonic-gate 
32017c478bd9Sstevel@tonic-gate /* XXXX */
32027c478bd9Sstevel@tonic-gate /* ARGSUSED */
32037c478bd9Sstevel@tonic-gate static void
32047c478bd9Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata)
32057c478bd9Sstevel@tonic-gate {
32067c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
32077c478bd9Sstevel@tonic-gate 
32087c478bd9Sstevel@tonic-gate 	/*
32097c478bd9Sstevel@tonic-gate 	 * KRPC called the RDMATF to stop the listeners, this means
32107c478bd9Sstevel@tonic-gate 	 * stop sending incomming or recieved requests to KRPC master
32117c478bd9Sstevel@tonic-gate 	 * transport handle for RDMA-IB. This is also means that the
32127c478bd9Sstevel@tonic-gate 	 * master transport handle, responsible for us, is going away.
32137c478bd9Sstevel@tonic-gate 	 */
32147c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
32157c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
32167c478bd9Sstevel@tonic-gate 	if (svcdata != NULL)
32177c478bd9Sstevel@tonic-gate 		svcdata->active = 0;
32187c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
32197c478bd9Sstevel@tonic-gate 
32207c478bd9Sstevel@tonic-gate 	/*
32217c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
32227c478bd9Sstevel@tonic-gate 	 */
32237c478bd9Sstevel@tonic-gate 	hca = rib_stat->hca;
32247c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
32257c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
32267c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32277c478bd9Sstevel@tonic-gate 		return;
32287c478bd9Sstevel@tonic-gate 	}
3229*0a701b1eSRobert Gordon 	rib_close_channels(&hca->srv_conn_list);
32307c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
32317c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
32327c478bd9Sstevel@tonic-gate }
32337c478bd9Sstevel@tonic-gate 
32347c478bd9Sstevel@tonic-gate /*
32357c478bd9Sstevel@tonic-gate  * Traverse the HCA's service list to unbind and deregister services.
32367c478bd9Sstevel@tonic-gate  * Instead of unbinding the service for a service handle by
32377c478bd9Sstevel@tonic-gate  * calling ibt_unbind_service() for each port/pkey, we unbind
32387c478bd9Sstevel@tonic-gate  * all the services for the service handle by making only one
32397c478bd9Sstevel@tonic-gate  * call to ibt_unbind_all_services().  Then, we deregister the
32407c478bd9Sstevel@tonic-gate  * service for the service handle.
32417c478bd9Sstevel@tonic-gate  *
32427c478bd9Sstevel@tonic-gate  * When traversing the entries in service_list, we compare the
32437c478bd9Sstevel@tonic-gate  * srv_hdl of the current entry with that of the next.  If they
32447c478bd9Sstevel@tonic-gate  * are different or if the next entry is NULL, the current entry
32457c478bd9Sstevel@tonic-gate  * marks the last binding of the service handle.  In this case,
32467c478bd9Sstevel@tonic-gate  * call ibt_unbind_all_services() and deregister the service for
32477c478bd9Sstevel@tonic-gate  * the service handle.  If they are the same, the current and the
32487c478bd9Sstevel@tonic-gate  * next entries are bound to the same service handle.  In this
32497c478bd9Sstevel@tonic-gate  * case, move on to the next entry.
32507c478bd9Sstevel@tonic-gate  */
32517c478bd9Sstevel@tonic-gate static void
32527c478bd9Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca)
32537c478bd9Sstevel@tonic-gate {
32547c478bd9Sstevel@tonic-gate 	rib_service_t		*srv_list, *to_remove;
32557c478bd9Sstevel@tonic-gate 
32567c478bd9Sstevel@tonic-gate 	/*
32577c478bd9Sstevel@tonic-gate 	 * unbind and deregister the services for this service type.
32587c478bd9Sstevel@tonic-gate 	 * Right now there is only one service type. In future it will
32597c478bd9Sstevel@tonic-gate 	 * be passed down to this function.
32607c478bd9Sstevel@tonic-gate 	 */
32617c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
32627c478bd9Sstevel@tonic-gate 	srv_list = hca->service_list;
32637c478bd9Sstevel@tonic-gate 	while (srv_list != NULL) {
32647c478bd9Sstevel@tonic-gate 		to_remove = srv_list;
32657c478bd9Sstevel@tonic-gate 		srv_list = to_remove->srv_next;
32667c478bd9Sstevel@tonic-gate 		if (srv_list == NULL || bcmp(to_remove->srv_hdl,
32677c478bd9Sstevel@tonic-gate 		    srv_list->srv_hdl, sizeof (ibt_srv_hdl_t))) {
32687c478bd9Sstevel@tonic-gate 
3269*0a701b1eSRobert Gordon 			(void) ibt_unbind_all_services(to_remove->srv_hdl);
3270*0a701b1eSRobert Gordon 			(void) ibt_deregister_service(hca->ibt_clnt_hdl,
32717c478bd9Sstevel@tonic-gate 			    to_remove->srv_hdl);
32727c478bd9Sstevel@tonic-gate 		}
32737c478bd9Sstevel@tonic-gate 
32747c478bd9Sstevel@tonic-gate 		kmem_free(to_remove, sizeof (rib_service_t));
32757c478bd9Sstevel@tonic-gate 	}
32767c478bd9Sstevel@tonic-gate 	hca->service_list = NULL;
32777c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
32787c478bd9Sstevel@tonic-gate }
32797c478bd9Sstevel@tonic-gate 
32807c478bd9Sstevel@tonic-gate static struct svc_recv *
32817c478bd9Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl)
32827c478bd9Sstevel@tonic-gate {
32837c478bd9Sstevel@tonic-gate 	struct svc_recv	*recvp;
32847c478bd9Sstevel@tonic-gate 
32857c478bd9Sstevel@tonic-gate 	recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP);
32867c478bd9Sstevel@tonic-gate 	recvp->vaddr = sgl->ds_va;
32877c478bd9Sstevel@tonic-gate 	recvp->qp = qp;
32887c478bd9Sstevel@tonic-gate 	recvp->bytes_xfer = 0;
32897c478bd9Sstevel@tonic-gate 	return (recvp);
32907c478bd9Sstevel@tonic-gate }
32917c478bd9Sstevel@tonic-gate 
32927c478bd9Sstevel@tonic-gate static int
32937c478bd9Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp)
32947c478bd9Sstevel@tonic-gate {
32957c478bd9Sstevel@tonic-gate 	kmem_free(recvp, sizeof (*recvp));
32967c478bd9Sstevel@tonic-gate 
32977c478bd9Sstevel@tonic-gate 	return (0);
32987c478bd9Sstevel@tonic-gate }
32997c478bd9Sstevel@tonic-gate 
33007c478bd9Sstevel@tonic-gate static struct reply *
33017c478bd9Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid)
33027c478bd9Sstevel@tonic-gate {
33037c478bd9Sstevel@tonic-gate 	struct reply	*rep;
33047c478bd9Sstevel@tonic-gate 
33057c478bd9Sstevel@tonic-gate 
33067c478bd9Sstevel@tonic-gate 	rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP);
33077c478bd9Sstevel@tonic-gate 	if (rep == NULL) {
3308*0a701b1eSRobert Gordon 		DTRACE_PROBE(rpcib__i__addrreply__nomem);
33097c478bd9Sstevel@tonic-gate 		return (NULL);
33107c478bd9Sstevel@tonic-gate 	}
33117c478bd9Sstevel@tonic-gate 	rep->xid = msgid;
33127c478bd9Sstevel@tonic-gate 	rep->vaddr_cq = NULL;
33137c478bd9Sstevel@tonic-gate 	rep->bytes_xfer = 0;
33147c478bd9Sstevel@tonic-gate 	rep->status = (uint_t)REPLY_WAIT;
33157c478bd9Sstevel@tonic-gate 	rep->prev = NULL;
33167c478bd9Sstevel@tonic-gate 	cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL);
33177c478bd9Sstevel@tonic-gate 
33187c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
33197c478bd9Sstevel@tonic-gate 	if (qp->replylist) {
33207c478bd9Sstevel@tonic-gate 		rep->next = qp->replylist;
33217c478bd9Sstevel@tonic-gate 		qp->replylist->prev = rep;
33227c478bd9Sstevel@tonic-gate 	}
33237c478bd9Sstevel@tonic-gate 	qp->rep_list_size++;
3324*0a701b1eSRobert Gordon 
3325*0a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__addrreply__listsize,
3326*0a701b1eSRobert Gordon 	    int, qp->rep_list_size);
3327*0a701b1eSRobert Gordon 
33287c478bd9Sstevel@tonic-gate 	qp->replylist = rep;
33297c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
33307c478bd9Sstevel@tonic-gate 
33317c478bd9Sstevel@tonic-gate 	return (rep);
33327c478bd9Sstevel@tonic-gate }
33337c478bd9Sstevel@tonic-gate 
33347c478bd9Sstevel@tonic-gate static rdma_stat
33357c478bd9Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp)
33367c478bd9Sstevel@tonic-gate {
33377c478bd9Sstevel@tonic-gate 	struct reply	*r, *n;
33387c478bd9Sstevel@tonic-gate 
33397c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
33407c478bd9Sstevel@tonic-gate 	for (r = qp->replylist; r != NULL; r = n) {
33417c478bd9Sstevel@tonic-gate 		n = r->next;
33427c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, r);
33437c478bd9Sstevel@tonic-gate 	}
33447c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
33457c478bd9Sstevel@tonic-gate 
33467c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
33477c478bd9Sstevel@tonic-gate }
33487c478bd9Sstevel@tonic-gate 
33497c478bd9Sstevel@tonic-gate static int
33507c478bd9Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep)
33517c478bd9Sstevel@tonic-gate {
33527c478bd9Sstevel@tonic-gate 
33537c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->replylist_lock));
33547c478bd9Sstevel@tonic-gate 	if (rep->prev) {
33557c478bd9Sstevel@tonic-gate 		rep->prev->next = rep->next;
33567c478bd9Sstevel@tonic-gate 	}
33577c478bd9Sstevel@tonic-gate 	if (rep->next) {
33587c478bd9Sstevel@tonic-gate 		rep->next->prev = rep->prev;
33597c478bd9Sstevel@tonic-gate 	}
33607c478bd9Sstevel@tonic-gate 	if (qp->replylist == rep)
33617c478bd9Sstevel@tonic-gate 		qp->replylist = rep->next;
33627c478bd9Sstevel@tonic-gate 
33637c478bd9Sstevel@tonic-gate 	cv_destroy(&rep->wait_cv);
33647c478bd9Sstevel@tonic-gate 	qp->rep_list_size--;
3365*0a701b1eSRobert Gordon 
3366*0a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__remreply__listsize,
3367*0a701b1eSRobert Gordon 	    int, qp->rep_list_size);
33687c478bd9Sstevel@tonic-gate 
33697c478bd9Sstevel@tonic-gate 	kmem_free(rep, sizeof (*rep));
33707c478bd9Sstevel@tonic-gate 
33717c478bd9Sstevel@tonic-gate 	return (0);
33727c478bd9Sstevel@tonic-gate }
33737c478bd9Sstevel@tonic-gate 
33747c478bd9Sstevel@tonic-gate rdma_stat
3375*0a701b1eSRobert Gordon rib_registermem(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
33767c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle)
33777c478bd9Sstevel@tonic-gate {
33787c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
33797c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
33807c478bd9Sstevel@tonic-gate 	rdma_stat	status;
33817c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
33827c478bd9Sstevel@tonic-gate 
33837c478bd9Sstevel@tonic-gate 	/*
33847c478bd9Sstevel@tonic-gate 	 * Note: ALL buffer pools use the same memory type RDMARW.
33857c478bd9Sstevel@tonic-gate 	 */
3386*0a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
33877c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
338811606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
33897c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
33907c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
33917c478bd9Sstevel@tonic-gate 	} else {
33927c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
33937c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
33947c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
33957c478bd9Sstevel@tonic-gate 	}
33967c478bd9Sstevel@tonic-gate 	return (status);
33977c478bd9Sstevel@tonic-gate }
33987c478bd9Sstevel@tonic-gate 
33997c478bd9Sstevel@tonic-gate static rdma_stat
3400*0a701b1eSRobert Gordon rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size,
3401*0a701b1eSRobert Gordon 	ibt_mr_flags_t spec,
34027c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
34037c478bd9Sstevel@tonic-gate {
34047c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
34057c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
340611606941Sjwahlig 	mem_attr.mr_vaddr = (uintptr_t)buf;
34077c478bd9Sstevel@tonic-gate 	mem_attr.mr_len = (ib_msglen_t)size;
3408*0a701b1eSRobert Gordon 	mem_attr.mr_as = (struct as *)(caddr_t)adsp;
34097c478bd9Sstevel@tonic-gate 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
34107c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
34117c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_WINDOW_BIND | spec;
34127c478bd9Sstevel@tonic-gate 
34137c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
34147c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
34157c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl,
34167c478bd9Sstevel@tonic-gate 		    &mem_attr, mr_hdlp, mr_descp);
34177c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
34187c478bd9Sstevel@tonic-gate 	} else {
34197c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
34207c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
34217c478bd9Sstevel@tonic-gate 	}
34227c478bd9Sstevel@tonic-gate 
34237c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
34247c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
34257c478bd9Sstevel@tonic-gate 	}
34267c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
34277c478bd9Sstevel@tonic-gate }
34287c478bd9Sstevel@tonic-gate 
34297c478bd9Sstevel@tonic-gate rdma_stat
3430*0a701b1eSRobert Gordon rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
3431*0a701b1eSRobert Gordon 	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc)
34327c478bd9Sstevel@tonic-gate {
34337c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
3434*0a701b1eSRobert Gordon 	rib_lrc_entry_t *l;
34357c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
34367c478bd9Sstevel@tonic-gate 	rdma_stat	status;
34377c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
34387c478bd9Sstevel@tonic-gate 
34397c478bd9Sstevel@tonic-gate 	/*
34407c478bd9Sstevel@tonic-gate 	 * Non-coherent memory registration.
34417c478bd9Sstevel@tonic-gate 	 */
3442*0a701b1eSRobert Gordon 	l = (rib_lrc_entry_t *)lrc;
3443*0a701b1eSRobert Gordon 	if (l) {
3444*0a701b1eSRobert Gordon 		if (l->registered) {
3445*0a701b1eSRobert Gordon 			buf_handle->mrc_linfo =
3446*0a701b1eSRobert Gordon 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
3447*0a701b1eSRobert Gordon 			buf_handle->mrc_lmr =
3448*0a701b1eSRobert Gordon 			    (uint32_t)l->lrc_mhandle.mrc_lmr;
3449*0a701b1eSRobert Gordon 			buf_handle->mrc_rmr =
3450*0a701b1eSRobert Gordon 			    (uint32_t)l->lrc_mhandle.mrc_rmr;
3451*0a701b1eSRobert Gordon 			*sync_handle = (RIB_SYNCMEM_HANDLE)
3452*0a701b1eSRobert Gordon 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
3453*0a701b1eSRobert Gordon 			return (RDMA_SUCCESS);
3454*0a701b1eSRobert Gordon 		} else {
3455*0a701b1eSRobert Gordon 			/* Always register the whole buffer */
3456*0a701b1eSRobert Gordon 			buf = (caddr_t)l->lrc_buf;
3457*0a701b1eSRobert Gordon 			buflen = l->lrc_len;
3458*0a701b1eSRobert Gordon 		}
3459*0a701b1eSRobert Gordon 	}
3460*0a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
3461*0a701b1eSRobert Gordon 
34627c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
3463*0a701b1eSRobert Gordon 		if (l) {
3464*0a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
3465*0a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_lmr   = (uint32_t)mr_desc.md_lkey;
3466*0a701b1eSRobert Gordon 			l->lrc_mhandle.mrc_rmr   = (uint32_t)mr_desc.md_rkey;
3467*0a701b1eSRobert Gordon 			l->registered		 = TRUE;
3468*0a701b1eSRobert Gordon 		}
346911606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
34707c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
34717c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
34727c478bd9Sstevel@tonic-gate 		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
34737c478bd9Sstevel@tonic-gate 	} else {
34747c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
34757c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
34767c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
34777c478bd9Sstevel@tonic-gate 	}
34787c478bd9Sstevel@tonic-gate 	return (status);
34797c478bd9Sstevel@tonic-gate }
34807c478bd9Sstevel@tonic-gate 
34817c478bd9Sstevel@tonic-gate /* ARGSUSED */
34827c478bd9Sstevel@tonic-gate rdma_stat
34837c478bd9Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
34847c478bd9Sstevel@tonic-gate {
34857c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
34867c478bd9Sstevel@tonic-gate 	/*
34877c478bd9Sstevel@tonic-gate 	 * Allow memory deregistration even if HCA is
34887c478bd9Sstevel@tonic-gate 	 * getting detached. Need all outstanding
34897c478bd9Sstevel@tonic-gate 	 * memory registrations to be deregistered
34907c478bd9Sstevel@tonic-gate 	 * before HCA_DETACH_EVENT can be accepted.
34917c478bd9Sstevel@tonic-gate 	 */
34927c478bd9Sstevel@tonic-gate 	(void) ibt_deregister_mr(hca->hca_hdl,
349311606941Sjwahlig 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
34947c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
34957c478bd9Sstevel@tonic-gate }
34967c478bd9Sstevel@tonic-gate 
34977c478bd9Sstevel@tonic-gate /* ARGSUSED */
34987c478bd9Sstevel@tonic-gate rdma_stat
34997c478bd9Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
3500*0a701b1eSRobert Gordon 		RIB_SYNCMEM_HANDLE sync_handle, void *lrc)
35017c478bd9Sstevel@tonic-gate {
3502*0a701b1eSRobert Gordon 	rib_lrc_entry_t *l;
3503*0a701b1eSRobert Gordon 	l = (rib_lrc_entry_t *)lrc;
3504*0a701b1eSRobert Gordon 	if (l)
3505*0a701b1eSRobert Gordon 		if (l->registered)
3506*0a701b1eSRobert Gordon 			return (RDMA_SUCCESS);
3507*0a701b1eSRobert Gordon 
35087c478bd9Sstevel@tonic-gate 	(void) rib_deregistermem(conn, buf, buf_handle);
35097c478bd9Sstevel@tonic-gate 
35107c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
35117c478bd9Sstevel@tonic-gate }
35127c478bd9Sstevel@tonic-gate 
35137c478bd9Sstevel@tonic-gate /* ARGSUSED */
35147c478bd9Sstevel@tonic-gate rdma_stat
35157c478bd9Sstevel@tonic-gate rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf,
35167c478bd9Sstevel@tonic-gate 		int len, int cpu)
35177c478bd9Sstevel@tonic-gate {
35187c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
35197c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
35207c478bd9Sstevel@tonic-gate 	ibt_mr_sync_t	mr_segment;
35217c478bd9Sstevel@tonic-gate 
35227c478bd9Sstevel@tonic-gate 	mr_segment.ms_handle = (ibt_mr_hdl_t)shandle;
352311606941Sjwahlig 	mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf;
35247c478bd9Sstevel@tonic-gate 	mr_segment.ms_len = (ib_memlen_t)len;
35257c478bd9Sstevel@tonic-gate 	if (cpu) {
35267c478bd9Sstevel@tonic-gate 		/* make incoming data visible to memory */
35277c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_WRITE;
35287c478bd9Sstevel@tonic-gate 	} else {
35297c478bd9Sstevel@tonic-gate 		/* make memory changes visible to IO */
35307c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_READ;
35317c478bd9Sstevel@tonic-gate 	}
35327c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
35337c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
35347c478bd9Sstevel@tonic-gate 		status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1);
35357c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
35367c478bd9Sstevel@tonic-gate 	} else {
35377c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
35387c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
35397c478bd9Sstevel@tonic-gate 	}
35407c478bd9Sstevel@tonic-gate 
35417c478bd9Sstevel@tonic-gate 	if (status == IBT_SUCCESS)
35427c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
35437c478bd9Sstevel@tonic-gate 	else {
35447c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
35457c478bd9Sstevel@tonic-gate 	}
35467c478bd9Sstevel@tonic-gate }
35477c478bd9Sstevel@tonic-gate 
35487c478bd9Sstevel@tonic-gate /*
35497c478bd9Sstevel@tonic-gate  * XXXX	????
35507c478bd9Sstevel@tonic-gate  */
35517c478bd9Sstevel@tonic-gate static rdma_stat
35527c478bd9Sstevel@tonic-gate rib_getinfo(rdma_info_t *info)
35537c478bd9Sstevel@tonic-gate {
35547c478bd9Sstevel@tonic-gate 	/*
35557c478bd9Sstevel@tonic-gate 	 * XXXX	Hack!
35567c478bd9Sstevel@tonic-gate 	 */
35577c478bd9Sstevel@tonic-gate 	info->addrlen = 16;
35587c478bd9Sstevel@tonic-gate 	info->mts = 1000000;
35597c478bd9Sstevel@tonic-gate 	info->mtu = 1000000;
35607c478bd9Sstevel@tonic-gate 
35617c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
35627c478bd9Sstevel@tonic-gate }
35637c478bd9Sstevel@tonic-gate 
35647c478bd9Sstevel@tonic-gate rib_bufpool_t *
35657c478bd9Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num)
35667c478bd9Sstevel@tonic-gate {
35677c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
35687c478bd9Sstevel@tonic-gate 	bufpool_t	*bp = NULL;
35697c478bd9Sstevel@tonic-gate 	caddr_t		buf;
35707c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
35717c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
35727c478bd9Sstevel@tonic-gate 	int		i, j;
35737c478bd9Sstevel@tonic-gate 
35747c478bd9Sstevel@tonic-gate 	rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP);
35757c478bd9Sstevel@tonic-gate 
35767c478bd9Sstevel@tonic-gate 	bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) +
35777c478bd9Sstevel@tonic-gate 	    num * sizeof (void *), KM_SLEEP);
35787c478bd9Sstevel@tonic-gate 
35797c478bd9Sstevel@tonic-gate 	mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
35807c478bd9Sstevel@tonic-gate 	bp->numelems = num;
35817c478bd9Sstevel@tonic-gate 
3582*0a701b1eSRobert Gordon 
35837c478bd9Sstevel@tonic-gate 	switch (ptype) {
35847c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
35857c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
35867c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_MSG_SZ;
35877c478bd9Sstevel@tonic-gate 		break;
35887c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
35897c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
35907c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_BUF_SIZE;
35917c478bd9Sstevel@tonic-gate 		break;
35927c478bd9Sstevel@tonic-gate 	default:
35937c478bd9Sstevel@tonic-gate 		goto fail;
35947c478bd9Sstevel@tonic-gate 	}
35957c478bd9Sstevel@tonic-gate 
35967c478bd9Sstevel@tonic-gate 	/*
35977c478bd9Sstevel@tonic-gate 	 * Register the pool.
35987c478bd9Sstevel@tonic-gate 	 */
35997c478bd9Sstevel@tonic-gate 	bp->bufsize = num * bp->rsize;
36007c478bd9Sstevel@tonic-gate 	bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
36017c478bd9Sstevel@tonic-gate 	rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
36027c478bd9Sstevel@tonic-gate 	    sizeof (ibt_mr_hdl_t), KM_SLEEP);
36037c478bd9Sstevel@tonic-gate 	rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
36047c478bd9Sstevel@tonic-gate 	    sizeof (ibt_mr_desc_t), KM_SLEEP);
36057c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
3606*0a701b1eSRobert Gordon 
36077c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
36087c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
36097c478bd9Sstevel@tonic-gate 		goto fail;
36107c478bd9Sstevel@tonic-gate 	}
3611*0a701b1eSRobert Gordon 
36127c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
36137c478bd9Sstevel@tonic-gate 		bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
361411606941Sjwahlig 		mem_attr.mr_vaddr = (uintptr_t)buf;
36157c478bd9Sstevel@tonic-gate 		mem_attr.mr_len = (ib_msglen_t)bp->rsize;
36167c478bd9Sstevel@tonic-gate 		mem_attr.mr_as = NULL;
36177c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl,
3618*0a701b1eSRobert Gordon 		    hca->pd_hdl, &mem_attr,
3619*0a701b1eSRobert Gordon 		    &rbp->mr_hdl[i],
36207c478bd9Sstevel@tonic-gate 		    &rbp->mr_desc[i]);
36217c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
36227c478bd9Sstevel@tonic-gate 			for (j = 0; j < i; j++) {
3623*0a701b1eSRobert Gordon 				(void) ibt_deregister_mr(hca->hca_hdl,
3624*0a701b1eSRobert Gordon 				    rbp->mr_hdl[j]);
36257c478bd9Sstevel@tonic-gate 			}
36267c478bd9Sstevel@tonic-gate 			rw_exit(&hca->state_lock);
36277c478bd9Sstevel@tonic-gate 			goto fail;
36287c478bd9Sstevel@tonic-gate 		}
36297c478bd9Sstevel@tonic-gate 	}
36307c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
36317c478bd9Sstevel@tonic-gate 	buf = (caddr_t)bp->buf;
36327c478bd9Sstevel@tonic-gate 	for (i = 0; i < num; i++, buf += bp->rsize) {
36337c478bd9Sstevel@tonic-gate 		bp->buflist[i] = (void *)buf;
36347c478bd9Sstevel@tonic-gate 	}
36357c478bd9Sstevel@tonic-gate 	bp->buffree = num - 1;	/* no. of free buffers */
36367c478bd9Sstevel@tonic-gate 	rbp->bpool = bp;
36377c478bd9Sstevel@tonic-gate 
36387c478bd9Sstevel@tonic-gate 	return (rbp);
36397c478bd9Sstevel@tonic-gate fail:
36407c478bd9Sstevel@tonic-gate 	if (bp) {
36417c478bd9Sstevel@tonic-gate 		if (bp->buf)
36427c478bd9Sstevel@tonic-gate 			kmem_free(bp->buf, bp->bufsize);
36437c478bd9Sstevel@tonic-gate 		kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *));
36447c478bd9Sstevel@tonic-gate 	}
36457c478bd9Sstevel@tonic-gate 	if (rbp) {
36467c478bd9Sstevel@tonic-gate 		if (rbp->mr_hdl)
36477c478bd9Sstevel@tonic-gate 			kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t));
36487c478bd9Sstevel@tonic-gate 		if (rbp->mr_desc)
36497c478bd9Sstevel@tonic-gate 			kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t));
36507c478bd9Sstevel@tonic-gate 		kmem_free(rbp, sizeof (rib_bufpool_t));
36517c478bd9Sstevel@tonic-gate 	}
36527c478bd9Sstevel@tonic-gate 	return (NULL);
36537c478bd9Sstevel@tonic-gate }
36547c478bd9Sstevel@tonic-gate 
36557c478bd9Sstevel@tonic-gate static void
36567c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype)
36577c478bd9Sstevel@tonic-gate {
36587c478bd9Sstevel@tonic-gate 	int i;
36597c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
36607c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
36617c478bd9Sstevel@tonic-gate 
36627c478bd9Sstevel@tonic-gate 	/*
36637c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
36647c478bd9Sstevel@tonic-gate 	 */
36657c478bd9Sstevel@tonic-gate 	switch (ptype) {
36667c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
36677c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
36687c478bd9Sstevel@tonic-gate 			break;
36697c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
36707c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
36717c478bd9Sstevel@tonic-gate 			break;
36727c478bd9Sstevel@tonic-gate 		default:
36737c478bd9Sstevel@tonic-gate 			return;
36747c478bd9Sstevel@tonic-gate 	}
36757c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
36767c478bd9Sstevel@tonic-gate 		return;
36777c478bd9Sstevel@tonic-gate 
36787c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
36797c478bd9Sstevel@tonic-gate 
36807c478bd9Sstevel@tonic-gate 	/*
36817c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
36827c478bd9Sstevel@tonic-gate 	 */
36837c478bd9Sstevel@tonic-gate 	for (i = 0; i < bp->numelems; i++) {
36847c478bd9Sstevel@tonic-gate 		(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]);
36857c478bd9Sstevel@tonic-gate 	}
36867c478bd9Sstevel@tonic-gate }
36877c478bd9Sstevel@tonic-gate 
36887c478bd9Sstevel@tonic-gate static void
36897c478bd9Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype)
36907c478bd9Sstevel@tonic-gate {
36917c478bd9Sstevel@tonic-gate 
36927c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
36937c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
36947c478bd9Sstevel@tonic-gate 
36957c478bd9Sstevel@tonic-gate 	/*
36967c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
36977c478bd9Sstevel@tonic-gate 	 */
36987c478bd9Sstevel@tonic-gate 	switch (ptype) {
36997c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
37007c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
37017c478bd9Sstevel@tonic-gate 			break;
37027c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
37037c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
37047c478bd9Sstevel@tonic-gate 			break;
37057c478bd9Sstevel@tonic-gate 		default:
37067c478bd9Sstevel@tonic-gate 			return;
37077c478bd9Sstevel@tonic-gate 	}
37087c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
37097c478bd9Sstevel@tonic-gate 		return;
37107c478bd9Sstevel@tonic-gate 
37117c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
37127c478bd9Sstevel@tonic-gate 
37137c478bd9Sstevel@tonic-gate 	/*
37147c478bd9Sstevel@tonic-gate 	 * Free the pool memory.
37157c478bd9Sstevel@tonic-gate 	 */
37167c478bd9Sstevel@tonic-gate 	if (rbp->mr_hdl)
37177c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
37187c478bd9Sstevel@tonic-gate 
37197c478bd9Sstevel@tonic-gate 	if (rbp->mr_desc)
37207c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
37217c478bd9Sstevel@tonic-gate 	if (bp->buf)
37227c478bd9Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
37237c478bd9Sstevel@tonic-gate 	mutex_destroy(&bp->buflock);
37247c478bd9Sstevel@tonic-gate 	kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
37257c478bd9Sstevel@tonic-gate 	kmem_free(rbp, sizeof (rib_bufpool_t));
37267c478bd9Sstevel@tonic-gate }
37277c478bd9Sstevel@tonic-gate 
37287c478bd9Sstevel@tonic-gate void
37297c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype)
37307c478bd9Sstevel@tonic-gate {
37317c478bd9Sstevel@tonic-gate 	/*
37327c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
37337c478bd9Sstevel@tonic-gate 	 */
37347c478bd9Sstevel@tonic-gate 	rib_rbufpool_deregister(hca, ptype);
37357c478bd9Sstevel@tonic-gate 	rib_rbufpool_free(hca, ptype);
37367c478bd9Sstevel@tonic-gate }
37377c478bd9Sstevel@tonic-gate 
37387c478bd9Sstevel@tonic-gate /*
37397c478bd9Sstevel@tonic-gate  * Fetch a buffer from the pool of type specified in rdbuf->type.
37407c478bd9Sstevel@tonic-gate  */
37417c478bd9Sstevel@tonic-gate static rdma_stat
37427c478bd9Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf)
37437c478bd9Sstevel@tonic-gate {
3744*0a701b1eSRobert Gordon 	rib_lrc_entry_t *rlep;
3745*0a701b1eSRobert Gordon 
3746*0a701b1eSRobert Gordon 	if (rdbuf->type ==  RDMA_LONG_BUFFER) {
3747*0a701b1eSRobert Gordon 		rlep = rib_get_cache_buf(conn, rdbuf->len);
3748*0a701b1eSRobert Gordon 		rdbuf->rb_private =  (caddr_t)rlep;
3749*0a701b1eSRobert Gordon 		rdbuf->addr = rlep->lrc_buf;
3750*0a701b1eSRobert Gordon 		rdbuf->handle = rlep->lrc_mhandle;
3751*0a701b1eSRobert Gordon 		return (RDMA_SUCCESS);
3752*0a701b1eSRobert Gordon 	}
37537c478bd9Sstevel@tonic-gate 
37547c478bd9Sstevel@tonic-gate 	rdbuf->addr = rib_rbuf_alloc(conn, rdbuf);
37557c478bd9Sstevel@tonic-gate 	if (rdbuf->addr) {
37567c478bd9Sstevel@tonic-gate 		switch (rdbuf->type) {
37577c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
37587c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_MSG_SZ;	/* 1K */
37597c478bd9Sstevel@tonic-gate 			break;
37607c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
37617c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_BUF_SIZE; /* 2K */
37627c478bd9Sstevel@tonic-gate 			break;
37637c478bd9Sstevel@tonic-gate 		default:
37647c478bd9Sstevel@tonic-gate 			rdbuf->len = 0;
37657c478bd9Sstevel@tonic-gate 		}
37667c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
37677c478bd9Sstevel@tonic-gate 	} else
37687c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
37697c478bd9Sstevel@tonic-gate }
37707c478bd9Sstevel@tonic-gate 
#if defined(MEASURE_POOL_DEPTH)
/*
 * Empty hooks, only built when MEASURE_POOL_DEPTH is defined.
 * rib_rbuf_alloc() calls them with MAX_BUFS - (buffree + 1) for the receive
 * and send pool respectively.  The bodies intentionally do nothing.
 */
/* ARGSUSED */
static void
rib_recv_bufs(uint32_t x)
{
}

/* ARGSUSED */
static void
rib_send_bufs(uint32_t x)
{
}
#endif
37807c478bd9Sstevel@tonic-gate 
37817c478bd9Sstevel@tonic-gate /*
37827c478bd9Sstevel@tonic-gate  * Fetch a buffer of specified type.
37837c478bd9Sstevel@tonic-gate  * Note that rdbuf->handle is mw's rkey.
37847c478bd9Sstevel@tonic-gate  */
37857c478bd9Sstevel@tonic-gate static void *
37867c478bd9Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf)
37877c478bd9Sstevel@tonic-gate {
37887c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
37897c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = qp->hca;
37907c478bd9Sstevel@tonic-gate 	rdma_btype	ptype = rdbuf->type;
37917c478bd9Sstevel@tonic-gate 	void		*buf;
37927c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
37937c478bd9Sstevel@tonic-gate 	bufpool_t	*bp;
37947c478bd9Sstevel@tonic-gate 	int		i;
37957c478bd9Sstevel@tonic-gate 
37967c478bd9Sstevel@tonic-gate 	/*
37977c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
37987c478bd9Sstevel@tonic-gate 	 */
37997c478bd9Sstevel@tonic-gate 	switch (ptype) {
38007c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
38017c478bd9Sstevel@tonic-gate 		rbp = hca->send_pool;
38027c478bd9Sstevel@tonic-gate 		break;
38037c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
38047c478bd9Sstevel@tonic-gate 		rbp = hca->recv_pool;
38057c478bd9Sstevel@tonic-gate 		break;
38067c478bd9Sstevel@tonic-gate 	default:
38077c478bd9Sstevel@tonic-gate 		return (NULL);
38087c478bd9Sstevel@tonic-gate 	}
38097c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
38107c478bd9Sstevel@tonic-gate 		return (NULL);
38117c478bd9Sstevel@tonic-gate 
38127c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
38137c478bd9Sstevel@tonic-gate 
38147c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
38157c478bd9Sstevel@tonic-gate 	if (bp->buffree < 0) {
38167c478bd9Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
38177c478bd9Sstevel@tonic-gate 		return (NULL);
38187c478bd9Sstevel@tonic-gate 	}
38197c478bd9Sstevel@tonic-gate 
38207c478bd9Sstevel@tonic-gate 	/* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */
38217c478bd9Sstevel@tonic-gate 	buf = bp->buflist[bp->buffree];
38227c478bd9Sstevel@tonic-gate 	rdbuf->addr = buf;
38237c478bd9Sstevel@tonic-gate 	rdbuf->len = bp->rsize;
38247c478bd9Sstevel@tonic-gate 	for (i = bp->numelems - 1; i >= 0; i--) {
382511606941Sjwahlig 		if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
3826*0a701b1eSRobert Gordon 			rdbuf->handle.mrc_rmr =
3827*0a701b1eSRobert Gordon 			    (uint32_t)rbp->mr_desc[i].md_rkey;
3828*0a701b1eSRobert Gordon 			rdbuf->handle.mrc_linfo =
3829*0a701b1eSRobert Gordon 			    (uintptr_t)rbp->mr_hdl[i];
3830*0a701b1eSRobert Gordon 			rdbuf->handle.mrc_lmr =
3831*0a701b1eSRobert Gordon 			    (uint32_t)rbp->mr_desc[i].md_lkey;
3832*0a701b1eSRobert Gordon #if defined(MEASURE_POOL_DEPTH)
3833*0a701b1eSRobert Gordon 			if (ptype == SEND_BUFFER)
3834*0a701b1eSRobert Gordon 				rib_send_bufs(MAX_BUFS - (bp->buffree+1));
3835*0a701b1eSRobert Gordon 			if (ptype == RECV_BUFFER)
3836*0a701b1eSRobert Gordon 				rib_recv_bufs(MAX_BUFS - (bp->buffree+1));
3837*0a701b1eSRobert Gordon #endif
38387c478bd9Sstevel@tonic-gate 			bp->buffree--;
38397c478bd9Sstevel@tonic-gate 
38407c478bd9Sstevel@tonic-gate 			mutex_exit(&bp->buflock);
38417c478bd9Sstevel@tonic-gate 
38427c478bd9Sstevel@tonic-gate 			return (buf);
38437c478bd9Sstevel@tonic-gate 		}
38447c478bd9Sstevel@tonic-gate 	}
3845*0a701b1eSRobert Gordon 
38467c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
38477c478bd9Sstevel@tonic-gate 
38487c478bd9Sstevel@tonic-gate 	return (NULL);
38497c478bd9Sstevel@tonic-gate }
38507c478bd9Sstevel@tonic-gate 
38517c478bd9Sstevel@tonic-gate static void
38527c478bd9Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf)
38537c478bd9Sstevel@tonic-gate {
38547c478bd9Sstevel@tonic-gate 
3855*0a701b1eSRobert Gordon 	if (rdbuf->type == RDMA_LONG_BUFFER) {
3856*0a701b1eSRobert Gordon 		rib_free_cache_buf(conn, (rib_lrc_entry_t *)rdbuf->rb_private);
3857*0a701b1eSRobert Gordon 		rdbuf->rb_private = NULL;
3858*0a701b1eSRobert Gordon 		return;
3859*0a701b1eSRobert Gordon 	}
38607c478bd9Sstevel@tonic-gate 	rib_rbuf_free(conn, rdbuf->type, rdbuf->addr);
38617c478bd9Sstevel@tonic-gate }
38627c478bd9Sstevel@tonic-gate 
38637c478bd9Sstevel@tonic-gate static void
38647c478bd9Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf)
38657c478bd9Sstevel@tonic-gate {
38667c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
38677c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = qp->hca;
38687c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
38697c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
38707c478bd9Sstevel@tonic-gate 
38717c478bd9Sstevel@tonic-gate 	/*
38727c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
38737c478bd9Sstevel@tonic-gate 	 */
38747c478bd9Sstevel@tonic-gate 	switch (ptype) {
38757c478bd9Sstevel@tonic-gate 	case SEND_BUFFER:
38767c478bd9Sstevel@tonic-gate 		rbp = hca->send_pool;
38777c478bd9Sstevel@tonic-gate 		break;
38787c478bd9Sstevel@tonic-gate 	case RECV_BUFFER:
38797c478bd9Sstevel@tonic-gate 		rbp = hca->recv_pool;
38807c478bd9Sstevel@tonic-gate 		break;
38817c478bd9Sstevel@tonic-gate 	default:
38827c478bd9Sstevel@tonic-gate 		return;
38837c478bd9Sstevel@tonic-gate 	}
38847c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
38857c478bd9Sstevel@tonic-gate 		return;
38867c478bd9Sstevel@tonic-gate 
38877c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
38887c478bd9Sstevel@tonic-gate 
38897c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
38907c478bd9Sstevel@tonic-gate 	if (++bp->buffree >= bp->numelems) {
38917c478bd9Sstevel@tonic-gate 		/*
38927c478bd9Sstevel@tonic-gate 		 * Should never happen
38937c478bd9Sstevel@tonic-gate 		 */
38947c478bd9Sstevel@tonic-gate 		bp->buffree--;
38957c478bd9Sstevel@tonic-gate 	} else {
38967c478bd9Sstevel@tonic-gate 		bp->buflist[bp->buffree] = buf;
38977c478bd9Sstevel@tonic-gate 	}
38987c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
38997c478bd9Sstevel@tonic-gate }
39007c478bd9Sstevel@tonic-gate 
39017c478bd9Sstevel@tonic-gate static rdma_stat
39027c478bd9Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t *connlist)
39037c478bd9Sstevel@tonic-gate {
39047c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
39057c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd) {
39067c478bd9Sstevel@tonic-gate 		cn->c_next = connlist->conn_hd;
39077c478bd9Sstevel@tonic-gate 		connlist->conn_hd->c_prev = cn;
39087c478bd9Sstevel@tonic-gate 	}
39097c478bd9Sstevel@tonic-gate 	connlist->conn_hd = cn;
39107c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
39117c478bd9Sstevel@tonic-gate 
39127c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
39137c478bd9Sstevel@tonic-gate }
39147c478bd9Sstevel@tonic-gate 
39157c478bd9Sstevel@tonic-gate static rdma_stat
39167c478bd9Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist)
39177c478bd9Sstevel@tonic-gate {
39187c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
39197c478bd9Sstevel@tonic-gate 	if (cn->c_prev) {
39207c478bd9Sstevel@tonic-gate 		cn->c_prev->c_next = cn->c_next;
39217c478bd9Sstevel@tonic-gate 	}
39227c478bd9Sstevel@tonic-gate 	if (cn->c_next) {
39237c478bd9Sstevel@tonic-gate 		cn->c_next->c_prev = cn->c_prev;
39247c478bd9Sstevel@tonic-gate 	}
39257c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd == cn)
39267c478bd9Sstevel@tonic-gate 		connlist->conn_hd = cn->c_next;
39277c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
39287c478bd9Sstevel@tonic-gate 
39297c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
39307c478bd9Sstevel@tonic-gate }
39317c478bd9Sstevel@tonic-gate 
39327c478bd9Sstevel@tonic-gate /*
39337c478bd9Sstevel@tonic-gate  * Connection management.
39347c478bd9Sstevel@tonic-gate  * IBTF does not support recycling of channels. So connections are only
3935*0a701b1eSRobert Gordon  * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR_CONN or
39367c478bd9Sstevel@tonic-gate  * C_DISCONN_PEND state. No C_IDLE state.
39377c478bd9Sstevel@tonic-gate  * C_CONN_PEND state: Connection establishment in progress to the server.
39387c478bd9Sstevel@tonic-gate  * C_CONNECTED state: A connection when created is in C_CONNECTED state.
39397c478bd9Sstevel@tonic-gate  * It has an RC channel associated with it. ibt_post_send/recv are allowed
39407c478bd9Sstevel@tonic-gate  * only in this state.
3941*0a701b1eSRobert Gordon  * C_ERROR_CONN state: A connection transitions to this state when WRs on the
39427c478bd9Sstevel@tonic-gate  * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event
39437c478bd9Sstevel@tonic-gate  * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA.
3944*0a701b1eSRobert Gordon  * C_DISCONN_PEND state: When a connection is in C_ERROR_CONN state and when
39457c478bd9Sstevel@tonic-gate  * c_ref drops to 0 (this indicates that RPC has no more references to this
39467c478bd9Sstevel@tonic-gate  * connection), the connection should be destroyed. A connection transitions
39477c478bd9Sstevel@tonic-gate  * into this state when it is being destroyed.
39487c478bd9Sstevel@tonic-gate  */
39497c478bd9Sstevel@tonic-gate static rdma_stat
39507c478bd9Sstevel@tonic-gate rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn)
39517c478bd9Sstevel@tonic-gate {
39527c478bd9Sstevel@tonic-gate 	CONN *cn;
39537c478bd9Sstevel@tonic-gate 	int status = RDMA_SUCCESS;
39547c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (rib_hca_t *)handle;
39557c478bd9Sstevel@tonic-gate 	rib_qp_t *qp;
39567c478bd9Sstevel@tonic-gate 	clock_t cv_stat, timout;
39577c478bd9Sstevel@tonic-gate 	ibt_path_info_t path;
3958*0a701b1eSRobert Gordon 	ibt_ip_addr_t s_ip, d_ip;
39597c478bd9Sstevel@tonic-gate 
39607c478bd9Sstevel@tonic-gate again:
39617c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
39627c478bd9Sstevel@tonic-gate 	cn = hca->cl_conn_list.conn_hd;
39637c478bd9Sstevel@tonic-gate 	while (cn != NULL) {
39647c478bd9Sstevel@tonic-gate 		/*
39657c478bd9Sstevel@tonic-gate 		 * First, clear up any connection in the ERROR state
39667c478bd9Sstevel@tonic-gate 		 */
39677c478bd9Sstevel@tonic-gate 		mutex_enter(&cn->c_lock);
3968*0a701b1eSRobert Gordon 		if (cn->c_state == C_ERROR_CONN) {
39697c478bd9Sstevel@tonic-gate 			if (cn->c_ref == 0) {
39707c478bd9Sstevel@tonic-gate 				/*
39717c478bd9Sstevel@tonic-gate 				 * Remove connection from list and destroy it.
39727c478bd9Sstevel@tonic-gate 				 */
39737c478bd9Sstevel@tonic-gate 				cn->c_state = C_DISCONN_PEND;
39747c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
39757c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
39767c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(cn,
39777c478bd9Sstevel@tonic-gate 				    &hca->cl_conn_list);
39787c478bd9Sstevel@tonic-gate 				goto again;
39797c478bd9Sstevel@tonic-gate 			}
39807c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
39817c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
39827c478bd9Sstevel@tonic-gate 			continue;
3983*0a701b1eSRobert Gordon 		}
3984*0a701b1eSRobert Gordon 		if (cn->c_state == C_DISCONN_PEND) {
39857c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
39867c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
39877c478bd9Sstevel@tonic-gate 			continue;
39887c478bd9Sstevel@tonic-gate 		}
39897c478bd9Sstevel@tonic-gate 		if ((cn->c_raddr.len == svcaddr->len) &&
39907c478bd9Sstevel@tonic-gate 		    bcmp(svcaddr->buf, cn->c_raddr.buf, svcaddr->len) == 0) {
39917c478bd9Sstevel@tonic-gate 			/*
39927c478bd9Sstevel@tonic-gate 			 * Our connection. Give up conn list lock
39937c478bd9Sstevel@tonic-gate 			 * as we are done traversing the list.
39947c478bd9Sstevel@tonic-gate 			 */
39957c478bd9Sstevel@tonic-gate 			rw_exit(&hca->cl_conn_list.conn_lock);
39967c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONNECTED) {
39977c478bd9Sstevel@tonic-gate 				cn->c_ref++;	/* sharing a conn */
39987c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
39997c478bd9Sstevel@tonic-gate 				*conn = cn;
40007c478bd9Sstevel@tonic-gate 				return (status);
40017c478bd9Sstevel@tonic-gate 			}
40027c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONN_PEND) {
40037c478bd9Sstevel@tonic-gate 				/*
40047c478bd9Sstevel@tonic-gate 				 * Hold a reference to this conn before
40057c478bd9Sstevel@tonic-gate 				 * we give up the lock.
40067c478bd9Sstevel@tonic-gate 				 */
40077c478bd9Sstevel@tonic-gate 				cn->c_ref++;
40087c478bd9Sstevel@tonic-gate 				timout =  ddi_get_lbolt() +
40097c478bd9Sstevel@tonic-gate 				    drv_usectohz(CONN_WAIT_TIME * 1000000);
40107c478bd9Sstevel@tonic-gate 				while ((cv_stat = cv_timedwait_sig(&cn->c_cv,
40117c478bd9Sstevel@tonic-gate 				    &cn->c_lock, timout)) > 0 &&
40127c478bd9Sstevel@tonic-gate 				    cn->c_state == C_CONN_PEND)
40137c478bd9Sstevel@tonic-gate 					;
40147c478bd9Sstevel@tonic-gate 				if (cv_stat == 0) {
40157c478bd9Sstevel@tonic-gate 					cn->c_ref--;
40167c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40177c478bd9Sstevel@tonic-gate 					return (RDMA_INTR);
40187c478bd9Sstevel@tonic-gate 				}
40197c478bd9Sstevel@tonic-gate 				if (cv_stat < 0) {
40207c478bd9Sstevel@tonic-gate 					cn->c_ref--;
40217c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40227c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
40237c478bd9Sstevel@tonic-gate 				}
40247c478bd9Sstevel@tonic-gate 				if (cn->c_state == C_CONNECTED) {
40257c478bd9Sstevel@tonic-gate 					*conn = cn;
40267c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40277c478bd9Sstevel@tonic-gate 					return (status);
40287c478bd9Sstevel@tonic-gate 				} else {
40297c478bd9Sstevel@tonic-gate 					cn->c_ref--;
40307c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
40317c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
40327c478bd9Sstevel@tonic-gate 				}
40337c478bd9Sstevel@tonic-gate 			}
40347c478bd9Sstevel@tonic-gate 		}
40357c478bd9Sstevel@tonic-gate 		mutex_exit(&cn->c_lock);
40367c478bd9Sstevel@tonic-gate 		cn = cn->c_next;
40377c478bd9Sstevel@tonic-gate 	}
40387c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
40397c478bd9Sstevel@tonic-gate 
4040*0a701b1eSRobert Gordon 	bzero(&path, sizeof (ibt_path_info_t));
4041*0a701b1eSRobert Gordon 	bzero(&s_ip, sizeof (ibt_ip_addr_t));
4042*0a701b1eSRobert Gordon 	bzero(&d_ip, sizeof (ibt_ip_addr_t));
4043*0a701b1eSRobert Gordon 
4044*0a701b1eSRobert Gordon 	status = rib_chk_srv_ibaddr(svcaddr, addr_type, &path, &s_ip, &d_ip);
40457c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
40467c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
40477c478bd9Sstevel@tonic-gate 	}
40487c478bd9Sstevel@tonic-gate 
40497c478bd9Sstevel@tonic-gate 	/*
40507c478bd9Sstevel@tonic-gate 	 * Channel to server doesn't exist yet, create one.
40517c478bd9Sstevel@tonic-gate 	 */
40527c478bd9Sstevel@tonic-gate 	if (rib_clnt_create_chan(hca, svcaddr, &qp) != RDMA_SUCCESS) {
40537c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
40547c478bd9Sstevel@tonic-gate 	}
40557c478bd9Sstevel@tonic-gate 	cn = qptoc(qp);
40567c478bd9Sstevel@tonic-gate 	cn->c_state = C_CONN_PEND;
40577c478bd9Sstevel@tonic-gate 	cn->c_ref = 1;
40587c478bd9Sstevel@tonic-gate 
40597c478bd9Sstevel@tonic-gate 	/*
40607c478bd9Sstevel@tonic-gate 	 * Add to conn list.
40617c478bd9Sstevel@tonic-gate 	 * We had given up the READER lock. In the time since then,
40627c478bd9Sstevel@tonic-gate 	 * another thread might have created the connection we are
40637c478bd9Sstevel@tonic-gate 	 * trying here. But for now, that is quiet alright - there
40647c478bd9Sstevel@tonic-gate 	 * might be two connections between a pair of hosts instead
40657c478bd9Sstevel@tonic-gate 	 * of one. If we really want to close that window,
40667c478bd9Sstevel@tonic-gate 	 * then need to check the list after acquiring the
40677c478bd9Sstevel@tonic-gate 	 * WRITER lock.
40687c478bd9Sstevel@tonic-gate 	 */
40697c478bd9Sstevel@tonic-gate 	(void) rib_add_connlist(cn, &hca->cl_conn_list);
4070*0a701b1eSRobert Gordon 	status = rib_conn_to_srv(hca, qp, &path, &s_ip, &d_ip);
40717c478bd9Sstevel@tonic-gate 	mutex_enter(&cn->c_lock);
40727c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
40737c478bd9Sstevel@tonic-gate 		cn->c_state = C_CONNECTED;
40747c478bd9Sstevel@tonic-gate 		*conn = cn;
40757c478bd9Sstevel@tonic-gate 	} else {
4076*0a701b1eSRobert Gordon 		cn->c_state = C_ERROR_CONN;
40777c478bd9Sstevel@tonic-gate 		cn->c_ref--;
40787c478bd9Sstevel@tonic-gate 	}
40797c478bd9Sstevel@tonic-gate 	cv_broadcast(&cn->c_cv);
40807c478bd9Sstevel@tonic-gate 	mutex_exit(&cn->c_lock);
40817c478bd9Sstevel@tonic-gate 	return (status);
40827c478bd9Sstevel@tonic-gate }
40837c478bd9Sstevel@tonic-gate 
40847c478bd9Sstevel@tonic-gate static rdma_stat
40857c478bd9Sstevel@tonic-gate rib_conn_release(CONN *conn)
40867c478bd9Sstevel@tonic-gate {
40877c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
40887c478bd9Sstevel@tonic-gate 
40897c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
40907c478bd9Sstevel@tonic-gate 	conn->c_ref--;
40917c478bd9Sstevel@tonic-gate 
40927c478bd9Sstevel@tonic-gate 	/*
4093*0a701b1eSRobert Gordon 	 * If a conn is C_ERROR_CONN, close the channel.
40947c478bd9Sstevel@tonic-gate 	 * If it's CONNECTED, keep it that way.
40957c478bd9Sstevel@tonic-gate 	 */
4096*0a701b1eSRobert Gordon 	if (conn->c_ref == 0 && conn->c_state == C_ERROR_CONN) {
40977c478bd9Sstevel@tonic-gate 		conn->c_state = C_DISCONN_PEND;
40987c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
40997c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER)
41007c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn,
41017c478bd9Sstevel@tonic-gate 			    &qp->hca->srv_conn_list);
41027c478bd9Sstevel@tonic-gate 		else
41037c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn,
41047c478bd9Sstevel@tonic-gate 			    &qp->hca->cl_conn_list);
41057c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
41067c478bd9Sstevel@tonic-gate 	}
41077c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
41087c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
41097c478bd9Sstevel@tonic-gate }
41107c478bd9Sstevel@tonic-gate 
41117c478bd9Sstevel@tonic-gate /*
41127c478bd9Sstevel@tonic-gate  * Add at front of list
41137c478bd9Sstevel@tonic-gate  */
41147c478bd9Sstevel@tonic-gate static struct rdma_done_list *
41157c478bd9Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid)
41167c478bd9Sstevel@tonic-gate {
41177c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
41187c478bd9Sstevel@tonic-gate 
41197c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
41207c478bd9Sstevel@tonic-gate 
41217c478bd9Sstevel@tonic-gate 	rd = kmem_alloc(sizeof (*rd), KM_SLEEP);
41227c478bd9Sstevel@tonic-gate 	rd->xid = xid;
41237c478bd9Sstevel@tonic-gate 	cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL);
41247c478bd9Sstevel@tonic-gate 
41257c478bd9Sstevel@tonic-gate 	rd->prev = NULL;
41267c478bd9Sstevel@tonic-gate 	rd->next = qp->rdlist;
41277c478bd9Sstevel@tonic-gate 	if (qp->rdlist != NULL)
41287c478bd9Sstevel@tonic-gate 		qp->rdlist->prev = rd;
41297c478bd9Sstevel@tonic-gate 	qp->rdlist = rd;
41307c478bd9Sstevel@tonic-gate 
41317c478bd9Sstevel@tonic-gate 	return (rd);
41327c478bd9Sstevel@tonic-gate }
41337c478bd9Sstevel@tonic-gate 
41347c478bd9Sstevel@tonic-gate static void
41357c478bd9Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd)
41367c478bd9Sstevel@tonic-gate {
41377c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r;
41387c478bd9Sstevel@tonic-gate 
41397c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
41407c478bd9Sstevel@tonic-gate 
41417c478bd9Sstevel@tonic-gate 	r = rd->next;
41427c478bd9Sstevel@tonic-gate 	if (r != NULL) {
41437c478bd9Sstevel@tonic-gate 		r->prev = rd->prev;
41447c478bd9Sstevel@tonic-gate 	}
41457c478bd9Sstevel@tonic-gate 
41467c478bd9Sstevel@tonic-gate 	r = rd->prev;
41477c478bd9Sstevel@tonic-gate 	if (r != NULL) {
41487c478bd9Sstevel@tonic-gate 		r->next = rd->next;
41497c478bd9Sstevel@tonic-gate 	} else {
41507c478bd9Sstevel@tonic-gate 		qp->rdlist = rd->next;
41517c478bd9Sstevel@tonic-gate 	}
41527c478bd9Sstevel@tonic-gate 
41537c478bd9Sstevel@tonic-gate 	cv_destroy(&rd->rdma_done_cv);
41547c478bd9Sstevel@tonic-gate 	kmem_free(rd, sizeof (*rd));
41557c478bd9Sstevel@tonic-gate }
41567c478bd9Sstevel@tonic-gate 
41577c478bd9Sstevel@tonic-gate static void
41587c478bd9Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp)
41597c478bd9Sstevel@tonic-gate {
41607c478bd9Sstevel@tonic-gate 	struct rdma_done_list	*r, *n;
41617c478bd9Sstevel@tonic-gate 
41627c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
41637c478bd9Sstevel@tonic-gate 	for (r = qp->rdlist; r != NULL; r = n) {
41647c478bd9Sstevel@tonic-gate 		n = r->next;
41657c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, r);
41667c478bd9Sstevel@tonic-gate 	}
41677c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
41687c478bd9Sstevel@tonic-gate }
41697c478bd9Sstevel@tonic-gate 
41707c478bd9Sstevel@tonic-gate static void
41717c478bd9Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid)
41727c478bd9Sstevel@tonic-gate {
41737c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r = qp->rdlist;
41747c478bd9Sstevel@tonic-gate 
41757c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
41767c478bd9Sstevel@tonic-gate 
41777c478bd9Sstevel@tonic-gate 	while (r) {
41787c478bd9Sstevel@tonic-gate 		if (r->xid == xid) {
41797c478bd9Sstevel@tonic-gate 			cv_signal(&r->rdma_done_cv);
41807c478bd9Sstevel@tonic-gate 			return;
41817c478bd9Sstevel@tonic-gate 		} else {
41827c478bd9Sstevel@tonic-gate 			r = r->next;
41837c478bd9Sstevel@tonic-gate 		}
41847c478bd9Sstevel@tonic-gate 	}
4185*0a701b1eSRobert Gordon 	DTRACE_PROBE1(rpcib__i__donenotify__nomatchxid,
4186*0a701b1eSRobert Gordon 	    int, xid);
41877c478bd9Sstevel@tonic-gate }
41887c478bd9Sstevel@tonic-gate 
41897c478bd9Sstevel@tonic-gate 
41907c478bd9Sstevel@tonic-gate /*
41917c478bd9Sstevel@tonic-gate  * Goes through all connections and closes the channel
41927c478bd9Sstevel@tonic-gate  * This will cause all the WRs on those channels to be
41937c478bd9Sstevel@tonic-gate  * flushed.
41947c478bd9Sstevel@tonic-gate  */
41957c478bd9Sstevel@tonic-gate static void
41967c478bd9Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist)
41977c478bd9Sstevel@tonic-gate {
41987c478bd9Sstevel@tonic-gate 	CONN 		*conn;
41997c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
42007c478bd9Sstevel@tonic-gate 
42017c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
42027c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
42037c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
42047c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
42057c478bd9Sstevel@tonic-gate 		qp = ctoqp(conn);
4206*0a701b1eSRobert Gordon 		if (conn->c_state == C_CONNECTED) {
42077c478bd9Sstevel@tonic-gate 			/*
42087c478bd9Sstevel@tonic-gate 			 * Live connection in CONNECTED state.
42097c478bd9Sstevel@tonic-gate 			 * Call ibt_close_rc_channel in nonblocking mode
42107c478bd9Sstevel@tonic-gate 			 * with no callbacks.
42117c478bd9Sstevel@tonic-gate 			 */
4212*0a701b1eSRobert Gordon 			conn->c_state = C_ERROR_CONN;
42137c478bd9Sstevel@tonic-gate 			(void) ibt_close_rc_channel(qp->qp_hdl,
42147c478bd9Sstevel@tonic-gate 			    IBT_NOCALLBACKS, NULL, 0, NULL, NULL, 0);
42157c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
42167c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
42177c478bd9Sstevel@tonic-gate 		} else {
4218*0a701b1eSRobert Gordon 			if (conn->c_state == C_ERROR_CONN &&
42197c478bd9Sstevel@tonic-gate 			    qp->qp_hdl != NULL) {
42207c478bd9Sstevel@tonic-gate 				/*
42217c478bd9Sstevel@tonic-gate 				 * Connection in ERROR state but
42227c478bd9Sstevel@tonic-gate 				 * channel is not yet freed.
42237c478bd9Sstevel@tonic-gate 				 */
42247c478bd9Sstevel@tonic-gate 				(void) ibt_close_rc_channel(qp->qp_hdl,
42257c478bd9Sstevel@tonic-gate 				    IBT_NOCALLBACKS, NULL, 0, NULL,
42267c478bd9Sstevel@tonic-gate 				    NULL, 0);
42277c478bd9Sstevel@tonic-gate 				(void) ibt_free_channel(qp->qp_hdl);
42287c478bd9Sstevel@tonic-gate 				qp->qp_hdl = NULL;
42297c478bd9Sstevel@tonic-gate 			}
42307c478bd9Sstevel@tonic-gate 		}
42317c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
42327c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
42337c478bd9Sstevel@tonic-gate 	}
42347c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
42357c478bd9Sstevel@tonic-gate }
42367c478bd9Sstevel@tonic-gate 
42377c478bd9Sstevel@tonic-gate /*
42387c478bd9Sstevel@tonic-gate  * Frees up all connections that are no longer being referenced
42397c478bd9Sstevel@tonic-gate  */
42407c478bd9Sstevel@tonic-gate static void
42417c478bd9Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist)
42427c478bd9Sstevel@tonic-gate {
42437c478bd9Sstevel@tonic-gate 	CONN 		*conn;
42447c478bd9Sstevel@tonic-gate 
42457c478bd9Sstevel@tonic-gate top:
42467c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
42477c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
42487c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
42497c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
42507c478bd9Sstevel@tonic-gate 
42517c478bd9Sstevel@tonic-gate 		/*
42527c478bd9Sstevel@tonic-gate 		 * At this point connection is either in ERROR
42537c478bd9Sstevel@tonic-gate 		 * or DISCONN_PEND state. If in DISCONN_PEND state
42547c478bd9Sstevel@tonic-gate 		 * then some other thread is culling that connection.
42557c478bd9Sstevel@tonic-gate 		 * If not and if c_ref is 0, then destroy the connection.
42567c478bd9Sstevel@tonic-gate 		 */
42577c478bd9Sstevel@tonic-gate 		if (conn->c_ref == 0 &&
42587c478bd9Sstevel@tonic-gate 		    conn->c_state != C_DISCONN_PEND) {
42597c478bd9Sstevel@tonic-gate 			/*
42607c478bd9Sstevel@tonic-gate 			 * Cull the connection
42617c478bd9Sstevel@tonic-gate 			 */
42627c478bd9Sstevel@tonic-gate 			conn->c_state = C_DISCONN_PEND;
42637c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
42647c478bd9Sstevel@tonic-gate 			rw_exit(&connlist->conn_lock);
42657c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, connlist);
42667c478bd9Sstevel@tonic-gate 			goto top;
42677c478bd9Sstevel@tonic-gate 		} else {
42687c478bd9Sstevel@tonic-gate 			/*
42697c478bd9Sstevel@tonic-gate 			 * conn disconnect already scheduled or will
42707c478bd9Sstevel@tonic-gate 			 * happen from conn_release when c_ref drops to 0.
42717c478bd9Sstevel@tonic-gate 			 */
42727c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
42737c478bd9Sstevel@tonic-gate 		}
42747c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
42757c478bd9Sstevel@tonic-gate 	}
42767c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
42777c478bd9Sstevel@tonic-gate 
42787c478bd9Sstevel@tonic-gate 	/*
42797c478bd9Sstevel@tonic-gate 	 * At this point, only connections with c_ref != 0 are on the list
42807c478bd9Sstevel@tonic-gate 	 */
42817c478bd9Sstevel@tonic-gate }
42827c478bd9Sstevel@tonic-gate 
42837c478bd9Sstevel@tonic-gate /*
42847c478bd9Sstevel@tonic-gate  * Cleans and closes up all uses of the HCA
42857c478bd9Sstevel@tonic-gate  */
42867c478bd9Sstevel@tonic-gate static void
42877c478bd9Sstevel@tonic-gate rib_detach_hca(rib_hca_t *hca)
42887c478bd9Sstevel@tonic-gate {
42897c478bd9Sstevel@tonic-gate 
42907c478bd9Sstevel@tonic-gate 	/*
42917c478bd9Sstevel@tonic-gate 	 * Stop all services on the HCA
42927c478bd9Sstevel@tonic-gate 	 * Go through cl_conn_list and close all rc_channels
42937c478bd9Sstevel@tonic-gate 	 * Go through svr_conn_list and close all rc_channels
42947c478bd9Sstevel@tonic-gate 	 * Free connections whose c_ref has dropped to 0
42957c478bd9Sstevel@tonic-gate 	 * Destroy all CQs
42967c478bd9Sstevel@tonic-gate 	 * Deregister and released all buffer pool memory after all
42977c478bd9Sstevel@tonic-gate 	 * connections are destroyed
42987c478bd9Sstevel@tonic-gate 	 * Free the protection domain
42997c478bd9Sstevel@tonic-gate 	 * ibt_close_hca()
43007c478bd9Sstevel@tonic-gate 	 */
43017c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_WRITER);
43027c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_DETACHED) {
43037c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
43047c478bd9Sstevel@tonic-gate 		return;
43057c478bd9Sstevel@tonic-gate 	}
43067c478bd9Sstevel@tonic-gate 
43077c478bd9Sstevel@tonic-gate 	hca->state = HCA_DETACHED;
43087c478bd9Sstevel@tonic-gate 	rib_stat->nhca_inited--;
43097c478bd9Sstevel@tonic-gate 
43107c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
43117c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->cl_conn_list);
43127c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->srv_conn_list);
43137c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
43147c478bd9Sstevel@tonic-gate 
43157c478bd9Sstevel@tonic-gate 	rib_purge_connlist(&hca->cl_conn_list);
43167c478bd9Sstevel@tonic-gate 	rib_purge_connlist(&hca->srv_conn_list);
43177c478bd9Sstevel@tonic-gate 
43187c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl);
43197c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl);
43207c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl);
43217c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->svc_scq->rib_cq_hdl);
43227c478bd9Sstevel@tonic-gate 	kmem_free(hca->clnt_rcq, sizeof (rib_cq_t));
43237c478bd9Sstevel@tonic-gate 	kmem_free(hca->clnt_scq, sizeof (rib_cq_t));
43247c478bd9Sstevel@tonic-gate 	kmem_free(hca->svc_rcq, sizeof (rib_cq_t));
43257c478bd9Sstevel@tonic-gate 	kmem_free(hca->svc_scq, sizeof (rib_cq_t));
43267c478bd9Sstevel@tonic-gate 
43277c478bd9Sstevel@tonic-gate 	rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
43287c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
43297c478bd9Sstevel@tonic-gate 	if (hca->srv_conn_list.conn_hd == NULL &&
43307c478bd9Sstevel@tonic-gate 	    hca->cl_conn_list.conn_hd == NULL) {
43317c478bd9Sstevel@tonic-gate 		/*
43327c478bd9Sstevel@tonic-gate 		 * conn_lists are NULL, so destroy
43337c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
43347c478bd9Sstevel@tonic-gate 		 */
43357c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, RECV_BUFFER);
43367c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, SEND_BUFFER);
4337*0a701b1eSRobert Gordon 		rib_destroy_cache(hca);
43387c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
43397c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
43407c478bd9Sstevel@tonic-gate 		hca->hca_hdl = NULL;
43417c478bd9Sstevel@tonic-gate 	}
43427c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
43437c478bd9Sstevel@tonic-gate 	rw_exit(&hca->srv_conn_list.conn_lock);
43447c478bd9Sstevel@tonic-gate 
43457c478bd9Sstevel@tonic-gate 	if (hca->hca_hdl != NULL) {
43467c478bd9Sstevel@tonic-gate 		mutex_enter(&hca->inuse_lock);
43477c478bd9Sstevel@tonic-gate 		while (hca->inuse)
43487c478bd9Sstevel@tonic-gate 			cv_wait(&hca->cb_cv, &hca->inuse_lock);
43497c478bd9Sstevel@tonic-gate 		mutex_exit(&hca->inuse_lock);
43507c478bd9Sstevel@tonic-gate 		/*
43517c478bd9Sstevel@tonic-gate 		 * conn_lists are now NULL, so destroy
43527c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
43537c478bd9Sstevel@tonic-gate 		 */
43547c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, RECV_BUFFER);
43557c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, SEND_BUFFER);
43567c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
43577c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
43587c478bd9Sstevel@tonic-gate 		hca->hca_hdl = NULL;
43597c478bd9Sstevel@tonic-gate 	}
43607c478bd9Sstevel@tonic-gate }
4361*0a701b1eSRobert Gordon 
4362*0a701b1eSRobert Gordon static void
4363*0a701b1eSRobert Gordon rib_server_side_cache_reclaim(void *argp)
4364*0a701b1eSRobert Gordon {
4365*0a701b1eSRobert Gordon 	cache_avl_struct_t    *rcas;
4366*0a701b1eSRobert Gordon 	rib_lrc_entry_t		*rb;
4367*0a701b1eSRobert Gordon 	rib_hca_t *hca = (rib_hca_t *)argp;
4368*0a701b1eSRobert Gordon 
4369*0a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
4370*0a701b1eSRobert Gordon 	rcas = avl_first(&hca->avl_tree);
4371*0a701b1eSRobert Gordon 	if (rcas != NULL)
4372*0a701b1eSRobert Gordon 		avl_remove(&hca->avl_tree, rcas);
4373*0a701b1eSRobert Gordon 
4374*0a701b1eSRobert Gordon 	while (rcas != NULL) {
4375*0a701b1eSRobert Gordon 		while (rcas->r.forw != &rcas->r) {
4376*0a701b1eSRobert Gordon 			rcas->elements--;
4377*0a701b1eSRobert Gordon 			rib_total_buffers --;
4378*0a701b1eSRobert Gordon 			rb = rcas->r.forw;
4379*0a701b1eSRobert Gordon 			remque(rb);
4380*0a701b1eSRobert Gordon 			if (rb->registered)
4381*0a701b1eSRobert Gordon 				(void) rib_deregistermem_via_hca(hca,
4382*0a701b1eSRobert Gordon 				    rb->lrc_buf, rb->lrc_mhandle);
4383*0a701b1eSRobert Gordon 			cache_allocation -= rb->lrc_len;
4384*0a701b1eSRobert Gordon 			kmem_free(rb->lrc_buf, rb->lrc_len);
4385*0a701b1eSRobert Gordon 			kmem_free(rb, sizeof (rib_lrc_entry_t));
4386*0a701b1eSRobert Gordon 		}
4387*0a701b1eSRobert Gordon 		mutex_destroy(&rcas->node_lock);
4388*0a701b1eSRobert Gordon 		kmem_cache_free(hca->server_side_cache, rcas);
4389*0a701b1eSRobert Gordon 		rcas = avl_first(&hca->avl_tree);
4390*0a701b1eSRobert Gordon 		if (rcas != NULL)
4391*0a701b1eSRobert Gordon 			avl_remove(&hca->avl_tree, rcas);
4392*0a701b1eSRobert Gordon 	}
4393*0a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
4394*0a701b1eSRobert Gordon }
4395*0a701b1eSRobert Gordon 
4396*0a701b1eSRobert Gordon static void
4397*0a701b1eSRobert Gordon rib_server_side_cache_cleanup(void *argp)
4398*0a701b1eSRobert Gordon {
4399*0a701b1eSRobert Gordon 	cache_avl_struct_t    *rcas;
4400*0a701b1eSRobert Gordon 	rib_lrc_entry_t		*rb;
4401*0a701b1eSRobert Gordon 	rib_hca_t *hca = (rib_hca_t *)argp;
4402*0a701b1eSRobert Gordon 
4403*0a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
4404*0a701b1eSRobert Gordon 	if (cache_allocation < cache_limit) {
4405*0a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
4406*0a701b1eSRobert Gordon 		return;
4407*0a701b1eSRobert Gordon 	}
4408*0a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
4409*0a701b1eSRobert Gordon 
4410*0a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
4411*0a701b1eSRobert Gordon 	rcas = avl_last(&hca->avl_tree);
4412*0a701b1eSRobert Gordon 	if (rcas != NULL)
4413*0a701b1eSRobert Gordon 		avl_remove(&hca->avl_tree, rcas);
4414*0a701b1eSRobert Gordon 
4415*0a701b1eSRobert Gordon 	while (rcas != NULL) {
4416*0a701b1eSRobert Gordon 		while (rcas->r.forw != &rcas->r) {
4417*0a701b1eSRobert Gordon 			rcas->elements--;
4418*0a701b1eSRobert Gordon 			rib_total_buffers --;
4419*0a701b1eSRobert Gordon 			rb = rcas->r.forw;
4420*0a701b1eSRobert Gordon 			remque(rb);
4421*0a701b1eSRobert Gordon 			if (rb->registered)
4422*0a701b1eSRobert Gordon 				(void) rib_deregistermem_via_hca(hca,
4423*0a701b1eSRobert Gordon 				    rb->lrc_buf, rb->lrc_mhandle);
4424*0a701b1eSRobert Gordon 			cache_allocation -= rb->lrc_len;
4425*0a701b1eSRobert Gordon 			kmem_free(rb->lrc_buf, rb->lrc_len);
4426*0a701b1eSRobert Gordon 			kmem_free(rb, sizeof (rib_lrc_entry_t));
4427*0a701b1eSRobert Gordon 		}
4428*0a701b1eSRobert Gordon 		mutex_destroy(&rcas->node_lock);
4429*0a701b1eSRobert Gordon 		kmem_cache_free(hca->server_side_cache, rcas);
4430*0a701b1eSRobert Gordon 		if ((cache_allocation) < cache_limit) {
4431*0a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
4432*0a701b1eSRobert Gordon 			return;
4433*0a701b1eSRobert Gordon 		}
4434*0a701b1eSRobert Gordon 
4435*0a701b1eSRobert Gordon 		rcas = avl_last(&hca->avl_tree);
4436*0a701b1eSRobert Gordon 		if (rcas != NULL)
4437*0a701b1eSRobert Gordon 			avl_remove(&hca->avl_tree, rcas);
4438*0a701b1eSRobert Gordon 	}
4439*0a701b1eSRobert Gordon 	rw_exit(&hca->avl_rw_lock);
4440*0a701b1eSRobert Gordon }
4441*0a701b1eSRobert Gordon 
4442*0a701b1eSRobert Gordon static int
4443*0a701b1eSRobert Gordon avl_compare(const void *t1, const void *t2)
4444*0a701b1eSRobert Gordon {
4445*0a701b1eSRobert Gordon 	if (((cache_avl_struct_t *)t1)->len == ((cache_avl_struct_t *)t2)->len)
4446*0a701b1eSRobert Gordon 		return (0);
4447*0a701b1eSRobert Gordon 
4448*0a701b1eSRobert Gordon 	if (((cache_avl_struct_t *)t1)->len < ((cache_avl_struct_t *)t2)->len)
4449*0a701b1eSRobert Gordon 		return (-1);
4450*0a701b1eSRobert Gordon 
4451*0a701b1eSRobert Gordon 	return (1);
4452*0a701b1eSRobert Gordon }
4453*0a701b1eSRobert Gordon 
4454*0a701b1eSRobert Gordon static void
4455*0a701b1eSRobert Gordon rib_destroy_cache(rib_hca_t *hca)
4456*0a701b1eSRobert Gordon {
4457*0a701b1eSRobert Gordon 	if (hca->reg_cache_clean_up != NULL) {
4458*0a701b1eSRobert Gordon 		ddi_taskq_destroy(hca->reg_cache_clean_up);
4459*0a701b1eSRobert Gordon 		hca->reg_cache_clean_up = NULL;
4460*0a701b1eSRobert Gordon 	}
4461*0a701b1eSRobert Gordon 	if (!hca->avl_init) {
4462*0a701b1eSRobert Gordon 		kmem_cache_destroy(hca->server_side_cache);
4463*0a701b1eSRobert Gordon 		avl_destroy(&hca->avl_tree);
4464*0a701b1eSRobert Gordon 		mutex_destroy(&hca->cache_allocation);
4465*0a701b1eSRobert Gordon 		rw_destroy(&hca->avl_rw_lock);
4466*0a701b1eSRobert Gordon 	}
4467*0a701b1eSRobert Gordon 	hca->avl_init = FALSE;
4468*0a701b1eSRobert Gordon }
4469*0a701b1eSRobert Gordon 
4470*0a701b1eSRobert Gordon static void
4471*0a701b1eSRobert Gordon rib_force_cleanup(void *hca)
4472*0a701b1eSRobert Gordon {
4473*0a701b1eSRobert Gordon 	if (((rib_hca_t *)hca)->reg_cache_clean_up != NULL)
4474*0a701b1eSRobert Gordon 		(void) ddi_taskq_dispatch(
4475*0a701b1eSRobert Gordon 		    ((rib_hca_t *)hca)->reg_cache_clean_up,
4476*0a701b1eSRobert Gordon 		    rib_server_side_cache_cleanup,
4477*0a701b1eSRobert Gordon 		    (void *)hca, DDI_NOSLEEP);
4478*0a701b1eSRobert Gordon }
4479*0a701b1eSRobert Gordon 
4480*0a701b1eSRobert Gordon static rib_lrc_entry_t *
4481*0a701b1eSRobert Gordon rib_get_cache_buf(CONN *conn, uint32_t len)
4482*0a701b1eSRobert Gordon {
4483*0a701b1eSRobert Gordon 	cache_avl_struct_t	cas, *rcas;
4484*0a701b1eSRobert Gordon 	rib_hca_t	*hca = (ctoqp(conn))->hca;
4485*0a701b1eSRobert Gordon 	rib_lrc_entry_t *reply_buf;
4486*0a701b1eSRobert Gordon 	avl_index_t where = NULL;
4487*0a701b1eSRobert Gordon 	uint64_t c_alloc = 0;
4488*0a701b1eSRobert Gordon 
4489*0a701b1eSRobert Gordon 	if (!hca->avl_init)
4490*0a701b1eSRobert Gordon 		goto  error_alloc;
4491*0a701b1eSRobert Gordon 
4492*0a701b1eSRobert Gordon 	cas.len = len;
4493*0a701b1eSRobert Gordon 
4494*0a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
4495*0a701b1eSRobert Gordon 
4496*0a701b1eSRobert Gordon 	mutex_enter(&hca->cache_allocation);
4497*0a701b1eSRobert Gordon 	c_alloc = cache_allocation;
4498*0a701b1eSRobert Gordon 	mutex_exit(&hca->cache_allocation);
4499*0a701b1eSRobert Gordon 
4500*0a701b1eSRobert Gordon 	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas,
4501*0a701b1eSRobert Gordon 	    &where)) == NULL) {
4502*0a701b1eSRobert Gordon 		/* Am I above the cache limit */
4503*0a701b1eSRobert Gordon 		if ((c_alloc + len) >= cache_limit) {
4504*0a701b1eSRobert Gordon 			rib_force_cleanup((void *)hca);
4505*0a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
4506*0a701b1eSRobert Gordon 			cache_misses_above_the_limit ++;
4507*0a701b1eSRobert Gordon 
4508*0a701b1eSRobert Gordon 			/* Allocate and register the buffer directly */
4509*0a701b1eSRobert Gordon 			goto error_alloc;
4510*0a701b1eSRobert Gordon 		}
4511*0a701b1eSRobert Gordon 
4512*0a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
4513*0a701b1eSRobert Gordon 		rw_enter(&hca->avl_rw_lock, RW_WRITER);
4514*0a701b1eSRobert Gordon 
4515*0a701b1eSRobert Gordon 		/* Recheck to make sure no other thread added the entry in */
4516*0a701b1eSRobert Gordon 		if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
4517*0a701b1eSRobert Gordon 		    &cas, &where)) == NULL) {
4518*0a701b1eSRobert Gordon 			/* Allocate an avl tree entry */
4519*0a701b1eSRobert Gordon 			rcas = (cache_avl_struct_t *)
4520*0a701b1eSRobert Gordon 			    kmem_cache_alloc(hca->server_side_cache, KM_SLEEP);
4521*0a701b1eSRobert Gordon 
4522*0a701b1eSRobert Gordon 			bzero(rcas, sizeof (cache_avl_struct_t));
4523*0a701b1eSRobert Gordon 			rcas->elements = 0;
4524*0a701b1eSRobert Gordon 			rcas->r.forw = &rcas->r;
4525*0a701b1eSRobert Gordon 			rcas->r.back = &rcas->r;
4526*0a701b1eSRobert Gordon 			rcas->len = len;
4527*0a701b1eSRobert Gordon 			mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL);
4528*0a701b1eSRobert Gordon 			avl_insert(&hca->avl_tree, rcas, where);
4529*0a701b1eSRobert Gordon 		}
4530*0a701b1eSRobert Gordon 	}
4531*0a701b1eSRobert Gordon 
4532*0a701b1eSRobert Gordon 	mutex_enter(&rcas->node_lock);
4533*0a701b1eSRobert Gordon 
4534*0a701b1eSRobert Gordon 	if (rcas->r.forw != &rcas->r && rcas->elements > 0) {
4535*0a701b1eSRobert Gordon 		rib_total_buffers--;
4536*0a701b1eSRobert Gordon 		cache_hits++;
4537*0a701b1eSRobert Gordon 		reply_buf = rcas->r.forw;
4538*0a701b1eSRobert Gordon 		remque(reply_buf);
4539*0a701b1eSRobert Gordon 		rcas->elements--;
4540*0a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
4541*0a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
4542*0a701b1eSRobert Gordon 		mutex_enter(&hca->cache_allocation);
4543*0a701b1eSRobert Gordon 		cache_allocation -= len;
4544*0a701b1eSRobert Gordon 		mutex_exit(&hca->cache_allocation);
4545*0a701b1eSRobert Gordon 	} else {
4546*0a701b1eSRobert Gordon 		/* Am I above the cache limit */
4547*0a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
4548*0a701b1eSRobert Gordon 		if ((c_alloc + len) >= cache_limit) {
4549*0a701b1eSRobert Gordon 			rib_force_cleanup((void *)hca);
4550*0a701b1eSRobert Gordon 			rw_exit(&hca->avl_rw_lock);
4551*0a701b1eSRobert Gordon 			cache_misses_above_the_limit ++;
4552*0a701b1eSRobert Gordon 			/* Allocate and register the buffer directly */
4553*0a701b1eSRobert Gordon 			goto error_alloc;
4554*0a701b1eSRobert Gordon 		}
4555*0a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
4556*0a701b1eSRobert Gordon 		cache_misses ++;
4557*0a701b1eSRobert Gordon 		/* Allocate a reply_buf entry */
4558*0a701b1eSRobert Gordon 		reply_buf = (rib_lrc_entry_t *)
4559*0a701b1eSRobert Gordon 		    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
4560*0a701b1eSRobert Gordon 		bzero(reply_buf, sizeof (rib_lrc_entry_t));
4561*0a701b1eSRobert Gordon 		reply_buf->lrc_buf  = kmem_alloc(len, KM_SLEEP);
4562*0a701b1eSRobert Gordon 		reply_buf->lrc_len  = len;
4563*0a701b1eSRobert Gordon 		reply_buf->registered = FALSE;
4564*0a701b1eSRobert Gordon 		reply_buf->avl_node = (void *)rcas;
4565*0a701b1eSRobert Gordon 	}
4566*0a701b1eSRobert Gordon 
4567*0a701b1eSRobert Gordon 	return (reply_buf);
4568*0a701b1eSRobert Gordon 
4569*0a701b1eSRobert Gordon error_alloc:
4570*0a701b1eSRobert Gordon 	reply_buf = (rib_lrc_entry_t *)
4571*0a701b1eSRobert Gordon 	    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
4572*0a701b1eSRobert Gordon 	bzero(reply_buf, sizeof (rib_lrc_entry_t));
4573*0a701b1eSRobert Gordon 	reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
4574*0a701b1eSRobert Gordon 	reply_buf->lrc_len = len;
4575*0a701b1eSRobert Gordon 	reply_buf->registered = FALSE;
4576*0a701b1eSRobert Gordon 	reply_buf->avl_node = NULL;
4577*0a701b1eSRobert Gordon 
4578*0a701b1eSRobert Gordon 	return (reply_buf);
4579*0a701b1eSRobert Gordon }
4580*0a701b1eSRobert Gordon 
4581*0a701b1eSRobert Gordon /*
4582*0a701b1eSRobert Gordon  * Return a pre-registered back to the cache (without
4583*0a701b1eSRobert Gordon  * unregistering the buffer)..
4584*0a701b1eSRobert Gordon  */
4585*0a701b1eSRobert Gordon 
4586*0a701b1eSRobert Gordon static void
4587*0a701b1eSRobert Gordon rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
4588*0a701b1eSRobert Gordon {
4589*0a701b1eSRobert Gordon 	cache_avl_struct_t    cas, *rcas;
4590*0a701b1eSRobert Gordon 	avl_index_t where = NULL;
4591*0a701b1eSRobert Gordon 	rib_hca_t	*hca = (ctoqp(conn))->hca;
4592*0a701b1eSRobert Gordon 
4593*0a701b1eSRobert Gordon 	if (!hca->avl_init)
4594*0a701b1eSRobert Gordon 		goto  error_free;
4595*0a701b1eSRobert Gordon 
4596*0a701b1eSRobert Gordon 	cas.len = reg_buf->lrc_len;
4597*0a701b1eSRobert Gordon 	rw_enter(&hca->avl_rw_lock, RW_READER);
4598*0a701b1eSRobert Gordon 	if ((rcas = (cache_avl_struct_t *)
4599*0a701b1eSRobert Gordon 	    avl_find(&hca->avl_tree, &cas, &where)) == NULL) {
4600*0a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
4601*0a701b1eSRobert Gordon 		goto error_free;
4602*0a701b1eSRobert Gordon 	} else {
4603*0a701b1eSRobert Gordon 		rib_total_buffers ++;
4604*0a701b1eSRobert Gordon 		cas.len = reg_buf->lrc_len;
4605*0a701b1eSRobert Gordon 		mutex_enter(&rcas->node_lock);
4606*0a701b1eSRobert Gordon 		insque(reg_buf, &rcas->r);
4607*0a701b1eSRobert Gordon 		rcas->elements ++;
4608*0a701b1eSRobert Gordon 		mutex_exit(&rcas->node_lock);
4609*0a701b1eSRobert Gordon 		rw_exit(&hca->avl_rw_lock);
4610*0a701b1eSRobert Gordon 		mutex_enter(&hca->cache_allocation);
4611*0a701b1eSRobert Gordon 		cache_allocation += cas.len;
4612*0a701b1eSRobert Gordon 		mutex_exit(&hca->cache_allocation);
4613*0a701b1eSRobert Gordon 	}
4614*0a701b1eSRobert Gordon 
4615*0a701b1eSRobert Gordon 	return;
4616*0a701b1eSRobert Gordon 
4617*0a701b1eSRobert Gordon error_free:
4618*0a701b1eSRobert Gordon 
4619*0a701b1eSRobert Gordon 	if (reg_buf->registered)
4620*0a701b1eSRobert Gordon 		(void) rib_deregistermem_via_hca(hca,
4621*0a701b1eSRobert Gordon 		    reg_buf->lrc_buf, reg_buf->lrc_mhandle);
4622*0a701b1eSRobert Gordon 	kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
4623*0a701b1eSRobert Gordon 	kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
4624*0a701b1eSRobert Gordon }
4625*0a701b1eSRobert Gordon 
4626*0a701b1eSRobert Gordon static rdma_stat
4627*0a701b1eSRobert Gordon rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
4628*0a701b1eSRobert Gordon 	uint_t buflen, struct mrc *buf_handle)
4629*0a701b1eSRobert Gordon {
4630*0a701b1eSRobert Gordon 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
4631*0a701b1eSRobert Gordon 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
4632*0a701b1eSRobert Gordon 	rdma_stat	status;
4633*0a701b1eSRobert Gordon 
4634*0a701b1eSRobert Gordon 
4635*0a701b1eSRobert Gordon 	/*
4636*0a701b1eSRobert Gordon 	 * Note: ALL buffer pools use the same memory type RDMARW.
4637*0a701b1eSRobert Gordon 	 */
4638*0a701b1eSRobert Gordon 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
4639*0a701b1eSRobert Gordon 	if (status == RDMA_SUCCESS) {
4640*0a701b1eSRobert Gordon 		buf_handle->mrc_linfo = (uint64_t)(uintptr_t)mr_hdl;
4641*0a701b1eSRobert Gordon 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
4642*0a701b1eSRobert Gordon 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
4643*0a701b1eSRobert Gordon 	} else {
4644*0a701b1eSRobert Gordon 		buf_handle->mrc_linfo = NULL;
4645*0a701b1eSRobert Gordon 		buf_handle->mrc_lmr = 0;
4646*0a701b1eSRobert Gordon 		buf_handle->mrc_rmr = 0;
4647*0a701b1eSRobert Gordon 	}
4648*0a701b1eSRobert Gordon 	return (status);
4649*0a701b1eSRobert Gordon }
4650*0a701b1eSRobert Gordon 
4651*0a701b1eSRobert Gordon /* ARGSUSED */
4652*0a701b1eSRobert Gordon static rdma_stat
4653*0a701b1eSRobert Gordon rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf,
4654*0a701b1eSRobert Gordon     struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle)
4655*0a701b1eSRobert Gordon {
4656*0a701b1eSRobert Gordon 
4657*0a701b1eSRobert Gordon 	(void) rib_deregistermem_via_hca(hca, buf, buf_handle);
4658*0a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
4659*0a701b1eSRobert Gordon }
4660*0a701b1eSRobert Gordon 
4661*0a701b1eSRobert Gordon /* ARGSUSED */
4662*0a701b1eSRobert Gordon static rdma_stat
4663*0a701b1eSRobert Gordon rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
4664*0a701b1eSRobert Gordon {
4665*0a701b1eSRobert Gordon 
4666*0a701b1eSRobert Gordon 	(void) ibt_deregister_mr(hca->hca_hdl,
4667*0a701b1eSRobert Gordon 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
4668*0a701b1eSRobert Gordon 	return (RDMA_SUCCESS);
4669*0a701b1eSRobert Gordon }
4670*0a701b1eSRobert Gordon 
4671*0a701b1eSRobert Gordon 
4672*0a701b1eSRobert Gordon /*
4673*0a701b1eSRobert Gordon  * Return 0 if the interface is IB.
4674*0a701b1eSRobert Gordon  * Return error (>0) if any error is encountered during processing.
4675*0a701b1eSRobert Gordon  * Return -1 if the interface is not IB and no error.
4676*0a701b1eSRobert Gordon  */
4677*0a701b1eSRobert Gordon #define	isalpha(ch)	(((ch) >= 'a' && (ch) <= 'z') || \
4678*0a701b1eSRobert Gordon 			((ch) >= 'A' && (ch) <= 'Z'))
4679*0a701b1eSRobert Gordon static int
4680*0a701b1eSRobert Gordon rpcib_is_ib_interface(char *name)
4681*0a701b1eSRobert Gordon {
4682*0a701b1eSRobert Gordon 
4683*0a701b1eSRobert Gordon 	char	dev_path[MAXPATHLEN];
4684*0a701b1eSRobert Gordon 	char	devname[MAXNAMELEN];
4685*0a701b1eSRobert Gordon 	ldi_handle_t	lh;
4686*0a701b1eSRobert Gordon 	dl_info_ack_t	info;
4687*0a701b1eSRobert Gordon 	int	ret = 0;
4688*0a701b1eSRobert Gordon 	int	i;
4689*0a701b1eSRobert Gordon 
4690*0a701b1eSRobert Gordon 	/*
4691*0a701b1eSRobert Gordon 	 * ibd devices are only style 2 devices
4692*0a701b1eSRobert Gordon 	 * so we will open only style 2 devices
4693*0a701b1eSRobert Gordon 	 * by ignoring the ppa
4694*0a701b1eSRobert Gordon 	 */
4695*0a701b1eSRobert Gordon 
4696*0a701b1eSRobert Gordon 	i = strlen(name) - 1;
4697*0a701b1eSRobert Gordon 	while ((i >= 0) && (!isalpha(name[i]))) i--;
4698*0a701b1eSRobert Gordon 
4699*0a701b1eSRobert Gordon 	if (i < 0) {
4700*0a701b1eSRobert Gordon 		/* Invalid interface name, no alphabet */
4701*0a701b1eSRobert Gordon 		return (-1);
4702*0a701b1eSRobert Gordon 	}
4703*0a701b1eSRobert Gordon 
4704*0a701b1eSRobert Gordon 	(void) strncpy(devname, name, i + 1);
4705*0a701b1eSRobert Gordon 	devname[i + 1] = '\0';
4706*0a701b1eSRobert Gordon 
4707*0a701b1eSRobert Gordon 	if (strcmp("lo", devname) == 0) {
4708*0a701b1eSRobert Gordon 		/*
4709*0a701b1eSRobert Gordon 		 * loopback interface  not rpc/rdma capable
4710*0a701b1eSRobert Gordon 		 */
4711*0a701b1eSRobert Gordon 		return (-1);
4712*0a701b1eSRobert Gordon 	}
4713*0a701b1eSRobert Gordon 
4714*0a701b1eSRobert Gordon 	(void) strncpy(dev_path, "/dev/", MAXPATHLEN);
4715*0a701b1eSRobert Gordon 	if (strlcat(dev_path, devname, MAXPATHLEN) >= MAXPATHLEN) {
4716*0a701b1eSRobert Gordon 		/* string overflow */
4717*0a701b1eSRobert Gordon 		return (-1);
4718*0a701b1eSRobert Gordon 	}
4719*0a701b1eSRobert Gordon 
4720*0a701b1eSRobert Gordon 	ret = ldi_open_by_name(dev_path, FREAD|FWRITE, kcred, &lh, rpcib_li);
4721*0a701b1eSRobert Gordon 	if (ret != 0) {
4722*0a701b1eSRobert Gordon 		return (ret);
4723*0a701b1eSRobert Gordon 	}
4724*0a701b1eSRobert Gordon 	ret = rpcib_dl_info(lh, &info);
4725*0a701b1eSRobert Gordon 	(void) ldi_close(lh, FREAD|FWRITE, kcred);
4726*0a701b1eSRobert Gordon 	if (ret != 0) {
4727*0a701b1eSRobert Gordon 		return (ret);
4728*0a701b1eSRobert Gordon 	}
4729*0a701b1eSRobert Gordon 
4730*0a701b1eSRobert Gordon 	if (info.dl_mac_type != DL_IB) {
4731*0a701b1eSRobert Gordon 		return (-1);
4732*0a701b1eSRobert Gordon 	}
4733*0a701b1eSRobert Gordon 
4734*0a701b1eSRobert Gordon 	return (0);
4735*0a701b1eSRobert Gordon }
4736*0a701b1eSRobert Gordon 
4737*0a701b1eSRobert Gordon static int
4738*0a701b1eSRobert Gordon rpcib_dl_info(ldi_handle_t lh, dl_info_ack_t *info)
4739*0a701b1eSRobert Gordon {
4740*0a701b1eSRobert Gordon 	dl_info_req_t *info_req;
4741*0a701b1eSRobert Gordon 	union DL_primitives *dl_prim;
4742*0a701b1eSRobert Gordon 	mblk_t *mp;
4743*0a701b1eSRobert Gordon 	k_sigset_t smask;
4744*0a701b1eSRobert Gordon 	int error;
4745*0a701b1eSRobert Gordon 
4746*0a701b1eSRobert Gordon 	if ((mp = allocb(sizeof (dl_info_req_t), BPRI_MED)) == NULL) {
4747*0a701b1eSRobert Gordon 		return (ENOMEM);
4748*0a701b1eSRobert Gordon 	}
4749*0a701b1eSRobert Gordon 
4750*0a701b1eSRobert Gordon 	mp->b_datap->db_type = M_PROTO;
4751*0a701b1eSRobert Gordon 
4752*0a701b1eSRobert Gordon 	info_req = (dl_info_req_t *)(uintptr_t)mp->b_wptr;
4753*0a701b1eSRobert Gordon 	mp->b_wptr += sizeof (dl_info_req_t);
4754*0a701b1eSRobert Gordon 	info_req->dl_primitive = DL_INFO_REQ;
4755*0a701b1eSRobert Gordon 
4756*0a701b1eSRobert Gordon 	sigintr(&smask, 0);
4757*0a701b1eSRobert Gordon 	if ((error = ldi_putmsg(lh, mp)) != 0) {
4758*0a701b1eSRobert Gordon 		sigunintr(&smask);
4759*0a701b1eSRobert Gordon 		return (error);
4760*0a701b1eSRobert Gordon 	}
4761*0a701b1eSRobert Gordon 	if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) {
4762*0a701b1eSRobert Gordon 		sigunintr(&smask);
4763*0a701b1eSRobert Gordon 		return (error);
4764*0a701b1eSRobert Gordon 	}
4765*0a701b1eSRobert Gordon 	sigunintr(&smask);
4766*0a701b1eSRobert Gordon 
4767*0a701b1eSRobert Gordon 	dl_prim = (union DL_primitives *)(uintptr_t)mp->b_rptr;
4768*0a701b1eSRobert Gordon 	switch (dl_prim->dl_primitive) {
4769*0a701b1eSRobert Gordon 		case DL_INFO_ACK:
4770*0a701b1eSRobert Gordon 			if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) <
4771*0a701b1eSRobert Gordon 			    sizeof (dl_info_ack_t)) {
4772*0a701b1eSRobert Gordon 			error = -1;
4773*0a701b1eSRobert Gordon 			} else {
4774*0a701b1eSRobert Gordon 				*info = *(dl_info_ack_t *)(uintptr_t)mp->b_rptr;
4775*0a701b1eSRobert Gordon 				error = 0;
4776*0a701b1eSRobert Gordon 			}
4777*0a701b1eSRobert Gordon 			break;
4778*0a701b1eSRobert Gordon 		default:
4779*0a701b1eSRobert Gordon 			error = -1;
4780*0a701b1eSRobert Gordon 			break;
4781*0a701b1eSRobert Gordon 	}
4782*0a701b1eSRobert Gordon 
4783*0a701b1eSRobert Gordon 	freemsg(mp);
4784*0a701b1eSRobert Gordon 	return (error);
4785*0a701b1eSRobert Gordon }
4786*0a701b1eSRobert Gordon static int
4787*0a701b1eSRobert Gordon rpcib_do_ip_ioctl(int cmd, int len, caddr_t arg)
4788*0a701b1eSRobert Gordon {
4789*0a701b1eSRobert Gordon 	vnode_t *kvp, *vp;
4790*0a701b1eSRobert Gordon 	TIUSER  *tiptr;
4791*0a701b1eSRobert Gordon 	struct  strioctl iocb;
4792*0a701b1eSRobert Gordon 	k_sigset_t smask;
4793*0a701b1eSRobert Gordon 	int	err = 0;
4794*0a701b1eSRobert Gordon 
4795*0a701b1eSRobert Gordon 	if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP,
4796*0a701b1eSRobert Gordon 	    &kvp) == 0) {
4797*0a701b1eSRobert Gordon 		if (t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE,
4798*0a701b1eSRobert Gordon 		    &tiptr, CRED()) == 0) {
4799*0a701b1eSRobert Gordon 		vp = tiptr->fp->f_vnode;
4800*0a701b1eSRobert Gordon 	} else {
4801*0a701b1eSRobert Gordon 		VN_RELE(kvp);
4802*0a701b1eSRobert Gordon 		return (EPROTO);
4803*0a701b1eSRobert Gordon 		}
4804*0a701b1eSRobert Gordon 	} else {
4805*0a701b1eSRobert Gordon 			return (EPROTO);
4806*0a701b1eSRobert Gordon 	}
4807*0a701b1eSRobert Gordon 
4808*0a701b1eSRobert Gordon 	iocb.ic_cmd = cmd;
4809*0a701b1eSRobert Gordon 	iocb.ic_timout = 0;
4810*0a701b1eSRobert Gordon 	iocb.ic_len = len;
4811*0a701b1eSRobert Gordon 	iocb.ic_dp = arg;
4812*0a701b1eSRobert Gordon 	sigintr(&smask, 0);
4813*0a701b1eSRobert Gordon 	err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
4814*0a701b1eSRobert Gordon 	sigunintr(&smask);
4815*0a701b1eSRobert Gordon 	(void) t_kclose(tiptr, 0);
4816*0a701b1eSRobert Gordon 	VN_RELE(kvp);
4817*0a701b1eSRobert Gordon 	return (err);
4818*0a701b1eSRobert Gordon }
4819*0a701b1eSRobert Gordon 
4820*0a701b1eSRobert Gordon static uint_t rpcib_get_number_interfaces(void) {
4821*0a701b1eSRobert Gordon uint_t	numifs;
4822*0a701b1eSRobert Gordon 	if (rpcib_do_ip_ioctl(SIOCGIFNUM, sizeof (uint_t), (caddr_t)&numifs)) {
4823*0a701b1eSRobert Gordon 		return (0);
4824*0a701b1eSRobert Gordon 	}
4825*0a701b1eSRobert Gordon 	return (numifs);
4826*0a701b1eSRobert Gordon }
4827*0a701b1eSRobert Gordon 
4828*0a701b1eSRobert Gordon static boolean_t
4829*0a701b1eSRobert Gordon rpcib_get_ib_addresses(
4830*0a701b1eSRobert Gordon 	struct sockaddr_in *saddr4,
4831*0a701b1eSRobert Gordon 	struct sockaddr_in6 *saddr6,
4832*0a701b1eSRobert Gordon 	uint_t *number4,
4833*0a701b1eSRobert Gordon 	uint_t *number6)
4834*0a701b1eSRobert Gordon {
4835*0a701b1eSRobert Gordon 	int	numifs;
4836*0a701b1eSRobert Gordon 	struct	ifconf	kifc;
4837*0a701b1eSRobert Gordon 	struct  ifreq *ifr;
4838*0a701b1eSRobert Gordon 	boolean_t ret = B_FALSE;
4839*0a701b1eSRobert Gordon 
4840*0a701b1eSRobert Gordon 	*number4 = 0;
4841*0a701b1eSRobert Gordon 	*number6 = 0;
4842*0a701b1eSRobert Gordon 
4843*0a701b1eSRobert Gordon 	if (rpcib_do_ip_ioctl(SIOCGIFNUM, sizeof (int), (caddr_t)&numifs)) {
4844*0a701b1eSRobert Gordon 		return (ret);
4845*0a701b1eSRobert Gordon 	}
4846*0a701b1eSRobert Gordon 
4847*0a701b1eSRobert Gordon 	kifc.ifc_len = numifs * sizeof (struct ifreq);
4848*0a701b1eSRobert Gordon 	kifc.ifc_buf = kmem_zalloc(kifc.ifc_len, KM_SLEEP);
4849*0a701b1eSRobert Gordon 
4850*0a701b1eSRobert Gordon 	if (rpcib_do_ip_ioctl(SIOCGIFCONF, sizeof (struct ifconf),
4851*0a701b1eSRobert Gordon 	    (caddr_t)&kifc)) {
4852*0a701b1eSRobert Gordon 		goto done;
4853*0a701b1eSRobert Gordon 	}
4854*0a701b1eSRobert Gordon 
4855*0a701b1eSRobert Gordon 	ifr = kifc.ifc_req;
4856*0a701b1eSRobert Gordon 	for (numifs = kifc.ifc_len / sizeof (struct ifreq);
4857*0a701b1eSRobert Gordon 	    numifs > 0; numifs--, ifr++) {
4858*0a701b1eSRobert Gordon 		struct sockaddr_in *sin4;
4859*0a701b1eSRobert Gordon 		struct sockaddr_in6 *sin6;
4860*0a701b1eSRobert Gordon 
4861*0a701b1eSRobert Gordon 		if ((rpcib_is_ib_interface(ifr->ifr_name) == 0)) {
4862*0a701b1eSRobert Gordon 			sin4 = (struct sockaddr_in *)(uintptr_t)&ifr->ifr_addr;
4863*0a701b1eSRobert Gordon 			sin6 = (struct sockaddr_in6 *)(uintptr_t)&ifr->ifr_addr;
4864*0a701b1eSRobert Gordon 			if (sin4->sin_family == AF_INET) {
4865*0a701b1eSRobert Gordon 				saddr4[*number4] = *(struct sockaddr_in *)
4866*0a701b1eSRobert Gordon 				    (uintptr_t)&ifr->ifr_addr;
4867*0a701b1eSRobert Gordon 				*number4 = *number4 + 1;
4868*0a701b1eSRobert Gordon 			} else if (sin6->sin6_family == AF_INET6) {
4869*0a701b1eSRobert Gordon 				saddr6[*number6] = *(struct sockaddr_in6 *)
4870*0a701b1eSRobert Gordon 				    (uintptr_t)&ifr->ifr_addr;
4871*0a701b1eSRobert Gordon 				*number6 = *number6 + 1;
4872*0a701b1eSRobert Gordon 			}
4873*0a701b1eSRobert Gordon 		}
4874*0a701b1eSRobert Gordon 	}
4875*0a701b1eSRobert Gordon 	ret = B_TRUE;
4876*0a701b1eSRobert Gordon done:
4877*0a701b1eSRobert Gordon 	kmem_free(kifc.ifc_buf, kifc.ifc_len);
4878*0a701b1eSRobert Gordon 	return (ret);
4879*0a701b1eSRobert Gordon }
4880*0a701b1eSRobert Gordon 
4881*0a701b1eSRobert Gordon /* ARGSUSED */
4882*0a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *ksp, int rw) {
4883*0a701b1eSRobert Gordon 
4884*0a701b1eSRobert Gordon 	if (KSTAT_WRITE == rw) {
4885*0a701b1eSRobert Gordon 		return (EACCES);
4886*0a701b1eSRobert Gordon 	}
4887*0a701b1eSRobert Gordon 	rpcib_kstat.cache_limit.value.ui64 =
4888*0a701b1eSRobert Gordon 	    (uint64_t)cache_limit;
4889*0a701b1eSRobert Gordon 	rpcib_kstat.cache_allocation.value.ui64 =
4890*0a701b1eSRobert Gordon 	    (uint64_t)cache_allocation;
4891*0a701b1eSRobert Gordon 	rpcib_kstat.cache_hits.value.ui64 =
4892*0a701b1eSRobert Gordon 	    (uint64_t)cache_hits;
4893*0a701b1eSRobert Gordon 	rpcib_kstat.cache_misses.value.ui64 =
4894*0a701b1eSRobert Gordon 	    (uint64_t)cache_misses;
4895*0a701b1eSRobert Gordon 	rpcib_kstat.cache_misses_above_the_limit.value.ui64 =
4896*0a701b1eSRobert Gordon 	    (uint64_t)cache_misses_above_the_limit;
4897*0a701b1eSRobert Gordon 	return (0);
4898*0a701b1eSRobert Gordon }
4899