xref: /titanic_53/usr/src/uts/common/rpc/rpcib.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate /*
30*7c478bd9Sstevel@tonic-gate  * The rpcib plugin. Implements the interface for RDMATF's
31*7c478bd9Sstevel@tonic-gate  * interaction with IBTF.
32*7c478bd9Sstevel@tonic-gate  */
33*7c478bd9Sstevel@tonic-gate 
34*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/user.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/socket.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/file.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/stream.h>
43*7c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/errno.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
48*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
49*7c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
50*7c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
51*7c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
52*7c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
53*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
54*7c478bd9Sstevel@tonic-gate #include <sys/time.h>
55*7c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
56*7c478bd9Sstevel@tonic-gate #include <sys/callb.h>
57*7c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
58*7c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
59*7c478bd9Sstevel@tonic-gate 
60*7c478bd9Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h>
61*7c478bd9Sstevel@tonic-gate #include <rpc/rpc.h>
62*7c478bd9Sstevel@tonic-gate #include <rpc/ib.h>
63*7c478bd9Sstevel@tonic-gate 
64*7c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
65*7c478bd9Sstevel@tonic-gate 
66*7c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
67*7c478bd9Sstevel@tonic-gate #include <sys/kstr.h>
68*7c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
69*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
70*7c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
71*7c478bd9Sstevel@tonic-gate #include <net/if.h>
72*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
73*7c478bd9Sstevel@tonic-gate 
74*7c478bd9Sstevel@tonic-gate 
/*
 * inet_ntop() is declared here explicitly instead of through a header.
 * NOTE(review): its callers are outside the part of the file shown here.
 */
extern char *inet_ntop(int, const void *, char *, int);
76*7c478bd9Sstevel@tonic-gate 
77*7c478bd9Sstevel@tonic-gate 
/*
 * Prototype declarations for driver ops.
 * These three entry points are installed in the rpcib_ops dev_ops
 * table defined later in this file.
 */

static int	rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
static int	rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
			    void *, void **);
static int	rpcib_detach(dev_info_t *, ddi_detach_cmd_t);
86*7c478bd9Sstevel@tonic-gate 
87*7c478bd9Sstevel@tonic-gate 
/*
 * rpcib cb_ops
 *
 * Character/block entry-point table for the rpcib pseudo device.
 * open and close are wired to nulldev; every data-path entry point
 * (strategy, read, write, ioctl, mmap, ...) is wired to nodev, and
 * poll is nochpoll.  The initializer is positional, so the order of
 * the entries must follow the member order of struct cb_ops.
 */
static struct cb_ops rpcib_cbops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* stream */
	D_MP,			/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};
109*7c478bd9Sstevel@tonic-gate 
/*
 * Device operations
 *
 * dev_ops table for the rpcib driver.  It supplies the getinfo, attach
 * and detach entry points defined in this file and points at
 * rpcib_cbops for the cb_ops table.  The initializer is positional, so
 * the order must follow the member order of struct dev_ops.
 */
static struct dev_ops rpcib_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	rpcib_getinfo,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rpcib_attach,		/* attach */
	rpcib_detach,		/* detach */
	nodev,			/* reset */
	&rpcib_cbops,		    /* driver ops - devctl interfaces */
	NULL,			/* bus operations */
	NULL			/* power */
};
126*7c478bd9Sstevel@tonic-gate 
/*
 * Module linkage information.
 *
 * rib_modldrv describes this module as a device driver and ties it to
 * rpcib_ops; rib_modlinkage is the structure handed to mod_install(),
 * mod_remove() and mod_info() by _init(), _fini() and _info().
 */

static struct modldrv rib_modldrv = {
	&mod_driverops,			    /* Driver module */
	"RPCIB plugin driver, ver %I%", /* Driver name and version */
	&rpcib_ops,		    /* Driver ops */
};

static struct modlinkage rib_modlinkage = {
	MODREV_1,
	(void *)&rib_modldrv,
	NULL
};
142*7c478bd9Sstevel@tonic-gate 
/*
 * rib_stat: private data pointer used when registering
 *	with the IBTF.  It is returned to the consumer
 *	in all callbacks.
 *
 *	Allocated by rpcib_attach() when it is still NULL, and freed
 *	and reset to NULL on the attach failure paths.
 */
static rpcib_state_t *rib_stat = NULL;

/*
 * NOTE(review): the users of these two constants are outside the part
 * of the file shown here; the names suggest an RNR retry count and a
 * per-HCA port limit -- confirm against the code that uses them.
 */
#define	RNR_RETRIES	2
#define	MAX_PORTS	2

/*
 * Global (non-static) integer settings.
 * NOTE(review): presumably tunables for the number of pre-posted
 * receive buffers and a send threshold; their consumers are not
 * visible here.
 */
int preposted_rbufs = 16;
int send_threshold = 1;
155*7c478bd9Sstevel@tonic-gate 
/*
 * State of the plugin.
 * ACCEPT = accepting new connections and requests.
 * NO_ACCEPT = not accepting new connections and requests.
 * This should eventually move to the rpcib_state_t structure, since this
 * will tell in which state the plugin is for a particular type of service
 * like NFS, NLM or the v4 callback daemon. The plugin might be in the
 * accept state for one and in the no_accept state for the other.
 *
 * plugin_state is written under plugin_state_lock (see rpcib_detach(),
 * which sets it to NO_ACCEPT).  The lock is created in _init() and
 * destroyed in _fini().
 */
int		plugin_state;
kmutex_t	plugin_state_lock;
167*7c478bd9Sstevel@tonic-gate 
168*7c478bd9Sstevel@tonic-gate 
/*
 * RPCIB RDMATF operations
 *
 * Forward declarations of the plugin's operations.  Most of these are
 * installed in the rib_ops vector that is handed to the RDMA transport
 * framework; the remainder (rib_disconnect, rib_rbuf_alloc,
 * rib_rbuf_free, rib_ping_srv, the ATS helpers and rib_stop_services)
 * are file-local helpers that do not appear in that vector.
 */
static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
static rdma_stat rib_disconnect(CONN *conn);
static void rib_listen(struct rdma_svc_data *rd);
static void rib_listen_stop(struct rdma_svc_data *rd);
static rdma_stat rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
	struct mrc *buf_handle);
static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
	struct mrc buf_handle);
static rdma_stat rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle);
static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle);
static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
	caddr_t buf, int len, int cpu);

static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);

static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf);
static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);

static void rib_rbuf_free(CONN *conn, int ptype, void *buf);

static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **);
static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
static rdma_stat rib_conn_release(CONN *conn);
static rdma_stat rib_getinfo(rdma_info_t *info);
static rdma_stat rib_register_ats(rib_hca_t *);
static void rib_deregister_ats();
static void rib_stop_services(rib_hca_t *);
208*7c478bd9Sstevel@tonic-gate 
/*
 * RPCIB addressing operations
 *
 * These helpers are declared without "static", so they have external
 * linkage.  Their definitions are outside the part of the file shown
 * here.
 */
char ** get_ip_addrs(int *count);
int get_interfaces(TIUSER *tiptr, int *num);
int find_addrs(TIUSER *tiptr, char **addrs, int num_ifs);
int get_ibd_ipaddr(rpcib_ibd_insts_t *);
rpcib_ats_t *get_ibd_entry(ib_gid_t *, ib_pkey_t, rpcib_ibd_insts_t *);
void rib_get_ibd_insts(rpcib_ibd_insts_t *);
218*7c478bd9Sstevel@tonic-gate 
219*7c478bd9Sstevel@tonic-gate 
/*
 * RDMA operations the RPCIB module exports
 *
 * This vector is referenced from rib_mod, which rpcib_attach() passes
 * to rdma_register_mod().  The initializer is positional: the order of
 * the entries must follow the member order of rdmaops_t.
 */
static rdmaops_t rib_ops = {
	rib_reachable,
	rib_conn_get,
	rib_conn_release,
	rib_listen,
	rib_listen_stop,
	rib_registermem,
	rib_deregistermem,
	rib_registermemsync,
	rib_deregistermemsync,
	rib_syncmem,
	rib_reg_buf_alloc,
	rib_reg_buf_free,
	rib_send,
	rib_send_resp,
	rib_post_resp,
	rib_post_recv,
	rib_recv,
	rib_read,
	rib_write,
	rib_getinfo
};
245*7c478bd9Sstevel@tonic-gate 
/*
 * RDMATF RPCIB plugin details
 *
 * Descriptor registered with the RDMA transport framework through
 * rdma_register_mod() in rpcib_attach() and removed again through
 * rdma_unregister_mod() in _fini().
 */
static rdma_mod_t rib_mod = {
	"ibtf",		/* api name */
	RDMATF_VERS_1,
	0,		/* presumably rdma_count; rpcib_attach() sets that */
			/* member to the HCA count -- TODO confirm */
	&rib_ops,	/* rdma op vector for ibtf */
};
255*7c478bd9Sstevel@tonic-gate 
/*
 * Forward declarations of file-local helpers: HCA open/detach, queue
 * pair and channel setup, completion-queue handlers, buffer pools,
 * send/receive bookkeeping and the IBTF async handler.  Their
 * definitions are outside the part of the file shown here.
 */
static rdma_stat open_hcas(rpcib_state_t *);
static rdma_stat rib_qp_init(rib_qp_t *, int);
static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
	ibt_mr_hdl_t *, ibt_mr_desc_t *);
static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *);
static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
	rib_qp_t **);
static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
	rib_qp_t **);
static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *);
static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *);
static int rib_free_sendwait(struct send_wid *);
static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid);
static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd);
static void rdma_done_rem_list(rib_qp_t *);
static void rdma_done_notify(rib_qp_t *qp, uint32_t xid);

static void rib_async_handler(void *,
	ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *);
static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *);
static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *);
static int rib_free_svc_recv(struct svc_recv *);
static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
static void rib_free_wid(struct recv_wid *);
static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
static void rib_detach_hca(rib_hca_t *);
static rdma_stat rib_chk_srv_ats(rib_hca_t *, struct netbuf *, int,
	ibt_path_info_t *);
289*7c478bd9Sstevel@tonic-gate 
/*
 * Registration with IBTF as a consumer
 *
 * Client description passed to ibt_attach() in rpcib_attach().  The
 * initializer is positional, so the order must follow the member order
 * of struct ibt_clnt_modinfo_s.
 */
static struct ibt_clnt_modinfo_s rib_modinfo = {
	IBTI_V1,
	IBT_GENERIC,
	rib_async_handler,	/* async event handler */
	NULL,			/* Memory Region Handler */
	"nfs/ib"
};
300*7c478bd9Sstevel@tonic-gate 
/*
 * Global structure
 *
 * Per-driver soft state for the single rpcib instance.
 */

typedef struct rpcib_s {
	/* devinfo of the attached instance; NULL while not attached */
	dev_info_t	*rpcib_dip;
	/* held while rpcib_dip is updated in attach and detach */
	kmutex_t	rpcib_mutex;
} rpcib_t;

rpcib_t rpcib;
311*7c478bd9Sstevel@tonic-gate 
/*
 * /etc/system controlled variable to control
 * debugging in rpcib kernel module.
 * Set it to values greater than 1 to control
 * the amount of debugging messages required.
 * The DEBUG-only messages in rib_deregister_ats() are printed for any
 * nonzero value.
 */
int rib_debug = 0;

/*
 * Cleared under rib_stat->open_hca_lock by rib_deregister_ats() when
 * an ATS entry is deregistered successfully.
 * NOTE(review): the code that sets it is outside the part of the file
 * shown here.
 */
static int ats_running = 0;
321*7c478bd9Sstevel@tonic-gate int
322*7c478bd9Sstevel@tonic-gate _init(void)
323*7c478bd9Sstevel@tonic-gate {
324*7c478bd9Sstevel@tonic-gate 	int		error;
325*7c478bd9Sstevel@tonic-gate 
326*7c478bd9Sstevel@tonic-gate 	error = mod_install((struct modlinkage *)&rib_modlinkage);
327*7c478bd9Sstevel@tonic-gate 	if (error != 0) {
328*7c478bd9Sstevel@tonic-gate 		/*
329*7c478bd9Sstevel@tonic-gate 		 * Could not load module
330*7c478bd9Sstevel@tonic-gate 		 */
331*7c478bd9Sstevel@tonic-gate 		return (error);
332*7c478bd9Sstevel@tonic-gate 	}
333*7c478bd9Sstevel@tonic-gate 	mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);
334*7c478bd9Sstevel@tonic-gate 
335*7c478bd9Sstevel@tonic-gate 	return (0);
336*7c478bd9Sstevel@tonic-gate }
337*7c478bd9Sstevel@tonic-gate 
338*7c478bd9Sstevel@tonic-gate int
339*7c478bd9Sstevel@tonic-gate _fini()
340*7c478bd9Sstevel@tonic-gate {
341*7c478bd9Sstevel@tonic-gate 	int status;
342*7c478bd9Sstevel@tonic-gate 
343*7c478bd9Sstevel@tonic-gate 	if ((status = rdma_unregister_mod(&rib_mod)) != RDMA_SUCCESS) {
344*7c478bd9Sstevel@tonic-gate 		return (EBUSY);
345*7c478bd9Sstevel@tonic-gate 	}
346*7c478bd9Sstevel@tonic-gate 
347*7c478bd9Sstevel@tonic-gate 	rib_deregister_ats();
348*7c478bd9Sstevel@tonic-gate 
349*7c478bd9Sstevel@tonic-gate 	/*
350*7c478bd9Sstevel@tonic-gate 	 * Remove module
351*7c478bd9Sstevel@tonic-gate 	 */
352*7c478bd9Sstevel@tonic-gate 	if ((status = mod_remove(&rib_modlinkage)) != 0) {
353*7c478bd9Sstevel@tonic-gate 		(void) rdma_register_mod(&rib_mod);
354*7c478bd9Sstevel@tonic-gate 		return (status);
355*7c478bd9Sstevel@tonic-gate 	}
356*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&plugin_state_lock);
357*7c478bd9Sstevel@tonic-gate 	return (0);
358*7c478bd9Sstevel@tonic-gate }
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate int
361*7c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
362*7c478bd9Sstevel@tonic-gate {
363*7c478bd9Sstevel@tonic-gate 	return (mod_info(&rib_modlinkage, modinfop));
364*7c478bd9Sstevel@tonic-gate }
365*7c478bd9Sstevel@tonic-gate 
366*7c478bd9Sstevel@tonic-gate 
367*7c478bd9Sstevel@tonic-gate /*
368*7c478bd9Sstevel@tonic-gate  * rpcib_getinfo()
369*7c478bd9Sstevel@tonic-gate  * Given the device number, return the devinfo pointer or the
370*7c478bd9Sstevel@tonic-gate  * instance number.
371*7c478bd9Sstevel@tonic-gate  * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
372*7c478bd9Sstevel@tonic-gate  */
373*7c478bd9Sstevel@tonic-gate 
374*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
375*7c478bd9Sstevel@tonic-gate static int
376*7c478bd9Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
377*7c478bd9Sstevel@tonic-gate {
378*7c478bd9Sstevel@tonic-gate 	int ret = DDI_SUCCESS;
379*7c478bd9Sstevel@tonic-gate 
380*7c478bd9Sstevel@tonic-gate 	switch (cmd) {
381*7c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
382*7c478bd9Sstevel@tonic-gate 		if (rpcib.rpcib_dip != NULL)
383*7c478bd9Sstevel@tonic-gate 			*result = rpcib.rpcib_dip;
384*7c478bd9Sstevel@tonic-gate 		else {
385*7c478bd9Sstevel@tonic-gate 			*result = NULL;
386*7c478bd9Sstevel@tonic-gate 			ret = DDI_FAILURE;
387*7c478bd9Sstevel@tonic-gate 		}
388*7c478bd9Sstevel@tonic-gate 		break;
389*7c478bd9Sstevel@tonic-gate 
390*7c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
391*7c478bd9Sstevel@tonic-gate 		*result = NULL;
392*7c478bd9Sstevel@tonic-gate 		break;
393*7c478bd9Sstevel@tonic-gate 
394*7c478bd9Sstevel@tonic-gate 	default:
395*7c478bd9Sstevel@tonic-gate 		ret = DDI_FAILURE;
396*7c478bd9Sstevel@tonic-gate 	}
397*7c478bd9Sstevel@tonic-gate 	return (ret);
398*7c478bd9Sstevel@tonic-gate }
399*7c478bd9Sstevel@tonic-gate 
400*7c478bd9Sstevel@tonic-gate static int
401*7c478bd9Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
402*7c478bd9Sstevel@tonic-gate {
403*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
404*7c478bd9Sstevel@tonic-gate 	rdma_stat	r_status;
405*7c478bd9Sstevel@tonic-gate 
406*7c478bd9Sstevel@tonic-gate 	switch (cmd) {
407*7c478bd9Sstevel@tonic-gate 	case DDI_ATTACH:
408*7c478bd9Sstevel@tonic-gate 		break;
409*7c478bd9Sstevel@tonic-gate 	case DDI_RESUME:
410*7c478bd9Sstevel@tonic-gate 		return (DDI_SUCCESS);
411*7c478bd9Sstevel@tonic-gate 	default:
412*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
413*7c478bd9Sstevel@tonic-gate 	}
414*7c478bd9Sstevel@tonic-gate 
415*7c478bd9Sstevel@tonic-gate 	mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL);
416*7c478bd9Sstevel@tonic-gate 
417*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
418*7c478bd9Sstevel@tonic-gate 	if (rpcib.rpcib_dip != NULL) {
419*7c478bd9Sstevel@tonic-gate 		mutex_exit(&rpcib.rpcib_mutex);
420*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
421*7c478bd9Sstevel@tonic-gate 	}
422*7c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = dip;
423*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
424*7c478bd9Sstevel@tonic-gate 	/*
425*7c478bd9Sstevel@tonic-gate 	 * Create the "rpcib" minor-node.
426*7c478bd9Sstevel@tonic-gate 	 */
427*7c478bd9Sstevel@tonic-gate 	if (ddi_create_minor_node(dip,
428*7c478bd9Sstevel@tonic-gate 	    "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
429*7c478bd9Sstevel@tonic-gate 		/* Error message, no cmn_err as they print on console */
430*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
431*7c478bd9Sstevel@tonic-gate 	}
432*7c478bd9Sstevel@tonic-gate 
433*7c478bd9Sstevel@tonic-gate 	if (rib_stat == NULL) {
434*7c478bd9Sstevel@tonic-gate 		rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP);
435*7c478bd9Sstevel@tonic-gate 		mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL);
436*7c478bd9Sstevel@tonic-gate 	}
437*7c478bd9Sstevel@tonic-gate 
438*7c478bd9Sstevel@tonic-gate 	rib_stat->hca_count = ibt_get_hca_list(&rib_stat->hca_guids);
439*7c478bd9Sstevel@tonic-gate 	if (rib_stat->hca_count < 1) {
440*7c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
441*7c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
442*7c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
443*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
444*7c478bd9Sstevel@tonic-gate 	}
445*7c478bd9Sstevel@tonic-gate 
446*7c478bd9Sstevel@tonic-gate 	ibt_status = ibt_attach(&rib_modinfo, dip,
447*7c478bd9Sstevel@tonic-gate 			(void *)rib_stat, &rib_stat->ibt_clnt_hdl);
448*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
449*7c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
450*7c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
451*7c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
452*7c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
453*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
454*7c478bd9Sstevel@tonic-gate 	}
455*7c478bd9Sstevel@tonic-gate 
456*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
457*7c478bd9Sstevel@tonic-gate 	if (open_hcas(rib_stat) != RDMA_SUCCESS) {
458*7c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
459*7c478bd9Sstevel@tonic-gate 		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
460*7c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
461*7c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
462*7c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
463*7c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
464*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
465*7c478bd9Sstevel@tonic-gate 	}
466*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
467*7c478bd9Sstevel@tonic-gate 
468*7c478bd9Sstevel@tonic-gate 	/*
469*7c478bd9Sstevel@tonic-gate 	 * Register with rdmatf
470*7c478bd9Sstevel@tonic-gate 	 */
471*7c478bd9Sstevel@tonic-gate 	rib_mod.rdma_count = rib_stat->hca_count;
472*7c478bd9Sstevel@tonic-gate 	r_status = rdma_register_mod(&rib_mod);
473*7c478bd9Sstevel@tonic-gate 	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
474*7c478bd9Sstevel@tonic-gate 		rib_detach_hca(rib_stat->hca);
475*7c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
476*7c478bd9Sstevel@tonic-gate 		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
477*7c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
478*7c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
479*7c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
480*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
481*7c478bd9Sstevel@tonic-gate 	}
482*7c478bd9Sstevel@tonic-gate 
483*7c478bd9Sstevel@tonic-gate 
484*7c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
485*7c478bd9Sstevel@tonic-gate }
486*7c478bd9Sstevel@tonic-gate 
487*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
488*7c478bd9Sstevel@tonic-gate static int
489*7c478bd9Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
490*7c478bd9Sstevel@tonic-gate {
491*7c478bd9Sstevel@tonic-gate 	switch (cmd) {
492*7c478bd9Sstevel@tonic-gate 
493*7c478bd9Sstevel@tonic-gate 	case DDI_DETACH:
494*7c478bd9Sstevel@tonic-gate 		break;
495*7c478bd9Sstevel@tonic-gate 
496*7c478bd9Sstevel@tonic-gate 	case DDI_SUSPEND:
497*7c478bd9Sstevel@tonic-gate 	default:
498*7c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
499*7c478bd9Sstevel@tonic-gate 	}
500*7c478bd9Sstevel@tonic-gate 
501*7c478bd9Sstevel@tonic-gate 	/*
502*7c478bd9Sstevel@tonic-gate 	 * Detach the hca and free resources
503*7c478bd9Sstevel@tonic-gate 	 */
504*7c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
505*7c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
506*7c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
507*7c478bd9Sstevel@tonic-gate 	rib_detach_hca(rib_stat->hca);
508*7c478bd9Sstevel@tonic-gate 	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
509*7c478bd9Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
510*7c478bd9Sstevel@tonic-gate 
511*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
512*7c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = NULL;
513*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
514*7c478bd9Sstevel@tonic-gate 
515*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&rpcib.rpcib_mutex);
516*7c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
517*7c478bd9Sstevel@tonic-gate }
518*7c478bd9Sstevel@tonic-gate 
519*7c478bd9Sstevel@tonic-gate 
520*7c478bd9Sstevel@tonic-gate static void
521*7c478bd9Sstevel@tonic-gate rib_deregister_ats()
522*7c478bd9Sstevel@tonic-gate {
523*7c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
524*7c478bd9Sstevel@tonic-gate 	rib_service_t		*srv_list, *to_remove;
525*7c478bd9Sstevel@tonic-gate 	ibt_status_t   		ibt_status;
526*7c478bd9Sstevel@tonic-gate 
527*7c478bd9Sstevel@tonic-gate 	/*
528*7c478bd9Sstevel@tonic-gate 	 * deregister the Address Translation Service.
529*7c478bd9Sstevel@tonic-gate 	 */
530*7c478bd9Sstevel@tonic-gate 	hca = rib_stat->hca;
531*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
532*7c478bd9Sstevel@tonic-gate 	srv_list = hca->ats_list;
533*7c478bd9Sstevel@tonic-gate 	while (srv_list != NULL) {
534*7c478bd9Sstevel@tonic-gate 		to_remove = srv_list;
535*7c478bd9Sstevel@tonic-gate 		srv_list = to_remove->srv_next;
536*7c478bd9Sstevel@tonic-gate 
537*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_deregister_ar(hca->ibt_clnt_hdl,
538*7c478bd9Sstevel@tonic-gate 				&to_remove->srv_ar);
539*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
540*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
541*7c478bd9Sstevel@tonic-gate 		    if (rib_debug) {
542*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "_fini: "
543*7c478bd9Sstevel@tonic-gate 			    "ibt_deregister_ar FAILED"
544*7c478bd9Sstevel@tonic-gate 				" status: %d", ibt_status);
545*7c478bd9Sstevel@tonic-gate 		    }
546*7c478bd9Sstevel@tonic-gate #endif
547*7c478bd9Sstevel@tonic-gate 		} else {
548*7c478bd9Sstevel@tonic-gate 		    mutex_enter(&rib_stat->open_hca_lock);
549*7c478bd9Sstevel@tonic-gate 		    ats_running = 0;
550*7c478bd9Sstevel@tonic-gate 		    mutex_exit(&rib_stat->open_hca_lock);
551*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
552*7c478bd9Sstevel@tonic-gate 		    if (rib_debug) {
553*7c478bd9Sstevel@tonic-gate 
554*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "_fini: "
555*7c478bd9Sstevel@tonic-gate 			    "Successfully unregistered"
556*7c478bd9Sstevel@tonic-gate 			    " ATS service: %s",
557*7c478bd9Sstevel@tonic-gate 			    to_remove->srv_name);
558*7c478bd9Sstevel@tonic-gate 		    }
559*7c478bd9Sstevel@tonic-gate #endif
560*7c478bd9Sstevel@tonic-gate 		}
561*7c478bd9Sstevel@tonic-gate 		kmem_free(to_remove, sizeof (rib_service_t));
562*7c478bd9Sstevel@tonic-gate 	}
563*7c478bd9Sstevel@tonic-gate 	hca->ats_list = NULL;
564*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
565*7c478bd9Sstevel@tonic-gate }
566*7c478bd9Sstevel@tonic-gate 
/*
 * Forward declarations for file-local (static) helpers whose bodies are
 * not located above this point: the rbufpool routines, the per-QP reply
 * list routines and the connection list routines.
 */
567*7c478bd9Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int);
568*7c478bd9Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int);
569*7c478bd9Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype);
570*7c478bd9Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t);
571*7c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *);
572*7c478bd9Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *);
573*7c478bd9Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
574*7c478bd9Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
575*7c478bd9Sstevel@tonic-gate 
576*7c478bd9Sstevel@tonic-gate /*
577*7c478bd9Sstevel@tonic-gate  * One CQ pair per HCA
578*7c478bd9Sstevel@tonic-gate  */
579*7c478bd9Sstevel@tonic-gate static rdma_stat
580*7c478bd9Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
581*7c478bd9Sstevel@tonic-gate 	rib_cq_t **cqp, rpcib_state_t *ribstat)
582*7c478bd9Sstevel@tonic-gate {
583*7c478bd9Sstevel@tonic-gate 	rib_cq_t	*cq;
584*7c478bd9Sstevel@tonic-gate 	ibt_cq_attr_t	cq_attr;
585*7c478bd9Sstevel@tonic-gate 	uint32_t	real_size;
586*7c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
587*7c478bd9Sstevel@tonic-gate 	rdma_stat	error = RDMA_SUCCESS;
588*7c478bd9Sstevel@tonic-gate 
589*7c478bd9Sstevel@tonic-gate 	cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP);
590*7c478bd9Sstevel@tonic-gate 	cq->rib_hca = hca;
591*7c478bd9Sstevel@tonic-gate 	cq_attr.cq_size = cq_size;
592*7c478bd9Sstevel@tonic-gate 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
593*7c478bd9Sstevel@tonic-gate 	status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl,
594*7c478bd9Sstevel@tonic-gate 	    &real_size);
595*7c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
596*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed,"
597*7c478bd9Sstevel@tonic-gate 				" status=%d", status);
598*7c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
599*7c478bd9Sstevel@tonic-gate 		goto fail;
600*7c478bd9Sstevel@tonic-gate 	}
601*7c478bd9Sstevel@tonic-gate 	ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, ribstat);
602*7c478bd9Sstevel@tonic-gate 
603*7c478bd9Sstevel@tonic-gate 	/*
604*7c478bd9Sstevel@tonic-gate 	 * Enable CQ callbacks. CQ Callbacks are single shot
605*7c478bd9Sstevel@tonic-gate 	 * (e.g. you have to call ibt_enable_cq_notify()
606*7c478bd9Sstevel@tonic-gate 	 * after each callback to get another one).
607*7c478bd9Sstevel@tonic-gate 	 */
608*7c478bd9Sstevel@tonic-gate 	status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION);
609*7c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
610*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: "
611*7c478bd9Sstevel@tonic-gate 			"enable_cq_notify failed, status %d", status);
612*7c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
613*7c478bd9Sstevel@tonic-gate 		goto fail;
614*7c478bd9Sstevel@tonic-gate 	}
615*7c478bd9Sstevel@tonic-gate 	*cqp = cq;
616*7c478bd9Sstevel@tonic-gate 
617*7c478bd9Sstevel@tonic-gate 	return (error);
618*7c478bd9Sstevel@tonic-gate fail:
619*7c478bd9Sstevel@tonic-gate 	if (cq->rib_cq_hdl)
620*7c478bd9Sstevel@tonic-gate 		(void) ibt_free_cq(cq->rib_cq_hdl);
621*7c478bd9Sstevel@tonic-gate 	if (cq)
622*7c478bd9Sstevel@tonic-gate 		kmem_free(cq, sizeof (rib_cq_t));
623*7c478bd9Sstevel@tonic-gate 	return (error);
624*7c478bd9Sstevel@tonic-gate }
625*7c478bd9Sstevel@tonic-gate 
626*7c478bd9Sstevel@tonic-gate static rdma_stat
627*7c478bd9Sstevel@tonic-gate open_hcas(rpcib_state_t *ribstat)
628*7c478bd9Sstevel@tonic-gate {
629*7c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
630*7c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
631*7c478bd9Sstevel@tonic-gate 	rdma_stat		status;
632*7c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*pinfop;
633*7c478bd9Sstevel@tonic-gate 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
634*7c478bd9Sstevel@tonic-gate 	uint_t			size, cq_size;
635*7c478bd9Sstevel@tonic-gate 	int			i;
636*7c478bd9Sstevel@tonic-gate 
637*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
638*7c478bd9Sstevel@tonic-gate 	if (ribstat->hcas == NULL)
639*7c478bd9Sstevel@tonic-gate 		ribstat->hcas = kmem_zalloc(ribstat->hca_count *
640*7c478bd9Sstevel@tonic-gate 				    sizeof (rib_hca_t), KM_SLEEP);
641*7c478bd9Sstevel@tonic-gate 
642*7c478bd9Sstevel@tonic-gate 	/*
643*7c478bd9Sstevel@tonic-gate 	 * Open a hca and setup for RDMA
644*7c478bd9Sstevel@tonic-gate 	 */
645*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < ribstat->hca_count; i++) {
646*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl,
647*7c478bd9Sstevel@tonic-gate 				ribstat->hca_guids[i],
648*7c478bd9Sstevel@tonic-gate 				&ribstat->hcas[i].hca_hdl);
649*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
650*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: ibt_open_hca (%d) "
651*7c478bd9Sstevel@tonic-gate 				"returned %d", i, ibt_status);
652*7c478bd9Sstevel@tonic-gate 			continue;
653*7c478bd9Sstevel@tonic-gate 		}
654*7c478bd9Sstevel@tonic-gate 		ribstat->hcas[i].hca_guid = ribstat->hca_guids[i];
655*7c478bd9Sstevel@tonic-gate 		hca = &(ribstat->hcas[i]);
656*7c478bd9Sstevel@tonic-gate 		hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl;
657*7c478bd9Sstevel@tonic-gate 		hca->state = HCA_INITED;
658*7c478bd9Sstevel@tonic-gate 
659*7c478bd9Sstevel@tonic-gate 		/*
660*7c478bd9Sstevel@tonic-gate 		 * query HCA info
661*7c478bd9Sstevel@tonic-gate 		 */
662*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs);
663*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
664*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: ibt_query_hca "
665*7c478bd9Sstevel@tonic-gate 			    "returned %d (hca_guid 0x%llx)",
666*7c478bd9Sstevel@tonic-gate 			    ibt_status, (longlong_t)ribstat->hca_guids[i]);
667*7c478bd9Sstevel@tonic-gate 			goto fail1;
668*7c478bd9Sstevel@tonic-gate 		}
669*7c478bd9Sstevel@tonic-gate 
670*7c478bd9Sstevel@tonic-gate 		/*
671*7c478bd9Sstevel@tonic-gate 		 * One PD (Protection Domain) per HCA.
672*7c478bd9Sstevel@tonic-gate 		 * A qp is allowed to access a memory region
673*7c478bd9Sstevel@tonic-gate 		 * only when it's in the same PD as that of
674*7c478bd9Sstevel@tonic-gate 		 * the memory region.
675*7c478bd9Sstevel@tonic-gate 		 */
676*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl);
677*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
678*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: ibt_alloc_pd "
679*7c478bd9Sstevel@tonic-gate 				"returned %d (hca_guid 0x%llx)",
680*7c478bd9Sstevel@tonic-gate 				ibt_status, (longlong_t)ribstat->hca_guids[i]);
681*7c478bd9Sstevel@tonic-gate 			goto fail1;
682*7c478bd9Sstevel@tonic-gate 		}
683*7c478bd9Sstevel@tonic-gate 
684*7c478bd9Sstevel@tonic-gate 		/*
685*7c478bd9Sstevel@tonic-gate 		 * query HCA ports
686*7c478bd9Sstevel@tonic-gate 		 */
687*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl,
688*7c478bd9Sstevel@tonic-gate 				0, &pinfop, &hca->hca_nports, &size);
689*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
690*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: "
691*7c478bd9Sstevel@tonic-gate 				"ibt_query_hca_ports returned %d "
692*7c478bd9Sstevel@tonic-gate 				"(hca_guid 0x%llx)",
693*7c478bd9Sstevel@tonic-gate 				ibt_status, (longlong_t)hca->hca_guid);
694*7c478bd9Sstevel@tonic-gate 			goto fail2;
695*7c478bd9Sstevel@tonic-gate 		}
696*7c478bd9Sstevel@tonic-gate 		hca->hca_ports = pinfop;
697*7c478bd9Sstevel@tonic-gate 		hca->hca_pinfosz = size;
698*7c478bd9Sstevel@tonic-gate 		pinfop = NULL;
699*7c478bd9Sstevel@tonic-gate 
700*7c478bd9Sstevel@tonic-gate 		cq_size = DEF_CQ_SIZE; /* default cq size */
701*7c478bd9Sstevel@tonic-gate 		/*
702*7c478bd9Sstevel@tonic-gate 		 * Create 2 pairs of cq's (1 pair for client
703*7c478bd9Sstevel@tonic-gate 		 * and the other pair for server) on this hca.
704*7c478bd9Sstevel@tonic-gate 		 * If number of qp's gets too large, then several
705*7c478bd9Sstevel@tonic-gate 		 * cq's will be needed.
706*7c478bd9Sstevel@tonic-gate 		 */
707*7c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler,
708*7c478bd9Sstevel@tonic-gate 				&hca->svc_rcq, ribstat);
709*7c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
710*7c478bd9Sstevel@tonic-gate 			goto fail3;
711*7c478bd9Sstevel@tonic-gate 		}
712*7c478bd9Sstevel@tonic-gate 
713*7c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_scq_handler,
714*7c478bd9Sstevel@tonic-gate 				&hca->svc_scq, ribstat);
715*7c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
716*7c478bd9Sstevel@tonic-gate 			goto fail3;
717*7c478bd9Sstevel@tonic-gate 		}
718*7c478bd9Sstevel@tonic-gate 
719*7c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler,
720*7c478bd9Sstevel@tonic-gate 				&hca->clnt_rcq, ribstat);
721*7c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
722*7c478bd9Sstevel@tonic-gate 			goto fail3;
723*7c478bd9Sstevel@tonic-gate 		}
724*7c478bd9Sstevel@tonic-gate 
725*7c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler,
726*7c478bd9Sstevel@tonic-gate 				&hca->clnt_scq, ribstat);
727*7c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
728*7c478bd9Sstevel@tonic-gate 			goto fail3;
729*7c478bd9Sstevel@tonic-gate 		}
730*7c478bd9Sstevel@tonic-gate 
731*7c478bd9Sstevel@tonic-gate 		/*
732*7c478bd9Sstevel@tonic-gate 		 * Create buffer pools.
733*7c478bd9Sstevel@tonic-gate 		 * Note rib_rbuf_create also allocates memory windows.
734*7c478bd9Sstevel@tonic-gate 		 */
735*7c478bd9Sstevel@tonic-gate 		hca->recv_pool = rib_rbufpool_create(hca,
736*7c478bd9Sstevel@tonic-gate 					RECV_BUFFER, MAX_BUFS);
737*7c478bd9Sstevel@tonic-gate 		if (hca->recv_pool == NULL) {
738*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: recv buf pool failed\n");
739*7c478bd9Sstevel@tonic-gate 			goto fail3;
740*7c478bd9Sstevel@tonic-gate 		}
741*7c478bd9Sstevel@tonic-gate 
742*7c478bd9Sstevel@tonic-gate 		hca->send_pool = rib_rbufpool_create(hca,
743*7c478bd9Sstevel@tonic-gate 					SEND_BUFFER, MAX_BUFS);
744*7c478bd9Sstevel@tonic-gate 		if (hca->send_pool == NULL) {
745*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: send buf pool failed\n");
746*7c478bd9Sstevel@tonic-gate 			rib_rbufpool_destroy(hca, RECV_BUFFER);
747*7c478bd9Sstevel@tonic-gate 			goto fail3;
748*7c478bd9Sstevel@tonic-gate 		}
749*7c478bd9Sstevel@tonic-gate 
750*7c478bd9Sstevel@tonic-gate 		/*
751*7c478bd9Sstevel@tonic-gate 		 * Initialize the registered service list and
752*7c478bd9Sstevel@tonic-gate 		 * the lock
753*7c478bd9Sstevel@tonic-gate 		 */
754*7c478bd9Sstevel@tonic-gate 		hca->service_list = NULL;
755*7c478bd9Sstevel@tonic-gate 		rw_init(&hca->service_list_lock, NULL, RW_DRIVER, hca->iblock);
756*7c478bd9Sstevel@tonic-gate 
757*7c478bd9Sstevel@tonic-gate 		mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
758*7c478bd9Sstevel@tonic-gate 		cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL);
759*7c478bd9Sstevel@tonic-gate 		rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER,
760*7c478bd9Sstevel@tonic-gate 			hca->iblock);
761*7c478bd9Sstevel@tonic-gate 		rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER,
762*7c478bd9Sstevel@tonic-gate 			hca->iblock);
763*7c478bd9Sstevel@tonic-gate 		rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock);
764*7c478bd9Sstevel@tonic-gate 		mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock);
765*7c478bd9Sstevel@tonic-gate 		hca->inuse = TRUE;
766*7c478bd9Sstevel@tonic-gate 		/*
767*7c478bd9Sstevel@tonic-gate 		 * XXX One hca only. Add multi-hca functionality if needed
768*7c478bd9Sstevel@tonic-gate 		 * later.
769*7c478bd9Sstevel@tonic-gate 		 */
770*7c478bd9Sstevel@tonic-gate 		ribstat->hca = hca;
771*7c478bd9Sstevel@tonic-gate 		ribstat->nhca_inited++;
772*7c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
773*7c478bd9Sstevel@tonic-gate 		break;
774*7c478bd9Sstevel@tonic-gate 
775*7c478bd9Sstevel@tonic-gate fail3:
776*7c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
777*7c478bd9Sstevel@tonic-gate fail2:
778*7c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
779*7c478bd9Sstevel@tonic-gate fail1:
780*7c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
781*7c478bd9Sstevel@tonic-gate 
782*7c478bd9Sstevel@tonic-gate 	}
783*7c478bd9Sstevel@tonic-gate 	if (ribstat->hca != NULL)
784*7c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
785*7c478bd9Sstevel@tonic-gate 	else
786*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
787*7c478bd9Sstevel@tonic-gate }
788*7c478bd9Sstevel@tonic-gate 
789*7c478bd9Sstevel@tonic-gate /*
790*7c478bd9Sstevel@tonic-gate  * Callback routines
791*7c478bd9Sstevel@tonic-gate  */
792*7c478bd9Sstevel@tonic-gate 
793*7c478bd9Sstevel@tonic-gate /*
794*7c478bd9Sstevel@tonic-gate  * SCQ handlers
795*7c478bd9Sstevel@tonic-gate  */
796*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
797*7c478bd9Sstevel@tonic-gate static void
798*7c478bd9Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
799*7c478bd9Sstevel@tonic-gate {
800*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
801*7c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
802*7c478bd9Sstevel@tonic-gate 	int		i;
803*7c478bd9Sstevel@tonic-gate 
804*7c478bd9Sstevel@tonic-gate 	/*
805*7c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
806*7c478bd9Sstevel@tonic-gate 	 * completion queue notification.
807*7c478bd9Sstevel@tonic-gate 	 */
808*7c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
809*7c478bd9Sstevel@tonic-gate 
810*7c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
811*7c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
812*7c478bd9Sstevel@tonic-gate 	    bzero(&wc, sizeof (wc));
813*7c478bd9Sstevel@tonic-gate 	    ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
814*7c478bd9Sstevel@tonic-gate 	    if (ibt_status != IBT_SUCCESS)
815*7c478bd9Sstevel@tonic-gate 		return;
816*7c478bd9Sstevel@tonic-gate 
817*7c478bd9Sstevel@tonic-gate 	/*
818*7c478bd9Sstevel@tonic-gate 	 * Got a send completion
819*7c478bd9Sstevel@tonic-gate 	 */
820*7c478bd9Sstevel@tonic-gate 	    if (wc.wc_id != NULL) {	/* XXX can it be otherwise ???? */
821*7c478bd9Sstevel@tonic-gate 		struct send_wid *wd = (struct send_wid *)wc.wc_id;
822*7c478bd9Sstevel@tonic-gate 		CONN	*conn = qptoc(wd->qp);
823*7c478bd9Sstevel@tonic-gate 
824*7c478bd9Sstevel@tonic-gate 		mutex_enter(&wd->sendwait_lock);
825*7c478bd9Sstevel@tonic-gate 		switch (wc.wc_status) {
826*7c478bd9Sstevel@tonic-gate 		case IBT_WC_SUCCESS:
827*7c478bd9Sstevel@tonic-gate 			wd->status = RDMA_SUCCESS;
828*7c478bd9Sstevel@tonic-gate 			break;
829*7c478bd9Sstevel@tonic-gate 		case IBT_WC_WR_FLUSHED_ERR:
830*7c478bd9Sstevel@tonic-gate 			wd->status = RDMA_FAILED;
831*7c478bd9Sstevel@tonic-gate 			break;
832*7c478bd9Sstevel@tonic-gate 		default:
833*7c478bd9Sstevel@tonic-gate /*
834*7c478bd9Sstevel@tonic-gate  *    RC Send Q Error Code		Local state     Remote State
835*7c478bd9Sstevel@tonic-gate  *    ==================== 		===========     ============
836*7c478bd9Sstevel@tonic-gate  *    IBT_WC_BAD_RESPONSE_ERR             ERROR           None
837*7c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_LEN_ERR                ERROR           None
838*7c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           None
839*7c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_PROTECT_ERR            ERROR           None
840*7c478bd9Sstevel@tonic-gate  *    IBT_WC_MEM_WIN_BIND_ERR             ERROR           None
841*7c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR
842*7c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_ACCESS_ERR            ERROR           ERROR
843*7c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_OP_ERR                ERROR           ERROR
844*7c478bd9Sstevel@tonic-gate  *    IBT_WC_RNR_NAK_TIMEOUT_ERR          ERROR           None
845*7c478bd9Sstevel@tonic-gate  *    IBT_WC_TRANS_TIMEOUT_ERR            ERROR           None
846*7c478bd9Sstevel@tonic-gate  *    IBT_WC_WR_FLUSHED_ERR               None            None
847*7c478bd9Sstevel@tonic-gate  */
848*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
849*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
850*7c478bd9Sstevel@tonic-gate 	    if (wc.wc_status != IBT_WC_SUCCESS) {
851*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_clnt_scq_handler: "
852*7c478bd9Sstevel@tonic-gate 			"WR completed in error, wc.wc_status:%d, "
853*7c478bd9Sstevel@tonic-gate 			"wc_id:%llx\n", wc.wc_status, (longlong_t)wc.wc_id);
854*7c478bd9Sstevel@tonic-gate 	    }
855*7c478bd9Sstevel@tonic-gate 	}
856*7c478bd9Sstevel@tonic-gate #endif
857*7c478bd9Sstevel@tonic-gate 			/*
858*7c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection to
859*7c478bd9Sstevel@tonic-gate 			 * ERROR and cleanup will happen either from
860*7c478bd9Sstevel@tonic-gate 			 * conn_release  or from rib_conn_get
861*7c478bd9Sstevel@tonic-gate 			 */
862*7c478bd9Sstevel@tonic-gate 			wd->status = RDMA_FAILED;
863*7c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
864*7c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
865*7c478bd9Sstevel@tonic-gate 				conn->c_state = C_ERROR;
866*7c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
867*7c478bd9Sstevel@tonic-gate 			break;
868*7c478bd9Sstevel@tonic-gate 		}
869*7c478bd9Sstevel@tonic-gate 		if (wd->cv_sig == 1) {
870*7c478bd9Sstevel@tonic-gate 			/*
871*7c478bd9Sstevel@tonic-gate 			 * Notify poster
872*7c478bd9Sstevel@tonic-gate 			 */
873*7c478bd9Sstevel@tonic-gate 			cv_signal(&wd->wait_cv);
874*7c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
875*7c478bd9Sstevel@tonic-gate 		} else {
876*7c478bd9Sstevel@tonic-gate 			/*
877*7c478bd9Sstevel@tonic-gate 			 * Poster not waiting for notification.
878*7c478bd9Sstevel@tonic-gate 			 * Free the send buffers and send_wid
879*7c478bd9Sstevel@tonic-gate 			 */
880*7c478bd9Sstevel@tonic-gate 			for (i = 0; i < wd->nsbufs; i++) {
881*7c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
882*7c478bd9Sstevel@tonic-gate 					(void *)wd->sbufaddr[i]);
883*7c478bd9Sstevel@tonic-gate 			}
884*7c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
885*7c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wd);
886*7c478bd9Sstevel@tonic-gate 		}
887*7c478bd9Sstevel@tonic-gate 	    }
888*7c478bd9Sstevel@tonic-gate 	}
889*7c478bd9Sstevel@tonic-gate }
890*7c478bd9Sstevel@tonic-gate 
891*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
892*7c478bd9Sstevel@tonic-gate static void
893*7c478bd9Sstevel@tonic-gate rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
894*7c478bd9Sstevel@tonic-gate {
895*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
896*7c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
897*7c478bd9Sstevel@tonic-gate 	int		i;
898*7c478bd9Sstevel@tonic-gate 
899*7c478bd9Sstevel@tonic-gate 	/*
900*7c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
901*7c478bd9Sstevel@tonic-gate 	 * completion queue notification.
902*7c478bd9Sstevel@tonic-gate 	 */
903*7c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
904*7c478bd9Sstevel@tonic-gate 
905*7c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
906*7c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
907*7c478bd9Sstevel@tonic-gate 	    bzero(&wc, sizeof (wc));
908*7c478bd9Sstevel@tonic-gate 	    ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
909*7c478bd9Sstevel@tonic-gate 	    if (ibt_status != IBT_SUCCESS)
910*7c478bd9Sstevel@tonic-gate 		return;
911*7c478bd9Sstevel@tonic-gate 
912*7c478bd9Sstevel@tonic-gate 	/*
913*7c478bd9Sstevel@tonic-gate 	 * Got a send completion
914*7c478bd9Sstevel@tonic-gate 	 */
915*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
916*7c478bd9Sstevel@tonic-gate 	    if (rib_debug > 1 && wc.wc_status != IBT_WC_SUCCESS) {
917*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_svc_scq_handler: WR completed in error "
918*7c478bd9Sstevel@tonic-gate 			"wc.wc_status:%d, wc_id:%llX",
919*7c478bd9Sstevel@tonic-gate 			wc.wc_status, (longlong_t)wc.wc_id);
920*7c478bd9Sstevel@tonic-gate 	    }
921*7c478bd9Sstevel@tonic-gate #endif
922*7c478bd9Sstevel@tonic-gate 	    if (wc.wc_id != NULL) { /* XXX NULL possible ???? */
923*7c478bd9Sstevel@tonic-gate 		struct send_wid *wd = (struct send_wid *)wc.wc_id;
924*7c478bd9Sstevel@tonic-gate 
925*7c478bd9Sstevel@tonic-gate 		mutex_enter(&wd->sendwait_lock);
926*7c478bd9Sstevel@tonic-gate 		if (wd->cv_sig == 1) {
927*7c478bd9Sstevel@tonic-gate 			/*
928*7c478bd9Sstevel@tonic-gate 			 * Update completion status and notify poster
929*7c478bd9Sstevel@tonic-gate 			 */
930*7c478bd9Sstevel@tonic-gate 			if (wc.wc_status == IBT_WC_SUCCESS)
931*7c478bd9Sstevel@tonic-gate 				wd->status = RDMA_SUCCESS;
932*7c478bd9Sstevel@tonic-gate 			else
933*7c478bd9Sstevel@tonic-gate 				wd->status = RDMA_FAILED;
934*7c478bd9Sstevel@tonic-gate 			cv_signal(&wd->wait_cv);
935*7c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
936*7c478bd9Sstevel@tonic-gate 		} else {
937*7c478bd9Sstevel@tonic-gate 			/*
938*7c478bd9Sstevel@tonic-gate 			 * Poster not waiting for notification.
939*7c478bd9Sstevel@tonic-gate 			 * Free the send buffers and send_wid
940*7c478bd9Sstevel@tonic-gate 			 */
941*7c478bd9Sstevel@tonic-gate 			for (i = 0; i < wd->nsbufs; i++) {
942*7c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
943*7c478bd9Sstevel@tonic-gate 					(void *)wd->sbufaddr[i]);
944*7c478bd9Sstevel@tonic-gate 			}
945*7c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
946*7c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wd);
947*7c478bd9Sstevel@tonic-gate 		}
948*7c478bd9Sstevel@tonic-gate 	    }
949*7c478bd9Sstevel@tonic-gate 	}
950*7c478bd9Sstevel@tonic-gate }
951*7c478bd9Sstevel@tonic-gate 
952*7c478bd9Sstevel@tonic-gate /*
953*7c478bd9Sstevel@tonic-gate  * RCQ handler
954*7c478bd9Sstevel@tonic-gate  */
955*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
956*7c478bd9Sstevel@tonic-gate static void
957*7c478bd9Sstevel@tonic-gate rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
958*7c478bd9Sstevel@tonic-gate {
959*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
960*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
961*7c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
962*7c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
963*7c478bd9Sstevel@tonic-gate 
964*7c478bd9Sstevel@tonic-gate 	/*
965*7c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
966*7c478bd9Sstevel@tonic-gate 	 * completion queue notification.
967*7c478bd9Sstevel@tonic-gate 	 */
968*7c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
969*7c478bd9Sstevel@tonic-gate 
970*7c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
971*7c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
972*7c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
973*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
974*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
975*7c478bd9Sstevel@tonic-gate 		    return;
976*7c478bd9Sstevel@tonic-gate 
977*7c478bd9Sstevel@tonic-gate 		rwid = (struct recv_wid *)wc.wc_id;
978*7c478bd9Sstevel@tonic-gate 		qp = rwid->qp;
979*7c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
980*7c478bd9Sstevel@tonic-gate 		    XDR			inxdrs, *xdrs;
981*7c478bd9Sstevel@tonic-gate 		    uint_t		xid, vers, op, find_xid = 0;
982*7c478bd9Sstevel@tonic-gate 		    struct reply	*r;
983*7c478bd9Sstevel@tonic-gate 		    CONN *conn = qptoc(qp);
984*7c478bd9Sstevel@tonic-gate 
985*7c478bd9Sstevel@tonic-gate 		    xdrs = &inxdrs;
986*7c478bd9Sstevel@tonic-gate 		    xdrmem_create(xdrs, (caddr_t)rwid->addr,
987*7c478bd9Sstevel@tonic-gate 			wc.wc_bytes_xfer, XDR_DECODE);
988*7c478bd9Sstevel@tonic-gate 		/*
989*7c478bd9Sstevel@tonic-gate 		 * Treat xid as opaque (xid is the first entity
990*7c478bd9Sstevel@tonic-gate 		 * in the rpc rdma message).
991*7c478bd9Sstevel@tonic-gate 		 */
992*7c478bd9Sstevel@tonic-gate 		    xid = *(uint32_t *)rwid->addr;
993*7c478bd9Sstevel@tonic-gate 		/* Skip xid and set the xdr position accordingly. */
994*7c478bd9Sstevel@tonic-gate 		    XDR_SETPOS(xdrs, sizeof (uint32_t));
995*7c478bd9Sstevel@tonic-gate 		    (void) xdr_u_int(xdrs, &vers);
996*7c478bd9Sstevel@tonic-gate 		    (void) xdr_u_int(xdrs, &op);
997*7c478bd9Sstevel@tonic-gate 		    XDR_DESTROY(xdrs);
998*7c478bd9Sstevel@tonic-gate 		    if (vers != RPCRDMA_VERS) {
999*7c478bd9Sstevel@tonic-gate 			/*
1000*7c478bd9Sstevel@tonic-gate 			 * Invalid RPC/RDMA version. Cannot interoperate.
1001*7c478bd9Sstevel@tonic-gate 			 * Set connection to ERROR state and bail out.
1002*7c478bd9Sstevel@tonic-gate 			 */
1003*7c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
1004*7c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
1005*7c478bd9Sstevel@tonic-gate 				conn->c_state = C_ERROR;
1006*7c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
1007*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER, (void *)rwid->addr);
1008*7c478bd9Sstevel@tonic-gate 			rib_free_wid(rwid);
1009*7c478bd9Sstevel@tonic-gate 			continue;
1010*7c478bd9Sstevel@tonic-gate 		    }
1011*7c478bd9Sstevel@tonic-gate 
1012*7c478bd9Sstevel@tonic-gate 		    mutex_enter(&qp->replylist_lock);
1013*7c478bd9Sstevel@tonic-gate 		    for (r = qp->replylist; r != NULL; r = r->next) {
1014*7c478bd9Sstevel@tonic-gate 			if (r->xid == xid) {
1015*7c478bd9Sstevel@tonic-gate 			    find_xid = 1;
1016*7c478bd9Sstevel@tonic-gate 			    switch (op) {
1017*7c478bd9Sstevel@tonic-gate 			    case RDMA_MSG:
1018*7c478bd9Sstevel@tonic-gate 			    case RDMA_NOMSG:
1019*7c478bd9Sstevel@tonic-gate 			    case RDMA_MSGP:
1020*7c478bd9Sstevel@tonic-gate 				r->status = RDMA_SUCCESS;
1021*7c478bd9Sstevel@tonic-gate 				r->vaddr_cq = rwid->addr;
1022*7c478bd9Sstevel@tonic-gate 				r->bytes_xfer = wc.wc_bytes_xfer;
1023*7c478bd9Sstevel@tonic-gate 				cv_signal(&r->wait_cv);
1024*7c478bd9Sstevel@tonic-gate 				break;
1025*7c478bd9Sstevel@tonic-gate 			    default:
1026*7c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(qp), RECV_BUFFER,
1027*7c478bd9Sstevel@tonic-gate 						(void *)rwid->addr);
1028*7c478bd9Sstevel@tonic-gate 				break;
1029*7c478bd9Sstevel@tonic-gate 			    }
1030*7c478bd9Sstevel@tonic-gate 			    break;
1031*7c478bd9Sstevel@tonic-gate 			}
1032*7c478bd9Sstevel@tonic-gate 		    }
1033*7c478bd9Sstevel@tonic-gate 		    mutex_exit(&qp->replylist_lock);
1034*7c478bd9Sstevel@tonic-gate 		    if (find_xid == 0) {
1035*7c478bd9Sstevel@tonic-gate 			/* RPC caller not waiting for reply */
1036*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1037*7c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
1038*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_clnt_rcq_handler: "
1039*7c478bd9Sstevel@tonic-gate 			    "NO matching xid %u!\n", xid);
1040*7c478bd9Sstevel@tonic-gate 			    }
1041*7c478bd9Sstevel@tonic-gate #endif
1042*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), RECV_BUFFER,
1043*7c478bd9Sstevel@tonic-gate 				(void *)rwid->addr);
1044*7c478bd9Sstevel@tonic-gate 		    }
1045*7c478bd9Sstevel@tonic-gate 		} else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) {
1046*7c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
1047*7c478bd9Sstevel@tonic-gate 
1048*7c478bd9Sstevel@tonic-gate 			/*
1049*7c478bd9Sstevel@tonic-gate 			 * Connection being flushed. Just free
1050*7c478bd9Sstevel@tonic-gate 			 * the posted buffer
1051*7c478bd9Sstevel@tonic-gate 			 */
1052*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER, (void *)rwid->addr);
1053*7c478bd9Sstevel@tonic-gate 		} else {
1054*7c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
1055*7c478bd9Sstevel@tonic-gate /*
1056*7c478bd9Sstevel@tonic-gate  *  RC Recv Q Error Code		Local state     Remote State
1057*7c478bd9Sstevel@tonic-gate  *  ====================		===========     ============
1058*7c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_ACCESS_ERR             ERROR           ERROR when NAK recvd
1059*7c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_LEN_ERR                ERROR           ERROR when NAK recvd
1060*7c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_PROTECT_ERR            ERROR           ERROR when NAK recvd
1061*7c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           ERROR when NAK recvd
1062*7c478bd9Sstevel@tonic-gate  *  IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR when NAK recvd
1063*7c478bd9Sstevel@tonic-gate  *  IBT_WC_WR_FLUSHED_ERR               None            None
1064*7c478bd9Sstevel@tonic-gate  */
1065*7c478bd9Sstevel@tonic-gate 			/*
1066*7c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection
1067*7c478bd9Sstevel@tonic-gate 			 * in ERROR state.
1068*7c478bd9Sstevel@tonic-gate 			 */
1069*7c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
1070*7c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
1071*7c478bd9Sstevel@tonic-gate 				conn->c_state = C_ERROR;
1072*7c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
1073*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER, (void *)rwid->addr);
1074*7c478bd9Sstevel@tonic-gate 		}
1075*7c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
1076*7c478bd9Sstevel@tonic-gate 	}
1077*7c478bd9Sstevel@tonic-gate }
1078*7c478bd9Sstevel@tonic-gate 
1079*7c478bd9Sstevel@tonic-gate /* Server side */
1080*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
1081*7c478bd9Sstevel@tonic-gate static void
1082*7c478bd9Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
1083*7c478bd9Sstevel@tonic-gate {
1084*7c478bd9Sstevel@tonic-gate 	struct recv_data *rd;
1085*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
1086*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
1087*7c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
1088*7c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
1089*7c478bd9Sstevel@tonic-gate 	CONN		*conn;
1090*7c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
1091*7c478bd9Sstevel@tonic-gate 
1092*7c478bd9Sstevel@tonic-gate 	/*
1093*7c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
1094*7c478bd9Sstevel@tonic-gate 	 * completion queue notification.
1095*7c478bd9Sstevel@tonic-gate 	 */
1096*7c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
1097*7c478bd9Sstevel@tonic-gate 
1098*7c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
1099*7c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
1100*7c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
1101*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
1102*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
1103*7c478bd9Sstevel@tonic-gate 		    return;
1104*7c478bd9Sstevel@tonic-gate 
1105*7c478bd9Sstevel@tonic-gate 		s_recvp = (struct svc_recv *)wc.wc_id;
1106*7c478bd9Sstevel@tonic-gate 		qp = s_recvp->qp;
1107*7c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
1108*7c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
1109*7c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs--;
1110*7c478bd9Sstevel@tonic-gate 		if (qp->n_posted_rbufs == 0)
1111*7c478bd9Sstevel@tonic-gate 			cv_signal(&qp->posted_rbufs_cv);
1112*7c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
1113*7c478bd9Sstevel@tonic-gate 
1114*7c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
1115*7c478bd9Sstevel@tonic-gate 		    XDR		inxdrs, *xdrs;
1116*7c478bd9Sstevel@tonic-gate 		    uint_t	xid, vers, op;
1117*7c478bd9Sstevel@tonic-gate 
1118*7c478bd9Sstevel@tonic-gate 		    xdrs = &inxdrs;
1119*7c478bd9Sstevel@tonic-gate 		    /* s_recvp->vaddr stores data */
1120*7c478bd9Sstevel@tonic-gate 		    xdrmem_create(xdrs, (caddr_t)s_recvp->vaddr,
1121*7c478bd9Sstevel@tonic-gate 			wc.wc_bytes_xfer, XDR_DECODE);
1122*7c478bd9Sstevel@tonic-gate 
1123*7c478bd9Sstevel@tonic-gate 		/*
1124*7c478bd9Sstevel@tonic-gate 		 * Treat xid as opaque (xid is the first entity
1125*7c478bd9Sstevel@tonic-gate 		 * in the rpc rdma message).
1126*7c478bd9Sstevel@tonic-gate 		 */
1127*7c478bd9Sstevel@tonic-gate 		    xid = *(uint32_t *)s_recvp->vaddr;
1128*7c478bd9Sstevel@tonic-gate 		/* Skip xid and set the xdr position accordingly. */
1129*7c478bd9Sstevel@tonic-gate 		    XDR_SETPOS(xdrs, sizeof (uint32_t));
1130*7c478bd9Sstevel@tonic-gate 		    if (!xdr_u_int(xdrs, &vers) ||
1131*7c478bd9Sstevel@tonic-gate 			!xdr_u_int(xdrs, &op)) {
1132*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
1133*7c478bd9Sstevel@tonic-gate 				(void *)s_recvp->vaddr);
1134*7c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
1135*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1136*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_svc_rcq_handler: "
1137*7c478bd9Sstevel@tonic-gate 			    "xdr_u_int failed for qp %p, wc_id=%llx",
1138*7c478bd9Sstevel@tonic-gate 			    (void *)qp, (longlong_t)wc.wc_id);
1139*7c478bd9Sstevel@tonic-gate #endif
1140*7c478bd9Sstevel@tonic-gate 			(void) rib_free_svc_recv(s_recvp);
1141*7c478bd9Sstevel@tonic-gate 			continue;
1142*7c478bd9Sstevel@tonic-gate 		    }
1143*7c478bd9Sstevel@tonic-gate 		    XDR_DESTROY(xdrs);
1144*7c478bd9Sstevel@tonic-gate 
1145*7c478bd9Sstevel@tonic-gate 		    if (vers != RPCRDMA_VERS) {
1146*7c478bd9Sstevel@tonic-gate 			/*
1147*7c478bd9Sstevel@tonic-gate 			 * Invalid RPC/RDMA version. Drop rpc rdma message.
1148*7c478bd9Sstevel@tonic-gate 			 */
1149*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
1150*7c478bd9Sstevel@tonic-gate 				(void *)s_recvp->vaddr);
1151*7c478bd9Sstevel@tonic-gate 			(void) rib_free_svc_recv(s_recvp);
1152*7c478bd9Sstevel@tonic-gate 			continue;
1153*7c478bd9Sstevel@tonic-gate 		    }
1154*7c478bd9Sstevel@tonic-gate 			/*
1155*7c478bd9Sstevel@tonic-gate 			 * Is this for RDMA_DONE?
1156*7c478bd9Sstevel@tonic-gate 			 */
1157*7c478bd9Sstevel@tonic-gate 		    if (op == RDMA_DONE) {
1158*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
1159*7c478bd9Sstevel@tonic-gate 				(void *)s_recvp->vaddr);
1160*7c478bd9Sstevel@tonic-gate 			/*
1161*7c478bd9Sstevel@tonic-gate 			 * Wake up the thread waiting on
1162*7c478bd9Sstevel@tonic-gate 			 * a RDMA_DONE for xid
1163*7c478bd9Sstevel@tonic-gate 			 */
1164*7c478bd9Sstevel@tonic-gate 			mutex_enter(&qp->rdlist_lock);
1165*7c478bd9Sstevel@tonic-gate 			rdma_done_notify(qp, xid);
1166*7c478bd9Sstevel@tonic-gate 			mutex_exit(&qp->rdlist_lock);
1167*7c478bd9Sstevel@tonic-gate 			(void) rib_free_svc_recv(s_recvp);
1168*7c478bd9Sstevel@tonic-gate 			continue;
1169*7c478bd9Sstevel@tonic-gate 		    }
1170*7c478bd9Sstevel@tonic-gate 
1171*7c478bd9Sstevel@tonic-gate 		    mutex_enter(&plugin_state_lock);
1172*7c478bd9Sstevel@tonic-gate 		    if (plugin_state == ACCEPT) {
1173*7c478bd9Sstevel@tonic-gate 			while ((mp = allocb(sizeof (*rd), BPRI_LO)) == NULL)
1174*7c478bd9Sstevel@tonic-gate 			    (void) strwaitbuf(sizeof (*rd), BPRI_LO);
1175*7c478bd9Sstevel@tonic-gate 			/*
1176*7c478bd9Sstevel@tonic-gate 			 * Plugin is in accept state, hence the master
1177*7c478bd9Sstevel@tonic-gate 			 * transport queue for this is still accepting
1178*7c478bd9Sstevel@tonic-gate 			 * requests. Hence we can call svc_queuereq to
1179*7c478bd9Sstevel@tonic-gate 			 * queue this recieved msg.
1180*7c478bd9Sstevel@tonic-gate 			 */
1181*7c478bd9Sstevel@tonic-gate 			rd = (struct recv_data *)mp->b_rptr;
1182*7c478bd9Sstevel@tonic-gate 			rd->conn = conn;
1183*7c478bd9Sstevel@tonic-gate 			rd->rpcmsg.addr = (caddr_t)s_recvp->vaddr;
1184*7c478bd9Sstevel@tonic-gate 			rd->rpcmsg.type = RECV_BUFFER;
1185*7c478bd9Sstevel@tonic-gate 			rd->rpcmsg.len = wc.wc_bytes_xfer;
1186*7c478bd9Sstevel@tonic-gate 			rd->status = wc.wc_status;
1187*7c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
1188*7c478bd9Sstevel@tonic-gate 			conn->c_ref++;
1189*7c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
1190*7c478bd9Sstevel@tonic-gate 			mp->b_wptr += sizeof (*rd);
1191*7c478bd9Sstevel@tonic-gate 			svc_queuereq((queue_t *)rib_stat->q, mp);
1192*7c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
1193*7c478bd9Sstevel@tonic-gate 		    } else {
1194*7c478bd9Sstevel@tonic-gate 			/*
1195*7c478bd9Sstevel@tonic-gate 			 * The master transport for this is going
1196*7c478bd9Sstevel@tonic-gate 			 * away and the queue is not accepting anymore
1197*7c478bd9Sstevel@tonic-gate 			 * requests for krpc, so don't do anything, just
1198*7c478bd9Sstevel@tonic-gate 			 * free the msg.
1199*7c478bd9Sstevel@tonic-gate 			 */
1200*7c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
1201*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
1202*7c478bd9Sstevel@tonic-gate 			(void *)s_recvp->vaddr);
1203*7c478bd9Sstevel@tonic-gate 		    }
1204*7c478bd9Sstevel@tonic-gate 		} else {
1205*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
1206*7c478bd9Sstevel@tonic-gate 				(void *)s_recvp->vaddr);
1207*7c478bd9Sstevel@tonic-gate 		}
1208*7c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
1209*7c478bd9Sstevel@tonic-gate 	}
1210*7c478bd9Sstevel@tonic-gate }
1211*7c478bd9Sstevel@tonic-gate 
1212*7c478bd9Sstevel@tonic-gate /*
1213*7c478bd9Sstevel@tonic-gate  * Handles DR event of IBT_HCA_DETACH_EVENT.
1214*7c478bd9Sstevel@tonic-gate  */
1215*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
1216*7c478bd9Sstevel@tonic-gate static void
1217*7c478bd9Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
1218*7c478bd9Sstevel@tonic-gate 	ibt_async_code_t code, ibt_async_event_t *event)
1219*7c478bd9Sstevel@tonic-gate {
1220*7c478bd9Sstevel@tonic-gate 
1221*7c478bd9Sstevel@tonic-gate 	switch (code) {
1222*7c478bd9Sstevel@tonic-gate 	case IBT_HCA_ATTACH_EVENT:
1223*7c478bd9Sstevel@tonic-gate 		/* ignore */
1224*7c478bd9Sstevel@tonic-gate 		break;
1225*7c478bd9Sstevel@tonic-gate 	case IBT_HCA_DETACH_EVENT:
1226*7c478bd9Sstevel@tonic-gate 	{
1227*7c478bd9Sstevel@tonic-gate 		ASSERT(rib_stat->hca->hca_hdl == hca_hdl);
1228*7c478bd9Sstevel@tonic-gate 		rib_detach_hca(rib_stat->hca);
1229*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1230*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n");
1231*7c478bd9Sstevel@tonic-gate #endif
1232*7c478bd9Sstevel@tonic-gate 		break;
1233*7c478bd9Sstevel@tonic-gate 	}
1234*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1235*7c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PATH_MIGRATED:
1236*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PATH_MIGRATED\n");
1237*7c478bd9Sstevel@tonic-gate 		break;
1238*7c478bd9Sstevel@tonic-gate 	case IBT_EVENT_SQD:
1239*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n");
1240*7c478bd9Sstevel@tonic-gate 		break;
1241*7c478bd9Sstevel@tonic-gate 	case IBT_EVENT_COM_EST:
1242*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n");
1243*7c478bd9Sstevel@tonic-gate 		break;
1244*7c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CATASTROPHIC_CHAN:
1245*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CATASTROPHIC_CHAN\n");
1246*7c478bd9Sstevel@tonic-gate 		break;
1247*7c478bd9Sstevel@tonic-gate 	case IBT_ERROR_INVALID_REQUEST_CHAN:
1248*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): "
1249*7c478bd9Sstevel@tonic-gate 		"IBT_ERROR_INVALID_REQUEST_CHAN\n");
1250*7c478bd9Sstevel@tonic-gate 		break;
1251*7c478bd9Sstevel@tonic-gate 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
1252*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): "
1253*7c478bd9Sstevel@tonic-gate 		"IBT_ERROR_ACCESS_VIOLATION_CHAN\n");
1254*7c478bd9Sstevel@tonic-gate 		break;
1255*7c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PATH_MIGRATE_REQ:
1256*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PATH_MIGRATE_REQ\n");
1257*7c478bd9Sstevel@tonic-gate 		break;
1258*7c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CQ:
1259*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n");
1260*7c478bd9Sstevel@tonic-gate 		break;
1261*7c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PORT_DOWN:
1262*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n");
1263*7c478bd9Sstevel@tonic-gate 		break;
1264*7c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PORT_UP:
1265*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n");
1266*7c478bd9Sstevel@tonic-gate 		break;
1267*7c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE1:
1268*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n");
1269*7c478bd9Sstevel@tonic-gate 		break;
1270*7c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE2:
1271*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n");
1272*7c478bd9Sstevel@tonic-gate 		break;
1273*7c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE3:
1274*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n");
1275*7c478bd9Sstevel@tonic-gate 		break;
1276*7c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE4:
1277*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n");
1278*7c478bd9Sstevel@tonic-gate 		break;
1279*7c478bd9Sstevel@tonic-gate #endif
1280*7c478bd9Sstevel@tonic-gate 	default:
1281*7c478bd9Sstevel@tonic-gate 		break;
1282*7c478bd9Sstevel@tonic-gate 	}
1283*7c478bd9Sstevel@tonic-gate }
1284*7c478bd9Sstevel@tonic-gate 
1285*7c478bd9Sstevel@tonic-gate /*
1286*7c478bd9Sstevel@tonic-gate  * Client's reachable function.
1287*7c478bd9Sstevel@tonic-gate  */
1288*7c478bd9Sstevel@tonic-gate static rdma_stat
1289*7c478bd9Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle)
1290*7c478bd9Sstevel@tonic-gate {
1291*7c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
1292*7c478bd9Sstevel@tonic-gate 	rdma_stat	status;
1293*7c478bd9Sstevel@tonic-gate 
1294*7c478bd9Sstevel@tonic-gate 	/*
1295*7c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
1296*7c478bd9Sstevel@tonic-gate 	 */
1297*7c478bd9Sstevel@tonic-gate 	*handle = NULL;
1298*7c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
1299*7c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
1300*7c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
1301*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
1302*7c478bd9Sstevel@tonic-gate 	}
1303*7c478bd9Sstevel@tonic-gate 	status = rib_ping_srv(addr_type, raddr, &hca);
1304*7c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
1305*7c478bd9Sstevel@tonic-gate 
1306*7c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
1307*7c478bd9Sstevel@tonic-gate 		*handle = (void *)hca;
1308*7c478bd9Sstevel@tonic-gate 		/*
1309*7c478bd9Sstevel@tonic-gate 		 * Register the Address translation service
1310*7c478bd9Sstevel@tonic-gate 		 */
1311*7c478bd9Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
1312*7c478bd9Sstevel@tonic-gate 		if (ats_running == 0) {
1313*7c478bd9Sstevel@tonic-gate 			if (rib_register_ats(rib_stat->hca)
1314*7c478bd9Sstevel@tonic-gate 			    == RDMA_SUCCESS) {
1315*7c478bd9Sstevel@tonic-gate 				ats_running = 1;
1316*7c478bd9Sstevel@tonic-gate 				mutex_exit(&rib_stat->open_hca_lock);
1317*7c478bd9Sstevel@tonic-gate 				return (RDMA_SUCCESS);
1318*7c478bd9Sstevel@tonic-gate 			} else {
1319*7c478bd9Sstevel@tonic-gate 				mutex_exit(&rib_stat->open_hca_lock);
1320*7c478bd9Sstevel@tonic-gate 				return (RDMA_FAILED);
1321*7c478bd9Sstevel@tonic-gate 			}
1322*7c478bd9Sstevel@tonic-gate 		} else {
1323*7c478bd9Sstevel@tonic-gate 			mutex_exit(&rib_stat->open_hca_lock);
1324*7c478bd9Sstevel@tonic-gate 			return (RDMA_SUCCESS);
1325*7c478bd9Sstevel@tonic-gate 		}
1326*7c478bd9Sstevel@tonic-gate 	} else {
1327*7c478bd9Sstevel@tonic-gate 		*handle = NULL;
1328*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 2)
1329*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_reachable(): ping_srv failed.\n");
1330*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
1331*7c478bd9Sstevel@tonic-gate 	}
1332*7c478bd9Sstevel@tonic-gate }
1333*7c478bd9Sstevel@tonic-gate 
1334*7c478bd9Sstevel@tonic-gate /* Client side qp creation */
1335*7c478bd9Sstevel@tonic-gate static rdma_stat
1336*7c478bd9Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
1337*7c478bd9Sstevel@tonic-gate {
1338*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
1339*7c478bd9Sstevel@tonic-gate 	CONN		*conn;
1340*7c478bd9Sstevel@tonic-gate 
1341*7c478bd9Sstevel@tonic-gate 	ASSERT(qp != NULL);
1342*7c478bd9Sstevel@tonic-gate 	*qp = NULL;
1343*7c478bd9Sstevel@tonic-gate 
1344*7c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
1345*7c478bd9Sstevel@tonic-gate 	conn = qptoc(kqp);
1346*7c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
1347*7c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
1348*7c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
1349*7c478bd9Sstevel@tonic-gate 
1350*7c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_CLIENT;
1351*7c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
1352*7c478bd9Sstevel@tonic-gate 	conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP);
1353*7c478bd9Sstevel@tonic-gate 	bcopy(raddr->buf, conn->c_raddr.buf, raddr->len);
1354*7c478bd9Sstevel@tonic-gate 	conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len;
1355*7c478bd9Sstevel@tonic-gate 
1356*7c478bd9Sstevel@tonic-gate 	/*
1357*7c478bd9Sstevel@tonic-gate 	 * Initialize
1358*7c478bd9Sstevel@tonic-gate 	 */
1359*7c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
1360*7c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
1361*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
1362*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
1363*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
1364*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
1365*7c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
1366*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
1367*7c478bd9Sstevel@tonic-gate 
1368*7c478bd9Sstevel@tonic-gate 	*qp = kqp;
1369*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
1370*7c478bd9Sstevel@tonic-gate }
1371*7c478bd9Sstevel@tonic-gate 
1372*7c478bd9Sstevel@tonic-gate /* Server side qp creation */
1373*7c478bd9Sstevel@tonic-gate static rdma_stat
1374*7c478bd9Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp)
1375*7c478bd9Sstevel@tonic-gate {
1376*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
1377*7c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
1378*7c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
1379*7c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
1380*7c478bd9Sstevel@tonic-gate 
1381*7c478bd9Sstevel@tonic-gate 	ASSERT(qp != NULL);
1382*7c478bd9Sstevel@tonic-gate 	*qp = NULL;
1383*7c478bd9Sstevel@tonic-gate 
1384*7c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
1385*7c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
1386*7c478bd9Sstevel@tonic-gate 	kqp->port_num = port;
1387*7c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
1388*7c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
1389*7c478bd9Sstevel@tonic-gate 
1390*7c478bd9Sstevel@tonic-gate 	/*
1391*7c478bd9Sstevel@tonic-gate 	 * Create the qp handle
1392*7c478bd9Sstevel@tonic-gate 	 */
1393*7c478bd9Sstevel@tonic-gate 	bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
1394*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl;
1395*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl;
1396*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
1397*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = port;
1398*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
1399*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
1400*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
1401*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
1402*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
1403*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
1404*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
1405*7c478bd9Sstevel@tonic-gate 
1406*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
1407*7c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
1408*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
1409*7c478bd9Sstevel@tonic-gate 			IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl,
1410*7c478bd9Sstevel@tonic-gate 			&chan_sizes);
1411*7c478bd9Sstevel@tonic-gate 	} else {
1412*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
1413*7c478bd9Sstevel@tonic-gate 		goto fail;
1414*7c478bd9Sstevel@tonic-gate 	}
1415*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
1416*7c478bd9Sstevel@tonic-gate 
1417*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
1418*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_svc_create_chan: "
1419*7c478bd9Sstevel@tonic-gate 			"ibt_alloc_rc_channel failed, ibt_status=%d.",
1420*7c478bd9Sstevel@tonic-gate 			ibt_status);
1421*7c478bd9Sstevel@tonic-gate 		goto fail;
1422*7c478bd9Sstevel@tonic-gate 	}
1423*7c478bd9Sstevel@tonic-gate 
1424*7c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_SERVER;
1425*7c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
1426*7c478bd9Sstevel@tonic-gate 	kqp->q = q;	/* server ONLY */
1427*7c478bd9Sstevel@tonic-gate 
1428*7c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
1429*7c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
1430*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
1431*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
1432*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
1433*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
1434*7c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
1435*7c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
1436*7c478bd9Sstevel@tonic-gate 	/*
1437*7c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
1438*7c478bd9Sstevel@tonic-gate 	 */
1439*7c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
1440*7c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_state = C_CONNECTED;
1441*7c478bd9Sstevel@tonic-gate 	*qp = kqp;
1442*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
1443*7c478bd9Sstevel@tonic-gate fail:
1444*7c478bd9Sstevel@tonic-gate 	if (kqp)
1445*7c478bd9Sstevel@tonic-gate 		kmem_free(kqp, sizeof (rib_qp_t));
1446*7c478bd9Sstevel@tonic-gate 
1447*7c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
1448*7c478bd9Sstevel@tonic-gate }
1449*7c478bd9Sstevel@tonic-gate 
1450*7c478bd9Sstevel@tonic-gate void
1451*7c478bd9Sstevel@tonic-gate rib_dump_pathrec(ibt_path_info_t *path_rec)
1452*7c478bd9Sstevel@tonic-gate {
1453*7c478bd9Sstevel@tonic-gate 	ib_pkey_t	pkey;
1454*7c478bd9Sstevel@tonic-gate 
1455*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
1456*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Path Record:\n");
1457*7c478bd9Sstevel@tonic-gate 
1458*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Source HCA GUID = %llx\n",
1459*7c478bd9Sstevel@tonic-gate 		(longlong_t)path_rec->pi_hca_guid);
1460*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Dest Service ID = %llx\n",
1461*7c478bd9Sstevel@tonic-gate 		(longlong_t)path_rec->pi_sid);
1462*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Port Num        = %02d\n",
1463*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_hca_port_num);
1464*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "P_Key Index     = %04d\n",
1465*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_pkey_ix);
1466*7c478bd9Sstevel@tonic-gate 
1467*7c478bd9Sstevel@tonic-gate 	    (void) ibt_index2pkey_byguid(path_rec->pi_hca_guid,
1468*7c478bd9Sstevel@tonic-gate 			path_rec->pi_prim_cep_path.cep_hca_port_num,
1469*7c478bd9Sstevel@tonic-gate 			path_rec->pi_prim_cep_path.cep_pkey_ix, &pkey);
1470*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "P_Key		= 0x%x\n", pkey);
1471*7c478bd9Sstevel@tonic-gate 
1472*7c478bd9Sstevel@tonic-gate 
1473*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "SGID:           = %llx:%llx\n",
1474*7c478bd9Sstevel@tonic-gate 		(longlong_t)
1475*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_sgid.gid_prefix,
1476*7c478bd9Sstevel@tonic-gate 		(longlong_t)
1477*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_sgid.gid_guid);
1478*7c478bd9Sstevel@tonic-gate 
1479*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "DGID:           = %llx:%llx\n",
1480*7c478bd9Sstevel@tonic-gate 		(longlong_t)
1481*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_dgid.gid_prefix,
1482*7c478bd9Sstevel@tonic-gate 		(longlong_t)
1483*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_dgid.gid_guid);
1484*7c478bd9Sstevel@tonic-gate 
1485*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Path Rate       = %02x\n",
1486*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_srate);
1487*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "SL              = %02x\n",
1488*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_srvl);
1489*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Prim Packet LT  = %02x\n",
1490*7c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_pkt_lt);
1491*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Path MTU        = %02x\n",
1492*7c478bd9Sstevel@tonic-gate 		path_rec->pi_path_mtu);
1493*7c478bd9Sstevel@tonic-gate 	}
1494*7c478bd9Sstevel@tonic-gate }
1495*7c478bd9Sstevel@tonic-gate 
1496*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
1497*7c478bd9Sstevel@tonic-gate ibt_cm_status_t
1498*7c478bd9Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event,
1499*7c478bd9Sstevel@tonic-gate     ibt_cm_return_args_t *ret_args, void *priv_data,
1500*7c478bd9Sstevel@tonic-gate     ibt_priv_data_len_t len)
1501*7c478bd9Sstevel@tonic-gate {
1502*7c478bd9Sstevel@tonic-gate 	rpcib_state_t   *ribstat;
1503*7c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
1504*7c478bd9Sstevel@tonic-gate 
1505*7c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)clnt_hdl;
1506*7c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
1507*7c478bd9Sstevel@tonic-gate 
1508*7c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
1509*7c478bd9Sstevel@tonic-gate 
1510*7c478bd9Sstevel@tonic-gate 	/* got a connection close event */
1511*7c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
1512*7c478bd9Sstevel@tonic-gate 	{
1513*7c478bd9Sstevel@tonic-gate 		CONN	*conn;
1514*7c478bd9Sstevel@tonic-gate 		rib_qp_t *qp;
1515*7c478bd9Sstevel@tonic-gate 
1516*7c478bd9Sstevel@tonic-gate 		/* check reason why connection was closed */
1517*7c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
1518*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
1519*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
1520*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
1521*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
1522*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
1523*7c478bd9Sstevel@tonic-gate 			/*
1524*7c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
1525*7c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
1526*7c478bd9Sstevel@tonic-gate 			 */
1527*7c478bd9Sstevel@tonic-gate 			break;
1528*7c478bd9Sstevel@tonic-gate 		default:
1529*7c478bd9Sstevel@tonic-gate 			/*
1530*7c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
1531*7c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
1532*7c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
1533*7c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
1534*7c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
1535*7c478bd9Sstevel@tonic-gate 			 * state
1536*7c478bd9Sstevel@tonic-gate 			 */
1537*7c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
1538*7c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
1539*7c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
1540*7c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
1541*7c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
1542*7c478bd9Sstevel@tonic-gate 				break;
1543*7c478bd9Sstevel@tonic-gate 			}
1544*7c478bd9Sstevel@tonic-gate 
1545*7c478bd9Sstevel@tonic-gate 			conn->c_state = C_ERROR;
1546*7c478bd9Sstevel@tonic-gate 
1547*7c478bd9Sstevel@tonic-gate 			/*
1548*7c478bd9Sstevel@tonic-gate 			 * Free the rc_channel. Channel has already
1549*7c478bd9Sstevel@tonic-gate 			 * transitioned to ERROR state and WRs have been
1550*7c478bd9Sstevel@tonic-gate 			 * FLUSHED_ERR already.
1551*7c478bd9Sstevel@tonic-gate 			 */
1552*7c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
1553*7c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
1554*7c478bd9Sstevel@tonic-gate 
1555*7c478bd9Sstevel@tonic-gate 			/*
1556*7c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref is down to 0 already
1557*7c478bd9Sstevel@tonic-gate 			 */
1558*7c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
1559*7c478bd9Sstevel@tonic-gate 				/*
1560*7c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
1561*7c478bd9Sstevel@tonic-gate 				 */
1562*7c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
1563*7c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
1564*7c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
1565*7c478bd9Sstevel@tonic-gate 					&hca->cl_conn_list);
1566*7c478bd9Sstevel@tonic-gate 			} else {
1567*7c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
1568*7c478bd9Sstevel@tonic-gate 			}
1569*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1570*7c478bd9Sstevel@tonic-gate 			if (rib_debug)
1571*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_clnt_cm_handler: "
1572*7c478bd9Sstevel@tonic-gate 					"(CONN_CLOSED) channel disconnected");
1573*7c478bd9Sstevel@tonic-gate #endif
1574*7c478bd9Sstevel@tonic-gate 			break;
1575*7c478bd9Sstevel@tonic-gate 		}
1576*7c478bd9Sstevel@tonic-gate 		break;
1577*7c478bd9Sstevel@tonic-gate 	}
1578*7c478bd9Sstevel@tonic-gate 	default:
1579*7c478bd9Sstevel@tonic-gate 		break;
1580*7c478bd9Sstevel@tonic-gate 	}
1581*7c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
1582*7c478bd9Sstevel@tonic-gate }
1583*7c478bd9Sstevel@tonic-gate 
1584*7c478bd9Sstevel@tonic-gate 
1585*7c478bd9Sstevel@tonic-gate /* Check if server has done ATS registration */
1586*7c478bd9Sstevel@tonic-gate rdma_stat
1587*7c478bd9Sstevel@tonic-gate rib_chk_srv_ats(rib_hca_t *hca, struct netbuf *raddr,
1588*7c478bd9Sstevel@tonic-gate 	int addr_type, ibt_path_info_t *path)
1589*7c478bd9Sstevel@tonic-gate {
1590*7c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin4;
1591*7c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
1592*7c478bd9Sstevel@tonic-gate 	ibt_path_attr_t		path_attr;
1593*7c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
1594*7c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
1595*7c478bd9Sstevel@tonic-gate 	ibt_ar_t		ar_query, ar_result;
1596*7c478bd9Sstevel@tonic-gate 	rib_service_t		*ats;
1597*7c478bd9Sstevel@tonic-gate 	ib_gid_t		sgid;
1598*7c478bd9Sstevel@tonic-gate 	ibt_path_info_t		paths[MAX_PORTS];
1599*7c478bd9Sstevel@tonic-gate 	uint8_t			npaths, i;
1600*7c478bd9Sstevel@tonic-gate 
1601*7c478bd9Sstevel@tonic-gate 	(void) bzero(&path_attr, sizeof (ibt_path_attr_t));
1602*7c478bd9Sstevel@tonic-gate 	(void) bzero(path, sizeof (ibt_path_info_t));
1603*7c478bd9Sstevel@tonic-gate 
1604*7c478bd9Sstevel@tonic-gate 	/*
1605*7c478bd9Sstevel@tonic-gate 	 * Construct svc name
1606*7c478bd9Sstevel@tonic-gate 	 */
1607*7c478bd9Sstevel@tonic-gate 	path_attr.pa_sname = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
1608*7c478bd9Sstevel@tonic-gate 	switch (addr_type) {
1609*7c478bd9Sstevel@tonic-gate 	case AF_INET:
1610*7c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)raddr->buf;
1611*7c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &sin4->sin_addr, path_attr.pa_sname,
1612*7c478bd9Sstevel@tonic-gate 		    IB_SVC_NAME_LEN);
1613*7c478bd9Sstevel@tonic-gate 		break;
1614*7c478bd9Sstevel@tonic-gate 
1615*7c478bd9Sstevel@tonic-gate 	case AF_INET6:
1616*7c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)raddr->buf;
1617*7c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET6, &sin6->sin6_addr,
1618*7c478bd9Sstevel@tonic-gate 		    path_attr.pa_sname, IB_SVC_NAME_LEN);
1619*7c478bd9Sstevel@tonic-gate 		break;
1620*7c478bd9Sstevel@tonic-gate 
1621*7c478bd9Sstevel@tonic-gate 	default:
1622*7c478bd9Sstevel@tonic-gate 		kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
1623*7c478bd9Sstevel@tonic-gate 		return (RDMA_INVAL);
1624*7c478bd9Sstevel@tonic-gate 	}
1625*7c478bd9Sstevel@tonic-gate 	(void) strlcat(path_attr.pa_sname, "::NFS", IB_SVC_NAME_LEN);
1626*7c478bd9Sstevel@tonic-gate 
1627*7c478bd9Sstevel@tonic-gate 	/*
1628*7c478bd9Sstevel@tonic-gate 	 * Attempt a path to the server on an ATS-registered port.
1629*7c478bd9Sstevel@tonic-gate 	 * Try all ATS-registered ports until one succeeds.
1630*7c478bd9Sstevel@tonic-gate 	 * The first one that succeeds will be used to connect
1631*7c478bd9Sstevel@tonic-gate 	 * to the server.  If none of them succeed, return RDMA_FAILED.
1632*7c478bd9Sstevel@tonic-gate 	 */
1633*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
1634*7c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
1635*7c478bd9Sstevel@tonic-gate 	    rw_enter(&hca->service_list_lock, RW_READER);
1636*7c478bd9Sstevel@tonic-gate 	    for (ats = hca->ats_list; ats != NULL; ats = ats->srv_next) {
1637*7c478bd9Sstevel@tonic-gate 		path_attr.pa_hca_guid = hca->hca_guid;
1638*7c478bd9Sstevel@tonic-gate 		path_attr.pa_hca_port_num = ats->srv_port;
1639*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_get_paths(hca->ibt_clnt_hdl,
1640*7c478bd9Sstevel@tonic-gate 			IBT_PATH_MULTI_SVC_DEST, &path_attr, 2, paths, &npaths);
1641*7c478bd9Sstevel@tonic-gate 		if (ibt_status == IBT_SUCCESS ||
1642*7c478bd9Sstevel@tonic-gate 			ibt_status == IBT_INSUFF_DATA) {
1643*7c478bd9Sstevel@tonic-gate 		    for (i = 0; i < npaths; i++) {
1644*7c478bd9Sstevel@tonic-gate 			if (paths[i].pi_hca_guid) {
1645*7c478bd9Sstevel@tonic-gate 			/*
1646*7c478bd9Sstevel@tonic-gate 			 * do ibt_query_ar()
1647*7c478bd9Sstevel@tonic-gate 			 */
1648*7c478bd9Sstevel@tonic-gate 			    sgid =
1649*7c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_adds_vect.av_sgid;
1650*7c478bd9Sstevel@tonic-gate 
1651*7c478bd9Sstevel@tonic-gate 			    (void) ibt_index2pkey_byguid(paths[i].pi_hca_guid,
1652*7c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_hca_port_num,
1653*7c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_pkey_ix, &pkey);
1654*7c478bd9Sstevel@tonic-gate 
1655*7c478bd9Sstevel@tonic-gate 			    bzero(&ar_query, sizeof (ar_query));
1656*7c478bd9Sstevel@tonic-gate 			    bzero(&ar_result, sizeof (ar_result));
1657*7c478bd9Sstevel@tonic-gate 			    ar_query.ar_gid =
1658*7c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_adds_vect.av_dgid;
1659*7c478bd9Sstevel@tonic-gate 			    ar_query.ar_pkey = pkey;
1660*7c478bd9Sstevel@tonic-gate 			    ibt_status = ibt_query_ar(&sgid, &ar_query,
1661*7c478bd9Sstevel@tonic-gate 					&ar_result);
1662*7c478bd9Sstevel@tonic-gate 			    if (ibt_status == IBT_SUCCESS) {
1663*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1664*7c478bd9Sstevel@tonic-gate 				if (rib_debug > 1)
1665*7c478bd9Sstevel@tonic-gate 				    rib_dump_pathrec(&paths[i]);
1666*7c478bd9Sstevel@tonic-gate #endif
1667*7c478bd9Sstevel@tonic-gate 				bcopy(&paths[i], path,
1668*7c478bd9Sstevel@tonic-gate 					sizeof (ibt_path_info_t));
1669*7c478bd9Sstevel@tonic-gate 				rw_exit(&hca->service_list_lock);
1670*7c478bd9Sstevel@tonic-gate 				kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
1671*7c478bd9Sstevel@tonic-gate 				rw_exit(&hca->state_lock);
1672*7c478bd9Sstevel@tonic-gate 				return (RDMA_SUCCESS);
1673*7c478bd9Sstevel@tonic-gate 			    }
1674*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1675*7c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
1676*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_chk_srv_ats: "
1677*7c478bd9Sstevel@tonic-gate 				    "ibt_query_ar FAILED, return\n");
1678*7c478bd9Sstevel@tonic-gate 			    }
1679*7c478bd9Sstevel@tonic-gate #endif
1680*7c478bd9Sstevel@tonic-gate 			}
1681*7c478bd9Sstevel@tonic-gate 		    }
1682*7c478bd9Sstevel@tonic-gate 		}
1683*7c478bd9Sstevel@tonic-gate 	    }
1684*7c478bd9Sstevel@tonic-gate 	    rw_exit(&hca->service_list_lock);
1685*7c478bd9Sstevel@tonic-gate 	}
1686*7c478bd9Sstevel@tonic-gate 	kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
1687*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
1688*7c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
1689*7c478bd9Sstevel@tonic-gate }
1690*7c478bd9Sstevel@tonic-gate 
1691*7c478bd9Sstevel@tonic-gate 
1692*7c478bd9Sstevel@tonic-gate /*
1693*7c478bd9Sstevel@tonic-gate  * Connect to the server.
1694*7c478bd9Sstevel@tonic-gate  */
1695*7c478bd9Sstevel@tonic-gate rdma_stat
1696*7c478bd9Sstevel@tonic-gate rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, ibt_path_info_t *path)
1697*7c478bd9Sstevel@tonic-gate {
1698*7c478bd9Sstevel@tonic-gate 	ibt_chan_open_args_t	chan_args;	/* channel args */
1699*7c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
1700*7c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
1701*7c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
1702*7c478bd9Sstevel@tonic-gate 	ibt_rc_returns_t	ret_args;   	/* conn reject info */
1703*7c478bd9Sstevel@tonic-gate 	int refresh = REFRESH_ATTEMPTS;	/* refresh if IBT_CM_CONN_STALE */
1704*7c478bd9Sstevel@tonic-gate 
1705*7c478bd9Sstevel@tonic-gate 	(void) bzero(&chan_args, sizeof (chan_args));
1706*7c478bd9Sstevel@tonic-gate 	(void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
1707*7c478bd9Sstevel@tonic-gate 
1708*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = path->pi_prim_cep_path.cep_hca_port_num;
1709*7c478bd9Sstevel@tonic-gate 	/* Alloc a RC channel */
1710*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl;
1711*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl;
1712*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
1713*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
1714*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
1715*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
1716*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
1717*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
1718*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
1719*7c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
1720*7c478bd9Sstevel@tonic-gate 
1721*7c478bd9Sstevel@tonic-gate 	chan_args.oc_path = path;
1722*7c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_handler = rib_clnt_cm_handler;
1723*7c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_clnt_private = (void *)rib_stat;
1724*7c478bd9Sstevel@tonic-gate 	chan_args.oc_rdma_ra_out = 1;
1725*7c478bd9Sstevel@tonic-gate 	chan_args.oc_rdma_ra_in = 1;
1726*7c478bd9Sstevel@tonic-gate 	chan_args.oc_path_retry_cnt = 2;
1727*7c478bd9Sstevel@tonic-gate 	chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
1728*7c478bd9Sstevel@tonic-gate 
1729*7c478bd9Sstevel@tonic-gate refresh:
1730*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
1731*7c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
1732*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
1733*7c478bd9Sstevel@tonic-gate 			IBT_ACHAN_NO_FLAGS, &qp_attr, &qp->qp_hdl,
1734*7c478bd9Sstevel@tonic-gate 			&chan_sizes);
1735*7c478bd9Sstevel@tonic-gate 	} else {
1736*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
1737*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
1738*7c478bd9Sstevel@tonic-gate 	}
1739*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
1740*7c478bd9Sstevel@tonic-gate 
1741*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
1742*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1743*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_conn_to_srv: alloc_rc_channel "
1744*7c478bd9Sstevel@tonic-gate 		"failed, ibt_status=%d.", ibt_status);
1745*7c478bd9Sstevel@tonic-gate #endif
1746*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
1747*7c478bd9Sstevel@tonic-gate 	}
1748*7c478bd9Sstevel@tonic-gate 
1749*7c478bd9Sstevel@tonic-gate 	/* Connect to the Server */
1750*7c478bd9Sstevel@tonic-gate 	(void) bzero(&ret_args, sizeof (ret_args));
1751*7c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->cb_lock);
1752*7c478bd9Sstevel@tonic-gate 	ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS,
1753*7c478bd9Sstevel@tonic-gate 			IBT_BLOCKING, &chan_args, &ret_args);
1754*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
1755*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1756*7c478bd9Sstevel@tonic-gate 		if (rib_debug)
1757*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_conn_to_srv: open_rc_channel"
1758*7c478bd9Sstevel@tonic-gate 				" failed for qp %p, status=%d, "
1759*7c478bd9Sstevel@tonic-gate 				"ret_args.rc_status=%d\n",
1760*7c478bd9Sstevel@tonic-gate 				(void *)qp, ibt_status, ret_args.rc_status);
1761*7c478bd9Sstevel@tonic-gate #endif
1762*7c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
1763*7c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
1764*7c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->cb_lock);
1765*7c478bd9Sstevel@tonic-gate 		if (refresh-- && ibt_status == IBT_CM_FAILURE &&
1766*7c478bd9Sstevel@tonic-gate 			ret_args.rc_status == IBT_CM_CONN_STALE) {
1767*7c478bd9Sstevel@tonic-gate 			/*
1768*7c478bd9Sstevel@tonic-gate 			 * Got IBT_CM_CONN_STALE probably because of stale
1769*7c478bd9Sstevel@tonic-gate 			 * data on the passive end of a channel that existed
1770*7c478bd9Sstevel@tonic-gate 			 * prior to reboot. Retry establishing a channel
1771*7c478bd9Sstevel@tonic-gate 			 * REFRESH_ATTEMPTS times, during which time the
1772*7c478bd9Sstevel@tonic-gate 			 * stale conditions on the server might clear up.
1773*7c478bd9Sstevel@tonic-gate 			 */
1774*7c478bd9Sstevel@tonic-gate 			goto refresh;
1775*7c478bd9Sstevel@tonic-gate 		}
1776*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
1777*7c478bd9Sstevel@tonic-gate 	}
1778*7c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->cb_lock);
1779*7c478bd9Sstevel@tonic-gate 	/*
1780*7c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
1781*7c478bd9Sstevel@tonic-gate 	 */
1782*7c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(qp->qp_hdl, (void *)qp);
1783*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
1784*7c478bd9Sstevel@tonic-gate }
1785*7c478bd9Sstevel@tonic-gate 
1786*7c478bd9Sstevel@tonic-gate rdma_stat
1787*7c478bd9Sstevel@tonic-gate rib_ping_srv(int addr_type, struct netbuf *raddr, rib_hca_t **hca)
1788*7c478bd9Sstevel@tonic-gate {
1789*7c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin4;
1790*7c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
1791*7c478bd9Sstevel@tonic-gate 	ibt_path_attr_t		path_attr;
1792*7c478bd9Sstevel@tonic-gate 	ibt_path_info_t		path;
1793*7c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
1794*7c478bd9Sstevel@tonic-gate 
1795*7c478bd9Sstevel@tonic-gate 	ASSERT(raddr->buf != NULL);
1796*7c478bd9Sstevel@tonic-gate 
1797*7c478bd9Sstevel@tonic-gate 	bzero(&path_attr, sizeof (ibt_path_attr_t));
1798*7c478bd9Sstevel@tonic-gate 	bzero(&path, sizeof (ibt_path_info_t));
1799*7c478bd9Sstevel@tonic-gate 
1800*7c478bd9Sstevel@tonic-gate 	/*
1801*7c478bd9Sstevel@tonic-gate 	 * Conctruct svc name
1802*7c478bd9Sstevel@tonic-gate 	 */
1803*7c478bd9Sstevel@tonic-gate 	path_attr.pa_sname = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
1804*7c478bd9Sstevel@tonic-gate 	switch (addr_type) {
1805*7c478bd9Sstevel@tonic-gate 	case AF_INET:
1806*7c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)raddr->buf;
1807*7c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &sin4->sin_addr, path_attr.pa_sname,
1808*7c478bd9Sstevel@tonic-gate 		    IB_SVC_NAME_LEN);
1809*7c478bd9Sstevel@tonic-gate 		break;
1810*7c478bd9Sstevel@tonic-gate 
1811*7c478bd9Sstevel@tonic-gate 	case AF_INET6:
1812*7c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)raddr->buf;
1813*7c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET6, &sin6->sin6_addr,
1814*7c478bd9Sstevel@tonic-gate 		    path_attr.pa_sname, IB_SVC_NAME_LEN);
1815*7c478bd9Sstevel@tonic-gate 		break;
1816*7c478bd9Sstevel@tonic-gate 
1817*7c478bd9Sstevel@tonic-gate 	default:
1818*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
1819*7c478bd9Sstevel@tonic-gate 	    if (rib_debug) {
1820*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_ping_srv: Address not recognized\n");
1821*7c478bd9Sstevel@tonic-gate 	    }
1822*7c478bd9Sstevel@tonic-gate #endif
1823*7c478bd9Sstevel@tonic-gate 		kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
1824*7c478bd9Sstevel@tonic-gate 		return (RDMA_INVAL);
1825*7c478bd9Sstevel@tonic-gate 	}
1826*7c478bd9Sstevel@tonic-gate 	(void) strlcat(path_attr.pa_sname, "::NFS", IB_SVC_NAME_LEN);
1827*7c478bd9Sstevel@tonic-gate 
1828*7c478bd9Sstevel@tonic-gate 	ibt_status = ibt_get_paths(rib_stat->ibt_clnt_hdl,
1829*7c478bd9Sstevel@tonic-gate 		IBT_PATH_NO_FLAGS, &path_attr, 1, &path, NULL);
1830*7c478bd9Sstevel@tonic-gate 	kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
1831*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
1832*7c478bd9Sstevel@tonic-gate 	    if (rib_debug > 1) {
1833*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_ping_srv: ibt_get_paths FAILED!"
1834*7c478bd9Sstevel@tonic-gate 			" status=%d\n", ibt_status);
1835*7c478bd9Sstevel@tonic-gate 	    }
1836*7c478bd9Sstevel@tonic-gate 	} else if (path.pi_hca_guid) {
1837*7c478bd9Sstevel@tonic-gate 		ASSERT(path.pi_hca_guid == rib_stat->hca->hca_guid);
1838*7c478bd9Sstevel@tonic-gate 		*hca = rib_stat->hca;
1839*7c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
1840*7c478bd9Sstevel@tonic-gate 	}
1841*7c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
1842*7c478bd9Sstevel@tonic-gate }
1843*7c478bd9Sstevel@tonic-gate 
1844*7c478bd9Sstevel@tonic-gate /*
1845*7c478bd9Sstevel@tonic-gate  * Close channel, remove from connection list and
1846*7c478bd9Sstevel@tonic-gate  * free up resources allocated for that channel.
1847*7c478bd9Sstevel@tonic-gate  */
1848*7c478bd9Sstevel@tonic-gate rdma_stat
1849*7c478bd9Sstevel@tonic-gate rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
1850*7c478bd9Sstevel@tonic-gate {
1851*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
1852*7c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
1853*7c478bd9Sstevel@tonic-gate 
1854*7c478bd9Sstevel@tonic-gate 	/*
1855*7c478bd9Sstevel@tonic-gate 	 * c_ref == 0 and connection is in C_DISCONN_PEND
1856*7c478bd9Sstevel@tonic-gate 	 */
1857*7c478bd9Sstevel@tonic-gate 	hca = qp->hca;
1858*7c478bd9Sstevel@tonic-gate 	if (conn_list != NULL)
1859*7c478bd9Sstevel@tonic-gate 		(void) rib_rm_conn(conn, conn_list);
1860*7c478bd9Sstevel@tonic-gate 	if (qp->qp_hdl != NULL) {
1861*7c478bd9Sstevel@tonic-gate 		/*
1862*7c478bd9Sstevel@tonic-gate 		 * If the channel has not been establised,
1863*7c478bd9Sstevel@tonic-gate 		 * ibt_flush_channel is called to flush outstanding WRs
1864*7c478bd9Sstevel@tonic-gate 		 * on the Qs.  Otherwise, ibt_close_rc_channel() is
1865*7c478bd9Sstevel@tonic-gate 		 * called.  The channel is then freed.
1866*7c478bd9Sstevel@tonic-gate 		 */
1867*7c478bd9Sstevel@tonic-gate 		if (conn_list != NULL)
1868*7c478bd9Sstevel@tonic-gate 		    (void) ibt_close_rc_channel(qp->qp_hdl,
1869*7c478bd9Sstevel@tonic-gate 			IBT_BLOCKING, NULL, 0, NULL, NULL, 0);
1870*7c478bd9Sstevel@tonic-gate 		else
1871*7c478bd9Sstevel@tonic-gate 		    (void) ibt_flush_channel(qp->qp_hdl);
1872*7c478bd9Sstevel@tonic-gate 
1873*7c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
1874*7c478bd9Sstevel@tonic-gate 		while (qp->n_posted_rbufs)
1875*7c478bd9Sstevel@tonic-gate 			cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock);
1876*7c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
1877*7c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
1878*7c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
1879*7c478bd9Sstevel@tonic-gate 	}
1880*7c478bd9Sstevel@tonic-gate 	ASSERT(qp->rdlist == NULL);
1881*7c478bd9Sstevel@tonic-gate 	if (qp->replylist != NULL) {
1882*7c478bd9Sstevel@tonic-gate 		(void) rib_rem_replylist(qp);
1883*7c478bd9Sstevel@tonic-gate 	}
1884*7c478bd9Sstevel@tonic-gate 
1885*7c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->cb_conn_cv);
1886*7c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->posted_rbufs_cv);
1887*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->cb_lock);
1888*7c478bd9Sstevel@tonic-gate 
1889*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->replylist_lock);
1890*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->posted_rbufs_lock);
1891*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->rdlist_lock);
1892*7c478bd9Sstevel@tonic-gate 
1893*7c478bd9Sstevel@tonic-gate 	cv_destroy(&conn->c_cv);
1894*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&conn->c_lock);
1895*7c478bd9Sstevel@tonic-gate 
1896*7c478bd9Sstevel@tonic-gate 	if (conn->c_raddr.buf != NULL) {
1897*7c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
1898*7c478bd9Sstevel@tonic-gate 	}
1899*7c478bd9Sstevel@tonic-gate 	if (conn->c_laddr.buf != NULL) {
1900*7c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
1901*7c478bd9Sstevel@tonic-gate 	}
1902*7c478bd9Sstevel@tonic-gate 	kmem_free(qp, sizeof (rib_qp_t));
1903*7c478bd9Sstevel@tonic-gate 
1904*7c478bd9Sstevel@tonic-gate 	/*
1905*7c478bd9Sstevel@tonic-gate 	 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
1906*7c478bd9Sstevel@tonic-gate 	 * then the hca is no longer being used.
1907*7c478bd9Sstevel@tonic-gate 	 */
1908*7c478bd9Sstevel@tonic-gate 	if (conn_list != NULL) {
1909*7c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
1910*7c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
1911*7c478bd9Sstevel@tonic-gate 			rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
1912*7c478bd9Sstevel@tonic-gate 			if (hca->srv_conn_list.conn_hd == NULL) {
1913*7c478bd9Sstevel@tonic-gate 				rw_enter(&hca->cl_conn_list.conn_lock,
1914*7c478bd9Sstevel@tonic-gate 					RW_READER);
1915*7c478bd9Sstevel@tonic-gate 				if (hca->cl_conn_list.conn_hd == NULL) {
1916*7c478bd9Sstevel@tonic-gate 					mutex_enter(&hca->inuse_lock);
1917*7c478bd9Sstevel@tonic-gate 					hca->inuse = FALSE;
1918*7c478bd9Sstevel@tonic-gate 					cv_signal(&hca->cb_cv);
1919*7c478bd9Sstevel@tonic-gate 					mutex_exit(&hca->inuse_lock);
1920*7c478bd9Sstevel@tonic-gate 				}
1921*7c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
1922*7c478bd9Sstevel@tonic-gate 			}
1923*7c478bd9Sstevel@tonic-gate 			rw_exit(&hca->srv_conn_list.conn_lock);
1924*7c478bd9Sstevel@tonic-gate 		}
1925*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
1926*7c478bd9Sstevel@tonic-gate 	}
1927*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
1928*7c478bd9Sstevel@tonic-gate }
1929*7c478bd9Sstevel@tonic-gate 
1930*7c478bd9Sstevel@tonic-gate /*
1931*7c478bd9Sstevel@tonic-gate  * Wait for send completion notification. Only on receiving a
1932*7c478bd9Sstevel@tonic-gate  * notification be it a successful or error completion, free the
1933*7c478bd9Sstevel@tonic-gate  * send_wid.
1934*7c478bd9Sstevel@tonic-gate  */
1935*7c478bd9Sstevel@tonic-gate static rdma_stat
1936*7c478bd9Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd)
1937*7c478bd9Sstevel@tonic-gate {
1938*7c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
1939*7c478bd9Sstevel@tonic-gate 	rdma_stat error = RDMA_SUCCESS;
1940*7c478bd9Sstevel@tonic-gate 	int	i;
1941*7c478bd9Sstevel@tonic-gate 
1942*7c478bd9Sstevel@tonic-gate 	/*
1943*7c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
1944*7c478bd9Sstevel@tonic-gate 	 */
1945*7c478bd9Sstevel@tonic-gate 	ASSERT(wd != NULL);
1946*7c478bd9Sstevel@tonic-gate 	mutex_enter(&wd->sendwait_lock);
1947*7c478bd9Sstevel@tonic-gate 	if (wd->status == (uint_t)SEND_WAIT) {
1948*7c478bd9Sstevel@tonic-gate 		timout = drv_usectohz(SEND_WAIT_TIME * 1000000) +
1949*7c478bd9Sstevel@tonic-gate 		    ddi_get_lbolt();
1950*7c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER) {
1951*7c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait(&wd->wait_cv,
1952*7c478bd9Sstevel@tonic-gate 				    &wd->sendwait_lock, timout)) > 0 &&
1953*7c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
1954*7c478bd9Sstevel@tonic-gate 				;
1955*7c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
1956*7c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
1957*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1958*7c478bd9Sstevel@tonic-gate 				if (rib_debug > 2)
1959*7c478bd9Sstevel@tonic-gate 					cmn_err(CE_WARN, "rib_sendwait: "
1960*7c478bd9Sstevel@tonic-gate 					    "timed out qp %p\n", (void *)qp);
1961*7c478bd9Sstevel@tonic-gate #endif
1962*7c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
1963*7c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
1964*7c478bd9Sstevel@tonic-gate 				break;
1965*7c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
1966*7c478bd9Sstevel@tonic-gate 				break;
1967*7c478bd9Sstevel@tonic-gate 			}
1968*7c478bd9Sstevel@tonic-gate 		} else {
1969*7c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv,
1970*7c478bd9Sstevel@tonic-gate 				    &wd->sendwait_lock, timout)) > 0 &&
1971*7c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
1972*7c478bd9Sstevel@tonic-gate 				;
1973*7c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
1974*7c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
1975*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1976*7c478bd9Sstevel@tonic-gate 				if (rib_debug > 2)
1977*7c478bd9Sstevel@tonic-gate 					cmn_err(CE_WARN, "rib_sendwait: "
1978*7c478bd9Sstevel@tonic-gate 					    "timed out qp %p\n", (void *)qp);
1979*7c478bd9Sstevel@tonic-gate #endif
1980*7c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
1981*7c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
1982*7c478bd9Sstevel@tonic-gate 				break;
1983*7c478bd9Sstevel@tonic-gate 			case 0:		/* interrupted */
1984*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1985*7c478bd9Sstevel@tonic-gate 				if (rib_debug > 2)
1986*7c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "rib_sendwait:"
1987*7c478bd9Sstevel@tonic-gate 					    " interrupted on qp %p\n",
1988*7c478bd9Sstevel@tonic-gate 					    (void *)qp);
1989*7c478bd9Sstevel@tonic-gate #endif
1990*7c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
1991*7c478bd9Sstevel@tonic-gate 				error = RDMA_INTR;
1992*7c478bd9Sstevel@tonic-gate 				break;
1993*7c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
1994*7c478bd9Sstevel@tonic-gate 				break;
1995*7c478bd9Sstevel@tonic-gate 			}
1996*7c478bd9Sstevel@tonic-gate 		}
1997*7c478bd9Sstevel@tonic-gate 	}
1998*7c478bd9Sstevel@tonic-gate 
1999*7c478bd9Sstevel@tonic-gate 	if (wd->status != (uint_t)SEND_WAIT) {
2000*7c478bd9Sstevel@tonic-gate 		/* got send completion */
2001*7c478bd9Sstevel@tonic-gate 		if (wd->status != RDMA_SUCCESS) {
2002*7c478bd9Sstevel@tonic-gate 		    error = wd->status;
2003*7c478bd9Sstevel@tonic-gate 		    if (wd->status != RDMA_CONNLOST)
2004*7c478bd9Sstevel@tonic-gate 			error = RDMA_FAILED;
2005*7c478bd9Sstevel@tonic-gate 		}
2006*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < wd->nsbufs; i++) {
2007*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), SEND_BUFFER,
2008*7c478bd9Sstevel@tonic-gate 				(void *)wd->sbufaddr[i]);
2009*7c478bd9Sstevel@tonic-gate 		}
2010*7c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
2011*7c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wd);
2012*7c478bd9Sstevel@tonic-gate 	} else {
2013*7c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
2014*7c478bd9Sstevel@tonic-gate 	}
2015*7c478bd9Sstevel@tonic-gate 
2016*7c478bd9Sstevel@tonic-gate 	return (error);
2017*7c478bd9Sstevel@tonic-gate }
2018*7c478bd9Sstevel@tonic-gate 
2019*7c478bd9Sstevel@tonic-gate static struct send_wid *
2020*7c478bd9Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp)
2021*7c478bd9Sstevel@tonic-gate {
2022*7c478bd9Sstevel@tonic-gate 	struct send_wid	*wd;
2023*7c478bd9Sstevel@tonic-gate 
2024*7c478bd9Sstevel@tonic-gate 	wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP);
2025*7c478bd9Sstevel@tonic-gate 	wd->xid = xid;
2026*7c478bd9Sstevel@tonic-gate 	wd->cv_sig = cv_sig;
2027*7c478bd9Sstevel@tonic-gate 	wd->qp = qp;
2028*7c478bd9Sstevel@tonic-gate 	cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL);
2029*7c478bd9Sstevel@tonic-gate 	mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL);
2030*7c478bd9Sstevel@tonic-gate 	wd->status = (uint_t)SEND_WAIT;
2031*7c478bd9Sstevel@tonic-gate 
2032*7c478bd9Sstevel@tonic-gate 	return (wd);
2033*7c478bd9Sstevel@tonic-gate }
2034*7c478bd9Sstevel@tonic-gate 
2035*7c478bd9Sstevel@tonic-gate static int
2036*7c478bd9Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc)
2037*7c478bd9Sstevel@tonic-gate {
2038*7c478bd9Sstevel@tonic-gate 	cv_destroy(&wdesc->wait_cv);
2039*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&wdesc->sendwait_lock);
2040*7c478bd9Sstevel@tonic-gate 	kmem_free(wdesc, sizeof (*wdesc));
2041*7c478bd9Sstevel@tonic-gate 
2042*7c478bd9Sstevel@tonic-gate 	return (0);
2043*7c478bd9Sstevel@tonic-gate }
2044*7c478bd9Sstevel@tonic-gate 
2045*7c478bd9Sstevel@tonic-gate static rdma_stat
2046*7c478bd9Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep)
2047*7c478bd9Sstevel@tonic-gate {
2048*7c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
2049*7c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
2050*7c478bd9Sstevel@tonic-gate 	    (void) rib_remreply(qp, rep);
2051*7c478bd9Sstevel@tonic-gate 	    mutex_exit(&qp->replylist_lock);
2052*7c478bd9Sstevel@tonic-gate 	    return (RDMA_SUCCESS);
2053*7c478bd9Sstevel@tonic-gate 	}
2054*7c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
2055*7c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
2056*7c478bd9Sstevel@tonic-gate }
2057*7c478bd9Sstevel@tonic-gate 
2058*7c478bd9Sstevel@tonic-gate /*
2059*7c478bd9Sstevel@tonic-gate  * Send buffers are freed here only in case of error in posting
2060*7c478bd9Sstevel@tonic-gate  * on QP. If the post succeeded, the send buffers are freed upon
2061*7c478bd9Sstevel@tonic-gate  * send completion in rib_sendwait() or in the scq_handler.
2062*7c478bd9Sstevel@tonic-gate  */
2063*7c478bd9Sstevel@tonic-gate rdma_stat
2064*7c478bd9Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
2065*7c478bd9Sstevel@tonic-gate 	int send_sig, int cv_sig)
2066*7c478bd9Sstevel@tonic-gate {
2067*7c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
2068*7c478bd9Sstevel@tonic-gate 	struct clist	*clp;
2069*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
2070*7c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
2071*7c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
2072*7c478bd9Sstevel@tonic-gate 	int		i, nds;
2073*7c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
2074*7c478bd9Sstevel@tonic-gate 	uint_t		total_msg_size;
2075*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
2076*7c478bd9Sstevel@tonic-gate 
2077*7c478bd9Sstevel@tonic-gate 	ASSERT(cl != NULL);
2078*7c478bd9Sstevel@tonic-gate 
2079*7c478bd9Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
2080*7c478bd9Sstevel@tonic-gate 
2081*7c478bd9Sstevel@tonic-gate 	nds = 0;
2082*7c478bd9Sstevel@tonic-gate 	total_msg_size = 0;
2083*7c478bd9Sstevel@tonic-gate 	clp = cl;
2084*7c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
2085*7c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
2086*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_send_and_wait: DSEG_MAX"
2087*7c478bd9Sstevel@tonic-gate 			    " too small!");
2088*7c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
2089*7c478bd9Sstevel@tonic-gate 		}
2090*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = clp->c_saddr;
2091*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */
2092*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = clp->c_len;
2093*7c478bd9Sstevel@tonic-gate 		total_msg_size += clp->c_len;
2094*7c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
2095*7c478bd9Sstevel@tonic-gate 		nds++;
2096*7c478bd9Sstevel@tonic-gate 	}
2097*7c478bd9Sstevel@tonic-gate 
2098*7c478bd9Sstevel@tonic-gate 	if (send_sig) {
2099*7c478bd9Sstevel@tonic-gate 		/* Set SEND_SIGNAL flag. */
2100*7c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
2101*7c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, cv_sig, qp);
2102*7c478bd9Sstevel@tonic-gate 	} else {
2103*7c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
2104*7c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, 0, qp);
2105*7c478bd9Sstevel@tonic-gate 	}
2106*7c478bd9Sstevel@tonic-gate 	wdesc->nsbufs = nds;
2107*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < nds; i++) {
2108*7c478bd9Sstevel@tonic-gate 		wdesc->sbufaddr[i] = sgl[i].ds_va;
2109*7c478bd9Sstevel@tonic-gate 	}
2110*7c478bd9Sstevel@tonic-gate 
2111*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_id = (ibt_wrid_t)wdesc;
2112*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_SEND;
2113*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
2114*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
2115*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
2116*7c478bd9Sstevel@tonic-gate 
2117*7c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2118*7c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
2119*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
2120*7c478bd9Sstevel@tonic-gate 	}
2121*7c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
2122*7c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
2123*7c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2124*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < nds; i++) {
2125*7c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, SEND_BUFFER,
2126*7c478bd9Sstevel@tonic-gate 				(void *)wdesc->sbufaddr[i]);
2127*7c478bd9Sstevel@tonic-gate 		}
2128*7c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wdesc);
2129*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2130*7c478bd9Sstevel@tonic-gate 		if (rib_debug && ibt_status != IBT_SUCCESS)
2131*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_send_and_wait: ibt_post_send "
2132*7c478bd9Sstevel@tonic-gate 				"failed! wr_id %llx on qpn %p, status=%d!",
2133*7c478bd9Sstevel@tonic-gate 				(longlong_t)tx_wr.wr_id, (void *)qp,
2134*7c478bd9Sstevel@tonic-gate 				ibt_status);
2135*7c478bd9Sstevel@tonic-gate #endif
2136*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
2137*7c478bd9Sstevel@tonic-gate 	}
2138*7c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
2139*7c478bd9Sstevel@tonic-gate 
2140*7c478bd9Sstevel@tonic-gate 	if (send_sig) {
2141*7c478bd9Sstevel@tonic-gate 	    if (cv_sig) {
2142*7c478bd9Sstevel@tonic-gate 		/*
2143*7c478bd9Sstevel@tonic-gate 		 * cv_wait for send to complete.
2144*7c478bd9Sstevel@tonic-gate 		 * We can fail due to a timeout or signal or
2145*7c478bd9Sstevel@tonic-gate 		 * unsuccessful send.
2146*7c478bd9Sstevel@tonic-gate 		 */
2147*7c478bd9Sstevel@tonic-gate 		ret = rib_sendwait(qp, wdesc);
2148*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2149*7c478bd9Sstevel@tonic-gate 	    if (rib_debug > 2)
2150*7c478bd9Sstevel@tonic-gate 		if (ret != 0) {
2151*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_send_and_wait: rib_sendwait "
2152*7c478bd9Sstevel@tonic-gate 			"FAILED, rdma stat=%d, wr_id %llx, qp %p!",
2153*7c478bd9Sstevel@tonic-gate 			ret, (longlong_t)tx_wr.wr_id, (void *)qp);
2154*7c478bd9Sstevel@tonic-gate 		}
2155*7c478bd9Sstevel@tonic-gate #endif
2156*7c478bd9Sstevel@tonic-gate 		return (ret);
2157*7c478bd9Sstevel@tonic-gate 	    }
2158*7c478bd9Sstevel@tonic-gate 	}
2159*7c478bd9Sstevel@tonic-gate 
2160*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
2161*7c478bd9Sstevel@tonic-gate }
2162*7c478bd9Sstevel@tonic-gate 
2163*7c478bd9Sstevel@tonic-gate rdma_stat
2164*7c478bd9Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
2165*7c478bd9Sstevel@tonic-gate {
2166*7c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
2167*7c478bd9Sstevel@tonic-gate 
2168*7c478bd9Sstevel@tonic-gate 	/* send-wait & cv_signal */
2169*7c478bd9Sstevel@tonic-gate 	ret = rib_send_and_wait(conn, cl, msgid, 1, 1);
2170*7c478bd9Sstevel@tonic-gate 
2171*7c478bd9Sstevel@tonic-gate 	return (ret);
2172*7c478bd9Sstevel@tonic-gate }
2173*7c478bd9Sstevel@tonic-gate 
2174*7c478bd9Sstevel@tonic-gate /*
2175*7c478bd9Sstevel@tonic-gate  * Server interface (svc_rdma_ksend).
2176*7c478bd9Sstevel@tonic-gate  * Send RPC reply and wait for RDMA_DONE.
2177*7c478bd9Sstevel@tonic-gate  */
2178*7c478bd9Sstevel@tonic-gate rdma_stat
2179*7c478bd9Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
2180*7c478bd9Sstevel@tonic-gate {
2181*7c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
2182*7c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
2183*7c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
2184*7c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
2185*7c478bd9Sstevel@tonic-gate 
2186*7c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
2187*7c478bd9Sstevel@tonic-gate 	rd = rdma_done_add(qp, msgid);
2188*7c478bd9Sstevel@tonic-gate 
2189*7c478bd9Sstevel@tonic-gate 	/* No cv_signal (whether send-wait or no-send-wait) */
2190*7c478bd9Sstevel@tonic-gate 	ret = rib_send_and_wait(conn, cl, msgid, 1, 0);
2191*7c478bd9Sstevel@tonic-gate 	if (ret != RDMA_SUCCESS) {
2192*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2193*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rib_send_resp: send_and_wait "
2194*7c478bd9Sstevel@tonic-gate 		"failed, msgid %u, qp %p", msgid, (void *)qp);
2195*7c478bd9Sstevel@tonic-gate #endif
2196*7c478bd9Sstevel@tonic-gate 	    rdma_done_rm(qp, rd);
2197*7c478bd9Sstevel@tonic-gate 	    goto done;
2198*7c478bd9Sstevel@tonic-gate 	}
2199*7c478bd9Sstevel@tonic-gate 
2200*7c478bd9Sstevel@tonic-gate 	/*
2201*7c478bd9Sstevel@tonic-gate 	 * Wait for RDMA_DONE from remote end
2202*7c478bd9Sstevel@tonic-gate 	 */
2203*7c478bd9Sstevel@tonic-gate 	timout = drv_usectohz(REPLY_WAIT_TIME * 1000000) + ddi_get_lbolt();
2204*7c478bd9Sstevel@tonic-gate 	cv_wait_ret = cv_timedwait(&rd->rdma_done_cv, &qp->rdlist_lock,
2205*7c478bd9Sstevel@tonic-gate 	    timout);
2206*7c478bd9Sstevel@tonic-gate 	rdma_done_rm(qp, rd);
2207*7c478bd9Sstevel@tonic-gate 	if (cv_wait_ret < 0) {
2208*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2209*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 1) {
2210*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_send_resp: RDMA_DONE not"
2211*7c478bd9Sstevel@tonic-gate 			    " recv'd for qp %p, xid:%u\n",
2212*7c478bd9Sstevel@tonic-gate 			    (void *)qp, msgid);
2213*7c478bd9Sstevel@tonic-gate 		}
2214*7c478bd9Sstevel@tonic-gate #endif
2215*7c478bd9Sstevel@tonic-gate 		ret = RDMA_TIMEDOUT;
2216*7c478bd9Sstevel@tonic-gate 		goto done;
2217*7c478bd9Sstevel@tonic-gate 	}
2218*7c478bd9Sstevel@tonic-gate 
2219*7c478bd9Sstevel@tonic-gate done:
2220*7c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
2221*7c478bd9Sstevel@tonic-gate 	return (ret);
2222*7c478bd9Sstevel@tonic-gate }
2223*7c478bd9Sstevel@tonic-gate 
2224*7c478bd9Sstevel@tonic-gate static struct recv_wid *
2225*7c478bd9Sstevel@tonic-gate rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid)
2226*7c478bd9Sstevel@tonic-gate {
2227*7c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
2228*7c478bd9Sstevel@tonic-gate 
2229*7c478bd9Sstevel@tonic-gate 	rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP);
2230*7c478bd9Sstevel@tonic-gate 	rwid->xid = msgid;
2231*7c478bd9Sstevel@tonic-gate 	rwid->addr = sgl->ds_va;
2232*7c478bd9Sstevel@tonic-gate 	rwid->qp = qp;
2233*7c478bd9Sstevel@tonic-gate 
2234*7c478bd9Sstevel@tonic-gate 	return (rwid);
2235*7c478bd9Sstevel@tonic-gate }
2236*7c478bd9Sstevel@tonic-gate 
2237*7c478bd9Sstevel@tonic-gate static void
2238*7c478bd9Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid)
2239*7c478bd9Sstevel@tonic-gate {
2240*7c478bd9Sstevel@tonic-gate 	kmem_free(rwid, sizeof (struct recv_wid));
2241*7c478bd9Sstevel@tonic-gate }
2242*7c478bd9Sstevel@tonic-gate 
2243*7c478bd9Sstevel@tonic-gate rdma_stat
2244*7c478bd9Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid)
2245*7c478bd9Sstevel@tonic-gate {
2246*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
2247*7c478bd9Sstevel@tonic-gate 	struct clist	*clp = cl;
2248*7c478bd9Sstevel@tonic-gate 	struct reply	*rep;
2249*7c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
2250*7c478bd9Sstevel@tonic-gate 	int		nds;
2251*7c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
2252*7c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
2253*7c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
2254*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
2255*7c478bd9Sstevel@tonic-gate 
2256*7c478bd9Sstevel@tonic-gate 	/*
2257*7c478bd9Sstevel@tonic-gate 	 * rdma_clnt_postrecv uses RECV_BUFFER.
2258*7c478bd9Sstevel@tonic-gate 	 */
2259*7c478bd9Sstevel@tonic-gate 
2260*7c478bd9Sstevel@tonic-gate 	nds = 0;
2261*7c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
2262*7c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
2263*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_clnt_post: DSEG_MAX too small!");
2264*7c478bd9Sstevel@tonic-gate 		    ret = RDMA_FAILED;
2265*7c478bd9Sstevel@tonic-gate 		    goto done;
2266*7c478bd9Sstevel@tonic-gate 		}
2267*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_saddr;
2268*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
2269*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
2270*7c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
2271*7c478bd9Sstevel@tonic-gate 		nds++;
2272*7c478bd9Sstevel@tonic-gate 	}
2273*7c478bd9Sstevel@tonic-gate 
2274*7c478bd9Sstevel@tonic-gate 	if (nds != 1) {
2275*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rib_clnt_post: nds!=1\n");
2276*7c478bd9Sstevel@tonic-gate 	    ret = RDMA_FAILED;
2277*7c478bd9Sstevel@tonic-gate 	    goto done;
2278*7c478bd9Sstevel@tonic-gate 	}
2279*7c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
2280*7c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
2281*7c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
2282*7c478bd9Sstevel@tonic-gate 
2283*7c478bd9Sstevel@tonic-gate 	rwid = rib_create_wid(qp, &sgl[0], msgid);
2284*7c478bd9Sstevel@tonic-gate 	if (rwid) {
2285*7c478bd9Sstevel@tonic-gate 	    recv_wr.wr_id = (ibt_wrid_t)rwid;
2286*7c478bd9Sstevel@tonic-gate 	} else {
2287*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_clnt_post: out of memory");
2288*7c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
2289*7c478bd9Sstevel@tonic-gate 		goto done;
2290*7c478bd9Sstevel@tonic-gate 	}
2291*7c478bd9Sstevel@tonic-gate 	rep = rib_addreplylist(qp, msgid);
2292*7c478bd9Sstevel@tonic-gate 	if (!rep) {
2293*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_clnt_post: out of memory");
2294*7c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
2295*7c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
2296*7c478bd9Sstevel@tonic-gate 		goto done;
2297*7c478bd9Sstevel@tonic-gate 	}
2298*7c478bd9Sstevel@tonic-gate 
2299*7c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2300*7c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
2301*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
2302*7c478bd9Sstevel@tonic-gate 	}
2303*7c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
2304*7c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
2305*7c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2306*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2307*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_clnt_post: QPN %p failed in "
2308*7c478bd9Sstevel@tonic-gate 		    "ibt_post_recv(), msgid=%d, status=%d",
2309*7c478bd9Sstevel@tonic-gate 		    (void *)qp,  msgid, ibt_status);
2310*7c478bd9Sstevel@tonic-gate #endif
2311*7c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
2312*7c478bd9Sstevel@tonic-gate 		(void) rib_rem_rep(qp, rep);
2313*7c478bd9Sstevel@tonic-gate 		ret = RDMA_FAILED;
2314*7c478bd9Sstevel@tonic-gate 		goto done;
2315*7c478bd9Sstevel@tonic-gate 	}
2316*7c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
2317*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
2318*7c478bd9Sstevel@tonic-gate 
2319*7c478bd9Sstevel@tonic-gate done:
2320*7c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
2321*7c478bd9Sstevel@tonic-gate 	    rib_rbuf_free(conn, RECV_BUFFER, (void *)clp->c_saddr);
2322*7c478bd9Sstevel@tonic-gate 	    clp = clp->c_next;
2323*7c478bd9Sstevel@tonic-gate 	}
2324*7c478bd9Sstevel@tonic-gate 	return (ret);
2325*7c478bd9Sstevel@tonic-gate }
2326*7c478bd9Sstevel@tonic-gate 
2327*7c478bd9Sstevel@tonic-gate rdma_stat
2328*7c478bd9Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl)
2329*7c478bd9Sstevel@tonic-gate {
2330*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
2331*7c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
2332*7c478bd9Sstevel@tonic-gate 	int		nds;
2333*7c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
2334*7c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
2335*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
2336*7c478bd9Sstevel@tonic-gate 
2337*7c478bd9Sstevel@tonic-gate 	nds = 0;
2338*7c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
2339*7c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
2340*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_svc_post: DSEG_MAX too small!");
2341*7c478bd9Sstevel@tonic-gate 		    return (RDMA_FAILED);
2342*7c478bd9Sstevel@tonic-gate 		}
2343*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_saddr;
2344*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
2345*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
2346*7c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
2347*7c478bd9Sstevel@tonic-gate 		nds++;
2348*7c478bd9Sstevel@tonic-gate 	}
2349*7c478bd9Sstevel@tonic-gate 
2350*7c478bd9Sstevel@tonic-gate 	if (nds != 1) {
2351*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rib_svc_post: nds!=1\n");
2352*7c478bd9Sstevel@tonic-gate 	    rib_rbuf_free(conn, RECV_BUFFER, (caddr_t)sgl[0].ds_va);
2353*7c478bd9Sstevel@tonic-gate 	    return (RDMA_FAILED);
2354*7c478bd9Sstevel@tonic-gate 	}
2355*7c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
2356*7c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
2357*7c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
2358*7c478bd9Sstevel@tonic-gate 
2359*7c478bd9Sstevel@tonic-gate 	s_recvp = rib_init_svc_recv(qp, &sgl[0]);
2360*7c478bd9Sstevel@tonic-gate 	recv_wr.wr_id = (ibt_wrid_t)s_recvp; /* Use s_recvp's addr as wr id */
2361*7c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2362*7c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
2363*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
2364*7c478bd9Sstevel@tonic-gate 	}
2365*7c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
2366*7c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
2367*7c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2368*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2369*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_svc_post: QP %p failed in "
2370*7c478bd9Sstevel@tonic-gate 		    "ibt_post_recv(), status=%d",
2371*7c478bd9Sstevel@tonic-gate 		    (void *)qp, ibt_status);
2372*7c478bd9Sstevel@tonic-gate #endif
2373*7c478bd9Sstevel@tonic-gate 		rib_rbuf_free(conn, RECV_BUFFER, (caddr_t)sgl[0].ds_va);
2374*7c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
2375*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
2376*7c478bd9Sstevel@tonic-gate 	}
2377*7c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
2378*7c478bd9Sstevel@tonic-gate 
2379*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
2380*7c478bd9Sstevel@tonic-gate }
2381*7c478bd9Sstevel@tonic-gate 
2382*7c478bd9Sstevel@tonic-gate /* Client */
2383*7c478bd9Sstevel@tonic-gate rdma_stat
2384*7c478bd9Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid)
2385*7c478bd9Sstevel@tonic-gate {
2386*7c478bd9Sstevel@tonic-gate 
2387*7c478bd9Sstevel@tonic-gate 	return (rib_clnt_post(conn, cl, msgid));
2388*7c478bd9Sstevel@tonic-gate }
2389*7c478bd9Sstevel@tonic-gate 
2390*7c478bd9Sstevel@tonic-gate /* Server */
2391*7c478bd9Sstevel@tonic-gate rdma_stat
2392*7c478bd9Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl)
2393*7c478bd9Sstevel@tonic-gate {
2394*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
2395*7c478bd9Sstevel@tonic-gate 
2396*7c478bd9Sstevel@tonic-gate 	if (rib_svc_post(conn, cl) == RDMA_SUCCESS) {
2397*7c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
2398*7c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs++;
2399*7c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
2400*7c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
2401*7c478bd9Sstevel@tonic-gate 	}
2402*7c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
2403*7c478bd9Sstevel@tonic-gate }
2404*7c478bd9Sstevel@tonic-gate 
2405*7c478bd9Sstevel@tonic-gate /*
2406*7c478bd9Sstevel@tonic-gate  * Client side only interface to "recv" the rpc reply buf
2407*7c478bd9Sstevel@tonic-gate  * posted earlier by rib_post_resp(conn, cl, msgid).
2408*7c478bd9Sstevel@tonic-gate  */
2409*7c478bd9Sstevel@tonic-gate rdma_stat
2410*7c478bd9Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid)
2411*7c478bd9Sstevel@tonic-gate {
2412*7c478bd9Sstevel@tonic-gate 	struct reply *rep = NULL;
2413*7c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
2414*7c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
2415*7c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
2416*7c478bd9Sstevel@tonic-gate 
2417*7c478bd9Sstevel@tonic-gate 	/*
2418*7c478bd9Sstevel@tonic-gate 	 * Find the reply structure for this msgid
2419*7c478bd9Sstevel@tonic-gate 	 */
2420*7c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
2421*7c478bd9Sstevel@tonic-gate 
2422*7c478bd9Sstevel@tonic-gate 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
2423*7c478bd9Sstevel@tonic-gate 	    if (rep->xid == msgid)
2424*7c478bd9Sstevel@tonic-gate 		break;
2425*7c478bd9Sstevel@tonic-gate 	}
2426*7c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
2427*7c478bd9Sstevel@tonic-gate 		/*
2428*7c478bd9Sstevel@tonic-gate 		 * If message not yet received, wait.
2429*7c478bd9Sstevel@tonic-gate 		 */
2430*7c478bd9Sstevel@tonic-gate 		if (rep->status == (uint_t)REPLY_WAIT) {
2431*7c478bd9Sstevel@tonic-gate 			timout = ddi_get_lbolt() +
2432*7c478bd9Sstevel@tonic-gate 			    drv_usectohz(REPLY_WAIT_TIME * 1000000);
2433*7c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv,
2434*7c478bd9Sstevel@tonic-gate 				    &qp->replylist_lock, timout)) > 0 &&
2435*7c478bd9Sstevel@tonic-gate 			    rep->status == (uint_t)REPLY_WAIT);
2436*7c478bd9Sstevel@tonic-gate 
2437*7c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
2438*7c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
2439*7c478bd9Sstevel@tonic-gate 				ret = RDMA_TIMEDOUT;
2440*7c478bd9Sstevel@tonic-gate 				break;
2441*7c478bd9Sstevel@tonic-gate 			case 0:
2442*7c478bd9Sstevel@tonic-gate 				ret = RDMA_INTR;
2443*7c478bd9Sstevel@tonic-gate 				break;
2444*7c478bd9Sstevel@tonic-gate 			default:
2445*7c478bd9Sstevel@tonic-gate 				break;
2446*7c478bd9Sstevel@tonic-gate 			}
2447*7c478bd9Sstevel@tonic-gate 		}
2448*7c478bd9Sstevel@tonic-gate 
2449*7c478bd9Sstevel@tonic-gate 		if (rep->status == RDMA_SUCCESS) {
2450*7c478bd9Sstevel@tonic-gate 			struct clist *cl = NULL;
2451*7c478bd9Sstevel@tonic-gate 
2452*7c478bd9Sstevel@tonic-gate 			/*
2453*7c478bd9Sstevel@tonic-gate 			 * Got message successfully
2454*7c478bd9Sstevel@tonic-gate 			 */
2455*7c478bd9Sstevel@tonic-gate 			clist_add(&cl, 0, rep->bytes_xfer, NULL,
2456*7c478bd9Sstevel@tonic-gate 			    (caddr_t)rep->vaddr_cq, NULL, NULL);
2457*7c478bd9Sstevel@tonic-gate 			*clp = cl;
2458*7c478bd9Sstevel@tonic-gate 		} else {
2459*7c478bd9Sstevel@tonic-gate 			if (rep->status != (uint_t)REPLY_WAIT) {
2460*7c478bd9Sstevel@tonic-gate 				/*
2461*7c478bd9Sstevel@tonic-gate 				 * Got error in reply message. Free
2462*7c478bd9Sstevel@tonic-gate 				 * recv buffer here.
2463*7c478bd9Sstevel@tonic-gate 				 */
2464*7c478bd9Sstevel@tonic-gate 				ret = rep->status;
2465*7c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
2466*7c478bd9Sstevel@tonic-gate 					(caddr_t)rep->vaddr_cq);
2467*7c478bd9Sstevel@tonic-gate 			}
2468*7c478bd9Sstevel@tonic-gate 		}
2469*7c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
2470*7c478bd9Sstevel@tonic-gate 	} else {
2471*7c478bd9Sstevel@tonic-gate 		/*
2472*7c478bd9Sstevel@tonic-gate 		 * No matching reply structure found for given msgid on the
2473*7c478bd9Sstevel@tonic-gate 		 * reply wait list.
2474*7c478bd9Sstevel@tonic-gate 		 */
2475*7c478bd9Sstevel@tonic-gate 		ret = RDMA_INVAL;
2476*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2477*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_recv: no matching reply for "
2478*7c478bd9Sstevel@tonic-gate 		    "xid %u, qp %p\n", msgid, (void *)qp);
2479*7c478bd9Sstevel@tonic-gate #endif
2480*7c478bd9Sstevel@tonic-gate 	}
2481*7c478bd9Sstevel@tonic-gate 
2482*7c478bd9Sstevel@tonic-gate 	/*
2483*7c478bd9Sstevel@tonic-gate 	 * Done.
2484*7c478bd9Sstevel@tonic-gate 	 */
2485*7c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
2486*7c478bd9Sstevel@tonic-gate 	return (ret);
2487*7c478bd9Sstevel@tonic-gate }
2488*7c478bd9Sstevel@tonic-gate 
2489*7c478bd9Sstevel@tonic-gate /*
2490*7c478bd9Sstevel@tonic-gate  * RDMA write a buffer to the remote address.
2491*7c478bd9Sstevel@tonic-gate  */
2492*7c478bd9Sstevel@tonic-gate rdma_stat
2493*7c478bd9Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait)
2494*7c478bd9Sstevel@tonic-gate {
2495*7c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
2496*7c478bd9Sstevel@tonic-gate 	int		nds;
2497*7c478bd9Sstevel@tonic-gate 	int		cv_sig;
2498*7c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
2499*7c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
2500*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
2501*7c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
2502*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
2503*7c478bd9Sstevel@tonic-gate 
2504*7c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
2505*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_write: NULL clist\n");
2506*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
2507*7c478bd9Sstevel@tonic-gate 	}
2508*7c478bd9Sstevel@tonic-gate 
2509*7c478bd9Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
2510*7c478bd9Sstevel@tonic-gate 	/*
2511*7c478bd9Sstevel@tonic-gate 	 * Remote address is at the head chunk item in list.
2512*7c478bd9Sstevel@tonic-gate 	 */
2513*7c478bd9Sstevel@tonic-gate 	tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_daddr;
2514*7c478bd9Sstevel@tonic-gate 	tx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_dmemhandle.mrc_rmr; /* rkey */
2515*7c478bd9Sstevel@tonic-gate 
2516*7c478bd9Sstevel@tonic-gate 	nds = 0;
2517*7c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
2518*7c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
2519*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_write: DSEG_MAX too small!");
2520*7c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
2521*7c478bd9Sstevel@tonic-gate 		}
2522*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_saddr;
2523*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
2524*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
2525*7c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
2526*7c478bd9Sstevel@tonic-gate 		nds++;
2527*7c478bd9Sstevel@tonic-gate 	}
2528*7c478bd9Sstevel@tonic-gate 
2529*7c478bd9Sstevel@tonic-gate 	if (wait) {
2530*7c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
2531*7c478bd9Sstevel@tonic-gate 		cv_sig = 1;
2532*7c478bd9Sstevel@tonic-gate 	} else {
2533*7c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
2534*7c478bd9Sstevel@tonic-gate 		cv_sig = 0;
2535*7c478bd9Sstevel@tonic-gate 	}
2536*7c478bd9Sstevel@tonic-gate 
2537*7c478bd9Sstevel@tonic-gate 	wdesc = rib_init_sendwait(0, cv_sig, qp);
2538*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_id = (ibt_wrid_t)wdesc;
2539*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_RDMAW;
2540*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
2541*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
2542*7c478bd9Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
2543*7c478bd9Sstevel@tonic-gate 
2544*7c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2545*7c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
2546*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
2547*7c478bd9Sstevel@tonic-gate 	}
2548*7c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
2549*7c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
2550*7c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2551*7c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wdesc);
2552*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
2553*7c478bd9Sstevel@tonic-gate 	}
2554*7c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
2555*7c478bd9Sstevel@tonic-gate 
2556*7c478bd9Sstevel@tonic-gate 	/*
2557*7c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
2558*7c478bd9Sstevel@tonic-gate 	 */
2559*7c478bd9Sstevel@tonic-gate 	if (wait) {
2560*7c478bd9Sstevel@tonic-gate 		ret = rib_sendwait(qp, wdesc);
2561*7c478bd9Sstevel@tonic-gate 		if (ret != 0) {
2562*7c478bd9Sstevel@tonic-gate 			return (ret);
2563*7c478bd9Sstevel@tonic-gate 		}
2564*7c478bd9Sstevel@tonic-gate 	}
2565*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
2566*7c478bd9Sstevel@tonic-gate }
2567*7c478bd9Sstevel@tonic-gate 
2568*7c478bd9Sstevel@tonic-gate /*
2569*7c478bd9Sstevel@tonic-gate  * RDMA Read a buffer from the remote address.
2570*7c478bd9Sstevel@tonic-gate  */
2571*7c478bd9Sstevel@tonic-gate rdma_stat
2572*7c478bd9Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait)
2573*7c478bd9Sstevel@tonic-gate {
2574*7c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	rx_wr;
2575*7c478bd9Sstevel@tonic-gate 	int		nds;
2576*7c478bd9Sstevel@tonic-gate 	int		cv_sig;
2577*7c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];	/* is 2 sufficient? */
2578*7c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
2579*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
2580*7c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
2581*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
2582*7c478bd9Sstevel@tonic-gate 
2583*7c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
2584*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_read: NULL clist\n");
2585*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
2586*7c478bd9Sstevel@tonic-gate 	}
2587*7c478bd9Sstevel@tonic-gate 
2588*7c478bd9Sstevel@tonic-gate 	bzero(&rx_wr, sizeof (ibt_send_wr_t));
2589*7c478bd9Sstevel@tonic-gate 	/*
2590*7c478bd9Sstevel@tonic-gate 	 * Remote address is at the head chunk item in list.
2591*7c478bd9Sstevel@tonic-gate 	 */
2592*7c478bd9Sstevel@tonic-gate 	rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_saddr;
2593*7c478bd9Sstevel@tonic-gate 	rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr; /* rkey */
2594*7c478bd9Sstevel@tonic-gate 
2595*7c478bd9Sstevel@tonic-gate 	nds = 0;
2596*7c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
2597*7c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
2598*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_read: DSEG_MAX too small!");
2599*7c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
2600*7c478bd9Sstevel@tonic-gate 		}
2601*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_daddr;
2602*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */
2603*7c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
2604*7c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
2605*7c478bd9Sstevel@tonic-gate 		nds++;
2606*7c478bd9Sstevel@tonic-gate 	}
2607*7c478bd9Sstevel@tonic-gate 
2608*7c478bd9Sstevel@tonic-gate 	if (wait) {
2609*7c478bd9Sstevel@tonic-gate 		rx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
2610*7c478bd9Sstevel@tonic-gate 		cv_sig = 1;
2611*7c478bd9Sstevel@tonic-gate 	} else {
2612*7c478bd9Sstevel@tonic-gate 		rx_wr.wr_flags = IBT_WR_NO_FLAGS;
2613*7c478bd9Sstevel@tonic-gate 		cv_sig = 0;
2614*7c478bd9Sstevel@tonic-gate 	}
2615*7c478bd9Sstevel@tonic-gate 
2616*7c478bd9Sstevel@tonic-gate 	wdesc = rib_init_sendwait(0, cv_sig, qp);
2617*7c478bd9Sstevel@tonic-gate 	rx_wr.wr_id = (ibt_wrid_t)wdesc;
2618*7c478bd9Sstevel@tonic-gate 	rx_wr.wr_opcode = IBT_WRC_RDMAR;
2619*7c478bd9Sstevel@tonic-gate 	rx_wr.wr_trans = IBT_RC_SRV;
2620*7c478bd9Sstevel@tonic-gate 	rx_wr.wr_nds = nds;
2621*7c478bd9Sstevel@tonic-gate 	rx_wr.wr_sgl = sgl;
2622*7c478bd9Sstevel@tonic-gate 
2623*7c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
2624*7c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
2625*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL);
2626*7c478bd9Sstevel@tonic-gate 	}
2627*7c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
2628*7c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
2629*7c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2630*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2631*7c478bd9Sstevel@tonic-gate 		if (rib_debug && ibt_status != IBT_SUCCESS)
2632*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_read: FAILED post_sending RDMAR"
2633*7c478bd9Sstevel@tonic-gate 				" wr_id %llx on qp %p, status=%d",
2634*7c478bd9Sstevel@tonic-gate 				(longlong_t)rx_wr.wr_id, (void *)qp,
2635*7c478bd9Sstevel@tonic-gate 				ibt_status);
2636*7c478bd9Sstevel@tonic-gate #endif
2637*7c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wdesc);
2638*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
2639*7c478bd9Sstevel@tonic-gate 	}
2640*7c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
2641*7c478bd9Sstevel@tonic-gate 
2642*7c478bd9Sstevel@tonic-gate 	/*
2643*7c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
2644*7c478bd9Sstevel@tonic-gate 	 */
2645*7c478bd9Sstevel@tonic-gate 	if (wait) {
2646*7c478bd9Sstevel@tonic-gate 		ret = rib_sendwait(qp, wdesc);
2647*7c478bd9Sstevel@tonic-gate 		if (ret != 0) {
2648*7c478bd9Sstevel@tonic-gate 			return (ret);
2649*7c478bd9Sstevel@tonic-gate 		}
2650*7c478bd9Sstevel@tonic-gate 	}
2651*7c478bd9Sstevel@tonic-gate 
2652*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
2653*7c478bd9Sstevel@tonic-gate }
2654*7c478bd9Sstevel@tonic-gate 
2655*7c478bd9Sstevel@tonic-gate int
2656*7c478bd9Sstevel@tonic-gate is_for_ipv4(ibt_ar_t *result)
2657*7c478bd9Sstevel@tonic-gate {
2658*7c478bd9Sstevel@tonic-gate 	int	i, size = sizeof (struct in_addr);
2659*7c478bd9Sstevel@tonic-gate 	uint8_t	zero = 0;
2660*7c478bd9Sstevel@tonic-gate 
2661*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < (ATS_AR_DATA_LEN - size); i++)
2662*7c478bd9Sstevel@tonic-gate 		zero |= result->ar_data[i];
2663*7c478bd9Sstevel@tonic-gate 	return (zero == 0);
2664*7c478bd9Sstevel@tonic-gate }
2665*7c478bd9Sstevel@tonic-gate 
2666*7c478bd9Sstevel@tonic-gate /*
2667*7c478bd9Sstevel@tonic-gate  * rib_srv_cm_handler()
2668*7c478bd9Sstevel@tonic-gate  *    Connection Manager callback to handle RC connection requests.
2669*7c478bd9Sstevel@tonic-gate  */
2670*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
2671*7c478bd9Sstevel@tonic-gate static ibt_cm_status_t
2672*7c478bd9Sstevel@tonic-gate rib_srv_cm_handler(void *any, ibt_cm_event_t *event,
2673*7c478bd9Sstevel@tonic-gate 	ibt_cm_return_args_t *ret_args, void *priv_data,
2674*7c478bd9Sstevel@tonic-gate 	ibt_priv_data_len_t len)
2675*7c478bd9Sstevel@tonic-gate {
2676*7c478bd9Sstevel@tonic-gate 	queue_t		*q;
2677*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
2678*7c478bd9Sstevel@tonic-gate 	rpcib_state_t	*ribstat;
2679*7c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
2680*7c478bd9Sstevel@tonic-gate 	rdma_stat	status = RDMA_SUCCESS;
2681*7c478bd9Sstevel@tonic-gate 	int		i;
2682*7c478bd9Sstevel@tonic-gate 	struct clist	cl;
2683*7c478bd9Sstevel@tonic-gate 	rdma_buf_t	rdbuf;
2684*7c478bd9Sstevel@tonic-gate 	void		*buf = NULL;
2685*7c478bd9Sstevel@tonic-gate 	ibt_cm_req_rcv_t	cm_req_rcv;
2686*7c478bd9Sstevel@tonic-gate 	CONN		*conn;
2687*7c478bd9Sstevel@tonic-gate 	ibt_status_t ibt_status;
2688*7c478bd9Sstevel@tonic-gate 	ibt_ar_t	ar_query, ar_result;
2689*7c478bd9Sstevel@tonic-gate 	ib_gid_t	sgid;
2690*7c478bd9Sstevel@tonic-gate 
2691*7c478bd9Sstevel@tonic-gate 
2692*7c478bd9Sstevel@tonic-gate 	ASSERT(any != NULL);
2693*7c478bd9Sstevel@tonic-gate 	ASSERT(event != NULL);
2694*7c478bd9Sstevel@tonic-gate 
2695*7c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)any;
2696*7c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
2697*7c478bd9Sstevel@tonic-gate 	ASSERT(hca != NULL);
2698*7c478bd9Sstevel@tonic-gate 
2699*7c478bd9Sstevel@tonic-gate 	/* got a connection request */
2700*7c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
2701*7c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_REQ_RCV:
2702*7c478bd9Sstevel@tonic-gate 		/*
2703*7c478bd9Sstevel@tonic-gate 		 * If the plugin is in the NO_ACCEPT state, bail out.
2704*7c478bd9Sstevel@tonic-gate 		 */
2705*7c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
2706*7c478bd9Sstevel@tonic-gate 		if (plugin_state == NO_ACCEPT) {
2707*7c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
2708*7c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
2709*7c478bd9Sstevel@tonic-gate 		}
2710*7c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
2711*7c478bd9Sstevel@tonic-gate 
2712*7c478bd9Sstevel@tonic-gate 		/*
2713*7c478bd9Sstevel@tonic-gate 		 * Need to send a MRA MAD to CM so that it does not
2714*7c478bd9Sstevel@tonic-gate 		 * timeout on us.
2715*7c478bd9Sstevel@tonic-gate 		 */
2716*7c478bd9Sstevel@tonic-gate 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
2717*7c478bd9Sstevel@tonic-gate 			    event->cm_event.req.req_timeout * 8, NULL, 0);
2718*7c478bd9Sstevel@tonic-gate 
2719*7c478bd9Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
2720*7c478bd9Sstevel@tonic-gate 		q = rib_stat->q;
2721*7c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
2722*7c478bd9Sstevel@tonic-gate 		status = rib_svc_create_chan(hca, (caddr_t)q,
2723*7c478bd9Sstevel@tonic-gate 			event->cm_event.req.req_prim_hca_port, &qp);
2724*7c478bd9Sstevel@tonic-gate 		if (status) {
2725*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2726*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_srv_cm_handler: "
2727*7c478bd9Sstevel@tonic-gate 			    "create_channel failed %d", status);
2728*7c478bd9Sstevel@tonic-gate #endif
2729*7c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
2730*7c478bd9Sstevel@tonic-gate 		}
2731*7c478bd9Sstevel@tonic-gate 		cm_req_rcv = event->cm_event.req;
2732*7c478bd9Sstevel@tonic-gate 
2733*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2734*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 2) {
2735*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2736*7c478bd9Sstevel@tonic-gate 			"server recv'ed IBT_CM_EVENT_REQ_RCV\n");
2737*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t SID:%llx\n",
2738*7c478bd9Sstevel@tonic-gate 				(longlong_t)cm_req_rcv.req_service_id);
2739*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Local Port:%d\n",
2740*7c478bd9Sstevel@tonic-gate 				cm_req_rcv.req_prim_hca_port);
2741*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE,
2742*7c478bd9Sstevel@tonic-gate 			"\t\t Remote GID:(prefix:%llx,guid:%llx)\n",
2743*7c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_dgid.gid_prefix,
2744*7c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_dgid.gid_guid);
2745*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Local GID:(prefix:%llx,guid:%llx)\n",
2746*7c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_sgid.gid_prefix,
2747*7c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_sgid.gid_guid);
2748*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Remote QPN:%u\n",
2749*7c478bd9Sstevel@tonic-gate 			cm_req_rcv.req_remote_qpn);
2750*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Remote Q_Key:%x\n",
2751*7c478bd9Sstevel@tonic-gate 			cm_req_rcv.req_remote_qkey);
2752*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Local QP %p (qp_hdl=%p)\n",
2753*7c478bd9Sstevel@tonic-gate 			(void *)qp, (void *)qp->qp_hdl);
2754*7c478bd9Sstevel@tonic-gate 		}
2755*7c478bd9Sstevel@tonic-gate 
2756*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 2) {
2757*7c478bd9Sstevel@tonic-gate 		    ibt_rc_chan_query_attr_t	chan_attrs;
2758*7c478bd9Sstevel@tonic-gate 
2759*7c478bd9Sstevel@tonic-gate 		    if (ibt_query_rc_channel(qp->qp_hdl, &chan_attrs)
2760*7c478bd9Sstevel@tonic-gate 			== IBT_SUCCESS) {
2761*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_svc_cm_handler: qp %p in "
2762*7c478bd9Sstevel@tonic-gate 			    "CEP state %d\n", (void *)qp, chan_attrs.rc_state);
2763*7c478bd9Sstevel@tonic-gate 		    }
2764*7c478bd9Sstevel@tonic-gate 		}
2765*7c478bd9Sstevel@tonic-gate #endif
2766*7c478bd9Sstevel@tonic-gate 
2767*7c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
2768*7c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rdma_ra_out = 1;
2769*7c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rdma_ra_in = 1;
2770*7c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
2771*7c478bd9Sstevel@tonic-gate 
2772*7c478bd9Sstevel@tonic-gate 		/*
2773*7c478bd9Sstevel@tonic-gate 		 * Pre-posts RECV buffers
2774*7c478bd9Sstevel@tonic-gate 		 */
2775*7c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
2776*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < preposted_rbufs; i++) {
2777*7c478bd9Sstevel@tonic-gate 		    bzero(&rdbuf, sizeof (rdbuf));
2778*7c478bd9Sstevel@tonic-gate 		    rdbuf.type = RECV_BUFFER;
2779*7c478bd9Sstevel@tonic-gate 		    buf = rib_rbuf_alloc(conn, &rdbuf);
2780*7c478bd9Sstevel@tonic-gate 		    if (buf == NULL) {
2781*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_svc_cm_handler: "
2782*7c478bd9Sstevel@tonic-gate 			    "No RECV_BUFFER buf!\n");
2783*7c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, NULL);
2784*7c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
2785*7c478bd9Sstevel@tonic-gate 		    }
2786*7c478bd9Sstevel@tonic-gate 
2787*7c478bd9Sstevel@tonic-gate 		    bzero(&cl, sizeof (cl));
2788*7c478bd9Sstevel@tonic-gate 		    cl.c_saddr = (uint64)rdbuf.addr;
2789*7c478bd9Sstevel@tonic-gate 		    cl.c_len = rdbuf.len;
2790*7c478bd9Sstevel@tonic-gate 		    cl.c_smemhandle.mrc_lmr = rdbuf.handle.mrc_lmr; /* lkey */
2791*7c478bd9Sstevel@tonic-gate 		    cl.c_next = NULL;
2792*7c478bd9Sstevel@tonic-gate 		    status = rib_post_recv(conn, &cl);
2793*7c478bd9Sstevel@tonic-gate 		    if (status != RDMA_SUCCESS) {
2794*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_srv_cm_handler: failed "
2795*7c478bd9Sstevel@tonic-gate 			    "posting RPC_REQ buf to qp %p!", (void *)qp);
2796*7c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, NULL);
2797*7c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
2798*7c478bd9Sstevel@tonic-gate 		    }
2799*7c478bd9Sstevel@tonic-gate 		}
2800*7c478bd9Sstevel@tonic-gate 		(void) rib_add_connlist(conn, &hca->srv_conn_list);
2801*7c478bd9Sstevel@tonic-gate 
2802*7c478bd9Sstevel@tonic-gate 		/*
2803*7c478bd9Sstevel@tonic-gate 		 * Get the address translation service record from ATS
2804*7c478bd9Sstevel@tonic-gate 		 */
2805*7c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
2806*7c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
2807*7c478bd9Sstevel@tonic-gate 		    rw_exit(&hca->state_lock);
2808*7c478bd9Sstevel@tonic-gate 		    return (IBT_CM_REJECT);
2809*7c478bd9Sstevel@tonic-gate 		}
2810*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
2811*7c478bd9Sstevel@tonic-gate 
2812*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < hca->hca_nports; i++) {
2813*7c478bd9Sstevel@tonic-gate 		    ibt_status = ibt_get_port_state(hca->hca_hdl, i+1,
2814*7c478bd9Sstevel@tonic-gate 					&sgid, NULL);
2815*7c478bd9Sstevel@tonic-gate 		    if (ibt_status != IBT_SUCCESS) {
2816*7c478bd9Sstevel@tonic-gate 			if (rib_debug) {
2817*7c478bd9Sstevel@tonic-gate 			    cmn_err(CE_WARN, "rib_srv_cm_handler: "
2818*7c478bd9Sstevel@tonic-gate 				"ibt_get_port_state FAILED!"
2819*7c478bd9Sstevel@tonic-gate 				"status = %d\n", ibt_status);
2820*7c478bd9Sstevel@tonic-gate 			}
2821*7c478bd9Sstevel@tonic-gate 		    } else {
2822*7c478bd9Sstevel@tonic-gate 			/*
2823*7c478bd9Sstevel@tonic-gate 			 * do ibt_query_ar()
2824*7c478bd9Sstevel@tonic-gate 			 */
2825*7c478bd9Sstevel@tonic-gate 			bzero(&ar_query, sizeof (ar_query));
2826*7c478bd9Sstevel@tonic-gate 			bzero(&ar_result, sizeof (ar_result));
2827*7c478bd9Sstevel@tonic-gate 			ar_query.ar_gid = cm_req_rcv.req_prim_addr.av_dgid;
2828*7c478bd9Sstevel@tonic-gate 			ar_query.ar_pkey = event->cm_event.req.req_pkey;
2829*7c478bd9Sstevel@tonic-gate 			ibt_status = ibt_query_ar(&sgid, &ar_query,
2830*7c478bd9Sstevel@tonic-gate 							&ar_result);
2831*7c478bd9Sstevel@tonic-gate 			if (ibt_status != IBT_SUCCESS) {
2832*7c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
2833*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_srv_cm_handler: "
2834*7c478bd9Sstevel@tonic-gate 				    "ibt_query_ar FAILED!"
2835*7c478bd9Sstevel@tonic-gate 				    "status = %d\n", ibt_status);
2836*7c478bd9Sstevel@tonic-gate 			    }
2837*7c478bd9Sstevel@tonic-gate 			} else {
2838*7c478bd9Sstevel@tonic-gate 			    conn = qptoc(qp);
2839*7c478bd9Sstevel@tonic-gate 
2840*7c478bd9Sstevel@tonic-gate 			    if (is_for_ipv4(&ar_result)) {
2841*7c478bd9Sstevel@tonic-gate 				struct sockaddr_in *s;
2842*7c478bd9Sstevel@tonic-gate 				int sin_size = sizeof (struct sockaddr_in);
2843*7c478bd9Sstevel@tonic-gate 				int in_size = sizeof (struct in_addr);
2844*7c478bd9Sstevel@tonic-gate 				uint8_t	*start_pos;
2845*7c478bd9Sstevel@tonic-gate 
2846*7c478bd9Sstevel@tonic-gate 				conn->c_raddr.maxlen =
2847*7c478bd9Sstevel@tonic-gate 					conn->c_raddr.len = sin_size;
2848*7c478bd9Sstevel@tonic-gate 				conn->c_raddr.buf = kmem_zalloc(sin_size,
2849*7c478bd9Sstevel@tonic-gate 						KM_SLEEP);
2850*7c478bd9Sstevel@tonic-gate 				s = (struct sockaddr_in *)conn->c_raddr.buf;
2851*7c478bd9Sstevel@tonic-gate 				s->sin_family = AF_INET;
2852*7c478bd9Sstevel@tonic-gate 				/*
2853*7c478bd9Sstevel@tonic-gate 				 * For IPv4,  the IP addr is stored in
2854*7c478bd9Sstevel@tonic-gate 				 * the last four bytes of ar_data.
2855*7c478bd9Sstevel@tonic-gate 				 */
2856*7c478bd9Sstevel@tonic-gate 				start_pos = ar_result.ar_data +
2857*7c478bd9Sstevel@tonic-gate 					ATS_AR_DATA_LEN - in_size;
2858*7c478bd9Sstevel@tonic-gate 				bcopy(start_pos, &s->sin_addr, in_size);
2859*7c478bd9Sstevel@tonic-gate 				if (rib_debug > 1) {
2860*7c478bd9Sstevel@tonic-gate 				    char print_addr[INET_ADDRSTRLEN];
2861*7c478bd9Sstevel@tonic-gate 
2862*7c478bd9Sstevel@tonic-gate 				    bzero(print_addr, INET_ADDRSTRLEN);
2863*7c478bd9Sstevel@tonic-gate 				    (void) inet_ntop(AF_INET, &s->sin_addr,
2864*7c478bd9Sstevel@tonic-gate 						print_addr, INET_ADDRSTRLEN);
2865*7c478bd9Sstevel@tonic-gate 				    cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2866*7c478bd9Sstevel@tonic-gate 					"remote clnt_addr: %s\n", print_addr);
2867*7c478bd9Sstevel@tonic-gate 				}
2868*7c478bd9Sstevel@tonic-gate 			    } else {
2869*7c478bd9Sstevel@tonic-gate 				struct sockaddr_in6 *s6;
2870*7c478bd9Sstevel@tonic-gate 				int sin6_size = sizeof (struct sockaddr_in6);
2871*7c478bd9Sstevel@tonic-gate 
2872*7c478bd9Sstevel@tonic-gate 				conn->c_raddr.maxlen =
2873*7c478bd9Sstevel@tonic-gate 					conn->c_raddr.len = sin6_size;
2874*7c478bd9Sstevel@tonic-gate 				conn->c_raddr.buf = kmem_zalloc(sin6_size,
2875*7c478bd9Sstevel@tonic-gate 					KM_SLEEP);
2876*7c478bd9Sstevel@tonic-gate 
2877*7c478bd9Sstevel@tonic-gate 				s6 = (struct sockaddr_in6 *)conn->c_raddr.buf;
2878*7c478bd9Sstevel@tonic-gate 				s6->sin6_family = AF_INET6;
2879*7c478bd9Sstevel@tonic-gate 				/* sin6_addr is stored in ar_data */
2880*7c478bd9Sstevel@tonic-gate 				bcopy(ar_result.ar_data, &s6->sin6_addr,
2881*7c478bd9Sstevel@tonic-gate 					sizeof (struct in6_addr));
2882*7c478bd9Sstevel@tonic-gate 				if (rib_debug > 1) {
2883*7c478bd9Sstevel@tonic-gate 				    char print_addr[INET6_ADDRSTRLEN];
2884*7c478bd9Sstevel@tonic-gate 
2885*7c478bd9Sstevel@tonic-gate 				    bzero(print_addr, INET6_ADDRSTRLEN);
2886*7c478bd9Sstevel@tonic-gate 				    (void) inet_ntop(AF_INET6, &s6->sin6_addr,
2887*7c478bd9Sstevel@tonic-gate 						print_addr, INET6_ADDRSTRLEN);
2888*7c478bd9Sstevel@tonic-gate 				    cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2889*7c478bd9Sstevel@tonic-gate 					"remote clnt_addr: %s\n", print_addr);
2890*7c478bd9Sstevel@tonic-gate 				}
2891*7c478bd9Sstevel@tonic-gate 			    }
2892*7c478bd9Sstevel@tonic-gate 			    return (IBT_CM_ACCEPT);
2893*7c478bd9Sstevel@tonic-gate 			}
2894*7c478bd9Sstevel@tonic-gate 		    }
2895*7c478bd9Sstevel@tonic-gate 		}
2896*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 1) {
2897*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_srv_cm_handler: "
2898*7c478bd9Sstevel@tonic-gate 				"address record query failed!");
2899*7c478bd9Sstevel@tonic-gate 		}
2900*7c478bd9Sstevel@tonic-gate 		break;
2901*7c478bd9Sstevel@tonic-gate 
2902*7c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
2903*7c478bd9Sstevel@tonic-gate 	{
2904*7c478bd9Sstevel@tonic-gate 		CONN		*conn;
2905*7c478bd9Sstevel@tonic-gate 		rib_qp_t	*qp;
2906*7c478bd9Sstevel@tonic-gate 
2907*7c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
2908*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
2909*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
2910*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
2911*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
2912*7c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
2913*7c478bd9Sstevel@tonic-gate 			/*
2914*7c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
2915*7c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
2916*7c478bd9Sstevel@tonic-gate 			 */
2917*7c478bd9Sstevel@tonic-gate 			break;
2918*7c478bd9Sstevel@tonic-gate 		default:
2919*7c478bd9Sstevel@tonic-gate 			/*
2920*7c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
2921*7c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
2922*7c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
2923*7c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
2924*7c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
2925*7c478bd9Sstevel@tonic-gate 			 * state
2926*7c478bd9Sstevel@tonic-gate 			 */
2927*7c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
2928*7c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
2929*7c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
2930*7c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
2931*7c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
2932*7c478bd9Sstevel@tonic-gate 				break;
2933*7c478bd9Sstevel@tonic-gate 			}
2934*7c478bd9Sstevel@tonic-gate 			conn->c_state = C_ERROR;
2935*7c478bd9Sstevel@tonic-gate 
2936*7c478bd9Sstevel@tonic-gate 			/*
2937*7c478bd9Sstevel@tonic-gate 			 * Free the rc_channel. Channel has already
2938*7c478bd9Sstevel@tonic-gate 			 * transitioned to ERROR state and WRs have been
2939*7c478bd9Sstevel@tonic-gate 			 * FLUSHED_ERR already.
2940*7c478bd9Sstevel@tonic-gate 			 */
2941*7c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
2942*7c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
2943*7c478bd9Sstevel@tonic-gate 
2944*7c478bd9Sstevel@tonic-gate 			/*
2945*7c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref goes down to 0
2946*7c478bd9Sstevel@tonic-gate 			 */
2947*7c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
2948*7c478bd9Sstevel@tonic-gate 				/*
2949*7c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
2950*7c478bd9Sstevel@tonic-gate 				 */
2951*7c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
2952*7c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
2953*7c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
2954*7c478bd9Sstevel@tonic-gate 					&hca->srv_conn_list);
2955*7c478bd9Sstevel@tonic-gate 			} else {
2956*7c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
2957*7c478bd9Sstevel@tonic-gate 			}
2958*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2959*7c478bd9Sstevel@tonic-gate 			if (rib_debug)
2960*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2961*7c478bd9Sstevel@tonic-gate 					" (CONN_CLOSED) channel disconnected");
2962*7c478bd9Sstevel@tonic-gate #endif
2963*7c478bd9Sstevel@tonic-gate 			break;
2964*7c478bd9Sstevel@tonic-gate 		}
2965*7c478bd9Sstevel@tonic-gate 		break;
2966*7c478bd9Sstevel@tonic-gate 	}
2967*7c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_EST:
2968*7c478bd9Sstevel@tonic-gate 	/*
2969*7c478bd9Sstevel@tonic-gate 	 * RTU received, hence connection established.
2970*7c478bd9Sstevel@tonic-gate 	 */
2971*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
2972*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2973*7c478bd9Sstevel@tonic-gate 				"(CONN_EST) channel established");
2974*7c478bd9Sstevel@tonic-gate 		break;
2975*7c478bd9Sstevel@tonic-gate 
2976*7c478bd9Sstevel@tonic-gate 	default:
2977*7c478bd9Sstevel@tonic-gate 	    if (rib_debug > 2) {
2978*7c478bd9Sstevel@tonic-gate 		/* Let CM handle the following events. */
2979*7c478bd9Sstevel@tonic-gate 		if (event->cm_type == IBT_CM_EVENT_REP_RCV) {
2980*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2981*7c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_REP_RCV\n");
2982*7c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) {
2983*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2984*7c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_LAP_RCV\n");
2985*7c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) {
2986*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2987*7c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_MRA_RCV\n");
2988*7c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_APR_RCV) {
2989*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2990*7c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_APR_RCV\n");
2991*7c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_FAILURE) {
2992*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
2993*7c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_FAILURE\n");
2994*7c478bd9Sstevel@tonic-gate 		}
2995*7c478bd9Sstevel@tonic-gate 	    }
2996*7c478bd9Sstevel@tonic-gate 	    return (IBT_CM_REJECT);
2997*7c478bd9Sstevel@tonic-gate 	}
2998*7c478bd9Sstevel@tonic-gate 
2999*7c478bd9Sstevel@tonic-gate 	/* accept all other CM messages (i.e. let the CM handle them) */
3000*7c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
3001*7c478bd9Sstevel@tonic-gate }
3002*7c478bd9Sstevel@tonic-gate 
3003*7c478bd9Sstevel@tonic-gate static rdma_stat
3004*7c478bd9Sstevel@tonic-gate rib_register_ats(rib_hca_t *hca)
3005*7c478bd9Sstevel@tonic-gate {
3006*7c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
3007*7c478bd9Sstevel@tonic-gate 	uint_t			port_size;
3008*7c478bd9Sstevel@tonic-gate 	uint_t			pki, i, num_ports, nbinds;
3009*7c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
3010*7c478bd9Sstevel@tonic-gate 	rib_service_t		*new_service, *temp_srv;
3011*7c478bd9Sstevel@tonic-gate 	rpcib_ats_t		*atsp;
3012*7c478bd9Sstevel@tonic-gate 	rpcib_ibd_insts_t	ibds;
3013*7c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
3014*7c478bd9Sstevel@tonic-gate 	ibt_ar_t		ar;	/* address record */
3015*7c478bd9Sstevel@tonic-gate 
3016*7c478bd9Sstevel@tonic-gate 	/*
3017*7c478bd9Sstevel@tonic-gate 	 * Query all ports for the given HCA
3018*7c478bd9Sstevel@tonic-gate 	 */
3019*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
3020*7c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
3021*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
3022*7c478bd9Sstevel@tonic-gate 		    &num_ports, &port_size);
3023*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3024*7c478bd9Sstevel@tonic-gate 	} else {
3025*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3026*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3027*7c478bd9Sstevel@tonic-gate 	}
3028*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
3029*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3030*7c478bd9Sstevel@tonic-gate 	    if (rib_debug) {
3031*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_ats: FAILED in "
3032*7c478bd9Sstevel@tonic-gate 		    "ibt_query_hca_ports, status = %d\n", ibt_status);
3033*7c478bd9Sstevel@tonic-gate 	    }
3034*7c478bd9Sstevel@tonic-gate #endif
3035*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3036*7c478bd9Sstevel@tonic-gate 	}
3037*7c478bd9Sstevel@tonic-gate 
3038*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3039*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
3040*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_ats: Ports detected "
3041*7c478bd9Sstevel@tonic-gate 		    "%d\n", num_ports);
3042*7c478bd9Sstevel@tonic-gate 
3043*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < num_ports; i++) {
3044*7c478bd9Sstevel@tonic-gate 			if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
3045*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_ats "
3046*7c478bd9Sstevel@tonic-gate 				    "Port #: %d INACTIVE\n", i+1);
3047*7c478bd9Sstevel@tonic-gate 			} else if (port_infop[i].p_linkstate ==
3048*7c478bd9Sstevel@tonic-gate 			    IBT_PORT_ACTIVE) {
3049*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_register_ats "
3050*7c478bd9Sstevel@tonic-gate 				    "Port #: %d ACTIVE\n", i+1);
3051*7c478bd9Sstevel@tonic-gate 			}
3052*7c478bd9Sstevel@tonic-gate 		}
3053*7c478bd9Sstevel@tonic-gate 	}
3054*7c478bd9Sstevel@tonic-gate #endif
3055*7c478bd9Sstevel@tonic-gate 
3056*7c478bd9Sstevel@tonic-gate 	ibds.rib_ibd_alloc = N_IBD_INSTANCES;
3057*7c478bd9Sstevel@tonic-gate 	ibds.rib_ibd_cnt = 0;
3058*7c478bd9Sstevel@tonic-gate 	ibds.rib_ats = (rpcib_ats_t *)kmem_zalloc(ibds.rib_ibd_alloc *
3059*7c478bd9Sstevel@tonic-gate 			sizeof (rpcib_ats_t), KM_SLEEP);
3060*7c478bd9Sstevel@tonic-gate 	rib_get_ibd_insts(&ibds);
3061*7c478bd9Sstevel@tonic-gate 
3062*7c478bd9Sstevel@tonic-gate 	if (ibds.rib_ibd_cnt == 0) {
3063*7c478bd9Sstevel@tonic-gate 	    kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc *
3064*7c478bd9Sstevel@tonic-gate 				sizeof (rpcib_ats_t));
3065*7c478bd9Sstevel@tonic-gate 	    ibt_free_portinfo(port_infop, port_size);
3066*7c478bd9Sstevel@tonic-gate 	    return (RDMA_FAILED);
3067*7c478bd9Sstevel@tonic-gate 	}
3068*7c478bd9Sstevel@tonic-gate 
3069*7c478bd9Sstevel@tonic-gate 	/*
3070*7c478bd9Sstevel@tonic-gate 	 * Get the IP addresses of active ports and
3071*7c478bd9Sstevel@tonic-gate 	 * register them with ATS.  IPv4 addresses
3072*7c478bd9Sstevel@tonic-gate 	 * have precedence over IPv6 addresses.
3073*7c478bd9Sstevel@tonic-gate 	 */
3074*7c478bd9Sstevel@tonic-gate 	if (get_ibd_ipaddr(&ibds) != 0) {
3075*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3076*7c478bd9Sstevel@tonic-gate 	    if (rib_debug > 1) {
3077*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_register_ats: "
3078*7c478bd9Sstevel@tonic-gate 		    "get_ibd_ipaddr failed");
3079*7c478bd9Sstevel@tonic-gate 	    }
3080*7c478bd9Sstevel@tonic-gate #endif
3081*7c478bd9Sstevel@tonic-gate 	    kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc *
3082*7c478bd9Sstevel@tonic-gate 				sizeof (rpcib_ats_t));
3083*7c478bd9Sstevel@tonic-gate 	    ibt_free_portinfo(port_infop, port_size);
3084*7c478bd9Sstevel@tonic-gate 	    return (RDMA_FAILED);
3085*7c478bd9Sstevel@tonic-gate 	}
3086*7c478bd9Sstevel@tonic-gate 
3087*7c478bd9Sstevel@tonic-gate 	/*
3088*7c478bd9Sstevel@tonic-gate 	 * Start ATS registration for active ports on this HCA.
3089*7c478bd9Sstevel@tonic-gate 	 */
3090*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
3091*7c478bd9Sstevel@tonic-gate 	nbinds = 0;
3092*7c478bd9Sstevel@tonic-gate 	new_service = NULL;
3093*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
3094*7c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
3095*7c478bd9Sstevel@tonic-gate 			continue;
3096*7c478bd9Sstevel@tonic-gate 
3097*7c478bd9Sstevel@tonic-gate 	    for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
3098*7c478bd9Sstevel@tonic-gate 		pkey = port_infop[i].p_pkey_tbl[pki];
3099*7c478bd9Sstevel@tonic-gate 		if ((pkey & IBSRM_HB) && (pkey != IB_PKEY_INVALID_FULL)) {
3100*7c478bd9Sstevel@tonic-gate 		    ar.ar_gid = port_infop[i].p_sgid_tbl[0];
3101*7c478bd9Sstevel@tonic-gate 		    ar.ar_pkey = pkey;
3102*7c478bd9Sstevel@tonic-gate 		    atsp = get_ibd_entry(&ar.ar_gid, pkey, &ibds);
3103*7c478bd9Sstevel@tonic-gate 		    if (atsp == NULL)
3104*7c478bd9Sstevel@tonic-gate 			continue;
3105*7c478bd9Sstevel@tonic-gate 		/*
3106*7c478bd9Sstevel@tonic-gate 		 * store the sin[6]_addr in ar_data
3107*7c478bd9Sstevel@tonic-gate 		 */
3108*7c478bd9Sstevel@tonic-gate 		    (void) bzero(ar.ar_data, ATS_AR_DATA_LEN);
3109*7c478bd9Sstevel@tonic-gate 		    if (atsp->ras_inet_type == AF_INET) {
3110*7c478bd9Sstevel@tonic-gate 			uint8_t *start_pos;
3111*7c478bd9Sstevel@tonic-gate 
3112*7c478bd9Sstevel@tonic-gate 			/*
3113*7c478bd9Sstevel@tonic-gate 			 * The ipv4 addr goes into the last
3114*7c478bd9Sstevel@tonic-gate 			 * four bytes of ar_data.
3115*7c478bd9Sstevel@tonic-gate 			 */
3116*7c478bd9Sstevel@tonic-gate 			start_pos = ar.ar_data + ATS_AR_DATA_LEN -
3117*7c478bd9Sstevel@tonic-gate 				sizeof (struct in_addr);
3118*7c478bd9Sstevel@tonic-gate 			bcopy(&atsp->ras_sin.sin_addr, start_pos,
3119*7c478bd9Sstevel@tonic-gate 				sizeof (struct in_addr));
3120*7c478bd9Sstevel@tonic-gate 		    } else if (atsp->ras_inet_type == AF_INET6) {
3121*7c478bd9Sstevel@tonic-gate 			bcopy(&atsp->ras_sin6.sin6_addr, ar.ar_data,
3122*7c478bd9Sstevel@tonic-gate 				sizeof (struct in6_addr));
3123*7c478bd9Sstevel@tonic-gate 		    } else
3124*7c478bd9Sstevel@tonic-gate 			continue;
3125*7c478bd9Sstevel@tonic-gate 
3126*7c478bd9Sstevel@tonic-gate 		    ibt_status = ibt_register_ar(hca->ibt_clnt_hdl, &ar);
3127*7c478bd9Sstevel@tonic-gate 		    if (ibt_status == IBT_SUCCESS) {
3128*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3129*7c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
3130*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_ats: "
3131*7c478bd9Sstevel@tonic-gate 				    "ibt_register_ar OK on port %d", i+1);
3132*7c478bd9Sstevel@tonic-gate 			}
3133*7c478bd9Sstevel@tonic-gate #endif
3134*7c478bd9Sstevel@tonic-gate 			/*
3135*7c478bd9Sstevel@tonic-gate 			 * Allocate and prepare a service entry
3136*7c478bd9Sstevel@tonic-gate 			 */
3137*7c478bd9Sstevel@tonic-gate 			new_service = kmem_zalloc(sizeof (rib_service_t),
3138*7c478bd9Sstevel@tonic-gate 				KM_SLEEP);
3139*7c478bd9Sstevel@tonic-gate 			new_service->srv_port = i + 1;
3140*7c478bd9Sstevel@tonic-gate 			new_service->srv_ar = ar;
3141*7c478bd9Sstevel@tonic-gate 			new_service->srv_next = NULL;
3142*7c478bd9Sstevel@tonic-gate 
3143*7c478bd9Sstevel@tonic-gate 			/*
3144*7c478bd9Sstevel@tonic-gate 			 * Add to the service list for this HCA
3145*7c478bd9Sstevel@tonic-gate 			 */
3146*7c478bd9Sstevel@tonic-gate 			new_service->srv_next = hca->ats_list;
3147*7c478bd9Sstevel@tonic-gate 			hca->ats_list = new_service;
3148*7c478bd9Sstevel@tonic-gate 			new_service = NULL;
3149*7c478bd9Sstevel@tonic-gate 			nbinds ++;
3150*7c478bd9Sstevel@tonic-gate 		    } else {
3151*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3152*7c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
3153*7c478bd9Sstevel@tonic-gate 			    cmn_err(CE_WARN, "rib_register_ats: "
3154*7c478bd9Sstevel@tonic-gate 			    "ibt_register_ar FAILED on port %d", i+1);
3155*7c478bd9Sstevel@tonic-gate 			}
3156*7c478bd9Sstevel@tonic-gate #endif
3157*7c478bd9Sstevel@tonic-gate 		    }
3158*7c478bd9Sstevel@tonic-gate 		}
3159*7c478bd9Sstevel@tonic-gate 	    }
3160*7c478bd9Sstevel@tonic-gate 	}
3161*7c478bd9Sstevel@tonic-gate 
3162*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3163*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
3164*7c478bd9Sstevel@tonic-gate 		for (temp_srv = hca->ats_list; temp_srv != NULL;
3165*7c478bd9Sstevel@tonic-gate 			temp_srv = temp_srv->srv_next) {
3166*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "Service: ATS, active on"
3167*7c478bd9Sstevel@tonic-gate 					" port: %d\n", temp_srv->srv_port);
3168*7c478bd9Sstevel@tonic-gate 		}
3169*7c478bd9Sstevel@tonic-gate 	}
3170*7c478bd9Sstevel@tonic-gate #endif
3171*7c478bd9Sstevel@tonic-gate 
3172*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
3173*7c478bd9Sstevel@tonic-gate 	kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc * sizeof (rpcib_ats_t));
3174*7c478bd9Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
3175*7c478bd9Sstevel@tonic-gate 
3176*7c478bd9Sstevel@tonic-gate 	if (nbinds == 0) {
3177*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3178*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
3179*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_register_ats FAILED!\n");
3180*7c478bd9Sstevel@tonic-gate 	}
3181*7c478bd9Sstevel@tonic-gate #endif
3182*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3183*7c478bd9Sstevel@tonic-gate 	}
3184*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
3185*7c478bd9Sstevel@tonic-gate }
3186*7c478bd9Sstevel@tonic-gate 
3187*7c478bd9Sstevel@tonic-gate static rdma_stat
3188*7c478bd9Sstevel@tonic-gate rib_register_service(rib_hca_t *hca, int service_type)
3189*7c478bd9Sstevel@tonic-gate {
3190*7c478bd9Sstevel@tonic-gate 	ibt_srv_desc_t		sdesc;
3191*7c478bd9Sstevel@tonic-gate 	ibt_srv_bind_t		sbind;
3192*7c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
3193*7c478bd9Sstevel@tonic-gate 	ib_svc_id_t		srv_id;
3194*7c478bd9Sstevel@tonic-gate 	ibt_srv_hdl_t		srv_hdl;
3195*7c478bd9Sstevel@tonic-gate 	uint_t			port_size;
3196*7c478bd9Sstevel@tonic-gate 	uint_t			pki, i, j, num_ports, nbinds;
3197*7c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
3198*7c478bd9Sstevel@tonic-gate 	char			**addrs;
3199*7c478bd9Sstevel@tonic-gate 	int			addr_count;
3200*7c478bd9Sstevel@tonic-gate 	rib_service_t		*new_service, *temp_srv;
3201*7c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
3202*7c478bd9Sstevel@tonic-gate 
3203*7c478bd9Sstevel@tonic-gate 	/*
3204*7c478bd9Sstevel@tonic-gate 	 * Query all ports for the given HCA
3205*7c478bd9Sstevel@tonic-gate 	 */
3206*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
3207*7c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
3208*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
3209*7c478bd9Sstevel@tonic-gate 		    &num_ports, &port_size);
3210*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3211*7c478bd9Sstevel@tonic-gate 	} else {
3212*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3213*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3214*7c478bd9Sstevel@tonic-gate 	}
3215*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
3216*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3217*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_service: FAILED in "
3218*7c478bd9Sstevel@tonic-gate 		    "ibt_query_hca_ports, status = %d\n", ibt_status);
3219*7c478bd9Sstevel@tonic-gate #endif
3220*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3221*7c478bd9Sstevel@tonic-gate 	}
3222*7c478bd9Sstevel@tonic-gate 
3223*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3224*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
3225*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_service: Ports detected "
3226*7c478bd9Sstevel@tonic-gate 		    "%d\n", num_ports);
3227*7c478bd9Sstevel@tonic-gate 
3228*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < num_ports; i++) {
3229*7c478bd9Sstevel@tonic-gate 			if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
3230*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_service "
3231*7c478bd9Sstevel@tonic-gate 				    "Port #: %d INACTIVE\n", i+1);
3232*7c478bd9Sstevel@tonic-gate 			} else if (port_infop[i].p_linkstate ==
3233*7c478bd9Sstevel@tonic-gate 			    IBT_PORT_ACTIVE) {
3234*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_register_service "
3235*7c478bd9Sstevel@tonic-gate 				    "Port #: %d ACTIVE\n", i+1);
3236*7c478bd9Sstevel@tonic-gate 			}
3237*7c478bd9Sstevel@tonic-gate 		}
3238*7c478bd9Sstevel@tonic-gate 	}
3239*7c478bd9Sstevel@tonic-gate #endif
3240*7c478bd9Sstevel@tonic-gate 	/*
3241*7c478bd9Sstevel@tonic-gate 	 * Get all the IP addresses on this system to register the
3242*7c478bd9Sstevel@tonic-gate 	 * given "service type" on all DNS recognized IP addrs.
3243*7c478bd9Sstevel@tonic-gate 	 * Each service type such as NFS will have all the systems
3244*7c478bd9Sstevel@tonic-gate 	 * IP addresses as its different names. For now the only
3245*7c478bd9Sstevel@tonic-gate 	 * type of service we support in RPCIB is NFS.
3246*7c478bd9Sstevel@tonic-gate 	 */
3247*7c478bd9Sstevel@tonic-gate 	addrs = get_ip_addrs(&addr_count);
3248*7c478bd9Sstevel@tonic-gate 	if (addrs == NULL) {
3249*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3250*7c478bd9Sstevel@tonic-gate 		if (rib_debug) {
3251*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_register_service: "
3252*7c478bd9Sstevel@tonic-gate 			"get_ip_addrs failed\n");
3253*7c478bd9Sstevel@tonic-gate 		}
3254*7c478bd9Sstevel@tonic-gate #endif
3255*7c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(port_infop, port_size);
3256*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3257*7c478bd9Sstevel@tonic-gate 	}
3258*7c478bd9Sstevel@tonic-gate 
3259*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3260*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
3261*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < addr_count; i++)
3262*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "addr %d: %s\n", i, addrs[i]);
3263*7c478bd9Sstevel@tonic-gate 	}
3264*7c478bd9Sstevel@tonic-gate #endif
3265*7c478bd9Sstevel@tonic-gate 
3266*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
3267*7c478bd9Sstevel@tonic-gate 	/*
3268*7c478bd9Sstevel@tonic-gate 	 * Start registering and binding service to active
3269*7c478bd9Sstevel@tonic-gate 	 * on active ports on this HCA.
3270*7c478bd9Sstevel@tonic-gate 	 */
3271*7c478bd9Sstevel@tonic-gate 	nbinds = 0;
3272*7c478bd9Sstevel@tonic-gate 	new_service = NULL;
3273*7c478bd9Sstevel@tonic-gate 
3274*7c478bd9Sstevel@tonic-gate 	/*
3275*7c478bd9Sstevel@tonic-gate 	 * We use IP addresses as the service names for
3276*7c478bd9Sstevel@tonic-gate 	 * service registration.  Register each of them
3277*7c478bd9Sstevel@tonic-gate 	 * with CM to obtain a svc_id and svc_hdl.  We do not
3278*7c478bd9Sstevel@tonic-gate 	 * register the service with machine's loopback address.
3279*7c478bd9Sstevel@tonic-gate 	 */
3280*7c478bd9Sstevel@tonic-gate 	for (j = 1; j < addr_count; j++) {
3281*7c478bd9Sstevel@tonic-gate 	    (void) bzero(&srv_id, sizeof (ib_svc_id_t));
3282*7c478bd9Sstevel@tonic-gate 	    (void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t));
3283*7c478bd9Sstevel@tonic-gate 	    (void) bzero(&sdesc, sizeof (ibt_srv_desc_t));
3284*7c478bd9Sstevel@tonic-gate 
3285*7c478bd9Sstevel@tonic-gate 	    sdesc.sd_handler = rib_srv_cm_handler;
3286*7c478bd9Sstevel@tonic-gate 	    sdesc.sd_flags = 0;
3287*7c478bd9Sstevel@tonic-gate 
3288*7c478bd9Sstevel@tonic-gate 	    ibt_status = ibt_register_service(hca->ibt_clnt_hdl,
3289*7c478bd9Sstevel@tonic-gate 			    &sdesc, 0, 1, &srv_hdl, &srv_id);
3290*7c478bd9Sstevel@tonic-gate 	    if (ibt_status != IBT_SUCCESS) {
3291*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3292*7c478bd9Sstevel@tonic-gate 		if (rib_debug) {
3293*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_register_service: "
3294*7c478bd9Sstevel@tonic-gate 			"ibt_register_service FAILED, status "
3295*7c478bd9Sstevel@tonic-gate 			"= %d\n", ibt_status);
3296*7c478bd9Sstevel@tonic-gate 		}
3297*7c478bd9Sstevel@tonic-gate #endif
3298*7c478bd9Sstevel@tonic-gate 		/*
3299*7c478bd9Sstevel@tonic-gate 		 * No need to go on, since we failed to obtain
3300*7c478bd9Sstevel@tonic-gate 		 * a srv_id and srv_hdl. Move on to the next
3301*7c478bd9Sstevel@tonic-gate 		 * IP addr as a service name.
3302*7c478bd9Sstevel@tonic-gate 		 */
3303*7c478bd9Sstevel@tonic-gate 		continue;
3304*7c478bd9Sstevel@tonic-gate 	    }
3305*7c478bd9Sstevel@tonic-gate 	    for (i = 0; i < num_ports; i++) {
3306*7c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
3307*7c478bd9Sstevel@tonic-gate 			continue;
3308*7c478bd9Sstevel@tonic-gate 
3309*7c478bd9Sstevel@tonic-gate 		for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
3310*7c478bd9Sstevel@tonic-gate 		    pkey = port_infop[i].p_pkey_tbl[pki];
3311*7c478bd9Sstevel@tonic-gate 		    if ((pkey & IBSRM_HB) && (pkey != IB_PKEY_INVALID_FULL)) {
3312*7c478bd9Sstevel@tonic-gate 
3313*7c478bd9Sstevel@tonic-gate 			/*
3314*7c478bd9Sstevel@tonic-gate 			 * Allocate and prepare a service entry
3315*7c478bd9Sstevel@tonic-gate 			 */
3316*7c478bd9Sstevel@tonic-gate 			new_service = kmem_zalloc(1 * sizeof (rib_service_t),
3317*7c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
3318*7c478bd9Sstevel@tonic-gate 			new_service->srv_type = service_type;
3319*7c478bd9Sstevel@tonic-gate 			new_service->srv_port = i + 1;
3320*7c478bd9Sstevel@tonic-gate 			new_service->srv_id = srv_id;
3321*7c478bd9Sstevel@tonic-gate 			new_service->srv_hdl = srv_hdl;
3322*7c478bd9Sstevel@tonic-gate 			new_service->srv_sbind_hdl = kmem_zalloc(1 *
3323*7c478bd9Sstevel@tonic-gate 			    sizeof (ibt_sbind_hdl_t), KM_SLEEP);
3324*7c478bd9Sstevel@tonic-gate 
3325*7c478bd9Sstevel@tonic-gate 			new_service->srv_name = kmem_zalloc(IB_SVC_NAME_LEN,
3326*7c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
3327*7c478bd9Sstevel@tonic-gate 			(void) bcopy(addrs[j], new_service->srv_name,
3328*7c478bd9Sstevel@tonic-gate 			    IB_SVC_NAME_LEN);
3329*7c478bd9Sstevel@tonic-gate 			(void) strlcat(new_service->srv_name, "::NFS",
3330*7c478bd9Sstevel@tonic-gate 				IB_SVC_NAME_LEN);
3331*7c478bd9Sstevel@tonic-gate 			new_service->srv_next = NULL;
3332*7c478bd9Sstevel@tonic-gate 
3333*7c478bd9Sstevel@tonic-gate 			/*
3334*7c478bd9Sstevel@tonic-gate 			 * Bind the service, specified by the IP address,
3335*7c478bd9Sstevel@tonic-gate 			 * to the port/pkey using the srv_hdl returned
3336*7c478bd9Sstevel@tonic-gate 			 * from ibt_register_service().
3337*7c478bd9Sstevel@tonic-gate 			 */
3338*7c478bd9Sstevel@tonic-gate 			(void) bzero(&sbind, sizeof (ibt_srv_bind_t));
3339*7c478bd9Sstevel@tonic-gate 			sbind.sb_pkey = pkey;
3340*7c478bd9Sstevel@tonic-gate 			sbind.sb_lease = 0xFFFFFFFF;
3341*7c478bd9Sstevel@tonic-gate 			sbind.sb_key[0] = NFS_SEC_KEY0;
3342*7c478bd9Sstevel@tonic-gate 			sbind.sb_key[1] = NFS_SEC_KEY1;
3343*7c478bd9Sstevel@tonic-gate 			sbind.sb_name = new_service->srv_name;
3344*7c478bd9Sstevel@tonic-gate 
3345*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3346*7c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
3347*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_register_service: "
3348*7c478bd9Sstevel@tonic-gate 				    "binding service using name: %s\n",
3349*7c478bd9Sstevel@tonic-gate 				    sbind.sb_name);
3350*7c478bd9Sstevel@tonic-gate 			}
3351*7c478bd9Sstevel@tonic-gate #endif
3352*7c478bd9Sstevel@tonic-gate 			ibt_status = ibt_bind_service(srv_hdl,
3353*7c478bd9Sstevel@tonic-gate 			    port_infop[i].p_sgid_tbl[0], &sbind, rib_stat,
3354*7c478bd9Sstevel@tonic-gate 			    new_service->srv_sbind_hdl);
3355*7c478bd9Sstevel@tonic-gate 			if (ibt_status != IBT_SUCCESS) {
3356*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3357*7c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
3358*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_service: FAILED"
3359*7c478bd9Sstevel@tonic-gate 				    " in ibt_bind_service, status = %d\n",
3360*7c478bd9Sstevel@tonic-gate 				    ibt_status);
3361*7c478bd9Sstevel@tonic-gate 			    }
3362*7c478bd9Sstevel@tonic-gate #endif
3363*7c478bd9Sstevel@tonic-gate 				kmem_free(new_service->srv_sbind_hdl,
3364*7c478bd9Sstevel@tonic-gate 				    sizeof (ibt_sbind_hdl_t));
3365*7c478bd9Sstevel@tonic-gate 				kmem_free(new_service->srv_name,
3366*7c478bd9Sstevel@tonic-gate 				    IB_SVC_NAME_LEN);
3367*7c478bd9Sstevel@tonic-gate 				kmem_free(new_service,
3368*7c478bd9Sstevel@tonic-gate 				    sizeof (rib_service_t));
3369*7c478bd9Sstevel@tonic-gate 				new_service = NULL;
3370*7c478bd9Sstevel@tonic-gate 				continue;
3371*7c478bd9Sstevel@tonic-gate 			}
3372*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3373*7c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
3374*7c478bd9Sstevel@tonic-gate 				if (ibt_status == IBT_SUCCESS)
3375*7c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "rib_regstr_service: "
3376*7c478bd9Sstevel@tonic-gate 					    "Serv: %s REGISTERED on port: %d",
3377*7c478bd9Sstevel@tonic-gate 					    sbind.sb_name, i+1);
3378*7c478bd9Sstevel@tonic-gate 			}
3379*7c478bd9Sstevel@tonic-gate #endif
3380*7c478bd9Sstevel@tonic-gate 			/*
3381*7c478bd9Sstevel@tonic-gate 			 * Add to the service list for this HCA
3382*7c478bd9Sstevel@tonic-gate 			 */
3383*7c478bd9Sstevel@tonic-gate 			new_service->srv_next = hca->service_list;
3384*7c478bd9Sstevel@tonic-gate 			hca->service_list = new_service;
3385*7c478bd9Sstevel@tonic-gate 			new_service = NULL;
3386*7c478bd9Sstevel@tonic-gate 			nbinds ++;
3387*7c478bd9Sstevel@tonic-gate 		    }
3388*7c478bd9Sstevel@tonic-gate 		}
3389*7c478bd9Sstevel@tonic-gate 	    }
3390*7c478bd9Sstevel@tonic-gate 	}
3391*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
3392*7c478bd9Sstevel@tonic-gate 
3393*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3394*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
3395*7c478bd9Sstevel@tonic-gate 		/*
3396*7c478bd9Sstevel@tonic-gate 		 * Change this print to a more generic one, as rpcib
3397*7c478bd9Sstevel@tonic-gate 		 * is supposed to handle multiple service types.
3398*7c478bd9Sstevel@tonic-gate 		 */
3399*7c478bd9Sstevel@tonic-gate 		for (temp_srv = hca->service_list; temp_srv != NULL;
3400*7c478bd9Sstevel@tonic-gate 			temp_srv = temp_srv->srv_next) {
3401*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "NFS-IB, active on port:"
3402*7c478bd9Sstevel@tonic-gate 					" %d\n"
3403*7c478bd9Sstevel@tonic-gate 					"Using name: %s", temp_srv->srv_port,
3404*7c478bd9Sstevel@tonic-gate 					temp_srv->srv_name);
3405*7c478bd9Sstevel@tonic-gate 		}
3406*7c478bd9Sstevel@tonic-gate 	}
3407*7c478bd9Sstevel@tonic-gate #endif
3408*7c478bd9Sstevel@tonic-gate 
3409*7c478bd9Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
3410*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < addr_count; i++) {
3411*7c478bd9Sstevel@tonic-gate 		if (addrs[i])
3412*7c478bd9Sstevel@tonic-gate 			kmem_free(addrs[i], IB_SVC_NAME_LEN);
3413*7c478bd9Sstevel@tonic-gate 	}
3414*7c478bd9Sstevel@tonic-gate 	kmem_free(addrs, addr_count * sizeof (char *));
3415*7c478bd9Sstevel@tonic-gate 
3416*7c478bd9Sstevel@tonic-gate 	if (nbinds == 0) {
3417*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3418*7c478bd9Sstevel@tonic-gate 	    if (rib_debug) {
3419*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_register_service: "
3420*7c478bd9Sstevel@tonic-gate 		    "bind_service FAILED!\n");
3421*7c478bd9Sstevel@tonic-gate 	    }
3422*7c478bd9Sstevel@tonic-gate #endif
3423*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3424*7c478bd9Sstevel@tonic-gate 	} else {
3425*7c478bd9Sstevel@tonic-gate 		/*
3426*7c478bd9Sstevel@tonic-gate 		 * Put this plugin into accept state, since atleast
3427*7c478bd9Sstevel@tonic-gate 		 * one registration was successful.
3428*7c478bd9Sstevel@tonic-gate 		 */
3429*7c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
3430*7c478bd9Sstevel@tonic-gate 		plugin_state = ACCEPT;
3431*7c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
3432*7c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
3433*7c478bd9Sstevel@tonic-gate 	}
3434*7c478bd9Sstevel@tonic-gate }
3435*7c478bd9Sstevel@tonic-gate 
3436*7c478bd9Sstevel@tonic-gate void
3437*7c478bd9Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd)
3438*7c478bd9Sstevel@tonic-gate {
3439*7c478bd9Sstevel@tonic-gate 	rdma_stat status = RDMA_SUCCESS;
3440*7c478bd9Sstevel@tonic-gate 
3441*7c478bd9Sstevel@tonic-gate 	rd->active = 0;
3442*7c478bd9Sstevel@tonic-gate 	rd->err_code = RDMA_FAILED;
3443*7c478bd9Sstevel@tonic-gate 
3444*7c478bd9Sstevel@tonic-gate 	/*
3445*7c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
3446*7c478bd9Sstevel@tonic-gate 	 */
3447*7c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
3448*7c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
3449*7c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
3450*7c478bd9Sstevel@tonic-gate 		return;
3451*7c478bd9Sstevel@tonic-gate 	}
3452*7c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
3453*7c478bd9Sstevel@tonic-gate 
3454*7c478bd9Sstevel@tonic-gate 	rib_stat->q = &rd->q;
3455*7c478bd9Sstevel@tonic-gate 	/*
3456*7c478bd9Sstevel@tonic-gate 	 * Register the Address translation service
3457*7c478bd9Sstevel@tonic-gate 	 */
3458*7c478bd9Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
3459*7c478bd9Sstevel@tonic-gate 	if (ats_running == 0) {
3460*7c478bd9Sstevel@tonic-gate 		if (rib_register_ats(rib_stat->hca) != RDMA_SUCCESS) {
3461*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3462*7c478bd9Sstevel@tonic-gate 		    if (rib_debug) {
3463*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
3464*7c478bd9Sstevel@tonic-gate 			    "rib_listen(): ats registration failed!");
3465*7c478bd9Sstevel@tonic-gate 		    }
3466*7c478bd9Sstevel@tonic-gate #endif
3467*7c478bd9Sstevel@tonic-gate 		    mutex_exit(&rib_stat->open_hca_lock);
3468*7c478bd9Sstevel@tonic-gate 		    return;
3469*7c478bd9Sstevel@tonic-gate 		} else {
3470*7c478bd9Sstevel@tonic-gate 			ats_running = 1;
3471*7c478bd9Sstevel@tonic-gate 		}
3472*7c478bd9Sstevel@tonic-gate 	}
3473*7c478bd9Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
3474*7c478bd9Sstevel@tonic-gate 
3475*7c478bd9Sstevel@tonic-gate 	/*
3476*7c478bd9Sstevel@tonic-gate 	 * Right now the only service type is NFS. Hence force feed this
3477*7c478bd9Sstevel@tonic-gate 	 * value. Ideally to communicate the service type it should be
3478*7c478bd9Sstevel@tonic-gate 	 * passed down in rdma_svc_data.
3479*7c478bd9Sstevel@tonic-gate 	 */
3480*7c478bd9Sstevel@tonic-gate 	rib_stat->service_type = NFS;
3481*7c478bd9Sstevel@tonic-gate 	status = rib_register_service(rib_stat->hca, NFS);
3482*7c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
3483*7c478bd9Sstevel@tonic-gate 		rd->err_code = status;
3484*7c478bd9Sstevel@tonic-gate 		return;
3485*7c478bd9Sstevel@tonic-gate 	}
3486*7c478bd9Sstevel@tonic-gate 	/*
3487*7c478bd9Sstevel@tonic-gate 	 * Service active on an HCA, check rd->err_code for more
3488*7c478bd9Sstevel@tonic-gate 	 * explainable errors.
3489*7c478bd9Sstevel@tonic-gate 	 */
3490*7c478bd9Sstevel@tonic-gate 	rd->active = 1;
3491*7c478bd9Sstevel@tonic-gate 	rd->err_code = status;
3492*7c478bd9Sstevel@tonic-gate }
3493*7c478bd9Sstevel@tonic-gate 
3494*7c478bd9Sstevel@tonic-gate /* XXXX */
3495*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
3496*7c478bd9Sstevel@tonic-gate static void
3497*7c478bd9Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata)
3498*7c478bd9Sstevel@tonic-gate {
3499*7c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
3500*7c478bd9Sstevel@tonic-gate 
3501*7c478bd9Sstevel@tonic-gate 	/*
3502*7c478bd9Sstevel@tonic-gate 	 * KRPC called the RDMATF to stop the listeners, this means
3503*7c478bd9Sstevel@tonic-gate 	 * stop sending incomming or recieved requests to KRPC master
3504*7c478bd9Sstevel@tonic-gate 	 * transport handle for RDMA-IB. This is also means that the
3505*7c478bd9Sstevel@tonic-gate 	 * master transport handle, responsible for us, is going away.
3506*7c478bd9Sstevel@tonic-gate 	 */
3507*7c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
3508*7c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
3509*7c478bd9Sstevel@tonic-gate 	if (svcdata != NULL)
3510*7c478bd9Sstevel@tonic-gate 		svcdata->active = 0;
3511*7c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
3512*7c478bd9Sstevel@tonic-gate 
3513*7c478bd9Sstevel@tonic-gate 	/*
3514*7c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
3515*7c478bd9Sstevel@tonic-gate 	 */
3516*7c478bd9Sstevel@tonic-gate 	hca = rib_stat->hca;
3517*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
3518*7c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
3519*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3520*7c478bd9Sstevel@tonic-gate 		return;
3521*7c478bd9Sstevel@tonic-gate 	}
3522*7c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
3523*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
3524*7c478bd9Sstevel@tonic-gate }
3525*7c478bd9Sstevel@tonic-gate 
3526*7c478bd9Sstevel@tonic-gate /*
3527*7c478bd9Sstevel@tonic-gate  * Traverse the HCA's service list to unbind and deregister services.
3528*7c478bd9Sstevel@tonic-gate  * Instead of unbinding the service for a service handle by
3529*7c478bd9Sstevel@tonic-gate  * calling ibt_unbind_service() for each port/pkey, we unbind
3530*7c478bd9Sstevel@tonic-gate  * all the services for the service handle by making only one
3531*7c478bd9Sstevel@tonic-gate  * call to ibt_unbind_all_services().  Then, we deregister the
3532*7c478bd9Sstevel@tonic-gate  * service for the service handle.
3533*7c478bd9Sstevel@tonic-gate  *
3534*7c478bd9Sstevel@tonic-gate  * When traversing the entries in service_list, we compare the
3535*7c478bd9Sstevel@tonic-gate  * srv_hdl of the current entry with that of the next.  If they
3536*7c478bd9Sstevel@tonic-gate  * are different or if the next entry is NULL, the current entry
3537*7c478bd9Sstevel@tonic-gate  * marks the last binding of the service handle.  In this case,
3538*7c478bd9Sstevel@tonic-gate  * call ibt_unbind_all_services() and deregister the service for
3539*7c478bd9Sstevel@tonic-gate  * the service handle.  If they are the same, the current and the
3540*7c478bd9Sstevel@tonic-gate  * next entries are bound to the same service handle.  In this
3541*7c478bd9Sstevel@tonic-gate  * case, move on to the next entry.
3542*7c478bd9Sstevel@tonic-gate  */
3543*7c478bd9Sstevel@tonic-gate static void
3544*7c478bd9Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca)
3545*7c478bd9Sstevel@tonic-gate {
3546*7c478bd9Sstevel@tonic-gate 	rib_service_t		*srv_list, *to_remove;
3547*7c478bd9Sstevel@tonic-gate 	ibt_status_t   		ibt_status;
3548*7c478bd9Sstevel@tonic-gate 
3549*7c478bd9Sstevel@tonic-gate 	/*
3550*7c478bd9Sstevel@tonic-gate 	 * unbind and deregister the services for this service type.
3551*7c478bd9Sstevel@tonic-gate 	 * Right now there is only one service type. In future it will
3552*7c478bd9Sstevel@tonic-gate 	 * be passed down to this function.
3553*7c478bd9Sstevel@tonic-gate 	 */
3554*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
3555*7c478bd9Sstevel@tonic-gate 	srv_list = hca->service_list;
3556*7c478bd9Sstevel@tonic-gate 	while (srv_list != NULL) {
3557*7c478bd9Sstevel@tonic-gate 		to_remove = srv_list;
3558*7c478bd9Sstevel@tonic-gate 		srv_list = to_remove->srv_next;
3559*7c478bd9Sstevel@tonic-gate 		if (srv_list == NULL || bcmp(to_remove->srv_hdl,
3560*7c478bd9Sstevel@tonic-gate 		    srv_list->srv_hdl, sizeof (ibt_srv_hdl_t))) {
3561*7c478bd9Sstevel@tonic-gate 
3562*7c478bd9Sstevel@tonic-gate 		    ibt_status = ibt_unbind_all_services(to_remove->srv_hdl);
3563*7c478bd9Sstevel@tonic-gate 		    if (ibt_status != IBT_SUCCESS) {
3564*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_listen_stop: "
3565*7c478bd9Sstevel@tonic-gate 			    "ibt_unbind_all_services FAILED"
3566*7c478bd9Sstevel@tonic-gate 				" status: %d\n", ibt_status);
3567*7c478bd9Sstevel@tonic-gate 		    }
3568*7c478bd9Sstevel@tonic-gate 
3569*7c478bd9Sstevel@tonic-gate 		    ibt_status =
3570*7c478bd9Sstevel@tonic-gate 			ibt_deregister_service(hca->ibt_clnt_hdl,
3571*7c478bd9Sstevel@tonic-gate 				to_remove->srv_hdl);
3572*7c478bd9Sstevel@tonic-gate 		    if (ibt_status != IBT_SUCCESS) {
3573*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_listen_stop: "
3574*7c478bd9Sstevel@tonic-gate 			    "ibt_deregister_service FAILED"
3575*7c478bd9Sstevel@tonic-gate 				" status: %d\n", ibt_status);
3576*7c478bd9Sstevel@tonic-gate 		    }
3577*7c478bd9Sstevel@tonic-gate 
3578*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
3579*7c478bd9Sstevel@tonic-gate 		    if (rib_debug > 1) {
3580*7c478bd9Sstevel@tonic-gate 			if (ibt_status == IBT_SUCCESS)
3581*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_listen_stop: "
3582*7c478bd9Sstevel@tonic-gate 				    "Successfully stopped and"
3583*7c478bd9Sstevel@tonic-gate 				    " UNREGISTERED service: %s\n",
3584*7c478bd9Sstevel@tonic-gate 				    to_remove->srv_name);
3585*7c478bd9Sstevel@tonic-gate 		    }
3586*7c478bd9Sstevel@tonic-gate #endif
3587*7c478bd9Sstevel@tonic-gate 		}
3588*7c478bd9Sstevel@tonic-gate 		kmem_free(to_remove->srv_name, IB_SVC_NAME_LEN);
3589*7c478bd9Sstevel@tonic-gate 		kmem_free(to_remove->srv_sbind_hdl,
3590*7c478bd9Sstevel@tonic-gate 			sizeof (ibt_sbind_hdl_t));
3591*7c478bd9Sstevel@tonic-gate 
3592*7c478bd9Sstevel@tonic-gate 		kmem_free(to_remove, sizeof (rib_service_t));
3593*7c478bd9Sstevel@tonic-gate 	}
3594*7c478bd9Sstevel@tonic-gate 	hca->service_list = NULL;
3595*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
3596*7c478bd9Sstevel@tonic-gate }
3597*7c478bd9Sstevel@tonic-gate 
3598*7c478bd9Sstevel@tonic-gate static struct svc_recv *
3599*7c478bd9Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl)
3600*7c478bd9Sstevel@tonic-gate {
3601*7c478bd9Sstevel@tonic-gate 	struct svc_recv	*recvp;
3602*7c478bd9Sstevel@tonic-gate 
3603*7c478bd9Sstevel@tonic-gate 	recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP);
3604*7c478bd9Sstevel@tonic-gate 	recvp->vaddr = sgl->ds_va;
3605*7c478bd9Sstevel@tonic-gate 	recvp->qp = qp;
3606*7c478bd9Sstevel@tonic-gate 	recvp->bytes_xfer = 0;
3607*7c478bd9Sstevel@tonic-gate 	return (recvp);
3608*7c478bd9Sstevel@tonic-gate }
3609*7c478bd9Sstevel@tonic-gate 
3610*7c478bd9Sstevel@tonic-gate static int
3611*7c478bd9Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp)
3612*7c478bd9Sstevel@tonic-gate {
3613*7c478bd9Sstevel@tonic-gate 	kmem_free(recvp, sizeof (*recvp));
3614*7c478bd9Sstevel@tonic-gate 
3615*7c478bd9Sstevel@tonic-gate 	return (0);
3616*7c478bd9Sstevel@tonic-gate }
3617*7c478bd9Sstevel@tonic-gate 
3618*7c478bd9Sstevel@tonic-gate static struct reply *
3619*7c478bd9Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid)
3620*7c478bd9Sstevel@tonic-gate {
3621*7c478bd9Sstevel@tonic-gate 	struct reply	*rep;
3622*7c478bd9Sstevel@tonic-gate 
3623*7c478bd9Sstevel@tonic-gate 
3624*7c478bd9Sstevel@tonic-gate 	rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP);
3625*7c478bd9Sstevel@tonic-gate 	if (rep == NULL) {
3626*7c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->replylist_lock);
3627*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_addreplylist: no memory\n");
3628*7c478bd9Sstevel@tonic-gate 		return (NULL);
3629*7c478bd9Sstevel@tonic-gate 	}
3630*7c478bd9Sstevel@tonic-gate 	rep->xid = msgid;
3631*7c478bd9Sstevel@tonic-gate 	rep->vaddr_cq = NULL;
3632*7c478bd9Sstevel@tonic-gate 	rep->bytes_xfer = 0;
3633*7c478bd9Sstevel@tonic-gate 	rep->status = (uint_t)REPLY_WAIT;
3634*7c478bd9Sstevel@tonic-gate 	rep->prev = NULL;
3635*7c478bd9Sstevel@tonic-gate 	cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL);
3636*7c478bd9Sstevel@tonic-gate 
3637*7c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
3638*7c478bd9Sstevel@tonic-gate 	if (qp->replylist) {
3639*7c478bd9Sstevel@tonic-gate 		rep->next = qp->replylist;
3640*7c478bd9Sstevel@tonic-gate 		qp->replylist->prev = rep;
3641*7c478bd9Sstevel@tonic-gate 	}
3642*7c478bd9Sstevel@tonic-gate 	qp->rep_list_size++;
3643*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1)
3644*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "rib_addreplylist: qp:%p, rep_list_size:%d\n",
3645*7c478bd9Sstevel@tonic-gate 		(void *)qp, qp->rep_list_size);
3646*7c478bd9Sstevel@tonic-gate 	qp->replylist = rep;
3647*7c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
3648*7c478bd9Sstevel@tonic-gate 
3649*7c478bd9Sstevel@tonic-gate 	return (rep);
3650*7c478bd9Sstevel@tonic-gate }
3651*7c478bd9Sstevel@tonic-gate 
3652*7c478bd9Sstevel@tonic-gate static rdma_stat
3653*7c478bd9Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp)
3654*7c478bd9Sstevel@tonic-gate {
3655*7c478bd9Sstevel@tonic-gate 	struct reply	*r, *n;
3656*7c478bd9Sstevel@tonic-gate 
3657*7c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
3658*7c478bd9Sstevel@tonic-gate 	for (r = qp->replylist; r != NULL; r = n) {
3659*7c478bd9Sstevel@tonic-gate 		n = r->next;
3660*7c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, r);
3661*7c478bd9Sstevel@tonic-gate 	}
3662*7c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
3663*7c478bd9Sstevel@tonic-gate 
3664*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
3665*7c478bd9Sstevel@tonic-gate }
3666*7c478bd9Sstevel@tonic-gate 
3667*7c478bd9Sstevel@tonic-gate static int
3668*7c478bd9Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep)
3669*7c478bd9Sstevel@tonic-gate {
3670*7c478bd9Sstevel@tonic-gate 
3671*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->replylist_lock));
3672*7c478bd9Sstevel@tonic-gate 	if (rep->prev) {
3673*7c478bd9Sstevel@tonic-gate 		rep->prev->next = rep->next;
3674*7c478bd9Sstevel@tonic-gate 	}
3675*7c478bd9Sstevel@tonic-gate 	if (rep->next) {
3676*7c478bd9Sstevel@tonic-gate 		rep->next->prev = rep->prev;
3677*7c478bd9Sstevel@tonic-gate 	}
3678*7c478bd9Sstevel@tonic-gate 	if (qp->replylist == rep)
3679*7c478bd9Sstevel@tonic-gate 		qp->replylist = rep->next;
3680*7c478bd9Sstevel@tonic-gate 
3681*7c478bd9Sstevel@tonic-gate 	cv_destroy(&rep->wait_cv);
3682*7c478bd9Sstevel@tonic-gate 	qp->rep_list_size--;
3683*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1)
3684*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "rib_remreply: qp:%p, rep_list_size:%d\n",
3685*7c478bd9Sstevel@tonic-gate 		(void *)qp, qp->rep_list_size);
3686*7c478bd9Sstevel@tonic-gate 
3687*7c478bd9Sstevel@tonic-gate 	kmem_free(rep, sizeof (*rep));
3688*7c478bd9Sstevel@tonic-gate 
3689*7c478bd9Sstevel@tonic-gate 	return (0);
3690*7c478bd9Sstevel@tonic-gate }
3691*7c478bd9Sstevel@tonic-gate 
3692*7c478bd9Sstevel@tonic-gate rdma_stat
3693*7c478bd9Sstevel@tonic-gate rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
3694*7c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle)
3695*7c478bd9Sstevel@tonic-gate {
3696*7c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
3697*7c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
3698*7c478bd9Sstevel@tonic-gate 	rdma_stat	status;
3699*7c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
3700*7c478bd9Sstevel@tonic-gate 
3701*7c478bd9Sstevel@tonic-gate 	/*
3702*7c478bd9Sstevel@tonic-gate 	 * Note: ALL buffer pools use the same memory type RDMARW.
3703*7c478bd9Sstevel@tonic-gate 	 */
3704*7c478bd9Sstevel@tonic-gate 	status = rib_reg_mem(hca, buf, buflen, 0, &mr_hdl, &mr_desc);
3705*7c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
3706*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = (uint64_t)mr_hdl;
3707*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
3708*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
3709*7c478bd9Sstevel@tonic-gate 	} else {
3710*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
3711*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
3712*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
3713*7c478bd9Sstevel@tonic-gate 	}
3714*7c478bd9Sstevel@tonic-gate 	return (status);
3715*7c478bd9Sstevel@tonic-gate }
3716*7c478bd9Sstevel@tonic-gate 
3717*7c478bd9Sstevel@tonic-gate static rdma_stat
3718*7c478bd9Sstevel@tonic-gate rib_reg_mem(rib_hca_t *hca, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
3719*7c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
3720*7c478bd9Sstevel@tonic-gate {
3721*7c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
3722*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
3723*7c478bd9Sstevel@tonic-gate 
3724*7c478bd9Sstevel@tonic-gate 	mem_attr.mr_vaddr = (uint64_t)buf;
3725*7c478bd9Sstevel@tonic-gate 	mem_attr.mr_len = (ib_msglen_t)size;
3726*7c478bd9Sstevel@tonic-gate 	mem_attr.mr_as = NULL;
3727*7c478bd9Sstevel@tonic-gate 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
3728*7c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
3729*7c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_WINDOW_BIND | spec;
3730*7c478bd9Sstevel@tonic-gate 
3731*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
3732*7c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
3733*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl,
3734*7c478bd9Sstevel@tonic-gate 					&mem_attr, mr_hdlp, mr_descp);
3735*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3736*7c478bd9Sstevel@tonic-gate 	} else {
3737*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3738*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3739*7c478bd9Sstevel@tonic-gate 	}
3740*7c478bd9Sstevel@tonic-gate 
3741*7c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
3742*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_reg_mem: ibt_register_mr "
3743*7c478bd9Sstevel@tonic-gate 			"(spec:%d) failed for addr %llX, status %d",
3744*7c478bd9Sstevel@tonic-gate 			spec, (longlong_t)mem_attr.mr_vaddr, ibt_status);
3745*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3746*7c478bd9Sstevel@tonic-gate 	}
3747*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
3748*7c478bd9Sstevel@tonic-gate }
3749*7c478bd9Sstevel@tonic-gate 
3750*7c478bd9Sstevel@tonic-gate rdma_stat
3751*7c478bd9Sstevel@tonic-gate rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
3752*7c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle)
3753*7c478bd9Sstevel@tonic-gate {
3754*7c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
3755*7c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
3756*7c478bd9Sstevel@tonic-gate 	rdma_stat	status;
3757*7c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
3758*7c478bd9Sstevel@tonic-gate 
3759*7c478bd9Sstevel@tonic-gate 	/*
3760*7c478bd9Sstevel@tonic-gate 	 * Non-coherent memory registration.
3761*7c478bd9Sstevel@tonic-gate 	 */
3762*7c478bd9Sstevel@tonic-gate 	status = rib_reg_mem(hca, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
3763*7c478bd9Sstevel@tonic-gate 			&mr_desc);
3764*7c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
3765*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = (uint64_t)mr_hdl;
3766*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
3767*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
3768*7c478bd9Sstevel@tonic-gate 		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
3769*7c478bd9Sstevel@tonic-gate 	} else {
3770*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
3771*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
3772*7c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
3773*7c478bd9Sstevel@tonic-gate 	}
3774*7c478bd9Sstevel@tonic-gate 	return (status);
3775*7c478bd9Sstevel@tonic-gate }
3776*7c478bd9Sstevel@tonic-gate 
3777*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
3778*7c478bd9Sstevel@tonic-gate rdma_stat
3779*7c478bd9Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
3780*7c478bd9Sstevel@tonic-gate {
3781*7c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
3782*7c478bd9Sstevel@tonic-gate 
3783*7c478bd9Sstevel@tonic-gate 	/*
3784*7c478bd9Sstevel@tonic-gate 	 * Allow memory deregistration even if HCA is
3785*7c478bd9Sstevel@tonic-gate 	 * getting detached. Need all outstanding
3786*7c478bd9Sstevel@tonic-gate 	 * memory registrations to be deregistered
3787*7c478bd9Sstevel@tonic-gate 	 * before HCA_DETACH_EVENT can be accepted.
3788*7c478bd9Sstevel@tonic-gate 	 */
3789*7c478bd9Sstevel@tonic-gate 	(void) ibt_deregister_mr(hca->hca_hdl,
3790*7c478bd9Sstevel@tonic-gate 			(ibt_mr_hdl_t)buf_handle.mrc_linfo);
3791*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
3792*7c478bd9Sstevel@tonic-gate }
3793*7c478bd9Sstevel@tonic-gate 
3794*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
3795*7c478bd9Sstevel@tonic-gate rdma_stat
3796*7c478bd9Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
3797*7c478bd9Sstevel@tonic-gate 		RIB_SYNCMEM_HANDLE sync_handle)
3798*7c478bd9Sstevel@tonic-gate {
3799*7c478bd9Sstevel@tonic-gate 	(void) rib_deregistermem(conn, buf, buf_handle);
3800*7c478bd9Sstevel@tonic-gate 
3801*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
3802*7c478bd9Sstevel@tonic-gate }
3803*7c478bd9Sstevel@tonic-gate 
3804*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
3805*7c478bd9Sstevel@tonic-gate rdma_stat
3806*7c478bd9Sstevel@tonic-gate rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf,
3807*7c478bd9Sstevel@tonic-gate 		int len, int cpu)
3808*7c478bd9Sstevel@tonic-gate {
3809*7c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
3810*7c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
3811*7c478bd9Sstevel@tonic-gate 	ibt_mr_sync_t	mr_segment;
3812*7c478bd9Sstevel@tonic-gate 
3813*7c478bd9Sstevel@tonic-gate 	mr_segment.ms_handle = (ibt_mr_hdl_t)shandle;
3814*7c478bd9Sstevel@tonic-gate 	mr_segment.ms_vaddr = (ib_vaddr_t)buf;
3815*7c478bd9Sstevel@tonic-gate 	mr_segment.ms_len = (ib_memlen_t)len;
3816*7c478bd9Sstevel@tonic-gate 	if (cpu) {
3817*7c478bd9Sstevel@tonic-gate 		/* make incoming data visible to memory */
3818*7c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_WRITE;
3819*7c478bd9Sstevel@tonic-gate 	} else {
3820*7c478bd9Sstevel@tonic-gate 		/* make memory changes visible to IO */
3821*7c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_READ;
3822*7c478bd9Sstevel@tonic-gate 	}
3823*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
3824*7c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
3825*7c478bd9Sstevel@tonic-gate 		status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1);
3826*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3827*7c478bd9Sstevel@tonic-gate 	} else {
3828*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3829*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3830*7c478bd9Sstevel@tonic-gate 	}
3831*7c478bd9Sstevel@tonic-gate 
3832*7c478bd9Sstevel@tonic-gate 	if (status == IBT_SUCCESS)
3833*7c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
3834*7c478bd9Sstevel@tonic-gate 	else {
3835*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
3836*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_syncmem: ibt_sync_mr failed with %d\n",
3837*7c478bd9Sstevel@tonic-gate 			status);
3838*7c478bd9Sstevel@tonic-gate #endif
3839*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
3840*7c478bd9Sstevel@tonic-gate 	}
3841*7c478bd9Sstevel@tonic-gate }
3842*7c478bd9Sstevel@tonic-gate 
3843*7c478bd9Sstevel@tonic-gate /*
3844*7c478bd9Sstevel@tonic-gate  * XXXX	????
3845*7c478bd9Sstevel@tonic-gate  */
3846*7c478bd9Sstevel@tonic-gate static rdma_stat
3847*7c478bd9Sstevel@tonic-gate rib_getinfo(rdma_info_t *info)
3848*7c478bd9Sstevel@tonic-gate {
3849*7c478bd9Sstevel@tonic-gate 	/*
3850*7c478bd9Sstevel@tonic-gate 	 * XXXX	Hack!
3851*7c478bd9Sstevel@tonic-gate 	 */
3852*7c478bd9Sstevel@tonic-gate 	info->addrlen = 16;
3853*7c478bd9Sstevel@tonic-gate 	info->mts = 1000000;
3854*7c478bd9Sstevel@tonic-gate 	info->mtu = 1000000;
3855*7c478bd9Sstevel@tonic-gate 
3856*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
3857*7c478bd9Sstevel@tonic-gate }
3858*7c478bd9Sstevel@tonic-gate 
3859*7c478bd9Sstevel@tonic-gate rib_bufpool_t *
3860*7c478bd9Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num)
3861*7c478bd9Sstevel@tonic-gate {
3862*7c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
3863*7c478bd9Sstevel@tonic-gate 	bufpool_t	*bp = NULL;
3864*7c478bd9Sstevel@tonic-gate 	caddr_t		buf;
3865*7c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
3866*7c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
3867*7c478bd9Sstevel@tonic-gate 	int		i, j;
3868*7c478bd9Sstevel@tonic-gate 
3869*7c478bd9Sstevel@tonic-gate 	rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP);
3870*7c478bd9Sstevel@tonic-gate 
3871*7c478bd9Sstevel@tonic-gate 	bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) +
3872*7c478bd9Sstevel@tonic-gate 			num * sizeof (void *), KM_SLEEP);
3873*7c478bd9Sstevel@tonic-gate 
3874*7c478bd9Sstevel@tonic-gate 	mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
3875*7c478bd9Sstevel@tonic-gate 	bp->numelems = num;
3876*7c478bd9Sstevel@tonic-gate 
3877*7c478bd9Sstevel@tonic-gate 	switch (ptype) {
3878*7c478bd9Sstevel@tonic-gate 	    case SEND_BUFFER:
3879*7c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
3880*7c478bd9Sstevel@tonic-gate 		/* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
3881*7c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_MSG_SZ;
3882*7c478bd9Sstevel@tonic-gate 		break;
3883*7c478bd9Sstevel@tonic-gate 	    case RECV_BUFFER:
3884*7c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
3885*7c478bd9Sstevel@tonic-gate 		/* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
3886*7c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_BUF_SIZE;
3887*7c478bd9Sstevel@tonic-gate 		break;
3888*7c478bd9Sstevel@tonic-gate 	    default:
3889*7c478bd9Sstevel@tonic-gate 		goto fail;
3890*7c478bd9Sstevel@tonic-gate 	}
3891*7c478bd9Sstevel@tonic-gate 
3892*7c478bd9Sstevel@tonic-gate 	/*
3893*7c478bd9Sstevel@tonic-gate 	 * Register the pool.
3894*7c478bd9Sstevel@tonic-gate 	 */
3895*7c478bd9Sstevel@tonic-gate 	bp->bufsize = num * bp->rsize;
3896*7c478bd9Sstevel@tonic-gate 	bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
3897*7c478bd9Sstevel@tonic-gate 	rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
3898*7c478bd9Sstevel@tonic-gate 			sizeof (ibt_mr_hdl_t), KM_SLEEP);
3899*7c478bd9Sstevel@tonic-gate 	rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
3900*7c478bd9Sstevel@tonic-gate 			sizeof (ibt_mr_desc_t), KM_SLEEP);
3901*7c478bd9Sstevel@tonic-gate 
3902*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
3903*7c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
3904*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
3905*7c478bd9Sstevel@tonic-gate 		goto fail;
3906*7c478bd9Sstevel@tonic-gate 	}
3907*7c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
3908*7c478bd9Sstevel@tonic-gate 		bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
3909*7c478bd9Sstevel@tonic-gate 		mem_attr.mr_vaddr = (uint64_t)buf;
3910*7c478bd9Sstevel@tonic-gate 		mem_attr.mr_len = (ib_msglen_t)bp->rsize;
3911*7c478bd9Sstevel@tonic-gate 		mem_attr.mr_as = NULL;
3912*7c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl,
3913*7c478bd9Sstevel@tonic-gate 			hca->pd_hdl, &mem_attr, &rbp->mr_hdl[i],
3914*7c478bd9Sstevel@tonic-gate 			&rbp->mr_desc[i]);
3915*7c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
3916*7c478bd9Sstevel@tonic-gate 		    for (j = 0; j < i; j++) {
3917*7c478bd9Sstevel@tonic-gate 			(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[j]);
3918*7c478bd9Sstevel@tonic-gate 		    }
3919*7c478bd9Sstevel@tonic-gate 		    rw_exit(&hca->state_lock);
3920*7c478bd9Sstevel@tonic-gate 		    goto fail;
3921*7c478bd9Sstevel@tonic-gate 		}
3922*7c478bd9Sstevel@tonic-gate 	}
3923*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
3924*7c478bd9Sstevel@tonic-gate 
3925*7c478bd9Sstevel@tonic-gate 	buf = (caddr_t)bp->buf;
3926*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < num; i++, buf += bp->rsize) {
3927*7c478bd9Sstevel@tonic-gate 		bp->buflist[i] = (void *)buf;
3928*7c478bd9Sstevel@tonic-gate 	}
3929*7c478bd9Sstevel@tonic-gate 	bp->buffree = num - 1;	/* no. of free buffers */
3930*7c478bd9Sstevel@tonic-gate 	rbp->bpool = bp;
3931*7c478bd9Sstevel@tonic-gate 
3932*7c478bd9Sstevel@tonic-gate 	return (rbp);
3933*7c478bd9Sstevel@tonic-gate fail:
3934*7c478bd9Sstevel@tonic-gate 	if (bp) {
3935*7c478bd9Sstevel@tonic-gate 	    if (bp->buf)
3936*7c478bd9Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
3937*7c478bd9Sstevel@tonic-gate 	    kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *));
3938*7c478bd9Sstevel@tonic-gate 	}
3939*7c478bd9Sstevel@tonic-gate 	if (rbp) {
3940*7c478bd9Sstevel@tonic-gate 	    if (rbp->mr_hdl)
3941*7c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t));
3942*7c478bd9Sstevel@tonic-gate 	    if (rbp->mr_desc)
3943*7c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t));
3944*7c478bd9Sstevel@tonic-gate 	    kmem_free(rbp, sizeof (rib_bufpool_t));
3945*7c478bd9Sstevel@tonic-gate 	}
3946*7c478bd9Sstevel@tonic-gate 	return (NULL);
3947*7c478bd9Sstevel@tonic-gate }
3948*7c478bd9Sstevel@tonic-gate 
3949*7c478bd9Sstevel@tonic-gate static void
3950*7c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype)
3951*7c478bd9Sstevel@tonic-gate {
3952*7c478bd9Sstevel@tonic-gate 	int i;
3953*7c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
3954*7c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
3955*7c478bd9Sstevel@tonic-gate 
3956*7c478bd9Sstevel@tonic-gate 	/*
3957*7c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
3958*7c478bd9Sstevel@tonic-gate 	 */
3959*7c478bd9Sstevel@tonic-gate 	switch (ptype) {
3960*7c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
3961*7c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
3962*7c478bd9Sstevel@tonic-gate 			break;
3963*7c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
3964*7c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
3965*7c478bd9Sstevel@tonic-gate 			break;
3966*7c478bd9Sstevel@tonic-gate 		default:
3967*7c478bd9Sstevel@tonic-gate 			return;
3968*7c478bd9Sstevel@tonic-gate 	}
3969*7c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
3970*7c478bd9Sstevel@tonic-gate 		return;
3971*7c478bd9Sstevel@tonic-gate 
3972*7c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
3973*7c478bd9Sstevel@tonic-gate 
3974*7c478bd9Sstevel@tonic-gate 	/*
3975*7c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
3976*7c478bd9Sstevel@tonic-gate 	 */
3977*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < bp->numelems; i++) {
3978*7c478bd9Sstevel@tonic-gate 		(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]);
3979*7c478bd9Sstevel@tonic-gate 	}
3980*7c478bd9Sstevel@tonic-gate }
3981*7c478bd9Sstevel@tonic-gate 
3982*7c478bd9Sstevel@tonic-gate static void
3983*7c478bd9Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype)
3984*7c478bd9Sstevel@tonic-gate {
3985*7c478bd9Sstevel@tonic-gate 
3986*7c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
3987*7c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
3988*7c478bd9Sstevel@tonic-gate 
3989*7c478bd9Sstevel@tonic-gate 	/*
3990*7c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
3991*7c478bd9Sstevel@tonic-gate 	 */
3992*7c478bd9Sstevel@tonic-gate 	switch (ptype) {
3993*7c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
3994*7c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
3995*7c478bd9Sstevel@tonic-gate 			break;
3996*7c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
3997*7c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
3998*7c478bd9Sstevel@tonic-gate 			break;
3999*7c478bd9Sstevel@tonic-gate 		default:
4000*7c478bd9Sstevel@tonic-gate 			return;
4001*7c478bd9Sstevel@tonic-gate 	}
4002*7c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
4003*7c478bd9Sstevel@tonic-gate 		return;
4004*7c478bd9Sstevel@tonic-gate 
4005*7c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
4006*7c478bd9Sstevel@tonic-gate 
4007*7c478bd9Sstevel@tonic-gate 	/*
4008*7c478bd9Sstevel@tonic-gate 	 * Free the pool memory.
4009*7c478bd9Sstevel@tonic-gate 	 */
4010*7c478bd9Sstevel@tonic-gate 	if (rbp->mr_hdl)
4011*7c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
4012*7c478bd9Sstevel@tonic-gate 
4013*7c478bd9Sstevel@tonic-gate 	if (rbp->mr_desc)
4014*7c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
4015*7c478bd9Sstevel@tonic-gate 
4016*7c478bd9Sstevel@tonic-gate 	if (bp->buf)
4017*7c478bd9Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
4018*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&bp->buflock);
4019*7c478bd9Sstevel@tonic-gate 	kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
4020*7c478bd9Sstevel@tonic-gate 	kmem_free(rbp, sizeof (rib_bufpool_t));
4021*7c478bd9Sstevel@tonic-gate }
4022*7c478bd9Sstevel@tonic-gate 
4023*7c478bd9Sstevel@tonic-gate void
4024*7c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype)
4025*7c478bd9Sstevel@tonic-gate {
4026*7c478bd9Sstevel@tonic-gate 	/*
4027*7c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
4028*7c478bd9Sstevel@tonic-gate 	 */
4029*7c478bd9Sstevel@tonic-gate 	rib_rbufpool_deregister(hca, ptype);
4030*7c478bd9Sstevel@tonic-gate 	rib_rbufpool_free(hca, ptype);
4031*7c478bd9Sstevel@tonic-gate }
4032*7c478bd9Sstevel@tonic-gate 
4033*7c478bd9Sstevel@tonic-gate /*
4034*7c478bd9Sstevel@tonic-gate  * Fetch a buffer from the pool of type specified in rdbuf->type.
4035*7c478bd9Sstevel@tonic-gate  */
4036*7c478bd9Sstevel@tonic-gate static rdma_stat
4037*7c478bd9Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf)
4038*7c478bd9Sstevel@tonic-gate {
4039*7c478bd9Sstevel@tonic-gate 
4040*7c478bd9Sstevel@tonic-gate 	rdbuf->addr = rib_rbuf_alloc(conn, rdbuf);
4041*7c478bd9Sstevel@tonic-gate 	if (rdbuf->addr) {
4042*7c478bd9Sstevel@tonic-gate 		switch (rdbuf->type) {
4043*7c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
4044*7c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_MSG_SZ;	/* 1K */
4045*7c478bd9Sstevel@tonic-gate 			break;
4046*7c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
4047*7c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_BUF_SIZE; /* 2K */
4048*7c478bd9Sstevel@tonic-gate 			break;
4049*7c478bd9Sstevel@tonic-gate 		default:
4050*7c478bd9Sstevel@tonic-gate 			rdbuf->len = 0;
4051*7c478bd9Sstevel@tonic-gate 		}
4052*7c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
4053*7c478bd9Sstevel@tonic-gate 	} else
4054*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
4055*7c478bd9Sstevel@tonic-gate }
4056*7c478bd9Sstevel@tonic-gate 
4057*7c478bd9Sstevel@tonic-gate 
4058*7c478bd9Sstevel@tonic-gate /*
4059*7c478bd9Sstevel@tonic-gate  * Fetch a buffer of specified type.
4060*7c478bd9Sstevel@tonic-gate  * Note that rdbuf->handle is mw's rkey.
4061*7c478bd9Sstevel@tonic-gate  */
4062*7c478bd9Sstevel@tonic-gate static void *
4063*7c478bd9Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf)
4064*7c478bd9Sstevel@tonic-gate {
4065*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
4066*7c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = qp->hca;
4067*7c478bd9Sstevel@tonic-gate 	rdma_btype	ptype = rdbuf->type;
4068*7c478bd9Sstevel@tonic-gate 	void		*buf;
4069*7c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
4070*7c478bd9Sstevel@tonic-gate 	bufpool_t	*bp;
4071*7c478bd9Sstevel@tonic-gate 	int		i;
4072*7c478bd9Sstevel@tonic-gate 
4073*7c478bd9Sstevel@tonic-gate 	/*
4074*7c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
4075*7c478bd9Sstevel@tonic-gate 	 */
4076*7c478bd9Sstevel@tonic-gate 	switch (ptype) {
4077*7c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
4078*7c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
4079*7c478bd9Sstevel@tonic-gate 			break;
4080*7c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
4081*7c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
4082*7c478bd9Sstevel@tonic-gate 			break;
4083*7c478bd9Sstevel@tonic-gate 		default:
4084*7c478bd9Sstevel@tonic-gate 			return (NULL);
4085*7c478bd9Sstevel@tonic-gate 	}
4086*7c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
4087*7c478bd9Sstevel@tonic-gate 		return (NULL);
4088*7c478bd9Sstevel@tonic-gate 
4089*7c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
4090*7c478bd9Sstevel@tonic-gate 
4091*7c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
4092*7c478bd9Sstevel@tonic-gate 	if (bp->buffree < 0) {
4093*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_rbuf_alloc: No free buffers!");
4094*7c478bd9Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
4095*7c478bd9Sstevel@tonic-gate 		return (NULL);
4096*7c478bd9Sstevel@tonic-gate 	}
4097*7c478bd9Sstevel@tonic-gate 
4098*7c478bd9Sstevel@tonic-gate 	/* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */
4099*7c478bd9Sstevel@tonic-gate 	buf = bp->buflist[bp->buffree];
4100*7c478bd9Sstevel@tonic-gate 	rdbuf->addr = buf;
4101*7c478bd9Sstevel@tonic-gate 	rdbuf->len = bp->rsize;
4102*7c478bd9Sstevel@tonic-gate 	for (i = bp->numelems - 1; i >= 0; i--) {
4103*7c478bd9Sstevel@tonic-gate 	    if ((ib_vaddr_t)buf == rbp->mr_desc[i].md_vaddr) {
4104*7c478bd9Sstevel@tonic-gate 		rdbuf->handle.mrc_rmr = (uint32_t)rbp->mr_desc[i].md_rkey;
4105*7c478bd9Sstevel@tonic-gate 		rdbuf->handle.mrc_linfo = (uint64_t)rbp->mr_hdl[i];
4106*7c478bd9Sstevel@tonic-gate 		rdbuf->handle.mrc_lmr = (uint32_t)rbp->mr_desc[i].md_lkey;
4107*7c478bd9Sstevel@tonic-gate 		bp->buffree--;
4108*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
4109*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_rbuf_alloc: %d free bufs "
4110*7c478bd9Sstevel@tonic-gate 			"(type %d)\n", bp->buffree+1, ptype);
4111*7c478bd9Sstevel@tonic-gate 
4112*7c478bd9Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
4113*7c478bd9Sstevel@tonic-gate 
4114*7c478bd9Sstevel@tonic-gate 		return (buf);
4115*7c478bd9Sstevel@tonic-gate 	    }
4116*7c478bd9Sstevel@tonic-gate 	}
4117*7c478bd9Sstevel@tonic-gate 	cmn_err(CE_WARN, "rib_rbuf_alloc: NO matching buf %p of "
4118*7c478bd9Sstevel@tonic-gate 		"type %d found!", buf, ptype);
4119*7c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
4120*7c478bd9Sstevel@tonic-gate 
4121*7c478bd9Sstevel@tonic-gate 	return (NULL);
4122*7c478bd9Sstevel@tonic-gate }
4123*7c478bd9Sstevel@tonic-gate 
4124*7c478bd9Sstevel@tonic-gate static void
4125*7c478bd9Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf)
4126*7c478bd9Sstevel@tonic-gate {
4127*7c478bd9Sstevel@tonic-gate 
4128*7c478bd9Sstevel@tonic-gate 	rib_rbuf_free(conn, rdbuf->type, rdbuf->addr);
4129*7c478bd9Sstevel@tonic-gate }
4130*7c478bd9Sstevel@tonic-gate 
4131*7c478bd9Sstevel@tonic-gate static void
4132*7c478bd9Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf)
4133*7c478bd9Sstevel@tonic-gate {
4134*7c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
4135*7c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = qp->hca;
4136*7c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
4137*7c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
4138*7c478bd9Sstevel@tonic-gate 
4139*7c478bd9Sstevel@tonic-gate 	/*
4140*7c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
4141*7c478bd9Sstevel@tonic-gate 	 */
4142*7c478bd9Sstevel@tonic-gate 	switch (ptype) {
4143*7c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
4144*7c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
4145*7c478bd9Sstevel@tonic-gate 			break;
4146*7c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
4147*7c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
4148*7c478bd9Sstevel@tonic-gate 			break;
4149*7c478bd9Sstevel@tonic-gate 		default:
4150*7c478bd9Sstevel@tonic-gate 			return;
4151*7c478bd9Sstevel@tonic-gate 	}
4152*7c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
4153*7c478bd9Sstevel@tonic-gate 		return;
4154*7c478bd9Sstevel@tonic-gate 
4155*7c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
4156*7c478bd9Sstevel@tonic-gate 
4157*7c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
4158*7c478bd9Sstevel@tonic-gate 	if (++bp->buffree >= bp->numelems) {
4159*7c478bd9Sstevel@tonic-gate 		/*
4160*7c478bd9Sstevel@tonic-gate 		 * Should never happen
4161*7c478bd9Sstevel@tonic-gate 		 */
4162*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_rbuf_free: One (type %d) "
4163*7c478bd9Sstevel@tonic-gate 			"too many frees!", ptype);
4164*7c478bd9Sstevel@tonic-gate 		bp->buffree--;
4165*7c478bd9Sstevel@tonic-gate 	} else {
4166*7c478bd9Sstevel@tonic-gate 		bp->buflist[bp->buffree] = buf;
4167*7c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
4168*7c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_rbuf_free: %d free bufs "
4169*7c478bd9Sstevel@tonic-gate 			"(type %d)\n", bp->buffree+1, ptype);
4170*7c478bd9Sstevel@tonic-gate 	}
4171*7c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
4172*7c478bd9Sstevel@tonic-gate }
4173*7c478bd9Sstevel@tonic-gate 
4174*7c478bd9Sstevel@tonic-gate static rdma_stat
4175*7c478bd9Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t *connlist)
4176*7c478bd9Sstevel@tonic-gate {
4177*7c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
4178*7c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd) {
4179*7c478bd9Sstevel@tonic-gate 		cn->c_next = connlist->conn_hd;
4180*7c478bd9Sstevel@tonic-gate 		connlist->conn_hd->c_prev = cn;
4181*7c478bd9Sstevel@tonic-gate 	}
4182*7c478bd9Sstevel@tonic-gate 	connlist->conn_hd = cn;
4183*7c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
4184*7c478bd9Sstevel@tonic-gate 
4185*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
4186*7c478bd9Sstevel@tonic-gate }
4187*7c478bd9Sstevel@tonic-gate 
4188*7c478bd9Sstevel@tonic-gate static rdma_stat
4189*7c478bd9Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist)
4190*7c478bd9Sstevel@tonic-gate {
4191*7c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
4192*7c478bd9Sstevel@tonic-gate 	if (cn->c_prev) {
4193*7c478bd9Sstevel@tonic-gate 		cn->c_prev->c_next = cn->c_next;
4194*7c478bd9Sstevel@tonic-gate 	}
4195*7c478bd9Sstevel@tonic-gate 	if (cn->c_next) {
4196*7c478bd9Sstevel@tonic-gate 		cn->c_next->c_prev = cn->c_prev;
4197*7c478bd9Sstevel@tonic-gate 	}
4198*7c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd == cn)
4199*7c478bd9Sstevel@tonic-gate 		connlist->conn_hd = cn->c_next;
4200*7c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
4201*7c478bd9Sstevel@tonic-gate 
4202*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
4203*7c478bd9Sstevel@tonic-gate }
4204*7c478bd9Sstevel@tonic-gate 
4205*7c478bd9Sstevel@tonic-gate /*
4206*7c478bd9Sstevel@tonic-gate  * Connection management.
4207*7c478bd9Sstevel@tonic-gate  * IBTF does not support recycling of channels. So connections are only
4208*7c478bd9Sstevel@tonic-gate  * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR or
4209*7c478bd9Sstevel@tonic-gate  * C_DISCONN_PEND state. No C_IDLE state.
4210*7c478bd9Sstevel@tonic-gate  * C_CONN_PEND state: Connection establishment in progress to the server.
4211*7c478bd9Sstevel@tonic-gate  * C_CONNECTED state: A connection when created is in C_CONNECTED state.
4212*7c478bd9Sstevel@tonic-gate  * It has an RC channel associated with it. ibt_post_send/recv are allowed
4213*7c478bd9Sstevel@tonic-gate  * only in this state.
4214*7c478bd9Sstevel@tonic-gate  * C_ERROR state: A connection transitions to this state when WRs on the
4215*7c478bd9Sstevel@tonic-gate  * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event
4216*7c478bd9Sstevel@tonic-gate  * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA.
4217*7c478bd9Sstevel@tonic-gate  * C_DISCONN_PEND state: When a connection is in C_ERROR state and when
4218*7c478bd9Sstevel@tonic-gate  * c_ref drops to 0 (this indicates that RPC has no more references to this
4219*7c478bd9Sstevel@tonic-gate  * connection), the connection should be destroyed. A connection transitions
4220*7c478bd9Sstevel@tonic-gate  * into this state when it is being destroyed.
4221*7c478bd9Sstevel@tonic-gate  */
4222*7c478bd9Sstevel@tonic-gate static rdma_stat
4223*7c478bd9Sstevel@tonic-gate rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn)
4224*7c478bd9Sstevel@tonic-gate {
4225*7c478bd9Sstevel@tonic-gate 	CONN *cn;
4226*7c478bd9Sstevel@tonic-gate 	int status = RDMA_SUCCESS;
4227*7c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (rib_hca_t *)handle;
4228*7c478bd9Sstevel@tonic-gate 	rib_qp_t *qp;
4229*7c478bd9Sstevel@tonic-gate 	clock_t cv_stat, timout;
4230*7c478bd9Sstevel@tonic-gate 	ibt_path_info_t path;
4231*7c478bd9Sstevel@tonic-gate 
4232*7c478bd9Sstevel@tonic-gate again:
4233*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
4234*7c478bd9Sstevel@tonic-gate 	cn = hca->cl_conn_list.conn_hd;
4235*7c478bd9Sstevel@tonic-gate 	while (cn != NULL) {
4236*7c478bd9Sstevel@tonic-gate 		/*
4237*7c478bd9Sstevel@tonic-gate 		 * First, clear up any connection in the ERROR state
4238*7c478bd9Sstevel@tonic-gate 		 */
4239*7c478bd9Sstevel@tonic-gate 		mutex_enter(&cn->c_lock);
4240*7c478bd9Sstevel@tonic-gate 		if (cn->c_state == C_ERROR) {
4241*7c478bd9Sstevel@tonic-gate 			if (cn->c_ref == 0) {
4242*7c478bd9Sstevel@tonic-gate 				/*
4243*7c478bd9Sstevel@tonic-gate 				 * Remove connection from list and destroy it.
4244*7c478bd9Sstevel@tonic-gate 				 */
4245*7c478bd9Sstevel@tonic-gate 				cn->c_state = C_DISCONN_PEND;
4246*7c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
4247*7c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
4248*7c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(cn,
4249*7c478bd9Sstevel@tonic-gate 				    &hca->cl_conn_list);
4250*7c478bd9Sstevel@tonic-gate 				goto again;
4251*7c478bd9Sstevel@tonic-gate 			}
4252*7c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
4253*7c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
4254*7c478bd9Sstevel@tonic-gate 			continue;
4255*7c478bd9Sstevel@tonic-gate 		} else if (cn->c_state == C_DISCONN_PEND) {
4256*7c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
4257*7c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
4258*7c478bd9Sstevel@tonic-gate 			continue;
4259*7c478bd9Sstevel@tonic-gate 		}
4260*7c478bd9Sstevel@tonic-gate 		if ((cn->c_raddr.len == svcaddr->len) &&
4261*7c478bd9Sstevel@tonic-gate 		    bcmp(svcaddr->buf, cn->c_raddr.buf, svcaddr->len) == 0) {
4262*7c478bd9Sstevel@tonic-gate 			/*
4263*7c478bd9Sstevel@tonic-gate 			 * Our connection. Give up conn list lock
4264*7c478bd9Sstevel@tonic-gate 			 * as we are done traversing the list.
4265*7c478bd9Sstevel@tonic-gate 			 */
4266*7c478bd9Sstevel@tonic-gate 			rw_exit(&hca->cl_conn_list.conn_lock);
4267*7c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONNECTED) {
4268*7c478bd9Sstevel@tonic-gate 				cn->c_ref++;	/* sharing a conn */
4269*7c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
4270*7c478bd9Sstevel@tonic-gate 				*conn = cn;
4271*7c478bd9Sstevel@tonic-gate 				return (status);
4272*7c478bd9Sstevel@tonic-gate 			}
4273*7c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONN_PEND) {
4274*7c478bd9Sstevel@tonic-gate 				/*
4275*7c478bd9Sstevel@tonic-gate 				 * Hold a reference to this conn before
4276*7c478bd9Sstevel@tonic-gate 				 * we give up the lock.
4277*7c478bd9Sstevel@tonic-gate 				 */
4278*7c478bd9Sstevel@tonic-gate 				cn->c_ref++;
4279*7c478bd9Sstevel@tonic-gate 				timout =  ddi_get_lbolt() +
4280*7c478bd9Sstevel@tonic-gate 				    drv_usectohz(CONN_WAIT_TIME * 1000000);
4281*7c478bd9Sstevel@tonic-gate 				while ((cv_stat = cv_timedwait_sig(&cn->c_cv,
4282*7c478bd9Sstevel@tonic-gate 					&cn->c_lock, timout)) > 0 &&
4283*7c478bd9Sstevel@tonic-gate 					cn->c_state == C_CONN_PEND)
4284*7c478bd9Sstevel@tonic-gate 					;
4285*7c478bd9Sstevel@tonic-gate 				if (cv_stat == 0) {
4286*7c478bd9Sstevel@tonic-gate 					cn->c_ref--;
4287*7c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
4288*7c478bd9Sstevel@tonic-gate 					return (RDMA_INTR);
4289*7c478bd9Sstevel@tonic-gate 				}
4290*7c478bd9Sstevel@tonic-gate 				if (cv_stat < 0) {
4291*7c478bd9Sstevel@tonic-gate 					cn->c_ref--;
4292*7c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
4293*7c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
4294*7c478bd9Sstevel@tonic-gate 				}
4295*7c478bd9Sstevel@tonic-gate 				if (cn->c_state == C_CONNECTED) {
4296*7c478bd9Sstevel@tonic-gate 					*conn = cn;
4297*7c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
4298*7c478bd9Sstevel@tonic-gate 					return (status);
4299*7c478bd9Sstevel@tonic-gate 				} else {
4300*7c478bd9Sstevel@tonic-gate 					cn->c_ref--;
4301*7c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
4302*7c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
4303*7c478bd9Sstevel@tonic-gate 				}
4304*7c478bd9Sstevel@tonic-gate 			}
4305*7c478bd9Sstevel@tonic-gate 		}
4306*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cn->c_lock);
4307*7c478bd9Sstevel@tonic-gate 		cn = cn->c_next;
4308*7c478bd9Sstevel@tonic-gate 	}
4309*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
4310*7c478bd9Sstevel@tonic-gate 
4311*7c478bd9Sstevel@tonic-gate 	status = rib_chk_srv_ats(hca, svcaddr, addr_type, &path);
4312*7c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
4313*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4314*7c478bd9Sstevel@tonic-gate 		if (rib_debug) {
4315*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_conn_get: "
4316*7c478bd9Sstevel@tonic-gate 				"No server ATS record!");
4317*7c478bd9Sstevel@tonic-gate 		}
4318*7c478bd9Sstevel@tonic-gate #endif
4319*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
4320*7c478bd9Sstevel@tonic-gate 	}
4321*7c478bd9Sstevel@tonic-gate 
4322*7c478bd9Sstevel@tonic-gate 	/*
4323*7c478bd9Sstevel@tonic-gate 	 * Channel to server doesn't exist yet, create one.
4324*7c478bd9Sstevel@tonic-gate 	 */
4325*7c478bd9Sstevel@tonic-gate 	if (rib_clnt_create_chan(hca, svcaddr, &qp) != RDMA_SUCCESS) {
4326*7c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
4327*7c478bd9Sstevel@tonic-gate 	}
4328*7c478bd9Sstevel@tonic-gate 	cn = qptoc(qp);
4329*7c478bd9Sstevel@tonic-gate 	cn->c_state = C_CONN_PEND;
4330*7c478bd9Sstevel@tonic-gate 	cn->c_ref = 1;
4331*7c478bd9Sstevel@tonic-gate 
4332*7c478bd9Sstevel@tonic-gate 	/*
4333*7c478bd9Sstevel@tonic-gate 	 * Add to conn list.
4334*7c478bd9Sstevel@tonic-gate 	 * We had given up the READER lock. In the time since then,
4335*7c478bd9Sstevel@tonic-gate 	 * another thread might have created the connection we are
4336*7c478bd9Sstevel@tonic-gate 	 * trying here. But for now, that is quiet alright - there
4337*7c478bd9Sstevel@tonic-gate 	 * might be two connections between a pair of hosts instead
4338*7c478bd9Sstevel@tonic-gate 	 * of one. If we really want to close that window,
4339*7c478bd9Sstevel@tonic-gate 	 * then need to check the list after acquiring the
4340*7c478bd9Sstevel@tonic-gate 	 * WRITER lock.
4341*7c478bd9Sstevel@tonic-gate 	 */
4342*7c478bd9Sstevel@tonic-gate 	(void) rib_add_connlist(cn, &hca->cl_conn_list);
4343*7c478bd9Sstevel@tonic-gate 	status = rib_conn_to_srv(hca, qp, &path);
4344*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cn->c_lock);
4345*7c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
4346*7c478bd9Sstevel@tonic-gate 		cn->c_state = C_CONNECTED;
4347*7c478bd9Sstevel@tonic-gate 		*conn = cn;
4348*7c478bd9Sstevel@tonic-gate 	} else {
4349*7c478bd9Sstevel@tonic-gate 		cn->c_state = C_ERROR;
4350*7c478bd9Sstevel@tonic-gate 		cn->c_ref--;
4351*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
4352*7c478bd9Sstevel@tonic-gate 		if (rib_debug) {
4353*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_conn_get: FAILED creating"
4354*7c478bd9Sstevel@tonic-gate 			    " a channel!");
4355*7c478bd9Sstevel@tonic-gate 		}
4356*7c478bd9Sstevel@tonic-gate #endif
4357*7c478bd9Sstevel@tonic-gate 	}
4358*7c478bd9Sstevel@tonic-gate 	cv_broadcast(&cn->c_cv);
4359*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cn->c_lock);
4360*7c478bd9Sstevel@tonic-gate 	return (status);
4361*7c478bd9Sstevel@tonic-gate }
4362*7c478bd9Sstevel@tonic-gate 
4363*7c478bd9Sstevel@tonic-gate static rdma_stat
4364*7c478bd9Sstevel@tonic-gate rib_conn_release(CONN *conn)
4365*7c478bd9Sstevel@tonic-gate {
4366*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
4367*7c478bd9Sstevel@tonic-gate 
4368*7c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
4369*7c478bd9Sstevel@tonic-gate 	conn->c_ref--;
4370*7c478bd9Sstevel@tonic-gate 
4371*7c478bd9Sstevel@tonic-gate 	/*
4372*7c478bd9Sstevel@tonic-gate 	 * If a conn is C_ERROR, close the channel.
4373*7c478bd9Sstevel@tonic-gate 	 * If it's CONNECTED, keep it that way.
4374*7c478bd9Sstevel@tonic-gate 	 */
4375*7c478bd9Sstevel@tonic-gate 	if (conn->c_ref == 0 && (conn->c_state &  C_ERROR)) {
4376*7c478bd9Sstevel@tonic-gate 		conn->c_state = C_DISCONN_PEND;
4377*7c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
4378*7c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER)
4379*7c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn,
4380*7c478bd9Sstevel@tonic-gate 			    &qp->hca->srv_conn_list);
4381*7c478bd9Sstevel@tonic-gate 		else
4382*7c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn,
4383*7c478bd9Sstevel@tonic-gate 			    &qp->hca->cl_conn_list);
4384*7c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
4385*7c478bd9Sstevel@tonic-gate 	}
4386*7c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
4387*7c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
4388*7c478bd9Sstevel@tonic-gate }
4389*7c478bd9Sstevel@tonic-gate 
4390*7c478bd9Sstevel@tonic-gate /*
4391*7c478bd9Sstevel@tonic-gate  * Add at front of list
4392*7c478bd9Sstevel@tonic-gate  */
4393*7c478bd9Sstevel@tonic-gate static struct rdma_done_list *
4394*7c478bd9Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid)
4395*7c478bd9Sstevel@tonic-gate {
4396*7c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
4397*7c478bd9Sstevel@tonic-gate 
4398*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
4399*7c478bd9Sstevel@tonic-gate 
4400*7c478bd9Sstevel@tonic-gate 	rd = kmem_alloc(sizeof (*rd), KM_SLEEP);
4401*7c478bd9Sstevel@tonic-gate 	rd->xid = xid;
4402*7c478bd9Sstevel@tonic-gate 	cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL);
4403*7c478bd9Sstevel@tonic-gate 
4404*7c478bd9Sstevel@tonic-gate 	rd->prev = NULL;
4405*7c478bd9Sstevel@tonic-gate 	rd->next = qp->rdlist;
4406*7c478bd9Sstevel@tonic-gate 	if (qp->rdlist != NULL)
4407*7c478bd9Sstevel@tonic-gate 		qp->rdlist->prev = rd;
4408*7c478bd9Sstevel@tonic-gate 	qp->rdlist = rd;
4409*7c478bd9Sstevel@tonic-gate 
4410*7c478bd9Sstevel@tonic-gate 	return (rd);
4411*7c478bd9Sstevel@tonic-gate }
4412*7c478bd9Sstevel@tonic-gate 
4413*7c478bd9Sstevel@tonic-gate static void
4414*7c478bd9Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd)
4415*7c478bd9Sstevel@tonic-gate {
4416*7c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r;
4417*7c478bd9Sstevel@tonic-gate 
4418*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
4419*7c478bd9Sstevel@tonic-gate 
4420*7c478bd9Sstevel@tonic-gate 	r = rd->next;
4421*7c478bd9Sstevel@tonic-gate 	if (r != NULL) {
4422*7c478bd9Sstevel@tonic-gate 		r->prev = rd->prev;
4423*7c478bd9Sstevel@tonic-gate 	}
4424*7c478bd9Sstevel@tonic-gate 
4425*7c478bd9Sstevel@tonic-gate 	r = rd->prev;
4426*7c478bd9Sstevel@tonic-gate 	if (r != NULL) {
4427*7c478bd9Sstevel@tonic-gate 		r->next = rd->next;
4428*7c478bd9Sstevel@tonic-gate 	} else {
4429*7c478bd9Sstevel@tonic-gate 		qp->rdlist = rd->next;
4430*7c478bd9Sstevel@tonic-gate 	}
4431*7c478bd9Sstevel@tonic-gate 
4432*7c478bd9Sstevel@tonic-gate 	cv_destroy(&rd->rdma_done_cv);
4433*7c478bd9Sstevel@tonic-gate 	kmem_free(rd, sizeof (*rd));
4434*7c478bd9Sstevel@tonic-gate }
4435*7c478bd9Sstevel@tonic-gate 
4436*7c478bd9Sstevel@tonic-gate static void
4437*7c478bd9Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp)
4438*7c478bd9Sstevel@tonic-gate {
4439*7c478bd9Sstevel@tonic-gate 	struct rdma_done_list	*r, *n;
4440*7c478bd9Sstevel@tonic-gate 
4441*7c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
4442*7c478bd9Sstevel@tonic-gate 	for (r = qp->rdlist; r != NULL; r = n) {
4443*7c478bd9Sstevel@tonic-gate 		n = r->next;
4444*7c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, r);
4445*7c478bd9Sstevel@tonic-gate 	}
4446*7c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
4447*7c478bd9Sstevel@tonic-gate }
4448*7c478bd9Sstevel@tonic-gate 
4449*7c478bd9Sstevel@tonic-gate static void
4450*7c478bd9Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid)
4451*7c478bd9Sstevel@tonic-gate {
4452*7c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r = qp->rdlist;
4453*7c478bd9Sstevel@tonic-gate 
4454*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
4455*7c478bd9Sstevel@tonic-gate 
4456*7c478bd9Sstevel@tonic-gate 	while (r) {
4457*7c478bd9Sstevel@tonic-gate 		if (r->xid == xid) {
4458*7c478bd9Sstevel@tonic-gate 			cv_signal(&r->rdma_done_cv);
4459*7c478bd9Sstevel@tonic-gate 			return;
4460*7c478bd9Sstevel@tonic-gate 		} else {
4461*7c478bd9Sstevel@tonic-gate 			r = r->next;
4462*7c478bd9Sstevel@tonic-gate 		}
4463*7c478bd9Sstevel@tonic-gate 	}
4464*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
4465*7c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rdma_done_notify: "
4466*7c478bd9Sstevel@tonic-gate 		"No matching xid for %u, qp %p\n", xid, (void *)qp);
4467*7c478bd9Sstevel@tonic-gate 	}
4468*7c478bd9Sstevel@tonic-gate }
4469*7c478bd9Sstevel@tonic-gate 
4470*7c478bd9Sstevel@tonic-gate rpcib_ats_t *
4471*7c478bd9Sstevel@tonic-gate get_ibd_entry(ib_gid_t *gid, ib_pkey_t pkey, rpcib_ibd_insts_t *ibds)
4472*7c478bd9Sstevel@tonic-gate {
4473*7c478bd9Sstevel@tonic-gate 	rpcib_ats_t		*atsp;
4474*7c478bd9Sstevel@tonic-gate 	int			i;
4475*7c478bd9Sstevel@tonic-gate 
4476*7c478bd9Sstevel@tonic-gate 	for (i = 0, atsp = ibds->rib_ats; i < ibds->rib_ibd_cnt; i++, atsp++) {
4477*7c478bd9Sstevel@tonic-gate 		if (atsp->ras_port_gid.gid_prefix == gid->gid_prefix &&
4478*7c478bd9Sstevel@tonic-gate 		    atsp->ras_port_gid.gid_guid == gid->gid_guid &&
4479*7c478bd9Sstevel@tonic-gate 		    atsp->ras_pkey == pkey) {
4480*7c478bd9Sstevel@tonic-gate 			return (atsp);
4481*7c478bd9Sstevel@tonic-gate 		}
4482*7c478bd9Sstevel@tonic-gate 	}
4483*7c478bd9Sstevel@tonic-gate 	return (NULL);
4484*7c478bd9Sstevel@tonic-gate }
4485*7c478bd9Sstevel@tonic-gate 
4486*7c478bd9Sstevel@tonic-gate int
4487*7c478bd9Sstevel@tonic-gate rib_get_ibd_insts_cb(dev_info_t *dip, void *arg)
4488*7c478bd9Sstevel@tonic-gate {
4489*7c478bd9Sstevel@tonic-gate 	rpcib_ibd_insts_t *ibds = (rpcib_ibd_insts_t *)arg;
4490*7c478bd9Sstevel@tonic-gate 	rpcib_ats_t	*atsp;
4491*7c478bd9Sstevel@tonic-gate 	ib_pkey_t	pkey;
4492*7c478bd9Sstevel@tonic-gate 	uint8_t		port;
4493*7c478bd9Sstevel@tonic-gate 	ib_guid_t	hca_guid;
4494*7c478bd9Sstevel@tonic-gate 	ib_gid_t	port_gid;
4495*7c478bd9Sstevel@tonic-gate 
4496*7c478bd9Sstevel@tonic-gate 	if ((i_ddi_node_state(dip) >= DS_ATTACHED) &&
4497*7c478bd9Sstevel@tonic-gate 	    (strcmp(ddi_node_name(dip), "ibport") == 0) &&
4498*7c478bd9Sstevel@tonic-gate 	    (strstr(ddi_get_name_addr(dip), "ipib") != NULL)) {
4499*7c478bd9Sstevel@tonic-gate 
4500*7c478bd9Sstevel@tonic-gate 		if (ibds->rib_ibd_cnt >= ibds->rib_ibd_alloc) {
4501*7c478bd9Sstevel@tonic-gate 		    rpcib_ats_t	*tmp;
4502*7c478bd9Sstevel@tonic-gate 
4503*7c478bd9Sstevel@tonic-gate 		    tmp = (rpcib_ats_t *)kmem_zalloc((ibds->rib_ibd_alloc +
4504*7c478bd9Sstevel@tonic-gate 			N_IBD_INSTANCES) * sizeof (rpcib_ats_t), KM_SLEEP);
4505*7c478bd9Sstevel@tonic-gate 		    bcopy(ibds->rib_ats, tmp,
4506*7c478bd9Sstevel@tonic-gate 			ibds->rib_ibd_alloc * sizeof (rpcib_ats_t));
4507*7c478bd9Sstevel@tonic-gate 		    kmem_free(ibds->rib_ats,
4508*7c478bd9Sstevel@tonic-gate 			ibds->rib_ibd_alloc * sizeof (rpcib_ats_t));
4509*7c478bd9Sstevel@tonic-gate 		    ibds->rib_ats = tmp;
4510*7c478bd9Sstevel@tonic-gate 		    ibds->rib_ibd_alloc += N_IBD_INSTANCES;
4511*7c478bd9Sstevel@tonic-gate 		}
4512*7c478bd9Sstevel@tonic-gate 		if (((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY,
4513*7c478bd9Sstevel@tonic-gate 			dip, 0, "hca-guid", 0)) == 0) ||
4514*7c478bd9Sstevel@tonic-gate 		    ((port = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
4515*7c478bd9Sstevel@tonic-gate 			0, "port-number", 0)) == 0) ||
4516*7c478bd9Sstevel@tonic-gate 		    (ibt_get_port_state_byguid(hca_guid, port,
4517*7c478bd9Sstevel@tonic-gate 			&port_gid, NULL) != IBT_SUCCESS) ||
4518*7c478bd9Sstevel@tonic-gate 		    ((pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
4519*7c478bd9Sstevel@tonic-gate 			"port-pkey", IB_PKEY_INVALID_LIMITED)) <=
4520*7c478bd9Sstevel@tonic-gate 			IB_PKEY_INVALID_FULL)) {
4521*7c478bd9Sstevel@tonic-gate 		    return (DDI_WALK_CONTINUE);
4522*7c478bd9Sstevel@tonic-gate 		}
4523*7c478bd9Sstevel@tonic-gate 		atsp = &ibds->rib_ats[ibds->rib_ibd_cnt];
4524*7c478bd9Sstevel@tonic-gate 		atsp->ras_inst = ddi_get_instance(dip);
4525*7c478bd9Sstevel@tonic-gate 		atsp->ras_pkey = pkey;
4526*7c478bd9Sstevel@tonic-gate 		atsp->ras_port_gid = port_gid;
4527*7c478bd9Sstevel@tonic-gate 		ibds->rib_ibd_cnt++;
4528*7c478bd9Sstevel@tonic-gate 	}
4529*7c478bd9Sstevel@tonic-gate 	return (DDI_WALK_CONTINUE);
4530*7c478bd9Sstevel@tonic-gate }
4531*7c478bd9Sstevel@tonic-gate 
4532*7c478bd9Sstevel@tonic-gate void
4533*7c478bd9Sstevel@tonic-gate rib_get_ibd_insts(rpcib_ibd_insts_t *ibds)
4534*7c478bd9Sstevel@tonic-gate {
4535*7c478bd9Sstevel@tonic-gate 	ddi_walk_devs(ddi_root_node(), rib_get_ibd_insts_cb, ibds);
4536*7c478bd9Sstevel@tonic-gate }
4537*7c478bd9Sstevel@tonic-gate 
4538*7c478bd9Sstevel@tonic-gate /*
4539*7c478bd9Sstevel@tonic-gate  * Return ibd interfaces and ibd instances.
4540*7c478bd9Sstevel@tonic-gate  */
4541*7c478bd9Sstevel@tonic-gate int
4542*7c478bd9Sstevel@tonic-gate get_ibd_ipaddr(rpcib_ibd_insts_t *ibds)
4543*7c478bd9Sstevel@tonic-gate {
4544*7c478bd9Sstevel@tonic-gate 	TIUSER			*tiptr, *tiptr6;
4545*7c478bd9Sstevel@tonic-gate 	vnode_t			*kvp, *kvp6;
4546*7c478bd9Sstevel@tonic-gate 	vnode_t			*vp = NULL, *vp6 = NULL;
4547*7c478bd9Sstevel@tonic-gate 	struct strioctl		iocb;
4548*7c478bd9Sstevel@tonic-gate 	struct lifreq		lif_req;
4549*7c478bd9Sstevel@tonic-gate 	int			k, ip_cnt;
4550*7c478bd9Sstevel@tonic-gate 	rpcib_ats_t		*atsp;
4551*7c478bd9Sstevel@tonic-gate 
4552*7c478bd9Sstevel@tonic-gate 	if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP,
4553*7c478bd9Sstevel@tonic-gate 		&kvp) == 0) {
4554*7c478bd9Sstevel@tonic-gate 	    if (t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE,
4555*7c478bd9Sstevel@tonic-gate 		&tiptr, CRED()) == 0) {
4556*7c478bd9Sstevel@tonic-gate 		vp = tiptr->fp->f_vnode;
4557*7c478bd9Sstevel@tonic-gate 	    } else {
4558*7c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
4559*7c478bd9Sstevel@tonic-gate 	    }
4560*7c478bd9Sstevel@tonic-gate 	}
4561*7c478bd9Sstevel@tonic-gate 
4562*7c478bd9Sstevel@tonic-gate 	if (lookupname("/dev/udp6", UIO_SYSSPACE, FOLLOW, NULLVPP,
4563*7c478bd9Sstevel@tonic-gate 		&kvp6) == 0) {
4564*7c478bd9Sstevel@tonic-gate 	    if (t_kopen((file_t *)NULL, kvp6->v_rdev, FREAD|FWRITE,
4565*7c478bd9Sstevel@tonic-gate 		&tiptr6, CRED()) == 0) {
4566*7c478bd9Sstevel@tonic-gate 		vp6 = tiptr6->fp->f_vnode;
4567*7c478bd9Sstevel@tonic-gate 	    } else {
4568*7c478bd9Sstevel@tonic-gate 		VN_RELE(kvp6);
4569*7c478bd9Sstevel@tonic-gate 	    }
4570*7c478bd9Sstevel@tonic-gate 	}
4571*7c478bd9Sstevel@tonic-gate 
4572*7c478bd9Sstevel@tonic-gate 	if (vp == NULL && vp6 == NULL)
4573*7c478bd9Sstevel@tonic-gate 		return (-1);
4574*7c478bd9Sstevel@tonic-gate 
4575*7c478bd9Sstevel@tonic-gate 	/* Get ibd ip's */
4576*7c478bd9Sstevel@tonic-gate 	ip_cnt = 0;
4577*7c478bd9Sstevel@tonic-gate 	for (k = 0, atsp = ibds->rib_ats; k < ibds->rib_ibd_cnt; k++, atsp++) {
4578*7c478bd9Sstevel@tonic-gate 		/* IPv4 */
4579*7c478bd9Sstevel@tonic-gate 	    if (vp != NULL) {
4580*7c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&lif_req, sizeof (struct lifreq));
4581*7c478bd9Sstevel@tonic-gate 		(void) snprintf(lif_req.lifr_name,
4582*7c478bd9Sstevel@tonic-gate 			sizeof (lif_req.lifr_name), "%s%d",
4583*7c478bd9Sstevel@tonic-gate 			IBD_NAME, atsp->ras_inst);
4584*7c478bd9Sstevel@tonic-gate 
4585*7c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&iocb, sizeof (struct strioctl));
4586*7c478bd9Sstevel@tonic-gate 		iocb.ic_cmd = SIOCGLIFADDR;
4587*7c478bd9Sstevel@tonic-gate 		iocb.ic_timout = 0;
4588*7c478bd9Sstevel@tonic-gate 		iocb.ic_len = sizeof (struct lifreq);
4589*7c478bd9Sstevel@tonic-gate 		iocb.ic_dp = (caddr_t)&lif_req;
4590*7c478bd9Sstevel@tonic-gate 		if (kstr_ioctl(vp, I_STR, (intptr_t)&iocb) == 0) {
4591*7c478bd9Sstevel@tonic-gate 		    atsp->ras_inet_type = AF_INET;
4592*7c478bd9Sstevel@tonic-gate 		    bcopy(&lif_req.lifr_addr, &atsp->ras_sin,
4593*7c478bd9Sstevel@tonic-gate 			sizeof (struct sockaddr_in));
4594*7c478bd9Sstevel@tonic-gate 		    ip_cnt++;
4595*7c478bd9Sstevel@tonic-gate 		    continue;
4596*7c478bd9Sstevel@tonic-gate 		}
4597*7c478bd9Sstevel@tonic-gate 	    }
4598*7c478bd9Sstevel@tonic-gate 		/* Try IPv6 */
4599*7c478bd9Sstevel@tonic-gate 	    if (vp6 != NULL) {
4600*7c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&lif_req, sizeof (struct lifreq));
4601*7c478bd9Sstevel@tonic-gate 		(void) snprintf(lif_req.lifr_name,
4602*7c478bd9Sstevel@tonic-gate 			sizeof (lif_req.lifr_name), "%s%d",
4603*7c478bd9Sstevel@tonic-gate 			IBD_NAME, atsp->ras_inst);
4604*7c478bd9Sstevel@tonic-gate 
4605*7c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&iocb, sizeof (struct strioctl));
4606*7c478bd9Sstevel@tonic-gate 		iocb.ic_cmd = SIOCGLIFADDR;
4607*7c478bd9Sstevel@tonic-gate 		iocb.ic_timout = 0;
4608*7c478bd9Sstevel@tonic-gate 		iocb.ic_len = sizeof (struct lifreq);
4609*7c478bd9Sstevel@tonic-gate 		iocb.ic_dp = (caddr_t)&lif_req;
4610*7c478bd9Sstevel@tonic-gate 		if (kstr_ioctl(vp6, I_STR, (intptr_t)&iocb) == 0) {
4611*7c478bd9Sstevel@tonic-gate 
4612*7c478bd9Sstevel@tonic-gate 		    atsp->ras_inet_type = AF_INET6;
4613*7c478bd9Sstevel@tonic-gate 		    bcopy(&lif_req.lifr_addr, &atsp->ras_sin6,
4614*7c478bd9Sstevel@tonic-gate 			    sizeof (struct sockaddr_in6));
4615*7c478bd9Sstevel@tonic-gate 		    ip_cnt++;
4616*7c478bd9Sstevel@tonic-gate 		}
4617*7c478bd9Sstevel@tonic-gate 	    }
4618*7c478bd9Sstevel@tonic-gate 	}
4619*7c478bd9Sstevel@tonic-gate 
4620*7c478bd9Sstevel@tonic-gate 	if (vp6 != NULL) {
4621*7c478bd9Sstevel@tonic-gate 	    (void) t_kclose(tiptr6, 0);
4622*7c478bd9Sstevel@tonic-gate 	    VN_RELE(kvp6);
4623*7c478bd9Sstevel@tonic-gate 	}
4624*7c478bd9Sstevel@tonic-gate 	if (vp != NULL) {
4625*7c478bd9Sstevel@tonic-gate 	    (void) t_kclose(tiptr, 0);
4626*7c478bd9Sstevel@tonic-gate 	    VN_RELE(kvp);
4627*7c478bd9Sstevel@tonic-gate 	}
4628*7c478bd9Sstevel@tonic-gate 
4629*7c478bd9Sstevel@tonic-gate 	if (ip_cnt == 0)
4630*7c478bd9Sstevel@tonic-gate 	    return (-1);
4631*7c478bd9Sstevel@tonic-gate 	else
4632*7c478bd9Sstevel@tonic-gate 	    return (0);
4633*7c478bd9Sstevel@tonic-gate }
4634*7c478bd9Sstevel@tonic-gate 
4635*7c478bd9Sstevel@tonic-gate char **
4636*7c478bd9Sstevel@tonic-gate get_ip_addrs(int *count)
4637*7c478bd9Sstevel@tonic-gate {
4638*7c478bd9Sstevel@tonic-gate 	TIUSER			*tiptr;
4639*7c478bd9Sstevel@tonic-gate 	vnode_t			*kvp;
4640*7c478bd9Sstevel@tonic-gate 	int			num_of_ifs;
4641*7c478bd9Sstevel@tonic-gate 	char			**addresses;
4642*7c478bd9Sstevel@tonic-gate 	int			return_code;
4643*7c478bd9Sstevel@tonic-gate 
4644*7c478bd9Sstevel@tonic-gate 	/*
4645*7c478bd9Sstevel@tonic-gate 	 * Open a device for doing down stream kernel ioctls
4646*7c478bd9Sstevel@tonic-gate 	 */
4647*7c478bd9Sstevel@tonic-gate 	return_code = lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW,
4648*7c478bd9Sstevel@tonic-gate 	    NULLVPP, &kvp);
4649*7c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
4650*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: lookupname failed\n");
4651*7c478bd9Sstevel@tonic-gate 		*count = -1;
4652*7c478bd9Sstevel@tonic-gate 		return (NULL);
4653*7c478bd9Sstevel@tonic-gate 	}
4654*7c478bd9Sstevel@tonic-gate 
4655*7c478bd9Sstevel@tonic-gate 	return_code = t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE,
4656*7c478bd9Sstevel@tonic-gate 	    &tiptr, CRED());
4657*7c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
4658*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: t_kopen failed\n");
4659*7c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
4660*7c478bd9Sstevel@tonic-gate 		*count = -1;
4661*7c478bd9Sstevel@tonic-gate 		return (NULL);
4662*7c478bd9Sstevel@tonic-gate 	}
4663*7c478bd9Sstevel@tonic-gate 
4664*7c478bd9Sstevel@tonic-gate 	/*
4665*7c478bd9Sstevel@tonic-gate 	 * Perform the first ioctl to get the number of interfaces
4666*7c478bd9Sstevel@tonic-gate 	 */
4667*7c478bd9Sstevel@tonic-gate 	return_code = get_interfaces(tiptr, &num_of_ifs);
4668*7c478bd9Sstevel@tonic-gate 	if (return_code != 0 || num_of_ifs == 0) {
4669*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: get_interfaces failed\n");
4670*7c478bd9Sstevel@tonic-gate 		(void) t_kclose(tiptr, 0);
4671*7c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
4672*7c478bd9Sstevel@tonic-gate 		*count = -1;
4673*7c478bd9Sstevel@tonic-gate 		return (NULL);
4674*7c478bd9Sstevel@tonic-gate 	}
4675*7c478bd9Sstevel@tonic-gate 
4676*7c478bd9Sstevel@tonic-gate 	/*
4677*7c478bd9Sstevel@tonic-gate 	 * Perform the second ioctl to get the address on each interface
4678*7c478bd9Sstevel@tonic-gate 	 * found.
4679*7c478bd9Sstevel@tonic-gate 	 */
4680*7c478bd9Sstevel@tonic-gate 	addresses = kmem_zalloc(num_of_ifs * sizeof (char *), KM_SLEEP);
4681*7c478bd9Sstevel@tonic-gate 	return_code = find_addrs(tiptr, addresses, num_of_ifs);
4682*7c478bd9Sstevel@tonic-gate 	if (return_code <= 0) {
4683*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: find_addrs failed\n");
4684*7c478bd9Sstevel@tonic-gate 		(void) t_kclose(tiptr, 0);
4685*7c478bd9Sstevel@tonic-gate 		kmem_free(addresses, num_of_ifs * sizeof (char *));
4686*7c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
4687*7c478bd9Sstevel@tonic-gate 		*count = -1;
4688*7c478bd9Sstevel@tonic-gate 		return (NULL);
4689*7c478bd9Sstevel@tonic-gate 	}
4690*7c478bd9Sstevel@tonic-gate 
4691*7c478bd9Sstevel@tonic-gate 	*count = return_code;
4692*7c478bd9Sstevel@tonic-gate 	VN_RELE(kvp);
4693*7c478bd9Sstevel@tonic-gate 	(void) t_kclose(tiptr, 0);
4694*7c478bd9Sstevel@tonic-gate 	return (addresses);
4695*7c478bd9Sstevel@tonic-gate }
4696*7c478bd9Sstevel@tonic-gate 
4697*7c478bd9Sstevel@tonic-gate int
4698*7c478bd9Sstevel@tonic-gate get_interfaces(TIUSER *tiptr, int *num)
4699*7c478bd9Sstevel@tonic-gate {
4700*7c478bd9Sstevel@tonic-gate 	struct lifnum		if_buf;
4701*7c478bd9Sstevel@tonic-gate 	struct strioctl		iocb;
4702*7c478bd9Sstevel@tonic-gate 	vnode_t			*vp;
4703*7c478bd9Sstevel@tonic-gate 	int			return_code;
4704*7c478bd9Sstevel@tonic-gate 
4705*7c478bd9Sstevel@tonic-gate 	/*
4706*7c478bd9Sstevel@tonic-gate 	 * Prep the number of interfaces request buffer for ioctl
4707*7c478bd9Sstevel@tonic-gate 	 */
4708*7c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&if_buf, sizeof (struct lifnum));
4709*7c478bd9Sstevel@tonic-gate 	if_buf.lifn_family = AF_UNSPEC;
4710*7c478bd9Sstevel@tonic-gate 	if_buf.lifn_flags = 0;
4711*7c478bd9Sstevel@tonic-gate 
4712*7c478bd9Sstevel@tonic-gate 	/*
4713*7c478bd9Sstevel@tonic-gate 	 * Prep the kernel ioctl buffer and send it down stream
4714*7c478bd9Sstevel@tonic-gate 	 */
4715*7c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&iocb, sizeof (struct strioctl));
4716*7c478bd9Sstevel@tonic-gate 	iocb.ic_cmd = SIOCGLIFNUM;
4717*7c478bd9Sstevel@tonic-gate 	iocb.ic_timout = 0;
4718*7c478bd9Sstevel@tonic-gate 	iocb.ic_len = sizeof (if_buf);
4719*7c478bd9Sstevel@tonic-gate 	iocb.ic_dp = (caddr_t)&if_buf;
4720*7c478bd9Sstevel@tonic-gate 
4721*7c478bd9Sstevel@tonic-gate 	vp = tiptr->fp->f_vnode;
4722*7c478bd9Sstevel@tonic-gate 	return_code = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
4723*7c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
4724*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_interfaces: kstr_ioctl failed\n");
4725*7c478bd9Sstevel@tonic-gate 		*num = -1;
4726*7c478bd9Sstevel@tonic-gate 		return (-1);
4727*7c478bd9Sstevel@tonic-gate 	}
4728*7c478bd9Sstevel@tonic-gate 
4729*7c478bd9Sstevel@tonic-gate 	*num = if_buf.lifn_count;
4730*7c478bd9Sstevel@tonic-gate #ifdef	DEBUG
4731*7c478bd9Sstevel@tonic-gate 	if (rib_debug > 1)
4732*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "Number of interfaces detected: %d\n",
4733*7c478bd9Sstevel@tonic-gate 		    if_buf.lifn_count);
4734*7c478bd9Sstevel@tonic-gate #endif
4735*7c478bd9Sstevel@tonic-gate 	return (0);
4736*7c478bd9Sstevel@tonic-gate }
4737*7c478bd9Sstevel@tonic-gate 
4738*7c478bd9Sstevel@tonic-gate int
4739*7c478bd9Sstevel@tonic-gate find_addrs(TIUSER *tiptr, char **addrs, int num_ifs)
4740*7c478bd9Sstevel@tonic-gate {
4741*7c478bd9Sstevel@tonic-gate 	struct lifconf		lifc;
4742*7c478bd9Sstevel@tonic-gate 	struct lifreq		*if_data_buf;
4743*7c478bd9Sstevel@tonic-gate 	struct strioctl		iocb;
4744*7c478bd9Sstevel@tonic-gate 	caddr_t			request_buffer;
4745*7c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin4;
4746*7c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
4747*7c478bd9Sstevel@tonic-gate 	vnode_t			*vp;
4748*7c478bd9Sstevel@tonic-gate 	int			i, count, return_code;
4749*7c478bd9Sstevel@tonic-gate 
4750*7c478bd9Sstevel@tonic-gate 	/*
4751*7c478bd9Sstevel@tonic-gate 	 * Prep the buffer for requesting all interface's info
4752*7c478bd9Sstevel@tonic-gate 	 */
4753*7c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&lifc, sizeof (struct lifconf));
4754*7c478bd9Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
4755*7c478bd9Sstevel@tonic-gate 	lifc.lifc_flags = 0;
4756*7c478bd9Sstevel@tonic-gate 	lifc.lifc_len = num_ifs * sizeof (struct lifreq);
4757*7c478bd9Sstevel@tonic-gate 
4758*7c478bd9Sstevel@tonic-gate 	request_buffer = kmem_zalloc(num_ifs * sizeof (struct lifreq),
4759*7c478bd9Sstevel@tonic-gate 	    KM_SLEEP);
4760*7c478bd9Sstevel@tonic-gate 
4761*7c478bd9Sstevel@tonic-gate 	lifc.lifc_buf = request_buffer;
4762*7c478bd9Sstevel@tonic-gate 
4763*7c478bd9Sstevel@tonic-gate 	/*
4764*7c478bd9Sstevel@tonic-gate 	 * Prep the kernel ioctl buffer and send it down stream
4765*7c478bd9Sstevel@tonic-gate 	 */
4766*7c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&iocb, sizeof (struct strioctl));
4767*7c478bd9Sstevel@tonic-gate 	iocb.ic_cmd = SIOCGLIFCONF;
4768*7c478bd9Sstevel@tonic-gate 	iocb.ic_timout = 0;
4769*7c478bd9Sstevel@tonic-gate 	iocb.ic_len = sizeof (struct lifconf);
4770*7c478bd9Sstevel@tonic-gate 	iocb.ic_dp = (caddr_t)&lifc;
4771*7c478bd9Sstevel@tonic-gate 
4772*7c478bd9Sstevel@tonic-gate 	vp = tiptr->fp->f_vnode;
4773*7c478bd9Sstevel@tonic-gate 	return_code = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
4774*7c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
4775*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "find_addrs: kstr_ioctl failed\n");
4776*7c478bd9Sstevel@tonic-gate 		kmem_free(request_buffer, num_ifs * sizeof (struct lifreq));
4777*7c478bd9Sstevel@tonic-gate 		return (-1);
4778*7c478bd9Sstevel@tonic-gate 	}
4779*7c478bd9Sstevel@tonic-gate 
4780*7c478bd9Sstevel@tonic-gate 	/*
4781*7c478bd9Sstevel@tonic-gate 	 * Extract addresses and fill them in the requested array
4782*7c478bd9Sstevel@tonic-gate 	 * IB_SVC_NAME_LEN is defined to be 64 so it  covers both IPv4 &
4783*7c478bd9Sstevel@tonic-gate 	 * IPv6. Here count is the number of IP addresses collected.
4784*7c478bd9Sstevel@tonic-gate 	 */
4785*7c478bd9Sstevel@tonic-gate 	if_data_buf = lifc.lifc_req;
4786*7c478bd9Sstevel@tonic-gate 	count = 0;
4787*7c478bd9Sstevel@tonic-gate 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--,
4788*7c478bd9Sstevel@tonic-gate 	if_data_buf++) {
4789*7c478bd9Sstevel@tonic-gate 		if (if_data_buf->lifr_addr.ss_family == AF_INET) {
4790*7c478bd9Sstevel@tonic-gate 			sin4 = (struct sockaddr_in *)&if_data_buf->lifr_addr;
4791*7c478bd9Sstevel@tonic-gate 			addrs[count] = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
4792*7c478bd9Sstevel@tonic-gate 			(void) inet_ntop(AF_INET, &sin4->sin_addr,
4793*7c478bd9Sstevel@tonic-gate 			    addrs[count], IB_SVC_NAME_LEN);
4794*7c478bd9Sstevel@tonic-gate 			count ++;
4795*7c478bd9Sstevel@tonic-gate 		}
4796*7c478bd9Sstevel@tonic-gate 
4797*7c478bd9Sstevel@tonic-gate 		if (if_data_buf->lifr_addr.ss_family == AF_INET6) {
4798*7c478bd9Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)&if_data_buf->lifr_addr;
4799*7c478bd9Sstevel@tonic-gate 			addrs[count] = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
4800*7c478bd9Sstevel@tonic-gate 			(void) inet_ntop(AF_INET6, &sin6->sin6_addr,
4801*7c478bd9Sstevel@tonic-gate 			    addrs[count], IB_SVC_NAME_LEN);
4802*7c478bd9Sstevel@tonic-gate 			count ++;
4803*7c478bd9Sstevel@tonic-gate 		}
4804*7c478bd9Sstevel@tonic-gate 	}
4805*7c478bd9Sstevel@tonic-gate 
4806*7c478bd9Sstevel@tonic-gate 	kmem_free(request_buffer, num_ifs * sizeof (struct lifreq));
4807*7c478bd9Sstevel@tonic-gate 	return (count);
4808*7c478bd9Sstevel@tonic-gate }
4809*7c478bd9Sstevel@tonic-gate 
4810*7c478bd9Sstevel@tonic-gate /*
4811*7c478bd9Sstevel@tonic-gate  * Goes through all connections and closes the channel
4812*7c478bd9Sstevel@tonic-gate  * This will cause all the WRs on those channels to be
4813*7c478bd9Sstevel@tonic-gate  * flushed.
4814*7c478bd9Sstevel@tonic-gate  */
4815*7c478bd9Sstevel@tonic-gate static void
4816*7c478bd9Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist)
4817*7c478bd9Sstevel@tonic-gate {
4818*7c478bd9Sstevel@tonic-gate 	CONN 		*conn;
4819*7c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
4820*7c478bd9Sstevel@tonic-gate 
4821*7c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
4822*7c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
4823*7c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
4824*7c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
4825*7c478bd9Sstevel@tonic-gate 		qp = ctoqp(conn);
4826*7c478bd9Sstevel@tonic-gate 		if (conn->c_state & C_CONNECTED) {
4827*7c478bd9Sstevel@tonic-gate 			/*
4828*7c478bd9Sstevel@tonic-gate 			 * Live connection in CONNECTED state.
4829*7c478bd9Sstevel@tonic-gate 			 * Call ibt_close_rc_channel in nonblocking mode
4830*7c478bd9Sstevel@tonic-gate 			 * with no callbacks.
4831*7c478bd9Sstevel@tonic-gate 			 */
4832*7c478bd9Sstevel@tonic-gate 			conn->c_state = C_ERROR;
4833*7c478bd9Sstevel@tonic-gate 			(void) ibt_close_rc_channel(qp->qp_hdl,
4834*7c478bd9Sstevel@tonic-gate 				IBT_NOCALLBACKS, NULL, 0, NULL, NULL, 0);
4835*7c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
4836*7c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
4837*7c478bd9Sstevel@tonic-gate 		} else {
4838*7c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_ERROR &&
4839*7c478bd9Sstevel@tonic-gate 				qp->qp_hdl != NULL) {
4840*7c478bd9Sstevel@tonic-gate 				/*
4841*7c478bd9Sstevel@tonic-gate 				 * Connection in ERROR state but
4842*7c478bd9Sstevel@tonic-gate 				 * channel is not yet freed.
4843*7c478bd9Sstevel@tonic-gate 				 */
4844*7c478bd9Sstevel@tonic-gate 				(void) ibt_close_rc_channel(qp->qp_hdl,
4845*7c478bd9Sstevel@tonic-gate 					IBT_NOCALLBACKS, NULL, 0, NULL,
4846*7c478bd9Sstevel@tonic-gate 					NULL, 0);
4847*7c478bd9Sstevel@tonic-gate 				(void) ibt_free_channel(qp->qp_hdl);
4848*7c478bd9Sstevel@tonic-gate 				qp->qp_hdl = NULL;
4849*7c478bd9Sstevel@tonic-gate 			}
4850*7c478bd9Sstevel@tonic-gate 		}
4851*7c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
4852*7c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
4853*7c478bd9Sstevel@tonic-gate 	}
4854*7c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
4855*7c478bd9Sstevel@tonic-gate }
4856*7c478bd9Sstevel@tonic-gate 
4857*7c478bd9Sstevel@tonic-gate /*
4858*7c478bd9Sstevel@tonic-gate  * Frees up all connections that are no longer being referenced
4859*7c478bd9Sstevel@tonic-gate  */
4860*7c478bd9Sstevel@tonic-gate static void
4861*7c478bd9Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist)
4862*7c478bd9Sstevel@tonic-gate {
4863*7c478bd9Sstevel@tonic-gate 	CONN 		*conn;
4864*7c478bd9Sstevel@tonic-gate 
4865*7c478bd9Sstevel@tonic-gate top:
4866*7c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
4867*7c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
4868*7c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
4869*7c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
4870*7c478bd9Sstevel@tonic-gate 
4871*7c478bd9Sstevel@tonic-gate 		/*
4872*7c478bd9Sstevel@tonic-gate 		 * At this point connection is either in ERROR
4873*7c478bd9Sstevel@tonic-gate 		 * or DISCONN_PEND state. If in DISCONN_PEND state
4874*7c478bd9Sstevel@tonic-gate 		 * then some other thread is culling that connection.
4875*7c478bd9Sstevel@tonic-gate 		 * If not and if c_ref is 0, then destroy the connection.
4876*7c478bd9Sstevel@tonic-gate 		 */
4877*7c478bd9Sstevel@tonic-gate 		if (conn->c_ref == 0 &&
4878*7c478bd9Sstevel@tonic-gate 			conn->c_state != C_DISCONN_PEND) {
4879*7c478bd9Sstevel@tonic-gate 			/*
4880*7c478bd9Sstevel@tonic-gate 			 * Cull the connection
4881*7c478bd9Sstevel@tonic-gate 			 */
4882*7c478bd9Sstevel@tonic-gate 			conn->c_state = C_DISCONN_PEND;
4883*7c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
4884*7c478bd9Sstevel@tonic-gate 			rw_exit(&connlist->conn_lock);
4885*7c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, connlist);
4886*7c478bd9Sstevel@tonic-gate 			goto top;
4887*7c478bd9Sstevel@tonic-gate 		} else {
4888*7c478bd9Sstevel@tonic-gate 			/*
4889*7c478bd9Sstevel@tonic-gate 			 * conn disconnect already scheduled or will
4890*7c478bd9Sstevel@tonic-gate 			 * happen from conn_release when c_ref drops to 0.
4891*7c478bd9Sstevel@tonic-gate 			 */
4892*7c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
4893*7c478bd9Sstevel@tonic-gate 		}
4894*7c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
4895*7c478bd9Sstevel@tonic-gate 	}
4896*7c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
4897*7c478bd9Sstevel@tonic-gate 
4898*7c478bd9Sstevel@tonic-gate 	/*
4899*7c478bd9Sstevel@tonic-gate 	 * At this point, only connections with c_ref != 0 are on the list
4900*7c478bd9Sstevel@tonic-gate 	 */
4901*7c478bd9Sstevel@tonic-gate }
4902*7c478bd9Sstevel@tonic-gate 
4903*7c478bd9Sstevel@tonic-gate /*
4904*7c478bd9Sstevel@tonic-gate  * Cleans and closes up all uses of the HCA
4905*7c478bd9Sstevel@tonic-gate  */
4906*7c478bd9Sstevel@tonic-gate static void
4907*7c478bd9Sstevel@tonic-gate rib_detach_hca(rib_hca_t *hca)
4908*7c478bd9Sstevel@tonic-gate {
4909*7c478bd9Sstevel@tonic-gate 
4910*7c478bd9Sstevel@tonic-gate 	/*
4911*7c478bd9Sstevel@tonic-gate 	 * Stop all services on the HCA
4912*7c478bd9Sstevel@tonic-gate 	 * Go through cl_conn_list and close all rc_channels
4913*7c478bd9Sstevel@tonic-gate 	 * Go through svr_conn_list and close all rc_channels
4914*7c478bd9Sstevel@tonic-gate 	 * Free connections whose c_ref has dropped to 0
4915*7c478bd9Sstevel@tonic-gate 	 * Destroy all CQs
4916*7c478bd9Sstevel@tonic-gate 	 * Deregister and released all buffer pool memory after all
4917*7c478bd9Sstevel@tonic-gate 	 * connections are destroyed
4918*7c478bd9Sstevel@tonic-gate 	 * Free the protection domain
4919*7c478bd9Sstevel@tonic-gate 	 * ibt_close_hca()
4920*7c478bd9Sstevel@tonic-gate 	 */
4921*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_WRITER);
4922*7c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_DETACHED) {
4923*7c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
4924*7c478bd9Sstevel@tonic-gate 		return;
4925*7c478bd9Sstevel@tonic-gate 	}
4926*7c478bd9Sstevel@tonic-gate 
4927*7c478bd9Sstevel@tonic-gate 	hca->state = HCA_DETACHED;
4928*7c478bd9Sstevel@tonic-gate 	rib_stat->nhca_inited--;
4929*7c478bd9Sstevel@tonic-gate 
4930*7c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
4931*7c478bd9Sstevel@tonic-gate 	rib_deregister_ats();
4932*7c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->cl_conn_list);
4933*7c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->srv_conn_list);
4934*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
4935*7c478bd9Sstevel@tonic-gate 
4936*7c478bd9Sstevel@tonic-gate 	rib_purge_connlist(&hca->cl_conn_list);
4937*7c478bd9Sstevel@tonic-gate 	rib_purge_connlist(&hca->srv_conn_list);
4938*7c478bd9Sstevel@tonic-gate 
4939*7c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl);
4940*7c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl);
4941*7c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl);
4942*7c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->svc_scq->rib_cq_hdl);
4943*7c478bd9Sstevel@tonic-gate 	kmem_free(hca->clnt_rcq, sizeof (rib_cq_t));
4944*7c478bd9Sstevel@tonic-gate 	kmem_free(hca->clnt_scq, sizeof (rib_cq_t));
4945*7c478bd9Sstevel@tonic-gate 	kmem_free(hca->svc_rcq, sizeof (rib_cq_t));
4946*7c478bd9Sstevel@tonic-gate 	kmem_free(hca->svc_scq, sizeof (rib_cq_t));
4947*7c478bd9Sstevel@tonic-gate 
4948*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
4949*7c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
4950*7c478bd9Sstevel@tonic-gate 	if (hca->srv_conn_list.conn_hd == NULL &&
4951*7c478bd9Sstevel@tonic-gate 		hca->cl_conn_list.conn_hd == NULL) {
4952*7c478bd9Sstevel@tonic-gate 		/*
4953*7c478bd9Sstevel@tonic-gate 		 * conn_lists are NULL, so destroy
4954*7c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
4955*7c478bd9Sstevel@tonic-gate 		 */
4956*7c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, RECV_BUFFER);
4957*7c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, SEND_BUFFER);
4958*7c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
4959*7c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
4960*7c478bd9Sstevel@tonic-gate 		hca->hca_hdl = NULL;
4961*7c478bd9Sstevel@tonic-gate 	}
4962*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
4963*7c478bd9Sstevel@tonic-gate 	rw_exit(&hca->srv_conn_list.conn_lock);
4964*7c478bd9Sstevel@tonic-gate 
4965*7c478bd9Sstevel@tonic-gate 	if (hca->hca_hdl != NULL) {
4966*7c478bd9Sstevel@tonic-gate 		mutex_enter(&hca->inuse_lock);
4967*7c478bd9Sstevel@tonic-gate 		while (hca->inuse)
4968*7c478bd9Sstevel@tonic-gate 			cv_wait(&hca->cb_cv, &hca->inuse_lock);
4969*7c478bd9Sstevel@tonic-gate 		mutex_exit(&hca->inuse_lock);
4970*7c478bd9Sstevel@tonic-gate 		/*
4971*7c478bd9Sstevel@tonic-gate 		 * conn_lists are now NULL, so destroy
4972*7c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
4973*7c478bd9Sstevel@tonic-gate 		 */
4974*7c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, RECV_BUFFER);
4975*7c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, SEND_BUFFER);
4976*7c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
4977*7c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
4978*7c478bd9Sstevel@tonic-gate 		hca->hca_hdl = NULL;
4979*7c478bd9Sstevel@tonic-gate 	}
4980*7c478bd9Sstevel@tonic-gate }
4981