xref: /titanic_53/usr/src/uts/common/rpc/rpcib.c (revision 43ed929a6988c3778f00123f4a4a8541e515ec16)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * The rpcib plugin. Implements the interface for RDMATF's
317c478bd9Sstevel@tonic-gate  * interaction with IBTF.
327c478bd9Sstevel@tonic-gate  */
337c478bd9Sstevel@tonic-gate 
347c478bd9Sstevel@tonic-gate #include <sys/param.h>
357c478bd9Sstevel@tonic-gate #include <sys/types.h>
367c478bd9Sstevel@tonic-gate #include <sys/user.h>
377c478bd9Sstevel@tonic-gate #include <sys/systm.h>
387c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
397c478bd9Sstevel@tonic-gate #include <sys/proc.h>
407c478bd9Sstevel@tonic-gate #include <sys/socket.h>
417c478bd9Sstevel@tonic-gate #include <sys/file.h>
427c478bd9Sstevel@tonic-gate #include <sys/stream.h>
437c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
447c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
457c478bd9Sstevel@tonic-gate #include <sys/errno.h>
467c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
477c478bd9Sstevel@tonic-gate #include <sys/debug.h>
487c478bd9Sstevel@tonic-gate #include <sys/systm.h>
497c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
507c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
517c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
527c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
537c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
547c478bd9Sstevel@tonic-gate #include <sys/time.h>
557c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
567c478bd9Sstevel@tonic-gate #include <sys/callb.h>
577c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
587c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
597c478bd9Sstevel@tonic-gate 
607c478bd9Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h>
617c478bd9Sstevel@tonic-gate #include <rpc/rpc.h>
627c478bd9Sstevel@tonic-gate #include <rpc/ib.h>
637c478bd9Sstevel@tonic-gate 
647c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
677c478bd9Sstevel@tonic-gate #include <sys/kstr.h>
687c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
697c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
707c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
717c478bd9Sstevel@tonic-gate #include <net/if.h>
727c478bd9Sstevel@tonic-gate #include <sys/cred.h>
737c478bd9Sstevel@tonic-gate 
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate extern char *inet_ntop(int, const void *, char *, int);
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate 
787c478bd9Sstevel@tonic-gate /*
797c478bd9Sstevel@tonic-gate  * Prototype declarations for driver ops
807c478bd9Sstevel@tonic-gate  */
817c478bd9Sstevel@tonic-gate 
827c478bd9Sstevel@tonic-gate static int	rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
837c478bd9Sstevel@tonic-gate static int	rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
847c478bd9Sstevel@tonic-gate 			    void *, void **);
857c478bd9Sstevel@tonic-gate static int	rpcib_detach(dev_info_t *, ddi_detach_cmd_t);
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate /* rpcib cb_ops */
897c478bd9Sstevel@tonic-gate static struct cb_ops rpcib_cbops = {
907c478bd9Sstevel@tonic-gate 	nulldev,		/* open */
917c478bd9Sstevel@tonic-gate 	nulldev,		/* close */
927c478bd9Sstevel@tonic-gate 	nodev,			/* strategy */
937c478bd9Sstevel@tonic-gate 	nodev,			/* print */
947c478bd9Sstevel@tonic-gate 	nodev,			/* dump */
957c478bd9Sstevel@tonic-gate 	nodev,			/* read */
967c478bd9Sstevel@tonic-gate 	nodev,			/* write */
977c478bd9Sstevel@tonic-gate 	nodev,			/* ioctl */
987c478bd9Sstevel@tonic-gate 	nodev,			/* devmap */
997c478bd9Sstevel@tonic-gate 	nodev,			/* mmap */
1007c478bd9Sstevel@tonic-gate 	nodev,			/* segmap */
1017c478bd9Sstevel@tonic-gate 	nochpoll,		/* poll */
1027c478bd9Sstevel@tonic-gate 	ddi_prop_op,		/* prop_op */
1037c478bd9Sstevel@tonic-gate 	NULL,			/* stream */
1047c478bd9Sstevel@tonic-gate 	D_MP,			/* cb_flag */
1057c478bd9Sstevel@tonic-gate 	CB_REV,			/* rev */
1067c478bd9Sstevel@tonic-gate 	nodev,			/* int (*cb_aread)() */
1077c478bd9Sstevel@tonic-gate 	nodev			/* int (*cb_awrite)() */
1087c478bd9Sstevel@tonic-gate };
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate /*
1117c478bd9Sstevel@tonic-gate  * Device options
1127c478bd9Sstevel@tonic-gate  */
1137c478bd9Sstevel@tonic-gate static struct dev_ops rpcib_ops = {
1147c478bd9Sstevel@tonic-gate 	DEVO_REV,		/* devo_rev, */
1157c478bd9Sstevel@tonic-gate 	0,			/* refcnt  */
1167c478bd9Sstevel@tonic-gate 	rpcib_getinfo,		/* info */
1177c478bd9Sstevel@tonic-gate 	nulldev,		/* identify */
1187c478bd9Sstevel@tonic-gate 	nulldev,		/* probe */
1197c478bd9Sstevel@tonic-gate 	rpcib_attach,		/* attach */
1207c478bd9Sstevel@tonic-gate 	rpcib_detach,		/* detach */
1217c478bd9Sstevel@tonic-gate 	nodev,			/* reset */
1227c478bd9Sstevel@tonic-gate 	&rpcib_cbops,		    /* driver ops - devctl interfaces */
1237c478bd9Sstevel@tonic-gate 	NULL,			/* bus operations */
1247c478bd9Sstevel@tonic-gate 	NULL			/* power */
1257c478bd9Sstevel@tonic-gate };
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate /*
1287c478bd9Sstevel@tonic-gate  * Module linkage information.
1297c478bd9Sstevel@tonic-gate  */
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate static struct modldrv rib_modldrv = {
1327c478bd9Sstevel@tonic-gate 	&mod_driverops,			    /* Driver module */
1337c478bd9Sstevel@tonic-gate 	"RPCIB plugin driver, ver %I%", /* Driver name and version */
1347c478bd9Sstevel@tonic-gate 	&rpcib_ops,		    /* Driver ops */
1357c478bd9Sstevel@tonic-gate };
1367c478bd9Sstevel@tonic-gate 
1377c478bd9Sstevel@tonic-gate static struct modlinkage rib_modlinkage = {
1387c478bd9Sstevel@tonic-gate 	MODREV_1,
1397c478bd9Sstevel@tonic-gate 	(void *)&rib_modldrv,
1407c478bd9Sstevel@tonic-gate 	NULL
1417c478bd9Sstevel@tonic-gate };
1427c478bd9Sstevel@tonic-gate 
1437c478bd9Sstevel@tonic-gate /*
1447c478bd9Sstevel@tonic-gate  * rib_stat: private data pointer used when registering
1457c478bd9Sstevel@tonic-gate  *	with the IBTF.  It is returned to the consumer
1467c478bd9Sstevel@tonic-gate  *	in all callbacks.
1477c478bd9Sstevel@tonic-gate  */
1487c478bd9Sstevel@tonic-gate static rpcib_state_t *rib_stat = NULL;
1497c478bd9Sstevel@tonic-gate 
1507c478bd9Sstevel@tonic-gate #define	RNR_RETRIES	2
1517c478bd9Sstevel@tonic-gate #define	MAX_PORTS	2
1527c478bd9Sstevel@tonic-gate 
1537c478bd9Sstevel@tonic-gate int preposted_rbufs = 16;
1547c478bd9Sstevel@tonic-gate int send_threshold = 1;
1557c478bd9Sstevel@tonic-gate 
1567c478bd9Sstevel@tonic-gate /*
1577c478bd9Sstevel@tonic-gate  * State of the plugin.
1587c478bd9Sstevel@tonic-gate  * ACCEPT = accepting new connections and requests.
1597c478bd9Sstevel@tonic-gate  * NO_ACCEPT = not accepting new connection and requests.
1607c478bd9Sstevel@tonic-gate  * This should eventually move to rpcib_state_t structure, since this
1617c478bd9Sstevel@tonic-gate  * will tell in which state the plugin is for a particular type of service
1627c478bd9Sstevel@tonic-gate  * like NFS, NLM or v4 Callback deamon. The plugin might be in accept
1637c478bd9Sstevel@tonic-gate  * state for one and in no_accept state for the other.
1647c478bd9Sstevel@tonic-gate  */
1657c478bd9Sstevel@tonic-gate int		plugin_state;
1667c478bd9Sstevel@tonic-gate kmutex_t	plugin_state_lock;
1677c478bd9Sstevel@tonic-gate 
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate /*
1707c478bd9Sstevel@tonic-gate  * RPCIB RDMATF operations
1717c478bd9Sstevel@tonic-gate  */
1727c478bd9Sstevel@tonic-gate static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
1737c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect(CONN *conn);
1747c478bd9Sstevel@tonic-gate static void rib_listen(struct rdma_svc_data *rd);
1757c478bd9Sstevel@tonic-gate static void rib_listen_stop(struct rdma_svc_data *rd);
1767c478bd9Sstevel@tonic-gate static rdma_stat rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
1777c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle);
1787c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
1797c478bd9Sstevel@tonic-gate 	struct mrc buf_handle);
1807c478bd9Sstevel@tonic-gate static rdma_stat rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
1817c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle);
1827c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
1837c478bd9Sstevel@tonic-gate 	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle);
1847c478bd9Sstevel@tonic-gate static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
1857c478bd9Sstevel@tonic-gate 	caddr_t buf, int len, int cpu);
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
1887c478bd9Sstevel@tonic-gate 
1897c478bd9Sstevel@tonic-gate static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf);
1907c478bd9Sstevel@tonic-gate static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
1917c478bd9Sstevel@tonic-gate 
1927c478bd9Sstevel@tonic-gate static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
1937c478bd9Sstevel@tonic-gate 
1947c478bd9Sstevel@tonic-gate static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
1957c478bd9Sstevel@tonic-gate static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
1967c478bd9Sstevel@tonic-gate static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
1977c478bd9Sstevel@tonic-gate static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
1987c478bd9Sstevel@tonic-gate static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
1997c478bd9Sstevel@tonic-gate static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
2007c478bd9Sstevel@tonic-gate static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
2017c478bd9Sstevel@tonic-gate static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **);
2027c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **);
2037c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_release(CONN *conn);
2047c478bd9Sstevel@tonic-gate static rdma_stat rib_getinfo(rdma_info_t *info);
2057c478bd9Sstevel@tonic-gate static rdma_stat rib_register_ats(rib_hca_t *);
2067c478bd9Sstevel@tonic-gate static void rib_deregister_ats();
2077c478bd9Sstevel@tonic-gate static void rib_stop_services(rib_hca_t *);
2087c478bd9Sstevel@tonic-gate 
2097c478bd9Sstevel@tonic-gate /*
2107c478bd9Sstevel@tonic-gate  * RPCIB addressing operations
2117c478bd9Sstevel@tonic-gate  */
2127c478bd9Sstevel@tonic-gate char ** get_ip_addrs(int *count);
2137c478bd9Sstevel@tonic-gate int get_interfaces(TIUSER *tiptr, int *num);
2147c478bd9Sstevel@tonic-gate int find_addrs(TIUSER *tiptr, char **addrs, int num_ifs);
2157c478bd9Sstevel@tonic-gate int get_ibd_ipaddr(rpcib_ibd_insts_t *);
2167c478bd9Sstevel@tonic-gate rpcib_ats_t *get_ibd_entry(ib_gid_t *, ib_pkey_t, rpcib_ibd_insts_t *);
2177c478bd9Sstevel@tonic-gate void rib_get_ibd_insts(rpcib_ibd_insts_t *);
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate 
2207c478bd9Sstevel@tonic-gate /*
2217c478bd9Sstevel@tonic-gate  * RDMA operations the RPCIB module exports
2227c478bd9Sstevel@tonic-gate  */
2237c478bd9Sstevel@tonic-gate static rdmaops_t rib_ops = {
2247c478bd9Sstevel@tonic-gate 	rib_reachable,
2257c478bd9Sstevel@tonic-gate 	rib_conn_get,
2267c478bd9Sstevel@tonic-gate 	rib_conn_release,
2277c478bd9Sstevel@tonic-gate 	rib_listen,
2287c478bd9Sstevel@tonic-gate 	rib_listen_stop,
2297c478bd9Sstevel@tonic-gate 	rib_registermem,
2307c478bd9Sstevel@tonic-gate 	rib_deregistermem,
2317c478bd9Sstevel@tonic-gate 	rib_registermemsync,
2327c478bd9Sstevel@tonic-gate 	rib_deregistermemsync,
2337c478bd9Sstevel@tonic-gate 	rib_syncmem,
2347c478bd9Sstevel@tonic-gate 	rib_reg_buf_alloc,
2357c478bd9Sstevel@tonic-gate 	rib_reg_buf_free,
2367c478bd9Sstevel@tonic-gate 	rib_send,
2377c478bd9Sstevel@tonic-gate 	rib_send_resp,
2387c478bd9Sstevel@tonic-gate 	rib_post_resp,
2397c478bd9Sstevel@tonic-gate 	rib_post_recv,
2407c478bd9Sstevel@tonic-gate 	rib_recv,
2417c478bd9Sstevel@tonic-gate 	rib_read,
2427c478bd9Sstevel@tonic-gate 	rib_write,
2437c478bd9Sstevel@tonic-gate 	rib_getinfo
2447c478bd9Sstevel@tonic-gate };
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate /*
2477c478bd9Sstevel@tonic-gate  * RDMATF RPCIB plugin details
2487c478bd9Sstevel@tonic-gate  */
2497c478bd9Sstevel@tonic-gate static rdma_mod_t rib_mod = {
2507c478bd9Sstevel@tonic-gate 	"ibtf",		/* api name */
2517c478bd9Sstevel@tonic-gate 	RDMATF_VERS_1,
2527c478bd9Sstevel@tonic-gate 	0,
2537c478bd9Sstevel@tonic-gate 	&rib_ops,	/* rdma op vector for ibtf */
2547c478bd9Sstevel@tonic-gate };
2557c478bd9Sstevel@tonic-gate 
2567c478bd9Sstevel@tonic-gate static rdma_stat open_hcas(rpcib_state_t *);
2577c478bd9Sstevel@tonic-gate static rdma_stat rib_qp_init(rib_qp_t *, int);
2587c478bd9Sstevel@tonic-gate static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
2597c478bd9Sstevel@tonic-gate static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
2607c478bd9Sstevel@tonic-gate static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
2617c478bd9Sstevel@tonic-gate static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
2627c478bd9Sstevel@tonic-gate static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
2637c478bd9Sstevel@tonic-gate static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
2647c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t *, ibt_mr_desc_t *);
2657c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *);
2667c478bd9Sstevel@tonic-gate static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
2677c478bd9Sstevel@tonic-gate 	rib_qp_t **);
2687c478bd9Sstevel@tonic-gate static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
2697c478bd9Sstevel@tonic-gate 	rib_qp_t **);
2707c478bd9Sstevel@tonic-gate static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *);
2717c478bd9Sstevel@tonic-gate static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *);
2727c478bd9Sstevel@tonic-gate static int rib_free_sendwait(struct send_wid *);
2737c478bd9Sstevel@tonic-gate static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid);
2747c478bd9Sstevel@tonic-gate static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd);
2757c478bd9Sstevel@tonic-gate static void rdma_done_rem_list(rib_qp_t *);
2767c478bd9Sstevel@tonic-gate static void rdma_done_notify(rib_qp_t *qp, uint32_t xid);
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate static void rib_async_handler(void *,
2797c478bd9Sstevel@tonic-gate 	ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *);
2807c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *);
2817c478bd9Sstevel@tonic-gate static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *);
2827c478bd9Sstevel@tonic-gate static int rib_free_svc_recv(struct svc_recv *);
2837c478bd9Sstevel@tonic-gate static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
2847c478bd9Sstevel@tonic-gate static void rib_free_wid(struct recv_wid *);
2857c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
2867c478bd9Sstevel@tonic-gate static void rib_detach_hca(rib_hca_t *);
2877c478bd9Sstevel@tonic-gate static rdma_stat rib_chk_srv_ats(rib_hca_t *, struct netbuf *, int,
2887c478bd9Sstevel@tonic-gate 	ibt_path_info_t *);
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate /*
2917c478bd9Sstevel@tonic-gate  * Registration with IBTF as a consumer
2927c478bd9Sstevel@tonic-gate  */
2937c478bd9Sstevel@tonic-gate static struct ibt_clnt_modinfo_s rib_modinfo = {
294*43ed929aSsrust 	IBTI_V2,
2957c478bd9Sstevel@tonic-gate 	IBT_GENERIC,
2967c478bd9Sstevel@tonic-gate 	rib_async_handler,	/* async event handler */
2977c478bd9Sstevel@tonic-gate 	NULL,			/* Memory Region Handler */
2987c478bd9Sstevel@tonic-gate 	"nfs/ib"
2997c478bd9Sstevel@tonic-gate };
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate /*
3027c478bd9Sstevel@tonic-gate  * Global strucuture
3037c478bd9Sstevel@tonic-gate  */
3047c478bd9Sstevel@tonic-gate 
3057c478bd9Sstevel@tonic-gate typedef struct rpcib_s {
3067c478bd9Sstevel@tonic-gate 	dev_info_t	*rpcib_dip;
3077c478bd9Sstevel@tonic-gate 	kmutex_t	rpcib_mutex;
3087c478bd9Sstevel@tonic-gate } rpcib_t;
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate rpcib_t rpcib;
3117c478bd9Sstevel@tonic-gate 
/*
 * /etc/system controlled variable to control debugging in the rpcib
 * kernel module.  Set it to values greater than 1 to control the amount
 * of debugging messages required.
 */
int rib_debug = 0;

/*
 * Cleared (under rib_stat->open_hca_lock) by rib_deregister_ats() once an
 * ATS record has been deregistered successfully.
 */
static int ats_running = 0;
3217c478bd9Sstevel@tonic-gate int
3227c478bd9Sstevel@tonic-gate _init(void)
3237c478bd9Sstevel@tonic-gate {
3247c478bd9Sstevel@tonic-gate 	int		error;
3257c478bd9Sstevel@tonic-gate 
3267c478bd9Sstevel@tonic-gate 	error = mod_install((struct modlinkage *)&rib_modlinkage);
3277c478bd9Sstevel@tonic-gate 	if (error != 0) {
3287c478bd9Sstevel@tonic-gate 		/*
3297c478bd9Sstevel@tonic-gate 		 * Could not load module
3307c478bd9Sstevel@tonic-gate 		 */
3317c478bd9Sstevel@tonic-gate 		return (error);
3327c478bd9Sstevel@tonic-gate 	}
3337c478bd9Sstevel@tonic-gate 	mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate 	return (0);
3367c478bd9Sstevel@tonic-gate }
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate int
3397c478bd9Sstevel@tonic-gate _fini()
3407c478bd9Sstevel@tonic-gate {
3417c478bd9Sstevel@tonic-gate 	int status;
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 	if ((status = rdma_unregister_mod(&rib_mod)) != RDMA_SUCCESS) {
3447c478bd9Sstevel@tonic-gate 		return (EBUSY);
3457c478bd9Sstevel@tonic-gate 	}
3467c478bd9Sstevel@tonic-gate 
3477c478bd9Sstevel@tonic-gate 	rib_deregister_ats();
3487c478bd9Sstevel@tonic-gate 
3497c478bd9Sstevel@tonic-gate 	/*
3507c478bd9Sstevel@tonic-gate 	 * Remove module
3517c478bd9Sstevel@tonic-gate 	 */
3527c478bd9Sstevel@tonic-gate 	if ((status = mod_remove(&rib_modlinkage)) != 0) {
3537c478bd9Sstevel@tonic-gate 		(void) rdma_register_mod(&rib_mod);
3547c478bd9Sstevel@tonic-gate 		return (status);
3557c478bd9Sstevel@tonic-gate 	}
3567c478bd9Sstevel@tonic-gate 	mutex_destroy(&plugin_state_lock);
3577c478bd9Sstevel@tonic-gate 	return (0);
3587c478bd9Sstevel@tonic-gate }
3597c478bd9Sstevel@tonic-gate 
3607c478bd9Sstevel@tonic-gate int
3617c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
3627c478bd9Sstevel@tonic-gate {
3637c478bd9Sstevel@tonic-gate 	return (mod_info(&rib_modlinkage, modinfop));
3647c478bd9Sstevel@tonic-gate }
3657c478bd9Sstevel@tonic-gate 
3667c478bd9Sstevel@tonic-gate 
3677c478bd9Sstevel@tonic-gate /*
3687c478bd9Sstevel@tonic-gate  * rpcib_getinfo()
3697c478bd9Sstevel@tonic-gate  * Given the device number, return the devinfo pointer or the
3707c478bd9Sstevel@tonic-gate  * instance number.
3717c478bd9Sstevel@tonic-gate  * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
3727c478bd9Sstevel@tonic-gate  */
3737c478bd9Sstevel@tonic-gate 
3747c478bd9Sstevel@tonic-gate /*ARGSUSED*/
3757c478bd9Sstevel@tonic-gate static int
3767c478bd9Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
3777c478bd9Sstevel@tonic-gate {
3787c478bd9Sstevel@tonic-gate 	int ret = DDI_SUCCESS;
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 	switch (cmd) {
3817c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
3827c478bd9Sstevel@tonic-gate 		if (rpcib.rpcib_dip != NULL)
3837c478bd9Sstevel@tonic-gate 			*result = rpcib.rpcib_dip;
3847c478bd9Sstevel@tonic-gate 		else {
3857c478bd9Sstevel@tonic-gate 			*result = NULL;
3867c478bd9Sstevel@tonic-gate 			ret = DDI_FAILURE;
3877c478bd9Sstevel@tonic-gate 		}
3887c478bd9Sstevel@tonic-gate 		break;
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
3917c478bd9Sstevel@tonic-gate 		*result = NULL;
3927c478bd9Sstevel@tonic-gate 		break;
3937c478bd9Sstevel@tonic-gate 
3947c478bd9Sstevel@tonic-gate 	default:
3957c478bd9Sstevel@tonic-gate 		ret = DDI_FAILURE;
3967c478bd9Sstevel@tonic-gate 	}
3977c478bd9Sstevel@tonic-gate 	return (ret);
3987c478bd9Sstevel@tonic-gate }
3997c478bd9Sstevel@tonic-gate 
4007c478bd9Sstevel@tonic-gate static int
4017c478bd9Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4027c478bd9Sstevel@tonic-gate {
4037c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
4047c478bd9Sstevel@tonic-gate 	rdma_stat	r_status;
4057c478bd9Sstevel@tonic-gate 
4067c478bd9Sstevel@tonic-gate 	switch (cmd) {
4077c478bd9Sstevel@tonic-gate 	case DDI_ATTACH:
4087c478bd9Sstevel@tonic-gate 		break;
4097c478bd9Sstevel@tonic-gate 	case DDI_RESUME:
4107c478bd9Sstevel@tonic-gate 		return (DDI_SUCCESS);
4117c478bd9Sstevel@tonic-gate 	default:
4127c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4137c478bd9Sstevel@tonic-gate 	}
4147c478bd9Sstevel@tonic-gate 
4157c478bd9Sstevel@tonic-gate 	mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL);
4167c478bd9Sstevel@tonic-gate 
4177c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
4187c478bd9Sstevel@tonic-gate 	if (rpcib.rpcib_dip != NULL) {
4197c478bd9Sstevel@tonic-gate 		mutex_exit(&rpcib.rpcib_mutex);
4207c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4217c478bd9Sstevel@tonic-gate 	}
4227c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = dip;
4237c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
4247c478bd9Sstevel@tonic-gate 	/*
4257c478bd9Sstevel@tonic-gate 	 * Create the "rpcib" minor-node.
4267c478bd9Sstevel@tonic-gate 	 */
4277c478bd9Sstevel@tonic-gate 	if (ddi_create_minor_node(dip,
4287c478bd9Sstevel@tonic-gate 	    "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
4297c478bd9Sstevel@tonic-gate 		/* Error message, no cmn_err as they print on console */
4307c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4317c478bd9Sstevel@tonic-gate 	}
4327c478bd9Sstevel@tonic-gate 
4337c478bd9Sstevel@tonic-gate 	if (rib_stat == NULL) {
4347c478bd9Sstevel@tonic-gate 		rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP);
4357c478bd9Sstevel@tonic-gate 		mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL);
4367c478bd9Sstevel@tonic-gate 	}
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate 	rib_stat->hca_count = ibt_get_hca_list(&rib_stat->hca_guids);
4397c478bd9Sstevel@tonic-gate 	if (rib_stat->hca_count < 1) {
4407c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
4417c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
4427c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
4437c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4447c478bd9Sstevel@tonic-gate 	}
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate 	ibt_status = ibt_attach(&rib_modinfo, dip,
4477c478bd9Sstevel@tonic-gate 			(void *)rib_stat, &rib_stat->ibt_clnt_hdl);
4487c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
4497c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
4507c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
4517c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
4527c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
4537c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4547c478bd9Sstevel@tonic-gate 	}
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
4577c478bd9Sstevel@tonic-gate 	if (open_hcas(rib_stat) != RDMA_SUCCESS) {
4587c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
4597c478bd9Sstevel@tonic-gate 		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
4607c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
4617c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
4627c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
4637c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
4647c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4657c478bd9Sstevel@tonic-gate 	}
4667c478bd9Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
4677c478bd9Sstevel@tonic-gate 
4687c478bd9Sstevel@tonic-gate 	/*
4697c478bd9Sstevel@tonic-gate 	 * Register with rdmatf
4707c478bd9Sstevel@tonic-gate 	 */
4717c478bd9Sstevel@tonic-gate 	rib_mod.rdma_count = rib_stat->hca_count;
4727c478bd9Sstevel@tonic-gate 	r_status = rdma_register_mod(&rib_mod);
4737c478bd9Sstevel@tonic-gate 	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
4747c478bd9Sstevel@tonic-gate 		rib_detach_hca(rib_stat->hca);
4757c478bd9Sstevel@tonic-gate 		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
4767c478bd9Sstevel@tonic-gate 		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
4777c478bd9Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
4787c478bd9Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
4797c478bd9Sstevel@tonic-gate 		rib_stat = NULL;
4807c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4817c478bd9Sstevel@tonic-gate 	}
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate 
4847c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
4857c478bd9Sstevel@tonic-gate }
4867c478bd9Sstevel@tonic-gate 
4877c478bd9Sstevel@tonic-gate /*ARGSUSED*/
4887c478bd9Sstevel@tonic-gate static int
4897c478bd9Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4907c478bd9Sstevel@tonic-gate {
4917c478bd9Sstevel@tonic-gate 	switch (cmd) {
4927c478bd9Sstevel@tonic-gate 
4937c478bd9Sstevel@tonic-gate 	case DDI_DETACH:
4947c478bd9Sstevel@tonic-gate 		break;
4957c478bd9Sstevel@tonic-gate 
4967c478bd9Sstevel@tonic-gate 	case DDI_SUSPEND:
4977c478bd9Sstevel@tonic-gate 	default:
4987c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
4997c478bd9Sstevel@tonic-gate 	}
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 	/*
5027c478bd9Sstevel@tonic-gate 	 * Detach the hca and free resources
5037c478bd9Sstevel@tonic-gate 	 */
5047c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
5057c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
5067c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
5077c478bd9Sstevel@tonic-gate 	rib_detach_hca(rib_stat->hca);
5087c478bd9Sstevel@tonic-gate 	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
5097c478bd9Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
5107c478bd9Sstevel@tonic-gate 
5117c478bd9Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
5127c478bd9Sstevel@tonic-gate 	rpcib.rpcib_dip = NULL;
5137c478bd9Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
5147c478bd9Sstevel@tonic-gate 
5157c478bd9Sstevel@tonic-gate 	mutex_destroy(&rpcib.rpcib_mutex);
5167c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
5177c478bd9Sstevel@tonic-gate }
5187c478bd9Sstevel@tonic-gate 
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate static void
5217c478bd9Sstevel@tonic-gate rib_deregister_ats()
5227c478bd9Sstevel@tonic-gate {
5237c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
5247c478bd9Sstevel@tonic-gate 	rib_service_t		*srv_list, *to_remove;
5257c478bd9Sstevel@tonic-gate 	ibt_status_t   		ibt_status;
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate 	/*
5287c478bd9Sstevel@tonic-gate 	 * deregister the Address Translation Service.
5297c478bd9Sstevel@tonic-gate 	 */
5307c478bd9Sstevel@tonic-gate 	hca = rib_stat->hca;
5317c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
5327c478bd9Sstevel@tonic-gate 	srv_list = hca->ats_list;
5337c478bd9Sstevel@tonic-gate 	while (srv_list != NULL) {
5347c478bd9Sstevel@tonic-gate 		to_remove = srv_list;
5357c478bd9Sstevel@tonic-gate 		srv_list = to_remove->srv_next;
5367c478bd9Sstevel@tonic-gate 
5377c478bd9Sstevel@tonic-gate 		ibt_status = ibt_deregister_ar(hca->ibt_clnt_hdl,
5387c478bd9Sstevel@tonic-gate 				&to_remove->srv_ar);
5397c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
5407c478bd9Sstevel@tonic-gate #ifdef DEBUG
5417c478bd9Sstevel@tonic-gate 		    if (rib_debug) {
5427c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "_fini: "
5437c478bd9Sstevel@tonic-gate 			    "ibt_deregister_ar FAILED"
5447c478bd9Sstevel@tonic-gate 				" status: %d", ibt_status);
5457c478bd9Sstevel@tonic-gate 		    }
5467c478bd9Sstevel@tonic-gate #endif
5477c478bd9Sstevel@tonic-gate 		} else {
5487c478bd9Sstevel@tonic-gate 		    mutex_enter(&rib_stat->open_hca_lock);
5497c478bd9Sstevel@tonic-gate 		    ats_running = 0;
5507c478bd9Sstevel@tonic-gate 		    mutex_exit(&rib_stat->open_hca_lock);
5517c478bd9Sstevel@tonic-gate #ifdef DEBUG
5527c478bd9Sstevel@tonic-gate 		    if (rib_debug) {
5537c478bd9Sstevel@tonic-gate 
5547c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "_fini: "
5557c478bd9Sstevel@tonic-gate 			    "Successfully unregistered"
5567c478bd9Sstevel@tonic-gate 			    " ATS service: %s",
5577c478bd9Sstevel@tonic-gate 			    to_remove->srv_name);
5587c478bd9Sstevel@tonic-gate 		    }
5597c478bd9Sstevel@tonic-gate #endif
5607c478bd9Sstevel@tonic-gate 		}
5617c478bd9Sstevel@tonic-gate 		kmem_free(to_remove, sizeof (rib_service_t));
5627c478bd9Sstevel@tonic-gate 	}
5637c478bd9Sstevel@tonic-gate 	hca->ats_list = NULL;
5647c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
5657c478bd9Sstevel@tonic-gate }
5667c478bd9Sstevel@tonic-gate 
5677c478bd9Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int);
5687c478bd9Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int);
5697c478bd9Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype);
5707c478bd9Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t);
5717c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *);
5727c478bd9Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *);
5737c478bd9Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
5747c478bd9Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
5757c478bd9Sstevel@tonic-gate 
5767c478bd9Sstevel@tonic-gate /*
5777c478bd9Sstevel@tonic-gate  * One CQ pair per HCA
5787c478bd9Sstevel@tonic-gate  */
5797c478bd9Sstevel@tonic-gate static rdma_stat
5807c478bd9Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
5817c478bd9Sstevel@tonic-gate 	rib_cq_t **cqp, rpcib_state_t *ribstat)
5827c478bd9Sstevel@tonic-gate {
5837c478bd9Sstevel@tonic-gate 	rib_cq_t	*cq;
5847c478bd9Sstevel@tonic-gate 	ibt_cq_attr_t	cq_attr;
5857c478bd9Sstevel@tonic-gate 	uint32_t	real_size;
5867c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
5877c478bd9Sstevel@tonic-gate 	rdma_stat	error = RDMA_SUCCESS;
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP);
5907c478bd9Sstevel@tonic-gate 	cq->rib_hca = hca;
5917c478bd9Sstevel@tonic-gate 	cq_attr.cq_size = cq_size;
5927c478bd9Sstevel@tonic-gate 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
5937c478bd9Sstevel@tonic-gate 	status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl,
5947c478bd9Sstevel@tonic-gate 	    &real_size);
5957c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
5967c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed,"
5977c478bd9Sstevel@tonic-gate 				" status=%d", status);
5987c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
5997c478bd9Sstevel@tonic-gate 		goto fail;
6007c478bd9Sstevel@tonic-gate 	}
6017c478bd9Sstevel@tonic-gate 	ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, ribstat);
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 	/*
6047c478bd9Sstevel@tonic-gate 	 * Enable CQ callbacks. CQ Callbacks are single shot
6057c478bd9Sstevel@tonic-gate 	 * (e.g. you have to call ibt_enable_cq_notify()
6067c478bd9Sstevel@tonic-gate 	 * after each callback to get another one).
6077c478bd9Sstevel@tonic-gate 	 */
6087c478bd9Sstevel@tonic-gate 	status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION);
6097c478bd9Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
6107c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: "
6117c478bd9Sstevel@tonic-gate 			"enable_cq_notify failed, status %d", status);
6127c478bd9Sstevel@tonic-gate 		error = RDMA_FAILED;
6137c478bd9Sstevel@tonic-gate 		goto fail;
6147c478bd9Sstevel@tonic-gate 	}
6157c478bd9Sstevel@tonic-gate 	*cqp = cq;
6167c478bd9Sstevel@tonic-gate 
6177c478bd9Sstevel@tonic-gate 	return (error);
6187c478bd9Sstevel@tonic-gate fail:
6197c478bd9Sstevel@tonic-gate 	if (cq->rib_cq_hdl)
6207c478bd9Sstevel@tonic-gate 		(void) ibt_free_cq(cq->rib_cq_hdl);
6217c478bd9Sstevel@tonic-gate 	if (cq)
6227c478bd9Sstevel@tonic-gate 		kmem_free(cq, sizeof (rib_cq_t));
6237c478bd9Sstevel@tonic-gate 	return (error);
6247c478bd9Sstevel@tonic-gate }
6257c478bd9Sstevel@tonic-gate 
6267c478bd9Sstevel@tonic-gate static rdma_stat
6277c478bd9Sstevel@tonic-gate open_hcas(rpcib_state_t *ribstat)
6287c478bd9Sstevel@tonic-gate {
6297c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
6307c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
6317c478bd9Sstevel@tonic-gate 	rdma_stat		status;
6327c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*pinfop;
6337c478bd9Sstevel@tonic-gate 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
6347c478bd9Sstevel@tonic-gate 	uint_t			size, cq_size;
6357c478bd9Sstevel@tonic-gate 	int			i;
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
6387c478bd9Sstevel@tonic-gate 	if (ribstat->hcas == NULL)
6397c478bd9Sstevel@tonic-gate 		ribstat->hcas = kmem_zalloc(ribstat->hca_count *
6407c478bd9Sstevel@tonic-gate 				    sizeof (rib_hca_t), KM_SLEEP);
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate 	/*
6437c478bd9Sstevel@tonic-gate 	 * Open a hca and setup for RDMA
6447c478bd9Sstevel@tonic-gate 	 */
6457c478bd9Sstevel@tonic-gate 	for (i = 0; i < ribstat->hca_count; i++) {
6467c478bd9Sstevel@tonic-gate 		ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl,
6477c478bd9Sstevel@tonic-gate 				ribstat->hca_guids[i],
6487c478bd9Sstevel@tonic-gate 				&ribstat->hcas[i].hca_hdl);
6497c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
6507c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: ibt_open_hca (%d) "
6517c478bd9Sstevel@tonic-gate 				"returned %d", i, ibt_status);
6527c478bd9Sstevel@tonic-gate 			continue;
6537c478bd9Sstevel@tonic-gate 		}
6547c478bd9Sstevel@tonic-gate 		ribstat->hcas[i].hca_guid = ribstat->hca_guids[i];
6557c478bd9Sstevel@tonic-gate 		hca = &(ribstat->hcas[i]);
6567c478bd9Sstevel@tonic-gate 		hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl;
6577c478bd9Sstevel@tonic-gate 		hca->state = HCA_INITED;
6587c478bd9Sstevel@tonic-gate 
6597c478bd9Sstevel@tonic-gate 		/*
6607c478bd9Sstevel@tonic-gate 		 * query HCA info
6617c478bd9Sstevel@tonic-gate 		 */
6627c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs);
6637c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
6647c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: ibt_query_hca "
6657c478bd9Sstevel@tonic-gate 			    "returned %d (hca_guid 0x%llx)",
6667c478bd9Sstevel@tonic-gate 			    ibt_status, (longlong_t)ribstat->hca_guids[i]);
6677c478bd9Sstevel@tonic-gate 			goto fail1;
6687c478bd9Sstevel@tonic-gate 		}
6697c478bd9Sstevel@tonic-gate 
6707c478bd9Sstevel@tonic-gate 		/*
6717c478bd9Sstevel@tonic-gate 		 * One PD (Protection Domain) per HCA.
6727c478bd9Sstevel@tonic-gate 		 * A qp is allowed to access a memory region
6737c478bd9Sstevel@tonic-gate 		 * only when it's in the same PD as that of
6747c478bd9Sstevel@tonic-gate 		 * the memory region.
6757c478bd9Sstevel@tonic-gate 		 */
6767c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl);
6777c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
6787c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: ibt_alloc_pd "
6797c478bd9Sstevel@tonic-gate 				"returned %d (hca_guid 0x%llx)",
6807c478bd9Sstevel@tonic-gate 				ibt_status, (longlong_t)ribstat->hca_guids[i]);
6817c478bd9Sstevel@tonic-gate 			goto fail1;
6827c478bd9Sstevel@tonic-gate 		}
6837c478bd9Sstevel@tonic-gate 
6847c478bd9Sstevel@tonic-gate 		/*
6857c478bd9Sstevel@tonic-gate 		 * query HCA ports
6867c478bd9Sstevel@tonic-gate 		 */
6877c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl,
6887c478bd9Sstevel@tonic-gate 				0, &pinfop, &hca->hca_nports, &size);
6897c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
6907c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: "
6917c478bd9Sstevel@tonic-gate 				"ibt_query_hca_ports returned %d "
6927c478bd9Sstevel@tonic-gate 				"(hca_guid 0x%llx)",
6937c478bd9Sstevel@tonic-gate 				ibt_status, (longlong_t)hca->hca_guid);
6947c478bd9Sstevel@tonic-gate 			goto fail2;
6957c478bd9Sstevel@tonic-gate 		}
6967c478bd9Sstevel@tonic-gate 		hca->hca_ports = pinfop;
6977c478bd9Sstevel@tonic-gate 		hca->hca_pinfosz = size;
6987c478bd9Sstevel@tonic-gate 		pinfop = NULL;
6997c478bd9Sstevel@tonic-gate 
7007c478bd9Sstevel@tonic-gate 		cq_size = DEF_CQ_SIZE; /* default cq size */
7017c478bd9Sstevel@tonic-gate 		/*
7027c478bd9Sstevel@tonic-gate 		 * Create 2 pairs of cq's (1 pair for client
7037c478bd9Sstevel@tonic-gate 		 * and the other pair for server) on this hca.
7047c478bd9Sstevel@tonic-gate 		 * If number of qp's gets too large, then several
7057c478bd9Sstevel@tonic-gate 		 * cq's will be needed.
7067c478bd9Sstevel@tonic-gate 		 */
7077c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler,
7087c478bd9Sstevel@tonic-gate 				&hca->svc_rcq, ribstat);
7097c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7107c478bd9Sstevel@tonic-gate 			goto fail3;
7117c478bd9Sstevel@tonic-gate 		}
7127c478bd9Sstevel@tonic-gate 
7137c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_scq_handler,
7147c478bd9Sstevel@tonic-gate 				&hca->svc_scq, ribstat);
7157c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7167c478bd9Sstevel@tonic-gate 			goto fail3;
7177c478bd9Sstevel@tonic-gate 		}
7187c478bd9Sstevel@tonic-gate 
7197c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler,
7207c478bd9Sstevel@tonic-gate 				&hca->clnt_rcq, ribstat);
7217c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7227c478bd9Sstevel@tonic-gate 			goto fail3;
7237c478bd9Sstevel@tonic-gate 		}
7247c478bd9Sstevel@tonic-gate 
7257c478bd9Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler,
7267c478bd9Sstevel@tonic-gate 				&hca->clnt_scq, ribstat);
7277c478bd9Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
7287c478bd9Sstevel@tonic-gate 			goto fail3;
7297c478bd9Sstevel@tonic-gate 		}
7307c478bd9Sstevel@tonic-gate 
7317c478bd9Sstevel@tonic-gate 		/*
7327c478bd9Sstevel@tonic-gate 		 * Create buffer pools.
7337c478bd9Sstevel@tonic-gate 		 * Note rib_rbuf_create also allocates memory windows.
7347c478bd9Sstevel@tonic-gate 		 */
7357c478bd9Sstevel@tonic-gate 		hca->recv_pool = rib_rbufpool_create(hca,
7367c478bd9Sstevel@tonic-gate 					RECV_BUFFER, MAX_BUFS);
7377c478bd9Sstevel@tonic-gate 		if (hca->recv_pool == NULL) {
7387c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: recv buf pool failed\n");
7397c478bd9Sstevel@tonic-gate 			goto fail3;
7407c478bd9Sstevel@tonic-gate 		}
7417c478bd9Sstevel@tonic-gate 
7427c478bd9Sstevel@tonic-gate 		hca->send_pool = rib_rbufpool_create(hca,
7437c478bd9Sstevel@tonic-gate 					SEND_BUFFER, MAX_BUFS);
7447c478bd9Sstevel@tonic-gate 		if (hca->send_pool == NULL) {
7457c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "open_hcas: send buf pool failed\n");
7467c478bd9Sstevel@tonic-gate 			rib_rbufpool_destroy(hca, RECV_BUFFER);
7477c478bd9Sstevel@tonic-gate 			goto fail3;
7487c478bd9Sstevel@tonic-gate 		}
7497c478bd9Sstevel@tonic-gate 
7507c478bd9Sstevel@tonic-gate 		/*
7517c478bd9Sstevel@tonic-gate 		 * Initialize the registered service list and
7527c478bd9Sstevel@tonic-gate 		 * the lock
7537c478bd9Sstevel@tonic-gate 		 */
7547c478bd9Sstevel@tonic-gate 		hca->service_list = NULL;
7557c478bd9Sstevel@tonic-gate 		rw_init(&hca->service_list_lock, NULL, RW_DRIVER, hca->iblock);
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate 		mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
7587c478bd9Sstevel@tonic-gate 		cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL);
7597c478bd9Sstevel@tonic-gate 		rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER,
7607c478bd9Sstevel@tonic-gate 			hca->iblock);
7617c478bd9Sstevel@tonic-gate 		rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER,
7627c478bd9Sstevel@tonic-gate 			hca->iblock);
7637c478bd9Sstevel@tonic-gate 		rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock);
7647c478bd9Sstevel@tonic-gate 		mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock);
7657c478bd9Sstevel@tonic-gate 		hca->inuse = TRUE;
7667c478bd9Sstevel@tonic-gate 		/*
7677c478bd9Sstevel@tonic-gate 		 * XXX One hca only. Add multi-hca functionality if needed
7687c478bd9Sstevel@tonic-gate 		 * later.
7697c478bd9Sstevel@tonic-gate 		 */
7707c478bd9Sstevel@tonic-gate 		ribstat->hca = hca;
7717c478bd9Sstevel@tonic-gate 		ribstat->nhca_inited++;
7727c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
7737c478bd9Sstevel@tonic-gate 		break;
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate fail3:
7767c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
7777c478bd9Sstevel@tonic-gate fail2:
7787c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
7797c478bd9Sstevel@tonic-gate fail1:
7807c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
7817c478bd9Sstevel@tonic-gate 
7827c478bd9Sstevel@tonic-gate 	}
7837c478bd9Sstevel@tonic-gate 	if (ribstat->hca != NULL)
7847c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
7857c478bd9Sstevel@tonic-gate 	else
7867c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
7877c478bd9Sstevel@tonic-gate }
7887c478bd9Sstevel@tonic-gate 
7897c478bd9Sstevel@tonic-gate /*
7907c478bd9Sstevel@tonic-gate  * Callback routines
7917c478bd9Sstevel@tonic-gate  */
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate /*
7947c478bd9Sstevel@tonic-gate  * SCQ handlers
7957c478bd9Sstevel@tonic-gate  */
7967c478bd9Sstevel@tonic-gate /* ARGSUSED */
7977c478bd9Sstevel@tonic-gate static void
7987c478bd9Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
7997c478bd9Sstevel@tonic-gate {
8007c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
8017c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
8027c478bd9Sstevel@tonic-gate 	int		i;
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate 	/*
8057c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
8067c478bd9Sstevel@tonic-gate 	 * completion queue notification.
8077c478bd9Sstevel@tonic-gate 	 */
8087c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
8097c478bd9Sstevel@tonic-gate 
8107c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
8117c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
8127c478bd9Sstevel@tonic-gate 	    bzero(&wc, sizeof (wc));
8137c478bd9Sstevel@tonic-gate 	    ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
8147c478bd9Sstevel@tonic-gate 	    if (ibt_status != IBT_SUCCESS)
8157c478bd9Sstevel@tonic-gate 		return;
8167c478bd9Sstevel@tonic-gate 
8177c478bd9Sstevel@tonic-gate 	/*
8187c478bd9Sstevel@tonic-gate 	 * Got a send completion
8197c478bd9Sstevel@tonic-gate 	 */
8207c478bd9Sstevel@tonic-gate 	    if (wc.wc_id != NULL) {	/* XXX can it be otherwise ???? */
82111606941Sjwahlig 		struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id;
8227c478bd9Sstevel@tonic-gate 		CONN	*conn = qptoc(wd->qp);
8237c478bd9Sstevel@tonic-gate 
8247c478bd9Sstevel@tonic-gate 		mutex_enter(&wd->sendwait_lock);
8257c478bd9Sstevel@tonic-gate 		switch (wc.wc_status) {
8267c478bd9Sstevel@tonic-gate 		case IBT_WC_SUCCESS:
8277c478bd9Sstevel@tonic-gate 			wd->status = RDMA_SUCCESS;
8287c478bd9Sstevel@tonic-gate 			break;
8297c478bd9Sstevel@tonic-gate 		case IBT_WC_WR_FLUSHED_ERR:
8307c478bd9Sstevel@tonic-gate 			wd->status = RDMA_FAILED;
8317c478bd9Sstevel@tonic-gate 			break;
8327c478bd9Sstevel@tonic-gate 		default:
8337c478bd9Sstevel@tonic-gate /*
8347c478bd9Sstevel@tonic-gate  *    RC Send Q Error Code		Local state     Remote State
8357c478bd9Sstevel@tonic-gate  *    ==================== 		===========     ============
8367c478bd9Sstevel@tonic-gate  *    IBT_WC_BAD_RESPONSE_ERR             ERROR           None
8377c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_LEN_ERR                ERROR           None
8387c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           None
8397c478bd9Sstevel@tonic-gate  *    IBT_WC_LOCAL_PROTECT_ERR            ERROR           None
8407c478bd9Sstevel@tonic-gate  *    IBT_WC_MEM_WIN_BIND_ERR             ERROR           None
8417c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR
8427c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_ACCESS_ERR            ERROR           ERROR
8437c478bd9Sstevel@tonic-gate  *    IBT_WC_REMOTE_OP_ERR                ERROR           ERROR
8447c478bd9Sstevel@tonic-gate  *    IBT_WC_RNR_NAK_TIMEOUT_ERR          ERROR           None
8457c478bd9Sstevel@tonic-gate  *    IBT_WC_TRANS_TIMEOUT_ERR            ERROR           None
8467c478bd9Sstevel@tonic-gate  *    IBT_WC_WR_FLUSHED_ERR               None            None
8477c478bd9Sstevel@tonic-gate  */
8487c478bd9Sstevel@tonic-gate #ifdef DEBUG
8497c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
8507c478bd9Sstevel@tonic-gate 	    if (wc.wc_status != IBT_WC_SUCCESS) {
8517c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_clnt_scq_handler: "
8527c478bd9Sstevel@tonic-gate 			"WR completed in error, wc.wc_status:%d, "
8537c478bd9Sstevel@tonic-gate 			"wc_id:%llx\n", wc.wc_status, (longlong_t)wc.wc_id);
8547c478bd9Sstevel@tonic-gate 	    }
8557c478bd9Sstevel@tonic-gate 	}
8567c478bd9Sstevel@tonic-gate #endif
8577c478bd9Sstevel@tonic-gate 			/*
8587c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection to
8597c478bd9Sstevel@tonic-gate 			 * ERROR and cleanup will happen either from
8607c478bd9Sstevel@tonic-gate 			 * conn_release  or from rib_conn_get
8617c478bd9Sstevel@tonic-gate 			 */
8627c478bd9Sstevel@tonic-gate 			wd->status = RDMA_FAILED;
8637c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
8647c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
8657c478bd9Sstevel@tonic-gate 				conn->c_state = C_ERROR;
8667c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
8677c478bd9Sstevel@tonic-gate 			break;
8687c478bd9Sstevel@tonic-gate 		}
8697c478bd9Sstevel@tonic-gate 		if (wd->cv_sig == 1) {
8707c478bd9Sstevel@tonic-gate 			/*
8717c478bd9Sstevel@tonic-gate 			 * Notify poster
8727c478bd9Sstevel@tonic-gate 			 */
8737c478bd9Sstevel@tonic-gate 			cv_signal(&wd->wait_cv);
8747c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
8757c478bd9Sstevel@tonic-gate 		} else {
8767c478bd9Sstevel@tonic-gate 			/*
8777c478bd9Sstevel@tonic-gate 			 * Poster not waiting for notification.
8787c478bd9Sstevel@tonic-gate 			 * Free the send buffers and send_wid
8797c478bd9Sstevel@tonic-gate 			 */
8807c478bd9Sstevel@tonic-gate 			for (i = 0; i < wd->nsbufs; i++) {
8817c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
88211606941Sjwahlig 					(void *)(uintptr_t)wd->sbufaddr[i]);
8837c478bd9Sstevel@tonic-gate 			}
8847c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
8857c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wd);
8867c478bd9Sstevel@tonic-gate 		}
8877c478bd9Sstevel@tonic-gate 	    }
8887c478bd9Sstevel@tonic-gate 	}
8897c478bd9Sstevel@tonic-gate }
8907c478bd9Sstevel@tonic-gate 
8917c478bd9Sstevel@tonic-gate /* ARGSUSED */
8927c478bd9Sstevel@tonic-gate static void
8937c478bd9Sstevel@tonic-gate rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
8947c478bd9Sstevel@tonic-gate {
8957c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
8967c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
8977c478bd9Sstevel@tonic-gate 	int		i;
8987c478bd9Sstevel@tonic-gate 
8997c478bd9Sstevel@tonic-gate 	/*
9007c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
9017c478bd9Sstevel@tonic-gate 	 * completion queue notification.
9027c478bd9Sstevel@tonic-gate 	 */
9037c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
9067c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
9077c478bd9Sstevel@tonic-gate 	    bzero(&wc, sizeof (wc));
9087c478bd9Sstevel@tonic-gate 	    ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
9097c478bd9Sstevel@tonic-gate 	    if (ibt_status != IBT_SUCCESS)
9107c478bd9Sstevel@tonic-gate 		return;
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate 	/*
9137c478bd9Sstevel@tonic-gate 	 * Got a send completion
9147c478bd9Sstevel@tonic-gate 	 */
9157c478bd9Sstevel@tonic-gate #ifdef DEBUG
9167c478bd9Sstevel@tonic-gate 	    if (rib_debug > 1 && wc.wc_status != IBT_WC_SUCCESS) {
9177c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_svc_scq_handler: WR completed in error "
9187c478bd9Sstevel@tonic-gate 			"wc.wc_status:%d, wc_id:%llX",
9197c478bd9Sstevel@tonic-gate 			wc.wc_status, (longlong_t)wc.wc_id);
9207c478bd9Sstevel@tonic-gate 	    }
9217c478bd9Sstevel@tonic-gate #endif
9227c478bd9Sstevel@tonic-gate 	    if (wc.wc_id != NULL) { /* XXX NULL possible ???? */
92311606941Sjwahlig 		struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id;
9247c478bd9Sstevel@tonic-gate 
9257c478bd9Sstevel@tonic-gate 		mutex_enter(&wd->sendwait_lock);
9267c478bd9Sstevel@tonic-gate 		if (wd->cv_sig == 1) {
9277c478bd9Sstevel@tonic-gate 			/*
9287c478bd9Sstevel@tonic-gate 			 * Update completion status and notify poster
9297c478bd9Sstevel@tonic-gate 			 */
9307c478bd9Sstevel@tonic-gate 			if (wc.wc_status == IBT_WC_SUCCESS)
9317c478bd9Sstevel@tonic-gate 				wd->status = RDMA_SUCCESS;
9327c478bd9Sstevel@tonic-gate 			else
9337c478bd9Sstevel@tonic-gate 				wd->status = RDMA_FAILED;
9347c478bd9Sstevel@tonic-gate 			cv_signal(&wd->wait_cv);
9357c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
9367c478bd9Sstevel@tonic-gate 		} else {
9377c478bd9Sstevel@tonic-gate 			/*
9387c478bd9Sstevel@tonic-gate 			 * Poster not waiting for notification.
9397c478bd9Sstevel@tonic-gate 			 * Free the send buffers and send_wid
9407c478bd9Sstevel@tonic-gate 			 */
9417c478bd9Sstevel@tonic-gate 			for (i = 0; i < wd->nsbufs; i++) {
9427c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER,
94311606941Sjwahlig 					(void *)(uintptr_t)wd->sbufaddr[i]);
9447c478bd9Sstevel@tonic-gate 			}
9457c478bd9Sstevel@tonic-gate 			mutex_exit(&wd->sendwait_lock);
9467c478bd9Sstevel@tonic-gate 			(void) rib_free_sendwait(wd);
9477c478bd9Sstevel@tonic-gate 		}
9487c478bd9Sstevel@tonic-gate 	    }
9497c478bd9Sstevel@tonic-gate 	}
9507c478bd9Sstevel@tonic-gate }
9517c478bd9Sstevel@tonic-gate 
9527c478bd9Sstevel@tonic-gate /*
9537c478bd9Sstevel@tonic-gate  * RCQ handler
9547c478bd9Sstevel@tonic-gate  */
9557c478bd9Sstevel@tonic-gate /* ARGSUSED */
9567c478bd9Sstevel@tonic-gate static void
9577c478bd9Sstevel@tonic-gate rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
9587c478bd9Sstevel@tonic-gate {
9597c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
9607c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
9617c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
9627c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
9637c478bd9Sstevel@tonic-gate 
9647c478bd9Sstevel@tonic-gate 	/*
9657c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
9667c478bd9Sstevel@tonic-gate 	 * completion queue notification.
9677c478bd9Sstevel@tonic-gate 	 */
9687c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
9697c478bd9Sstevel@tonic-gate 
9707c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
9717c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
9727c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
9737c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
9747c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
9757c478bd9Sstevel@tonic-gate 		    return;
9767c478bd9Sstevel@tonic-gate 
97711606941Sjwahlig 		rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
9787c478bd9Sstevel@tonic-gate 		qp = rwid->qp;
9797c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
9807c478bd9Sstevel@tonic-gate 		    XDR			inxdrs, *xdrs;
9817c478bd9Sstevel@tonic-gate 		    uint_t		xid, vers, op, find_xid = 0;
9827c478bd9Sstevel@tonic-gate 		    struct reply	*r;
9837c478bd9Sstevel@tonic-gate 		    CONN *conn = qptoc(qp);
9847c478bd9Sstevel@tonic-gate 
9857c478bd9Sstevel@tonic-gate 		    xdrs = &inxdrs;
98611606941Sjwahlig 		    xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
9877c478bd9Sstevel@tonic-gate 			wc.wc_bytes_xfer, XDR_DECODE);
9887c478bd9Sstevel@tonic-gate 		/*
9897c478bd9Sstevel@tonic-gate 		 * Treat xid as opaque (xid is the first entity
9907c478bd9Sstevel@tonic-gate 		 * in the rpc rdma message).
9917c478bd9Sstevel@tonic-gate 		 */
99211606941Sjwahlig 		    xid = *(uint32_t *)(uintptr_t)rwid->addr;
9937c478bd9Sstevel@tonic-gate 		/* Skip xid and set the xdr position accordingly. */
9947c478bd9Sstevel@tonic-gate 		    XDR_SETPOS(xdrs, sizeof (uint32_t));
9957c478bd9Sstevel@tonic-gate 		    (void) xdr_u_int(xdrs, &vers);
9967c478bd9Sstevel@tonic-gate 		    (void) xdr_u_int(xdrs, &op);
9977c478bd9Sstevel@tonic-gate 		    XDR_DESTROY(xdrs);
9987c478bd9Sstevel@tonic-gate 		    if (vers != RPCRDMA_VERS) {
9997c478bd9Sstevel@tonic-gate 			/*
10007c478bd9Sstevel@tonic-gate 			 * Invalid RPC/RDMA version. Cannot interoperate.
10017c478bd9Sstevel@tonic-gate 			 * Set connection to ERROR state and bail out.
10027c478bd9Sstevel@tonic-gate 			 */
10037c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
10047c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
10057c478bd9Sstevel@tonic-gate 				conn->c_state = C_ERROR;
10067c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
100711606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
100811606941Sjwahlig 				(void *)(uintptr_t)rwid->addr);
10097c478bd9Sstevel@tonic-gate 			rib_free_wid(rwid);
10107c478bd9Sstevel@tonic-gate 			continue;
10117c478bd9Sstevel@tonic-gate 		    }
10127c478bd9Sstevel@tonic-gate 
10137c478bd9Sstevel@tonic-gate 		    mutex_enter(&qp->replylist_lock);
10147c478bd9Sstevel@tonic-gate 		    for (r = qp->replylist; r != NULL; r = r->next) {
10157c478bd9Sstevel@tonic-gate 			if (r->xid == xid) {
10167c478bd9Sstevel@tonic-gate 			    find_xid = 1;
10177c478bd9Sstevel@tonic-gate 			    switch (op) {
10187c478bd9Sstevel@tonic-gate 			    case RDMA_MSG:
10197c478bd9Sstevel@tonic-gate 			    case RDMA_NOMSG:
10207c478bd9Sstevel@tonic-gate 			    case RDMA_MSGP:
10217c478bd9Sstevel@tonic-gate 				r->status = RDMA_SUCCESS;
10227c478bd9Sstevel@tonic-gate 				r->vaddr_cq = rwid->addr;
10237c478bd9Sstevel@tonic-gate 				r->bytes_xfer = wc.wc_bytes_xfer;
10247c478bd9Sstevel@tonic-gate 				cv_signal(&r->wait_cv);
10257c478bd9Sstevel@tonic-gate 				break;
10267c478bd9Sstevel@tonic-gate 			    default:
10277c478bd9Sstevel@tonic-gate 				rib_rbuf_free(qptoc(qp), RECV_BUFFER,
102811606941Sjwahlig 						(void *)(uintptr_t)rwid->addr);
10297c478bd9Sstevel@tonic-gate 				break;
10307c478bd9Sstevel@tonic-gate 			    }
10317c478bd9Sstevel@tonic-gate 			    break;
10327c478bd9Sstevel@tonic-gate 			}
10337c478bd9Sstevel@tonic-gate 		    }
10347c478bd9Sstevel@tonic-gate 		    mutex_exit(&qp->replylist_lock);
10357c478bd9Sstevel@tonic-gate 		    if (find_xid == 0) {
10367c478bd9Sstevel@tonic-gate 			/* RPC caller not waiting for reply */
10377c478bd9Sstevel@tonic-gate #ifdef DEBUG
10387c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
10397c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_clnt_rcq_handler: "
10407c478bd9Sstevel@tonic-gate 			    "NO matching xid %u!\n", xid);
10417c478bd9Sstevel@tonic-gate 			    }
10427c478bd9Sstevel@tonic-gate #endif
10437c478bd9Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), RECV_BUFFER,
104411606941Sjwahlig 				(void *)(uintptr_t)rwid->addr);
10457c478bd9Sstevel@tonic-gate 		    }
10467c478bd9Sstevel@tonic-gate 		} else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) {
10477c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
10487c478bd9Sstevel@tonic-gate 
10497c478bd9Sstevel@tonic-gate 			/*
10507c478bd9Sstevel@tonic-gate 			 * Connection being flushed. Just free
10517c478bd9Sstevel@tonic-gate 			 * the posted buffer
10527c478bd9Sstevel@tonic-gate 			 */
105311606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
105411606941Sjwahlig 				(void *)(uintptr_t)rwid->addr);
10557c478bd9Sstevel@tonic-gate 		} else {
10567c478bd9Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
10577c478bd9Sstevel@tonic-gate /*
10587c478bd9Sstevel@tonic-gate  *  RC Recv Q Error Code		Local state     Remote State
10597c478bd9Sstevel@tonic-gate  *  ====================		===========     ============
10607c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_ACCESS_ERR             ERROR           ERROR when NAK recvd
10617c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_LEN_ERR                ERROR           ERROR when NAK recvd
10627c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_PROTECT_ERR            ERROR           ERROR when NAK recvd
10637c478bd9Sstevel@tonic-gate  *  IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           ERROR when NAK recvd
10647c478bd9Sstevel@tonic-gate  *  IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR when NAK recvd
10657c478bd9Sstevel@tonic-gate  *  IBT_WC_WR_FLUSHED_ERR               None            None
10667c478bd9Sstevel@tonic-gate  */
10677c478bd9Sstevel@tonic-gate 			/*
10687c478bd9Sstevel@tonic-gate 			 * Channel in error state. Set connection
10697c478bd9Sstevel@tonic-gate 			 * in ERROR state.
10707c478bd9Sstevel@tonic-gate 			 */
10717c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
10727c478bd9Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
10737c478bd9Sstevel@tonic-gate 				conn->c_state = C_ERROR;
10747c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
107511606941Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
107611606941Sjwahlig 				(void *)(uintptr_t)rwid->addr);
10777c478bd9Sstevel@tonic-gate 		}
10787c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
10797c478bd9Sstevel@tonic-gate 	}
10807c478bd9Sstevel@tonic-gate }
10817c478bd9Sstevel@tonic-gate 
10827c478bd9Sstevel@tonic-gate /* Server side */
10837c478bd9Sstevel@tonic-gate /* ARGSUSED */
10847c478bd9Sstevel@tonic-gate static void
10857c478bd9Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
10867c478bd9Sstevel@tonic-gate {
10877c478bd9Sstevel@tonic-gate 	struct recv_data *rd;
10887c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
10897c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
10907c478bd9Sstevel@tonic-gate 	ibt_wc_t	wc;
10917c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
10927c478bd9Sstevel@tonic-gate 	CONN		*conn;
10937c478bd9Sstevel@tonic-gate 	mblk_t		*mp;
10947c478bd9Sstevel@tonic-gate 
10957c478bd9Sstevel@tonic-gate 	/*
10967c478bd9Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
10977c478bd9Sstevel@tonic-gate 	 * completion queue notification.
10987c478bd9Sstevel@tonic-gate 	 */
10997c478bd9Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
11007c478bd9Sstevel@tonic-gate 
11017c478bd9Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
11027c478bd9Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
11037c478bd9Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
11047c478bd9Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
11057c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
11067c478bd9Sstevel@tonic-gate 		    return;
11077c478bd9Sstevel@tonic-gate 
110811606941Sjwahlig 		s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
11097c478bd9Sstevel@tonic-gate 		qp = s_recvp->qp;
11107c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
11117c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
11127c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs--;
11137c478bd9Sstevel@tonic-gate 		if (qp->n_posted_rbufs == 0)
11147c478bd9Sstevel@tonic-gate 			cv_signal(&qp->posted_rbufs_cv);
11157c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
11167c478bd9Sstevel@tonic-gate 
11177c478bd9Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
11187c478bd9Sstevel@tonic-gate 		    XDR		inxdrs, *xdrs;
11197c478bd9Sstevel@tonic-gate 		    uint_t	xid, vers, op;
11207c478bd9Sstevel@tonic-gate 
11217c478bd9Sstevel@tonic-gate 		    xdrs = &inxdrs;
11227c478bd9Sstevel@tonic-gate 		    /* s_recvp->vaddr stores data */
112311606941Sjwahlig 		    xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
11247c478bd9Sstevel@tonic-gate 			wc.wc_bytes_xfer, XDR_DECODE);
11257c478bd9Sstevel@tonic-gate 
11267c478bd9Sstevel@tonic-gate 		/*
11277c478bd9Sstevel@tonic-gate 		 * Treat xid as opaque (xid is the first entity
11287c478bd9Sstevel@tonic-gate 		 * in the rpc rdma message).
11297c478bd9Sstevel@tonic-gate 		 */
113011606941Sjwahlig 		    xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
11317c478bd9Sstevel@tonic-gate 		/* Skip xid and set the xdr position accordingly. */
11327c478bd9Sstevel@tonic-gate 		    XDR_SETPOS(xdrs, sizeof (uint32_t));
11337c478bd9Sstevel@tonic-gate 		    if (!xdr_u_int(xdrs, &vers) ||
11347c478bd9Sstevel@tonic-gate 			!xdr_u_int(xdrs, &op)) {
11357c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
113611606941Sjwahlig 				(void *)(uintptr_t)s_recvp->vaddr);
11377c478bd9Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
11387c478bd9Sstevel@tonic-gate #ifdef DEBUG
11397c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_svc_rcq_handler: "
11407c478bd9Sstevel@tonic-gate 			    "xdr_u_int failed for qp %p, wc_id=%llx",
11417c478bd9Sstevel@tonic-gate 			    (void *)qp, (longlong_t)wc.wc_id);
11427c478bd9Sstevel@tonic-gate #endif
11437c478bd9Sstevel@tonic-gate 			(void) rib_free_svc_recv(s_recvp);
11447c478bd9Sstevel@tonic-gate 			continue;
11457c478bd9Sstevel@tonic-gate 		    }
11467c478bd9Sstevel@tonic-gate 		    XDR_DESTROY(xdrs);
11477c478bd9Sstevel@tonic-gate 
11487c478bd9Sstevel@tonic-gate 		    if (vers != RPCRDMA_VERS) {
11497c478bd9Sstevel@tonic-gate 			/*
11507c478bd9Sstevel@tonic-gate 			 * Invalid RPC/RDMA version. Drop rpc rdma message.
11517c478bd9Sstevel@tonic-gate 			 */
11527c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
115311606941Sjwahlig 				(void *)(uintptr_t)s_recvp->vaddr);
11547c478bd9Sstevel@tonic-gate 			(void) rib_free_svc_recv(s_recvp);
11557c478bd9Sstevel@tonic-gate 			continue;
11567c478bd9Sstevel@tonic-gate 		    }
11577c478bd9Sstevel@tonic-gate 			/*
11587c478bd9Sstevel@tonic-gate 			 * Is this for RDMA_DONE?
11597c478bd9Sstevel@tonic-gate 			 */
11607c478bd9Sstevel@tonic-gate 		    if (op == RDMA_DONE) {
11617c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
116211606941Sjwahlig 				(void *)(uintptr_t)s_recvp->vaddr);
11637c478bd9Sstevel@tonic-gate 			/*
11647c478bd9Sstevel@tonic-gate 			 * Wake up the thread waiting on
11657c478bd9Sstevel@tonic-gate 			 * a RDMA_DONE for xid
11667c478bd9Sstevel@tonic-gate 			 */
11677c478bd9Sstevel@tonic-gate 			mutex_enter(&qp->rdlist_lock);
11687c478bd9Sstevel@tonic-gate 			rdma_done_notify(qp, xid);
11697c478bd9Sstevel@tonic-gate 			mutex_exit(&qp->rdlist_lock);
11707c478bd9Sstevel@tonic-gate 			(void) rib_free_svc_recv(s_recvp);
11717c478bd9Sstevel@tonic-gate 			continue;
11727c478bd9Sstevel@tonic-gate 		    }
11737c478bd9Sstevel@tonic-gate 
11747c478bd9Sstevel@tonic-gate 		    mutex_enter(&plugin_state_lock);
11757c478bd9Sstevel@tonic-gate 		    if (plugin_state == ACCEPT) {
11767c478bd9Sstevel@tonic-gate 			while ((mp = allocb(sizeof (*rd), BPRI_LO)) == NULL)
11777c478bd9Sstevel@tonic-gate 			    (void) strwaitbuf(sizeof (*rd), BPRI_LO);
11787c478bd9Sstevel@tonic-gate 			/*
11797c478bd9Sstevel@tonic-gate 			 * Plugin is in accept state, hence the master
11807c478bd9Sstevel@tonic-gate 			 * transport queue for this is still accepting
11817c478bd9Sstevel@tonic-gate 			 * requests. Hence we can call svc_queuereq to
11827c478bd9Sstevel@tonic-gate 			 * queue this recieved msg.
11837c478bd9Sstevel@tonic-gate 			 */
11847c478bd9Sstevel@tonic-gate 			rd = (struct recv_data *)mp->b_rptr;
11857c478bd9Sstevel@tonic-gate 			rd->conn = conn;
118611606941Sjwahlig 			rd->rpcmsg.addr = (caddr_t)(uintptr_t)s_recvp->vaddr;
11877c478bd9Sstevel@tonic-gate 			rd->rpcmsg.type = RECV_BUFFER;
11887c478bd9Sstevel@tonic-gate 			rd->rpcmsg.len = wc.wc_bytes_xfer;
11897c478bd9Sstevel@tonic-gate 			rd->status = wc.wc_status;
11907c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
11917c478bd9Sstevel@tonic-gate 			conn->c_ref++;
11927c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
11937c478bd9Sstevel@tonic-gate 			mp->b_wptr += sizeof (*rd);
11947c478bd9Sstevel@tonic-gate 			svc_queuereq((queue_t *)rib_stat->q, mp);
11957c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
11967c478bd9Sstevel@tonic-gate 		    } else {
11977c478bd9Sstevel@tonic-gate 			/*
11987c478bd9Sstevel@tonic-gate 			 * The master transport for this is going
11997c478bd9Sstevel@tonic-gate 			 * away and the queue is not accepting anymore
12007c478bd9Sstevel@tonic-gate 			 * requests for krpc, so don't do anything, just
12017c478bd9Sstevel@tonic-gate 			 * free the msg.
12027c478bd9Sstevel@tonic-gate 			 */
12037c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
12047c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
120511606941Sjwahlig 			(void *)(uintptr_t)s_recvp->vaddr);
12067c478bd9Sstevel@tonic-gate 		    }
12077c478bd9Sstevel@tonic-gate 		} else {
12087c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
120911606941Sjwahlig 				(void *)(uintptr_t)s_recvp->vaddr);
12107c478bd9Sstevel@tonic-gate 		}
12117c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
12127c478bd9Sstevel@tonic-gate 	}
12137c478bd9Sstevel@tonic-gate }
12147c478bd9Sstevel@tonic-gate 
12157c478bd9Sstevel@tonic-gate /*
12167c478bd9Sstevel@tonic-gate  * Handles DR event of IBT_HCA_DETACH_EVENT.
12177c478bd9Sstevel@tonic-gate  */
12187c478bd9Sstevel@tonic-gate /* ARGSUSED */
12197c478bd9Sstevel@tonic-gate static void
12207c478bd9Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
12217c478bd9Sstevel@tonic-gate 	ibt_async_code_t code, ibt_async_event_t *event)
12227c478bd9Sstevel@tonic-gate {
12237c478bd9Sstevel@tonic-gate 
12247c478bd9Sstevel@tonic-gate 	switch (code) {
12257c478bd9Sstevel@tonic-gate 	case IBT_HCA_ATTACH_EVENT:
12267c478bd9Sstevel@tonic-gate 		/* ignore */
12277c478bd9Sstevel@tonic-gate 		break;
12287c478bd9Sstevel@tonic-gate 	case IBT_HCA_DETACH_EVENT:
12297c478bd9Sstevel@tonic-gate 	{
12307c478bd9Sstevel@tonic-gate 		ASSERT(rib_stat->hca->hca_hdl == hca_hdl);
12317c478bd9Sstevel@tonic-gate 		rib_detach_hca(rib_stat->hca);
12327c478bd9Sstevel@tonic-gate #ifdef DEBUG
12337c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n");
12347c478bd9Sstevel@tonic-gate #endif
12357c478bd9Sstevel@tonic-gate 		break;
12367c478bd9Sstevel@tonic-gate 	}
12377c478bd9Sstevel@tonic-gate #ifdef DEBUG
12387c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PATH_MIGRATED:
12397c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PATH_MIGRATED\n");
12407c478bd9Sstevel@tonic-gate 		break;
12417c478bd9Sstevel@tonic-gate 	case IBT_EVENT_SQD:
12427c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n");
12437c478bd9Sstevel@tonic-gate 		break;
12447c478bd9Sstevel@tonic-gate 	case IBT_EVENT_COM_EST:
12457c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n");
12467c478bd9Sstevel@tonic-gate 		break;
12477c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CATASTROPHIC_CHAN:
12487c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CATASTROPHIC_CHAN\n");
12497c478bd9Sstevel@tonic-gate 		break;
12507c478bd9Sstevel@tonic-gate 	case IBT_ERROR_INVALID_REQUEST_CHAN:
12517c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): "
12527c478bd9Sstevel@tonic-gate 		"IBT_ERROR_INVALID_REQUEST_CHAN\n");
12537c478bd9Sstevel@tonic-gate 		break;
12547c478bd9Sstevel@tonic-gate 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
12557c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): "
12567c478bd9Sstevel@tonic-gate 		"IBT_ERROR_ACCESS_VIOLATION_CHAN\n");
12577c478bd9Sstevel@tonic-gate 		break;
12587c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PATH_MIGRATE_REQ:
12597c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PATH_MIGRATE_REQ\n");
12607c478bd9Sstevel@tonic-gate 		break;
12617c478bd9Sstevel@tonic-gate 	case IBT_ERROR_CQ:
12627c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n");
12637c478bd9Sstevel@tonic-gate 		break;
12647c478bd9Sstevel@tonic-gate 	case IBT_ERROR_PORT_DOWN:
12657c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n");
12667c478bd9Sstevel@tonic-gate 		break;
12677c478bd9Sstevel@tonic-gate 	case IBT_EVENT_PORT_UP:
12687c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n");
12697c478bd9Sstevel@tonic-gate 		break;
12707c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE1:
12717c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n");
12727c478bd9Sstevel@tonic-gate 		break;
12737c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE2:
12747c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n");
12757c478bd9Sstevel@tonic-gate 		break;
12767c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE3:
12777c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n");
12787c478bd9Sstevel@tonic-gate 		break;
12797c478bd9Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE4:
12807c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n");
12817c478bd9Sstevel@tonic-gate 		break;
12827c478bd9Sstevel@tonic-gate #endif
12837c478bd9Sstevel@tonic-gate 	default:
12847c478bd9Sstevel@tonic-gate 		break;
12857c478bd9Sstevel@tonic-gate 	}
12867c478bd9Sstevel@tonic-gate }
12877c478bd9Sstevel@tonic-gate 
12887c478bd9Sstevel@tonic-gate /*
12897c478bd9Sstevel@tonic-gate  * Client's reachable function.
12907c478bd9Sstevel@tonic-gate  */
12917c478bd9Sstevel@tonic-gate static rdma_stat
12927c478bd9Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle)
12937c478bd9Sstevel@tonic-gate {
12947c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
12957c478bd9Sstevel@tonic-gate 	rdma_stat	status;
12967c478bd9Sstevel@tonic-gate 
12977c478bd9Sstevel@tonic-gate 	/*
12987c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
12997c478bd9Sstevel@tonic-gate 	 */
13007c478bd9Sstevel@tonic-gate 	*handle = NULL;
13017c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
13027c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
13037c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
13047c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
13057c478bd9Sstevel@tonic-gate 	}
13067c478bd9Sstevel@tonic-gate 	status = rib_ping_srv(addr_type, raddr, &hca);
13077c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
13087c478bd9Sstevel@tonic-gate 
13097c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
13107c478bd9Sstevel@tonic-gate 		*handle = (void *)hca;
13117c478bd9Sstevel@tonic-gate 		/*
13127c478bd9Sstevel@tonic-gate 		 * Register the Address translation service
13137c478bd9Sstevel@tonic-gate 		 */
13147c478bd9Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
13157c478bd9Sstevel@tonic-gate 		if (ats_running == 0) {
13167c478bd9Sstevel@tonic-gate 			if (rib_register_ats(rib_stat->hca)
13177c478bd9Sstevel@tonic-gate 			    == RDMA_SUCCESS) {
13187c478bd9Sstevel@tonic-gate 				ats_running = 1;
13197c478bd9Sstevel@tonic-gate 				mutex_exit(&rib_stat->open_hca_lock);
13207c478bd9Sstevel@tonic-gate 				return (RDMA_SUCCESS);
13217c478bd9Sstevel@tonic-gate 			} else {
13227c478bd9Sstevel@tonic-gate 				mutex_exit(&rib_stat->open_hca_lock);
13237c478bd9Sstevel@tonic-gate 				return (RDMA_FAILED);
13247c478bd9Sstevel@tonic-gate 			}
13257c478bd9Sstevel@tonic-gate 		} else {
13267c478bd9Sstevel@tonic-gate 			mutex_exit(&rib_stat->open_hca_lock);
13277c478bd9Sstevel@tonic-gate 			return (RDMA_SUCCESS);
13287c478bd9Sstevel@tonic-gate 		}
13297c478bd9Sstevel@tonic-gate 	} else {
13307c478bd9Sstevel@tonic-gate 		*handle = NULL;
13317c478bd9Sstevel@tonic-gate 		if (rib_debug > 2)
13327c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_reachable(): ping_srv failed.\n");
13337c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
13347c478bd9Sstevel@tonic-gate 	}
13357c478bd9Sstevel@tonic-gate }
13367c478bd9Sstevel@tonic-gate 
13377c478bd9Sstevel@tonic-gate /* Client side qp creation */
13387c478bd9Sstevel@tonic-gate static rdma_stat
13397c478bd9Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
13407c478bd9Sstevel@tonic-gate {
13417c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
13427c478bd9Sstevel@tonic-gate 	CONN		*conn;
13437c478bd9Sstevel@tonic-gate 
13447c478bd9Sstevel@tonic-gate 	ASSERT(qp != NULL);
13457c478bd9Sstevel@tonic-gate 	*qp = NULL;
13467c478bd9Sstevel@tonic-gate 
13477c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
13487c478bd9Sstevel@tonic-gate 	conn = qptoc(kqp);
13497c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
13507c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
13517c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
13527c478bd9Sstevel@tonic-gate 
13537c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_CLIENT;
13547c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
13557c478bd9Sstevel@tonic-gate 	conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP);
13567c478bd9Sstevel@tonic-gate 	bcopy(raddr->buf, conn->c_raddr.buf, raddr->len);
13577c478bd9Sstevel@tonic-gate 	conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len;
13587c478bd9Sstevel@tonic-gate 
13597c478bd9Sstevel@tonic-gate 	/*
13607c478bd9Sstevel@tonic-gate 	 * Initialize
13617c478bd9Sstevel@tonic-gate 	 */
13627c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
13637c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
13647c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
13657c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
13667c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
13677c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
13687c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
13697c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
13707c478bd9Sstevel@tonic-gate 
13717c478bd9Sstevel@tonic-gate 	*qp = kqp;
13727c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
13737c478bd9Sstevel@tonic-gate }
13747c478bd9Sstevel@tonic-gate 
13757c478bd9Sstevel@tonic-gate /* Server side qp creation */
13767c478bd9Sstevel@tonic-gate static rdma_stat
13777c478bd9Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp)
13787c478bd9Sstevel@tonic-gate {
13797c478bd9Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
13807c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
13817c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
13827c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
13837c478bd9Sstevel@tonic-gate 
13847c478bd9Sstevel@tonic-gate 	ASSERT(qp != NULL);
13857c478bd9Sstevel@tonic-gate 	*qp = NULL;
13867c478bd9Sstevel@tonic-gate 
13877c478bd9Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
13887c478bd9Sstevel@tonic-gate 	kqp->hca = hca;
13897c478bd9Sstevel@tonic-gate 	kqp->port_num = port;
13907c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
13917c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
13927c478bd9Sstevel@tonic-gate 
13937c478bd9Sstevel@tonic-gate 	/*
13947c478bd9Sstevel@tonic-gate 	 * Create the qp handle
13957c478bd9Sstevel@tonic-gate 	 */
13967c478bd9Sstevel@tonic-gate 	bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
13977c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl;
13987c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl;
13997c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
14007c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = port;
14017c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
14027c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
14037c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
14047c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
14057c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
14067c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
14077c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
14087c478bd9Sstevel@tonic-gate 
14097c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
14107c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
14117c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
14127c478bd9Sstevel@tonic-gate 			IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl,
14137c478bd9Sstevel@tonic-gate 			&chan_sizes);
14147c478bd9Sstevel@tonic-gate 	} else {
14157c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
14167c478bd9Sstevel@tonic-gate 		goto fail;
14177c478bd9Sstevel@tonic-gate 	}
14187c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
14197c478bd9Sstevel@tonic-gate 
14207c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
14217c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_svc_create_chan: "
14227c478bd9Sstevel@tonic-gate 			"ibt_alloc_rc_channel failed, ibt_status=%d.",
14237c478bd9Sstevel@tonic-gate 			ibt_status);
14247c478bd9Sstevel@tonic-gate 		goto fail;
14257c478bd9Sstevel@tonic-gate 	}
14267c478bd9Sstevel@tonic-gate 
14277c478bd9Sstevel@tonic-gate 	kqp->mode = RIB_SERVER;
14287c478bd9Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
14297c478bd9Sstevel@tonic-gate 	kqp->q = q;	/* server ONLY */
14307c478bd9Sstevel@tonic-gate 
14317c478bd9Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
14327c478bd9Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
14337c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
14347c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
14357c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
14367c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
14377c478bd9Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
14387c478bd9Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
14397c478bd9Sstevel@tonic-gate 	/*
14407c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
14417c478bd9Sstevel@tonic-gate 	 */
14427c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
14437c478bd9Sstevel@tonic-gate 	kqp->rdmaconn.c_state = C_CONNECTED;
14447c478bd9Sstevel@tonic-gate 	*qp = kqp;
14457c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
14467c478bd9Sstevel@tonic-gate fail:
14477c478bd9Sstevel@tonic-gate 	if (kqp)
14487c478bd9Sstevel@tonic-gate 		kmem_free(kqp, sizeof (rib_qp_t));
14497c478bd9Sstevel@tonic-gate 
14507c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
14517c478bd9Sstevel@tonic-gate }
14527c478bd9Sstevel@tonic-gate 
14537c478bd9Sstevel@tonic-gate void
14547c478bd9Sstevel@tonic-gate rib_dump_pathrec(ibt_path_info_t *path_rec)
14557c478bd9Sstevel@tonic-gate {
14567c478bd9Sstevel@tonic-gate 	ib_pkey_t	pkey;
14577c478bd9Sstevel@tonic-gate 
14587c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
14597c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Path Record:\n");
14607c478bd9Sstevel@tonic-gate 
14617c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Source HCA GUID = %llx\n",
14627c478bd9Sstevel@tonic-gate 		(longlong_t)path_rec->pi_hca_guid);
14637c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Dest Service ID = %llx\n",
14647c478bd9Sstevel@tonic-gate 		(longlong_t)path_rec->pi_sid);
14657c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Port Num        = %02d\n",
14667c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_hca_port_num);
14677c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "P_Key Index     = %04d\n",
14687c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_pkey_ix);
14697c478bd9Sstevel@tonic-gate 
14707c478bd9Sstevel@tonic-gate 	    (void) ibt_index2pkey_byguid(path_rec->pi_hca_guid,
14717c478bd9Sstevel@tonic-gate 			path_rec->pi_prim_cep_path.cep_hca_port_num,
14727c478bd9Sstevel@tonic-gate 			path_rec->pi_prim_cep_path.cep_pkey_ix, &pkey);
14737c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "P_Key		= 0x%x\n", pkey);
14747c478bd9Sstevel@tonic-gate 
14757c478bd9Sstevel@tonic-gate 
14767c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "SGID:           = %llx:%llx\n",
14777c478bd9Sstevel@tonic-gate 		(longlong_t)
14787c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_sgid.gid_prefix,
14797c478bd9Sstevel@tonic-gate 		(longlong_t)
14807c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_sgid.gid_guid);
14817c478bd9Sstevel@tonic-gate 
14827c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "DGID:           = %llx:%llx\n",
14837c478bd9Sstevel@tonic-gate 		(longlong_t)
14847c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_dgid.gid_prefix,
14857c478bd9Sstevel@tonic-gate 		(longlong_t)
14867c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_dgid.gid_guid);
14877c478bd9Sstevel@tonic-gate 
14887c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Path Rate       = %02x\n",
14897c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_srate);
14907c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "SL              = %02x\n",
14917c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_cep_path.cep_adds_vect.av_srvl);
14927c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Prim Packet LT  = %02x\n",
14937c478bd9Sstevel@tonic-gate 		path_rec->pi_prim_pkt_lt);
14947c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "Path MTU        = %02x\n",
14957c478bd9Sstevel@tonic-gate 		path_rec->pi_path_mtu);
14967c478bd9Sstevel@tonic-gate 	}
14977c478bd9Sstevel@tonic-gate }
14987c478bd9Sstevel@tonic-gate 
14997c478bd9Sstevel@tonic-gate /* ARGSUSED */
15007c478bd9Sstevel@tonic-gate ibt_cm_status_t
15017c478bd9Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event,
15027c478bd9Sstevel@tonic-gate     ibt_cm_return_args_t *ret_args, void *priv_data,
15037c478bd9Sstevel@tonic-gate     ibt_priv_data_len_t len)
15047c478bd9Sstevel@tonic-gate {
15057c478bd9Sstevel@tonic-gate 	rpcib_state_t   *ribstat;
15067c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
15077c478bd9Sstevel@tonic-gate 
15087c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)clnt_hdl;
15097c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
15107c478bd9Sstevel@tonic-gate 
15117c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
15127c478bd9Sstevel@tonic-gate 
15137c478bd9Sstevel@tonic-gate 	/* got a connection close event */
15147c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
15157c478bd9Sstevel@tonic-gate 	{
15167c478bd9Sstevel@tonic-gate 		CONN	*conn;
15177c478bd9Sstevel@tonic-gate 		rib_qp_t *qp;
15187c478bd9Sstevel@tonic-gate 
15197c478bd9Sstevel@tonic-gate 		/* check reason why connection was closed */
15207c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
15217c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
15227c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
15237c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
15247c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
15257c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
15267c478bd9Sstevel@tonic-gate 			/*
15277c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
15287c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
15297c478bd9Sstevel@tonic-gate 			 */
15307c478bd9Sstevel@tonic-gate 			break;
15317c478bd9Sstevel@tonic-gate 		default:
15327c478bd9Sstevel@tonic-gate 			/*
15337c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
15347c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
15357c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
15367c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
15377c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
15387c478bd9Sstevel@tonic-gate 			 * state
15397c478bd9Sstevel@tonic-gate 			 */
15407c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
15417c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
15427c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
15437c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
15447c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
15457c478bd9Sstevel@tonic-gate 				break;
15467c478bd9Sstevel@tonic-gate 			}
15477c478bd9Sstevel@tonic-gate 
15487c478bd9Sstevel@tonic-gate 			conn->c_state = C_ERROR;
15497c478bd9Sstevel@tonic-gate 
15507c478bd9Sstevel@tonic-gate 			/*
15517c478bd9Sstevel@tonic-gate 			 * Free the rc_channel. Channel has already
15527c478bd9Sstevel@tonic-gate 			 * transitioned to ERROR state and WRs have been
15537c478bd9Sstevel@tonic-gate 			 * FLUSHED_ERR already.
15547c478bd9Sstevel@tonic-gate 			 */
15557c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
15567c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
15577c478bd9Sstevel@tonic-gate 
15587c478bd9Sstevel@tonic-gate 			/*
15597c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref is down to 0 already
15607c478bd9Sstevel@tonic-gate 			 */
15617c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
15627c478bd9Sstevel@tonic-gate 				/*
15637c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
15647c478bd9Sstevel@tonic-gate 				 */
15657c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
15667c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
15677c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
15687c478bd9Sstevel@tonic-gate 					&hca->cl_conn_list);
15697c478bd9Sstevel@tonic-gate 			} else {
15707c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
15717c478bd9Sstevel@tonic-gate 			}
15727c478bd9Sstevel@tonic-gate #ifdef DEBUG
15737c478bd9Sstevel@tonic-gate 			if (rib_debug)
15747c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_clnt_cm_handler: "
15757c478bd9Sstevel@tonic-gate 					"(CONN_CLOSED) channel disconnected");
15767c478bd9Sstevel@tonic-gate #endif
15777c478bd9Sstevel@tonic-gate 			break;
15787c478bd9Sstevel@tonic-gate 		}
15797c478bd9Sstevel@tonic-gate 		break;
15807c478bd9Sstevel@tonic-gate 	}
15817c478bd9Sstevel@tonic-gate 	default:
15827c478bd9Sstevel@tonic-gate 		break;
15837c478bd9Sstevel@tonic-gate 	}
15847c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
15857c478bd9Sstevel@tonic-gate }
15867c478bd9Sstevel@tonic-gate 
15877c478bd9Sstevel@tonic-gate 
15887c478bd9Sstevel@tonic-gate /* Check if server has done ATS registration */
15897c478bd9Sstevel@tonic-gate rdma_stat
15907c478bd9Sstevel@tonic-gate rib_chk_srv_ats(rib_hca_t *hca, struct netbuf *raddr,
15917c478bd9Sstevel@tonic-gate 	int addr_type, ibt_path_info_t *path)
15927c478bd9Sstevel@tonic-gate {
15937c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin4;
15947c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
15957c478bd9Sstevel@tonic-gate 	ibt_path_attr_t		path_attr;
15967c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
15977c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
15987c478bd9Sstevel@tonic-gate 	ibt_ar_t		ar_query, ar_result;
15997c478bd9Sstevel@tonic-gate 	rib_service_t		*ats;
16007c478bd9Sstevel@tonic-gate 	ib_gid_t		sgid;
16017c478bd9Sstevel@tonic-gate 	ibt_path_info_t		paths[MAX_PORTS];
16027c478bd9Sstevel@tonic-gate 	uint8_t			npaths, i;
16037c478bd9Sstevel@tonic-gate 
16047c478bd9Sstevel@tonic-gate 	(void) bzero(&path_attr, sizeof (ibt_path_attr_t));
16057c478bd9Sstevel@tonic-gate 	(void) bzero(path, sizeof (ibt_path_info_t));
16067c478bd9Sstevel@tonic-gate 
16077c478bd9Sstevel@tonic-gate 	/*
16087c478bd9Sstevel@tonic-gate 	 * Construct svc name
16097c478bd9Sstevel@tonic-gate 	 */
16107c478bd9Sstevel@tonic-gate 	path_attr.pa_sname = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
16117c478bd9Sstevel@tonic-gate 	switch (addr_type) {
16127c478bd9Sstevel@tonic-gate 	case AF_INET:
16137c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)raddr->buf;
16147c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &sin4->sin_addr, path_attr.pa_sname,
16157c478bd9Sstevel@tonic-gate 		    IB_SVC_NAME_LEN);
16167c478bd9Sstevel@tonic-gate 		break;
16177c478bd9Sstevel@tonic-gate 
16187c478bd9Sstevel@tonic-gate 	case AF_INET6:
16197c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)raddr->buf;
16207c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET6, &sin6->sin6_addr,
16217c478bd9Sstevel@tonic-gate 		    path_attr.pa_sname, IB_SVC_NAME_LEN);
16227c478bd9Sstevel@tonic-gate 		break;
16237c478bd9Sstevel@tonic-gate 
16247c478bd9Sstevel@tonic-gate 	default:
16257c478bd9Sstevel@tonic-gate 		kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
16267c478bd9Sstevel@tonic-gate 		return (RDMA_INVAL);
16277c478bd9Sstevel@tonic-gate 	}
16287c478bd9Sstevel@tonic-gate 	(void) strlcat(path_attr.pa_sname, "::NFS", IB_SVC_NAME_LEN);
16297c478bd9Sstevel@tonic-gate 
16307c478bd9Sstevel@tonic-gate 	/*
16317c478bd9Sstevel@tonic-gate 	 * Attempt a path to the server on an ATS-registered port.
16327c478bd9Sstevel@tonic-gate 	 * Try all ATS-registered ports until one succeeds.
16337c478bd9Sstevel@tonic-gate 	 * The first one that succeeds will be used to connect
16347c478bd9Sstevel@tonic-gate 	 * to the server.  If none of them succeed, return RDMA_FAILED.
16357c478bd9Sstevel@tonic-gate 	 */
16367c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
16377c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
16387c478bd9Sstevel@tonic-gate 	    rw_enter(&hca->service_list_lock, RW_READER);
16397c478bd9Sstevel@tonic-gate 	    for (ats = hca->ats_list; ats != NULL; ats = ats->srv_next) {
16407c478bd9Sstevel@tonic-gate 		path_attr.pa_hca_guid = hca->hca_guid;
16417c478bd9Sstevel@tonic-gate 		path_attr.pa_hca_port_num = ats->srv_port;
16427c478bd9Sstevel@tonic-gate 		ibt_status = ibt_get_paths(hca->ibt_clnt_hdl,
16437c478bd9Sstevel@tonic-gate 			IBT_PATH_MULTI_SVC_DEST, &path_attr, 2, paths, &npaths);
16447c478bd9Sstevel@tonic-gate 		if (ibt_status == IBT_SUCCESS ||
16457c478bd9Sstevel@tonic-gate 			ibt_status == IBT_INSUFF_DATA) {
16467c478bd9Sstevel@tonic-gate 		    for (i = 0; i < npaths; i++) {
16477c478bd9Sstevel@tonic-gate 			if (paths[i].pi_hca_guid) {
16487c478bd9Sstevel@tonic-gate 			/*
16497c478bd9Sstevel@tonic-gate 			 * do ibt_query_ar()
16507c478bd9Sstevel@tonic-gate 			 */
16517c478bd9Sstevel@tonic-gate 			    sgid =
16527c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_adds_vect.av_sgid;
16537c478bd9Sstevel@tonic-gate 
16547c478bd9Sstevel@tonic-gate 			    (void) ibt_index2pkey_byguid(paths[i].pi_hca_guid,
16557c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_hca_port_num,
16567c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_pkey_ix, &pkey);
16577c478bd9Sstevel@tonic-gate 
16587c478bd9Sstevel@tonic-gate 			    bzero(&ar_query, sizeof (ar_query));
16597c478bd9Sstevel@tonic-gate 			    bzero(&ar_result, sizeof (ar_result));
16607c478bd9Sstevel@tonic-gate 			    ar_query.ar_gid =
16617c478bd9Sstevel@tonic-gate 				paths[i].pi_prim_cep_path.cep_adds_vect.av_dgid;
16627c478bd9Sstevel@tonic-gate 			    ar_query.ar_pkey = pkey;
16637c478bd9Sstevel@tonic-gate 			    ibt_status = ibt_query_ar(&sgid, &ar_query,
16647c478bd9Sstevel@tonic-gate 					&ar_result);
16657c478bd9Sstevel@tonic-gate 			    if (ibt_status == IBT_SUCCESS) {
16667c478bd9Sstevel@tonic-gate #ifdef DEBUG
16677c478bd9Sstevel@tonic-gate 				if (rib_debug > 1)
16687c478bd9Sstevel@tonic-gate 				    rib_dump_pathrec(&paths[i]);
16697c478bd9Sstevel@tonic-gate #endif
16707c478bd9Sstevel@tonic-gate 				bcopy(&paths[i], path,
16717c478bd9Sstevel@tonic-gate 					sizeof (ibt_path_info_t));
16727c478bd9Sstevel@tonic-gate 				rw_exit(&hca->service_list_lock);
16737c478bd9Sstevel@tonic-gate 				kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
16747c478bd9Sstevel@tonic-gate 				rw_exit(&hca->state_lock);
16757c478bd9Sstevel@tonic-gate 				return (RDMA_SUCCESS);
16767c478bd9Sstevel@tonic-gate 			    }
16777c478bd9Sstevel@tonic-gate #ifdef DEBUG
16787c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
16797c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_chk_srv_ats: "
16807c478bd9Sstevel@tonic-gate 				    "ibt_query_ar FAILED, return\n");
16817c478bd9Sstevel@tonic-gate 			    }
16827c478bd9Sstevel@tonic-gate #endif
16837c478bd9Sstevel@tonic-gate 			}
16847c478bd9Sstevel@tonic-gate 		    }
16857c478bd9Sstevel@tonic-gate 		}
16867c478bd9Sstevel@tonic-gate 	    }
16877c478bd9Sstevel@tonic-gate 	    rw_exit(&hca->service_list_lock);
16887c478bd9Sstevel@tonic-gate 	}
16897c478bd9Sstevel@tonic-gate 	kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
16907c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
16917c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
16927c478bd9Sstevel@tonic-gate }
16937c478bd9Sstevel@tonic-gate 
16947c478bd9Sstevel@tonic-gate 
16957c478bd9Sstevel@tonic-gate /*
16967c478bd9Sstevel@tonic-gate  * Connect to the server.
16977c478bd9Sstevel@tonic-gate  */
16987c478bd9Sstevel@tonic-gate rdma_stat
16997c478bd9Sstevel@tonic-gate rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, ibt_path_info_t *path)
17007c478bd9Sstevel@tonic-gate {
17017c478bd9Sstevel@tonic-gate 	ibt_chan_open_args_t	chan_args;	/* channel args */
17027c478bd9Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
17037c478bd9Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
17047c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
17057c478bd9Sstevel@tonic-gate 	ibt_rc_returns_t	ret_args;   	/* conn reject info */
17067c478bd9Sstevel@tonic-gate 	int refresh = REFRESH_ATTEMPTS;	/* refresh if IBT_CM_CONN_STALE */
17077c478bd9Sstevel@tonic-gate 
17087c478bd9Sstevel@tonic-gate 	(void) bzero(&chan_args, sizeof (chan_args));
17097c478bd9Sstevel@tonic-gate 	(void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
17107c478bd9Sstevel@tonic-gate 
17117c478bd9Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = path->pi_prim_cep_path.cep_hca_port_num;
17127c478bd9Sstevel@tonic-gate 	/* Alloc a RC channel */
17137c478bd9Sstevel@tonic-gate 	qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl;
17147c478bd9Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl;
17157c478bd9Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
17167c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
17177c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
17187c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
17197c478bd9Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
17207c478bd9Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
17217c478bd9Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
17227c478bd9Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
17237c478bd9Sstevel@tonic-gate 
17247c478bd9Sstevel@tonic-gate 	chan_args.oc_path = path;
17257c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_handler = rib_clnt_cm_handler;
17267c478bd9Sstevel@tonic-gate 	chan_args.oc_cm_clnt_private = (void *)rib_stat;
17277c478bd9Sstevel@tonic-gate 	chan_args.oc_rdma_ra_out = 1;
17287c478bd9Sstevel@tonic-gate 	chan_args.oc_rdma_ra_in = 1;
17297c478bd9Sstevel@tonic-gate 	chan_args.oc_path_retry_cnt = 2;
17307c478bd9Sstevel@tonic-gate 	chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
17317c478bd9Sstevel@tonic-gate 
17327c478bd9Sstevel@tonic-gate refresh:
17337c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
17347c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
17357c478bd9Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
17367c478bd9Sstevel@tonic-gate 			IBT_ACHAN_NO_FLAGS, &qp_attr, &qp->qp_hdl,
17377c478bd9Sstevel@tonic-gate 			&chan_sizes);
17387c478bd9Sstevel@tonic-gate 	} else {
17397c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
17407c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17417c478bd9Sstevel@tonic-gate 	}
17427c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
17437c478bd9Sstevel@tonic-gate 
17447c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
17457c478bd9Sstevel@tonic-gate #ifdef DEBUG
17467c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_conn_to_srv: alloc_rc_channel "
17477c478bd9Sstevel@tonic-gate 		"failed, ibt_status=%d.", ibt_status);
17487c478bd9Sstevel@tonic-gate #endif
17497c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17507c478bd9Sstevel@tonic-gate 	}
17517c478bd9Sstevel@tonic-gate 
17527c478bd9Sstevel@tonic-gate 	/* Connect to the Server */
17537c478bd9Sstevel@tonic-gate 	(void) bzero(&ret_args, sizeof (ret_args));
17547c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->cb_lock);
17557c478bd9Sstevel@tonic-gate 	ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS,
17567c478bd9Sstevel@tonic-gate 			IBT_BLOCKING, &chan_args, &ret_args);
17577c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
17587c478bd9Sstevel@tonic-gate #ifdef DEBUG
17597c478bd9Sstevel@tonic-gate 		if (rib_debug)
17607c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_conn_to_srv: open_rc_channel"
17617c478bd9Sstevel@tonic-gate 				" failed for qp %p, status=%d, "
17627c478bd9Sstevel@tonic-gate 				"ret_args.rc_status=%d\n",
17637c478bd9Sstevel@tonic-gate 				(void *)qp, ibt_status, ret_args.rc_status);
17647c478bd9Sstevel@tonic-gate #endif
17657c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
17667c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
17677c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->cb_lock);
17687c478bd9Sstevel@tonic-gate 		if (refresh-- && ibt_status == IBT_CM_FAILURE &&
17697c478bd9Sstevel@tonic-gate 			ret_args.rc_status == IBT_CM_CONN_STALE) {
17707c478bd9Sstevel@tonic-gate 			/*
17717c478bd9Sstevel@tonic-gate 			 * Got IBT_CM_CONN_STALE probably because of stale
17727c478bd9Sstevel@tonic-gate 			 * data on the passive end of a channel that existed
17737c478bd9Sstevel@tonic-gate 			 * prior to reboot. Retry establishing a channel
17747c478bd9Sstevel@tonic-gate 			 * REFRESH_ATTEMPTS times, during which time the
17757c478bd9Sstevel@tonic-gate 			 * stale conditions on the server might clear up.
17767c478bd9Sstevel@tonic-gate 			 */
17777c478bd9Sstevel@tonic-gate 			goto refresh;
17787c478bd9Sstevel@tonic-gate 		}
17797c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
17807c478bd9Sstevel@tonic-gate 	}
17817c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->cb_lock);
17827c478bd9Sstevel@tonic-gate 	/*
17837c478bd9Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
17847c478bd9Sstevel@tonic-gate 	 */
17857c478bd9Sstevel@tonic-gate 	ibt_set_chan_private(qp->qp_hdl, (void *)qp);
17867c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
17877c478bd9Sstevel@tonic-gate }
17887c478bd9Sstevel@tonic-gate 
17897c478bd9Sstevel@tonic-gate rdma_stat
17907c478bd9Sstevel@tonic-gate rib_ping_srv(int addr_type, struct netbuf *raddr, rib_hca_t **hca)
17917c478bd9Sstevel@tonic-gate {
17927c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin4;
17937c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
17947c478bd9Sstevel@tonic-gate 	ibt_path_attr_t		path_attr;
17957c478bd9Sstevel@tonic-gate 	ibt_path_info_t		path;
17967c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
17977c478bd9Sstevel@tonic-gate 
17987c478bd9Sstevel@tonic-gate 	ASSERT(raddr->buf != NULL);
17997c478bd9Sstevel@tonic-gate 
18007c478bd9Sstevel@tonic-gate 	bzero(&path_attr, sizeof (ibt_path_attr_t));
18017c478bd9Sstevel@tonic-gate 	bzero(&path, sizeof (ibt_path_info_t));
18027c478bd9Sstevel@tonic-gate 
18037c478bd9Sstevel@tonic-gate 	/*
18047c478bd9Sstevel@tonic-gate 	 * Conctruct svc name
18057c478bd9Sstevel@tonic-gate 	 */
18067c478bd9Sstevel@tonic-gate 	path_attr.pa_sname = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
18077c478bd9Sstevel@tonic-gate 	switch (addr_type) {
18087c478bd9Sstevel@tonic-gate 	case AF_INET:
18097c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)raddr->buf;
18107c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &sin4->sin_addr, path_attr.pa_sname,
18117c478bd9Sstevel@tonic-gate 		    IB_SVC_NAME_LEN);
18127c478bd9Sstevel@tonic-gate 		break;
18137c478bd9Sstevel@tonic-gate 
18147c478bd9Sstevel@tonic-gate 	case AF_INET6:
18157c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)raddr->buf;
18167c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET6, &sin6->sin6_addr,
18177c478bd9Sstevel@tonic-gate 		    path_attr.pa_sname, IB_SVC_NAME_LEN);
18187c478bd9Sstevel@tonic-gate 		break;
18197c478bd9Sstevel@tonic-gate 
18207c478bd9Sstevel@tonic-gate 	default:
18217c478bd9Sstevel@tonic-gate #ifdef	DEBUG
18227c478bd9Sstevel@tonic-gate 	    if (rib_debug) {
18237c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_ping_srv: Address not recognized\n");
18247c478bd9Sstevel@tonic-gate 	    }
18257c478bd9Sstevel@tonic-gate #endif
18267c478bd9Sstevel@tonic-gate 		kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
18277c478bd9Sstevel@tonic-gate 		return (RDMA_INVAL);
18287c478bd9Sstevel@tonic-gate 	}
18297c478bd9Sstevel@tonic-gate 	(void) strlcat(path_attr.pa_sname, "::NFS", IB_SVC_NAME_LEN);
18307c478bd9Sstevel@tonic-gate 
18317c478bd9Sstevel@tonic-gate 	ibt_status = ibt_get_paths(rib_stat->ibt_clnt_hdl,
18327c478bd9Sstevel@tonic-gate 		IBT_PATH_NO_FLAGS, &path_attr, 1, &path, NULL);
18337c478bd9Sstevel@tonic-gate 	kmem_free(path_attr.pa_sname, IB_SVC_NAME_LEN);
18347c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
18357c478bd9Sstevel@tonic-gate 	    if (rib_debug > 1) {
18367c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_ping_srv: ibt_get_paths FAILED!"
18377c478bd9Sstevel@tonic-gate 			" status=%d\n", ibt_status);
18387c478bd9Sstevel@tonic-gate 	    }
18397c478bd9Sstevel@tonic-gate 	} else if (path.pi_hca_guid) {
18407c478bd9Sstevel@tonic-gate 		ASSERT(path.pi_hca_guid == rib_stat->hca->hca_guid);
18417c478bd9Sstevel@tonic-gate 		*hca = rib_stat->hca;
18427c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
18437c478bd9Sstevel@tonic-gate 	}
18447c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
18457c478bd9Sstevel@tonic-gate }
18467c478bd9Sstevel@tonic-gate 
18477c478bd9Sstevel@tonic-gate /*
18487c478bd9Sstevel@tonic-gate  * Close channel, remove from connection list and
18497c478bd9Sstevel@tonic-gate  * free up resources allocated for that channel.
18507c478bd9Sstevel@tonic-gate  */
18517c478bd9Sstevel@tonic-gate rdma_stat
18527c478bd9Sstevel@tonic-gate rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
18537c478bd9Sstevel@tonic-gate {
18547c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
18557c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
18567c478bd9Sstevel@tonic-gate 
18577c478bd9Sstevel@tonic-gate 	/*
18587c478bd9Sstevel@tonic-gate 	 * c_ref == 0 and connection is in C_DISCONN_PEND
18597c478bd9Sstevel@tonic-gate 	 */
18607c478bd9Sstevel@tonic-gate 	hca = qp->hca;
18617c478bd9Sstevel@tonic-gate 	if (conn_list != NULL)
18627c478bd9Sstevel@tonic-gate 		(void) rib_rm_conn(conn, conn_list);
18637c478bd9Sstevel@tonic-gate 	if (qp->qp_hdl != NULL) {
18647c478bd9Sstevel@tonic-gate 		/*
18657c478bd9Sstevel@tonic-gate 		 * If the channel has not been establised,
18667c478bd9Sstevel@tonic-gate 		 * ibt_flush_channel is called to flush outstanding WRs
18677c478bd9Sstevel@tonic-gate 		 * on the Qs.  Otherwise, ibt_close_rc_channel() is
18687c478bd9Sstevel@tonic-gate 		 * called.  The channel is then freed.
18697c478bd9Sstevel@tonic-gate 		 */
18707c478bd9Sstevel@tonic-gate 		if (conn_list != NULL)
18717c478bd9Sstevel@tonic-gate 		    (void) ibt_close_rc_channel(qp->qp_hdl,
18727c478bd9Sstevel@tonic-gate 			IBT_BLOCKING, NULL, 0, NULL, NULL, 0);
18737c478bd9Sstevel@tonic-gate 		else
18747c478bd9Sstevel@tonic-gate 		    (void) ibt_flush_channel(qp->qp_hdl);
18757c478bd9Sstevel@tonic-gate 
18767c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
18777c478bd9Sstevel@tonic-gate 		while (qp->n_posted_rbufs)
18787c478bd9Sstevel@tonic-gate 			cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock);
18797c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
18807c478bd9Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
18817c478bd9Sstevel@tonic-gate 		qp->qp_hdl = NULL;
18827c478bd9Sstevel@tonic-gate 	}
18837c478bd9Sstevel@tonic-gate 	ASSERT(qp->rdlist == NULL);
18847c478bd9Sstevel@tonic-gate 	if (qp->replylist != NULL) {
18857c478bd9Sstevel@tonic-gate 		(void) rib_rem_replylist(qp);
18867c478bd9Sstevel@tonic-gate 	}
18877c478bd9Sstevel@tonic-gate 
18887c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->cb_conn_cv);
18897c478bd9Sstevel@tonic-gate 	cv_destroy(&qp->posted_rbufs_cv);
18907c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->cb_lock);
18917c478bd9Sstevel@tonic-gate 
18927c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->replylist_lock);
18937c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->posted_rbufs_lock);
18947c478bd9Sstevel@tonic-gate 	mutex_destroy(&qp->rdlist_lock);
18957c478bd9Sstevel@tonic-gate 
18967c478bd9Sstevel@tonic-gate 	cv_destroy(&conn->c_cv);
18977c478bd9Sstevel@tonic-gate 	mutex_destroy(&conn->c_lock);
18987c478bd9Sstevel@tonic-gate 
18997c478bd9Sstevel@tonic-gate 	if (conn->c_raddr.buf != NULL) {
19007c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
19017c478bd9Sstevel@tonic-gate 	}
19027c478bd9Sstevel@tonic-gate 	if (conn->c_laddr.buf != NULL) {
19037c478bd9Sstevel@tonic-gate 		kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
19047c478bd9Sstevel@tonic-gate 	}
19057c478bd9Sstevel@tonic-gate 	kmem_free(qp, sizeof (rib_qp_t));
19067c478bd9Sstevel@tonic-gate 
19077c478bd9Sstevel@tonic-gate 	/*
19087c478bd9Sstevel@tonic-gate 	 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
19097c478bd9Sstevel@tonic-gate 	 * then the hca is no longer being used.
19107c478bd9Sstevel@tonic-gate 	 */
19117c478bd9Sstevel@tonic-gate 	if (conn_list != NULL) {
19127c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
19137c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
19147c478bd9Sstevel@tonic-gate 			rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
19157c478bd9Sstevel@tonic-gate 			if (hca->srv_conn_list.conn_hd == NULL) {
19167c478bd9Sstevel@tonic-gate 				rw_enter(&hca->cl_conn_list.conn_lock,
19177c478bd9Sstevel@tonic-gate 					RW_READER);
19187c478bd9Sstevel@tonic-gate 				if (hca->cl_conn_list.conn_hd == NULL) {
19197c478bd9Sstevel@tonic-gate 					mutex_enter(&hca->inuse_lock);
19207c478bd9Sstevel@tonic-gate 					hca->inuse = FALSE;
19217c478bd9Sstevel@tonic-gate 					cv_signal(&hca->cb_cv);
19227c478bd9Sstevel@tonic-gate 					mutex_exit(&hca->inuse_lock);
19237c478bd9Sstevel@tonic-gate 				}
19247c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
19257c478bd9Sstevel@tonic-gate 			}
19267c478bd9Sstevel@tonic-gate 			rw_exit(&hca->srv_conn_list.conn_lock);
19277c478bd9Sstevel@tonic-gate 		}
19287c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
19297c478bd9Sstevel@tonic-gate 	}
19307c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
19317c478bd9Sstevel@tonic-gate }
19327c478bd9Sstevel@tonic-gate 
19337c478bd9Sstevel@tonic-gate /*
19347c478bd9Sstevel@tonic-gate  * Wait for send completion notification. Only on receiving a
19357c478bd9Sstevel@tonic-gate  * notification be it a successful or error completion, free the
19367c478bd9Sstevel@tonic-gate  * send_wid.
19377c478bd9Sstevel@tonic-gate  */
19387c478bd9Sstevel@tonic-gate static rdma_stat
19397c478bd9Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd)
19407c478bd9Sstevel@tonic-gate {
19417c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
19427c478bd9Sstevel@tonic-gate 	rdma_stat error = RDMA_SUCCESS;
19437c478bd9Sstevel@tonic-gate 	int	i;
19447c478bd9Sstevel@tonic-gate 
19457c478bd9Sstevel@tonic-gate 	/*
19467c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
19477c478bd9Sstevel@tonic-gate 	 */
19487c478bd9Sstevel@tonic-gate 	ASSERT(wd != NULL);
19497c478bd9Sstevel@tonic-gate 	mutex_enter(&wd->sendwait_lock);
19507c478bd9Sstevel@tonic-gate 	if (wd->status == (uint_t)SEND_WAIT) {
19517c478bd9Sstevel@tonic-gate 		timout = drv_usectohz(SEND_WAIT_TIME * 1000000) +
19527c478bd9Sstevel@tonic-gate 		    ddi_get_lbolt();
19537c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER) {
19547c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait(&wd->wait_cv,
19557c478bd9Sstevel@tonic-gate 				    &wd->sendwait_lock, timout)) > 0 &&
19567c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
19577c478bd9Sstevel@tonic-gate 				;
19587c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
19597c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
19607c478bd9Sstevel@tonic-gate #ifdef DEBUG
19617c478bd9Sstevel@tonic-gate 				if (rib_debug > 2)
19627c478bd9Sstevel@tonic-gate 					cmn_err(CE_WARN, "rib_sendwait: "
19637c478bd9Sstevel@tonic-gate 					    "timed out qp %p\n", (void *)qp);
19647c478bd9Sstevel@tonic-gate #endif
19657c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
19667c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
19677c478bd9Sstevel@tonic-gate 				break;
19687c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
19697c478bd9Sstevel@tonic-gate 				break;
19707c478bd9Sstevel@tonic-gate 			}
19717c478bd9Sstevel@tonic-gate 		} else {
19727c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv,
19737c478bd9Sstevel@tonic-gate 				    &wd->sendwait_lock, timout)) > 0 &&
19747c478bd9Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
19757c478bd9Sstevel@tonic-gate 				;
19767c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
19777c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
19787c478bd9Sstevel@tonic-gate #ifdef DEBUG
19797c478bd9Sstevel@tonic-gate 				if (rib_debug > 2)
19807c478bd9Sstevel@tonic-gate 					cmn_err(CE_WARN, "rib_sendwait: "
19817c478bd9Sstevel@tonic-gate 					    "timed out qp %p\n", (void *)qp);
19827c478bd9Sstevel@tonic-gate #endif
19837c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
19847c478bd9Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
19857c478bd9Sstevel@tonic-gate 				break;
19867c478bd9Sstevel@tonic-gate 			case 0:		/* interrupted */
19877c478bd9Sstevel@tonic-gate #ifdef DEBUG
19887c478bd9Sstevel@tonic-gate 				if (rib_debug > 2)
19897c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "rib_sendwait:"
19907c478bd9Sstevel@tonic-gate 					    " interrupted on qp %p\n",
19917c478bd9Sstevel@tonic-gate 					    (void *)qp);
19927c478bd9Sstevel@tonic-gate #endif
19937c478bd9Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
19947c478bd9Sstevel@tonic-gate 				error = RDMA_INTR;
19957c478bd9Sstevel@tonic-gate 				break;
19967c478bd9Sstevel@tonic-gate 			default:	/* got send completion */
19977c478bd9Sstevel@tonic-gate 				break;
19987c478bd9Sstevel@tonic-gate 			}
19997c478bd9Sstevel@tonic-gate 		}
20007c478bd9Sstevel@tonic-gate 	}
20017c478bd9Sstevel@tonic-gate 
20027c478bd9Sstevel@tonic-gate 	if (wd->status != (uint_t)SEND_WAIT) {
20037c478bd9Sstevel@tonic-gate 		/* got send completion */
20047c478bd9Sstevel@tonic-gate 		if (wd->status != RDMA_SUCCESS) {
20057c478bd9Sstevel@tonic-gate 		    error = wd->status;
20067c478bd9Sstevel@tonic-gate 		    if (wd->status != RDMA_CONNLOST)
20077c478bd9Sstevel@tonic-gate 			error = RDMA_FAILED;
20087c478bd9Sstevel@tonic-gate 		}
20097c478bd9Sstevel@tonic-gate 		for (i = 0; i < wd->nsbufs; i++) {
20107c478bd9Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), SEND_BUFFER,
201111606941Sjwahlig 				(void *)(uintptr_t)wd->sbufaddr[i]);
20127c478bd9Sstevel@tonic-gate 		}
20137c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
20147c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wd);
20157c478bd9Sstevel@tonic-gate 	} else {
20167c478bd9Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
20177c478bd9Sstevel@tonic-gate 	}
20187c478bd9Sstevel@tonic-gate 
20197c478bd9Sstevel@tonic-gate 	return (error);
20207c478bd9Sstevel@tonic-gate }
20217c478bd9Sstevel@tonic-gate 
20227c478bd9Sstevel@tonic-gate static struct send_wid *
20237c478bd9Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp)
20247c478bd9Sstevel@tonic-gate {
20257c478bd9Sstevel@tonic-gate 	struct send_wid	*wd;
20267c478bd9Sstevel@tonic-gate 
20277c478bd9Sstevel@tonic-gate 	wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP);
20287c478bd9Sstevel@tonic-gate 	wd->xid = xid;
20297c478bd9Sstevel@tonic-gate 	wd->cv_sig = cv_sig;
20307c478bd9Sstevel@tonic-gate 	wd->qp = qp;
20317c478bd9Sstevel@tonic-gate 	cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL);
20327c478bd9Sstevel@tonic-gate 	mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL);
20337c478bd9Sstevel@tonic-gate 	wd->status = (uint_t)SEND_WAIT;
20347c478bd9Sstevel@tonic-gate 
20357c478bd9Sstevel@tonic-gate 	return (wd);
20367c478bd9Sstevel@tonic-gate }
20377c478bd9Sstevel@tonic-gate 
20387c478bd9Sstevel@tonic-gate static int
20397c478bd9Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc)
20407c478bd9Sstevel@tonic-gate {
20417c478bd9Sstevel@tonic-gate 	cv_destroy(&wdesc->wait_cv);
20427c478bd9Sstevel@tonic-gate 	mutex_destroy(&wdesc->sendwait_lock);
20437c478bd9Sstevel@tonic-gate 	kmem_free(wdesc, sizeof (*wdesc));
20447c478bd9Sstevel@tonic-gate 
20457c478bd9Sstevel@tonic-gate 	return (0);
20467c478bd9Sstevel@tonic-gate }
20477c478bd9Sstevel@tonic-gate 
20487c478bd9Sstevel@tonic-gate static rdma_stat
20497c478bd9Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep)
20507c478bd9Sstevel@tonic-gate {
20517c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
20527c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
20537c478bd9Sstevel@tonic-gate 	    (void) rib_remreply(qp, rep);
20547c478bd9Sstevel@tonic-gate 	    mutex_exit(&qp->replylist_lock);
20557c478bd9Sstevel@tonic-gate 	    return (RDMA_SUCCESS);
20567c478bd9Sstevel@tonic-gate 	}
20577c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
20587c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
20597c478bd9Sstevel@tonic-gate }
20607c478bd9Sstevel@tonic-gate 
20617c478bd9Sstevel@tonic-gate /*
20627c478bd9Sstevel@tonic-gate  * Send buffers are freed here only in case of error in posting
20637c478bd9Sstevel@tonic-gate  * on QP. If the post succeeded, the send buffers are freed upon
20647c478bd9Sstevel@tonic-gate  * send completion in rib_sendwait() or in the scq_handler.
20657c478bd9Sstevel@tonic-gate  */
20667c478bd9Sstevel@tonic-gate rdma_stat
20677c478bd9Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
20687c478bd9Sstevel@tonic-gate 	int send_sig, int cv_sig)
20697c478bd9Sstevel@tonic-gate {
20707c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
20717c478bd9Sstevel@tonic-gate 	struct clist	*clp;
20727c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
20737c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
20747c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
20757c478bd9Sstevel@tonic-gate 	int		i, nds;
20767c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
20777c478bd9Sstevel@tonic-gate 	uint_t		total_msg_size;
20787c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
20797c478bd9Sstevel@tonic-gate 
20807c478bd9Sstevel@tonic-gate 	ASSERT(cl != NULL);
20817c478bd9Sstevel@tonic-gate 
20827c478bd9Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
20837c478bd9Sstevel@tonic-gate 
20847c478bd9Sstevel@tonic-gate 	nds = 0;
20857c478bd9Sstevel@tonic-gate 	total_msg_size = 0;
20867c478bd9Sstevel@tonic-gate 	clp = cl;
20877c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
20887c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
20897c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_send_and_wait: DSEG_MAX"
20907c478bd9Sstevel@tonic-gate 			    " too small!");
20917c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
20927c478bd9Sstevel@tonic-gate 		}
20937c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = clp->c_saddr;
20947c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */
20957c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = clp->c_len;
20967c478bd9Sstevel@tonic-gate 		total_msg_size += clp->c_len;
20977c478bd9Sstevel@tonic-gate 		clp = clp->c_next;
20987c478bd9Sstevel@tonic-gate 		nds++;
20997c478bd9Sstevel@tonic-gate 	}
21007c478bd9Sstevel@tonic-gate 
21017c478bd9Sstevel@tonic-gate 	if (send_sig) {
21027c478bd9Sstevel@tonic-gate 		/* Set SEND_SIGNAL flag. */
21037c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
21047c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, cv_sig, qp);
21057c478bd9Sstevel@tonic-gate 	} else {
21067c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
21077c478bd9Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, 0, qp);
21087c478bd9Sstevel@tonic-gate 	}
21097c478bd9Sstevel@tonic-gate 	wdesc->nsbufs = nds;
21107c478bd9Sstevel@tonic-gate 	for (i = 0; i < nds; i++) {
21117c478bd9Sstevel@tonic-gate 		wdesc->sbufaddr[i] = sgl[i].ds_va;
21127c478bd9Sstevel@tonic-gate 	}
21137c478bd9Sstevel@tonic-gate 
211411606941Sjwahlig 	tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
21157c478bd9Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_SEND;
21167c478bd9Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
21177c478bd9Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
21187c478bd9Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
21197c478bd9Sstevel@tonic-gate 
21207c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
21217c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
21227c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
21237c478bd9Sstevel@tonic-gate 	}
21247c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
21257c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
21267c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
21277c478bd9Sstevel@tonic-gate 		for (i = 0; i < nds; i++) {
21287c478bd9Sstevel@tonic-gate 			rib_rbuf_free(conn, SEND_BUFFER,
212911606941Sjwahlig 				(void *)(uintptr_t)wdesc->sbufaddr[i]);
21307c478bd9Sstevel@tonic-gate 		}
21317c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wdesc);
21327c478bd9Sstevel@tonic-gate #ifdef DEBUG
21337c478bd9Sstevel@tonic-gate 		if (rib_debug && ibt_status != IBT_SUCCESS)
21347c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_send_and_wait: ibt_post_send "
21357c478bd9Sstevel@tonic-gate 				"failed! wr_id %llx on qpn %p, status=%d!",
21367c478bd9Sstevel@tonic-gate 				(longlong_t)tx_wr.wr_id, (void *)qp,
21377c478bd9Sstevel@tonic-gate 				ibt_status);
21387c478bd9Sstevel@tonic-gate #endif
21397c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
21407c478bd9Sstevel@tonic-gate 	}
21417c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
21427c478bd9Sstevel@tonic-gate 
21437c478bd9Sstevel@tonic-gate 	if (send_sig) {
21447c478bd9Sstevel@tonic-gate 	    if (cv_sig) {
21457c478bd9Sstevel@tonic-gate 		/*
21467c478bd9Sstevel@tonic-gate 		 * cv_wait for send to complete.
21477c478bd9Sstevel@tonic-gate 		 * We can fail due to a timeout or signal or
21487c478bd9Sstevel@tonic-gate 		 * unsuccessful send.
21497c478bd9Sstevel@tonic-gate 		 */
21507c478bd9Sstevel@tonic-gate 		ret = rib_sendwait(qp, wdesc);
21517c478bd9Sstevel@tonic-gate #ifdef DEBUG
21527c478bd9Sstevel@tonic-gate 	    if (rib_debug > 2)
21537c478bd9Sstevel@tonic-gate 		if (ret != 0) {
21547c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_send_and_wait: rib_sendwait "
21557c478bd9Sstevel@tonic-gate 			"FAILED, rdma stat=%d, wr_id %llx, qp %p!",
21567c478bd9Sstevel@tonic-gate 			ret, (longlong_t)tx_wr.wr_id, (void *)qp);
21577c478bd9Sstevel@tonic-gate 		}
21587c478bd9Sstevel@tonic-gate #endif
21597c478bd9Sstevel@tonic-gate 		return (ret);
21607c478bd9Sstevel@tonic-gate 	    }
21617c478bd9Sstevel@tonic-gate 	}
21627c478bd9Sstevel@tonic-gate 
21637c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
21647c478bd9Sstevel@tonic-gate }
21657c478bd9Sstevel@tonic-gate 
21667c478bd9Sstevel@tonic-gate rdma_stat
21677c478bd9Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
21687c478bd9Sstevel@tonic-gate {
21697c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
21707c478bd9Sstevel@tonic-gate 
21717c478bd9Sstevel@tonic-gate 	/* send-wait & cv_signal */
21727c478bd9Sstevel@tonic-gate 	ret = rib_send_and_wait(conn, cl, msgid, 1, 1);
21737c478bd9Sstevel@tonic-gate 
21747c478bd9Sstevel@tonic-gate 	return (ret);
21757c478bd9Sstevel@tonic-gate }
21767c478bd9Sstevel@tonic-gate 
21777c478bd9Sstevel@tonic-gate /*
21787c478bd9Sstevel@tonic-gate  * Server interface (svc_rdma_ksend).
21797c478bd9Sstevel@tonic-gate  * Send RPC reply and wait for RDMA_DONE.
21807c478bd9Sstevel@tonic-gate  */
21817c478bd9Sstevel@tonic-gate rdma_stat
21827c478bd9Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
21837c478bd9Sstevel@tonic-gate {
21847c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
21857c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
21867c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
21877c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
21887c478bd9Sstevel@tonic-gate 
21897c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
21907c478bd9Sstevel@tonic-gate 	rd = rdma_done_add(qp, msgid);
21917c478bd9Sstevel@tonic-gate 
21927c478bd9Sstevel@tonic-gate 	/* No cv_signal (whether send-wait or no-send-wait) */
21937c478bd9Sstevel@tonic-gate 	ret = rib_send_and_wait(conn, cl, msgid, 1, 0);
21947c478bd9Sstevel@tonic-gate 	if (ret != RDMA_SUCCESS) {
21957c478bd9Sstevel@tonic-gate #ifdef DEBUG
21967c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rib_send_resp: send_and_wait "
21977c478bd9Sstevel@tonic-gate 		"failed, msgid %u, qp %p", msgid, (void *)qp);
21987c478bd9Sstevel@tonic-gate #endif
21997c478bd9Sstevel@tonic-gate 	    rdma_done_rm(qp, rd);
22007c478bd9Sstevel@tonic-gate 	    goto done;
22017c478bd9Sstevel@tonic-gate 	}
22027c478bd9Sstevel@tonic-gate 
22037c478bd9Sstevel@tonic-gate 	/*
22047c478bd9Sstevel@tonic-gate 	 * Wait for RDMA_DONE from remote end
22057c478bd9Sstevel@tonic-gate 	 */
22067c478bd9Sstevel@tonic-gate 	timout = drv_usectohz(REPLY_WAIT_TIME * 1000000) + ddi_get_lbolt();
22077c478bd9Sstevel@tonic-gate 	cv_wait_ret = cv_timedwait(&rd->rdma_done_cv, &qp->rdlist_lock,
22087c478bd9Sstevel@tonic-gate 	    timout);
22097c478bd9Sstevel@tonic-gate 	rdma_done_rm(qp, rd);
22107c478bd9Sstevel@tonic-gate 	if (cv_wait_ret < 0) {
22117c478bd9Sstevel@tonic-gate #ifdef DEBUG
22127c478bd9Sstevel@tonic-gate 		if (rib_debug > 1) {
22137c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_send_resp: RDMA_DONE not"
22147c478bd9Sstevel@tonic-gate 			    " recv'd for qp %p, xid:%u\n",
22157c478bd9Sstevel@tonic-gate 			    (void *)qp, msgid);
22167c478bd9Sstevel@tonic-gate 		}
22177c478bd9Sstevel@tonic-gate #endif
22187c478bd9Sstevel@tonic-gate 		ret = RDMA_TIMEDOUT;
22197c478bd9Sstevel@tonic-gate 		goto done;
22207c478bd9Sstevel@tonic-gate 	}
22217c478bd9Sstevel@tonic-gate 
22227c478bd9Sstevel@tonic-gate done:
22237c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
22247c478bd9Sstevel@tonic-gate 	return (ret);
22257c478bd9Sstevel@tonic-gate }
22267c478bd9Sstevel@tonic-gate 
22277c478bd9Sstevel@tonic-gate static struct recv_wid *
22287c478bd9Sstevel@tonic-gate rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid)
22297c478bd9Sstevel@tonic-gate {
22307c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
22317c478bd9Sstevel@tonic-gate 
22327c478bd9Sstevel@tonic-gate 	rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP);
22337c478bd9Sstevel@tonic-gate 	rwid->xid = msgid;
22347c478bd9Sstevel@tonic-gate 	rwid->addr = sgl->ds_va;
22357c478bd9Sstevel@tonic-gate 	rwid->qp = qp;
22367c478bd9Sstevel@tonic-gate 
22377c478bd9Sstevel@tonic-gate 	return (rwid);
22387c478bd9Sstevel@tonic-gate }
22397c478bd9Sstevel@tonic-gate 
22407c478bd9Sstevel@tonic-gate static void
22417c478bd9Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid)
22427c478bd9Sstevel@tonic-gate {
22437c478bd9Sstevel@tonic-gate 	kmem_free(rwid, sizeof (struct recv_wid));
22447c478bd9Sstevel@tonic-gate }
22457c478bd9Sstevel@tonic-gate 
22467c478bd9Sstevel@tonic-gate rdma_stat
22477c478bd9Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid)
22487c478bd9Sstevel@tonic-gate {
22497c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
22507c478bd9Sstevel@tonic-gate 	struct clist	*clp = cl;
22517c478bd9Sstevel@tonic-gate 	struct reply	*rep;
22527c478bd9Sstevel@tonic-gate 	struct recv_wid	*rwid;
22537c478bd9Sstevel@tonic-gate 	int		nds;
22547c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
22557c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
22567c478bd9Sstevel@tonic-gate 	rdma_stat	ret;
22577c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
22587c478bd9Sstevel@tonic-gate 
22597c478bd9Sstevel@tonic-gate 	/*
22607c478bd9Sstevel@tonic-gate 	 * rdma_clnt_postrecv uses RECV_BUFFER.
22617c478bd9Sstevel@tonic-gate 	 */
22627c478bd9Sstevel@tonic-gate 
22637c478bd9Sstevel@tonic-gate 	nds = 0;
22647c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
22657c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
22667c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_clnt_post: DSEG_MAX too small!");
22677c478bd9Sstevel@tonic-gate 		    ret = RDMA_FAILED;
22687c478bd9Sstevel@tonic-gate 		    goto done;
22697c478bd9Sstevel@tonic-gate 		}
22707c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_saddr;
22717c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
22727c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
22737c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
22747c478bd9Sstevel@tonic-gate 		nds++;
22757c478bd9Sstevel@tonic-gate 	}
22767c478bd9Sstevel@tonic-gate 
22777c478bd9Sstevel@tonic-gate 	if (nds != 1) {
22787c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rib_clnt_post: nds!=1\n");
22797c478bd9Sstevel@tonic-gate 	    ret = RDMA_FAILED;
22807c478bd9Sstevel@tonic-gate 	    goto done;
22817c478bd9Sstevel@tonic-gate 	}
22827c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
22837c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
22847c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
22857c478bd9Sstevel@tonic-gate 
22867c478bd9Sstevel@tonic-gate 	rwid = rib_create_wid(qp, &sgl[0], msgid);
22877c478bd9Sstevel@tonic-gate 	if (rwid) {
228811606941Sjwahlig 	    recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid;
22897c478bd9Sstevel@tonic-gate 	} else {
22907c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_clnt_post: out of memory");
22917c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
22927c478bd9Sstevel@tonic-gate 		goto done;
22937c478bd9Sstevel@tonic-gate 	}
22947c478bd9Sstevel@tonic-gate 	rep = rib_addreplylist(qp, msgid);
22957c478bd9Sstevel@tonic-gate 	if (!rep) {
22967c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_clnt_post: out of memory");
22977c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
22987c478bd9Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
22997c478bd9Sstevel@tonic-gate 		goto done;
23007c478bd9Sstevel@tonic-gate 	}
23017c478bd9Sstevel@tonic-gate 
23027c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
23037c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
23047c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
23057c478bd9Sstevel@tonic-gate 	}
23067c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
23077c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
23087c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
23097c478bd9Sstevel@tonic-gate #ifdef DEBUG
23107c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_clnt_post: QPN %p failed in "
23117c478bd9Sstevel@tonic-gate 		    "ibt_post_recv(), msgid=%d, status=%d",
23127c478bd9Sstevel@tonic-gate 		    (void *)qp,  msgid, ibt_status);
23137c478bd9Sstevel@tonic-gate #endif
23147c478bd9Sstevel@tonic-gate 		rib_free_wid(rwid);
23157c478bd9Sstevel@tonic-gate 		(void) rib_rem_rep(qp, rep);
23167c478bd9Sstevel@tonic-gate 		ret = RDMA_FAILED;
23177c478bd9Sstevel@tonic-gate 		goto done;
23187c478bd9Sstevel@tonic-gate 	}
23197c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
23207c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
23217c478bd9Sstevel@tonic-gate 
23227c478bd9Sstevel@tonic-gate done:
23237c478bd9Sstevel@tonic-gate 	while (clp != NULL) {
232411606941Sjwahlig 	    rib_rbuf_free(conn, RECV_BUFFER, (void *)(uintptr_t)clp->c_saddr);
23257c478bd9Sstevel@tonic-gate 	    clp = clp->c_next;
23267c478bd9Sstevel@tonic-gate 	}
23277c478bd9Sstevel@tonic-gate 	return (ret);
23287c478bd9Sstevel@tonic-gate }
23297c478bd9Sstevel@tonic-gate 
23307c478bd9Sstevel@tonic-gate rdma_stat
23317c478bd9Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl)
23327c478bd9Sstevel@tonic-gate {
23337c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
23347c478bd9Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
23357c478bd9Sstevel@tonic-gate 	int		nds;
23367c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
23377c478bd9Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
23387c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
23397c478bd9Sstevel@tonic-gate 
23407c478bd9Sstevel@tonic-gate 	nds = 0;
23417c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
23427c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
23437c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_svc_post: DSEG_MAX too small!");
23447c478bd9Sstevel@tonic-gate 		    return (RDMA_FAILED);
23457c478bd9Sstevel@tonic-gate 		}
23467c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_saddr;
23477c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
23487c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
23497c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
23507c478bd9Sstevel@tonic-gate 		nds++;
23517c478bd9Sstevel@tonic-gate 	}
23527c478bd9Sstevel@tonic-gate 
23537c478bd9Sstevel@tonic-gate 	if (nds != 1) {
23547c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rib_svc_post: nds!=1\n");
235511606941Sjwahlig 	    rib_rbuf_free(conn, RECV_BUFFER, (caddr_t)(uintptr_t)sgl[0].ds_va);
23567c478bd9Sstevel@tonic-gate 	    return (RDMA_FAILED);
23577c478bd9Sstevel@tonic-gate 	}
23587c478bd9Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
23597c478bd9Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
23607c478bd9Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
23617c478bd9Sstevel@tonic-gate 
23627c478bd9Sstevel@tonic-gate 	s_recvp = rib_init_svc_recv(qp, &sgl[0]);
236311606941Sjwahlig 	/* Use s_recvp's addr as wr id */
236411606941Sjwahlig 	recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp;
23657c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
23667c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
23677c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
23687c478bd9Sstevel@tonic-gate 	}
23697c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
23707c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
23717c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
23727c478bd9Sstevel@tonic-gate #ifdef DEBUG
23737c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_svc_post: QP %p failed in "
23747c478bd9Sstevel@tonic-gate 		    "ibt_post_recv(), status=%d",
23757c478bd9Sstevel@tonic-gate 		    (void *)qp, ibt_status);
23767c478bd9Sstevel@tonic-gate #endif
237711606941Sjwahlig 		rib_rbuf_free(conn, RECV_BUFFER,
237811606941Sjwahlig 			(caddr_t)(uintptr_t)sgl[0].ds_va);
23797c478bd9Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
23807c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
23817c478bd9Sstevel@tonic-gate 	}
23827c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
23837c478bd9Sstevel@tonic-gate 
23847c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
23857c478bd9Sstevel@tonic-gate }
23867c478bd9Sstevel@tonic-gate 
23877c478bd9Sstevel@tonic-gate /* Client */
23887c478bd9Sstevel@tonic-gate rdma_stat
23897c478bd9Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid)
23907c478bd9Sstevel@tonic-gate {
23917c478bd9Sstevel@tonic-gate 
23927c478bd9Sstevel@tonic-gate 	return (rib_clnt_post(conn, cl, msgid));
23937c478bd9Sstevel@tonic-gate }
23947c478bd9Sstevel@tonic-gate 
23957c478bd9Sstevel@tonic-gate /* Server */
23967c478bd9Sstevel@tonic-gate rdma_stat
23977c478bd9Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl)
23987c478bd9Sstevel@tonic-gate {
23997c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
24007c478bd9Sstevel@tonic-gate 
24017c478bd9Sstevel@tonic-gate 	if (rib_svc_post(conn, cl) == RDMA_SUCCESS) {
24027c478bd9Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
24037c478bd9Sstevel@tonic-gate 		qp->n_posted_rbufs++;
24047c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
24057c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
24067c478bd9Sstevel@tonic-gate 	}
24077c478bd9Sstevel@tonic-gate 	return (RDMA_FAILED);
24087c478bd9Sstevel@tonic-gate }
24097c478bd9Sstevel@tonic-gate 
24107c478bd9Sstevel@tonic-gate /*
24117c478bd9Sstevel@tonic-gate  * Client side only interface to "recv" the rpc reply buf
24127c478bd9Sstevel@tonic-gate  * posted earlier by rib_post_resp(conn, cl, msgid).
24137c478bd9Sstevel@tonic-gate  */
24147c478bd9Sstevel@tonic-gate rdma_stat
24157c478bd9Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid)
24167c478bd9Sstevel@tonic-gate {
24177c478bd9Sstevel@tonic-gate 	struct reply *rep = NULL;
24187c478bd9Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
24197c478bd9Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
24207c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
24217c478bd9Sstevel@tonic-gate 
24227c478bd9Sstevel@tonic-gate 	/*
24237c478bd9Sstevel@tonic-gate 	 * Find the reply structure for this msgid
24247c478bd9Sstevel@tonic-gate 	 */
24257c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
24267c478bd9Sstevel@tonic-gate 
24277c478bd9Sstevel@tonic-gate 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
24287c478bd9Sstevel@tonic-gate 	    if (rep->xid == msgid)
24297c478bd9Sstevel@tonic-gate 		break;
24307c478bd9Sstevel@tonic-gate 	}
24317c478bd9Sstevel@tonic-gate 	if (rep != NULL) {
24327c478bd9Sstevel@tonic-gate 		/*
24337c478bd9Sstevel@tonic-gate 		 * If message not yet received, wait.
24347c478bd9Sstevel@tonic-gate 		 */
24357c478bd9Sstevel@tonic-gate 		if (rep->status == (uint_t)REPLY_WAIT) {
24367c478bd9Sstevel@tonic-gate 			timout = ddi_get_lbolt() +
24377c478bd9Sstevel@tonic-gate 			    drv_usectohz(REPLY_WAIT_TIME * 1000000);
24387c478bd9Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv,
24397c478bd9Sstevel@tonic-gate 				    &qp->replylist_lock, timout)) > 0 &&
24407c478bd9Sstevel@tonic-gate 			    rep->status == (uint_t)REPLY_WAIT);
24417c478bd9Sstevel@tonic-gate 
24427c478bd9Sstevel@tonic-gate 			switch (cv_wait_ret) {
24437c478bd9Sstevel@tonic-gate 			case -1:	/* timeout */
24447c478bd9Sstevel@tonic-gate 				ret = RDMA_TIMEDOUT;
24457c478bd9Sstevel@tonic-gate 				break;
24467c478bd9Sstevel@tonic-gate 			case 0:
24477c478bd9Sstevel@tonic-gate 				ret = RDMA_INTR;
24487c478bd9Sstevel@tonic-gate 				break;
24497c478bd9Sstevel@tonic-gate 			default:
24507c478bd9Sstevel@tonic-gate 				break;
24517c478bd9Sstevel@tonic-gate 			}
24527c478bd9Sstevel@tonic-gate 		}
24537c478bd9Sstevel@tonic-gate 
24547c478bd9Sstevel@tonic-gate 		if (rep->status == RDMA_SUCCESS) {
24557c478bd9Sstevel@tonic-gate 			struct clist *cl = NULL;
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate 			/*
24587c478bd9Sstevel@tonic-gate 			 * Got message successfully
24597c478bd9Sstevel@tonic-gate 			 */
24607c478bd9Sstevel@tonic-gate 			clist_add(&cl, 0, rep->bytes_xfer, NULL,
246111606941Sjwahlig 			    (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL);
24627c478bd9Sstevel@tonic-gate 			*clp = cl;
24637c478bd9Sstevel@tonic-gate 		} else {
24647c478bd9Sstevel@tonic-gate 			if (rep->status != (uint_t)REPLY_WAIT) {
24657c478bd9Sstevel@tonic-gate 				/*
24667c478bd9Sstevel@tonic-gate 				 * Got error in reply message. Free
24677c478bd9Sstevel@tonic-gate 				 * recv buffer here.
24687c478bd9Sstevel@tonic-gate 				 */
24697c478bd9Sstevel@tonic-gate 				ret = rep->status;
24707c478bd9Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
247111606941Sjwahlig 					(caddr_t)(uintptr_t)rep->vaddr_cq);
24727c478bd9Sstevel@tonic-gate 			}
24737c478bd9Sstevel@tonic-gate 		}
24747c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
24757c478bd9Sstevel@tonic-gate 	} else {
24767c478bd9Sstevel@tonic-gate 		/*
24777c478bd9Sstevel@tonic-gate 		 * No matching reply structure found for given msgid on the
24787c478bd9Sstevel@tonic-gate 		 * reply wait list.
24797c478bd9Sstevel@tonic-gate 		 */
24807c478bd9Sstevel@tonic-gate 		ret = RDMA_INVAL;
24817c478bd9Sstevel@tonic-gate #ifdef DEBUG
24827c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_recv: no matching reply for "
24837c478bd9Sstevel@tonic-gate 		    "xid %u, qp %p\n", msgid, (void *)qp);
24847c478bd9Sstevel@tonic-gate #endif
24857c478bd9Sstevel@tonic-gate 	}
24867c478bd9Sstevel@tonic-gate 
24877c478bd9Sstevel@tonic-gate 	/*
24887c478bd9Sstevel@tonic-gate 	 * Done.
24897c478bd9Sstevel@tonic-gate 	 */
24907c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
24917c478bd9Sstevel@tonic-gate 	return (ret);
24927c478bd9Sstevel@tonic-gate }
24937c478bd9Sstevel@tonic-gate 
24947c478bd9Sstevel@tonic-gate /*
24957c478bd9Sstevel@tonic-gate  * RDMA write a buffer to the remote address.
24967c478bd9Sstevel@tonic-gate  */
24977c478bd9Sstevel@tonic-gate rdma_stat
24987c478bd9Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait)
24997c478bd9Sstevel@tonic-gate {
25007c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
25017c478bd9Sstevel@tonic-gate 	int		nds;
25027c478bd9Sstevel@tonic-gate 	int		cv_sig;
25037c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
25047c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
25057c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
25067c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
25077c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
25087c478bd9Sstevel@tonic-gate 
25097c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
25107c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_write: NULL clist\n");
25117c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
25127c478bd9Sstevel@tonic-gate 	}
25137c478bd9Sstevel@tonic-gate 
25147c478bd9Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
25157c478bd9Sstevel@tonic-gate 	/*
25167c478bd9Sstevel@tonic-gate 	 * Remote address is at the head chunk item in list.
25177c478bd9Sstevel@tonic-gate 	 */
25187c478bd9Sstevel@tonic-gate 	tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_daddr;
25197c478bd9Sstevel@tonic-gate 	tx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_dmemhandle.mrc_rmr; /* rkey */
25207c478bd9Sstevel@tonic-gate 
25217c478bd9Sstevel@tonic-gate 	nds = 0;
25227c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
25237c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
25247c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_write: DSEG_MAX too small!");
25257c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
25267c478bd9Sstevel@tonic-gate 		}
25277c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_saddr;
25287c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
25297c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
25307c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
25317c478bd9Sstevel@tonic-gate 		nds++;
25327c478bd9Sstevel@tonic-gate 	}
25337c478bd9Sstevel@tonic-gate 
25347c478bd9Sstevel@tonic-gate 	if (wait) {
25357c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
25367c478bd9Sstevel@tonic-gate 		cv_sig = 1;
25377c478bd9Sstevel@tonic-gate 	} else {
25387c478bd9Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
25397c478bd9Sstevel@tonic-gate 		cv_sig = 0;
25407c478bd9Sstevel@tonic-gate 	}
25417c478bd9Sstevel@tonic-gate 
25427c478bd9Sstevel@tonic-gate 	wdesc = rib_init_sendwait(0, cv_sig, qp);
254311606941Sjwahlig 	tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
25447c478bd9Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_RDMAW;
25457c478bd9Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
25467c478bd9Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
25477c478bd9Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
25487c478bd9Sstevel@tonic-gate 
25497c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
25507c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
25517c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
25527c478bd9Sstevel@tonic-gate 	}
25537c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
25547c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
25557c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
25567c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wdesc);
25577c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
25587c478bd9Sstevel@tonic-gate 	}
25597c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
25607c478bd9Sstevel@tonic-gate 
25617c478bd9Sstevel@tonic-gate 	/*
25627c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
25637c478bd9Sstevel@tonic-gate 	 */
25647c478bd9Sstevel@tonic-gate 	if (wait) {
25657c478bd9Sstevel@tonic-gate 		ret = rib_sendwait(qp, wdesc);
25667c478bd9Sstevel@tonic-gate 		if (ret != 0) {
25677c478bd9Sstevel@tonic-gate 			return (ret);
25687c478bd9Sstevel@tonic-gate 		}
25697c478bd9Sstevel@tonic-gate 	}
25707c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
25717c478bd9Sstevel@tonic-gate }
25727c478bd9Sstevel@tonic-gate 
25737c478bd9Sstevel@tonic-gate /*
25747c478bd9Sstevel@tonic-gate  * RDMA Read a buffer from the remote address.
25757c478bd9Sstevel@tonic-gate  */
25767c478bd9Sstevel@tonic-gate rdma_stat
25777c478bd9Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait)
25787c478bd9Sstevel@tonic-gate {
25797c478bd9Sstevel@tonic-gate 	ibt_send_wr_t	rx_wr;
25807c478bd9Sstevel@tonic-gate 	int		nds;
25817c478bd9Sstevel@tonic-gate 	int		cv_sig;
25827c478bd9Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];	/* is 2 sufficient? */
25837c478bd9Sstevel@tonic-gate 	struct send_wid	*wdesc;
25847c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
25857c478bd9Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
25867c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
25877c478bd9Sstevel@tonic-gate 
25887c478bd9Sstevel@tonic-gate 	if (cl == NULL) {
25897c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_read: NULL clist\n");
25907c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
25917c478bd9Sstevel@tonic-gate 	}
25927c478bd9Sstevel@tonic-gate 
25937c478bd9Sstevel@tonic-gate 	bzero(&rx_wr, sizeof (ibt_send_wr_t));
25947c478bd9Sstevel@tonic-gate 	/*
25957c478bd9Sstevel@tonic-gate 	 * Remote address is at the head chunk item in list.
25967c478bd9Sstevel@tonic-gate 	 */
25977c478bd9Sstevel@tonic-gate 	rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->c_saddr;
25987c478bd9Sstevel@tonic-gate 	rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr; /* rkey */
25997c478bd9Sstevel@tonic-gate 
26007c478bd9Sstevel@tonic-gate 	nds = 0;
26017c478bd9Sstevel@tonic-gate 	while (cl != NULL) {
26027c478bd9Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
26037c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_read: DSEG_MAX too small!");
26047c478bd9Sstevel@tonic-gate 			return (RDMA_FAILED);
26057c478bd9Sstevel@tonic-gate 		}
26067c478bd9Sstevel@tonic-gate 		sgl[nds].ds_va = cl->c_daddr;
26077c478bd9Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */
26087c478bd9Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
26097c478bd9Sstevel@tonic-gate 		cl = cl->c_next;
26107c478bd9Sstevel@tonic-gate 		nds++;
26117c478bd9Sstevel@tonic-gate 	}
26127c478bd9Sstevel@tonic-gate 
26137c478bd9Sstevel@tonic-gate 	if (wait) {
26147c478bd9Sstevel@tonic-gate 		rx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
26157c478bd9Sstevel@tonic-gate 		cv_sig = 1;
26167c478bd9Sstevel@tonic-gate 	} else {
26177c478bd9Sstevel@tonic-gate 		rx_wr.wr_flags = IBT_WR_NO_FLAGS;
26187c478bd9Sstevel@tonic-gate 		cv_sig = 0;
26197c478bd9Sstevel@tonic-gate 	}
26207c478bd9Sstevel@tonic-gate 
26217c478bd9Sstevel@tonic-gate 	wdesc = rib_init_sendwait(0, cv_sig, qp);
262211606941Sjwahlig 	rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
26237c478bd9Sstevel@tonic-gate 	rx_wr.wr_opcode = IBT_WRC_RDMAR;
26247c478bd9Sstevel@tonic-gate 	rx_wr.wr_trans = IBT_RC_SRV;
26257c478bd9Sstevel@tonic-gate 	rx_wr.wr_nds = nds;
26267c478bd9Sstevel@tonic-gate 	rx_wr.wr_sgl = sgl;
26277c478bd9Sstevel@tonic-gate 
26287c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
26297c478bd9Sstevel@tonic-gate 	if (conn->c_state & C_CONNECTED) {
26307c478bd9Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL);
26317c478bd9Sstevel@tonic-gate 	}
26327c478bd9Sstevel@tonic-gate 	if (((conn->c_state & C_CONNECTED) == 0) ||
26337c478bd9Sstevel@tonic-gate 		ibt_status != IBT_SUCCESS) {
26347c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
26357c478bd9Sstevel@tonic-gate #ifdef DEBUG
26367c478bd9Sstevel@tonic-gate 		if (rib_debug && ibt_status != IBT_SUCCESS)
26377c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_read: FAILED post_sending RDMAR"
26387c478bd9Sstevel@tonic-gate 				" wr_id %llx on qp %p, status=%d",
26397c478bd9Sstevel@tonic-gate 				(longlong_t)rx_wr.wr_id, (void *)qp,
26407c478bd9Sstevel@tonic-gate 				ibt_status);
26417c478bd9Sstevel@tonic-gate #endif
26427c478bd9Sstevel@tonic-gate 		(void) rib_free_sendwait(wdesc);
26437c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
26447c478bd9Sstevel@tonic-gate 	}
26457c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
26467c478bd9Sstevel@tonic-gate 
26477c478bd9Sstevel@tonic-gate 	/*
26487c478bd9Sstevel@tonic-gate 	 * Wait for send to complete
26497c478bd9Sstevel@tonic-gate 	 */
26507c478bd9Sstevel@tonic-gate 	if (wait) {
26517c478bd9Sstevel@tonic-gate 		ret = rib_sendwait(qp, wdesc);
26527c478bd9Sstevel@tonic-gate 		if (ret != 0) {
26537c478bd9Sstevel@tonic-gate 			return (ret);
26547c478bd9Sstevel@tonic-gate 		}
26557c478bd9Sstevel@tonic-gate 	}
26567c478bd9Sstevel@tonic-gate 
26577c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
26587c478bd9Sstevel@tonic-gate }
26597c478bd9Sstevel@tonic-gate 
26607c478bd9Sstevel@tonic-gate int
26617c478bd9Sstevel@tonic-gate is_for_ipv4(ibt_ar_t *result)
26627c478bd9Sstevel@tonic-gate {
26637c478bd9Sstevel@tonic-gate 	int	i, size = sizeof (struct in_addr);
26647c478bd9Sstevel@tonic-gate 	uint8_t	zero = 0;
26657c478bd9Sstevel@tonic-gate 
26667c478bd9Sstevel@tonic-gate 	for (i = 0; i < (ATS_AR_DATA_LEN - size); i++)
26677c478bd9Sstevel@tonic-gate 		zero |= result->ar_data[i];
26687c478bd9Sstevel@tonic-gate 	return (zero == 0);
26697c478bd9Sstevel@tonic-gate }
26707c478bd9Sstevel@tonic-gate 
26717c478bd9Sstevel@tonic-gate /*
26727c478bd9Sstevel@tonic-gate  * rib_srv_cm_handler()
26737c478bd9Sstevel@tonic-gate  *    Connection Manager callback to handle RC connection requests.
26747c478bd9Sstevel@tonic-gate  */
26757c478bd9Sstevel@tonic-gate /* ARGSUSED */
26767c478bd9Sstevel@tonic-gate static ibt_cm_status_t
26777c478bd9Sstevel@tonic-gate rib_srv_cm_handler(void *any, ibt_cm_event_t *event,
26787c478bd9Sstevel@tonic-gate 	ibt_cm_return_args_t *ret_args, void *priv_data,
26797c478bd9Sstevel@tonic-gate 	ibt_priv_data_len_t len)
26807c478bd9Sstevel@tonic-gate {
26817c478bd9Sstevel@tonic-gate 	queue_t		*q;
26827c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
26837c478bd9Sstevel@tonic-gate 	rpcib_state_t	*ribstat;
26847c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca;
26857c478bd9Sstevel@tonic-gate 	rdma_stat	status = RDMA_SUCCESS;
26867c478bd9Sstevel@tonic-gate 	int		i;
26877c478bd9Sstevel@tonic-gate 	struct clist	cl;
26887c478bd9Sstevel@tonic-gate 	rdma_buf_t	rdbuf;
26897c478bd9Sstevel@tonic-gate 	void		*buf = NULL;
26907c478bd9Sstevel@tonic-gate 	ibt_cm_req_rcv_t	cm_req_rcv;
26917c478bd9Sstevel@tonic-gate 	CONN		*conn;
26927c478bd9Sstevel@tonic-gate 	ibt_status_t ibt_status;
26937c478bd9Sstevel@tonic-gate 	ibt_ar_t	ar_query, ar_result;
26947c478bd9Sstevel@tonic-gate 	ib_gid_t	sgid;
26957c478bd9Sstevel@tonic-gate 
26967c478bd9Sstevel@tonic-gate 
26977c478bd9Sstevel@tonic-gate 	ASSERT(any != NULL);
26987c478bd9Sstevel@tonic-gate 	ASSERT(event != NULL);
26997c478bd9Sstevel@tonic-gate 
27007c478bd9Sstevel@tonic-gate 	ribstat = (rpcib_state_t *)any;
27017c478bd9Sstevel@tonic-gate 	hca = (rib_hca_t *)ribstat->hca;
27027c478bd9Sstevel@tonic-gate 	ASSERT(hca != NULL);
27037c478bd9Sstevel@tonic-gate 
27047c478bd9Sstevel@tonic-gate 	/* got a connection request */
27057c478bd9Sstevel@tonic-gate 	switch (event->cm_type) {
27067c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_REQ_RCV:
27077c478bd9Sstevel@tonic-gate 		/*
27087c478bd9Sstevel@tonic-gate 		 * If the plugin is in the NO_ACCEPT state, bail out.
27097c478bd9Sstevel@tonic-gate 		 */
27107c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
27117c478bd9Sstevel@tonic-gate 		if (plugin_state == NO_ACCEPT) {
27127c478bd9Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
27137c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
27147c478bd9Sstevel@tonic-gate 		}
27157c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
27167c478bd9Sstevel@tonic-gate 
27177c478bd9Sstevel@tonic-gate 		/*
27187c478bd9Sstevel@tonic-gate 		 * Need to send a MRA MAD to CM so that it does not
27197c478bd9Sstevel@tonic-gate 		 * timeout on us.
27207c478bd9Sstevel@tonic-gate 		 */
27217c478bd9Sstevel@tonic-gate 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
27227c478bd9Sstevel@tonic-gate 			    event->cm_event.req.req_timeout * 8, NULL, 0);
27237c478bd9Sstevel@tonic-gate 
27247c478bd9Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
27257c478bd9Sstevel@tonic-gate 		q = rib_stat->q;
27267c478bd9Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
27277c478bd9Sstevel@tonic-gate 		status = rib_svc_create_chan(hca, (caddr_t)q,
27287c478bd9Sstevel@tonic-gate 			event->cm_event.req.req_prim_hca_port, &qp);
27297c478bd9Sstevel@tonic-gate 		if (status) {
27307c478bd9Sstevel@tonic-gate #ifdef DEBUG
27317c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_srv_cm_handler: "
27327c478bd9Sstevel@tonic-gate 			    "create_channel failed %d", status);
27337c478bd9Sstevel@tonic-gate #endif
27347c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
27357c478bd9Sstevel@tonic-gate 		}
27367c478bd9Sstevel@tonic-gate 		cm_req_rcv = event->cm_event.req;
27377c478bd9Sstevel@tonic-gate 
27387c478bd9Sstevel@tonic-gate #ifdef DEBUG
27397c478bd9Sstevel@tonic-gate 		if (rib_debug > 2) {
27407c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_srv_cm_handler: "
27417c478bd9Sstevel@tonic-gate 			"server recv'ed IBT_CM_EVENT_REQ_RCV\n");
27427c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t SID:%llx\n",
27437c478bd9Sstevel@tonic-gate 				(longlong_t)cm_req_rcv.req_service_id);
27447c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Local Port:%d\n",
27457c478bd9Sstevel@tonic-gate 				cm_req_rcv.req_prim_hca_port);
27467c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE,
27477c478bd9Sstevel@tonic-gate 			"\t\t Remote GID:(prefix:%llx,guid:%llx)\n",
27487c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_dgid.gid_prefix,
27497c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_dgid.gid_guid);
27507c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Local GID:(prefix:%llx,guid:%llx)\n",
27517c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_sgid.gid_prefix,
27527c478bd9Sstevel@tonic-gate 			(longlong_t)cm_req_rcv.req_prim_addr.av_sgid.gid_guid);
27537c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Remote QPN:%u\n",
27547c478bd9Sstevel@tonic-gate 			cm_req_rcv.req_remote_qpn);
27557c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Remote Q_Key:%x\n",
27567c478bd9Sstevel@tonic-gate 			cm_req_rcv.req_remote_qkey);
27577c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "\t\t Local QP %p (qp_hdl=%p)\n",
27587c478bd9Sstevel@tonic-gate 			(void *)qp, (void *)qp->qp_hdl);
27597c478bd9Sstevel@tonic-gate 		}
27607c478bd9Sstevel@tonic-gate 
27617c478bd9Sstevel@tonic-gate 		if (rib_debug > 2) {
27627c478bd9Sstevel@tonic-gate 		    ibt_rc_chan_query_attr_t	chan_attrs;
27637c478bd9Sstevel@tonic-gate 
27647c478bd9Sstevel@tonic-gate 		    if (ibt_query_rc_channel(qp->qp_hdl, &chan_attrs)
27657c478bd9Sstevel@tonic-gate 			== IBT_SUCCESS) {
27667c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_svc_cm_handler: qp %p in "
27677c478bd9Sstevel@tonic-gate 			    "CEP state %d\n", (void *)qp, chan_attrs.rc_state);
27687c478bd9Sstevel@tonic-gate 		    }
27697c478bd9Sstevel@tonic-gate 		}
27707c478bd9Sstevel@tonic-gate #endif
27717c478bd9Sstevel@tonic-gate 
27727c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
27737c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rdma_ra_out = 1;
27747c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rdma_ra_in = 1;
27757c478bd9Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
27767c478bd9Sstevel@tonic-gate 
27777c478bd9Sstevel@tonic-gate 		/*
27787c478bd9Sstevel@tonic-gate 		 * Pre-posts RECV buffers
27797c478bd9Sstevel@tonic-gate 		 */
27807c478bd9Sstevel@tonic-gate 		conn = qptoc(qp);
27817c478bd9Sstevel@tonic-gate 		for (i = 0; i < preposted_rbufs; i++) {
27827c478bd9Sstevel@tonic-gate 		    bzero(&rdbuf, sizeof (rdbuf));
27837c478bd9Sstevel@tonic-gate 		    rdbuf.type = RECV_BUFFER;
27847c478bd9Sstevel@tonic-gate 		    buf = rib_rbuf_alloc(conn, &rdbuf);
27857c478bd9Sstevel@tonic-gate 		    if (buf == NULL) {
27867c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_svc_cm_handler: "
27877c478bd9Sstevel@tonic-gate 			    "No RECV_BUFFER buf!\n");
27887c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, NULL);
27897c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
27907c478bd9Sstevel@tonic-gate 		    }
27917c478bd9Sstevel@tonic-gate 
27927c478bd9Sstevel@tonic-gate 		    bzero(&cl, sizeof (cl));
279311606941Sjwahlig 		    cl.c_saddr = (uintptr_t)rdbuf.addr;
27947c478bd9Sstevel@tonic-gate 		    cl.c_len = rdbuf.len;
27957c478bd9Sstevel@tonic-gate 		    cl.c_smemhandle.mrc_lmr = rdbuf.handle.mrc_lmr; /* lkey */
27967c478bd9Sstevel@tonic-gate 		    cl.c_next = NULL;
27977c478bd9Sstevel@tonic-gate 		    status = rib_post_recv(conn, &cl);
27987c478bd9Sstevel@tonic-gate 		    if (status != RDMA_SUCCESS) {
27997c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_srv_cm_handler: failed "
28007c478bd9Sstevel@tonic-gate 			    "posting RPC_REQ buf to qp %p!", (void *)qp);
28017c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, NULL);
28027c478bd9Sstevel@tonic-gate 			return (IBT_CM_REJECT);
28037c478bd9Sstevel@tonic-gate 		    }
28047c478bd9Sstevel@tonic-gate 		}
28057c478bd9Sstevel@tonic-gate 		(void) rib_add_connlist(conn, &hca->srv_conn_list);
28067c478bd9Sstevel@tonic-gate 
28077c478bd9Sstevel@tonic-gate 		/*
28087c478bd9Sstevel@tonic-gate 		 * Get the address translation service record from ATS
28097c478bd9Sstevel@tonic-gate 		 */
28107c478bd9Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
28117c478bd9Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
28127c478bd9Sstevel@tonic-gate 		    rw_exit(&hca->state_lock);
28137c478bd9Sstevel@tonic-gate 		    return (IBT_CM_REJECT);
28147c478bd9Sstevel@tonic-gate 		}
28157c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
28167c478bd9Sstevel@tonic-gate 
28177c478bd9Sstevel@tonic-gate 		for (i = 0; i < hca->hca_nports; i++) {
28187c478bd9Sstevel@tonic-gate 		    ibt_status = ibt_get_port_state(hca->hca_hdl, i+1,
28197c478bd9Sstevel@tonic-gate 					&sgid, NULL);
28207c478bd9Sstevel@tonic-gate 		    if (ibt_status != IBT_SUCCESS) {
28217c478bd9Sstevel@tonic-gate 			if (rib_debug) {
28227c478bd9Sstevel@tonic-gate 			    cmn_err(CE_WARN, "rib_srv_cm_handler: "
28237c478bd9Sstevel@tonic-gate 				"ibt_get_port_state FAILED!"
28247c478bd9Sstevel@tonic-gate 				"status = %d\n", ibt_status);
28257c478bd9Sstevel@tonic-gate 			}
28267c478bd9Sstevel@tonic-gate 		    } else {
28277c478bd9Sstevel@tonic-gate 			/*
28287c478bd9Sstevel@tonic-gate 			 * do ibt_query_ar()
28297c478bd9Sstevel@tonic-gate 			 */
28307c478bd9Sstevel@tonic-gate 			bzero(&ar_query, sizeof (ar_query));
28317c478bd9Sstevel@tonic-gate 			bzero(&ar_result, sizeof (ar_result));
28327c478bd9Sstevel@tonic-gate 			ar_query.ar_gid = cm_req_rcv.req_prim_addr.av_dgid;
28337c478bd9Sstevel@tonic-gate 			ar_query.ar_pkey = event->cm_event.req.req_pkey;
28347c478bd9Sstevel@tonic-gate 			ibt_status = ibt_query_ar(&sgid, &ar_query,
28357c478bd9Sstevel@tonic-gate 							&ar_result);
28367c478bd9Sstevel@tonic-gate 			if (ibt_status != IBT_SUCCESS) {
28377c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
28387c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_srv_cm_handler: "
28397c478bd9Sstevel@tonic-gate 				    "ibt_query_ar FAILED!"
28407c478bd9Sstevel@tonic-gate 				    "status = %d\n", ibt_status);
28417c478bd9Sstevel@tonic-gate 			    }
28427c478bd9Sstevel@tonic-gate 			} else {
28437c478bd9Sstevel@tonic-gate 			    conn = qptoc(qp);
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 			    if (is_for_ipv4(&ar_result)) {
28467c478bd9Sstevel@tonic-gate 				struct sockaddr_in *s;
28477c478bd9Sstevel@tonic-gate 				int sin_size = sizeof (struct sockaddr_in);
28487c478bd9Sstevel@tonic-gate 				int in_size = sizeof (struct in_addr);
28497c478bd9Sstevel@tonic-gate 				uint8_t	*start_pos;
28507c478bd9Sstevel@tonic-gate 
28517c478bd9Sstevel@tonic-gate 				conn->c_raddr.maxlen =
28527c478bd9Sstevel@tonic-gate 					conn->c_raddr.len = sin_size;
28537c478bd9Sstevel@tonic-gate 				conn->c_raddr.buf = kmem_zalloc(sin_size,
28547c478bd9Sstevel@tonic-gate 						KM_SLEEP);
28557c478bd9Sstevel@tonic-gate 				s = (struct sockaddr_in *)conn->c_raddr.buf;
28567c478bd9Sstevel@tonic-gate 				s->sin_family = AF_INET;
28577c478bd9Sstevel@tonic-gate 				/*
28587c478bd9Sstevel@tonic-gate 				 * For IPv4,  the IP addr is stored in
28597c478bd9Sstevel@tonic-gate 				 * the last four bytes of ar_data.
28607c478bd9Sstevel@tonic-gate 				 */
28617c478bd9Sstevel@tonic-gate 				start_pos = ar_result.ar_data +
28627c478bd9Sstevel@tonic-gate 					ATS_AR_DATA_LEN - in_size;
28637c478bd9Sstevel@tonic-gate 				bcopy(start_pos, &s->sin_addr, in_size);
28647c478bd9Sstevel@tonic-gate 				if (rib_debug > 1) {
28657c478bd9Sstevel@tonic-gate 				    char print_addr[INET_ADDRSTRLEN];
28667c478bd9Sstevel@tonic-gate 
28677c478bd9Sstevel@tonic-gate 				    bzero(print_addr, INET_ADDRSTRLEN);
28687c478bd9Sstevel@tonic-gate 				    (void) inet_ntop(AF_INET, &s->sin_addr,
28697c478bd9Sstevel@tonic-gate 						print_addr, INET_ADDRSTRLEN);
28707c478bd9Sstevel@tonic-gate 				    cmn_err(CE_NOTE, "rib_srv_cm_handler: "
28717c478bd9Sstevel@tonic-gate 					"remote clnt_addr: %s\n", print_addr);
28727c478bd9Sstevel@tonic-gate 				}
28737c478bd9Sstevel@tonic-gate 			    } else {
28747c478bd9Sstevel@tonic-gate 				struct sockaddr_in6 *s6;
28757c478bd9Sstevel@tonic-gate 				int sin6_size = sizeof (struct sockaddr_in6);
28767c478bd9Sstevel@tonic-gate 
28777c478bd9Sstevel@tonic-gate 				conn->c_raddr.maxlen =
28787c478bd9Sstevel@tonic-gate 					conn->c_raddr.len = sin6_size;
28797c478bd9Sstevel@tonic-gate 				conn->c_raddr.buf = kmem_zalloc(sin6_size,
28807c478bd9Sstevel@tonic-gate 					KM_SLEEP);
28817c478bd9Sstevel@tonic-gate 
28827c478bd9Sstevel@tonic-gate 				s6 = (struct sockaddr_in6 *)conn->c_raddr.buf;
28837c478bd9Sstevel@tonic-gate 				s6->sin6_family = AF_INET6;
28847c478bd9Sstevel@tonic-gate 				/* sin6_addr is stored in ar_data */
28857c478bd9Sstevel@tonic-gate 				bcopy(ar_result.ar_data, &s6->sin6_addr,
28867c478bd9Sstevel@tonic-gate 					sizeof (struct in6_addr));
28877c478bd9Sstevel@tonic-gate 				if (rib_debug > 1) {
28887c478bd9Sstevel@tonic-gate 				    char print_addr[INET6_ADDRSTRLEN];
28897c478bd9Sstevel@tonic-gate 
28907c478bd9Sstevel@tonic-gate 				    bzero(print_addr, INET6_ADDRSTRLEN);
28917c478bd9Sstevel@tonic-gate 				    (void) inet_ntop(AF_INET6, &s6->sin6_addr,
28927c478bd9Sstevel@tonic-gate 						print_addr, INET6_ADDRSTRLEN);
28937c478bd9Sstevel@tonic-gate 				    cmn_err(CE_NOTE, "rib_srv_cm_handler: "
28947c478bd9Sstevel@tonic-gate 					"remote clnt_addr: %s\n", print_addr);
28957c478bd9Sstevel@tonic-gate 				}
28967c478bd9Sstevel@tonic-gate 			    }
28977c478bd9Sstevel@tonic-gate 			    return (IBT_CM_ACCEPT);
28987c478bd9Sstevel@tonic-gate 			}
28997c478bd9Sstevel@tonic-gate 		    }
29007c478bd9Sstevel@tonic-gate 		}
29017c478bd9Sstevel@tonic-gate 		if (rib_debug > 1) {
29027c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_srv_cm_handler: "
29037c478bd9Sstevel@tonic-gate 				"address record query failed!");
29047c478bd9Sstevel@tonic-gate 		}
29057c478bd9Sstevel@tonic-gate 		break;
29067c478bd9Sstevel@tonic-gate 
29077c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
29087c478bd9Sstevel@tonic-gate 	{
29097c478bd9Sstevel@tonic-gate 		CONN		*conn;
29107c478bd9Sstevel@tonic-gate 		rib_qp_t	*qp;
29117c478bd9Sstevel@tonic-gate 
29127c478bd9Sstevel@tonic-gate 		switch (event->cm_event.closed) {
29137c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
29147c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
29157c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
29167c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
29177c478bd9Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
29187c478bd9Sstevel@tonic-gate 			/*
29197c478bd9Sstevel@tonic-gate 			 * These cases indicate the local end initiated
29207c478bd9Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
29217c478bd9Sstevel@tonic-gate 			 */
29227c478bd9Sstevel@tonic-gate 			break;
29237c478bd9Sstevel@tonic-gate 		default:
29247c478bd9Sstevel@tonic-gate 			/*
29257c478bd9Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
29267c478bd9Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
29277c478bd9Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases where
29287c478bd9Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
29297c478bd9Sstevel@tonic-gate 			 * cases free the channel and transition to error
29307c478bd9Sstevel@tonic-gate 			 * state
29317c478bd9Sstevel@tonic-gate 			 */
29327c478bd9Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
29337c478bd9Sstevel@tonic-gate 			conn = qptoc(qp);
29347c478bd9Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
29357c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
29367c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
29377c478bd9Sstevel@tonic-gate 				break;
29387c478bd9Sstevel@tonic-gate 			}
29397c478bd9Sstevel@tonic-gate 			conn->c_state = C_ERROR;
29407c478bd9Sstevel@tonic-gate 
29417c478bd9Sstevel@tonic-gate 			/*
29427c478bd9Sstevel@tonic-gate 			 * Free the rc_channel. Channel has already
29437c478bd9Sstevel@tonic-gate 			 * transitioned to ERROR state and WRs have been
29447c478bd9Sstevel@tonic-gate 			 * FLUSHED_ERR already.
29457c478bd9Sstevel@tonic-gate 			 */
29467c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
29477c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
29487c478bd9Sstevel@tonic-gate 
29497c478bd9Sstevel@tonic-gate 			/*
29507c478bd9Sstevel@tonic-gate 			 * Free the conn if c_ref goes down to 0
29517c478bd9Sstevel@tonic-gate 			 */
29527c478bd9Sstevel@tonic-gate 			if (conn->c_ref == 0) {
29537c478bd9Sstevel@tonic-gate 				/*
29547c478bd9Sstevel@tonic-gate 				 * Remove from list and free conn
29557c478bd9Sstevel@tonic-gate 				 */
29567c478bd9Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
29577c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
29587c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
29597c478bd9Sstevel@tonic-gate 					&hca->srv_conn_list);
29607c478bd9Sstevel@tonic-gate 			} else {
29617c478bd9Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
29627c478bd9Sstevel@tonic-gate 			}
29637c478bd9Sstevel@tonic-gate #ifdef DEBUG
29647c478bd9Sstevel@tonic-gate 			if (rib_debug)
29657c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29667c478bd9Sstevel@tonic-gate 					" (CONN_CLOSED) channel disconnected");
29677c478bd9Sstevel@tonic-gate #endif
29687c478bd9Sstevel@tonic-gate 			break;
29697c478bd9Sstevel@tonic-gate 		}
29707c478bd9Sstevel@tonic-gate 		break;
29717c478bd9Sstevel@tonic-gate 	}
29727c478bd9Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_EST:
29737c478bd9Sstevel@tonic-gate 	/*
29747c478bd9Sstevel@tonic-gate 	 * RTU received, hence connection established.
29757c478bd9Sstevel@tonic-gate 	 */
29767c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
29777c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29787c478bd9Sstevel@tonic-gate 				"(CONN_EST) channel established");
29797c478bd9Sstevel@tonic-gate 		break;
29807c478bd9Sstevel@tonic-gate 
29817c478bd9Sstevel@tonic-gate 	default:
29827c478bd9Sstevel@tonic-gate 	    if (rib_debug > 2) {
29837c478bd9Sstevel@tonic-gate 		/* Let CM handle the following events. */
29847c478bd9Sstevel@tonic-gate 		if (event->cm_type == IBT_CM_EVENT_REP_RCV) {
29857c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29867c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_REP_RCV\n");
29877c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) {
29887c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29897c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_LAP_RCV\n");
29907c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) {
29917c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29927c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_MRA_RCV\n");
29937c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_APR_RCV) {
29947c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29957c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_APR_RCV\n");
29967c478bd9Sstevel@tonic-gate 		} else if (event->cm_type == IBT_CM_EVENT_FAILURE) {
29977c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
29987c478bd9Sstevel@tonic-gate 			    "server recv'ed IBT_CM_EVENT_FAILURE\n");
29997c478bd9Sstevel@tonic-gate 		}
30007c478bd9Sstevel@tonic-gate 	    }
30017c478bd9Sstevel@tonic-gate 	    return (IBT_CM_REJECT);
30027c478bd9Sstevel@tonic-gate 	}
30037c478bd9Sstevel@tonic-gate 
30047c478bd9Sstevel@tonic-gate 	/* accept all other CM messages (i.e. let the CM handle them) */
30057c478bd9Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
30067c478bd9Sstevel@tonic-gate }
30077c478bd9Sstevel@tonic-gate 
30087c478bd9Sstevel@tonic-gate static rdma_stat
30097c478bd9Sstevel@tonic-gate rib_register_ats(rib_hca_t *hca)
30107c478bd9Sstevel@tonic-gate {
30117c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
30127c478bd9Sstevel@tonic-gate 	uint_t			port_size;
30137c478bd9Sstevel@tonic-gate 	uint_t			pki, i, num_ports, nbinds;
30147c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
30157c478bd9Sstevel@tonic-gate 	rib_service_t		*new_service, *temp_srv;
30167c478bd9Sstevel@tonic-gate 	rpcib_ats_t		*atsp;
30177c478bd9Sstevel@tonic-gate 	rpcib_ibd_insts_t	ibds;
30187c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
30197c478bd9Sstevel@tonic-gate 	ibt_ar_t		ar;	/* address record */
30207c478bd9Sstevel@tonic-gate 
30217c478bd9Sstevel@tonic-gate 	/*
30227c478bd9Sstevel@tonic-gate 	 * Query all ports for the given HCA
30237c478bd9Sstevel@tonic-gate 	 */
30247c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
30257c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
30267c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
30277c478bd9Sstevel@tonic-gate 		    &num_ports, &port_size);
30287c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30297c478bd9Sstevel@tonic-gate 	} else {
30307c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30317c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
30327c478bd9Sstevel@tonic-gate 	}
30337c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
30347c478bd9Sstevel@tonic-gate #ifdef DEBUG
30357c478bd9Sstevel@tonic-gate 	    if (rib_debug) {
30367c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_ats: FAILED in "
30377c478bd9Sstevel@tonic-gate 		    "ibt_query_hca_ports, status = %d\n", ibt_status);
30387c478bd9Sstevel@tonic-gate 	    }
30397c478bd9Sstevel@tonic-gate #endif
30407c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
30417c478bd9Sstevel@tonic-gate 	}
30427c478bd9Sstevel@tonic-gate 
30437c478bd9Sstevel@tonic-gate #ifdef	DEBUG
30447c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
30457c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_ats: Ports detected "
30467c478bd9Sstevel@tonic-gate 		    "%d\n", num_ports);
30477c478bd9Sstevel@tonic-gate 
30487c478bd9Sstevel@tonic-gate 		for (i = 0; i < num_ports; i++) {
30497c478bd9Sstevel@tonic-gate 			if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
30507c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_ats "
30517c478bd9Sstevel@tonic-gate 				    "Port #: %d INACTIVE\n", i+1);
30527c478bd9Sstevel@tonic-gate 			} else if (port_infop[i].p_linkstate ==
30537c478bd9Sstevel@tonic-gate 			    IBT_PORT_ACTIVE) {
30547c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_register_ats "
30557c478bd9Sstevel@tonic-gate 				    "Port #: %d ACTIVE\n", i+1);
30567c478bd9Sstevel@tonic-gate 			}
30577c478bd9Sstevel@tonic-gate 		}
30587c478bd9Sstevel@tonic-gate 	}
30597c478bd9Sstevel@tonic-gate #endif
30607c478bd9Sstevel@tonic-gate 
30617c478bd9Sstevel@tonic-gate 	ibds.rib_ibd_alloc = N_IBD_INSTANCES;
30627c478bd9Sstevel@tonic-gate 	ibds.rib_ibd_cnt = 0;
30637c478bd9Sstevel@tonic-gate 	ibds.rib_ats = (rpcib_ats_t *)kmem_zalloc(ibds.rib_ibd_alloc *
30647c478bd9Sstevel@tonic-gate 			sizeof (rpcib_ats_t), KM_SLEEP);
30657c478bd9Sstevel@tonic-gate 	rib_get_ibd_insts(&ibds);
30667c478bd9Sstevel@tonic-gate 
30677c478bd9Sstevel@tonic-gate 	if (ibds.rib_ibd_cnt == 0) {
30687c478bd9Sstevel@tonic-gate 	    kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc *
30697c478bd9Sstevel@tonic-gate 				sizeof (rpcib_ats_t));
30707c478bd9Sstevel@tonic-gate 	    ibt_free_portinfo(port_infop, port_size);
30717c478bd9Sstevel@tonic-gate 	    return (RDMA_FAILED);
30727c478bd9Sstevel@tonic-gate 	}
30737c478bd9Sstevel@tonic-gate 
30747c478bd9Sstevel@tonic-gate 	/*
30757c478bd9Sstevel@tonic-gate 	 * Get the IP addresses of active ports and
30767c478bd9Sstevel@tonic-gate 	 * register them with ATS.  IPv4 addresses
30777c478bd9Sstevel@tonic-gate 	 * have precedence over IPv6 addresses.
30787c478bd9Sstevel@tonic-gate 	 */
30797c478bd9Sstevel@tonic-gate 	if (get_ibd_ipaddr(&ibds) != 0) {
30807c478bd9Sstevel@tonic-gate #ifdef	DEBUG
30817c478bd9Sstevel@tonic-gate 	    if (rib_debug > 1) {
30827c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_register_ats: "
30837c478bd9Sstevel@tonic-gate 		    "get_ibd_ipaddr failed");
30847c478bd9Sstevel@tonic-gate 	    }
30857c478bd9Sstevel@tonic-gate #endif
30867c478bd9Sstevel@tonic-gate 	    kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc *
30877c478bd9Sstevel@tonic-gate 				sizeof (rpcib_ats_t));
30887c478bd9Sstevel@tonic-gate 	    ibt_free_portinfo(port_infop, port_size);
30897c478bd9Sstevel@tonic-gate 	    return (RDMA_FAILED);
30907c478bd9Sstevel@tonic-gate 	}
30917c478bd9Sstevel@tonic-gate 
30927c478bd9Sstevel@tonic-gate 	/*
30937c478bd9Sstevel@tonic-gate 	 * Start ATS registration for active ports on this HCA.
30947c478bd9Sstevel@tonic-gate 	 */
30957c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
30967c478bd9Sstevel@tonic-gate 	nbinds = 0;
30977c478bd9Sstevel@tonic-gate 	new_service = NULL;
30987c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_ports; i++) {
30997c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
31007c478bd9Sstevel@tonic-gate 			continue;
31017c478bd9Sstevel@tonic-gate 
31027c478bd9Sstevel@tonic-gate 	    for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
31037c478bd9Sstevel@tonic-gate 		pkey = port_infop[i].p_pkey_tbl[pki];
31047c478bd9Sstevel@tonic-gate 		if ((pkey & IBSRM_HB) && (pkey != IB_PKEY_INVALID_FULL)) {
31057c478bd9Sstevel@tonic-gate 		    ar.ar_gid = port_infop[i].p_sgid_tbl[0];
31067c478bd9Sstevel@tonic-gate 		    ar.ar_pkey = pkey;
31077c478bd9Sstevel@tonic-gate 		    atsp = get_ibd_entry(&ar.ar_gid, pkey, &ibds);
31087c478bd9Sstevel@tonic-gate 		    if (atsp == NULL)
31097c478bd9Sstevel@tonic-gate 			continue;
31107c478bd9Sstevel@tonic-gate 		/*
31117c478bd9Sstevel@tonic-gate 		 * store the sin[6]_addr in ar_data
31127c478bd9Sstevel@tonic-gate 		 */
31137c478bd9Sstevel@tonic-gate 		    (void) bzero(ar.ar_data, ATS_AR_DATA_LEN);
31147c478bd9Sstevel@tonic-gate 		    if (atsp->ras_inet_type == AF_INET) {
31157c478bd9Sstevel@tonic-gate 			uint8_t *start_pos;
31167c478bd9Sstevel@tonic-gate 
31177c478bd9Sstevel@tonic-gate 			/*
31187c478bd9Sstevel@tonic-gate 			 * The ipv4 addr goes into the last
31197c478bd9Sstevel@tonic-gate 			 * four bytes of ar_data.
31207c478bd9Sstevel@tonic-gate 			 */
31217c478bd9Sstevel@tonic-gate 			start_pos = ar.ar_data + ATS_AR_DATA_LEN -
31227c478bd9Sstevel@tonic-gate 				sizeof (struct in_addr);
31237c478bd9Sstevel@tonic-gate 			bcopy(&atsp->ras_sin.sin_addr, start_pos,
31247c478bd9Sstevel@tonic-gate 				sizeof (struct in_addr));
31257c478bd9Sstevel@tonic-gate 		    } else if (atsp->ras_inet_type == AF_INET6) {
31267c478bd9Sstevel@tonic-gate 			bcopy(&atsp->ras_sin6.sin6_addr, ar.ar_data,
31277c478bd9Sstevel@tonic-gate 				sizeof (struct in6_addr));
31287c478bd9Sstevel@tonic-gate 		    } else
31297c478bd9Sstevel@tonic-gate 			continue;
31307c478bd9Sstevel@tonic-gate 
31317c478bd9Sstevel@tonic-gate 		    ibt_status = ibt_register_ar(hca->ibt_clnt_hdl, &ar);
31327c478bd9Sstevel@tonic-gate 		    if (ibt_status == IBT_SUCCESS) {
31337c478bd9Sstevel@tonic-gate #ifdef	DEBUG
31347c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
31357c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_ats: "
31367c478bd9Sstevel@tonic-gate 				    "ibt_register_ar OK on port %d", i+1);
31377c478bd9Sstevel@tonic-gate 			}
31387c478bd9Sstevel@tonic-gate #endif
31397c478bd9Sstevel@tonic-gate 			/*
31407c478bd9Sstevel@tonic-gate 			 * Allocate and prepare a service entry
31417c478bd9Sstevel@tonic-gate 			 */
31427c478bd9Sstevel@tonic-gate 			new_service = kmem_zalloc(sizeof (rib_service_t),
31437c478bd9Sstevel@tonic-gate 				KM_SLEEP);
31447c478bd9Sstevel@tonic-gate 			new_service->srv_port = i + 1;
31457c478bd9Sstevel@tonic-gate 			new_service->srv_ar = ar;
31467c478bd9Sstevel@tonic-gate 			new_service->srv_next = NULL;
31477c478bd9Sstevel@tonic-gate 
31487c478bd9Sstevel@tonic-gate 			/*
31497c478bd9Sstevel@tonic-gate 			 * Add to the service list for this HCA
31507c478bd9Sstevel@tonic-gate 			 */
31517c478bd9Sstevel@tonic-gate 			new_service->srv_next = hca->ats_list;
31527c478bd9Sstevel@tonic-gate 			hca->ats_list = new_service;
31537c478bd9Sstevel@tonic-gate 			new_service = NULL;
31547c478bd9Sstevel@tonic-gate 			nbinds ++;
31557c478bd9Sstevel@tonic-gate 		    } else {
31567c478bd9Sstevel@tonic-gate #ifdef	DEBUG
31577c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
31587c478bd9Sstevel@tonic-gate 			    cmn_err(CE_WARN, "rib_register_ats: "
31597c478bd9Sstevel@tonic-gate 			    "ibt_register_ar FAILED on port %d", i+1);
31607c478bd9Sstevel@tonic-gate 			}
31617c478bd9Sstevel@tonic-gate #endif
31627c478bd9Sstevel@tonic-gate 		    }
31637c478bd9Sstevel@tonic-gate 		}
31647c478bd9Sstevel@tonic-gate 	    }
31657c478bd9Sstevel@tonic-gate 	}
31667c478bd9Sstevel@tonic-gate 
31677c478bd9Sstevel@tonic-gate #ifdef	DEBUG
31687c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
31697c478bd9Sstevel@tonic-gate 		for (temp_srv = hca->ats_list; temp_srv != NULL;
31707c478bd9Sstevel@tonic-gate 			temp_srv = temp_srv->srv_next) {
31717c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "Service: ATS, active on"
31727c478bd9Sstevel@tonic-gate 					" port: %d\n", temp_srv->srv_port);
31737c478bd9Sstevel@tonic-gate 		}
31747c478bd9Sstevel@tonic-gate 	}
31757c478bd9Sstevel@tonic-gate #endif
31767c478bd9Sstevel@tonic-gate 
31777c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
31787c478bd9Sstevel@tonic-gate 	kmem_free(ibds.rib_ats, ibds.rib_ibd_alloc * sizeof (rpcib_ats_t));
31797c478bd9Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
31807c478bd9Sstevel@tonic-gate 
31817c478bd9Sstevel@tonic-gate 	if (nbinds == 0) {
31827c478bd9Sstevel@tonic-gate #ifdef	DEBUG
31837c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
31847c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_register_ats FAILED!\n");
31857c478bd9Sstevel@tonic-gate 	}
31867c478bd9Sstevel@tonic-gate #endif
31877c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
31887c478bd9Sstevel@tonic-gate 	}
31897c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
31907c478bd9Sstevel@tonic-gate }
31917c478bd9Sstevel@tonic-gate 
31927c478bd9Sstevel@tonic-gate static rdma_stat
31937c478bd9Sstevel@tonic-gate rib_register_service(rib_hca_t *hca, int service_type)
31947c478bd9Sstevel@tonic-gate {
31957c478bd9Sstevel@tonic-gate 	ibt_srv_desc_t		sdesc;
31967c478bd9Sstevel@tonic-gate 	ibt_srv_bind_t		sbind;
31977c478bd9Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
31987c478bd9Sstevel@tonic-gate 	ib_svc_id_t		srv_id;
31997c478bd9Sstevel@tonic-gate 	ibt_srv_hdl_t		srv_hdl;
32007c478bd9Sstevel@tonic-gate 	uint_t			port_size;
32017c478bd9Sstevel@tonic-gate 	uint_t			pki, i, j, num_ports, nbinds;
32027c478bd9Sstevel@tonic-gate 	ibt_status_t		ibt_status;
32037c478bd9Sstevel@tonic-gate 	char			**addrs;
32047c478bd9Sstevel@tonic-gate 	int			addr_count;
32057c478bd9Sstevel@tonic-gate 	rib_service_t		*new_service, *temp_srv;
32067c478bd9Sstevel@tonic-gate 	ib_pkey_t		pkey;
32077c478bd9Sstevel@tonic-gate 
32087c478bd9Sstevel@tonic-gate 	/*
32097c478bd9Sstevel@tonic-gate 	 * Query all ports for the given HCA
32107c478bd9Sstevel@tonic-gate 	 */
32117c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
32127c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
32137c478bd9Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
32147c478bd9Sstevel@tonic-gate 		    &num_ports, &port_size);
32157c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32167c478bd9Sstevel@tonic-gate 	} else {
32177c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32187c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
32197c478bd9Sstevel@tonic-gate 	}
32207c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
32217c478bd9Sstevel@tonic-gate #ifdef DEBUG
32227c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_service: FAILED in "
32237c478bd9Sstevel@tonic-gate 		    "ibt_query_hca_ports, status = %d\n", ibt_status);
32247c478bd9Sstevel@tonic-gate #endif
32257c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
32267c478bd9Sstevel@tonic-gate 	}
32277c478bd9Sstevel@tonic-gate 
32287c478bd9Sstevel@tonic-gate #ifdef	DEBUG
32297c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
32307c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "rib_register_service: Ports detected "
32317c478bd9Sstevel@tonic-gate 		    "%d\n", num_ports);
32327c478bd9Sstevel@tonic-gate 
32337c478bd9Sstevel@tonic-gate 		for (i = 0; i < num_ports; i++) {
32347c478bd9Sstevel@tonic-gate 			if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
32357c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_service "
32367c478bd9Sstevel@tonic-gate 				    "Port #: %d INACTIVE\n", i+1);
32377c478bd9Sstevel@tonic-gate 			} else if (port_infop[i].p_linkstate ==
32387c478bd9Sstevel@tonic-gate 			    IBT_PORT_ACTIVE) {
32397c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_register_service "
32407c478bd9Sstevel@tonic-gate 				    "Port #: %d ACTIVE\n", i+1);
32417c478bd9Sstevel@tonic-gate 			}
32427c478bd9Sstevel@tonic-gate 		}
32437c478bd9Sstevel@tonic-gate 	}
32447c478bd9Sstevel@tonic-gate #endif
32457c478bd9Sstevel@tonic-gate 	/*
32467c478bd9Sstevel@tonic-gate 	 * Get all the IP addresses on this system to register the
32477c478bd9Sstevel@tonic-gate 	 * given "service type" on all DNS recognized IP addrs.
32487c478bd9Sstevel@tonic-gate 	 * Each service type such as NFS will have all the systems
32497c478bd9Sstevel@tonic-gate 	 * IP addresses as its different names. For now the only
32507c478bd9Sstevel@tonic-gate 	 * type of service we support in RPCIB is NFS.
32517c478bd9Sstevel@tonic-gate 	 */
32527c478bd9Sstevel@tonic-gate 	addrs = get_ip_addrs(&addr_count);
32537c478bd9Sstevel@tonic-gate 	if (addrs == NULL) {
32547c478bd9Sstevel@tonic-gate #ifdef DEBUG
32557c478bd9Sstevel@tonic-gate 		if (rib_debug) {
32567c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_register_service: "
32577c478bd9Sstevel@tonic-gate 			"get_ip_addrs failed\n");
32587c478bd9Sstevel@tonic-gate 		}
32597c478bd9Sstevel@tonic-gate #endif
32607c478bd9Sstevel@tonic-gate 		ibt_free_portinfo(port_infop, port_size);
32617c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
32627c478bd9Sstevel@tonic-gate 	}
32637c478bd9Sstevel@tonic-gate 
32647c478bd9Sstevel@tonic-gate #ifdef	DEBUG
32657c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
32667c478bd9Sstevel@tonic-gate 		for (i = 0; i < addr_count; i++)
32677c478bd9Sstevel@tonic-gate 			cmn_err(CE_NOTE, "addr %d: %s\n", i, addrs[i]);
32687c478bd9Sstevel@tonic-gate 	}
32697c478bd9Sstevel@tonic-gate #endif
32707c478bd9Sstevel@tonic-gate 
32717c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
32727c478bd9Sstevel@tonic-gate 	/*
32737c478bd9Sstevel@tonic-gate 	 * Start registering and binding the service on the
32747c478bd9Sstevel@tonic-gate 	 * active ports of this HCA.
32757c478bd9Sstevel@tonic-gate 	 */
32767c478bd9Sstevel@tonic-gate 	nbinds = 0;
32777c478bd9Sstevel@tonic-gate 	new_service = NULL;
32787c478bd9Sstevel@tonic-gate 
32797c478bd9Sstevel@tonic-gate 	/*
32807c478bd9Sstevel@tonic-gate 	 * We use IP addresses as the service names for
32817c478bd9Sstevel@tonic-gate 	 * service registration.  Register each of them
32827c478bd9Sstevel@tonic-gate 	 * with CM to obtain a svc_id and svc_hdl.  We do not
32837c478bd9Sstevel@tonic-gate 	 * register the service with machine's loopback address.
32847c478bd9Sstevel@tonic-gate 	 */
32857c478bd9Sstevel@tonic-gate 	for (j = 1; j < addr_count; j++) {
32867c478bd9Sstevel@tonic-gate 	    (void) bzero(&srv_id, sizeof (ib_svc_id_t));
32877c478bd9Sstevel@tonic-gate 	    (void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t));
32887c478bd9Sstevel@tonic-gate 	    (void) bzero(&sdesc, sizeof (ibt_srv_desc_t));
32897c478bd9Sstevel@tonic-gate 
32907c478bd9Sstevel@tonic-gate 	    sdesc.sd_handler = rib_srv_cm_handler;
32917c478bd9Sstevel@tonic-gate 	    sdesc.sd_flags = 0;
32927c478bd9Sstevel@tonic-gate 
32937c478bd9Sstevel@tonic-gate 	    ibt_status = ibt_register_service(hca->ibt_clnt_hdl,
32947c478bd9Sstevel@tonic-gate 			    &sdesc, 0, 1, &srv_hdl, &srv_id);
32957c478bd9Sstevel@tonic-gate 	    if (ibt_status != IBT_SUCCESS) {
32967c478bd9Sstevel@tonic-gate #ifdef DEBUG
32977c478bd9Sstevel@tonic-gate 		if (rib_debug) {
32987c478bd9Sstevel@tonic-gate 		    cmn_err(CE_WARN, "rib_register_service: "
32997c478bd9Sstevel@tonic-gate 			"ibt_register_service FAILED, status "
33007c478bd9Sstevel@tonic-gate 			"= %d\n", ibt_status);
33017c478bd9Sstevel@tonic-gate 		}
33027c478bd9Sstevel@tonic-gate #endif
33037c478bd9Sstevel@tonic-gate 		/*
33047c478bd9Sstevel@tonic-gate 		 * No need to go on, since we failed to obtain
33057c478bd9Sstevel@tonic-gate 		 * a srv_id and srv_hdl. Move on to the next
33067c478bd9Sstevel@tonic-gate 		 * IP addr as a service name.
33077c478bd9Sstevel@tonic-gate 		 */
33087c478bd9Sstevel@tonic-gate 		continue;
33097c478bd9Sstevel@tonic-gate 	    }
33107c478bd9Sstevel@tonic-gate 	    for (i = 0; i < num_ports; i++) {
33117c478bd9Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
33127c478bd9Sstevel@tonic-gate 			continue;
33137c478bd9Sstevel@tonic-gate 
33147c478bd9Sstevel@tonic-gate 		for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
33157c478bd9Sstevel@tonic-gate 		    pkey = port_infop[i].p_pkey_tbl[pki];
33167c478bd9Sstevel@tonic-gate 		    if ((pkey & IBSRM_HB) && (pkey != IB_PKEY_INVALID_FULL)) {
33177c478bd9Sstevel@tonic-gate 
33187c478bd9Sstevel@tonic-gate 			/*
33197c478bd9Sstevel@tonic-gate 			 * Allocate and prepare a service entry
33207c478bd9Sstevel@tonic-gate 			 */
33217c478bd9Sstevel@tonic-gate 			new_service = kmem_zalloc(1 * sizeof (rib_service_t),
33227c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
33237c478bd9Sstevel@tonic-gate 			new_service->srv_type = service_type;
33247c478bd9Sstevel@tonic-gate 			new_service->srv_port = i + 1;
33257c478bd9Sstevel@tonic-gate 			new_service->srv_id = srv_id;
33267c478bd9Sstevel@tonic-gate 			new_service->srv_hdl = srv_hdl;
33277c478bd9Sstevel@tonic-gate 			new_service->srv_sbind_hdl = kmem_zalloc(1 *
33287c478bd9Sstevel@tonic-gate 			    sizeof (ibt_sbind_hdl_t), KM_SLEEP);
33297c478bd9Sstevel@tonic-gate 
33307c478bd9Sstevel@tonic-gate 			new_service->srv_name = kmem_zalloc(IB_SVC_NAME_LEN,
33317c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
33327c478bd9Sstevel@tonic-gate 			(void) bcopy(addrs[j], new_service->srv_name,
33337c478bd9Sstevel@tonic-gate 			    IB_SVC_NAME_LEN);
33347c478bd9Sstevel@tonic-gate 			(void) strlcat(new_service->srv_name, "::NFS",
33357c478bd9Sstevel@tonic-gate 				IB_SVC_NAME_LEN);
33367c478bd9Sstevel@tonic-gate 			new_service->srv_next = NULL;
33377c478bd9Sstevel@tonic-gate 
33387c478bd9Sstevel@tonic-gate 			/*
33397c478bd9Sstevel@tonic-gate 			 * Bind the service, specified by the IP address,
33407c478bd9Sstevel@tonic-gate 			 * to the port/pkey using the srv_hdl returned
33417c478bd9Sstevel@tonic-gate 			 * from ibt_register_service().
33427c478bd9Sstevel@tonic-gate 			 */
33437c478bd9Sstevel@tonic-gate 			(void) bzero(&sbind, sizeof (ibt_srv_bind_t));
33447c478bd9Sstevel@tonic-gate 			sbind.sb_pkey = pkey;
33457c478bd9Sstevel@tonic-gate 			sbind.sb_lease = 0xFFFFFFFF;
33467c478bd9Sstevel@tonic-gate 			sbind.sb_key[0] = NFS_SEC_KEY0;
33477c478bd9Sstevel@tonic-gate 			sbind.sb_key[1] = NFS_SEC_KEY1;
33487c478bd9Sstevel@tonic-gate 			sbind.sb_name = new_service->srv_name;
33497c478bd9Sstevel@tonic-gate 
33507c478bd9Sstevel@tonic-gate #ifdef	DEBUG
33517c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
33527c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_register_service: "
33537c478bd9Sstevel@tonic-gate 				    "binding service using name: %s\n",
33547c478bd9Sstevel@tonic-gate 				    sbind.sb_name);
33557c478bd9Sstevel@tonic-gate 			}
33567c478bd9Sstevel@tonic-gate #endif
33577c478bd9Sstevel@tonic-gate 			ibt_status = ibt_bind_service(srv_hdl,
33587c478bd9Sstevel@tonic-gate 			    port_infop[i].p_sgid_tbl[0], &sbind, rib_stat,
33597c478bd9Sstevel@tonic-gate 			    new_service->srv_sbind_hdl);
33607c478bd9Sstevel@tonic-gate 			if (ibt_status != IBT_SUCCESS) {
33617c478bd9Sstevel@tonic-gate #ifdef	DEBUG
33627c478bd9Sstevel@tonic-gate 			    if (rib_debug) {
33637c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "rib_register_service: FAILED"
33647c478bd9Sstevel@tonic-gate 				    " in ibt_bind_service, status = %d\n",
33657c478bd9Sstevel@tonic-gate 				    ibt_status);
33667c478bd9Sstevel@tonic-gate 			    }
33677c478bd9Sstevel@tonic-gate #endif
33687c478bd9Sstevel@tonic-gate 				kmem_free(new_service->srv_sbind_hdl,
33697c478bd9Sstevel@tonic-gate 				    sizeof (ibt_sbind_hdl_t));
33707c478bd9Sstevel@tonic-gate 				kmem_free(new_service->srv_name,
33717c478bd9Sstevel@tonic-gate 				    IB_SVC_NAME_LEN);
33727c478bd9Sstevel@tonic-gate 				kmem_free(new_service,
33737c478bd9Sstevel@tonic-gate 				    sizeof (rib_service_t));
33747c478bd9Sstevel@tonic-gate 				new_service = NULL;
33757c478bd9Sstevel@tonic-gate 				continue;
33767c478bd9Sstevel@tonic-gate 			}
33777c478bd9Sstevel@tonic-gate #ifdef	DEBUG
33787c478bd9Sstevel@tonic-gate 			if (rib_debug > 1) {
33797c478bd9Sstevel@tonic-gate 				if (ibt_status == IBT_SUCCESS)
33807c478bd9Sstevel@tonic-gate 					cmn_err(CE_NOTE, "rib_regstr_service: "
33817c478bd9Sstevel@tonic-gate 					    "Serv: %s REGISTERED on port: %d",
33827c478bd9Sstevel@tonic-gate 					    sbind.sb_name, i+1);
33837c478bd9Sstevel@tonic-gate 			}
33847c478bd9Sstevel@tonic-gate #endif
33857c478bd9Sstevel@tonic-gate 			/*
33867c478bd9Sstevel@tonic-gate 			 * Add to the service list for this HCA
33877c478bd9Sstevel@tonic-gate 			 */
33887c478bd9Sstevel@tonic-gate 			new_service->srv_next = hca->service_list;
33897c478bd9Sstevel@tonic-gate 			hca->service_list = new_service;
33907c478bd9Sstevel@tonic-gate 			new_service = NULL;
33917c478bd9Sstevel@tonic-gate 			nbinds ++;
33927c478bd9Sstevel@tonic-gate 		    }
33937c478bd9Sstevel@tonic-gate 		}
33947c478bd9Sstevel@tonic-gate 	    }
33957c478bd9Sstevel@tonic-gate 	}
33967c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
33977c478bd9Sstevel@tonic-gate 
33987c478bd9Sstevel@tonic-gate #ifdef	DEBUG
33997c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
34007c478bd9Sstevel@tonic-gate 		/*
34017c478bd9Sstevel@tonic-gate 		 * Change this print to a more generic one, as rpcib
34027c478bd9Sstevel@tonic-gate 		 * is supposed to handle multiple service types.
34037c478bd9Sstevel@tonic-gate 		 */
34047c478bd9Sstevel@tonic-gate 		for (temp_srv = hca->service_list; temp_srv != NULL;
34057c478bd9Sstevel@tonic-gate 			temp_srv = temp_srv->srv_next) {
34067c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "NFS-IB, active on port:"
34077c478bd9Sstevel@tonic-gate 					" %d\n"
34087c478bd9Sstevel@tonic-gate 					"Using name: %s", temp_srv->srv_port,
34097c478bd9Sstevel@tonic-gate 					temp_srv->srv_name);
34107c478bd9Sstevel@tonic-gate 		}
34117c478bd9Sstevel@tonic-gate 	}
34127c478bd9Sstevel@tonic-gate #endif
34137c478bd9Sstevel@tonic-gate 
34147c478bd9Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
34157c478bd9Sstevel@tonic-gate 	for (i = 0; i < addr_count; i++) {
34167c478bd9Sstevel@tonic-gate 		if (addrs[i])
34177c478bd9Sstevel@tonic-gate 			kmem_free(addrs[i], IB_SVC_NAME_LEN);
34187c478bd9Sstevel@tonic-gate 	}
34197c478bd9Sstevel@tonic-gate 	kmem_free(addrs, addr_count * sizeof (char *));
34207c478bd9Sstevel@tonic-gate 
34217c478bd9Sstevel@tonic-gate 	if (nbinds == 0) {
34227c478bd9Sstevel@tonic-gate #ifdef	DEBUG
34237c478bd9Sstevel@tonic-gate 	    if (rib_debug) {
34247c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_register_service: "
34257c478bd9Sstevel@tonic-gate 		    "bind_service FAILED!\n");
34267c478bd9Sstevel@tonic-gate 	    }
34277c478bd9Sstevel@tonic-gate #endif
34287c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
34297c478bd9Sstevel@tonic-gate 	} else {
34307c478bd9Sstevel@tonic-gate 		/*
34317c478bd9Sstevel@tonic-gate 		 * Put this plugin into accept state, since atleast
34327c478bd9Sstevel@tonic-gate 		 * one registration was successful.
34337c478bd9Sstevel@tonic-gate 		 */
34347c478bd9Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
34357c478bd9Sstevel@tonic-gate 		plugin_state = ACCEPT;
34367c478bd9Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
34377c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
34387c478bd9Sstevel@tonic-gate 	}
34397c478bd9Sstevel@tonic-gate }
34407c478bd9Sstevel@tonic-gate 
34417c478bd9Sstevel@tonic-gate void
34427c478bd9Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd)
34437c478bd9Sstevel@tonic-gate {
34447c478bd9Sstevel@tonic-gate 	rdma_stat status = RDMA_SUCCESS;
34457c478bd9Sstevel@tonic-gate 
34467c478bd9Sstevel@tonic-gate 	rd->active = 0;
34477c478bd9Sstevel@tonic-gate 	rd->err_code = RDMA_FAILED;
34487c478bd9Sstevel@tonic-gate 
34497c478bd9Sstevel@tonic-gate 	/*
34507c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
34517c478bd9Sstevel@tonic-gate 	 */
34527c478bd9Sstevel@tonic-gate 	rw_enter(&rib_stat->hca->state_lock, RW_READER);
34537c478bd9Sstevel@tonic-gate 	if (rib_stat->hca->state != HCA_INITED) {
34547c478bd9Sstevel@tonic-gate 		rw_exit(&rib_stat->hca->state_lock);
34557c478bd9Sstevel@tonic-gate 		return;
34567c478bd9Sstevel@tonic-gate 	}
34577c478bd9Sstevel@tonic-gate 	rw_exit(&rib_stat->hca->state_lock);
34587c478bd9Sstevel@tonic-gate 
34597c478bd9Sstevel@tonic-gate 	rib_stat->q = &rd->q;
34607c478bd9Sstevel@tonic-gate 	/*
34617c478bd9Sstevel@tonic-gate 	 * Register the Address translation service
34627c478bd9Sstevel@tonic-gate 	 */
34637c478bd9Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
34647c478bd9Sstevel@tonic-gate 	if (ats_running == 0) {
34657c478bd9Sstevel@tonic-gate 		if (rib_register_ats(rib_stat->hca) != RDMA_SUCCESS) {
34667c478bd9Sstevel@tonic-gate #ifdef	DEBUG
34677c478bd9Sstevel@tonic-gate 		    if (rib_debug) {
34687c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
34697c478bd9Sstevel@tonic-gate 			    "rib_listen(): ats registration failed!");
34707c478bd9Sstevel@tonic-gate 		    }
34717c478bd9Sstevel@tonic-gate #endif
34727c478bd9Sstevel@tonic-gate 		    mutex_exit(&rib_stat->open_hca_lock);
34737c478bd9Sstevel@tonic-gate 		    return;
34747c478bd9Sstevel@tonic-gate 		} else {
34757c478bd9Sstevel@tonic-gate 			ats_running = 1;
34767c478bd9Sstevel@tonic-gate 		}
34777c478bd9Sstevel@tonic-gate 	}
34787c478bd9Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
34797c478bd9Sstevel@tonic-gate 
34807c478bd9Sstevel@tonic-gate 	/*
34817c478bd9Sstevel@tonic-gate 	 * Right now the only service type is NFS. Hence force feed this
34827c478bd9Sstevel@tonic-gate 	 * value. Ideally to communicate the service type it should be
34837c478bd9Sstevel@tonic-gate 	 * passed down in rdma_svc_data.
34847c478bd9Sstevel@tonic-gate 	 */
34857c478bd9Sstevel@tonic-gate 	rib_stat->service_type = NFS;
34867c478bd9Sstevel@tonic-gate 	status = rib_register_service(rib_stat->hca, NFS);
34877c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
34887c478bd9Sstevel@tonic-gate 		rd->err_code = status;
34897c478bd9Sstevel@tonic-gate 		return;
34907c478bd9Sstevel@tonic-gate 	}
34917c478bd9Sstevel@tonic-gate 	/*
34927c478bd9Sstevel@tonic-gate 	 * Service active on an HCA, check rd->err_code for more
34937c478bd9Sstevel@tonic-gate 	 * explainable errors.
34947c478bd9Sstevel@tonic-gate 	 */
34957c478bd9Sstevel@tonic-gate 	rd->active = 1;
34967c478bd9Sstevel@tonic-gate 	rd->err_code = status;
34977c478bd9Sstevel@tonic-gate }
34987c478bd9Sstevel@tonic-gate 
34997c478bd9Sstevel@tonic-gate /* XXXX */
35007c478bd9Sstevel@tonic-gate /* ARGSUSED */
35017c478bd9Sstevel@tonic-gate static void
35027c478bd9Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata)
35037c478bd9Sstevel@tonic-gate {
35047c478bd9Sstevel@tonic-gate 	rib_hca_t		*hca;
35057c478bd9Sstevel@tonic-gate 
35067c478bd9Sstevel@tonic-gate 	/*
35077c478bd9Sstevel@tonic-gate 	 * KRPC called the RDMATF to stop the listeners, this means
35087c478bd9Sstevel@tonic-gate 	 * stop sending incomming or recieved requests to KRPC master
35097c478bd9Sstevel@tonic-gate 	 * transport handle for RDMA-IB. This is also means that the
35107c478bd9Sstevel@tonic-gate 	 * master transport handle, responsible for us, is going away.
35117c478bd9Sstevel@tonic-gate 	 */
35127c478bd9Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
35137c478bd9Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
35147c478bd9Sstevel@tonic-gate 	if (svcdata != NULL)
35157c478bd9Sstevel@tonic-gate 		svcdata->active = 0;
35167c478bd9Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
35177c478bd9Sstevel@tonic-gate 
35187c478bd9Sstevel@tonic-gate 	/*
35197c478bd9Sstevel@tonic-gate 	 * First check if a hca is still attached
35207c478bd9Sstevel@tonic-gate 	 */
35217c478bd9Sstevel@tonic-gate 	hca = rib_stat->hca;
35227c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
35237c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
35247c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
35257c478bd9Sstevel@tonic-gate 		return;
35267c478bd9Sstevel@tonic-gate 	}
35277c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
35287c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
35297c478bd9Sstevel@tonic-gate }
35307c478bd9Sstevel@tonic-gate 
35317c478bd9Sstevel@tonic-gate /*
35327c478bd9Sstevel@tonic-gate  * Traverse the HCA's service list to unbind and deregister services.
35337c478bd9Sstevel@tonic-gate  * Instead of unbinding the service for a service handle by
35347c478bd9Sstevel@tonic-gate  * calling ibt_unbind_service() for each port/pkey, we unbind
35357c478bd9Sstevel@tonic-gate  * all the services for the service handle by making only one
35367c478bd9Sstevel@tonic-gate  * call to ibt_unbind_all_services().  Then, we deregister the
35377c478bd9Sstevel@tonic-gate  * service for the service handle.
35387c478bd9Sstevel@tonic-gate  *
35397c478bd9Sstevel@tonic-gate  * When traversing the entries in service_list, we compare the
35407c478bd9Sstevel@tonic-gate  * srv_hdl of the current entry with that of the next.  If they
35417c478bd9Sstevel@tonic-gate  * are different or if the next entry is NULL, the current entry
35427c478bd9Sstevel@tonic-gate  * marks the last binding of the service handle.  In this case,
35437c478bd9Sstevel@tonic-gate  * call ibt_unbind_all_services() and deregister the service for
35447c478bd9Sstevel@tonic-gate  * the service handle.  If they are the same, the current and the
35457c478bd9Sstevel@tonic-gate  * next entries are bound to the same service handle.  In this
35467c478bd9Sstevel@tonic-gate  * case, move on to the next entry.
35477c478bd9Sstevel@tonic-gate  */
35487c478bd9Sstevel@tonic-gate static void
35497c478bd9Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca)
35507c478bd9Sstevel@tonic-gate {
35517c478bd9Sstevel@tonic-gate 	rib_service_t		*srv_list, *to_remove;
35527c478bd9Sstevel@tonic-gate 	ibt_status_t   		ibt_status;
35537c478bd9Sstevel@tonic-gate 
35547c478bd9Sstevel@tonic-gate 	/*
35557c478bd9Sstevel@tonic-gate 	 * unbind and deregister the services for this service type.
35567c478bd9Sstevel@tonic-gate 	 * Right now there is only one service type. In future it will
35577c478bd9Sstevel@tonic-gate 	 * be passed down to this function.
35587c478bd9Sstevel@tonic-gate 	 */
35597c478bd9Sstevel@tonic-gate 	rw_enter(&hca->service_list_lock, RW_WRITER);
35607c478bd9Sstevel@tonic-gate 	srv_list = hca->service_list;
35617c478bd9Sstevel@tonic-gate 	while (srv_list != NULL) {
35627c478bd9Sstevel@tonic-gate 		to_remove = srv_list;
35637c478bd9Sstevel@tonic-gate 		srv_list = to_remove->srv_next;
35647c478bd9Sstevel@tonic-gate 		if (srv_list == NULL || bcmp(to_remove->srv_hdl,
35657c478bd9Sstevel@tonic-gate 		    srv_list->srv_hdl, sizeof (ibt_srv_hdl_t))) {
35667c478bd9Sstevel@tonic-gate 
35677c478bd9Sstevel@tonic-gate 		    ibt_status = ibt_unbind_all_services(to_remove->srv_hdl);
35687c478bd9Sstevel@tonic-gate 		    if (ibt_status != IBT_SUCCESS) {
35697c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_listen_stop: "
35707c478bd9Sstevel@tonic-gate 			    "ibt_unbind_all_services FAILED"
35717c478bd9Sstevel@tonic-gate 				" status: %d\n", ibt_status);
35727c478bd9Sstevel@tonic-gate 		    }
35737c478bd9Sstevel@tonic-gate 
35747c478bd9Sstevel@tonic-gate 		    ibt_status =
35757c478bd9Sstevel@tonic-gate 			ibt_deregister_service(hca->ibt_clnt_hdl,
35767c478bd9Sstevel@tonic-gate 				to_remove->srv_hdl);
35777c478bd9Sstevel@tonic-gate 		    if (ibt_status != IBT_SUCCESS) {
35787c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_listen_stop: "
35797c478bd9Sstevel@tonic-gate 			    "ibt_deregister_service FAILED"
35807c478bd9Sstevel@tonic-gate 				" status: %d\n", ibt_status);
35817c478bd9Sstevel@tonic-gate 		    }
35827c478bd9Sstevel@tonic-gate 
35837c478bd9Sstevel@tonic-gate #ifdef	DEBUG
35847c478bd9Sstevel@tonic-gate 		    if (rib_debug > 1) {
35857c478bd9Sstevel@tonic-gate 			if (ibt_status == IBT_SUCCESS)
35867c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_listen_stop: "
35877c478bd9Sstevel@tonic-gate 				    "Successfully stopped and"
35887c478bd9Sstevel@tonic-gate 				    " UNREGISTERED service: %s\n",
35897c478bd9Sstevel@tonic-gate 				    to_remove->srv_name);
35907c478bd9Sstevel@tonic-gate 		    }
35917c478bd9Sstevel@tonic-gate #endif
35927c478bd9Sstevel@tonic-gate 		}
35937c478bd9Sstevel@tonic-gate 		kmem_free(to_remove->srv_name, IB_SVC_NAME_LEN);
35947c478bd9Sstevel@tonic-gate 		kmem_free(to_remove->srv_sbind_hdl,
35957c478bd9Sstevel@tonic-gate 			sizeof (ibt_sbind_hdl_t));
35967c478bd9Sstevel@tonic-gate 
35977c478bd9Sstevel@tonic-gate 		kmem_free(to_remove, sizeof (rib_service_t));
35987c478bd9Sstevel@tonic-gate 	}
35997c478bd9Sstevel@tonic-gate 	hca->service_list = NULL;
36007c478bd9Sstevel@tonic-gate 	rw_exit(&hca->service_list_lock);
36017c478bd9Sstevel@tonic-gate }
36027c478bd9Sstevel@tonic-gate 
36037c478bd9Sstevel@tonic-gate static struct svc_recv *
36047c478bd9Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl)
36057c478bd9Sstevel@tonic-gate {
36067c478bd9Sstevel@tonic-gate 	struct svc_recv	*recvp;
36077c478bd9Sstevel@tonic-gate 
36087c478bd9Sstevel@tonic-gate 	recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP);
36097c478bd9Sstevel@tonic-gate 	recvp->vaddr = sgl->ds_va;
36107c478bd9Sstevel@tonic-gate 	recvp->qp = qp;
36117c478bd9Sstevel@tonic-gate 	recvp->bytes_xfer = 0;
36127c478bd9Sstevel@tonic-gate 	return (recvp);
36137c478bd9Sstevel@tonic-gate }
36147c478bd9Sstevel@tonic-gate 
36157c478bd9Sstevel@tonic-gate static int
36167c478bd9Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp)
36177c478bd9Sstevel@tonic-gate {
36187c478bd9Sstevel@tonic-gate 	kmem_free(recvp, sizeof (*recvp));
36197c478bd9Sstevel@tonic-gate 
36207c478bd9Sstevel@tonic-gate 	return (0);
36217c478bd9Sstevel@tonic-gate }
36227c478bd9Sstevel@tonic-gate 
36237c478bd9Sstevel@tonic-gate static struct reply *
36247c478bd9Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid)
36257c478bd9Sstevel@tonic-gate {
36267c478bd9Sstevel@tonic-gate 	struct reply	*rep;
36277c478bd9Sstevel@tonic-gate 
36287c478bd9Sstevel@tonic-gate 
36297c478bd9Sstevel@tonic-gate 	rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP);
36307c478bd9Sstevel@tonic-gate 	if (rep == NULL) {
36317c478bd9Sstevel@tonic-gate 		mutex_exit(&qp->replylist_lock);
36327c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_addreplylist: no memory\n");
36337c478bd9Sstevel@tonic-gate 		return (NULL);
36347c478bd9Sstevel@tonic-gate 	}
36357c478bd9Sstevel@tonic-gate 	rep->xid = msgid;
36367c478bd9Sstevel@tonic-gate 	rep->vaddr_cq = NULL;
36377c478bd9Sstevel@tonic-gate 	rep->bytes_xfer = 0;
36387c478bd9Sstevel@tonic-gate 	rep->status = (uint_t)REPLY_WAIT;
36397c478bd9Sstevel@tonic-gate 	rep->prev = NULL;
36407c478bd9Sstevel@tonic-gate 	cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL);
36417c478bd9Sstevel@tonic-gate 
36427c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
36437c478bd9Sstevel@tonic-gate 	if (qp->replylist) {
36447c478bd9Sstevel@tonic-gate 		rep->next = qp->replylist;
36457c478bd9Sstevel@tonic-gate 		qp->replylist->prev = rep;
36467c478bd9Sstevel@tonic-gate 	}
36477c478bd9Sstevel@tonic-gate 	qp->rep_list_size++;
36487c478bd9Sstevel@tonic-gate 	if (rib_debug > 1)
36497c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "rib_addreplylist: qp:%p, rep_list_size:%d\n",
36507c478bd9Sstevel@tonic-gate 		(void *)qp, qp->rep_list_size);
36517c478bd9Sstevel@tonic-gate 	qp->replylist = rep;
36527c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
36537c478bd9Sstevel@tonic-gate 
36547c478bd9Sstevel@tonic-gate 	return (rep);
36557c478bd9Sstevel@tonic-gate }
36567c478bd9Sstevel@tonic-gate 
36577c478bd9Sstevel@tonic-gate static rdma_stat
36587c478bd9Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp)
36597c478bd9Sstevel@tonic-gate {
36607c478bd9Sstevel@tonic-gate 	struct reply	*r, *n;
36617c478bd9Sstevel@tonic-gate 
36627c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
36637c478bd9Sstevel@tonic-gate 	for (r = qp->replylist; r != NULL; r = n) {
36647c478bd9Sstevel@tonic-gate 		n = r->next;
36657c478bd9Sstevel@tonic-gate 		(void) rib_remreply(qp, r);
36667c478bd9Sstevel@tonic-gate 	}
36677c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
36687c478bd9Sstevel@tonic-gate 
36697c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
36707c478bd9Sstevel@tonic-gate }
36717c478bd9Sstevel@tonic-gate 
36727c478bd9Sstevel@tonic-gate static int
36737c478bd9Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep)
36747c478bd9Sstevel@tonic-gate {
36757c478bd9Sstevel@tonic-gate 
36767c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->replylist_lock));
36777c478bd9Sstevel@tonic-gate 	if (rep->prev) {
36787c478bd9Sstevel@tonic-gate 		rep->prev->next = rep->next;
36797c478bd9Sstevel@tonic-gate 	}
36807c478bd9Sstevel@tonic-gate 	if (rep->next) {
36817c478bd9Sstevel@tonic-gate 		rep->next->prev = rep->prev;
36827c478bd9Sstevel@tonic-gate 	}
36837c478bd9Sstevel@tonic-gate 	if (qp->replylist == rep)
36847c478bd9Sstevel@tonic-gate 		qp->replylist = rep->next;
36857c478bd9Sstevel@tonic-gate 
36867c478bd9Sstevel@tonic-gate 	cv_destroy(&rep->wait_cv);
36877c478bd9Sstevel@tonic-gate 	qp->rep_list_size--;
36887c478bd9Sstevel@tonic-gate 	if (rib_debug > 1)
36897c478bd9Sstevel@tonic-gate 	    cmn_err(CE_NOTE, "rib_remreply: qp:%p, rep_list_size:%d\n",
36907c478bd9Sstevel@tonic-gate 		(void *)qp, qp->rep_list_size);
36917c478bd9Sstevel@tonic-gate 
36927c478bd9Sstevel@tonic-gate 	kmem_free(rep, sizeof (*rep));
36937c478bd9Sstevel@tonic-gate 
36947c478bd9Sstevel@tonic-gate 	return (0);
36957c478bd9Sstevel@tonic-gate }
36967c478bd9Sstevel@tonic-gate 
36977c478bd9Sstevel@tonic-gate rdma_stat
36987c478bd9Sstevel@tonic-gate rib_registermem(CONN *conn, caddr_t buf, uint_t buflen,
36997c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle)
37007c478bd9Sstevel@tonic-gate {
37017c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
37027c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
37037c478bd9Sstevel@tonic-gate 	rdma_stat	status;
37047c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
37057c478bd9Sstevel@tonic-gate 
37067c478bd9Sstevel@tonic-gate 	/*
37077c478bd9Sstevel@tonic-gate 	 * Note: ALL buffer pools use the same memory type RDMARW.
37087c478bd9Sstevel@tonic-gate 	 */
37097c478bd9Sstevel@tonic-gate 	status = rib_reg_mem(hca, buf, buflen, 0, &mr_hdl, &mr_desc);
37107c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
371111606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
37127c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
37137c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
37147c478bd9Sstevel@tonic-gate 	} else {
37157c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
37167c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
37177c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
37187c478bd9Sstevel@tonic-gate 	}
37197c478bd9Sstevel@tonic-gate 	return (status);
37207c478bd9Sstevel@tonic-gate }
37217c478bd9Sstevel@tonic-gate 
37227c478bd9Sstevel@tonic-gate static rdma_stat
37237c478bd9Sstevel@tonic-gate rib_reg_mem(rib_hca_t *hca, caddr_t buf, uint_t size, ibt_mr_flags_t spec,
37247c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
37257c478bd9Sstevel@tonic-gate {
37267c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
37277c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
37287c478bd9Sstevel@tonic-gate 
372911606941Sjwahlig 	mem_attr.mr_vaddr = (uintptr_t)buf;
37307c478bd9Sstevel@tonic-gate 	mem_attr.mr_len = (ib_msglen_t)size;
37317c478bd9Sstevel@tonic-gate 	mem_attr.mr_as = NULL;
37327c478bd9Sstevel@tonic-gate 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
37337c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
37347c478bd9Sstevel@tonic-gate 	    IBT_MR_ENABLE_WINDOW_BIND | spec;
37357c478bd9Sstevel@tonic-gate 
37367c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
37377c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
37387c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl,
37397c478bd9Sstevel@tonic-gate 					&mem_attr, mr_hdlp, mr_descp);
37407c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
37417c478bd9Sstevel@tonic-gate 	} else {
37427c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
37437c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
37447c478bd9Sstevel@tonic-gate 	}
37457c478bd9Sstevel@tonic-gate 
37467c478bd9Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
37477c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_reg_mem: ibt_register_mr "
37487c478bd9Sstevel@tonic-gate 			"(spec:%d) failed for addr %llX, status %d",
37497c478bd9Sstevel@tonic-gate 			spec, (longlong_t)mem_attr.mr_vaddr, ibt_status);
37507c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
37517c478bd9Sstevel@tonic-gate 	}
37527c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37537c478bd9Sstevel@tonic-gate }
37547c478bd9Sstevel@tonic-gate 
37557c478bd9Sstevel@tonic-gate rdma_stat
37567c478bd9Sstevel@tonic-gate rib_registermemsync(CONN *conn, caddr_t buf, uint_t buflen,
37577c478bd9Sstevel@tonic-gate 	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle)
37587c478bd9Sstevel@tonic-gate {
37597c478bd9Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
37607c478bd9Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
37617c478bd9Sstevel@tonic-gate 	rdma_stat	status;
37627c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
37637c478bd9Sstevel@tonic-gate 
37647c478bd9Sstevel@tonic-gate 	/*
37657c478bd9Sstevel@tonic-gate 	 * Non-coherent memory registration.
37667c478bd9Sstevel@tonic-gate 	 */
37677c478bd9Sstevel@tonic-gate 	status = rib_reg_mem(hca, buf, buflen, IBT_MR_NONCOHERENT, &mr_hdl,
37687c478bd9Sstevel@tonic-gate 			&mr_desc);
37697c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
377011606941Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
37717c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
37727c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
37737c478bd9Sstevel@tonic-gate 		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
37747c478bd9Sstevel@tonic-gate 	} else {
37757c478bd9Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
37767c478bd9Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
37777c478bd9Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
37787c478bd9Sstevel@tonic-gate 	}
37797c478bd9Sstevel@tonic-gate 	return (status);
37807c478bd9Sstevel@tonic-gate }
37817c478bd9Sstevel@tonic-gate 
37827c478bd9Sstevel@tonic-gate /* ARGSUSED */
37837c478bd9Sstevel@tonic-gate rdma_stat
37847c478bd9Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
37857c478bd9Sstevel@tonic-gate {
37867c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
37877c478bd9Sstevel@tonic-gate 
37887c478bd9Sstevel@tonic-gate 	/*
37897c478bd9Sstevel@tonic-gate 	 * Allow memory deregistration even if HCA is
37907c478bd9Sstevel@tonic-gate 	 * getting detached. Need all outstanding
37917c478bd9Sstevel@tonic-gate 	 * memory registrations to be deregistered
37927c478bd9Sstevel@tonic-gate 	 * before HCA_DETACH_EVENT can be accepted.
37937c478bd9Sstevel@tonic-gate 	 */
37947c478bd9Sstevel@tonic-gate 	(void) ibt_deregister_mr(hca->hca_hdl,
379511606941Sjwahlig 			(ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
37967c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37977c478bd9Sstevel@tonic-gate }
37987c478bd9Sstevel@tonic-gate 
37997c478bd9Sstevel@tonic-gate /* ARGSUSED */
38007c478bd9Sstevel@tonic-gate rdma_stat
38017c478bd9Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
38027c478bd9Sstevel@tonic-gate 		RIB_SYNCMEM_HANDLE sync_handle)
38037c478bd9Sstevel@tonic-gate {
38047c478bd9Sstevel@tonic-gate 	(void) rib_deregistermem(conn, buf, buf_handle);
38057c478bd9Sstevel@tonic-gate 
38067c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
38077c478bd9Sstevel@tonic-gate }
38087c478bd9Sstevel@tonic-gate 
38097c478bd9Sstevel@tonic-gate /* ARGSUSED */
38107c478bd9Sstevel@tonic-gate rdma_stat
38117c478bd9Sstevel@tonic-gate rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf,
38127c478bd9Sstevel@tonic-gate 		int len, int cpu)
38137c478bd9Sstevel@tonic-gate {
38147c478bd9Sstevel@tonic-gate 	ibt_status_t	status;
38157c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
38167c478bd9Sstevel@tonic-gate 	ibt_mr_sync_t	mr_segment;
38177c478bd9Sstevel@tonic-gate 
38187c478bd9Sstevel@tonic-gate 	mr_segment.ms_handle = (ibt_mr_hdl_t)shandle;
381911606941Sjwahlig 	mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf;
38207c478bd9Sstevel@tonic-gate 	mr_segment.ms_len = (ib_memlen_t)len;
38217c478bd9Sstevel@tonic-gate 	if (cpu) {
38227c478bd9Sstevel@tonic-gate 		/* make incoming data visible to memory */
38237c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_WRITE;
38247c478bd9Sstevel@tonic-gate 	} else {
38257c478bd9Sstevel@tonic-gate 		/* make memory changes visible to IO */
38267c478bd9Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_READ;
38277c478bd9Sstevel@tonic-gate 	}
38287c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
38297c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_INITED) {
38307c478bd9Sstevel@tonic-gate 		status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1);
38317c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38327c478bd9Sstevel@tonic-gate 	} else {
38337c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38347c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
38357c478bd9Sstevel@tonic-gate 	}
38367c478bd9Sstevel@tonic-gate 
38377c478bd9Sstevel@tonic-gate 	if (status == IBT_SUCCESS)
38387c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
38397c478bd9Sstevel@tonic-gate 	else {
38407c478bd9Sstevel@tonic-gate #ifdef DEBUG
38417c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_syncmem: ibt_sync_mr failed with %d\n",
38427c478bd9Sstevel@tonic-gate 			status);
38437c478bd9Sstevel@tonic-gate #endif
38447c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
38457c478bd9Sstevel@tonic-gate 	}
38467c478bd9Sstevel@tonic-gate }
38477c478bd9Sstevel@tonic-gate 
38487c478bd9Sstevel@tonic-gate /*
38497c478bd9Sstevel@tonic-gate  * XXXX	????
38507c478bd9Sstevel@tonic-gate  */
38517c478bd9Sstevel@tonic-gate static rdma_stat
38527c478bd9Sstevel@tonic-gate rib_getinfo(rdma_info_t *info)
38537c478bd9Sstevel@tonic-gate {
38547c478bd9Sstevel@tonic-gate 	/*
38557c478bd9Sstevel@tonic-gate 	 * XXXX	Hack!
38567c478bd9Sstevel@tonic-gate 	 */
38577c478bd9Sstevel@tonic-gate 	info->addrlen = 16;
38587c478bd9Sstevel@tonic-gate 	info->mts = 1000000;
38597c478bd9Sstevel@tonic-gate 	info->mtu = 1000000;
38607c478bd9Sstevel@tonic-gate 
38617c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
38627c478bd9Sstevel@tonic-gate }
38637c478bd9Sstevel@tonic-gate 
38647c478bd9Sstevel@tonic-gate rib_bufpool_t *
38657c478bd9Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num)
38667c478bd9Sstevel@tonic-gate {
38677c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
38687c478bd9Sstevel@tonic-gate 	bufpool_t	*bp = NULL;
38697c478bd9Sstevel@tonic-gate 	caddr_t		buf;
38707c478bd9Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
38717c478bd9Sstevel@tonic-gate 	ibt_status_t	ibt_status;
38727c478bd9Sstevel@tonic-gate 	int		i, j;
38737c478bd9Sstevel@tonic-gate 
38747c478bd9Sstevel@tonic-gate 	rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP);
38757c478bd9Sstevel@tonic-gate 
38767c478bd9Sstevel@tonic-gate 	bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) +
38777c478bd9Sstevel@tonic-gate 			num * sizeof (void *), KM_SLEEP);
38787c478bd9Sstevel@tonic-gate 
38797c478bd9Sstevel@tonic-gate 	mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
38807c478bd9Sstevel@tonic-gate 	bp->numelems = num;
38817c478bd9Sstevel@tonic-gate 
38827c478bd9Sstevel@tonic-gate 	switch (ptype) {
38837c478bd9Sstevel@tonic-gate 	    case SEND_BUFFER:
38847c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
38857c478bd9Sstevel@tonic-gate 		/* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
38867c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_MSG_SZ;
38877c478bd9Sstevel@tonic-gate 		break;
38887c478bd9Sstevel@tonic-gate 	    case RECV_BUFFER:
38897c478bd9Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
38907c478bd9Sstevel@tonic-gate 		/* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
38917c478bd9Sstevel@tonic-gate 		bp->rsize = RPC_BUF_SIZE;
38927c478bd9Sstevel@tonic-gate 		break;
38937c478bd9Sstevel@tonic-gate 	    default:
38947c478bd9Sstevel@tonic-gate 		goto fail;
38957c478bd9Sstevel@tonic-gate 	}
38967c478bd9Sstevel@tonic-gate 
38977c478bd9Sstevel@tonic-gate 	/*
38987c478bd9Sstevel@tonic-gate 	 * Register the pool.
38997c478bd9Sstevel@tonic-gate 	 */
39007c478bd9Sstevel@tonic-gate 	bp->bufsize = num * bp->rsize;
39017c478bd9Sstevel@tonic-gate 	bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
39027c478bd9Sstevel@tonic-gate 	rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
39037c478bd9Sstevel@tonic-gate 			sizeof (ibt_mr_hdl_t), KM_SLEEP);
39047c478bd9Sstevel@tonic-gate 	rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
39057c478bd9Sstevel@tonic-gate 			sizeof (ibt_mr_desc_t), KM_SLEEP);
39067c478bd9Sstevel@tonic-gate 
39077c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
39087c478bd9Sstevel@tonic-gate 	if (hca->state != HCA_INITED) {
39097c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
39107c478bd9Sstevel@tonic-gate 		goto fail;
39117c478bd9Sstevel@tonic-gate 	}
39127c478bd9Sstevel@tonic-gate 	for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
39137c478bd9Sstevel@tonic-gate 		bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
391411606941Sjwahlig 		mem_attr.mr_vaddr = (uintptr_t)buf;
39157c478bd9Sstevel@tonic-gate 		mem_attr.mr_len = (ib_msglen_t)bp->rsize;
39167c478bd9Sstevel@tonic-gate 		mem_attr.mr_as = NULL;
39177c478bd9Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl,
39187c478bd9Sstevel@tonic-gate 			hca->pd_hdl, &mem_attr, &rbp->mr_hdl[i],
39197c478bd9Sstevel@tonic-gate 			&rbp->mr_desc[i]);
39207c478bd9Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
39217c478bd9Sstevel@tonic-gate 		    for (j = 0; j < i; j++) {
39227c478bd9Sstevel@tonic-gate 			(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[j]);
39237c478bd9Sstevel@tonic-gate 		    }
39247c478bd9Sstevel@tonic-gate 		    rw_exit(&hca->state_lock);
39257c478bd9Sstevel@tonic-gate 		    goto fail;
39267c478bd9Sstevel@tonic-gate 		}
39277c478bd9Sstevel@tonic-gate 	}
39287c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
39297c478bd9Sstevel@tonic-gate 
39307c478bd9Sstevel@tonic-gate 	buf = (caddr_t)bp->buf;
39317c478bd9Sstevel@tonic-gate 	for (i = 0; i < num; i++, buf += bp->rsize) {
39327c478bd9Sstevel@tonic-gate 		bp->buflist[i] = (void *)buf;
39337c478bd9Sstevel@tonic-gate 	}
39347c478bd9Sstevel@tonic-gate 	bp->buffree = num - 1;	/* no. of free buffers */
39357c478bd9Sstevel@tonic-gate 	rbp->bpool = bp;
39367c478bd9Sstevel@tonic-gate 
39377c478bd9Sstevel@tonic-gate 	return (rbp);
39387c478bd9Sstevel@tonic-gate fail:
39397c478bd9Sstevel@tonic-gate 	if (bp) {
39407c478bd9Sstevel@tonic-gate 	    if (bp->buf)
39417c478bd9Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
39427c478bd9Sstevel@tonic-gate 	    kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *));
39437c478bd9Sstevel@tonic-gate 	}
39447c478bd9Sstevel@tonic-gate 	if (rbp) {
39457c478bd9Sstevel@tonic-gate 	    if (rbp->mr_hdl)
39467c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t));
39477c478bd9Sstevel@tonic-gate 	    if (rbp->mr_desc)
39487c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t));
39497c478bd9Sstevel@tonic-gate 	    kmem_free(rbp, sizeof (rib_bufpool_t));
39507c478bd9Sstevel@tonic-gate 	}
39517c478bd9Sstevel@tonic-gate 	return (NULL);
39527c478bd9Sstevel@tonic-gate }
39537c478bd9Sstevel@tonic-gate 
39547c478bd9Sstevel@tonic-gate static void
39557c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype)
39567c478bd9Sstevel@tonic-gate {
39577c478bd9Sstevel@tonic-gate 	int i;
39587c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
39597c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
39607c478bd9Sstevel@tonic-gate 
39617c478bd9Sstevel@tonic-gate 	/*
39627c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
39637c478bd9Sstevel@tonic-gate 	 */
39647c478bd9Sstevel@tonic-gate 	switch (ptype) {
39657c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
39667c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
39677c478bd9Sstevel@tonic-gate 			break;
39687c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
39697c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
39707c478bd9Sstevel@tonic-gate 			break;
39717c478bd9Sstevel@tonic-gate 		default:
39727c478bd9Sstevel@tonic-gate 			return;
39737c478bd9Sstevel@tonic-gate 	}
39747c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
39757c478bd9Sstevel@tonic-gate 		return;
39767c478bd9Sstevel@tonic-gate 
39777c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
39787c478bd9Sstevel@tonic-gate 
39797c478bd9Sstevel@tonic-gate 	/*
39807c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
39817c478bd9Sstevel@tonic-gate 	 */
39827c478bd9Sstevel@tonic-gate 	for (i = 0; i < bp->numelems; i++) {
39837c478bd9Sstevel@tonic-gate 		(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]);
39847c478bd9Sstevel@tonic-gate 	}
39857c478bd9Sstevel@tonic-gate }
39867c478bd9Sstevel@tonic-gate 
39877c478bd9Sstevel@tonic-gate static void
39887c478bd9Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype)
39897c478bd9Sstevel@tonic-gate {
39907c478bd9Sstevel@tonic-gate 
39917c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
39927c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
39937c478bd9Sstevel@tonic-gate 
39947c478bd9Sstevel@tonic-gate 	/*
39957c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
39967c478bd9Sstevel@tonic-gate 	 */
39977c478bd9Sstevel@tonic-gate 	switch (ptype) {
39987c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
39997c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
40007c478bd9Sstevel@tonic-gate 			break;
40017c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
40027c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
40037c478bd9Sstevel@tonic-gate 			break;
40047c478bd9Sstevel@tonic-gate 		default:
40057c478bd9Sstevel@tonic-gate 			return;
40067c478bd9Sstevel@tonic-gate 	}
40077c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
40087c478bd9Sstevel@tonic-gate 		return;
40097c478bd9Sstevel@tonic-gate 
40107c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
40117c478bd9Sstevel@tonic-gate 
40127c478bd9Sstevel@tonic-gate 	/*
40137c478bd9Sstevel@tonic-gate 	 * Free the pool memory.
40147c478bd9Sstevel@tonic-gate 	 */
40157c478bd9Sstevel@tonic-gate 	if (rbp->mr_hdl)
40167c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
40177c478bd9Sstevel@tonic-gate 
40187c478bd9Sstevel@tonic-gate 	if (rbp->mr_desc)
40197c478bd9Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
40207c478bd9Sstevel@tonic-gate 
40217c478bd9Sstevel@tonic-gate 	if (bp->buf)
40227c478bd9Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
40237c478bd9Sstevel@tonic-gate 	mutex_destroy(&bp->buflock);
40247c478bd9Sstevel@tonic-gate 	kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
40257c478bd9Sstevel@tonic-gate 	kmem_free(rbp, sizeof (rib_bufpool_t));
40267c478bd9Sstevel@tonic-gate }
40277c478bd9Sstevel@tonic-gate 
40287c478bd9Sstevel@tonic-gate void
40297c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype)
40307c478bd9Sstevel@tonic-gate {
40317c478bd9Sstevel@tonic-gate 	/*
40327c478bd9Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
40337c478bd9Sstevel@tonic-gate 	 */
40347c478bd9Sstevel@tonic-gate 	rib_rbufpool_deregister(hca, ptype);
40357c478bd9Sstevel@tonic-gate 	rib_rbufpool_free(hca, ptype);
40367c478bd9Sstevel@tonic-gate }
40377c478bd9Sstevel@tonic-gate 
40387c478bd9Sstevel@tonic-gate /*
40397c478bd9Sstevel@tonic-gate  * Fetch a buffer from the pool of type specified in rdbuf->type.
40407c478bd9Sstevel@tonic-gate  */
40417c478bd9Sstevel@tonic-gate static rdma_stat
40427c478bd9Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf)
40437c478bd9Sstevel@tonic-gate {
40447c478bd9Sstevel@tonic-gate 
40457c478bd9Sstevel@tonic-gate 	rdbuf->addr = rib_rbuf_alloc(conn, rdbuf);
40467c478bd9Sstevel@tonic-gate 	if (rdbuf->addr) {
40477c478bd9Sstevel@tonic-gate 		switch (rdbuf->type) {
40487c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
40497c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_MSG_SZ;	/* 1K */
40507c478bd9Sstevel@tonic-gate 			break;
40517c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
40527c478bd9Sstevel@tonic-gate 			rdbuf->len = RPC_BUF_SIZE; /* 2K */
40537c478bd9Sstevel@tonic-gate 			break;
40547c478bd9Sstevel@tonic-gate 		default:
40557c478bd9Sstevel@tonic-gate 			rdbuf->len = 0;
40567c478bd9Sstevel@tonic-gate 		}
40577c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
40587c478bd9Sstevel@tonic-gate 	} else
40597c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
40607c478bd9Sstevel@tonic-gate }
40617c478bd9Sstevel@tonic-gate 
40627c478bd9Sstevel@tonic-gate 
40637c478bd9Sstevel@tonic-gate /*
40647c478bd9Sstevel@tonic-gate  * Fetch a buffer of specified type.
40657c478bd9Sstevel@tonic-gate  * Note that rdbuf->handle is mw's rkey.
40667c478bd9Sstevel@tonic-gate  */
40677c478bd9Sstevel@tonic-gate static void *
40687c478bd9Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf)
40697c478bd9Sstevel@tonic-gate {
40707c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
40717c478bd9Sstevel@tonic-gate 	rib_hca_t	*hca = qp->hca;
40727c478bd9Sstevel@tonic-gate 	rdma_btype	ptype = rdbuf->type;
40737c478bd9Sstevel@tonic-gate 	void		*buf;
40747c478bd9Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
40757c478bd9Sstevel@tonic-gate 	bufpool_t	*bp;
40767c478bd9Sstevel@tonic-gate 	int		i;
40777c478bd9Sstevel@tonic-gate 
40787c478bd9Sstevel@tonic-gate 	/*
40797c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
40807c478bd9Sstevel@tonic-gate 	 */
40817c478bd9Sstevel@tonic-gate 	switch (ptype) {
40827c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
40837c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
40847c478bd9Sstevel@tonic-gate 			break;
40857c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
40867c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
40877c478bd9Sstevel@tonic-gate 			break;
40887c478bd9Sstevel@tonic-gate 		default:
40897c478bd9Sstevel@tonic-gate 			return (NULL);
40907c478bd9Sstevel@tonic-gate 	}
40917c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
40927c478bd9Sstevel@tonic-gate 		return (NULL);
40937c478bd9Sstevel@tonic-gate 
40947c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
40957c478bd9Sstevel@tonic-gate 
40967c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
40977c478bd9Sstevel@tonic-gate 	if (bp->buffree < 0) {
40987c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_rbuf_alloc: No free buffers!");
40997c478bd9Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
41007c478bd9Sstevel@tonic-gate 		return (NULL);
41017c478bd9Sstevel@tonic-gate 	}
41027c478bd9Sstevel@tonic-gate 
41037c478bd9Sstevel@tonic-gate 	/* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */
41047c478bd9Sstevel@tonic-gate 	buf = bp->buflist[bp->buffree];
41057c478bd9Sstevel@tonic-gate 	rdbuf->addr = buf;
41067c478bd9Sstevel@tonic-gate 	rdbuf->len = bp->rsize;
41077c478bd9Sstevel@tonic-gate 	for (i = bp->numelems - 1; i >= 0; i--) {
410811606941Sjwahlig 	    if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
41097c478bd9Sstevel@tonic-gate 		rdbuf->handle.mrc_rmr = (uint32_t)rbp->mr_desc[i].md_rkey;
411011606941Sjwahlig 		rdbuf->handle.mrc_linfo = (uintptr_t)rbp->mr_hdl[i];
41117c478bd9Sstevel@tonic-gate 		rdbuf->handle.mrc_lmr = (uint32_t)rbp->mr_desc[i].md_lkey;
41127c478bd9Sstevel@tonic-gate 		bp->buffree--;
41137c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
41147c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_rbuf_alloc: %d free bufs "
41157c478bd9Sstevel@tonic-gate 			"(type %d)\n", bp->buffree+1, ptype);
41167c478bd9Sstevel@tonic-gate 
41177c478bd9Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
41187c478bd9Sstevel@tonic-gate 
41197c478bd9Sstevel@tonic-gate 		return (buf);
41207c478bd9Sstevel@tonic-gate 	    }
41217c478bd9Sstevel@tonic-gate 	}
41227c478bd9Sstevel@tonic-gate 	cmn_err(CE_WARN, "rib_rbuf_alloc: NO matching buf %p of "
41237c478bd9Sstevel@tonic-gate 		"type %d found!", buf, ptype);
41247c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
41257c478bd9Sstevel@tonic-gate 
41267c478bd9Sstevel@tonic-gate 	return (NULL);
41277c478bd9Sstevel@tonic-gate }
41287c478bd9Sstevel@tonic-gate 
41297c478bd9Sstevel@tonic-gate static void
41307c478bd9Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf)
41317c478bd9Sstevel@tonic-gate {
41327c478bd9Sstevel@tonic-gate 
41337c478bd9Sstevel@tonic-gate 	rib_rbuf_free(conn, rdbuf->type, rdbuf->addr);
41347c478bd9Sstevel@tonic-gate }
41357c478bd9Sstevel@tonic-gate 
41367c478bd9Sstevel@tonic-gate static void
41377c478bd9Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf)
41387c478bd9Sstevel@tonic-gate {
41397c478bd9Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
41407c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = qp->hca;
41417c478bd9Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
41427c478bd9Sstevel@tonic-gate 	bufpool_t *bp;
41437c478bd9Sstevel@tonic-gate 
41447c478bd9Sstevel@tonic-gate 	/*
41457c478bd9Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
41467c478bd9Sstevel@tonic-gate 	 */
41477c478bd9Sstevel@tonic-gate 	switch (ptype) {
41487c478bd9Sstevel@tonic-gate 		case SEND_BUFFER:
41497c478bd9Sstevel@tonic-gate 			rbp = hca->send_pool;
41507c478bd9Sstevel@tonic-gate 			break;
41517c478bd9Sstevel@tonic-gate 		case RECV_BUFFER:
41527c478bd9Sstevel@tonic-gate 			rbp = hca->recv_pool;
41537c478bd9Sstevel@tonic-gate 			break;
41547c478bd9Sstevel@tonic-gate 		default:
41557c478bd9Sstevel@tonic-gate 			return;
41567c478bd9Sstevel@tonic-gate 	}
41577c478bd9Sstevel@tonic-gate 	if (rbp == NULL)
41587c478bd9Sstevel@tonic-gate 		return;
41597c478bd9Sstevel@tonic-gate 
41607c478bd9Sstevel@tonic-gate 	bp = rbp->bpool;
41617c478bd9Sstevel@tonic-gate 
41627c478bd9Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
41637c478bd9Sstevel@tonic-gate 	if (++bp->buffree >= bp->numelems) {
41647c478bd9Sstevel@tonic-gate 		/*
41657c478bd9Sstevel@tonic-gate 		 * Should never happen
41667c478bd9Sstevel@tonic-gate 		 */
41677c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_rbuf_free: One (type %d) "
41687c478bd9Sstevel@tonic-gate 			"too many frees!", ptype);
41697c478bd9Sstevel@tonic-gate 		bp->buffree--;
41707c478bd9Sstevel@tonic-gate 	} else {
41717c478bd9Sstevel@tonic-gate 		bp->buflist[bp->buffree] = buf;
41727c478bd9Sstevel@tonic-gate 		if (rib_debug > 1)
41737c478bd9Sstevel@tonic-gate 		    cmn_err(CE_NOTE, "rib_rbuf_free: %d free bufs "
41747c478bd9Sstevel@tonic-gate 			"(type %d)\n", bp->buffree+1, ptype);
41757c478bd9Sstevel@tonic-gate 	}
41767c478bd9Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
41777c478bd9Sstevel@tonic-gate }
41787c478bd9Sstevel@tonic-gate 
41797c478bd9Sstevel@tonic-gate static rdma_stat
41807c478bd9Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t *connlist)
41817c478bd9Sstevel@tonic-gate {
41827c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
41837c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd) {
41847c478bd9Sstevel@tonic-gate 		cn->c_next = connlist->conn_hd;
41857c478bd9Sstevel@tonic-gate 		connlist->conn_hd->c_prev = cn;
41867c478bd9Sstevel@tonic-gate 	}
41877c478bd9Sstevel@tonic-gate 	connlist->conn_hd = cn;
41887c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
41897c478bd9Sstevel@tonic-gate 
41907c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
41917c478bd9Sstevel@tonic-gate }
41927c478bd9Sstevel@tonic-gate 
41937c478bd9Sstevel@tonic-gate static rdma_stat
41947c478bd9Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist)
41957c478bd9Sstevel@tonic-gate {
41967c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
41977c478bd9Sstevel@tonic-gate 	if (cn->c_prev) {
41987c478bd9Sstevel@tonic-gate 		cn->c_prev->c_next = cn->c_next;
41997c478bd9Sstevel@tonic-gate 	}
42007c478bd9Sstevel@tonic-gate 	if (cn->c_next) {
42017c478bd9Sstevel@tonic-gate 		cn->c_next->c_prev = cn->c_prev;
42027c478bd9Sstevel@tonic-gate 	}
42037c478bd9Sstevel@tonic-gate 	if (connlist->conn_hd == cn)
42047c478bd9Sstevel@tonic-gate 		connlist->conn_hd = cn->c_next;
42057c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
42067c478bd9Sstevel@tonic-gate 
42077c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
42087c478bd9Sstevel@tonic-gate }
42097c478bd9Sstevel@tonic-gate 
42107c478bd9Sstevel@tonic-gate /*
42117c478bd9Sstevel@tonic-gate  * Connection management.
42127c478bd9Sstevel@tonic-gate  * IBTF does not support recycling of channels. So connections are only
42137c478bd9Sstevel@tonic-gate  * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR or
42147c478bd9Sstevel@tonic-gate  * C_DISCONN_PEND state. No C_IDLE state.
42157c478bd9Sstevel@tonic-gate  * C_CONN_PEND state: Connection establishment in progress to the server.
42167c478bd9Sstevel@tonic-gate  * C_CONNECTED state: A connection when created is in C_CONNECTED state.
42177c478bd9Sstevel@tonic-gate  * It has an RC channel associated with it. ibt_post_send/recv are allowed
42187c478bd9Sstevel@tonic-gate  * only in this state.
42197c478bd9Sstevel@tonic-gate  * C_ERROR state: A connection transitions to this state when WRs on the
42207c478bd9Sstevel@tonic-gate  * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event
42217c478bd9Sstevel@tonic-gate  * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA.
42227c478bd9Sstevel@tonic-gate  * C_DISCONN_PEND state: When a connection is in C_ERROR state and when
42237c478bd9Sstevel@tonic-gate  * c_ref drops to 0 (this indicates that RPC has no more references to this
42247c478bd9Sstevel@tonic-gate  * connection), the connection should be destroyed. A connection transitions
42257c478bd9Sstevel@tonic-gate  * into this state when it is being destroyed.
42267c478bd9Sstevel@tonic-gate  */
42277c478bd9Sstevel@tonic-gate static rdma_stat
42287c478bd9Sstevel@tonic-gate rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn)
42297c478bd9Sstevel@tonic-gate {
42307c478bd9Sstevel@tonic-gate 	CONN *cn;
42317c478bd9Sstevel@tonic-gate 	int status = RDMA_SUCCESS;
42327c478bd9Sstevel@tonic-gate 	rib_hca_t *hca = (rib_hca_t *)handle;
42337c478bd9Sstevel@tonic-gate 	rib_qp_t *qp;
42347c478bd9Sstevel@tonic-gate 	clock_t cv_stat, timout;
42357c478bd9Sstevel@tonic-gate 	ibt_path_info_t path;
42367c478bd9Sstevel@tonic-gate 
42377c478bd9Sstevel@tonic-gate again:
42387c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
42397c478bd9Sstevel@tonic-gate 	cn = hca->cl_conn_list.conn_hd;
42407c478bd9Sstevel@tonic-gate 	while (cn != NULL) {
42417c478bd9Sstevel@tonic-gate 		/*
42427c478bd9Sstevel@tonic-gate 		 * First, clear up any connection in the ERROR state
42437c478bd9Sstevel@tonic-gate 		 */
42447c478bd9Sstevel@tonic-gate 		mutex_enter(&cn->c_lock);
42457c478bd9Sstevel@tonic-gate 		if (cn->c_state == C_ERROR) {
42467c478bd9Sstevel@tonic-gate 			if (cn->c_ref == 0) {
42477c478bd9Sstevel@tonic-gate 				/*
42487c478bd9Sstevel@tonic-gate 				 * Remove connection from list and destroy it.
42497c478bd9Sstevel@tonic-gate 				 */
42507c478bd9Sstevel@tonic-gate 				cn->c_state = C_DISCONN_PEND;
42517c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
42527c478bd9Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
42537c478bd9Sstevel@tonic-gate 				(void) rib_disconnect_channel(cn,
42547c478bd9Sstevel@tonic-gate 				    &hca->cl_conn_list);
42557c478bd9Sstevel@tonic-gate 				goto again;
42567c478bd9Sstevel@tonic-gate 			}
42577c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
42587c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
42597c478bd9Sstevel@tonic-gate 			continue;
42607c478bd9Sstevel@tonic-gate 		} else if (cn->c_state == C_DISCONN_PEND) {
42617c478bd9Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
42627c478bd9Sstevel@tonic-gate 			cn = cn->c_next;
42637c478bd9Sstevel@tonic-gate 			continue;
42647c478bd9Sstevel@tonic-gate 		}
42657c478bd9Sstevel@tonic-gate 		if ((cn->c_raddr.len == svcaddr->len) &&
42667c478bd9Sstevel@tonic-gate 		    bcmp(svcaddr->buf, cn->c_raddr.buf, svcaddr->len) == 0) {
42677c478bd9Sstevel@tonic-gate 			/*
42687c478bd9Sstevel@tonic-gate 			 * Our connection. Give up conn list lock
42697c478bd9Sstevel@tonic-gate 			 * as we are done traversing the list.
42707c478bd9Sstevel@tonic-gate 			 */
42717c478bd9Sstevel@tonic-gate 			rw_exit(&hca->cl_conn_list.conn_lock);
42727c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONNECTED) {
42737c478bd9Sstevel@tonic-gate 				cn->c_ref++;	/* sharing a conn */
42747c478bd9Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
42757c478bd9Sstevel@tonic-gate 				*conn = cn;
42767c478bd9Sstevel@tonic-gate 				return (status);
42777c478bd9Sstevel@tonic-gate 			}
42787c478bd9Sstevel@tonic-gate 			if (cn->c_state == C_CONN_PEND) {
42797c478bd9Sstevel@tonic-gate 				/*
42807c478bd9Sstevel@tonic-gate 				 * Hold a reference to this conn before
42817c478bd9Sstevel@tonic-gate 				 * we give up the lock.
42827c478bd9Sstevel@tonic-gate 				 */
42837c478bd9Sstevel@tonic-gate 				cn->c_ref++;
42847c478bd9Sstevel@tonic-gate 				timout =  ddi_get_lbolt() +
42857c478bd9Sstevel@tonic-gate 				    drv_usectohz(CONN_WAIT_TIME * 1000000);
42867c478bd9Sstevel@tonic-gate 				while ((cv_stat = cv_timedwait_sig(&cn->c_cv,
42877c478bd9Sstevel@tonic-gate 					&cn->c_lock, timout)) > 0 &&
42887c478bd9Sstevel@tonic-gate 					cn->c_state == C_CONN_PEND)
42897c478bd9Sstevel@tonic-gate 					;
42907c478bd9Sstevel@tonic-gate 				if (cv_stat == 0) {
42917c478bd9Sstevel@tonic-gate 					cn->c_ref--;
42927c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
42937c478bd9Sstevel@tonic-gate 					return (RDMA_INTR);
42947c478bd9Sstevel@tonic-gate 				}
42957c478bd9Sstevel@tonic-gate 				if (cv_stat < 0) {
42967c478bd9Sstevel@tonic-gate 					cn->c_ref--;
42977c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
42987c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
42997c478bd9Sstevel@tonic-gate 				}
43007c478bd9Sstevel@tonic-gate 				if (cn->c_state == C_CONNECTED) {
43017c478bd9Sstevel@tonic-gate 					*conn = cn;
43027c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
43037c478bd9Sstevel@tonic-gate 					return (status);
43047c478bd9Sstevel@tonic-gate 				} else {
43057c478bd9Sstevel@tonic-gate 					cn->c_ref--;
43067c478bd9Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
43077c478bd9Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
43087c478bd9Sstevel@tonic-gate 				}
43097c478bd9Sstevel@tonic-gate 			}
43107c478bd9Sstevel@tonic-gate 		}
43117c478bd9Sstevel@tonic-gate 		mutex_exit(&cn->c_lock);
43127c478bd9Sstevel@tonic-gate 		cn = cn->c_next;
43137c478bd9Sstevel@tonic-gate 	}
43147c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
43157c478bd9Sstevel@tonic-gate 
43167c478bd9Sstevel@tonic-gate 	status = rib_chk_srv_ats(hca, svcaddr, addr_type, &path);
43177c478bd9Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
43187c478bd9Sstevel@tonic-gate #ifdef DEBUG
43197c478bd9Sstevel@tonic-gate 		if (rib_debug) {
43207c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_conn_get: "
43217c478bd9Sstevel@tonic-gate 				"No server ATS record!");
43227c478bd9Sstevel@tonic-gate 		}
43237c478bd9Sstevel@tonic-gate #endif
43247c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
43257c478bd9Sstevel@tonic-gate 	}
43267c478bd9Sstevel@tonic-gate 
43277c478bd9Sstevel@tonic-gate 	/*
43287c478bd9Sstevel@tonic-gate 	 * Channel to server doesn't exist yet, create one.
43297c478bd9Sstevel@tonic-gate 	 */
43307c478bd9Sstevel@tonic-gate 	if (rib_clnt_create_chan(hca, svcaddr, &qp) != RDMA_SUCCESS) {
43317c478bd9Sstevel@tonic-gate 		return (RDMA_FAILED);
43327c478bd9Sstevel@tonic-gate 	}
43337c478bd9Sstevel@tonic-gate 	cn = qptoc(qp);
43347c478bd9Sstevel@tonic-gate 	cn->c_state = C_CONN_PEND;
43357c478bd9Sstevel@tonic-gate 	cn->c_ref = 1;
43367c478bd9Sstevel@tonic-gate 
43377c478bd9Sstevel@tonic-gate 	/*
43387c478bd9Sstevel@tonic-gate 	 * Add to conn list.
43397c478bd9Sstevel@tonic-gate 	 * We had given up the READER lock. In the time since then,
43407c478bd9Sstevel@tonic-gate 	 * another thread might have created the connection we are
43417c478bd9Sstevel@tonic-gate 	 * trying here. But for now, that is quiet alright - there
43427c478bd9Sstevel@tonic-gate 	 * might be two connections between a pair of hosts instead
43437c478bd9Sstevel@tonic-gate 	 * of one. If we really want to close that window,
43447c478bd9Sstevel@tonic-gate 	 * then need to check the list after acquiring the
43457c478bd9Sstevel@tonic-gate 	 * WRITER lock.
43467c478bd9Sstevel@tonic-gate 	 */
43477c478bd9Sstevel@tonic-gate 	(void) rib_add_connlist(cn, &hca->cl_conn_list);
43487c478bd9Sstevel@tonic-gate 	status = rib_conn_to_srv(hca, qp, &path);
43497c478bd9Sstevel@tonic-gate 	mutex_enter(&cn->c_lock);
43507c478bd9Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
43517c478bd9Sstevel@tonic-gate 		cn->c_state = C_CONNECTED;
43527c478bd9Sstevel@tonic-gate 		*conn = cn;
43537c478bd9Sstevel@tonic-gate 	} else {
43547c478bd9Sstevel@tonic-gate 		cn->c_state = C_ERROR;
43557c478bd9Sstevel@tonic-gate 		cn->c_ref--;
43567c478bd9Sstevel@tonic-gate #ifdef DEBUG
43577c478bd9Sstevel@tonic-gate 		if (rib_debug) {
43587c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "rib_conn_get: FAILED creating"
43597c478bd9Sstevel@tonic-gate 			    " a channel!");
43607c478bd9Sstevel@tonic-gate 		}
43617c478bd9Sstevel@tonic-gate #endif
43627c478bd9Sstevel@tonic-gate 	}
43637c478bd9Sstevel@tonic-gate 	cv_broadcast(&cn->c_cv);
43647c478bd9Sstevel@tonic-gate 	mutex_exit(&cn->c_lock);
43657c478bd9Sstevel@tonic-gate 	return (status);
43667c478bd9Sstevel@tonic-gate }
43677c478bd9Sstevel@tonic-gate 
43687c478bd9Sstevel@tonic-gate static rdma_stat
43697c478bd9Sstevel@tonic-gate rib_conn_release(CONN *conn)
43707c478bd9Sstevel@tonic-gate {
43717c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
43727c478bd9Sstevel@tonic-gate 
43737c478bd9Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
43747c478bd9Sstevel@tonic-gate 	conn->c_ref--;
43757c478bd9Sstevel@tonic-gate 
43767c478bd9Sstevel@tonic-gate 	/*
43777c478bd9Sstevel@tonic-gate 	 * If a conn is C_ERROR, close the channel.
43787c478bd9Sstevel@tonic-gate 	 * If it's CONNECTED, keep it that way.
43797c478bd9Sstevel@tonic-gate 	 */
43807c478bd9Sstevel@tonic-gate 	if (conn->c_ref == 0 && (conn->c_state &  C_ERROR)) {
43817c478bd9Sstevel@tonic-gate 		conn->c_state = C_DISCONN_PEND;
43827c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
43837c478bd9Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER)
43847c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn,
43857c478bd9Sstevel@tonic-gate 			    &qp->hca->srv_conn_list);
43867c478bd9Sstevel@tonic-gate 		else
43877c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn,
43887c478bd9Sstevel@tonic-gate 			    &qp->hca->cl_conn_list);
43897c478bd9Sstevel@tonic-gate 		return (RDMA_SUCCESS);
43907c478bd9Sstevel@tonic-gate 	}
43917c478bd9Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
43927c478bd9Sstevel@tonic-gate 	return (RDMA_SUCCESS);
43937c478bd9Sstevel@tonic-gate }
43947c478bd9Sstevel@tonic-gate 
43957c478bd9Sstevel@tonic-gate /*
43967c478bd9Sstevel@tonic-gate  * Add at front of list
43977c478bd9Sstevel@tonic-gate  */
43987c478bd9Sstevel@tonic-gate static struct rdma_done_list *
43997c478bd9Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid)
44007c478bd9Sstevel@tonic-gate {
44017c478bd9Sstevel@tonic-gate 	struct rdma_done_list *rd;
44027c478bd9Sstevel@tonic-gate 
44037c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
44047c478bd9Sstevel@tonic-gate 
44057c478bd9Sstevel@tonic-gate 	rd = kmem_alloc(sizeof (*rd), KM_SLEEP);
44067c478bd9Sstevel@tonic-gate 	rd->xid = xid;
44077c478bd9Sstevel@tonic-gate 	cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL);
44087c478bd9Sstevel@tonic-gate 
44097c478bd9Sstevel@tonic-gate 	rd->prev = NULL;
44107c478bd9Sstevel@tonic-gate 	rd->next = qp->rdlist;
44117c478bd9Sstevel@tonic-gate 	if (qp->rdlist != NULL)
44127c478bd9Sstevel@tonic-gate 		qp->rdlist->prev = rd;
44137c478bd9Sstevel@tonic-gate 	qp->rdlist = rd;
44147c478bd9Sstevel@tonic-gate 
44157c478bd9Sstevel@tonic-gate 	return (rd);
44167c478bd9Sstevel@tonic-gate }
44177c478bd9Sstevel@tonic-gate 
44187c478bd9Sstevel@tonic-gate static void
44197c478bd9Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd)
44207c478bd9Sstevel@tonic-gate {
44217c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r;
44227c478bd9Sstevel@tonic-gate 
44237c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
44247c478bd9Sstevel@tonic-gate 
44257c478bd9Sstevel@tonic-gate 	r = rd->next;
44267c478bd9Sstevel@tonic-gate 	if (r != NULL) {
44277c478bd9Sstevel@tonic-gate 		r->prev = rd->prev;
44287c478bd9Sstevel@tonic-gate 	}
44297c478bd9Sstevel@tonic-gate 
44307c478bd9Sstevel@tonic-gate 	r = rd->prev;
44317c478bd9Sstevel@tonic-gate 	if (r != NULL) {
44327c478bd9Sstevel@tonic-gate 		r->next = rd->next;
44337c478bd9Sstevel@tonic-gate 	} else {
44347c478bd9Sstevel@tonic-gate 		qp->rdlist = rd->next;
44357c478bd9Sstevel@tonic-gate 	}
44367c478bd9Sstevel@tonic-gate 
44377c478bd9Sstevel@tonic-gate 	cv_destroy(&rd->rdma_done_cv);
44387c478bd9Sstevel@tonic-gate 	kmem_free(rd, sizeof (*rd));
44397c478bd9Sstevel@tonic-gate }
44407c478bd9Sstevel@tonic-gate 
44417c478bd9Sstevel@tonic-gate static void
44427c478bd9Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp)
44437c478bd9Sstevel@tonic-gate {
44447c478bd9Sstevel@tonic-gate 	struct rdma_done_list	*r, *n;
44457c478bd9Sstevel@tonic-gate 
44467c478bd9Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
44477c478bd9Sstevel@tonic-gate 	for (r = qp->rdlist; r != NULL; r = n) {
44487c478bd9Sstevel@tonic-gate 		n = r->next;
44497c478bd9Sstevel@tonic-gate 		rdma_done_rm(qp, r);
44507c478bd9Sstevel@tonic-gate 	}
44517c478bd9Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
44527c478bd9Sstevel@tonic-gate }
44537c478bd9Sstevel@tonic-gate 
44547c478bd9Sstevel@tonic-gate static void
44557c478bd9Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid)
44567c478bd9Sstevel@tonic-gate {
44577c478bd9Sstevel@tonic-gate 	struct rdma_done_list *r = qp->rdlist;
44587c478bd9Sstevel@tonic-gate 
44597c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
44607c478bd9Sstevel@tonic-gate 
44617c478bd9Sstevel@tonic-gate 	while (r) {
44627c478bd9Sstevel@tonic-gate 		if (r->xid == xid) {
44637c478bd9Sstevel@tonic-gate 			cv_signal(&r->rdma_done_cv);
44647c478bd9Sstevel@tonic-gate 			return;
44657c478bd9Sstevel@tonic-gate 		} else {
44667c478bd9Sstevel@tonic-gate 			r = r->next;
44677c478bd9Sstevel@tonic-gate 		}
44687c478bd9Sstevel@tonic-gate 	}
44697c478bd9Sstevel@tonic-gate 	if (rib_debug > 1) {
44707c478bd9Sstevel@tonic-gate 	    cmn_err(CE_WARN, "rdma_done_notify: "
44717c478bd9Sstevel@tonic-gate 		"No matching xid for %u, qp %p\n", xid, (void *)qp);
44727c478bd9Sstevel@tonic-gate 	}
44737c478bd9Sstevel@tonic-gate }
44747c478bd9Sstevel@tonic-gate 
44757c478bd9Sstevel@tonic-gate rpcib_ats_t *
44767c478bd9Sstevel@tonic-gate get_ibd_entry(ib_gid_t *gid, ib_pkey_t pkey, rpcib_ibd_insts_t *ibds)
44777c478bd9Sstevel@tonic-gate {
44787c478bd9Sstevel@tonic-gate 	rpcib_ats_t		*atsp;
44797c478bd9Sstevel@tonic-gate 	int			i;
44807c478bd9Sstevel@tonic-gate 
44817c478bd9Sstevel@tonic-gate 	for (i = 0, atsp = ibds->rib_ats; i < ibds->rib_ibd_cnt; i++, atsp++) {
44827c478bd9Sstevel@tonic-gate 		if (atsp->ras_port_gid.gid_prefix == gid->gid_prefix &&
44837c478bd9Sstevel@tonic-gate 		    atsp->ras_port_gid.gid_guid == gid->gid_guid &&
44847c478bd9Sstevel@tonic-gate 		    atsp->ras_pkey == pkey) {
44857c478bd9Sstevel@tonic-gate 			return (atsp);
44867c478bd9Sstevel@tonic-gate 		}
44877c478bd9Sstevel@tonic-gate 	}
44887c478bd9Sstevel@tonic-gate 	return (NULL);
44897c478bd9Sstevel@tonic-gate }
44907c478bd9Sstevel@tonic-gate 
44917c478bd9Sstevel@tonic-gate int
44927c478bd9Sstevel@tonic-gate rib_get_ibd_insts_cb(dev_info_t *dip, void *arg)
44937c478bd9Sstevel@tonic-gate {
44947c478bd9Sstevel@tonic-gate 	rpcib_ibd_insts_t *ibds = (rpcib_ibd_insts_t *)arg;
44957c478bd9Sstevel@tonic-gate 	rpcib_ats_t	*atsp;
44967c478bd9Sstevel@tonic-gate 	ib_pkey_t	pkey;
44977c478bd9Sstevel@tonic-gate 	uint8_t		port;
44987c478bd9Sstevel@tonic-gate 	ib_guid_t	hca_guid;
44997c478bd9Sstevel@tonic-gate 	ib_gid_t	port_gid;
45007c478bd9Sstevel@tonic-gate 
45017c478bd9Sstevel@tonic-gate 	if ((i_ddi_node_state(dip) >= DS_ATTACHED) &&
45027c478bd9Sstevel@tonic-gate 	    (strcmp(ddi_node_name(dip), "ibport") == 0) &&
45037c478bd9Sstevel@tonic-gate 	    (strstr(ddi_get_name_addr(dip), "ipib") != NULL)) {
45047c478bd9Sstevel@tonic-gate 
45057c478bd9Sstevel@tonic-gate 		if (ibds->rib_ibd_cnt >= ibds->rib_ibd_alloc) {
45067c478bd9Sstevel@tonic-gate 		    rpcib_ats_t	*tmp;
45077c478bd9Sstevel@tonic-gate 
45087c478bd9Sstevel@tonic-gate 		    tmp = (rpcib_ats_t *)kmem_zalloc((ibds->rib_ibd_alloc +
45097c478bd9Sstevel@tonic-gate 			N_IBD_INSTANCES) * sizeof (rpcib_ats_t), KM_SLEEP);
45107c478bd9Sstevel@tonic-gate 		    bcopy(ibds->rib_ats, tmp,
45117c478bd9Sstevel@tonic-gate 			ibds->rib_ibd_alloc * sizeof (rpcib_ats_t));
45127c478bd9Sstevel@tonic-gate 		    kmem_free(ibds->rib_ats,
45137c478bd9Sstevel@tonic-gate 			ibds->rib_ibd_alloc * sizeof (rpcib_ats_t));
45147c478bd9Sstevel@tonic-gate 		    ibds->rib_ats = tmp;
45157c478bd9Sstevel@tonic-gate 		    ibds->rib_ibd_alloc += N_IBD_INSTANCES;
45167c478bd9Sstevel@tonic-gate 		}
45177c478bd9Sstevel@tonic-gate 		if (((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY,
45187c478bd9Sstevel@tonic-gate 			dip, 0, "hca-guid", 0)) == 0) ||
45197c478bd9Sstevel@tonic-gate 		    ((port = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
45207c478bd9Sstevel@tonic-gate 			0, "port-number", 0)) == 0) ||
45217c478bd9Sstevel@tonic-gate 		    (ibt_get_port_state_byguid(hca_guid, port,
45227c478bd9Sstevel@tonic-gate 			&port_gid, NULL) != IBT_SUCCESS) ||
45237c478bd9Sstevel@tonic-gate 		    ((pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
45247c478bd9Sstevel@tonic-gate 			"port-pkey", IB_PKEY_INVALID_LIMITED)) <=
45257c478bd9Sstevel@tonic-gate 			IB_PKEY_INVALID_FULL)) {
45267c478bd9Sstevel@tonic-gate 		    return (DDI_WALK_CONTINUE);
45277c478bd9Sstevel@tonic-gate 		}
45287c478bd9Sstevel@tonic-gate 		atsp = &ibds->rib_ats[ibds->rib_ibd_cnt];
45297c478bd9Sstevel@tonic-gate 		atsp->ras_inst = ddi_get_instance(dip);
45307c478bd9Sstevel@tonic-gate 		atsp->ras_pkey = pkey;
45317c478bd9Sstevel@tonic-gate 		atsp->ras_port_gid = port_gid;
45327c478bd9Sstevel@tonic-gate 		ibds->rib_ibd_cnt++;
45337c478bd9Sstevel@tonic-gate 	}
45347c478bd9Sstevel@tonic-gate 	return (DDI_WALK_CONTINUE);
45357c478bd9Sstevel@tonic-gate }
45367c478bd9Sstevel@tonic-gate 
45377c478bd9Sstevel@tonic-gate void
45387c478bd9Sstevel@tonic-gate rib_get_ibd_insts(rpcib_ibd_insts_t *ibds)
45397c478bd9Sstevel@tonic-gate {
45407c478bd9Sstevel@tonic-gate 	ddi_walk_devs(ddi_root_node(), rib_get_ibd_insts_cb, ibds);
45417c478bd9Sstevel@tonic-gate }
45427c478bd9Sstevel@tonic-gate 
45437c478bd9Sstevel@tonic-gate /*
45447c478bd9Sstevel@tonic-gate  * Return ibd interfaces and ibd instances.
45457c478bd9Sstevel@tonic-gate  */
45467c478bd9Sstevel@tonic-gate int
45477c478bd9Sstevel@tonic-gate get_ibd_ipaddr(rpcib_ibd_insts_t *ibds)
45487c478bd9Sstevel@tonic-gate {
45497c478bd9Sstevel@tonic-gate 	TIUSER			*tiptr, *tiptr6;
45507c478bd9Sstevel@tonic-gate 	vnode_t			*kvp, *kvp6;
45517c478bd9Sstevel@tonic-gate 	vnode_t			*vp = NULL, *vp6 = NULL;
45527c478bd9Sstevel@tonic-gate 	struct strioctl		iocb;
45537c478bd9Sstevel@tonic-gate 	struct lifreq		lif_req;
45547c478bd9Sstevel@tonic-gate 	int			k, ip_cnt;
45557c478bd9Sstevel@tonic-gate 	rpcib_ats_t		*atsp;
45567c478bd9Sstevel@tonic-gate 
45577c478bd9Sstevel@tonic-gate 	if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP,
45587c478bd9Sstevel@tonic-gate 		&kvp) == 0) {
45597c478bd9Sstevel@tonic-gate 	    if (t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE,
45607c478bd9Sstevel@tonic-gate 		&tiptr, CRED()) == 0) {
45617c478bd9Sstevel@tonic-gate 		vp = tiptr->fp->f_vnode;
45627c478bd9Sstevel@tonic-gate 	    } else {
45637c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
45647c478bd9Sstevel@tonic-gate 	    }
45657c478bd9Sstevel@tonic-gate 	}
45667c478bd9Sstevel@tonic-gate 
45677c478bd9Sstevel@tonic-gate 	if (lookupname("/dev/udp6", UIO_SYSSPACE, FOLLOW, NULLVPP,
45687c478bd9Sstevel@tonic-gate 		&kvp6) == 0) {
45697c478bd9Sstevel@tonic-gate 	    if (t_kopen((file_t *)NULL, kvp6->v_rdev, FREAD|FWRITE,
45707c478bd9Sstevel@tonic-gate 		&tiptr6, CRED()) == 0) {
45717c478bd9Sstevel@tonic-gate 		vp6 = tiptr6->fp->f_vnode;
45727c478bd9Sstevel@tonic-gate 	    } else {
45737c478bd9Sstevel@tonic-gate 		VN_RELE(kvp6);
45747c478bd9Sstevel@tonic-gate 	    }
45757c478bd9Sstevel@tonic-gate 	}
45767c478bd9Sstevel@tonic-gate 
45777c478bd9Sstevel@tonic-gate 	if (vp == NULL && vp6 == NULL)
45787c478bd9Sstevel@tonic-gate 		return (-1);
45797c478bd9Sstevel@tonic-gate 
45807c478bd9Sstevel@tonic-gate 	/* Get ibd ip's */
45817c478bd9Sstevel@tonic-gate 	ip_cnt = 0;
45827c478bd9Sstevel@tonic-gate 	for (k = 0, atsp = ibds->rib_ats; k < ibds->rib_ibd_cnt; k++, atsp++) {
45837c478bd9Sstevel@tonic-gate 		/* IPv4 */
45847c478bd9Sstevel@tonic-gate 	    if (vp != NULL) {
45857c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&lif_req, sizeof (struct lifreq));
45867c478bd9Sstevel@tonic-gate 		(void) snprintf(lif_req.lifr_name,
45877c478bd9Sstevel@tonic-gate 			sizeof (lif_req.lifr_name), "%s%d",
45887c478bd9Sstevel@tonic-gate 			IBD_NAME, atsp->ras_inst);
45897c478bd9Sstevel@tonic-gate 
45907c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&iocb, sizeof (struct strioctl));
45917c478bd9Sstevel@tonic-gate 		iocb.ic_cmd = SIOCGLIFADDR;
45927c478bd9Sstevel@tonic-gate 		iocb.ic_timout = 0;
45937c478bd9Sstevel@tonic-gate 		iocb.ic_len = sizeof (struct lifreq);
45947c478bd9Sstevel@tonic-gate 		iocb.ic_dp = (caddr_t)&lif_req;
45957c478bd9Sstevel@tonic-gate 		if (kstr_ioctl(vp, I_STR, (intptr_t)&iocb) == 0) {
45967c478bd9Sstevel@tonic-gate 		    atsp->ras_inet_type = AF_INET;
45977c478bd9Sstevel@tonic-gate 		    bcopy(&lif_req.lifr_addr, &atsp->ras_sin,
45987c478bd9Sstevel@tonic-gate 			sizeof (struct sockaddr_in));
45997c478bd9Sstevel@tonic-gate 		    ip_cnt++;
46007c478bd9Sstevel@tonic-gate 		    continue;
46017c478bd9Sstevel@tonic-gate 		}
46027c478bd9Sstevel@tonic-gate 	    }
46037c478bd9Sstevel@tonic-gate 		/* Try IPv6 */
46047c478bd9Sstevel@tonic-gate 	    if (vp6 != NULL) {
46057c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&lif_req, sizeof (struct lifreq));
46067c478bd9Sstevel@tonic-gate 		(void) snprintf(lif_req.lifr_name,
46077c478bd9Sstevel@tonic-gate 			sizeof (lif_req.lifr_name), "%s%d",
46087c478bd9Sstevel@tonic-gate 			IBD_NAME, atsp->ras_inst);
46097c478bd9Sstevel@tonic-gate 
46107c478bd9Sstevel@tonic-gate 		(void) bzero((void *)&iocb, sizeof (struct strioctl));
46117c478bd9Sstevel@tonic-gate 		iocb.ic_cmd = SIOCGLIFADDR;
46127c478bd9Sstevel@tonic-gate 		iocb.ic_timout = 0;
46137c478bd9Sstevel@tonic-gate 		iocb.ic_len = sizeof (struct lifreq);
46147c478bd9Sstevel@tonic-gate 		iocb.ic_dp = (caddr_t)&lif_req;
46157c478bd9Sstevel@tonic-gate 		if (kstr_ioctl(vp6, I_STR, (intptr_t)&iocb) == 0) {
46167c478bd9Sstevel@tonic-gate 
46177c478bd9Sstevel@tonic-gate 		    atsp->ras_inet_type = AF_INET6;
46187c478bd9Sstevel@tonic-gate 		    bcopy(&lif_req.lifr_addr, &atsp->ras_sin6,
46197c478bd9Sstevel@tonic-gate 			    sizeof (struct sockaddr_in6));
46207c478bd9Sstevel@tonic-gate 		    ip_cnt++;
46217c478bd9Sstevel@tonic-gate 		}
46227c478bd9Sstevel@tonic-gate 	    }
46237c478bd9Sstevel@tonic-gate 	}
46247c478bd9Sstevel@tonic-gate 
46257c478bd9Sstevel@tonic-gate 	if (vp6 != NULL) {
46267c478bd9Sstevel@tonic-gate 	    (void) t_kclose(tiptr6, 0);
46277c478bd9Sstevel@tonic-gate 	    VN_RELE(kvp6);
46287c478bd9Sstevel@tonic-gate 	}
46297c478bd9Sstevel@tonic-gate 	if (vp != NULL) {
46307c478bd9Sstevel@tonic-gate 	    (void) t_kclose(tiptr, 0);
46317c478bd9Sstevel@tonic-gate 	    VN_RELE(kvp);
46327c478bd9Sstevel@tonic-gate 	}
46337c478bd9Sstevel@tonic-gate 
46347c478bd9Sstevel@tonic-gate 	if (ip_cnt == 0)
46357c478bd9Sstevel@tonic-gate 	    return (-1);
46367c478bd9Sstevel@tonic-gate 	else
46377c478bd9Sstevel@tonic-gate 	    return (0);
46387c478bd9Sstevel@tonic-gate }
46397c478bd9Sstevel@tonic-gate 
46407c478bd9Sstevel@tonic-gate char **
46417c478bd9Sstevel@tonic-gate get_ip_addrs(int *count)
46427c478bd9Sstevel@tonic-gate {
46437c478bd9Sstevel@tonic-gate 	TIUSER			*tiptr;
46447c478bd9Sstevel@tonic-gate 	vnode_t			*kvp;
46457c478bd9Sstevel@tonic-gate 	int			num_of_ifs;
46467c478bd9Sstevel@tonic-gate 	char			**addresses;
46477c478bd9Sstevel@tonic-gate 	int			return_code;
46487c478bd9Sstevel@tonic-gate 
46497c478bd9Sstevel@tonic-gate 	/*
46507c478bd9Sstevel@tonic-gate 	 * Open a device for doing down stream kernel ioctls
46517c478bd9Sstevel@tonic-gate 	 */
46527c478bd9Sstevel@tonic-gate 	return_code = lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW,
46537c478bd9Sstevel@tonic-gate 	    NULLVPP, &kvp);
46547c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
46557c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: lookupname failed\n");
46567c478bd9Sstevel@tonic-gate 		*count = -1;
46577c478bd9Sstevel@tonic-gate 		return (NULL);
46587c478bd9Sstevel@tonic-gate 	}
46597c478bd9Sstevel@tonic-gate 
46607c478bd9Sstevel@tonic-gate 	return_code = t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE,
46617c478bd9Sstevel@tonic-gate 	    &tiptr, CRED());
46627c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
46637c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: t_kopen failed\n");
46647c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
46657c478bd9Sstevel@tonic-gate 		*count = -1;
46667c478bd9Sstevel@tonic-gate 		return (NULL);
46677c478bd9Sstevel@tonic-gate 	}
46687c478bd9Sstevel@tonic-gate 
46697c478bd9Sstevel@tonic-gate 	/*
46707c478bd9Sstevel@tonic-gate 	 * Perform the first ioctl to get the number of interfaces
46717c478bd9Sstevel@tonic-gate 	 */
46727c478bd9Sstevel@tonic-gate 	return_code = get_interfaces(tiptr, &num_of_ifs);
46737c478bd9Sstevel@tonic-gate 	if (return_code != 0 || num_of_ifs == 0) {
46747c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: get_interfaces failed\n");
46757c478bd9Sstevel@tonic-gate 		(void) t_kclose(tiptr, 0);
46767c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
46777c478bd9Sstevel@tonic-gate 		*count = -1;
46787c478bd9Sstevel@tonic-gate 		return (NULL);
46797c478bd9Sstevel@tonic-gate 	}
46807c478bd9Sstevel@tonic-gate 
46817c478bd9Sstevel@tonic-gate 	/*
46827c478bd9Sstevel@tonic-gate 	 * Perform the second ioctl to get the address on each interface
46837c478bd9Sstevel@tonic-gate 	 * found.
46847c478bd9Sstevel@tonic-gate 	 */
46857c478bd9Sstevel@tonic-gate 	addresses = kmem_zalloc(num_of_ifs * sizeof (char *), KM_SLEEP);
46867c478bd9Sstevel@tonic-gate 	return_code = find_addrs(tiptr, addresses, num_of_ifs);
46877c478bd9Sstevel@tonic-gate 	if (return_code <= 0) {
46887c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_Ip_addrs: find_addrs failed\n");
46897c478bd9Sstevel@tonic-gate 		(void) t_kclose(tiptr, 0);
46907c478bd9Sstevel@tonic-gate 		kmem_free(addresses, num_of_ifs * sizeof (char *));
46917c478bd9Sstevel@tonic-gate 		VN_RELE(kvp);
46927c478bd9Sstevel@tonic-gate 		*count = -1;
46937c478bd9Sstevel@tonic-gate 		return (NULL);
46947c478bd9Sstevel@tonic-gate 	}
46957c478bd9Sstevel@tonic-gate 
46967c478bd9Sstevel@tonic-gate 	*count = return_code;
46977c478bd9Sstevel@tonic-gate 	VN_RELE(kvp);
46987c478bd9Sstevel@tonic-gate 	(void) t_kclose(tiptr, 0);
46997c478bd9Sstevel@tonic-gate 	return (addresses);
47007c478bd9Sstevel@tonic-gate }
47017c478bd9Sstevel@tonic-gate 
47027c478bd9Sstevel@tonic-gate int
47037c478bd9Sstevel@tonic-gate get_interfaces(TIUSER *tiptr, int *num)
47047c478bd9Sstevel@tonic-gate {
47057c478bd9Sstevel@tonic-gate 	struct lifnum		if_buf;
47067c478bd9Sstevel@tonic-gate 	struct strioctl		iocb;
47077c478bd9Sstevel@tonic-gate 	vnode_t			*vp;
47087c478bd9Sstevel@tonic-gate 	int			return_code;
47097c478bd9Sstevel@tonic-gate 
47107c478bd9Sstevel@tonic-gate 	/*
47117c478bd9Sstevel@tonic-gate 	 * Prep the number of interfaces request buffer for ioctl
47127c478bd9Sstevel@tonic-gate 	 */
47137c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&if_buf, sizeof (struct lifnum));
47147c478bd9Sstevel@tonic-gate 	if_buf.lifn_family = AF_UNSPEC;
47157c478bd9Sstevel@tonic-gate 	if_buf.lifn_flags = 0;
47167c478bd9Sstevel@tonic-gate 
47177c478bd9Sstevel@tonic-gate 	/*
47187c478bd9Sstevel@tonic-gate 	 * Prep the kernel ioctl buffer and send it down stream
47197c478bd9Sstevel@tonic-gate 	 */
47207c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&iocb, sizeof (struct strioctl));
47217c478bd9Sstevel@tonic-gate 	iocb.ic_cmd = SIOCGLIFNUM;
47227c478bd9Sstevel@tonic-gate 	iocb.ic_timout = 0;
47237c478bd9Sstevel@tonic-gate 	iocb.ic_len = sizeof (if_buf);
47247c478bd9Sstevel@tonic-gate 	iocb.ic_dp = (caddr_t)&if_buf;
47257c478bd9Sstevel@tonic-gate 
47267c478bd9Sstevel@tonic-gate 	vp = tiptr->fp->f_vnode;
47277c478bd9Sstevel@tonic-gate 	return_code = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
47287c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
47297c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "get_interfaces: kstr_ioctl failed\n");
47307c478bd9Sstevel@tonic-gate 		*num = -1;
47317c478bd9Sstevel@tonic-gate 		return (-1);
47327c478bd9Sstevel@tonic-gate 	}
47337c478bd9Sstevel@tonic-gate 
47347c478bd9Sstevel@tonic-gate 	*num = if_buf.lifn_count;
47357c478bd9Sstevel@tonic-gate #ifdef	DEBUG
47367c478bd9Sstevel@tonic-gate 	if (rib_debug > 1)
47377c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "Number of interfaces detected: %d\n",
47387c478bd9Sstevel@tonic-gate 		    if_buf.lifn_count);
47397c478bd9Sstevel@tonic-gate #endif
47407c478bd9Sstevel@tonic-gate 	return (0);
47417c478bd9Sstevel@tonic-gate }
47427c478bd9Sstevel@tonic-gate 
47437c478bd9Sstevel@tonic-gate int
47447c478bd9Sstevel@tonic-gate find_addrs(TIUSER *tiptr, char **addrs, int num_ifs)
47457c478bd9Sstevel@tonic-gate {
47467c478bd9Sstevel@tonic-gate 	struct lifconf		lifc;
47477c478bd9Sstevel@tonic-gate 	struct lifreq		*if_data_buf;
47487c478bd9Sstevel@tonic-gate 	struct strioctl		iocb;
47497c478bd9Sstevel@tonic-gate 	caddr_t			request_buffer;
47507c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin4;
47517c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
47527c478bd9Sstevel@tonic-gate 	vnode_t			*vp;
47537c478bd9Sstevel@tonic-gate 	int			i, count, return_code;
47547c478bd9Sstevel@tonic-gate 
47557c478bd9Sstevel@tonic-gate 	/*
47567c478bd9Sstevel@tonic-gate 	 * Prep the buffer for requesting all interface's info
47577c478bd9Sstevel@tonic-gate 	 */
47587c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&lifc, sizeof (struct lifconf));
47597c478bd9Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
47607c478bd9Sstevel@tonic-gate 	lifc.lifc_flags = 0;
47617c478bd9Sstevel@tonic-gate 	lifc.lifc_len = num_ifs * sizeof (struct lifreq);
47627c478bd9Sstevel@tonic-gate 
47637c478bd9Sstevel@tonic-gate 	request_buffer = kmem_zalloc(num_ifs * sizeof (struct lifreq),
47647c478bd9Sstevel@tonic-gate 	    KM_SLEEP);
47657c478bd9Sstevel@tonic-gate 
47667c478bd9Sstevel@tonic-gate 	lifc.lifc_buf = request_buffer;
47677c478bd9Sstevel@tonic-gate 
47687c478bd9Sstevel@tonic-gate 	/*
47697c478bd9Sstevel@tonic-gate 	 * Prep the kernel ioctl buffer and send it down stream
47707c478bd9Sstevel@tonic-gate 	 */
47717c478bd9Sstevel@tonic-gate 	(void) bzero((void *)&iocb, sizeof (struct strioctl));
47727c478bd9Sstevel@tonic-gate 	iocb.ic_cmd = SIOCGLIFCONF;
47737c478bd9Sstevel@tonic-gate 	iocb.ic_timout = 0;
47747c478bd9Sstevel@tonic-gate 	iocb.ic_len = sizeof (struct lifconf);
47757c478bd9Sstevel@tonic-gate 	iocb.ic_dp = (caddr_t)&lifc;
47767c478bd9Sstevel@tonic-gate 
47777c478bd9Sstevel@tonic-gate 	vp = tiptr->fp->f_vnode;
47787c478bd9Sstevel@tonic-gate 	return_code = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
47797c478bd9Sstevel@tonic-gate 	if (return_code != 0) {
47807c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "find_addrs: kstr_ioctl failed\n");
47817c478bd9Sstevel@tonic-gate 		kmem_free(request_buffer, num_ifs * sizeof (struct lifreq));
47827c478bd9Sstevel@tonic-gate 		return (-1);
47837c478bd9Sstevel@tonic-gate 	}
47847c478bd9Sstevel@tonic-gate 
47857c478bd9Sstevel@tonic-gate 	/*
47867c478bd9Sstevel@tonic-gate 	 * Extract addresses and fill them in the requested array
47877c478bd9Sstevel@tonic-gate 	 * IB_SVC_NAME_LEN is defined to be 64 so it  covers both IPv4 &
47887c478bd9Sstevel@tonic-gate 	 * IPv6. Here count is the number of IP addresses collected.
47897c478bd9Sstevel@tonic-gate 	 */
47907c478bd9Sstevel@tonic-gate 	if_data_buf = lifc.lifc_req;
47917c478bd9Sstevel@tonic-gate 	count = 0;
47927c478bd9Sstevel@tonic-gate 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--,
47937c478bd9Sstevel@tonic-gate 	if_data_buf++) {
47947c478bd9Sstevel@tonic-gate 		if (if_data_buf->lifr_addr.ss_family == AF_INET) {
47957c478bd9Sstevel@tonic-gate 			sin4 = (struct sockaddr_in *)&if_data_buf->lifr_addr;
47967c478bd9Sstevel@tonic-gate 			addrs[count] = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
47977c478bd9Sstevel@tonic-gate 			(void) inet_ntop(AF_INET, &sin4->sin_addr,
47987c478bd9Sstevel@tonic-gate 			    addrs[count], IB_SVC_NAME_LEN);
47997c478bd9Sstevel@tonic-gate 			count ++;
48007c478bd9Sstevel@tonic-gate 		}
48017c478bd9Sstevel@tonic-gate 
48027c478bd9Sstevel@tonic-gate 		if (if_data_buf->lifr_addr.ss_family == AF_INET6) {
48037c478bd9Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)&if_data_buf->lifr_addr;
48047c478bd9Sstevel@tonic-gate 			addrs[count] = kmem_zalloc(IB_SVC_NAME_LEN, KM_SLEEP);
48057c478bd9Sstevel@tonic-gate 			(void) inet_ntop(AF_INET6, &sin6->sin6_addr,
48067c478bd9Sstevel@tonic-gate 			    addrs[count], IB_SVC_NAME_LEN);
48077c478bd9Sstevel@tonic-gate 			count ++;
48087c478bd9Sstevel@tonic-gate 		}
48097c478bd9Sstevel@tonic-gate 	}
48107c478bd9Sstevel@tonic-gate 
48117c478bd9Sstevel@tonic-gate 	kmem_free(request_buffer, num_ifs * sizeof (struct lifreq));
48127c478bd9Sstevel@tonic-gate 	return (count);
48137c478bd9Sstevel@tonic-gate }
48147c478bd9Sstevel@tonic-gate 
48157c478bd9Sstevel@tonic-gate /*
48167c478bd9Sstevel@tonic-gate  * Goes through all connections and closes the channel
48177c478bd9Sstevel@tonic-gate  * This will cause all the WRs on those channels to be
48187c478bd9Sstevel@tonic-gate  * flushed.
48197c478bd9Sstevel@tonic-gate  */
48207c478bd9Sstevel@tonic-gate static void
48217c478bd9Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist)
48227c478bd9Sstevel@tonic-gate {
48237c478bd9Sstevel@tonic-gate 	CONN 		*conn;
48247c478bd9Sstevel@tonic-gate 	rib_qp_t	*qp;
48257c478bd9Sstevel@tonic-gate 
48267c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
48277c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
48287c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
48297c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
48307c478bd9Sstevel@tonic-gate 		qp = ctoqp(conn);
48317c478bd9Sstevel@tonic-gate 		if (conn->c_state & C_CONNECTED) {
48327c478bd9Sstevel@tonic-gate 			/*
48337c478bd9Sstevel@tonic-gate 			 * Live connection in CONNECTED state.
48347c478bd9Sstevel@tonic-gate 			 * Call ibt_close_rc_channel in nonblocking mode
48357c478bd9Sstevel@tonic-gate 			 * with no callbacks.
48367c478bd9Sstevel@tonic-gate 			 */
48377c478bd9Sstevel@tonic-gate 			conn->c_state = C_ERROR;
48387c478bd9Sstevel@tonic-gate 			(void) ibt_close_rc_channel(qp->qp_hdl,
48397c478bd9Sstevel@tonic-gate 				IBT_NOCALLBACKS, NULL, 0, NULL, NULL, 0);
48407c478bd9Sstevel@tonic-gate 			(void) ibt_free_channel(qp->qp_hdl);
48417c478bd9Sstevel@tonic-gate 			qp->qp_hdl = NULL;
48427c478bd9Sstevel@tonic-gate 		} else {
48437c478bd9Sstevel@tonic-gate 			if (conn->c_state == C_ERROR &&
48447c478bd9Sstevel@tonic-gate 				qp->qp_hdl != NULL) {
48457c478bd9Sstevel@tonic-gate 				/*
48467c478bd9Sstevel@tonic-gate 				 * Connection in ERROR state but
48477c478bd9Sstevel@tonic-gate 				 * channel is not yet freed.
48487c478bd9Sstevel@tonic-gate 				 */
48497c478bd9Sstevel@tonic-gate 				(void) ibt_close_rc_channel(qp->qp_hdl,
48507c478bd9Sstevel@tonic-gate 					IBT_NOCALLBACKS, NULL, 0, NULL,
48517c478bd9Sstevel@tonic-gate 					NULL, 0);
48527c478bd9Sstevel@tonic-gate 				(void) ibt_free_channel(qp->qp_hdl);
48537c478bd9Sstevel@tonic-gate 				qp->qp_hdl = NULL;
48547c478bd9Sstevel@tonic-gate 			}
48557c478bd9Sstevel@tonic-gate 		}
48567c478bd9Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
48577c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
48587c478bd9Sstevel@tonic-gate 	}
48597c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
48607c478bd9Sstevel@tonic-gate }
48617c478bd9Sstevel@tonic-gate 
48627c478bd9Sstevel@tonic-gate /*
48637c478bd9Sstevel@tonic-gate  * Frees up all connections that are no longer being referenced
48647c478bd9Sstevel@tonic-gate  */
48657c478bd9Sstevel@tonic-gate static void
48667c478bd9Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist)
48677c478bd9Sstevel@tonic-gate {
48687c478bd9Sstevel@tonic-gate 	CONN 		*conn;
48697c478bd9Sstevel@tonic-gate 
48707c478bd9Sstevel@tonic-gate top:
48717c478bd9Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
48727c478bd9Sstevel@tonic-gate 	conn = connlist->conn_hd;
48737c478bd9Sstevel@tonic-gate 	while (conn != NULL) {
48747c478bd9Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
48757c478bd9Sstevel@tonic-gate 
48767c478bd9Sstevel@tonic-gate 		/*
48777c478bd9Sstevel@tonic-gate 		 * At this point connection is either in ERROR
48787c478bd9Sstevel@tonic-gate 		 * or DISCONN_PEND state. If in DISCONN_PEND state
48797c478bd9Sstevel@tonic-gate 		 * then some other thread is culling that connection.
48807c478bd9Sstevel@tonic-gate 		 * If not and if c_ref is 0, then destroy the connection.
48817c478bd9Sstevel@tonic-gate 		 */
48827c478bd9Sstevel@tonic-gate 		if (conn->c_ref == 0 &&
48837c478bd9Sstevel@tonic-gate 			conn->c_state != C_DISCONN_PEND) {
48847c478bd9Sstevel@tonic-gate 			/*
48857c478bd9Sstevel@tonic-gate 			 * Cull the connection
48867c478bd9Sstevel@tonic-gate 			 */
48877c478bd9Sstevel@tonic-gate 			conn->c_state = C_DISCONN_PEND;
48887c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
48897c478bd9Sstevel@tonic-gate 			rw_exit(&connlist->conn_lock);
48907c478bd9Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, connlist);
48917c478bd9Sstevel@tonic-gate 			goto top;
48927c478bd9Sstevel@tonic-gate 		} else {
48937c478bd9Sstevel@tonic-gate 			/*
48947c478bd9Sstevel@tonic-gate 			 * conn disconnect already scheduled or will
48957c478bd9Sstevel@tonic-gate 			 * happen from conn_release when c_ref drops to 0.
48967c478bd9Sstevel@tonic-gate 			 */
48977c478bd9Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
48987c478bd9Sstevel@tonic-gate 		}
48997c478bd9Sstevel@tonic-gate 		conn = conn->c_next;
49007c478bd9Sstevel@tonic-gate 	}
49017c478bd9Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
49027c478bd9Sstevel@tonic-gate 
49037c478bd9Sstevel@tonic-gate 	/*
49047c478bd9Sstevel@tonic-gate 	 * At this point, only connections with c_ref != 0 are on the list
49057c478bd9Sstevel@tonic-gate 	 */
49067c478bd9Sstevel@tonic-gate }
49077c478bd9Sstevel@tonic-gate 
49087c478bd9Sstevel@tonic-gate /*
49097c478bd9Sstevel@tonic-gate  * Cleans and closes up all uses of the HCA
49107c478bd9Sstevel@tonic-gate  */
49117c478bd9Sstevel@tonic-gate static void
49127c478bd9Sstevel@tonic-gate rib_detach_hca(rib_hca_t *hca)
49137c478bd9Sstevel@tonic-gate {
49147c478bd9Sstevel@tonic-gate 
49157c478bd9Sstevel@tonic-gate 	/*
49167c478bd9Sstevel@tonic-gate 	 * Stop all services on the HCA
49177c478bd9Sstevel@tonic-gate 	 * Go through cl_conn_list and close all rc_channels
49187c478bd9Sstevel@tonic-gate 	 * Go through svr_conn_list and close all rc_channels
49197c478bd9Sstevel@tonic-gate 	 * Free connections whose c_ref has dropped to 0
49207c478bd9Sstevel@tonic-gate 	 * Destroy all CQs
49217c478bd9Sstevel@tonic-gate 	 * Deregister and released all buffer pool memory after all
49227c478bd9Sstevel@tonic-gate 	 * connections are destroyed
49237c478bd9Sstevel@tonic-gate 	 * Free the protection domain
49247c478bd9Sstevel@tonic-gate 	 * ibt_close_hca()
49257c478bd9Sstevel@tonic-gate 	 */
49267c478bd9Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_WRITER);
49277c478bd9Sstevel@tonic-gate 	if (hca->state == HCA_DETACHED) {
49287c478bd9Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
49297c478bd9Sstevel@tonic-gate 		return;
49307c478bd9Sstevel@tonic-gate 	}
49317c478bd9Sstevel@tonic-gate 
49327c478bd9Sstevel@tonic-gate 	hca->state = HCA_DETACHED;
49337c478bd9Sstevel@tonic-gate 	rib_stat->nhca_inited--;
49347c478bd9Sstevel@tonic-gate 
49357c478bd9Sstevel@tonic-gate 	rib_stop_services(hca);
49367c478bd9Sstevel@tonic-gate 	rib_deregister_ats();
49377c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->cl_conn_list);
49387c478bd9Sstevel@tonic-gate 	rib_close_channels(&hca->srv_conn_list);
49397c478bd9Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
49407c478bd9Sstevel@tonic-gate 
49417c478bd9Sstevel@tonic-gate 	rib_purge_connlist(&hca->cl_conn_list);
49427c478bd9Sstevel@tonic-gate 	rib_purge_connlist(&hca->srv_conn_list);
49437c478bd9Sstevel@tonic-gate 
49447c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl);
49457c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl);
49467c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl);
49477c478bd9Sstevel@tonic-gate 	(void) ibt_free_cq(hca->svc_scq->rib_cq_hdl);
49487c478bd9Sstevel@tonic-gate 	kmem_free(hca->clnt_rcq, sizeof (rib_cq_t));
49497c478bd9Sstevel@tonic-gate 	kmem_free(hca->clnt_scq, sizeof (rib_cq_t));
49507c478bd9Sstevel@tonic-gate 	kmem_free(hca->svc_rcq, sizeof (rib_cq_t));
49517c478bd9Sstevel@tonic-gate 	kmem_free(hca->svc_scq, sizeof (rib_cq_t));
49527c478bd9Sstevel@tonic-gate 
49537c478bd9Sstevel@tonic-gate 	rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
49547c478bd9Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
49557c478bd9Sstevel@tonic-gate 	if (hca->srv_conn_list.conn_hd == NULL &&
49567c478bd9Sstevel@tonic-gate 		hca->cl_conn_list.conn_hd == NULL) {
49577c478bd9Sstevel@tonic-gate 		/*
49587c478bd9Sstevel@tonic-gate 		 * conn_lists are NULL, so destroy
49597c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
49607c478bd9Sstevel@tonic-gate 		 */
49617c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, RECV_BUFFER);
49627c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, SEND_BUFFER);
49637c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
49647c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
49657c478bd9Sstevel@tonic-gate 		hca->hca_hdl = NULL;
49667c478bd9Sstevel@tonic-gate 	}
49677c478bd9Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
49687c478bd9Sstevel@tonic-gate 	rw_exit(&hca->srv_conn_list.conn_lock);
49697c478bd9Sstevel@tonic-gate 
49707c478bd9Sstevel@tonic-gate 	if (hca->hca_hdl != NULL) {
49717c478bd9Sstevel@tonic-gate 		mutex_enter(&hca->inuse_lock);
49727c478bd9Sstevel@tonic-gate 		while (hca->inuse)
49737c478bd9Sstevel@tonic-gate 			cv_wait(&hca->cb_cv, &hca->inuse_lock);
49747c478bd9Sstevel@tonic-gate 		mutex_exit(&hca->inuse_lock);
49757c478bd9Sstevel@tonic-gate 		/*
49767c478bd9Sstevel@tonic-gate 		 * conn_lists are now NULL, so destroy
49777c478bd9Sstevel@tonic-gate 		 * buffers, close hca and be done.
49787c478bd9Sstevel@tonic-gate 		 */
49797c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, RECV_BUFFER);
49807c478bd9Sstevel@tonic-gate 		rib_rbufpool_destroy(hca, SEND_BUFFER);
49817c478bd9Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
49827c478bd9Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
49837c478bd9Sstevel@tonic-gate 		hca->hca_hdl = NULL;
49847c478bd9Sstevel@tonic-gate 	}
49857c478bd9Sstevel@tonic-gate }
4986