17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50a701b1eSRobert Gordon * Common Development and Distribution License (the "License"). 60a701b1eSRobert Gordon * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22e11c3f44Smeem * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 260a701b1eSRobert Gordon /* 270a701b1eSRobert Gordon * Copyright (c) 2007, The Ohio State University. All rights reserved. 280a701b1eSRobert Gordon * 290a701b1eSRobert Gordon * Portions of this source code is developed by the team members of 300a701b1eSRobert Gordon * The Ohio State University's Network-Based Computing Laboratory (NBCL), 310a701b1eSRobert Gordon * headed by Professor Dhabaleswar K. 
(DK) Panda. 320a701b1eSRobert Gordon * 330a701b1eSRobert Gordon * Acknowledgements to contributions from developors: 340a701b1eSRobert Gordon * Ranjit Noronha: noronha@cse.ohio-state.edu 350a701b1eSRobert Gordon * Lei Chai : chail@cse.ohio-state.edu 360a701b1eSRobert Gordon * Weikuan Yu : yuw@cse.ohio-state.edu 370a701b1eSRobert Gordon * 380a701b1eSRobert Gordon */ 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate /* 417c478bd9Sstevel@tonic-gate * The rpcib plugin. Implements the interface for RDMATF's 427c478bd9Sstevel@tonic-gate * interaction with IBTF. 437c478bd9Sstevel@tonic-gate */ 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate #include <sys/param.h> 467c478bd9Sstevel@tonic-gate #include <sys/types.h> 477c478bd9Sstevel@tonic-gate #include <sys/user.h> 487c478bd9Sstevel@tonic-gate #include <sys/systm.h> 497c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 507c478bd9Sstevel@tonic-gate #include <sys/proc.h> 517c478bd9Sstevel@tonic-gate #include <sys/socket.h> 527c478bd9Sstevel@tonic-gate #include <sys/file.h> 537c478bd9Sstevel@tonic-gate #include <sys/stream.h> 547c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 557c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 567c478bd9Sstevel@tonic-gate #include <sys/errno.h> 577c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 587c478bd9Sstevel@tonic-gate #include <sys/debug.h> 597c478bd9Sstevel@tonic-gate #include <sys/pathname.h> 607c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 617c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 627c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 637c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 647c478bd9Sstevel@tonic-gate #include <sys/time.h> 657c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h> 667c478bd9Sstevel@tonic-gate #include <sys/callb.h> 677c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 687c478bd9Sstevel@tonic-gate #include <sys/sunndi.h> 690a701b1eSRobert Gordon #include <sys/sdt.h> 707c478bd9Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h> 
717c478bd9Sstevel@tonic-gate #include <rpc/rpc.h> 727c478bd9Sstevel@tonic-gate #include <rpc/ib.h> 737c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 747c478bd9Sstevel@tonic-gate #include <sys/kstr.h> 757c478bd9Sstevel@tonic-gate #include <sys/sockio.h> 767c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 777c478bd9Sstevel@tonic-gate #include <sys/tiuser.h> 787c478bd9Sstevel@tonic-gate #include <net/if.h> 79e11c3f44Smeem #include <net/if_types.h> 807c478bd9Sstevel@tonic-gate #include <sys/cred.h> 810a701b1eSRobert Gordon #include <rpc/rpc_rdma.h> 820a701b1eSRobert Gordon #include <nfs/nfs.h> 830a701b1eSRobert Gordon #include <sys/atomic.h> 840a701b1eSRobert Gordon 85f837ee4aSSiddheshwar Mahesh #define NFS_RDMA_PORT 20049 86f837ee4aSSiddheshwar Mahesh 877c478bd9Sstevel@tonic-gate 88e11c3f44Smeem /* 89214ae7d0SSiddheshwar Mahesh * Convenience structures for connection management 90e11c3f44Smeem */ 91e11c3f44Smeem typedef struct rpcib_ipaddrs { 92e11c3f44Smeem void *ri_list; /* pointer to list of addresses */ 93e11c3f44Smeem uint_t ri_count; /* number of addresses in list */ 94e11c3f44Smeem uint_t ri_size; /* size of ri_list in bytes */ 95e11c3f44Smeem } rpcib_ipaddrs_t; 967c478bd9Sstevel@tonic-gate 97214ae7d0SSiddheshwar Mahesh 98214ae7d0SSiddheshwar Mahesh typedef struct rpcib_ping { 99214ae7d0SSiddheshwar Mahesh rib_hca_t *hca; 100214ae7d0SSiddheshwar Mahesh ibt_path_info_t path; 101214ae7d0SSiddheshwar Mahesh ibt_ip_addr_t srcip; 102214ae7d0SSiddheshwar Mahesh ibt_ip_addr_t dstip; 103214ae7d0SSiddheshwar Mahesh } rpcib_ping_t; 104214ae7d0SSiddheshwar Mahesh 1057c478bd9Sstevel@tonic-gate /* 1067c478bd9Sstevel@tonic-gate * Prototype declarations for driver ops 1077c478bd9Sstevel@tonic-gate */ 1087c478bd9Sstevel@tonic-gate static int rpcib_attach(dev_info_t *, ddi_attach_cmd_t); 1097c478bd9Sstevel@tonic-gate static int rpcib_getinfo(dev_info_t *, ddi_info_cmd_t, 1107c478bd9Sstevel@tonic-gate void *, void **); 1117c478bd9Sstevel@tonic-gate static int 
rpcib_detach(dev_info_t *, ddi_detach_cmd_t); 112e11c3f44Smeem static boolean_t rpcib_rdma_capable_interface(struct lifreq *); 113e11c3f44Smeem static int rpcib_do_ip_ioctl(int, int, void *); 114e11c3f44Smeem static boolean_t rpcib_get_ib_addresses(rpcib_ipaddrs_t *, rpcib_ipaddrs_t *); 1150a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *, int); 1160a701b1eSRobert Gordon static void rib_force_cleanup(void *); 1177c478bd9Sstevel@tonic-gate 1180a701b1eSRobert Gordon struct { 1190a701b1eSRobert Gordon kstat_named_t cache_limit; 1200a701b1eSRobert Gordon kstat_named_t cache_allocation; 1210a701b1eSRobert Gordon kstat_named_t cache_hits; 1220a701b1eSRobert Gordon kstat_named_t cache_misses; 1230a701b1eSRobert Gordon kstat_named_t cache_misses_above_the_limit; 1240a701b1eSRobert Gordon } rpcib_kstat = { 1250a701b1eSRobert Gordon {"cache_limit", KSTAT_DATA_UINT64 }, 1260a701b1eSRobert Gordon {"cache_allocation", KSTAT_DATA_UINT64 }, 1270a701b1eSRobert Gordon {"cache_hits", KSTAT_DATA_UINT64 }, 1280a701b1eSRobert Gordon {"cache_misses", KSTAT_DATA_UINT64 }, 1290a701b1eSRobert Gordon {"cache_misses_above_the_limit", KSTAT_DATA_UINT64 }, 1300a701b1eSRobert Gordon }; 1317c478bd9Sstevel@tonic-gate 1327c478bd9Sstevel@tonic-gate /* rpcib cb_ops */ 1337c478bd9Sstevel@tonic-gate static struct cb_ops rpcib_cbops = { 1347c478bd9Sstevel@tonic-gate nulldev, /* open */ 1357c478bd9Sstevel@tonic-gate nulldev, /* close */ 1367c478bd9Sstevel@tonic-gate nodev, /* strategy */ 1377c478bd9Sstevel@tonic-gate nodev, /* print */ 1387c478bd9Sstevel@tonic-gate nodev, /* dump */ 1397c478bd9Sstevel@tonic-gate nodev, /* read */ 1407c478bd9Sstevel@tonic-gate nodev, /* write */ 1417c478bd9Sstevel@tonic-gate nodev, /* ioctl */ 1427c478bd9Sstevel@tonic-gate nodev, /* devmap */ 1437c478bd9Sstevel@tonic-gate nodev, /* mmap */ 1447c478bd9Sstevel@tonic-gate nodev, /* segmap */ 1457c478bd9Sstevel@tonic-gate nochpoll, /* poll */ 1467c478bd9Sstevel@tonic-gate ddi_prop_op, /* prop_op */ 
1477c478bd9Sstevel@tonic-gate NULL, /* stream */ 1487c478bd9Sstevel@tonic-gate D_MP, /* cb_flag */ 1497c478bd9Sstevel@tonic-gate CB_REV, /* rev */ 1507c478bd9Sstevel@tonic-gate nodev, /* int (*cb_aread)() */ 1517c478bd9Sstevel@tonic-gate nodev /* int (*cb_awrite)() */ 1527c478bd9Sstevel@tonic-gate }; 1537c478bd9Sstevel@tonic-gate 1547c478bd9Sstevel@tonic-gate /* 1557c478bd9Sstevel@tonic-gate * Device options 1567c478bd9Sstevel@tonic-gate */ 1577c478bd9Sstevel@tonic-gate static struct dev_ops rpcib_ops = { 1587c478bd9Sstevel@tonic-gate DEVO_REV, /* devo_rev, */ 1597c478bd9Sstevel@tonic-gate 0, /* refcnt */ 1607c478bd9Sstevel@tonic-gate rpcib_getinfo, /* info */ 1617c478bd9Sstevel@tonic-gate nulldev, /* identify */ 1627c478bd9Sstevel@tonic-gate nulldev, /* probe */ 1637c478bd9Sstevel@tonic-gate rpcib_attach, /* attach */ 1647c478bd9Sstevel@tonic-gate rpcib_detach, /* detach */ 1657c478bd9Sstevel@tonic-gate nodev, /* reset */ 1667c478bd9Sstevel@tonic-gate &rpcib_cbops, /* driver ops - devctl interfaces */ 1677c478bd9Sstevel@tonic-gate NULL, /* bus operations */ 16819397407SSherry Moore NULL, /* power */ 16919397407SSherry Moore ddi_quiesce_not_needed, /* quiesce */ 1707c478bd9Sstevel@tonic-gate }; 1717c478bd9Sstevel@tonic-gate 1727c478bd9Sstevel@tonic-gate /* 1737c478bd9Sstevel@tonic-gate * Module linkage information. 
1747c478bd9Sstevel@tonic-gate */ 1757c478bd9Sstevel@tonic-gate 1767c478bd9Sstevel@tonic-gate static struct modldrv rib_modldrv = { 1777c478bd9Sstevel@tonic-gate &mod_driverops, /* Driver module */ 17819397407SSherry Moore "RPCIB plugin driver", /* Driver name and version */ 1797c478bd9Sstevel@tonic-gate &rpcib_ops, /* Driver ops */ 1807c478bd9Sstevel@tonic-gate }; 1817c478bd9Sstevel@tonic-gate 1827c478bd9Sstevel@tonic-gate static struct modlinkage rib_modlinkage = { 1837c478bd9Sstevel@tonic-gate MODREV_1, 1847c478bd9Sstevel@tonic-gate (void *)&rib_modldrv, 1857c478bd9Sstevel@tonic-gate NULL 1867c478bd9Sstevel@tonic-gate }; 1877c478bd9Sstevel@tonic-gate 1880a701b1eSRobert Gordon typedef struct rib_lrc_entry { 1890a701b1eSRobert Gordon struct rib_lrc_entry *forw; 1900a701b1eSRobert Gordon struct rib_lrc_entry *back; 1910a701b1eSRobert Gordon char *lrc_buf; 1920a701b1eSRobert Gordon 1930a701b1eSRobert Gordon uint32_t lrc_len; 1940a701b1eSRobert Gordon void *avl_node; 1950a701b1eSRobert Gordon bool_t registered; 1960a701b1eSRobert Gordon 1970a701b1eSRobert Gordon struct mrc lrc_mhandle; 1980a701b1eSRobert Gordon bool_t lrc_on_freed_list; 1990a701b1eSRobert Gordon } rib_lrc_entry_t; 2000a701b1eSRobert Gordon 2010a701b1eSRobert Gordon typedef struct cache_struct { 2020a701b1eSRobert Gordon rib_lrc_entry_t r; 2030a701b1eSRobert Gordon uint32_t len; 2040a701b1eSRobert Gordon uint32_t elements; 2050a701b1eSRobert Gordon kmutex_t node_lock; 2060a701b1eSRobert Gordon avl_node_t avl_link; 2070a701b1eSRobert Gordon } cache_avl_struct_t; 2080a701b1eSRobert Gordon 2090a701b1eSRobert Gordon static uint64_t rib_total_buffers = 0; 2100a701b1eSRobert Gordon uint64_t cache_limit = 100 * 1024 * 1024; 2110a701b1eSRobert Gordon static volatile uint64_t cache_allocation = 0; 2120a701b1eSRobert Gordon static uint64_t cache_watermark = 80 * 1024 * 1024; 2130a701b1eSRobert Gordon static uint64_t cache_hits = 0; 2140a701b1eSRobert Gordon static uint64_t cache_misses = 0; 2150a701b1eSRobert 
Gordon static uint64_t cache_cold_misses = 0; 2160a701b1eSRobert Gordon static uint64_t cache_hot_misses = 0; 2170a701b1eSRobert Gordon static uint64_t cache_misses_above_the_limit = 0; 2180a701b1eSRobert Gordon static bool_t stats_enabled = FALSE; 2190a701b1eSRobert Gordon 2200a701b1eSRobert Gordon static uint64_t max_unsignaled_rws = 5; 221f837ee4aSSiddheshwar Mahesh int nfs_rdma_port = NFS_RDMA_PORT; 2220a701b1eSRobert Gordon 2237c478bd9Sstevel@tonic-gate /* 2247c478bd9Sstevel@tonic-gate * rib_stat: private data pointer used when registering 2257c478bd9Sstevel@tonic-gate * with the IBTF. It is returned to the consumer 2267c478bd9Sstevel@tonic-gate * in all callbacks. 2277c478bd9Sstevel@tonic-gate */ 2287c478bd9Sstevel@tonic-gate static rpcib_state_t *rib_stat = NULL; 2297c478bd9Sstevel@tonic-gate 2300a701b1eSRobert Gordon #define RNR_RETRIES IBT_RNR_RETRY_1 2317c478bd9Sstevel@tonic-gate #define MAX_PORTS 2 232*065714dcSSiddheshwar Mahesh #define RDMA_DUMMY_WRID 0x4D3A1D4D3A1D 233*065714dcSSiddheshwar Mahesh #define RDMA_CONN_REAP_RETRY 10 /* 10 secs */ 2347c478bd9Sstevel@tonic-gate 2350a701b1eSRobert Gordon int preposted_rbufs = RDMA_BUFS_GRANT; 2367c478bd9Sstevel@tonic-gate int send_threshold = 1; 2377c478bd9Sstevel@tonic-gate 2387c478bd9Sstevel@tonic-gate /* 239*065714dcSSiddheshwar Mahesh * Old cards with Tavor driver have limited memory footprint 240*065714dcSSiddheshwar Mahesh * when booted in 32bit. The rib_max_rbufs tunable can be 241*065714dcSSiddheshwar Mahesh * tuned for more buffers if needed. 
242*065714dcSSiddheshwar Mahesh */ 243*065714dcSSiddheshwar Mahesh 244*065714dcSSiddheshwar Mahesh #if !defined(_ELF64) && !defined(__sparc) 245*065714dcSSiddheshwar Mahesh int rib_max_rbufs = MAX_BUFS; 246*065714dcSSiddheshwar Mahesh #else 247*065714dcSSiddheshwar Mahesh int rib_max_rbufs = 10 * MAX_BUFS; 248*065714dcSSiddheshwar Mahesh #endif /* !(_ELF64) && !(__sparc) */ 249*065714dcSSiddheshwar Mahesh 250*065714dcSSiddheshwar Mahesh int rib_conn_timeout = 60 * 12; /* 12 minutes */ 251*065714dcSSiddheshwar Mahesh 252*065714dcSSiddheshwar Mahesh /* 2537c478bd9Sstevel@tonic-gate * State of the plugin. 2547c478bd9Sstevel@tonic-gate * ACCEPT = accepting new connections and requests. 2557c478bd9Sstevel@tonic-gate * NO_ACCEPT = not accepting new connection and requests. 2567c478bd9Sstevel@tonic-gate * This should eventually move to rpcib_state_t structure, since this 2577c478bd9Sstevel@tonic-gate * will tell in which state the plugin is for a particular type of service 2587c478bd9Sstevel@tonic-gate * like NFS, NLM or v4 Callback deamon. The plugin might be in accept 2597c478bd9Sstevel@tonic-gate * state for one and in no_accept state for the other. 
2607c478bd9Sstevel@tonic-gate */ 2617c478bd9Sstevel@tonic-gate int plugin_state; 2627c478bd9Sstevel@tonic-gate kmutex_t plugin_state_lock; 2637c478bd9Sstevel@tonic-gate 2640a701b1eSRobert Gordon ldi_ident_t rpcib_li; 2657c478bd9Sstevel@tonic-gate 2667c478bd9Sstevel@tonic-gate /* 2677c478bd9Sstevel@tonic-gate * RPCIB RDMATF operations 2687c478bd9Sstevel@tonic-gate */ 2697c478bd9Sstevel@tonic-gate static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle); 2707c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect(CONN *conn); 2717c478bd9Sstevel@tonic-gate static void rib_listen(struct rdma_svc_data *rd); 2727c478bd9Sstevel@tonic-gate static void rib_listen_stop(struct rdma_svc_data *rd); 2730a701b1eSRobert Gordon static rdma_stat rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, 2740a701b1eSRobert Gordon uint_t buflen, struct mrc *buf_handle); 2757c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf, 2767c478bd9Sstevel@tonic-gate struct mrc buf_handle); 2770a701b1eSRobert Gordon static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, 2780a701b1eSRobert Gordon caddr_t buf, uint_t buflen, struct mrc *buf_handle); 2790a701b1eSRobert Gordon static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, 2800a701b1eSRobert Gordon struct mrc buf_handle); 2810a701b1eSRobert Gordon static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, 2820a701b1eSRobert Gordon uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, 2830a701b1eSRobert Gordon void *lrc); 2847c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf, 2850a701b1eSRobert Gordon struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *); 2867c478bd9Sstevel@tonic-gate static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, 2877c478bd9Sstevel@tonic-gate caddr_t buf, int len, int cpu); 2887c478bd9Sstevel@tonic-gate 2897c478bd9Sstevel@tonic-gate static 
rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf); 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf); 2927c478bd9Sstevel@tonic-gate static void *rib_rbuf_alloc(CONN *, rdma_buf_t *); 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate static void rib_rbuf_free(CONN *conn, int ptype, void *buf); 2957c478bd9Sstevel@tonic-gate 2967c478bd9Sstevel@tonic-gate static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid); 2977c478bd9Sstevel@tonic-gate static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid); 2987c478bd9Sstevel@tonic-gate static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid); 2990a701b1eSRobert Gordon static rdma_stat rib_post_resp_remove(CONN *conn, uint32_t msgid); 3007c478bd9Sstevel@tonic-gate static rdma_stat rib_post_recv(CONN *conn, struct clist *cl); 3017c478bd9Sstevel@tonic-gate static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid); 3027c478bd9Sstevel@tonic-gate static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait); 3037c478bd9Sstevel@tonic-gate static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait); 304214ae7d0SSiddheshwar Mahesh static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rpcib_ping_t *); 3057c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **); 3067c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_release(CONN *conn); 3077c478bd9Sstevel@tonic-gate static rdma_stat rib_getinfo(rdma_info_t *info); 3080a701b1eSRobert Gordon 3090a701b1eSRobert Gordon static rib_lrc_entry_t *rib_get_cache_buf(CONN *conn, uint32_t len); 3100a701b1eSRobert Gordon static void rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *buf); 3110a701b1eSRobert Gordon static void rib_destroy_cache(rib_hca_t *hca); 3120a701b1eSRobert Gordon static void rib_server_side_cache_reclaim(void *argp); 3130a701b1eSRobert Gordon static int 
avl_compare(const void *t1, const void *t2); 3140a701b1eSRobert Gordon 3157c478bd9Sstevel@tonic-gate static void rib_stop_services(rib_hca_t *); 3160a701b1eSRobert Gordon static void rib_close_channels(rib_conn_list_t *); 317*065714dcSSiddheshwar Mahesh static void rib_conn_close(void *); 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate /* 3207c478bd9Sstevel@tonic-gate * RPCIB addressing operations 3217c478bd9Sstevel@tonic-gate */ 3227c478bd9Sstevel@tonic-gate 3237c478bd9Sstevel@tonic-gate /* 3247c478bd9Sstevel@tonic-gate * RDMA operations the RPCIB module exports 3257c478bd9Sstevel@tonic-gate */ 3267c478bd9Sstevel@tonic-gate static rdmaops_t rib_ops = { 3277c478bd9Sstevel@tonic-gate rib_reachable, 3287c478bd9Sstevel@tonic-gate rib_conn_get, 3297c478bd9Sstevel@tonic-gate rib_conn_release, 3307c478bd9Sstevel@tonic-gate rib_listen, 3317c478bd9Sstevel@tonic-gate rib_listen_stop, 3327c478bd9Sstevel@tonic-gate rib_registermem, 3337c478bd9Sstevel@tonic-gate rib_deregistermem, 3347c478bd9Sstevel@tonic-gate rib_registermemsync, 3357c478bd9Sstevel@tonic-gate rib_deregistermemsync, 3367c478bd9Sstevel@tonic-gate rib_syncmem, 3377c478bd9Sstevel@tonic-gate rib_reg_buf_alloc, 3387c478bd9Sstevel@tonic-gate rib_reg_buf_free, 3397c478bd9Sstevel@tonic-gate rib_send, 3407c478bd9Sstevel@tonic-gate rib_send_resp, 3417c478bd9Sstevel@tonic-gate rib_post_resp, 3420a701b1eSRobert Gordon rib_post_resp_remove, 3437c478bd9Sstevel@tonic-gate rib_post_recv, 3447c478bd9Sstevel@tonic-gate rib_recv, 3457c478bd9Sstevel@tonic-gate rib_read, 3467c478bd9Sstevel@tonic-gate rib_write, 3470a701b1eSRobert Gordon rib_getinfo, 3487c478bd9Sstevel@tonic-gate }; 3497c478bd9Sstevel@tonic-gate 3507c478bd9Sstevel@tonic-gate /* 3517c478bd9Sstevel@tonic-gate * RDMATF RPCIB plugin details 3527c478bd9Sstevel@tonic-gate */ 3537c478bd9Sstevel@tonic-gate static rdma_mod_t rib_mod = { 3547c478bd9Sstevel@tonic-gate "ibtf", /* api name */ 3557c478bd9Sstevel@tonic-gate RDMATF_VERS_1, 3567c478bd9Sstevel@tonic-gate 
0, 3577c478bd9Sstevel@tonic-gate &rib_ops, /* rdma op vector for ibtf */ 3587c478bd9Sstevel@tonic-gate }; 3597c478bd9Sstevel@tonic-gate 3607c478bd9Sstevel@tonic-gate static rdma_stat open_hcas(rpcib_state_t *); 3617c478bd9Sstevel@tonic-gate static rdma_stat rib_qp_init(rib_qp_t *, int); 3627c478bd9Sstevel@tonic-gate static void rib_svc_scq_handler(ibt_cq_hdl_t, void *); 3637c478bd9Sstevel@tonic-gate static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *); 3647c478bd9Sstevel@tonic-gate static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *); 3657c478bd9Sstevel@tonic-gate static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *); 3667c478bd9Sstevel@tonic-gate static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num); 3670a701b1eSRobert Gordon static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t, 3680a701b1eSRobert Gordon ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_mr_desc_t *); 3690a701b1eSRobert Gordon static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t, 3700a701b1eSRobert Gordon ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t); 371214ae7d0SSiddheshwar Mahesh static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, rpcib_ping_t *); 3727c478bd9Sstevel@tonic-gate static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *, 3737c478bd9Sstevel@tonic-gate rib_qp_t **); 3747c478bd9Sstevel@tonic-gate static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t, 3757c478bd9Sstevel@tonic-gate rib_qp_t **); 3767c478bd9Sstevel@tonic-gate static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *); 3777c478bd9Sstevel@tonic-gate static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *); 3787c478bd9Sstevel@tonic-gate static int rib_free_sendwait(struct send_wid *); 3797c478bd9Sstevel@tonic-gate static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid); 3807c478bd9Sstevel@tonic-gate static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd); 3817c478bd9Sstevel@tonic-gate static void 
rdma_done_rem_list(rib_qp_t *); 3827c478bd9Sstevel@tonic-gate static void rdma_done_notify(rib_qp_t *qp, uint32_t xid); 3837c478bd9Sstevel@tonic-gate 3847c478bd9Sstevel@tonic-gate static void rib_async_handler(void *, 3857c478bd9Sstevel@tonic-gate ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *); 3867c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *); 3877c478bd9Sstevel@tonic-gate static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *); 3887c478bd9Sstevel@tonic-gate static int rib_free_svc_recv(struct svc_recv *); 3897c478bd9Sstevel@tonic-gate static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t); 3907c478bd9Sstevel@tonic-gate static void rib_free_wid(struct recv_wid *); 3917c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *); 3927c478bd9Sstevel@tonic-gate static void rib_detach_hca(rib_hca_t *); 393*065714dcSSiddheshwar Mahesh static void rib_close_a_channel(CONN *); 394*065714dcSSiddheshwar Mahesh static void rib_send_hold(rib_qp_t *); 395*065714dcSSiddheshwar Mahesh static void rib_send_rele(rib_qp_t *); 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate /* 3987c478bd9Sstevel@tonic-gate * Registration with IBTF as a consumer 3997c478bd9Sstevel@tonic-gate */ 4007c478bd9Sstevel@tonic-gate static struct ibt_clnt_modinfo_s rib_modinfo = { 40103494a98SBill Taylor IBTI_V_CURR, 4027c478bd9Sstevel@tonic-gate IBT_GENERIC, 4037c478bd9Sstevel@tonic-gate rib_async_handler, /* async event handler */ 4047c478bd9Sstevel@tonic-gate NULL, /* Memory Region Handler */ 4057c478bd9Sstevel@tonic-gate "nfs/ib" 4067c478bd9Sstevel@tonic-gate }; 4077c478bd9Sstevel@tonic-gate 4087c478bd9Sstevel@tonic-gate /* 4097c478bd9Sstevel@tonic-gate * Global strucuture 4107c478bd9Sstevel@tonic-gate */ 4117c478bd9Sstevel@tonic-gate 4127c478bd9Sstevel@tonic-gate typedef struct rpcib_s { 4137c478bd9Sstevel@tonic-gate dev_info_t *rpcib_dip; 4147c478bd9Sstevel@tonic-gate kmutex_t 
rpcib_mutex; 4157c478bd9Sstevel@tonic-gate } rpcib_t; 4167c478bd9Sstevel@tonic-gate 4177c478bd9Sstevel@tonic-gate rpcib_t rpcib; 4187c478bd9Sstevel@tonic-gate 4197c478bd9Sstevel@tonic-gate /* 4207c478bd9Sstevel@tonic-gate * /etc/system controlled variable to control 4217c478bd9Sstevel@tonic-gate * debugging in rpcib kernel module. 4227c478bd9Sstevel@tonic-gate * Set it to values greater that 1 to control 4237c478bd9Sstevel@tonic-gate * the amount of debugging messages required. 4247c478bd9Sstevel@tonic-gate */ 4257c478bd9Sstevel@tonic-gate int rib_debug = 0; 4267c478bd9Sstevel@tonic-gate 4277c478bd9Sstevel@tonic-gate int 4287c478bd9Sstevel@tonic-gate _init(void) 4297c478bd9Sstevel@tonic-gate { 4307c478bd9Sstevel@tonic-gate int error; 4317c478bd9Sstevel@tonic-gate 4327c478bd9Sstevel@tonic-gate error = mod_install((struct modlinkage *)&rib_modlinkage); 4337c478bd9Sstevel@tonic-gate if (error != 0) { 4347c478bd9Sstevel@tonic-gate /* 4357c478bd9Sstevel@tonic-gate * Could not load module 4367c478bd9Sstevel@tonic-gate */ 4377c478bd9Sstevel@tonic-gate return (error); 4387c478bd9Sstevel@tonic-gate } 4397c478bd9Sstevel@tonic-gate mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL); 4407c478bd9Sstevel@tonic-gate return (0); 4417c478bd9Sstevel@tonic-gate } 4427c478bd9Sstevel@tonic-gate 4437c478bd9Sstevel@tonic-gate int 4447c478bd9Sstevel@tonic-gate _fini() 4457c478bd9Sstevel@tonic-gate { 4467c478bd9Sstevel@tonic-gate int status; 4477c478bd9Sstevel@tonic-gate 4487c478bd9Sstevel@tonic-gate /* 4497c478bd9Sstevel@tonic-gate * Remove module 4507c478bd9Sstevel@tonic-gate */ 4517c478bd9Sstevel@tonic-gate if ((status = mod_remove(&rib_modlinkage)) != 0) { 4527c478bd9Sstevel@tonic-gate return (status); 4537c478bd9Sstevel@tonic-gate } 4547c478bd9Sstevel@tonic-gate mutex_destroy(&plugin_state_lock); 4557c478bd9Sstevel@tonic-gate return (0); 4567c478bd9Sstevel@tonic-gate } 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate int 4597c478bd9Sstevel@tonic-gate _info(struct 
modinfo *modinfop) 4607c478bd9Sstevel@tonic-gate { 4617c478bd9Sstevel@tonic-gate return (mod_info(&rib_modlinkage, modinfop)); 4627c478bd9Sstevel@tonic-gate } 4637c478bd9Sstevel@tonic-gate 4647c478bd9Sstevel@tonic-gate /* 4657c478bd9Sstevel@tonic-gate * rpcib_getinfo() 4667c478bd9Sstevel@tonic-gate * Given the device number, return the devinfo pointer or the 4677c478bd9Sstevel@tonic-gate * instance number. 4687c478bd9Sstevel@tonic-gate * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach. 4697c478bd9Sstevel@tonic-gate */ 4707c478bd9Sstevel@tonic-gate 4717c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4727c478bd9Sstevel@tonic-gate static int 4737c478bd9Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 4747c478bd9Sstevel@tonic-gate { 4757c478bd9Sstevel@tonic-gate int ret = DDI_SUCCESS; 4767c478bd9Sstevel@tonic-gate 4777c478bd9Sstevel@tonic-gate switch (cmd) { 4787c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 4797c478bd9Sstevel@tonic-gate if (rpcib.rpcib_dip != NULL) 4807c478bd9Sstevel@tonic-gate *result = rpcib.rpcib_dip; 4817c478bd9Sstevel@tonic-gate else { 4827c478bd9Sstevel@tonic-gate *result = NULL; 4837c478bd9Sstevel@tonic-gate ret = DDI_FAILURE; 4847c478bd9Sstevel@tonic-gate } 4857c478bd9Sstevel@tonic-gate break; 4867c478bd9Sstevel@tonic-gate 4877c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 4887c478bd9Sstevel@tonic-gate *result = NULL; 4897c478bd9Sstevel@tonic-gate break; 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate default: 4927c478bd9Sstevel@tonic-gate ret = DDI_FAILURE; 4937c478bd9Sstevel@tonic-gate } 4947c478bd9Sstevel@tonic-gate return (ret); 4957c478bd9Sstevel@tonic-gate } 4967c478bd9Sstevel@tonic-gate 4977c478bd9Sstevel@tonic-gate static int 4987c478bd9Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4997c478bd9Sstevel@tonic-gate { 5007c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 5017c478bd9Sstevel@tonic-gate rdma_stat r_status; 
5027c478bd9Sstevel@tonic-gate 5037c478bd9Sstevel@tonic-gate switch (cmd) { 5047c478bd9Sstevel@tonic-gate case DDI_ATTACH: 5057c478bd9Sstevel@tonic-gate break; 5067c478bd9Sstevel@tonic-gate case DDI_RESUME: 5077c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 5087c478bd9Sstevel@tonic-gate default: 5097c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5107c478bd9Sstevel@tonic-gate } 5117c478bd9Sstevel@tonic-gate 5127c478bd9Sstevel@tonic-gate mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL); 5137c478bd9Sstevel@tonic-gate 5147c478bd9Sstevel@tonic-gate mutex_enter(&rpcib.rpcib_mutex); 5157c478bd9Sstevel@tonic-gate if (rpcib.rpcib_dip != NULL) { 5167c478bd9Sstevel@tonic-gate mutex_exit(&rpcib.rpcib_mutex); 5177c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5187c478bd9Sstevel@tonic-gate } 5197c478bd9Sstevel@tonic-gate rpcib.rpcib_dip = dip; 5207c478bd9Sstevel@tonic-gate mutex_exit(&rpcib.rpcib_mutex); 5217c478bd9Sstevel@tonic-gate /* 5227c478bd9Sstevel@tonic-gate * Create the "rpcib" minor-node. 
5237c478bd9Sstevel@tonic-gate */ 5247c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(dip, 5257c478bd9Sstevel@tonic-gate "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) { 5267c478bd9Sstevel@tonic-gate /* Error message, no cmn_err as they print on console */ 5277c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5287c478bd9Sstevel@tonic-gate } 5297c478bd9Sstevel@tonic-gate 5307c478bd9Sstevel@tonic-gate if (rib_stat == NULL) { 5317c478bd9Sstevel@tonic-gate rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP); 5327c478bd9Sstevel@tonic-gate mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL); 5337c478bd9Sstevel@tonic-gate } 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate rib_stat->hca_count = ibt_get_hca_list(&rib_stat->hca_guids); 5367c478bd9Sstevel@tonic-gate if (rib_stat->hca_count < 1) { 5377c478bd9Sstevel@tonic-gate mutex_destroy(&rib_stat->open_hca_lock); 5387c478bd9Sstevel@tonic-gate kmem_free(rib_stat, sizeof (*rib_stat)); 5397c478bd9Sstevel@tonic-gate rib_stat = NULL; 5407c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5417c478bd9Sstevel@tonic-gate } 5427c478bd9Sstevel@tonic-gate 5437c478bd9Sstevel@tonic-gate ibt_status = ibt_attach(&rib_modinfo, dip, 5447c478bd9Sstevel@tonic-gate (void *)rib_stat, &rib_stat->ibt_clnt_hdl); 5450a701b1eSRobert Gordon 5467c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 5477c478bd9Sstevel@tonic-gate ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count); 5487c478bd9Sstevel@tonic-gate mutex_destroy(&rib_stat->open_hca_lock); 5497c478bd9Sstevel@tonic-gate kmem_free(rib_stat, sizeof (*rib_stat)); 5507c478bd9Sstevel@tonic-gate rib_stat = NULL; 5517c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5527c478bd9Sstevel@tonic-gate } 5537c478bd9Sstevel@tonic-gate 5547c478bd9Sstevel@tonic-gate mutex_enter(&rib_stat->open_hca_lock); 5557c478bd9Sstevel@tonic-gate if (open_hcas(rib_stat) != RDMA_SUCCESS) { 5567c478bd9Sstevel@tonic-gate mutex_exit(&rib_stat->open_hca_lock); 55751f34d4bSRajkumar 
Sivaprakasam goto open_fail; 5587c478bd9Sstevel@tonic-gate } 5597c478bd9Sstevel@tonic-gate mutex_exit(&rib_stat->open_hca_lock); 5607c478bd9Sstevel@tonic-gate 56151f34d4bSRajkumar Sivaprakasam if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != 56251f34d4bSRajkumar Sivaprakasam DDI_PROP_SUCCESS) { 56351f34d4bSRajkumar Sivaprakasam cmn_err(CE_WARN, "rpcib_attach: ddi-no-autodetach prop update " 56451f34d4bSRajkumar Sivaprakasam "failed."); 56551f34d4bSRajkumar Sivaprakasam goto register_fail; 56651f34d4bSRajkumar Sivaprakasam } 56751f34d4bSRajkumar Sivaprakasam 5687c478bd9Sstevel@tonic-gate /* 5697c478bd9Sstevel@tonic-gate * Register with rdmatf 5707c478bd9Sstevel@tonic-gate */ 57151f34d4bSRajkumar Sivaprakasam rib_mod.rdma_count = rib_stat->nhca_inited; 5727c478bd9Sstevel@tonic-gate r_status = rdma_register_mod(&rib_mod); 5737c478bd9Sstevel@tonic-gate if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) { 57451f34d4bSRajkumar Sivaprakasam cmn_err(CE_WARN, "rpcib_attach:rdma_register_mod failed, " 57551f34d4bSRajkumar Sivaprakasam "status = %d", r_status); 57651f34d4bSRajkumar Sivaprakasam goto register_fail; 57751f34d4bSRajkumar Sivaprakasam } 57851f34d4bSRajkumar Sivaprakasam 57951f34d4bSRajkumar Sivaprakasam return (DDI_SUCCESS); 58051f34d4bSRajkumar Sivaprakasam 58151f34d4bSRajkumar Sivaprakasam register_fail: 5827c478bd9Sstevel@tonic-gate rib_detach_hca(rib_stat->hca); 58351f34d4bSRajkumar Sivaprakasam open_fail: 5847c478bd9Sstevel@tonic-gate ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count); 5857c478bd9Sstevel@tonic-gate (void) ibt_detach(rib_stat->ibt_clnt_hdl); 5867c478bd9Sstevel@tonic-gate mutex_destroy(&rib_stat->open_hca_lock); 5877c478bd9Sstevel@tonic-gate kmem_free(rib_stat, sizeof (*rib_stat)); 5887c478bd9Sstevel@tonic-gate rib_stat = NULL; 5897c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5907c478bd9Sstevel@tonic-gate } 5917c478bd9Sstevel@tonic-gate 5927c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 
5937c478bd9Sstevel@tonic-gate static int 5947c478bd9Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5957c478bd9Sstevel@tonic-gate { 5967c478bd9Sstevel@tonic-gate switch (cmd) { 5977c478bd9Sstevel@tonic-gate 5987c478bd9Sstevel@tonic-gate case DDI_DETACH: 5997c478bd9Sstevel@tonic-gate break; 6007c478bd9Sstevel@tonic-gate 6017c478bd9Sstevel@tonic-gate case DDI_SUSPEND: 6027c478bd9Sstevel@tonic-gate default: 6037c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 6047c478bd9Sstevel@tonic-gate } 6057c478bd9Sstevel@tonic-gate 6067c478bd9Sstevel@tonic-gate /* 6077c478bd9Sstevel@tonic-gate * Detach the hca and free resources 6087c478bd9Sstevel@tonic-gate */ 6097c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 6107c478bd9Sstevel@tonic-gate plugin_state = NO_ACCEPT; 6117c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 6127c478bd9Sstevel@tonic-gate rib_detach_hca(rib_stat->hca); 6137c478bd9Sstevel@tonic-gate ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count); 6147c478bd9Sstevel@tonic-gate (void) ibt_detach(rib_stat->ibt_clnt_hdl); 61551f34d4bSRajkumar Sivaprakasam mutex_destroy(&rib_stat->open_hca_lock); 61651f34d4bSRajkumar Sivaprakasam if (rib_stat->hcas) { 61751f34d4bSRajkumar Sivaprakasam kmem_free(rib_stat->hcas, rib_stat->hca_count * 61851f34d4bSRajkumar Sivaprakasam sizeof (rib_hca_t)); 61951f34d4bSRajkumar Sivaprakasam rib_stat->hcas = NULL; 62051f34d4bSRajkumar Sivaprakasam } 62151f34d4bSRajkumar Sivaprakasam kmem_free(rib_stat, sizeof (*rib_stat)); 62251f34d4bSRajkumar Sivaprakasam rib_stat = NULL; 6237c478bd9Sstevel@tonic-gate 6247c478bd9Sstevel@tonic-gate mutex_enter(&rpcib.rpcib_mutex); 6257c478bd9Sstevel@tonic-gate rpcib.rpcib_dip = NULL; 6267c478bd9Sstevel@tonic-gate mutex_exit(&rpcib.rpcib_mutex); 6277c478bd9Sstevel@tonic-gate mutex_destroy(&rpcib.rpcib_mutex); 6287c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 6297c478bd9Sstevel@tonic-gate } 6307c478bd9Sstevel@tonic-gate 6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int); 6337c478bd9Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int); 6347c478bd9Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype); 6357c478bd9Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t); 6367c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *); 6377c478bd9Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *); 6387c478bd9Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *); 6397c478bd9Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *); 6407c478bd9Sstevel@tonic-gate 6410a701b1eSRobert Gordon 6427c478bd9Sstevel@tonic-gate /* 6437c478bd9Sstevel@tonic-gate * One CQ pair per HCA 6447c478bd9Sstevel@tonic-gate */ 6457c478bd9Sstevel@tonic-gate static rdma_stat 6467c478bd9Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler, 6477c478bd9Sstevel@tonic-gate rib_cq_t **cqp, rpcib_state_t *ribstat) 6487c478bd9Sstevel@tonic-gate { 6497c478bd9Sstevel@tonic-gate rib_cq_t *cq; 6507c478bd9Sstevel@tonic-gate ibt_cq_attr_t cq_attr; 6517c478bd9Sstevel@tonic-gate uint32_t real_size; 6527c478bd9Sstevel@tonic-gate ibt_status_t status; 6537c478bd9Sstevel@tonic-gate rdma_stat error = RDMA_SUCCESS; 6547c478bd9Sstevel@tonic-gate 6557c478bd9Sstevel@tonic-gate cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP); 6567c478bd9Sstevel@tonic-gate cq->rib_hca = hca; 6577c478bd9Sstevel@tonic-gate cq_attr.cq_size = cq_size; 6587c478bd9Sstevel@tonic-gate cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 6597c478bd9Sstevel@tonic-gate status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl, 6607c478bd9Sstevel@tonic-gate &real_size); 6617c478bd9Sstevel@tonic-gate if (status != IBT_SUCCESS) { 6627c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed," 6637c478bd9Sstevel@tonic-gate " status=%d", status); 
6647c478bd9Sstevel@tonic-gate error = RDMA_FAILED; 6657c478bd9Sstevel@tonic-gate goto fail; 6667c478bd9Sstevel@tonic-gate } 6677c478bd9Sstevel@tonic-gate ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, ribstat); 6687c478bd9Sstevel@tonic-gate 6697c478bd9Sstevel@tonic-gate /* 6707c478bd9Sstevel@tonic-gate * Enable CQ callbacks. CQ Callbacks are single shot 6717c478bd9Sstevel@tonic-gate * (e.g. you have to call ibt_enable_cq_notify() 6727c478bd9Sstevel@tonic-gate * after each callback to get another one). 6737c478bd9Sstevel@tonic-gate */ 6747c478bd9Sstevel@tonic-gate status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION); 6757c478bd9Sstevel@tonic-gate if (status != IBT_SUCCESS) { 6767c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "rib_create_cq: " 6777c478bd9Sstevel@tonic-gate "enable_cq_notify failed, status %d", status); 6787c478bd9Sstevel@tonic-gate error = RDMA_FAILED; 6797c478bd9Sstevel@tonic-gate goto fail; 6807c478bd9Sstevel@tonic-gate } 6817c478bd9Sstevel@tonic-gate *cqp = cq; 6827c478bd9Sstevel@tonic-gate 6837c478bd9Sstevel@tonic-gate return (error); 6847c478bd9Sstevel@tonic-gate fail: 6857c478bd9Sstevel@tonic-gate if (cq->rib_cq_hdl) 6867c478bd9Sstevel@tonic-gate (void) ibt_free_cq(cq->rib_cq_hdl); 6877c478bd9Sstevel@tonic-gate if (cq) 6887c478bd9Sstevel@tonic-gate kmem_free(cq, sizeof (rib_cq_t)); 6897c478bd9Sstevel@tonic-gate return (error); 6907c478bd9Sstevel@tonic-gate } 6917c478bd9Sstevel@tonic-gate 6927c478bd9Sstevel@tonic-gate static rdma_stat 6937c478bd9Sstevel@tonic-gate open_hcas(rpcib_state_t *ribstat) 6947c478bd9Sstevel@tonic-gate { 6957c478bd9Sstevel@tonic-gate rib_hca_t *hca; 6967c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 6977c478bd9Sstevel@tonic-gate rdma_stat status; 6987c478bd9Sstevel@tonic-gate ibt_hca_portinfo_t *pinfop; 6997c478bd9Sstevel@tonic-gate ibt_pd_flags_t pd_flags = IBT_PD_NO_FLAGS; 7007c478bd9Sstevel@tonic-gate uint_t size, cq_size; 7017c478bd9Sstevel@tonic-gate int i; 7020a701b1eSRobert Gordon kstat_t *ksp; 
7030a701b1eSRobert Gordon cache_avl_struct_t example_avl_node; 7040a701b1eSRobert Gordon char rssc_name[32]; 7057c478bd9Sstevel@tonic-gate 7067c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ribstat->open_hca_lock)); 7070a701b1eSRobert Gordon 7087c478bd9Sstevel@tonic-gate if (ribstat->hcas == NULL) 7097c478bd9Sstevel@tonic-gate ribstat->hcas = kmem_zalloc(ribstat->hca_count * 7107c478bd9Sstevel@tonic-gate sizeof (rib_hca_t), KM_SLEEP); 7117c478bd9Sstevel@tonic-gate 7127c478bd9Sstevel@tonic-gate /* 7137c478bd9Sstevel@tonic-gate * Open a hca and setup for RDMA 7147c478bd9Sstevel@tonic-gate */ 7157c478bd9Sstevel@tonic-gate for (i = 0; i < ribstat->hca_count; i++) { 7167c478bd9Sstevel@tonic-gate ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl, 7177c478bd9Sstevel@tonic-gate ribstat->hca_guids[i], 7187c478bd9Sstevel@tonic-gate &ribstat->hcas[i].hca_hdl); 7197c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7207c478bd9Sstevel@tonic-gate continue; 7217c478bd9Sstevel@tonic-gate } 7227c478bd9Sstevel@tonic-gate ribstat->hcas[i].hca_guid = ribstat->hca_guids[i]; 7237c478bd9Sstevel@tonic-gate hca = &(ribstat->hcas[i]); 7247c478bd9Sstevel@tonic-gate hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl; 7257c478bd9Sstevel@tonic-gate hca->state = HCA_INITED; 7267c478bd9Sstevel@tonic-gate 7277c478bd9Sstevel@tonic-gate /* 7287c478bd9Sstevel@tonic-gate * query HCA info 7297c478bd9Sstevel@tonic-gate */ 7307c478bd9Sstevel@tonic-gate ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs); 7317c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7327c478bd9Sstevel@tonic-gate goto fail1; 7337c478bd9Sstevel@tonic-gate } 7347c478bd9Sstevel@tonic-gate 7357c478bd9Sstevel@tonic-gate /* 7367c478bd9Sstevel@tonic-gate * One PD (Protection Domain) per HCA. 7377c478bd9Sstevel@tonic-gate * A qp is allowed to access a memory region 7387c478bd9Sstevel@tonic-gate * only when it's in the same PD as that of 7397c478bd9Sstevel@tonic-gate * the memory region. 
7407c478bd9Sstevel@tonic-gate */ 7417c478bd9Sstevel@tonic-gate ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl); 7427c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7437c478bd9Sstevel@tonic-gate goto fail1; 7447c478bd9Sstevel@tonic-gate } 7457c478bd9Sstevel@tonic-gate 7467c478bd9Sstevel@tonic-gate /* 7477c478bd9Sstevel@tonic-gate * query HCA ports 7487c478bd9Sstevel@tonic-gate */ 7497c478bd9Sstevel@tonic-gate ibt_status = ibt_query_hca_ports(hca->hca_hdl, 7507c478bd9Sstevel@tonic-gate 0, &pinfop, &hca->hca_nports, &size); 7517c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7527c478bd9Sstevel@tonic-gate goto fail2; 7537c478bd9Sstevel@tonic-gate } 7547c478bd9Sstevel@tonic-gate hca->hca_ports = pinfop; 7557c478bd9Sstevel@tonic-gate hca->hca_pinfosz = size; 7567c478bd9Sstevel@tonic-gate pinfop = NULL; 7577c478bd9Sstevel@tonic-gate 7587c478bd9Sstevel@tonic-gate cq_size = DEF_CQ_SIZE; /* default cq size */ 7597c478bd9Sstevel@tonic-gate /* 7607c478bd9Sstevel@tonic-gate * Create 2 pairs of cq's (1 pair for client 7617c478bd9Sstevel@tonic-gate * and the other pair for server) on this hca. 7627c478bd9Sstevel@tonic-gate * If number of qp's gets too large, then several 7637c478bd9Sstevel@tonic-gate * cq's will be needed. 
7647c478bd9Sstevel@tonic-gate */ 7657c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler, 7667c478bd9Sstevel@tonic-gate &hca->svc_rcq, ribstat); 7677c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7687c478bd9Sstevel@tonic-gate goto fail3; 7697c478bd9Sstevel@tonic-gate } 7707c478bd9Sstevel@tonic-gate 7717c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_svc_scq_handler, 7727c478bd9Sstevel@tonic-gate &hca->svc_scq, ribstat); 7737c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7747c478bd9Sstevel@tonic-gate goto fail3; 7757c478bd9Sstevel@tonic-gate } 7767c478bd9Sstevel@tonic-gate 7777c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler, 7787c478bd9Sstevel@tonic-gate &hca->clnt_rcq, ribstat); 7797c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7807c478bd9Sstevel@tonic-gate goto fail3; 7817c478bd9Sstevel@tonic-gate } 7827c478bd9Sstevel@tonic-gate 7837c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler, 7847c478bd9Sstevel@tonic-gate &hca->clnt_scq, ribstat); 7857c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7867c478bd9Sstevel@tonic-gate goto fail3; 7877c478bd9Sstevel@tonic-gate } 7887c478bd9Sstevel@tonic-gate 7897c478bd9Sstevel@tonic-gate /* 7907c478bd9Sstevel@tonic-gate * Create buffer pools. 7917c478bd9Sstevel@tonic-gate * Note rib_rbuf_create also allocates memory windows. 
7927c478bd9Sstevel@tonic-gate */ 7937c478bd9Sstevel@tonic-gate hca->recv_pool = rib_rbufpool_create(hca, 794*065714dcSSiddheshwar Mahesh RECV_BUFFER, rib_max_rbufs); 7957c478bd9Sstevel@tonic-gate if (hca->recv_pool == NULL) { 7967c478bd9Sstevel@tonic-gate goto fail3; 7977c478bd9Sstevel@tonic-gate } 7987c478bd9Sstevel@tonic-gate 7997c478bd9Sstevel@tonic-gate hca->send_pool = rib_rbufpool_create(hca, 800*065714dcSSiddheshwar Mahesh SEND_BUFFER, rib_max_rbufs); 8017c478bd9Sstevel@tonic-gate if (hca->send_pool == NULL) { 8027c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(hca, RECV_BUFFER); 8037c478bd9Sstevel@tonic-gate goto fail3; 8047c478bd9Sstevel@tonic-gate } 8057c478bd9Sstevel@tonic-gate 8060a701b1eSRobert Gordon if (hca->server_side_cache == NULL) { 8070a701b1eSRobert Gordon (void) sprintf(rssc_name, 8080a701b1eSRobert Gordon "rib_server_side_cache_%04d", i); 8090a701b1eSRobert Gordon hca->server_side_cache = kmem_cache_create( 8100a701b1eSRobert Gordon rssc_name, 8110a701b1eSRobert Gordon sizeof (cache_avl_struct_t), 0, 8120a701b1eSRobert Gordon NULL, 8130a701b1eSRobert Gordon NULL, 8140a701b1eSRobert Gordon rib_server_side_cache_reclaim, 8150a701b1eSRobert Gordon hca, NULL, 0); 8160a701b1eSRobert Gordon } 8170a701b1eSRobert Gordon 8180a701b1eSRobert Gordon avl_create(&hca->avl_tree, 8190a701b1eSRobert Gordon avl_compare, 8200a701b1eSRobert Gordon sizeof (cache_avl_struct_t), 8210a701b1eSRobert Gordon (uint_t)(uintptr_t)&example_avl_node.avl_link- 8220a701b1eSRobert Gordon (uint_t)(uintptr_t)&example_avl_node); 8230a701b1eSRobert Gordon 8240a701b1eSRobert Gordon rw_init(&hca->avl_rw_lock, 8250a701b1eSRobert Gordon NULL, RW_DRIVER, hca->iblock); 8260a701b1eSRobert Gordon mutex_init(&hca->cache_allocation, 8270a701b1eSRobert Gordon NULL, MUTEX_DRIVER, NULL); 8280a701b1eSRobert Gordon hca->avl_init = TRUE; 8290a701b1eSRobert Gordon 8300a701b1eSRobert Gordon /* Create kstats for the cache */ 8310a701b1eSRobert Gordon ASSERT(INGLOBALZONE(curproc)); 8320a701b1eSRobert 
Gordon 8330a701b1eSRobert Gordon if (!stats_enabled) { 8340a701b1eSRobert Gordon ksp = kstat_create_zone("unix", 0, "rpcib_cache", "rpc", 8350a701b1eSRobert Gordon KSTAT_TYPE_NAMED, 8360a701b1eSRobert Gordon sizeof (rpcib_kstat) / sizeof (kstat_named_t), 8370a701b1eSRobert Gordon KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, 8380a701b1eSRobert Gordon GLOBAL_ZONEID); 8390a701b1eSRobert Gordon if (ksp) { 8400a701b1eSRobert Gordon ksp->ks_data = (void *) &rpcib_kstat; 8410a701b1eSRobert Gordon ksp->ks_update = rpcib_cache_kstat_update; 8420a701b1eSRobert Gordon kstat_install(ksp); 8430a701b1eSRobert Gordon stats_enabled = TRUE; 8440a701b1eSRobert Gordon } 8450a701b1eSRobert Gordon } 846*065714dcSSiddheshwar Mahesh if (hca->cleanup_helper == NULL) { 847*065714dcSSiddheshwar Mahesh hca->cleanup_helper = ddi_taskq_create(NULL, 848*065714dcSSiddheshwar Mahesh "CLEANUP_HELPER", 1, TASKQ_DEFAULTPRI, 0); 8490a701b1eSRobert Gordon } 8500a701b1eSRobert Gordon 8517c478bd9Sstevel@tonic-gate /* 8527c478bd9Sstevel@tonic-gate * Initialize the registered service list and 8537c478bd9Sstevel@tonic-gate * the lock 8547c478bd9Sstevel@tonic-gate */ 8557c478bd9Sstevel@tonic-gate hca->service_list = NULL; 8567c478bd9Sstevel@tonic-gate rw_init(&hca->service_list_lock, NULL, RW_DRIVER, hca->iblock); 8577c478bd9Sstevel@tonic-gate 8587c478bd9Sstevel@tonic-gate mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 8597c478bd9Sstevel@tonic-gate cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL); 8607c478bd9Sstevel@tonic-gate rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER, 8617c478bd9Sstevel@tonic-gate hca->iblock); 8627c478bd9Sstevel@tonic-gate rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER, 8637c478bd9Sstevel@tonic-gate hca->iblock); 8647c478bd9Sstevel@tonic-gate rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock); 8657c478bd9Sstevel@tonic-gate mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock); 8667c478bd9Sstevel@tonic-gate hca->inuse = TRUE; 
8677c478bd9Sstevel@tonic-gate /* 8687c478bd9Sstevel@tonic-gate * XXX One hca only. Add multi-hca functionality if needed 8697c478bd9Sstevel@tonic-gate * later. 8707c478bd9Sstevel@tonic-gate */ 8717c478bd9Sstevel@tonic-gate ribstat->hca = hca; 8727c478bd9Sstevel@tonic-gate ribstat->nhca_inited++; 8737c478bd9Sstevel@tonic-gate ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz); 8747c478bd9Sstevel@tonic-gate break; 8757c478bd9Sstevel@tonic-gate 8767c478bd9Sstevel@tonic-gate fail3: 8777c478bd9Sstevel@tonic-gate ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz); 8787c478bd9Sstevel@tonic-gate fail2: 8797c478bd9Sstevel@tonic-gate (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 8807c478bd9Sstevel@tonic-gate fail1: 8817c478bd9Sstevel@tonic-gate (void) ibt_close_hca(hca->hca_hdl); 8827c478bd9Sstevel@tonic-gate 8837c478bd9Sstevel@tonic-gate } 8847c478bd9Sstevel@tonic-gate if (ribstat->hca != NULL) 8857c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 8867c478bd9Sstevel@tonic-gate else 8877c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate 8907c478bd9Sstevel@tonic-gate /* 8917c478bd9Sstevel@tonic-gate * Callback routines 8927c478bd9Sstevel@tonic-gate */ 8937c478bd9Sstevel@tonic-gate 8947c478bd9Sstevel@tonic-gate /* 8957c478bd9Sstevel@tonic-gate * SCQ handlers 8967c478bd9Sstevel@tonic-gate */ 8977c478bd9Sstevel@tonic-gate /* ARGSUSED */ 8987c478bd9Sstevel@tonic-gate static void 8997c478bd9Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 9007c478bd9Sstevel@tonic-gate { 9017c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 9027c478bd9Sstevel@tonic-gate ibt_wc_t wc; 903*065714dcSSiddheshwar Mahesh struct send_wid *wd; 904*065714dcSSiddheshwar Mahesh CONN *conn; 905*065714dcSSiddheshwar Mahesh rib_qp_t *qp; 9067c478bd9Sstevel@tonic-gate int i; 9077c478bd9Sstevel@tonic-gate 9087c478bd9Sstevel@tonic-gate /* 9097c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 
9107c478bd9Sstevel@tonic-gate * completion queue notification. 9117c478bd9Sstevel@tonic-gate */ 9127c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 9137c478bd9Sstevel@tonic-gate 9147c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 9157c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 9167c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 9177c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 9187c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 9197c478bd9Sstevel@tonic-gate return; 9207c478bd9Sstevel@tonic-gate 9217c478bd9Sstevel@tonic-gate /* 9227c478bd9Sstevel@tonic-gate * Got a send completion 9237c478bd9Sstevel@tonic-gate */ 924*065714dcSSiddheshwar Mahesh if (wc.wc_id != RDMA_DUMMY_WRID) { 925*065714dcSSiddheshwar Mahesh wd = (struct send_wid *)(uintptr_t)wc.wc_id; 926*065714dcSSiddheshwar Mahesh qp = wd->qp; 927*065714dcSSiddheshwar Mahesh conn = qptoc(qp); 9287c478bd9Sstevel@tonic-gate 9297c478bd9Sstevel@tonic-gate mutex_enter(&wd->sendwait_lock); 9307c478bd9Sstevel@tonic-gate switch (wc.wc_status) { 9317c478bd9Sstevel@tonic-gate case IBT_WC_SUCCESS: 9327c478bd9Sstevel@tonic-gate wd->status = RDMA_SUCCESS; 9337c478bd9Sstevel@tonic-gate break; 9347c478bd9Sstevel@tonic-gate default: 9357c478bd9Sstevel@tonic-gate /* 9367c478bd9Sstevel@tonic-gate * RC Send Q Error Code Local state Remote State 9377c478bd9Sstevel@tonic-gate * ==================== =========== ============ 9387c478bd9Sstevel@tonic-gate * IBT_WC_BAD_RESPONSE_ERR ERROR None 9397c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_LEN_ERR ERROR None 9407c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_CHAN_OP_ERR ERROR None 9417c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_PROTECT_ERR ERROR None 9427c478bd9Sstevel@tonic-gate * IBT_WC_MEM_WIN_BIND_ERR ERROR None 9437c478bd9Sstevel@tonic-gate * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR 9447c478bd9Sstevel@tonic-gate * IBT_WC_REMOTE_ACCESS_ERR ERROR ERROR 9457c478bd9Sstevel@tonic-gate * 
IBT_WC_REMOTE_OP_ERR ERROR ERROR 9467c478bd9Sstevel@tonic-gate * IBT_WC_RNR_NAK_TIMEOUT_ERR ERROR None 9477c478bd9Sstevel@tonic-gate * IBT_WC_TRANS_TIMEOUT_ERR ERROR None 948*065714dcSSiddheshwar Mahesh * IBT_WC_WR_FLUSHED_ERR ERROR None 9497c478bd9Sstevel@tonic-gate */ 9507c478bd9Sstevel@tonic-gate /* 9517c478bd9Sstevel@tonic-gate * Channel in error state. Set connection to 9527c478bd9Sstevel@tonic-gate * ERROR and cleanup will happen either from 9537c478bd9Sstevel@tonic-gate * conn_release or from rib_conn_get 9547c478bd9Sstevel@tonic-gate */ 9557c478bd9Sstevel@tonic-gate wd->status = RDMA_FAILED; 9567c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 9577c478bd9Sstevel@tonic-gate if (conn->c_state != C_DISCONN_PEND) 9580a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 9597c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 9607c478bd9Sstevel@tonic-gate break; 9617c478bd9Sstevel@tonic-gate } 9620a701b1eSRobert Gordon 9637c478bd9Sstevel@tonic-gate if (wd->cv_sig == 1) { 9647c478bd9Sstevel@tonic-gate /* 9657c478bd9Sstevel@tonic-gate * Notify poster 9667c478bd9Sstevel@tonic-gate */ 9677c478bd9Sstevel@tonic-gate cv_signal(&wd->wait_cv); 9687c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 9697c478bd9Sstevel@tonic-gate } else { 9707c478bd9Sstevel@tonic-gate /* 9717c478bd9Sstevel@tonic-gate * Poster not waiting for notification. 
9727c478bd9Sstevel@tonic-gate * Free the send buffers and send_wid 9737c478bd9Sstevel@tonic-gate */ 9747c478bd9Sstevel@tonic-gate for (i = 0; i < wd->nsbufs; i++) { 975*065714dcSSiddheshwar Mahesh rib_rbuf_free(qptoc(wd->qp), 976*065714dcSSiddheshwar Mahesh SEND_BUFFER, 97711606941Sjwahlig (void *)(uintptr_t)wd->sbufaddr[i]); 9787c478bd9Sstevel@tonic-gate } 979*065714dcSSiddheshwar Mahesh 980*065714dcSSiddheshwar Mahesh /* decrement the send ref count */ 981*065714dcSSiddheshwar Mahesh rib_send_rele(qp); 982*065714dcSSiddheshwar Mahesh 9837c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 9847c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wd); 9857c478bd9Sstevel@tonic-gate } 9867c478bd9Sstevel@tonic-gate } 9877c478bd9Sstevel@tonic-gate } 9887c478bd9Sstevel@tonic-gate } 9897c478bd9Sstevel@tonic-gate 9907c478bd9Sstevel@tonic-gate /* ARGSUSED */ 9917c478bd9Sstevel@tonic-gate static void 9927c478bd9Sstevel@tonic-gate rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 9937c478bd9Sstevel@tonic-gate { 9947c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 9957c478bd9Sstevel@tonic-gate ibt_wc_t wc; 996*065714dcSSiddheshwar Mahesh struct send_wid *wd; 997*065714dcSSiddheshwar Mahesh rib_qp_t *qp; 998*065714dcSSiddheshwar Mahesh CONN *conn; 9997c478bd9Sstevel@tonic-gate int i; 10007c478bd9Sstevel@tonic-gate 10017c478bd9Sstevel@tonic-gate /* 10027c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 10037c478bd9Sstevel@tonic-gate * completion queue notification. 
10047c478bd9Sstevel@tonic-gate */ 10057c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 10067c478bd9Sstevel@tonic-gate 10077c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 10087c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 10097c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 10107c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 10117c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 10127c478bd9Sstevel@tonic-gate return; 10137c478bd9Sstevel@tonic-gate 10147c478bd9Sstevel@tonic-gate /* 10157c478bd9Sstevel@tonic-gate * Got a send completion 10167c478bd9Sstevel@tonic-gate */ 1017*065714dcSSiddheshwar Mahesh if (wc.wc_id != RDMA_DUMMY_WRID) { 1018*065714dcSSiddheshwar Mahesh wd = (struct send_wid *)(uintptr_t)wc.wc_id; 1019*065714dcSSiddheshwar Mahesh qp = wd->qp; 1020*065714dcSSiddheshwar Mahesh conn = qptoc(qp); 10217c478bd9Sstevel@tonic-gate mutex_enter(&wd->sendwait_lock); 1022*065714dcSSiddheshwar Mahesh 1023*065714dcSSiddheshwar Mahesh switch (wc.wc_status) { 1024*065714dcSSiddheshwar Mahesh case IBT_WC_SUCCESS: 1025*065714dcSSiddheshwar Mahesh wd->status = RDMA_SUCCESS; 1026*065714dcSSiddheshwar Mahesh break; 1027*065714dcSSiddheshwar Mahesh default: 1028*065714dcSSiddheshwar Mahesh /* 1029*065714dcSSiddheshwar Mahesh * Channel in error state. Set connection to 1030*065714dcSSiddheshwar Mahesh * ERROR and cleanup will happen either from 1031*065714dcSSiddheshwar Mahesh * conn_release or conn timeout. 
1032*065714dcSSiddheshwar Mahesh */ 1033*065714dcSSiddheshwar Mahesh wd->status = RDMA_FAILED; 1034*065714dcSSiddheshwar Mahesh mutex_enter(&conn->c_lock); 1035*065714dcSSiddheshwar Mahesh if (conn->c_state != C_DISCONN_PEND) 1036*065714dcSSiddheshwar Mahesh conn->c_state = C_ERROR_CONN; 1037*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 1038*065714dcSSiddheshwar Mahesh break; 1039*065714dcSSiddheshwar Mahesh } 1040*065714dcSSiddheshwar Mahesh 10417c478bd9Sstevel@tonic-gate if (wd->cv_sig == 1) { 10427c478bd9Sstevel@tonic-gate /* 10437c478bd9Sstevel@tonic-gate * Update completion status and notify poster 10447c478bd9Sstevel@tonic-gate */ 10457c478bd9Sstevel@tonic-gate cv_signal(&wd->wait_cv); 10467c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 10477c478bd9Sstevel@tonic-gate } else { 10487c478bd9Sstevel@tonic-gate /* 10497c478bd9Sstevel@tonic-gate * Poster not waiting for notification. 10507c478bd9Sstevel@tonic-gate * Free the send buffers and send_wid 10517c478bd9Sstevel@tonic-gate */ 10527c478bd9Sstevel@tonic-gate for (i = 0; i < wd->nsbufs; i++) { 10530a701b1eSRobert Gordon rib_rbuf_free(qptoc(wd->qp), 10540a701b1eSRobert Gordon SEND_BUFFER, 105511606941Sjwahlig (void *)(uintptr_t)wd->sbufaddr[i]); 10567c478bd9Sstevel@tonic-gate } 1057*065714dcSSiddheshwar Mahesh 1058*065714dcSSiddheshwar Mahesh /* decrement the send ref count */ 1059*065714dcSSiddheshwar Mahesh rib_send_rele(qp); 1060*065714dcSSiddheshwar Mahesh 10617c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 10627c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wd); 10637c478bd9Sstevel@tonic-gate } 10647c478bd9Sstevel@tonic-gate } 10657c478bd9Sstevel@tonic-gate } 10667c478bd9Sstevel@tonic-gate } 10677c478bd9Sstevel@tonic-gate 10687c478bd9Sstevel@tonic-gate /* 10697c478bd9Sstevel@tonic-gate * RCQ handler 10707c478bd9Sstevel@tonic-gate */ 10717c478bd9Sstevel@tonic-gate /* ARGSUSED */ 10727c478bd9Sstevel@tonic-gate static void 10737c478bd9Sstevel@tonic-gate 
rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 10747c478bd9Sstevel@tonic-gate { 10757c478bd9Sstevel@tonic-gate rib_qp_t *qp; 10767c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 10777c478bd9Sstevel@tonic-gate ibt_wc_t wc; 10787c478bd9Sstevel@tonic-gate struct recv_wid *rwid; 10797c478bd9Sstevel@tonic-gate 10807c478bd9Sstevel@tonic-gate /* 10817c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 10827c478bd9Sstevel@tonic-gate * completion queue notification. 10837c478bd9Sstevel@tonic-gate */ 10847c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 10857c478bd9Sstevel@tonic-gate 10867c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 10877c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 10887c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 10897c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 10907c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 10917c478bd9Sstevel@tonic-gate return; 10927c478bd9Sstevel@tonic-gate 109311606941Sjwahlig rwid = (struct recv_wid *)(uintptr_t)wc.wc_id; 10947c478bd9Sstevel@tonic-gate qp = rwid->qp; 10957c478bd9Sstevel@tonic-gate if (wc.wc_status == IBT_WC_SUCCESS) { 10967c478bd9Sstevel@tonic-gate XDR inxdrs, *xdrs; 10977c478bd9Sstevel@tonic-gate uint_t xid, vers, op, find_xid = 0; 10987c478bd9Sstevel@tonic-gate struct reply *r; 10997c478bd9Sstevel@tonic-gate CONN *conn = qptoc(qp); 11000a701b1eSRobert Gordon uint32_t rdma_credit = 0; 11017c478bd9Sstevel@tonic-gate 11027c478bd9Sstevel@tonic-gate xdrs = &inxdrs; 110311606941Sjwahlig xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr, 11047c478bd9Sstevel@tonic-gate wc.wc_bytes_xfer, XDR_DECODE); 11057c478bd9Sstevel@tonic-gate /* 11067c478bd9Sstevel@tonic-gate * Treat xid as opaque (xid is the first entity 11077c478bd9Sstevel@tonic-gate * in the rpc rdma message). 
11087c478bd9Sstevel@tonic-gate */ 110911606941Sjwahlig xid = *(uint32_t *)(uintptr_t)rwid->addr; 11100a701b1eSRobert Gordon 11117c478bd9Sstevel@tonic-gate /* Skip xid and set the xdr position accordingly. */ 11127c478bd9Sstevel@tonic-gate XDR_SETPOS(xdrs, sizeof (uint32_t)); 11137c478bd9Sstevel@tonic-gate (void) xdr_u_int(xdrs, &vers); 11140a701b1eSRobert Gordon (void) xdr_u_int(xdrs, &rdma_credit); 11157c478bd9Sstevel@tonic-gate (void) xdr_u_int(xdrs, &op); 11167c478bd9Sstevel@tonic-gate XDR_DESTROY(xdrs); 11170a701b1eSRobert Gordon 11187c478bd9Sstevel@tonic-gate if (vers != RPCRDMA_VERS) { 11197c478bd9Sstevel@tonic-gate /* 11200a701b1eSRobert Gordon * Invalid RPC/RDMA version. Cannot 11210a701b1eSRobert Gordon * interoperate. Set connection to 11220a701b1eSRobert Gordon * ERROR state and bail out. 11237c478bd9Sstevel@tonic-gate */ 11247c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 11257c478bd9Sstevel@tonic-gate if (conn->c_state != C_DISCONN_PEND) 11260a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 11277c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 112811606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 112911606941Sjwahlig (void *)(uintptr_t)rwid->addr); 11307c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 11317c478bd9Sstevel@tonic-gate continue; 11327c478bd9Sstevel@tonic-gate } 11337c478bd9Sstevel@tonic-gate 11347c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 11357c478bd9Sstevel@tonic-gate for (r = qp->replylist; r != NULL; r = r->next) { 11367c478bd9Sstevel@tonic-gate if (r->xid == xid) { 11377c478bd9Sstevel@tonic-gate find_xid = 1; 11387c478bd9Sstevel@tonic-gate switch (op) { 11397c478bd9Sstevel@tonic-gate case RDMA_MSG: 11407c478bd9Sstevel@tonic-gate case RDMA_NOMSG: 11417c478bd9Sstevel@tonic-gate case RDMA_MSGP: 11427c478bd9Sstevel@tonic-gate r->status = RDMA_SUCCESS; 11437c478bd9Sstevel@tonic-gate r->vaddr_cq = rwid->addr; 11440a701b1eSRobert Gordon r->bytes_xfer = 11450a701b1eSRobert Gordon wc.wc_bytes_xfer; 
11467c478bd9Sstevel@tonic-gate cv_signal(&r->wait_cv); 11477c478bd9Sstevel@tonic-gate break; 11487c478bd9Sstevel@tonic-gate default: 11490a701b1eSRobert Gordon rib_rbuf_free(qptoc(qp), 11500a701b1eSRobert Gordon RECV_BUFFER, 11510a701b1eSRobert Gordon (void *)(uintptr_t) 11520a701b1eSRobert Gordon rwid->addr); 11537c478bd9Sstevel@tonic-gate break; 11547c478bd9Sstevel@tonic-gate } 11557c478bd9Sstevel@tonic-gate break; 11567c478bd9Sstevel@tonic-gate } 11577c478bd9Sstevel@tonic-gate } 11587c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 11597c478bd9Sstevel@tonic-gate if (find_xid == 0) { 11607c478bd9Sstevel@tonic-gate /* RPC caller not waiting for reply */ 11610a701b1eSRobert Gordon 11620a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__nomatchxid1, 11630a701b1eSRobert Gordon int, xid); 11640a701b1eSRobert Gordon 11657c478bd9Sstevel@tonic-gate rib_rbuf_free(qptoc(qp), RECV_BUFFER, 116611606941Sjwahlig (void *)(uintptr_t)rwid->addr); 11677c478bd9Sstevel@tonic-gate } 11687c478bd9Sstevel@tonic-gate } else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) { 11697c478bd9Sstevel@tonic-gate CONN *conn = qptoc(qp); 11707c478bd9Sstevel@tonic-gate 11717c478bd9Sstevel@tonic-gate /* 11727c478bd9Sstevel@tonic-gate * Connection being flushed. 
Just free 11737c478bd9Sstevel@tonic-gate * the posted buffer 11747c478bd9Sstevel@tonic-gate */ 117511606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 117611606941Sjwahlig (void *)(uintptr_t)rwid->addr); 11777c478bd9Sstevel@tonic-gate } else { 11787c478bd9Sstevel@tonic-gate CONN *conn = qptoc(qp); 11797c478bd9Sstevel@tonic-gate /* 11807c478bd9Sstevel@tonic-gate * RC Recv Q Error Code Local state Remote State 11817c478bd9Sstevel@tonic-gate * ==================== =========== ============ 11827c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_ACCESS_ERR ERROR ERROR when NAK recvd 11837c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_LEN_ERR ERROR ERROR when NAK recvd 11847c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_PROTECT_ERR ERROR ERROR when NAK recvd 11857c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_CHAN_OP_ERR ERROR ERROR when NAK recvd 11867c478bd9Sstevel@tonic-gate * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR when NAK recvd 11877c478bd9Sstevel@tonic-gate * IBT_WC_WR_FLUSHED_ERR None None 11887c478bd9Sstevel@tonic-gate */ 11897c478bd9Sstevel@tonic-gate /* 11907c478bd9Sstevel@tonic-gate * Channel in error state. Set connection 11917c478bd9Sstevel@tonic-gate * in ERROR state. 
11927c478bd9Sstevel@tonic-gate */ 11937c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 11947c478bd9Sstevel@tonic-gate if (conn->c_state != C_DISCONN_PEND) 11950a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 11967c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 119711606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 119811606941Sjwahlig (void *)(uintptr_t)rwid->addr); 11997c478bd9Sstevel@tonic-gate } 12007c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 12017c478bd9Sstevel@tonic-gate } 12027c478bd9Sstevel@tonic-gate } 12037c478bd9Sstevel@tonic-gate 12047c478bd9Sstevel@tonic-gate /* Server side */ 12057c478bd9Sstevel@tonic-gate /* ARGSUSED */ 12067c478bd9Sstevel@tonic-gate static void 12077c478bd9Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 12087c478bd9Sstevel@tonic-gate { 12090a701b1eSRobert Gordon rdma_recv_data_t *rdp; 12107c478bd9Sstevel@tonic-gate rib_qp_t *qp; 12117c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 12127c478bd9Sstevel@tonic-gate ibt_wc_t wc; 12137c478bd9Sstevel@tonic-gate struct svc_recv *s_recvp; 12147c478bd9Sstevel@tonic-gate CONN *conn; 12157c478bd9Sstevel@tonic-gate mblk_t *mp; 12167c478bd9Sstevel@tonic-gate 12177c478bd9Sstevel@tonic-gate /* 12187c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 12197c478bd9Sstevel@tonic-gate * completion queue notification. 
12207c478bd9Sstevel@tonic-gate */ 12217c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 12227c478bd9Sstevel@tonic-gate 12237c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 12247c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 12257c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 12267c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 12277c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 12287c478bd9Sstevel@tonic-gate return; 12297c478bd9Sstevel@tonic-gate 123011606941Sjwahlig s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id; 12317c478bd9Sstevel@tonic-gate qp = s_recvp->qp; 12327c478bd9Sstevel@tonic-gate conn = qptoc(qp); 12337c478bd9Sstevel@tonic-gate mutex_enter(&qp->posted_rbufs_lock); 12347c478bd9Sstevel@tonic-gate qp->n_posted_rbufs--; 12357c478bd9Sstevel@tonic-gate if (qp->n_posted_rbufs == 0) 12367c478bd9Sstevel@tonic-gate cv_signal(&qp->posted_rbufs_cv); 12377c478bd9Sstevel@tonic-gate mutex_exit(&qp->posted_rbufs_lock); 12387c478bd9Sstevel@tonic-gate 12397c478bd9Sstevel@tonic-gate if (wc.wc_status == IBT_WC_SUCCESS) { 12407c478bd9Sstevel@tonic-gate XDR inxdrs, *xdrs; 12417c478bd9Sstevel@tonic-gate uint_t xid, vers, op; 12420a701b1eSRobert Gordon uint32_t rdma_credit; 12437c478bd9Sstevel@tonic-gate 12447c478bd9Sstevel@tonic-gate xdrs = &inxdrs; 12457c478bd9Sstevel@tonic-gate /* s_recvp->vaddr stores data */ 124611606941Sjwahlig xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr, 12477c478bd9Sstevel@tonic-gate wc.wc_bytes_xfer, XDR_DECODE); 12487c478bd9Sstevel@tonic-gate 12497c478bd9Sstevel@tonic-gate /* 12507c478bd9Sstevel@tonic-gate * Treat xid as opaque (xid is the first entity 12517c478bd9Sstevel@tonic-gate * in the rpc rdma message). 12527c478bd9Sstevel@tonic-gate */ 125311606941Sjwahlig xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr; 12547c478bd9Sstevel@tonic-gate /* Skip xid and set the xdr position accordingly. 
*/ 12557c478bd9Sstevel@tonic-gate XDR_SETPOS(xdrs, sizeof (uint32_t)); 12567c478bd9Sstevel@tonic-gate if (!xdr_u_int(xdrs, &vers) || 12570a701b1eSRobert Gordon !xdr_u_int(xdrs, &rdma_credit) || 12587c478bd9Sstevel@tonic-gate !xdr_u_int(xdrs, &op)) { 12597c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 126011606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12617c478bd9Sstevel@tonic-gate XDR_DESTROY(xdrs); 12627c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 12637c478bd9Sstevel@tonic-gate continue; 12647c478bd9Sstevel@tonic-gate } 12657c478bd9Sstevel@tonic-gate XDR_DESTROY(xdrs); 12667c478bd9Sstevel@tonic-gate 12677c478bd9Sstevel@tonic-gate if (vers != RPCRDMA_VERS) { 12687c478bd9Sstevel@tonic-gate /* 12690a701b1eSRobert Gordon * Invalid RPC/RDMA version. 12700a701b1eSRobert Gordon * Drop rpc rdma message. 12717c478bd9Sstevel@tonic-gate */ 12727c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 127311606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12747c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 12757c478bd9Sstevel@tonic-gate continue; 12767c478bd9Sstevel@tonic-gate } 12777c478bd9Sstevel@tonic-gate /* 12787c478bd9Sstevel@tonic-gate * Is this for RDMA_DONE? 
12797c478bd9Sstevel@tonic-gate */ 12807c478bd9Sstevel@tonic-gate if (op == RDMA_DONE) { 12817c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 128211606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12837c478bd9Sstevel@tonic-gate /* 12847c478bd9Sstevel@tonic-gate * Wake up the thread waiting on 12857c478bd9Sstevel@tonic-gate * a RDMA_DONE for xid 12867c478bd9Sstevel@tonic-gate */ 12877c478bd9Sstevel@tonic-gate mutex_enter(&qp->rdlist_lock); 12887c478bd9Sstevel@tonic-gate rdma_done_notify(qp, xid); 12897c478bd9Sstevel@tonic-gate mutex_exit(&qp->rdlist_lock); 12907c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 12917c478bd9Sstevel@tonic-gate continue; 12927c478bd9Sstevel@tonic-gate } 12937c478bd9Sstevel@tonic-gate 12947c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 12957c478bd9Sstevel@tonic-gate if (plugin_state == ACCEPT) { 12960a701b1eSRobert Gordon while ((mp = allocb(sizeof (*rdp), BPRI_LO)) 12970a701b1eSRobert Gordon == NULL) 12980a701b1eSRobert Gordon (void) strwaitbuf( 12990a701b1eSRobert Gordon sizeof (*rdp), BPRI_LO); 13007c478bd9Sstevel@tonic-gate /* 13017c478bd9Sstevel@tonic-gate * Plugin is in accept state, hence the master 13027c478bd9Sstevel@tonic-gate * transport queue for this is still accepting 13037c478bd9Sstevel@tonic-gate * requests. Hence we can call svc_queuereq to 13047c478bd9Sstevel@tonic-gate * queue this recieved msg. 
13057c478bd9Sstevel@tonic-gate */ 13060a701b1eSRobert Gordon rdp = (rdma_recv_data_t *)mp->b_rptr; 13070a701b1eSRobert Gordon rdp->conn = conn; 13080a701b1eSRobert Gordon rdp->rpcmsg.addr = 13090a701b1eSRobert Gordon (caddr_t)(uintptr_t)s_recvp->vaddr; 13100a701b1eSRobert Gordon rdp->rpcmsg.type = RECV_BUFFER; 13110a701b1eSRobert Gordon rdp->rpcmsg.len = wc.wc_bytes_xfer; 13120a701b1eSRobert Gordon rdp->status = wc.wc_status; 13137c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 13147c478bd9Sstevel@tonic-gate conn->c_ref++; 13157c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 13160a701b1eSRobert Gordon mp->b_wptr += sizeof (*rdp); 13177c478bd9Sstevel@tonic-gate svc_queuereq((queue_t *)rib_stat->q, mp); 13187c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 13197c478bd9Sstevel@tonic-gate } else { 13207c478bd9Sstevel@tonic-gate /* 13217c478bd9Sstevel@tonic-gate * The master transport for this is going 13227c478bd9Sstevel@tonic-gate * away and the queue is not accepting anymore 13237c478bd9Sstevel@tonic-gate * requests for krpc, so don't do anything, just 13247c478bd9Sstevel@tonic-gate * free the msg. 13257c478bd9Sstevel@tonic-gate */ 13267c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 13277c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 132811606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 13297c478bd9Sstevel@tonic-gate } 13307c478bd9Sstevel@tonic-gate } else { 13317c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 133211606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 13337c478bd9Sstevel@tonic-gate } 13347c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 13357c478bd9Sstevel@tonic-gate } 13367c478bd9Sstevel@tonic-gate } 13377c478bd9Sstevel@tonic-gate 13387c478bd9Sstevel@tonic-gate /* 13397c478bd9Sstevel@tonic-gate * Handles DR event of IBT_HCA_DETACH_EVENT. 
13407c478bd9Sstevel@tonic-gate */ 13417c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13427c478bd9Sstevel@tonic-gate static void 13437c478bd9Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 13447c478bd9Sstevel@tonic-gate ibt_async_code_t code, ibt_async_event_t *event) 13457c478bd9Sstevel@tonic-gate { 13467c478bd9Sstevel@tonic-gate 13477c478bd9Sstevel@tonic-gate switch (code) { 13487c478bd9Sstevel@tonic-gate case IBT_HCA_ATTACH_EVENT: 13497c478bd9Sstevel@tonic-gate /* ignore */ 13507c478bd9Sstevel@tonic-gate break; 13517c478bd9Sstevel@tonic-gate case IBT_HCA_DETACH_EVENT: 13527c478bd9Sstevel@tonic-gate { 13537c478bd9Sstevel@tonic-gate ASSERT(rib_stat->hca->hca_hdl == hca_hdl); 13547c478bd9Sstevel@tonic-gate rib_detach_hca(rib_stat->hca); 13557c478bd9Sstevel@tonic-gate #ifdef DEBUG 13567c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n"); 13577c478bd9Sstevel@tonic-gate #endif 13587c478bd9Sstevel@tonic-gate break; 13597c478bd9Sstevel@tonic-gate } 13607c478bd9Sstevel@tonic-gate #ifdef DEBUG 13617c478bd9Sstevel@tonic-gate case IBT_EVENT_PATH_MIGRATED: 13620a701b1eSRobert Gordon cmn_err(CE_NOTE, "rib_async_handler(): " 13630a701b1eSRobert Gordon "IBT_EVENT_PATH_MIGRATED\n"); 13647c478bd9Sstevel@tonic-gate break; 13657c478bd9Sstevel@tonic-gate case IBT_EVENT_SQD: 13667c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n"); 13677c478bd9Sstevel@tonic-gate break; 13687c478bd9Sstevel@tonic-gate case IBT_EVENT_COM_EST: 13697c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n"); 13707c478bd9Sstevel@tonic-gate break; 13717c478bd9Sstevel@tonic-gate case IBT_ERROR_CATASTROPHIC_CHAN: 13720a701b1eSRobert Gordon cmn_err(CE_NOTE, "rib_async_handler(): " 13730a701b1eSRobert Gordon "IBT_ERROR_CATASTROPHIC_CHAN\n"); 13747c478bd9Sstevel@tonic-gate break; 13757c478bd9Sstevel@tonic-gate case IBT_ERROR_INVALID_REQUEST_CHAN: 13767c478bd9Sstevel@tonic-gate 
cmn_err(CE_NOTE, "rib_async_handler(): " 13777c478bd9Sstevel@tonic-gate "IBT_ERROR_INVALID_REQUEST_CHAN\n"); 13787c478bd9Sstevel@tonic-gate break; 13797c478bd9Sstevel@tonic-gate case IBT_ERROR_ACCESS_VIOLATION_CHAN: 13807c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): " 13817c478bd9Sstevel@tonic-gate "IBT_ERROR_ACCESS_VIOLATION_CHAN\n"); 13827c478bd9Sstevel@tonic-gate break; 13837c478bd9Sstevel@tonic-gate case IBT_ERROR_PATH_MIGRATE_REQ: 13840a701b1eSRobert Gordon cmn_err(CE_NOTE, "rib_async_handler(): " 13850a701b1eSRobert Gordon "IBT_ERROR_PATH_MIGRATE_REQ\n"); 13867c478bd9Sstevel@tonic-gate break; 13877c478bd9Sstevel@tonic-gate case IBT_ERROR_CQ: 13887c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n"); 13897c478bd9Sstevel@tonic-gate break; 13907c478bd9Sstevel@tonic-gate case IBT_ERROR_PORT_DOWN: 13917c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n"); 13927c478bd9Sstevel@tonic-gate break; 13937c478bd9Sstevel@tonic-gate case IBT_EVENT_PORT_UP: 13947c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n"); 13957c478bd9Sstevel@tonic-gate break; 13967c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE1: 13977c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n"); 13987c478bd9Sstevel@tonic-gate break; 13997c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE2: 14007c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n"); 14017c478bd9Sstevel@tonic-gate break; 14027c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE3: 14037c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n"); 14047c478bd9Sstevel@tonic-gate break; 14057c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE4: 14067c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n"); 14077c478bd9Sstevel@tonic-gate break; 14087c478bd9Sstevel@tonic-gate #endif 14097c478bd9Sstevel@tonic-gate default: 
14107c478bd9Sstevel@tonic-gate break; 14117c478bd9Sstevel@tonic-gate } 14127c478bd9Sstevel@tonic-gate } 14137c478bd9Sstevel@tonic-gate 14147c478bd9Sstevel@tonic-gate /* 14157c478bd9Sstevel@tonic-gate * Client's reachable function. 14167c478bd9Sstevel@tonic-gate */ 14177c478bd9Sstevel@tonic-gate static rdma_stat 14187c478bd9Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle) 14197c478bd9Sstevel@tonic-gate { 14207c478bd9Sstevel@tonic-gate rdma_stat status; 1421214ae7d0SSiddheshwar Mahesh rpcib_ping_t rpt; 14227c478bd9Sstevel@tonic-gate 14237c478bd9Sstevel@tonic-gate /* 14247c478bd9Sstevel@tonic-gate * First check if a hca is still attached 14257c478bd9Sstevel@tonic-gate */ 14267c478bd9Sstevel@tonic-gate rw_enter(&rib_stat->hca->state_lock, RW_READER); 14277c478bd9Sstevel@tonic-gate if (rib_stat->hca->state != HCA_INITED) { 14287c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 14297c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 14307c478bd9Sstevel@tonic-gate } 1431214ae7d0SSiddheshwar Mahesh 1432214ae7d0SSiddheshwar Mahesh bzero(&rpt, sizeof (rpcib_ping_t)); 1433214ae7d0SSiddheshwar Mahesh status = rib_ping_srv(addr_type, raddr, &rpt); 14347c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 14357c478bd9Sstevel@tonic-gate 14367c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 1437214ae7d0SSiddheshwar Mahesh *handle = (void *)rpt.hca; 14387c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 14397c478bd9Sstevel@tonic-gate } else { 14407c478bd9Sstevel@tonic-gate *handle = NULL; 14410a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__pingfailed); 14427c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 14437c478bd9Sstevel@tonic-gate } 14447c478bd9Sstevel@tonic-gate } 14457c478bd9Sstevel@tonic-gate 14467c478bd9Sstevel@tonic-gate /* Client side qp creation */ 14477c478bd9Sstevel@tonic-gate static rdma_stat 14487c478bd9Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp) 
14497c478bd9Sstevel@tonic-gate { 14507c478bd9Sstevel@tonic-gate rib_qp_t *kqp = NULL; 14517c478bd9Sstevel@tonic-gate CONN *conn; 14520a701b1eSRobert Gordon rdma_clnt_cred_ctrl_t *cc_info; 14537c478bd9Sstevel@tonic-gate 14547c478bd9Sstevel@tonic-gate ASSERT(qp != NULL); 14557c478bd9Sstevel@tonic-gate *qp = NULL; 14567c478bd9Sstevel@tonic-gate 14577c478bd9Sstevel@tonic-gate kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP); 14587c478bd9Sstevel@tonic-gate conn = qptoc(kqp); 14597c478bd9Sstevel@tonic-gate kqp->hca = hca; 14607c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_rdmamod = &rib_mod; 14617c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_private = (caddr_t)kqp; 14627c478bd9Sstevel@tonic-gate 14637c478bd9Sstevel@tonic-gate kqp->mode = RIB_CLIENT; 14647c478bd9Sstevel@tonic-gate kqp->chan_flags = IBT_BLOCKING; 14657c478bd9Sstevel@tonic-gate conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP); 14667c478bd9Sstevel@tonic-gate bcopy(raddr->buf, conn->c_raddr.buf, raddr->len); 14677c478bd9Sstevel@tonic-gate conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len; 14687c478bd9Sstevel@tonic-gate /* 14697c478bd9Sstevel@tonic-gate * Initialize 14707c478bd9Sstevel@tonic-gate */ 14717c478bd9Sstevel@tonic-gate cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL); 14727c478bd9Sstevel@tonic-gate cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL); 14737c478bd9Sstevel@tonic-gate mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 1474*065714dcSSiddheshwar Mahesh cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL); 1475*065714dcSSiddheshwar Mahesh mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 14767c478bd9Sstevel@tonic-gate mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock); 14777c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 14787c478bd9Sstevel@tonic-gate mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 14797c478bd9Sstevel@tonic-gate cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL); 
14807c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock); 14810a701b1eSRobert Gordon /* 14820a701b1eSRobert Gordon * Initialize the client credit control 14830a701b1eSRobert Gordon * portion of the rdmaconn struct. 14840a701b1eSRobert Gordon */ 14850a701b1eSRobert Gordon kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT; 14860a701b1eSRobert Gordon cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc; 14870a701b1eSRobert Gordon cc_info->clnt_cc_granted_ops = 0; 14880a701b1eSRobert Gordon cc_info->clnt_cc_in_flight_ops = 0; 14890a701b1eSRobert Gordon cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL); 14907c478bd9Sstevel@tonic-gate 14917c478bd9Sstevel@tonic-gate *qp = kqp; 14927c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 14937c478bd9Sstevel@tonic-gate } 14947c478bd9Sstevel@tonic-gate 14957c478bd9Sstevel@tonic-gate /* Server side qp creation */ 14967c478bd9Sstevel@tonic-gate static rdma_stat 14977c478bd9Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp) 14987c478bd9Sstevel@tonic-gate { 14997c478bd9Sstevel@tonic-gate rib_qp_t *kqp = NULL; 15007c478bd9Sstevel@tonic-gate ibt_chan_sizes_t chan_sizes; 15017c478bd9Sstevel@tonic-gate ibt_rc_chan_alloc_args_t qp_attr; 15027c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 15030a701b1eSRobert Gordon rdma_srv_cred_ctrl_t *cc_info; 15047c478bd9Sstevel@tonic-gate 15057c478bd9Sstevel@tonic-gate *qp = NULL; 15067c478bd9Sstevel@tonic-gate 15077c478bd9Sstevel@tonic-gate kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP); 15087c478bd9Sstevel@tonic-gate kqp->hca = hca; 15097c478bd9Sstevel@tonic-gate kqp->port_num = port; 15107c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_rdmamod = &rib_mod; 15117c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_private = (caddr_t)kqp; 15127c478bd9Sstevel@tonic-gate 15137c478bd9Sstevel@tonic-gate /* 15147c478bd9Sstevel@tonic-gate * Create the qp handle 15157c478bd9Sstevel@tonic-gate */ 15167c478bd9Sstevel@tonic-gate bzero(&qp_attr, 
sizeof (ibt_rc_chan_alloc_args_t)); 15177c478bd9Sstevel@tonic-gate qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl; 15187c478bd9Sstevel@tonic-gate qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl; 15197c478bd9Sstevel@tonic-gate qp_attr.rc_pd = hca->pd_hdl; 15207c478bd9Sstevel@tonic-gate qp_attr.rc_hca_port_num = port; 15217c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX; 15227c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX; 15237c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE; 15247c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE; 15257c478bd9Sstevel@tonic-gate qp_attr.rc_clone_chan = NULL; 15267c478bd9Sstevel@tonic-gate qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 15277c478bd9Sstevel@tonic-gate qp_attr.rc_flags = IBT_WR_SIGNALED; 15287c478bd9Sstevel@tonic-gate 15297c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 15307c478bd9Sstevel@tonic-gate if (hca->state != HCA_DETACHED) { 15317c478bd9Sstevel@tonic-gate ibt_status = ibt_alloc_rc_channel(hca->hca_hdl, 15327c478bd9Sstevel@tonic-gate IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl, 15337c478bd9Sstevel@tonic-gate &chan_sizes); 15347c478bd9Sstevel@tonic-gate } else { 15357c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 15367c478bd9Sstevel@tonic-gate goto fail; 15377c478bd9Sstevel@tonic-gate } 15387c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 15397c478bd9Sstevel@tonic-gate 15407c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 15410a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i_svccreatechanfail, 15420a701b1eSRobert Gordon int, ibt_status); 15437c478bd9Sstevel@tonic-gate goto fail; 15447c478bd9Sstevel@tonic-gate } 15457c478bd9Sstevel@tonic-gate 15467c478bd9Sstevel@tonic-gate kqp->mode = RIB_SERVER; 15477c478bd9Sstevel@tonic-gate kqp->chan_flags = IBT_BLOCKING; 15487c478bd9Sstevel@tonic-gate kqp->q = q; /* server ONLY */ 15497c478bd9Sstevel@tonic-gate 15507c478bd9Sstevel@tonic-gate cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, 
NULL); 15517c478bd9Sstevel@tonic-gate cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL); 15527c478bd9Sstevel@tonic-gate mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 15537c478bd9Sstevel@tonic-gate mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 1554*065714dcSSiddheshwar Mahesh cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL); 1555*065714dcSSiddheshwar Mahesh mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 15567c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 15577c478bd9Sstevel@tonic-gate mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 15587c478bd9Sstevel@tonic-gate cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL); 15597c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock); 15607c478bd9Sstevel@tonic-gate /* 15617c478bd9Sstevel@tonic-gate * Set the private data area to qp to be used in callbacks 15627c478bd9Sstevel@tonic-gate */ 15637c478bd9Sstevel@tonic-gate ibt_set_chan_private(kqp->qp_hdl, (void *)kqp); 15647c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_state = C_CONNECTED; 15650a701b1eSRobert Gordon 15660a701b1eSRobert Gordon /* 15670a701b1eSRobert Gordon * Initialize the server credit control 15680a701b1eSRobert Gordon * portion of the rdmaconn struct. 
15690a701b1eSRobert Gordon */ 15700a701b1eSRobert Gordon kqp->rdmaconn.c_cc_type = RDMA_CC_SRV; 15710a701b1eSRobert Gordon cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc; 15720a701b1eSRobert Gordon cc_info->srv_cc_buffers_granted = preposted_rbufs; 15730a701b1eSRobert Gordon cc_info->srv_cc_cur_buffers_used = 0; 15740a701b1eSRobert Gordon cc_info->srv_cc_posted = preposted_rbufs; 15750a701b1eSRobert Gordon 15767c478bd9Sstevel@tonic-gate *qp = kqp; 15770a701b1eSRobert Gordon 15787c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 15797c478bd9Sstevel@tonic-gate fail: 15807c478bd9Sstevel@tonic-gate if (kqp) 15817c478bd9Sstevel@tonic-gate kmem_free(kqp, sizeof (rib_qp_t)); 15827c478bd9Sstevel@tonic-gate 15837c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 15847c478bd9Sstevel@tonic-gate } 15857c478bd9Sstevel@tonic-gate 15867c478bd9Sstevel@tonic-gate /* ARGSUSED */ 15877c478bd9Sstevel@tonic-gate ibt_cm_status_t 15887c478bd9Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event, 15897c478bd9Sstevel@tonic-gate ibt_cm_return_args_t *ret_args, void *priv_data, 15907c478bd9Sstevel@tonic-gate ibt_priv_data_len_t len) 15917c478bd9Sstevel@tonic-gate { 15927c478bd9Sstevel@tonic-gate rpcib_state_t *ribstat; 15937c478bd9Sstevel@tonic-gate rib_hca_t *hca; 15947c478bd9Sstevel@tonic-gate 15957c478bd9Sstevel@tonic-gate ribstat = (rpcib_state_t *)clnt_hdl; 15967c478bd9Sstevel@tonic-gate hca = (rib_hca_t *)ribstat->hca; 15977c478bd9Sstevel@tonic-gate 15987c478bd9Sstevel@tonic-gate switch (event->cm_type) { 15997c478bd9Sstevel@tonic-gate 16007c478bd9Sstevel@tonic-gate /* got a connection close event */ 16017c478bd9Sstevel@tonic-gate case IBT_CM_EVENT_CONN_CLOSED: 16027c478bd9Sstevel@tonic-gate { 16037c478bd9Sstevel@tonic-gate CONN *conn; 16047c478bd9Sstevel@tonic-gate rib_qp_t *qp; 16057c478bd9Sstevel@tonic-gate 16067c478bd9Sstevel@tonic-gate /* check reason why connection was closed */ 16077c478bd9Sstevel@tonic-gate switch (event->cm_event.closed) { 
16087c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DREP_RCVD: 16097c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DREQ_TIMEOUT: 16107c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DUP: 16117c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_ABORT: 16127c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_ALREADY: 16137c478bd9Sstevel@tonic-gate /* 16147c478bd9Sstevel@tonic-gate * These cases indicate the local end initiated 16157c478bd9Sstevel@tonic-gate * the closing of the channel. Nothing to do here. 16167c478bd9Sstevel@tonic-gate */ 16177c478bd9Sstevel@tonic-gate break; 16187c478bd9Sstevel@tonic-gate default: 16197c478bd9Sstevel@tonic-gate /* 16207c478bd9Sstevel@tonic-gate * Reason for CONN_CLOSED event must be one of 16217c478bd9Sstevel@tonic-gate * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD 16227c478bd9Sstevel@tonic-gate * or IBT_CM_CLOSED_STALE. These indicate cases were 16237c478bd9Sstevel@tonic-gate * the remote end is closing the channel. In these 16247c478bd9Sstevel@tonic-gate * cases free the channel and transition to error 16257c478bd9Sstevel@tonic-gate * state 16267c478bd9Sstevel@tonic-gate */ 16277c478bd9Sstevel@tonic-gate qp = ibt_get_chan_private(event->cm_channel); 16287c478bd9Sstevel@tonic-gate conn = qptoc(qp); 16297c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 16307c478bd9Sstevel@tonic-gate if (conn->c_state == C_DISCONN_PEND) { 16317c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 16327c478bd9Sstevel@tonic-gate break; 16337c478bd9Sstevel@tonic-gate } 16347c478bd9Sstevel@tonic-gate 16350a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 16367c478bd9Sstevel@tonic-gate 16377c478bd9Sstevel@tonic-gate /* 16387c478bd9Sstevel@tonic-gate * Free the conn if c_ref is down to 0 already 16397c478bd9Sstevel@tonic-gate */ 16407c478bd9Sstevel@tonic-gate if (conn->c_ref == 0) { 16417c478bd9Sstevel@tonic-gate /* 16427c478bd9Sstevel@tonic-gate * Remove from list and free conn 16437c478bd9Sstevel@tonic-gate */ 16447c478bd9Sstevel@tonic-gate conn->c_state = 
C_DISCONN_PEND; 16457c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 16467c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, 16477c478bd9Sstevel@tonic-gate &hca->cl_conn_list); 16487c478bd9Sstevel@tonic-gate } else { 1649*065714dcSSiddheshwar Mahesh /* 1650*065714dcSSiddheshwar Mahesh * conn will be freed when c_ref goes to 0. 1651*065714dcSSiddheshwar Mahesh * Indicate to cleaning thread not to close 1652*065714dcSSiddheshwar Mahesh * the connection, but just free the channel. 1653*065714dcSSiddheshwar Mahesh */ 1654*065714dcSSiddheshwar Mahesh conn->c_flags |= C_CLOSE_NOTNEEDED; 16557c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 16567c478bd9Sstevel@tonic-gate } 16577c478bd9Sstevel@tonic-gate #ifdef DEBUG 16587c478bd9Sstevel@tonic-gate if (rib_debug) 16597c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_clnt_cm_handler: " 16607c478bd9Sstevel@tonic-gate "(CONN_CLOSED) channel disconnected"); 16617c478bd9Sstevel@tonic-gate #endif 16627c478bd9Sstevel@tonic-gate break; 16637c478bd9Sstevel@tonic-gate } 16647c478bd9Sstevel@tonic-gate break; 16657c478bd9Sstevel@tonic-gate } 16667c478bd9Sstevel@tonic-gate default: 16677c478bd9Sstevel@tonic-gate break; 16687c478bd9Sstevel@tonic-gate } 16697c478bd9Sstevel@tonic-gate return (IBT_CM_ACCEPT); 16707c478bd9Sstevel@tonic-gate } 16717c478bd9Sstevel@tonic-gate 16727c478bd9Sstevel@tonic-gate /* 16737c478bd9Sstevel@tonic-gate * Connect to the server. 
16747c478bd9Sstevel@tonic-gate */ 16757c478bd9Sstevel@tonic-gate rdma_stat 1676214ae7d0SSiddheshwar Mahesh rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, rpcib_ping_t *rptp) 16777c478bd9Sstevel@tonic-gate { 16787c478bd9Sstevel@tonic-gate ibt_chan_open_args_t chan_args; /* channel args */ 16797c478bd9Sstevel@tonic-gate ibt_chan_sizes_t chan_sizes; 16807c478bd9Sstevel@tonic-gate ibt_rc_chan_alloc_args_t qp_attr; 16817c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 16827c478bd9Sstevel@tonic-gate ibt_rc_returns_t ret_args; /* conn reject info */ 16837c478bd9Sstevel@tonic-gate int refresh = REFRESH_ATTEMPTS; /* refresh if IBT_CM_CONN_STALE */ 16840a701b1eSRobert Gordon ibt_ip_cm_info_t ipcm_info; 16850a701b1eSRobert Gordon uint8_t cmp_ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ]; 16860a701b1eSRobert Gordon 16877c478bd9Sstevel@tonic-gate 16887c478bd9Sstevel@tonic-gate (void) bzero(&chan_args, sizeof (chan_args)); 16897c478bd9Sstevel@tonic-gate (void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t)); 16900a701b1eSRobert Gordon (void) bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 16910a701b1eSRobert Gordon 1692214ae7d0SSiddheshwar Mahesh ipcm_info.src_addr.family = rptp->srcip.family; 1693214ae7d0SSiddheshwar Mahesh switch (ipcm_info.src_addr.family) { 16940a701b1eSRobert Gordon case AF_INET: 1695214ae7d0SSiddheshwar Mahesh ipcm_info.src_addr.un.ip4addr = rptp->srcip.un.ip4addr; 16960a701b1eSRobert Gordon break; 16970a701b1eSRobert Gordon case AF_INET6: 1698214ae7d0SSiddheshwar Mahesh ipcm_info.src_addr.un.ip6addr = rptp->srcip.un.ip6addr; 16990a701b1eSRobert Gordon break; 17000a701b1eSRobert Gordon } 17010a701b1eSRobert Gordon 1702214ae7d0SSiddheshwar Mahesh ipcm_info.dst_addr.family = rptp->srcip.family; 1703214ae7d0SSiddheshwar Mahesh switch (ipcm_info.dst_addr.family) { 17040a701b1eSRobert Gordon case AF_INET: 1705214ae7d0SSiddheshwar Mahesh ipcm_info.dst_addr.un.ip4addr = rptp->dstip.un.ip4addr; 17060a701b1eSRobert Gordon break; 17070a701b1eSRobert Gordon case AF_INET6: 
1708214ae7d0SSiddheshwar Mahesh ipcm_info.dst_addr.un.ip6addr = rptp->dstip.un.ip6addr; 17090a701b1eSRobert Gordon break; 17100a701b1eSRobert Gordon } 17110a701b1eSRobert Gordon 1712f837ee4aSSiddheshwar Mahesh ipcm_info.src_port = (in_port_t)nfs_rdma_port; 17130a701b1eSRobert Gordon 17140a701b1eSRobert Gordon ibt_status = ibt_format_ip_private_data(&ipcm_info, 17150a701b1eSRobert Gordon IBT_IP_HDR_PRIV_DATA_SZ, cmp_ip_pvt); 17160a701b1eSRobert Gordon 17170a701b1eSRobert Gordon if (ibt_status != IBT_SUCCESS) { 17180a701b1eSRobert Gordon cmn_err(CE_WARN, "ibt_format_ip_private_data failed\n"); 17190a701b1eSRobert Gordon return (-1); 17200a701b1eSRobert Gordon } 17217c478bd9Sstevel@tonic-gate 1722214ae7d0SSiddheshwar Mahesh qp_attr.rc_hca_port_num = rptp->path.pi_prim_cep_path.cep_hca_port_num; 17237c478bd9Sstevel@tonic-gate /* Alloc a RC channel */ 17247c478bd9Sstevel@tonic-gate qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl; 17257c478bd9Sstevel@tonic-gate qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl; 17267c478bd9Sstevel@tonic-gate qp_attr.rc_pd = hca->pd_hdl; 17277c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX; 17287c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX; 17297c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE; 17307c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE; 17317c478bd9Sstevel@tonic-gate qp_attr.rc_clone_chan = NULL; 17327c478bd9Sstevel@tonic-gate qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 17337c478bd9Sstevel@tonic-gate qp_attr.rc_flags = IBT_WR_SIGNALED; 17347c478bd9Sstevel@tonic-gate 1735f837ee4aSSiddheshwar Mahesh rptp->path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, nfs_rdma_port); 1736214ae7d0SSiddheshwar Mahesh chan_args.oc_path = &rptp->path; 1737f837ee4aSSiddheshwar Mahesh 17387c478bd9Sstevel@tonic-gate chan_args.oc_cm_handler = rib_clnt_cm_handler; 17397c478bd9Sstevel@tonic-gate chan_args.oc_cm_clnt_private = (void *)rib_stat; 17400a701b1eSRobert Gordon chan_args.oc_rdma_ra_out 
= 4; 17410a701b1eSRobert Gordon chan_args.oc_rdma_ra_in = 4; 17427c478bd9Sstevel@tonic-gate chan_args.oc_path_retry_cnt = 2; 17437c478bd9Sstevel@tonic-gate chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES; 17440a701b1eSRobert Gordon chan_args.oc_priv_data = cmp_ip_pvt; 17450a701b1eSRobert Gordon chan_args.oc_priv_data_len = IBT_IP_HDR_PRIV_DATA_SZ; 17467c478bd9Sstevel@tonic-gate 17477c478bd9Sstevel@tonic-gate refresh: 17487c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 17497c478bd9Sstevel@tonic-gate if (hca->state != HCA_DETACHED) { 17507c478bd9Sstevel@tonic-gate ibt_status = ibt_alloc_rc_channel(hca->hca_hdl, 17510a701b1eSRobert Gordon IBT_ACHAN_NO_FLAGS, 17520a701b1eSRobert Gordon &qp_attr, &qp->qp_hdl, 17537c478bd9Sstevel@tonic-gate &chan_sizes); 17547c478bd9Sstevel@tonic-gate } else { 17557c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 17567c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 17577c478bd9Sstevel@tonic-gate } 17587c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 17597c478bd9Sstevel@tonic-gate 17607c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 17610a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i_conntosrv, 17620a701b1eSRobert Gordon int, ibt_status); 17637c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 17647c478bd9Sstevel@tonic-gate } 17657c478bd9Sstevel@tonic-gate 17667c478bd9Sstevel@tonic-gate /* Connect to the Server */ 17677c478bd9Sstevel@tonic-gate (void) bzero(&ret_args, sizeof (ret_args)); 17687c478bd9Sstevel@tonic-gate mutex_enter(&qp->cb_lock); 17697c478bd9Sstevel@tonic-gate ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS, 17707c478bd9Sstevel@tonic-gate IBT_BLOCKING, &chan_args, &ret_args); 17717c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 17720a701b1eSRobert Gordon DTRACE_PROBE2(rpcib__i_openrctosrv, 17730a701b1eSRobert Gordon int, ibt_status, int, ret_args.rc_status); 17740a701b1eSRobert Gordon 17757c478bd9Sstevel@tonic-gate (void) ibt_free_channel(qp->qp_hdl); 
17767c478bd9Sstevel@tonic-gate qp->qp_hdl = NULL; 17777c478bd9Sstevel@tonic-gate mutex_exit(&qp->cb_lock); 17787c478bd9Sstevel@tonic-gate if (refresh-- && ibt_status == IBT_CM_FAILURE && 17797c478bd9Sstevel@tonic-gate ret_args.rc_status == IBT_CM_CONN_STALE) { 17807c478bd9Sstevel@tonic-gate /* 17817c478bd9Sstevel@tonic-gate * Got IBT_CM_CONN_STALE probably because of stale 17827c478bd9Sstevel@tonic-gate * data on the passive end of a channel that existed 17837c478bd9Sstevel@tonic-gate * prior to reboot. Retry establishing a channel 17847c478bd9Sstevel@tonic-gate * REFRESH_ATTEMPTS times, during which time the 17857c478bd9Sstevel@tonic-gate * stale conditions on the server might clear up. 17867c478bd9Sstevel@tonic-gate */ 17877c478bd9Sstevel@tonic-gate goto refresh; 17887c478bd9Sstevel@tonic-gate } 17897c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 17907c478bd9Sstevel@tonic-gate } 17917c478bd9Sstevel@tonic-gate mutex_exit(&qp->cb_lock); 17927c478bd9Sstevel@tonic-gate /* 17937c478bd9Sstevel@tonic-gate * Set the private data area to qp to be used in callbacks 17947c478bd9Sstevel@tonic-gate */ 17957c478bd9Sstevel@tonic-gate ibt_set_chan_private(qp->qp_hdl, (void *)qp); 17967c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 17977c478bd9Sstevel@tonic-gate }

/*
 * Look for an IB path to the address in raddr.
 *
 * The local IB addresses returned by rpcib_get_ib_addresses() are tried
 * one at a time as the source address of an ibt_get_ip_paths() query
 * against rib_stat->hca.  The first query that succeeds, reports at
 * least one path and names the same HCA GUID wins: rptp->path, rptp->hca,
 * rptp->dstip and rptp->srcip are then filled in.
 *
 * Returns RDMA_SUCCESS when a path was found, RDMA_INVAL when addr_type
 * is neither AF_INET nor AF_INET6, and RDMA_FAILED otherwise (no local
 * addresses, or no usable path).
 */
rdma_stat
rib_ping_srv(int addr_type, struct netbuf *raddr, rpcib_ping_t *rptp)
{
	uint_t			i;
	ibt_status_t		ibt_status;
	uint8_t			num_paths_p;
	ibt_ip_path_attr_t	ipattr;
	ibt_path_ip_src_t	srcip;
	rpcib_ipaddrs_t		addrs4;
	rpcib_ipaddrs_t		addrs6;
	struct sockaddr_in	*sinp;
	struct sockaddr_in6	*sin6p;
	rdma_stat		retval = RDMA_SUCCESS;

	ASSERT(raddr->buf != NULL);

	bzero(&ipattr, sizeof (ibt_ip_path_attr_t));

	/*
	 * The cleanup at "done" inspects ri_size of both lists even when
	 * rpcib_get_ib_addresses() reports failure.  Start from zeroed
	 * lists so that cleanup never acts on stack garbage, whatever the
	 * callee does or does not write on its error paths.
	 */
	bzero(&addrs4, sizeof (addrs4));
	bzero(&addrs6, sizeof (addrs6));

	if (!rpcib_get_ib_addresses(&addrs4, &addrs6) ||
	    (addrs4.ri_count == 0 && addrs6.ri_count == 0)) {
		retval = RDMA_FAILED;
		goto done;
	}

	/* Prep the destination address */
	switch (addr_type) {
	case AF_INET:
		sinp = (struct sockaddr_in *)raddr->buf;
		rptp->dstip.family = AF_INET;
		rptp->dstip.un.ip4addr = sinp->sin_addr.s_addr;
		/* from here on sinp walks the local IPv4 address list */
		sinp = addrs4.ri_list;

		ipattr.ipa_dst_ip = &rptp->dstip;
		ipattr.ipa_hca_guid = rib_stat->hca->hca_guid;
		ipattr.ipa_ndst = 1;
		ipattr.ipa_max_paths = 1;
		ipattr.ipa_src_ip.family = rptp->dstip.family;
		for (i = 0; i < addrs4.ri_count; i++) {
			num_paths_p = 0;
			ipattr.ipa_src_ip.un.ip4addr = sinp[i].sin_addr.s_addr;
			bzero(&srcip, sizeof (ibt_path_ip_src_t));

			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
			    IBT_PATH_NO_FLAGS, &ipattr, &rptp->path,
			    &num_paths_p, &srcip);
			if (ibt_status == IBT_SUCCESS &&
			    num_paths_p != 0 &&
			    rptp->path.pi_hca_guid == rib_stat->hca->hca_guid) {
				rptp->hca = rib_stat->hca;
				rptp->srcip.family = AF_INET;
				rptp->srcip.un.ip4addr =
				    srcip.ip_primary.un.ip4addr;
				goto done;
			}
		}
		retval = RDMA_FAILED;
		break;

	case AF_INET6:
		sin6p = (struct sockaddr_in6 *)raddr->buf;
		rptp->dstip.family = AF_INET6;
		rptp->dstip.un.ip6addr = sin6p->sin6_addr;
		/* from here on sin6p walks the local IPv6 address list */
		sin6p = addrs6.ri_list;

		ipattr.ipa_dst_ip = &rptp->dstip;
		ipattr.ipa_hca_guid = rib_stat->hca->hca_guid;
		ipattr.ipa_ndst = 1;
		ipattr.ipa_max_paths = 1;
		ipattr.ipa_src_ip.family = rptp->dstip.family;
		for (i = 0; i < addrs6.ri_count; i++) {
			num_paths_p = 0;
			ipattr.ipa_src_ip.un.ip6addr = sin6p[i].sin6_addr;
			bzero(&srcip, sizeof (ibt_path_ip_src_t));

			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
			    IBT_PATH_NO_FLAGS, &ipattr, &rptp->path,
			    &num_paths_p, &srcip);
			if (ibt_status == IBT_SUCCESS &&
			    num_paths_p != 0 &&
			    rptp->path.pi_hca_guid == rib_stat->hca->hca_guid) {
				rptp->hca = rib_stat->hca;
				rptp->srcip.family = AF_INET6;
				rptp->srcip.un.ip6addr =
				    srcip.ip_primary.un.ip6addr;
				goto done;
			}
		}
		retval = RDMA_FAILED;
		break;

	default:
		retval = RDMA_INVAL;
		break;
	}
done:

	/* Release whichever address lists were allocated. */
	if (addrs4.ri_size > 0)
		kmem_free(addrs4.ri_list, addrs4.ri_size);
	if (addrs6.ri_size > 0)
		kmem_free(addrs6.ri_list, addrs6.ri_size);
	return (retval);
}

/*
 * Close channel, remove from connection list and
 * free up resources allocated for that channel.
 *
 * conn_list may be NULL, in which case the connection is not unlinked
 * from any list and the "is the HCA still in use" check at the end is
 * skipped.  Always returns RDMA_SUCCESS.  The qp (looked up from conn
 * with ctoqp()) is freed here, so neither may be used afterwards.
 */
rdma_stat
rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
{
	rib_qp_t	*qp = ctoqp(conn);
	rib_hca_t	*hca;

	mutex_enter(&conn->c_lock);
	if (conn->c_timeout != NULL) {
		/* c_lock is dropped around the untimeout() call */
		mutex_exit(&conn->c_lock);
		(void) untimeout(conn->c_timeout);
		mutex_enter(&conn->c_lock);
	}

	/* Wait until no close is pending on this connection. */
	while (conn->c_flags & C_CLOSE_PENDING) {
		cv_wait(&conn->c_cv, &conn->c_lock);
	}
	mutex_exit(&conn->c_lock);

	/*
	 * c_ref == 0 and connection is in C_DISCONN_PEND
	 */
	hca = qp->hca;
	if (conn_list != NULL)
		(void) rib_rm_conn(conn, conn_list);

	/*
	 * There is only one case where we get here with
	 * qp_hdl = NULL, which is during connection setup on
	 * the client. In such a case there are no posted
	 * send/recv buffers.
	 */
	if (qp->qp_hdl != NULL) {
		/* Drain posted receive buffers ... */
		mutex_enter(&qp->posted_rbufs_lock);
		while (qp->n_posted_rbufs)
			cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock);
		mutex_exit(&qp->posted_rbufs_lock);

		/* ... and outstanding sends before freeing the channel. */
		mutex_enter(&qp->send_rbufs_lock);
		while (qp->n_send_rbufs)
			cv_wait(&qp->send_rbufs_cv, &qp->send_rbufs_lock);
		mutex_exit(&qp->send_rbufs_lock);

		(void) ibt_free_channel(qp->qp_hdl);
		qp->qp_hdl = NULL;
	}

	ASSERT(qp->rdlist == NULL);

	if (qp->replylist != NULL) {
		(void) rib_rem_replylist(qp);
	}

	cv_destroy(&qp->cb_conn_cv);
	cv_destroy(&qp->posted_rbufs_cv);
	cv_destroy(&qp->send_rbufs_cv);
	mutex_destroy(&qp->cb_lock);
	mutex_destroy(&qp->replylist_lock);
	mutex_destroy(&qp->posted_rbufs_lock);
	mutex_destroy(&qp->send_rbufs_lock);
	mutex_destroy(&qp->rdlist_lock);

	cv_destroy(&conn->c_cv);
	mutex_destroy(&conn->c_lock);

	if (conn->c_raddr.buf != NULL) {
		kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
	}
	if (conn->c_laddr.buf != NULL) {
		kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
	}

	/*
	 * Credit control cleanup.
	 */
	if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
		rdma_clnt_cred_ctrl_t *cc_info;
		cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
		cv_destroy(&cc_info->clnt_cc_cv);
	}

	kmem_free(qp, sizeof (rib_qp_t));

	/*
	 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
	 * then the hca is no longer being used.
	 */
	if (conn_list != NULL) {
		rw_enter(&hca->state_lock, RW_READER);
		if (hca->state == HCA_DETACHED) {
			rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
			if (hca->srv_conn_list.conn_hd == NULL) {
				rw_enter(&hca->cl_conn_list.conn_lock,
				    RW_READER);

				if (hca->cl_conn_list.conn_hd == NULL) {
					mutex_enter(&hca->inuse_lock);
					hca->inuse = FALSE;
					cv_signal(&hca->cb_cv);
					mutex_exit(&hca->inuse_lock);
				}
				rw_exit(&hca->cl_conn_list.conn_lock);
			}
			rw_exit(&hca->srv_conn_list.conn_lock);
		}
		rw_exit(&hca->state_lock);
	}

	return (RDMA_SUCCESS);
}

/*
 * All sends are done under the protection of
 * the wdesc->sendwait_lock. n_send_rbufs count
 * is protected using the send_rbufs_lock.
2020*065714dcSSiddheshwar Mahesh * lock ordering is: 2021*065714dcSSiddheshwar Mahesh * sendwait_lock -> send_rbufs_lock 2022*065714dcSSiddheshwar Mahesh */ 2023*065714dcSSiddheshwar Mahesh 2024*065714dcSSiddheshwar Mahesh void 2025*065714dcSSiddheshwar Mahesh rib_send_hold(rib_qp_t *qp) 2026*065714dcSSiddheshwar Mahesh { 2027*065714dcSSiddheshwar Mahesh mutex_enter(&qp->send_rbufs_lock); 2028*065714dcSSiddheshwar Mahesh qp->n_send_rbufs++; 2029*065714dcSSiddheshwar Mahesh mutex_exit(&qp->send_rbufs_lock); 2030*065714dcSSiddheshwar Mahesh } 2031*065714dcSSiddheshwar Mahesh 2032*065714dcSSiddheshwar Mahesh void 2033*065714dcSSiddheshwar Mahesh rib_send_rele(rib_qp_t *qp) 2034*065714dcSSiddheshwar Mahesh { 2035*065714dcSSiddheshwar Mahesh mutex_enter(&qp->send_rbufs_lock); 2036*065714dcSSiddheshwar Mahesh qp->n_send_rbufs--; 2037*065714dcSSiddheshwar Mahesh if (qp->n_send_rbufs == 0) 2038*065714dcSSiddheshwar Mahesh cv_signal(&qp->send_rbufs_cv); 2039*065714dcSSiddheshwar Mahesh mutex_exit(&qp->send_rbufs_lock); 2040*065714dcSSiddheshwar Mahesh } 2041*065714dcSSiddheshwar Mahesh 2042*065714dcSSiddheshwar Mahesh /* 20437c478bd9Sstevel@tonic-gate * Wait for send completion notification. Only on receiving a 20447c478bd9Sstevel@tonic-gate * notification be it a successful or error completion, free the 20457c478bd9Sstevel@tonic-gate * send_wid. 
20467c478bd9Sstevel@tonic-gate */ 20477c478bd9Sstevel@tonic-gate static rdma_stat 20487c478bd9Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd) 20497c478bd9Sstevel@tonic-gate { 20507c478bd9Sstevel@tonic-gate clock_t timout, cv_wait_ret; 20517c478bd9Sstevel@tonic-gate rdma_stat error = RDMA_SUCCESS; 20527c478bd9Sstevel@tonic-gate int i; 20537c478bd9Sstevel@tonic-gate 20547c478bd9Sstevel@tonic-gate /* 20557c478bd9Sstevel@tonic-gate * Wait for send to complete 20567c478bd9Sstevel@tonic-gate */ 20577c478bd9Sstevel@tonic-gate ASSERT(wd != NULL); 20587c478bd9Sstevel@tonic-gate mutex_enter(&wd->sendwait_lock); 20597c478bd9Sstevel@tonic-gate if (wd->status == (uint_t)SEND_WAIT) { 20607c478bd9Sstevel@tonic-gate timout = drv_usectohz(SEND_WAIT_TIME * 1000000) + 20617c478bd9Sstevel@tonic-gate ddi_get_lbolt(); 20620a701b1eSRobert Gordon 20637c478bd9Sstevel@tonic-gate if (qp->mode == RIB_SERVER) { 20647c478bd9Sstevel@tonic-gate while ((cv_wait_ret = cv_timedwait(&wd->wait_cv, 20657c478bd9Sstevel@tonic-gate &wd->sendwait_lock, timout)) > 0 && 20667c478bd9Sstevel@tonic-gate wd->status == (uint_t)SEND_WAIT) 20677c478bd9Sstevel@tonic-gate ; 20687c478bd9Sstevel@tonic-gate switch (cv_wait_ret) { 20697c478bd9Sstevel@tonic-gate case -1: /* timeout */ 20700a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__srvsendwait__timeout); 20710a701b1eSRobert Gordon 20727c478bd9Sstevel@tonic-gate wd->cv_sig = 0; /* no signal needed */ 20737c478bd9Sstevel@tonic-gate error = RDMA_TIMEDOUT; 20747c478bd9Sstevel@tonic-gate break; 20757c478bd9Sstevel@tonic-gate default: /* got send completion */ 20767c478bd9Sstevel@tonic-gate break; 20777c478bd9Sstevel@tonic-gate } 20787c478bd9Sstevel@tonic-gate } else { 20797c478bd9Sstevel@tonic-gate while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv, 20807c478bd9Sstevel@tonic-gate &wd->sendwait_lock, timout)) > 0 && 20817c478bd9Sstevel@tonic-gate wd->status == (uint_t)SEND_WAIT) 20827c478bd9Sstevel@tonic-gate ; 20837c478bd9Sstevel@tonic-gate switch 
(cv_wait_ret) { 20847c478bd9Sstevel@tonic-gate case -1: /* timeout */ 20850a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__clntsendwait__timeout); 20860a701b1eSRobert Gordon 20877c478bd9Sstevel@tonic-gate wd->cv_sig = 0; /* no signal needed */ 20887c478bd9Sstevel@tonic-gate error = RDMA_TIMEDOUT; 20897c478bd9Sstevel@tonic-gate break; 20907c478bd9Sstevel@tonic-gate case 0: /* interrupted */ 20910a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__clntsendwait__intr); 20920a701b1eSRobert Gordon 20937c478bd9Sstevel@tonic-gate wd->cv_sig = 0; /* no signal needed */ 20947c478bd9Sstevel@tonic-gate error = RDMA_INTR; 20957c478bd9Sstevel@tonic-gate break; 20967c478bd9Sstevel@tonic-gate default: /* got send completion */ 20977c478bd9Sstevel@tonic-gate break; 20987c478bd9Sstevel@tonic-gate } 20997c478bd9Sstevel@tonic-gate } 21007c478bd9Sstevel@tonic-gate } 21017c478bd9Sstevel@tonic-gate 21027c478bd9Sstevel@tonic-gate if (wd->status != (uint_t)SEND_WAIT) { 21037c478bd9Sstevel@tonic-gate /* got send completion */ 21047c478bd9Sstevel@tonic-gate if (wd->status != RDMA_SUCCESS) { 2105*065714dcSSiddheshwar Mahesh switch (wd->status) { 2106*065714dcSSiddheshwar Mahesh case RDMA_CONNLOST: 2107f837ee4aSSiddheshwar Mahesh error = RDMA_CONNLOST; 2108*065714dcSSiddheshwar Mahesh break; 2109*065714dcSSiddheshwar Mahesh default: 2110*065714dcSSiddheshwar Mahesh error = RDMA_FAILED; 2111*065714dcSSiddheshwar Mahesh break; 2112f837ee4aSSiddheshwar Mahesh } 21137c478bd9Sstevel@tonic-gate } 21147c478bd9Sstevel@tonic-gate for (i = 0; i < wd->nsbufs; i++) { 21157c478bd9Sstevel@tonic-gate rib_rbuf_free(qptoc(qp), SEND_BUFFER, 211611606941Sjwahlig (void *)(uintptr_t)wd->sbufaddr[i]); 21177c478bd9Sstevel@tonic-gate } 2118*065714dcSSiddheshwar Mahesh 2119*065714dcSSiddheshwar Mahesh rib_send_rele(qp); 2120*065714dcSSiddheshwar Mahesh 21217c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 21227c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wd); 2123*065714dcSSiddheshwar Mahesh 
21247c478bd9Sstevel@tonic-gate } else { 21257c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 21267c478bd9Sstevel@tonic-gate } 21277c478bd9Sstevel@tonic-gate return (error); 21287c478bd9Sstevel@tonic-gate } 21297c478bd9Sstevel@tonic-gate 21307c478bd9Sstevel@tonic-gate static struct send_wid * 21317c478bd9Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp) 21327c478bd9Sstevel@tonic-gate { 21337c478bd9Sstevel@tonic-gate struct send_wid *wd; 21347c478bd9Sstevel@tonic-gate 21357c478bd9Sstevel@tonic-gate wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP); 21367c478bd9Sstevel@tonic-gate wd->xid = xid; 21377c478bd9Sstevel@tonic-gate wd->cv_sig = cv_sig; 21387c478bd9Sstevel@tonic-gate wd->qp = qp; 21397c478bd9Sstevel@tonic-gate cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL); 21407c478bd9Sstevel@tonic-gate mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL); 21417c478bd9Sstevel@tonic-gate wd->status = (uint_t)SEND_WAIT; 21427c478bd9Sstevel@tonic-gate 21437c478bd9Sstevel@tonic-gate return (wd); 21447c478bd9Sstevel@tonic-gate } 21457c478bd9Sstevel@tonic-gate 21467c478bd9Sstevel@tonic-gate static int 21477c478bd9Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc) 21487c478bd9Sstevel@tonic-gate { 21497c478bd9Sstevel@tonic-gate cv_destroy(&wdesc->wait_cv); 21507c478bd9Sstevel@tonic-gate mutex_destroy(&wdesc->sendwait_lock); 21517c478bd9Sstevel@tonic-gate kmem_free(wdesc, sizeof (*wdesc)); 21527c478bd9Sstevel@tonic-gate 21537c478bd9Sstevel@tonic-gate return (0); 21547c478bd9Sstevel@tonic-gate } 21557c478bd9Sstevel@tonic-gate 21567c478bd9Sstevel@tonic-gate static rdma_stat 21577c478bd9Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep) 21587c478bd9Sstevel@tonic-gate { 21597c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 21607c478bd9Sstevel@tonic-gate if (rep != NULL) { 21617c478bd9Sstevel@tonic-gate (void) rib_remreply(qp, rep); 21627c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 
21637c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 21647c478bd9Sstevel@tonic-gate } 21657c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 21667c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 21677c478bd9Sstevel@tonic-gate } 21687c478bd9Sstevel@tonic-gate 21697c478bd9Sstevel@tonic-gate /* 21707c478bd9Sstevel@tonic-gate * Send buffers are freed here only in case of error in posting 21717c478bd9Sstevel@tonic-gate * on QP. If the post succeeded, the send buffers are freed upon 21727c478bd9Sstevel@tonic-gate * send completion in rib_sendwait() or in the scq_handler. 21737c478bd9Sstevel@tonic-gate */ 21747c478bd9Sstevel@tonic-gate rdma_stat 21757c478bd9Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid, 21760a701b1eSRobert Gordon int send_sig, int cv_sig, caddr_t *swid) 21777c478bd9Sstevel@tonic-gate { 21787c478bd9Sstevel@tonic-gate struct send_wid *wdesc; 21797c478bd9Sstevel@tonic-gate struct clist *clp; 21807c478bd9Sstevel@tonic-gate ibt_status_t ibt_status = IBT_SUCCESS; 21817c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 21827c478bd9Sstevel@tonic-gate ibt_send_wr_t tx_wr; 21837c478bd9Sstevel@tonic-gate int i, nds; 21847c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 21857c478bd9Sstevel@tonic-gate uint_t total_msg_size; 21860a701b1eSRobert Gordon rib_qp_t *qp; 21870a701b1eSRobert Gordon 21880a701b1eSRobert Gordon qp = ctoqp(conn); 21897c478bd9Sstevel@tonic-gate 21907c478bd9Sstevel@tonic-gate ASSERT(cl != NULL); 21917c478bd9Sstevel@tonic-gate 21927c478bd9Sstevel@tonic-gate bzero(&tx_wr, sizeof (ibt_send_wr_t)); 21937c478bd9Sstevel@tonic-gate 21947c478bd9Sstevel@tonic-gate nds = 0; 21957c478bd9Sstevel@tonic-gate total_msg_size = 0; 21967c478bd9Sstevel@tonic-gate clp = cl; 21977c478bd9Sstevel@tonic-gate while (clp != NULL) { 21987c478bd9Sstevel@tonic-gate if (nds >= DSEG_MAX) { 21990a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__sendandwait_dsegmax_exceeded); 22007c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 
22017c478bd9Sstevel@tonic-gate } 22020a701b1eSRobert Gordon sgl[nds].ds_va = clp->w.c_saddr; 22037c478bd9Sstevel@tonic-gate sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */ 22047c478bd9Sstevel@tonic-gate sgl[nds].ds_len = clp->c_len; 22057c478bd9Sstevel@tonic-gate total_msg_size += clp->c_len; 22067c478bd9Sstevel@tonic-gate clp = clp->c_next; 22077c478bd9Sstevel@tonic-gate nds++; 22087c478bd9Sstevel@tonic-gate } 22097c478bd9Sstevel@tonic-gate 22107c478bd9Sstevel@tonic-gate if (send_sig) { 22117c478bd9Sstevel@tonic-gate /* Set SEND_SIGNAL flag. */ 22127c478bd9Sstevel@tonic-gate tx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 22137c478bd9Sstevel@tonic-gate wdesc = rib_init_sendwait(msgid, cv_sig, qp); 22140a701b1eSRobert Gordon *swid = (caddr_t)wdesc; 2215*065714dcSSiddheshwar Mahesh tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 2216*065714dcSSiddheshwar Mahesh mutex_enter(&wdesc->sendwait_lock); 22177c478bd9Sstevel@tonic-gate wdesc->nsbufs = nds; 22187c478bd9Sstevel@tonic-gate for (i = 0; i < nds; i++) { 22197c478bd9Sstevel@tonic-gate wdesc->sbufaddr[i] = sgl[i].ds_va; 22207c478bd9Sstevel@tonic-gate } 2221*065714dcSSiddheshwar Mahesh } else { 2222*065714dcSSiddheshwar Mahesh tx_wr.wr_flags = IBT_WR_NO_FLAGS; 2223*065714dcSSiddheshwar Mahesh *swid = NULL; 2224*065714dcSSiddheshwar Mahesh tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID; 2225*065714dcSSiddheshwar Mahesh } 22267c478bd9Sstevel@tonic-gate 22277c478bd9Sstevel@tonic-gate tx_wr.wr_opcode = IBT_WRC_SEND; 22287c478bd9Sstevel@tonic-gate tx_wr.wr_trans = IBT_RC_SRV; 22297c478bd9Sstevel@tonic-gate tx_wr.wr_nds = nds; 22307c478bd9Sstevel@tonic-gate tx_wr.wr_sgl = sgl; 22317c478bd9Sstevel@tonic-gate 22327c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 22330a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 22347c478bd9Sstevel@tonic-gate ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL); 22357c478bd9Sstevel@tonic-gate } 22360a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 
22377c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 22380a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 22390a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 22407c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 2241*065714dcSSiddheshwar Mahesh if (send_sig) { 22427c478bd9Sstevel@tonic-gate for (i = 0; i < nds; i++) { 22437c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, SEND_BUFFER, 224411606941Sjwahlig (void *)(uintptr_t)wdesc->sbufaddr[i]); 22457c478bd9Sstevel@tonic-gate } 2246*065714dcSSiddheshwar Mahesh mutex_exit(&wdesc->sendwait_lock); 22477c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wdesc); 2248*065714dcSSiddheshwar Mahesh } 22490a701b1eSRobert Gordon return (RDMA_CONNLOST); 22507c478bd9Sstevel@tonic-gate } 2251*065714dcSSiddheshwar Mahesh 22527c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 22537c478bd9Sstevel@tonic-gate 22547c478bd9Sstevel@tonic-gate if (send_sig) { 2255*065714dcSSiddheshwar Mahesh rib_send_hold(qp); 2256*065714dcSSiddheshwar Mahesh mutex_exit(&wdesc->sendwait_lock); 22577c478bd9Sstevel@tonic-gate if (cv_sig) { 22587c478bd9Sstevel@tonic-gate /* 22597c478bd9Sstevel@tonic-gate * cv_wait for send to complete. 22607c478bd9Sstevel@tonic-gate * We can fail due to a timeout or signal or 22617c478bd9Sstevel@tonic-gate * unsuccessful send. 
22627c478bd9Sstevel@tonic-gate */ 22637c478bd9Sstevel@tonic-gate ret = rib_sendwait(qp, wdesc); 22640a701b1eSRobert Gordon 22657c478bd9Sstevel@tonic-gate return (ret); 22667c478bd9Sstevel@tonic-gate } 22677c478bd9Sstevel@tonic-gate } 22687c478bd9Sstevel@tonic-gate 22697c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 22707c478bd9Sstevel@tonic-gate } 22717c478bd9Sstevel@tonic-gate 22720a701b1eSRobert Gordon 22737c478bd9Sstevel@tonic-gate rdma_stat 22747c478bd9Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid) 22757c478bd9Sstevel@tonic-gate { 22767c478bd9Sstevel@tonic-gate rdma_stat ret; 22770a701b1eSRobert Gordon caddr_t wd; 22787c478bd9Sstevel@tonic-gate 22797c478bd9Sstevel@tonic-gate /* send-wait & cv_signal */ 22800a701b1eSRobert Gordon ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd); 22817c478bd9Sstevel@tonic-gate return (ret); 22827c478bd9Sstevel@tonic-gate } 22837c478bd9Sstevel@tonic-gate 22847c478bd9Sstevel@tonic-gate /* 2285*065714dcSSiddheshwar Mahesh * Deprecated/obsolete interface not used currently 2286*065714dcSSiddheshwar Mahesh * but earlier used for READ-READ protocol. 22877c478bd9Sstevel@tonic-gate * Send RPC reply and wait for RDMA_DONE. 
22887c478bd9Sstevel@tonic-gate */ 22897c478bd9Sstevel@tonic-gate rdma_stat 22907c478bd9Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid) 22917c478bd9Sstevel@tonic-gate { 22927c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 22937c478bd9Sstevel@tonic-gate struct rdma_done_list *rd; 22947c478bd9Sstevel@tonic-gate clock_t timout, cv_wait_ret; 22950a701b1eSRobert Gordon caddr_t *wid = NULL; 22967c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 22977c478bd9Sstevel@tonic-gate 22987c478bd9Sstevel@tonic-gate mutex_enter(&qp->rdlist_lock); 22997c478bd9Sstevel@tonic-gate rd = rdma_done_add(qp, msgid); 23007c478bd9Sstevel@tonic-gate 23017c478bd9Sstevel@tonic-gate /* No cv_signal (whether send-wait or no-send-wait) */ 23020a701b1eSRobert Gordon ret = rib_send_and_wait(conn, cl, msgid, 1, 0, wid); 23037c478bd9Sstevel@tonic-gate 23040a701b1eSRobert Gordon if (ret != RDMA_SUCCESS) { 23050a701b1eSRobert Gordon rdma_done_rm(qp, rd); 23060a701b1eSRobert Gordon } else { 23077c478bd9Sstevel@tonic-gate /* 23087c478bd9Sstevel@tonic-gate * Wait for RDMA_DONE from remote end 23097c478bd9Sstevel@tonic-gate */ 23100a701b1eSRobert Gordon timout = 23110a701b1eSRobert Gordon drv_usectohz(REPLY_WAIT_TIME * 1000000) + ddi_get_lbolt(); 23120a701b1eSRobert Gordon cv_wait_ret = cv_timedwait(&rd->rdma_done_cv, 23130a701b1eSRobert Gordon &qp->rdlist_lock, 23147c478bd9Sstevel@tonic-gate timout); 23150a701b1eSRobert Gordon 23167c478bd9Sstevel@tonic-gate rdma_done_rm(qp, rd); 23170a701b1eSRobert Gordon 23187c478bd9Sstevel@tonic-gate if (cv_wait_ret < 0) { 23197c478bd9Sstevel@tonic-gate ret = RDMA_TIMEDOUT; 23200a701b1eSRobert Gordon } 23217c478bd9Sstevel@tonic-gate } 23227c478bd9Sstevel@tonic-gate 23237c478bd9Sstevel@tonic-gate mutex_exit(&qp->rdlist_lock); 23247c478bd9Sstevel@tonic-gate return (ret); 23257c478bd9Sstevel@tonic-gate } 23267c478bd9Sstevel@tonic-gate 23277c478bd9Sstevel@tonic-gate static struct recv_wid * 23287c478bd9Sstevel@tonic-gate 
rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid) 23297c478bd9Sstevel@tonic-gate { 23307c478bd9Sstevel@tonic-gate struct recv_wid *rwid; 23317c478bd9Sstevel@tonic-gate 23327c478bd9Sstevel@tonic-gate rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP); 23337c478bd9Sstevel@tonic-gate rwid->xid = msgid; 23347c478bd9Sstevel@tonic-gate rwid->addr = sgl->ds_va; 23357c478bd9Sstevel@tonic-gate rwid->qp = qp; 23367c478bd9Sstevel@tonic-gate 23377c478bd9Sstevel@tonic-gate return (rwid); 23387c478bd9Sstevel@tonic-gate } 23397c478bd9Sstevel@tonic-gate 23407c478bd9Sstevel@tonic-gate static void 23417c478bd9Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid) 23427c478bd9Sstevel@tonic-gate { 23437c478bd9Sstevel@tonic-gate kmem_free(rwid, sizeof (struct recv_wid)); 23447c478bd9Sstevel@tonic-gate } 23457c478bd9Sstevel@tonic-gate 23467c478bd9Sstevel@tonic-gate rdma_stat 23477c478bd9Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid) 23487c478bd9Sstevel@tonic-gate { 23497c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 23507c478bd9Sstevel@tonic-gate struct clist *clp = cl; 23517c478bd9Sstevel@tonic-gate struct reply *rep; 23527c478bd9Sstevel@tonic-gate struct recv_wid *rwid; 23537c478bd9Sstevel@tonic-gate int nds; 23547c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 23557c478bd9Sstevel@tonic-gate ibt_recv_wr_t recv_wr; 23567c478bd9Sstevel@tonic-gate rdma_stat ret; 23577c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 23587c478bd9Sstevel@tonic-gate 23597c478bd9Sstevel@tonic-gate /* 23607c478bd9Sstevel@tonic-gate * rdma_clnt_postrecv uses RECV_BUFFER. 
23617c478bd9Sstevel@tonic-gate */ 23627c478bd9Sstevel@tonic-gate 23637c478bd9Sstevel@tonic-gate nds = 0; 23647c478bd9Sstevel@tonic-gate while (cl != NULL) { 23657c478bd9Sstevel@tonic-gate if (nds >= DSEG_MAX) { 23667c478bd9Sstevel@tonic-gate ret = RDMA_FAILED; 23677c478bd9Sstevel@tonic-gate goto done; 23687c478bd9Sstevel@tonic-gate } 23690a701b1eSRobert Gordon sgl[nds].ds_va = cl->w.c_saddr; 23707c478bd9Sstevel@tonic-gate sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 23717c478bd9Sstevel@tonic-gate sgl[nds].ds_len = cl->c_len; 23727c478bd9Sstevel@tonic-gate cl = cl->c_next; 23737c478bd9Sstevel@tonic-gate nds++; 23747c478bd9Sstevel@tonic-gate } 23757c478bd9Sstevel@tonic-gate 23767c478bd9Sstevel@tonic-gate if (nds != 1) { 23777c478bd9Sstevel@tonic-gate ret = RDMA_FAILED; 23787c478bd9Sstevel@tonic-gate goto done; 23797c478bd9Sstevel@tonic-gate } 23800a701b1eSRobert Gordon 23817c478bd9Sstevel@tonic-gate bzero(&recv_wr, sizeof (ibt_recv_wr_t)); 23827c478bd9Sstevel@tonic-gate recv_wr.wr_nds = nds; 23837c478bd9Sstevel@tonic-gate recv_wr.wr_sgl = sgl; 23847c478bd9Sstevel@tonic-gate 23857c478bd9Sstevel@tonic-gate rwid = rib_create_wid(qp, &sgl[0], msgid); 23867c478bd9Sstevel@tonic-gate if (rwid) { 238711606941Sjwahlig recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid; 23887c478bd9Sstevel@tonic-gate } else { 23897c478bd9Sstevel@tonic-gate ret = RDMA_NORESOURCE; 23907c478bd9Sstevel@tonic-gate goto done; 23917c478bd9Sstevel@tonic-gate } 23927c478bd9Sstevel@tonic-gate rep = rib_addreplylist(qp, msgid); 23937c478bd9Sstevel@tonic-gate if (!rep) { 23947c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 23957c478bd9Sstevel@tonic-gate ret = RDMA_NORESOURCE; 23967c478bd9Sstevel@tonic-gate goto done; 23977c478bd9Sstevel@tonic-gate } 23987c478bd9Sstevel@tonic-gate 23997c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 24000a701b1eSRobert Gordon 24010a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 24027c478bd9Sstevel@tonic-gate ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 
1, NULL); 24037c478bd9Sstevel@tonic-gate } 24040a701b1eSRobert Gordon 24050a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 24067c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 24070a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 24080a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 24097c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 24107c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 24117c478bd9Sstevel@tonic-gate (void) rib_rem_rep(qp, rep); 24120a701b1eSRobert Gordon ret = RDMA_CONNLOST; 24137c478bd9Sstevel@tonic-gate goto done; 24147c478bd9Sstevel@tonic-gate } 24157c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 24167c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 24177c478bd9Sstevel@tonic-gate 24187c478bd9Sstevel@tonic-gate done: 24197c478bd9Sstevel@tonic-gate while (clp != NULL) { 24200a701b1eSRobert Gordon rib_rbuf_free(conn, RECV_BUFFER, 24210a701b1eSRobert Gordon (void *)(uintptr_t)clp->w.c_saddr3); 24227c478bd9Sstevel@tonic-gate clp = clp->c_next; 24237c478bd9Sstevel@tonic-gate } 24247c478bd9Sstevel@tonic-gate return (ret); 24257c478bd9Sstevel@tonic-gate } 24267c478bd9Sstevel@tonic-gate 24277c478bd9Sstevel@tonic-gate rdma_stat 24287c478bd9Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl) 24297c478bd9Sstevel@tonic-gate { 24307c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 24317c478bd9Sstevel@tonic-gate struct svc_recv *s_recvp; 24327c478bd9Sstevel@tonic-gate int nds; 24337c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 24347c478bd9Sstevel@tonic-gate ibt_recv_wr_t recv_wr; 24357c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 24367c478bd9Sstevel@tonic-gate 24377c478bd9Sstevel@tonic-gate nds = 0; 24387c478bd9Sstevel@tonic-gate while (cl != NULL) { 24397c478bd9Sstevel@tonic-gate if (nds >= DSEG_MAX) { 24407c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 24417c478bd9Sstevel@tonic-gate } 24420a701b1eSRobert Gordon sgl[nds].ds_va = cl->w.c_saddr; 24437c478bd9Sstevel@tonic-gate sgl[nds].ds_key = 
cl->c_smemhandle.mrc_lmr; /* lkey */ 24447c478bd9Sstevel@tonic-gate sgl[nds].ds_len = cl->c_len; 24457c478bd9Sstevel@tonic-gate cl = cl->c_next; 24467c478bd9Sstevel@tonic-gate nds++; 24477c478bd9Sstevel@tonic-gate } 24487c478bd9Sstevel@tonic-gate 24497c478bd9Sstevel@tonic-gate if (nds != 1) { 24500a701b1eSRobert Gordon rib_rbuf_free(conn, RECV_BUFFER, 24510a701b1eSRobert Gordon (caddr_t)(uintptr_t)sgl[0].ds_va); 24520a701b1eSRobert Gordon 24537c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 24547c478bd9Sstevel@tonic-gate } 24550a701b1eSRobert Gordon 24567c478bd9Sstevel@tonic-gate bzero(&recv_wr, sizeof (ibt_recv_wr_t)); 24577c478bd9Sstevel@tonic-gate recv_wr.wr_nds = nds; 24587c478bd9Sstevel@tonic-gate recv_wr.wr_sgl = sgl; 24597c478bd9Sstevel@tonic-gate 24607c478bd9Sstevel@tonic-gate s_recvp = rib_init_svc_recv(qp, &sgl[0]); 246111606941Sjwahlig /* Use s_recvp's addr as wr id */ 246211606941Sjwahlig recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp; 24637c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 24640a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 24657c478bd9Sstevel@tonic-gate ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL); 24667c478bd9Sstevel@tonic-gate } 24670a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 24687c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 24690a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 24700a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 24717c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 247211606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 247311606941Sjwahlig (caddr_t)(uintptr_t)sgl[0].ds_va); 24747c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 24750a701b1eSRobert Gordon 24760a701b1eSRobert Gordon return (RDMA_CONNLOST); 24777c478bd9Sstevel@tonic-gate } 24787c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 24797c478bd9Sstevel@tonic-gate 24807c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 24817c478bd9Sstevel@tonic-gate } 
24827c478bd9Sstevel@tonic-gate 24837c478bd9Sstevel@tonic-gate /* Client */ 24847c478bd9Sstevel@tonic-gate rdma_stat 24857c478bd9Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid) 24867c478bd9Sstevel@tonic-gate { 24877c478bd9Sstevel@tonic-gate 24887c478bd9Sstevel@tonic-gate return (rib_clnt_post(conn, cl, msgid)); 24897c478bd9Sstevel@tonic-gate } 24907c478bd9Sstevel@tonic-gate 24910a701b1eSRobert Gordon /* Client */ 24920a701b1eSRobert Gordon rdma_stat 24930a701b1eSRobert Gordon rib_post_resp_remove(CONN* conn, uint32_t msgid) 24940a701b1eSRobert Gordon { 24950a701b1eSRobert Gordon rib_qp_t *qp = ctoqp(conn); 24960a701b1eSRobert Gordon struct reply *rep; 24970a701b1eSRobert Gordon 24980a701b1eSRobert Gordon mutex_enter(&qp->replylist_lock); 24990a701b1eSRobert Gordon for (rep = qp->replylist; rep != NULL; rep = rep->next) { 25000a701b1eSRobert Gordon if (rep->xid == msgid) { 25010a701b1eSRobert Gordon if (rep->vaddr_cq) { 25020a701b1eSRobert Gordon rib_rbuf_free(conn, RECV_BUFFER, 25030a701b1eSRobert Gordon (caddr_t)(uintptr_t)rep->vaddr_cq); 25040a701b1eSRobert Gordon } 25050a701b1eSRobert Gordon (void) rib_remreply(qp, rep); 25060a701b1eSRobert Gordon break; 25070a701b1eSRobert Gordon } 25080a701b1eSRobert Gordon } 25090a701b1eSRobert Gordon mutex_exit(&qp->replylist_lock); 25100a701b1eSRobert Gordon 25110a701b1eSRobert Gordon return (RDMA_SUCCESS); 25120a701b1eSRobert Gordon } 25130a701b1eSRobert Gordon 25147c478bd9Sstevel@tonic-gate /* Server */ 25157c478bd9Sstevel@tonic-gate rdma_stat 25167c478bd9Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl) 25177c478bd9Sstevel@tonic-gate { 25187c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 25197c478bd9Sstevel@tonic-gate 25207c478bd9Sstevel@tonic-gate if (rib_svc_post(conn, cl) == RDMA_SUCCESS) { 25217c478bd9Sstevel@tonic-gate mutex_enter(&qp->posted_rbufs_lock); 25227c478bd9Sstevel@tonic-gate qp->n_posted_rbufs++; 25237c478bd9Sstevel@tonic-gate 
mutex_exit(&qp->posted_rbufs_lock); 25247c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 25257c478bd9Sstevel@tonic-gate } 25267c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 25277c478bd9Sstevel@tonic-gate } 25287c478bd9Sstevel@tonic-gate 25297c478bd9Sstevel@tonic-gate /* 25307c478bd9Sstevel@tonic-gate * Client side only interface to "recv" the rpc reply buf 25317c478bd9Sstevel@tonic-gate * posted earlier by rib_post_resp(conn, cl, msgid). 25327c478bd9Sstevel@tonic-gate */ 25337c478bd9Sstevel@tonic-gate rdma_stat 25347c478bd9Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid) 25357c478bd9Sstevel@tonic-gate { 25367c478bd9Sstevel@tonic-gate struct reply *rep = NULL; 25377c478bd9Sstevel@tonic-gate clock_t timout, cv_wait_ret; 25387c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 25397c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 25407c478bd9Sstevel@tonic-gate 25417c478bd9Sstevel@tonic-gate /* 25427c478bd9Sstevel@tonic-gate * Find the reply structure for this msgid 25437c478bd9Sstevel@tonic-gate */ 25447c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 25457c478bd9Sstevel@tonic-gate 25467c478bd9Sstevel@tonic-gate for (rep = qp->replylist; rep != NULL; rep = rep->next) { 25477c478bd9Sstevel@tonic-gate if (rep->xid == msgid) 25487c478bd9Sstevel@tonic-gate break; 25497c478bd9Sstevel@tonic-gate } 25500a701b1eSRobert Gordon 25517c478bd9Sstevel@tonic-gate if (rep != NULL) { 25527c478bd9Sstevel@tonic-gate /* 25537c478bd9Sstevel@tonic-gate * If message not yet received, wait. 
25547c478bd9Sstevel@tonic-gate */ 25557c478bd9Sstevel@tonic-gate if (rep->status == (uint_t)REPLY_WAIT) { 25567c478bd9Sstevel@tonic-gate timout = ddi_get_lbolt() + 25577c478bd9Sstevel@tonic-gate drv_usectohz(REPLY_WAIT_TIME * 1000000); 25580a701b1eSRobert Gordon 25597c478bd9Sstevel@tonic-gate while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv, 25607c478bd9Sstevel@tonic-gate &qp->replylist_lock, timout)) > 0 && 25610a701b1eSRobert Gordon rep->status == (uint_t)REPLY_WAIT) 25620a701b1eSRobert Gordon ; 25637c478bd9Sstevel@tonic-gate 25647c478bd9Sstevel@tonic-gate switch (cv_wait_ret) { 25657c478bd9Sstevel@tonic-gate case -1: /* timeout */ 25667c478bd9Sstevel@tonic-gate ret = RDMA_TIMEDOUT; 25677c478bd9Sstevel@tonic-gate break; 25687c478bd9Sstevel@tonic-gate case 0: 25697c478bd9Sstevel@tonic-gate ret = RDMA_INTR; 25707c478bd9Sstevel@tonic-gate break; 25717c478bd9Sstevel@tonic-gate default: 25727c478bd9Sstevel@tonic-gate break; 25737c478bd9Sstevel@tonic-gate } 25747c478bd9Sstevel@tonic-gate } 25757c478bd9Sstevel@tonic-gate 25767c478bd9Sstevel@tonic-gate if (rep->status == RDMA_SUCCESS) { 25777c478bd9Sstevel@tonic-gate struct clist *cl = NULL; 25787c478bd9Sstevel@tonic-gate 25797c478bd9Sstevel@tonic-gate /* 25807c478bd9Sstevel@tonic-gate * Got message successfully 25817c478bd9Sstevel@tonic-gate */ 25827c478bd9Sstevel@tonic-gate clist_add(&cl, 0, rep->bytes_xfer, NULL, 258311606941Sjwahlig (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL); 25847c478bd9Sstevel@tonic-gate *clp = cl; 25857c478bd9Sstevel@tonic-gate } else { 25867c478bd9Sstevel@tonic-gate if (rep->status != (uint_t)REPLY_WAIT) { 25877c478bd9Sstevel@tonic-gate /* 25887c478bd9Sstevel@tonic-gate * Got error in reply message. Free 25897c478bd9Sstevel@tonic-gate * recv buffer here. 
25907c478bd9Sstevel@tonic-gate */ 25917c478bd9Sstevel@tonic-gate ret = rep->status; 25927c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 259311606941Sjwahlig (caddr_t)(uintptr_t)rep->vaddr_cq); 25947c478bd9Sstevel@tonic-gate } 25957c478bd9Sstevel@tonic-gate } 25967c478bd9Sstevel@tonic-gate (void) rib_remreply(qp, rep); 25977c478bd9Sstevel@tonic-gate } else { 25987c478bd9Sstevel@tonic-gate /* 25997c478bd9Sstevel@tonic-gate * No matching reply structure found for given msgid on the 26007c478bd9Sstevel@tonic-gate * reply wait list. 26017c478bd9Sstevel@tonic-gate */ 26027c478bd9Sstevel@tonic-gate ret = RDMA_INVAL; 26030a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__nomatchxid2); 26047c478bd9Sstevel@tonic-gate } 26057c478bd9Sstevel@tonic-gate 26067c478bd9Sstevel@tonic-gate /* 26077c478bd9Sstevel@tonic-gate * Done. 26087c478bd9Sstevel@tonic-gate */ 26097c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 26107c478bd9Sstevel@tonic-gate return (ret); 26117c478bd9Sstevel@tonic-gate } 26127c478bd9Sstevel@tonic-gate 26137c478bd9Sstevel@tonic-gate /* 26147c478bd9Sstevel@tonic-gate * RDMA write a buffer to the remote address. 
26157c478bd9Sstevel@tonic-gate */ 26167c478bd9Sstevel@tonic-gate rdma_stat 26177c478bd9Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait) 26187c478bd9Sstevel@tonic-gate { 26197c478bd9Sstevel@tonic-gate ibt_send_wr_t tx_wr; 26207c478bd9Sstevel@tonic-gate int cv_sig; 26217c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 26227c478bd9Sstevel@tonic-gate struct send_wid *wdesc; 26237c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 26247c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 26257c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 26260a701b1eSRobert Gordon uint64_t n_writes = 0; 26277c478bd9Sstevel@tonic-gate 26287c478bd9Sstevel@tonic-gate if (cl == NULL) { 26297c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 26307c478bd9Sstevel@tonic-gate } 26317c478bd9Sstevel@tonic-gate 26320a701b1eSRobert Gordon while ((cl != NULL)) { 26330a701b1eSRobert Gordon if (cl->c_len > 0) { 26347c478bd9Sstevel@tonic-gate bzero(&tx_wr, sizeof (ibt_send_wr_t)); 26350a701b1eSRobert Gordon tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->u.c_daddr; 26360a701b1eSRobert Gordon tx_wr.wr.rc.rcwr.rdma.rdma_rkey = 26370a701b1eSRobert Gordon cl->c_dmemhandle.mrc_rmr; /* rkey */ 26380a701b1eSRobert Gordon sgl[0].ds_va = cl->w.c_saddr; 26390a701b1eSRobert Gordon sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 26400a701b1eSRobert Gordon sgl[0].ds_len = cl->c_len; 26417c478bd9Sstevel@tonic-gate 26427c478bd9Sstevel@tonic-gate if (wait) { 26437c478bd9Sstevel@tonic-gate cv_sig = 1; 26447c478bd9Sstevel@tonic-gate } else { 26450a701b1eSRobert Gordon if (n_writes > max_unsignaled_rws) { 26460a701b1eSRobert Gordon n_writes = 0; 26470a701b1eSRobert Gordon cv_sig = 1; 26480a701b1eSRobert Gordon } else { 26497c478bd9Sstevel@tonic-gate cv_sig = 0; 26507c478bd9Sstevel@tonic-gate } 26510a701b1eSRobert Gordon } 26527c478bd9Sstevel@tonic-gate 2653*065714dcSSiddheshwar Mahesh if (cv_sig) { 2654*065714dcSSiddheshwar Mahesh tx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 
26557c478bd9Sstevel@tonic-gate wdesc = rib_init_sendwait(0, cv_sig, qp); 265611606941Sjwahlig tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 2657*065714dcSSiddheshwar Mahesh mutex_enter(&wdesc->sendwait_lock); 2658*065714dcSSiddheshwar Mahesh } else { 2659*065714dcSSiddheshwar Mahesh tx_wr.wr_flags = IBT_WR_NO_FLAGS; 2660*065714dcSSiddheshwar Mahesh tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID; 2661*065714dcSSiddheshwar Mahesh } 26627c478bd9Sstevel@tonic-gate tx_wr.wr_opcode = IBT_WRC_RDMAW; 26637c478bd9Sstevel@tonic-gate tx_wr.wr_trans = IBT_RC_SRV; 26640a701b1eSRobert Gordon tx_wr.wr_nds = 1; 26657c478bd9Sstevel@tonic-gate tx_wr.wr_sgl = sgl; 26667c478bd9Sstevel@tonic-gate 26677c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 26680a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 26690a701b1eSRobert Gordon ibt_status = 26700a701b1eSRobert Gordon ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL); 26717c478bd9Sstevel@tonic-gate } 26720a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 26737c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 26740a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 26750a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 26767c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 2677*065714dcSSiddheshwar Mahesh if (cv_sig) { 2678*065714dcSSiddheshwar Mahesh mutex_exit(&wdesc->sendwait_lock); 26797c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wdesc); 2680*065714dcSSiddheshwar Mahesh } 26810a701b1eSRobert Gordon return (RDMA_CONNLOST); 26827c478bd9Sstevel@tonic-gate } 2683*065714dcSSiddheshwar Mahesh 26847c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 26857c478bd9Sstevel@tonic-gate 26867c478bd9Sstevel@tonic-gate /* 26877c478bd9Sstevel@tonic-gate * Wait for send to complete 26887c478bd9Sstevel@tonic-gate */ 2689*065714dcSSiddheshwar Mahesh if (cv_sig) { 2690*065714dcSSiddheshwar Mahesh 2691*065714dcSSiddheshwar Mahesh rib_send_hold(qp); 26920a701b1eSRobert Gordon mutex_exit(&wdesc->sendwait_lock); 
2693*065714dcSSiddheshwar Mahesh 2694*065714dcSSiddheshwar Mahesh ret = rib_sendwait(qp, wdesc); 2695*065714dcSSiddheshwar Mahesh if (ret != 0) 2696*065714dcSSiddheshwar Mahesh return (ret); 26970a701b1eSRobert Gordon } 26980a701b1eSRobert Gordon n_writes ++; 26990a701b1eSRobert Gordon } 27000a701b1eSRobert Gordon cl = cl->c_next; 27017c478bd9Sstevel@tonic-gate } 27027c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 27037c478bd9Sstevel@tonic-gate } 27047c478bd9Sstevel@tonic-gate 27057c478bd9Sstevel@tonic-gate /* 27067c478bd9Sstevel@tonic-gate * RDMA Read a buffer from the remote address. 27077c478bd9Sstevel@tonic-gate */ 27087c478bd9Sstevel@tonic-gate rdma_stat 27097c478bd9Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait) 27107c478bd9Sstevel@tonic-gate { 27117c478bd9Sstevel@tonic-gate ibt_send_wr_t rx_wr; 2712*065714dcSSiddheshwar Mahesh int cv_sig = 0; 27130a701b1eSRobert Gordon ibt_wr_ds_t sgl; 27147c478bd9Sstevel@tonic-gate struct send_wid *wdesc; 27157c478bd9Sstevel@tonic-gate ibt_status_t ibt_status = IBT_SUCCESS; 27167c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 27177c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 27187c478bd9Sstevel@tonic-gate 27197c478bd9Sstevel@tonic-gate if (cl == NULL) { 27207c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 27217c478bd9Sstevel@tonic-gate } 27227c478bd9Sstevel@tonic-gate 27230a701b1eSRobert Gordon while (cl != NULL) { 27247c478bd9Sstevel@tonic-gate bzero(&rx_wr, sizeof (ibt_send_wr_t)); 27257c478bd9Sstevel@tonic-gate /* 27267c478bd9Sstevel@tonic-gate * Remote address is at the head chunk item in list. 
27277c478bd9Sstevel@tonic-gate */ 27280a701b1eSRobert Gordon rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->w.c_saddr; 27290a701b1eSRobert Gordon rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr; 27307c478bd9Sstevel@tonic-gate 27310a701b1eSRobert Gordon sgl.ds_va = cl->u.c_daddr; 27320a701b1eSRobert Gordon sgl.ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */ 27330a701b1eSRobert Gordon sgl.ds_len = cl->c_len; 27347c478bd9Sstevel@tonic-gate 2735*065714dcSSiddheshwar Mahesh /* 2736*065714dcSSiddheshwar Mahesh * If there are multiple chunks to be read, and 2737*065714dcSSiddheshwar Mahesh * wait is set, ask for signal only for the last chunk 2738*065714dcSSiddheshwar Mahesh * and wait only on the last chunk. The completion of 2739*065714dcSSiddheshwar Mahesh * RDMA_READ on last chunk ensures that reads on all 2740*065714dcSSiddheshwar Mahesh * previous chunks are also completed. 2741*065714dcSSiddheshwar Mahesh */ 2742*065714dcSSiddheshwar Mahesh if (wait && (cl->c_next == NULL)) { 27437c478bd9Sstevel@tonic-gate cv_sig = 1; 2744*065714dcSSiddheshwar Mahesh wdesc = rib_init_sendwait(0, cv_sig, qp); 2745*065714dcSSiddheshwar Mahesh rx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 2746*065714dcSSiddheshwar Mahesh rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 2747*065714dcSSiddheshwar Mahesh mutex_enter(&wdesc->sendwait_lock); 27487c478bd9Sstevel@tonic-gate } else { 27497c478bd9Sstevel@tonic-gate rx_wr.wr_flags = IBT_WR_NO_FLAGS; 2750*065714dcSSiddheshwar Mahesh rx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID; 27517c478bd9Sstevel@tonic-gate } 27527c478bd9Sstevel@tonic-gate rx_wr.wr_opcode = IBT_WRC_RDMAR; 27537c478bd9Sstevel@tonic-gate rx_wr.wr_trans = IBT_RC_SRV; 27540a701b1eSRobert Gordon rx_wr.wr_nds = 1; 27550a701b1eSRobert Gordon rx_wr.wr_sgl = &sgl; 27567c478bd9Sstevel@tonic-gate 27577c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 27580a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 27597c478bd9Sstevel@tonic-gate ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL); 
27607c478bd9Sstevel@tonic-gate } 27610a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 27627c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 27630a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 27640a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 27657c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 2766*065714dcSSiddheshwar Mahesh if (wait && (cl->c_next == NULL)) { 2767*065714dcSSiddheshwar Mahesh mutex_exit(&wdesc->sendwait_lock); 27687c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wdesc); 2769*065714dcSSiddheshwar Mahesh } 27700a701b1eSRobert Gordon return (RDMA_CONNLOST); 27717c478bd9Sstevel@tonic-gate } 2772*065714dcSSiddheshwar Mahesh 27737c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 27747c478bd9Sstevel@tonic-gate 27757c478bd9Sstevel@tonic-gate /* 27760a701b1eSRobert Gordon * Wait for send to complete if this is the 27770a701b1eSRobert Gordon * last item in the list. 27787c478bd9Sstevel@tonic-gate */ 27790a701b1eSRobert Gordon if (wait && cl->c_next == NULL) { 2780*065714dcSSiddheshwar Mahesh rib_send_hold(qp); 27810a701b1eSRobert Gordon mutex_exit(&wdesc->sendwait_lock); 2782*065714dcSSiddheshwar Mahesh 2783*065714dcSSiddheshwar Mahesh ret = rib_sendwait(qp, wdesc); 2784*065714dcSSiddheshwar Mahesh 2785*065714dcSSiddheshwar Mahesh if (ret != 0) 2786*065714dcSSiddheshwar Mahesh return (ret); 27870a701b1eSRobert Gordon } 27880a701b1eSRobert Gordon cl = cl->c_next; 27890a701b1eSRobert Gordon } 27907c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 27917c478bd9Sstevel@tonic-gate } 27927c478bd9Sstevel@tonic-gate 27937c478bd9Sstevel@tonic-gate /* 27947c478bd9Sstevel@tonic-gate * rib_srv_cm_handler() 27957c478bd9Sstevel@tonic-gate * Connection Manager callback to handle RC connection requests. 
27967c478bd9Sstevel@tonic-gate */ 27977c478bd9Sstevel@tonic-gate /* ARGSUSED */ 27987c478bd9Sstevel@tonic-gate static ibt_cm_status_t 27997c478bd9Sstevel@tonic-gate rib_srv_cm_handler(void *any, ibt_cm_event_t *event, 28007c478bd9Sstevel@tonic-gate ibt_cm_return_args_t *ret_args, void *priv_data, 28017c478bd9Sstevel@tonic-gate ibt_priv_data_len_t len) 28027c478bd9Sstevel@tonic-gate { 28037c478bd9Sstevel@tonic-gate queue_t *q; 28047c478bd9Sstevel@tonic-gate rib_qp_t *qp; 28057c478bd9Sstevel@tonic-gate rpcib_state_t *ribstat; 28067c478bd9Sstevel@tonic-gate rib_hca_t *hca; 28077c478bd9Sstevel@tonic-gate rdma_stat status = RDMA_SUCCESS; 28087c478bd9Sstevel@tonic-gate int i; 28097c478bd9Sstevel@tonic-gate struct clist cl; 28100a701b1eSRobert Gordon rdma_buf_t rdbuf = {0}; 28117c478bd9Sstevel@tonic-gate void *buf = NULL; 28127c478bd9Sstevel@tonic-gate CONN *conn; 28130a701b1eSRobert Gordon ibt_ip_cm_info_t ipinfo; 28140a701b1eSRobert Gordon struct sockaddr_in *s; 28150a701b1eSRobert Gordon struct sockaddr_in6 *s6; 28160a701b1eSRobert Gordon int sin_size = sizeof (struct sockaddr_in); 28170a701b1eSRobert Gordon int in_size = sizeof (struct in_addr); 28180a701b1eSRobert Gordon int sin6_size = sizeof (struct sockaddr_in6); 28197c478bd9Sstevel@tonic-gate 28207c478bd9Sstevel@tonic-gate ASSERT(any != NULL); 28217c478bd9Sstevel@tonic-gate ASSERT(event != NULL); 28227c478bd9Sstevel@tonic-gate 28237c478bd9Sstevel@tonic-gate ribstat = (rpcib_state_t *)any; 28247c478bd9Sstevel@tonic-gate hca = (rib_hca_t *)ribstat->hca; 28257c478bd9Sstevel@tonic-gate ASSERT(hca != NULL); 28267c478bd9Sstevel@tonic-gate 28277c478bd9Sstevel@tonic-gate /* got a connection request */ 28287c478bd9Sstevel@tonic-gate switch (event->cm_type) { 28297c478bd9Sstevel@tonic-gate case IBT_CM_EVENT_REQ_RCV: 28307c478bd9Sstevel@tonic-gate /* 28317c478bd9Sstevel@tonic-gate * If the plugin is in the NO_ACCEPT state, bail out. 
28327c478bd9Sstevel@tonic-gate */ 28337c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 28347c478bd9Sstevel@tonic-gate if (plugin_state == NO_ACCEPT) { 28357c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 28367c478bd9Sstevel@tonic-gate return (IBT_CM_REJECT); 28377c478bd9Sstevel@tonic-gate } 28387c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 28397c478bd9Sstevel@tonic-gate 28407c478bd9Sstevel@tonic-gate /* 28417c478bd9Sstevel@tonic-gate * Need to send a MRA MAD to CM so that it does not 28427c478bd9Sstevel@tonic-gate * timeout on us. 28437c478bd9Sstevel@tonic-gate */ 28447c478bd9Sstevel@tonic-gate (void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id, 28457c478bd9Sstevel@tonic-gate event->cm_event.req.req_timeout * 8, NULL, 0); 28467c478bd9Sstevel@tonic-gate 28477c478bd9Sstevel@tonic-gate mutex_enter(&rib_stat->open_hca_lock); 28487c478bd9Sstevel@tonic-gate q = rib_stat->q; 28497c478bd9Sstevel@tonic-gate mutex_exit(&rib_stat->open_hca_lock); 28500a701b1eSRobert Gordon 28517c478bd9Sstevel@tonic-gate status = rib_svc_create_chan(hca, (caddr_t)q, 28527c478bd9Sstevel@tonic-gate event->cm_event.req.req_prim_hca_port, &qp); 28530a701b1eSRobert Gordon 28547c478bd9Sstevel@tonic-gate if (status) { 28557c478bd9Sstevel@tonic-gate return (IBT_CM_REJECT); 28567c478bd9Sstevel@tonic-gate } 28577c478bd9Sstevel@tonic-gate 28587c478bd9Sstevel@tonic-gate ret_args->cm_ret.rep.cm_channel = qp->qp_hdl; 28590a701b1eSRobert Gordon ret_args->cm_ret.rep.cm_rdma_ra_out = 4; 28600a701b1eSRobert Gordon ret_args->cm_ret.rep.cm_rdma_ra_in = 4; 28617c478bd9Sstevel@tonic-gate ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES; 28627c478bd9Sstevel@tonic-gate 28637c478bd9Sstevel@tonic-gate /* 28647c478bd9Sstevel@tonic-gate * Pre-posts RECV buffers 28657c478bd9Sstevel@tonic-gate */ 28667c478bd9Sstevel@tonic-gate conn = qptoc(qp); 28677c478bd9Sstevel@tonic-gate for (i = 0; i < preposted_rbufs; i++) { 28687c478bd9Sstevel@tonic-gate bzero(&rdbuf, sizeof (rdbuf)); 
28697c478bd9Sstevel@tonic-gate rdbuf.type = RECV_BUFFER; 28707c478bd9Sstevel@tonic-gate buf = rib_rbuf_alloc(conn, &rdbuf); 28717c478bd9Sstevel@tonic-gate if (buf == NULL) { 2872*065714dcSSiddheshwar Mahesh /* 2873*065714dcSSiddheshwar Mahesh * A connection is not established yet. 2874*065714dcSSiddheshwar Mahesh * Just flush the channel. Buffers 2875*065714dcSSiddheshwar Mahesh * posted till now will error out with 2876*065714dcSSiddheshwar Mahesh * IBT_WC_WR_FLUSHED_ERR. 2877*065714dcSSiddheshwar Mahesh */ 2878*065714dcSSiddheshwar Mahesh (void) ibt_flush_channel(qp->qp_hdl); 28797c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, NULL); 28807c478bd9Sstevel@tonic-gate return (IBT_CM_REJECT); 28817c478bd9Sstevel@tonic-gate } 28827c478bd9Sstevel@tonic-gate 28837c478bd9Sstevel@tonic-gate bzero(&cl, sizeof (cl)); 28840a701b1eSRobert Gordon cl.w.c_saddr3 = (caddr_t)rdbuf.addr; 28857c478bd9Sstevel@tonic-gate cl.c_len = rdbuf.len; 28860a701b1eSRobert Gordon cl.c_smemhandle.mrc_lmr = 28870a701b1eSRobert Gordon rdbuf.handle.mrc_lmr; /* lkey */ 28887c478bd9Sstevel@tonic-gate cl.c_next = NULL; 28897c478bd9Sstevel@tonic-gate status = rib_post_recv(conn, &cl); 28907c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 2891*065714dcSSiddheshwar Mahesh /* 2892*065714dcSSiddheshwar Mahesh * A connection is not established yet. 2893*065714dcSSiddheshwar Mahesh * Just flush the channel. Buffers 2894*065714dcSSiddheshwar Mahesh * posted till now will error out with 2895*065714dcSSiddheshwar Mahesh * IBT_WC_WR_FLUSHED_ERR. 
2896*065714dcSSiddheshwar Mahesh */ 2897*065714dcSSiddheshwar Mahesh (void) ibt_flush_channel(qp->qp_hdl); 28987c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, NULL); 28997c478bd9Sstevel@tonic-gate return (IBT_CM_REJECT); 29007c478bd9Sstevel@tonic-gate } 29017c478bd9Sstevel@tonic-gate } 29027c478bd9Sstevel@tonic-gate (void) rib_add_connlist(conn, &hca->srv_conn_list); 29037c478bd9Sstevel@tonic-gate 29047c478bd9Sstevel@tonic-gate /* 29050a701b1eSRobert Gordon * Get the address translation 29067c478bd9Sstevel@tonic-gate */ 29077c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 29087c478bd9Sstevel@tonic-gate if (hca->state == HCA_DETACHED) { 29097c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 29107c478bd9Sstevel@tonic-gate return (IBT_CM_REJECT); 29117c478bd9Sstevel@tonic-gate } 29127c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 29137c478bd9Sstevel@tonic-gate 29140a701b1eSRobert Gordon bzero(&ipinfo, sizeof (ibt_ip_cm_info_t)); 29157c478bd9Sstevel@tonic-gate 29160a701b1eSRobert Gordon if (ibt_get_ip_data(event->cm_priv_data_len, 29170a701b1eSRobert Gordon event->cm_priv_data, 29180a701b1eSRobert Gordon &ipinfo) != IBT_SUCCESS) { 29190a701b1eSRobert Gordon 29200a701b1eSRobert Gordon return (IBT_CM_REJECT); 29210a701b1eSRobert Gordon } 29220a701b1eSRobert Gordon 29230a701b1eSRobert Gordon switch (ipinfo.src_addr.family) { 29240a701b1eSRobert Gordon case AF_INET: 29257c478bd9Sstevel@tonic-gate 29267c478bd9Sstevel@tonic-gate conn->c_raddr.maxlen = 29277c478bd9Sstevel@tonic-gate conn->c_raddr.len = sin_size; 29280a701b1eSRobert Gordon conn->c_raddr.buf = kmem_zalloc(sin_size, KM_SLEEP); 29290a701b1eSRobert Gordon 29307c478bd9Sstevel@tonic-gate s = (struct sockaddr_in *)conn->c_raddr.buf; 29317c478bd9Sstevel@tonic-gate s->sin_family = AF_INET; 29327c478bd9Sstevel@tonic-gate 29330a701b1eSRobert Gordon bcopy((void *)&ipinfo.src_addr.un.ip4addr, 29340a701b1eSRobert Gordon &s->sin_addr, in_size); 29350a701b1eSRobert Gordon 
29360a701b1eSRobert Gordon break; 29370a701b1eSRobert Gordon 29380a701b1eSRobert Gordon case AF_INET6: 29397c478bd9Sstevel@tonic-gate 29407c478bd9Sstevel@tonic-gate conn->c_raddr.maxlen = 29417c478bd9Sstevel@tonic-gate conn->c_raddr.len = sin6_size; 29420a701b1eSRobert Gordon conn->c_raddr.buf = kmem_zalloc(sin6_size, KM_SLEEP); 29437c478bd9Sstevel@tonic-gate 29447c478bd9Sstevel@tonic-gate s6 = (struct sockaddr_in6 *)conn->c_raddr.buf; 29457c478bd9Sstevel@tonic-gate s6->sin6_family = AF_INET6; 29460a701b1eSRobert Gordon bcopy((void *)&ipinfo.src_addr.un.ip6addr, 29470a701b1eSRobert Gordon &s6->sin6_addr, 29487c478bd9Sstevel@tonic-gate sizeof (struct in6_addr)); 29497c478bd9Sstevel@tonic-gate 29500a701b1eSRobert Gordon break; 29510a701b1eSRobert Gordon 29520a701b1eSRobert Gordon default: 29530a701b1eSRobert Gordon return (IBT_CM_REJECT); 29547c478bd9Sstevel@tonic-gate } 29550a701b1eSRobert Gordon 29567c478bd9Sstevel@tonic-gate break; 29577c478bd9Sstevel@tonic-gate 29587c478bd9Sstevel@tonic-gate case IBT_CM_EVENT_CONN_CLOSED: 29597c478bd9Sstevel@tonic-gate { 29607c478bd9Sstevel@tonic-gate CONN *conn; 29617c478bd9Sstevel@tonic-gate rib_qp_t *qp; 29627c478bd9Sstevel@tonic-gate 29637c478bd9Sstevel@tonic-gate switch (event->cm_event.closed) { 29647c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DREP_RCVD: 29657c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DREQ_TIMEOUT: 29667c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DUP: 29677c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_ABORT: 29687c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_ALREADY: 29697c478bd9Sstevel@tonic-gate /* 29707c478bd9Sstevel@tonic-gate * These cases indicate the local end initiated 29717c478bd9Sstevel@tonic-gate * the closing of the channel. Nothing to do here. 
29727c478bd9Sstevel@tonic-gate */ 29737c478bd9Sstevel@tonic-gate break; 29747c478bd9Sstevel@tonic-gate default: 29757c478bd9Sstevel@tonic-gate /* 29767c478bd9Sstevel@tonic-gate * Reason for CONN_CLOSED event must be one of 29777c478bd9Sstevel@tonic-gate * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD 29787c478bd9Sstevel@tonic-gate * or IBT_CM_CLOSED_STALE. These indicate cases were 29797c478bd9Sstevel@tonic-gate * the remote end is closing the channel. In these 29807c478bd9Sstevel@tonic-gate * cases free the channel and transition to error 29817c478bd9Sstevel@tonic-gate * state 29827c478bd9Sstevel@tonic-gate */ 29837c478bd9Sstevel@tonic-gate qp = ibt_get_chan_private(event->cm_channel); 29847c478bd9Sstevel@tonic-gate conn = qptoc(qp); 29857c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 29867c478bd9Sstevel@tonic-gate if (conn->c_state == C_DISCONN_PEND) { 29877c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 29887c478bd9Sstevel@tonic-gate break; 29897c478bd9Sstevel@tonic-gate } 29900a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 29917c478bd9Sstevel@tonic-gate 29927c478bd9Sstevel@tonic-gate /* 29937c478bd9Sstevel@tonic-gate * Free the conn if c_ref goes down to 0 29947c478bd9Sstevel@tonic-gate */ 29957c478bd9Sstevel@tonic-gate if (conn->c_ref == 0) { 29967c478bd9Sstevel@tonic-gate /* 29977c478bd9Sstevel@tonic-gate * Remove from list and free conn 29987c478bd9Sstevel@tonic-gate */ 29997c478bd9Sstevel@tonic-gate conn->c_state = C_DISCONN_PEND; 30007c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 30017c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, 30027c478bd9Sstevel@tonic-gate &hca->srv_conn_list); 30037c478bd9Sstevel@tonic-gate } else { 3004*065714dcSSiddheshwar Mahesh /* 3005*065714dcSSiddheshwar Mahesh * conn will be freed when c_ref goes to 0. 3006*065714dcSSiddheshwar Mahesh * Indicate to cleaning thread not to close 3007*065714dcSSiddheshwar Mahesh * the connection, but just free the channel. 
3008*065714dcSSiddheshwar Mahesh */ 3009*065714dcSSiddheshwar Mahesh conn->c_flags |= C_CLOSE_NOTNEEDED; 30107c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 30117c478bd9Sstevel@tonic-gate } 30120a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__srvcm_chandisconnect); 30137c478bd9Sstevel@tonic-gate break; 30147c478bd9Sstevel@tonic-gate } 30157c478bd9Sstevel@tonic-gate break; 30167c478bd9Sstevel@tonic-gate } 30177c478bd9Sstevel@tonic-gate case IBT_CM_EVENT_CONN_EST: 30187c478bd9Sstevel@tonic-gate /* 30197c478bd9Sstevel@tonic-gate * RTU received, hence connection established. 30207c478bd9Sstevel@tonic-gate */ 30217c478bd9Sstevel@tonic-gate if (rib_debug > 1) 30227c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_srv_cm_handler: " 30237c478bd9Sstevel@tonic-gate "(CONN_EST) channel established"); 30247c478bd9Sstevel@tonic-gate break; 30257c478bd9Sstevel@tonic-gate 30267c478bd9Sstevel@tonic-gate default: 30277c478bd9Sstevel@tonic-gate if (rib_debug > 2) { 30287c478bd9Sstevel@tonic-gate /* Let CM handle the following events. 
*/ 30297c478bd9Sstevel@tonic-gate if (event->cm_type == IBT_CM_EVENT_REP_RCV) { 30307c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_srv_cm_handler: " 30317c478bd9Sstevel@tonic-gate "server recv'ed IBT_CM_EVENT_REP_RCV\n"); 30327c478bd9Sstevel@tonic-gate } else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) { 30337c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_srv_cm_handler: " 30347c478bd9Sstevel@tonic-gate "server recv'ed IBT_CM_EVENT_LAP_RCV\n"); 30357c478bd9Sstevel@tonic-gate } else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) { 30367c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_srv_cm_handler: " 30377c478bd9Sstevel@tonic-gate "server recv'ed IBT_CM_EVENT_MRA_RCV\n"); 30387c478bd9Sstevel@tonic-gate } else if (event->cm_type == IBT_CM_EVENT_APR_RCV) { 30397c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_srv_cm_handler: " 30407c478bd9Sstevel@tonic-gate "server recv'ed IBT_CM_EVENT_APR_RCV\n"); 30417c478bd9Sstevel@tonic-gate } else if (event->cm_type == IBT_CM_EVENT_FAILURE) { 30427c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_srv_cm_handler: " 30437c478bd9Sstevel@tonic-gate "server recv'ed IBT_CM_EVENT_FAILURE\n"); 30447c478bd9Sstevel@tonic-gate } 30457c478bd9Sstevel@tonic-gate } 30460a701b1eSRobert Gordon return (IBT_CM_DEFAULT); 30477c478bd9Sstevel@tonic-gate } 30487c478bd9Sstevel@tonic-gate 30497c478bd9Sstevel@tonic-gate /* accept all other CM messages (i.e. 
let the CM handle them) */ 30507c478bd9Sstevel@tonic-gate return (IBT_CM_ACCEPT); 30517c478bd9Sstevel@tonic-gate } 30527c478bd9Sstevel@tonic-gate 30537c478bd9Sstevel@tonic-gate static rdma_stat 30547c478bd9Sstevel@tonic-gate rib_register_service(rib_hca_t *hca, int service_type) 30557c478bd9Sstevel@tonic-gate { 30567c478bd9Sstevel@tonic-gate ibt_srv_desc_t sdesc; 30577c478bd9Sstevel@tonic-gate ibt_hca_portinfo_t *port_infop; 30587c478bd9Sstevel@tonic-gate ib_svc_id_t srv_id; 30597c478bd9Sstevel@tonic-gate ibt_srv_hdl_t srv_hdl; 30607c478bd9Sstevel@tonic-gate uint_t port_size; 30610a701b1eSRobert Gordon uint_t pki, i, num_ports, nbinds; 30627c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 30630a701b1eSRobert Gordon rib_service_t *new_service; 30647c478bd9Sstevel@tonic-gate ib_pkey_t pkey; 30657c478bd9Sstevel@tonic-gate 30667c478bd9Sstevel@tonic-gate /* 30677c478bd9Sstevel@tonic-gate * Query all ports for the given HCA 30687c478bd9Sstevel@tonic-gate */ 30697c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 30707c478bd9Sstevel@tonic-gate if (hca->state != HCA_DETACHED) { 30717c478bd9Sstevel@tonic-gate ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop, 30727c478bd9Sstevel@tonic-gate &num_ports, &port_size); 30737c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 30747c478bd9Sstevel@tonic-gate } else { 30757c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 30767c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 30777c478bd9Sstevel@tonic-gate } 30787c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 30797c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 30807c478bd9Sstevel@tonic-gate } 30817c478bd9Sstevel@tonic-gate 30820a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__regservice_numports, 30830a701b1eSRobert Gordon int, num_ports); 30847c478bd9Sstevel@tonic-gate 30857c478bd9Sstevel@tonic-gate for (i = 0; i < num_ports; i++) { 30867c478bd9Sstevel@tonic-gate if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) { 30870a701b1eSRobert Gordon 
DTRACE_PROBE1(rpcib__i__regservice__portinactive, 30880a701b1eSRobert Gordon int, i+1); 30890a701b1eSRobert Gordon } else if (port_infop[i].p_linkstate == IBT_PORT_ACTIVE) { 30900a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__regservice__portactive, 30910a701b1eSRobert Gordon int, i+1); 30927c478bd9Sstevel@tonic-gate } 30937c478bd9Sstevel@tonic-gate } 30940a701b1eSRobert Gordon 30957c478bd9Sstevel@tonic-gate /* 30967c478bd9Sstevel@tonic-gate * Get all the IP addresses on this system to register the 30977c478bd9Sstevel@tonic-gate * given "service type" on all DNS recognized IP addrs. 30987c478bd9Sstevel@tonic-gate * Each service type such as NFS will have all the systems 30997c478bd9Sstevel@tonic-gate * IP addresses as its different names. For now the only 31007c478bd9Sstevel@tonic-gate * type of service we support in RPCIB is NFS. 31017c478bd9Sstevel@tonic-gate */ 31027c478bd9Sstevel@tonic-gate rw_enter(&hca->service_list_lock, RW_WRITER); 31037c478bd9Sstevel@tonic-gate /* 31047c478bd9Sstevel@tonic-gate * Start registering and binding service to active 31057c478bd9Sstevel@tonic-gate * on active ports on this HCA. 31067c478bd9Sstevel@tonic-gate */ 31077c478bd9Sstevel@tonic-gate nbinds = 0; 31087c478bd9Sstevel@tonic-gate new_service = NULL; 31097c478bd9Sstevel@tonic-gate 31107c478bd9Sstevel@tonic-gate /* 31117c478bd9Sstevel@tonic-gate * We use IP addresses as the service names for 31127c478bd9Sstevel@tonic-gate * service registration. Register each of them 31137c478bd9Sstevel@tonic-gate * with CM to obtain a svc_id and svc_hdl. We do not 31147c478bd9Sstevel@tonic-gate * register the service with machine's loopback address. 
31157c478bd9Sstevel@tonic-gate */ 31167c478bd9Sstevel@tonic-gate (void) bzero(&srv_id, sizeof (ib_svc_id_t)); 31177c478bd9Sstevel@tonic-gate (void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t)); 31187c478bd9Sstevel@tonic-gate (void) bzero(&sdesc, sizeof (ibt_srv_desc_t)); 31197c478bd9Sstevel@tonic-gate 31207c478bd9Sstevel@tonic-gate sdesc.sd_handler = rib_srv_cm_handler; 31217c478bd9Sstevel@tonic-gate sdesc.sd_flags = 0; 31227c478bd9Sstevel@tonic-gate ibt_status = ibt_register_service(hca->ibt_clnt_hdl, 3123f837ee4aSSiddheshwar Mahesh &sdesc, ibt_get_ip_sid(IPPROTO_TCP, nfs_rdma_port), 31240a701b1eSRobert Gordon 1, &srv_hdl, &srv_id); 31250a701b1eSRobert Gordon 31267c478bd9Sstevel@tonic-gate for (i = 0; i < num_ports; i++) { 31277c478bd9Sstevel@tonic-gate if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) 31287c478bd9Sstevel@tonic-gate continue; 31297c478bd9Sstevel@tonic-gate 31307c478bd9Sstevel@tonic-gate for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) { 31317c478bd9Sstevel@tonic-gate pkey = port_infop[i].p_pkey_tbl[pki]; 31320a701b1eSRobert Gordon if ((pkey & IBSRM_HB) && 31330a701b1eSRobert Gordon (pkey != IB_PKEY_INVALID_FULL)) { 31347c478bd9Sstevel@tonic-gate 31357c478bd9Sstevel@tonic-gate /* 31367c478bd9Sstevel@tonic-gate * Allocate and prepare a service entry 31377c478bd9Sstevel@tonic-gate */ 31380a701b1eSRobert Gordon new_service = 31390a701b1eSRobert Gordon kmem_zalloc(1 * sizeof (rib_service_t), 31407c478bd9Sstevel@tonic-gate KM_SLEEP); 31417c478bd9Sstevel@tonic-gate 31420a701b1eSRobert Gordon new_service->srv_type = service_type; 31430a701b1eSRobert Gordon new_service->srv_hdl = srv_hdl; 31447c478bd9Sstevel@tonic-gate new_service->srv_next = NULL; 31457c478bd9Sstevel@tonic-gate 31467c478bd9Sstevel@tonic-gate ibt_status = ibt_bind_service(srv_hdl, 31470a701b1eSRobert Gordon port_infop[i].p_sgid_tbl[0], 31480a701b1eSRobert Gordon NULL, rib_stat, NULL); 31490a701b1eSRobert Gordon 31500a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__regservice__bindres, 
31510a701b1eSRobert Gordon int, ibt_status); 31520a701b1eSRobert Gordon 31537c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 31547c478bd9Sstevel@tonic-gate kmem_free(new_service, 31557c478bd9Sstevel@tonic-gate sizeof (rib_service_t)); 31567c478bd9Sstevel@tonic-gate new_service = NULL; 31577c478bd9Sstevel@tonic-gate continue; 31587c478bd9Sstevel@tonic-gate } 31590a701b1eSRobert Gordon 31607c478bd9Sstevel@tonic-gate /* 31617c478bd9Sstevel@tonic-gate * Add to the service list for this HCA 31627c478bd9Sstevel@tonic-gate */ 31637c478bd9Sstevel@tonic-gate new_service->srv_next = hca->service_list; 31647c478bd9Sstevel@tonic-gate hca->service_list = new_service; 31657c478bd9Sstevel@tonic-gate new_service = NULL; 31667c478bd9Sstevel@tonic-gate nbinds++; 31677c478bd9Sstevel@tonic-gate } 31687c478bd9Sstevel@tonic-gate } 31697c478bd9Sstevel@tonic-gate } 31707c478bd9Sstevel@tonic-gate rw_exit(&hca->service_list_lock); 31717c478bd9Sstevel@tonic-gate 31727c478bd9Sstevel@tonic-gate ibt_free_portinfo(port_infop, port_size); 31737c478bd9Sstevel@tonic-gate 31747c478bd9Sstevel@tonic-gate if (nbinds == 0) { 31757c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 31767c478bd9Sstevel@tonic-gate } else { 31777c478bd9Sstevel@tonic-gate /* 31787c478bd9Sstevel@tonic-gate * Put this plugin into accept state, since atleast 31797c478bd9Sstevel@tonic-gate * one registration was successful. 
31807c478bd9Sstevel@tonic-gate */ 31817c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 31827c478bd9Sstevel@tonic-gate plugin_state = ACCEPT; 31837c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 31847c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 31857c478bd9Sstevel@tonic-gate } 31867c478bd9Sstevel@tonic-gate } 31877c478bd9Sstevel@tonic-gate 31887c478bd9Sstevel@tonic-gate void 31897c478bd9Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd) 31907c478bd9Sstevel@tonic-gate { 31917c478bd9Sstevel@tonic-gate rdma_stat status = RDMA_SUCCESS; 31927c478bd9Sstevel@tonic-gate 31937c478bd9Sstevel@tonic-gate rd->active = 0; 31947c478bd9Sstevel@tonic-gate rd->err_code = RDMA_FAILED; 31957c478bd9Sstevel@tonic-gate 31967c478bd9Sstevel@tonic-gate /* 31977c478bd9Sstevel@tonic-gate * First check if a hca is still attached 31987c478bd9Sstevel@tonic-gate */ 31997c478bd9Sstevel@tonic-gate rw_enter(&rib_stat->hca->state_lock, RW_READER); 32007c478bd9Sstevel@tonic-gate if (rib_stat->hca->state != HCA_INITED) { 32017c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 32027c478bd9Sstevel@tonic-gate return; 32037c478bd9Sstevel@tonic-gate } 32047c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 32057c478bd9Sstevel@tonic-gate 32067c478bd9Sstevel@tonic-gate rib_stat->q = &rd->q; 32077c478bd9Sstevel@tonic-gate /* 32087c478bd9Sstevel@tonic-gate * Right now the only service type is NFS. Hence force feed this 32097c478bd9Sstevel@tonic-gate * value. Ideally to communicate the service type it should be 32107c478bd9Sstevel@tonic-gate * passed down in rdma_svc_data. 
32117c478bd9Sstevel@tonic-gate */ 32127c478bd9Sstevel@tonic-gate rib_stat->service_type = NFS; 32137c478bd9Sstevel@tonic-gate status = rib_register_service(rib_stat->hca, NFS); 32147c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 32157c478bd9Sstevel@tonic-gate rd->err_code = status; 32167c478bd9Sstevel@tonic-gate return; 32177c478bd9Sstevel@tonic-gate } 32187c478bd9Sstevel@tonic-gate /* 32197c478bd9Sstevel@tonic-gate * Service active on an HCA, check rd->err_code for more 32207c478bd9Sstevel@tonic-gate * explainable errors. 32217c478bd9Sstevel@tonic-gate */ 32227c478bd9Sstevel@tonic-gate rd->active = 1; 32237c478bd9Sstevel@tonic-gate rd->err_code = status; 32247c478bd9Sstevel@tonic-gate } 32257c478bd9Sstevel@tonic-gate 32267c478bd9Sstevel@tonic-gate /* XXXX */ 32277c478bd9Sstevel@tonic-gate /* ARGSUSED */ 32287c478bd9Sstevel@tonic-gate static void 32297c478bd9Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata) 32307c478bd9Sstevel@tonic-gate { 32317c478bd9Sstevel@tonic-gate rib_hca_t *hca; 32327c478bd9Sstevel@tonic-gate 32337c478bd9Sstevel@tonic-gate /* 32347c478bd9Sstevel@tonic-gate * KRPC called the RDMATF to stop the listeners, this means 32357c478bd9Sstevel@tonic-gate * stop sending incomming or recieved requests to KRPC master 32367c478bd9Sstevel@tonic-gate * transport handle for RDMA-IB. This is also means that the 32377c478bd9Sstevel@tonic-gate * master transport handle, responsible for us, is going away. 
32387c478bd9Sstevel@tonic-gate */ 32397c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 32407c478bd9Sstevel@tonic-gate plugin_state = NO_ACCEPT; 32417c478bd9Sstevel@tonic-gate if (svcdata != NULL) 32427c478bd9Sstevel@tonic-gate svcdata->active = 0; 32437c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 32447c478bd9Sstevel@tonic-gate 32457c478bd9Sstevel@tonic-gate /* 32467c478bd9Sstevel@tonic-gate * First check if a hca is still attached 32477c478bd9Sstevel@tonic-gate */ 32487c478bd9Sstevel@tonic-gate hca = rib_stat->hca; 32497c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 32507c478bd9Sstevel@tonic-gate if (hca->state != HCA_INITED) { 32517c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 32527c478bd9Sstevel@tonic-gate return; 32537c478bd9Sstevel@tonic-gate } 32540a701b1eSRobert Gordon rib_close_channels(&hca->srv_conn_list); 32557c478bd9Sstevel@tonic-gate rib_stop_services(hca); 32567c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 32577c478bd9Sstevel@tonic-gate } 32587c478bd9Sstevel@tonic-gate 32597c478bd9Sstevel@tonic-gate /* 32607c478bd9Sstevel@tonic-gate * Traverse the HCA's service list to unbind and deregister services. 32617c478bd9Sstevel@tonic-gate * Instead of unbinding the service for a service handle by 32627c478bd9Sstevel@tonic-gate * calling ibt_unbind_service() for each port/pkey, we unbind 32637c478bd9Sstevel@tonic-gate * all the services for the service handle by making only one 32647c478bd9Sstevel@tonic-gate * call to ibt_unbind_all_services(). Then, we deregister the 32657c478bd9Sstevel@tonic-gate * service for the service handle. 32667c478bd9Sstevel@tonic-gate * 32677c478bd9Sstevel@tonic-gate * When traversing the entries in service_list, we compare the 32687c478bd9Sstevel@tonic-gate * srv_hdl of the current entry with that of the next. 
If they 32697c478bd9Sstevel@tonic-gate * are different or if the next entry is NULL, the current entry 32707c478bd9Sstevel@tonic-gate * marks the last binding of the service handle. In this case, 32717c478bd9Sstevel@tonic-gate * call ibt_unbind_all_services() and deregister the service for 32727c478bd9Sstevel@tonic-gate * the service handle. If they are the same, the current and the 32737c478bd9Sstevel@tonic-gate * next entries are bound to the same service handle. In this 32747c478bd9Sstevel@tonic-gate * case, move on to the next entry. 32757c478bd9Sstevel@tonic-gate */ 32767c478bd9Sstevel@tonic-gate static void 32777c478bd9Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca) 32787c478bd9Sstevel@tonic-gate { 32797c478bd9Sstevel@tonic-gate rib_service_t *srv_list, *to_remove; 32807c478bd9Sstevel@tonic-gate 32817c478bd9Sstevel@tonic-gate /* 32827c478bd9Sstevel@tonic-gate * unbind and deregister the services for this service type. 32837c478bd9Sstevel@tonic-gate * Right now there is only one service type. In future it will 32847c478bd9Sstevel@tonic-gate * be passed down to this function. 
32857c478bd9Sstevel@tonic-gate */ 32867c478bd9Sstevel@tonic-gate rw_enter(&hca->service_list_lock, RW_WRITER); 32877c478bd9Sstevel@tonic-gate srv_list = hca->service_list; 32887c478bd9Sstevel@tonic-gate while (srv_list != NULL) { 32897c478bd9Sstevel@tonic-gate to_remove = srv_list; 32907c478bd9Sstevel@tonic-gate srv_list = to_remove->srv_next; 32917c478bd9Sstevel@tonic-gate if (srv_list == NULL || bcmp(to_remove->srv_hdl, 32927c478bd9Sstevel@tonic-gate srv_list->srv_hdl, sizeof (ibt_srv_hdl_t))) { 32937c478bd9Sstevel@tonic-gate 32940a701b1eSRobert Gordon (void) ibt_unbind_all_services(to_remove->srv_hdl); 32950a701b1eSRobert Gordon (void) ibt_deregister_service(hca->ibt_clnt_hdl, 32967c478bd9Sstevel@tonic-gate to_remove->srv_hdl); 32977c478bd9Sstevel@tonic-gate } 32987c478bd9Sstevel@tonic-gate 32997c478bd9Sstevel@tonic-gate kmem_free(to_remove, sizeof (rib_service_t)); 33007c478bd9Sstevel@tonic-gate } 33017c478bd9Sstevel@tonic-gate hca->service_list = NULL; 33027c478bd9Sstevel@tonic-gate rw_exit(&hca->service_list_lock); 33037c478bd9Sstevel@tonic-gate } 33047c478bd9Sstevel@tonic-gate 33057c478bd9Sstevel@tonic-gate static struct svc_recv * 33067c478bd9Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl) 33077c478bd9Sstevel@tonic-gate { 33087c478bd9Sstevel@tonic-gate struct svc_recv *recvp; 33097c478bd9Sstevel@tonic-gate 33107c478bd9Sstevel@tonic-gate recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP); 33117c478bd9Sstevel@tonic-gate recvp->vaddr = sgl->ds_va; 33127c478bd9Sstevel@tonic-gate recvp->qp = qp; 33137c478bd9Sstevel@tonic-gate recvp->bytes_xfer = 0; 33147c478bd9Sstevel@tonic-gate return (recvp); 33157c478bd9Sstevel@tonic-gate } 33167c478bd9Sstevel@tonic-gate 33177c478bd9Sstevel@tonic-gate static int 33187c478bd9Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp) 33197c478bd9Sstevel@tonic-gate { 33207c478bd9Sstevel@tonic-gate kmem_free(recvp, sizeof (*recvp)); 33217c478bd9Sstevel@tonic-gate 33227c478bd9Sstevel@tonic-gate return 
(0); 33237c478bd9Sstevel@tonic-gate } 33247c478bd9Sstevel@tonic-gate 33257c478bd9Sstevel@tonic-gate static struct reply * 33267c478bd9Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid) 33277c478bd9Sstevel@tonic-gate { 33287c478bd9Sstevel@tonic-gate struct reply *rep; 33297c478bd9Sstevel@tonic-gate 33307c478bd9Sstevel@tonic-gate 33317c478bd9Sstevel@tonic-gate rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP); 33327c478bd9Sstevel@tonic-gate if (rep == NULL) { 33330a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__addrreply__nomem); 33347c478bd9Sstevel@tonic-gate return (NULL); 33357c478bd9Sstevel@tonic-gate } 33367c478bd9Sstevel@tonic-gate rep->xid = msgid; 33377c478bd9Sstevel@tonic-gate rep->vaddr_cq = NULL; 33387c478bd9Sstevel@tonic-gate rep->bytes_xfer = 0; 33397c478bd9Sstevel@tonic-gate rep->status = (uint_t)REPLY_WAIT; 33407c478bd9Sstevel@tonic-gate rep->prev = NULL; 33417c478bd9Sstevel@tonic-gate cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL); 33427c478bd9Sstevel@tonic-gate 33437c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 33447c478bd9Sstevel@tonic-gate if (qp->replylist) { 33457c478bd9Sstevel@tonic-gate rep->next = qp->replylist; 33467c478bd9Sstevel@tonic-gate qp->replylist->prev = rep; 33477c478bd9Sstevel@tonic-gate } 33487c478bd9Sstevel@tonic-gate qp->rep_list_size++; 33490a701b1eSRobert Gordon 33500a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__addrreply__listsize, 33510a701b1eSRobert Gordon int, qp->rep_list_size); 33520a701b1eSRobert Gordon 33537c478bd9Sstevel@tonic-gate qp->replylist = rep; 33547c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 33557c478bd9Sstevel@tonic-gate 33567c478bd9Sstevel@tonic-gate return (rep); 33577c478bd9Sstevel@tonic-gate } 33587c478bd9Sstevel@tonic-gate 33597c478bd9Sstevel@tonic-gate static rdma_stat 33607c478bd9Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp) 33617c478bd9Sstevel@tonic-gate { 33627c478bd9Sstevel@tonic-gate struct reply *r, *n; 33637c478bd9Sstevel@tonic-gate 
33647c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 33657c478bd9Sstevel@tonic-gate for (r = qp->replylist; r != NULL; r = n) { 33667c478bd9Sstevel@tonic-gate n = r->next; 33677c478bd9Sstevel@tonic-gate (void) rib_remreply(qp, r); 33687c478bd9Sstevel@tonic-gate } 33697c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 33707c478bd9Sstevel@tonic-gate 33717c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 33727c478bd9Sstevel@tonic-gate } 33737c478bd9Sstevel@tonic-gate 33747c478bd9Sstevel@tonic-gate static int 33757c478bd9Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep) 33767c478bd9Sstevel@tonic-gate { 33777c478bd9Sstevel@tonic-gate 33787c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->replylist_lock)); 33797c478bd9Sstevel@tonic-gate if (rep->prev) { 33807c478bd9Sstevel@tonic-gate rep->prev->next = rep->next; 33817c478bd9Sstevel@tonic-gate } 33827c478bd9Sstevel@tonic-gate if (rep->next) { 33837c478bd9Sstevel@tonic-gate rep->next->prev = rep->prev; 33847c478bd9Sstevel@tonic-gate } 33857c478bd9Sstevel@tonic-gate if (qp->replylist == rep) 33867c478bd9Sstevel@tonic-gate qp->replylist = rep->next; 33877c478bd9Sstevel@tonic-gate 33887c478bd9Sstevel@tonic-gate cv_destroy(&rep->wait_cv); 33897c478bd9Sstevel@tonic-gate qp->rep_list_size--; 33900a701b1eSRobert Gordon 33910a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__remreply__listsize, 33920a701b1eSRobert Gordon int, qp->rep_list_size); 33937c478bd9Sstevel@tonic-gate 33947c478bd9Sstevel@tonic-gate kmem_free(rep, sizeof (*rep)); 33957c478bd9Sstevel@tonic-gate 33967c478bd9Sstevel@tonic-gate return (0); 33977c478bd9Sstevel@tonic-gate } 33987c478bd9Sstevel@tonic-gate 33997c478bd9Sstevel@tonic-gate rdma_stat 34000a701b1eSRobert Gordon rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen, 34017c478bd9Sstevel@tonic-gate struct mrc *buf_handle) 34027c478bd9Sstevel@tonic-gate { 34037c478bd9Sstevel@tonic-gate ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 
34047c478bd9Sstevel@tonic-gate ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 34057c478bd9Sstevel@tonic-gate rdma_stat status; 34067c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 34077c478bd9Sstevel@tonic-gate 34087c478bd9Sstevel@tonic-gate /* 34097c478bd9Sstevel@tonic-gate * Note: ALL buffer pools use the same memory type RDMARW. 34107c478bd9Sstevel@tonic-gate */ 34110a701b1eSRobert Gordon status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 34127c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 341311606941Sjwahlig buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 34147c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 34157c478bd9Sstevel@tonic-gate buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 34167c478bd9Sstevel@tonic-gate } else { 34177c478bd9Sstevel@tonic-gate buf_handle->mrc_linfo = NULL; 34187c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = 0; 34197c478bd9Sstevel@tonic-gate buf_handle->mrc_rmr = 0; 34207c478bd9Sstevel@tonic-gate } 34217c478bd9Sstevel@tonic-gate return (status); 34227c478bd9Sstevel@tonic-gate } 34237c478bd9Sstevel@tonic-gate 34247c478bd9Sstevel@tonic-gate static rdma_stat 34250a701b1eSRobert Gordon rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, 34260a701b1eSRobert Gordon ibt_mr_flags_t spec, 34277c478bd9Sstevel@tonic-gate ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp) 34287c478bd9Sstevel@tonic-gate { 34297c478bd9Sstevel@tonic-gate ibt_mr_attr_t mem_attr; 34307c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 343111606941Sjwahlig mem_attr.mr_vaddr = (uintptr_t)buf; 34327c478bd9Sstevel@tonic-gate mem_attr.mr_len = (ib_msglen_t)size; 34330a701b1eSRobert Gordon mem_attr.mr_as = (struct as *)(caddr_t)adsp; 34347c478bd9Sstevel@tonic-gate mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 34357c478bd9Sstevel@tonic-gate IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE | 34367c478bd9Sstevel@tonic-gate IBT_MR_ENABLE_WINDOW_BIND | spec; 
34377c478bd9Sstevel@tonic-gate 34387c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 34397c478bd9Sstevel@tonic-gate if (hca->state == HCA_INITED) { 34407c478bd9Sstevel@tonic-gate ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl, 34417c478bd9Sstevel@tonic-gate &mem_attr, mr_hdlp, mr_descp); 34427c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 34437c478bd9Sstevel@tonic-gate } else { 34447c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 34457c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 34467c478bd9Sstevel@tonic-gate } 34477c478bd9Sstevel@tonic-gate 34487c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 34497c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 34507c478bd9Sstevel@tonic-gate } 34517c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 34527c478bd9Sstevel@tonic-gate } 34537c478bd9Sstevel@tonic-gate 34547c478bd9Sstevel@tonic-gate rdma_stat 34550a701b1eSRobert Gordon rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen, 34560a701b1eSRobert Gordon struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc) 34577c478bd9Sstevel@tonic-gate { 34587c478bd9Sstevel@tonic-gate ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 34590a701b1eSRobert Gordon rib_lrc_entry_t *l; 34607c478bd9Sstevel@tonic-gate ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 34617c478bd9Sstevel@tonic-gate rdma_stat status; 34627c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 34637c478bd9Sstevel@tonic-gate 34647c478bd9Sstevel@tonic-gate /* 34657c478bd9Sstevel@tonic-gate * Non-coherent memory registration. 
34667c478bd9Sstevel@tonic-gate */ 34670a701b1eSRobert Gordon l = (rib_lrc_entry_t *)lrc; 34680a701b1eSRobert Gordon if (l) { 34690a701b1eSRobert Gordon if (l->registered) { 34700a701b1eSRobert Gordon buf_handle->mrc_linfo = 34710a701b1eSRobert Gordon (uintptr_t)l->lrc_mhandle.mrc_linfo; 34720a701b1eSRobert Gordon buf_handle->mrc_lmr = 34730a701b1eSRobert Gordon (uint32_t)l->lrc_mhandle.mrc_lmr; 34740a701b1eSRobert Gordon buf_handle->mrc_rmr = 34750a701b1eSRobert Gordon (uint32_t)l->lrc_mhandle.mrc_rmr; 34760a701b1eSRobert Gordon *sync_handle = (RIB_SYNCMEM_HANDLE) 34770a701b1eSRobert Gordon (uintptr_t)l->lrc_mhandle.mrc_linfo; 34780a701b1eSRobert Gordon return (RDMA_SUCCESS); 34790a701b1eSRobert Gordon } else { 34800a701b1eSRobert Gordon /* Always register the whole buffer */ 34810a701b1eSRobert Gordon buf = (caddr_t)l->lrc_buf; 34820a701b1eSRobert Gordon buflen = l->lrc_len; 34830a701b1eSRobert Gordon } 34840a701b1eSRobert Gordon } 34850a701b1eSRobert Gordon status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 34860a701b1eSRobert Gordon 34877c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 34880a701b1eSRobert Gordon if (l) { 34890a701b1eSRobert Gordon l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl; 34900a701b1eSRobert Gordon l->lrc_mhandle.mrc_lmr = (uint32_t)mr_desc.md_lkey; 34910a701b1eSRobert Gordon l->lrc_mhandle.mrc_rmr = (uint32_t)mr_desc.md_rkey; 34920a701b1eSRobert Gordon l->registered = TRUE; 34930a701b1eSRobert Gordon } 349411606941Sjwahlig buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 34957c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 34967c478bd9Sstevel@tonic-gate buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 34977c478bd9Sstevel@tonic-gate *sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl; 34987c478bd9Sstevel@tonic-gate } else { 34997c478bd9Sstevel@tonic-gate buf_handle->mrc_linfo = NULL; 35007c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = 0; 35017c478bd9Sstevel@tonic-gate buf_handle->mrc_rmr = 0; 
35027c478bd9Sstevel@tonic-gate } 35037c478bd9Sstevel@tonic-gate return (status); 35047c478bd9Sstevel@tonic-gate } 35057c478bd9Sstevel@tonic-gate 35067c478bd9Sstevel@tonic-gate /* ARGSUSED */ 35077c478bd9Sstevel@tonic-gate rdma_stat 35087c478bd9Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle) 35097c478bd9Sstevel@tonic-gate { 35107c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 35117c478bd9Sstevel@tonic-gate /* 35127c478bd9Sstevel@tonic-gate * Allow memory deregistration even if HCA is 35137c478bd9Sstevel@tonic-gate * getting detached. Need all outstanding 35147c478bd9Sstevel@tonic-gate * memory registrations to be deregistered 35157c478bd9Sstevel@tonic-gate * before HCA_DETACH_EVENT can be accepted. 35167c478bd9Sstevel@tonic-gate */ 35177c478bd9Sstevel@tonic-gate (void) ibt_deregister_mr(hca->hca_hdl, 351811606941Sjwahlig (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo); 35197c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 35207c478bd9Sstevel@tonic-gate } 35217c478bd9Sstevel@tonic-gate 35227c478bd9Sstevel@tonic-gate /* ARGSUSED */ 35237c478bd9Sstevel@tonic-gate rdma_stat 35247c478bd9Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle, 35250a701b1eSRobert Gordon RIB_SYNCMEM_HANDLE sync_handle, void *lrc) 35267c478bd9Sstevel@tonic-gate { 35270a701b1eSRobert Gordon rib_lrc_entry_t *l; 35280a701b1eSRobert Gordon l = (rib_lrc_entry_t *)lrc; 35290a701b1eSRobert Gordon if (l) 35300a701b1eSRobert Gordon if (l->registered) 35310a701b1eSRobert Gordon return (RDMA_SUCCESS); 35320a701b1eSRobert Gordon 35337c478bd9Sstevel@tonic-gate (void) rib_deregistermem(conn, buf, buf_handle); 35347c478bd9Sstevel@tonic-gate 35357c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 35367c478bd9Sstevel@tonic-gate } 35377c478bd9Sstevel@tonic-gate 35387c478bd9Sstevel@tonic-gate /* ARGSUSED */ 35397c478bd9Sstevel@tonic-gate rdma_stat 35407c478bd9Sstevel@tonic-gate rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, 
caddr_t buf, 35417c478bd9Sstevel@tonic-gate int len, int cpu) 35427c478bd9Sstevel@tonic-gate { 35437c478bd9Sstevel@tonic-gate ibt_status_t status; 35447c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 35457c478bd9Sstevel@tonic-gate ibt_mr_sync_t mr_segment; 35467c478bd9Sstevel@tonic-gate 35477c478bd9Sstevel@tonic-gate mr_segment.ms_handle = (ibt_mr_hdl_t)shandle; 354811606941Sjwahlig mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf; 35497c478bd9Sstevel@tonic-gate mr_segment.ms_len = (ib_memlen_t)len; 35507c478bd9Sstevel@tonic-gate if (cpu) { 35517c478bd9Sstevel@tonic-gate /* make incoming data visible to memory */ 35527c478bd9Sstevel@tonic-gate mr_segment.ms_flags = IBT_SYNC_WRITE; 35537c478bd9Sstevel@tonic-gate } else { 35547c478bd9Sstevel@tonic-gate /* make memory changes visible to IO */ 35557c478bd9Sstevel@tonic-gate mr_segment.ms_flags = IBT_SYNC_READ; 35567c478bd9Sstevel@tonic-gate } 35577c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 35587c478bd9Sstevel@tonic-gate if (hca->state == HCA_INITED) { 35597c478bd9Sstevel@tonic-gate status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1); 35607c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 35617c478bd9Sstevel@tonic-gate } else { 35627c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 35637c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 35647c478bd9Sstevel@tonic-gate } 35657c478bd9Sstevel@tonic-gate 35667c478bd9Sstevel@tonic-gate if (status == IBT_SUCCESS) 35677c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 35687c478bd9Sstevel@tonic-gate else { 35697c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 35707c478bd9Sstevel@tonic-gate } 35717c478bd9Sstevel@tonic-gate } 35727c478bd9Sstevel@tonic-gate 35737c478bd9Sstevel@tonic-gate /* 35747c478bd9Sstevel@tonic-gate * XXXX ???? 
35757c478bd9Sstevel@tonic-gate */ 35767c478bd9Sstevel@tonic-gate static rdma_stat 35777c478bd9Sstevel@tonic-gate rib_getinfo(rdma_info_t *info) 35787c478bd9Sstevel@tonic-gate { 35797c478bd9Sstevel@tonic-gate /* 35807c478bd9Sstevel@tonic-gate * XXXX Hack! 35817c478bd9Sstevel@tonic-gate */ 35827c478bd9Sstevel@tonic-gate info->addrlen = 16; 35837c478bd9Sstevel@tonic-gate info->mts = 1000000; 35847c478bd9Sstevel@tonic-gate info->mtu = 1000000; 35857c478bd9Sstevel@tonic-gate 35867c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 35877c478bd9Sstevel@tonic-gate } 35887c478bd9Sstevel@tonic-gate 35897c478bd9Sstevel@tonic-gate rib_bufpool_t * 35907c478bd9Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num) 35917c478bd9Sstevel@tonic-gate { 35927c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 35937c478bd9Sstevel@tonic-gate bufpool_t *bp = NULL; 35947c478bd9Sstevel@tonic-gate caddr_t buf; 35957c478bd9Sstevel@tonic-gate ibt_mr_attr_t mem_attr; 35967c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 35977c478bd9Sstevel@tonic-gate int i, j; 35987c478bd9Sstevel@tonic-gate 35997c478bd9Sstevel@tonic-gate rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP); 36007c478bd9Sstevel@tonic-gate 36017c478bd9Sstevel@tonic-gate bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) + 36027c478bd9Sstevel@tonic-gate num * sizeof (void *), KM_SLEEP); 36037c478bd9Sstevel@tonic-gate 36047c478bd9Sstevel@tonic-gate mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock); 36057c478bd9Sstevel@tonic-gate bp->numelems = num; 36067c478bd9Sstevel@tonic-gate 36070a701b1eSRobert Gordon 36087c478bd9Sstevel@tonic-gate switch (ptype) { 36097c478bd9Sstevel@tonic-gate case SEND_BUFFER: 36107c478bd9Sstevel@tonic-gate mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 36117c478bd9Sstevel@tonic-gate bp->rsize = RPC_MSG_SZ; 36127c478bd9Sstevel@tonic-gate break; 36137c478bd9Sstevel@tonic-gate case RECV_BUFFER: 36147c478bd9Sstevel@tonic-gate mem_attr.mr_flags = 
IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 36157c478bd9Sstevel@tonic-gate bp->rsize = RPC_BUF_SIZE; 36167c478bd9Sstevel@tonic-gate break; 36177c478bd9Sstevel@tonic-gate default: 36187c478bd9Sstevel@tonic-gate goto fail; 36197c478bd9Sstevel@tonic-gate } 36207c478bd9Sstevel@tonic-gate 36217c478bd9Sstevel@tonic-gate /* 36227c478bd9Sstevel@tonic-gate * Register the pool. 36237c478bd9Sstevel@tonic-gate */ 36247c478bd9Sstevel@tonic-gate bp->bufsize = num * bp->rsize; 36257c478bd9Sstevel@tonic-gate bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP); 36267c478bd9Sstevel@tonic-gate rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num * 36277c478bd9Sstevel@tonic-gate sizeof (ibt_mr_hdl_t), KM_SLEEP); 36287c478bd9Sstevel@tonic-gate rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num * 36297c478bd9Sstevel@tonic-gate sizeof (ibt_mr_desc_t), KM_SLEEP); 36307c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 36310a701b1eSRobert Gordon 36327c478bd9Sstevel@tonic-gate if (hca->state != HCA_INITED) { 36337c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 36347c478bd9Sstevel@tonic-gate goto fail; 36357c478bd9Sstevel@tonic-gate } 36360a701b1eSRobert Gordon 36377c478bd9Sstevel@tonic-gate for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) { 36387c478bd9Sstevel@tonic-gate bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t)); 363911606941Sjwahlig mem_attr.mr_vaddr = (uintptr_t)buf; 36407c478bd9Sstevel@tonic-gate mem_attr.mr_len = (ib_msglen_t)bp->rsize; 36417c478bd9Sstevel@tonic-gate mem_attr.mr_as = NULL; 36427c478bd9Sstevel@tonic-gate ibt_status = ibt_register_mr(hca->hca_hdl, 36430a701b1eSRobert Gordon hca->pd_hdl, &mem_attr, 36440a701b1eSRobert Gordon &rbp->mr_hdl[i], 36457c478bd9Sstevel@tonic-gate &rbp->mr_desc[i]); 36467c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 36477c478bd9Sstevel@tonic-gate for (j = 0; j < i; j++) { 36480a701b1eSRobert Gordon (void) ibt_deregister_mr(hca->hca_hdl, 36490a701b1eSRobert Gordon rbp->mr_hdl[j]); 36507c478bd9Sstevel@tonic-gate } 
36517c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 36527c478bd9Sstevel@tonic-gate goto fail; 36537c478bd9Sstevel@tonic-gate } 36547c478bd9Sstevel@tonic-gate } 36557c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 36567c478bd9Sstevel@tonic-gate buf = (caddr_t)bp->buf; 36577c478bd9Sstevel@tonic-gate for (i = 0; i < num; i++, buf += bp->rsize) { 36587c478bd9Sstevel@tonic-gate bp->buflist[i] = (void *)buf; 36597c478bd9Sstevel@tonic-gate } 36607c478bd9Sstevel@tonic-gate bp->buffree = num - 1; /* no. of free buffers */ 36617c478bd9Sstevel@tonic-gate rbp->bpool = bp; 36627c478bd9Sstevel@tonic-gate 36637c478bd9Sstevel@tonic-gate return (rbp); 36647c478bd9Sstevel@tonic-gate fail: 36657c478bd9Sstevel@tonic-gate if (bp) { 36667c478bd9Sstevel@tonic-gate if (bp->buf) 36677c478bd9Sstevel@tonic-gate kmem_free(bp->buf, bp->bufsize); 36687c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *)); 36697c478bd9Sstevel@tonic-gate } 36707c478bd9Sstevel@tonic-gate if (rbp) { 36717c478bd9Sstevel@tonic-gate if (rbp->mr_hdl) 36727c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t)); 36737c478bd9Sstevel@tonic-gate if (rbp->mr_desc) 36747c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t)); 36757c478bd9Sstevel@tonic-gate kmem_free(rbp, sizeof (rib_bufpool_t)); 36767c478bd9Sstevel@tonic-gate } 36777c478bd9Sstevel@tonic-gate return (NULL); 36787c478bd9Sstevel@tonic-gate } 36797c478bd9Sstevel@tonic-gate 36807c478bd9Sstevel@tonic-gate static void 36817c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype) 36827c478bd9Sstevel@tonic-gate { 36837c478bd9Sstevel@tonic-gate int i; 36847c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 36857c478bd9Sstevel@tonic-gate bufpool_t *bp; 36867c478bd9Sstevel@tonic-gate 36877c478bd9Sstevel@tonic-gate /* 36887c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 36897c478bd9Sstevel@tonic-gate */ 36907c478bd9Sstevel@tonic-gate switch (ptype) 
{ 36917c478bd9Sstevel@tonic-gate case SEND_BUFFER: 36927c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 36937c478bd9Sstevel@tonic-gate break; 36947c478bd9Sstevel@tonic-gate case RECV_BUFFER: 36957c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 36967c478bd9Sstevel@tonic-gate break; 36977c478bd9Sstevel@tonic-gate default: 36987c478bd9Sstevel@tonic-gate return; 36997c478bd9Sstevel@tonic-gate } 37007c478bd9Sstevel@tonic-gate if (rbp == NULL) 37017c478bd9Sstevel@tonic-gate return; 37027c478bd9Sstevel@tonic-gate 37037c478bd9Sstevel@tonic-gate bp = rbp->bpool; 37047c478bd9Sstevel@tonic-gate 37057c478bd9Sstevel@tonic-gate /* 37067c478bd9Sstevel@tonic-gate * Deregister the pool memory and free it. 37077c478bd9Sstevel@tonic-gate */ 37087c478bd9Sstevel@tonic-gate for (i = 0; i < bp->numelems; i++) { 37097c478bd9Sstevel@tonic-gate (void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]); 37107c478bd9Sstevel@tonic-gate } 37117c478bd9Sstevel@tonic-gate } 37127c478bd9Sstevel@tonic-gate 37137c478bd9Sstevel@tonic-gate static void 37147c478bd9Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype) 37157c478bd9Sstevel@tonic-gate { 37167c478bd9Sstevel@tonic-gate 37177c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 37187c478bd9Sstevel@tonic-gate bufpool_t *bp; 37197c478bd9Sstevel@tonic-gate 37207c478bd9Sstevel@tonic-gate /* 37217c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 37227c478bd9Sstevel@tonic-gate */ 37237c478bd9Sstevel@tonic-gate switch (ptype) { 37247c478bd9Sstevel@tonic-gate case SEND_BUFFER: 37257c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 37267c478bd9Sstevel@tonic-gate break; 37277c478bd9Sstevel@tonic-gate case RECV_BUFFER: 37287c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 37297c478bd9Sstevel@tonic-gate break; 37307c478bd9Sstevel@tonic-gate default: 37317c478bd9Sstevel@tonic-gate return; 37327c478bd9Sstevel@tonic-gate } 37337c478bd9Sstevel@tonic-gate if (rbp == NULL) 37347c478bd9Sstevel@tonic-gate return; 
37357c478bd9Sstevel@tonic-gate 37367c478bd9Sstevel@tonic-gate bp = rbp->bpool; 37377c478bd9Sstevel@tonic-gate 37387c478bd9Sstevel@tonic-gate /* 37397c478bd9Sstevel@tonic-gate * Free the pool memory. 37407c478bd9Sstevel@tonic-gate */ 37417c478bd9Sstevel@tonic-gate if (rbp->mr_hdl) 37427c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t)); 37437c478bd9Sstevel@tonic-gate 37447c478bd9Sstevel@tonic-gate if (rbp->mr_desc) 37457c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t)); 37467c478bd9Sstevel@tonic-gate if (bp->buf) 37477c478bd9Sstevel@tonic-gate kmem_free(bp->buf, bp->bufsize); 37487c478bd9Sstevel@tonic-gate mutex_destroy(&bp->buflock); 37497c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *)); 37507c478bd9Sstevel@tonic-gate kmem_free(rbp, sizeof (rib_bufpool_t)); 37517c478bd9Sstevel@tonic-gate } 37527c478bd9Sstevel@tonic-gate 37537c478bd9Sstevel@tonic-gate void 37547c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype) 37557c478bd9Sstevel@tonic-gate { 37567c478bd9Sstevel@tonic-gate /* 37577c478bd9Sstevel@tonic-gate * Deregister the pool memory and free it. 37587c478bd9Sstevel@tonic-gate */ 37597c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(hca, ptype); 37607c478bd9Sstevel@tonic-gate rib_rbufpool_free(hca, ptype); 37617c478bd9Sstevel@tonic-gate } 37627c478bd9Sstevel@tonic-gate 37637c478bd9Sstevel@tonic-gate /* 37647c478bd9Sstevel@tonic-gate * Fetch a buffer from the pool of type specified in rdbuf->type. 
37657c478bd9Sstevel@tonic-gate */ 37667c478bd9Sstevel@tonic-gate static rdma_stat 37677c478bd9Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf) 37687c478bd9Sstevel@tonic-gate { 37690a701b1eSRobert Gordon rib_lrc_entry_t *rlep; 37700a701b1eSRobert Gordon 37710a701b1eSRobert Gordon if (rdbuf->type == RDMA_LONG_BUFFER) { 37720a701b1eSRobert Gordon rlep = rib_get_cache_buf(conn, rdbuf->len); 37730a701b1eSRobert Gordon rdbuf->rb_private = (caddr_t)rlep; 37740a701b1eSRobert Gordon rdbuf->addr = rlep->lrc_buf; 37750a701b1eSRobert Gordon rdbuf->handle = rlep->lrc_mhandle; 37760a701b1eSRobert Gordon return (RDMA_SUCCESS); 37770a701b1eSRobert Gordon } 37787c478bd9Sstevel@tonic-gate 37797c478bd9Sstevel@tonic-gate rdbuf->addr = rib_rbuf_alloc(conn, rdbuf); 37807c478bd9Sstevel@tonic-gate if (rdbuf->addr) { 37817c478bd9Sstevel@tonic-gate switch (rdbuf->type) { 37827c478bd9Sstevel@tonic-gate case SEND_BUFFER: 37837c478bd9Sstevel@tonic-gate rdbuf->len = RPC_MSG_SZ; /* 1K */ 37847c478bd9Sstevel@tonic-gate break; 37857c478bd9Sstevel@tonic-gate case RECV_BUFFER: 37867c478bd9Sstevel@tonic-gate rdbuf->len = RPC_BUF_SIZE; /* 2K */ 37877c478bd9Sstevel@tonic-gate break; 37887c478bd9Sstevel@tonic-gate default: 37897c478bd9Sstevel@tonic-gate rdbuf->len = 0; 37907c478bd9Sstevel@tonic-gate } 37917c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 37927c478bd9Sstevel@tonic-gate } else 37937c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 37947c478bd9Sstevel@tonic-gate } 37957c478bd9Sstevel@tonic-gate 37967c478bd9Sstevel@tonic-gate /* 37977c478bd9Sstevel@tonic-gate * Fetch a buffer of specified type. 37987c478bd9Sstevel@tonic-gate * Note that rdbuf->handle is mw's rkey. 
37997c478bd9Sstevel@tonic-gate */ 38007c478bd9Sstevel@tonic-gate static void * 38017c478bd9Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf) 38027c478bd9Sstevel@tonic-gate { 38037c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 38047c478bd9Sstevel@tonic-gate rib_hca_t *hca = qp->hca; 38057c478bd9Sstevel@tonic-gate rdma_btype ptype = rdbuf->type; 38067c478bd9Sstevel@tonic-gate void *buf; 38077c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 38087c478bd9Sstevel@tonic-gate bufpool_t *bp; 38097c478bd9Sstevel@tonic-gate int i; 38107c478bd9Sstevel@tonic-gate 38117c478bd9Sstevel@tonic-gate /* 38127c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 38137c478bd9Sstevel@tonic-gate */ 38147c478bd9Sstevel@tonic-gate switch (ptype) { 38157c478bd9Sstevel@tonic-gate case SEND_BUFFER: 38167c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 38177c478bd9Sstevel@tonic-gate break; 38187c478bd9Sstevel@tonic-gate case RECV_BUFFER: 38197c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 38207c478bd9Sstevel@tonic-gate break; 38217c478bd9Sstevel@tonic-gate default: 38227c478bd9Sstevel@tonic-gate return (NULL); 38237c478bd9Sstevel@tonic-gate } 38247c478bd9Sstevel@tonic-gate if (rbp == NULL) 38257c478bd9Sstevel@tonic-gate return (NULL); 38267c478bd9Sstevel@tonic-gate 38277c478bd9Sstevel@tonic-gate bp = rbp->bpool; 38287c478bd9Sstevel@tonic-gate 38297c478bd9Sstevel@tonic-gate mutex_enter(&bp->buflock); 38307c478bd9Sstevel@tonic-gate if (bp->buffree < 0) { 38317c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 38327c478bd9Sstevel@tonic-gate return (NULL); 38337c478bd9Sstevel@tonic-gate } 38347c478bd9Sstevel@tonic-gate 38357c478bd9Sstevel@tonic-gate /* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. 
*/ 38367c478bd9Sstevel@tonic-gate buf = bp->buflist[bp->buffree]; 38377c478bd9Sstevel@tonic-gate rdbuf->addr = buf; 38387c478bd9Sstevel@tonic-gate rdbuf->len = bp->rsize; 38397c478bd9Sstevel@tonic-gate for (i = bp->numelems - 1; i >= 0; i--) { 384011606941Sjwahlig if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) { 38410a701b1eSRobert Gordon rdbuf->handle.mrc_rmr = 38420a701b1eSRobert Gordon (uint32_t)rbp->mr_desc[i].md_rkey; 38430a701b1eSRobert Gordon rdbuf->handle.mrc_linfo = 38440a701b1eSRobert Gordon (uintptr_t)rbp->mr_hdl[i]; 38450a701b1eSRobert Gordon rdbuf->handle.mrc_lmr = 38460a701b1eSRobert Gordon (uint32_t)rbp->mr_desc[i].md_lkey; 38477c478bd9Sstevel@tonic-gate bp->buffree--; 38487c478bd9Sstevel@tonic-gate 38497c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 38507c478bd9Sstevel@tonic-gate 38517c478bd9Sstevel@tonic-gate return (buf); 38527c478bd9Sstevel@tonic-gate } 38537c478bd9Sstevel@tonic-gate } 38540a701b1eSRobert Gordon 38557c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 38567c478bd9Sstevel@tonic-gate 38577c478bd9Sstevel@tonic-gate return (NULL); 38587c478bd9Sstevel@tonic-gate } 38597c478bd9Sstevel@tonic-gate 38607c478bd9Sstevel@tonic-gate static void 38617c478bd9Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf) 38627c478bd9Sstevel@tonic-gate { 38637c478bd9Sstevel@tonic-gate 38640a701b1eSRobert Gordon if (rdbuf->type == RDMA_LONG_BUFFER) { 38650a701b1eSRobert Gordon rib_free_cache_buf(conn, (rib_lrc_entry_t *)rdbuf->rb_private); 38660a701b1eSRobert Gordon rdbuf->rb_private = NULL; 38670a701b1eSRobert Gordon return; 38680a701b1eSRobert Gordon } 38697c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, rdbuf->type, rdbuf->addr); 38707c478bd9Sstevel@tonic-gate } 38717c478bd9Sstevel@tonic-gate 38727c478bd9Sstevel@tonic-gate static void 38737c478bd9Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf) 38747c478bd9Sstevel@tonic-gate { 38757c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 
38767c478bd9Sstevel@tonic-gate rib_hca_t *hca = qp->hca; 38777c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 38787c478bd9Sstevel@tonic-gate bufpool_t *bp; 38797c478bd9Sstevel@tonic-gate 38807c478bd9Sstevel@tonic-gate /* 38817c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 38827c478bd9Sstevel@tonic-gate */ 38837c478bd9Sstevel@tonic-gate switch (ptype) { 38847c478bd9Sstevel@tonic-gate case SEND_BUFFER: 38857c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 38867c478bd9Sstevel@tonic-gate break; 38877c478bd9Sstevel@tonic-gate case RECV_BUFFER: 38887c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 38897c478bd9Sstevel@tonic-gate break; 38907c478bd9Sstevel@tonic-gate default: 38917c478bd9Sstevel@tonic-gate return; 38927c478bd9Sstevel@tonic-gate } 38937c478bd9Sstevel@tonic-gate if (rbp == NULL) 38947c478bd9Sstevel@tonic-gate return; 38957c478bd9Sstevel@tonic-gate 38967c478bd9Sstevel@tonic-gate bp = rbp->bpool; 38977c478bd9Sstevel@tonic-gate 38987c478bd9Sstevel@tonic-gate mutex_enter(&bp->buflock); 38997c478bd9Sstevel@tonic-gate if (++bp->buffree >= bp->numelems) { 39007c478bd9Sstevel@tonic-gate /* 39017c478bd9Sstevel@tonic-gate * Should never happen 39027c478bd9Sstevel@tonic-gate */ 39037c478bd9Sstevel@tonic-gate bp->buffree--; 39047c478bd9Sstevel@tonic-gate } else { 39057c478bd9Sstevel@tonic-gate bp->buflist[bp->buffree] = buf; 39067c478bd9Sstevel@tonic-gate } 39077c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 39087c478bd9Sstevel@tonic-gate } 39097c478bd9Sstevel@tonic-gate 39107c478bd9Sstevel@tonic-gate static rdma_stat 39117c478bd9Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t *connlist) 39127c478bd9Sstevel@tonic-gate { 39137c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_WRITER); 39147c478bd9Sstevel@tonic-gate if (connlist->conn_hd) { 39157c478bd9Sstevel@tonic-gate cn->c_next = connlist->conn_hd; 39167c478bd9Sstevel@tonic-gate connlist->conn_hd->c_prev = cn; 39177c478bd9Sstevel@tonic-gate } 
39187c478bd9Sstevel@tonic-gate connlist->conn_hd = cn; 39197c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 39207c478bd9Sstevel@tonic-gate 39217c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 39227c478bd9Sstevel@tonic-gate } 39237c478bd9Sstevel@tonic-gate 39247c478bd9Sstevel@tonic-gate static rdma_stat 39257c478bd9Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist) 39267c478bd9Sstevel@tonic-gate { 39277c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_WRITER); 39287c478bd9Sstevel@tonic-gate if (cn->c_prev) { 39297c478bd9Sstevel@tonic-gate cn->c_prev->c_next = cn->c_next; 39307c478bd9Sstevel@tonic-gate } 39317c478bd9Sstevel@tonic-gate if (cn->c_next) { 39327c478bd9Sstevel@tonic-gate cn->c_next->c_prev = cn->c_prev; 39337c478bd9Sstevel@tonic-gate } 39347c478bd9Sstevel@tonic-gate if (connlist->conn_hd == cn) 39357c478bd9Sstevel@tonic-gate connlist->conn_hd = cn->c_next; 39367c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 39377c478bd9Sstevel@tonic-gate 39387c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 39397c478bd9Sstevel@tonic-gate } 39407c478bd9Sstevel@tonic-gate 39417c478bd9Sstevel@tonic-gate /* 39427c478bd9Sstevel@tonic-gate * Connection management. 39437c478bd9Sstevel@tonic-gate * IBTF does not support recycling of channels. So connections are only 39440a701b1eSRobert Gordon * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR_CONN or 39457c478bd9Sstevel@tonic-gate * C_DISCONN_PEND state. No C_IDLE state. 39467c478bd9Sstevel@tonic-gate * C_CONN_PEND state: Connection establishment in progress to the server. 39477c478bd9Sstevel@tonic-gate * C_CONNECTED state: A connection when created is in C_CONNECTED state. 39487c478bd9Sstevel@tonic-gate * It has an RC channel associated with it. ibt_post_send/recv are allowed 39497c478bd9Sstevel@tonic-gate * only in this state. 
39500a701b1eSRobert Gordon * C_ERROR_CONN state: A connection transitions to this state when WRs on the 39517c478bd9Sstevel@tonic-gate * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event 39527c478bd9Sstevel@tonic-gate * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA. 39530a701b1eSRobert Gordon * C_DISCONN_PEND state: When a connection is in C_ERROR_CONN state and when 39547c478bd9Sstevel@tonic-gate * c_ref drops to 0 (this indicates that RPC has no more references to this 39557c478bd9Sstevel@tonic-gate * connection), the connection should be destroyed. A connection transitions 39567c478bd9Sstevel@tonic-gate * into this state when it is being destroyed. 39577c478bd9Sstevel@tonic-gate */ 395851f34d4bSRajkumar Sivaprakasam /* ARGSUSED */ 39597c478bd9Sstevel@tonic-gate static rdma_stat 39607c478bd9Sstevel@tonic-gate rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn) 39617c478bd9Sstevel@tonic-gate { 39627c478bd9Sstevel@tonic-gate CONN *cn; 39637c478bd9Sstevel@tonic-gate int status = RDMA_SUCCESS; 396451f34d4bSRajkumar Sivaprakasam rib_hca_t *hca = rib_stat->hca; 39657c478bd9Sstevel@tonic-gate rib_qp_t *qp; 39667c478bd9Sstevel@tonic-gate clock_t cv_stat, timout; 3967214ae7d0SSiddheshwar Mahesh rpcib_ping_t rpt; 39687c478bd9Sstevel@tonic-gate 396951f34d4bSRajkumar Sivaprakasam if (hca == NULL) 397051f34d4bSRajkumar Sivaprakasam return (RDMA_FAILED); 397151f34d4bSRajkumar Sivaprakasam 397251f34d4bSRajkumar Sivaprakasam rw_enter(&rib_stat->hca->state_lock, RW_READER); 397351f34d4bSRajkumar Sivaprakasam if (hca->state == HCA_DETACHED) { 397451f34d4bSRajkumar Sivaprakasam rw_exit(&rib_stat->hca->state_lock); 397551f34d4bSRajkumar Sivaprakasam return (RDMA_FAILED); 397651f34d4bSRajkumar Sivaprakasam } 397751f34d4bSRajkumar Sivaprakasam rw_exit(&rib_stat->hca->state_lock); 397851f34d4bSRajkumar Sivaprakasam 39797c478bd9Sstevel@tonic-gate again: 39807c478bd9Sstevel@tonic-gate rw_enter(&hca->cl_conn_list.conn_lock, 
RW_READER); 39817c478bd9Sstevel@tonic-gate cn = hca->cl_conn_list.conn_hd; 39827c478bd9Sstevel@tonic-gate while (cn != NULL) { 39837c478bd9Sstevel@tonic-gate /* 39847c478bd9Sstevel@tonic-gate * First, clear up any connection in the ERROR state 39857c478bd9Sstevel@tonic-gate */ 39867c478bd9Sstevel@tonic-gate mutex_enter(&cn->c_lock); 39870a701b1eSRobert Gordon if (cn->c_state == C_ERROR_CONN) { 39887c478bd9Sstevel@tonic-gate if (cn->c_ref == 0) { 39897c478bd9Sstevel@tonic-gate /* 39907c478bd9Sstevel@tonic-gate * Remove connection from list and destroy it. 39917c478bd9Sstevel@tonic-gate */ 39927c478bd9Sstevel@tonic-gate cn->c_state = C_DISCONN_PEND; 39937c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 39947c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 3995*065714dcSSiddheshwar Mahesh rib_conn_close((void *)cn); 39967c478bd9Sstevel@tonic-gate goto again; 39977c478bd9Sstevel@tonic-gate } 39987c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 39997c478bd9Sstevel@tonic-gate cn = cn->c_next; 40007c478bd9Sstevel@tonic-gate continue; 40010a701b1eSRobert Gordon } 40020a701b1eSRobert Gordon if (cn->c_state == C_DISCONN_PEND) { 40037c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40047c478bd9Sstevel@tonic-gate cn = cn->c_next; 40057c478bd9Sstevel@tonic-gate continue; 40067c478bd9Sstevel@tonic-gate } 40077c478bd9Sstevel@tonic-gate if ((cn->c_raddr.len == svcaddr->len) && 40087c478bd9Sstevel@tonic-gate bcmp(svcaddr->buf, cn->c_raddr.buf, svcaddr->len) == 0) { 40097c478bd9Sstevel@tonic-gate /* 40107c478bd9Sstevel@tonic-gate * Our connection. Give up conn list lock 40117c478bd9Sstevel@tonic-gate * as we are done traversing the list. 
40127c478bd9Sstevel@tonic-gate */ 40137c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 40147c478bd9Sstevel@tonic-gate if (cn->c_state == C_CONNECTED) { 40157c478bd9Sstevel@tonic-gate cn->c_ref++; /* sharing a conn */ 40167c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40177c478bd9Sstevel@tonic-gate *conn = cn; 40187c478bd9Sstevel@tonic-gate return (status); 40197c478bd9Sstevel@tonic-gate } 40207c478bd9Sstevel@tonic-gate if (cn->c_state == C_CONN_PEND) { 40217c478bd9Sstevel@tonic-gate /* 40227c478bd9Sstevel@tonic-gate * Hold a reference to this conn before 40237c478bd9Sstevel@tonic-gate * we give up the lock. 40247c478bd9Sstevel@tonic-gate */ 40257c478bd9Sstevel@tonic-gate cn->c_ref++; 40267c478bd9Sstevel@tonic-gate timout = ddi_get_lbolt() + 40277c478bd9Sstevel@tonic-gate drv_usectohz(CONN_WAIT_TIME * 1000000); 40287c478bd9Sstevel@tonic-gate while ((cv_stat = cv_timedwait_sig(&cn->c_cv, 40297c478bd9Sstevel@tonic-gate &cn->c_lock, timout)) > 0 && 40307c478bd9Sstevel@tonic-gate cn->c_state == C_CONN_PEND) 40317c478bd9Sstevel@tonic-gate ; 40327c478bd9Sstevel@tonic-gate if (cv_stat == 0) { 40337c478bd9Sstevel@tonic-gate cn->c_ref--; 40347c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40357c478bd9Sstevel@tonic-gate return (RDMA_INTR); 40367c478bd9Sstevel@tonic-gate } 40377c478bd9Sstevel@tonic-gate if (cv_stat < 0) { 40387c478bd9Sstevel@tonic-gate cn->c_ref--; 40397c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40407c478bd9Sstevel@tonic-gate return (RDMA_TIMEDOUT); 40417c478bd9Sstevel@tonic-gate } 40427c478bd9Sstevel@tonic-gate if (cn->c_state == C_CONNECTED) { 40437c478bd9Sstevel@tonic-gate *conn = cn; 40447c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40457c478bd9Sstevel@tonic-gate return (status); 40467c478bd9Sstevel@tonic-gate } else { 40477c478bd9Sstevel@tonic-gate cn->c_ref--; 40487c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40497c478bd9Sstevel@tonic-gate return (RDMA_TIMEDOUT); 40507c478bd9Sstevel@tonic-gate } 
40517c478bd9Sstevel@tonic-gate } 40527c478bd9Sstevel@tonic-gate } 40537c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40547c478bd9Sstevel@tonic-gate cn = cn->c_next; 40557c478bd9Sstevel@tonic-gate } 40567c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 40577c478bd9Sstevel@tonic-gate 4058214ae7d0SSiddheshwar Mahesh bzero(&rpt, sizeof (rpcib_ping_t)); 40590a701b1eSRobert Gordon 4060214ae7d0SSiddheshwar Mahesh status = rib_ping_srv(addr_type, svcaddr, &rpt); 40617c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 40627c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 40637c478bd9Sstevel@tonic-gate } 40647c478bd9Sstevel@tonic-gate 40657c478bd9Sstevel@tonic-gate /* 40667c478bd9Sstevel@tonic-gate * Channel to server doesn't exist yet, create one. 40677c478bd9Sstevel@tonic-gate */ 40687c478bd9Sstevel@tonic-gate if (rib_clnt_create_chan(hca, svcaddr, &qp) != RDMA_SUCCESS) { 40697c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 40707c478bd9Sstevel@tonic-gate } 40717c478bd9Sstevel@tonic-gate cn = qptoc(qp); 40727c478bd9Sstevel@tonic-gate cn->c_state = C_CONN_PEND; 40737c478bd9Sstevel@tonic-gate cn->c_ref = 1; 40747c478bd9Sstevel@tonic-gate 40757c478bd9Sstevel@tonic-gate /* 40767c478bd9Sstevel@tonic-gate * Add to conn list. 40777c478bd9Sstevel@tonic-gate * We had given up the READER lock. In the time since then, 40787c478bd9Sstevel@tonic-gate * another thread might have created the connection we are 40797c478bd9Sstevel@tonic-gate * trying here. But for now, that is quiet alright - there 40807c478bd9Sstevel@tonic-gate * might be two connections between a pair of hosts instead 40817c478bd9Sstevel@tonic-gate * of one. If we really want to close that window, 40827c478bd9Sstevel@tonic-gate * then need to check the list after acquiring the 40837c478bd9Sstevel@tonic-gate * WRITER lock. 
40847c478bd9Sstevel@tonic-gate */ 40857c478bd9Sstevel@tonic-gate (void) rib_add_connlist(cn, &hca->cl_conn_list); 4086214ae7d0SSiddheshwar Mahesh status = rib_conn_to_srv(hca, qp, &rpt); 40877c478bd9Sstevel@tonic-gate mutex_enter(&cn->c_lock); 40887c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 40897c478bd9Sstevel@tonic-gate cn->c_state = C_CONNECTED; 40907c478bd9Sstevel@tonic-gate *conn = cn; 40917c478bd9Sstevel@tonic-gate } else { 40920a701b1eSRobert Gordon cn->c_state = C_ERROR_CONN; 40937c478bd9Sstevel@tonic-gate cn->c_ref--; 40947c478bd9Sstevel@tonic-gate } 40957c478bd9Sstevel@tonic-gate cv_broadcast(&cn->c_cv); 40967c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40977c478bd9Sstevel@tonic-gate return (status); 40987c478bd9Sstevel@tonic-gate } 40997c478bd9Sstevel@tonic-gate 4100*065714dcSSiddheshwar Mahesh static void 4101*065714dcSSiddheshwar Mahesh rib_conn_close(void *rarg) 41027c478bd9Sstevel@tonic-gate { 4103*065714dcSSiddheshwar Mahesh CONN *conn = (CONN *)rarg; 41047c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 41057c478bd9Sstevel@tonic-gate 41067c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 4107*065714dcSSiddheshwar Mahesh if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) { 41087c478bd9Sstevel@tonic-gate 4109*065714dcSSiddheshwar Mahesh conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING); 41107c478bd9Sstevel@tonic-gate /* 4111*065714dcSSiddheshwar Mahesh * Live connection in CONNECTED state. 
41127c478bd9Sstevel@tonic-gate */ 4113*065714dcSSiddheshwar Mahesh if (conn->c_state == C_CONNECTED) { 4114*065714dcSSiddheshwar Mahesh conn->c_state = C_ERROR_CONN; 4115*065714dcSSiddheshwar Mahesh } 41167c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 4117*065714dcSSiddheshwar Mahesh 4118*065714dcSSiddheshwar Mahesh rib_close_a_channel(conn); 4119*065714dcSSiddheshwar Mahesh 4120*065714dcSSiddheshwar Mahesh mutex_enter(&conn->c_lock); 4121*065714dcSSiddheshwar Mahesh conn->c_flags &= ~C_CLOSE_PENDING; 4122*065714dcSSiddheshwar Mahesh cv_signal(&conn->c_cv); 4123*065714dcSSiddheshwar Mahesh } 4124*065714dcSSiddheshwar Mahesh 4125*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 4126*065714dcSSiddheshwar Mahesh 41277c478bd9Sstevel@tonic-gate if (qp->mode == RIB_SERVER) 41287c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, 41297c478bd9Sstevel@tonic-gate &qp->hca->srv_conn_list); 41307c478bd9Sstevel@tonic-gate else 41317c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, 41327c478bd9Sstevel@tonic-gate &qp->hca->cl_conn_list); 4133*065714dcSSiddheshwar Mahesh } 4134*065714dcSSiddheshwar Mahesh 4135*065714dcSSiddheshwar Mahesh static void 4136*065714dcSSiddheshwar Mahesh rib_conn_timeout_call(void *carg) 4137*065714dcSSiddheshwar Mahesh { 4138*065714dcSSiddheshwar Mahesh time_t idle_time; 4139*065714dcSSiddheshwar Mahesh CONN *conn = (CONN *)carg; 4140*065714dcSSiddheshwar Mahesh rib_hca_t *hca = ctoqp(conn)->hca; 4141*065714dcSSiddheshwar Mahesh int error; 4142*065714dcSSiddheshwar Mahesh 4143*065714dcSSiddheshwar Mahesh mutex_enter(&conn->c_lock); 4144*065714dcSSiddheshwar Mahesh if ((conn->c_ref > 0) || 4145*065714dcSSiddheshwar Mahesh (conn->c_state == C_DISCONN_PEND)) { 4146*065714dcSSiddheshwar Mahesh conn->c_timeout = NULL; 4147*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 4148*065714dcSSiddheshwar Mahesh return; 4149*065714dcSSiddheshwar Mahesh } 4150*065714dcSSiddheshwar Mahesh 4151*065714dcSSiddheshwar Mahesh 
idle_time = (gethrestime_sec() - conn->c_last_used); 4152*065714dcSSiddheshwar Mahesh 4153*065714dcSSiddheshwar Mahesh if ((idle_time <= rib_conn_timeout) && 4154*065714dcSSiddheshwar Mahesh (conn->c_state != C_ERROR_CONN)) { 4155*065714dcSSiddheshwar Mahesh /* 4156*065714dcSSiddheshwar Mahesh * There was activity after the last timeout. 4157*065714dcSSiddheshwar Mahesh * Extend the conn life. Unless the conn is 4158*065714dcSSiddheshwar Mahesh * already in error state. 4159*065714dcSSiddheshwar Mahesh */ 4160*065714dcSSiddheshwar Mahesh conn->c_timeout = timeout(rib_conn_timeout_call, conn, 4161*065714dcSSiddheshwar Mahesh SEC_TO_TICK(rib_conn_timeout - idle_time)); 4162*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 4163*065714dcSSiddheshwar Mahesh return; 4164*065714dcSSiddheshwar Mahesh } 4165*065714dcSSiddheshwar Mahesh 4166*065714dcSSiddheshwar Mahesh error = ddi_taskq_dispatch(hca->cleanup_helper, rib_conn_close, 4167*065714dcSSiddheshwar Mahesh (void *)conn, DDI_NOSLEEP); 4168*065714dcSSiddheshwar Mahesh 4169*065714dcSSiddheshwar Mahesh /* 4170*065714dcSSiddheshwar Mahesh * If taskq dispatch fails above, then reset the timeout 4171*065714dcSSiddheshwar Mahesh * to try again after 10 secs. 
4172*065714dcSSiddheshwar Mahesh */ 4173*065714dcSSiddheshwar Mahesh 4174*065714dcSSiddheshwar Mahesh if (error != DDI_SUCCESS) { 4175*065714dcSSiddheshwar Mahesh conn->c_timeout = timeout(rib_conn_timeout_call, conn, 4176*065714dcSSiddheshwar Mahesh SEC_TO_TICK(RDMA_CONN_REAP_RETRY)); 4177*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 4178*065714dcSSiddheshwar Mahesh return; 4179*065714dcSSiddheshwar Mahesh } 4180*065714dcSSiddheshwar Mahesh 4181*065714dcSSiddheshwar Mahesh conn->c_state = C_DISCONN_PEND; 4182*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 4183*065714dcSSiddheshwar Mahesh } 4184*065714dcSSiddheshwar Mahesh 4185*065714dcSSiddheshwar Mahesh static rdma_stat 4186*065714dcSSiddheshwar Mahesh rib_conn_release(CONN *conn) 4187*065714dcSSiddheshwar Mahesh { 4188*065714dcSSiddheshwar Mahesh 4189*065714dcSSiddheshwar Mahesh mutex_enter(&conn->c_lock); 4190*065714dcSSiddheshwar Mahesh conn->c_ref--; 4191*065714dcSSiddheshwar Mahesh 4192*065714dcSSiddheshwar Mahesh conn->c_last_used = gethrestime_sec(); 4193*065714dcSSiddheshwar Mahesh if (conn->c_ref > 0) { 4194*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 41957c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 41967c478bd9Sstevel@tonic-gate } 4197*065714dcSSiddheshwar Mahesh 4198*065714dcSSiddheshwar Mahesh /* 4199*065714dcSSiddheshwar Mahesh * If a conn is C_ERROR_CONN, close the channel. 
4200*065714dcSSiddheshwar Mahesh */ 4201*065714dcSSiddheshwar Mahesh if (conn->c_ref == 0 && conn->c_state == C_ERROR_CONN) { 4202*065714dcSSiddheshwar Mahesh conn->c_state = C_DISCONN_PEND; 4203*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 4204*065714dcSSiddheshwar Mahesh rib_conn_close((void *)conn); 4205*065714dcSSiddheshwar Mahesh return (RDMA_SUCCESS); 4206*065714dcSSiddheshwar Mahesh } 4207*065714dcSSiddheshwar Mahesh 4208*065714dcSSiddheshwar Mahesh /* 4209*065714dcSSiddheshwar Mahesh * c_ref == 0, set a timeout for conn release 4210*065714dcSSiddheshwar Mahesh */ 4211*065714dcSSiddheshwar Mahesh 4212*065714dcSSiddheshwar Mahesh if (conn->c_timeout == NULL) { 4213*065714dcSSiddheshwar Mahesh conn->c_timeout = timeout(rib_conn_timeout_call, conn, 4214*065714dcSSiddheshwar Mahesh SEC_TO_TICK(rib_conn_timeout)); 4215*065714dcSSiddheshwar Mahesh } 4216*065714dcSSiddheshwar Mahesh 42177c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 42187c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 42197c478bd9Sstevel@tonic-gate } 42207c478bd9Sstevel@tonic-gate 42217c478bd9Sstevel@tonic-gate /* 42227c478bd9Sstevel@tonic-gate * Add at front of list 42237c478bd9Sstevel@tonic-gate */ 42247c478bd9Sstevel@tonic-gate static struct rdma_done_list * 42257c478bd9Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid) 42267c478bd9Sstevel@tonic-gate { 42277c478bd9Sstevel@tonic-gate struct rdma_done_list *rd; 42287c478bd9Sstevel@tonic-gate 42297c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 42307c478bd9Sstevel@tonic-gate 42317c478bd9Sstevel@tonic-gate rd = kmem_alloc(sizeof (*rd), KM_SLEEP); 42327c478bd9Sstevel@tonic-gate rd->xid = xid; 42337c478bd9Sstevel@tonic-gate cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL); 42347c478bd9Sstevel@tonic-gate 42357c478bd9Sstevel@tonic-gate rd->prev = NULL; 42367c478bd9Sstevel@tonic-gate rd->next = qp->rdlist; 42377c478bd9Sstevel@tonic-gate if (qp->rdlist != NULL) 42387c478bd9Sstevel@tonic-gate qp->rdlist->prev = 
rd; 42397c478bd9Sstevel@tonic-gate qp->rdlist = rd; 42407c478bd9Sstevel@tonic-gate 42417c478bd9Sstevel@tonic-gate return (rd); 42427c478bd9Sstevel@tonic-gate } 42437c478bd9Sstevel@tonic-gate 42447c478bd9Sstevel@tonic-gate static void 42457c478bd9Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd) 42467c478bd9Sstevel@tonic-gate { 42477c478bd9Sstevel@tonic-gate struct rdma_done_list *r; 42487c478bd9Sstevel@tonic-gate 42497c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 42507c478bd9Sstevel@tonic-gate 42517c478bd9Sstevel@tonic-gate r = rd->next; 42527c478bd9Sstevel@tonic-gate if (r != NULL) { 42537c478bd9Sstevel@tonic-gate r->prev = rd->prev; 42547c478bd9Sstevel@tonic-gate } 42557c478bd9Sstevel@tonic-gate 42567c478bd9Sstevel@tonic-gate r = rd->prev; 42577c478bd9Sstevel@tonic-gate if (r != NULL) { 42587c478bd9Sstevel@tonic-gate r->next = rd->next; 42597c478bd9Sstevel@tonic-gate } else { 42607c478bd9Sstevel@tonic-gate qp->rdlist = rd->next; 42617c478bd9Sstevel@tonic-gate } 42627c478bd9Sstevel@tonic-gate 42637c478bd9Sstevel@tonic-gate cv_destroy(&rd->rdma_done_cv); 42647c478bd9Sstevel@tonic-gate kmem_free(rd, sizeof (*rd)); 42657c478bd9Sstevel@tonic-gate } 42667c478bd9Sstevel@tonic-gate 42677c478bd9Sstevel@tonic-gate static void 42687c478bd9Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp) 42697c478bd9Sstevel@tonic-gate { 42707c478bd9Sstevel@tonic-gate struct rdma_done_list *r, *n; 42717c478bd9Sstevel@tonic-gate 42727c478bd9Sstevel@tonic-gate mutex_enter(&qp->rdlist_lock); 42737c478bd9Sstevel@tonic-gate for (r = qp->rdlist; r != NULL; r = n) { 42747c478bd9Sstevel@tonic-gate n = r->next; 42757c478bd9Sstevel@tonic-gate rdma_done_rm(qp, r); 42767c478bd9Sstevel@tonic-gate } 42777c478bd9Sstevel@tonic-gate mutex_exit(&qp->rdlist_lock); 42787c478bd9Sstevel@tonic-gate } 42797c478bd9Sstevel@tonic-gate 42807c478bd9Sstevel@tonic-gate static void 42817c478bd9Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid) 
42827c478bd9Sstevel@tonic-gate { 42837c478bd9Sstevel@tonic-gate struct rdma_done_list *r = qp->rdlist; 42847c478bd9Sstevel@tonic-gate 42857c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 42867c478bd9Sstevel@tonic-gate 42877c478bd9Sstevel@tonic-gate while (r) { 42887c478bd9Sstevel@tonic-gate if (r->xid == xid) { 42897c478bd9Sstevel@tonic-gate cv_signal(&r->rdma_done_cv); 42907c478bd9Sstevel@tonic-gate return; 42917c478bd9Sstevel@tonic-gate } else { 42927c478bd9Sstevel@tonic-gate r = r->next; 42937c478bd9Sstevel@tonic-gate } 42947c478bd9Sstevel@tonic-gate } 42950a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__donenotify__nomatchxid, 42960a701b1eSRobert Gordon int, xid); 42977c478bd9Sstevel@tonic-gate } 42987c478bd9Sstevel@tonic-gate 4299*065714dcSSiddheshwar Mahesh /* 4300*065714dcSSiddheshwar Mahesh * Expects conn->c_lock to be held by the caller. 4301*065714dcSSiddheshwar Mahesh */ 4302*065714dcSSiddheshwar Mahesh 4303*065714dcSSiddheshwar Mahesh static void 4304*065714dcSSiddheshwar Mahesh rib_close_a_channel(CONN *conn) 4305*065714dcSSiddheshwar Mahesh { 4306*065714dcSSiddheshwar Mahesh rib_qp_t *qp; 4307*065714dcSSiddheshwar Mahesh qp = ctoqp(conn); 4308*065714dcSSiddheshwar Mahesh 4309*065714dcSSiddheshwar Mahesh if (qp->qp_hdl == NULL) { 4310*065714dcSSiddheshwar Mahesh /* channel already freed */ 4311*065714dcSSiddheshwar Mahesh return; 4312*065714dcSSiddheshwar Mahesh } 4313*065714dcSSiddheshwar Mahesh 4314*065714dcSSiddheshwar Mahesh /* 4315*065714dcSSiddheshwar Mahesh * Call ibt_close_rc_channel in blocking mode 4316*065714dcSSiddheshwar Mahesh * with no callbacks. 
4317*065714dcSSiddheshwar Mahesh */ 4318*065714dcSSiddheshwar Mahesh (void) ibt_close_rc_channel(qp->qp_hdl, IBT_NOCALLBACKS, 4319*065714dcSSiddheshwar Mahesh NULL, 0, NULL, NULL, 0); 4320*065714dcSSiddheshwar Mahesh } 43217c478bd9Sstevel@tonic-gate 43227c478bd9Sstevel@tonic-gate /* 43237c478bd9Sstevel@tonic-gate * Goes through all connections and closes the channel 43247c478bd9Sstevel@tonic-gate * This will cause all the WRs on those channels to be 43257c478bd9Sstevel@tonic-gate * flushed. 43267c478bd9Sstevel@tonic-gate */ 43277c478bd9Sstevel@tonic-gate static void 43287c478bd9Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist) 43297c478bd9Sstevel@tonic-gate { 4330*065714dcSSiddheshwar Mahesh CONN *conn, *tmp; 43317c478bd9Sstevel@tonic-gate 43327c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_READER); 43337c478bd9Sstevel@tonic-gate conn = connlist->conn_hd; 43347c478bd9Sstevel@tonic-gate while (conn != NULL) { 43357c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 4336*065714dcSSiddheshwar Mahesh tmp = conn->c_next; 4337*065714dcSSiddheshwar Mahesh if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) { 4338*065714dcSSiddheshwar Mahesh 4339*065714dcSSiddheshwar Mahesh conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING); 4340*065714dcSSiddheshwar Mahesh 43417c478bd9Sstevel@tonic-gate /* 43427c478bd9Sstevel@tonic-gate * Live connection in CONNECTED state. 
43437c478bd9Sstevel@tonic-gate */ 4344*065714dcSSiddheshwar Mahesh if (conn->c_state == C_CONNECTED) 43450a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 4346*065714dcSSiddheshwar Mahesh mutex_exit(&conn->c_lock); 4347*065714dcSSiddheshwar Mahesh 4348*065714dcSSiddheshwar Mahesh rib_close_a_channel(conn); 4349*065714dcSSiddheshwar Mahesh 4350*065714dcSSiddheshwar Mahesh mutex_enter(&conn->c_lock); 4351*065714dcSSiddheshwar Mahesh conn->c_flags &= ~C_CLOSE_PENDING; 4352*065714dcSSiddheshwar Mahesh /* Signal a pending rib_disconnect_channel() */ 4353*065714dcSSiddheshwar Mahesh cv_signal(&conn->c_cv); 43547c478bd9Sstevel@tonic-gate } 43557c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 4356*065714dcSSiddheshwar Mahesh conn = tmp; 43577c478bd9Sstevel@tonic-gate } 43587c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 43597c478bd9Sstevel@tonic-gate } 43607c478bd9Sstevel@tonic-gate 43617c478bd9Sstevel@tonic-gate /* 43627c478bd9Sstevel@tonic-gate * Frees up all connections that are no longer being referenced 43637c478bd9Sstevel@tonic-gate */ 43647c478bd9Sstevel@tonic-gate static void 43657c478bd9Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist) 43667c478bd9Sstevel@tonic-gate { 43677c478bd9Sstevel@tonic-gate CONN *conn; 43687c478bd9Sstevel@tonic-gate 43697c478bd9Sstevel@tonic-gate top: 43707c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_READER); 43717c478bd9Sstevel@tonic-gate conn = connlist->conn_hd; 43727c478bd9Sstevel@tonic-gate while (conn != NULL) { 43737c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 43747c478bd9Sstevel@tonic-gate 43757c478bd9Sstevel@tonic-gate /* 43767c478bd9Sstevel@tonic-gate * At this point connection is either in ERROR 43777c478bd9Sstevel@tonic-gate * or DISCONN_PEND state. If in DISCONN_PEND state 43787c478bd9Sstevel@tonic-gate * then some other thread is culling that connection. 43797c478bd9Sstevel@tonic-gate * If not and if c_ref is 0, then destroy the connection. 
43807c478bd9Sstevel@tonic-gate */ 43817c478bd9Sstevel@tonic-gate if (conn->c_ref == 0 && 43827c478bd9Sstevel@tonic-gate conn->c_state != C_DISCONN_PEND) { 43837c478bd9Sstevel@tonic-gate /* 43847c478bd9Sstevel@tonic-gate * Cull the connection 43857c478bd9Sstevel@tonic-gate */ 43867c478bd9Sstevel@tonic-gate conn->c_state = C_DISCONN_PEND; 43877c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 43887c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 43897c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, connlist); 43907c478bd9Sstevel@tonic-gate goto top; 43917c478bd9Sstevel@tonic-gate } else { 43927c478bd9Sstevel@tonic-gate /* 43937c478bd9Sstevel@tonic-gate * conn disconnect already scheduled or will 43947c478bd9Sstevel@tonic-gate * happen from conn_release when c_ref drops to 0. 43957c478bd9Sstevel@tonic-gate */ 43967c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 43977c478bd9Sstevel@tonic-gate } 43987c478bd9Sstevel@tonic-gate conn = conn->c_next; 43997c478bd9Sstevel@tonic-gate } 44007c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 44017c478bd9Sstevel@tonic-gate 44027c478bd9Sstevel@tonic-gate /* 44037c478bd9Sstevel@tonic-gate * At this point, only connections with c_ref != 0 are on the list 44047c478bd9Sstevel@tonic-gate */ 44057c478bd9Sstevel@tonic-gate } 44067c478bd9Sstevel@tonic-gate 44077c478bd9Sstevel@tonic-gate /* 4408*065714dcSSiddheshwar Mahesh * Free all the HCA resources and close 4409*065714dcSSiddheshwar Mahesh * the hca. 
4410*065714dcSSiddheshwar Mahesh */ 4411*065714dcSSiddheshwar Mahesh 4412*065714dcSSiddheshwar Mahesh static void 4413*065714dcSSiddheshwar Mahesh rib_free_hca(rib_hca_t *hca) 4414*065714dcSSiddheshwar Mahesh { 4415*065714dcSSiddheshwar Mahesh (void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl); 4416*065714dcSSiddheshwar Mahesh (void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl); 4417*065714dcSSiddheshwar Mahesh (void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl); 4418*065714dcSSiddheshwar Mahesh (void) ibt_free_cq(hca->svc_scq->rib_cq_hdl); 4419*065714dcSSiddheshwar Mahesh 4420*065714dcSSiddheshwar Mahesh kmem_free(hca->clnt_rcq, sizeof (rib_cq_t)); 4421*065714dcSSiddheshwar Mahesh kmem_free(hca->clnt_scq, sizeof (rib_cq_t)); 4422*065714dcSSiddheshwar Mahesh kmem_free(hca->svc_rcq, sizeof (rib_cq_t)); 4423*065714dcSSiddheshwar Mahesh kmem_free(hca->svc_scq, sizeof (rib_cq_t)); 4424*065714dcSSiddheshwar Mahesh 4425*065714dcSSiddheshwar Mahesh rib_rbufpool_destroy(hca, RECV_BUFFER); 4426*065714dcSSiddheshwar Mahesh rib_rbufpool_destroy(hca, SEND_BUFFER); 4427*065714dcSSiddheshwar Mahesh rib_destroy_cache(hca); 4428*065714dcSSiddheshwar Mahesh if (rib_mod.rdma_count == 0) 4429*065714dcSSiddheshwar Mahesh rdma_unregister_mod(&rib_mod); 4430*065714dcSSiddheshwar Mahesh (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 4431*065714dcSSiddheshwar Mahesh (void) ibt_close_hca(hca->hca_hdl); 4432*065714dcSSiddheshwar Mahesh hca->hca_hdl = NULL; 4433*065714dcSSiddheshwar Mahesh } 4434*065714dcSSiddheshwar Mahesh 4435*065714dcSSiddheshwar Mahesh /* 44367c478bd9Sstevel@tonic-gate * Cleans and closes up all uses of the HCA 44377c478bd9Sstevel@tonic-gate */ 44387c478bd9Sstevel@tonic-gate static void 44397c478bd9Sstevel@tonic-gate rib_detach_hca(rib_hca_t *hca) 44407c478bd9Sstevel@tonic-gate { 44417c478bd9Sstevel@tonic-gate 44427c478bd9Sstevel@tonic-gate /* 44437c478bd9Sstevel@tonic-gate * Stop all services on the HCA 44447c478bd9Sstevel@tonic-gate * Go through cl_conn_list and close all rc_channels 
44457c478bd9Sstevel@tonic-gate * Go through svr_conn_list and close all rc_channels 44467c478bd9Sstevel@tonic-gate * Free connections whose c_ref has dropped to 0 44477c478bd9Sstevel@tonic-gate * Destroy all CQs 44487c478bd9Sstevel@tonic-gate * Deregister and released all buffer pool memory after all 44497c478bd9Sstevel@tonic-gate * connections are destroyed 44507c478bd9Sstevel@tonic-gate * Free the protection domain 44517c478bd9Sstevel@tonic-gate * ibt_close_hca() 44527c478bd9Sstevel@tonic-gate */ 44537c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_WRITER); 44547c478bd9Sstevel@tonic-gate if (hca->state == HCA_DETACHED) { 44557c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 44567c478bd9Sstevel@tonic-gate return; 44577c478bd9Sstevel@tonic-gate } 44587c478bd9Sstevel@tonic-gate 44597c478bd9Sstevel@tonic-gate hca->state = HCA_DETACHED; 44607c478bd9Sstevel@tonic-gate rib_stat->nhca_inited--; 44617c478bd9Sstevel@tonic-gate 44627c478bd9Sstevel@tonic-gate rib_stop_services(hca); 44637c478bd9Sstevel@tonic-gate rib_close_channels(&hca->cl_conn_list); 44647c478bd9Sstevel@tonic-gate rib_close_channels(&hca->srv_conn_list); 446551f34d4bSRajkumar Sivaprakasam 446651f34d4bSRajkumar Sivaprakasam rib_mod.rdma_count--; 446751f34d4bSRajkumar Sivaprakasam 44687c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 44697c478bd9Sstevel@tonic-gate 447051f34d4bSRajkumar Sivaprakasam rib_purge_connlist(&hca->cl_conn_list); 447151f34d4bSRajkumar Sivaprakasam rib_purge_connlist(&hca->srv_conn_list); 447251f34d4bSRajkumar Sivaprakasam 447351f34d4bSRajkumar Sivaprakasam if (stats_enabled) { 447451f34d4bSRajkumar Sivaprakasam kstat_delete_byname_zone("unix", 0, "rpcib_cache", 447551f34d4bSRajkumar Sivaprakasam GLOBAL_ZONEID); 447651f34d4bSRajkumar Sivaprakasam } 44777c478bd9Sstevel@tonic-gate 44787c478bd9Sstevel@tonic-gate rw_enter(&hca->srv_conn_list.conn_lock, RW_READER); 44797c478bd9Sstevel@tonic-gate rw_enter(&hca->cl_conn_list.conn_lock, RW_READER); 
44807c478bd9Sstevel@tonic-gate if (hca->srv_conn_list.conn_hd == NULL && 44817c478bd9Sstevel@tonic-gate hca->cl_conn_list.conn_hd == NULL) { 44827c478bd9Sstevel@tonic-gate /* 44837c478bd9Sstevel@tonic-gate * conn_lists are NULL, so destroy 44847c478bd9Sstevel@tonic-gate * buffers, close hca and be done. 44857c478bd9Sstevel@tonic-gate */ 4486*065714dcSSiddheshwar Mahesh rib_free_hca(hca); 44877c478bd9Sstevel@tonic-gate } 44887c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 44897c478bd9Sstevel@tonic-gate rw_exit(&hca->srv_conn_list.conn_lock); 44907c478bd9Sstevel@tonic-gate 44917c478bd9Sstevel@tonic-gate if (hca->hca_hdl != NULL) { 44927c478bd9Sstevel@tonic-gate mutex_enter(&hca->inuse_lock); 44937c478bd9Sstevel@tonic-gate while (hca->inuse) 44947c478bd9Sstevel@tonic-gate cv_wait(&hca->cb_cv, &hca->inuse_lock); 44957c478bd9Sstevel@tonic-gate mutex_exit(&hca->inuse_lock); 449651f34d4bSRajkumar Sivaprakasam 4497*065714dcSSiddheshwar Mahesh rib_free_hca(hca); 4498*065714dcSSiddheshwar Mahesh } 449951f34d4bSRajkumar Sivaprakasam 4500*065714dcSSiddheshwar Mahesh if (hca->cleanup_helper != NULL) { 4501*065714dcSSiddheshwar Mahesh ddi_taskq_destroy(hca->cleanup_helper); 4502*065714dcSSiddheshwar Mahesh hca->cleanup_helper = NULL; 45037c478bd9Sstevel@tonic-gate } 45047c478bd9Sstevel@tonic-gate } 45050a701b1eSRobert Gordon 45060a701b1eSRobert Gordon static void 45070a701b1eSRobert Gordon rib_server_side_cache_reclaim(void *argp) 45080a701b1eSRobert Gordon { 45090a701b1eSRobert Gordon cache_avl_struct_t *rcas; 45100a701b1eSRobert Gordon rib_lrc_entry_t *rb; 45110a701b1eSRobert Gordon rib_hca_t *hca = (rib_hca_t *)argp; 45120a701b1eSRobert Gordon 45130a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_WRITER); 45140a701b1eSRobert Gordon rcas = avl_first(&hca->avl_tree); 45150a701b1eSRobert Gordon if (rcas != NULL) 45160a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 45170a701b1eSRobert Gordon 45180a701b1eSRobert Gordon while (rcas != NULL) { 
45190a701b1eSRobert Gordon while (rcas->r.forw != &rcas->r) { 45200a701b1eSRobert Gordon rcas->elements--; 45210a701b1eSRobert Gordon rib_total_buffers --; 45220a701b1eSRobert Gordon rb = rcas->r.forw; 45230a701b1eSRobert Gordon remque(rb); 45240a701b1eSRobert Gordon if (rb->registered) 45250a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, 45260a701b1eSRobert Gordon rb->lrc_buf, rb->lrc_mhandle); 45270a701b1eSRobert Gordon cache_allocation -= rb->lrc_len; 45280a701b1eSRobert Gordon kmem_free(rb->lrc_buf, rb->lrc_len); 45290a701b1eSRobert Gordon kmem_free(rb, sizeof (rib_lrc_entry_t)); 45300a701b1eSRobert Gordon } 45310a701b1eSRobert Gordon mutex_destroy(&rcas->node_lock); 45320a701b1eSRobert Gordon kmem_cache_free(hca->server_side_cache, rcas); 45330a701b1eSRobert Gordon rcas = avl_first(&hca->avl_tree); 45340a701b1eSRobert Gordon if (rcas != NULL) 45350a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 45360a701b1eSRobert Gordon } 45370a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 45380a701b1eSRobert Gordon } 45390a701b1eSRobert Gordon 45400a701b1eSRobert Gordon static void 45410a701b1eSRobert Gordon rib_server_side_cache_cleanup(void *argp) 45420a701b1eSRobert Gordon { 45430a701b1eSRobert Gordon cache_avl_struct_t *rcas; 45440a701b1eSRobert Gordon rib_lrc_entry_t *rb; 45450a701b1eSRobert Gordon rib_hca_t *hca = (rib_hca_t *)argp; 45460a701b1eSRobert Gordon 45470a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_READER); 45480a701b1eSRobert Gordon if (cache_allocation < cache_limit) { 45490a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 45500a701b1eSRobert Gordon return; 45510a701b1eSRobert Gordon } 45520a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 45530a701b1eSRobert Gordon 45540a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_WRITER); 45550a701b1eSRobert Gordon rcas = avl_last(&hca->avl_tree); 45560a701b1eSRobert Gordon if (rcas != NULL) 45570a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 45580a701b1eSRobert Gordon 
45590a701b1eSRobert Gordon while (rcas != NULL) { 45600a701b1eSRobert Gordon while (rcas->r.forw != &rcas->r) { 45610a701b1eSRobert Gordon rcas->elements--; 45620a701b1eSRobert Gordon rib_total_buffers --; 45630a701b1eSRobert Gordon rb = rcas->r.forw; 45640a701b1eSRobert Gordon remque(rb); 45650a701b1eSRobert Gordon if (rb->registered) 45660a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, 45670a701b1eSRobert Gordon rb->lrc_buf, rb->lrc_mhandle); 45680a701b1eSRobert Gordon cache_allocation -= rb->lrc_len; 45690a701b1eSRobert Gordon kmem_free(rb->lrc_buf, rb->lrc_len); 45700a701b1eSRobert Gordon kmem_free(rb, sizeof (rib_lrc_entry_t)); 45710a701b1eSRobert Gordon } 45720a701b1eSRobert Gordon mutex_destroy(&rcas->node_lock); 457351f34d4bSRajkumar Sivaprakasam if (hca->server_side_cache) { 45740a701b1eSRobert Gordon kmem_cache_free(hca->server_side_cache, rcas); 457551f34d4bSRajkumar Sivaprakasam } 45760a701b1eSRobert Gordon if ((cache_allocation) < cache_limit) { 45770a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 45780a701b1eSRobert Gordon return; 45790a701b1eSRobert Gordon } 45800a701b1eSRobert Gordon 45810a701b1eSRobert Gordon rcas = avl_last(&hca->avl_tree); 45820a701b1eSRobert Gordon if (rcas != NULL) 45830a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 45840a701b1eSRobert Gordon } 45850a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 45860a701b1eSRobert Gordon } 45870a701b1eSRobert Gordon 45880a701b1eSRobert Gordon static int 45890a701b1eSRobert Gordon avl_compare(const void *t1, const void *t2) 45900a701b1eSRobert Gordon { 45910a701b1eSRobert Gordon if (((cache_avl_struct_t *)t1)->len == ((cache_avl_struct_t *)t2)->len) 45920a701b1eSRobert Gordon return (0); 45930a701b1eSRobert Gordon 45940a701b1eSRobert Gordon if (((cache_avl_struct_t *)t1)->len < ((cache_avl_struct_t *)t2)->len) 45950a701b1eSRobert Gordon return (-1); 45960a701b1eSRobert Gordon 45970a701b1eSRobert Gordon return (1); 45980a701b1eSRobert Gordon } 45990a701b1eSRobert Gordon 
46000a701b1eSRobert Gordon static void 46010a701b1eSRobert Gordon rib_destroy_cache(rib_hca_t *hca) 46020a701b1eSRobert Gordon { 460351f34d4bSRajkumar Sivaprakasam if (hca->avl_init) { 460451f34d4bSRajkumar Sivaprakasam rib_server_side_cache_reclaim((void *)hca); 460551f34d4bSRajkumar Sivaprakasam if (hca->server_side_cache) { 46060a701b1eSRobert Gordon kmem_cache_destroy(hca->server_side_cache); 460751f34d4bSRajkumar Sivaprakasam hca->server_side_cache = NULL; 460851f34d4bSRajkumar Sivaprakasam } 46090a701b1eSRobert Gordon avl_destroy(&hca->avl_tree); 46100a701b1eSRobert Gordon mutex_destroy(&hca->cache_allocation); 46110a701b1eSRobert Gordon rw_destroy(&hca->avl_rw_lock); 46120a701b1eSRobert Gordon } 46130a701b1eSRobert Gordon hca->avl_init = FALSE; 46140a701b1eSRobert Gordon } 46150a701b1eSRobert Gordon 46160a701b1eSRobert Gordon static void 46170a701b1eSRobert Gordon rib_force_cleanup(void *hca) 46180a701b1eSRobert Gordon { 4619*065714dcSSiddheshwar Mahesh if (((rib_hca_t *)hca)->cleanup_helper != NULL) 46200a701b1eSRobert Gordon (void) ddi_taskq_dispatch( 4621*065714dcSSiddheshwar Mahesh ((rib_hca_t *)hca)->cleanup_helper, 46220a701b1eSRobert Gordon rib_server_side_cache_cleanup, 46230a701b1eSRobert Gordon (void *)hca, DDI_NOSLEEP); 46240a701b1eSRobert Gordon } 46250a701b1eSRobert Gordon 46260a701b1eSRobert Gordon static rib_lrc_entry_t * 46270a701b1eSRobert Gordon rib_get_cache_buf(CONN *conn, uint32_t len) 46280a701b1eSRobert Gordon { 46290a701b1eSRobert Gordon cache_avl_struct_t cas, *rcas; 46300a701b1eSRobert Gordon rib_hca_t *hca = (ctoqp(conn))->hca; 46310a701b1eSRobert Gordon rib_lrc_entry_t *reply_buf; 46320a701b1eSRobert Gordon avl_index_t where = NULL; 46330a701b1eSRobert Gordon uint64_t c_alloc = 0; 46340a701b1eSRobert Gordon 46350a701b1eSRobert Gordon if (!hca->avl_init) 46360a701b1eSRobert Gordon goto error_alloc; 46370a701b1eSRobert Gordon 46380a701b1eSRobert Gordon cas.len = len; 46390a701b1eSRobert Gordon 46400a701b1eSRobert Gordon 
rw_enter(&hca->avl_rw_lock, RW_READER); 46410a701b1eSRobert Gordon 46420a701b1eSRobert Gordon mutex_enter(&hca->cache_allocation); 46430a701b1eSRobert Gordon c_alloc = cache_allocation; 46440a701b1eSRobert Gordon mutex_exit(&hca->cache_allocation); 46450a701b1eSRobert Gordon 46460a701b1eSRobert Gordon if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas, 46470a701b1eSRobert Gordon &where)) == NULL) { 46480a701b1eSRobert Gordon /* Am I above the cache limit */ 46490a701b1eSRobert Gordon if ((c_alloc + len) >= cache_limit) { 46500a701b1eSRobert Gordon rib_force_cleanup((void *)hca); 46510a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 46520a701b1eSRobert Gordon cache_misses_above_the_limit ++; 46530a701b1eSRobert Gordon 46540a701b1eSRobert Gordon /* Allocate and register the buffer directly */ 46550a701b1eSRobert Gordon goto error_alloc; 46560a701b1eSRobert Gordon } 46570a701b1eSRobert Gordon 46580a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 46590a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_WRITER); 46600a701b1eSRobert Gordon 46610a701b1eSRobert Gordon /* Recheck to make sure no other thread added the entry in */ 46620a701b1eSRobert Gordon if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, 46630a701b1eSRobert Gordon &cas, &where)) == NULL) { 46640a701b1eSRobert Gordon /* Allocate an avl tree entry */ 46650a701b1eSRobert Gordon rcas = (cache_avl_struct_t *) 46660a701b1eSRobert Gordon kmem_cache_alloc(hca->server_side_cache, KM_SLEEP); 46670a701b1eSRobert Gordon 46680a701b1eSRobert Gordon bzero(rcas, sizeof (cache_avl_struct_t)); 46690a701b1eSRobert Gordon rcas->elements = 0; 46700a701b1eSRobert Gordon rcas->r.forw = &rcas->r; 46710a701b1eSRobert Gordon rcas->r.back = &rcas->r; 46720a701b1eSRobert Gordon rcas->len = len; 46730a701b1eSRobert Gordon mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL); 46740a701b1eSRobert Gordon avl_insert(&hca->avl_tree, rcas, where); 46750a701b1eSRobert Gordon } 46760a701b1eSRobert Gordon } 
46770a701b1eSRobert Gordon 46780a701b1eSRobert Gordon mutex_enter(&rcas->node_lock); 46790a701b1eSRobert Gordon 46800a701b1eSRobert Gordon if (rcas->r.forw != &rcas->r && rcas->elements > 0) { 46810a701b1eSRobert Gordon rib_total_buffers--; 46820a701b1eSRobert Gordon cache_hits++; 46830a701b1eSRobert Gordon reply_buf = rcas->r.forw; 46840a701b1eSRobert Gordon remque(reply_buf); 46850a701b1eSRobert Gordon rcas->elements--; 46860a701b1eSRobert Gordon mutex_exit(&rcas->node_lock); 46870a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 46880a701b1eSRobert Gordon mutex_enter(&hca->cache_allocation); 46890a701b1eSRobert Gordon cache_allocation -= len; 46900a701b1eSRobert Gordon mutex_exit(&hca->cache_allocation); 46910a701b1eSRobert Gordon } else { 46920a701b1eSRobert Gordon /* Am I above the cache limit */ 46930a701b1eSRobert Gordon mutex_exit(&rcas->node_lock); 46940a701b1eSRobert Gordon if ((c_alloc + len) >= cache_limit) { 46950a701b1eSRobert Gordon rib_force_cleanup((void *)hca); 46960a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 46970a701b1eSRobert Gordon cache_misses_above_the_limit ++; 46980a701b1eSRobert Gordon /* Allocate and register the buffer directly */ 46990a701b1eSRobert Gordon goto error_alloc; 47000a701b1eSRobert Gordon } 47010a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 47020a701b1eSRobert Gordon cache_misses ++; 47030a701b1eSRobert Gordon /* Allocate a reply_buf entry */ 47040a701b1eSRobert Gordon reply_buf = (rib_lrc_entry_t *) 47050a701b1eSRobert Gordon kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP); 47060a701b1eSRobert Gordon bzero(reply_buf, sizeof (rib_lrc_entry_t)); 47070a701b1eSRobert Gordon reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP); 47080a701b1eSRobert Gordon reply_buf->lrc_len = len; 47090a701b1eSRobert Gordon reply_buf->registered = FALSE; 47100a701b1eSRobert Gordon reply_buf->avl_node = (void *)rcas; 47110a701b1eSRobert Gordon } 47120a701b1eSRobert Gordon 47130a701b1eSRobert Gordon return (reply_buf); 47140a701b1eSRobert 
Gordon 47150a701b1eSRobert Gordon error_alloc: 47160a701b1eSRobert Gordon reply_buf = (rib_lrc_entry_t *) 47170a701b1eSRobert Gordon kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP); 47180a701b1eSRobert Gordon bzero(reply_buf, sizeof (rib_lrc_entry_t)); 47190a701b1eSRobert Gordon reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP); 47200a701b1eSRobert Gordon reply_buf->lrc_len = len; 47210a701b1eSRobert Gordon reply_buf->registered = FALSE; 47220a701b1eSRobert Gordon reply_buf->avl_node = NULL; 47230a701b1eSRobert Gordon 47240a701b1eSRobert Gordon return (reply_buf); 47250a701b1eSRobert Gordon } 47260a701b1eSRobert Gordon 47270a701b1eSRobert Gordon /* 47280a701b1eSRobert Gordon * Return a pre-registered back to the cache (without 47290a701b1eSRobert Gordon * unregistering the buffer).. 47300a701b1eSRobert Gordon */ 47310a701b1eSRobert Gordon 47320a701b1eSRobert Gordon static void 47330a701b1eSRobert Gordon rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf) 47340a701b1eSRobert Gordon { 47350a701b1eSRobert Gordon cache_avl_struct_t cas, *rcas; 47360a701b1eSRobert Gordon avl_index_t where = NULL; 47370a701b1eSRobert Gordon rib_hca_t *hca = (ctoqp(conn))->hca; 47380a701b1eSRobert Gordon 47390a701b1eSRobert Gordon if (!hca->avl_init) 47400a701b1eSRobert Gordon goto error_free; 47410a701b1eSRobert Gordon 47420a701b1eSRobert Gordon cas.len = reg_buf->lrc_len; 47430a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_READER); 47440a701b1eSRobert Gordon if ((rcas = (cache_avl_struct_t *) 47450a701b1eSRobert Gordon avl_find(&hca->avl_tree, &cas, &where)) == NULL) { 47460a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 47470a701b1eSRobert Gordon goto error_free; 47480a701b1eSRobert Gordon } else { 47490a701b1eSRobert Gordon rib_total_buffers ++; 47500a701b1eSRobert Gordon cas.len = reg_buf->lrc_len; 47510a701b1eSRobert Gordon mutex_enter(&rcas->node_lock); 47520a701b1eSRobert Gordon insque(reg_buf, &rcas->r); 47530a701b1eSRobert Gordon rcas->elements ++; 47540a701b1eSRobert 
Gordon mutex_exit(&rcas->node_lock); 47550a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 47560a701b1eSRobert Gordon mutex_enter(&hca->cache_allocation); 47570a701b1eSRobert Gordon cache_allocation += cas.len; 47580a701b1eSRobert Gordon mutex_exit(&hca->cache_allocation); 47590a701b1eSRobert Gordon } 47600a701b1eSRobert Gordon 47610a701b1eSRobert Gordon return; 47620a701b1eSRobert Gordon 47630a701b1eSRobert Gordon error_free: 47640a701b1eSRobert Gordon 47650a701b1eSRobert Gordon if (reg_buf->registered) 47660a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, 47670a701b1eSRobert Gordon reg_buf->lrc_buf, reg_buf->lrc_mhandle); 47680a701b1eSRobert Gordon kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len); 47690a701b1eSRobert Gordon kmem_free(reg_buf, sizeof (rib_lrc_entry_t)); 47700a701b1eSRobert Gordon } 47710a701b1eSRobert Gordon 47720a701b1eSRobert Gordon static rdma_stat 47730a701b1eSRobert Gordon rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf, 47740a701b1eSRobert Gordon uint_t buflen, struct mrc *buf_handle) 47750a701b1eSRobert Gordon { 47760a701b1eSRobert Gordon ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 47770a701b1eSRobert Gordon ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 47780a701b1eSRobert Gordon rdma_stat status; 47790a701b1eSRobert Gordon 47800a701b1eSRobert Gordon 47810a701b1eSRobert Gordon /* 47820a701b1eSRobert Gordon * Note: ALL buffer pools use the same memory type RDMARW. 
47830a701b1eSRobert Gordon */ 47840a701b1eSRobert Gordon status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 47850a701b1eSRobert Gordon if (status == RDMA_SUCCESS) { 47860a701b1eSRobert Gordon buf_handle->mrc_linfo = (uint64_t)(uintptr_t)mr_hdl; 47870a701b1eSRobert Gordon buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 47880a701b1eSRobert Gordon buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 47890a701b1eSRobert Gordon } else { 47900a701b1eSRobert Gordon buf_handle->mrc_linfo = NULL; 47910a701b1eSRobert Gordon buf_handle->mrc_lmr = 0; 47920a701b1eSRobert Gordon buf_handle->mrc_rmr = 0; 47930a701b1eSRobert Gordon } 47940a701b1eSRobert Gordon return (status); 47950a701b1eSRobert Gordon } 47960a701b1eSRobert Gordon 47970a701b1eSRobert Gordon /* ARGSUSED */ 47980a701b1eSRobert Gordon static rdma_stat 47990a701b1eSRobert Gordon rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf, 48000a701b1eSRobert Gordon struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle) 48010a701b1eSRobert Gordon { 48020a701b1eSRobert Gordon 48030a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, buf, buf_handle); 48040a701b1eSRobert Gordon return (RDMA_SUCCESS); 48050a701b1eSRobert Gordon } 48060a701b1eSRobert Gordon 48070a701b1eSRobert Gordon /* ARGSUSED */ 48080a701b1eSRobert Gordon static rdma_stat 48090a701b1eSRobert Gordon rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle) 48100a701b1eSRobert Gordon { 48110a701b1eSRobert Gordon 48120a701b1eSRobert Gordon (void) ibt_deregister_mr(hca->hca_hdl, 48130a701b1eSRobert Gordon (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo); 48140a701b1eSRobert Gordon return (RDMA_SUCCESS); 48150a701b1eSRobert Gordon } 48160a701b1eSRobert Gordon 48170a701b1eSRobert Gordon /* 4818e11c3f44Smeem * Check if the IP interface named by `lifrp' is RDMA-capable. 
48190a701b1eSRobert Gordon */ 4820e11c3f44Smeem static boolean_t 4821e11c3f44Smeem rpcib_rdma_capable_interface(struct lifreq *lifrp) 48220a701b1eSRobert Gordon { 4823e11c3f44Smeem char ifname[LIFNAMSIZ]; 4824e11c3f44Smeem char *cp; 48250a701b1eSRobert Gordon 4826e11c3f44Smeem if (lifrp->lifr_type == IFT_IB) 4827e11c3f44Smeem return (B_TRUE); 48280a701b1eSRobert Gordon 48290a701b1eSRobert Gordon /* 4830e11c3f44Smeem * Strip off the logical interface portion before getting 4831e11c3f44Smeem * intimate with the name. 48320a701b1eSRobert Gordon */ 4833e11c3f44Smeem (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ); 4834e11c3f44Smeem if ((cp = strchr(ifname, ':')) != NULL) 4835e11c3f44Smeem *cp = '\0'; 48360a701b1eSRobert Gordon 4837e11c3f44Smeem return (strcmp("lo0", ifname) == 0); 48380a701b1eSRobert Gordon } 48390a701b1eSRobert Gordon 48400a701b1eSRobert Gordon static int 4841e11c3f44Smeem rpcib_do_ip_ioctl(int cmd, int len, void *arg) 48420a701b1eSRobert Gordon { 48430a701b1eSRobert Gordon vnode_t *kvp, *vp; 48440a701b1eSRobert Gordon TIUSER *tiptr; 48450a701b1eSRobert Gordon struct strioctl iocb; 48460a701b1eSRobert Gordon k_sigset_t smask; 48470a701b1eSRobert Gordon int err = 0; 48480a701b1eSRobert Gordon 4849e11c3f44Smeem if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kvp) == 0) { 4850e11c3f44Smeem if (t_kopen(NULL, kvp->v_rdev, FREAD|FWRITE, 48510a701b1eSRobert Gordon &tiptr, CRED()) == 0) { 48520a701b1eSRobert Gordon vp = tiptr->fp->f_vnode; 48530a701b1eSRobert Gordon } else { 48540a701b1eSRobert Gordon VN_RELE(kvp); 48550a701b1eSRobert Gordon return (EPROTO); 48560a701b1eSRobert Gordon } 48570a701b1eSRobert Gordon } else { 48580a701b1eSRobert Gordon return (EPROTO); 48590a701b1eSRobert Gordon } 48600a701b1eSRobert Gordon 48610a701b1eSRobert Gordon iocb.ic_cmd = cmd; 48620a701b1eSRobert Gordon iocb.ic_timout = 0; 48630a701b1eSRobert Gordon iocb.ic_len = len; 4864e11c3f44Smeem iocb.ic_dp = (caddr_t)arg; 48650a701b1eSRobert Gordon sigintr(&smask, 
0); 48660a701b1eSRobert Gordon err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb); 48670a701b1eSRobert Gordon sigunintr(&smask); 48680a701b1eSRobert Gordon (void) t_kclose(tiptr, 0); 48690a701b1eSRobert Gordon VN_RELE(kvp); 48700a701b1eSRobert Gordon return (err); 48710a701b1eSRobert Gordon } 48720a701b1eSRobert Gordon 4873e11c3f44Smeem /* 4874e11c3f44Smeem * Issue an SIOCGLIFCONF down to IP and return the result in `lifcp'. 4875e11c3f44Smeem * lifcp->lifc_buf is dynamically allocated to be *bufsizep bytes. 4876e11c3f44Smeem */ 4877e11c3f44Smeem static int 4878e11c3f44Smeem rpcib_do_lifconf(struct lifconf *lifcp, uint_t *bufsizep) 4879e11c3f44Smeem { 4880e11c3f44Smeem int err; 4881e11c3f44Smeem struct lifnum lifn; 4882e11c3f44Smeem 4883e11c3f44Smeem bzero(&lifn, sizeof (struct lifnum)); 4884e11c3f44Smeem lifn.lifn_family = AF_UNSPEC; 4885e11c3f44Smeem 4886e11c3f44Smeem err = rpcib_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn); 4887e11c3f44Smeem if (err != 0) 4888e11c3f44Smeem return (err); 4889e11c3f44Smeem 4890e11c3f44Smeem /* 4891e11c3f44Smeem * Pad the interface count to account for additional interfaces that 4892e11c3f44Smeem * may have been configured between the SIOCGLIFNUM and SIOCGLIFCONF. 
4893e11c3f44Smeem */ 4894e11c3f44Smeem lifn.lifn_count += 4; 4895e11c3f44Smeem 4896e11c3f44Smeem bzero(lifcp, sizeof (struct lifconf)); 4897e11c3f44Smeem lifcp->lifc_family = AF_UNSPEC; 4898e11c3f44Smeem lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq); 4899e11c3f44Smeem lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP); 4900e11c3f44Smeem 4901e11c3f44Smeem err = rpcib_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp); 4902e11c3f44Smeem if (err != 0) { 4903e11c3f44Smeem kmem_free(lifcp->lifc_buf, *bufsizep); 4904e11c3f44Smeem return (err); 49050a701b1eSRobert Gordon } 4906e11c3f44Smeem return (0); 49070a701b1eSRobert Gordon } 49080a701b1eSRobert Gordon 49090a701b1eSRobert Gordon static boolean_t 4910e11c3f44Smeem rpcib_get_ib_addresses(rpcib_ipaddrs_t *addrs4, rpcib_ipaddrs_t *addrs6) 49110a701b1eSRobert Gordon { 4912e11c3f44Smeem uint_t i, nifs; 4913e11c3f44Smeem uint_t bufsize; 4914e11c3f44Smeem struct lifconf lifc; 4915e11c3f44Smeem struct lifreq *lifrp; 4916e11c3f44Smeem struct sockaddr_in *sinp; 4917e11c3f44Smeem struct sockaddr_in6 *sin6p; 49180a701b1eSRobert Gordon 4919e11c3f44Smeem bzero(addrs4, sizeof (rpcib_ipaddrs_t)); 4920e11c3f44Smeem bzero(addrs6, sizeof (rpcib_ipaddrs_t)); 49210a701b1eSRobert Gordon 4922e11c3f44Smeem if (rpcib_do_lifconf(&lifc, &bufsize) != 0) 4923e11c3f44Smeem return (B_FALSE); 4924e11c3f44Smeem 4925e11c3f44Smeem if ((nifs = lifc.lifc_len / sizeof (struct lifreq)) == 0) { 4926e11c3f44Smeem kmem_free(lifc.lifc_buf, bufsize); 4927e11c3f44Smeem return (B_FALSE); 49280a701b1eSRobert Gordon } 49290a701b1eSRobert Gordon 4930e11c3f44Smeem /* 4931e11c3f44Smeem * Worst case is that all of the addresses are IB-capable and have 4932e11c3f44Smeem * the same address family, so size our buffers accordingly. 
4933e11c3f44Smeem */ 4934e11c3f44Smeem addrs4->ri_size = nifs * sizeof (struct sockaddr_in); 4935e11c3f44Smeem addrs4->ri_list = kmem_zalloc(addrs4->ri_size, KM_SLEEP); 4936e11c3f44Smeem addrs6->ri_size = nifs * sizeof (struct sockaddr_in6); 4937e11c3f44Smeem addrs6->ri_list = kmem_zalloc(addrs6->ri_size, KM_SLEEP); 49380a701b1eSRobert Gordon 4939e11c3f44Smeem for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) { 4940e11c3f44Smeem if (!rpcib_rdma_capable_interface(lifrp)) 4941e11c3f44Smeem continue; 4942e11c3f44Smeem 4943e11c3f44Smeem if (lifrp->lifr_addr.ss_family == AF_INET) { 4944e11c3f44Smeem sinp = addrs4->ri_list; 4945e11c3f44Smeem bcopy(&lifrp->lifr_addr, &sinp[addrs4->ri_count++], 4946e11c3f44Smeem sizeof (struct sockaddr_in)); 4947e11c3f44Smeem } else if (lifrp->lifr_addr.ss_family == AF_INET6) { 4948e11c3f44Smeem sin6p = addrs6->ri_list; 4949e11c3f44Smeem bcopy(&lifrp->lifr_addr, &sin6p[addrs6->ri_count++], 4950e11c3f44Smeem sizeof (struct sockaddr_in6)); 4951e11c3f44Smeem } 49520a701b1eSRobert Gordon } 49530a701b1eSRobert Gordon 4954e11c3f44Smeem kmem_free(lifc.lifc_buf, bufsize); 4955e11c3f44Smeem return (B_TRUE); 49560a701b1eSRobert Gordon } 49570a701b1eSRobert Gordon 49580a701b1eSRobert Gordon /* ARGSUSED */ 49590a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *ksp, int rw) { 49600a701b1eSRobert Gordon 49610a701b1eSRobert Gordon if (KSTAT_WRITE == rw) { 49620a701b1eSRobert Gordon return (EACCES); 49630a701b1eSRobert Gordon } 49640a701b1eSRobert Gordon rpcib_kstat.cache_limit.value.ui64 = 49650a701b1eSRobert Gordon (uint64_t)cache_limit; 49660a701b1eSRobert Gordon rpcib_kstat.cache_allocation.value.ui64 = 49670a701b1eSRobert Gordon (uint64_t)cache_allocation; 49680a701b1eSRobert Gordon rpcib_kstat.cache_hits.value.ui64 = 49690a701b1eSRobert Gordon (uint64_t)cache_hits; 49700a701b1eSRobert Gordon rpcib_kstat.cache_misses.value.ui64 = 49710a701b1eSRobert Gordon (uint64_t)cache_misses; 49720a701b1eSRobert Gordon 
rpcib_kstat.cache_misses_above_the_limit.value.ui64 = 49730a701b1eSRobert Gordon (uint64_t)cache_misses_above_the_limit; 49740a701b1eSRobert Gordon return (0); 49750a701b1eSRobert Gordon } 4976