17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0a701b1eSRobert Gordon * Common Development and Distribution License (the "License"). 6*0a701b1eSRobert Gordon * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*0a701b1eSRobert Gordon * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 26*0a701b1eSRobert Gordon 27*0a701b1eSRobert Gordon /* 28*0a701b1eSRobert Gordon * Copyright (c) 2007, The Ohio State University. All rights reserved. 
29*0a701b1eSRobert Gordon * 30*0a701b1eSRobert Gordon * Portions of this source code is developed by the team members of 31*0a701b1eSRobert Gordon * The Ohio State University's Network-Based Computing Laboratory (NBCL), 32*0a701b1eSRobert Gordon * headed by Professor Dhabaleswar K. (DK) Panda. 33*0a701b1eSRobert Gordon * 34*0a701b1eSRobert Gordon * Acknowledgements to contributions from developors: 35*0a701b1eSRobert Gordon * Ranjit Noronha: noronha@cse.ohio-state.edu 36*0a701b1eSRobert Gordon * Lei Chai : chail@cse.ohio-state.edu 37*0a701b1eSRobert Gordon * Weikuan Yu : yuw@cse.ohio-state.edu 38*0a701b1eSRobert Gordon * 39*0a701b1eSRobert Gordon */ 407c478bd9Sstevel@tonic-gate 417c478bd9Sstevel@tonic-gate /* 427c478bd9Sstevel@tonic-gate * The rpcib plugin. Implements the interface for RDMATF's 437c478bd9Sstevel@tonic-gate * interaction with IBTF. 447c478bd9Sstevel@tonic-gate */ 457c478bd9Sstevel@tonic-gate 467c478bd9Sstevel@tonic-gate #include <sys/param.h> 477c478bd9Sstevel@tonic-gate #include <sys/types.h> 487c478bd9Sstevel@tonic-gate #include <sys/user.h> 497c478bd9Sstevel@tonic-gate #include <sys/systm.h> 507c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 517c478bd9Sstevel@tonic-gate #include <sys/proc.h> 527c478bd9Sstevel@tonic-gate #include <sys/socket.h> 537c478bd9Sstevel@tonic-gate #include <sys/file.h> 547c478bd9Sstevel@tonic-gate #include <sys/stream.h> 557c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 567c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 577c478bd9Sstevel@tonic-gate #include <sys/errno.h> 587c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 597c478bd9Sstevel@tonic-gate #include <sys/debug.h> 607c478bd9Sstevel@tonic-gate #include <sys/systm.h> 617c478bd9Sstevel@tonic-gate #include <sys/pathname.h> 627c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 637c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 647c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 657c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 667c478bd9Sstevel@tonic-gate 
#include <sys/time.h> 677c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h> 687c478bd9Sstevel@tonic-gate #include <sys/callb.h> 697c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 707c478bd9Sstevel@tonic-gate #include <sys/sunndi.h> 71*0a701b1eSRobert Gordon #include <sys/sunldi.h> 72*0a701b1eSRobert Gordon #include <sys/sdt.h> 73*0a701b1eSRobert Gordon #include <sys/dlpi.h> 747c478bd9Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h> 757c478bd9Sstevel@tonic-gate #include <rpc/rpc.h> 767c478bd9Sstevel@tonic-gate #include <rpc/ib.h> 777c478bd9Sstevel@tonic-gate 787c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 797c478bd9Sstevel@tonic-gate 807c478bd9Sstevel@tonic-gate #include <sys/pathname.h> 817c478bd9Sstevel@tonic-gate #include <sys/kstr.h> 827c478bd9Sstevel@tonic-gate #include <sys/sockio.h> 837c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 847c478bd9Sstevel@tonic-gate #include <sys/tiuser.h> 857c478bd9Sstevel@tonic-gate #include <net/if.h> 867c478bd9Sstevel@tonic-gate #include <sys/cred.h> 87*0a701b1eSRobert Gordon #include <rpc/rpc_rdma.h> 887c478bd9Sstevel@tonic-gate 89*0a701b1eSRobert Gordon #include <nfs/nfs.h> 90*0a701b1eSRobert Gordon #include <sys/kstat.h> 91*0a701b1eSRobert Gordon #include <sys/atomic.h> 92*0a701b1eSRobert Gordon 93*0a701b1eSRobert Gordon #define NFS_RDMA_PORT 2050 947c478bd9Sstevel@tonic-gate 957c478bd9Sstevel@tonic-gate extern char *inet_ntop(int, const void *, char *, int); 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate 987c478bd9Sstevel@tonic-gate /* 997c478bd9Sstevel@tonic-gate * Prototype declarations for driver ops 1007c478bd9Sstevel@tonic-gate */ 1017c478bd9Sstevel@tonic-gate 1027c478bd9Sstevel@tonic-gate static int rpcib_attach(dev_info_t *, ddi_attach_cmd_t); 1037c478bd9Sstevel@tonic-gate static int rpcib_getinfo(dev_info_t *, ddi_info_cmd_t, 1047c478bd9Sstevel@tonic-gate void *, void **); 1057c478bd9Sstevel@tonic-gate static int rpcib_detach(dev_info_t *, ddi_detach_cmd_t); 106*0a701b1eSRobert Gordon static int 
rpcib_is_ib_interface(char *); 107*0a701b1eSRobert Gordon static int rpcib_dl_info(ldi_handle_t, dl_info_ack_t *); 108*0a701b1eSRobert Gordon static int rpcib_do_ip_ioctl(int, int, caddr_t); 109*0a701b1eSRobert Gordon static boolean_t rpcib_get_ib_addresses(struct sockaddr_in *, 110*0a701b1eSRobert Gordon struct sockaddr_in6 *, uint_t *, uint_t *); 111*0a701b1eSRobert Gordon static uint_t rpcib_get_number_interfaces(void); 112*0a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *, int); 113*0a701b1eSRobert Gordon static void rib_force_cleanup(void *); 1147c478bd9Sstevel@tonic-gate 115*0a701b1eSRobert Gordon struct { 116*0a701b1eSRobert Gordon kstat_named_t cache_limit; 117*0a701b1eSRobert Gordon kstat_named_t cache_allocation; 118*0a701b1eSRobert Gordon kstat_named_t cache_hits; 119*0a701b1eSRobert Gordon kstat_named_t cache_misses; 120*0a701b1eSRobert Gordon kstat_named_t cache_misses_above_the_limit; 121*0a701b1eSRobert Gordon } rpcib_kstat = { 122*0a701b1eSRobert Gordon {"cache_limit", KSTAT_DATA_UINT64 }, 123*0a701b1eSRobert Gordon {"cache_allocation", KSTAT_DATA_UINT64 }, 124*0a701b1eSRobert Gordon {"cache_hits", KSTAT_DATA_UINT64 }, 125*0a701b1eSRobert Gordon {"cache_misses", KSTAT_DATA_UINT64 }, 126*0a701b1eSRobert Gordon {"cache_misses_above_the_limit", KSTAT_DATA_UINT64 }, 127*0a701b1eSRobert Gordon }; 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate /* rpcib cb_ops */ 1307c478bd9Sstevel@tonic-gate static struct cb_ops rpcib_cbops = { 1317c478bd9Sstevel@tonic-gate nulldev, /* open */ 1327c478bd9Sstevel@tonic-gate nulldev, /* close */ 1337c478bd9Sstevel@tonic-gate nodev, /* strategy */ 1347c478bd9Sstevel@tonic-gate nodev, /* print */ 1357c478bd9Sstevel@tonic-gate nodev, /* dump */ 1367c478bd9Sstevel@tonic-gate nodev, /* read */ 1377c478bd9Sstevel@tonic-gate nodev, /* write */ 1387c478bd9Sstevel@tonic-gate nodev, /* ioctl */ 1397c478bd9Sstevel@tonic-gate nodev, /* devmap */ 1407c478bd9Sstevel@tonic-gate nodev, /* mmap */ 
1417c478bd9Sstevel@tonic-gate nodev, /* segmap */ 1427c478bd9Sstevel@tonic-gate nochpoll, /* poll */ 1437c478bd9Sstevel@tonic-gate ddi_prop_op, /* prop_op */ 1447c478bd9Sstevel@tonic-gate NULL, /* stream */ 1457c478bd9Sstevel@tonic-gate D_MP, /* cb_flag */ 1467c478bd9Sstevel@tonic-gate CB_REV, /* rev */ 1477c478bd9Sstevel@tonic-gate nodev, /* int (*cb_aread)() */ 1487c478bd9Sstevel@tonic-gate nodev /* int (*cb_awrite)() */ 1497c478bd9Sstevel@tonic-gate }; 1507c478bd9Sstevel@tonic-gate 151*0a701b1eSRobert Gordon 152*0a701b1eSRobert Gordon 153*0a701b1eSRobert Gordon 1547c478bd9Sstevel@tonic-gate /* 1557c478bd9Sstevel@tonic-gate * Device options 1567c478bd9Sstevel@tonic-gate */ 1577c478bd9Sstevel@tonic-gate static struct dev_ops rpcib_ops = { 1587c478bd9Sstevel@tonic-gate DEVO_REV, /* devo_rev, */ 1597c478bd9Sstevel@tonic-gate 0, /* refcnt */ 1607c478bd9Sstevel@tonic-gate rpcib_getinfo, /* info */ 1617c478bd9Sstevel@tonic-gate nulldev, /* identify */ 1627c478bd9Sstevel@tonic-gate nulldev, /* probe */ 1637c478bd9Sstevel@tonic-gate rpcib_attach, /* attach */ 1647c478bd9Sstevel@tonic-gate rpcib_detach, /* detach */ 1657c478bd9Sstevel@tonic-gate nodev, /* reset */ 1667c478bd9Sstevel@tonic-gate &rpcib_cbops, /* driver ops - devctl interfaces */ 1677c478bd9Sstevel@tonic-gate NULL, /* bus operations */ 1687c478bd9Sstevel@tonic-gate NULL /* power */ 1697c478bd9Sstevel@tonic-gate }; 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * Module linkage information. 
1737c478bd9Sstevel@tonic-gate */ 1747c478bd9Sstevel@tonic-gate 1757c478bd9Sstevel@tonic-gate static struct modldrv rib_modldrv = { 1767c478bd9Sstevel@tonic-gate &mod_driverops, /* Driver module */ 177*0a701b1eSRobert Gordon "RPCIB plugin driver, ver 1.30", /* Driver name and version */ 1787c478bd9Sstevel@tonic-gate &rpcib_ops, /* Driver ops */ 1797c478bd9Sstevel@tonic-gate }; 1807c478bd9Sstevel@tonic-gate 1817c478bd9Sstevel@tonic-gate static struct modlinkage rib_modlinkage = { 1827c478bd9Sstevel@tonic-gate MODREV_1, 1837c478bd9Sstevel@tonic-gate (void *)&rib_modldrv, 1847c478bd9Sstevel@tonic-gate NULL 1857c478bd9Sstevel@tonic-gate }; 1867c478bd9Sstevel@tonic-gate 187*0a701b1eSRobert Gordon typedef struct rib_lrc_entry { 188*0a701b1eSRobert Gordon struct rib_lrc_entry *forw; 189*0a701b1eSRobert Gordon struct rib_lrc_entry *back; 190*0a701b1eSRobert Gordon char *lrc_buf; 191*0a701b1eSRobert Gordon 192*0a701b1eSRobert Gordon uint32_t lrc_len; 193*0a701b1eSRobert Gordon void *avl_node; 194*0a701b1eSRobert Gordon bool_t registered; 195*0a701b1eSRobert Gordon 196*0a701b1eSRobert Gordon struct mrc lrc_mhandle; 197*0a701b1eSRobert Gordon bool_t lrc_on_freed_list; 198*0a701b1eSRobert Gordon } rib_lrc_entry_t; 199*0a701b1eSRobert Gordon 200*0a701b1eSRobert Gordon typedef struct cache_struct { 201*0a701b1eSRobert Gordon rib_lrc_entry_t r; 202*0a701b1eSRobert Gordon uint32_t len; 203*0a701b1eSRobert Gordon uint32_t elements; 204*0a701b1eSRobert Gordon kmutex_t node_lock; 205*0a701b1eSRobert Gordon avl_node_t avl_link; 206*0a701b1eSRobert Gordon } cache_avl_struct_t; 207*0a701b1eSRobert Gordon 208*0a701b1eSRobert Gordon 209*0a701b1eSRobert Gordon static uint64_t rib_total_buffers = 0; 210*0a701b1eSRobert Gordon uint64_t cache_limit = 100 * 1024 * 1024; 211*0a701b1eSRobert Gordon static volatile uint64_t cache_allocation = 0; 212*0a701b1eSRobert Gordon static uint64_t cache_watermark = 80 * 1024 * 1024; 213*0a701b1eSRobert Gordon static uint64_t cache_hits = 0; 
214*0a701b1eSRobert Gordon static uint64_t cache_misses = 0; 215*0a701b1eSRobert Gordon static uint64_t cache_cold_misses = 0; 216*0a701b1eSRobert Gordon static uint64_t cache_hot_misses = 0; 217*0a701b1eSRobert Gordon static uint64_t cache_misses_above_the_limit = 0; 218*0a701b1eSRobert Gordon static bool_t stats_enabled = FALSE; 219*0a701b1eSRobert Gordon 220*0a701b1eSRobert Gordon static uint64_t max_unsignaled_rws = 5; 221*0a701b1eSRobert Gordon 2227c478bd9Sstevel@tonic-gate /* 2237c478bd9Sstevel@tonic-gate * rib_stat: private data pointer used when registering 2247c478bd9Sstevel@tonic-gate * with the IBTF. It is returned to the consumer 2257c478bd9Sstevel@tonic-gate * in all callbacks. 2267c478bd9Sstevel@tonic-gate */ 2277c478bd9Sstevel@tonic-gate static rpcib_state_t *rib_stat = NULL; 2287c478bd9Sstevel@tonic-gate 229*0a701b1eSRobert Gordon #define RNR_RETRIES IBT_RNR_RETRY_1 2307c478bd9Sstevel@tonic-gate #define MAX_PORTS 2 2317c478bd9Sstevel@tonic-gate 232*0a701b1eSRobert Gordon int preposted_rbufs = RDMA_BUFS_GRANT; 2337c478bd9Sstevel@tonic-gate int send_threshold = 1; 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate /* 2367c478bd9Sstevel@tonic-gate * State of the plugin. 2377c478bd9Sstevel@tonic-gate * ACCEPT = accepting new connections and requests. 2387c478bd9Sstevel@tonic-gate * NO_ACCEPT = not accepting new connection and requests. 2397c478bd9Sstevel@tonic-gate * This should eventually move to rpcib_state_t structure, since this 2407c478bd9Sstevel@tonic-gate * will tell in which state the plugin is for a particular type of service 2417c478bd9Sstevel@tonic-gate * like NFS, NLM or v4 Callback deamon. The plugin might be in accept 2427c478bd9Sstevel@tonic-gate * state for one and in no_accept state for the other. 
2437c478bd9Sstevel@tonic-gate */ 2447c478bd9Sstevel@tonic-gate int plugin_state; 2457c478bd9Sstevel@tonic-gate kmutex_t plugin_state_lock; 2467c478bd9Sstevel@tonic-gate 247*0a701b1eSRobert Gordon ldi_ident_t rpcib_li; 2487c478bd9Sstevel@tonic-gate 2497c478bd9Sstevel@tonic-gate /* 2507c478bd9Sstevel@tonic-gate * RPCIB RDMATF operations 2517c478bd9Sstevel@tonic-gate */ 252*0a701b1eSRobert Gordon #if defined(MEASURE_POOL_DEPTH) 253*0a701b1eSRobert Gordon static void rib_posted_rbufs(uint32_t x) { return; } 254*0a701b1eSRobert Gordon #endif 2557c478bd9Sstevel@tonic-gate static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle); 2567c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect(CONN *conn); 2577c478bd9Sstevel@tonic-gate static void rib_listen(struct rdma_svc_data *rd); 2587c478bd9Sstevel@tonic-gate static void rib_listen_stop(struct rdma_svc_data *rd); 259*0a701b1eSRobert Gordon static rdma_stat rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, 260*0a701b1eSRobert Gordon uint_t buflen, struct mrc *buf_handle); 2617c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf, 2627c478bd9Sstevel@tonic-gate struct mrc buf_handle); 263*0a701b1eSRobert Gordon static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, 264*0a701b1eSRobert Gordon caddr_t buf, uint_t buflen, struct mrc *buf_handle); 265*0a701b1eSRobert Gordon static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, 266*0a701b1eSRobert Gordon struct mrc buf_handle); 267*0a701b1eSRobert Gordon static rdma_stat rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, 268*0a701b1eSRobert Gordon uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, 269*0a701b1eSRobert Gordon void *lrc); 2707c478bd9Sstevel@tonic-gate static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf, 271*0a701b1eSRobert Gordon struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *); 2727c478bd9Sstevel@tonic-gate static 
rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, 2737c478bd9Sstevel@tonic-gate caddr_t buf, int len, int cpu); 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf); 2767c478bd9Sstevel@tonic-gate 2777c478bd9Sstevel@tonic-gate static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf); 2787c478bd9Sstevel@tonic-gate static void *rib_rbuf_alloc(CONN *, rdma_buf_t *); 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate static void rib_rbuf_free(CONN *conn, int ptype, void *buf); 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid); 2837c478bd9Sstevel@tonic-gate static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid); 2847c478bd9Sstevel@tonic-gate static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid); 285*0a701b1eSRobert Gordon static rdma_stat rib_post_resp_remove(CONN *conn, uint32_t msgid); 2867c478bd9Sstevel@tonic-gate static rdma_stat rib_post_recv(CONN *conn, struct clist *cl); 2877c478bd9Sstevel@tonic-gate static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid); 2887c478bd9Sstevel@tonic-gate static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait); 2897c478bd9Sstevel@tonic-gate static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait); 2907c478bd9Sstevel@tonic-gate static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rib_hca_t **); 2917c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_get(struct netbuf *, int addr_type, void *, CONN **); 2927c478bd9Sstevel@tonic-gate static rdma_stat rib_conn_release(CONN *conn); 2937c478bd9Sstevel@tonic-gate static rdma_stat rib_getinfo(rdma_info_t *info); 294*0a701b1eSRobert Gordon 295*0a701b1eSRobert Gordon static rib_lrc_entry_t *rib_get_cache_buf(CONN *conn, uint32_t len); 296*0a701b1eSRobert Gordon static void rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *buf); 
297*0a701b1eSRobert Gordon static void rib_destroy_cache(rib_hca_t *hca); 298*0a701b1eSRobert Gordon static void rib_server_side_cache_reclaim(void *argp); 299*0a701b1eSRobert Gordon static int avl_compare(const void *t1, const void *t2); 300*0a701b1eSRobert Gordon 3017c478bd9Sstevel@tonic-gate static void rib_stop_services(rib_hca_t *); 302*0a701b1eSRobert Gordon static void rib_close_channels(rib_conn_list_t *); 3037c478bd9Sstevel@tonic-gate 3047c478bd9Sstevel@tonic-gate /* 3057c478bd9Sstevel@tonic-gate * RPCIB addressing operations 3067c478bd9Sstevel@tonic-gate */ 3077c478bd9Sstevel@tonic-gate 3087c478bd9Sstevel@tonic-gate /* 3097c478bd9Sstevel@tonic-gate * RDMA operations the RPCIB module exports 3107c478bd9Sstevel@tonic-gate */ 3117c478bd9Sstevel@tonic-gate static rdmaops_t rib_ops = { 3127c478bd9Sstevel@tonic-gate rib_reachable, 3137c478bd9Sstevel@tonic-gate rib_conn_get, 3147c478bd9Sstevel@tonic-gate rib_conn_release, 3157c478bd9Sstevel@tonic-gate rib_listen, 3167c478bd9Sstevel@tonic-gate rib_listen_stop, 3177c478bd9Sstevel@tonic-gate rib_registermem, 3187c478bd9Sstevel@tonic-gate rib_deregistermem, 3197c478bd9Sstevel@tonic-gate rib_registermemsync, 3207c478bd9Sstevel@tonic-gate rib_deregistermemsync, 3217c478bd9Sstevel@tonic-gate rib_syncmem, 3227c478bd9Sstevel@tonic-gate rib_reg_buf_alloc, 3237c478bd9Sstevel@tonic-gate rib_reg_buf_free, 3247c478bd9Sstevel@tonic-gate rib_send, 3257c478bd9Sstevel@tonic-gate rib_send_resp, 3267c478bd9Sstevel@tonic-gate rib_post_resp, 327*0a701b1eSRobert Gordon rib_post_resp_remove, 3287c478bd9Sstevel@tonic-gate rib_post_recv, 3297c478bd9Sstevel@tonic-gate rib_recv, 3307c478bd9Sstevel@tonic-gate rib_read, 3317c478bd9Sstevel@tonic-gate rib_write, 332*0a701b1eSRobert Gordon rib_getinfo, 3337c478bd9Sstevel@tonic-gate }; 3347c478bd9Sstevel@tonic-gate 3357c478bd9Sstevel@tonic-gate /* 3367c478bd9Sstevel@tonic-gate * RDMATF RPCIB plugin details 3377c478bd9Sstevel@tonic-gate */ 3387c478bd9Sstevel@tonic-gate static rdma_mod_t rib_mod = 
{ 3397c478bd9Sstevel@tonic-gate "ibtf", /* api name */ 3407c478bd9Sstevel@tonic-gate RDMATF_VERS_1, 3417c478bd9Sstevel@tonic-gate 0, 3427c478bd9Sstevel@tonic-gate &rib_ops, /* rdma op vector for ibtf */ 3437c478bd9Sstevel@tonic-gate }; 3447c478bd9Sstevel@tonic-gate 3457c478bd9Sstevel@tonic-gate static rdma_stat open_hcas(rpcib_state_t *); 3467c478bd9Sstevel@tonic-gate static rdma_stat rib_qp_init(rib_qp_t *, int); 3477c478bd9Sstevel@tonic-gate static void rib_svc_scq_handler(ibt_cq_hdl_t, void *); 3487c478bd9Sstevel@tonic-gate static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *); 3497c478bd9Sstevel@tonic-gate static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *); 3507c478bd9Sstevel@tonic-gate static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *); 3517c478bd9Sstevel@tonic-gate static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num); 352*0a701b1eSRobert Gordon static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t, 353*0a701b1eSRobert Gordon ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_mr_desc_t *); 354*0a701b1eSRobert Gordon static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t, 355*0a701b1eSRobert Gordon ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t); 356*0a701b1eSRobert Gordon static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, ibt_path_info_t *, 357*0a701b1eSRobert Gordon ibt_ip_addr_t *, ibt_ip_addr_t *); 3587c478bd9Sstevel@tonic-gate static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *, 3597c478bd9Sstevel@tonic-gate rib_qp_t **); 3607c478bd9Sstevel@tonic-gate static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t, 3617c478bd9Sstevel@tonic-gate rib_qp_t **); 3627c478bd9Sstevel@tonic-gate static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *); 3637c478bd9Sstevel@tonic-gate static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *); 3647c478bd9Sstevel@tonic-gate static int rib_free_sendwait(struct send_wid *); 3657c478bd9Sstevel@tonic-gate static struct 
rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid); 3667c478bd9Sstevel@tonic-gate static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd); 3677c478bd9Sstevel@tonic-gate static void rdma_done_rem_list(rib_qp_t *); 3687c478bd9Sstevel@tonic-gate static void rdma_done_notify(rib_qp_t *qp, uint32_t xid); 3697c478bd9Sstevel@tonic-gate 3707c478bd9Sstevel@tonic-gate static void rib_async_handler(void *, 3717c478bd9Sstevel@tonic-gate ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *); 3727c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *); 3737c478bd9Sstevel@tonic-gate static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *); 3747c478bd9Sstevel@tonic-gate static int rib_free_svc_recv(struct svc_recv *); 3757c478bd9Sstevel@tonic-gate static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t); 3767c478bd9Sstevel@tonic-gate static void rib_free_wid(struct recv_wid *); 3777c478bd9Sstevel@tonic-gate static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *); 3787c478bd9Sstevel@tonic-gate static void rib_detach_hca(rib_hca_t *); 379*0a701b1eSRobert Gordon static rdma_stat rib_chk_srv_ibaddr(struct netbuf *, int, 380*0a701b1eSRobert Gordon ibt_path_info_t *, ibt_ip_addr_t *, ibt_ip_addr_t *); 3817c478bd9Sstevel@tonic-gate 3827c478bd9Sstevel@tonic-gate /* 3837c478bd9Sstevel@tonic-gate * Registration with IBTF as a consumer 3847c478bd9Sstevel@tonic-gate */ 3857c478bd9Sstevel@tonic-gate static struct ibt_clnt_modinfo_s rib_modinfo = { 38643ed929aSsrust IBTI_V2, 3877c478bd9Sstevel@tonic-gate IBT_GENERIC, 3887c478bd9Sstevel@tonic-gate rib_async_handler, /* async event handler */ 3897c478bd9Sstevel@tonic-gate NULL, /* Memory Region Handler */ 3907c478bd9Sstevel@tonic-gate "nfs/ib" 3917c478bd9Sstevel@tonic-gate }; 3927c478bd9Sstevel@tonic-gate 3937c478bd9Sstevel@tonic-gate /* 3947c478bd9Sstevel@tonic-gate * Global strucuture 3957c478bd9Sstevel@tonic-gate */ 3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate typedef struct rpcib_s { 3987c478bd9Sstevel@tonic-gate dev_info_t *rpcib_dip; 3997c478bd9Sstevel@tonic-gate kmutex_t rpcib_mutex; 4007c478bd9Sstevel@tonic-gate } rpcib_t; 4017c478bd9Sstevel@tonic-gate 4027c478bd9Sstevel@tonic-gate rpcib_t rpcib; 4037c478bd9Sstevel@tonic-gate 4047c478bd9Sstevel@tonic-gate /* 4057c478bd9Sstevel@tonic-gate * /etc/system controlled variable to control 4067c478bd9Sstevel@tonic-gate * debugging in rpcib kernel module. 4077c478bd9Sstevel@tonic-gate * Set it to values greater that 1 to control 4087c478bd9Sstevel@tonic-gate * the amount of debugging messages required. 4097c478bd9Sstevel@tonic-gate */ 4107c478bd9Sstevel@tonic-gate int rib_debug = 0; 4117c478bd9Sstevel@tonic-gate 412*0a701b1eSRobert Gordon 4137c478bd9Sstevel@tonic-gate int 4147c478bd9Sstevel@tonic-gate _init(void) 4157c478bd9Sstevel@tonic-gate { 4167c478bd9Sstevel@tonic-gate int error; 417*0a701b1eSRobert Gordon int ret; 4187c478bd9Sstevel@tonic-gate 4197c478bd9Sstevel@tonic-gate error = mod_install((struct modlinkage *)&rib_modlinkage); 4207c478bd9Sstevel@tonic-gate if (error != 0) { 4217c478bd9Sstevel@tonic-gate /* 4227c478bd9Sstevel@tonic-gate * Could not load module 4237c478bd9Sstevel@tonic-gate */ 4247c478bd9Sstevel@tonic-gate return (error); 4257c478bd9Sstevel@tonic-gate } 426*0a701b1eSRobert Gordon ret = ldi_ident_from_mod(&rib_modlinkage, &rpcib_li); 427*0a701b1eSRobert Gordon if (ret != 0) 428*0a701b1eSRobert Gordon rpcib_li = NULL; 4297c478bd9Sstevel@tonic-gate mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL); 4307c478bd9Sstevel@tonic-gate 4317c478bd9Sstevel@tonic-gate return (0); 4327c478bd9Sstevel@tonic-gate } 4337c478bd9Sstevel@tonic-gate 4347c478bd9Sstevel@tonic-gate int 4357c478bd9Sstevel@tonic-gate _fini() 4367c478bd9Sstevel@tonic-gate { 4377c478bd9Sstevel@tonic-gate int status; 4387c478bd9Sstevel@tonic-gate 4397c478bd9Sstevel@tonic-gate if ((status = rdma_unregister_mod(&rib_mod)) != RDMA_SUCCESS) { 
4407c478bd9Sstevel@tonic-gate return (EBUSY); 4417c478bd9Sstevel@tonic-gate } 4427c478bd9Sstevel@tonic-gate 4437c478bd9Sstevel@tonic-gate /* 4447c478bd9Sstevel@tonic-gate * Remove module 4457c478bd9Sstevel@tonic-gate */ 4467c478bd9Sstevel@tonic-gate if ((status = mod_remove(&rib_modlinkage)) != 0) { 4477c478bd9Sstevel@tonic-gate (void) rdma_register_mod(&rib_mod); 4487c478bd9Sstevel@tonic-gate return (status); 4497c478bd9Sstevel@tonic-gate } 4507c478bd9Sstevel@tonic-gate mutex_destroy(&plugin_state_lock); 451*0a701b1eSRobert Gordon ldi_ident_release(rpcib_li); 4527c478bd9Sstevel@tonic-gate return (0); 4537c478bd9Sstevel@tonic-gate } 4547c478bd9Sstevel@tonic-gate 4557c478bd9Sstevel@tonic-gate int 4567c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop) 4577c478bd9Sstevel@tonic-gate { 4587c478bd9Sstevel@tonic-gate return (mod_info(&rib_modlinkage, modinfop)); 4597c478bd9Sstevel@tonic-gate } 4607c478bd9Sstevel@tonic-gate 4617c478bd9Sstevel@tonic-gate 4627c478bd9Sstevel@tonic-gate /* 4637c478bd9Sstevel@tonic-gate * rpcib_getinfo() 4647c478bd9Sstevel@tonic-gate * Given the device number, return the devinfo pointer or the 4657c478bd9Sstevel@tonic-gate * instance number. 4667c478bd9Sstevel@tonic-gate * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach. 
4677c478bd9Sstevel@tonic-gate */ 4687c478bd9Sstevel@tonic-gate 4697c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 4707c478bd9Sstevel@tonic-gate static int 4717c478bd9Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 4727c478bd9Sstevel@tonic-gate { 4737c478bd9Sstevel@tonic-gate int ret = DDI_SUCCESS; 4747c478bd9Sstevel@tonic-gate 4757c478bd9Sstevel@tonic-gate switch (cmd) { 4767c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO: 4777c478bd9Sstevel@tonic-gate if (rpcib.rpcib_dip != NULL) 4787c478bd9Sstevel@tonic-gate *result = rpcib.rpcib_dip; 4797c478bd9Sstevel@tonic-gate else { 4807c478bd9Sstevel@tonic-gate *result = NULL; 4817c478bd9Sstevel@tonic-gate ret = DDI_FAILURE; 4827c478bd9Sstevel@tonic-gate } 4837c478bd9Sstevel@tonic-gate break; 4847c478bd9Sstevel@tonic-gate 4857c478bd9Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE: 4867c478bd9Sstevel@tonic-gate *result = NULL; 4877c478bd9Sstevel@tonic-gate break; 4887c478bd9Sstevel@tonic-gate 4897c478bd9Sstevel@tonic-gate default: 4907c478bd9Sstevel@tonic-gate ret = DDI_FAILURE; 4917c478bd9Sstevel@tonic-gate } 4927c478bd9Sstevel@tonic-gate return (ret); 4937c478bd9Sstevel@tonic-gate } 4947c478bd9Sstevel@tonic-gate 4957c478bd9Sstevel@tonic-gate static int 4967c478bd9Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4977c478bd9Sstevel@tonic-gate { 4987c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 4997c478bd9Sstevel@tonic-gate rdma_stat r_status; 5007c478bd9Sstevel@tonic-gate 5017c478bd9Sstevel@tonic-gate switch (cmd) { 5027c478bd9Sstevel@tonic-gate case DDI_ATTACH: 5037c478bd9Sstevel@tonic-gate break; 5047c478bd9Sstevel@tonic-gate case DDI_RESUME: 5057c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 5067c478bd9Sstevel@tonic-gate default: 5077c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5087c478bd9Sstevel@tonic-gate } 5097c478bd9Sstevel@tonic-gate 5107c478bd9Sstevel@tonic-gate mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL); 
5117c478bd9Sstevel@tonic-gate 5127c478bd9Sstevel@tonic-gate mutex_enter(&rpcib.rpcib_mutex); 5137c478bd9Sstevel@tonic-gate if (rpcib.rpcib_dip != NULL) { 5147c478bd9Sstevel@tonic-gate mutex_exit(&rpcib.rpcib_mutex); 5157c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5167c478bd9Sstevel@tonic-gate } 5177c478bd9Sstevel@tonic-gate rpcib.rpcib_dip = dip; 5187c478bd9Sstevel@tonic-gate mutex_exit(&rpcib.rpcib_mutex); 5197c478bd9Sstevel@tonic-gate /* 5207c478bd9Sstevel@tonic-gate * Create the "rpcib" minor-node. 5217c478bd9Sstevel@tonic-gate */ 5227c478bd9Sstevel@tonic-gate if (ddi_create_minor_node(dip, 5237c478bd9Sstevel@tonic-gate "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) { 5247c478bd9Sstevel@tonic-gate /* Error message, no cmn_err as they print on console */ 5257c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5267c478bd9Sstevel@tonic-gate } 5277c478bd9Sstevel@tonic-gate 5287c478bd9Sstevel@tonic-gate if (rib_stat == NULL) { 5297c478bd9Sstevel@tonic-gate rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP); 5307c478bd9Sstevel@tonic-gate mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL); 5317c478bd9Sstevel@tonic-gate } 5327c478bd9Sstevel@tonic-gate 5337c478bd9Sstevel@tonic-gate rib_stat->hca_count = ibt_get_hca_list(&rib_stat->hca_guids); 5347c478bd9Sstevel@tonic-gate if (rib_stat->hca_count < 1) { 5357c478bd9Sstevel@tonic-gate mutex_destroy(&rib_stat->open_hca_lock); 5367c478bd9Sstevel@tonic-gate kmem_free(rib_stat, sizeof (*rib_stat)); 5377c478bd9Sstevel@tonic-gate rib_stat = NULL; 5387c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5397c478bd9Sstevel@tonic-gate } 5407c478bd9Sstevel@tonic-gate 5417c478bd9Sstevel@tonic-gate ibt_status = ibt_attach(&rib_modinfo, dip, 5427c478bd9Sstevel@tonic-gate (void *)rib_stat, &rib_stat->ibt_clnt_hdl); 543*0a701b1eSRobert Gordon 5447c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 5457c478bd9Sstevel@tonic-gate ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count); 5467c478bd9Sstevel@tonic-gate 
mutex_destroy(&rib_stat->open_hca_lock); 5477c478bd9Sstevel@tonic-gate kmem_free(rib_stat, sizeof (*rib_stat)); 5487c478bd9Sstevel@tonic-gate rib_stat = NULL; 5497c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5507c478bd9Sstevel@tonic-gate } 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate mutex_enter(&rib_stat->open_hca_lock); 5537c478bd9Sstevel@tonic-gate if (open_hcas(rib_stat) != RDMA_SUCCESS) { 5547c478bd9Sstevel@tonic-gate ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count); 5557c478bd9Sstevel@tonic-gate (void) ibt_detach(rib_stat->ibt_clnt_hdl); 5567c478bd9Sstevel@tonic-gate mutex_exit(&rib_stat->open_hca_lock); 5577c478bd9Sstevel@tonic-gate mutex_destroy(&rib_stat->open_hca_lock); 5587c478bd9Sstevel@tonic-gate kmem_free(rib_stat, sizeof (*rib_stat)); 5597c478bd9Sstevel@tonic-gate rib_stat = NULL; 5607c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5617c478bd9Sstevel@tonic-gate } 5627c478bd9Sstevel@tonic-gate mutex_exit(&rib_stat->open_hca_lock); 5637c478bd9Sstevel@tonic-gate 5647c478bd9Sstevel@tonic-gate /* 5657c478bd9Sstevel@tonic-gate * Register with rdmatf 5667c478bd9Sstevel@tonic-gate */ 5677c478bd9Sstevel@tonic-gate rib_mod.rdma_count = rib_stat->hca_count; 5687c478bd9Sstevel@tonic-gate r_status = rdma_register_mod(&rib_mod); 5697c478bd9Sstevel@tonic-gate if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) { 5707c478bd9Sstevel@tonic-gate rib_detach_hca(rib_stat->hca); 5717c478bd9Sstevel@tonic-gate ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count); 5727c478bd9Sstevel@tonic-gate (void) ibt_detach(rib_stat->ibt_clnt_hdl); 5737c478bd9Sstevel@tonic-gate mutex_destroy(&rib_stat->open_hca_lock); 5747c478bd9Sstevel@tonic-gate kmem_free(rib_stat, sizeof (*rib_stat)); 5757c478bd9Sstevel@tonic-gate rib_stat = NULL; 5767c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5777c478bd9Sstevel@tonic-gate } 5787c478bd9Sstevel@tonic-gate 5797c478bd9Sstevel@tonic-gate 5807c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 
5817c478bd9Sstevel@tonic-gate } 5827c478bd9Sstevel@tonic-gate 5837c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 5847c478bd9Sstevel@tonic-gate static int 5857c478bd9Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5867c478bd9Sstevel@tonic-gate { 5877c478bd9Sstevel@tonic-gate switch (cmd) { 5887c478bd9Sstevel@tonic-gate 5897c478bd9Sstevel@tonic-gate case DDI_DETACH: 5907c478bd9Sstevel@tonic-gate break; 5917c478bd9Sstevel@tonic-gate 5927c478bd9Sstevel@tonic-gate case DDI_SUSPEND: 5937c478bd9Sstevel@tonic-gate default: 5947c478bd9Sstevel@tonic-gate return (DDI_FAILURE); 5957c478bd9Sstevel@tonic-gate } 5967c478bd9Sstevel@tonic-gate 5977c478bd9Sstevel@tonic-gate /* 5987c478bd9Sstevel@tonic-gate * Detach the hca and free resources 5997c478bd9Sstevel@tonic-gate */ 6007c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 6017c478bd9Sstevel@tonic-gate plugin_state = NO_ACCEPT; 6027c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 6037c478bd9Sstevel@tonic-gate rib_detach_hca(rib_stat->hca); 6047c478bd9Sstevel@tonic-gate ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count); 6057c478bd9Sstevel@tonic-gate (void) ibt_detach(rib_stat->ibt_clnt_hdl); 6067c478bd9Sstevel@tonic-gate 6077c478bd9Sstevel@tonic-gate mutex_enter(&rpcib.rpcib_mutex); 6087c478bd9Sstevel@tonic-gate rpcib.rpcib_dip = NULL; 6097c478bd9Sstevel@tonic-gate mutex_exit(&rpcib.rpcib_mutex); 6107c478bd9Sstevel@tonic-gate 6117c478bd9Sstevel@tonic-gate mutex_destroy(&rpcib.rpcib_mutex); 6127c478bd9Sstevel@tonic-gate return (DDI_SUCCESS); 6137c478bd9Sstevel@tonic-gate } 6147c478bd9Sstevel@tonic-gate 6157c478bd9Sstevel@tonic-gate 6167c478bd9Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int); 6177c478bd9Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int); 6187c478bd9Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype); 6197c478bd9Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t); 
6207c478bd9Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *); 6217c478bd9Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *); 6227c478bd9Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *); 6237c478bd9Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *); 6247c478bd9Sstevel@tonic-gate 625*0a701b1eSRobert Gordon 6267c478bd9Sstevel@tonic-gate /* 6277c478bd9Sstevel@tonic-gate * One CQ pair per HCA 6287c478bd9Sstevel@tonic-gate */ 6297c478bd9Sstevel@tonic-gate static rdma_stat 6307c478bd9Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler, 6317c478bd9Sstevel@tonic-gate rib_cq_t **cqp, rpcib_state_t *ribstat) 6327c478bd9Sstevel@tonic-gate { 6337c478bd9Sstevel@tonic-gate rib_cq_t *cq; 6347c478bd9Sstevel@tonic-gate ibt_cq_attr_t cq_attr; 6357c478bd9Sstevel@tonic-gate uint32_t real_size; 6367c478bd9Sstevel@tonic-gate ibt_status_t status; 6377c478bd9Sstevel@tonic-gate rdma_stat error = RDMA_SUCCESS; 6387c478bd9Sstevel@tonic-gate 6397c478bd9Sstevel@tonic-gate cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP); 6407c478bd9Sstevel@tonic-gate cq->rib_hca = hca; 6417c478bd9Sstevel@tonic-gate cq_attr.cq_size = cq_size; 6427c478bd9Sstevel@tonic-gate cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 6437c478bd9Sstevel@tonic-gate status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl, 6447c478bd9Sstevel@tonic-gate &real_size); 6457c478bd9Sstevel@tonic-gate if (status != IBT_SUCCESS) { 6467c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed," 6477c478bd9Sstevel@tonic-gate " status=%d", status); 6487c478bd9Sstevel@tonic-gate error = RDMA_FAILED; 6497c478bd9Sstevel@tonic-gate goto fail; 6507c478bd9Sstevel@tonic-gate } 6517c478bd9Sstevel@tonic-gate ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, ribstat); 6527c478bd9Sstevel@tonic-gate 6537c478bd9Sstevel@tonic-gate /* 6547c478bd9Sstevel@tonic-gate * Enable CQ callbacks. 
CQ Callbacks are single shot 6557c478bd9Sstevel@tonic-gate * (e.g. you have to call ibt_enable_cq_notify() 6567c478bd9Sstevel@tonic-gate * after each callback to get another one). 6577c478bd9Sstevel@tonic-gate */ 6587c478bd9Sstevel@tonic-gate status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION); 6597c478bd9Sstevel@tonic-gate if (status != IBT_SUCCESS) { 6607c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "rib_create_cq: " 6617c478bd9Sstevel@tonic-gate "enable_cq_notify failed, status %d", status); 6627c478bd9Sstevel@tonic-gate error = RDMA_FAILED; 6637c478bd9Sstevel@tonic-gate goto fail; 6647c478bd9Sstevel@tonic-gate } 6657c478bd9Sstevel@tonic-gate *cqp = cq; 6667c478bd9Sstevel@tonic-gate 6677c478bd9Sstevel@tonic-gate return (error); 6687c478bd9Sstevel@tonic-gate fail: 6697c478bd9Sstevel@tonic-gate if (cq->rib_cq_hdl) 6707c478bd9Sstevel@tonic-gate (void) ibt_free_cq(cq->rib_cq_hdl); 6717c478bd9Sstevel@tonic-gate if (cq) 6727c478bd9Sstevel@tonic-gate kmem_free(cq, sizeof (rib_cq_t)); 6737c478bd9Sstevel@tonic-gate return (error); 6747c478bd9Sstevel@tonic-gate } 6757c478bd9Sstevel@tonic-gate 6767c478bd9Sstevel@tonic-gate static rdma_stat 6777c478bd9Sstevel@tonic-gate open_hcas(rpcib_state_t *ribstat) 6787c478bd9Sstevel@tonic-gate { 6797c478bd9Sstevel@tonic-gate rib_hca_t *hca; 6807c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 6817c478bd9Sstevel@tonic-gate rdma_stat status; 6827c478bd9Sstevel@tonic-gate ibt_hca_portinfo_t *pinfop; 6837c478bd9Sstevel@tonic-gate ibt_pd_flags_t pd_flags = IBT_PD_NO_FLAGS; 6847c478bd9Sstevel@tonic-gate uint_t size, cq_size; 6857c478bd9Sstevel@tonic-gate int i; 686*0a701b1eSRobert Gordon kstat_t *ksp; 687*0a701b1eSRobert Gordon cache_avl_struct_t example_avl_node; 688*0a701b1eSRobert Gordon char rssc_name[32]; 6897c478bd9Sstevel@tonic-gate 6907c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ribstat->open_hca_lock)); 691*0a701b1eSRobert Gordon 6927c478bd9Sstevel@tonic-gate if (ribstat->hcas == NULL) 6937c478bd9Sstevel@tonic-gate 
ribstat->hcas = kmem_zalloc(ribstat->hca_count * 6947c478bd9Sstevel@tonic-gate sizeof (rib_hca_t), KM_SLEEP); 6957c478bd9Sstevel@tonic-gate 6967c478bd9Sstevel@tonic-gate /* 6977c478bd9Sstevel@tonic-gate * Open a hca and setup for RDMA 6987c478bd9Sstevel@tonic-gate */ 6997c478bd9Sstevel@tonic-gate for (i = 0; i < ribstat->hca_count; i++) { 7007c478bd9Sstevel@tonic-gate ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl, 7017c478bd9Sstevel@tonic-gate ribstat->hca_guids[i], 7027c478bd9Sstevel@tonic-gate &ribstat->hcas[i].hca_hdl); 7037c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7047c478bd9Sstevel@tonic-gate continue; 7057c478bd9Sstevel@tonic-gate } 7067c478bd9Sstevel@tonic-gate ribstat->hcas[i].hca_guid = ribstat->hca_guids[i]; 7077c478bd9Sstevel@tonic-gate hca = &(ribstat->hcas[i]); 7087c478bd9Sstevel@tonic-gate hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl; 7097c478bd9Sstevel@tonic-gate hca->state = HCA_INITED; 7107c478bd9Sstevel@tonic-gate 7117c478bd9Sstevel@tonic-gate /* 7127c478bd9Sstevel@tonic-gate * query HCA info 7137c478bd9Sstevel@tonic-gate */ 7147c478bd9Sstevel@tonic-gate ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs); 7157c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7167c478bd9Sstevel@tonic-gate goto fail1; 7177c478bd9Sstevel@tonic-gate } 7187c478bd9Sstevel@tonic-gate 7197c478bd9Sstevel@tonic-gate /* 7207c478bd9Sstevel@tonic-gate * One PD (Protection Domain) per HCA. 7217c478bd9Sstevel@tonic-gate * A qp is allowed to access a memory region 7227c478bd9Sstevel@tonic-gate * only when it's in the same PD as that of 7237c478bd9Sstevel@tonic-gate * the memory region. 
7247c478bd9Sstevel@tonic-gate */ 7257c478bd9Sstevel@tonic-gate ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl); 7267c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7277c478bd9Sstevel@tonic-gate goto fail1; 7287c478bd9Sstevel@tonic-gate } 7297c478bd9Sstevel@tonic-gate 7307c478bd9Sstevel@tonic-gate /* 7317c478bd9Sstevel@tonic-gate * query HCA ports 7327c478bd9Sstevel@tonic-gate */ 7337c478bd9Sstevel@tonic-gate ibt_status = ibt_query_hca_ports(hca->hca_hdl, 7347c478bd9Sstevel@tonic-gate 0, &pinfop, &hca->hca_nports, &size); 7357c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 7367c478bd9Sstevel@tonic-gate goto fail2; 7377c478bd9Sstevel@tonic-gate } 7387c478bd9Sstevel@tonic-gate hca->hca_ports = pinfop; 7397c478bd9Sstevel@tonic-gate hca->hca_pinfosz = size; 7407c478bd9Sstevel@tonic-gate pinfop = NULL; 7417c478bd9Sstevel@tonic-gate 7427c478bd9Sstevel@tonic-gate cq_size = DEF_CQ_SIZE; /* default cq size */ 7437c478bd9Sstevel@tonic-gate /* 7447c478bd9Sstevel@tonic-gate * Create 2 pairs of cq's (1 pair for client 7457c478bd9Sstevel@tonic-gate * and the other pair for server) on this hca. 7467c478bd9Sstevel@tonic-gate * If number of qp's gets too large, then several 7477c478bd9Sstevel@tonic-gate * cq's will be needed. 
7487c478bd9Sstevel@tonic-gate */ 7497c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler, 7507c478bd9Sstevel@tonic-gate &hca->svc_rcq, ribstat); 7517c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7527c478bd9Sstevel@tonic-gate goto fail3; 7537c478bd9Sstevel@tonic-gate } 7547c478bd9Sstevel@tonic-gate 7557c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_svc_scq_handler, 7567c478bd9Sstevel@tonic-gate &hca->svc_scq, ribstat); 7577c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7587c478bd9Sstevel@tonic-gate goto fail3; 7597c478bd9Sstevel@tonic-gate } 7607c478bd9Sstevel@tonic-gate 7617c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler, 7627c478bd9Sstevel@tonic-gate &hca->clnt_rcq, ribstat); 7637c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7647c478bd9Sstevel@tonic-gate goto fail3; 7657c478bd9Sstevel@tonic-gate } 7667c478bd9Sstevel@tonic-gate 7677c478bd9Sstevel@tonic-gate status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler, 7687c478bd9Sstevel@tonic-gate &hca->clnt_scq, ribstat); 7697c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 7707c478bd9Sstevel@tonic-gate goto fail3; 7717c478bd9Sstevel@tonic-gate } 7727c478bd9Sstevel@tonic-gate 7737c478bd9Sstevel@tonic-gate /* 7747c478bd9Sstevel@tonic-gate * Create buffer pools. 7757c478bd9Sstevel@tonic-gate * Note rib_rbuf_create also allocates memory windows. 
7767c478bd9Sstevel@tonic-gate */ 7777c478bd9Sstevel@tonic-gate hca->recv_pool = rib_rbufpool_create(hca, 7787c478bd9Sstevel@tonic-gate RECV_BUFFER, MAX_BUFS); 7797c478bd9Sstevel@tonic-gate if (hca->recv_pool == NULL) { 7807c478bd9Sstevel@tonic-gate goto fail3; 7817c478bd9Sstevel@tonic-gate } 7827c478bd9Sstevel@tonic-gate 7837c478bd9Sstevel@tonic-gate hca->send_pool = rib_rbufpool_create(hca, 7847c478bd9Sstevel@tonic-gate SEND_BUFFER, MAX_BUFS); 7857c478bd9Sstevel@tonic-gate if (hca->send_pool == NULL) { 7867c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(hca, RECV_BUFFER); 7877c478bd9Sstevel@tonic-gate goto fail3; 7887c478bd9Sstevel@tonic-gate } 7897c478bd9Sstevel@tonic-gate 790*0a701b1eSRobert Gordon if (hca->server_side_cache == NULL) { 791*0a701b1eSRobert Gordon (void) sprintf(rssc_name, 792*0a701b1eSRobert Gordon "rib_server_side_cache_%04d", i); 793*0a701b1eSRobert Gordon hca->server_side_cache = kmem_cache_create( 794*0a701b1eSRobert Gordon rssc_name, 795*0a701b1eSRobert Gordon sizeof (cache_avl_struct_t), 0, 796*0a701b1eSRobert Gordon NULL, 797*0a701b1eSRobert Gordon NULL, 798*0a701b1eSRobert Gordon rib_server_side_cache_reclaim, 799*0a701b1eSRobert Gordon hca, NULL, 0); 800*0a701b1eSRobert Gordon } 801*0a701b1eSRobert Gordon 802*0a701b1eSRobert Gordon avl_create(&hca->avl_tree, 803*0a701b1eSRobert Gordon avl_compare, 804*0a701b1eSRobert Gordon sizeof (cache_avl_struct_t), 805*0a701b1eSRobert Gordon (uint_t)(uintptr_t)&example_avl_node.avl_link- 806*0a701b1eSRobert Gordon (uint_t)(uintptr_t)&example_avl_node); 807*0a701b1eSRobert Gordon 808*0a701b1eSRobert Gordon rw_init(&hca->avl_rw_lock, 809*0a701b1eSRobert Gordon NULL, RW_DRIVER, hca->iblock); 810*0a701b1eSRobert Gordon mutex_init(&hca->cache_allocation, 811*0a701b1eSRobert Gordon NULL, MUTEX_DRIVER, NULL); 812*0a701b1eSRobert Gordon hca->avl_init = TRUE; 813*0a701b1eSRobert Gordon 814*0a701b1eSRobert Gordon /* Create kstats for the cache */ 815*0a701b1eSRobert Gordon ASSERT(INGLOBALZONE(curproc)); 
816*0a701b1eSRobert Gordon 817*0a701b1eSRobert Gordon if (!stats_enabled) { 818*0a701b1eSRobert Gordon ksp = kstat_create_zone("unix", 0, "rpcib_cache", "rpc", 819*0a701b1eSRobert Gordon KSTAT_TYPE_NAMED, 820*0a701b1eSRobert Gordon sizeof (rpcib_kstat) / sizeof (kstat_named_t), 821*0a701b1eSRobert Gordon KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, 822*0a701b1eSRobert Gordon GLOBAL_ZONEID); 823*0a701b1eSRobert Gordon if (ksp) { 824*0a701b1eSRobert Gordon ksp->ks_data = (void *) &rpcib_kstat; 825*0a701b1eSRobert Gordon ksp->ks_update = rpcib_cache_kstat_update; 826*0a701b1eSRobert Gordon kstat_install(ksp); 827*0a701b1eSRobert Gordon stats_enabled = TRUE; 828*0a701b1eSRobert Gordon } 829*0a701b1eSRobert Gordon } 830*0a701b1eSRobert Gordon if (NULL == hca->reg_cache_clean_up) { 831*0a701b1eSRobert Gordon hca->reg_cache_clean_up = ddi_taskq_create(NULL, 832*0a701b1eSRobert Gordon "REG_CACHE_CLEANUP", 1, TASKQ_DEFAULTPRI, 0); 833*0a701b1eSRobert Gordon } 834*0a701b1eSRobert Gordon 8357c478bd9Sstevel@tonic-gate /* 8367c478bd9Sstevel@tonic-gate * Initialize the registered service list and 8377c478bd9Sstevel@tonic-gate * the lock 8387c478bd9Sstevel@tonic-gate */ 8397c478bd9Sstevel@tonic-gate hca->service_list = NULL; 8407c478bd9Sstevel@tonic-gate rw_init(&hca->service_list_lock, NULL, RW_DRIVER, hca->iblock); 8417c478bd9Sstevel@tonic-gate 8427c478bd9Sstevel@tonic-gate mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 8437c478bd9Sstevel@tonic-gate cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL); 8447c478bd9Sstevel@tonic-gate rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER, 8457c478bd9Sstevel@tonic-gate hca->iblock); 8467c478bd9Sstevel@tonic-gate rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER, 8477c478bd9Sstevel@tonic-gate hca->iblock); 8487c478bd9Sstevel@tonic-gate rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock); 8497c478bd9Sstevel@tonic-gate mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock); 8507c478bd9Sstevel@tonic-gate hca->inuse = 
TRUE; 8517c478bd9Sstevel@tonic-gate /* 8527c478bd9Sstevel@tonic-gate * XXX One hca only. Add multi-hca functionality if needed 8537c478bd9Sstevel@tonic-gate * later. 8547c478bd9Sstevel@tonic-gate */ 8557c478bd9Sstevel@tonic-gate ribstat->hca = hca; 8567c478bd9Sstevel@tonic-gate ribstat->nhca_inited++; 8577c478bd9Sstevel@tonic-gate ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz); 8587c478bd9Sstevel@tonic-gate break; 8597c478bd9Sstevel@tonic-gate 8607c478bd9Sstevel@tonic-gate fail3: 8617c478bd9Sstevel@tonic-gate ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz); 8627c478bd9Sstevel@tonic-gate fail2: 8637c478bd9Sstevel@tonic-gate (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 8647c478bd9Sstevel@tonic-gate fail1: 8657c478bd9Sstevel@tonic-gate (void) ibt_close_hca(hca->hca_hdl); 8667c478bd9Sstevel@tonic-gate 8677c478bd9Sstevel@tonic-gate } 8687c478bd9Sstevel@tonic-gate if (ribstat->hca != NULL) 8697c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 8707c478bd9Sstevel@tonic-gate else 8717c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 8727c478bd9Sstevel@tonic-gate } 8737c478bd9Sstevel@tonic-gate 8747c478bd9Sstevel@tonic-gate /* 8757c478bd9Sstevel@tonic-gate * Callback routines 8767c478bd9Sstevel@tonic-gate */ 8777c478bd9Sstevel@tonic-gate 8787c478bd9Sstevel@tonic-gate /* 8797c478bd9Sstevel@tonic-gate * SCQ handlers 8807c478bd9Sstevel@tonic-gate */ 8817c478bd9Sstevel@tonic-gate /* ARGSUSED */ 8827c478bd9Sstevel@tonic-gate static void 8837c478bd9Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 8847c478bd9Sstevel@tonic-gate { 8857c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 8867c478bd9Sstevel@tonic-gate ibt_wc_t wc; 8877c478bd9Sstevel@tonic-gate int i; 8887c478bd9Sstevel@tonic-gate 8897c478bd9Sstevel@tonic-gate /* 8907c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 8917c478bd9Sstevel@tonic-gate * completion queue notification. 
8927c478bd9Sstevel@tonic-gate */ 8937c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 8947c478bd9Sstevel@tonic-gate 8957c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 8967c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 8977c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 8987c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 8997c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 9007c478bd9Sstevel@tonic-gate return; 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate /* 9037c478bd9Sstevel@tonic-gate * Got a send completion 9047c478bd9Sstevel@tonic-gate */ 9057c478bd9Sstevel@tonic-gate if (wc.wc_id != NULL) { /* XXX can it be otherwise ???? */ 90611606941Sjwahlig struct send_wid *wd = (struct send_wid *)(uintptr_t)wc.wc_id; 9077c478bd9Sstevel@tonic-gate CONN *conn = qptoc(wd->qp); 9087c478bd9Sstevel@tonic-gate 9097c478bd9Sstevel@tonic-gate mutex_enter(&wd->sendwait_lock); 9107c478bd9Sstevel@tonic-gate switch (wc.wc_status) { 9117c478bd9Sstevel@tonic-gate case IBT_WC_SUCCESS: 9127c478bd9Sstevel@tonic-gate wd->status = RDMA_SUCCESS; 9137c478bd9Sstevel@tonic-gate break; 9147c478bd9Sstevel@tonic-gate case IBT_WC_WR_FLUSHED_ERR: 9157c478bd9Sstevel@tonic-gate wd->status = RDMA_FAILED; 9167c478bd9Sstevel@tonic-gate break; 9177c478bd9Sstevel@tonic-gate default: 9187c478bd9Sstevel@tonic-gate /* 9197c478bd9Sstevel@tonic-gate * RC Send Q Error Code Local state Remote State 9207c478bd9Sstevel@tonic-gate * ==================== =========== ============ 9217c478bd9Sstevel@tonic-gate * IBT_WC_BAD_RESPONSE_ERR ERROR None 9227c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_LEN_ERR ERROR None 9237c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_CHAN_OP_ERR ERROR None 9247c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_PROTECT_ERR ERROR None 9257c478bd9Sstevel@tonic-gate * IBT_WC_MEM_WIN_BIND_ERR ERROR None 9267c478bd9Sstevel@tonic-gate * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR 9277c478bd9Sstevel@tonic-gate * 
IBT_WC_REMOTE_ACCESS_ERR ERROR ERROR 9287c478bd9Sstevel@tonic-gate * IBT_WC_REMOTE_OP_ERR ERROR ERROR 9297c478bd9Sstevel@tonic-gate * IBT_WC_RNR_NAK_TIMEOUT_ERR ERROR None 9307c478bd9Sstevel@tonic-gate * IBT_WC_TRANS_TIMEOUT_ERR ERROR None 9317c478bd9Sstevel@tonic-gate * IBT_WC_WR_FLUSHED_ERR None None 9327c478bd9Sstevel@tonic-gate */ 9337c478bd9Sstevel@tonic-gate /* 9347c478bd9Sstevel@tonic-gate * Channel in error state. Set connection to 9357c478bd9Sstevel@tonic-gate * ERROR and cleanup will happen either from 9367c478bd9Sstevel@tonic-gate * conn_release or from rib_conn_get 9377c478bd9Sstevel@tonic-gate */ 9387c478bd9Sstevel@tonic-gate wd->status = RDMA_FAILED; 9397c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 9407c478bd9Sstevel@tonic-gate if (conn->c_state != C_DISCONN_PEND) 941*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 9427c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 9437c478bd9Sstevel@tonic-gate break; 9447c478bd9Sstevel@tonic-gate } 945*0a701b1eSRobert Gordon 9467c478bd9Sstevel@tonic-gate if (wd->cv_sig == 1) { 9477c478bd9Sstevel@tonic-gate /* 9487c478bd9Sstevel@tonic-gate * Notify poster 9497c478bd9Sstevel@tonic-gate */ 9507c478bd9Sstevel@tonic-gate cv_signal(&wd->wait_cv); 9517c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 9527c478bd9Sstevel@tonic-gate } else { 9537c478bd9Sstevel@tonic-gate /* 9547c478bd9Sstevel@tonic-gate * Poster not waiting for notification. 
9557c478bd9Sstevel@tonic-gate * Free the send buffers and send_wid 9567c478bd9Sstevel@tonic-gate */ 9577c478bd9Sstevel@tonic-gate for (i = 0; i < wd->nsbufs; i++) { 9587c478bd9Sstevel@tonic-gate rib_rbuf_free(qptoc(wd->qp), SEND_BUFFER, 95911606941Sjwahlig (void *)(uintptr_t)wd->sbufaddr[i]); 9607c478bd9Sstevel@tonic-gate } 9617c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 9627c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wd); 9637c478bd9Sstevel@tonic-gate } 9647c478bd9Sstevel@tonic-gate } 9657c478bd9Sstevel@tonic-gate } 9667c478bd9Sstevel@tonic-gate } 9677c478bd9Sstevel@tonic-gate 9687c478bd9Sstevel@tonic-gate /* ARGSUSED */ 9697c478bd9Sstevel@tonic-gate static void 9707c478bd9Sstevel@tonic-gate rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 9717c478bd9Sstevel@tonic-gate { 9727c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 9737c478bd9Sstevel@tonic-gate ibt_wc_t wc; 9747c478bd9Sstevel@tonic-gate int i; 9757c478bd9Sstevel@tonic-gate 9767c478bd9Sstevel@tonic-gate /* 9777c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 9787c478bd9Sstevel@tonic-gate * completion queue notification. 9797c478bd9Sstevel@tonic-gate */ 9807c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 9817c478bd9Sstevel@tonic-gate 9827c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 9837c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 9847c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 9857c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 9867c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 9877c478bd9Sstevel@tonic-gate return; 9887c478bd9Sstevel@tonic-gate 9897c478bd9Sstevel@tonic-gate /* 9907c478bd9Sstevel@tonic-gate * Got a send completion 9917c478bd9Sstevel@tonic-gate */ 9927c478bd9Sstevel@tonic-gate if (wc.wc_id != NULL) { /* XXX NULL possible ???? 
*/ 993*0a701b1eSRobert Gordon struct send_wid *wd = 994*0a701b1eSRobert Gordon (struct send_wid *)(uintptr_t)wc.wc_id; 9957c478bd9Sstevel@tonic-gate mutex_enter(&wd->sendwait_lock); 9967c478bd9Sstevel@tonic-gate if (wd->cv_sig == 1) { 9977c478bd9Sstevel@tonic-gate /* 9987c478bd9Sstevel@tonic-gate * Update completion status and notify poster 9997c478bd9Sstevel@tonic-gate */ 10007c478bd9Sstevel@tonic-gate if (wc.wc_status == IBT_WC_SUCCESS) 10017c478bd9Sstevel@tonic-gate wd->status = RDMA_SUCCESS; 10027c478bd9Sstevel@tonic-gate else 10037c478bd9Sstevel@tonic-gate wd->status = RDMA_FAILED; 10047c478bd9Sstevel@tonic-gate cv_signal(&wd->wait_cv); 10057c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 10067c478bd9Sstevel@tonic-gate } else { 10077c478bd9Sstevel@tonic-gate /* 10087c478bd9Sstevel@tonic-gate * Poster not waiting for notification. 10097c478bd9Sstevel@tonic-gate * Free the send buffers and send_wid 10107c478bd9Sstevel@tonic-gate */ 10117c478bd9Sstevel@tonic-gate for (i = 0; i < wd->nsbufs; i++) { 1012*0a701b1eSRobert Gordon rib_rbuf_free(qptoc(wd->qp), 1013*0a701b1eSRobert Gordon SEND_BUFFER, 101411606941Sjwahlig (void *)(uintptr_t)wd->sbufaddr[i]); 10157c478bd9Sstevel@tonic-gate } 10167c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 10177c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wd); 10187c478bd9Sstevel@tonic-gate } 10197c478bd9Sstevel@tonic-gate } 10207c478bd9Sstevel@tonic-gate } 10217c478bd9Sstevel@tonic-gate } 10227c478bd9Sstevel@tonic-gate 10237c478bd9Sstevel@tonic-gate /* 10247c478bd9Sstevel@tonic-gate * RCQ handler 10257c478bd9Sstevel@tonic-gate */ 10267c478bd9Sstevel@tonic-gate /* ARGSUSED */ 10277c478bd9Sstevel@tonic-gate static void 10287c478bd9Sstevel@tonic-gate rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 10297c478bd9Sstevel@tonic-gate { 10307c478bd9Sstevel@tonic-gate rib_qp_t *qp; 10317c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 10327c478bd9Sstevel@tonic-gate ibt_wc_t wc; 10337c478bd9Sstevel@tonic-gate 
struct recv_wid *rwid; 10347c478bd9Sstevel@tonic-gate 10357c478bd9Sstevel@tonic-gate /* 10367c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 10377c478bd9Sstevel@tonic-gate * completion queue notification. 10387c478bd9Sstevel@tonic-gate */ 10397c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 10407c478bd9Sstevel@tonic-gate 10417c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 10427c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 10437c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 10447c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 10457c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 10467c478bd9Sstevel@tonic-gate return; 10477c478bd9Sstevel@tonic-gate 104811606941Sjwahlig rwid = (struct recv_wid *)(uintptr_t)wc.wc_id; 10497c478bd9Sstevel@tonic-gate qp = rwid->qp; 10507c478bd9Sstevel@tonic-gate if (wc.wc_status == IBT_WC_SUCCESS) { 10517c478bd9Sstevel@tonic-gate XDR inxdrs, *xdrs; 10527c478bd9Sstevel@tonic-gate uint_t xid, vers, op, find_xid = 0; 10537c478bd9Sstevel@tonic-gate struct reply *r; 10547c478bd9Sstevel@tonic-gate CONN *conn = qptoc(qp); 1055*0a701b1eSRobert Gordon uint32_t rdma_credit = 0; 10567c478bd9Sstevel@tonic-gate 10577c478bd9Sstevel@tonic-gate xdrs = &inxdrs; 105811606941Sjwahlig xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr, 10597c478bd9Sstevel@tonic-gate wc.wc_bytes_xfer, XDR_DECODE); 10607c478bd9Sstevel@tonic-gate /* 10617c478bd9Sstevel@tonic-gate * Treat xid as opaque (xid is the first entity 10627c478bd9Sstevel@tonic-gate * in the rpc rdma message). 10637c478bd9Sstevel@tonic-gate */ 106411606941Sjwahlig xid = *(uint32_t *)(uintptr_t)rwid->addr; 1065*0a701b1eSRobert Gordon 10667c478bd9Sstevel@tonic-gate /* Skip xid and set the xdr position accordingly. 
*/ 10677c478bd9Sstevel@tonic-gate XDR_SETPOS(xdrs, sizeof (uint32_t)); 10687c478bd9Sstevel@tonic-gate (void) xdr_u_int(xdrs, &vers); 1069*0a701b1eSRobert Gordon (void) xdr_u_int(xdrs, &rdma_credit); 10707c478bd9Sstevel@tonic-gate (void) xdr_u_int(xdrs, &op); 10717c478bd9Sstevel@tonic-gate XDR_DESTROY(xdrs); 1072*0a701b1eSRobert Gordon 10737c478bd9Sstevel@tonic-gate if (vers != RPCRDMA_VERS) { 10747c478bd9Sstevel@tonic-gate /* 1075*0a701b1eSRobert Gordon * Invalid RPC/RDMA version. Cannot 1076*0a701b1eSRobert Gordon * interoperate. Set connection to 1077*0a701b1eSRobert Gordon * ERROR state and bail out. 10787c478bd9Sstevel@tonic-gate */ 10797c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 10807c478bd9Sstevel@tonic-gate if (conn->c_state != C_DISCONN_PEND) 1081*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 10827c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 108311606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 108411606941Sjwahlig (void *)(uintptr_t)rwid->addr); 10857c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 10867c478bd9Sstevel@tonic-gate continue; 10877c478bd9Sstevel@tonic-gate } 10887c478bd9Sstevel@tonic-gate 10897c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 10907c478bd9Sstevel@tonic-gate for (r = qp->replylist; r != NULL; r = r->next) { 10917c478bd9Sstevel@tonic-gate if (r->xid == xid) { 10927c478bd9Sstevel@tonic-gate find_xid = 1; 10937c478bd9Sstevel@tonic-gate switch (op) { 10947c478bd9Sstevel@tonic-gate case RDMA_MSG: 10957c478bd9Sstevel@tonic-gate case RDMA_NOMSG: 10967c478bd9Sstevel@tonic-gate case RDMA_MSGP: 10977c478bd9Sstevel@tonic-gate r->status = RDMA_SUCCESS; 10987c478bd9Sstevel@tonic-gate r->vaddr_cq = rwid->addr; 1099*0a701b1eSRobert Gordon r->bytes_xfer = 1100*0a701b1eSRobert Gordon wc.wc_bytes_xfer; 11017c478bd9Sstevel@tonic-gate cv_signal(&r->wait_cv); 11027c478bd9Sstevel@tonic-gate break; 11037c478bd9Sstevel@tonic-gate default: 1104*0a701b1eSRobert Gordon rib_rbuf_free(qptoc(qp), 1105*0a701b1eSRobert Gordon 
RECV_BUFFER, 1106*0a701b1eSRobert Gordon (void *)(uintptr_t) 1107*0a701b1eSRobert Gordon rwid->addr); 11087c478bd9Sstevel@tonic-gate break; 11097c478bd9Sstevel@tonic-gate } 11107c478bd9Sstevel@tonic-gate break; 11117c478bd9Sstevel@tonic-gate } 11127c478bd9Sstevel@tonic-gate } 11137c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 11147c478bd9Sstevel@tonic-gate if (find_xid == 0) { 11157c478bd9Sstevel@tonic-gate /* RPC caller not waiting for reply */ 1116*0a701b1eSRobert Gordon 1117*0a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__nomatchxid1, 1118*0a701b1eSRobert Gordon int, xid); 1119*0a701b1eSRobert Gordon 11207c478bd9Sstevel@tonic-gate rib_rbuf_free(qptoc(qp), RECV_BUFFER, 112111606941Sjwahlig (void *)(uintptr_t)rwid->addr); 11227c478bd9Sstevel@tonic-gate } 11237c478bd9Sstevel@tonic-gate } else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) { 11247c478bd9Sstevel@tonic-gate CONN *conn = qptoc(qp); 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate /* 11277c478bd9Sstevel@tonic-gate * Connection being flushed. 
Just free 11287c478bd9Sstevel@tonic-gate * the posted buffer 11297c478bd9Sstevel@tonic-gate */ 113011606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 113111606941Sjwahlig (void *)(uintptr_t)rwid->addr); 11327c478bd9Sstevel@tonic-gate } else { 11337c478bd9Sstevel@tonic-gate CONN *conn = qptoc(qp); 11347c478bd9Sstevel@tonic-gate /* 11357c478bd9Sstevel@tonic-gate * RC Recv Q Error Code Local state Remote State 11367c478bd9Sstevel@tonic-gate * ==================== =========== ============ 11377c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_ACCESS_ERR ERROR ERROR when NAK recvd 11387c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_LEN_ERR ERROR ERROR when NAK recvd 11397c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_PROTECT_ERR ERROR ERROR when NAK recvd 11407c478bd9Sstevel@tonic-gate * IBT_WC_LOCAL_CHAN_OP_ERR ERROR ERROR when NAK recvd 11417c478bd9Sstevel@tonic-gate * IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR when NAK recvd 11427c478bd9Sstevel@tonic-gate * IBT_WC_WR_FLUSHED_ERR None None 11437c478bd9Sstevel@tonic-gate */ 11447c478bd9Sstevel@tonic-gate /* 11457c478bd9Sstevel@tonic-gate * Channel in error state. Set connection 11467c478bd9Sstevel@tonic-gate * in ERROR state. 
11477c478bd9Sstevel@tonic-gate */ 11487c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 11497c478bd9Sstevel@tonic-gate if (conn->c_state != C_DISCONN_PEND) 1150*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 11517c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 115211606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 115311606941Sjwahlig (void *)(uintptr_t)rwid->addr); 11547c478bd9Sstevel@tonic-gate } 11557c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 11567c478bd9Sstevel@tonic-gate } 11577c478bd9Sstevel@tonic-gate } 11587c478bd9Sstevel@tonic-gate 11597c478bd9Sstevel@tonic-gate /* Server side */ 11607c478bd9Sstevel@tonic-gate /* ARGSUSED */ 11617c478bd9Sstevel@tonic-gate static void 11627c478bd9Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 11637c478bd9Sstevel@tonic-gate { 1164*0a701b1eSRobert Gordon rdma_recv_data_t *rdp; 11657c478bd9Sstevel@tonic-gate rib_qp_t *qp; 11667c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 11677c478bd9Sstevel@tonic-gate ibt_wc_t wc; 11687c478bd9Sstevel@tonic-gate struct svc_recv *s_recvp; 11697c478bd9Sstevel@tonic-gate CONN *conn; 11707c478bd9Sstevel@tonic-gate mblk_t *mp; 11717c478bd9Sstevel@tonic-gate 11727c478bd9Sstevel@tonic-gate /* 11737c478bd9Sstevel@tonic-gate * Re-enable cq notify here to avoid missing any 11747c478bd9Sstevel@tonic-gate * completion queue notification. 
11757c478bd9Sstevel@tonic-gate */ 11767c478bd9Sstevel@tonic-gate (void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 11777c478bd9Sstevel@tonic-gate 11787c478bd9Sstevel@tonic-gate ibt_status = IBT_SUCCESS; 11797c478bd9Sstevel@tonic-gate while (ibt_status != IBT_CQ_EMPTY) { 11807c478bd9Sstevel@tonic-gate bzero(&wc, sizeof (wc)); 11817c478bd9Sstevel@tonic-gate ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 11827c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) 11837c478bd9Sstevel@tonic-gate return; 11847c478bd9Sstevel@tonic-gate 118511606941Sjwahlig s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id; 11867c478bd9Sstevel@tonic-gate qp = s_recvp->qp; 11877c478bd9Sstevel@tonic-gate conn = qptoc(qp); 11887c478bd9Sstevel@tonic-gate mutex_enter(&qp->posted_rbufs_lock); 11897c478bd9Sstevel@tonic-gate qp->n_posted_rbufs--; 1190*0a701b1eSRobert Gordon #if defined(MEASURE_POOL_DEPTH) 1191*0a701b1eSRobert Gordon rib_posted_rbufs(preposted_rbufs - qp->n_posted_rbufs); 1192*0a701b1eSRobert Gordon #endif 11937c478bd9Sstevel@tonic-gate if (qp->n_posted_rbufs == 0) 11947c478bd9Sstevel@tonic-gate cv_signal(&qp->posted_rbufs_cv); 11957c478bd9Sstevel@tonic-gate mutex_exit(&qp->posted_rbufs_lock); 11967c478bd9Sstevel@tonic-gate 11977c478bd9Sstevel@tonic-gate if (wc.wc_status == IBT_WC_SUCCESS) { 11987c478bd9Sstevel@tonic-gate XDR inxdrs, *xdrs; 11997c478bd9Sstevel@tonic-gate uint_t xid, vers, op; 1200*0a701b1eSRobert Gordon uint32_t rdma_credit; 12017c478bd9Sstevel@tonic-gate 12027c478bd9Sstevel@tonic-gate xdrs = &inxdrs; 12037c478bd9Sstevel@tonic-gate /* s_recvp->vaddr stores data */ 120411606941Sjwahlig xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr, 12057c478bd9Sstevel@tonic-gate wc.wc_bytes_xfer, XDR_DECODE); 12067c478bd9Sstevel@tonic-gate 12077c478bd9Sstevel@tonic-gate /* 12087c478bd9Sstevel@tonic-gate * Treat xid as opaque (xid is the first entity 12097c478bd9Sstevel@tonic-gate * in the rpc rdma message). 
12107c478bd9Sstevel@tonic-gate */ 121111606941Sjwahlig xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr; 12127c478bd9Sstevel@tonic-gate /* Skip xid and set the xdr position accordingly. */ 12137c478bd9Sstevel@tonic-gate XDR_SETPOS(xdrs, sizeof (uint32_t)); 12147c478bd9Sstevel@tonic-gate if (!xdr_u_int(xdrs, &vers) || 1215*0a701b1eSRobert Gordon !xdr_u_int(xdrs, &rdma_credit) || 12167c478bd9Sstevel@tonic-gate !xdr_u_int(xdrs, &op)) { 12177c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 121811606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12197c478bd9Sstevel@tonic-gate XDR_DESTROY(xdrs); 12207c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 12217c478bd9Sstevel@tonic-gate continue; 12227c478bd9Sstevel@tonic-gate } 12237c478bd9Sstevel@tonic-gate XDR_DESTROY(xdrs); 12247c478bd9Sstevel@tonic-gate 12257c478bd9Sstevel@tonic-gate if (vers != RPCRDMA_VERS) { 12267c478bd9Sstevel@tonic-gate /* 1227*0a701b1eSRobert Gordon * Invalid RPC/RDMA version. 1228*0a701b1eSRobert Gordon * Drop rpc rdma message. 12297c478bd9Sstevel@tonic-gate */ 12307c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 123111606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12327c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 12337c478bd9Sstevel@tonic-gate continue; 12347c478bd9Sstevel@tonic-gate } 12357c478bd9Sstevel@tonic-gate /* 12367c478bd9Sstevel@tonic-gate * Is this for RDMA_DONE? 
12377c478bd9Sstevel@tonic-gate */ 12387c478bd9Sstevel@tonic-gate if (op == RDMA_DONE) { 12397c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 124011606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12417c478bd9Sstevel@tonic-gate /* 12427c478bd9Sstevel@tonic-gate * Wake up the thread waiting on 12437c478bd9Sstevel@tonic-gate * a RDMA_DONE for xid 12447c478bd9Sstevel@tonic-gate */ 12457c478bd9Sstevel@tonic-gate mutex_enter(&qp->rdlist_lock); 12467c478bd9Sstevel@tonic-gate rdma_done_notify(qp, xid); 12477c478bd9Sstevel@tonic-gate mutex_exit(&qp->rdlist_lock); 12487c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 12497c478bd9Sstevel@tonic-gate continue; 12507c478bd9Sstevel@tonic-gate } 12517c478bd9Sstevel@tonic-gate 12527c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 12537c478bd9Sstevel@tonic-gate if (plugin_state == ACCEPT) { 1254*0a701b1eSRobert Gordon while ((mp = allocb(sizeof (*rdp), BPRI_LO)) 1255*0a701b1eSRobert Gordon == NULL) 1256*0a701b1eSRobert Gordon (void) strwaitbuf( 1257*0a701b1eSRobert Gordon sizeof (*rdp), BPRI_LO); 12587c478bd9Sstevel@tonic-gate /* 12597c478bd9Sstevel@tonic-gate * Plugin is in accept state, hence the master 12607c478bd9Sstevel@tonic-gate * transport queue for this is still accepting 12617c478bd9Sstevel@tonic-gate * requests. Hence we can call svc_queuereq to 12627c478bd9Sstevel@tonic-gate * queue this recieved msg. 
12637c478bd9Sstevel@tonic-gate */ 1264*0a701b1eSRobert Gordon rdp = (rdma_recv_data_t *)mp->b_rptr; 1265*0a701b1eSRobert Gordon rdp->conn = conn; 1266*0a701b1eSRobert Gordon rdp->rpcmsg.addr = 1267*0a701b1eSRobert Gordon (caddr_t)(uintptr_t)s_recvp->vaddr; 1268*0a701b1eSRobert Gordon rdp->rpcmsg.type = RECV_BUFFER; 1269*0a701b1eSRobert Gordon rdp->rpcmsg.len = wc.wc_bytes_xfer; 1270*0a701b1eSRobert Gordon rdp->status = wc.wc_status; 12717c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 12727c478bd9Sstevel@tonic-gate conn->c_ref++; 12737c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 1274*0a701b1eSRobert Gordon mp->b_wptr += sizeof (*rdp); 12757c478bd9Sstevel@tonic-gate svc_queuereq((queue_t *)rib_stat->q, mp); 12767c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 12777c478bd9Sstevel@tonic-gate } else { 12787c478bd9Sstevel@tonic-gate /* 12797c478bd9Sstevel@tonic-gate * The master transport for this is going 12807c478bd9Sstevel@tonic-gate * away and the queue is not accepting anymore 12817c478bd9Sstevel@tonic-gate * requests for krpc, so don't do anything, just 12827c478bd9Sstevel@tonic-gate * free the msg. 12837c478bd9Sstevel@tonic-gate */ 12847c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 12857c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 128611606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12877c478bd9Sstevel@tonic-gate } 12887c478bd9Sstevel@tonic-gate } else { 12897c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 129011606941Sjwahlig (void *)(uintptr_t)s_recvp->vaddr); 12917c478bd9Sstevel@tonic-gate } 12927c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 12937c478bd9Sstevel@tonic-gate } 12947c478bd9Sstevel@tonic-gate } 12957c478bd9Sstevel@tonic-gate 12967c478bd9Sstevel@tonic-gate /* 12977c478bd9Sstevel@tonic-gate * Handles DR event of IBT_HCA_DETACH_EVENT. 
12987c478bd9Sstevel@tonic-gate */ 12997c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13007c478bd9Sstevel@tonic-gate static void 13017c478bd9Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 13027c478bd9Sstevel@tonic-gate ibt_async_code_t code, ibt_async_event_t *event) 13037c478bd9Sstevel@tonic-gate { 13047c478bd9Sstevel@tonic-gate 13057c478bd9Sstevel@tonic-gate switch (code) { 13067c478bd9Sstevel@tonic-gate case IBT_HCA_ATTACH_EVENT: 13077c478bd9Sstevel@tonic-gate /* ignore */ 13087c478bd9Sstevel@tonic-gate break; 13097c478bd9Sstevel@tonic-gate case IBT_HCA_DETACH_EVENT: 13107c478bd9Sstevel@tonic-gate { 13117c478bd9Sstevel@tonic-gate ASSERT(rib_stat->hca->hca_hdl == hca_hdl); 13127c478bd9Sstevel@tonic-gate rib_detach_hca(rib_stat->hca); 13137c478bd9Sstevel@tonic-gate #ifdef DEBUG 13147c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n"); 13157c478bd9Sstevel@tonic-gate #endif 13167c478bd9Sstevel@tonic-gate break; 13177c478bd9Sstevel@tonic-gate } 13187c478bd9Sstevel@tonic-gate #ifdef DEBUG 13197c478bd9Sstevel@tonic-gate case IBT_EVENT_PATH_MIGRATED: 1320*0a701b1eSRobert Gordon cmn_err(CE_NOTE, "rib_async_handler(): " 1321*0a701b1eSRobert Gordon "IBT_EVENT_PATH_MIGRATED\n"); 13227c478bd9Sstevel@tonic-gate break; 13237c478bd9Sstevel@tonic-gate case IBT_EVENT_SQD: 13247c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n"); 13257c478bd9Sstevel@tonic-gate break; 13267c478bd9Sstevel@tonic-gate case IBT_EVENT_COM_EST: 13277c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n"); 13287c478bd9Sstevel@tonic-gate break; 13297c478bd9Sstevel@tonic-gate case IBT_ERROR_CATASTROPHIC_CHAN: 1330*0a701b1eSRobert Gordon cmn_err(CE_NOTE, "rib_async_handler(): " 1331*0a701b1eSRobert Gordon "IBT_ERROR_CATASTROPHIC_CHAN\n"); 13327c478bd9Sstevel@tonic-gate break; 13337c478bd9Sstevel@tonic-gate case IBT_ERROR_INVALID_REQUEST_CHAN: 13347c478bd9Sstevel@tonic-gate 
cmn_err(CE_NOTE, "rib_async_handler(): " 13357c478bd9Sstevel@tonic-gate "IBT_ERROR_INVALID_REQUEST_CHAN\n"); 13367c478bd9Sstevel@tonic-gate break; 13377c478bd9Sstevel@tonic-gate case IBT_ERROR_ACCESS_VIOLATION_CHAN: 13387c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): " 13397c478bd9Sstevel@tonic-gate "IBT_ERROR_ACCESS_VIOLATION_CHAN\n"); 13407c478bd9Sstevel@tonic-gate break; 13417c478bd9Sstevel@tonic-gate case IBT_ERROR_PATH_MIGRATE_REQ: 1342*0a701b1eSRobert Gordon cmn_err(CE_NOTE, "rib_async_handler(): " 1343*0a701b1eSRobert Gordon "IBT_ERROR_PATH_MIGRATE_REQ\n"); 13447c478bd9Sstevel@tonic-gate break; 13457c478bd9Sstevel@tonic-gate case IBT_ERROR_CQ: 13467c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n"); 13477c478bd9Sstevel@tonic-gate break; 13487c478bd9Sstevel@tonic-gate case IBT_ERROR_PORT_DOWN: 13497c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n"); 13507c478bd9Sstevel@tonic-gate break; 13517c478bd9Sstevel@tonic-gate case IBT_EVENT_PORT_UP: 13527c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n"); 13537c478bd9Sstevel@tonic-gate break; 13547c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE1: 13557c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n"); 13567c478bd9Sstevel@tonic-gate break; 13577c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE2: 13587c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n"); 13597c478bd9Sstevel@tonic-gate break; 13607c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE3: 13617c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n"); 13627c478bd9Sstevel@tonic-gate break; 13637c478bd9Sstevel@tonic-gate case IBT_ASYNC_OPAQUE4: 13647c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n"); 13657c478bd9Sstevel@tonic-gate break; 13667c478bd9Sstevel@tonic-gate #endif 13677c478bd9Sstevel@tonic-gate 
default: 13687c478bd9Sstevel@tonic-gate break; 13697c478bd9Sstevel@tonic-gate } 13707c478bd9Sstevel@tonic-gate } 13717c478bd9Sstevel@tonic-gate 13727c478bd9Sstevel@tonic-gate /* 13737c478bd9Sstevel@tonic-gate * Client's reachable function. 13747c478bd9Sstevel@tonic-gate */ 13757c478bd9Sstevel@tonic-gate static rdma_stat 13767c478bd9Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle) 13777c478bd9Sstevel@tonic-gate { 13787c478bd9Sstevel@tonic-gate rib_hca_t *hca; 13797c478bd9Sstevel@tonic-gate rdma_stat status; 13807c478bd9Sstevel@tonic-gate 13817c478bd9Sstevel@tonic-gate /* 13827c478bd9Sstevel@tonic-gate * First check if a hca is still attached 13837c478bd9Sstevel@tonic-gate */ 13847c478bd9Sstevel@tonic-gate *handle = NULL; 13857c478bd9Sstevel@tonic-gate rw_enter(&rib_stat->hca->state_lock, RW_READER); 13867c478bd9Sstevel@tonic-gate if (rib_stat->hca->state != HCA_INITED) { 13877c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 13887c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 13897c478bd9Sstevel@tonic-gate } 13907c478bd9Sstevel@tonic-gate status = rib_ping_srv(addr_type, raddr, &hca); 13917c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 13927c478bd9Sstevel@tonic-gate 13937c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 13947c478bd9Sstevel@tonic-gate *handle = (void *)hca; 13957c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 13967c478bd9Sstevel@tonic-gate } else { 13977c478bd9Sstevel@tonic-gate *handle = NULL; 1398*0a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__pingfailed); 13997c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 14007c478bd9Sstevel@tonic-gate } 14017c478bd9Sstevel@tonic-gate } 14027c478bd9Sstevel@tonic-gate 14037c478bd9Sstevel@tonic-gate /* Client side qp creation */ 14047c478bd9Sstevel@tonic-gate static rdma_stat 14057c478bd9Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp) 14067c478bd9Sstevel@tonic-gate { 14077c478bd9Sstevel@tonic-gate 
rib_qp_t *kqp = NULL; 14087c478bd9Sstevel@tonic-gate CONN *conn; 1409*0a701b1eSRobert Gordon rdma_clnt_cred_ctrl_t *cc_info; 14107c478bd9Sstevel@tonic-gate 14117c478bd9Sstevel@tonic-gate ASSERT(qp != NULL); 14127c478bd9Sstevel@tonic-gate *qp = NULL; 14137c478bd9Sstevel@tonic-gate 14147c478bd9Sstevel@tonic-gate kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP); 14157c478bd9Sstevel@tonic-gate conn = qptoc(kqp); 14167c478bd9Sstevel@tonic-gate kqp->hca = hca; 14177c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_rdmamod = &rib_mod; 14187c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_private = (caddr_t)kqp; 14197c478bd9Sstevel@tonic-gate 14207c478bd9Sstevel@tonic-gate kqp->mode = RIB_CLIENT; 14217c478bd9Sstevel@tonic-gate kqp->chan_flags = IBT_BLOCKING; 14227c478bd9Sstevel@tonic-gate conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP); 14237c478bd9Sstevel@tonic-gate bcopy(raddr->buf, conn->c_raddr.buf, raddr->len); 14247c478bd9Sstevel@tonic-gate conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len; 14257c478bd9Sstevel@tonic-gate /* 14267c478bd9Sstevel@tonic-gate * Initialize 14277c478bd9Sstevel@tonic-gate */ 14287c478bd9Sstevel@tonic-gate cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL); 14297c478bd9Sstevel@tonic-gate cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL); 14307c478bd9Sstevel@tonic-gate mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 14317c478bd9Sstevel@tonic-gate mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock); 14327c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 14337c478bd9Sstevel@tonic-gate mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 14347c478bd9Sstevel@tonic-gate cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL); 14357c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock); 1436*0a701b1eSRobert Gordon /* 1437*0a701b1eSRobert Gordon * Initialize the client credit control 1438*0a701b1eSRobert Gordon * portion of the rdmaconn 
struct. 1439*0a701b1eSRobert Gordon */ 1440*0a701b1eSRobert Gordon kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT; 1441*0a701b1eSRobert Gordon cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc; 1442*0a701b1eSRobert Gordon cc_info->clnt_cc_granted_ops = 0; 1443*0a701b1eSRobert Gordon cc_info->clnt_cc_in_flight_ops = 0; 1444*0a701b1eSRobert Gordon cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL); 14457c478bd9Sstevel@tonic-gate 14467c478bd9Sstevel@tonic-gate *qp = kqp; 14477c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 14487c478bd9Sstevel@tonic-gate } 14497c478bd9Sstevel@tonic-gate 14507c478bd9Sstevel@tonic-gate /* Server side qp creation */ 14517c478bd9Sstevel@tonic-gate static rdma_stat 14527c478bd9Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp) 14537c478bd9Sstevel@tonic-gate { 14547c478bd9Sstevel@tonic-gate rib_qp_t *kqp = NULL; 14557c478bd9Sstevel@tonic-gate ibt_chan_sizes_t chan_sizes; 14567c478bd9Sstevel@tonic-gate ibt_rc_chan_alloc_args_t qp_attr; 14577c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 1458*0a701b1eSRobert Gordon rdma_srv_cred_ctrl_t *cc_info; 14597c478bd9Sstevel@tonic-gate 14607c478bd9Sstevel@tonic-gate *qp = NULL; 14617c478bd9Sstevel@tonic-gate 14627c478bd9Sstevel@tonic-gate kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP); 14637c478bd9Sstevel@tonic-gate kqp->hca = hca; 14647c478bd9Sstevel@tonic-gate kqp->port_num = port; 14657c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_rdmamod = &rib_mod; 14667c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_private = (caddr_t)kqp; 14677c478bd9Sstevel@tonic-gate 14687c478bd9Sstevel@tonic-gate /* 14697c478bd9Sstevel@tonic-gate * Create the qp handle 14707c478bd9Sstevel@tonic-gate */ 14717c478bd9Sstevel@tonic-gate bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t)); 14727c478bd9Sstevel@tonic-gate qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl; 14737c478bd9Sstevel@tonic-gate qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl; 14747c478bd9Sstevel@tonic-gate qp_attr.rc_pd = 
hca->pd_hdl; 14757c478bd9Sstevel@tonic-gate qp_attr.rc_hca_port_num = port; 14767c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX; 14777c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX; 14787c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE; 14797c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE; 14807c478bd9Sstevel@tonic-gate qp_attr.rc_clone_chan = NULL; 14817c478bd9Sstevel@tonic-gate qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 14827c478bd9Sstevel@tonic-gate qp_attr.rc_flags = IBT_WR_SIGNALED; 14837c478bd9Sstevel@tonic-gate 14847c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 14857c478bd9Sstevel@tonic-gate if (hca->state != HCA_DETACHED) { 14867c478bd9Sstevel@tonic-gate ibt_status = ibt_alloc_rc_channel(hca->hca_hdl, 14877c478bd9Sstevel@tonic-gate IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl, 14887c478bd9Sstevel@tonic-gate &chan_sizes); 14897c478bd9Sstevel@tonic-gate } else { 14907c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 14917c478bd9Sstevel@tonic-gate goto fail; 14927c478bd9Sstevel@tonic-gate } 14937c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 14947c478bd9Sstevel@tonic-gate 14957c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 1496*0a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i_svccreatechanfail, 1497*0a701b1eSRobert Gordon int, ibt_status); 14987c478bd9Sstevel@tonic-gate goto fail; 14997c478bd9Sstevel@tonic-gate } 15007c478bd9Sstevel@tonic-gate 15017c478bd9Sstevel@tonic-gate kqp->mode = RIB_SERVER; 15027c478bd9Sstevel@tonic-gate kqp->chan_flags = IBT_BLOCKING; 15037c478bd9Sstevel@tonic-gate kqp->q = q; /* server ONLY */ 15047c478bd9Sstevel@tonic-gate 15057c478bd9Sstevel@tonic-gate cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL); 15067c478bd9Sstevel@tonic-gate cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL); 15077c478bd9Sstevel@tonic-gate mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 15087c478bd9Sstevel@tonic-gate 
mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock); 15097c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock); 15107c478bd9Sstevel@tonic-gate mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock); 15117c478bd9Sstevel@tonic-gate cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL); 15127c478bd9Sstevel@tonic-gate mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock); 15137c478bd9Sstevel@tonic-gate /* 15147c478bd9Sstevel@tonic-gate * Set the private data area to qp to be used in callbacks 15157c478bd9Sstevel@tonic-gate */ 15167c478bd9Sstevel@tonic-gate ibt_set_chan_private(kqp->qp_hdl, (void *)kqp); 15177c478bd9Sstevel@tonic-gate kqp->rdmaconn.c_state = C_CONNECTED; 1518*0a701b1eSRobert Gordon 1519*0a701b1eSRobert Gordon /* 1520*0a701b1eSRobert Gordon * Initialize the server credit control 1521*0a701b1eSRobert Gordon * portion of the rdmaconn struct. 1522*0a701b1eSRobert Gordon */ 1523*0a701b1eSRobert Gordon kqp->rdmaconn.c_cc_type = RDMA_CC_SRV; 1524*0a701b1eSRobert Gordon cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc; 1525*0a701b1eSRobert Gordon cc_info->srv_cc_buffers_granted = preposted_rbufs; 1526*0a701b1eSRobert Gordon cc_info->srv_cc_cur_buffers_used = 0; 1527*0a701b1eSRobert Gordon cc_info->srv_cc_posted = preposted_rbufs; 1528*0a701b1eSRobert Gordon 15297c478bd9Sstevel@tonic-gate *qp = kqp; 1530*0a701b1eSRobert Gordon 15317c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 15327c478bd9Sstevel@tonic-gate fail: 15337c478bd9Sstevel@tonic-gate if (kqp) 15347c478bd9Sstevel@tonic-gate kmem_free(kqp, sizeof (rib_qp_t)); 15357c478bd9Sstevel@tonic-gate 15367c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 15377c478bd9Sstevel@tonic-gate } 15387c478bd9Sstevel@tonic-gate 15397c478bd9Sstevel@tonic-gate /* ARGSUSED */ 15407c478bd9Sstevel@tonic-gate ibt_cm_status_t 15417c478bd9Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event, 15427c478bd9Sstevel@tonic-gate 
ibt_cm_return_args_t *ret_args, void *priv_data, 15437c478bd9Sstevel@tonic-gate ibt_priv_data_len_t len) 15447c478bd9Sstevel@tonic-gate { 15457c478bd9Sstevel@tonic-gate rpcib_state_t *ribstat; 15467c478bd9Sstevel@tonic-gate rib_hca_t *hca; 15477c478bd9Sstevel@tonic-gate 15487c478bd9Sstevel@tonic-gate ribstat = (rpcib_state_t *)clnt_hdl; 15497c478bd9Sstevel@tonic-gate hca = (rib_hca_t *)ribstat->hca; 15507c478bd9Sstevel@tonic-gate 15517c478bd9Sstevel@tonic-gate switch (event->cm_type) { 15527c478bd9Sstevel@tonic-gate 15537c478bd9Sstevel@tonic-gate /* got a connection close event */ 15547c478bd9Sstevel@tonic-gate case IBT_CM_EVENT_CONN_CLOSED: 15557c478bd9Sstevel@tonic-gate { 15567c478bd9Sstevel@tonic-gate CONN *conn; 15577c478bd9Sstevel@tonic-gate rib_qp_t *qp; 15587c478bd9Sstevel@tonic-gate 15597c478bd9Sstevel@tonic-gate /* check reason why connection was closed */ 15607c478bd9Sstevel@tonic-gate switch (event->cm_event.closed) { 15617c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DREP_RCVD: 15627c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DREQ_TIMEOUT: 15637c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_DUP: 15647c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_ABORT: 15657c478bd9Sstevel@tonic-gate case IBT_CM_CLOSED_ALREADY: 15667c478bd9Sstevel@tonic-gate /* 15677c478bd9Sstevel@tonic-gate * These cases indicate the local end initiated 15687c478bd9Sstevel@tonic-gate * the closing of the channel. Nothing to do here. 15697c478bd9Sstevel@tonic-gate */ 15707c478bd9Sstevel@tonic-gate break; 15717c478bd9Sstevel@tonic-gate default: 15727c478bd9Sstevel@tonic-gate /* 15737c478bd9Sstevel@tonic-gate * Reason for CONN_CLOSED event must be one of 15747c478bd9Sstevel@tonic-gate * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD 15757c478bd9Sstevel@tonic-gate * or IBT_CM_CLOSED_STALE. These indicate cases were 15767c478bd9Sstevel@tonic-gate * the remote end is closing the channel. 
In these 15777c478bd9Sstevel@tonic-gate * cases free the channel and transition to error 15787c478bd9Sstevel@tonic-gate * state 15797c478bd9Sstevel@tonic-gate */ 15807c478bd9Sstevel@tonic-gate qp = ibt_get_chan_private(event->cm_channel); 15817c478bd9Sstevel@tonic-gate conn = qptoc(qp); 15827c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 15837c478bd9Sstevel@tonic-gate if (conn->c_state == C_DISCONN_PEND) { 15847c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 15857c478bd9Sstevel@tonic-gate break; 15867c478bd9Sstevel@tonic-gate } 15877c478bd9Sstevel@tonic-gate 1588*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 15897c478bd9Sstevel@tonic-gate 15907c478bd9Sstevel@tonic-gate /* 15917c478bd9Sstevel@tonic-gate * Free the rc_channel. Channel has already 15927c478bd9Sstevel@tonic-gate * transitioned to ERROR state and WRs have been 15937c478bd9Sstevel@tonic-gate * FLUSHED_ERR already. 15947c478bd9Sstevel@tonic-gate */ 15957c478bd9Sstevel@tonic-gate (void) ibt_free_channel(qp->qp_hdl); 15967c478bd9Sstevel@tonic-gate qp->qp_hdl = NULL; 15977c478bd9Sstevel@tonic-gate 15987c478bd9Sstevel@tonic-gate /* 15997c478bd9Sstevel@tonic-gate * Free the conn if c_ref is down to 0 already 16007c478bd9Sstevel@tonic-gate */ 16017c478bd9Sstevel@tonic-gate if (conn->c_ref == 0) { 16027c478bd9Sstevel@tonic-gate /* 16037c478bd9Sstevel@tonic-gate * Remove from list and free conn 16047c478bd9Sstevel@tonic-gate */ 16057c478bd9Sstevel@tonic-gate conn->c_state = C_DISCONN_PEND; 16067c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 16077c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, 16087c478bd9Sstevel@tonic-gate &hca->cl_conn_list); 16097c478bd9Sstevel@tonic-gate } else { 16107c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 16117c478bd9Sstevel@tonic-gate } 16127c478bd9Sstevel@tonic-gate #ifdef DEBUG 16137c478bd9Sstevel@tonic-gate if (rib_debug) 16147c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, "rib_clnt_cm_handler: " 16157c478bd9Sstevel@tonic-gate 
"(CONN_CLOSED) channel disconnected"); 16167c478bd9Sstevel@tonic-gate #endif 16177c478bd9Sstevel@tonic-gate break; 16187c478bd9Sstevel@tonic-gate } 16197c478bd9Sstevel@tonic-gate break; 16207c478bd9Sstevel@tonic-gate } 16217c478bd9Sstevel@tonic-gate default: 16227c478bd9Sstevel@tonic-gate break; 16237c478bd9Sstevel@tonic-gate } 16247c478bd9Sstevel@tonic-gate return (IBT_CM_ACCEPT); 16257c478bd9Sstevel@tonic-gate } 16267c478bd9Sstevel@tonic-gate 1627*0a701b1eSRobert Gordon /* Check server ib address */ 16287c478bd9Sstevel@tonic-gate rdma_stat 1629*0a701b1eSRobert Gordon rib_chk_srv_ibaddr(struct netbuf *raddr, 1630*0a701b1eSRobert Gordon int addr_type, ibt_path_info_t *path, ibt_ip_addr_t *s_ip, 1631*0a701b1eSRobert Gordon ibt_ip_addr_t *d_ip) 16327c478bd9Sstevel@tonic-gate { 16337c478bd9Sstevel@tonic-gate struct sockaddr_in *sin4; 16347c478bd9Sstevel@tonic-gate struct sockaddr_in6 *sin6; 16357c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 1636*0a701b1eSRobert Gordon ibt_ip_path_attr_t ipattr; 1637*0a701b1eSRobert Gordon uint8_t npaths = 0; 1638*0a701b1eSRobert Gordon ibt_path_ip_src_t srcip; 16397c478bd9Sstevel@tonic-gate 1640*0a701b1eSRobert Gordon ASSERT(raddr->buf != NULL); 1641*0a701b1eSRobert Gordon 16427c478bd9Sstevel@tonic-gate (void) bzero(path, sizeof (ibt_path_info_t)); 16437c478bd9Sstevel@tonic-gate 16447c478bd9Sstevel@tonic-gate switch (addr_type) { 16457c478bd9Sstevel@tonic-gate case AF_INET: 16467c478bd9Sstevel@tonic-gate sin4 = (struct sockaddr_in *)raddr->buf; 1647*0a701b1eSRobert Gordon d_ip->family = AF_INET; 1648*0a701b1eSRobert Gordon d_ip->un.ip4addr = htonl(sin4->sin_addr.s_addr); 16497c478bd9Sstevel@tonic-gate break; 16507c478bd9Sstevel@tonic-gate 16517c478bd9Sstevel@tonic-gate case AF_INET6: 16527c478bd9Sstevel@tonic-gate sin6 = (struct sockaddr_in6 *)raddr->buf; 1653*0a701b1eSRobert Gordon d_ip->family = AF_INET6; 1654*0a701b1eSRobert Gordon d_ip->un.ip6addr = sin6->sin6_addr; 16557c478bd9Sstevel@tonic-gate break; 
16567c478bd9Sstevel@tonic-gate 16577c478bd9Sstevel@tonic-gate default: 16587c478bd9Sstevel@tonic-gate return (RDMA_INVAL); 16597c478bd9Sstevel@tonic-gate } 16607c478bd9Sstevel@tonic-gate 1661*0a701b1eSRobert Gordon bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 1662*0a701b1eSRobert Gordon bzero(&srcip, sizeof (ibt_path_ip_src_t)); 16637c478bd9Sstevel@tonic-gate 1664*0a701b1eSRobert Gordon ipattr.ipa_dst_ip = d_ip; 1665*0a701b1eSRobert Gordon ipattr.ipa_hca_guid = rib_stat->hca->hca_guid; 1666*0a701b1eSRobert Gordon ipattr.ipa_ndst = 1; 1667*0a701b1eSRobert Gordon ipattr.ipa_max_paths = 1; 1668*0a701b1eSRobert Gordon npaths = 0; 16697c478bd9Sstevel@tonic-gate 1670*0a701b1eSRobert Gordon ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl, 1671*0a701b1eSRobert Gordon IBT_PATH_NO_FLAGS, 1672*0a701b1eSRobert Gordon &ipattr, 1673*0a701b1eSRobert Gordon path, 1674*0a701b1eSRobert Gordon &npaths, 1675*0a701b1eSRobert Gordon &srcip); 1676*0a701b1eSRobert Gordon 1677*0a701b1eSRobert Gordon if (ibt_status != IBT_SUCCESS || 1678*0a701b1eSRobert Gordon npaths < 1 || 1679*0a701b1eSRobert Gordon path->pi_hca_guid != rib_stat->hca->hca_guid) { 1680*0a701b1eSRobert Gordon 1681*0a701b1eSRobert Gordon bzero(s_ip, sizeof (ibt_path_ip_src_t)); 16827c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 16837c478bd9Sstevel@tonic-gate } 16847c478bd9Sstevel@tonic-gate 1685*0a701b1eSRobert Gordon if (srcip.ip_primary.family == AF_INET) { 1686*0a701b1eSRobert Gordon s_ip->family = AF_INET; 1687*0a701b1eSRobert Gordon s_ip->un.ip4addr = htonl(srcip.ip_primary.un.ip4addr); 1688*0a701b1eSRobert Gordon } else { 1689*0a701b1eSRobert Gordon s_ip->family = AF_INET6; 1690*0a701b1eSRobert Gordon s_ip->un.ip6addr = srcip.ip_primary.un.ip6addr; 1691*0a701b1eSRobert Gordon } 1692*0a701b1eSRobert Gordon 1693*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 1694*0a701b1eSRobert Gordon } 1695*0a701b1eSRobert Gordon 16967c478bd9Sstevel@tonic-gate 16977c478bd9Sstevel@tonic-gate /* 16987c478bd9Sstevel@tonic-gate * 
Connect to the server. 16997c478bd9Sstevel@tonic-gate */ 17007c478bd9Sstevel@tonic-gate rdma_stat 1701*0a701b1eSRobert Gordon rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, ibt_path_info_t *path, 1702*0a701b1eSRobert Gordon ibt_ip_addr_t *s_ip, ibt_ip_addr_t *d_ip) 17037c478bd9Sstevel@tonic-gate { 17047c478bd9Sstevel@tonic-gate ibt_chan_open_args_t chan_args; /* channel args */ 17057c478bd9Sstevel@tonic-gate ibt_chan_sizes_t chan_sizes; 17067c478bd9Sstevel@tonic-gate ibt_rc_chan_alloc_args_t qp_attr; 17077c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 17087c478bd9Sstevel@tonic-gate ibt_rc_returns_t ret_args; /* conn reject info */ 17097c478bd9Sstevel@tonic-gate int refresh = REFRESH_ATTEMPTS; /* refresh if IBT_CM_CONN_STALE */ 1710*0a701b1eSRobert Gordon ibt_ip_cm_info_t ipcm_info; 1711*0a701b1eSRobert Gordon uint8_t cmp_ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ]; 1712*0a701b1eSRobert Gordon 17137c478bd9Sstevel@tonic-gate 17147c478bd9Sstevel@tonic-gate (void) bzero(&chan_args, sizeof (chan_args)); 17157c478bd9Sstevel@tonic-gate (void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t)); 1716*0a701b1eSRobert Gordon (void) bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 1717*0a701b1eSRobert Gordon 1718*0a701b1eSRobert Gordon switch (ipcm_info.src_addr.family = s_ip->family) { 1719*0a701b1eSRobert Gordon case AF_INET: 1720*0a701b1eSRobert Gordon ipcm_info.src_addr.un.ip4addr = s_ip->un.ip4addr; 1721*0a701b1eSRobert Gordon break; 1722*0a701b1eSRobert Gordon case AF_INET6: 1723*0a701b1eSRobert Gordon ipcm_info.src_addr.un.ip6addr = s_ip->un.ip6addr; 1724*0a701b1eSRobert Gordon break; 1725*0a701b1eSRobert Gordon } 1726*0a701b1eSRobert Gordon 1727*0a701b1eSRobert Gordon switch (ipcm_info.dst_addr.family = d_ip->family) { 1728*0a701b1eSRobert Gordon case AF_INET: 1729*0a701b1eSRobert Gordon ipcm_info.dst_addr.un.ip4addr = d_ip->un.ip4addr; 1730*0a701b1eSRobert Gordon break; 1731*0a701b1eSRobert Gordon case AF_INET6: 1732*0a701b1eSRobert Gordon ipcm_info.dst_addr.un.ip6addr = 
d_ip->un.ip6addr; 1733*0a701b1eSRobert Gordon break; 1734*0a701b1eSRobert Gordon } 1735*0a701b1eSRobert Gordon 1736*0a701b1eSRobert Gordon ipcm_info.src_port = NFS_RDMA_PORT; 1737*0a701b1eSRobert Gordon 1738*0a701b1eSRobert Gordon ibt_status = ibt_format_ip_private_data(&ipcm_info, 1739*0a701b1eSRobert Gordon IBT_IP_HDR_PRIV_DATA_SZ, cmp_ip_pvt); 1740*0a701b1eSRobert Gordon 1741*0a701b1eSRobert Gordon if (ibt_status != IBT_SUCCESS) { 1742*0a701b1eSRobert Gordon cmn_err(CE_WARN, "ibt_format_ip_private_data failed\n"); 1743*0a701b1eSRobert Gordon return (-1); 1744*0a701b1eSRobert Gordon } 17457c478bd9Sstevel@tonic-gate 17467c478bd9Sstevel@tonic-gate qp_attr.rc_hca_port_num = path->pi_prim_cep_path.cep_hca_port_num; 17477c478bd9Sstevel@tonic-gate /* Alloc a RC channel */ 17487c478bd9Sstevel@tonic-gate qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl; 17497c478bd9Sstevel@tonic-gate qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl; 17507c478bd9Sstevel@tonic-gate qp_attr.rc_pd = hca->pd_hdl; 17517c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX; 17527c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX; 17537c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE; 17547c478bd9Sstevel@tonic-gate qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE; 17557c478bd9Sstevel@tonic-gate qp_attr.rc_clone_chan = NULL; 17567c478bd9Sstevel@tonic-gate qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 17577c478bd9Sstevel@tonic-gate qp_attr.rc_flags = IBT_WR_SIGNALED; 17587c478bd9Sstevel@tonic-gate 1759*0a701b1eSRobert Gordon path->pi_sid = ibt_get_ip_sid(IPPROTO_TCP, NFS_RDMA_PORT); 17607c478bd9Sstevel@tonic-gate chan_args.oc_path = path; 17617c478bd9Sstevel@tonic-gate chan_args.oc_cm_handler = rib_clnt_cm_handler; 17627c478bd9Sstevel@tonic-gate chan_args.oc_cm_clnt_private = (void *)rib_stat; 1763*0a701b1eSRobert Gordon chan_args.oc_rdma_ra_out = 4; 1764*0a701b1eSRobert Gordon chan_args.oc_rdma_ra_in = 4; 17657c478bd9Sstevel@tonic-gate chan_args.oc_path_retry_cnt = 2; 
17667c478bd9Sstevel@tonic-gate chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES; 1767*0a701b1eSRobert Gordon chan_args.oc_priv_data = cmp_ip_pvt; 1768*0a701b1eSRobert Gordon chan_args.oc_priv_data_len = IBT_IP_HDR_PRIV_DATA_SZ; 17697c478bd9Sstevel@tonic-gate 17707c478bd9Sstevel@tonic-gate refresh: 17717c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 17727c478bd9Sstevel@tonic-gate if (hca->state != HCA_DETACHED) { 17737c478bd9Sstevel@tonic-gate ibt_status = ibt_alloc_rc_channel(hca->hca_hdl, 1774*0a701b1eSRobert Gordon IBT_ACHAN_NO_FLAGS, 1775*0a701b1eSRobert Gordon &qp_attr, &qp->qp_hdl, 17767c478bd9Sstevel@tonic-gate &chan_sizes); 17777c478bd9Sstevel@tonic-gate } else { 17787c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 17797c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 17807c478bd9Sstevel@tonic-gate } 17817c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 17827c478bd9Sstevel@tonic-gate 17837c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 1784*0a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i_conntosrv, 1785*0a701b1eSRobert Gordon int, ibt_status); 17867c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 17877c478bd9Sstevel@tonic-gate } 17887c478bd9Sstevel@tonic-gate 17897c478bd9Sstevel@tonic-gate /* Connect to the Server */ 17907c478bd9Sstevel@tonic-gate (void) bzero(&ret_args, sizeof (ret_args)); 17917c478bd9Sstevel@tonic-gate mutex_enter(&qp->cb_lock); 17927c478bd9Sstevel@tonic-gate ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS, 17937c478bd9Sstevel@tonic-gate IBT_BLOCKING, &chan_args, &ret_args); 17947c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 1795*0a701b1eSRobert Gordon DTRACE_PROBE2(rpcib__i_openrctosrv, 1796*0a701b1eSRobert Gordon int, ibt_status, int, ret_args.rc_status); 1797*0a701b1eSRobert Gordon 17987c478bd9Sstevel@tonic-gate (void) ibt_free_channel(qp->qp_hdl); 17997c478bd9Sstevel@tonic-gate qp->qp_hdl = NULL; 18007c478bd9Sstevel@tonic-gate mutex_exit(&qp->cb_lock); 
18017c478bd9Sstevel@tonic-gate if (refresh-- && ibt_status == IBT_CM_FAILURE && 18027c478bd9Sstevel@tonic-gate ret_args.rc_status == IBT_CM_CONN_STALE) { 18037c478bd9Sstevel@tonic-gate /* 18047c478bd9Sstevel@tonic-gate * Got IBT_CM_CONN_STALE probably because of stale 18057c478bd9Sstevel@tonic-gate * data on the passive end of a channel that existed 18067c478bd9Sstevel@tonic-gate * prior to reboot. Retry establishing a channel 18077c478bd9Sstevel@tonic-gate * REFRESH_ATTEMPTS times, during which time the 18087c478bd9Sstevel@tonic-gate * stale conditions on the server might clear up. 18097c478bd9Sstevel@tonic-gate */ 18107c478bd9Sstevel@tonic-gate goto refresh; 18117c478bd9Sstevel@tonic-gate } 18127c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 18137c478bd9Sstevel@tonic-gate } 18147c478bd9Sstevel@tonic-gate mutex_exit(&qp->cb_lock); 18157c478bd9Sstevel@tonic-gate /* 18167c478bd9Sstevel@tonic-gate * Set the private data area to qp to be used in callbacks 18177c478bd9Sstevel@tonic-gate */ 18187c478bd9Sstevel@tonic-gate ibt_set_chan_private(qp->qp_hdl, (void *)qp); 18197c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 18207c478bd9Sstevel@tonic-gate } 18217c478bd9Sstevel@tonic-gate 18227c478bd9Sstevel@tonic-gate rdma_stat 18237c478bd9Sstevel@tonic-gate rib_ping_srv(int addr_type, struct netbuf *raddr, rib_hca_t **hca) 18247c478bd9Sstevel@tonic-gate { 1825*0a701b1eSRobert Gordon struct sockaddr_in *sin4, *sin4arr; 1826*0a701b1eSRobert Gordon struct sockaddr_in6 *sin6, *sin6arr; 1827*0a701b1eSRobert Gordon uint_t nif, nif4, nif6, i; 18287c478bd9Sstevel@tonic-gate ibt_path_info_t path; 18297c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 1830*0a701b1eSRobert Gordon uint8_t num_paths_p; 1831*0a701b1eSRobert Gordon ibt_ip_path_attr_t ipattr; 1832*0a701b1eSRobert Gordon ibt_ip_addr_t dstip; 1833*0a701b1eSRobert Gordon ibt_path_ip_src_t srcip; 1834*0a701b1eSRobert Gordon 1835*0a701b1eSRobert Gordon 1836*0a701b1eSRobert Gordon *hca = NULL; 18377c478bd9Sstevel@tonic-gate 
18387c478bd9Sstevel@tonic-gate ASSERT(raddr->buf != NULL); 18397c478bd9Sstevel@tonic-gate 18407c478bd9Sstevel@tonic-gate bzero(&path, sizeof (ibt_path_info_t)); 1841*0a701b1eSRobert Gordon bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 1842*0a701b1eSRobert Gordon bzero(&srcip, sizeof (ibt_path_ip_src_t)); 18437c478bd9Sstevel@tonic-gate 1844*0a701b1eSRobert Gordon /* Obtain the source IP addresses for the system */ 1845*0a701b1eSRobert Gordon nif = rpcib_get_number_interfaces(); 1846*0a701b1eSRobert Gordon sin4arr = (struct sockaddr_in *) 1847*0a701b1eSRobert Gordon kmem_zalloc(sizeof (struct sockaddr_in) * nif, KM_SLEEP); 1848*0a701b1eSRobert Gordon sin6arr = (struct sockaddr_in6 *) 1849*0a701b1eSRobert Gordon kmem_zalloc(sizeof (struct sockaddr_in6) * nif, KM_SLEEP); 1850*0a701b1eSRobert Gordon 1851*0a701b1eSRobert Gordon (void) rpcib_get_ib_addresses(sin4arr, sin6arr, &nif4, &nif6); 1852*0a701b1eSRobert Gordon 1853*0a701b1eSRobert Gordon /* Are there really any IB interfaces available */ 1854*0a701b1eSRobert Gordon if (nif4 == 0 && nif6 == 0) { 1855*0a701b1eSRobert Gordon kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif); 1856*0a701b1eSRobert Gordon kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif); 1857*0a701b1eSRobert Gordon return (RDMA_FAILED); 1858*0a701b1eSRobert Gordon } 1859*0a701b1eSRobert Gordon 1860*0a701b1eSRobert Gordon /* Prep the destination address */ 18617c478bd9Sstevel@tonic-gate switch (addr_type) { 18627c478bd9Sstevel@tonic-gate case AF_INET: 18637c478bd9Sstevel@tonic-gate sin4 = (struct sockaddr_in *)raddr->buf; 1864*0a701b1eSRobert Gordon dstip.family = AF_INET; 1865*0a701b1eSRobert Gordon dstip.un.ip4addr = htonl(sin4->sin_addr.s_addr); 1866*0a701b1eSRobert Gordon 1867*0a701b1eSRobert Gordon for (i = 0; i < nif4; i++) { 1868*0a701b1eSRobert Gordon num_paths_p = 0; 1869*0a701b1eSRobert Gordon ipattr.ipa_dst_ip = &dstip; 1870*0a701b1eSRobert Gordon ipattr.ipa_hca_guid = rib_stat->hca->hca_guid; 1871*0a701b1eSRobert Gordon 
ipattr.ipa_ndst = 1; 1872*0a701b1eSRobert Gordon ipattr.ipa_max_paths = 1; 1873*0a701b1eSRobert Gordon ipattr.ipa_src_ip.family = dstip.family; 1874*0a701b1eSRobert Gordon ipattr.ipa_src_ip.un.ip4addr = 1875*0a701b1eSRobert Gordon htonl(sin4arr[i].sin_addr.s_addr); 1876*0a701b1eSRobert Gordon 1877*0a701b1eSRobert Gordon ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl, 1878*0a701b1eSRobert Gordon IBT_PATH_NO_FLAGS, 1879*0a701b1eSRobert Gordon &ipattr, 1880*0a701b1eSRobert Gordon &path, 1881*0a701b1eSRobert Gordon &num_paths_p, 1882*0a701b1eSRobert Gordon &srcip); 1883*0a701b1eSRobert Gordon if (ibt_status == IBT_SUCCESS && 1884*0a701b1eSRobert Gordon num_paths_p != 0 && 1885*0a701b1eSRobert Gordon path.pi_hca_guid == rib_stat->hca->hca_guid) { 1886*0a701b1eSRobert Gordon *hca = rib_stat->hca; 1887*0a701b1eSRobert Gordon 1888*0a701b1eSRobert Gordon kmem_free(sin4arr, 1889*0a701b1eSRobert Gordon sizeof (struct sockaddr_in) * nif); 1890*0a701b1eSRobert Gordon kmem_free(sin6arr, 1891*0a701b1eSRobert Gordon sizeof (struct sockaddr_in6) * nif); 1892*0a701b1eSRobert Gordon 1893*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 1894*0a701b1eSRobert Gordon } 1895*0a701b1eSRobert Gordon } 18967c478bd9Sstevel@tonic-gate break; 18977c478bd9Sstevel@tonic-gate 18987c478bd9Sstevel@tonic-gate case AF_INET6: 18997c478bd9Sstevel@tonic-gate sin6 = (struct sockaddr_in6 *)raddr->buf; 1900*0a701b1eSRobert Gordon dstip.family = AF_INET6; 1901*0a701b1eSRobert Gordon dstip.un.ip6addr = sin6->sin6_addr; 1902*0a701b1eSRobert Gordon 1903*0a701b1eSRobert Gordon for (i = 0; i < nif6; i++) { 1904*0a701b1eSRobert Gordon num_paths_p = 0; 1905*0a701b1eSRobert Gordon ipattr.ipa_dst_ip = &dstip; 1906*0a701b1eSRobert Gordon ipattr.ipa_hca_guid = rib_stat->hca->hca_guid; 1907*0a701b1eSRobert Gordon ipattr.ipa_ndst = 1; 1908*0a701b1eSRobert Gordon ipattr.ipa_max_paths = 1; 1909*0a701b1eSRobert Gordon ipattr.ipa_src_ip.family = dstip.family; 1910*0a701b1eSRobert Gordon ipattr.ipa_src_ip.un.ip6addr = 
sin6arr[i].sin6_addr; 1911*0a701b1eSRobert Gordon 1912*0a701b1eSRobert Gordon ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl, 1913*0a701b1eSRobert Gordon IBT_PATH_NO_FLAGS, 1914*0a701b1eSRobert Gordon &ipattr, 1915*0a701b1eSRobert Gordon &path, 1916*0a701b1eSRobert Gordon &num_paths_p, 1917*0a701b1eSRobert Gordon &srcip); 1918*0a701b1eSRobert Gordon if (ibt_status == IBT_SUCCESS && 1919*0a701b1eSRobert Gordon num_paths_p != 0 && 1920*0a701b1eSRobert Gordon path.pi_hca_guid == rib_stat->hca->hca_guid) { 1921*0a701b1eSRobert Gordon *hca = rib_stat->hca; 1922*0a701b1eSRobert Gordon 1923*0a701b1eSRobert Gordon kmem_free(sin4arr, 1924*0a701b1eSRobert Gordon sizeof (struct sockaddr_in) * nif); 1925*0a701b1eSRobert Gordon kmem_free(sin6arr, 1926*0a701b1eSRobert Gordon sizeof (struct sockaddr_in6) * nif); 1927*0a701b1eSRobert Gordon 1928*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 1929*0a701b1eSRobert Gordon } 1930*0a701b1eSRobert Gordon } 1931*0a701b1eSRobert Gordon 19327c478bd9Sstevel@tonic-gate break; 19337c478bd9Sstevel@tonic-gate 19347c478bd9Sstevel@tonic-gate default: 1935*0a701b1eSRobert Gordon kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif); 1936*0a701b1eSRobert Gordon kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif); 19377c478bd9Sstevel@tonic-gate return (RDMA_INVAL); 19387c478bd9Sstevel@tonic-gate } 19397c478bd9Sstevel@tonic-gate 1940*0a701b1eSRobert Gordon kmem_free(sin4arr, sizeof (struct sockaddr_in) * nif); 1941*0a701b1eSRobert Gordon kmem_free(sin6arr, sizeof (struct sockaddr_in6) * nif); 19427c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 19437c478bd9Sstevel@tonic-gate } 19447c478bd9Sstevel@tonic-gate 19457c478bd9Sstevel@tonic-gate /* 19467c478bd9Sstevel@tonic-gate * Close channel, remove from connection list and 19477c478bd9Sstevel@tonic-gate * free up resources allocated for that channel. 
19487c478bd9Sstevel@tonic-gate */ 19497c478bd9Sstevel@tonic-gate rdma_stat 19507c478bd9Sstevel@tonic-gate rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list) 19517c478bd9Sstevel@tonic-gate { 19527c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 19537c478bd9Sstevel@tonic-gate rib_hca_t *hca; 19547c478bd9Sstevel@tonic-gate 19557c478bd9Sstevel@tonic-gate /* 19567c478bd9Sstevel@tonic-gate * c_ref == 0 and connection is in C_DISCONN_PEND 19577c478bd9Sstevel@tonic-gate */ 19587c478bd9Sstevel@tonic-gate hca = qp->hca; 19597c478bd9Sstevel@tonic-gate if (conn_list != NULL) 19607c478bd9Sstevel@tonic-gate (void) rib_rm_conn(conn, conn_list); 1961*0a701b1eSRobert Gordon 19627c478bd9Sstevel@tonic-gate if (qp->qp_hdl != NULL) { 19637c478bd9Sstevel@tonic-gate /* 19647c478bd9Sstevel@tonic-gate * If the channel has not been establised, 19657c478bd9Sstevel@tonic-gate * ibt_flush_channel is called to flush outstanding WRs 19667c478bd9Sstevel@tonic-gate * on the Qs. Otherwise, ibt_close_rc_channel() is 19677c478bd9Sstevel@tonic-gate * called. The channel is then freed. 
19687c478bd9Sstevel@tonic-gate */ 19697c478bd9Sstevel@tonic-gate if (conn_list != NULL) 19707c478bd9Sstevel@tonic-gate (void) ibt_close_rc_channel(qp->qp_hdl, 19717c478bd9Sstevel@tonic-gate IBT_BLOCKING, NULL, 0, NULL, NULL, 0); 19727c478bd9Sstevel@tonic-gate else 19737c478bd9Sstevel@tonic-gate (void) ibt_flush_channel(qp->qp_hdl); 19747c478bd9Sstevel@tonic-gate 19757c478bd9Sstevel@tonic-gate mutex_enter(&qp->posted_rbufs_lock); 19767c478bd9Sstevel@tonic-gate while (qp->n_posted_rbufs) 19777c478bd9Sstevel@tonic-gate cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock); 19787c478bd9Sstevel@tonic-gate mutex_exit(&qp->posted_rbufs_lock); 19797c478bd9Sstevel@tonic-gate (void) ibt_free_channel(qp->qp_hdl); 19807c478bd9Sstevel@tonic-gate qp->qp_hdl = NULL; 19817c478bd9Sstevel@tonic-gate } 1982*0a701b1eSRobert Gordon 19837c478bd9Sstevel@tonic-gate ASSERT(qp->rdlist == NULL); 1984*0a701b1eSRobert Gordon 19857c478bd9Sstevel@tonic-gate if (qp->replylist != NULL) { 19867c478bd9Sstevel@tonic-gate (void) rib_rem_replylist(qp); 19877c478bd9Sstevel@tonic-gate } 19887c478bd9Sstevel@tonic-gate 19897c478bd9Sstevel@tonic-gate cv_destroy(&qp->cb_conn_cv); 19907c478bd9Sstevel@tonic-gate cv_destroy(&qp->posted_rbufs_cv); 19917c478bd9Sstevel@tonic-gate mutex_destroy(&qp->cb_lock); 19927c478bd9Sstevel@tonic-gate 19937c478bd9Sstevel@tonic-gate mutex_destroy(&qp->replylist_lock); 19947c478bd9Sstevel@tonic-gate mutex_destroy(&qp->posted_rbufs_lock); 19957c478bd9Sstevel@tonic-gate mutex_destroy(&qp->rdlist_lock); 19967c478bd9Sstevel@tonic-gate 19977c478bd9Sstevel@tonic-gate cv_destroy(&conn->c_cv); 19987c478bd9Sstevel@tonic-gate mutex_destroy(&conn->c_lock); 19997c478bd9Sstevel@tonic-gate 20007c478bd9Sstevel@tonic-gate if (conn->c_raddr.buf != NULL) { 20017c478bd9Sstevel@tonic-gate kmem_free(conn->c_raddr.buf, conn->c_raddr.len); 20027c478bd9Sstevel@tonic-gate } 20037c478bd9Sstevel@tonic-gate if (conn->c_laddr.buf != NULL) { 20047c478bd9Sstevel@tonic-gate kmem_free(conn->c_laddr.buf, 
conn->c_laddr.len); 20057c478bd9Sstevel@tonic-gate } 2006*0a701b1eSRobert Gordon 2007*0a701b1eSRobert Gordon /* 2008*0a701b1eSRobert Gordon * Credit control cleanup. 2009*0a701b1eSRobert Gordon */ 2010*0a701b1eSRobert Gordon if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) { 2011*0a701b1eSRobert Gordon rdma_clnt_cred_ctrl_t *cc_info; 2012*0a701b1eSRobert Gordon cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc; 2013*0a701b1eSRobert Gordon cv_destroy(&cc_info->clnt_cc_cv); 2014*0a701b1eSRobert Gordon } 2015*0a701b1eSRobert Gordon 20167c478bd9Sstevel@tonic-gate kmem_free(qp, sizeof (rib_qp_t)); 20177c478bd9Sstevel@tonic-gate 20187c478bd9Sstevel@tonic-gate /* 20197c478bd9Sstevel@tonic-gate * If HCA has been DETACHED and the srv/clnt_conn_list is NULL, 20207c478bd9Sstevel@tonic-gate * then the hca is no longer being used. 20217c478bd9Sstevel@tonic-gate */ 20227c478bd9Sstevel@tonic-gate if (conn_list != NULL) { 20237c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 20247c478bd9Sstevel@tonic-gate if (hca->state == HCA_DETACHED) { 20257c478bd9Sstevel@tonic-gate rw_enter(&hca->srv_conn_list.conn_lock, RW_READER); 20267c478bd9Sstevel@tonic-gate if (hca->srv_conn_list.conn_hd == NULL) { 20277c478bd9Sstevel@tonic-gate rw_enter(&hca->cl_conn_list.conn_lock, 20287c478bd9Sstevel@tonic-gate RW_READER); 2029*0a701b1eSRobert Gordon 20307c478bd9Sstevel@tonic-gate if (hca->cl_conn_list.conn_hd == NULL) { 20317c478bd9Sstevel@tonic-gate mutex_enter(&hca->inuse_lock); 20327c478bd9Sstevel@tonic-gate hca->inuse = FALSE; 20337c478bd9Sstevel@tonic-gate cv_signal(&hca->cb_cv); 20347c478bd9Sstevel@tonic-gate mutex_exit(&hca->inuse_lock); 20357c478bd9Sstevel@tonic-gate } 20367c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 20377c478bd9Sstevel@tonic-gate } 20387c478bd9Sstevel@tonic-gate rw_exit(&hca->srv_conn_list.conn_lock); 20397c478bd9Sstevel@tonic-gate } 20407c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 20417c478bd9Sstevel@tonic-gate } 
2042*0a701b1eSRobert Gordon 20437c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 20447c478bd9Sstevel@tonic-gate } 20457c478bd9Sstevel@tonic-gate 20467c478bd9Sstevel@tonic-gate /* 20477c478bd9Sstevel@tonic-gate * Wait for send completion notification. Only on receiving a 20487c478bd9Sstevel@tonic-gate * notification be it a successful or error completion, free the 20497c478bd9Sstevel@tonic-gate * send_wid. 20507c478bd9Sstevel@tonic-gate */ 20517c478bd9Sstevel@tonic-gate static rdma_stat 20527c478bd9Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd) 20537c478bd9Sstevel@tonic-gate { 20547c478bd9Sstevel@tonic-gate clock_t timout, cv_wait_ret; 20557c478bd9Sstevel@tonic-gate rdma_stat error = RDMA_SUCCESS; 20567c478bd9Sstevel@tonic-gate int i; 20577c478bd9Sstevel@tonic-gate 20587c478bd9Sstevel@tonic-gate /* 20597c478bd9Sstevel@tonic-gate * Wait for send to complete 20607c478bd9Sstevel@tonic-gate */ 20617c478bd9Sstevel@tonic-gate ASSERT(wd != NULL); 20627c478bd9Sstevel@tonic-gate mutex_enter(&wd->sendwait_lock); 20637c478bd9Sstevel@tonic-gate if (wd->status == (uint_t)SEND_WAIT) { 20647c478bd9Sstevel@tonic-gate timout = drv_usectohz(SEND_WAIT_TIME * 1000000) + 20657c478bd9Sstevel@tonic-gate ddi_get_lbolt(); 2066*0a701b1eSRobert Gordon 20677c478bd9Sstevel@tonic-gate if (qp->mode == RIB_SERVER) { 20687c478bd9Sstevel@tonic-gate while ((cv_wait_ret = cv_timedwait(&wd->wait_cv, 20697c478bd9Sstevel@tonic-gate &wd->sendwait_lock, timout)) > 0 && 20707c478bd9Sstevel@tonic-gate wd->status == (uint_t)SEND_WAIT) 20717c478bd9Sstevel@tonic-gate ; 20727c478bd9Sstevel@tonic-gate switch (cv_wait_ret) { 20737c478bd9Sstevel@tonic-gate case -1: /* timeout */ 2074*0a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__srvsendwait__timeout); 2075*0a701b1eSRobert Gordon 20767c478bd9Sstevel@tonic-gate wd->cv_sig = 0; /* no signal needed */ 20777c478bd9Sstevel@tonic-gate error = RDMA_TIMEDOUT; 20787c478bd9Sstevel@tonic-gate break; 20797c478bd9Sstevel@tonic-gate default: /* got send 
completion */ 20807c478bd9Sstevel@tonic-gate break; 20817c478bd9Sstevel@tonic-gate } 20827c478bd9Sstevel@tonic-gate } else { 20837c478bd9Sstevel@tonic-gate while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv, 20847c478bd9Sstevel@tonic-gate &wd->sendwait_lock, timout)) > 0 && 20857c478bd9Sstevel@tonic-gate wd->status == (uint_t)SEND_WAIT) 20867c478bd9Sstevel@tonic-gate ; 20877c478bd9Sstevel@tonic-gate switch (cv_wait_ret) { 20887c478bd9Sstevel@tonic-gate case -1: /* timeout */ 2089*0a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__clntsendwait__timeout); 2090*0a701b1eSRobert Gordon 20917c478bd9Sstevel@tonic-gate wd->cv_sig = 0; /* no signal needed */ 20927c478bd9Sstevel@tonic-gate error = RDMA_TIMEDOUT; 20937c478bd9Sstevel@tonic-gate break; 20947c478bd9Sstevel@tonic-gate case 0: /* interrupted */ 2095*0a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__clntsendwait__intr); 2096*0a701b1eSRobert Gordon 20977c478bd9Sstevel@tonic-gate wd->cv_sig = 0; /* no signal needed */ 20987c478bd9Sstevel@tonic-gate error = RDMA_INTR; 20997c478bd9Sstevel@tonic-gate break; 21007c478bd9Sstevel@tonic-gate default: /* got send completion */ 21017c478bd9Sstevel@tonic-gate break; 21027c478bd9Sstevel@tonic-gate } 21037c478bd9Sstevel@tonic-gate } 21047c478bd9Sstevel@tonic-gate } 21057c478bd9Sstevel@tonic-gate 21067c478bd9Sstevel@tonic-gate if (wd->status != (uint_t)SEND_WAIT) { 21077c478bd9Sstevel@tonic-gate /* got send completion */ 21087c478bd9Sstevel@tonic-gate if (wd->status != RDMA_SUCCESS) { 21097c478bd9Sstevel@tonic-gate error = wd->status; 21107c478bd9Sstevel@tonic-gate if (wd->status != RDMA_CONNLOST) 21117c478bd9Sstevel@tonic-gate error = RDMA_FAILED; 21127c478bd9Sstevel@tonic-gate } 21137c478bd9Sstevel@tonic-gate for (i = 0; i < wd->nsbufs; i++) { 21147c478bd9Sstevel@tonic-gate rib_rbuf_free(qptoc(qp), SEND_BUFFER, 211511606941Sjwahlig (void *)(uintptr_t)wd->sbufaddr[i]); 21167c478bd9Sstevel@tonic-gate } 21177c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 
21187c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wd); 21197c478bd9Sstevel@tonic-gate } else { 21207c478bd9Sstevel@tonic-gate mutex_exit(&wd->sendwait_lock); 21217c478bd9Sstevel@tonic-gate } 21227c478bd9Sstevel@tonic-gate return (error); 21237c478bd9Sstevel@tonic-gate } 21247c478bd9Sstevel@tonic-gate 21257c478bd9Sstevel@tonic-gate static struct send_wid * 21267c478bd9Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp) 21277c478bd9Sstevel@tonic-gate { 21287c478bd9Sstevel@tonic-gate struct send_wid *wd; 21297c478bd9Sstevel@tonic-gate 21307c478bd9Sstevel@tonic-gate wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP); 21317c478bd9Sstevel@tonic-gate wd->xid = xid; 21327c478bd9Sstevel@tonic-gate wd->cv_sig = cv_sig; 21337c478bd9Sstevel@tonic-gate wd->qp = qp; 21347c478bd9Sstevel@tonic-gate cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL); 21357c478bd9Sstevel@tonic-gate mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL); 21367c478bd9Sstevel@tonic-gate wd->status = (uint_t)SEND_WAIT; 21377c478bd9Sstevel@tonic-gate 21387c478bd9Sstevel@tonic-gate return (wd); 21397c478bd9Sstevel@tonic-gate } 21407c478bd9Sstevel@tonic-gate 21417c478bd9Sstevel@tonic-gate static int 21427c478bd9Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc) 21437c478bd9Sstevel@tonic-gate { 21447c478bd9Sstevel@tonic-gate cv_destroy(&wdesc->wait_cv); 21457c478bd9Sstevel@tonic-gate mutex_destroy(&wdesc->sendwait_lock); 21467c478bd9Sstevel@tonic-gate kmem_free(wdesc, sizeof (*wdesc)); 21477c478bd9Sstevel@tonic-gate 21487c478bd9Sstevel@tonic-gate return (0); 21497c478bd9Sstevel@tonic-gate } 21507c478bd9Sstevel@tonic-gate 21517c478bd9Sstevel@tonic-gate static rdma_stat 21527c478bd9Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep) 21537c478bd9Sstevel@tonic-gate { 21547c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 21557c478bd9Sstevel@tonic-gate if (rep != NULL) { 21567c478bd9Sstevel@tonic-gate (void) rib_remreply(qp, rep); 
21577c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 21587c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 21597c478bd9Sstevel@tonic-gate } 21607c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 21617c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 21627c478bd9Sstevel@tonic-gate } 21637c478bd9Sstevel@tonic-gate 21647c478bd9Sstevel@tonic-gate /* 21657c478bd9Sstevel@tonic-gate * Send buffers are freed here only in case of error in posting 21667c478bd9Sstevel@tonic-gate * on QP. If the post succeeded, the send buffers are freed upon 21677c478bd9Sstevel@tonic-gate * send completion in rib_sendwait() or in the scq_handler. 21687c478bd9Sstevel@tonic-gate */ 21697c478bd9Sstevel@tonic-gate rdma_stat 21707c478bd9Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid, 2171*0a701b1eSRobert Gordon int send_sig, int cv_sig, caddr_t *swid) 21727c478bd9Sstevel@tonic-gate { 21737c478bd9Sstevel@tonic-gate struct send_wid *wdesc; 21747c478bd9Sstevel@tonic-gate struct clist *clp; 21757c478bd9Sstevel@tonic-gate ibt_status_t ibt_status = IBT_SUCCESS; 21767c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 21777c478bd9Sstevel@tonic-gate ibt_send_wr_t tx_wr; 21787c478bd9Sstevel@tonic-gate int i, nds; 21797c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 21807c478bd9Sstevel@tonic-gate uint_t total_msg_size; 2181*0a701b1eSRobert Gordon rib_qp_t *qp; 2182*0a701b1eSRobert Gordon 2183*0a701b1eSRobert Gordon qp = ctoqp(conn); 21847c478bd9Sstevel@tonic-gate 21857c478bd9Sstevel@tonic-gate ASSERT(cl != NULL); 21867c478bd9Sstevel@tonic-gate 21877c478bd9Sstevel@tonic-gate bzero(&tx_wr, sizeof (ibt_send_wr_t)); 21887c478bd9Sstevel@tonic-gate 21897c478bd9Sstevel@tonic-gate nds = 0; 21907c478bd9Sstevel@tonic-gate total_msg_size = 0; 21917c478bd9Sstevel@tonic-gate clp = cl; 21927c478bd9Sstevel@tonic-gate while (clp != NULL) { 21937c478bd9Sstevel@tonic-gate if (nds >= DSEG_MAX) { 2194*0a701b1eSRobert Gordon 
DTRACE_PROBE(rpcib__i__sendandwait_dsegmax_exceeded); 21957c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 21967c478bd9Sstevel@tonic-gate } 2197*0a701b1eSRobert Gordon sgl[nds].ds_va = clp->w.c_saddr; 21987c478bd9Sstevel@tonic-gate sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */ 21997c478bd9Sstevel@tonic-gate sgl[nds].ds_len = clp->c_len; 22007c478bd9Sstevel@tonic-gate total_msg_size += clp->c_len; 22017c478bd9Sstevel@tonic-gate clp = clp->c_next; 22027c478bd9Sstevel@tonic-gate nds++; 22037c478bd9Sstevel@tonic-gate } 22047c478bd9Sstevel@tonic-gate 22057c478bd9Sstevel@tonic-gate if (send_sig) { 22067c478bd9Sstevel@tonic-gate /* Set SEND_SIGNAL flag. */ 22077c478bd9Sstevel@tonic-gate tx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 22087c478bd9Sstevel@tonic-gate wdesc = rib_init_sendwait(msgid, cv_sig, qp); 2209*0a701b1eSRobert Gordon *swid = (caddr_t)wdesc; 22107c478bd9Sstevel@tonic-gate } else { 22117c478bd9Sstevel@tonic-gate tx_wr.wr_flags = IBT_WR_NO_FLAGS; 22127c478bd9Sstevel@tonic-gate wdesc = rib_init_sendwait(msgid, 0, qp); 2213*0a701b1eSRobert Gordon *swid = (caddr_t)wdesc; 22147c478bd9Sstevel@tonic-gate } 22157c478bd9Sstevel@tonic-gate wdesc->nsbufs = nds; 22167c478bd9Sstevel@tonic-gate for (i = 0; i < nds; i++) { 22177c478bd9Sstevel@tonic-gate wdesc->sbufaddr[i] = sgl[i].ds_va; 22187c478bd9Sstevel@tonic-gate } 22197c478bd9Sstevel@tonic-gate 222011606941Sjwahlig tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 22217c478bd9Sstevel@tonic-gate tx_wr.wr_opcode = IBT_WRC_SEND; 22227c478bd9Sstevel@tonic-gate tx_wr.wr_trans = IBT_RC_SRV; 22237c478bd9Sstevel@tonic-gate tx_wr.wr_nds = nds; 22247c478bd9Sstevel@tonic-gate tx_wr.wr_sgl = sgl; 22257c478bd9Sstevel@tonic-gate 22267c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 2227*0a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 22287c478bd9Sstevel@tonic-gate ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL); 22297c478bd9Sstevel@tonic-gate } 2230*0a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 
22317c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 2232*0a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 2233*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 22347c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 22357c478bd9Sstevel@tonic-gate for (i = 0; i < nds; i++) { 22367c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, SEND_BUFFER, 223711606941Sjwahlig (void *)(uintptr_t)wdesc->sbufaddr[i]); 22387c478bd9Sstevel@tonic-gate } 2239*0a701b1eSRobert Gordon 22407c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wdesc); 2241*0a701b1eSRobert Gordon 2242*0a701b1eSRobert Gordon return (RDMA_CONNLOST); 22437c478bd9Sstevel@tonic-gate } 22447c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 22457c478bd9Sstevel@tonic-gate 22467c478bd9Sstevel@tonic-gate if (send_sig) { 22477c478bd9Sstevel@tonic-gate if (cv_sig) { 22487c478bd9Sstevel@tonic-gate /* 22497c478bd9Sstevel@tonic-gate * cv_wait for send to complete. 22507c478bd9Sstevel@tonic-gate * We can fail due to a timeout or signal or 22517c478bd9Sstevel@tonic-gate * unsuccessful send. 
22527c478bd9Sstevel@tonic-gate */ 22537c478bd9Sstevel@tonic-gate ret = rib_sendwait(qp, wdesc); 2254*0a701b1eSRobert Gordon 22557c478bd9Sstevel@tonic-gate return (ret); 22567c478bd9Sstevel@tonic-gate } 22577c478bd9Sstevel@tonic-gate } 22587c478bd9Sstevel@tonic-gate 22597c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 22607c478bd9Sstevel@tonic-gate } 22617c478bd9Sstevel@tonic-gate 2262*0a701b1eSRobert Gordon 22637c478bd9Sstevel@tonic-gate rdma_stat 22647c478bd9Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid) 22657c478bd9Sstevel@tonic-gate { 22667c478bd9Sstevel@tonic-gate rdma_stat ret; 2267*0a701b1eSRobert Gordon caddr_t wd; 22687c478bd9Sstevel@tonic-gate 22697c478bd9Sstevel@tonic-gate /* send-wait & cv_signal */ 2270*0a701b1eSRobert Gordon ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd); 22717c478bd9Sstevel@tonic-gate return (ret); 22727c478bd9Sstevel@tonic-gate } 22737c478bd9Sstevel@tonic-gate 22747c478bd9Sstevel@tonic-gate /* 22757c478bd9Sstevel@tonic-gate * Server interface (svc_rdma_ksend). 22767c478bd9Sstevel@tonic-gate * Send RPC reply and wait for RDMA_DONE. 
22777c478bd9Sstevel@tonic-gate */ 22787c478bd9Sstevel@tonic-gate rdma_stat 22797c478bd9Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid) 22807c478bd9Sstevel@tonic-gate { 22817c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 22827c478bd9Sstevel@tonic-gate struct rdma_done_list *rd; 22837c478bd9Sstevel@tonic-gate clock_t timout, cv_wait_ret; 2284*0a701b1eSRobert Gordon caddr_t *wid = NULL; 22857c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 22867c478bd9Sstevel@tonic-gate 22877c478bd9Sstevel@tonic-gate mutex_enter(&qp->rdlist_lock); 22887c478bd9Sstevel@tonic-gate rd = rdma_done_add(qp, msgid); 22897c478bd9Sstevel@tonic-gate 22907c478bd9Sstevel@tonic-gate /* No cv_signal (whether send-wait or no-send-wait) */ 2291*0a701b1eSRobert Gordon ret = rib_send_and_wait(conn, cl, msgid, 1, 0, wid); 22927c478bd9Sstevel@tonic-gate 2293*0a701b1eSRobert Gordon if (ret != RDMA_SUCCESS) { 2294*0a701b1eSRobert Gordon rdma_done_rm(qp, rd); 2295*0a701b1eSRobert Gordon } else { 22967c478bd9Sstevel@tonic-gate /* 22977c478bd9Sstevel@tonic-gate * Wait for RDMA_DONE from remote end 22987c478bd9Sstevel@tonic-gate */ 2299*0a701b1eSRobert Gordon timout = 2300*0a701b1eSRobert Gordon drv_usectohz(REPLY_WAIT_TIME * 1000000) + ddi_get_lbolt(); 2301*0a701b1eSRobert Gordon cv_wait_ret = cv_timedwait(&rd->rdma_done_cv, 2302*0a701b1eSRobert Gordon &qp->rdlist_lock, 23037c478bd9Sstevel@tonic-gate timout); 2304*0a701b1eSRobert Gordon 23057c478bd9Sstevel@tonic-gate rdma_done_rm(qp, rd); 2306*0a701b1eSRobert Gordon 23077c478bd9Sstevel@tonic-gate if (cv_wait_ret < 0) { 23087c478bd9Sstevel@tonic-gate ret = RDMA_TIMEDOUT; 2309*0a701b1eSRobert Gordon } 23107c478bd9Sstevel@tonic-gate } 23117c478bd9Sstevel@tonic-gate 23127c478bd9Sstevel@tonic-gate mutex_exit(&qp->rdlist_lock); 23137c478bd9Sstevel@tonic-gate return (ret); 23147c478bd9Sstevel@tonic-gate } 23157c478bd9Sstevel@tonic-gate 23167c478bd9Sstevel@tonic-gate static struct recv_wid * 23177c478bd9Sstevel@tonic-gate 
rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid) 23187c478bd9Sstevel@tonic-gate { 23197c478bd9Sstevel@tonic-gate struct recv_wid *rwid; 23207c478bd9Sstevel@tonic-gate 23217c478bd9Sstevel@tonic-gate rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP); 23227c478bd9Sstevel@tonic-gate rwid->xid = msgid; 23237c478bd9Sstevel@tonic-gate rwid->addr = sgl->ds_va; 23247c478bd9Sstevel@tonic-gate rwid->qp = qp; 23257c478bd9Sstevel@tonic-gate 23267c478bd9Sstevel@tonic-gate return (rwid); 23277c478bd9Sstevel@tonic-gate } 23287c478bd9Sstevel@tonic-gate 23297c478bd9Sstevel@tonic-gate static void 23307c478bd9Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid) 23317c478bd9Sstevel@tonic-gate { 23327c478bd9Sstevel@tonic-gate kmem_free(rwid, sizeof (struct recv_wid)); 23337c478bd9Sstevel@tonic-gate } 23347c478bd9Sstevel@tonic-gate 23357c478bd9Sstevel@tonic-gate rdma_stat 23367c478bd9Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid) 23377c478bd9Sstevel@tonic-gate { 23387c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 23397c478bd9Sstevel@tonic-gate struct clist *clp = cl; 23407c478bd9Sstevel@tonic-gate struct reply *rep; 23417c478bd9Sstevel@tonic-gate struct recv_wid *rwid; 23427c478bd9Sstevel@tonic-gate int nds; 23437c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 23447c478bd9Sstevel@tonic-gate ibt_recv_wr_t recv_wr; 23457c478bd9Sstevel@tonic-gate rdma_stat ret; 23467c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 23477c478bd9Sstevel@tonic-gate 23487c478bd9Sstevel@tonic-gate /* 23497c478bd9Sstevel@tonic-gate * rdma_clnt_postrecv uses RECV_BUFFER. 
23507c478bd9Sstevel@tonic-gate */ 23517c478bd9Sstevel@tonic-gate 23527c478bd9Sstevel@tonic-gate nds = 0; 23537c478bd9Sstevel@tonic-gate while (cl != NULL) { 23547c478bd9Sstevel@tonic-gate if (nds >= DSEG_MAX) { 23557c478bd9Sstevel@tonic-gate ret = RDMA_FAILED; 23567c478bd9Sstevel@tonic-gate goto done; 23577c478bd9Sstevel@tonic-gate } 2358*0a701b1eSRobert Gordon sgl[nds].ds_va = cl->w.c_saddr; 23597c478bd9Sstevel@tonic-gate sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 23607c478bd9Sstevel@tonic-gate sgl[nds].ds_len = cl->c_len; 23617c478bd9Sstevel@tonic-gate cl = cl->c_next; 23627c478bd9Sstevel@tonic-gate nds++; 23637c478bd9Sstevel@tonic-gate } 23647c478bd9Sstevel@tonic-gate 23657c478bd9Sstevel@tonic-gate if (nds != 1) { 23667c478bd9Sstevel@tonic-gate ret = RDMA_FAILED; 23677c478bd9Sstevel@tonic-gate goto done; 23687c478bd9Sstevel@tonic-gate } 2369*0a701b1eSRobert Gordon 23707c478bd9Sstevel@tonic-gate bzero(&recv_wr, sizeof (ibt_recv_wr_t)); 23717c478bd9Sstevel@tonic-gate recv_wr.wr_nds = nds; 23727c478bd9Sstevel@tonic-gate recv_wr.wr_sgl = sgl; 23737c478bd9Sstevel@tonic-gate 23747c478bd9Sstevel@tonic-gate rwid = rib_create_wid(qp, &sgl[0], msgid); 23757c478bd9Sstevel@tonic-gate if (rwid) { 237611606941Sjwahlig recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid; 23777c478bd9Sstevel@tonic-gate } else { 23787c478bd9Sstevel@tonic-gate ret = RDMA_NORESOURCE; 23797c478bd9Sstevel@tonic-gate goto done; 23807c478bd9Sstevel@tonic-gate } 23817c478bd9Sstevel@tonic-gate rep = rib_addreplylist(qp, msgid); 23827c478bd9Sstevel@tonic-gate if (!rep) { 23837c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 23847c478bd9Sstevel@tonic-gate ret = RDMA_NORESOURCE; 23857c478bd9Sstevel@tonic-gate goto done; 23867c478bd9Sstevel@tonic-gate } 23877c478bd9Sstevel@tonic-gate 23887c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 2389*0a701b1eSRobert Gordon 2390*0a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 23917c478bd9Sstevel@tonic-gate ibt_status = ibt_post_recv(qp->qp_hdl, 
&recv_wr, 1, NULL); 23927c478bd9Sstevel@tonic-gate } 2393*0a701b1eSRobert Gordon 2394*0a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 23957c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 2396*0a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 2397*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 23987c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 23997c478bd9Sstevel@tonic-gate rib_free_wid(rwid); 24007c478bd9Sstevel@tonic-gate (void) rib_rem_rep(qp, rep); 2401*0a701b1eSRobert Gordon ret = RDMA_CONNLOST; 24027c478bd9Sstevel@tonic-gate goto done; 24037c478bd9Sstevel@tonic-gate } 24047c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 24057c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 24067c478bd9Sstevel@tonic-gate 24077c478bd9Sstevel@tonic-gate done: 24087c478bd9Sstevel@tonic-gate while (clp != NULL) { 2409*0a701b1eSRobert Gordon rib_rbuf_free(conn, RECV_BUFFER, 2410*0a701b1eSRobert Gordon (void *)(uintptr_t)clp->w.c_saddr3); 24117c478bd9Sstevel@tonic-gate clp = clp->c_next; 24127c478bd9Sstevel@tonic-gate } 24137c478bd9Sstevel@tonic-gate return (ret); 24147c478bd9Sstevel@tonic-gate } 24157c478bd9Sstevel@tonic-gate 24167c478bd9Sstevel@tonic-gate rdma_stat 24177c478bd9Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl) 24187c478bd9Sstevel@tonic-gate { 24197c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 24207c478bd9Sstevel@tonic-gate struct svc_recv *s_recvp; 24217c478bd9Sstevel@tonic-gate int nds; 24227c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 24237c478bd9Sstevel@tonic-gate ibt_recv_wr_t recv_wr; 24247c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 24257c478bd9Sstevel@tonic-gate 24267c478bd9Sstevel@tonic-gate nds = 0; 24277c478bd9Sstevel@tonic-gate while (cl != NULL) { 24287c478bd9Sstevel@tonic-gate if (nds >= DSEG_MAX) { 24297c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 24307c478bd9Sstevel@tonic-gate } 2431*0a701b1eSRobert Gordon sgl[nds].ds_va = cl->w.c_saddr; 24327c478bd9Sstevel@tonic-gate 
sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 24337c478bd9Sstevel@tonic-gate sgl[nds].ds_len = cl->c_len; 24347c478bd9Sstevel@tonic-gate cl = cl->c_next; 24357c478bd9Sstevel@tonic-gate nds++; 24367c478bd9Sstevel@tonic-gate } 24377c478bd9Sstevel@tonic-gate 24387c478bd9Sstevel@tonic-gate if (nds != 1) { 2439*0a701b1eSRobert Gordon rib_rbuf_free(conn, RECV_BUFFER, 2440*0a701b1eSRobert Gordon (caddr_t)(uintptr_t)sgl[0].ds_va); 2441*0a701b1eSRobert Gordon 24427c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 24437c478bd9Sstevel@tonic-gate } 2444*0a701b1eSRobert Gordon 24457c478bd9Sstevel@tonic-gate bzero(&recv_wr, sizeof (ibt_recv_wr_t)); 24467c478bd9Sstevel@tonic-gate recv_wr.wr_nds = nds; 24477c478bd9Sstevel@tonic-gate recv_wr.wr_sgl = sgl; 24487c478bd9Sstevel@tonic-gate 24497c478bd9Sstevel@tonic-gate s_recvp = rib_init_svc_recv(qp, &sgl[0]); 245011606941Sjwahlig /* Use s_recvp's addr as wr id */ 245111606941Sjwahlig recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp; 24527c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 2453*0a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 24547c478bd9Sstevel@tonic-gate ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL); 24557c478bd9Sstevel@tonic-gate } 2456*0a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 24577c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 2458*0a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 2459*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 24607c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 246111606941Sjwahlig rib_rbuf_free(conn, RECV_BUFFER, 246211606941Sjwahlig (caddr_t)(uintptr_t)sgl[0].ds_va); 24637c478bd9Sstevel@tonic-gate (void) rib_free_svc_recv(s_recvp); 2464*0a701b1eSRobert Gordon 2465*0a701b1eSRobert Gordon return (RDMA_CONNLOST); 24667c478bd9Sstevel@tonic-gate } 24677c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 24687c478bd9Sstevel@tonic-gate 24697c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 24707c478bd9Sstevel@tonic-gate } 
24717c478bd9Sstevel@tonic-gate 24727c478bd9Sstevel@tonic-gate /* Client */ 24737c478bd9Sstevel@tonic-gate rdma_stat 24747c478bd9Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid) 24757c478bd9Sstevel@tonic-gate { 24767c478bd9Sstevel@tonic-gate 24777c478bd9Sstevel@tonic-gate return (rib_clnt_post(conn, cl, msgid)); 24787c478bd9Sstevel@tonic-gate } 24797c478bd9Sstevel@tonic-gate 2480*0a701b1eSRobert Gordon /* Client */ 2481*0a701b1eSRobert Gordon rdma_stat 2482*0a701b1eSRobert Gordon rib_post_resp_remove(CONN* conn, uint32_t msgid) 2483*0a701b1eSRobert Gordon { 2484*0a701b1eSRobert Gordon rib_qp_t *qp = ctoqp(conn); 2485*0a701b1eSRobert Gordon struct reply *rep; 2486*0a701b1eSRobert Gordon 2487*0a701b1eSRobert Gordon mutex_enter(&qp->replylist_lock); 2488*0a701b1eSRobert Gordon for (rep = qp->replylist; rep != NULL; rep = rep->next) { 2489*0a701b1eSRobert Gordon if (rep->xid == msgid) { 2490*0a701b1eSRobert Gordon if (rep->vaddr_cq) { 2491*0a701b1eSRobert Gordon rib_rbuf_free(conn, RECV_BUFFER, 2492*0a701b1eSRobert Gordon (caddr_t)(uintptr_t)rep->vaddr_cq); 2493*0a701b1eSRobert Gordon } 2494*0a701b1eSRobert Gordon (void) rib_remreply(qp, rep); 2495*0a701b1eSRobert Gordon break; 2496*0a701b1eSRobert Gordon } 2497*0a701b1eSRobert Gordon } 2498*0a701b1eSRobert Gordon mutex_exit(&qp->replylist_lock); 2499*0a701b1eSRobert Gordon 2500*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 2501*0a701b1eSRobert Gordon } 2502*0a701b1eSRobert Gordon 25037c478bd9Sstevel@tonic-gate /* Server */ 25047c478bd9Sstevel@tonic-gate rdma_stat 25057c478bd9Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl) 25067c478bd9Sstevel@tonic-gate { 25077c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 25087c478bd9Sstevel@tonic-gate 25097c478bd9Sstevel@tonic-gate if (rib_svc_post(conn, cl) == RDMA_SUCCESS) { 25107c478bd9Sstevel@tonic-gate mutex_enter(&qp->posted_rbufs_lock); 25117c478bd9Sstevel@tonic-gate qp->n_posted_rbufs++; 25127c478bd9Sstevel@tonic-gate 
mutex_exit(&qp->posted_rbufs_lock); 25137c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 25147c478bd9Sstevel@tonic-gate } 25157c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 25167c478bd9Sstevel@tonic-gate } 25177c478bd9Sstevel@tonic-gate 25187c478bd9Sstevel@tonic-gate /* 25197c478bd9Sstevel@tonic-gate * Client side only interface to "recv" the rpc reply buf 25207c478bd9Sstevel@tonic-gate * posted earlier by rib_post_resp(conn, cl, msgid). 25217c478bd9Sstevel@tonic-gate */ 25227c478bd9Sstevel@tonic-gate rdma_stat 25237c478bd9Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid) 25247c478bd9Sstevel@tonic-gate { 25257c478bd9Sstevel@tonic-gate struct reply *rep = NULL; 25267c478bd9Sstevel@tonic-gate clock_t timout, cv_wait_ret; 25277c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 25287c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 25297c478bd9Sstevel@tonic-gate 25307c478bd9Sstevel@tonic-gate /* 25317c478bd9Sstevel@tonic-gate * Find the reply structure for this msgid 25327c478bd9Sstevel@tonic-gate */ 25337c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 25347c478bd9Sstevel@tonic-gate 25357c478bd9Sstevel@tonic-gate for (rep = qp->replylist; rep != NULL; rep = rep->next) { 25367c478bd9Sstevel@tonic-gate if (rep->xid == msgid) 25377c478bd9Sstevel@tonic-gate break; 25387c478bd9Sstevel@tonic-gate } 2539*0a701b1eSRobert Gordon 25407c478bd9Sstevel@tonic-gate if (rep != NULL) { 25417c478bd9Sstevel@tonic-gate /* 25427c478bd9Sstevel@tonic-gate * If message not yet received, wait. 
25437c478bd9Sstevel@tonic-gate */ 25447c478bd9Sstevel@tonic-gate if (rep->status == (uint_t)REPLY_WAIT) { 25457c478bd9Sstevel@tonic-gate timout = ddi_get_lbolt() + 25467c478bd9Sstevel@tonic-gate drv_usectohz(REPLY_WAIT_TIME * 1000000); 2547*0a701b1eSRobert Gordon 25487c478bd9Sstevel@tonic-gate while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv, 25497c478bd9Sstevel@tonic-gate &qp->replylist_lock, timout)) > 0 && 2550*0a701b1eSRobert Gordon rep->status == (uint_t)REPLY_WAIT) 2551*0a701b1eSRobert Gordon ; 25527c478bd9Sstevel@tonic-gate 25537c478bd9Sstevel@tonic-gate switch (cv_wait_ret) { 25547c478bd9Sstevel@tonic-gate case -1: /* timeout */ 25557c478bd9Sstevel@tonic-gate ret = RDMA_TIMEDOUT; 25567c478bd9Sstevel@tonic-gate break; 25577c478bd9Sstevel@tonic-gate case 0: 25587c478bd9Sstevel@tonic-gate ret = RDMA_INTR; 25597c478bd9Sstevel@tonic-gate break; 25607c478bd9Sstevel@tonic-gate default: 25617c478bd9Sstevel@tonic-gate break; 25627c478bd9Sstevel@tonic-gate } 25637c478bd9Sstevel@tonic-gate } 25647c478bd9Sstevel@tonic-gate 25657c478bd9Sstevel@tonic-gate if (rep->status == RDMA_SUCCESS) { 25667c478bd9Sstevel@tonic-gate struct clist *cl = NULL; 25677c478bd9Sstevel@tonic-gate 25687c478bd9Sstevel@tonic-gate /* 25697c478bd9Sstevel@tonic-gate * Got message successfully 25707c478bd9Sstevel@tonic-gate */ 25717c478bd9Sstevel@tonic-gate clist_add(&cl, 0, rep->bytes_xfer, NULL, 257211606941Sjwahlig (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL); 25737c478bd9Sstevel@tonic-gate *clp = cl; 25747c478bd9Sstevel@tonic-gate } else { 25757c478bd9Sstevel@tonic-gate if (rep->status != (uint_t)REPLY_WAIT) { 25767c478bd9Sstevel@tonic-gate /* 25777c478bd9Sstevel@tonic-gate * Got error in reply message. Free 25787c478bd9Sstevel@tonic-gate * recv buffer here. 
25797c478bd9Sstevel@tonic-gate */ 25807c478bd9Sstevel@tonic-gate ret = rep->status; 25817c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, RECV_BUFFER, 258211606941Sjwahlig (caddr_t)(uintptr_t)rep->vaddr_cq); 25837c478bd9Sstevel@tonic-gate } 25847c478bd9Sstevel@tonic-gate } 25857c478bd9Sstevel@tonic-gate (void) rib_remreply(qp, rep); 25867c478bd9Sstevel@tonic-gate } else { 25877c478bd9Sstevel@tonic-gate /* 25887c478bd9Sstevel@tonic-gate * No matching reply structure found for given msgid on the 25897c478bd9Sstevel@tonic-gate * reply wait list. 25907c478bd9Sstevel@tonic-gate */ 25917c478bd9Sstevel@tonic-gate ret = RDMA_INVAL; 2592*0a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__nomatchxid2); 25937c478bd9Sstevel@tonic-gate } 25947c478bd9Sstevel@tonic-gate 25957c478bd9Sstevel@tonic-gate /* 25967c478bd9Sstevel@tonic-gate * Done. 25977c478bd9Sstevel@tonic-gate */ 25987c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 25997c478bd9Sstevel@tonic-gate return (ret); 26007c478bd9Sstevel@tonic-gate } 26017c478bd9Sstevel@tonic-gate 26027c478bd9Sstevel@tonic-gate /* 26037c478bd9Sstevel@tonic-gate * RDMA write a buffer to the remote address. 
26047c478bd9Sstevel@tonic-gate */ 26057c478bd9Sstevel@tonic-gate rdma_stat 26067c478bd9Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait) 26077c478bd9Sstevel@tonic-gate { 26087c478bd9Sstevel@tonic-gate ibt_send_wr_t tx_wr; 26097c478bd9Sstevel@tonic-gate int cv_sig; 2610*0a701b1eSRobert Gordon int i; 26117c478bd9Sstevel@tonic-gate ibt_wr_ds_t sgl[DSEG_MAX]; 26127c478bd9Sstevel@tonic-gate struct send_wid *wdesc; 26137c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 26147c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 26157c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 2616*0a701b1eSRobert Gordon uint64_t n_writes = 0; 2617*0a701b1eSRobert Gordon bool_t force_wait = FALSE; 26187c478bd9Sstevel@tonic-gate 26197c478bd9Sstevel@tonic-gate if (cl == NULL) { 26207c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 26217c478bd9Sstevel@tonic-gate } 26227c478bd9Sstevel@tonic-gate 2623*0a701b1eSRobert Gordon 2624*0a701b1eSRobert Gordon while ((cl != NULL)) { 2625*0a701b1eSRobert Gordon if (cl->c_len > 0) { 26267c478bd9Sstevel@tonic-gate bzero(&tx_wr, sizeof (ibt_send_wr_t)); 2627*0a701b1eSRobert Gordon tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->u.c_daddr; 2628*0a701b1eSRobert Gordon tx_wr.wr.rc.rcwr.rdma.rdma_rkey = 2629*0a701b1eSRobert Gordon cl->c_dmemhandle.mrc_rmr; /* rkey */ 2630*0a701b1eSRobert Gordon sgl[0].ds_va = cl->w.c_saddr; 2631*0a701b1eSRobert Gordon sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */ 2632*0a701b1eSRobert Gordon sgl[0].ds_len = cl->c_len; 26337c478bd9Sstevel@tonic-gate 26347c478bd9Sstevel@tonic-gate if (wait) { 26357c478bd9Sstevel@tonic-gate tx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 26367c478bd9Sstevel@tonic-gate cv_sig = 1; 26377c478bd9Sstevel@tonic-gate } else { 2638*0a701b1eSRobert Gordon if (n_writes > max_unsignaled_rws) { 2639*0a701b1eSRobert Gordon n_writes = 0; 2640*0a701b1eSRobert Gordon force_wait = TRUE; 2641*0a701b1eSRobert Gordon tx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 2642*0a701b1eSRobert Gordon cv_sig = 1; 
2643*0a701b1eSRobert Gordon } else { 26447c478bd9Sstevel@tonic-gate tx_wr.wr_flags = IBT_WR_NO_FLAGS; 26457c478bd9Sstevel@tonic-gate cv_sig = 0; 26467c478bd9Sstevel@tonic-gate } 2647*0a701b1eSRobert Gordon } 26487c478bd9Sstevel@tonic-gate 26497c478bd9Sstevel@tonic-gate wdesc = rib_init_sendwait(0, cv_sig, qp); 265011606941Sjwahlig tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 26517c478bd9Sstevel@tonic-gate tx_wr.wr_opcode = IBT_WRC_RDMAW; 26527c478bd9Sstevel@tonic-gate tx_wr.wr_trans = IBT_RC_SRV; 2653*0a701b1eSRobert Gordon tx_wr.wr_nds = 1; 26547c478bd9Sstevel@tonic-gate tx_wr.wr_sgl = sgl; 26557c478bd9Sstevel@tonic-gate 26567c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 2657*0a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 2658*0a701b1eSRobert Gordon ibt_status = 2659*0a701b1eSRobert Gordon ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL); 26607c478bd9Sstevel@tonic-gate } 2661*0a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 26627c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 2663*0a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 2664*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 26657c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 26667c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wdesc); 2667*0a701b1eSRobert Gordon return (RDMA_CONNLOST); 26687c478bd9Sstevel@tonic-gate } 26697c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 26707c478bd9Sstevel@tonic-gate 26717c478bd9Sstevel@tonic-gate /* 26727c478bd9Sstevel@tonic-gate * Wait for send to complete 26737c478bd9Sstevel@tonic-gate */ 2674*0a701b1eSRobert Gordon if (wait || force_wait) { 2675*0a701b1eSRobert Gordon force_wait = FALSE; 26767c478bd9Sstevel@tonic-gate ret = rib_sendwait(qp, wdesc); 26777c478bd9Sstevel@tonic-gate if (ret != 0) { 26787c478bd9Sstevel@tonic-gate return (ret); 26797c478bd9Sstevel@tonic-gate } 2680*0a701b1eSRobert Gordon } else { 2681*0a701b1eSRobert Gordon mutex_enter(&wdesc->sendwait_lock); 2682*0a701b1eSRobert Gordon for (i = 0; i 
< wdesc->nsbufs; i++) { 2683*0a701b1eSRobert Gordon rib_rbuf_free(qptoc(qp), SEND_BUFFER, 2684*0a701b1eSRobert Gordon (void *)(uintptr_t) 2685*0a701b1eSRobert Gordon wdesc->sbufaddr[i]); 2686*0a701b1eSRobert Gordon } 2687*0a701b1eSRobert Gordon mutex_exit(&wdesc->sendwait_lock); 2688*0a701b1eSRobert Gordon (void) rib_free_sendwait(wdesc); 2689*0a701b1eSRobert Gordon } 2690*0a701b1eSRobert Gordon n_writes ++; 2691*0a701b1eSRobert Gordon } 2692*0a701b1eSRobert Gordon cl = cl->c_next; 26937c478bd9Sstevel@tonic-gate } 26947c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 26957c478bd9Sstevel@tonic-gate } 26967c478bd9Sstevel@tonic-gate 26977c478bd9Sstevel@tonic-gate /* 26987c478bd9Sstevel@tonic-gate * RDMA Read a buffer from the remote address. 26997c478bd9Sstevel@tonic-gate */ 27007c478bd9Sstevel@tonic-gate rdma_stat 27017c478bd9Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait) 27027c478bd9Sstevel@tonic-gate { 27037c478bd9Sstevel@tonic-gate ibt_send_wr_t rx_wr; 27047c478bd9Sstevel@tonic-gate int cv_sig; 2705*0a701b1eSRobert Gordon int i; 2706*0a701b1eSRobert Gordon ibt_wr_ds_t sgl; 27077c478bd9Sstevel@tonic-gate struct send_wid *wdesc; 27087c478bd9Sstevel@tonic-gate ibt_status_t ibt_status = IBT_SUCCESS; 27097c478bd9Sstevel@tonic-gate rdma_stat ret = RDMA_SUCCESS; 27107c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 27117c478bd9Sstevel@tonic-gate 27127c478bd9Sstevel@tonic-gate if (cl == NULL) { 27137c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 27147c478bd9Sstevel@tonic-gate } 27157c478bd9Sstevel@tonic-gate 2716*0a701b1eSRobert Gordon while (cl != NULL) { 27177c478bd9Sstevel@tonic-gate bzero(&rx_wr, sizeof (ibt_send_wr_t)); 27187c478bd9Sstevel@tonic-gate /* 27197c478bd9Sstevel@tonic-gate * Remote address is at the head chunk item in list. 
27207c478bd9Sstevel@tonic-gate */ 2721*0a701b1eSRobert Gordon rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->w.c_saddr; 2722*0a701b1eSRobert Gordon rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr; 27237c478bd9Sstevel@tonic-gate 2724*0a701b1eSRobert Gordon sgl.ds_va = cl->u.c_daddr; 2725*0a701b1eSRobert Gordon sgl.ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */ 2726*0a701b1eSRobert Gordon sgl.ds_len = cl->c_len; 27277c478bd9Sstevel@tonic-gate 27287c478bd9Sstevel@tonic-gate if (wait) { 27297c478bd9Sstevel@tonic-gate rx_wr.wr_flags = IBT_WR_SEND_SIGNAL; 27307c478bd9Sstevel@tonic-gate cv_sig = 1; 27317c478bd9Sstevel@tonic-gate } else { 27327c478bd9Sstevel@tonic-gate rx_wr.wr_flags = IBT_WR_NO_FLAGS; 27337c478bd9Sstevel@tonic-gate cv_sig = 0; 27347c478bd9Sstevel@tonic-gate } 27357c478bd9Sstevel@tonic-gate 27367c478bd9Sstevel@tonic-gate wdesc = rib_init_sendwait(0, cv_sig, qp); 273711606941Sjwahlig rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc; 27387c478bd9Sstevel@tonic-gate rx_wr.wr_opcode = IBT_WRC_RDMAR; 27397c478bd9Sstevel@tonic-gate rx_wr.wr_trans = IBT_RC_SRV; 2740*0a701b1eSRobert Gordon rx_wr.wr_nds = 1; 2741*0a701b1eSRobert Gordon rx_wr.wr_sgl = &sgl; 27427c478bd9Sstevel@tonic-gate 27437c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 2744*0a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 27457c478bd9Sstevel@tonic-gate ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL); 27467c478bd9Sstevel@tonic-gate } 2747*0a701b1eSRobert Gordon if (conn->c_state != C_CONNECTED || 27487c478bd9Sstevel@tonic-gate ibt_status != IBT_SUCCESS) { 2749*0a701b1eSRobert Gordon if (conn->c_state != C_DISCONN_PEND) 2750*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 27517c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 27527c478bd9Sstevel@tonic-gate (void) rib_free_sendwait(wdesc); 2753*0a701b1eSRobert Gordon return (RDMA_CONNLOST); 27547c478bd9Sstevel@tonic-gate } 27557c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 27567c478bd9Sstevel@tonic-gate 
27577c478bd9Sstevel@tonic-gate /* 2758*0a701b1eSRobert Gordon * Wait for send to complete if this is the 2759*0a701b1eSRobert Gordon * last item in the list. 27607c478bd9Sstevel@tonic-gate */ 2761*0a701b1eSRobert Gordon if (wait && cl->c_next == NULL) { 27627c478bd9Sstevel@tonic-gate ret = rib_sendwait(qp, wdesc); 27637c478bd9Sstevel@tonic-gate if (ret != 0) { 27647c478bd9Sstevel@tonic-gate return (ret); 27657c478bd9Sstevel@tonic-gate } 2766*0a701b1eSRobert Gordon } else { 2767*0a701b1eSRobert Gordon mutex_enter(&wdesc->sendwait_lock); 2768*0a701b1eSRobert Gordon for (i = 0; i < wdesc->nsbufs; i++) { 2769*0a701b1eSRobert Gordon rib_rbuf_free(qptoc(qp), SEND_BUFFER, 2770*0a701b1eSRobert Gordon (void *)(uintptr_t)wdesc->sbufaddr[i]); 27717c478bd9Sstevel@tonic-gate } 2772*0a701b1eSRobert Gordon mutex_exit(&wdesc->sendwait_lock); 2773*0a701b1eSRobert Gordon (void) rib_free_sendwait(wdesc); 2774*0a701b1eSRobert Gordon } 2775*0a701b1eSRobert Gordon cl = cl->c_next; 2776*0a701b1eSRobert Gordon } 27777c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 27787c478bd9Sstevel@tonic-gate } 27797c478bd9Sstevel@tonic-gate 27807c478bd9Sstevel@tonic-gate /* 27817c478bd9Sstevel@tonic-gate * rib_srv_cm_handler() 27827c478bd9Sstevel@tonic-gate * Connection Manager callback to handle RC connection requests. 
*/
/*
 * Events handled:
 *   IBT_CM_EVENT_REQ_RCV	create a server channel, pre-post
 *				preposted_rbufs receive buffers, add the
 *				connection to hca->srv_conn_list and record
 *				the peer's IP address in conn->c_raddr.
 *				Returns IBT_CM_ACCEPT, or IBT_CM_REJECT on
 *				any failure.
 *   IBT_CM_EVENT_CONN_CLOSED	for closes not listed as locally initiated,
 *				mark the connection C_ERROR_CONN, free the
 *				channel and, if c_ref is 0, disconnect it.
 *   IBT_CM_EVENT_CONN_EST	optional debug message only.
 *   anything else		returns IBT_CM_DEFAULT.
 *
 * "any" is the rpcib_state_t registered with the CM; priv_data and len
 * are not used.
 */
/* ARGSUSED */
static ibt_cm_status_t
rib_srv_cm_handler(void *any, ibt_cm_event_t *event,
	ibt_cm_return_args_t *ret_args, void *priv_data,
	ibt_priv_data_len_t len)
{
	queue_t		*q;
	rib_qp_t	*qp;
	rpcib_state_t	*ribstat;
	rib_hca_t	*hca;
	rdma_stat	status = RDMA_SUCCESS;
	int		i;
	struct clist	cl;
	rdma_buf_t	rdbuf = {0};
	void		*buf = NULL;
	CONN		*conn;
	ibt_ip_cm_info_t	ipinfo;
	struct sockaddr_in *s;
	struct sockaddr_in6 *s6;
	int sin_size = sizeof (struct sockaddr_in);
	int in_size = sizeof (struct in_addr);
	int sin6_size = sizeof (struct sockaddr_in6);

	ASSERT(any != NULL);
	ASSERT(event != NULL);

	ribstat = (rpcib_state_t *)any;
	hca = (rib_hca_t *)ribstat->hca;
	ASSERT(hca != NULL);

	/* got a connection request */
	switch (event->cm_type) {
	case IBT_CM_EVENT_REQ_RCV:
		/*
		 * If the plugin is in the NO_ACCEPT state, bail out.
		 */
		mutex_enter(&plugin_state_lock);
		if (plugin_state == NO_ACCEPT) {
			mutex_exit(&plugin_state_lock);
			return (IBT_CM_REJECT);
		}
		mutex_exit(&plugin_state_lock);

		/*
		 * Need to send a MRA MAD to CM so that it does not
		 * timeout on us.
		 */
		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
		    event->cm_event.req.req_timeout * 8, NULL, 0);

		/*
		 * The queue pointer is read from the global rib_stat (not
		 * the ribstat argument) under open_hca_lock.
		 */
		mutex_enter(&rib_stat->open_hca_lock);
		q = rib_stat->q;
		mutex_exit(&rib_stat->open_hca_lock);

		status = rib_svc_create_chan(hca, (caddr_t)q,
		    event->cm_event.req.req_prim_hca_port, &qp);

		if (status) {
			return (IBT_CM_REJECT);
		}

		/* Fill in the reply arguments handed back to the CM. */
		ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
		ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
		ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
		ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;

		/*
		 * Pre-posts RECV buffers
		 */
		conn = qptoc(qp);
		for (i = 0; i < preposted_rbufs; i++) {
			bzero(&rdbuf, sizeof (rdbuf));
			rdbuf.type = RECV_BUFFER;
			buf = rib_rbuf_alloc(conn, &rdbuf);
			if (buf == NULL) {
				(void) rib_disconnect_channel(conn, NULL);
				return (IBT_CM_REJECT);
			}

			/* Describe the buffer as a one-entry chunk list. */
			bzero(&cl, sizeof (cl));
			cl.w.c_saddr3 = (caddr_t)rdbuf.addr;
			cl.c_len = rdbuf.len;
			cl.c_smemhandle.mrc_lmr =
			    rdbuf.handle.mrc_lmr; /* lkey */
			cl.c_next = NULL;
			status = rib_post_recv(conn, &cl);
			if (status != RDMA_SUCCESS) {
				(void) rib_disconnect_channel(conn, NULL);
				return (IBT_CM_REJECT);
			}
		}
		(void) rib_add_connlist(conn, &hca->srv_conn_list);

		/*
		 * Get the address translation
		 *
		 * NOTE(review): from here on the REJECT paths return
		 * without calling rib_disconnect_channel(), unlike the
		 * pre-post failures above, so the connection stays on
		 * srv_conn_list -- confirm it is reclaimed elsewhere.
		 */
		rw_enter(&hca->state_lock, RW_READER);
		if (hca->state == HCA_DETACHED) {
			rw_exit(&hca->state_lock);
			return (IBT_CM_REJECT);
		}
		rw_exit(&hca->state_lock);

		bzero(&ipinfo, sizeof (ibt_ip_cm_info_t));

		/* Extract the IP addressing info from the REQ private data. */
		if (ibt_get_ip_data(event->cm_priv_data_len,
		    event->cm_priv_data,
		    &ipinfo) != IBT_SUCCESS) {

			return (IBT_CM_REJECT);
		}

		/*
		 * Record the peer's address in conn->c_raddr as a
		 * sockaddr of the matching family; only the family and
		 * address fields are filled in, the rest stays zeroed.
		 */
		switch (ipinfo.src_addr.family) {
		case AF_INET:

			conn->c_raddr.maxlen =
			    conn->c_raddr.len = sin_size;
			conn->c_raddr.buf = kmem_zalloc(sin_size, KM_SLEEP);

			s = (struct sockaddr_in *)conn->c_raddr.buf;
			s->sin_family = AF_INET;

			bcopy((void *)&ipinfo.src_addr.un.ip4addr,
			    &s->sin_addr, in_size);

			break;

		case AF_INET6:

			conn->c_raddr.maxlen =
			    conn->c_raddr.len = sin6_size;
			conn->c_raddr.buf = kmem_zalloc(sin6_size, KM_SLEEP);

			s6 = (struct sockaddr_in6 *)conn->c_raddr.buf;
			s6->sin6_family = AF_INET6;
			bcopy((void *)&ipinfo.src_addr.un.ip6addr,
			    &s6->sin6_addr,
			    sizeof (struct in6_addr));

			break;

		default:
			/* Unknown address family in the REQ. */
			return (IBT_CM_REJECT);
		}

		break;

	case IBT_CM_EVENT_CONN_CLOSED:
	{
		/* These shadow the function-level conn and qp. */
		CONN		*conn;
		rib_qp_t	*qp;

		switch (event->cm_event.closed) {
		case IBT_CM_CLOSED_DREP_RCVD:
		case IBT_CM_CLOSED_DREQ_TIMEOUT:
		case IBT_CM_CLOSED_DUP:
		case IBT_CM_CLOSED_ABORT:
		case IBT_CM_CLOSED_ALREADY:
			/*
			 * These cases indicate the local end initiated
			 * the closing of the channel. Nothing to do here.
			 */
			break;
		default:
			/*
			 * Reason for CONN_CLOSED event must be one of
			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
			 * or IBT_CM_CLOSED_STALE. These indicate cases where
			 * the remote end is closing the channel. In these
			 * cases free the channel and transition to error
			 * state
			 */
			qp = ibt_get_chan_private(event->cm_channel);
			conn = qptoc(qp);
			mutex_enter(&conn->c_lock);
			if (conn->c_state == C_DISCONN_PEND) {
				/* A disconnect is already in progress. */
				mutex_exit(&conn->c_lock);
				break;
			}
			conn->c_state = C_ERROR_CONN;

			/*
			 * Free the rc_channel. Channel has already
			 * transitioned to ERROR state and WRs have been
			 * FLUSHED_ERR already.
			 */
			(void) ibt_free_channel(qp->qp_hdl);
			qp->qp_hdl = NULL;

			/*
			 * Free the conn if c_ref goes down to 0
			 */
			if (conn->c_ref == 0) {
				/*
				 * Remove from list and free conn
				 */
				conn->c_state = C_DISCONN_PEND;
				mutex_exit(&conn->c_lock);
				(void) rib_disconnect_channel(conn,
				    &hca->srv_conn_list);
			} else {
				mutex_exit(&conn->c_lock);
			}
			DTRACE_PROBE(rpcib__i__srvcm_chandisconnect);
			break;
		}
		break;
	}
	case IBT_CM_EVENT_CONN_EST:
		/*
		 * RTU received, hence connection established.
		 */
		if (rib_debug > 1)
			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
			    "(CONN_EST) channel established");
		break;

	default:
		if (rib_debug > 2) {
			/* Let CM handle the following events. */
			if (event->cm_type == IBT_CM_EVENT_REP_RCV) {
				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
				    "server recv'ed IBT_CM_EVENT_REP_RCV\n");
			} else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) {
				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
				    "server recv'ed IBT_CM_EVENT_LAP_RCV\n");
			} else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) {
				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
				    "server recv'ed IBT_CM_EVENT_MRA_RCV\n");
			} else if (event->cm_type == IBT_CM_EVENT_APR_RCV) {
				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
				    "server recv'ed IBT_CM_EVENT_APR_RCV\n");
			} else if (event->cm_type == IBT_CM_EVENT_FAILURE) {
				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
				    "server recv'ed IBT_CM_EVENT_FAILURE\n");
			}
		}
		return (IBT_CM_DEFAULT);
	}

	/* accept all other CM messages (i.e. let the CM handle them) */
	return (IBT_CM_ACCEPT);
}

static rdma_stat
rib_register_service(rib_hca_t *hca, int service_type)
{
	ibt_srv_desc_t		sdesc;
	ibt_hca_portinfo_t	*port_infop;
	ib_svc_id_t		srv_id;
	ibt_srv_hdl_t		srv_hdl;
	uint_t			port_size;
	uint_t			pki, i, num_ports, nbinds;
	ibt_status_t		ibt_status;
	rib_service_t		*new_service;
	ib_pkey_t		pkey;

	/*
	 * Query all ports for the given HCA
	 */
	rw_enter(&hca->state_lock, RW_READER);
	if (hca->state != HCA_DETACHED) {
		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
		    &num_ports, &port_size);
		rw_exit(&hca->state_lock);
	} else {
		rw_exit(&hca->state_lock);
		return (RDMA_FAILED);
	}
	if (ibt_status != IBT_SUCCESS) {
		return (RDMA_FAILED);
	}

	DTRACE_PROBE1(rpcib__i__regservice_numports,
	    int, num_ports);

	for (i = 0; i < num_ports; i++) {
		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
Gordon DTRACE_PROBE1(rpcib__i__regservice__portinactive, 3063*0a701b1eSRobert Gordon int, i+1); 3064*0a701b1eSRobert Gordon } else if (port_infop[i].p_linkstate == IBT_PORT_ACTIVE) { 3065*0a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__regservice__portactive, 3066*0a701b1eSRobert Gordon int, i+1); 30677c478bd9Sstevel@tonic-gate } 30687c478bd9Sstevel@tonic-gate } 3069*0a701b1eSRobert Gordon 30707c478bd9Sstevel@tonic-gate /* 30717c478bd9Sstevel@tonic-gate * Get all the IP addresses on this system to register the 30727c478bd9Sstevel@tonic-gate * given "service type" on all DNS recognized IP addrs. 30737c478bd9Sstevel@tonic-gate * Each service type such as NFS will have all the systems 30747c478bd9Sstevel@tonic-gate * IP addresses as its different names. For now the only 30757c478bd9Sstevel@tonic-gate * type of service we support in RPCIB is NFS. 30767c478bd9Sstevel@tonic-gate */ 30777c478bd9Sstevel@tonic-gate rw_enter(&hca->service_list_lock, RW_WRITER); 30787c478bd9Sstevel@tonic-gate /* 30797c478bd9Sstevel@tonic-gate * Start registering and binding service to active 30807c478bd9Sstevel@tonic-gate * on active ports on this HCA. 30817c478bd9Sstevel@tonic-gate */ 30827c478bd9Sstevel@tonic-gate nbinds = 0; 30837c478bd9Sstevel@tonic-gate new_service = NULL; 30847c478bd9Sstevel@tonic-gate 30857c478bd9Sstevel@tonic-gate /* 30867c478bd9Sstevel@tonic-gate * We use IP addresses as the service names for 30877c478bd9Sstevel@tonic-gate * service registration. Register each of them 30887c478bd9Sstevel@tonic-gate * with CM to obtain a svc_id and svc_hdl. We do not 30897c478bd9Sstevel@tonic-gate * register the service with machine's loopback address. 
30907c478bd9Sstevel@tonic-gate */ 30917c478bd9Sstevel@tonic-gate (void) bzero(&srv_id, sizeof (ib_svc_id_t)); 30927c478bd9Sstevel@tonic-gate (void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t)); 30937c478bd9Sstevel@tonic-gate (void) bzero(&sdesc, sizeof (ibt_srv_desc_t)); 30947c478bd9Sstevel@tonic-gate 30957c478bd9Sstevel@tonic-gate sdesc.sd_handler = rib_srv_cm_handler; 30967c478bd9Sstevel@tonic-gate sdesc.sd_flags = 0; 30977c478bd9Sstevel@tonic-gate ibt_status = ibt_register_service(hca->ibt_clnt_hdl, 3098*0a701b1eSRobert Gordon &sdesc, ibt_get_ip_sid(IPPROTO_TCP, NFS_RDMA_PORT), 3099*0a701b1eSRobert Gordon 1, &srv_hdl, &srv_id); 3100*0a701b1eSRobert Gordon 31017c478bd9Sstevel@tonic-gate for (i = 0; i < num_ports; i++) { 31027c478bd9Sstevel@tonic-gate if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) 31037c478bd9Sstevel@tonic-gate continue; 31047c478bd9Sstevel@tonic-gate 31057c478bd9Sstevel@tonic-gate for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) { 31067c478bd9Sstevel@tonic-gate pkey = port_infop[i].p_pkey_tbl[pki]; 3107*0a701b1eSRobert Gordon if ((pkey & IBSRM_HB) && 3108*0a701b1eSRobert Gordon (pkey != IB_PKEY_INVALID_FULL)) { 31097c478bd9Sstevel@tonic-gate 31107c478bd9Sstevel@tonic-gate /* 31117c478bd9Sstevel@tonic-gate * Allocate and prepare a service entry 31127c478bd9Sstevel@tonic-gate */ 3113*0a701b1eSRobert Gordon new_service = 3114*0a701b1eSRobert Gordon kmem_zalloc(1 * sizeof (rib_service_t), 31157c478bd9Sstevel@tonic-gate KM_SLEEP); 31167c478bd9Sstevel@tonic-gate 3117*0a701b1eSRobert Gordon new_service->srv_type = service_type; 3118*0a701b1eSRobert Gordon new_service->srv_hdl = srv_hdl; 31197c478bd9Sstevel@tonic-gate new_service->srv_next = NULL; 31207c478bd9Sstevel@tonic-gate 31217c478bd9Sstevel@tonic-gate ibt_status = ibt_bind_service(srv_hdl, 3122*0a701b1eSRobert Gordon port_infop[i].p_sgid_tbl[0], 3123*0a701b1eSRobert Gordon NULL, rib_stat, NULL); 3124*0a701b1eSRobert Gordon 3125*0a701b1eSRobert Gordon 
DTRACE_PROBE1(rpcib__i__regservice__bindres, 3126*0a701b1eSRobert Gordon int, ibt_status); 3127*0a701b1eSRobert Gordon 31287c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 31297c478bd9Sstevel@tonic-gate kmem_free(new_service, 31307c478bd9Sstevel@tonic-gate sizeof (rib_service_t)); 31317c478bd9Sstevel@tonic-gate new_service = NULL; 31327c478bd9Sstevel@tonic-gate continue; 31337c478bd9Sstevel@tonic-gate } 3134*0a701b1eSRobert Gordon 31357c478bd9Sstevel@tonic-gate /* 31367c478bd9Sstevel@tonic-gate * Add to the service list for this HCA 31377c478bd9Sstevel@tonic-gate */ 31387c478bd9Sstevel@tonic-gate new_service->srv_next = hca->service_list; 31397c478bd9Sstevel@tonic-gate hca->service_list = new_service; 31407c478bd9Sstevel@tonic-gate new_service = NULL; 31417c478bd9Sstevel@tonic-gate nbinds++; 31427c478bd9Sstevel@tonic-gate } 31437c478bd9Sstevel@tonic-gate } 31447c478bd9Sstevel@tonic-gate } 31457c478bd9Sstevel@tonic-gate rw_exit(&hca->service_list_lock); 31467c478bd9Sstevel@tonic-gate 31477c478bd9Sstevel@tonic-gate ibt_free_portinfo(port_infop, port_size); 31487c478bd9Sstevel@tonic-gate 31497c478bd9Sstevel@tonic-gate if (nbinds == 0) { 31507c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 31517c478bd9Sstevel@tonic-gate } else { 31527c478bd9Sstevel@tonic-gate /* 31537c478bd9Sstevel@tonic-gate * Put this plugin into accept state, since atleast 31547c478bd9Sstevel@tonic-gate * one registration was successful. 
31557c478bd9Sstevel@tonic-gate */ 31567c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 31577c478bd9Sstevel@tonic-gate plugin_state = ACCEPT; 31587c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 31597c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 31607c478bd9Sstevel@tonic-gate } 31617c478bd9Sstevel@tonic-gate } 31627c478bd9Sstevel@tonic-gate 31637c478bd9Sstevel@tonic-gate void 31647c478bd9Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd) 31657c478bd9Sstevel@tonic-gate { 31667c478bd9Sstevel@tonic-gate rdma_stat status = RDMA_SUCCESS; 31677c478bd9Sstevel@tonic-gate 31687c478bd9Sstevel@tonic-gate rd->active = 0; 31697c478bd9Sstevel@tonic-gate rd->err_code = RDMA_FAILED; 31707c478bd9Sstevel@tonic-gate 31717c478bd9Sstevel@tonic-gate /* 31727c478bd9Sstevel@tonic-gate * First check if a hca is still attached 31737c478bd9Sstevel@tonic-gate */ 31747c478bd9Sstevel@tonic-gate rw_enter(&rib_stat->hca->state_lock, RW_READER); 31757c478bd9Sstevel@tonic-gate if (rib_stat->hca->state != HCA_INITED) { 31767c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 31777c478bd9Sstevel@tonic-gate return; 31787c478bd9Sstevel@tonic-gate } 31797c478bd9Sstevel@tonic-gate rw_exit(&rib_stat->hca->state_lock); 31807c478bd9Sstevel@tonic-gate 31817c478bd9Sstevel@tonic-gate rib_stat->q = &rd->q; 31827c478bd9Sstevel@tonic-gate /* 31837c478bd9Sstevel@tonic-gate * Right now the only service type is NFS. Hence force feed this 31847c478bd9Sstevel@tonic-gate * value. Ideally to communicate the service type it should be 31857c478bd9Sstevel@tonic-gate * passed down in rdma_svc_data. 
31867c478bd9Sstevel@tonic-gate */ 31877c478bd9Sstevel@tonic-gate rib_stat->service_type = NFS; 31887c478bd9Sstevel@tonic-gate status = rib_register_service(rib_stat->hca, NFS); 31897c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 31907c478bd9Sstevel@tonic-gate rd->err_code = status; 31917c478bd9Sstevel@tonic-gate return; 31927c478bd9Sstevel@tonic-gate } 31937c478bd9Sstevel@tonic-gate /* 31947c478bd9Sstevel@tonic-gate * Service active on an HCA, check rd->err_code for more 31957c478bd9Sstevel@tonic-gate * explainable errors. 31967c478bd9Sstevel@tonic-gate */ 31977c478bd9Sstevel@tonic-gate rd->active = 1; 31987c478bd9Sstevel@tonic-gate rd->err_code = status; 31997c478bd9Sstevel@tonic-gate } 32007c478bd9Sstevel@tonic-gate 32017c478bd9Sstevel@tonic-gate /* XXXX */ 32027c478bd9Sstevel@tonic-gate /* ARGSUSED */ 32037c478bd9Sstevel@tonic-gate static void 32047c478bd9Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata) 32057c478bd9Sstevel@tonic-gate { 32067c478bd9Sstevel@tonic-gate rib_hca_t *hca; 32077c478bd9Sstevel@tonic-gate 32087c478bd9Sstevel@tonic-gate /* 32097c478bd9Sstevel@tonic-gate * KRPC called the RDMATF to stop the listeners, this means 32107c478bd9Sstevel@tonic-gate * stop sending incomming or recieved requests to KRPC master 32117c478bd9Sstevel@tonic-gate * transport handle for RDMA-IB. This is also means that the 32127c478bd9Sstevel@tonic-gate * master transport handle, responsible for us, is going away. 
32137c478bd9Sstevel@tonic-gate */ 32147c478bd9Sstevel@tonic-gate mutex_enter(&plugin_state_lock); 32157c478bd9Sstevel@tonic-gate plugin_state = NO_ACCEPT; 32167c478bd9Sstevel@tonic-gate if (svcdata != NULL) 32177c478bd9Sstevel@tonic-gate svcdata->active = 0; 32187c478bd9Sstevel@tonic-gate mutex_exit(&plugin_state_lock); 32197c478bd9Sstevel@tonic-gate 32207c478bd9Sstevel@tonic-gate /* 32217c478bd9Sstevel@tonic-gate * First check if a hca is still attached 32227c478bd9Sstevel@tonic-gate */ 32237c478bd9Sstevel@tonic-gate hca = rib_stat->hca; 32247c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 32257c478bd9Sstevel@tonic-gate if (hca->state != HCA_INITED) { 32267c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 32277c478bd9Sstevel@tonic-gate return; 32287c478bd9Sstevel@tonic-gate } 3229*0a701b1eSRobert Gordon rib_close_channels(&hca->srv_conn_list); 32307c478bd9Sstevel@tonic-gate rib_stop_services(hca); 32317c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 32327c478bd9Sstevel@tonic-gate } 32337c478bd9Sstevel@tonic-gate 32347c478bd9Sstevel@tonic-gate /* 32357c478bd9Sstevel@tonic-gate * Traverse the HCA's service list to unbind and deregister services. 32367c478bd9Sstevel@tonic-gate * Instead of unbinding the service for a service handle by 32377c478bd9Sstevel@tonic-gate * calling ibt_unbind_service() for each port/pkey, we unbind 32387c478bd9Sstevel@tonic-gate * all the services for the service handle by making only one 32397c478bd9Sstevel@tonic-gate * call to ibt_unbind_all_services(). Then, we deregister the 32407c478bd9Sstevel@tonic-gate * service for the service handle. 32417c478bd9Sstevel@tonic-gate * 32427c478bd9Sstevel@tonic-gate * When traversing the entries in service_list, we compare the 32437c478bd9Sstevel@tonic-gate * srv_hdl of the current entry with that of the next. 
If they 32447c478bd9Sstevel@tonic-gate * are different or if the next entry is NULL, the current entry 32457c478bd9Sstevel@tonic-gate * marks the last binding of the service handle. In this case, 32467c478bd9Sstevel@tonic-gate * call ibt_unbind_all_services() and deregister the service for 32477c478bd9Sstevel@tonic-gate * the service handle. If they are the same, the current and the 32487c478bd9Sstevel@tonic-gate * next entries are bound to the same service handle. In this 32497c478bd9Sstevel@tonic-gate * case, move on to the next entry. 32507c478bd9Sstevel@tonic-gate */ 32517c478bd9Sstevel@tonic-gate static void 32527c478bd9Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca) 32537c478bd9Sstevel@tonic-gate { 32547c478bd9Sstevel@tonic-gate rib_service_t *srv_list, *to_remove; 32557c478bd9Sstevel@tonic-gate 32567c478bd9Sstevel@tonic-gate /* 32577c478bd9Sstevel@tonic-gate * unbind and deregister the services for this service type. 32587c478bd9Sstevel@tonic-gate * Right now there is only one service type. In future it will 32597c478bd9Sstevel@tonic-gate * be passed down to this function. 
32607c478bd9Sstevel@tonic-gate */ 32617c478bd9Sstevel@tonic-gate rw_enter(&hca->service_list_lock, RW_WRITER); 32627c478bd9Sstevel@tonic-gate srv_list = hca->service_list; 32637c478bd9Sstevel@tonic-gate while (srv_list != NULL) { 32647c478bd9Sstevel@tonic-gate to_remove = srv_list; 32657c478bd9Sstevel@tonic-gate srv_list = to_remove->srv_next; 32667c478bd9Sstevel@tonic-gate if (srv_list == NULL || bcmp(to_remove->srv_hdl, 32677c478bd9Sstevel@tonic-gate srv_list->srv_hdl, sizeof (ibt_srv_hdl_t))) { 32687c478bd9Sstevel@tonic-gate 3269*0a701b1eSRobert Gordon (void) ibt_unbind_all_services(to_remove->srv_hdl); 3270*0a701b1eSRobert Gordon (void) ibt_deregister_service(hca->ibt_clnt_hdl, 32717c478bd9Sstevel@tonic-gate to_remove->srv_hdl); 32727c478bd9Sstevel@tonic-gate } 32737c478bd9Sstevel@tonic-gate 32747c478bd9Sstevel@tonic-gate kmem_free(to_remove, sizeof (rib_service_t)); 32757c478bd9Sstevel@tonic-gate } 32767c478bd9Sstevel@tonic-gate hca->service_list = NULL; 32777c478bd9Sstevel@tonic-gate rw_exit(&hca->service_list_lock); 32787c478bd9Sstevel@tonic-gate } 32797c478bd9Sstevel@tonic-gate 32807c478bd9Sstevel@tonic-gate static struct svc_recv * 32817c478bd9Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl) 32827c478bd9Sstevel@tonic-gate { 32837c478bd9Sstevel@tonic-gate struct svc_recv *recvp; 32847c478bd9Sstevel@tonic-gate 32857c478bd9Sstevel@tonic-gate recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP); 32867c478bd9Sstevel@tonic-gate recvp->vaddr = sgl->ds_va; 32877c478bd9Sstevel@tonic-gate recvp->qp = qp; 32887c478bd9Sstevel@tonic-gate recvp->bytes_xfer = 0; 32897c478bd9Sstevel@tonic-gate return (recvp); 32907c478bd9Sstevel@tonic-gate } 32917c478bd9Sstevel@tonic-gate 32927c478bd9Sstevel@tonic-gate static int 32937c478bd9Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp) 32947c478bd9Sstevel@tonic-gate { 32957c478bd9Sstevel@tonic-gate kmem_free(recvp, sizeof (*recvp)); 32967c478bd9Sstevel@tonic-gate 32977c478bd9Sstevel@tonic-gate return 
(0); 32987c478bd9Sstevel@tonic-gate } 32997c478bd9Sstevel@tonic-gate 33007c478bd9Sstevel@tonic-gate static struct reply * 33017c478bd9Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid) 33027c478bd9Sstevel@tonic-gate { 33037c478bd9Sstevel@tonic-gate struct reply *rep; 33047c478bd9Sstevel@tonic-gate 33057c478bd9Sstevel@tonic-gate 33067c478bd9Sstevel@tonic-gate rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP); 33077c478bd9Sstevel@tonic-gate if (rep == NULL) { 3308*0a701b1eSRobert Gordon DTRACE_PROBE(rpcib__i__addrreply__nomem); 33097c478bd9Sstevel@tonic-gate return (NULL); 33107c478bd9Sstevel@tonic-gate } 33117c478bd9Sstevel@tonic-gate rep->xid = msgid; 33127c478bd9Sstevel@tonic-gate rep->vaddr_cq = NULL; 33137c478bd9Sstevel@tonic-gate rep->bytes_xfer = 0; 33147c478bd9Sstevel@tonic-gate rep->status = (uint_t)REPLY_WAIT; 33157c478bd9Sstevel@tonic-gate rep->prev = NULL; 33167c478bd9Sstevel@tonic-gate cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL); 33177c478bd9Sstevel@tonic-gate 33187c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 33197c478bd9Sstevel@tonic-gate if (qp->replylist) { 33207c478bd9Sstevel@tonic-gate rep->next = qp->replylist; 33217c478bd9Sstevel@tonic-gate qp->replylist->prev = rep; 33227c478bd9Sstevel@tonic-gate } 33237c478bd9Sstevel@tonic-gate qp->rep_list_size++; 3324*0a701b1eSRobert Gordon 3325*0a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__addrreply__listsize, 3326*0a701b1eSRobert Gordon int, qp->rep_list_size); 3327*0a701b1eSRobert Gordon 33287c478bd9Sstevel@tonic-gate qp->replylist = rep; 33297c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 33307c478bd9Sstevel@tonic-gate 33317c478bd9Sstevel@tonic-gate return (rep); 33327c478bd9Sstevel@tonic-gate } 33337c478bd9Sstevel@tonic-gate 33347c478bd9Sstevel@tonic-gate static rdma_stat 33357c478bd9Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp) 33367c478bd9Sstevel@tonic-gate { 33377c478bd9Sstevel@tonic-gate struct reply *r, *n; 33387c478bd9Sstevel@tonic-gate 
33397c478bd9Sstevel@tonic-gate mutex_enter(&qp->replylist_lock); 33407c478bd9Sstevel@tonic-gate for (r = qp->replylist; r != NULL; r = n) { 33417c478bd9Sstevel@tonic-gate n = r->next; 33427c478bd9Sstevel@tonic-gate (void) rib_remreply(qp, r); 33437c478bd9Sstevel@tonic-gate } 33447c478bd9Sstevel@tonic-gate mutex_exit(&qp->replylist_lock); 33457c478bd9Sstevel@tonic-gate 33467c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 33477c478bd9Sstevel@tonic-gate } 33487c478bd9Sstevel@tonic-gate 33497c478bd9Sstevel@tonic-gate static int 33507c478bd9Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep) 33517c478bd9Sstevel@tonic-gate { 33527c478bd9Sstevel@tonic-gate 33537c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->replylist_lock)); 33547c478bd9Sstevel@tonic-gate if (rep->prev) { 33557c478bd9Sstevel@tonic-gate rep->prev->next = rep->next; 33567c478bd9Sstevel@tonic-gate } 33577c478bd9Sstevel@tonic-gate if (rep->next) { 33587c478bd9Sstevel@tonic-gate rep->next->prev = rep->prev; 33597c478bd9Sstevel@tonic-gate } 33607c478bd9Sstevel@tonic-gate if (qp->replylist == rep) 33617c478bd9Sstevel@tonic-gate qp->replylist = rep->next; 33627c478bd9Sstevel@tonic-gate 33637c478bd9Sstevel@tonic-gate cv_destroy(&rep->wait_cv); 33647c478bd9Sstevel@tonic-gate qp->rep_list_size--; 3365*0a701b1eSRobert Gordon 3366*0a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__remreply__listsize, 3367*0a701b1eSRobert Gordon int, qp->rep_list_size); 33687c478bd9Sstevel@tonic-gate 33697c478bd9Sstevel@tonic-gate kmem_free(rep, sizeof (*rep)); 33707c478bd9Sstevel@tonic-gate 33717c478bd9Sstevel@tonic-gate return (0); 33727c478bd9Sstevel@tonic-gate } 33737c478bd9Sstevel@tonic-gate 33747c478bd9Sstevel@tonic-gate rdma_stat 3375*0a701b1eSRobert Gordon rib_registermem(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen, 33767c478bd9Sstevel@tonic-gate struct mrc *buf_handle) 33777c478bd9Sstevel@tonic-gate { 33787c478bd9Sstevel@tonic-gate ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 
33797c478bd9Sstevel@tonic-gate ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 33807c478bd9Sstevel@tonic-gate rdma_stat status; 33817c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 33827c478bd9Sstevel@tonic-gate 33837c478bd9Sstevel@tonic-gate /* 33847c478bd9Sstevel@tonic-gate * Note: ALL buffer pools use the same memory type RDMARW. 33857c478bd9Sstevel@tonic-gate */ 3386*0a701b1eSRobert Gordon status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 33877c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 338811606941Sjwahlig buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 33897c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 33907c478bd9Sstevel@tonic-gate buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 33917c478bd9Sstevel@tonic-gate } else { 33927c478bd9Sstevel@tonic-gate buf_handle->mrc_linfo = NULL; 33937c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = 0; 33947c478bd9Sstevel@tonic-gate buf_handle->mrc_rmr = 0; 33957c478bd9Sstevel@tonic-gate } 33967c478bd9Sstevel@tonic-gate return (status); 33977c478bd9Sstevel@tonic-gate } 33987c478bd9Sstevel@tonic-gate 33997c478bd9Sstevel@tonic-gate static rdma_stat 3400*0a701b1eSRobert Gordon rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size, 3401*0a701b1eSRobert Gordon ibt_mr_flags_t spec, 34027c478bd9Sstevel@tonic-gate ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp) 34037c478bd9Sstevel@tonic-gate { 34047c478bd9Sstevel@tonic-gate ibt_mr_attr_t mem_attr; 34057c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 340611606941Sjwahlig mem_attr.mr_vaddr = (uintptr_t)buf; 34077c478bd9Sstevel@tonic-gate mem_attr.mr_len = (ib_msglen_t)size; 3408*0a701b1eSRobert Gordon mem_attr.mr_as = (struct as *)(caddr_t)adsp; 34097c478bd9Sstevel@tonic-gate mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 34107c478bd9Sstevel@tonic-gate IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE | 34117c478bd9Sstevel@tonic-gate IBT_MR_ENABLE_WINDOW_BIND | spec; 
34127c478bd9Sstevel@tonic-gate 34137c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 34147c478bd9Sstevel@tonic-gate if (hca->state == HCA_INITED) { 34157c478bd9Sstevel@tonic-gate ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl, 34167c478bd9Sstevel@tonic-gate &mem_attr, mr_hdlp, mr_descp); 34177c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 34187c478bd9Sstevel@tonic-gate } else { 34197c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 34207c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 34217c478bd9Sstevel@tonic-gate } 34227c478bd9Sstevel@tonic-gate 34237c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 34247c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 34257c478bd9Sstevel@tonic-gate } 34267c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 34277c478bd9Sstevel@tonic-gate } 34287c478bd9Sstevel@tonic-gate 34297c478bd9Sstevel@tonic-gate rdma_stat 3430*0a701b1eSRobert Gordon rib_registermemsync(CONN *conn, caddr_t adsp, caddr_t buf, uint_t buflen, 3431*0a701b1eSRobert Gordon struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc) 34327c478bd9Sstevel@tonic-gate { 34337c478bd9Sstevel@tonic-gate ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 3434*0a701b1eSRobert Gordon rib_lrc_entry_t *l; 34357c478bd9Sstevel@tonic-gate ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 34367c478bd9Sstevel@tonic-gate rdma_stat status; 34377c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 34387c478bd9Sstevel@tonic-gate 34397c478bd9Sstevel@tonic-gate /* 34407c478bd9Sstevel@tonic-gate * Non-coherent memory registration. 
34417c478bd9Sstevel@tonic-gate */ 3442*0a701b1eSRobert Gordon l = (rib_lrc_entry_t *)lrc; 3443*0a701b1eSRobert Gordon if (l) { 3444*0a701b1eSRobert Gordon if (l->registered) { 3445*0a701b1eSRobert Gordon buf_handle->mrc_linfo = 3446*0a701b1eSRobert Gordon (uintptr_t)l->lrc_mhandle.mrc_linfo; 3447*0a701b1eSRobert Gordon buf_handle->mrc_lmr = 3448*0a701b1eSRobert Gordon (uint32_t)l->lrc_mhandle.mrc_lmr; 3449*0a701b1eSRobert Gordon buf_handle->mrc_rmr = 3450*0a701b1eSRobert Gordon (uint32_t)l->lrc_mhandle.mrc_rmr; 3451*0a701b1eSRobert Gordon *sync_handle = (RIB_SYNCMEM_HANDLE) 3452*0a701b1eSRobert Gordon (uintptr_t)l->lrc_mhandle.mrc_linfo; 3453*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 3454*0a701b1eSRobert Gordon } else { 3455*0a701b1eSRobert Gordon /* Always register the whole buffer */ 3456*0a701b1eSRobert Gordon buf = (caddr_t)l->lrc_buf; 3457*0a701b1eSRobert Gordon buflen = l->lrc_len; 3458*0a701b1eSRobert Gordon } 3459*0a701b1eSRobert Gordon } 3460*0a701b1eSRobert Gordon status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 3461*0a701b1eSRobert Gordon 34627c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 3463*0a701b1eSRobert Gordon if (l) { 3464*0a701b1eSRobert Gordon l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl; 3465*0a701b1eSRobert Gordon l->lrc_mhandle.mrc_lmr = (uint32_t)mr_desc.md_lkey; 3466*0a701b1eSRobert Gordon l->lrc_mhandle.mrc_rmr = (uint32_t)mr_desc.md_rkey; 3467*0a701b1eSRobert Gordon l->registered = TRUE; 3468*0a701b1eSRobert Gordon } 346911606941Sjwahlig buf_handle->mrc_linfo = (uintptr_t)mr_hdl; 34707c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 34717c478bd9Sstevel@tonic-gate buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 34727c478bd9Sstevel@tonic-gate *sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl; 34737c478bd9Sstevel@tonic-gate } else { 34747c478bd9Sstevel@tonic-gate buf_handle->mrc_linfo = NULL; 34757c478bd9Sstevel@tonic-gate buf_handle->mrc_lmr = 0; 34767c478bd9Sstevel@tonic-gate 
buf_handle->mrc_rmr = 0; 34777c478bd9Sstevel@tonic-gate } 34787c478bd9Sstevel@tonic-gate return (status); 34797c478bd9Sstevel@tonic-gate } 34807c478bd9Sstevel@tonic-gate 34817c478bd9Sstevel@tonic-gate /* ARGSUSED */ 34827c478bd9Sstevel@tonic-gate rdma_stat 34837c478bd9Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle) 34847c478bd9Sstevel@tonic-gate { 34857c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 34867c478bd9Sstevel@tonic-gate /* 34877c478bd9Sstevel@tonic-gate * Allow memory deregistration even if HCA is 34887c478bd9Sstevel@tonic-gate * getting detached. Need all outstanding 34897c478bd9Sstevel@tonic-gate * memory registrations to be deregistered 34907c478bd9Sstevel@tonic-gate * before HCA_DETACH_EVENT can be accepted. 34917c478bd9Sstevel@tonic-gate */ 34927c478bd9Sstevel@tonic-gate (void) ibt_deregister_mr(hca->hca_hdl, 349311606941Sjwahlig (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo); 34947c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 34957c478bd9Sstevel@tonic-gate } 34967c478bd9Sstevel@tonic-gate 34977c478bd9Sstevel@tonic-gate /* ARGSUSED */ 34987c478bd9Sstevel@tonic-gate rdma_stat 34997c478bd9Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle, 3500*0a701b1eSRobert Gordon RIB_SYNCMEM_HANDLE sync_handle, void *lrc) 35017c478bd9Sstevel@tonic-gate { 3502*0a701b1eSRobert Gordon rib_lrc_entry_t *l; 3503*0a701b1eSRobert Gordon l = (rib_lrc_entry_t *)lrc; 3504*0a701b1eSRobert Gordon if (l) 3505*0a701b1eSRobert Gordon if (l->registered) 3506*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 3507*0a701b1eSRobert Gordon 35087c478bd9Sstevel@tonic-gate (void) rib_deregistermem(conn, buf, buf_handle); 35097c478bd9Sstevel@tonic-gate 35107c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 35117c478bd9Sstevel@tonic-gate } 35127c478bd9Sstevel@tonic-gate 35137c478bd9Sstevel@tonic-gate /* ARGSUSED */ 35147c478bd9Sstevel@tonic-gate rdma_stat 35157c478bd9Sstevel@tonic-gate rib_syncmem(CONN 
*conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf, 35167c478bd9Sstevel@tonic-gate int len, int cpu) 35177c478bd9Sstevel@tonic-gate { 35187c478bd9Sstevel@tonic-gate ibt_status_t status; 35197c478bd9Sstevel@tonic-gate rib_hca_t *hca = (ctoqp(conn))->hca; 35207c478bd9Sstevel@tonic-gate ibt_mr_sync_t mr_segment; 35217c478bd9Sstevel@tonic-gate 35227c478bd9Sstevel@tonic-gate mr_segment.ms_handle = (ibt_mr_hdl_t)shandle; 352311606941Sjwahlig mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf; 35247c478bd9Sstevel@tonic-gate mr_segment.ms_len = (ib_memlen_t)len; 35257c478bd9Sstevel@tonic-gate if (cpu) { 35267c478bd9Sstevel@tonic-gate /* make incoming data visible to memory */ 35277c478bd9Sstevel@tonic-gate mr_segment.ms_flags = IBT_SYNC_WRITE; 35287c478bd9Sstevel@tonic-gate } else { 35297c478bd9Sstevel@tonic-gate /* make memory changes visible to IO */ 35307c478bd9Sstevel@tonic-gate mr_segment.ms_flags = IBT_SYNC_READ; 35317c478bd9Sstevel@tonic-gate } 35327c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 35337c478bd9Sstevel@tonic-gate if (hca->state == HCA_INITED) { 35347c478bd9Sstevel@tonic-gate status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1); 35357c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 35367c478bd9Sstevel@tonic-gate } else { 35377c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 35387c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 35397c478bd9Sstevel@tonic-gate } 35407c478bd9Sstevel@tonic-gate 35417c478bd9Sstevel@tonic-gate if (status == IBT_SUCCESS) 35427c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 35437c478bd9Sstevel@tonic-gate else { 35447c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 35457c478bd9Sstevel@tonic-gate } 35467c478bd9Sstevel@tonic-gate } 35477c478bd9Sstevel@tonic-gate 35487c478bd9Sstevel@tonic-gate /* 35497c478bd9Sstevel@tonic-gate * XXXX ???? 
35507c478bd9Sstevel@tonic-gate */ 35517c478bd9Sstevel@tonic-gate static rdma_stat 35527c478bd9Sstevel@tonic-gate rib_getinfo(rdma_info_t *info) 35537c478bd9Sstevel@tonic-gate { 35547c478bd9Sstevel@tonic-gate /* 35557c478bd9Sstevel@tonic-gate * XXXX Hack! 35567c478bd9Sstevel@tonic-gate */ 35577c478bd9Sstevel@tonic-gate info->addrlen = 16; 35587c478bd9Sstevel@tonic-gate info->mts = 1000000; 35597c478bd9Sstevel@tonic-gate info->mtu = 1000000; 35607c478bd9Sstevel@tonic-gate 35617c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 35627c478bd9Sstevel@tonic-gate } 35637c478bd9Sstevel@tonic-gate 35647c478bd9Sstevel@tonic-gate rib_bufpool_t * 35657c478bd9Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num) 35667c478bd9Sstevel@tonic-gate { 35677c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 35687c478bd9Sstevel@tonic-gate bufpool_t *bp = NULL; 35697c478bd9Sstevel@tonic-gate caddr_t buf; 35707c478bd9Sstevel@tonic-gate ibt_mr_attr_t mem_attr; 35717c478bd9Sstevel@tonic-gate ibt_status_t ibt_status; 35727c478bd9Sstevel@tonic-gate int i, j; 35737c478bd9Sstevel@tonic-gate 35747c478bd9Sstevel@tonic-gate rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP); 35757c478bd9Sstevel@tonic-gate 35767c478bd9Sstevel@tonic-gate bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) + 35777c478bd9Sstevel@tonic-gate num * sizeof (void *), KM_SLEEP); 35787c478bd9Sstevel@tonic-gate 35797c478bd9Sstevel@tonic-gate mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock); 35807c478bd9Sstevel@tonic-gate bp->numelems = num; 35817c478bd9Sstevel@tonic-gate 3582*0a701b1eSRobert Gordon 35837c478bd9Sstevel@tonic-gate switch (ptype) { 35847c478bd9Sstevel@tonic-gate case SEND_BUFFER: 35857c478bd9Sstevel@tonic-gate mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 35867c478bd9Sstevel@tonic-gate bp->rsize = RPC_MSG_SZ; 35877c478bd9Sstevel@tonic-gate break; 35887c478bd9Sstevel@tonic-gate case RECV_BUFFER: 35897c478bd9Sstevel@tonic-gate mem_attr.mr_flags = 
IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 35907c478bd9Sstevel@tonic-gate bp->rsize = RPC_BUF_SIZE; 35917c478bd9Sstevel@tonic-gate break; 35927c478bd9Sstevel@tonic-gate default: 35937c478bd9Sstevel@tonic-gate goto fail; 35947c478bd9Sstevel@tonic-gate } 35957c478bd9Sstevel@tonic-gate 35967c478bd9Sstevel@tonic-gate /* 35977c478bd9Sstevel@tonic-gate * Register the pool. 35987c478bd9Sstevel@tonic-gate */ 35997c478bd9Sstevel@tonic-gate bp->bufsize = num * bp->rsize; 36007c478bd9Sstevel@tonic-gate bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP); 36017c478bd9Sstevel@tonic-gate rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num * 36027c478bd9Sstevel@tonic-gate sizeof (ibt_mr_hdl_t), KM_SLEEP); 36037c478bd9Sstevel@tonic-gate rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num * 36047c478bd9Sstevel@tonic-gate sizeof (ibt_mr_desc_t), KM_SLEEP); 36057c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_READER); 3606*0a701b1eSRobert Gordon 36077c478bd9Sstevel@tonic-gate if (hca->state != HCA_INITED) { 36087c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 36097c478bd9Sstevel@tonic-gate goto fail; 36107c478bd9Sstevel@tonic-gate } 3611*0a701b1eSRobert Gordon 36127c478bd9Sstevel@tonic-gate for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) { 36137c478bd9Sstevel@tonic-gate bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t)); 361411606941Sjwahlig mem_attr.mr_vaddr = (uintptr_t)buf; 36157c478bd9Sstevel@tonic-gate mem_attr.mr_len = (ib_msglen_t)bp->rsize; 36167c478bd9Sstevel@tonic-gate mem_attr.mr_as = NULL; 36177c478bd9Sstevel@tonic-gate ibt_status = ibt_register_mr(hca->hca_hdl, 3618*0a701b1eSRobert Gordon hca->pd_hdl, &mem_attr, 3619*0a701b1eSRobert Gordon &rbp->mr_hdl[i], 36207c478bd9Sstevel@tonic-gate &rbp->mr_desc[i]); 36217c478bd9Sstevel@tonic-gate if (ibt_status != IBT_SUCCESS) { 36227c478bd9Sstevel@tonic-gate for (j = 0; j < i; j++) { 3623*0a701b1eSRobert Gordon (void) ibt_deregister_mr(hca->hca_hdl, 3624*0a701b1eSRobert Gordon rbp->mr_hdl[j]); 36257c478bd9Sstevel@tonic-gate } 
36267c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 36277c478bd9Sstevel@tonic-gate goto fail; 36287c478bd9Sstevel@tonic-gate } 36297c478bd9Sstevel@tonic-gate } 36307c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 36317c478bd9Sstevel@tonic-gate buf = (caddr_t)bp->buf; 36327c478bd9Sstevel@tonic-gate for (i = 0; i < num; i++, buf += bp->rsize) { 36337c478bd9Sstevel@tonic-gate bp->buflist[i] = (void *)buf; 36347c478bd9Sstevel@tonic-gate } 36357c478bd9Sstevel@tonic-gate bp->buffree = num - 1; /* no. of free buffers */ 36367c478bd9Sstevel@tonic-gate rbp->bpool = bp; 36377c478bd9Sstevel@tonic-gate 36387c478bd9Sstevel@tonic-gate return (rbp); 36397c478bd9Sstevel@tonic-gate fail: 36407c478bd9Sstevel@tonic-gate if (bp) { 36417c478bd9Sstevel@tonic-gate if (bp->buf) 36427c478bd9Sstevel@tonic-gate kmem_free(bp->buf, bp->bufsize); 36437c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *)); 36447c478bd9Sstevel@tonic-gate } 36457c478bd9Sstevel@tonic-gate if (rbp) { 36467c478bd9Sstevel@tonic-gate if (rbp->mr_hdl) 36477c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t)); 36487c478bd9Sstevel@tonic-gate if (rbp->mr_desc) 36497c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t)); 36507c478bd9Sstevel@tonic-gate kmem_free(rbp, sizeof (rib_bufpool_t)); 36517c478bd9Sstevel@tonic-gate } 36527c478bd9Sstevel@tonic-gate return (NULL); 36537c478bd9Sstevel@tonic-gate } 36547c478bd9Sstevel@tonic-gate 36557c478bd9Sstevel@tonic-gate static void 36567c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype) 36577c478bd9Sstevel@tonic-gate { 36587c478bd9Sstevel@tonic-gate int i; 36597c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 36607c478bd9Sstevel@tonic-gate bufpool_t *bp; 36617c478bd9Sstevel@tonic-gate 36627c478bd9Sstevel@tonic-gate /* 36637c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 36647c478bd9Sstevel@tonic-gate */ 36657c478bd9Sstevel@tonic-gate switch (ptype) 
{ 36667c478bd9Sstevel@tonic-gate case SEND_BUFFER: 36677c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 36687c478bd9Sstevel@tonic-gate break; 36697c478bd9Sstevel@tonic-gate case RECV_BUFFER: 36707c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 36717c478bd9Sstevel@tonic-gate break; 36727c478bd9Sstevel@tonic-gate default: 36737c478bd9Sstevel@tonic-gate return; 36747c478bd9Sstevel@tonic-gate } 36757c478bd9Sstevel@tonic-gate if (rbp == NULL) 36767c478bd9Sstevel@tonic-gate return; 36777c478bd9Sstevel@tonic-gate 36787c478bd9Sstevel@tonic-gate bp = rbp->bpool; 36797c478bd9Sstevel@tonic-gate 36807c478bd9Sstevel@tonic-gate /* 36817c478bd9Sstevel@tonic-gate * Deregister the pool memory and free it. 36827c478bd9Sstevel@tonic-gate */ 36837c478bd9Sstevel@tonic-gate for (i = 0; i < bp->numelems; i++) { 36847c478bd9Sstevel@tonic-gate (void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]); 36857c478bd9Sstevel@tonic-gate } 36867c478bd9Sstevel@tonic-gate } 36877c478bd9Sstevel@tonic-gate 36887c478bd9Sstevel@tonic-gate static void 36897c478bd9Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype) 36907c478bd9Sstevel@tonic-gate { 36917c478bd9Sstevel@tonic-gate 36927c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 36937c478bd9Sstevel@tonic-gate bufpool_t *bp; 36947c478bd9Sstevel@tonic-gate 36957c478bd9Sstevel@tonic-gate /* 36967c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 36977c478bd9Sstevel@tonic-gate */ 36987c478bd9Sstevel@tonic-gate switch (ptype) { 36997c478bd9Sstevel@tonic-gate case SEND_BUFFER: 37007c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 37017c478bd9Sstevel@tonic-gate break; 37027c478bd9Sstevel@tonic-gate case RECV_BUFFER: 37037c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 37047c478bd9Sstevel@tonic-gate break; 37057c478bd9Sstevel@tonic-gate default: 37067c478bd9Sstevel@tonic-gate return; 37077c478bd9Sstevel@tonic-gate } 37087c478bd9Sstevel@tonic-gate if (rbp == NULL) 37097c478bd9Sstevel@tonic-gate return; 
37107c478bd9Sstevel@tonic-gate 37117c478bd9Sstevel@tonic-gate bp = rbp->bpool; 37127c478bd9Sstevel@tonic-gate 37137c478bd9Sstevel@tonic-gate /* 37147c478bd9Sstevel@tonic-gate * Free the pool memory. 37157c478bd9Sstevel@tonic-gate */ 37167c478bd9Sstevel@tonic-gate if (rbp->mr_hdl) 37177c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t)); 37187c478bd9Sstevel@tonic-gate 37197c478bd9Sstevel@tonic-gate if (rbp->mr_desc) 37207c478bd9Sstevel@tonic-gate kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t)); 37217c478bd9Sstevel@tonic-gate if (bp->buf) 37227c478bd9Sstevel@tonic-gate kmem_free(bp->buf, bp->bufsize); 37237c478bd9Sstevel@tonic-gate mutex_destroy(&bp->buflock); 37247c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *)); 37257c478bd9Sstevel@tonic-gate kmem_free(rbp, sizeof (rib_bufpool_t)); 37267c478bd9Sstevel@tonic-gate } 37277c478bd9Sstevel@tonic-gate 37287c478bd9Sstevel@tonic-gate void 37297c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype) 37307c478bd9Sstevel@tonic-gate { 37317c478bd9Sstevel@tonic-gate /* 37327c478bd9Sstevel@tonic-gate * Deregister the pool memory and free it. 37337c478bd9Sstevel@tonic-gate */ 37347c478bd9Sstevel@tonic-gate rib_rbufpool_deregister(hca, ptype); 37357c478bd9Sstevel@tonic-gate rib_rbufpool_free(hca, ptype); 37367c478bd9Sstevel@tonic-gate } 37377c478bd9Sstevel@tonic-gate 37387c478bd9Sstevel@tonic-gate /* 37397c478bd9Sstevel@tonic-gate * Fetch a buffer from the pool of type specified in rdbuf->type. 
37407c478bd9Sstevel@tonic-gate */ 37417c478bd9Sstevel@tonic-gate static rdma_stat 37427c478bd9Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf) 37437c478bd9Sstevel@tonic-gate { 3744*0a701b1eSRobert Gordon rib_lrc_entry_t *rlep; 3745*0a701b1eSRobert Gordon 3746*0a701b1eSRobert Gordon if (rdbuf->type == RDMA_LONG_BUFFER) { 3747*0a701b1eSRobert Gordon rlep = rib_get_cache_buf(conn, rdbuf->len); 3748*0a701b1eSRobert Gordon rdbuf->rb_private = (caddr_t)rlep; 3749*0a701b1eSRobert Gordon rdbuf->addr = rlep->lrc_buf; 3750*0a701b1eSRobert Gordon rdbuf->handle = rlep->lrc_mhandle; 3751*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 3752*0a701b1eSRobert Gordon } 37537c478bd9Sstevel@tonic-gate 37547c478bd9Sstevel@tonic-gate rdbuf->addr = rib_rbuf_alloc(conn, rdbuf); 37557c478bd9Sstevel@tonic-gate if (rdbuf->addr) { 37567c478bd9Sstevel@tonic-gate switch (rdbuf->type) { 37577c478bd9Sstevel@tonic-gate case SEND_BUFFER: 37587c478bd9Sstevel@tonic-gate rdbuf->len = RPC_MSG_SZ; /* 1K */ 37597c478bd9Sstevel@tonic-gate break; 37607c478bd9Sstevel@tonic-gate case RECV_BUFFER: 37617c478bd9Sstevel@tonic-gate rdbuf->len = RPC_BUF_SIZE; /* 2K */ 37627c478bd9Sstevel@tonic-gate break; 37637c478bd9Sstevel@tonic-gate default: 37647c478bd9Sstevel@tonic-gate rdbuf->len = 0; 37657c478bd9Sstevel@tonic-gate } 37667c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 37677c478bd9Sstevel@tonic-gate } else 37687c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 37697c478bd9Sstevel@tonic-gate } 37707c478bd9Sstevel@tonic-gate 3771*0a701b1eSRobert Gordon #if defined(MEASURE_POOL_DEPTH) 3772*0a701b1eSRobert Gordon static void rib_recv_bufs(uint32_t x) { 3773*0a701b1eSRobert Gordon 3774*0a701b1eSRobert Gordon } 3775*0a701b1eSRobert Gordon 3776*0a701b1eSRobert Gordon static void rib_send_bufs(uint32_t x) { 3777*0a701b1eSRobert Gordon 3778*0a701b1eSRobert Gordon } 3779*0a701b1eSRobert Gordon #endif 37807c478bd9Sstevel@tonic-gate 37817c478bd9Sstevel@tonic-gate /* 37827c478bd9Sstevel@tonic-gate * Fetch 
a buffer of specified type. 37837c478bd9Sstevel@tonic-gate * Note that rdbuf->handle is mw's rkey. 37847c478bd9Sstevel@tonic-gate */ 37857c478bd9Sstevel@tonic-gate static void * 37867c478bd9Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf) 37877c478bd9Sstevel@tonic-gate { 37887c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 37897c478bd9Sstevel@tonic-gate rib_hca_t *hca = qp->hca; 37907c478bd9Sstevel@tonic-gate rdma_btype ptype = rdbuf->type; 37917c478bd9Sstevel@tonic-gate void *buf; 37927c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 37937c478bd9Sstevel@tonic-gate bufpool_t *bp; 37947c478bd9Sstevel@tonic-gate int i; 37957c478bd9Sstevel@tonic-gate 37967c478bd9Sstevel@tonic-gate /* 37977c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 37987c478bd9Sstevel@tonic-gate */ 37997c478bd9Sstevel@tonic-gate switch (ptype) { 38007c478bd9Sstevel@tonic-gate case SEND_BUFFER: 38017c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 38027c478bd9Sstevel@tonic-gate break; 38037c478bd9Sstevel@tonic-gate case RECV_BUFFER: 38047c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 38057c478bd9Sstevel@tonic-gate break; 38067c478bd9Sstevel@tonic-gate default: 38077c478bd9Sstevel@tonic-gate return (NULL); 38087c478bd9Sstevel@tonic-gate } 38097c478bd9Sstevel@tonic-gate if (rbp == NULL) 38107c478bd9Sstevel@tonic-gate return (NULL); 38117c478bd9Sstevel@tonic-gate 38127c478bd9Sstevel@tonic-gate bp = rbp->bpool; 38137c478bd9Sstevel@tonic-gate 38147c478bd9Sstevel@tonic-gate mutex_enter(&bp->buflock); 38157c478bd9Sstevel@tonic-gate if (bp->buffree < 0) { 38167c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 38177c478bd9Sstevel@tonic-gate return (NULL); 38187c478bd9Sstevel@tonic-gate } 38197c478bd9Sstevel@tonic-gate 38207c478bd9Sstevel@tonic-gate /* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. 
*/ 38217c478bd9Sstevel@tonic-gate buf = bp->buflist[bp->buffree]; 38227c478bd9Sstevel@tonic-gate rdbuf->addr = buf; 38237c478bd9Sstevel@tonic-gate rdbuf->len = bp->rsize; 38247c478bd9Sstevel@tonic-gate for (i = bp->numelems - 1; i >= 0; i--) { 382511606941Sjwahlig if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) { 3826*0a701b1eSRobert Gordon rdbuf->handle.mrc_rmr = 3827*0a701b1eSRobert Gordon (uint32_t)rbp->mr_desc[i].md_rkey; 3828*0a701b1eSRobert Gordon rdbuf->handle.mrc_linfo = 3829*0a701b1eSRobert Gordon (uintptr_t)rbp->mr_hdl[i]; 3830*0a701b1eSRobert Gordon rdbuf->handle.mrc_lmr = 3831*0a701b1eSRobert Gordon (uint32_t)rbp->mr_desc[i].md_lkey; 3832*0a701b1eSRobert Gordon #if defined(MEASURE_POOL_DEPTH) 3833*0a701b1eSRobert Gordon if (ptype == SEND_BUFFER) 3834*0a701b1eSRobert Gordon rib_send_bufs(MAX_BUFS - (bp->buffree+1)); 3835*0a701b1eSRobert Gordon if (ptype == RECV_BUFFER) 3836*0a701b1eSRobert Gordon rib_recv_bufs(MAX_BUFS - (bp->buffree+1)); 3837*0a701b1eSRobert Gordon #endif 38387c478bd9Sstevel@tonic-gate bp->buffree--; 38397c478bd9Sstevel@tonic-gate 38407c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 38417c478bd9Sstevel@tonic-gate 38427c478bd9Sstevel@tonic-gate return (buf); 38437c478bd9Sstevel@tonic-gate } 38447c478bd9Sstevel@tonic-gate } 3845*0a701b1eSRobert Gordon 38467c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 38477c478bd9Sstevel@tonic-gate 38487c478bd9Sstevel@tonic-gate return (NULL); 38497c478bd9Sstevel@tonic-gate } 38507c478bd9Sstevel@tonic-gate 38517c478bd9Sstevel@tonic-gate static void 38527c478bd9Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf) 38537c478bd9Sstevel@tonic-gate { 38547c478bd9Sstevel@tonic-gate 3855*0a701b1eSRobert Gordon if (rdbuf->type == RDMA_LONG_BUFFER) { 3856*0a701b1eSRobert Gordon rib_free_cache_buf(conn, (rib_lrc_entry_t *)rdbuf->rb_private); 3857*0a701b1eSRobert Gordon rdbuf->rb_private = NULL; 3858*0a701b1eSRobert Gordon return; 3859*0a701b1eSRobert Gordon } 
38607c478bd9Sstevel@tonic-gate rib_rbuf_free(conn, rdbuf->type, rdbuf->addr); 38617c478bd9Sstevel@tonic-gate } 38627c478bd9Sstevel@tonic-gate 38637c478bd9Sstevel@tonic-gate static void 38647c478bd9Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf) 38657c478bd9Sstevel@tonic-gate { 38667c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 38677c478bd9Sstevel@tonic-gate rib_hca_t *hca = qp->hca; 38687c478bd9Sstevel@tonic-gate rib_bufpool_t *rbp = NULL; 38697c478bd9Sstevel@tonic-gate bufpool_t *bp; 38707c478bd9Sstevel@tonic-gate 38717c478bd9Sstevel@tonic-gate /* 38727c478bd9Sstevel@tonic-gate * Obtain pool address based on type of pool 38737c478bd9Sstevel@tonic-gate */ 38747c478bd9Sstevel@tonic-gate switch (ptype) { 38757c478bd9Sstevel@tonic-gate case SEND_BUFFER: 38767c478bd9Sstevel@tonic-gate rbp = hca->send_pool; 38777c478bd9Sstevel@tonic-gate break; 38787c478bd9Sstevel@tonic-gate case RECV_BUFFER: 38797c478bd9Sstevel@tonic-gate rbp = hca->recv_pool; 38807c478bd9Sstevel@tonic-gate break; 38817c478bd9Sstevel@tonic-gate default: 38827c478bd9Sstevel@tonic-gate return; 38837c478bd9Sstevel@tonic-gate } 38847c478bd9Sstevel@tonic-gate if (rbp == NULL) 38857c478bd9Sstevel@tonic-gate return; 38867c478bd9Sstevel@tonic-gate 38877c478bd9Sstevel@tonic-gate bp = rbp->bpool; 38887c478bd9Sstevel@tonic-gate 38897c478bd9Sstevel@tonic-gate mutex_enter(&bp->buflock); 38907c478bd9Sstevel@tonic-gate if (++bp->buffree >= bp->numelems) { 38917c478bd9Sstevel@tonic-gate /* 38927c478bd9Sstevel@tonic-gate * Should never happen 38937c478bd9Sstevel@tonic-gate */ 38947c478bd9Sstevel@tonic-gate bp->buffree--; 38957c478bd9Sstevel@tonic-gate } else { 38967c478bd9Sstevel@tonic-gate bp->buflist[bp->buffree] = buf; 38977c478bd9Sstevel@tonic-gate } 38987c478bd9Sstevel@tonic-gate mutex_exit(&bp->buflock); 38997c478bd9Sstevel@tonic-gate } 39007c478bd9Sstevel@tonic-gate 39017c478bd9Sstevel@tonic-gate static rdma_stat 39027c478bd9Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t 
*connlist) 39037c478bd9Sstevel@tonic-gate { 39047c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_WRITER); 39057c478bd9Sstevel@tonic-gate if (connlist->conn_hd) { 39067c478bd9Sstevel@tonic-gate cn->c_next = connlist->conn_hd; 39077c478bd9Sstevel@tonic-gate connlist->conn_hd->c_prev = cn; 39087c478bd9Sstevel@tonic-gate } 39097c478bd9Sstevel@tonic-gate connlist->conn_hd = cn; 39107c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 39117c478bd9Sstevel@tonic-gate 39127c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 39137c478bd9Sstevel@tonic-gate } 39147c478bd9Sstevel@tonic-gate 39157c478bd9Sstevel@tonic-gate static rdma_stat 39167c478bd9Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist) 39177c478bd9Sstevel@tonic-gate { 39187c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_WRITER); 39197c478bd9Sstevel@tonic-gate if (cn->c_prev) { 39207c478bd9Sstevel@tonic-gate cn->c_prev->c_next = cn->c_next; 39217c478bd9Sstevel@tonic-gate } 39227c478bd9Sstevel@tonic-gate if (cn->c_next) { 39237c478bd9Sstevel@tonic-gate cn->c_next->c_prev = cn->c_prev; 39247c478bd9Sstevel@tonic-gate } 39257c478bd9Sstevel@tonic-gate if (connlist->conn_hd == cn) 39267c478bd9Sstevel@tonic-gate connlist->conn_hd = cn->c_next; 39277c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 39287c478bd9Sstevel@tonic-gate 39297c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 39307c478bd9Sstevel@tonic-gate } 39317c478bd9Sstevel@tonic-gate 39327c478bd9Sstevel@tonic-gate /* 39337c478bd9Sstevel@tonic-gate * Connection management. 39347c478bd9Sstevel@tonic-gate * IBTF does not support recycling of channels. So connections are only 3935*0a701b1eSRobert Gordon * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR_CONN or 39367c478bd9Sstevel@tonic-gate * C_DISCONN_PEND state. No C_IDLE state. 39377c478bd9Sstevel@tonic-gate * C_CONN_PEND state: Connection establishment in progress to the server. 
39387c478bd9Sstevel@tonic-gate * C_CONNECTED state: A connection when created is in C_CONNECTED state. 39397c478bd9Sstevel@tonic-gate * It has an RC channel associated with it. ibt_post_send/recv are allowed 39407c478bd9Sstevel@tonic-gate * only in this state. 3941*0a701b1eSRobert Gordon * C_ERROR_CONN state: A connection transitions to this state when WRs on the 39427c478bd9Sstevel@tonic-gate * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event 39437c478bd9Sstevel@tonic-gate * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA. 3944*0a701b1eSRobert Gordon * C_DISCONN_PEND state: When a connection is in C_ERROR_CONN state and when 39457c478bd9Sstevel@tonic-gate * c_ref drops to 0 (this indicates that RPC has no more references to this 39467c478bd9Sstevel@tonic-gate * connection), the connection should be destroyed. A connection transitions 39477c478bd9Sstevel@tonic-gate * into this state when it is being destroyed. 39487c478bd9Sstevel@tonic-gate */ 39497c478bd9Sstevel@tonic-gate static rdma_stat 39507c478bd9Sstevel@tonic-gate rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn) 39517c478bd9Sstevel@tonic-gate { 39527c478bd9Sstevel@tonic-gate CONN *cn; 39537c478bd9Sstevel@tonic-gate int status = RDMA_SUCCESS; 39547c478bd9Sstevel@tonic-gate rib_hca_t *hca = (rib_hca_t *)handle; 39557c478bd9Sstevel@tonic-gate rib_qp_t *qp; 39567c478bd9Sstevel@tonic-gate clock_t cv_stat, timout; 39577c478bd9Sstevel@tonic-gate ibt_path_info_t path; 3958*0a701b1eSRobert Gordon ibt_ip_addr_t s_ip, d_ip; 39597c478bd9Sstevel@tonic-gate 39607c478bd9Sstevel@tonic-gate again: 39617c478bd9Sstevel@tonic-gate rw_enter(&hca->cl_conn_list.conn_lock, RW_READER); 39627c478bd9Sstevel@tonic-gate cn = hca->cl_conn_list.conn_hd; 39637c478bd9Sstevel@tonic-gate while (cn != NULL) { 39647c478bd9Sstevel@tonic-gate /* 39657c478bd9Sstevel@tonic-gate * First, clear up any connection in the ERROR state 39667c478bd9Sstevel@tonic-gate */ 
39677c478bd9Sstevel@tonic-gate mutex_enter(&cn->c_lock); 3968*0a701b1eSRobert Gordon if (cn->c_state == C_ERROR_CONN) { 39697c478bd9Sstevel@tonic-gate if (cn->c_ref == 0) { 39707c478bd9Sstevel@tonic-gate /* 39717c478bd9Sstevel@tonic-gate * Remove connection from list and destroy it. 39727c478bd9Sstevel@tonic-gate */ 39737c478bd9Sstevel@tonic-gate cn->c_state = C_DISCONN_PEND; 39747c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 39757c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 39767c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(cn, 39777c478bd9Sstevel@tonic-gate &hca->cl_conn_list); 39787c478bd9Sstevel@tonic-gate goto again; 39797c478bd9Sstevel@tonic-gate } 39807c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 39817c478bd9Sstevel@tonic-gate cn = cn->c_next; 39827c478bd9Sstevel@tonic-gate continue; 3983*0a701b1eSRobert Gordon } 3984*0a701b1eSRobert Gordon if (cn->c_state == C_DISCONN_PEND) { 39857c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 39867c478bd9Sstevel@tonic-gate cn = cn->c_next; 39877c478bd9Sstevel@tonic-gate continue; 39887c478bd9Sstevel@tonic-gate } 39897c478bd9Sstevel@tonic-gate if ((cn->c_raddr.len == svcaddr->len) && 39907c478bd9Sstevel@tonic-gate bcmp(svcaddr->buf, cn->c_raddr.buf, svcaddr->len) == 0) { 39917c478bd9Sstevel@tonic-gate /* 39927c478bd9Sstevel@tonic-gate * Our connection. Give up conn list lock 39937c478bd9Sstevel@tonic-gate * as we are done traversing the list. 
39947c478bd9Sstevel@tonic-gate */ 39957c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 39967c478bd9Sstevel@tonic-gate if (cn->c_state == C_CONNECTED) { 39977c478bd9Sstevel@tonic-gate cn->c_ref++; /* sharing a conn */ 39987c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 39997c478bd9Sstevel@tonic-gate *conn = cn; 40007c478bd9Sstevel@tonic-gate return (status); 40017c478bd9Sstevel@tonic-gate } 40027c478bd9Sstevel@tonic-gate if (cn->c_state == C_CONN_PEND) { 40037c478bd9Sstevel@tonic-gate /* 40047c478bd9Sstevel@tonic-gate * Hold a reference to this conn before 40057c478bd9Sstevel@tonic-gate * we give up the lock. 40067c478bd9Sstevel@tonic-gate */ 40077c478bd9Sstevel@tonic-gate cn->c_ref++; 40087c478bd9Sstevel@tonic-gate timout = ddi_get_lbolt() + 40097c478bd9Sstevel@tonic-gate drv_usectohz(CONN_WAIT_TIME * 1000000); 40107c478bd9Sstevel@tonic-gate while ((cv_stat = cv_timedwait_sig(&cn->c_cv, 40117c478bd9Sstevel@tonic-gate &cn->c_lock, timout)) > 0 && 40127c478bd9Sstevel@tonic-gate cn->c_state == C_CONN_PEND) 40137c478bd9Sstevel@tonic-gate ; 40147c478bd9Sstevel@tonic-gate if (cv_stat == 0) { 40157c478bd9Sstevel@tonic-gate cn->c_ref--; 40167c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40177c478bd9Sstevel@tonic-gate return (RDMA_INTR); 40187c478bd9Sstevel@tonic-gate } 40197c478bd9Sstevel@tonic-gate if (cv_stat < 0) { 40207c478bd9Sstevel@tonic-gate cn->c_ref--; 40217c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40227c478bd9Sstevel@tonic-gate return (RDMA_TIMEDOUT); 40237c478bd9Sstevel@tonic-gate } 40247c478bd9Sstevel@tonic-gate if (cn->c_state == C_CONNECTED) { 40257c478bd9Sstevel@tonic-gate *conn = cn; 40267c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40277c478bd9Sstevel@tonic-gate return (status); 40287c478bd9Sstevel@tonic-gate } else { 40297c478bd9Sstevel@tonic-gate cn->c_ref--; 40307c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40317c478bd9Sstevel@tonic-gate return (RDMA_TIMEDOUT); 40327c478bd9Sstevel@tonic-gate } 
40337c478bd9Sstevel@tonic-gate } 40347c478bd9Sstevel@tonic-gate } 40357c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40367c478bd9Sstevel@tonic-gate cn = cn->c_next; 40377c478bd9Sstevel@tonic-gate } 40387c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 40397c478bd9Sstevel@tonic-gate 4040*0a701b1eSRobert Gordon bzero(&path, sizeof (ibt_path_info_t)); 4041*0a701b1eSRobert Gordon bzero(&s_ip, sizeof (ibt_ip_addr_t)); 4042*0a701b1eSRobert Gordon bzero(&d_ip, sizeof (ibt_ip_addr_t)); 4043*0a701b1eSRobert Gordon 4044*0a701b1eSRobert Gordon status = rib_chk_srv_ibaddr(svcaddr, addr_type, &path, &s_ip, &d_ip); 40457c478bd9Sstevel@tonic-gate if (status != RDMA_SUCCESS) { 40467c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 40477c478bd9Sstevel@tonic-gate } 40487c478bd9Sstevel@tonic-gate 40497c478bd9Sstevel@tonic-gate /* 40507c478bd9Sstevel@tonic-gate * Channel to server doesn't exist yet, create one. 40517c478bd9Sstevel@tonic-gate */ 40527c478bd9Sstevel@tonic-gate if (rib_clnt_create_chan(hca, svcaddr, &qp) != RDMA_SUCCESS) { 40537c478bd9Sstevel@tonic-gate return (RDMA_FAILED); 40547c478bd9Sstevel@tonic-gate } 40557c478bd9Sstevel@tonic-gate cn = qptoc(qp); 40567c478bd9Sstevel@tonic-gate cn->c_state = C_CONN_PEND; 40577c478bd9Sstevel@tonic-gate cn->c_ref = 1; 40587c478bd9Sstevel@tonic-gate 40597c478bd9Sstevel@tonic-gate /* 40607c478bd9Sstevel@tonic-gate * Add to conn list. 40617c478bd9Sstevel@tonic-gate * We had given up the READER lock. In the time since then, 40627c478bd9Sstevel@tonic-gate * another thread might have created the connection we are 40637c478bd9Sstevel@tonic-gate * trying here. But for now, that is quiet alright - there 40647c478bd9Sstevel@tonic-gate * might be two connections between a pair of hosts instead 40657c478bd9Sstevel@tonic-gate * of one. If we really want to close that window, 40667c478bd9Sstevel@tonic-gate * then need to check the list after acquiring the 40677c478bd9Sstevel@tonic-gate * WRITER lock. 
40687c478bd9Sstevel@tonic-gate */ 40697c478bd9Sstevel@tonic-gate (void) rib_add_connlist(cn, &hca->cl_conn_list); 4070*0a701b1eSRobert Gordon status = rib_conn_to_srv(hca, qp, &path, &s_ip, &d_ip); 40717c478bd9Sstevel@tonic-gate mutex_enter(&cn->c_lock); 40727c478bd9Sstevel@tonic-gate if (status == RDMA_SUCCESS) { 40737c478bd9Sstevel@tonic-gate cn->c_state = C_CONNECTED; 40747c478bd9Sstevel@tonic-gate *conn = cn; 40757c478bd9Sstevel@tonic-gate } else { 4076*0a701b1eSRobert Gordon cn->c_state = C_ERROR_CONN; 40777c478bd9Sstevel@tonic-gate cn->c_ref--; 40787c478bd9Sstevel@tonic-gate } 40797c478bd9Sstevel@tonic-gate cv_broadcast(&cn->c_cv); 40807c478bd9Sstevel@tonic-gate mutex_exit(&cn->c_lock); 40817c478bd9Sstevel@tonic-gate return (status); 40827c478bd9Sstevel@tonic-gate } 40837c478bd9Sstevel@tonic-gate 40847c478bd9Sstevel@tonic-gate static rdma_stat 40857c478bd9Sstevel@tonic-gate rib_conn_release(CONN *conn) 40867c478bd9Sstevel@tonic-gate { 40877c478bd9Sstevel@tonic-gate rib_qp_t *qp = ctoqp(conn); 40887c478bd9Sstevel@tonic-gate 40897c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 40907c478bd9Sstevel@tonic-gate conn->c_ref--; 40917c478bd9Sstevel@tonic-gate 40927c478bd9Sstevel@tonic-gate /* 4093*0a701b1eSRobert Gordon * If a conn is C_ERROR_CONN, close the channel. 40947c478bd9Sstevel@tonic-gate * If it's CONNECTED, keep it that way. 
40957c478bd9Sstevel@tonic-gate */ 4096*0a701b1eSRobert Gordon if (conn->c_ref == 0 && conn->c_state == C_ERROR_CONN) { 40977c478bd9Sstevel@tonic-gate conn->c_state = C_DISCONN_PEND; 40987c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 40997c478bd9Sstevel@tonic-gate if (qp->mode == RIB_SERVER) 41007c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, 41017c478bd9Sstevel@tonic-gate &qp->hca->srv_conn_list); 41027c478bd9Sstevel@tonic-gate else 41037c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, 41047c478bd9Sstevel@tonic-gate &qp->hca->cl_conn_list); 41057c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 41067c478bd9Sstevel@tonic-gate } 41077c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 41087c478bd9Sstevel@tonic-gate return (RDMA_SUCCESS); 41097c478bd9Sstevel@tonic-gate } 41107c478bd9Sstevel@tonic-gate 41117c478bd9Sstevel@tonic-gate /* 41127c478bd9Sstevel@tonic-gate * Add at front of list 41137c478bd9Sstevel@tonic-gate */ 41147c478bd9Sstevel@tonic-gate static struct rdma_done_list * 41157c478bd9Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid) 41167c478bd9Sstevel@tonic-gate { 41177c478bd9Sstevel@tonic-gate struct rdma_done_list *rd; 41187c478bd9Sstevel@tonic-gate 41197c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 41207c478bd9Sstevel@tonic-gate 41217c478bd9Sstevel@tonic-gate rd = kmem_alloc(sizeof (*rd), KM_SLEEP); 41227c478bd9Sstevel@tonic-gate rd->xid = xid; 41237c478bd9Sstevel@tonic-gate cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL); 41247c478bd9Sstevel@tonic-gate 41257c478bd9Sstevel@tonic-gate rd->prev = NULL; 41267c478bd9Sstevel@tonic-gate rd->next = qp->rdlist; 41277c478bd9Sstevel@tonic-gate if (qp->rdlist != NULL) 41287c478bd9Sstevel@tonic-gate qp->rdlist->prev = rd; 41297c478bd9Sstevel@tonic-gate qp->rdlist = rd; 41307c478bd9Sstevel@tonic-gate 41317c478bd9Sstevel@tonic-gate return (rd); 41327c478bd9Sstevel@tonic-gate } 41337c478bd9Sstevel@tonic-gate 41347c478bd9Sstevel@tonic-gate static void 
41357c478bd9Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd) 41367c478bd9Sstevel@tonic-gate { 41377c478bd9Sstevel@tonic-gate struct rdma_done_list *r; 41387c478bd9Sstevel@tonic-gate 41397c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 41407c478bd9Sstevel@tonic-gate 41417c478bd9Sstevel@tonic-gate r = rd->next; 41427c478bd9Sstevel@tonic-gate if (r != NULL) { 41437c478bd9Sstevel@tonic-gate r->prev = rd->prev; 41447c478bd9Sstevel@tonic-gate } 41457c478bd9Sstevel@tonic-gate 41467c478bd9Sstevel@tonic-gate r = rd->prev; 41477c478bd9Sstevel@tonic-gate if (r != NULL) { 41487c478bd9Sstevel@tonic-gate r->next = rd->next; 41497c478bd9Sstevel@tonic-gate } else { 41507c478bd9Sstevel@tonic-gate qp->rdlist = rd->next; 41517c478bd9Sstevel@tonic-gate } 41527c478bd9Sstevel@tonic-gate 41537c478bd9Sstevel@tonic-gate cv_destroy(&rd->rdma_done_cv); 41547c478bd9Sstevel@tonic-gate kmem_free(rd, sizeof (*rd)); 41557c478bd9Sstevel@tonic-gate } 41567c478bd9Sstevel@tonic-gate 41577c478bd9Sstevel@tonic-gate static void 41587c478bd9Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp) 41597c478bd9Sstevel@tonic-gate { 41607c478bd9Sstevel@tonic-gate struct rdma_done_list *r, *n; 41617c478bd9Sstevel@tonic-gate 41627c478bd9Sstevel@tonic-gate mutex_enter(&qp->rdlist_lock); 41637c478bd9Sstevel@tonic-gate for (r = qp->rdlist; r != NULL; r = n) { 41647c478bd9Sstevel@tonic-gate n = r->next; 41657c478bd9Sstevel@tonic-gate rdma_done_rm(qp, r); 41667c478bd9Sstevel@tonic-gate } 41677c478bd9Sstevel@tonic-gate mutex_exit(&qp->rdlist_lock); 41687c478bd9Sstevel@tonic-gate } 41697c478bd9Sstevel@tonic-gate 41707c478bd9Sstevel@tonic-gate static void 41717c478bd9Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid) 41727c478bd9Sstevel@tonic-gate { 41737c478bd9Sstevel@tonic-gate struct rdma_done_list *r = qp->rdlist; 41747c478bd9Sstevel@tonic-gate 41757c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&qp->rdlist_lock)); 41767c478bd9Sstevel@tonic-gate 
41777c478bd9Sstevel@tonic-gate while (r) { 41787c478bd9Sstevel@tonic-gate if (r->xid == xid) { 41797c478bd9Sstevel@tonic-gate cv_signal(&r->rdma_done_cv); 41807c478bd9Sstevel@tonic-gate return; 41817c478bd9Sstevel@tonic-gate } else { 41827c478bd9Sstevel@tonic-gate r = r->next; 41837c478bd9Sstevel@tonic-gate } 41847c478bd9Sstevel@tonic-gate } 4185*0a701b1eSRobert Gordon DTRACE_PROBE1(rpcib__i__donenotify__nomatchxid, 4186*0a701b1eSRobert Gordon int, xid); 41877c478bd9Sstevel@tonic-gate } 41887c478bd9Sstevel@tonic-gate 41897c478bd9Sstevel@tonic-gate 41907c478bd9Sstevel@tonic-gate /* 41917c478bd9Sstevel@tonic-gate * Goes through all connections and closes the channel 41927c478bd9Sstevel@tonic-gate * This will cause all the WRs on those channels to be 41937c478bd9Sstevel@tonic-gate * flushed. 41947c478bd9Sstevel@tonic-gate */ 41957c478bd9Sstevel@tonic-gate static void 41967c478bd9Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist) 41977c478bd9Sstevel@tonic-gate { 41987c478bd9Sstevel@tonic-gate CONN *conn; 41997c478bd9Sstevel@tonic-gate rib_qp_t *qp; 42007c478bd9Sstevel@tonic-gate 42017c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_READER); 42027c478bd9Sstevel@tonic-gate conn = connlist->conn_hd; 42037c478bd9Sstevel@tonic-gate while (conn != NULL) { 42047c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 42057c478bd9Sstevel@tonic-gate qp = ctoqp(conn); 4206*0a701b1eSRobert Gordon if (conn->c_state == C_CONNECTED) { 42077c478bd9Sstevel@tonic-gate /* 42087c478bd9Sstevel@tonic-gate * Live connection in CONNECTED state. 42097c478bd9Sstevel@tonic-gate * Call ibt_close_rc_channel in nonblocking mode 42107c478bd9Sstevel@tonic-gate * with no callbacks. 
42117c478bd9Sstevel@tonic-gate */ 4212*0a701b1eSRobert Gordon conn->c_state = C_ERROR_CONN; 42137c478bd9Sstevel@tonic-gate (void) ibt_close_rc_channel(qp->qp_hdl, 42147c478bd9Sstevel@tonic-gate IBT_NOCALLBACKS, NULL, 0, NULL, NULL, 0); 42157c478bd9Sstevel@tonic-gate (void) ibt_free_channel(qp->qp_hdl); 42167c478bd9Sstevel@tonic-gate qp->qp_hdl = NULL; 42177c478bd9Sstevel@tonic-gate } else { 4218*0a701b1eSRobert Gordon if (conn->c_state == C_ERROR_CONN && 42197c478bd9Sstevel@tonic-gate qp->qp_hdl != NULL) { 42207c478bd9Sstevel@tonic-gate /* 42217c478bd9Sstevel@tonic-gate * Connection in ERROR state but 42227c478bd9Sstevel@tonic-gate * channel is not yet freed. 42237c478bd9Sstevel@tonic-gate */ 42247c478bd9Sstevel@tonic-gate (void) ibt_close_rc_channel(qp->qp_hdl, 42257c478bd9Sstevel@tonic-gate IBT_NOCALLBACKS, NULL, 0, NULL, 42267c478bd9Sstevel@tonic-gate NULL, 0); 42277c478bd9Sstevel@tonic-gate (void) ibt_free_channel(qp->qp_hdl); 42287c478bd9Sstevel@tonic-gate qp->qp_hdl = NULL; 42297c478bd9Sstevel@tonic-gate } 42307c478bd9Sstevel@tonic-gate } 42317c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 42327c478bd9Sstevel@tonic-gate conn = conn->c_next; 42337c478bd9Sstevel@tonic-gate } 42347c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 42357c478bd9Sstevel@tonic-gate } 42367c478bd9Sstevel@tonic-gate 42377c478bd9Sstevel@tonic-gate /* 42387c478bd9Sstevel@tonic-gate * Frees up all connections that are no longer being referenced 42397c478bd9Sstevel@tonic-gate */ 42407c478bd9Sstevel@tonic-gate static void 42417c478bd9Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist) 42427c478bd9Sstevel@tonic-gate { 42437c478bd9Sstevel@tonic-gate CONN *conn; 42447c478bd9Sstevel@tonic-gate 42457c478bd9Sstevel@tonic-gate top: 42467c478bd9Sstevel@tonic-gate rw_enter(&connlist->conn_lock, RW_READER); 42477c478bd9Sstevel@tonic-gate conn = connlist->conn_hd; 42487c478bd9Sstevel@tonic-gate while (conn != NULL) { 42497c478bd9Sstevel@tonic-gate mutex_enter(&conn->c_lock); 
42507c478bd9Sstevel@tonic-gate 42517c478bd9Sstevel@tonic-gate /* 42527c478bd9Sstevel@tonic-gate * At this point connection is either in ERROR 42537c478bd9Sstevel@tonic-gate * or DISCONN_PEND state. If in DISCONN_PEND state 42547c478bd9Sstevel@tonic-gate * then some other thread is culling that connection. 42557c478bd9Sstevel@tonic-gate * If not and if c_ref is 0, then destroy the connection. 42567c478bd9Sstevel@tonic-gate */ 42577c478bd9Sstevel@tonic-gate if (conn->c_ref == 0 && 42587c478bd9Sstevel@tonic-gate conn->c_state != C_DISCONN_PEND) { 42597c478bd9Sstevel@tonic-gate /* 42607c478bd9Sstevel@tonic-gate * Cull the connection 42617c478bd9Sstevel@tonic-gate */ 42627c478bd9Sstevel@tonic-gate conn->c_state = C_DISCONN_PEND; 42637c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 42647c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 42657c478bd9Sstevel@tonic-gate (void) rib_disconnect_channel(conn, connlist); 42667c478bd9Sstevel@tonic-gate goto top; 42677c478bd9Sstevel@tonic-gate } else { 42687c478bd9Sstevel@tonic-gate /* 42697c478bd9Sstevel@tonic-gate * conn disconnect already scheduled or will 42707c478bd9Sstevel@tonic-gate * happen from conn_release when c_ref drops to 0. 
42717c478bd9Sstevel@tonic-gate */ 42727c478bd9Sstevel@tonic-gate mutex_exit(&conn->c_lock); 42737c478bd9Sstevel@tonic-gate } 42747c478bd9Sstevel@tonic-gate conn = conn->c_next; 42757c478bd9Sstevel@tonic-gate } 42767c478bd9Sstevel@tonic-gate rw_exit(&connlist->conn_lock); 42777c478bd9Sstevel@tonic-gate 42787c478bd9Sstevel@tonic-gate /* 42797c478bd9Sstevel@tonic-gate * At this point, only connections with c_ref != 0 are on the list 42807c478bd9Sstevel@tonic-gate */ 42817c478bd9Sstevel@tonic-gate } 42827c478bd9Sstevel@tonic-gate 42837c478bd9Sstevel@tonic-gate /* 42847c478bd9Sstevel@tonic-gate * Cleans and closes up all uses of the HCA 42857c478bd9Sstevel@tonic-gate */ 42867c478bd9Sstevel@tonic-gate static void 42877c478bd9Sstevel@tonic-gate rib_detach_hca(rib_hca_t *hca) 42887c478bd9Sstevel@tonic-gate { 42897c478bd9Sstevel@tonic-gate 42907c478bd9Sstevel@tonic-gate /* 42917c478bd9Sstevel@tonic-gate * Stop all services on the HCA 42927c478bd9Sstevel@tonic-gate * Go through cl_conn_list and close all rc_channels 42937c478bd9Sstevel@tonic-gate * Go through svr_conn_list and close all rc_channels 42947c478bd9Sstevel@tonic-gate * Free connections whose c_ref has dropped to 0 42957c478bd9Sstevel@tonic-gate * Destroy all CQs 42967c478bd9Sstevel@tonic-gate * Deregister and released all buffer pool memory after all 42977c478bd9Sstevel@tonic-gate * connections are destroyed 42987c478bd9Sstevel@tonic-gate * Free the protection domain 42997c478bd9Sstevel@tonic-gate * ibt_close_hca() 43007c478bd9Sstevel@tonic-gate */ 43017c478bd9Sstevel@tonic-gate rw_enter(&hca->state_lock, RW_WRITER); 43027c478bd9Sstevel@tonic-gate if (hca->state == HCA_DETACHED) { 43037c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 43047c478bd9Sstevel@tonic-gate return; 43057c478bd9Sstevel@tonic-gate } 43067c478bd9Sstevel@tonic-gate 43077c478bd9Sstevel@tonic-gate hca->state = HCA_DETACHED; 43087c478bd9Sstevel@tonic-gate rib_stat->nhca_inited--; 43097c478bd9Sstevel@tonic-gate 43107c478bd9Sstevel@tonic-gate 
rib_stop_services(hca); 43117c478bd9Sstevel@tonic-gate rib_close_channels(&hca->cl_conn_list); 43127c478bd9Sstevel@tonic-gate rib_close_channels(&hca->srv_conn_list); 43137c478bd9Sstevel@tonic-gate rw_exit(&hca->state_lock); 43147c478bd9Sstevel@tonic-gate 43157c478bd9Sstevel@tonic-gate rib_purge_connlist(&hca->cl_conn_list); 43167c478bd9Sstevel@tonic-gate rib_purge_connlist(&hca->srv_conn_list); 43177c478bd9Sstevel@tonic-gate 43187c478bd9Sstevel@tonic-gate (void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl); 43197c478bd9Sstevel@tonic-gate (void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl); 43207c478bd9Sstevel@tonic-gate (void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl); 43217c478bd9Sstevel@tonic-gate (void) ibt_free_cq(hca->svc_scq->rib_cq_hdl); 43227c478bd9Sstevel@tonic-gate kmem_free(hca->clnt_rcq, sizeof (rib_cq_t)); 43237c478bd9Sstevel@tonic-gate kmem_free(hca->clnt_scq, sizeof (rib_cq_t)); 43247c478bd9Sstevel@tonic-gate kmem_free(hca->svc_rcq, sizeof (rib_cq_t)); 43257c478bd9Sstevel@tonic-gate kmem_free(hca->svc_scq, sizeof (rib_cq_t)); 43267c478bd9Sstevel@tonic-gate 43277c478bd9Sstevel@tonic-gate rw_enter(&hca->srv_conn_list.conn_lock, RW_READER); 43287c478bd9Sstevel@tonic-gate rw_enter(&hca->cl_conn_list.conn_lock, RW_READER); 43297c478bd9Sstevel@tonic-gate if (hca->srv_conn_list.conn_hd == NULL && 43307c478bd9Sstevel@tonic-gate hca->cl_conn_list.conn_hd == NULL) { 43317c478bd9Sstevel@tonic-gate /* 43327c478bd9Sstevel@tonic-gate * conn_lists are NULL, so destroy 43337c478bd9Sstevel@tonic-gate * buffers, close hca and be done. 
43347c478bd9Sstevel@tonic-gate */ 43357c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(hca, RECV_BUFFER); 43367c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(hca, SEND_BUFFER); 4337*0a701b1eSRobert Gordon rib_destroy_cache(hca); 43387c478bd9Sstevel@tonic-gate (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 43397c478bd9Sstevel@tonic-gate (void) ibt_close_hca(hca->hca_hdl); 43407c478bd9Sstevel@tonic-gate hca->hca_hdl = NULL; 43417c478bd9Sstevel@tonic-gate } 43427c478bd9Sstevel@tonic-gate rw_exit(&hca->cl_conn_list.conn_lock); 43437c478bd9Sstevel@tonic-gate rw_exit(&hca->srv_conn_list.conn_lock); 43447c478bd9Sstevel@tonic-gate 43457c478bd9Sstevel@tonic-gate if (hca->hca_hdl != NULL) { 43467c478bd9Sstevel@tonic-gate mutex_enter(&hca->inuse_lock); 43477c478bd9Sstevel@tonic-gate while (hca->inuse) 43487c478bd9Sstevel@tonic-gate cv_wait(&hca->cb_cv, &hca->inuse_lock); 43497c478bd9Sstevel@tonic-gate mutex_exit(&hca->inuse_lock); 43507c478bd9Sstevel@tonic-gate /* 43517c478bd9Sstevel@tonic-gate * conn_lists are now NULL, so destroy 43527c478bd9Sstevel@tonic-gate * buffers, close hca and be done. 
43537c478bd9Sstevel@tonic-gate */ 43547c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(hca, RECV_BUFFER); 43557c478bd9Sstevel@tonic-gate rib_rbufpool_destroy(hca, SEND_BUFFER); 43567c478bd9Sstevel@tonic-gate (void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl); 43577c478bd9Sstevel@tonic-gate (void) ibt_close_hca(hca->hca_hdl); 43587c478bd9Sstevel@tonic-gate hca->hca_hdl = NULL; 43597c478bd9Sstevel@tonic-gate } 43607c478bd9Sstevel@tonic-gate } 4361*0a701b1eSRobert Gordon 4362*0a701b1eSRobert Gordon static void 4363*0a701b1eSRobert Gordon rib_server_side_cache_reclaim(void *argp) 4364*0a701b1eSRobert Gordon { 4365*0a701b1eSRobert Gordon cache_avl_struct_t *rcas; 4366*0a701b1eSRobert Gordon rib_lrc_entry_t *rb; 4367*0a701b1eSRobert Gordon rib_hca_t *hca = (rib_hca_t *)argp; 4368*0a701b1eSRobert Gordon 4369*0a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_WRITER); 4370*0a701b1eSRobert Gordon rcas = avl_first(&hca->avl_tree); 4371*0a701b1eSRobert Gordon if (rcas != NULL) 4372*0a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 4373*0a701b1eSRobert Gordon 4374*0a701b1eSRobert Gordon while (rcas != NULL) { 4375*0a701b1eSRobert Gordon while (rcas->r.forw != &rcas->r) { 4376*0a701b1eSRobert Gordon rcas->elements--; 4377*0a701b1eSRobert Gordon rib_total_buffers --; 4378*0a701b1eSRobert Gordon rb = rcas->r.forw; 4379*0a701b1eSRobert Gordon remque(rb); 4380*0a701b1eSRobert Gordon if (rb->registered) 4381*0a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, 4382*0a701b1eSRobert Gordon rb->lrc_buf, rb->lrc_mhandle); 4383*0a701b1eSRobert Gordon cache_allocation -= rb->lrc_len; 4384*0a701b1eSRobert Gordon kmem_free(rb->lrc_buf, rb->lrc_len); 4385*0a701b1eSRobert Gordon kmem_free(rb, sizeof (rib_lrc_entry_t)); 4386*0a701b1eSRobert Gordon } 4387*0a701b1eSRobert Gordon mutex_destroy(&rcas->node_lock); 4388*0a701b1eSRobert Gordon kmem_cache_free(hca->server_side_cache, rcas); 4389*0a701b1eSRobert Gordon rcas = avl_first(&hca->avl_tree); 4390*0a701b1eSRobert Gordon if (rcas 
!= NULL) 4391*0a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 4392*0a701b1eSRobert Gordon } 4393*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4394*0a701b1eSRobert Gordon } 4395*0a701b1eSRobert Gordon 4396*0a701b1eSRobert Gordon static void 4397*0a701b1eSRobert Gordon rib_server_side_cache_cleanup(void *argp) 4398*0a701b1eSRobert Gordon { 4399*0a701b1eSRobert Gordon cache_avl_struct_t *rcas; 4400*0a701b1eSRobert Gordon rib_lrc_entry_t *rb; 4401*0a701b1eSRobert Gordon rib_hca_t *hca = (rib_hca_t *)argp; 4402*0a701b1eSRobert Gordon 4403*0a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_READER); 4404*0a701b1eSRobert Gordon if (cache_allocation < cache_limit) { 4405*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4406*0a701b1eSRobert Gordon return; 4407*0a701b1eSRobert Gordon } 4408*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4409*0a701b1eSRobert Gordon 4410*0a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_WRITER); 4411*0a701b1eSRobert Gordon rcas = avl_last(&hca->avl_tree); 4412*0a701b1eSRobert Gordon if (rcas != NULL) 4413*0a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 4414*0a701b1eSRobert Gordon 4415*0a701b1eSRobert Gordon while (rcas != NULL) { 4416*0a701b1eSRobert Gordon while (rcas->r.forw != &rcas->r) { 4417*0a701b1eSRobert Gordon rcas->elements--; 4418*0a701b1eSRobert Gordon rib_total_buffers --; 4419*0a701b1eSRobert Gordon rb = rcas->r.forw; 4420*0a701b1eSRobert Gordon remque(rb); 4421*0a701b1eSRobert Gordon if (rb->registered) 4422*0a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, 4423*0a701b1eSRobert Gordon rb->lrc_buf, rb->lrc_mhandle); 4424*0a701b1eSRobert Gordon cache_allocation -= rb->lrc_len; 4425*0a701b1eSRobert Gordon kmem_free(rb->lrc_buf, rb->lrc_len); 4426*0a701b1eSRobert Gordon kmem_free(rb, sizeof (rib_lrc_entry_t)); 4427*0a701b1eSRobert Gordon } 4428*0a701b1eSRobert Gordon mutex_destroy(&rcas->node_lock); 4429*0a701b1eSRobert Gordon kmem_cache_free(hca->server_side_cache, rcas); 
4430*0a701b1eSRobert Gordon if ((cache_allocation) < cache_limit) { 4431*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4432*0a701b1eSRobert Gordon return; 4433*0a701b1eSRobert Gordon } 4434*0a701b1eSRobert Gordon 4435*0a701b1eSRobert Gordon rcas = avl_last(&hca->avl_tree); 4436*0a701b1eSRobert Gordon if (rcas != NULL) 4437*0a701b1eSRobert Gordon avl_remove(&hca->avl_tree, rcas); 4438*0a701b1eSRobert Gordon } 4439*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4440*0a701b1eSRobert Gordon } 4441*0a701b1eSRobert Gordon 4442*0a701b1eSRobert Gordon static int 4443*0a701b1eSRobert Gordon avl_compare(const void *t1, const void *t2) 4444*0a701b1eSRobert Gordon { 4445*0a701b1eSRobert Gordon if (((cache_avl_struct_t *)t1)->len == ((cache_avl_struct_t *)t2)->len) 4446*0a701b1eSRobert Gordon return (0); 4447*0a701b1eSRobert Gordon 4448*0a701b1eSRobert Gordon if (((cache_avl_struct_t *)t1)->len < ((cache_avl_struct_t *)t2)->len) 4449*0a701b1eSRobert Gordon return (-1); 4450*0a701b1eSRobert Gordon 4451*0a701b1eSRobert Gordon return (1); 4452*0a701b1eSRobert Gordon } 4453*0a701b1eSRobert Gordon 4454*0a701b1eSRobert Gordon static void 4455*0a701b1eSRobert Gordon rib_destroy_cache(rib_hca_t *hca) 4456*0a701b1eSRobert Gordon { 4457*0a701b1eSRobert Gordon if (hca->reg_cache_clean_up != NULL) { 4458*0a701b1eSRobert Gordon ddi_taskq_destroy(hca->reg_cache_clean_up); 4459*0a701b1eSRobert Gordon hca->reg_cache_clean_up = NULL; 4460*0a701b1eSRobert Gordon } 4461*0a701b1eSRobert Gordon if (!hca->avl_init) { 4462*0a701b1eSRobert Gordon kmem_cache_destroy(hca->server_side_cache); 4463*0a701b1eSRobert Gordon avl_destroy(&hca->avl_tree); 4464*0a701b1eSRobert Gordon mutex_destroy(&hca->cache_allocation); 4465*0a701b1eSRobert Gordon rw_destroy(&hca->avl_rw_lock); 4466*0a701b1eSRobert Gordon } 4467*0a701b1eSRobert Gordon hca->avl_init = FALSE; 4468*0a701b1eSRobert Gordon } 4469*0a701b1eSRobert Gordon 4470*0a701b1eSRobert Gordon static void 4471*0a701b1eSRobert Gordon 
rib_force_cleanup(void *hca) 4472*0a701b1eSRobert Gordon { 4473*0a701b1eSRobert Gordon if (((rib_hca_t *)hca)->reg_cache_clean_up != NULL) 4474*0a701b1eSRobert Gordon (void) ddi_taskq_dispatch( 4475*0a701b1eSRobert Gordon ((rib_hca_t *)hca)->reg_cache_clean_up, 4476*0a701b1eSRobert Gordon rib_server_side_cache_cleanup, 4477*0a701b1eSRobert Gordon (void *)hca, DDI_NOSLEEP); 4478*0a701b1eSRobert Gordon } 4479*0a701b1eSRobert Gordon 4480*0a701b1eSRobert Gordon static rib_lrc_entry_t * 4481*0a701b1eSRobert Gordon rib_get_cache_buf(CONN *conn, uint32_t len) 4482*0a701b1eSRobert Gordon { 4483*0a701b1eSRobert Gordon cache_avl_struct_t cas, *rcas; 4484*0a701b1eSRobert Gordon rib_hca_t *hca = (ctoqp(conn))->hca; 4485*0a701b1eSRobert Gordon rib_lrc_entry_t *reply_buf; 4486*0a701b1eSRobert Gordon avl_index_t where = NULL; 4487*0a701b1eSRobert Gordon uint64_t c_alloc = 0; 4488*0a701b1eSRobert Gordon 4489*0a701b1eSRobert Gordon if (!hca->avl_init) 4490*0a701b1eSRobert Gordon goto error_alloc; 4491*0a701b1eSRobert Gordon 4492*0a701b1eSRobert Gordon cas.len = len; 4493*0a701b1eSRobert Gordon 4494*0a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_READER); 4495*0a701b1eSRobert Gordon 4496*0a701b1eSRobert Gordon mutex_enter(&hca->cache_allocation); 4497*0a701b1eSRobert Gordon c_alloc = cache_allocation; 4498*0a701b1eSRobert Gordon mutex_exit(&hca->cache_allocation); 4499*0a701b1eSRobert Gordon 4500*0a701b1eSRobert Gordon if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas, 4501*0a701b1eSRobert Gordon &where)) == NULL) { 4502*0a701b1eSRobert Gordon /* Am I above the cache limit */ 4503*0a701b1eSRobert Gordon if ((c_alloc + len) >= cache_limit) { 4504*0a701b1eSRobert Gordon rib_force_cleanup((void *)hca); 4505*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4506*0a701b1eSRobert Gordon cache_misses_above_the_limit ++; 4507*0a701b1eSRobert Gordon 4508*0a701b1eSRobert Gordon /* Allocate and register the buffer directly */ 4509*0a701b1eSRobert Gordon goto error_alloc; 
4510*0a701b1eSRobert Gordon } 4511*0a701b1eSRobert Gordon 4512*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4513*0a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_WRITER); 4514*0a701b1eSRobert Gordon 4515*0a701b1eSRobert Gordon /* Recheck to make sure no other thread added the entry in */ 4516*0a701b1eSRobert Gordon if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, 4517*0a701b1eSRobert Gordon &cas, &where)) == NULL) { 4518*0a701b1eSRobert Gordon /* Allocate an avl tree entry */ 4519*0a701b1eSRobert Gordon rcas = (cache_avl_struct_t *) 4520*0a701b1eSRobert Gordon kmem_cache_alloc(hca->server_side_cache, KM_SLEEP); 4521*0a701b1eSRobert Gordon 4522*0a701b1eSRobert Gordon bzero(rcas, sizeof (cache_avl_struct_t)); 4523*0a701b1eSRobert Gordon rcas->elements = 0; 4524*0a701b1eSRobert Gordon rcas->r.forw = &rcas->r; 4525*0a701b1eSRobert Gordon rcas->r.back = &rcas->r; 4526*0a701b1eSRobert Gordon rcas->len = len; 4527*0a701b1eSRobert Gordon mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL); 4528*0a701b1eSRobert Gordon avl_insert(&hca->avl_tree, rcas, where); 4529*0a701b1eSRobert Gordon } 4530*0a701b1eSRobert Gordon } 4531*0a701b1eSRobert Gordon 4532*0a701b1eSRobert Gordon mutex_enter(&rcas->node_lock); 4533*0a701b1eSRobert Gordon 4534*0a701b1eSRobert Gordon if (rcas->r.forw != &rcas->r && rcas->elements > 0) { 4535*0a701b1eSRobert Gordon rib_total_buffers--; 4536*0a701b1eSRobert Gordon cache_hits++; 4537*0a701b1eSRobert Gordon reply_buf = rcas->r.forw; 4538*0a701b1eSRobert Gordon remque(reply_buf); 4539*0a701b1eSRobert Gordon rcas->elements--; 4540*0a701b1eSRobert Gordon mutex_exit(&rcas->node_lock); 4541*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4542*0a701b1eSRobert Gordon mutex_enter(&hca->cache_allocation); 4543*0a701b1eSRobert Gordon cache_allocation -= len; 4544*0a701b1eSRobert Gordon mutex_exit(&hca->cache_allocation); 4545*0a701b1eSRobert Gordon } else { 4546*0a701b1eSRobert Gordon /* Am I above the cache limit */ 
4547*0a701b1eSRobert Gordon mutex_exit(&rcas->node_lock); 4548*0a701b1eSRobert Gordon if ((c_alloc + len) >= cache_limit) { 4549*0a701b1eSRobert Gordon rib_force_cleanup((void *)hca); 4550*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4551*0a701b1eSRobert Gordon cache_misses_above_the_limit ++; 4552*0a701b1eSRobert Gordon /* Allocate and register the buffer directly */ 4553*0a701b1eSRobert Gordon goto error_alloc; 4554*0a701b1eSRobert Gordon } 4555*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4556*0a701b1eSRobert Gordon cache_misses ++; 4557*0a701b1eSRobert Gordon /* Allocate a reply_buf entry */ 4558*0a701b1eSRobert Gordon reply_buf = (rib_lrc_entry_t *) 4559*0a701b1eSRobert Gordon kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP); 4560*0a701b1eSRobert Gordon bzero(reply_buf, sizeof (rib_lrc_entry_t)); 4561*0a701b1eSRobert Gordon reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP); 4562*0a701b1eSRobert Gordon reply_buf->lrc_len = len; 4563*0a701b1eSRobert Gordon reply_buf->registered = FALSE; 4564*0a701b1eSRobert Gordon reply_buf->avl_node = (void *)rcas; 4565*0a701b1eSRobert Gordon } 4566*0a701b1eSRobert Gordon 4567*0a701b1eSRobert Gordon return (reply_buf); 4568*0a701b1eSRobert Gordon 4569*0a701b1eSRobert Gordon error_alloc: 4570*0a701b1eSRobert Gordon reply_buf = (rib_lrc_entry_t *) 4571*0a701b1eSRobert Gordon kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP); 4572*0a701b1eSRobert Gordon bzero(reply_buf, sizeof (rib_lrc_entry_t)); 4573*0a701b1eSRobert Gordon reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP); 4574*0a701b1eSRobert Gordon reply_buf->lrc_len = len; 4575*0a701b1eSRobert Gordon reply_buf->registered = FALSE; 4576*0a701b1eSRobert Gordon reply_buf->avl_node = NULL; 4577*0a701b1eSRobert Gordon 4578*0a701b1eSRobert Gordon return (reply_buf); 4579*0a701b1eSRobert Gordon } 4580*0a701b1eSRobert Gordon 4581*0a701b1eSRobert Gordon /* 4582*0a701b1eSRobert Gordon * Return a pre-registered back to the cache (without 4583*0a701b1eSRobert Gordon * unregistering the 
buffer).. 4584*0a701b1eSRobert Gordon */ 4585*0a701b1eSRobert Gordon 4586*0a701b1eSRobert Gordon static void 4587*0a701b1eSRobert Gordon rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf) 4588*0a701b1eSRobert Gordon { 4589*0a701b1eSRobert Gordon cache_avl_struct_t cas, *rcas; 4590*0a701b1eSRobert Gordon avl_index_t where = NULL; 4591*0a701b1eSRobert Gordon rib_hca_t *hca = (ctoqp(conn))->hca; 4592*0a701b1eSRobert Gordon 4593*0a701b1eSRobert Gordon if (!hca->avl_init) 4594*0a701b1eSRobert Gordon goto error_free; 4595*0a701b1eSRobert Gordon 4596*0a701b1eSRobert Gordon cas.len = reg_buf->lrc_len; 4597*0a701b1eSRobert Gordon rw_enter(&hca->avl_rw_lock, RW_READER); 4598*0a701b1eSRobert Gordon if ((rcas = (cache_avl_struct_t *) 4599*0a701b1eSRobert Gordon avl_find(&hca->avl_tree, &cas, &where)) == NULL) { 4600*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4601*0a701b1eSRobert Gordon goto error_free; 4602*0a701b1eSRobert Gordon } else { 4603*0a701b1eSRobert Gordon rib_total_buffers ++; 4604*0a701b1eSRobert Gordon cas.len = reg_buf->lrc_len; 4605*0a701b1eSRobert Gordon mutex_enter(&rcas->node_lock); 4606*0a701b1eSRobert Gordon insque(reg_buf, &rcas->r); 4607*0a701b1eSRobert Gordon rcas->elements ++; 4608*0a701b1eSRobert Gordon mutex_exit(&rcas->node_lock); 4609*0a701b1eSRobert Gordon rw_exit(&hca->avl_rw_lock); 4610*0a701b1eSRobert Gordon mutex_enter(&hca->cache_allocation); 4611*0a701b1eSRobert Gordon cache_allocation += cas.len; 4612*0a701b1eSRobert Gordon mutex_exit(&hca->cache_allocation); 4613*0a701b1eSRobert Gordon } 4614*0a701b1eSRobert Gordon 4615*0a701b1eSRobert Gordon return; 4616*0a701b1eSRobert Gordon 4617*0a701b1eSRobert Gordon error_free: 4618*0a701b1eSRobert Gordon 4619*0a701b1eSRobert Gordon if (reg_buf->registered) 4620*0a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, 4621*0a701b1eSRobert Gordon reg_buf->lrc_buf, reg_buf->lrc_mhandle); 4622*0a701b1eSRobert Gordon kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len); 4623*0a701b1eSRobert 
Gordon kmem_free(reg_buf, sizeof (rib_lrc_entry_t)); 4624*0a701b1eSRobert Gordon } 4625*0a701b1eSRobert Gordon 4626*0a701b1eSRobert Gordon static rdma_stat 4627*0a701b1eSRobert Gordon rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf, 4628*0a701b1eSRobert Gordon uint_t buflen, struct mrc *buf_handle) 4629*0a701b1eSRobert Gordon { 4630*0a701b1eSRobert Gordon ibt_mr_hdl_t mr_hdl = NULL; /* memory region handle */ 4631*0a701b1eSRobert Gordon ibt_mr_desc_t mr_desc; /* vaddr, lkey, rkey */ 4632*0a701b1eSRobert Gordon rdma_stat status; 4633*0a701b1eSRobert Gordon 4634*0a701b1eSRobert Gordon 4635*0a701b1eSRobert Gordon /* 4636*0a701b1eSRobert Gordon * Note: ALL buffer pools use the same memory type RDMARW. 4637*0a701b1eSRobert Gordon */ 4638*0a701b1eSRobert Gordon status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc); 4639*0a701b1eSRobert Gordon if (status == RDMA_SUCCESS) { 4640*0a701b1eSRobert Gordon buf_handle->mrc_linfo = (uint64_t)(uintptr_t)mr_hdl; 4641*0a701b1eSRobert Gordon buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey; 4642*0a701b1eSRobert Gordon buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey; 4643*0a701b1eSRobert Gordon } else { 4644*0a701b1eSRobert Gordon buf_handle->mrc_linfo = NULL; 4645*0a701b1eSRobert Gordon buf_handle->mrc_lmr = 0; 4646*0a701b1eSRobert Gordon buf_handle->mrc_rmr = 0; 4647*0a701b1eSRobert Gordon } 4648*0a701b1eSRobert Gordon return (status); 4649*0a701b1eSRobert Gordon } 4650*0a701b1eSRobert Gordon 4651*0a701b1eSRobert Gordon /* ARGSUSED */ 4652*0a701b1eSRobert Gordon static rdma_stat 4653*0a701b1eSRobert Gordon rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf, 4654*0a701b1eSRobert Gordon struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle) 4655*0a701b1eSRobert Gordon { 4656*0a701b1eSRobert Gordon 4657*0a701b1eSRobert Gordon (void) rib_deregistermem_via_hca(hca, buf, buf_handle); 4658*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 4659*0a701b1eSRobert Gordon } 4660*0a701b1eSRobert Gordon 
4661*0a701b1eSRobert Gordon /* ARGSUSED */ 4662*0a701b1eSRobert Gordon static rdma_stat 4663*0a701b1eSRobert Gordon rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle) 4664*0a701b1eSRobert Gordon { 4665*0a701b1eSRobert Gordon 4666*0a701b1eSRobert Gordon (void) ibt_deregister_mr(hca->hca_hdl, 4667*0a701b1eSRobert Gordon (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo); 4668*0a701b1eSRobert Gordon return (RDMA_SUCCESS); 4669*0a701b1eSRobert Gordon } 4670*0a701b1eSRobert Gordon 4671*0a701b1eSRobert Gordon 4672*0a701b1eSRobert Gordon /* 4673*0a701b1eSRobert Gordon * Return 0 if the interface is IB. 4674*0a701b1eSRobert Gordon * Return error (>0) if any error is encountered during processing. 4675*0a701b1eSRobert Gordon * Return -1 if the interface is not IB and no error. 4676*0a701b1eSRobert Gordon */ 4677*0a701b1eSRobert Gordon #define isalpha(ch) (((ch) >= 'a' && (ch) <= 'z') || \ 4678*0a701b1eSRobert Gordon ((ch) >= 'A' && (ch) <= 'Z')) 4679*0a701b1eSRobert Gordon static int 4680*0a701b1eSRobert Gordon rpcib_is_ib_interface(char *name) 4681*0a701b1eSRobert Gordon { 4682*0a701b1eSRobert Gordon 4683*0a701b1eSRobert Gordon char dev_path[MAXPATHLEN]; 4684*0a701b1eSRobert Gordon char devname[MAXNAMELEN]; 4685*0a701b1eSRobert Gordon ldi_handle_t lh; 4686*0a701b1eSRobert Gordon dl_info_ack_t info; 4687*0a701b1eSRobert Gordon int ret = 0; 4688*0a701b1eSRobert Gordon int i; 4689*0a701b1eSRobert Gordon 4690*0a701b1eSRobert Gordon /* 4691*0a701b1eSRobert Gordon * ibd devices are only style 2 devices 4692*0a701b1eSRobert Gordon * so we will open only style 2 devices 4693*0a701b1eSRobert Gordon * by ignoring the ppa 4694*0a701b1eSRobert Gordon */ 4695*0a701b1eSRobert Gordon 4696*0a701b1eSRobert Gordon i = strlen(name) - 1; 4697*0a701b1eSRobert Gordon while ((i >= 0) && (!isalpha(name[i]))) i--; 4698*0a701b1eSRobert Gordon 4699*0a701b1eSRobert Gordon if (i < 0) { 4700*0a701b1eSRobert Gordon /* Invalid interface name, no alphabet */ 4701*0a701b1eSRobert 
Gordon return (-1); 4702*0a701b1eSRobert Gordon } 4703*0a701b1eSRobert Gordon 4704*0a701b1eSRobert Gordon (void) strncpy(devname, name, i + 1); 4705*0a701b1eSRobert Gordon devname[i + 1] = '\0'; 4706*0a701b1eSRobert Gordon 4707*0a701b1eSRobert Gordon if (strcmp("lo", devname) == 0) { 4708*0a701b1eSRobert Gordon /* 4709*0a701b1eSRobert Gordon * loopback interface not rpc/rdma capable 4710*0a701b1eSRobert Gordon */ 4711*0a701b1eSRobert Gordon return (-1); 4712*0a701b1eSRobert Gordon } 4713*0a701b1eSRobert Gordon 4714*0a701b1eSRobert Gordon (void) strncpy(dev_path, "/dev/", MAXPATHLEN); 4715*0a701b1eSRobert Gordon if (strlcat(dev_path, devname, MAXPATHLEN) >= MAXPATHLEN) { 4716*0a701b1eSRobert Gordon /* string overflow */ 4717*0a701b1eSRobert Gordon return (-1); 4718*0a701b1eSRobert Gordon } 4719*0a701b1eSRobert Gordon 4720*0a701b1eSRobert Gordon ret = ldi_open_by_name(dev_path, FREAD|FWRITE, kcred, &lh, rpcib_li); 4721*0a701b1eSRobert Gordon if (ret != 0) { 4722*0a701b1eSRobert Gordon return (ret); 4723*0a701b1eSRobert Gordon } 4724*0a701b1eSRobert Gordon ret = rpcib_dl_info(lh, &info); 4725*0a701b1eSRobert Gordon (void) ldi_close(lh, FREAD|FWRITE, kcred); 4726*0a701b1eSRobert Gordon if (ret != 0) { 4727*0a701b1eSRobert Gordon return (ret); 4728*0a701b1eSRobert Gordon } 4729*0a701b1eSRobert Gordon 4730*0a701b1eSRobert Gordon if (info.dl_mac_type != DL_IB) { 4731*0a701b1eSRobert Gordon return (-1); 4732*0a701b1eSRobert Gordon } 4733*0a701b1eSRobert Gordon 4734*0a701b1eSRobert Gordon return (0); 4735*0a701b1eSRobert Gordon } 4736*0a701b1eSRobert Gordon 4737*0a701b1eSRobert Gordon static int 4738*0a701b1eSRobert Gordon rpcib_dl_info(ldi_handle_t lh, dl_info_ack_t *info) 4739*0a701b1eSRobert Gordon { 4740*0a701b1eSRobert Gordon dl_info_req_t *info_req; 4741*0a701b1eSRobert Gordon union DL_primitives *dl_prim; 4742*0a701b1eSRobert Gordon mblk_t *mp; 4743*0a701b1eSRobert Gordon k_sigset_t smask; 4744*0a701b1eSRobert Gordon int error; 4745*0a701b1eSRobert Gordon 
4746*0a701b1eSRobert Gordon if ((mp = allocb(sizeof (dl_info_req_t), BPRI_MED)) == NULL) { 4747*0a701b1eSRobert Gordon return (ENOMEM); 4748*0a701b1eSRobert Gordon } 4749*0a701b1eSRobert Gordon 4750*0a701b1eSRobert Gordon mp->b_datap->db_type = M_PROTO; 4751*0a701b1eSRobert Gordon 4752*0a701b1eSRobert Gordon info_req = (dl_info_req_t *)(uintptr_t)mp->b_wptr; 4753*0a701b1eSRobert Gordon mp->b_wptr += sizeof (dl_info_req_t); 4754*0a701b1eSRobert Gordon info_req->dl_primitive = DL_INFO_REQ; 4755*0a701b1eSRobert Gordon 4756*0a701b1eSRobert Gordon sigintr(&smask, 0); 4757*0a701b1eSRobert Gordon if ((error = ldi_putmsg(lh, mp)) != 0) { 4758*0a701b1eSRobert Gordon sigunintr(&smask); 4759*0a701b1eSRobert Gordon return (error); 4760*0a701b1eSRobert Gordon } 4761*0a701b1eSRobert Gordon if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) { 4762*0a701b1eSRobert Gordon sigunintr(&smask); 4763*0a701b1eSRobert Gordon return (error); 4764*0a701b1eSRobert Gordon } 4765*0a701b1eSRobert Gordon sigunintr(&smask); 4766*0a701b1eSRobert Gordon 4767*0a701b1eSRobert Gordon dl_prim = (union DL_primitives *)(uintptr_t)mp->b_rptr; 4768*0a701b1eSRobert Gordon switch (dl_prim->dl_primitive) { 4769*0a701b1eSRobert Gordon case DL_INFO_ACK: 4770*0a701b1eSRobert Gordon if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) < 4771*0a701b1eSRobert Gordon sizeof (dl_info_ack_t)) { 4772*0a701b1eSRobert Gordon error = -1; 4773*0a701b1eSRobert Gordon } else { 4774*0a701b1eSRobert Gordon *info = *(dl_info_ack_t *)(uintptr_t)mp->b_rptr; 4775*0a701b1eSRobert Gordon error = 0; 4776*0a701b1eSRobert Gordon } 4777*0a701b1eSRobert Gordon break; 4778*0a701b1eSRobert Gordon default: 4779*0a701b1eSRobert Gordon error = -1; 4780*0a701b1eSRobert Gordon break; 4781*0a701b1eSRobert Gordon } 4782*0a701b1eSRobert Gordon 4783*0a701b1eSRobert Gordon freemsg(mp); 4784*0a701b1eSRobert Gordon return (error); 4785*0a701b1eSRobert Gordon } 4786*0a701b1eSRobert Gordon static int 4787*0a701b1eSRobert Gordon 
rpcib_do_ip_ioctl(int cmd, int len, caddr_t arg) 4788*0a701b1eSRobert Gordon { 4789*0a701b1eSRobert Gordon vnode_t *kvp, *vp; 4790*0a701b1eSRobert Gordon TIUSER *tiptr; 4791*0a701b1eSRobert Gordon struct strioctl iocb; 4792*0a701b1eSRobert Gordon k_sigset_t smask; 4793*0a701b1eSRobert Gordon int err = 0; 4794*0a701b1eSRobert Gordon 4795*0a701b1eSRobert Gordon if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, 4796*0a701b1eSRobert Gordon &kvp) == 0) { 4797*0a701b1eSRobert Gordon if (t_kopen((file_t *)NULL, kvp->v_rdev, FREAD|FWRITE, 4798*0a701b1eSRobert Gordon &tiptr, CRED()) == 0) { 4799*0a701b1eSRobert Gordon vp = tiptr->fp->f_vnode; 4800*0a701b1eSRobert Gordon } else { 4801*0a701b1eSRobert Gordon VN_RELE(kvp); 4802*0a701b1eSRobert Gordon return (EPROTO); 4803*0a701b1eSRobert Gordon } 4804*0a701b1eSRobert Gordon } else { 4805*0a701b1eSRobert Gordon return (EPROTO); 4806*0a701b1eSRobert Gordon } 4807*0a701b1eSRobert Gordon 4808*0a701b1eSRobert Gordon iocb.ic_cmd = cmd; 4809*0a701b1eSRobert Gordon iocb.ic_timout = 0; 4810*0a701b1eSRobert Gordon iocb.ic_len = len; 4811*0a701b1eSRobert Gordon iocb.ic_dp = arg; 4812*0a701b1eSRobert Gordon sigintr(&smask, 0); 4813*0a701b1eSRobert Gordon err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb); 4814*0a701b1eSRobert Gordon sigunintr(&smask); 4815*0a701b1eSRobert Gordon (void) t_kclose(tiptr, 0); 4816*0a701b1eSRobert Gordon VN_RELE(kvp); 4817*0a701b1eSRobert Gordon return (err); 4818*0a701b1eSRobert Gordon } 4819*0a701b1eSRobert Gordon 4820*0a701b1eSRobert Gordon static uint_t rpcib_get_number_interfaces(void) { 4821*0a701b1eSRobert Gordon uint_t numifs; 4822*0a701b1eSRobert Gordon if (rpcib_do_ip_ioctl(SIOCGIFNUM, sizeof (uint_t), (caddr_t)&numifs)) { 4823*0a701b1eSRobert Gordon return (0); 4824*0a701b1eSRobert Gordon } 4825*0a701b1eSRobert Gordon return (numifs); 4826*0a701b1eSRobert Gordon } 4827*0a701b1eSRobert Gordon 4828*0a701b1eSRobert Gordon static boolean_t 4829*0a701b1eSRobert Gordon rpcib_get_ib_addresses( 
4830*0a701b1eSRobert Gordon struct sockaddr_in *saddr4, 4831*0a701b1eSRobert Gordon struct sockaddr_in6 *saddr6, 4832*0a701b1eSRobert Gordon uint_t *number4, 4833*0a701b1eSRobert Gordon uint_t *number6) 4834*0a701b1eSRobert Gordon { 4835*0a701b1eSRobert Gordon int numifs; 4836*0a701b1eSRobert Gordon struct ifconf kifc; 4837*0a701b1eSRobert Gordon struct ifreq *ifr; 4838*0a701b1eSRobert Gordon boolean_t ret = B_FALSE; 4839*0a701b1eSRobert Gordon 4840*0a701b1eSRobert Gordon *number4 = 0; 4841*0a701b1eSRobert Gordon *number6 = 0; 4842*0a701b1eSRobert Gordon 4843*0a701b1eSRobert Gordon if (rpcib_do_ip_ioctl(SIOCGIFNUM, sizeof (int), (caddr_t)&numifs)) { 4844*0a701b1eSRobert Gordon return (ret); 4845*0a701b1eSRobert Gordon } 4846*0a701b1eSRobert Gordon 4847*0a701b1eSRobert Gordon kifc.ifc_len = numifs * sizeof (struct ifreq); 4848*0a701b1eSRobert Gordon kifc.ifc_buf = kmem_zalloc(kifc.ifc_len, KM_SLEEP); 4849*0a701b1eSRobert Gordon 4850*0a701b1eSRobert Gordon if (rpcib_do_ip_ioctl(SIOCGIFCONF, sizeof (struct ifconf), 4851*0a701b1eSRobert Gordon (caddr_t)&kifc)) { 4852*0a701b1eSRobert Gordon goto done; 4853*0a701b1eSRobert Gordon } 4854*0a701b1eSRobert Gordon 4855*0a701b1eSRobert Gordon ifr = kifc.ifc_req; 4856*0a701b1eSRobert Gordon for (numifs = kifc.ifc_len / sizeof (struct ifreq); 4857*0a701b1eSRobert Gordon numifs > 0; numifs--, ifr++) { 4858*0a701b1eSRobert Gordon struct sockaddr_in *sin4; 4859*0a701b1eSRobert Gordon struct sockaddr_in6 *sin6; 4860*0a701b1eSRobert Gordon 4861*0a701b1eSRobert Gordon if ((rpcib_is_ib_interface(ifr->ifr_name) == 0)) { 4862*0a701b1eSRobert Gordon sin4 = (struct sockaddr_in *)(uintptr_t)&ifr->ifr_addr; 4863*0a701b1eSRobert Gordon sin6 = (struct sockaddr_in6 *)(uintptr_t)&ifr->ifr_addr; 4864*0a701b1eSRobert Gordon if (sin4->sin_family == AF_INET) { 4865*0a701b1eSRobert Gordon saddr4[*number4] = *(struct sockaddr_in *) 4866*0a701b1eSRobert Gordon (uintptr_t)&ifr->ifr_addr; 4867*0a701b1eSRobert Gordon *number4 = *number4 + 1; 
4868*0a701b1eSRobert Gordon } else if (sin6->sin6_family == AF_INET6) { 4869*0a701b1eSRobert Gordon saddr6[*number6] = *(struct sockaddr_in6 *) 4870*0a701b1eSRobert Gordon (uintptr_t)&ifr->ifr_addr; 4871*0a701b1eSRobert Gordon *number6 = *number6 + 1; 4872*0a701b1eSRobert Gordon } 4873*0a701b1eSRobert Gordon } 4874*0a701b1eSRobert Gordon } 4875*0a701b1eSRobert Gordon ret = B_TRUE; 4876*0a701b1eSRobert Gordon done: 4877*0a701b1eSRobert Gordon kmem_free(kifc.ifc_buf, kifc.ifc_len); 4878*0a701b1eSRobert Gordon return (ret); 4879*0a701b1eSRobert Gordon } 4880*0a701b1eSRobert Gordon 4881*0a701b1eSRobert Gordon /* ARGSUSED */ 4882*0a701b1eSRobert Gordon static int rpcib_cache_kstat_update(kstat_t *ksp, int rw) { 4883*0a701b1eSRobert Gordon 4884*0a701b1eSRobert Gordon if (KSTAT_WRITE == rw) { 4885*0a701b1eSRobert Gordon return (EACCES); 4886*0a701b1eSRobert Gordon } 4887*0a701b1eSRobert Gordon rpcib_kstat.cache_limit.value.ui64 = 4888*0a701b1eSRobert Gordon (uint64_t)cache_limit; 4889*0a701b1eSRobert Gordon rpcib_kstat.cache_allocation.value.ui64 = 4890*0a701b1eSRobert Gordon (uint64_t)cache_allocation; 4891*0a701b1eSRobert Gordon rpcib_kstat.cache_hits.value.ui64 = 4892*0a701b1eSRobert Gordon (uint64_t)cache_hits; 4893*0a701b1eSRobert Gordon rpcib_kstat.cache_misses.value.ui64 = 4894*0a701b1eSRobert Gordon (uint64_t)cache_misses; 4895*0a701b1eSRobert Gordon rpcib_kstat.cache_misses_above_the_limit.value.ui64 = 4896*0a701b1eSRobert Gordon (uint64_t)cache_misses_above_the_limit; 4897*0a701b1eSRobert Gordon return (0); 4898*0a701b1eSRobert Gordon } 4899