1c0dd49bdSEiji Ota /* 2c0dd49bdSEiji Ota * CDDL HEADER START 3c0dd49bdSEiji Ota * 4c0dd49bdSEiji Ota * The contents of this file are subject to the terms of the 5c0dd49bdSEiji Ota * Common Development and Distribution License (the "License"). 6c0dd49bdSEiji Ota * You may not use this file except in compliance with the License. 7c0dd49bdSEiji Ota * 8c0dd49bdSEiji Ota * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9c0dd49bdSEiji Ota * or http://www.opensolaris.org/os/licensing. 10c0dd49bdSEiji Ota * See the License for the specific language governing permissions 11c0dd49bdSEiji Ota * and limitations under the License. 12c0dd49bdSEiji Ota * 13c0dd49bdSEiji Ota * When distributing Covered Code, include this CDDL HEADER in each 14c0dd49bdSEiji Ota * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15c0dd49bdSEiji Ota * If applicable, add the following below this CDDL HEADER, with the 16c0dd49bdSEiji Ota * fields enclosed by brackets "[]" replaced with your own identifying 17c0dd49bdSEiji Ota * information: Portions Copyright [yyyy] [name of copyright owner] 18c0dd49bdSEiji Ota * 19c0dd49bdSEiji Ota * CDDL HEADER END 20c0dd49bdSEiji Ota */ 21c0dd49bdSEiji Ota /* 22c0dd49bdSEiji Ota * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23c0dd49bdSEiji Ota */ 24c0dd49bdSEiji Ota 25c0dd49bdSEiji Ota #ifndef _RDSV3_RDSV3_H 26c0dd49bdSEiji Ota #define _RDSV3_RDSV3_H 27c0dd49bdSEiji Ota 28c0dd49bdSEiji Ota /* 29c0dd49bdSEiji Ota * The name of this file is rds.h in ofed. 
30c0dd49bdSEiji Ota */ 31c0dd49bdSEiji Ota 32c0dd49bdSEiji Ota #ifdef __cplusplus 33c0dd49bdSEiji Ota extern "C" { 34c0dd49bdSEiji Ota #endif 35c0dd49bdSEiji Ota 36c0dd49bdSEiji Ota #include <sys/sunndi.h> 37c0dd49bdSEiji Ota #include <netinet/in.h> 38c0dd49bdSEiji Ota #include <sys/synch.h> 39c0dd49bdSEiji Ota #include <sys/stropts.h> 40c0dd49bdSEiji Ota #include <sys/socket.h> 41c0dd49bdSEiji Ota #include <sys/socketvar.h> 42c0dd49bdSEiji Ota #include <inet/ip.h> 43c0dd49bdSEiji Ota #include <sys/avl.h> 44c0dd49bdSEiji Ota #include <sys/param.h> 45c0dd49bdSEiji Ota #include <sys/rds.h> 46c0dd49bdSEiji Ota 47c0dd49bdSEiji Ota #include <sys/ib/ibtl/ibti.h> 48c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_verbs.h> 49c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_addr.h> 50c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/rdma_cm.h> 51c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_impl.h> 52c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/info.h> 53c0dd49bdSEiji Ota 54c0dd49bdSEiji Ota #define NIPQUAD(addr) \ 55c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 24) & 0xFF), \ 56c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 16) & 0xFF), \ 57c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 8) & 0xFF), \ 58c0dd49bdSEiji Ota (unsigned char)(ntohl(addr) & 0xFF) 59c0dd49bdSEiji Ota 60c0dd49bdSEiji Ota /* 61c0dd49bdSEiji Ota * RDS Network protocol version 62c0dd49bdSEiji Ota */ 63c0dd49bdSEiji Ota #define RDS_PROTOCOL_3_0 0x0300 64c0dd49bdSEiji Ota #define RDS_PROTOCOL_3_1 0x0301 65c0dd49bdSEiji Ota #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 66c0dd49bdSEiji Ota #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) 67c0dd49bdSEiji Ota #define RDS_PROTOCOL_MINOR(v) ((v) & 255) 68c0dd49bdSEiji Ota #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) 69c0dd49bdSEiji Ota 70c0dd49bdSEiji Ota /* 71c0dd49bdSEiji Ota * XXX randomly chosen, but at least seems to be unused: 72c0dd49bdSEiji Ota * # 18464-18768 Unassigned 73c0dd49bdSEiji Ota * We should do better. 
We want a reserved port to discourage unpriv'ed 74c0dd49bdSEiji Ota * userspace from listening. 75c0dd49bdSEiji Ota * 76c0dd49bdSEiji Ota * port 18633 was the version that had ack frames on the wire. 77c0dd49bdSEiji Ota */ 78c0dd49bdSEiji Ota #define RDSV3_PORT 18634 79c0dd49bdSEiji Ota 80c0dd49bdSEiji Ota /* 81c0dd49bdSEiji Ota * This is the sad making. Some kernels have a bug in the per_cpu() api which 82c0dd49bdSEiji Ota * makes DEFINE_PER_CPU trigger an oops on insmod because the per-cpu section 83c0dd49bdSEiji Ota * in the module is not cacheline-aligned. As much as we'd like to tell users 84c0dd49bdSEiji Ota * with older kernels to stuff it, that's not reasonable. We'll roll our own 85c0dd49bdSEiji Ota * until this doesn't have to build against older kernels. 86c0dd49bdSEiji Ota */ 87c0dd49bdSEiji Ota #define RDSV3_DEFINE_PER_CPU(type, var) type var[NR_CPUS] 88c0dd49bdSEiji Ota #define RDSV3_DECLARE_PER_CPU(type, var) extern type var[NR_CPUS] 89c0dd49bdSEiji Ota #define rdsv3_per_cpu(var, cpu) var[cpu] 90c0dd49bdSEiji Ota 91c0dd49bdSEiji Ota static inline ulong_t 92c0dd49bdSEiji Ota ceil(ulong_t x, ulong_t y) 93c0dd49bdSEiji Ota { 94c0dd49bdSEiji Ota return ((x + y - 1) / y); 95c0dd49bdSEiji Ota } 96c0dd49bdSEiji Ota 97c0dd49bdSEiji Ota #define RDSV3_FRAG_SHIFT 12 98c0dd49bdSEiji Ota #define RDSV3_FRAG_SIZE ((unsigned int)(1 << RDSV3_FRAG_SHIFT)) 99c0dd49bdSEiji Ota 100c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_BYTES (65536 / 8) 101c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_LONGS (RDSV3_CONG_MAP_BYTES / sizeof (unsigned long)) 102c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_PAGES (RDSV3_CONG_MAP_BYTES / PAGE_SIZE) 103c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 104c0dd49bdSEiji Ota 105c0dd49bdSEiji Ota struct rdsv3_cong_map { 106c0dd49bdSEiji Ota struct avl_node m_rb_node; 107c0dd49bdSEiji Ota uint32_be_t m_addr; 108c0dd49bdSEiji Ota rdsv3_wait_queue_t m_waitq; 109c0dd49bdSEiji Ota struct list m_conn_list; 110c0dd49bdSEiji Ota unsigned long 
m_page_addrs[RDSV3_CONG_MAP_PAGES]; 111c0dd49bdSEiji Ota }; 112c0dd49bdSEiji Ota 113c0dd49bdSEiji Ota /* 114c0dd49bdSEiji Ota * This is how we will track the connection state: 115c0dd49bdSEiji Ota * A connection is always in one of the following 116c0dd49bdSEiji Ota * states. Updates to the state are atomic and imply 117c0dd49bdSEiji Ota * a memory barrier. 118c0dd49bdSEiji Ota */ 119c0dd49bdSEiji Ota enum { 120c0dd49bdSEiji Ota RDSV3_CONN_DOWN = 0, 121c0dd49bdSEiji Ota RDSV3_CONN_CONNECTING, 122c0dd49bdSEiji Ota RDSV3_CONN_DISCONNECTING, 123c0dd49bdSEiji Ota RDSV3_CONN_UP, 124c0dd49bdSEiji Ota RDSV3_CONN_ERROR, 125c0dd49bdSEiji Ota }; 126c0dd49bdSEiji Ota 127c0dd49bdSEiji Ota /* Bits for c_flags */ 128c0dd49bdSEiji Ota #define RDSV3_LL_SEND_FULL 0 129c0dd49bdSEiji Ota #define RDSV3_RECONNECT_PENDING 1 130c0dd49bdSEiji Ota 131c0dd49bdSEiji Ota struct rdsv3_connection { 132c0dd49bdSEiji Ota struct avl_node c_hash_node; 133c0dd49bdSEiji Ota uint32_be_t c_laddr; 134c0dd49bdSEiji Ota uint32_be_t c_faddr; 135c0dd49bdSEiji Ota unsigned int c_loopback:1; 136c0dd49bdSEiji Ota struct rdsv3_connection *c_passive; 137c0dd49bdSEiji Ota 138c0dd49bdSEiji Ota struct rdsv3_cong_map *c_lcong; 139c0dd49bdSEiji Ota struct rdsv3_cong_map *c_fcong; 140c0dd49bdSEiji Ota 141c0dd49bdSEiji Ota struct mutex c_send_lock; /* protect send ring */ 142c0dd49bdSEiji Ota struct rdsv3_message *c_xmit_rm; 143c0dd49bdSEiji Ota unsigned long c_xmit_sg; 144c0dd49bdSEiji Ota unsigned int c_xmit_hdr_off; 145c0dd49bdSEiji Ota unsigned int c_xmit_data_off; 146c0dd49bdSEiji Ota unsigned int c_xmit_rdma_sent; 147c0dd49bdSEiji Ota 148c0dd49bdSEiji Ota kmutex_t c_lock; /* protect msg queues */ 149c0dd49bdSEiji Ota uint64_t c_next_tx_seq; 150c0dd49bdSEiji Ota struct list c_send_queue; 151c0dd49bdSEiji Ota struct list c_retrans; 152c0dd49bdSEiji Ota 153c0dd49bdSEiji Ota uint64_t c_next_rx_seq; 154c0dd49bdSEiji Ota 155c0dd49bdSEiji Ota struct rdsv3_transport *c_trans; 156c0dd49bdSEiji Ota void *c_transport_data; 
157c0dd49bdSEiji Ota 158c0dd49bdSEiji Ota atomic_t c_state; 159c0dd49bdSEiji Ota unsigned long c_flags; 160c0dd49bdSEiji Ota unsigned long c_reconnect_jiffies; 161c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_send_w; 162c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_recv_w; 163c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_conn_w; 164c0dd49bdSEiji Ota struct rdsv3_work_s c_down_w; 165c0dd49bdSEiji Ota struct mutex c_cm_lock; /* protect conn state & cm */ 166c0dd49bdSEiji Ota 167c0dd49bdSEiji Ota struct list_node c_map_item; 168c0dd49bdSEiji Ota unsigned long c_map_queued; 169c0dd49bdSEiji Ota unsigned long c_map_offset; 170c0dd49bdSEiji Ota unsigned long c_map_bytes; 171c0dd49bdSEiji Ota 172c0dd49bdSEiji Ota unsigned int c_unacked_packets; 173c0dd49bdSEiji Ota unsigned int c_unacked_bytes; 174c0dd49bdSEiji Ota 175c0dd49bdSEiji Ota /* Protocol version */ 176c0dd49bdSEiji Ota unsigned int c_version; 177c0dd49bdSEiji Ota }; 178c0dd49bdSEiji Ota 179c0dd49bdSEiji Ota #define RDSV3_FLAG_CONG_BITMAP 0x01 180c0dd49bdSEiji Ota #define RDSV3_FLAG_ACK_REQUIRED 0x02 181c0dd49bdSEiji Ota #define RDSV3_FLAG_RETRANSMITTED 0x04 182*cadbfdc3SEiji Ota #define RDSV3_MAX_ADV_CREDIT 127 183c0dd49bdSEiji Ota 184c0dd49bdSEiji Ota /* 185c0dd49bdSEiji Ota * Maximum space available for extension headers. 
186c0dd49bdSEiji Ota */ 187c0dd49bdSEiji Ota #define RDSV3_HEADER_EXT_SPACE 16 188c0dd49bdSEiji Ota 189c0dd49bdSEiji Ota struct rdsv3_header { 190c0dd49bdSEiji Ota uint64_be_t h_sequence; 191c0dd49bdSEiji Ota uint64_be_t h_ack; 192c0dd49bdSEiji Ota uint32_be_t h_len; 193c0dd49bdSEiji Ota uint16_be_t h_sport; 194c0dd49bdSEiji Ota uint16_be_t h_dport; 195c0dd49bdSEiji Ota uint8_t h_flags; 196c0dd49bdSEiji Ota uint8_t h_credit; 197c0dd49bdSEiji Ota uint8_t h_padding[4]; 198c0dd49bdSEiji Ota uint16_be_t h_csum; 199c0dd49bdSEiji Ota 200c0dd49bdSEiji Ota uint8_t h_exthdr[RDSV3_HEADER_EXT_SPACE]; 201c0dd49bdSEiji Ota }; 202c0dd49bdSEiji Ota 203c0dd49bdSEiji Ota /* Reserved - indicates end of extensions */ 204c0dd49bdSEiji Ota #define RDSV3_EXTHDR_NONE 0 205c0dd49bdSEiji Ota 206c0dd49bdSEiji Ota /* 207c0dd49bdSEiji Ota * This extension header is included in the very 208c0dd49bdSEiji Ota * first message that is sent on a new connection, 209c0dd49bdSEiji Ota * and identifies the protocol level. This will help 210c0dd49bdSEiji Ota * rolling updates if a future change requires breaking 211c0dd49bdSEiji Ota * the protocol. 212c0dd49bdSEiji Ota */ 213c0dd49bdSEiji Ota #define RDSV3_EXTHDR_VERSION 1 214c0dd49bdSEiji Ota struct rdsv3_ext_header_version { 215c0dd49bdSEiji Ota uint32_be_t h_version; 216c0dd49bdSEiji Ota }; 217c0dd49bdSEiji Ota 218c0dd49bdSEiji Ota /* 219c0dd49bdSEiji Ota * This extension header is included in the RDS message 220c0dd49bdSEiji Ota * chasing an RDMA operation. 221c0dd49bdSEiji Ota */ 222c0dd49bdSEiji Ota #define RDSV3_EXTHDR_RDMA 2 223c0dd49bdSEiji Ota struct rdsv3_ext_header_rdma { 224c0dd49bdSEiji Ota uint32_be_t h_rdma_rkey; 225c0dd49bdSEiji Ota }; 226c0dd49bdSEiji Ota 227c0dd49bdSEiji Ota /* 228c0dd49bdSEiji Ota * This extension header tells the peer about the 229c0dd49bdSEiji Ota * destination <R_Key,offset> of the requested RDMA 230c0dd49bdSEiji Ota * operation. 
231c0dd49bdSEiji Ota */ 232c0dd49bdSEiji Ota #define RDSV3_EXTHDR_RDMA_DEST 3 233c0dd49bdSEiji Ota struct rdsv3_ext_header_rdma_dest { 234c0dd49bdSEiji Ota uint32_be_t h_rdma_rkey; 235c0dd49bdSEiji Ota uint32_be_t h_rdma_offset; 236c0dd49bdSEiji Ota }; 237c0dd49bdSEiji Ota 238c0dd49bdSEiji Ota #define __RDSV3_EXTHDR_MAX 16 /* for now */ 239c0dd49bdSEiji Ota 240c0dd49bdSEiji Ota struct rdsv3_incoming { 241c0dd49bdSEiji Ota atomic_t i_refcount; 242c0dd49bdSEiji Ota struct list_node i_item; 243c0dd49bdSEiji Ota struct rdsv3_connection *i_conn; 244c0dd49bdSEiji Ota struct rdsv3_header i_hdr; 245c0dd49bdSEiji Ota unsigned long i_rx_jiffies; 246c0dd49bdSEiji Ota uint32_be_t i_saddr; 247c0dd49bdSEiji Ota 248c0dd49bdSEiji Ota rdsv3_rdma_cookie_t i_rdma_cookie; 249c0dd49bdSEiji Ota }; 250c0dd49bdSEiji Ota 251c0dd49bdSEiji Ota /* 252c0dd49bdSEiji Ota * m_sock_item and m_conn_item are on lists that are serialized under 253c0dd49bdSEiji Ota * conn->c_lock. m_sock_item has additional meaning in that once it is empty 254c0dd49bdSEiji Ota * the message will not be put back on the retransmit list after being sent. 255c0dd49bdSEiji Ota * messages that are canceled while being sent rely on this. 256c0dd49bdSEiji Ota * 257c0dd49bdSEiji Ota * m_inc is used by loopback so that it can pass an incoming message straight 258c0dd49bdSEiji Ota * back up into the rx path. It embeds a wire header which is also used by 259c0dd49bdSEiji Ota * the send path, which is kind of awkward. 260c0dd49bdSEiji Ota * 261c0dd49bdSEiji Ota * m_sock_item indicates the message's presence on a socket's send or receive 262c0dd49bdSEiji Ota * queue. m_rs will point to that socket. 263c0dd49bdSEiji Ota * 264c0dd49bdSEiji Ota * m_daddr is used by cancellation to prune messages to a given destination. 265c0dd49bdSEiji Ota * 266c0dd49bdSEiji Ota * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock 267c0dd49bdSEiji Ota * nesting. 
As paths iterate over messages on a sock, or conn, they must 268c0dd49bdSEiji Ota * also lock the conn, or sock, to remove the message from those lists too. 269c0dd49bdSEiji Ota * Testing the flag to determine if the message is still on the lists lets 270c0dd49bdSEiji Ota * us avoid testing the list_head directly. That means each path can use 271c0dd49bdSEiji Ota * the message's list_head to keep it on a local list while juggling locks 272c0dd49bdSEiji Ota * without confusing the other path. 273c0dd49bdSEiji Ota * 274c0dd49bdSEiji Ota * m_ack_seq is an optional field set by transports who need a different 275c0dd49bdSEiji Ota * sequence number range to invalidate. They can use this in a callback 276c0dd49bdSEiji Ota * that they pass to rdsv3_send_drop_acked() to see if each message has been 277c0dd49bdSEiji Ota * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't 278c0dd49bdSEiji Ota * had ack_seq set yet. 279c0dd49bdSEiji Ota */ 280c0dd49bdSEiji Ota #define RDSV3_MSG_ON_SOCK 1 281c0dd49bdSEiji Ota #define RDSV3_MSG_ON_CONN 2 282c0dd49bdSEiji Ota #define RDSV3_MSG_HAS_ACK_SEQ 3 283c0dd49bdSEiji Ota #define RDSV3_MSG_ACK_REQUIRED 4 284c0dd49bdSEiji Ota #define RDSV3_MSG_RETRANSMITTED 5 285c0dd49bdSEiji Ota #define RDSV3_MSG_MAPPED 6 286c0dd49bdSEiji Ota #define RDSV3_MSG_PAGEVEC 7 287c0dd49bdSEiji Ota 288c0dd49bdSEiji Ota struct rdsv3_message { 289c0dd49bdSEiji Ota atomic_t m_refcount; 290c0dd49bdSEiji Ota struct list_node m_sock_item; 291c0dd49bdSEiji Ota struct list_node m_conn_item; 292c0dd49bdSEiji Ota struct rdsv3_incoming m_inc; 293c0dd49bdSEiji Ota uint64_t m_ack_seq; 294c0dd49bdSEiji Ota uint32_be_t m_daddr; 295c0dd49bdSEiji Ota unsigned long m_flags; 296c0dd49bdSEiji Ota 297c0dd49bdSEiji Ota /* 298c0dd49bdSEiji Ota * Never access m_rs without holding m_rs_lock. 
299c0dd49bdSEiji Ota * Lock nesting is 300c0dd49bdSEiji Ota * rm->m_rs_lock 301c0dd49bdSEiji Ota * -> rs->rs_lock 302c0dd49bdSEiji Ota */ 303c0dd49bdSEiji Ota kmutex_t m_rs_lock; 304c0dd49bdSEiji Ota struct rdsv3_sock *m_rs; 305c0dd49bdSEiji Ota struct rdsv3_rdma_op *m_rdma_op; 306c0dd49bdSEiji Ota rdsv3_rdma_cookie_t m_rdma_cookie; 307c0dd49bdSEiji Ota struct rdsv3_mr *m_rdma_mr; 308c0dd49bdSEiji Ota unsigned int m_nents; 309c0dd49bdSEiji Ota unsigned int m_count; 310c0dd49bdSEiji Ota struct rdsv3_scatterlist m_sg[1]; 311c0dd49bdSEiji Ota }; 312c0dd49bdSEiji Ota 313c0dd49bdSEiji Ota /* 314c0dd49bdSEiji Ota * The RDS notifier is used (optionally) to tell the application about 315c0dd49bdSEiji Ota * completed RDMA operations. Rather than keeping the whole rds message 316c0dd49bdSEiji Ota * around on the queue, we allocate a small notifier that is put on the 317c0dd49bdSEiji Ota * socket's notifier_list. Notifications are delivered to the application 318c0dd49bdSEiji Ota * through control messages. 319c0dd49bdSEiji Ota */ 320c0dd49bdSEiji Ota struct rdsv3_notifier { 321c0dd49bdSEiji Ota list_node_t n_list; 322c0dd49bdSEiji Ota uint64_t n_user_token; 323c0dd49bdSEiji Ota int n_status; 324c0dd49bdSEiji Ota }; 325c0dd49bdSEiji Ota 326c0dd49bdSEiji Ota /* 327c0dd49bdSEiji Ota * struct rdsv3_transport - transport specific behavioural hooks 328c0dd49bdSEiji Ota * 329c0dd49bdSEiji Ota * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send 330c0dd49bdSEiji Ota * part of a message. The caller serializes on the send_sem so this 331c0dd49bdSEiji Ota * doesn't need to be reentrant for a given conn. The header must be 332c0dd49bdSEiji Ota * sent before the data payload. .xmit must be prepared to send a 333c0dd49bdSEiji Ota * message with no data payload. .xmit should return the number of 334c0dd49bdSEiji Ota * bytes that were sent down the connection, including header bytes. 
 *        Returning 0 tells the caller that it doesn't need to perform any
 *        additional work now.  This is usually the case when the transport has
 *        filled the sending queue for its connection and will handle
 *        triggering the rds thread to continue the send when space becomes
 *        available.  Returning -EAGAIN tells the caller to retry the send
 *        immediately.  Returning -ENOMEM tells the caller to retry the send at
 *        some point in the future.
 *
 * @conn_shutdown: conn_shutdown stops traffic on the given connection.  Once
 *                 it returns the connection can not call rdsv3_recv_incoming().
 *                 This will only be called once after conn_connect returns
 *                 non-zero success.  The caller serializes this with
 *                 the send and connecting paths (xmit_* and conn_*).  The
 *                 transport is responsible for other serialization, including
 *                 rdsv3_recv_incoming().  This is called in process context but
 *                 should try hard not to block.
 *
 * @xmit_cong_map: This asks the transport to send the local bitmap down the
 *                 given connection.  XXX get a better story about the bitmap
 *                 flag and header.
355c0dd49bdSEiji Ota */ 356c0dd49bdSEiji Ota 357*cadbfdc3SEiji Ota #define RDS_TRANS_IB 0 358*cadbfdc3SEiji Ota #define RDS_TRANS_IWARP 1 359*cadbfdc3SEiji Ota #define RDS_TRANS_TCP 2 360*cadbfdc3SEiji Ota #define RDS_TRANS_COUNT 3 361*cadbfdc3SEiji Ota 362c0dd49bdSEiji Ota struct rdsv3_transport { 363*cadbfdc3SEiji Ota char t_name[TRANSNAMSIZ]; 364c0dd49bdSEiji Ota struct list_node t_item; 365*cadbfdc3SEiji Ota unsigned int t_type; 366c0dd49bdSEiji Ota unsigned int t_prefer_loopback:1; 367c0dd49bdSEiji Ota 368c0dd49bdSEiji Ota int (*laddr_check)(uint32_be_t addr); 369c0dd49bdSEiji Ota int (*conn_alloc)(struct rdsv3_connection *conn, int gfp); 370c0dd49bdSEiji Ota void (*conn_free)(void *data); 371c0dd49bdSEiji Ota int (*conn_connect)(struct rdsv3_connection *conn); 372c0dd49bdSEiji Ota void (*conn_shutdown)(struct rdsv3_connection *conn); 373c0dd49bdSEiji Ota void (*xmit_prepare)(struct rdsv3_connection *conn); 374c0dd49bdSEiji Ota void (*xmit_complete)(struct rdsv3_connection *conn); 375c0dd49bdSEiji Ota int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm, 376c0dd49bdSEiji Ota unsigned int hdr_off, unsigned int sg, unsigned int off); 377c0dd49bdSEiji Ota int (*xmit_cong_map)(struct rdsv3_connection *conn, 378c0dd49bdSEiji Ota struct rdsv3_cong_map *map, unsigned long offset); 379c0dd49bdSEiji Ota int (*xmit_rdma)(struct rdsv3_connection *conn, 380c0dd49bdSEiji Ota struct rdsv3_rdma_op *op); 381c0dd49bdSEiji Ota int (*recv)(struct rdsv3_connection *conn); 382c0dd49bdSEiji Ota int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio, 383c0dd49bdSEiji Ota size_t size); 384c0dd49bdSEiji Ota void (*inc_purge)(struct rdsv3_incoming *inc); 385c0dd49bdSEiji Ota void (*inc_free)(struct rdsv3_incoming *inc); 386c0dd49bdSEiji Ota 387c0dd49bdSEiji Ota int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 388c0dd49bdSEiji Ota struct rdma_cm_event *event); 389c0dd49bdSEiji Ota int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); 390c0dd49bdSEiji Ota void 
(*cm_connect_complete)(struct rdsv3_connection *conn, 391c0dd49bdSEiji Ota struct rdma_cm_event *event); 392c0dd49bdSEiji Ota 393c0dd49bdSEiji Ota unsigned int (*stats_info_copy)(struct rdsv3_info_iterator *iter, 394c0dd49bdSEiji Ota unsigned int avail); 395c0dd49bdSEiji Ota void (*exit)(void); 396c0dd49bdSEiji Ota void *(*get_mr)(struct rdsv3_iovec *sg, unsigned long nr_sg, 397c0dd49bdSEiji Ota struct rdsv3_sock *rs, uint32_t *key_ret); 398c0dd49bdSEiji Ota void (*sync_mr)(void *trans_private, int direction); 399c0dd49bdSEiji Ota void (*free_mr)(void *trans_private, int invalidate); 400c0dd49bdSEiji Ota void (*flush_mrs)(void); 401c0dd49bdSEiji Ota }; 402c0dd49bdSEiji Ota 403c0dd49bdSEiji Ota struct rdsv3_sock { 404c0dd49bdSEiji Ota struct rsock *rs_sk; 405c0dd49bdSEiji Ota uint64_t rs_user_addr; 406c0dd49bdSEiji Ota uint64_t rs_user_bytes; 407c0dd49bdSEiji Ota 408c0dd49bdSEiji Ota /* 409c0dd49bdSEiji Ota * bound_addr used for both incoming and outgoing, no INADDR_ANY 410c0dd49bdSEiji Ota * support. 411c0dd49bdSEiji Ota */ 412c0dd49bdSEiji Ota struct avl_node rs_bound_node; 413c0dd49bdSEiji Ota uint32_be_t rs_bound_addr; 414c0dd49bdSEiji Ota uint32_be_t rs_conn_addr; 415c0dd49bdSEiji Ota uint16_be_t rs_bound_port; 416c0dd49bdSEiji Ota uint16_be_t rs_conn_port; 417c0dd49bdSEiji Ota 418c0dd49bdSEiji Ota /* 419c0dd49bdSEiji Ota * This is only used to communicate the transport between bind and 420c0dd49bdSEiji Ota * initiating connections. All other trans use is referenced through 421c0dd49bdSEiji Ota * the connection. 422c0dd49bdSEiji Ota */ 423c0dd49bdSEiji Ota struct rdsv3_transport *rs_transport; 424c0dd49bdSEiji Ota 425c0dd49bdSEiji Ota /* 426c0dd49bdSEiji Ota * rdsv3_sendmsg caches the conn it used the last time around. 427c0dd49bdSEiji Ota * This helps avoid costly lookups. 
428c0dd49bdSEiji Ota */ 429c0dd49bdSEiji Ota struct rdsv3_connection *rs_conn; 430c0dd49bdSEiji Ota kmutex_t rs_conn_lock; 431c0dd49bdSEiji Ota 432c0dd49bdSEiji Ota /* flag indicating we were congested or not */ 433c0dd49bdSEiji Ota int rs_congested; 434*cadbfdc3SEiji Ota /* seen congestion (ENOBUFS) when sending? */ 435*cadbfdc3SEiji Ota int rs_seen_congestion; 436c0dd49bdSEiji Ota 437c0dd49bdSEiji Ota /* rs_lock protects all these adjacent members before the newline */ 438c0dd49bdSEiji Ota kmutex_t rs_lock; 439c0dd49bdSEiji Ota struct list rs_send_queue; 440c0dd49bdSEiji Ota uint32_t rs_snd_bytes; 441c0dd49bdSEiji Ota int rs_rcv_bytes; 442c0dd49bdSEiji Ota /* currently used for failed RDMAs */ 443c0dd49bdSEiji Ota struct list rs_notify_queue; 444c0dd49bdSEiji Ota 445c0dd49bdSEiji Ota /* 446c0dd49bdSEiji Ota * Congestion wake_up. If rs_cong_monitor is set, we use cong_mask 447c0dd49bdSEiji Ota * to decide whether the application should be woken up. 448c0dd49bdSEiji Ota * If not set, we use rs_cong_track to find out whether a cong map 449c0dd49bdSEiji Ota * update arrived. 450c0dd49bdSEiji Ota */ 451c0dd49bdSEiji Ota uint64_t rs_cong_mask; 452c0dd49bdSEiji Ota uint64_t rs_cong_notify; 453c0dd49bdSEiji Ota struct list_node rs_cong_list; 454c0dd49bdSEiji Ota unsigned long rs_cong_track; 455c0dd49bdSEiji Ota 456c0dd49bdSEiji Ota /* 457c0dd49bdSEiji Ota * rs_recv_lock protects the receive queue, and is 458c0dd49bdSEiji Ota * used to serialize with rdsv3_release. 
459c0dd49bdSEiji Ota */ 460c0dd49bdSEiji Ota krwlock_t rs_recv_lock; 461c0dd49bdSEiji Ota struct list rs_recv_queue; 462c0dd49bdSEiji Ota 463c0dd49bdSEiji Ota /* just for stats reporting */ 464c0dd49bdSEiji Ota struct list_node rs_item; 465c0dd49bdSEiji Ota 466c0dd49bdSEiji Ota /* these have their own lock */ 467c0dd49bdSEiji Ota kmutex_t rs_rdma_lock; 468c0dd49bdSEiji Ota struct avl_tree rs_rdma_keys; 469c0dd49bdSEiji Ota 470c0dd49bdSEiji Ota /* Socket options - in case there will be more */ 471c0dd49bdSEiji Ota unsigned char rs_recverr, 472c0dd49bdSEiji Ota rs_cong_monitor; 473c0dd49bdSEiji Ota 474c0dd49bdSEiji Ota cred_t *rs_cred; 475c0dd49bdSEiji Ota zoneid_t rs_zoneid; 476c0dd49bdSEiji Ota }; 477c0dd49bdSEiji Ota 4781a561c76SEiji Ota static inline struct rdsv3_sock * 479c0dd49bdSEiji Ota rdsv3_sk_to_rs(const struct rsock *sk) 480c0dd49bdSEiji Ota { 481c0dd49bdSEiji Ota return ((struct rdsv3_sock *)sk->sk_protinfo); 482c0dd49bdSEiji Ota } 483c0dd49bdSEiji Ota 4841a561c76SEiji Ota static inline struct rsock * 485c0dd49bdSEiji Ota rdsv3_rs_to_sk(const struct rdsv3_sock *rs) 486c0dd49bdSEiji Ota { 487c0dd49bdSEiji Ota return ((struct rsock *)rs->rs_sk); 488c0dd49bdSEiji Ota } 489c0dd49bdSEiji Ota 490c0dd49bdSEiji Ota /* 491c0dd49bdSEiji Ota * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value 492c0dd49bdSEiji Ota * to account for overhead. We don't account for overhead, we just apply 493c0dd49bdSEiji Ota * the number of payload bytes to the specified value. 
494c0dd49bdSEiji Ota */ 4951a561c76SEiji Ota static inline int 496c0dd49bdSEiji Ota rdsv3_sk_sndbuf(struct rdsv3_sock *rs) 497c0dd49bdSEiji Ota { 498c0dd49bdSEiji Ota /* XXX */ 499c0dd49bdSEiji Ota return (rdsv3_rs_to_sk(rs)->sk_sndbuf); 500c0dd49bdSEiji Ota } 501c0dd49bdSEiji Ota 5021a561c76SEiji Ota static inline int 503c0dd49bdSEiji Ota rdsv3_sk_rcvbuf(struct rdsv3_sock *rs) 504c0dd49bdSEiji Ota { 505c0dd49bdSEiji Ota /* XXX */ 506c0dd49bdSEiji Ota return (rdsv3_rs_to_sk(rs)->sk_rcvbuf); 507c0dd49bdSEiji Ota } 508c0dd49bdSEiji Ota 509c0dd49bdSEiji Ota struct rdsv3_statistics { 510c0dd49bdSEiji Ota uint64_t s_conn_reset; 511c0dd49bdSEiji Ota uint64_t s_recv_drop_bad_checksum; 512c0dd49bdSEiji Ota uint64_t s_recv_drop_old_seq; 513c0dd49bdSEiji Ota uint64_t s_recv_drop_no_sock; 514c0dd49bdSEiji Ota uint64_t s_recv_drop_dead_sock; 515c0dd49bdSEiji Ota uint64_t s_recv_deliver_raced; 516c0dd49bdSEiji Ota uint64_t s_recv_delivered; 517c0dd49bdSEiji Ota uint64_t s_recv_queued; 518c0dd49bdSEiji Ota uint64_t s_recv_immediate_retry; 519c0dd49bdSEiji Ota uint64_t s_recv_delayed_retry; 520c0dd49bdSEiji Ota uint64_t s_recv_ack_required; 521c0dd49bdSEiji Ota uint64_t s_recv_rdma_bytes; 522c0dd49bdSEiji Ota uint64_t s_recv_ping; 523c0dd49bdSEiji Ota uint64_t s_send_queue_empty; 524c0dd49bdSEiji Ota uint64_t s_send_queue_full; 525c0dd49bdSEiji Ota uint64_t s_send_sem_contention; 526c0dd49bdSEiji Ota uint64_t s_send_sem_queue_raced; 527c0dd49bdSEiji Ota uint64_t s_send_immediate_retry; 528c0dd49bdSEiji Ota uint64_t s_send_delayed_retry; 529c0dd49bdSEiji Ota uint64_t s_send_drop_acked; 530c0dd49bdSEiji Ota uint64_t s_send_ack_required; 531c0dd49bdSEiji Ota uint64_t s_send_queued; 532c0dd49bdSEiji Ota uint64_t s_send_rdma; 533c0dd49bdSEiji Ota uint64_t s_send_rdma_bytes; 534c0dd49bdSEiji Ota uint64_t s_send_pong; 535c0dd49bdSEiji Ota uint64_t s_page_remainder_hit; 536c0dd49bdSEiji Ota uint64_t s_page_remainder_miss; 537c0dd49bdSEiji Ota uint64_t s_copy_to_user; 538c0dd49bdSEiji Ota 
uint64_t s_copy_from_user; 539c0dd49bdSEiji Ota uint64_t s_cong_update_queued; 540c0dd49bdSEiji Ota uint64_t s_cong_update_received; 541c0dd49bdSEiji Ota uint64_t s_cong_send_error; 542c0dd49bdSEiji Ota uint64_t s_cong_send_blocked; 543c0dd49bdSEiji Ota }; 544c0dd49bdSEiji Ota 545c0dd49bdSEiji Ota /* af_rds.c */ 546c0dd49bdSEiji Ota void rdsv3_sock_addref(struct rdsv3_sock *rs); 547c0dd49bdSEiji Ota void rdsv3_sock_put(struct rdsv3_sock *rs); 548c0dd49bdSEiji Ota void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs); 549c0dd49bdSEiji Ota void __rdsv3_wake_sk_sleep(struct rsock *sk); 550c0dd49bdSEiji Ota 551*cadbfdc3SEiji Ota extern rdsv3_wait_queue_t rdsv3_poll_waitq; 552*cadbfdc3SEiji Ota 553c0dd49bdSEiji Ota /* bind.c */ 554c0dd49bdSEiji Ota int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 555c0dd49bdSEiji Ota socklen_t len, cred_t *cr); 556c0dd49bdSEiji Ota void rdsv3_remove_bound(struct rdsv3_sock *rs); 557c0dd49bdSEiji Ota struct rdsv3_sock *rdsv3_find_bound(uint32_be_t addr, uint16_be_t port); 558c0dd49bdSEiji Ota 559c0dd49bdSEiji Ota /* conn.c */ 560c0dd49bdSEiji Ota int rdsv3_conn_init(void); 561c0dd49bdSEiji Ota void rdsv3_conn_exit(void); 562c0dd49bdSEiji Ota struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr, 563c0dd49bdSEiji Ota struct rdsv3_transport *trans, int gfp); 564c0dd49bdSEiji Ota struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr, 565c0dd49bdSEiji Ota uint32_be_t faddr, 566c0dd49bdSEiji Ota struct rdsv3_transport *trans, int gfp); 567c0dd49bdSEiji Ota void rdsv3_conn_destroy(struct rdsv3_connection *conn); 568c0dd49bdSEiji Ota void rdsv3_conn_reset(struct rdsv3_connection *conn); 569c0dd49bdSEiji Ota void rdsv3_conn_drop(struct rdsv3_connection *conn); 570c0dd49bdSEiji Ota void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len, 571c0dd49bdSEiji Ota struct rdsv3_info_iterator *iter, 572c0dd49bdSEiji Ota struct rdsv3_info_lengths *lens, 573c0dd49bdSEiji Ota int 
(*visitor)(struct rdsv3_connection *, void *), 574c0dd49bdSEiji Ota size_t item_len); 575c0dd49bdSEiji Ota 576c0dd49bdSEiji Ota static inline int 577c0dd49bdSEiji Ota rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new) 578c0dd49bdSEiji Ota { 579c0dd49bdSEiji Ota return (atomic_cmpxchg(&conn->c_state, old, new) == old); 580c0dd49bdSEiji Ota } 581c0dd49bdSEiji Ota 5821a561c76SEiji Ota static inline int 583c0dd49bdSEiji Ota rdsv3_conn_state(struct rdsv3_connection *conn) 584c0dd49bdSEiji Ota { 585c0dd49bdSEiji Ota return (atomic_get(&conn->c_state)); 586c0dd49bdSEiji Ota } 587c0dd49bdSEiji Ota 5881a561c76SEiji Ota static inline int 589c0dd49bdSEiji Ota rdsv3_conn_up(struct rdsv3_connection *conn) 590c0dd49bdSEiji Ota { 591c0dd49bdSEiji Ota return (atomic_get(&conn->c_state) == RDSV3_CONN_UP); 592c0dd49bdSEiji Ota } 593c0dd49bdSEiji Ota 5941a561c76SEiji Ota static inline int 595c0dd49bdSEiji Ota rdsv3_conn_connecting(struct rdsv3_connection *conn) 596c0dd49bdSEiji Ota { 597c0dd49bdSEiji Ota return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING); 598c0dd49bdSEiji Ota } 599c0dd49bdSEiji Ota 600c0dd49bdSEiji Ota /* recv.c */ 601c0dd49bdSEiji Ota void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn, 602c0dd49bdSEiji Ota uint32_be_t saddr); 603c0dd49bdSEiji Ota void rdsv3_inc_addref(struct rdsv3_incoming *inc); 604c0dd49bdSEiji Ota void rdsv3_inc_put(struct rdsv3_incoming *inc); 605c0dd49bdSEiji Ota void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr, 606c0dd49bdSEiji Ota uint32_be_t daddr, 607c0dd49bdSEiji Ota struct rdsv3_incoming *inc, int gfp); 608c0dd49bdSEiji Ota int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio, 609c0dd49bdSEiji Ota struct msghdr *msg, size_t size, int msg_flags); 610c0dd49bdSEiji Ota void rdsv3_clear_recv_queue(struct rdsv3_sock *rs); 611c0dd49bdSEiji Ota int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg); 612c0dd49bdSEiji Ota void rdsv3_inc_info_copy(struct 
rdsv3_incoming *inc, 613c0dd49bdSEiji Ota struct rdsv3_info_iterator *iter, 614c0dd49bdSEiji Ota uint32_be_t saddr, uint32_be_t daddr, int flip); 615c0dd49bdSEiji Ota 616c0dd49bdSEiji Ota /* page.c */ 617c0dd49bdSEiji Ota int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat, 618c0dd49bdSEiji Ota unsigned long bytes, int gfp); 619c0dd49bdSEiji Ota 620c0dd49bdSEiji Ota /* send.c */ 621c0dd49bdSEiji Ota int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg, 622c0dd49bdSEiji Ota size_t payload_len); 623c0dd49bdSEiji Ota void rdsv3_send_reset(struct rdsv3_connection *conn); 624c0dd49bdSEiji Ota int rdsv3_send_xmit(struct rdsv3_connection *conn); 625c0dd49bdSEiji Ota struct sockaddr_in; 626c0dd49bdSEiji Ota void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest); 627c0dd49bdSEiji Ota typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack); 628c0dd49bdSEiji Ota void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack, 629c0dd49bdSEiji Ota is_acked_func is_acked); 630c0dd49bdSEiji Ota int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq); 631c0dd49bdSEiji Ota void rdsv3_send_remove_from_sock(struct list *messages, int status); 632c0dd49bdSEiji Ota int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport); 633c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *, 634c0dd49bdSEiji Ota struct rdsv3_rdma_op *); 635c0dd49bdSEiji Ota 636c0dd49bdSEiji Ota /* rdma.c */ 637c0dd49bdSEiji Ota void rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force); 638c0dd49bdSEiji Ota 639c0dd49bdSEiji Ota /* cong.c */ 640c0dd49bdSEiji Ota void rdsv3_cong_init(void); 641c0dd49bdSEiji Ota int rdsv3_cong_get_maps(struct rdsv3_connection *conn); 642c0dd49bdSEiji Ota void rdsv3_cong_add_conn(struct rdsv3_connection *conn); 643c0dd49bdSEiji Ota void rdsv3_cong_remove_conn(struct rdsv3_connection *conn); 644c0dd49bdSEiji Ota void rdsv3_cong_set_bit(struct 
rdsv3_cong_map *map, uint16_be_t port); 645c0dd49bdSEiji Ota void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port); 646c0dd49bdSEiji Ota int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock, 647c0dd49bdSEiji Ota struct rdsv3_sock *rs); 648c0dd49bdSEiji Ota void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map); 649c0dd49bdSEiji Ota void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t); 650c0dd49bdSEiji Ota int rdsv3_cong_updated_since(unsigned long *recent); 651c0dd49bdSEiji Ota void rdsv3_cong_add_socket(struct rdsv3_sock *); 652c0dd49bdSEiji Ota void rdsv3_cong_remove_socket(struct rdsv3_sock *); 653c0dd49bdSEiji Ota void rdsv3_cong_exit(void); 654c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn); 655c0dd49bdSEiji Ota 656c0dd49bdSEiji Ota /* stats.c */ 657c0dd49bdSEiji Ota RDSV3_DECLARE_PER_CPU(struct rdsv3_statistics, rdsv3_stats); 658c0dd49bdSEiji Ota #define rdsv3_stats_inc_which(which, member) do { \ 659c0dd49bdSEiji Ota rdsv3_per_cpu(which, get_cpu()).member++; \ 660c0dd49bdSEiji Ota put_cpu(); \ 661c0dd49bdSEiji Ota } while (0) 662c0dd49bdSEiji Ota #define rdsv3_stats_inc(member) rdsv3_stats_inc_which(rdsv3_stats, member) 663c0dd49bdSEiji Ota #define rdsv3_stats_add_which(which, member, count) do { \ 664c0dd49bdSEiji Ota rdsv3_per_cpu(which, get_cpu()).member += count; \ 665c0dd49bdSEiji Ota put_cpu(); \ 666c0dd49bdSEiji Ota } while (0) 667c0dd49bdSEiji Ota #define rdsv3_stats_add(member, count) \ 668c0dd49bdSEiji Ota rdsv3_stats_add_which(rdsv3_stats, member, count) 669c0dd49bdSEiji Ota int rdsv3_stats_init(void); 670c0dd49bdSEiji Ota void rdsv3_stats_exit(void); 671c0dd49bdSEiji Ota void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter, 672c0dd49bdSEiji Ota uint64_t *values, char **names, size_t nr); 673c0dd49bdSEiji Ota 674c0dd49bdSEiji Ota 675c0dd49bdSEiji Ota /* sysctl.c */ 676c0dd49bdSEiji Ota int rdsv3_sysctl_init(void); 677c0dd49bdSEiji Ota 
void rdsv3_sysctl_exit(void); 678c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_min; 679c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_default; 680c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_max; 681c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_reconnect_min_jiffies; 682c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_reconnect_max_jiffies; 683c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_max_unacked_packets; 684c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_max_unacked_bytes; 685c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_ping_enable; 686c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_trace_flags; 687c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_trace_level; 688c0dd49bdSEiji Ota 689c0dd49bdSEiji Ota /* threads.c */ 690c0dd49bdSEiji Ota int rdsv3_threads_init(); 691c0dd49bdSEiji Ota void rdsv3_threads_exit(void); 692c0dd49bdSEiji Ota extern struct rdsv3_workqueue_struct_s *rdsv3_wq; 693c0dd49bdSEiji Ota void rdsv3_connect_worker(struct rdsv3_work_s *); 694c0dd49bdSEiji Ota void rdsv3_shutdown_worker(struct rdsv3_work_s *); 695c0dd49bdSEiji Ota void rdsv3_send_worker(struct rdsv3_work_s *); 696c0dd49bdSEiji Ota void rdsv3_recv_worker(struct rdsv3_work_s *); 697c0dd49bdSEiji Ota void rdsv3_connect_complete(struct rdsv3_connection *conn); 698c0dd49bdSEiji Ota 699c0dd49bdSEiji Ota /* transport.c */ 700c0dd49bdSEiji Ota int rdsv3_trans_register(struct rdsv3_transport *trans); 701c0dd49bdSEiji Ota void rdsv3_trans_unregister(struct rdsv3_transport *trans); 702c0dd49bdSEiji Ota struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr); 703c0dd49bdSEiji Ota unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter, 704c0dd49bdSEiji Ota unsigned int avail); 705c0dd49bdSEiji Ota void rdsv3_trans_exit(void); 706c0dd49bdSEiji Ota 707c0dd49bdSEiji Ota /* message.c */ 708c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp); 709c0dd49bdSEiji Ota 
struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop, 710c0dd49bdSEiji Ota size_t total_len); 711c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs, 712c0dd49bdSEiji Ota unsigned int total_len); 713c0dd49bdSEiji Ota void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 714c0dd49bdSEiji Ota uint16_be_t dport, uint64_t seq); 715c0dd49bdSEiji Ota int rdsv3_message_add_extension(struct rdsv3_header *hdr, 716c0dd49bdSEiji Ota unsigned int type, const void *data, unsigned int len); 717c0dd49bdSEiji Ota int rdsv3_message_next_extension(struct rdsv3_header *hdr, 718c0dd49bdSEiji Ota unsigned int *pos, void *buf, unsigned int *buflen); 719c0dd49bdSEiji Ota int rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 720c0dd49bdSEiji Ota unsigned int version); 721c0dd49bdSEiji Ota int rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 722c0dd49bdSEiji Ota unsigned int *version); 723c0dd49bdSEiji Ota int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, 724c0dd49bdSEiji Ota uint32_t r_key, uint32_t offset); 725c0dd49bdSEiji Ota int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc, 726c0dd49bdSEiji Ota uio_t *uio, size_t size); 727c0dd49bdSEiji Ota void rdsv3_message_inc_purge(struct rdsv3_incoming *inc); 728c0dd49bdSEiji Ota void rdsv3_message_inc_free(struct rdsv3_incoming *inc); 729c0dd49bdSEiji Ota void rdsv3_message_addref(struct rdsv3_message *rm); 730c0dd49bdSEiji Ota void rdsv3_message_put(struct rdsv3_message *rm); 731c0dd49bdSEiji Ota void rdsv3_message_wait(struct rdsv3_message *rm); 732c0dd49bdSEiji Ota void rdsv3_message_unmapped(struct rdsv3_message *rm); 733c0dd49bdSEiji Ota 7341a561c76SEiji Ota static inline void 735c0dd49bdSEiji Ota rdsv3_message_make_checksum(struct rdsv3_header *hdr) 736c0dd49bdSEiji Ota { 737c0dd49bdSEiji Ota hdr->h_csum = 0; 738c0dd49bdSEiji Ota hdr->h_csum = 739c0dd49bdSEiji Ota rdsv3_ip_fast_csum((void *)hdr, sizeof 
(*hdr) >> 2); 740c0dd49bdSEiji Ota } 741c0dd49bdSEiji Ota 7421a561c76SEiji Ota static inline int 743c0dd49bdSEiji Ota rdsv3_message_verify_checksum(const struct rdsv3_header *hdr) 744c0dd49bdSEiji Ota { 745c0dd49bdSEiji Ota return (!hdr->h_csum || 746c0dd49bdSEiji Ota rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0); 747c0dd49bdSEiji Ota } 748c0dd49bdSEiji Ota 749c0dd49bdSEiji Ota /* rdsv3_sc.c */ 750c0dd49bdSEiji Ota extern boolean_t rdsv3_if_lookup_by_name(char *if_name); 751c0dd49bdSEiji Ota extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip); 752c0dd49bdSEiji Ota extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr); 753c0dd49bdSEiji Ota 754c0dd49bdSEiji Ota #ifdef __cplusplus 755c0dd49bdSEiji Ota } 756c0dd49bdSEiji Ota #endif 757c0dd49bdSEiji Ota 758c0dd49bdSEiji Ota #endif /* _RDSV3_RDSV3_H */ 759