1c0dd49bdSEiji Ota /* 2*16e76cddSagiri * This file contains definitions imported from the OFED rds header rds.h. 3*16e76cddSagiri * Oracle elects to have and use the contents of rds.h under and 4*16e76cddSagiri * governed by the OpenIB.org BSD license. 5c0dd49bdSEiji Ota */ 6*16e76cddSagiri 7c0dd49bdSEiji Ota /* 8c0dd49bdSEiji Ota * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 9c0dd49bdSEiji Ota */ 10c0dd49bdSEiji Ota 11c0dd49bdSEiji Ota #ifndef _RDSV3_RDSV3_H 12c0dd49bdSEiji Ota #define _RDSV3_RDSV3_H 13c0dd49bdSEiji Ota 14c0dd49bdSEiji Ota /* 15c0dd49bdSEiji Ota * The name of this file is rds.h in ofed. 16c0dd49bdSEiji Ota */ 17c0dd49bdSEiji Ota 18c0dd49bdSEiji Ota #ifdef __cplusplus 19c0dd49bdSEiji Ota extern "C" { 20c0dd49bdSEiji Ota #endif 21c0dd49bdSEiji Ota 22c0dd49bdSEiji Ota #include <sys/sunndi.h> 23c0dd49bdSEiji Ota #include <netinet/in.h> 24c0dd49bdSEiji Ota #include <sys/synch.h> 25c0dd49bdSEiji Ota #include <sys/stropts.h> 26c0dd49bdSEiji Ota #include <sys/socket.h> 27c0dd49bdSEiji Ota #include <sys/socketvar.h> 28c0dd49bdSEiji Ota #include <inet/ip.h> 29c0dd49bdSEiji Ota #include <sys/avl.h> 30c0dd49bdSEiji Ota #include <sys/param.h> 315d5562f5SEiji Ota #include <sys/time.h> 32c0dd49bdSEiji Ota #include <sys/rds.h> 33c0dd49bdSEiji Ota 34c0dd49bdSEiji Ota #include <sys/ib/ibtl/ibti.h> 35c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_verbs.h> 36c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_addr.h> 37c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/rdma_cm.h> 38c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_impl.h> 39c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/info.h> 40c0dd49bdSEiji Ota 41c0dd49bdSEiji Ota #define NIPQUAD(addr) \ 42c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 24) & 0xFF), \ 43c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 16) & 0xFF), \ 44c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 8) & 0xFF), \ 45c0dd49bdSEiji Ota (unsigned char)(ntohl(addr) & 0xFF) 46c0dd49bdSEiji Ota 47c0dd49bdSEiji Ota /* 48c0dd49bdSEiji Ota * RDS Network protocol version 49c0dd49bdSEiji Ota */ 50c0dd49bdSEiji Ota #define RDS_PROTOCOL_3_0 0x0300 51c0dd49bdSEiji Ota #define RDS_PROTOCOL_3_1 0x0301 52c0dd49bdSEiji Ota #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 53c0dd49bdSEiji Ota #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) 54c0dd49bdSEiji Ota #define RDS_PROTOCOL_MINOR(v) ((v) & 255) 55c0dd49bdSEiji Ota #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) 56c0dd49bdSEiji Ota 57c0dd49bdSEiji Ota /* 58c0dd49bdSEiji Ota * XXX randomly chosen, but at least seems to be unused: 59c0dd49bdSEiji Ota * # 18464-18768 Unassigned 60c0dd49bdSEiji Ota * We should do better. We want a reserved port to discourage unpriv'ed 61c0dd49bdSEiji Ota * userspace from listening. 62c0dd49bdSEiji Ota * 63c0dd49bdSEiji Ota * port 18633 was the version that had ack frames on the wire. 64c0dd49bdSEiji Ota */ 65c0dd49bdSEiji Ota #define RDSV3_PORT 18634 66c0dd49bdSEiji Ota 675d5562f5SEiji Ota #define RDSV3_REAPER_WAIT_SECS (5*60) 685d5562f5SEiji Ota #define RDSV3_REAPER_WAIT_JIFFIES SEC_TO_TICK(RDSV3_REAPER_WAIT_SECS) 695d5562f5SEiji Ota 70c0dd49bdSEiji Ota /* 71c0dd49bdSEiji Ota * This is the sad making. Some kernels have a bug in the per_cpu() api which 72c0dd49bdSEiji Ota * makes DEFINE_PER_CPU trigger an oops on insmod because the per-cpu section 73c0dd49bdSEiji Ota * in the module is not cacheline-aligned. As much as we'd like to tell users 74c0dd49bdSEiji Ota * with older kernels to stuff it, that's not reasonable. We'll roll our own 75c0dd49bdSEiji Ota * until this doesn't have to build against older kernels. 76c0dd49bdSEiji Ota */ 77c0dd49bdSEiji Ota #define RDSV3_DEFINE_PER_CPU(type, var) type var[NR_CPUS] 78c0dd49bdSEiji Ota #define RDSV3_DECLARE_PER_CPU(type, var) extern type var[NR_CPUS] 79c0dd49bdSEiji Ota #define rdsv3_per_cpu(var, cpu) var[cpu] 80c0dd49bdSEiji Ota 81c0dd49bdSEiji Ota static inline ulong_t 82c0dd49bdSEiji Ota ceil(ulong_t x, ulong_t y) 83c0dd49bdSEiji Ota { 84c0dd49bdSEiji Ota return ((x + y - 1) / y); 85c0dd49bdSEiji Ota } 86c0dd49bdSEiji Ota 87c0dd49bdSEiji Ota #define RDSV3_FRAG_SHIFT 12 88c0dd49bdSEiji Ota #define RDSV3_FRAG_SIZE ((unsigned int)(1 << RDSV3_FRAG_SHIFT)) 89c0dd49bdSEiji Ota 90c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_BYTES (65536 / 8) 91c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_LONGS (RDSV3_CONG_MAP_BYTES / sizeof (unsigned long)) 92c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_PAGES (RDSV3_CONG_MAP_BYTES / PAGE_SIZE) 93c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 94c0dd49bdSEiji Ota 95c0dd49bdSEiji Ota struct rdsv3_cong_map { 96c0dd49bdSEiji Ota struct avl_node m_rb_node; 97c0dd49bdSEiji Ota uint32_be_t m_addr; 98c0dd49bdSEiji Ota rdsv3_wait_queue_t m_waitq; 99c0dd49bdSEiji Ota struct list m_conn_list; 100c0dd49bdSEiji Ota unsigned long m_page_addrs[RDSV3_CONG_MAP_PAGES]; 101c0dd49bdSEiji Ota }; 102c0dd49bdSEiji Ota 103c0dd49bdSEiji Ota /* 104c0dd49bdSEiji Ota * This is how we will track the connection state: 105c0dd49bdSEiji Ota * A connection is always in one of the following 106c0dd49bdSEiji Ota * states. Updates to the state are atomic and imply 107c0dd49bdSEiji Ota * a memory barrier. 108c0dd49bdSEiji Ota */ 109c0dd49bdSEiji Ota enum { 110c0dd49bdSEiji Ota RDSV3_CONN_DOWN = 0, 111c0dd49bdSEiji Ota RDSV3_CONN_CONNECTING, 112c0dd49bdSEiji Ota RDSV3_CONN_DISCONNECTING, 113c0dd49bdSEiji Ota RDSV3_CONN_UP, 114c0dd49bdSEiji Ota RDSV3_CONN_ERROR, 115c0dd49bdSEiji Ota }; 116c0dd49bdSEiji Ota 117c0dd49bdSEiji Ota /* Bits for c_flags */ 118c0dd49bdSEiji Ota #define RDSV3_LL_SEND_FULL 0 119c0dd49bdSEiji Ota #define RDSV3_RECONNECT_PENDING 1 120c0dd49bdSEiji Ota 121c0dd49bdSEiji Ota struct rdsv3_connection { 122c0dd49bdSEiji Ota struct avl_node c_hash_node; 123c0dd49bdSEiji Ota uint32_be_t c_laddr; 124c0dd49bdSEiji Ota uint32_be_t c_faddr; 125c0dd49bdSEiji Ota unsigned int c_loopback:1; 126c0dd49bdSEiji Ota struct rdsv3_connection *c_passive; 127c0dd49bdSEiji Ota 128c0dd49bdSEiji Ota struct rdsv3_cong_map *c_lcong; 129c0dd49bdSEiji Ota struct rdsv3_cong_map *c_fcong; 130c0dd49bdSEiji Ota 131c0dd49bdSEiji Ota struct mutex c_send_lock; /* protect send ring */ 1325d5562f5SEiji Ota atomic_t c_send_generation; 1335d5562f5SEiji Ota atomic_t c_senders; 1345d5562f5SEiji Ota 135c0dd49bdSEiji Ota struct rdsv3_message *c_xmit_rm; 136c0dd49bdSEiji Ota unsigned long c_xmit_sg; 137c0dd49bdSEiji Ota unsigned int c_xmit_hdr_off; 138c0dd49bdSEiji Ota unsigned int c_xmit_data_off; 139c0dd49bdSEiji Ota unsigned int c_xmit_rdma_sent; 140c0dd49bdSEiji Ota 141c0dd49bdSEiji Ota kmutex_t c_lock; /* protect msg queues */ 142c0dd49bdSEiji Ota uint64_t c_next_tx_seq; 143c0dd49bdSEiji Ota struct list c_send_queue; 144c0dd49bdSEiji Ota struct list c_retrans; 145c0dd49bdSEiji Ota 146c0dd49bdSEiji Ota uint64_t c_next_rx_seq; 147c0dd49bdSEiji Ota 148c0dd49bdSEiji Ota struct rdsv3_transport *c_trans; 149c0dd49bdSEiji Ota void *c_transport_data; 150c0dd49bdSEiji Ota 151c0dd49bdSEiji Ota atomic_t c_state; 152c0dd49bdSEiji Ota unsigned long c_flags; 153c0dd49bdSEiji Ota unsigned long c_reconnect_jiffies; 1545d5562f5SEiji Ota clock_t c_last_connect_jiffies; 1555d5562f5SEiji Ota 156c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_send_w; 157c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_recv_w; 158c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_conn_w; 1595d5562f5SEiji Ota struct rdsv3_delayed_work_s c_reap_w; 160c0dd49bdSEiji Ota struct rdsv3_work_s c_down_w; 161c0dd49bdSEiji Ota struct mutex c_cm_lock; /* protect conn state & cm */ 162c0dd49bdSEiji Ota 163c0dd49bdSEiji Ota struct list_node c_map_item; 164c0dd49bdSEiji Ota unsigned long c_map_queued; 165c0dd49bdSEiji Ota unsigned long c_map_offset; 166c0dd49bdSEiji Ota unsigned long c_map_bytes; 167c0dd49bdSEiji Ota 168c0dd49bdSEiji Ota unsigned int c_unacked_packets; 169c0dd49bdSEiji Ota unsigned int c_unacked_bytes; 170c0dd49bdSEiji Ota 171c0dd49bdSEiji Ota /* Protocol version */ 172c0dd49bdSEiji Ota unsigned int c_version; 173c0dd49bdSEiji Ota }; 174c0dd49bdSEiji Ota 175c0dd49bdSEiji Ota #define RDSV3_FLAG_CONG_BITMAP 0x01 176c0dd49bdSEiji Ota #define RDSV3_FLAG_ACK_REQUIRED 0x02 177c0dd49bdSEiji Ota #define RDSV3_FLAG_RETRANSMITTED 0x04 178cadbfdc3SEiji Ota #define RDSV3_MAX_ADV_CREDIT 127 179c0dd49bdSEiji Ota 180c0dd49bdSEiji Ota /* 181c0dd49bdSEiji Ota * Maximum space available for extension headers. 182c0dd49bdSEiji Ota */ 183c0dd49bdSEiji Ota #define RDSV3_HEADER_EXT_SPACE 16 184c0dd49bdSEiji Ota 185c0dd49bdSEiji Ota struct rdsv3_header { 186c0dd49bdSEiji Ota uint64_be_t h_sequence; 187c0dd49bdSEiji Ota uint64_be_t h_ack; 188c0dd49bdSEiji Ota uint32_be_t h_len; 189c0dd49bdSEiji Ota uint16_be_t h_sport; 190c0dd49bdSEiji Ota uint16_be_t h_dport; 191c0dd49bdSEiji Ota uint8_t h_flags; 192c0dd49bdSEiji Ota uint8_t h_credit; 193c0dd49bdSEiji Ota uint8_t h_padding[4]; 194c0dd49bdSEiji Ota uint16_be_t h_csum; 195c0dd49bdSEiji Ota 196c0dd49bdSEiji Ota uint8_t h_exthdr[RDSV3_HEADER_EXT_SPACE]; 197c0dd49bdSEiji Ota }; 198c0dd49bdSEiji Ota 199c0dd49bdSEiji Ota /* Reserved - indicates end of extensions */ 200c0dd49bdSEiji Ota #define RDSV3_EXTHDR_NONE 0 201c0dd49bdSEiji Ota 202c0dd49bdSEiji Ota /* 203c0dd49bdSEiji Ota * This extension header is included in the very 204c0dd49bdSEiji Ota * first message that is sent on a new connection, 205c0dd49bdSEiji Ota * and identifies the protocol level. This will help 206c0dd49bdSEiji Ota * rolling updates if a future change requires breaking 207c0dd49bdSEiji Ota * the protocol. 208c0dd49bdSEiji Ota */ 209c0dd49bdSEiji Ota #define RDSV3_EXTHDR_VERSION 1 210c0dd49bdSEiji Ota struct rdsv3_ext_header_version { 211c0dd49bdSEiji Ota uint32_be_t h_version; 212c0dd49bdSEiji Ota }; 213c0dd49bdSEiji Ota 214c0dd49bdSEiji Ota /* 215c0dd49bdSEiji Ota * This extension header is included in the RDS message 216c0dd49bdSEiji Ota * chasing an RDMA operation. 217c0dd49bdSEiji Ota */ 218c0dd49bdSEiji Ota #define RDSV3_EXTHDR_RDMA 2 219c0dd49bdSEiji Ota struct rdsv3_ext_header_rdma { 220c0dd49bdSEiji Ota uint32_be_t h_rdma_rkey; 221c0dd49bdSEiji Ota }; 222c0dd49bdSEiji Ota 223c0dd49bdSEiji Ota /* 224c0dd49bdSEiji Ota * This extension header tells the peer about the 225c0dd49bdSEiji Ota * destination <R_Key,offset> of the requested RDMA 226c0dd49bdSEiji Ota * operation. 227c0dd49bdSEiji Ota */ 228c0dd49bdSEiji Ota #define RDSV3_EXTHDR_RDMA_DEST 3 229c0dd49bdSEiji Ota struct rdsv3_ext_header_rdma_dest { 230c0dd49bdSEiji Ota uint32_be_t h_rdma_rkey; 231c0dd49bdSEiji Ota uint32_be_t h_rdma_offset; 232c0dd49bdSEiji Ota }; 233c0dd49bdSEiji Ota 234c0dd49bdSEiji Ota #define __RDSV3_EXTHDR_MAX 16 /* for now */ 235c0dd49bdSEiji Ota 236c0dd49bdSEiji Ota struct rdsv3_incoming { 237c0dd49bdSEiji Ota atomic_t i_refcount; 238c0dd49bdSEiji Ota struct list_node i_item; 239c0dd49bdSEiji Ota struct rdsv3_connection *i_conn; 240c0dd49bdSEiji Ota struct rdsv3_header i_hdr; 241c0dd49bdSEiji Ota unsigned long i_rx_jiffies; 242c0dd49bdSEiji Ota uint32_be_t i_saddr; 243c0dd49bdSEiji Ota 244c0dd49bdSEiji Ota rdsv3_rdma_cookie_t i_rdma_cookie; 245c0dd49bdSEiji Ota }; 246c0dd49bdSEiji Ota 247c0dd49bdSEiji Ota /* 248c0dd49bdSEiji Ota * m_sock_item and m_conn_item are on lists that are serialized under 249c0dd49bdSEiji Ota * conn->c_lock. m_sock_item has additional meaning in that once it is empty 250c0dd49bdSEiji Ota * the message will not be put back on the retransmit list after being sent. 251c0dd49bdSEiji Ota * messages that are canceled while being sent rely on this. 252c0dd49bdSEiji Ota * 253c0dd49bdSEiji Ota * m_inc is used by loopback so that it can pass an incoming message straight 254c0dd49bdSEiji Ota * back up into the rx path. It embeds a wire header which is also used by 255c0dd49bdSEiji Ota * the send path, which is kind of awkward. 256c0dd49bdSEiji Ota * 257c0dd49bdSEiji Ota * m_sock_item indicates the message's presence on a socket's send or receive 258c0dd49bdSEiji Ota * queue. m_rs will point to that socket. 259c0dd49bdSEiji Ota * 260c0dd49bdSEiji Ota * m_daddr is used by cancellation to prune messages to a given destination. 261c0dd49bdSEiji Ota * 262c0dd49bdSEiji Ota * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock 263c0dd49bdSEiji Ota * nesting. As paths iterate over messages on a sock, or conn, they must 264c0dd49bdSEiji Ota * also lock the conn, or sock, to remove the message from those lists too. 265c0dd49bdSEiji Ota * Testing the flag to determine if the message is still on the lists lets 266c0dd49bdSEiji Ota * us avoid testing the list_head directly. That means each path can use 267c0dd49bdSEiji Ota * the message's list_head to keep it on a local list while juggling locks 268c0dd49bdSEiji Ota * without confusing the other path. 269c0dd49bdSEiji Ota * 270c0dd49bdSEiji Ota * m_ack_seq is an optional field set by transports who need a different 271c0dd49bdSEiji Ota * sequence number range to invalidate. They can use this in a callback 272c0dd49bdSEiji Ota * that they pass to rdsv3_send_drop_acked() to see if each message has been 273c0dd49bdSEiji Ota * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't 274c0dd49bdSEiji Ota * had ack_seq set yet. 275c0dd49bdSEiji Ota */ 276c0dd49bdSEiji Ota #define RDSV3_MSG_ON_SOCK 1 277c0dd49bdSEiji Ota #define RDSV3_MSG_ON_CONN 2 278c0dd49bdSEiji Ota #define RDSV3_MSG_HAS_ACK_SEQ 3 279c0dd49bdSEiji Ota #define RDSV3_MSG_ACK_REQUIRED 4 280c0dd49bdSEiji Ota #define RDSV3_MSG_RETRANSMITTED 5 281c0dd49bdSEiji Ota #define RDSV3_MSG_MAPPED 6 282c0dd49bdSEiji Ota #define RDSV3_MSG_PAGEVEC 7 283c0dd49bdSEiji Ota 284c0dd49bdSEiji Ota struct rdsv3_message { 285c0dd49bdSEiji Ota atomic_t m_refcount; 286c0dd49bdSEiji Ota struct list_node m_sock_item; 287c0dd49bdSEiji Ota struct list_node m_conn_item; 288c0dd49bdSEiji Ota struct rdsv3_incoming m_inc; 289c0dd49bdSEiji Ota uint64_t m_ack_seq; 290c0dd49bdSEiji Ota uint32_be_t m_daddr; 291c0dd49bdSEiji Ota unsigned long m_flags; 292c0dd49bdSEiji Ota 293c0dd49bdSEiji Ota /* 294c0dd49bdSEiji Ota * Never access m_rs without holding m_rs_lock. 295c0dd49bdSEiji Ota * Lock nesting is 296c0dd49bdSEiji Ota * rm->m_rs_lock 297c0dd49bdSEiji Ota * -> rs->rs_lock 298c0dd49bdSEiji Ota */ 299c0dd49bdSEiji Ota kmutex_t m_rs_lock; 3005d5562f5SEiji Ota rdsv3_wait_queue_t m_flush_wait; 3015d5562f5SEiji Ota 302c0dd49bdSEiji Ota struct rdsv3_sock *m_rs; 303c0dd49bdSEiji Ota struct rdsv3_rdma_op *m_rdma_op; 304c0dd49bdSEiji Ota rdsv3_rdma_cookie_t m_rdma_cookie; 305c0dd49bdSEiji Ota struct rdsv3_mr *m_rdma_mr; 306c0dd49bdSEiji Ota unsigned int m_nents; 307c0dd49bdSEiji Ota unsigned int m_count; 308c0dd49bdSEiji Ota struct rdsv3_scatterlist m_sg[1]; 309c0dd49bdSEiji Ota }; 310c0dd49bdSEiji Ota 311c0dd49bdSEiji Ota /* 312c0dd49bdSEiji Ota * The RDS notifier is used (optionally) to tell the application about 313c0dd49bdSEiji Ota * completed RDMA operations. Rather than keeping the whole rds message 314c0dd49bdSEiji Ota * around on the queue, we allocate a small notifier that is put on the 315c0dd49bdSEiji Ota * socket's notifier_list. Notifications are delivered to the application 316c0dd49bdSEiji Ota * through control messages. 317c0dd49bdSEiji Ota */ 318c0dd49bdSEiji Ota struct rdsv3_notifier { 319c0dd49bdSEiji Ota list_node_t n_list; 320c0dd49bdSEiji Ota uint64_t n_user_token; 321c0dd49bdSEiji Ota int n_status; 322c0dd49bdSEiji Ota }; 323c0dd49bdSEiji Ota 324c0dd49bdSEiji Ota /* 325c0dd49bdSEiji Ota * struct rdsv3_transport - transport specific behavioural hooks 326c0dd49bdSEiji Ota * 327c0dd49bdSEiji Ota * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send 328c0dd49bdSEiji Ota * part of a message. The caller serializes on the send_sem so this 329c0dd49bdSEiji Ota * doesn't need to be reentrant for a given conn. The header must be 330c0dd49bdSEiji Ota * sent before the data payload. .xmit must be prepared to send a 331c0dd49bdSEiji Ota * message with no data payload. .xmit should return the number of 332c0dd49bdSEiji Ota * bytes that were sent down the connection, including header bytes. 333c0dd49bdSEiji Ota * Returning 0 tells the caller that it doesn't need to perform any 334c0dd49bdSEiji Ota * additional work now. This is usually the case when the transport has 335c0dd49bdSEiji Ota * filled the sending queue for its connection and will handle 336c0dd49bdSEiji Ota * triggering the rds thread to continue the send when space becomes 337c0dd49bdSEiji Ota * available. Returning -EAGAIN tells the caller to retry the send 338c0dd49bdSEiji Ota * immediately. Returning -ENOMEM tells the caller to retry the send at 339c0dd49bdSEiji Ota * some point in the future. 340c0dd49bdSEiji Ota * 341c0dd49bdSEiji Ota * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once 342c0dd49bdSEiji Ota * it returns the connection can not call rdsv3_recv_incoming(). 343c0dd49bdSEiji Ota * This will only be called once after conn_connect returns 344c0dd49bdSEiji Ota * non-zero success and will The caller serializes this with 345c0dd49bdSEiji Ota * the send and connecting paths (xmit_* and conn_*). The 346c0dd49bdSEiji Ota * transport is responsible for other serialization, including 347c0dd49bdSEiji Ota * rdsv3_recv_incoming(). This is called in process context but 348c0dd49bdSEiji Ota * should try hard not to block. 349c0dd49bdSEiji Ota * 350c0dd49bdSEiji Ota * @xmit_cong_map: This asks the transport to send the local bitmap down the 351c0dd49bdSEiji Ota * given connection. XXX get a better story about the bitmap 352c0dd49bdSEiji Ota * flag and header. 353c0dd49bdSEiji Ota */ 354c0dd49bdSEiji Ota 355cadbfdc3SEiji Ota #define RDS_TRANS_IB 0 356cadbfdc3SEiji Ota #define RDS_TRANS_IWARP 1 357cadbfdc3SEiji Ota #define RDS_TRANS_TCP 2 358cadbfdc3SEiji Ota #define RDS_TRANS_COUNT 3 359cadbfdc3SEiji Ota 360c0dd49bdSEiji Ota struct rdsv3_transport { 361cadbfdc3SEiji Ota char t_name[TRANSNAMSIZ]; 362c0dd49bdSEiji Ota struct list_node t_item; 363cadbfdc3SEiji Ota unsigned int t_type; 364c0dd49bdSEiji Ota unsigned int t_prefer_loopback:1; 365c0dd49bdSEiji Ota 366c0dd49bdSEiji Ota int (*laddr_check)(uint32_be_t addr); 367c0dd49bdSEiji Ota int (*conn_alloc)(struct rdsv3_connection *conn, int gfp); 368c0dd49bdSEiji Ota void (*conn_free)(void *data); 369c0dd49bdSEiji Ota int (*conn_connect)(struct rdsv3_connection *conn); 370c0dd49bdSEiji Ota void (*conn_shutdown)(struct rdsv3_connection *conn); 371c0dd49bdSEiji Ota void (*xmit_prepare)(struct rdsv3_connection *conn); 372c0dd49bdSEiji Ota void (*xmit_complete)(struct rdsv3_connection *conn); 373c0dd49bdSEiji Ota int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm, 374c0dd49bdSEiji Ota unsigned int hdr_off, unsigned int sg, unsigned int off); 375c0dd49bdSEiji Ota int (*xmit_cong_map)(struct rdsv3_connection *conn, 376c0dd49bdSEiji Ota struct rdsv3_cong_map *map, unsigned long offset); 377c0dd49bdSEiji Ota int (*xmit_rdma)(struct rdsv3_connection *conn, 378c0dd49bdSEiji Ota struct rdsv3_rdma_op *op); 379c0dd49bdSEiji Ota int (*recv)(struct rdsv3_connection *conn); 380c0dd49bdSEiji Ota int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio, 381c0dd49bdSEiji Ota size_t size); 382c0dd49bdSEiji Ota void (*inc_free)(struct rdsv3_incoming *inc); 383c0dd49bdSEiji Ota 384c0dd49bdSEiji Ota int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 385c0dd49bdSEiji Ota struct rdma_cm_event *event); 386c0dd49bdSEiji Ota int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); 387c0dd49bdSEiji Ota void (*cm_connect_complete)(struct rdsv3_connection *conn, 388c0dd49bdSEiji Ota struct rdma_cm_event *event); 389c0dd49bdSEiji Ota 390c0dd49bdSEiji Ota unsigned int (*stats_info_copy)(struct rdsv3_info_iterator *iter, 391c0dd49bdSEiji Ota unsigned int avail); 392c0dd49bdSEiji Ota void (*exit)(void); 393c0dd49bdSEiji Ota void *(*get_mr)(struct rdsv3_iovec *sg, unsigned long nr_sg, 394c0dd49bdSEiji Ota struct rdsv3_sock *rs, uint32_t *key_ret); 395c0dd49bdSEiji Ota void (*sync_mr)(void *trans_private, int direction); 396c0dd49bdSEiji Ota void (*free_mr)(void *trans_private, int invalidate); 397c0dd49bdSEiji Ota void (*flush_mrs)(void); 398c0dd49bdSEiji Ota }; 399c0dd49bdSEiji Ota 400c0dd49bdSEiji Ota struct rdsv3_sock { 401c0dd49bdSEiji Ota struct rsock *rs_sk; 402c0dd49bdSEiji Ota uint64_t rs_user_addr; 403c0dd49bdSEiji Ota uint64_t rs_user_bytes; 404c0dd49bdSEiji Ota 405c0dd49bdSEiji Ota /* 406c0dd49bdSEiji Ota * bound_addr used for both incoming and outgoing, no INADDR_ANY 407c0dd49bdSEiji Ota * support. 408c0dd49bdSEiji Ota */ 409c0dd49bdSEiji Ota struct avl_node rs_bound_node; 410c0dd49bdSEiji Ota uint32_be_t rs_bound_addr; 411c0dd49bdSEiji Ota uint32_be_t rs_conn_addr; 412c0dd49bdSEiji Ota uint16_be_t rs_bound_port; 413c0dd49bdSEiji Ota uint16_be_t rs_conn_port; 414c0dd49bdSEiji Ota 415c0dd49bdSEiji Ota /* 416c0dd49bdSEiji Ota * This is only used to communicate the transport between bind and 417c0dd49bdSEiji Ota * initiating connections. All other trans use is referenced through 418c0dd49bdSEiji Ota * the connection. 419c0dd49bdSEiji Ota */ 420c0dd49bdSEiji Ota struct rdsv3_transport *rs_transport; 421c0dd49bdSEiji Ota 422c0dd49bdSEiji Ota /* 423c0dd49bdSEiji Ota * rdsv3_sendmsg caches the conn it used the last time around. 424c0dd49bdSEiji Ota * This helps avoid costly lookups. 425c0dd49bdSEiji Ota */ 426c0dd49bdSEiji Ota struct rdsv3_connection *rs_conn; 427c0dd49bdSEiji Ota kmutex_t rs_conn_lock; 428c0dd49bdSEiji Ota 429c0dd49bdSEiji Ota /* flag indicating we were congested or not */ 430c0dd49bdSEiji Ota int rs_congested; 431cadbfdc3SEiji Ota /* seen congestion (ENOBUFS) when sending? */ 432cadbfdc3SEiji Ota int rs_seen_congestion; 4335d5562f5SEiji Ota kmutex_t rs_congested_lock; 4345d5562f5SEiji Ota kcondvar_t rs_congested_cv; 435c0dd49bdSEiji Ota 436c0dd49bdSEiji Ota /* rs_lock protects all these adjacent members before the newline */ 437c0dd49bdSEiji Ota kmutex_t rs_lock; 438c0dd49bdSEiji Ota struct list rs_send_queue; 439c0dd49bdSEiji Ota uint32_t rs_snd_bytes; 440c0dd49bdSEiji Ota int rs_rcv_bytes; 441c0dd49bdSEiji Ota /* currently used for failed RDMAs */ 442c0dd49bdSEiji Ota struct list rs_notify_queue; 443c0dd49bdSEiji Ota 444c0dd49bdSEiji Ota /* 445c0dd49bdSEiji Ota * Congestion wake_up. If rs_cong_monitor is set, we use cong_mask 446c0dd49bdSEiji Ota * to decide whether the application should be woken up. 447c0dd49bdSEiji Ota * If not set, we use rs_cong_track to find out whether a cong map 448c0dd49bdSEiji Ota * update arrived. 449c0dd49bdSEiji Ota */ 450c0dd49bdSEiji Ota uint64_t rs_cong_mask; 451c0dd49bdSEiji Ota uint64_t rs_cong_notify; 452c0dd49bdSEiji Ota struct list_node rs_cong_list; 453c0dd49bdSEiji Ota unsigned long rs_cong_track; 454c0dd49bdSEiji Ota 455c0dd49bdSEiji Ota /* 456c0dd49bdSEiji Ota * rs_recv_lock protects the receive queue, and is 457c0dd49bdSEiji Ota * used to serialize with rdsv3_release. 458c0dd49bdSEiji Ota */ 459c0dd49bdSEiji Ota krwlock_t rs_recv_lock; 460c0dd49bdSEiji Ota struct list rs_recv_queue; 461c0dd49bdSEiji Ota 462c0dd49bdSEiji Ota /* just for stats reporting */ 463c0dd49bdSEiji Ota struct list_node rs_item; 464c0dd49bdSEiji Ota 465c0dd49bdSEiji Ota /* these have their own lock */ 466c0dd49bdSEiji Ota kmutex_t rs_rdma_lock; 467c0dd49bdSEiji Ota struct avl_tree rs_rdma_keys; 468c0dd49bdSEiji Ota 469c0dd49bdSEiji Ota /* Socket options - in case there will be more */ 470c0dd49bdSEiji Ota unsigned char rs_recverr, 471c0dd49bdSEiji Ota rs_cong_monitor; 472c0dd49bdSEiji Ota 473c0dd49bdSEiji Ota cred_t *rs_cred; 474c0dd49bdSEiji Ota zoneid_t rs_zoneid; 475c0dd49bdSEiji Ota }; 476c0dd49bdSEiji Ota 4771a561c76SEiji Ota static inline struct rdsv3_sock * 478c0dd49bdSEiji Ota rdsv3_sk_to_rs(const struct rsock *sk) 479c0dd49bdSEiji Ota { 480c0dd49bdSEiji Ota return ((struct rdsv3_sock *)sk->sk_protinfo); 481c0dd49bdSEiji Ota } 482c0dd49bdSEiji Ota 4831a561c76SEiji Ota static inline struct rsock * 484c0dd49bdSEiji Ota rdsv3_rs_to_sk(const struct rdsv3_sock *rs) 485c0dd49bdSEiji Ota { 486c0dd49bdSEiji Ota return ((struct rsock *)rs->rs_sk); 487c0dd49bdSEiji Ota } 488c0dd49bdSEiji Ota 489c0dd49bdSEiji Ota /* 490c0dd49bdSEiji Ota * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value 491c0dd49bdSEiji Ota * to account for overhead. We don't account for overhead, we just apply 492c0dd49bdSEiji Ota * the number of payload bytes to the specified value. 493c0dd49bdSEiji Ota */ 4941a561c76SEiji Ota static inline int 495c0dd49bdSEiji Ota rdsv3_sk_sndbuf(struct rdsv3_sock *rs) 496c0dd49bdSEiji Ota { 497c0dd49bdSEiji Ota /* XXX */ 498c0dd49bdSEiji Ota return (rdsv3_rs_to_sk(rs)->sk_sndbuf); 499c0dd49bdSEiji Ota } 500c0dd49bdSEiji Ota 5011a561c76SEiji Ota static inline int 502c0dd49bdSEiji Ota rdsv3_sk_rcvbuf(struct rdsv3_sock *rs) 503c0dd49bdSEiji Ota { 504c0dd49bdSEiji Ota /* XXX */ 505c0dd49bdSEiji Ota return (rdsv3_rs_to_sk(rs)->sk_rcvbuf); 506c0dd49bdSEiji Ota } 507c0dd49bdSEiji Ota 508c0dd49bdSEiji Ota struct rdsv3_statistics { 509c0dd49bdSEiji Ota uint64_t s_conn_reset; 510c0dd49bdSEiji Ota uint64_t s_recv_drop_bad_checksum; 511c0dd49bdSEiji Ota uint64_t s_recv_drop_old_seq; 512c0dd49bdSEiji Ota uint64_t s_recv_drop_no_sock; 513c0dd49bdSEiji Ota uint64_t s_recv_drop_dead_sock; 514c0dd49bdSEiji Ota uint64_t s_recv_deliver_raced; 515c0dd49bdSEiji Ota uint64_t s_recv_delivered; 516c0dd49bdSEiji Ota uint64_t s_recv_queued; 517c0dd49bdSEiji Ota uint64_t s_recv_immediate_retry; 518c0dd49bdSEiji Ota uint64_t s_recv_delayed_retry; 519c0dd49bdSEiji Ota uint64_t s_recv_ack_required; 520c0dd49bdSEiji Ota uint64_t s_recv_rdma_bytes; 521c0dd49bdSEiji Ota uint64_t s_recv_ping; 522c0dd49bdSEiji Ota uint64_t s_send_queue_empty; 523c0dd49bdSEiji Ota uint64_t s_send_queue_full; 524c0dd49bdSEiji Ota uint64_t s_send_sem_contention; 525c0dd49bdSEiji Ota uint64_t s_send_sem_queue_raced; 526c0dd49bdSEiji Ota uint64_t s_send_immediate_retry; 527c0dd49bdSEiji Ota uint64_t s_send_delayed_retry; 528c0dd49bdSEiji Ota uint64_t s_send_drop_acked; 529c0dd49bdSEiji Ota uint64_t s_send_ack_required; 530c0dd49bdSEiji Ota uint64_t s_send_queued; 531c0dd49bdSEiji Ota uint64_t s_send_rdma; 532c0dd49bdSEiji Ota uint64_t s_send_rdma_bytes; 533c0dd49bdSEiji Ota uint64_t s_send_pong; 534c0dd49bdSEiji Ota uint64_t s_page_remainder_hit; 535c0dd49bdSEiji Ota uint64_t s_page_remainder_miss; 536c0dd49bdSEiji Ota uint64_t s_copy_to_user; 537c0dd49bdSEiji Ota uint64_t s_copy_from_user; 538c0dd49bdSEiji Ota uint64_t s_cong_update_queued; 539c0dd49bdSEiji Ota uint64_t s_cong_update_received; 540c0dd49bdSEiji Ota uint64_t s_cong_send_error; 541c0dd49bdSEiji Ota uint64_t s_cong_send_blocked; 542c0dd49bdSEiji Ota }; 543c0dd49bdSEiji Ota 544c0dd49bdSEiji Ota /* af_rds.c */ 545c0dd49bdSEiji Ota void rdsv3_sock_addref(struct rdsv3_sock *rs); 546c0dd49bdSEiji Ota void rdsv3_sock_put(struct rdsv3_sock *rs); 547c0dd49bdSEiji Ota void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs); 548c0dd49bdSEiji Ota void __rdsv3_wake_sk_sleep(struct rsock *sk); 549c0dd49bdSEiji Ota 550c0dd49bdSEiji Ota /* bind.c */ 551c0dd49bdSEiji Ota int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 552c0dd49bdSEiji Ota socklen_t len, cred_t *cr); 553c0dd49bdSEiji Ota void rdsv3_remove_bound(struct rdsv3_sock *rs); 554c0dd49bdSEiji Ota struct rdsv3_sock *rdsv3_find_bound(uint32_be_t addr, uint16_be_t port); 555c0dd49bdSEiji Ota 556c0dd49bdSEiji Ota /* conn.c */ 557c0dd49bdSEiji Ota int rdsv3_conn_init(void); 558c0dd49bdSEiji Ota void rdsv3_conn_exit(void); 559c0dd49bdSEiji Ota struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr, 560c0dd49bdSEiji Ota struct rdsv3_transport *trans, int gfp); 561c0dd49bdSEiji Ota struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr, 562c0dd49bdSEiji Ota uint32_be_t faddr, 563c0dd49bdSEiji Ota struct rdsv3_transport *trans, int gfp); 5645d5562f5SEiji Ota void rdsv3_conn_shutdown(struct rdsv3_connection *conn); 565c0dd49bdSEiji Ota void rdsv3_conn_destroy(struct rdsv3_connection *conn); 566c0dd49bdSEiji Ota void rdsv3_conn_reset(struct rdsv3_connection *conn); 567c0dd49bdSEiji Ota void rdsv3_conn_drop(struct rdsv3_connection *conn); 568c0dd49bdSEiji Ota void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len, 569c0dd49bdSEiji Ota struct rdsv3_info_iterator *iter, 570c0dd49bdSEiji Ota struct rdsv3_info_lengths *lens, 571c0dd49bdSEiji Ota int (*visitor)(struct rdsv3_connection *, void *), 572c0dd49bdSEiji Ota size_t item_len); 573c0dd49bdSEiji Ota 574c0dd49bdSEiji Ota static inline int 575c0dd49bdSEiji Ota rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new) 576c0dd49bdSEiji Ota { 577c0dd49bdSEiji Ota return (atomic_cmpxchg(&conn->c_state, old, new) == old); 578c0dd49bdSEiji Ota } 579c0dd49bdSEiji Ota 5801a561c76SEiji Ota static inline int 581c0dd49bdSEiji Ota rdsv3_conn_state(struct rdsv3_connection *conn) 582c0dd49bdSEiji Ota { 583c0dd49bdSEiji Ota return (atomic_get(&conn->c_state)); 584c0dd49bdSEiji Ota } 585c0dd49bdSEiji Ota 5861a561c76SEiji Ota static inline int 587c0dd49bdSEiji Ota rdsv3_conn_up(struct rdsv3_connection *conn) 588c0dd49bdSEiji Ota { 589c0dd49bdSEiji Ota return (atomic_get(&conn->c_state) == RDSV3_CONN_UP); 590c0dd49bdSEiji Ota } 591c0dd49bdSEiji Ota 5921a561c76SEiji Ota static inline int 593c0dd49bdSEiji Ota rdsv3_conn_connecting(struct rdsv3_connection *conn) 594c0dd49bdSEiji Ota { 595c0dd49bdSEiji Ota return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING); 596c0dd49bdSEiji Ota } 597c0dd49bdSEiji Ota 598c0dd49bdSEiji Ota /* recv.c */ 599c0dd49bdSEiji Ota void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn, 600c0dd49bdSEiji Ota uint32_be_t saddr); 601c0dd49bdSEiji Ota void rdsv3_inc_addref(struct rdsv3_incoming *inc); 602c0dd49bdSEiji Ota void rdsv3_inc_put(struct rdsv3_incoming *inc); 603c0dd49bdSEiji Ota void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr, 604c0dd49bdSEiji Ota uint32_be_t daddr, 605c0dd49bdSEiji Ota struct rdsv3_incoming *inc, int gfp); 606c0dd49bdSEiji Ota int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio, 607c0dd49bdSEiji Ota struct msghdr *msg, size_t size, int msg_flags); 608c0dd49bdSEiji Ota void rdsv3_clear_recv_queue(struct rdsv3_sock *rs); 609c0dd49bdSEiji Ota int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg); 610c0dd49bdSEiji Ota void rdsv3_inc_info_copy(struct rdsv3_incoming *inc, 611c0dd49bdSEiji Ota struct rdsv3_info_iterator *iter, 612c0dd49bdSEiji Ota uint32_be_t saddr, uint32_be_t daddr, int flip); 613c0dd49bdSEiji Ota 614c0dd49bdSEiji Ota /* page.c */ 615c0dd49bdSEiji Ota int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat, 616c0dd49bdSEiji Ota unsigned long bytes, int gfp); 617c0dd49bdSEiji Ota 618c0dd49bdSEiji Ota /* send.c */ 619c0dd49bdSEiji Ota int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg, 620c0dd49bdSEiji Ota size_t payload_len); 621c0dd49bdSEiji Ota void rdsv3_send_reset(struct rdsv3_connection *conn); 622c0dd49bdSEiji Ota int rdsv3_send_xmit(struct rdsv3_connection *conn); 623c0dd49bdSEiji Ota struct sockaddr_in; 624c0dd49bdSEiji Ota void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest); 625c0dd49bdSEiji Ota typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack); 626c0dd49bdSEiji Ota void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack, 627c0dd49bdSEiji Ota is_acked_func is_acked); 628c0dd49bdSEiji Ota int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq); 629c0dd49bdSEiji Ota void rdsv3_send_remove_from_sock(struct list *messages, int status); 630c0dd49bdSEiji Ota int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport); 631c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *, 632c0dd49bdSEiji Ota struct rdsv3_rdma_op *); 633c0dd49bdSEiji Ota 634c0dd49bdSEiji Ota /* rdma.c */ 635c0dd49bdSEiji Ota void rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force); 636c0dd49bdSEiji Ota 637c0dd49bdSEiji Ota /* cong.c */ 638c0dd49bdSEiji Ota void rdsv3_cong_init(void); 639c0dd49bdSEiji Ota int rdsv3_cong_get_maps(struct rdsv3_connection *conn); 640c0dd49bdSEiji Ota void rdsv3_cong_add_conn(struct rdsv3_connection *conn); 641c0dd49bdSEiji Ota void rdsv3_cong_remove_conn(struct rdsv3_connection *conn); 642c0dd49bdSEiji Ota void rdsv3_cong_set_bit(struct rdsv3_cong_map *map, uint16_be_t port); 643c0dd49bdSEiji Ota void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port); 644c0dd49bdSEiji Ota int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock, 645c0dd49bdSEiji Ota struct rdsv3_sock *rs); 646c0dd49bdSEiji Ota void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map); 647c0dd49bdSEiji Ota void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t); 648c0dd49bdSEiji Ota int rdsv3_cong_updated_since(unsigned long *recent); 649c0dd49bdSEiji Ota void rdsv3_cong_add_socket(struct rdsv3_sock *); 650c0dd49bdSEiji Ota void rdsv3_cong_remove_socket(struct rdsv3_sock *); 651c0dd49bdSEiji Ota void rdsv3_cong_exit(void); 652c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn); 653c0dd49bdSEiji Ota 654c0dd49bdSEiji Ota /* stats.c */ 655c0dd49bdSEiji Ota RDSV3_DECLARE_PER_CPU(struct rdsv3_statistics, rdsv3_stats); 656c0dd49bdSEiji Ota #define rdsv3_stats_inc_which(which, member) do { \ 657c0dd49bdSEiji Ota rdsv3_per_cpu(which, get_cpu()).member++; \ 658c0dd49bdSEiji Ota put_cpu(); \ 659c0dd49bdSEiji Ota } while (0) 660c0dd49bdSEiji Ota #define rdsv3_stats_inc(member) rdsv3_stats_inc_which(rdsv3_stats, member) 661c0dd49bdSEiji Ota #define rdsv3_stats_add_which(which, member, count) do { \ 662c0dd49bdSEiji Ota rdsv3_per_cpu(which, get_cpu()).member += count; \ 663c0dd49bdSEiji Ota put_cpu(); \ 664c0dd49bdSEiji Ota } while (0) 665c0dd49bdSEiji Ota #define rdsv3_stats_add(member, count) \ 666c0dd49bdSEiji Ota rdsv3_stats_add_which(rdsv3_stats, member, count) 667c0dd49bdSEiji Ota int rdsv3_stats_init(void); 668c0dd49bdSEiji Ota void rdsv3_stats_exit(void); 669c0dd49bdSEiji Ota void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter, 670c0dd49bdSEiji Ota uint64_t *values, char **names, size_t nr); 671c0dd49bdSEiji Ota 672c0dd49bdSEiji Ota 673c0dd49bdSEiji Ota /* sysctl.c */ 674c0dd49bdSEiji Ota int rdsv3_sysctl_init(void); 675c0dd49bdSEiji Ota void rdsv3_sysctl_exit(void); 676c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_min; 677c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_default; 678c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_max; 679c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_reconnect_min_jiffies; 680c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_reconnect_max_jiffies; 681c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_max_unacked_packets; 682c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_max_unacked_bytes; 683c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_ping_enable; 684c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_trace_flags; 685c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_trace_level; 686c0dd49bdSEiji Ota 687c0dd49bdSEiji Ota /* threads.c */ 688c0dd49bdSEiji Ota int rdsv3_threads_init(); 689c0dd49bdSEiji Ota void rdsv3_threads_exit(void); 690c0dd49bdSEiji Ota extern struct rdsv3_workqueue_struct_s *rdsv3_wq; 6915d5562f5SEiji Ota void rdsv3_queue_reconnect(struct rdsv3_connection *conn); 692c0dd49bdSEiji Ota void rdsv3_connect_worker(struct rdsv3_work_s *); 693c0dd49bdSEiji Ota void rdsv3_shutdown_worker(struct rdsv3_work_s *); 694c0dd49bdSEiji Ota void rdsv3_send_worker(struct rdsv3_work_s *); 695c0dd49bdSEiji Ota void rdsv3_recv_worker(struct rdsv3_work_s *); 6965d5562f5SEiji Ota void rdsv3_reaper_worker(struct rdsv3_work_s *); 697c0dd49bdSEiji Ota void rdsv3_connect_complete(struct rdsv3_connection *conn); 698c0dd49bdSEiji Ota 699c0dd49bdSEiji Ota /* transport.c */ 700c0dd49bdSEiji Ota int rdsv3_trans_register(struct rdsv3_transport *trans); 701c0dd49bdSEiji Ota void rdsv3_trans_unregister(struct rdsv3_transport *trans); 702c0dd49bdSEiji Ota struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr); 703c0dd49bdSEiji Ota unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter, 704c0dd49bdSEiji Ota unsigned int avail); 705c0dd49bdSEiji Ota void rdsv3_trans_exit(void); 706c0dd49bdSEiji Ota 707c0dd49bdSEiji Ota /* message.c */ 708c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp); 709c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop, 710c0dd49bdSEiji Ota size_t total_len); 711c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs, 712c0dd49bdSEiji Ota unsigned int total_len); 713c0dd49bdSEiji Ota void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 714c0dd49bdSEiji Ota uint16_be_t dport, uint64_t seq); 715c0dd49bdSEiji Ota int rdsv3_message_add_extension(struct rdsv3_header *hdr, 716c0dd49bdSEiji Ota unsigned int type, const void *data, unsigned int len); 717c0dd49bdSEiji Ota int rdsv3_message_next_extension(struct rdsv3_header *hdr, 718c0dd49bdSEiji Ota unsigned int *pos, void *buf, unsigned int *buflen); 719c0dd49bdSEiji Ota int rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 720c0dd49bdSEiji Ota unsigned int version); 721c0dd49bdSEiji Ota int rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 722c0dd49bdSEiji Ota unsigned int *version); 723c0dd49bdSEiji Ota int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, 724c0dd49bdSEiji Ota uint32_t r_key, uint32_t offset); 725c0dd49bdSEiji Ota int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc, 726c0dd49bdSEiji Ota uio_t *uio, size_t size); 727c0dd49bdSEiji Ota void rdsv3_message_inc_free(struct rdsv3_incoming *inc); 728c0dd49bdSEiji Ota void rdsv3_message_addref(struct rdsv3_message *rm); 729c0dd49bdSEiji Ota void rdsv3_message_put(struct rdsv3_message *rm); 730c0dd49bdSEiji Ota void rdsv3_message_wait(struct rdsv3_message *rm); 731c0dd49bdSEiji Ota void rdsv3_message_unmapped(struct rdsv3_message *rm); 732c0dd49bdSEiji Ota 7331a561c76SEiji Ota static inline void 734c0dd49bdSEiji Ota rdsv3_message_make_checksum(struct rdsv3_header *hdr) 735c0dd49bdSEiji Ota { 736c0dd49bdSEiji Ota hdr->h_csum = 0; 737c0dd49bdSEiji Ota hdr->h_csum = 738c0dd49bdSEiji Ota rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2); 739c0dd49bdSEiji Ota } 740c0dd49bdSEiji Ota 7411a561c76SEiji Ota static inline int 742c0dd49bdSEiji Ota rdsv3_message_verify_checksum(const struct rdsv3_header *hdr) 743c0dd49bdSEiji Ota { 744c0dd49bdSEiji Ota return (!hdr->h_csum || 745c0dd49bdSEiji Ota rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0); 746c0dd49bdSEiji Ota } 747c0dd49bdSEiji Ota 748c0dd49bdSEiji Ota /* rdsv3_sc.c */ 749c0dd49bdSEiji Ota extern boolean_t rdsv3_if_lookup_by_name(char *if_name); 750c0dd49bdSEiji Ota extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip); 751c0dd49bdSEiji Ota extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr); 752c0dd49bdSEiji Ota 753c0dd49bdSEiji Ota #ifdef __cplusplus 754c0dd49bdSEiji Ota } 755c0dd49bdSEiji Ota #endif 756c0dd49bdSEiji Ota 757c0dd49bdSEiji Ota #endif /* _RDSV3_RDSV3_H */ 758