1c0dd49bdSEiji Ota /* 2c0dd49bdSEiji Ota * CDDL HEADER START 3c0dd49bdSEiji Ota * 4c0dd49bdSEiji Ota * The contents of this file are subject to the terms of the 5c0dd49bdSEiji Ota * Common Development and Distribution License (the "License"). 6c0dd49bdSEiji Ota * You may not use this file except in compliance with the License. 7c0dd49bdSEiji Ota * 8c0dd49bdSEiji Ota * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9c0dd49bdSEiji Ota * or http://www.opensolaris.org/os/licensing. 10c0dd49bdSEiji Ota * See the License for the specific language governing permissions 11c0dd49bdSEiji Ota * and limitations under the License. 12c0dd49bdSEiji Ota * 13c0dd49bdSEiji Ota * When distributing Covered Code, include this CDDL HEADER in each 14c0dd49bdSEiji Ota * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15c0dd49bdSEiji Ota * If applicable, add the following below this CDDL HEADER, with the 16c0dd49bdSEiji Ota * fields enclosed by brackets "[]" replaced with your own identifying 17c0dd49bdSEiji Ota * information: Portions Copyright [yyyy] [name of copyright owner] 18c0dd49bdSEiji Ota * 19c0dd49bdSEiji Ota * CDDL HEADER END 20c0dd49bdSEiji Ota */ 21c0dd49bdSEiji Ota /* 22c0dd49bdSEiji Ota * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23c0dd49bdSEiji Ota */ 24c0dd49bdSEiji Ota 25c0dd49bdSEiji Ota #ifndef _RDSV3_RDSV3_H 26c0dd49bdSEiji Ota #define _RDSV3_RDSV3_H 27c0dd49bdSEiji Ota 28c0dd49bdSEiji Ota /* 29c0dd49bdSEiji Ota * The name of this file is rds.h in ofed. 
30c0dd49bdSEiji Ota */ 31c0dd49bdSEiji Ota 32c0dd49bdSEiji Ota #ifdef __cplusplus 33c0dd49bdSEiji Ota extern "C" { 34c0dd49bdSEiji Ota #endif 35c0dd49bdSEiji Ota 36c0dd49bdSEiji Ota #include <sys/sunndi.h> 37c0dd49bdSEiji Ota #include <netinet/in.h> 38c0dd49bdSEiji Ota #include <sys/synch.h> 39c0dd49bdSEiji Ota #include <sys/stropts.h> 40c0dd49bdSEiji Ota #include <sys/socket.h> 41c0dd49bdSEiji Ota #include <sys/socketvar.h> 42c0dd49bdSEiji Ota #include <inet/ip.h> 43c0dd49bdSEiji Ota #include <sys/avl.h> 44c0dd49bdSEiji Ota #include <sys/param.h> 45*5d5562f5SEiji Ota #include <sys/time.h> 46c0dd49bdSEiji Ota #include <sys/rds.h> 47c0dd49bdSEiji Ota 48c0dd49bdSEiji Ota #include <sys/ib/ibtl/ibti.h> 49c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_verbs.h> 50c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_addr.h> 51c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/rdma_cm.h> 52c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_impl.h> 53c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/info.h> 54c0dd49bdSEiji Ota 55c0dd49bdSEiji Ota #define NIPQUAD(addr) \ 56c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 24) & 0xFF), \ 57c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 16) & 0xFF), \ 58c0dd49bdSEiji Ota (unsigned char)((ntohl(addr) >> 8) & 0xFF), \ 59c0dd49bdSEiji Ota (unsigned char)(ntohl(addr) & 0xFF) 60c0dd49bdSEiji Ota 61c0dd49bdSEiji Ota /* 62c0dd49bdSEiji Ota * RDS Network protocol version 63c0dd49bdSEiji Ota */ 64c0dd49bdSEiji Ota #define RDS_PROTOCOL_3_0 0x0300 65c0dd49bdSEiji Ota #define RDS_PROTOCOL_3_1 0x0301 66c0dd49bdSEiji Ota #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 67c0dd49bdSEiji Ota #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) 68c0dd49bdSEiji Ota #define RDS_PROTOCOL_MINOR(v) ((v) & 255) 69c0dd49bdSEiji Ota #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) 70c0dd49bdSEiji Ota 71c0dd49bdSEiji Ota /* 72c0dd49bdSEiji Ota * XXX randomly chosen, but at least seems to be unused: 73c0dd49bdSEiji Ota * # 18464-18768 Unassigned 
74c0dd49bdSEiji Ota * We should do better. We want a reserved port to discourage unpriv'ed 75c0dd49bdSEiji Ota * userspace from listening. 76c0dd49bdSEiji Ota * 77c0dd49bdSEiji Ota * port 18633 was the version that had ack frames on the wire. 78c0dd49bdSEiji Ota */ 79c0dd49bdSEiji Ota #define RDSV3_PORT 18634 80c0dd49bdSEiji Ota 81*5d5562f5SEiji Ota #define RDSV3_REAPER_WAIT_SECS (5*60) 82*5d5562f5SEiji Ota #define RDSV3_REAPER_WAIT_JIFFIES SEC_TO_TICK(RDSV3_REAPER_WAIT_SECS) 83*5d5562f5SEiji Ota 84c0dd49bdSEiji Ota /* 85c0dd49bdSEiji Ota * This is the sad making. Some kernels have a bug in the per_cpu() api which 86c0dd49bdSEiji Ota * makes DEFINE_PER_CPU trigger an oops on insmod because the per-cpu section 87c0dd49bdSEiji Ota * in the module is not cacheline-aligned. As much as we'd like to tell users 88c0dd49bdSEiji Ota * with older kernels to stuff it, that's not reasonable. We'll roll our own 89c0dd49bdSEiji Ota * until this doesn't have to build against older kernels. 90c0dd49bdSEiji Ota */ 91c0dd49bdSEiji Ota #define RDSV3_DEFINE_PER_CPU(type, var) type var[NR_CPUS] 92c0dd49bdSEiji Ota #define RDSV3_DECLARE_PER_CPU(type, var) extern type var[NR_CPUS] 93c0dd49bdSEiji Ota #define rdsv3_per_cpu(var, cpu) var[cpu] 94c0dd49bdSEiji Ota 95c0dd49bdSEiji Ota static inline ulong_t 96c0dd49bdSEiji Ota ceil(ulong_t x, ulong_t y) 97c0dd49bdSEiji Ota { 98c0dd49bdSEiji Ota return ((x + y - 1) / y); 99c0dd49bdSEiji Ota } 100c0dd49bdSEiji Ota 101c0dd49bdSEiji Ota #define RDSV3_FRAG_SHIFT 12 102c0dd49bdSEiji Ota #define RDSV3_FRAG_SIZE ((unsigned int)(1 << RDSV3_FRAG_SHIFT)) 103c0dd49bdSEiji Ota 104c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_BYTES (65536 / 8) 105c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_LONGS (RDSV3_CONG_MAP_BYTES / sizeof (unsigned long)) 106c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_PAGES (RDSV3_CONG_MAP_BYTES / PAGE_SIZE) 107c0dd49bdSEiji Ota #define RDSV3_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) 108c0dd49bdSEiji Ota 109c0dd49bdSEiji Ota struct rdsv3_cong_map { 
110c0dd49bdSEiji Ota struct avl_node m_rb_node; 111c0dd49bdSEiji Ota uint32_be_t m_addr; 112c0dd49bdSEiji Ota rdsv3_wait_queue_t m_waitq; 113c0dd49bdSEiji Ota struct list m_conn_list; 114c0dd49bdSEiji Ota unsigned long m_page_addrs[RDSV3_CONG_MAP_PAGES]; 115c0dd49bdSEiji Ota }; 116c0dd49bdSEiji Ota 117c0dd49bdSEiji Ota /* 118c0dd49bdSEiji Ota * This is how we will track the connection state: 119c0dd49bdSEiji Ota * A connection is always in one of the following 120c0dd49bdSEiji Ota * states. Updates to the state are atomic and imply 121c0dd49bdSEiji Ota * a memory barrier. 122c0dd49bdSEiji Ota */ 123c0dd49bdSEiji Ota enum { 124c0dd49bdSEiji Ota RDSV3_CONN_DOWN = 0, 125c0dd49bdSEiji Ota RDSV3_CONN_CONNECTING, 126c0dd49bdSEiji Ota RDSV3_CONN_DISCONNECTING, 127c0dd49bdSEiji Ota RDSV3_CONN_UP, 128c0dd49bdSEiji Ota RDSV3_CONN_ERROR, 129c0dd49bdSEiji Ota }; 130c0dd49bdSEiji Ota 131c0dd49bdSEiji Ota /* Bits for c_flags */ 132c0dd49bdSEiji Ota #define RDSV3_LL_SEND_FULL 0 133c0dd49bdSEiji Ota #define RDSV3_RECONNECT_PENDING 1 134c0dd49bdSEiji Ota 135c0dd49bdSEiji Ota struct rdsv3_connection { 136c0dd49bdSEiji Ota struct avl_node c_hash_node; 137c0dd49bdSEiji Ota uint32_be_t c_laddr; 138c0dd49bdSEiji Ota uint32_be_t c_faddr; 139c0dd49bdSEiji Ota unsigned int c_loopback:1; 140c0dd49bdSEiji Ota struct rdsv3_connection *c_passive; 141c0dd49bdSEiji Ota 142c0dd49bdSEiji Ota struct rdsv3_cong_map *c_lcong; 143c0dd49bdSEiji Ota struct rdsv3_cong_map *c_fcong; 144c0dd49bdSEiji Ota 145c0dd49bdSEiji Ota struct mutex c_send_lock; /* protect send ring */ 146*5d5562f5SEiji Ota atomic_t c_send_generation; 147*5d5562f5SEiji Ota atomic_t c_senders; 148*5d5562f5SEiji Ota 149c0dd49bdSEiji Ota struct rdsv3_message *c_xmit_rm; 150c0dd49bdSEiji Ota unsigned long c_xmit_sg; 151c0dd49bdSEiji Ota unsigned int c_xmit_hdr_off; 152c0dd49bdSEiji Ota unsigned int c_xmit_data_off; 153c0dd49bdSEiji Ota unsigned int c_xmit_rdma_sent; 154c0dd49bdSEiji Ota 155c0dd49bdSEiji Ota kmutex_t c_lock; /* protect msg 
queues */ 156c0dd49bdSEiji Ota uint64_t c_next_tx_seq; 157c0dd49bdSEiji Ota struct list c_send_queue; 158c0dd49bdSEiji Ota struct list c_retrans; 159c0dd49bdSEiji Ota 160c0dd49bdSEiji Ota uint64_t c_next_rx_seq; 161c0dd49bdSEiji Ota 162c0dd49bdSEiji Ota struct rdsv3_transport *c_trans; 163c0dd49bdSEiji Ota void *c_transport_data; 164c0dd49bdSEiji Ota 165c0dd49bdSEiji Ota atomic_t c_state; 166c0dd49bdSEiji Ota unsigned long c_flags; 167c0dd49bdSEiji Ota unsigned long c_reconnect_jiffies; 168*5d5562f5SEiji Ota clock_t c_last_connect_jiffies; 169*5d5562f5SEiji Ota 170c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_send_w; 171c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_recv_w; 172c0dd49bdSEiji Ota struct rdsv3_delayed_work_s c_conn_w; 173*5d5562f5SEiji Ota struct rdsv3_delayed_work_s c_reap_w; 174c0dd49bdSEiji Ota struct rdsv3_work_s c_down_w; 175c0dd49bdSEiji Ota struct mutex c_cm_lock; /* protect conn state & cm */ 176c0dd49bdSEiji Ota 177c0dd49bdSEiji Ota struct list_node c_map_item; 178c0dd49bdSEiji Ota unsigned long c_map_queued; 179c0dd49bdSEiji Ota unsigned long c_map_offset; 180c0dd49bdSEiji Ota unsigned long c_map_bytes; 181c0dd49bdSEiji Ota 182c0dd49bdSEiji Ota unsigned int c_unacked_packets; 183c0dd49bdSEiji Ota unsigned int c_unacked_bytes; 184c0dd49bdSEiji Ota 185c0dd49bdSEiji Ota /* Protocol version */ 186c0dd49bdSEiji Ota unsigned int c_version; 187c0dd49bdSEiji Ota }; 188c0dd49bdSEiji Ota 189c0dd49bdSEiji Ota #define RDSV3_FLAG_CONG_BITMAP 0x01 190c0dd49bdSEiji Ota #define RDSV3_FLAG_ACK_REQUIRED 0x02 191c0dd49bdSEiji Ota #define RDSV3_FLAG_RETRANSMITTED 0x04 192cadbfdc3SEiji Ota #define RDSV3_MAX_ADV_CREDIT 127 193c0dd49bdSEiji Ota 194c0dd49bdSEiji Ota /* 195c0dd49bdSEiji Ota * Maximum space available for extension headers. 
196c0dd49bdSEiji Ota */ 197c0dd49bdSEiji Ota #define RDSV3_HEADER_EXT_SPACE 16 198c0dd49bdSEiji Ota 199c0dd49bdSEiji Ota struct rdsv3_header { 200c0dd49bdSEiji Ota uint64_be_t h_sequence; 201c0dd49bdSEiji Ota uint64_be_t h_ack; 202c0dd49bdSEiji Ota uint32_be_t h_len; 203c0dd49bdSEiji Ota uint16_be_t h_sport; 204c0dd49bdSEiji Ota uint16_be_t h_dport; 205c0dd49bdSEiji Ota uint8_t h_flags; 206c0dd49bdSEiji Ota uint8_t h_credit; 207c0dd49bdSEiji Ota uint8_t h_padding[4]; 208c0dd49bdSEiji Ota uint16_be_t h_csum; 209c0dd49bdSEiji Ota 210c0dd49bdSEiji Ota uint8_t h_exthdr[RDSV3_HEADER_EXT_SPACE]; 211c0dd49bdSEiji Ota }; 212c0dd49bdSEiji Ota 213c0dd49bdSEiji Ota /* Reserved - indicates end of extensions */ 214c0dd49bdSEiji Ota #define RDSV3_EXTHDR_NONE 0 215c0dd49bdSEiji Ota 216c0dd49bdSEiji Ota /* 217c0dd49bdSEiji Ota * This extension header is included in the very 218c0dd49bdSEiji Ota * first message that is sent on a new connection, 219c0dd49bdSEiji Ota * and identifies the protocol level. This will help 220c0dd49bdSEiji Ota * rolling updates if a future change requires breaking 221c0dd49bdSEiji Ota * the protocol. 222c0dd49bdSEiji Ota */ 223c0dd49bdSEiji Ota #define RDSV3_EXTHDR_VERSION 1 224c0dd49bdSEiji Ota struct rdsv3_ext_header_version { 225c0dd49bdSEiji Ota uint32_be_t h_version; 226c0dd49bdSEiji Ota }; 227c0dd49bdSEiji Ota 228c0dd49bdSEiji Ota /* 229c0dd49bdSEiji Ota * This extension header is included in the RDS message 230c0dd49bdSEiji Ota * chasing an RDMA operation. 231c0dd49bdSEiji Ota */ 232c0dd49bdSEiji Ota #define RDSV3_EXTHDR_RDMA 2 233c0dd49bdSEiji Ota struct rdsv3_ext_header_rdma { 234c0dd49bdSEiji Ota uint32_be_t h_rdma_rkey; 235c0dd49bdSEiji Ota }; 236c0dd49bdSEiji Ota 237c0dd49bdSEiji Ota /* 238c0dd49bdSEiji Ota * This extension header tells the peer about the 239c0dd49bdSEiji Ota * destination <R_Key,offset> of the requested RDMA 240c0dd49bdSEiji Ota * operation. 
241c0dd49bdSEiji Ota */ 242c0dd49bdSEiji Ota #define RDSV3_EXTHDR_RDMA_DEST 3 243c0dd49bdSEiji Ota struct rdsv3_ext_header_rdma_dest { 244c0dd49bdSEiji Ota uint32_be_t h_rdma_rkey; 245c0dd49bdSEiji Ota uint32_be_t h_rdma_offset; 246c0dd49bdSEiji Ota }; 247c0dd49bdSEiji Ota 248c0dd49bdSEiji Ota #define __RDSV3_EXTHDR_MAX 16 /* for now */ 249c0dd49bdSEiji Ota 250c0dd49bdSEiji Ota struct rdsv3_incoming { 251c0dd49bdSEiji Ota atomic_t i_refcount; 252c0dd49bdSEiji Ota struct list_node i_item; 253c0dd49bdSEiji Ota struct rdsv3_connection *i_conn; 254c0dd49bdSEiji Ota struct rdsv3_header i_hdr; 255c0dd49bdSEiji Ota unsigned long i_rx_jiffies; 256c0dd49bdSEiji Ota uint32_be_t i_saddr; 257c0dd49bdSEiji Ota 258c0dd49bdSEiji Ota rdsv3_rdma_cookie_t i_rdma_cookie; 259c0dd49bdSEiji Ota }; 260c0dd49bdSEiji Ota 261c0dd49bdSEiji Ota /* 262c0dd49bdSEiji Ota * m_sock_item and m_conn_item are on lists that are serialized under 263c0dd49bdSEiji Ota * conn->c_lock. m_sock_item has additional meaning in that once it is empty 264c0dd49bdSEiji Ota * the message will not be put back on the retransmit list after being sent. 265c0dd49bdSEiji Ota * messages that are canceled while being sent rely on this. 266c0dd49bdSEiji Ota * 267c0dd49bdSEiji Ota * m_inc is used by loopback so that it can pass an incoming message straight 268c0dd49bdSEiji Ota * back up into the rx path. It embeds a wire header which is also used by 269c0dd49bdSEiji Ota * the send path, which is kind of awkward. 270c0dd49bdSEiji Ota * 271c0dd49bdSEiji Ota * m_sock_item indicates the message's presence on a socket's send or receive 272c0dd49bdSEiji Ota * queue. m_rs will point to that socket. 273c0dd49bdSEiji Ota * 274c0dd49bdSEiji Ota * m_daddr is used by cancellation to prune messages to a given destination. 275c0dd49bdSEiji Ota * 276c0dd49bdSEiji Ota * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock 277c0dd49bdSEiji Ota * nesting. 
As paths iterate over messages on a sock, or conn, they must 278c0dd49bdSEiji Ota * also lock the conn, or sock, to remove the message from those lists too. 279c0dd49bdSEiji Ota * Testing the flag to determine if the message is still on the lists lets 280c0dd49bdSEiji Ota * us avoid testing the list_head directly. That means each path can use 281c0dd49bdSEiji Ota * the message's list_head to keep it on a local list while juggling locks 282c0dd49bdSEiji Ota * without confusing the other path. 283c0dd49bdSEiji Ota * 284c0dd49bdSEiji Ota * m_ack_seq is an optional field set by transports who need a different 285c0dd49bdSEiji Ota * sequence number range to invalidate. They can use this in a callback 286c0dd49bdSEiji Ota * that they pass to rdsv3_send_drop_acked() to see if each message has been 287c0dd49bdSEiji Ota * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't 288c0dd49bdSEiji Ota * had ack_seq set yet. 289c0dd49bdSEiji Ota */ 290c0dd49bdSEiji Ota #define RDSV3_MSG_ON_SOCK 1 291c0dd49bdSEiji Ota #define RDSV3_MSG_ON_CONN 2 292c0dd49bdSEiji Ota #define RDSV3_MSG_HAS_ACK_SEQ 3 293c0dd49bdSEiji Ota #define RDSV3_MSG_ACK_REQUIRED 4 294c0dd49bdSEiji Ota #define RDSV3_MSG_RETRANSMITTED 5 295c0dd49bdSEiji Ota #define RDSV3_MSG_MAPPED 6 296c0dd49bdSEiji Ota #define RDSV3_MSG_PAGEVEC 7 297c0dd49bdSEiji Ota 298c0dd49bdSEiji Ota struct rdsv3_message { 299c0dd49bdSEiji Ota atomic_t m_refcount; 300c0dd49bdSEiji Ota struct list_node m_sock_item; 301c0dd49bdSEiji Ota struct list_node m_conn_item; 302c0dd49bdSEiji Ota struct rdsv3_incoming m_inc; 303c0dd49bdSEiji Ota uint64_t m_ack_seq; 304c0dd49bdSEiji Ota uint32_be_t m_daddr; 305c0dd49bdSEiji Ota unsigned long m_flags; 306c0dd49bdSEiji Ota 307c0dd49bdSEiji Ota /* 308c0dd49bdSEiji Ota * Never access m_rs without holding m_rs_lock. 
309c0dd49bdSEiji Ota * Lock nesting is 310c0dd49bdSEiji Ota * rm->m_rs_lock 311c0dd49bdSEiji Ota * -> rs->rs_lock 312c0dd49bdSEiji Ota */ 313c0dd49bdSEiji Ota kmutex_t m_rs_lock; 314*5d5562f5SEiji Ota rdsv3_wait_queue_t m_flush_wait; 315*5d5562f5SEiji Ota 316c0dd49bdSEiji Ota struct rdsv3_sock *m_rs; 317c0dd49bdSEiji Ota struct rdsv3_rdma_op *m_rdma_op; 318c0dd49bdSEiji Ota rdsv3_rdma_cookie_t m_rdma_cookie; 319c0dd49bdSEiji Ota struct rdsv3_mr *m_rdma_mr; 320c0dd49bdSEiji Ota unsigned int m_nents; 321c0dd49bdSEiji Ota unsigned int m_count; 322c0dd49bdSEiji Ota struct rdsv3_scatterlist m_sg[1]; 323c0dd49bdSEiji Ota }; 324c0dd49bdSEiji Ota 325c0dd49bdSEiji Ota /* 326c0dd49bdSEiji Ota * The RDS notifier is used (optionally) to tell the application about 327c0dd49bdSEiji Ota * completed RDMA operations. Rather than keeping the whole rds message 328c0dd49bdSEiji Ota * around on the queue, we allocate a small notifier that is put on the 329c0dd49bdSEiji Ota * socket's notifier_list. Notifications are delivered to the application 330c0dd49bdSEiji Ota * through control messages. 331c0dd49bdSEiji Ota */ 332c0dd49bdSEiji Ota struct rdsv3_notifier { 333c0dd49bdSEiji Ota list_node_t n_list; 334c0dd49bdSEiji Ota uint64_t n_user_token; 335c0dd49bdSEiji Ota int n_status; 336c0dd49bdSEiji Ota }; 337c0dd49bdSEiji Ota 338c0dd49bdSEiji Ota /* 339c0dd49bdSEiji Ota * struct rdsv3_transport - transport specific behavioural hooks 340c0dd49bdSEiji Ota * 341c0dd49bdSEiji Ota * @xmit: .xmit is called by rdsv3_send_xmit() to tell the transport to send 342c0dd49bdSEiji Ota * part of a message. The caller serializes on the send_sem so this 343c0dd49bdSEiji Ota * doesn't need to be reentrant for a given conn. The header must be 344c0dd49bdSEiji Ota * sent before the data payload. .xmit must be prepared to send a 345c0dd49bdSEiji Ota * message with no data payload. .xmit should return the number of 346c0dd49bdSEiji Ota * bytes that were sent down the connection, including header bytes. 
*        Returning 0 tells the caller that it doesn't need to perform any
 *        additional work now.  This is usually the case when the transport has
 *        filled the sending queue for its connection and will handle
 *        triggering the rds thread to continue the send when space becomes
 *        available.  Returning -EAGAIN tells the caller to retry the send
 *        immediately.  Returning -ENOMEM tells the caller to retry the send at
 *        some point in the future.
 *
 * @conn_shutdown: conn_shutdown stops traffic on the given connection.  Once
 *                 it returns the connection can not call rdsv3_recv_incoming().
 *                 This will only be called once after conn_connect returns
 *                 non-zero success.  The caller serializes this with
 *                 the send and connecting paths (xmit_* and conn_*).  The
 *                 transport is responsible for other serialization, including
 *                 rdsv3_recv_incoming().  This is called in process context but
 *                 should try hard not to block.
 *
 * @xmit_cong_map: This asks the transport to send the local bitmap down the
 *                 given connection.  XXX get a better story about the bitmap
 *                 flag and header.
367c0dd49bdSEiji Ota */ 368c0dd49bdSEiji Ota 369cadbfdc3SEiji Ota #define RDS_TRANS_IB 0 370cadbfdc3SEiji Ota #define RDS_TRANS_IWARP 1 371cadbfdc3SEiji Ota #define RDS_TRANS_TCP 2 372cadbfdc3SEiji Ota #define RDS_TRANS_COUNT 3 373cadbfdc3SEiji Ota 374c0dd49bdSEiji Ota struct rdsv3_transport { 375cadbfdc3SEiji Ota char t_name[TRANSNAMSIZ]; 376c0dd49bdSEiji Ota struct list_node t_item; 377cadbfdc3SEiji Ota unsigned int t_type; 378c0dd49bdSEiji Ota unsigned int t_prefer_loopback:1; 379c0dd49bdSEiji Ota 380c0dd49bdSEiji Ota int (*laddr_check)(uint32_be_t addr); 381c0dd49bdSEiji Ota int (*conn_alloc)(struct rdsv3_connection *conn, int gfp); 382c0dd49bdSEiji Ota void (*conn_free)(void *data); 383c0dd49bdSEiji Ota int (*conn_connect)(struct rdsv3_connection *conn); 384c0dd49bdSEiji Ota void (*conn_shutdown)(struct rdsv3_connection *conn); 385c0dd49bdSEiji Ota void (*xmit_prepare)(struct rdsv3_connection *conn); 386c0dd49bdSEiji Ota void (*xmit_complete)(struct rdsv3_connection *conn); 387c0dd49bdSEiji Ota int (*xmit)(struct rdsv3_connection *conn, struct rdsv3_message *rm, 388c0dd49bdSEiji Ota unsigned int hdr_off, unsigned int sg, unsigned int off); 389c0dd49bdSEiji Ota int (*xmit_cong_map)(struct rdsv3_connection *conn, 390c0dd49bdSEiji Ota struct rdsv3_cong_map *map, unsigned long offset); 391c0dd49bdSEiji Ota int (*xmit_rdma)(struct rdsv3_connection *conn, 392c0dd49bdSEiji Ota struct rdsv3_rdma_op *op); 393c0dd49bdSEiji Ota int (*recv)(struct rdsv3_connection *conn); 394c0dd49bdSEiji Ota int (*inc_copy_to_user)(struct rdsv3_incoming *inc, uio_t *uio, 395c0dd49bdSEiji Ota size_t size); 396c0dd49bdSEiji Ota void (*inc_free)(struct rdsv3_incoming *inc); 397c0dd49bdSEiji Ota 398c0dd49bdSEiji Ota int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 399c0dd49bdSEiji Ota struct rdma_cm_event *event); 400c0dd49bdSEiji Ota int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); 401c0dd49bdSEiji Ota void (*cm_connect_complete)(struct rdsv3_connection *conn, 402c0dd49bdSEiji Ota 
struct rdma_cm_event *event); 403c0dd49bdSEiji Ota 404c0dd49bdSEiji Ota unsigned int (*stats_info_copy)(struct rdsv3_info_iterator *iter, 405c0dd49bdSEiji Ota unsigned int avail); 406c0dd49bdSEiji Ota void (*exit)(void); 407c0dd49bdSEiji Ota void *(*get_mr)(struct rdsv3_iovec *sg, unsigned long nr_sg, 408c0dd49bdSEiji Ota struct rdsv3_sock *rs, uint32_t *key_ret); 409c0dd49bdSEiji Ota void (*sync_mr)(void *trans_private, int direction); 410c0dd49bdSEiji Ota void (*free_mr)(void *trans_private, int invalidate); 411c0dd49bdSEiji Ota void (*flush_mrs)(void); 412c0dd49bdSEiji Ota }; 413c0dd49bdSEiji Ota 414c0dd49bdSEiji Ota struct rdsv3_sock { 415c0dd49bdSEiji Ota struct rsock *rs_sk; 416c0dd49bdSEiji Ota uint64_t rs_user_addr; 417c0dd49bdSEiji Ota uint64_t rs_user_bytes; 418c0dd49bdSEiji Ota 419c0dd49bdSEiji Ota /* 420c0dd49bdSEiji Ota * bound_addr used for both incoming and outgoing, no INADDR_ANY 421c0dd49bdSEiji Ota * support. 422c0dd49bdSEiji Ota */ 423c0dd49bdSEiji Ota struct avl_node rs_bound_node; 424c0dd49bdSEiji Ota uint32_be_t rs_bound_addr; 425c0dd49bdSEiji Ota uint32_be_t rs_conn_addr; 426c0dd49bdSEiji Ota uint16_be_t rs_bound_port; 427c0dd49bdSEiji Ota uint16_be_t rs_conn_port; 428c0dd49bdSEiji Ota 429c0dd49bdSEiji Ota /* 430c0dd49bdSEiji Ota * This is only used to communicate the transport between bind and 431c0dd49bdSEiji Ota * initiating connections. All other trans use is referenced through 432c0dd49bdSEiji Ota * the connection. 433c0dd49bdSEiji Ota */ 434c0dd49bdSEiji Ota struct rdsv3_transport *rs_transport; 435c0dd49bdSEiji Ota 436c0dd49bdSEiji Ota /* 437c0dd49bdSEiji Ota * rdsv3_sendmsg caches the conn it used the last time around. 438c0dd49bdSEiji Ota * This helps avoid costly lookups. 
439c0dd49bdSEiji Ota */ 440c0dd49bdSEiji Ota struct rdsv3_connection *rs_conn; 441c0dd49bdSEiji Ota kmutex_t rs_conn_lock; 442c0dd49bdSEiji Ota 443c0dd49bdSEiji Ota /* flag indicating we were congested or not */ 444c0dd49bdSEiji Ota int rs_congested; 445cadbfdc3SEiji Ota /* seen congestion (ENOBUFS) when sending? */ 446cadbfdc3SEiji Ota int rs_seen_congestion; 447*5d5562f5SEiji Ota kmutex_t rs_congested_lock; 448*5d5562f5SEiji Ota kcondvar_t rs_congested_cv; 449c0dd49bdSEiji Ota 450c0dd49bdSEiji Ota /* rs_lock protects all these adjacent members before the newline */ 451c0dd49bdSEiji Ota kmutex_t rs_lock; 452c0dd49bdSEiji Ota struct list rs_send_queue; 453c0dd49bdSEiji Ota uint32_t rs_snd_bytes; 454c0dd49bdSEiji Ota int rs_rcv_bytes; 455c0dd49bdSEiji Ota /* currently used for failed RDMAs */ 456c0dd49bdSEiji Ota struct list rs_notify_queue; 457c0dd49bdSEiji Ota 458c0dd49bdSEiji Ota /* 459c0dd49bdSEiji Ota * Congestion wake_up. If rs_cong_monitor is set, we use cong_mask 460c0dd49bdSEiji Ota * to decide whether the application should be woken up. 461c0dd49bdSEiji Ota * If not set, we use rs_cong_track to find out whether a cong map 462c0dd49bdSEiji Ota * update arrived. 463c0dd49bdSEiji Ota */ 464c0dd49bdSEiji Ota uint64_t rs_cong_mask; 465c0dd49bdSEiji Ota uint64_t rs_cong_notify; 466c0dd49bdSEiji Ota struct list_node rs_cong_list; 467c0dd49bdSEiji Ota unsigned long rs_cong_track; 468c0dd49bdSEiji Ota 469c0dd49bdSEiji Ota /* 470c0dd49bdSEiji Ota * rs_recv_lock protects the receive queue, and is 471c0dd49bdSEiji Ota * used to serialize with rdsv3_release. 
472c0dd49bdSEiji Ota */ 473c0dd49bdSEiji Ota krwlock_t rs_recv_lock; 474c0dd49bdSEiji Ota struct list rs_recv_queue; 475c0dd49bdSEiji Ota 476c0dd49bdSEiji Ota /* just for stats reporting */ 477c0dd49bdSEiji Ota struct list_node rs_item; 478c0dd49bdSEiji Ota 479c0dd49bdSEiji Ota /* these have their own lock */ 480c0dd49bdSEiji Ota kmutex_t rs_rdma_lock; 481c0dd49bdSEiji Ota struct avl_tree rs_rdma_keys; 482c0dd49bdSEiji Ota 483c0dd49bdSEiji Ota /* Socket options - in case there will be more */ 484c0dd49bdSEiji Ota unsigned char rs_recverr, 485c0dd49bdSEiji Ota rs_cong_monitor; 486c0dd49bdSEiji Ota 487c0dd49bdSEiji Ota cred_t *rs_cred; 488c0dd49bdSEiji Ota zoneid_t rs_zoneid; 489c0dd49bdSEiji Ota }; 490c0dd49bdSEiji Ota 4911a561c76SEiji Ota static inline struct rdsv3_sock * 492c0dd49bdSEiji Ota rdsv3_sk_to_rs(const struct rsock *sk) 493c0dd49bdSEiji Ota { 494c0dd49bdSEiji Ota return ((struct rdsv3_sock *)sk->sk_protinfo); 495c0dd49bdSEiji Ota } 496c0dd49bdSEiji Ota 4971a561c76SEiji Ota static inline struct rsock * 498c0dd49bdSEiji Ota rdsv3_rs_to_sk(const struct rdsv3_sock *rs) 499c0dd49bdSEiji Ota { 500c0dd49bdSEiji Ota return ((struct rsock *)rs->rs_sk); 501c0dd49bdSEiji Ota } 502c0dd49bdSEiji Ota 503c0dd49bdSEiji Ota /* 504c0dd49bdSEiji Ota * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value 505c0dd49bdSEiji Ota * to account for overhead. We don't account for overhead, we just apply 506c0dd49bdSEiji Ota * the number of payload bytes to the specified value. 
507c0dd49bdSEiji Ota */ 5081a561c76SEiji Ota static inline int 509c0dd49bdSEiji Ota rdsv3_sk_sndbuf(struct rdsv3_sock *rs) 510c0dd49bdSEiji Ota { 511c0dd49bdSEiji Ota /* XXX */ 512c0dd49bdSEiji Ota return (rdsv3_rs_to_sk(rs)->sk_sndbuf); 513c0dd49bdSEiji Ota } 514c0dd49bdSEiji Ota 5151a561c76SEiji Ota static inline int 516c0dd49bdSEiji Ota rdsv3_sk_rcvbuf(struct rdsv3_sock *rs) 517c0dd49bdSEiji Ota { 518c0dd49bdSEiji Ota /* XXX */ 519c0dd49bdSEiji Ota return (rdsv3_rs_to_sk(rs)->sk_rcvbuf); 520c0dd49bdSEiji Ota } 521c0dd49bdSEiji Ota 522c0dd49bdSEiji Ota struct rdsv3_statistics { 523c0dd49bdSEiji Ota uint64_t s_conn_reset; 524c0dd49bdSEiji Ota uint64_t s_recv_drop_bad_checksum; 525c0dd49bdSEiji Ota uint64_t s_recv_drop_old_seq; 526c0dd49bdSEiji Ota uint64_t s_recv_drop_no_sock; 527c0dd49bdSEiji Ota uint64_t s_recv_drop_dead_sock; 528c0dd49bdSEiji Ota uint64_t s_recv_deliver_raced; 529c0dd49bdSEiji Ota uint64_t s_recv_delivered; 530c0dd49bdSEiji Ota uint64_t s_recv_queued; 531c0dd49bdSEiji Ota uint64_t s_recv_immediate_retry; 532c0dd49bdSEiji Ota uint64_t s_recv_delayed_retry; 533c0dd49bdSEiji Ota uint64_t s_recv_ack_required; 534c0dd49bdSEiji Ota uint64_t s_recv_rdma_bytes; 535c0dd49bdSEiji Ota uint64_t s_recv_ping; 536c0dd49bdSEiji Ota uint64_t s_send_queue_empty; 537c0dd49bdSEiji Ota uint64_t s_send_queue_full; 538c0dd49bdSEiji Ota uint64_t s_send_sem_contention; 539c0dd49bdSEiji Ota uint64_t s_send_sem_queue_raced; 540c0dd49bdSEiji Ota uint64_t s_send_immediate_retry; 541c0dd49bdSEiji Ota uint64_t s_send_delayed_retry; 542c0dd49bdSEiji Ota uint64_t s_send_drop_acked; 543c0dd49bdSEiji Ota uint64_t s_send_ack_required; 544c0dd49bdSEiji Ota uint64_t s_send_queued; 545c0dd49bdSEiji Ota uint64_t s_send_rdma; 546c0dd49bdSEiji Ota uint64_t s_send_rdma_bytes; 547c0dd49bdSEiji Ota uint64_t s_send_pong; 548c0dd49bdSEiji Ota uint64_t s_page_remainder_hit; 549c0dd49bdSEiji Ota uint64_t s_page_remainder_miss; 550c0dd49bdSEiji Ota uint64_t s_copy_to_user; 551c0dd49bdSEiji Ota 
uint64_t s_copy_from_user; 552c0dd49bdSEiji Ota uint64_t s_cong_update_queued; 553c0dd49bdSEiji Ota uint64_t s_cong_update_received; 554c0dd49bdSEiji Ota uint64_t s_cong_send_error; 555c0dd49bdSEiji Ota uint64_t s_cong_send_blocked; 556c0dd49bdSEiji Ota }; 557c0dd49bdSEiji Ota 558c0dd49bdSEiji Ota /* af_rds.c */ 559c0dd49bdSEiji Ota void rdsv3_sock_addref(struct rdsv3_sock *rs); 560c0dd49bdSEiji Ota void rdsv3_sock_put(struct rdsv3_sock *rs); 561c0dd49bdSEiji Ota void rdsv3_wake_sk_sleep(struct rdsv3_sock *rs); 562c0dd49bdSEiji Ota void __rdsv3_wake_sk_sleep(struct rsock *sk); 563c0dd49bdSEiji Ota 564c0dd49bdSEiji Ota /* bind.c */ 565c0dd49bdSEiji Ota int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 566c0dd49bdSEiji Ota socklen_t len, cred_t *cr); 567c0dd49bdSEiji Ota void rdsv3_remove_bound(struct rdsv3_sock *rs); 568c0dd49bdSEiji Ota struct rdsv3_sock *rdsv3_find_bound(uint32_be_t addr, uint16_be_t port); 569c0dd49bdSEiji Ota 570c0dd49bdSEiji Ota /* conn.c */ 571c0dd49bdSEiji Ota int rdsv3_conn_init(void); 572c0dd49bdSEiji Ota void rdsv3_conn_exit(void); 573c0dd49bdSEiji Ota struct rdsv3_connection *rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr, 574c0dd49bdSEiji Ota struct rdsv3_transport *trans, int gfp); 575c0dd49bdSEiji Ota struct rdsv3_connection *rdsv3_conn_create_outgoing(uint32_be_t laddr, 576c0dd49bdSEiji Ota uint32_be_t faddr, 577c0dd49bdSEiji Ota struct rdsv3_transport *trans, int gfp); 578*5d5562f5SEiji Ota void rdsv3_conn_shutdown(struct rdsv3_connection *conn); 579c0dd49bdSEiji Ota void rdsv3_conn_destroy(struct rdsv3_connection *conn); 580c0dd49bdSEiji Ota void rdsv3_conn_reset(struct rdsv3_connection *conn); 581c0dd49bdSEiji Ota void rdsv3_conn_drop(struct rdsv3_connection *conn); 582c0dd49bdSEiji Ota void rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len, 583c0dd49bdSEiji Ota struct rdsv3_info_iterator *iter, 584c0dd49bdSEiji Ota struct rdsv3_info_lengths *lens, 585c0dd49bdSEiji Ota int (*visitor)(struct 
rdsv3_connection *, void *), 586c0dd49bdSEiji Ota size_t item_len); 587c0dd49bdSEiji Ota 588c0dd49bdSEiji Ota static inline int 589c0dd49bdSEiji Ota rdsv3_conn_transition(struct rdsv3_connection *conn, int old, int new) 590c0dd49bdSEiji Ota { 591c0dd49bdSEiji Ota return (atomic_cmpxchg(&conn->c_state, old, new) == old); 592c0dd49bdSEiji Ota } 593c0dd49bdSEiji Ota 5941a561c76SEiji Ota static inline int 595c0dd49bdSEiji Ota rdsv3_conn_state(struct rdsv3_connection *conn) 596c0dd49bdSEiji Ota { 597c0dd49bdSEiji Ota return (atomic_get(&conn->c_state)); 598c0dd49bdSEiji Ota } 599c0dd49bdSEiji Ota 6001a561c76SEiji Ota static inline int 601c0dd49bdSEiji Ota rdsv3_conn_up(struct rdsv3_connection *conn) 602c0dd49bdSEiji Ota { 603c0dd49bdSEiji Ota return (atomic_get(&conn->c_state) == RDSV3_CONN_UP); 604c0dd49bdSEiji Ota } 605c0dd49bdSEiji Ota 6061a561c76SEiji Ota static inline int 607c0dd49bdSEiji Ota rdsv3_conn_connecting(struct rdsv3_connection *conn) 608c0dd49bdSEiji Ota { 609c0dd49bdSEiji Ota return (atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING); 610c0dd49bdSEiji Ota } 611c0dd49bdSEiji Ota 612c0dd49bdSEiji Ota /* recv.c */ 613c0dd49bdSEiji Ota void rdsv3_inc_init(struct rdsv3_incoming *inc, struct rdsv3_connection *conn, 614c0dd49bdSEiji Ota uint32_be_t saddr); 615c0dd49bdSEiji Ota void rdsv3_inc_addref(struct rdsv3_incoming *inc); 616c0dd49bdSEiji Ota void rdsv3_inc_put(struct rdsv3_incoming *inc); 617c0dd49bdSEiji Ota void rdsv3_recv_incoming(struct rdsv3_connection *conn, uint32_be_t saddr, 618c0dd49bdSEiji Ota uint32_be_t daddr, 619c0dd49bdSEiji Ota struct rdsv3_incoming *inc, int gfp); 620c0dd49bdSEiji Ota int rdsv3_recvmsg(struct rdsv3_sock *rs, uio_t *uio, 621c0dd49bdSEiji Ota struct msghdr *msg, size_t size, int msg_flags); 622c0dd49bdSEiji Ota void rdsv3_clear_recv_queue(struct rdsv3_sock *rs); 623c0dd49bdSEiji Ota int rdsv3_notify_queue_get(struct rdsv3_sock *rs, struct msghdr *msg); 624c0dd49bdSEiji Ota void rdsv3_inc_info_copy(struct rdsv3_incoming 
*inc, 625c0dd49bdSEiji Ota struct rdsv3_info_iterator *iter, 626c0dd49bdSEiji Ota uint32_be_t saddr, uint32_be_t daddr, int flip); 627c0dd49bdSEiji Ota 628c0dd49bdSEiji Ota /* page.c */ 629c0dd49bdSEiji Ota int rdsv3_page_remainder_alloc(struct rdsv3_scatterlist *scat, 630c0dd49bdSEiji Ota unsigned long bytes, int gfp); 631c0dd49bdSEiji Ota 632c0dd49bdSEiji Ota /* send.c */ 633c0dd49bdSEiji Ota int rdsv3_sendmsg(struct rdsv3_sock *rs, uio_t *uio, struct nmsghdr *msg, 634c0dd49bdSEiji Ota size_t payload_len); 635c0dd49bdSEiji Ota void rdsv3_send_reset(struct rdsv3_connection *conn); 636c0dd49bdSEiji Ota int rdsv3_send_xmit(struct rdsv3_connection *conn); 637c0dd49bdSEiji Ota struct sockaddr_in; 638c0dd49bdSEiji Ota void rdsv3_send_drop_to(struct rdsv3_sock *rs, struct sockaddr_in *dest); 639c0dd49bdSEiji Ota typedef int (*is_acked_func)(struct rdsv3_message *rm, uint64_t ack); 640c0dd49bdSEiji Ota void rdsv3_send_drop_acked(struct rdsv3_connection *conn, uint64_t ack, 641c0dd49bdSEiji Ota is_acked_func is_acked); 642c0dd49bdSEiji Ota int rdsv3_send_acked_before(struct rdsv3_connection *conn, uint64_t seq); 643c0dd49bdSEiji Ota void rdsv3_send_remove_from_sock(struct list *messages, int status); 644c0dd49bdSEiji Ota int rdsv3_send_pong(struct rdsv3_connection *conn, uint16_be_t dport); 645c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_send_get_message(struct rdsv3_connection *, 646c0dd49bdSEiji Ota struct rdsv3_rdma_op *); 647c0dd49bdSEiji Ota 648c0dd49bdSEiji Ota /* rdma.c */ 649c0dd49bdSEiji Ota void rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force); 650c0dd49bdSEiji Ota 651c0dd49bdSEiji Ota /* cong.c */ 652c0dd49bdSEiji Ota void rdsv3_cong_init(void); 653c0dd49bdSEiji Ota int rdsv3_cong_get_maps(struct rdsv3_connection *conn); 654c0dd49bdSEiji Ota void rdsv3_cong_add_conn(struct rdsv3_connection *conn); 655c0dd49bdSEiji Ota void rdsv3_cong_remove_conn(struct rdsv3_connection *conn); 656c0dd49bdSEiji Ota void rdsv3_cong_set_bit(struct rdsv3_cong_map 
*map, uint16_be_t port); 657c0dd49bdSEiji Ota void rdsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port); 658c0dd49bdSEiji Ota int rdsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock, 659c0dd49bdSEiji Ota struct rdsv3_sock *rs); 660c0dd49bdSEiji Ota void rdsv3_cong_queue_updates(struct rdsv3_cong_map *map); 661c0dd49bdSEiji Ota void rdsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t); 662c0dd49bdSEiji Ota int rdsv3_cong_updated_since(unsigned long *recent); 663c0dd49bdSEiji Ota void rdsv3_cong_add_socket(struct rdsv3_sock *); 664c0dd49bdSEiji Ota void rdsv3_cong_remove_socket(struct rdsv3_sock *); 665c0dd49bdSEiji Ota void rdsv3_cong_exit(void); 666c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_cong_update_alloc(struct rdsv3_connection *conn); 667c0dd49bdSEiji Ota 668c0dd49bdSEiji Ota /* stats.c */ 669c0dd49bdSEiji Ota RDSV3_DECLARE_PER_CPU(struct rdsv3_statistics, rdsv3_stats); 670c0dd49bdSEiji Ota #define rdsv3_stats_inc_which(which, member) do { \ 671c0dd49bdSEiji Ota rdsv3_per_cpu(which, get_cpu()).member++; \ 672c0dd49bdSEiji Ota put_cpu(); \ 673c0dd49bdSEiji Ota } while (0) 674c0dd49bdSEiji Ota #define rdsv3_stats_inc(member) rdsv3_stats_inc_which(rdsv3_stats, member) 675c0dd49bdSEiji Ota #define rdsv3_stats_add_which(which, member, count) do { \ 676c0dd49bdSEiji Ota rdsv3_per_cpu(which, get_cpu()).member += count; \ 677c0dd49bdSEiji Ota put_cpu(); \ 678c0dd49bdSEiji Ota } while (0) 679c0dd49bdSEiji Ota #define rdsv3_stats_add(member, count) \ 680c0dd49bdSEiji Ota rdsv3_stats_add_which(rdsv3_stats, member, count) 681c0dd49bdSEiji Ota int rdsv3_stats_init(void); 682c0dd49bdSEiji Ota void rdsv3_stats_exit(void); 683c0dd49bdSEiji Ota void rdsv3_stats_info_copy(struct rdsv3_info_iterator *iter, 684c0dd49bdSEiji Ota uint64_t *values, char **names, size_t nr); 685c0dd49bdSEiji Ota 686c0dd49bdSEiji Ota 687c0dd49bdSEiji Ota /* sysctl.c */ 688c0dd49bdSEiji Ota int rdsv3_sysctl_init(void); 689c0dd49bdSEiji Ota void 
rdsv3_sysctl_exit(void); 690c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_min; 691c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_default; 692c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_sndbuf_max; 693c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_reconnect_min_jiffies; 694c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_reconnect_max_jiffies; 695c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_max_unacked_packets; 696c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_max_unacked_bytes; 697c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_ping_enable; 698c0dd49bdSEiji Ota extern unsigned long rdsv3_sysctl_trace_flags; 699c0dd49bdSEiji Ota extern unsigned int rdsv3_sysctl_trace_level; 700c0dd49bdSEiji Ota 701c0dd49bdSEiji Ota /* threads.c */ 702c0dd49bdSEiji Ota int rdsv3_threads_init(); 703c0dd49bdSEiji Ota void rdsv3_threads_exit(void); 704c0dd49bdSEiji Ota extern struct rdsv3_workqueue_struct_s *rdsv3_wq; 705*5d5562f5SEiji Ota void rdsv3_queue_reconnect(struct rdsv3_connection *conn); 706c0dd49bdSEiji Ota void rdsv3_connect_worker(struct rdsv3_work_s *); 707c0dd49bdSEiji Ota void rdsv3_shutdown_worker(struct rdsv3_work_s *); 708c0dd49bdSEiji Ota void rdsv3_send_worker(struct rdsv3_work_s *); 709c0dd49bdSEiji Ota void rdsv3_recv_worker(struct rdsv3_work_s *); 710*5d5562f5SEiji Ota void rdsv3_reaper_worker(struct rdsv3_work_s *); 711c0dd49bdSEiji Ota void rdsv3_connect_complete(struct rdsv3_connection *conn); 712c0dd49bdSEiji Ota 713c0dd49bdSEiji Ota /* transport.c */ 714c0dd49bdSEiji Ota int rdsv3_trans_register(struct rdsv3_transport *trans); 715c0dd49bdSEiji Ota void rdsv3_trans_unregister(struct rdsv3_transport *trans); 716c0dd49bdSEiji Ota struct rdsv3_transport *rdsv3_trans_get_preferred(uint32_be_t addr); 717c0dd49bdSEiji Ota unsigned int rdsv3_trans_stats_info_copy(struct rdsv3_info_iterator *iter, 718c0dd49bdSEiji Ota unsigned int avail); 719c0dd49bdSEiji Ota void rdsv3_trans_exit(void); 720c0dd49bdSEiji Ota 
721c0dd49bdSEiji Ota /* message.c */ 722c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_alloc(unsigned int nents, int gfp); 723c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_copy_from_user(struct uio *uiop, 724c0dd49bdSEiji Ota size_t total_len); 725c0dd49bdSEiji Ota struct rdsv3_message *rdsv3_message_map_pages(unsigned long *page_addrs, 726c0dd49bdSEiji Ota unsigned int total_len); 727c0dd49bdSEiji Ota void rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 728c0dd49bdSEiji Ota uint16_be_t dport, uint64_t seq); 729c0dd49bdSEiji Ota int rdsv3_message_add_extension(struct rdsv3_header *hdr, 730c0dd49bdSEiji Ota unsigned int type, const void *data, unsigned int len); 731c0dd49bdSEiji Ota int rdsv3_message_next_extension(struct rdsv3_header *hdr, 732c0dd49bdSEiji Ota unsigned int *pos, void *buf, unsigned int *buflen); 733c0dd49bdSEiji Ota int rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 734c0dd49bdSEiji Ota unsigned int version); 735c0dd49bdSEiji Ota int rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 736c0dd49bdSEiji Ota unsigned int *version); 737c0dd49bdSEiji Ota int rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, 738c0dd49bdSEiji Ota uint32_t r_key, uint32_t offset); 739c0dd49bdSEiji Ota int rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc, 740c0dd49bdSEiji Ota uio_t *uio, size_t size); 741c0dd49bdSEiji Ota void rdsv3_message_inc_free(struct rdsv3_incoming *inc); 742c0dd49bdSEiji Ota void rdsv3_message_addref(struct rdsv3_message *rm); 743c0dd49bdSEiji Ota void rdsv3_message_put(struct rdsv3_message *rm); 744c0dd49bdSEiji Ota void rdsv3_message_wait(struct rdsv3_message *rm); 745c0dd49bdSEiji Ota void rdsv3_message_unmapped(struct rdsv3_message *rm); 746c0dd49bdSEiji Ota 7471a561c76SEiji Ota static inline void 748c0dd49bdSEiji Ota rdsv3_message_make_checksum(struct rdsv3_header *hdr) 749c0dd49bdSEiji Ota { 750c0dd49bdSEiji Ota hdr->h_csum = 0; 751c0dd49bdSEiji Ota 
hdr->h_csum = 752c0dd49bdSEiji Ota rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2); 753c0dd49bdSEiji Ota } 754c0dd49bdSEiji Ota 7551a561c76SEiji Ota static inline int 756c0dd49bdSEiji Ota rdsv3_message_verify_checksum(const struct rdsv3_header *hdr) 757c0dd49bdSEiji Ota { 758c0dd49bdSEiji Ota return (!hdr->h_csum || 759c0dd49bdSEiji Ota rdsv3_ip_fast_csum((void *)hdr, sizeof (*hdr) >> 2) == 0); 760c0dd49bdSEiji Ota } 761c0dd49bdSEiji Ota 762c0dd49bdSEiji Ota /* rdsv3_sc.c */ 763c0dd49bdSEiji Ota extern boolean_t rdsv3_if_lookup_by_name(char *if_name); 764c0dd49bdSEiji Ota extern int rdsv3_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip); 765c0dd49bdSEiji Ota extern ipaddr_t rdsv3_scaddr_to_ibaddr(ipaddr_t addr); 766c0dd49bdSEiji Ota 767c0dd49bdSEiji Ota #ifdef __cplusplus 768c0dd49bdSEiji Ota } 769c0dd49bdSEiji Ota #endif 770c0dd49bdSEiji Ota 771c0dd49bdSEiji Ota #endif /* _RDSV3_RDSV3_H */ 772