/*
 * Copyright (c) 2008 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#ifndef _LINUX_RDS_H
#define _LINUX_RDS_H

#include <linux/types.h>

#define RDS_IB_ABI_VERSION		0x301

#define SOL_RDS				276

/*
 * setsockopt/getsockopt for SOL_RDS
 */
#define RDS_CANCEL_SENT_TO		1
#define RDS_GET_MR			2
#define RDS_FREE_MR			3
/* deprecated: RDS_BARRIER 4 */
#define RDS_RECVERR			5
#define RDS_CONG_MONITOR		6
#define RDS_GET_MR_FOR_DEST		7
#define SO_RDS_TRANSPORT		8

/*
 * Socket option to tap receive path latency
 *	SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
 *	Format used: struct rds_rx_trace_so
 */
#define SO_RDS_MSG_RXPATH_LATENCY	10

/* supported values for SO_RDS_TRANSPORT */
#define RDS_TRANS_IB			0
#define RDS_TRANS_IWARP			1
#define RDS_TRANS_TCP			2
#define RDS_TRANS_COUNT			3
#define RDS_TRANS_NONE			(~0)

/*
 * Control message types for SOL_RDS.
 *
 * RDS_CMSG_RDMA_ARGS (sendmsg)
 *	Request a RDMA transfer to/from the specified
 *	memory ranges.
 *	The cmsg_data is a struct rds_rdma_args.
 * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg)
 *	Kernel informs application about intended
 *	source/destination of a RDMA transfer
 * RDS_CMSG_RDMA_MAP (sendmsg)
 *	Application asks kernel to map the given
 *	memory range into a IB MR, and send the
 *	R_Key along in an RDS extension header.
 *	The cmsg_data is a struct rds_get_mr_args,
 *	the same as for the GET_MR setsockopt.
 * RDS_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDS_CMSG_RXPATH_LATENCY (recvmsg)
 *	Returns RDS message latencies in various stages of the receive
 *	path, in ns. It is set per socket using the
 *	SO_RDS_MSG_RXPATH_LATENCY socket option. Legitimate points are
 *	defined in enum rds_message_rxpath_latency. More points can be
 *	added in future. CMSG format is struct rds_cmsg_rx_trace.
 */
#define RDS_CMSG_RDMA_ARGS		1
#define RDS_CMSG_RDMA_DEST		2
#define RDS_CMSG_RDMA_MAP		3
#define RDS_CMSG_RDMA_STATUS		4
#define RDS_CMSG_CONG_UPDATE		5
#define RDS_CMSG_ATOMIC_FADD		6
#define RDS_CMSG_ATOMIC_CSWP		7
#define RDS_CMSG_MASKED_ATOMIC_FADD	8
#define RDS_CMSG_MASKED_ATOMIC_CSWP	9
#define RDS_CMSG_RXPATH_LATENCY		11

/* RDS_INFO_FIRST and RDS_INFO_LAST bracket the RDS_INFO_* identifier range. */
#define RDS_INFO_FIRST			10000
#define RDS_INFO_COUNTERS		10000
#define RDS_INFO_CONNECTIONS		10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
#define RDS_INFO_SEND_MESSAGES		10003
#define RDS_INFO_RETRANS_MESSAGES	10004
#define RDS_INFO_RECV_MESSAGES		10005
#define RDS_INFO_SOCKETS		10006
#define RDS_INFO_TCP_SOCKETS		10007
#define RDS_INFO_IB_CONNECTIONS		10008
#define RDS_INFO_CONNECTION_STATS	10009
#define RDS_INFO_IWARP_CONNECTIONS	10010
#define RDS_INFO_LAST			10010

/* One counter: a fixed 32-byte name and its 64-bit value (packed layout). */
struct rds_info_counter {
	uint8_t		name[32];
	uint64_t	value;
} __attribute__((packed));

#define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
#define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
#define RDS_INFO_CONNECTION_FLAG_CONNECTED	0x04

/* Size of the transport name array in struct rds_info_connection. */
#define TRANSNAMSIZ	16

/* Per-connection record (packed layout); addresses are big-endian. */
struct rds_info_connection {
	uint64_t	next_tx_seq;
	uint64_t	next_rx_seq;
	__be32		laddr;
	__be32		faddr;
	uint8_t		transport[TRANSNAMSIZ];		/* null term ascii */
	uint8_t		flags;
} __attribute__((packed));

#define RDS_INFO_MESSAGE_FLAG_ACK		0x01
#define RDS_INFO_MESSAGE_FLAG_FAST_ACK		0x02

/* Per-message record (packed layout); addresses and ports are big-endian. */
struct rds_info_message {
	uint64_t	seq;
	uint32_t	len;
	__be32		laddr;
	__be32		faddr;
	__be16		lport;
	__be16		fport;
	uint8_t		flags;
} __attribute__((packed));

/* Per-socket record (packed layout); addresses and ports are big-endian. */
struct rds_info_socket {
	uint32_t	sndbuf;
	__be32		bound_addr;
	__be32		connected_addr;
	__be16		bound_port;
	__be16		connected_port;
	uint32_t	rcvbuf;
	uint64_t	inum;
} __attribute__((packed));

/*
 * Per-TCP-socket record. The layout is packed, so the 32-bit peer_addr
 * directly follows the 16-bit local_port without padding.
 */
struct rds_info_tcp_socket {
	__be32		local_addr;
	__be16		local_port;
	__be32		peer_addr;
	__be16		peer_port;
	uint64_t	hdr_rem;
	uint64_t	data_rem;
	uint32_t	last_sent_nxt;
	uint32_t	last_expected_una;
	uint32_t	last_seen_una;
} __attribute__((packed));

#define RDS_IB_GID_LEN	16

/* Per-RDMA-connection record. Unlike the records above, this one is not packed. */
struct rds_info_rdma_connection {
	__be32		src_addr;
	__be32		dst_addr;
	uint8_t		src_gid[RDS_IB_GID_LEN];
	uint8_t		dst_gid[RDS_IB_GID_LEN];

	uint32_t	max_send_wr;
	uint32_t	max_recv_wr;
	uint32_t	max_send_sge;
	uint32_t	rdma_mr_max;
	uint32_t	rdma_mr_size;
};

/*
 * RDS message Receive Path Latency points.
 * RDS_MSG_RX_DGRAM_TRACE_MAX is the number of points; it sizes the
 * arrays in the two trace structures below.
 */
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
	RDS_MSG_RX_DGRAM_REASSEMBLE,
	RDS_MSG_RX_DGRAM_DELIVERED,
	RDS_MSG_RX_DGRAM_TRACE_MAX
};

/*
 * Format used with the SO_RDS_MSG_RXPATH_LATENCY socket option.
 * The __u8 type from <linux/types.h> is used rather than the
 * kernel-internal u8, so the structure is usable wherever that
 * header is available.
 */
struct rds_rx_trace_so {
	__u8 rx_traces;
	__u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/* Format of the RDS_CMSG_RXPATH_LATENCY control message. */
struct rds_cmsg_rx_trace {
	__u8 rx_traces;
	__u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
	__u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
 * level by exchanging a bitmap marking congested ports.
 * By default, a process sleeping in poll() is always woken
 * up when the congestion map is updated.
 * With explicit monitoring, an application can have more
 * fine-grained control.
 * The application installs a 64bit mask value in the socket,
 * where each bit corresponds to a group of ports.
 * When a congestion update arrives, RDS checks the set of
 * ports that are now uncongested against the list bit mask
 * installed in the socket, and if they overlap, we queue a
 * cong_notification on the socket.
 *
 * To install the congestion monitor bitmask, use RDS_CONG_MONITOR
 * with the 64bit mask.
 * Congestion updates are received via RDS_CMSG_CONG_UPDATE
 * control messages.
 *
 * The correspondence between bits and ports is
 *	1 << (portnum % 64)
 */
#define RDS_CONG_MONITOR_SIZE	64
/*
 * The argument is parenthesized so that the cast applies to the whole
 * expression passed in, not only to its first operand.
 */
#define RDS_CONG_MONITOR_BIT(port)  (((unsigned int)(port)) % RDS_CONG_MONITOR_SIZE)
#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))

/*
 * RDMA related types
 */

/*
 * This encapsulates a remote memory location.
 * In the current implementation, it contains the R_Key
 * of the remote memory region, and the offset into it
 * (so that the application does not have to worry about
 * alignment).
 */
typedef uint64_t	rds_rdma_cookie_t;

/* A memory range: start address and length in bytes. */
struct rds_iovec {
	uint64_t	addr;
	uint64_t	bytes;
};

/* cmsg_data for RDS_CMSG_RDMA_MAP, the same as for the GET_MR setsockopt. */
struct rds_get_mr_args {
	struct rds_iovec vec;
	uint64_t	cookie_addr;
	uint64_t	flags;
};

/*
 * Same members as struct rds_get_mr_args, preceded by a destination address.
 *
 * NOTE(review): struct sockaddr_storage is not declared by anything this
 * header includes itself; the includer presumably has to pull in a socket
 * header first - confirm.
 */
struct rds_get_mr_for_dest_args {
	struct sockaddr_storage	dest_addr;
	struct rds_iovec	vec;
	uint64_t		cookie_addr;
	uint64_t		flags;
};

struct rds_free_mr_args {
	rds_rdma_cookie_t cookie;
	uint64_t	flags;
};

/* cmsg_data for RDS_CMSG_RDMA_ARGS. */
struct rds_rdma_args {
	rds_rdma_cookie_t cookie;
	struct rds_iovec remote_vec;
	uint64_t	local_vec_addr;
	uint64_t	nr_local;
	uint64_t	flags;
	uint64_t	user_token;
};

/*
 * Arguments for an atomic operation. The anonymous union holds one
 * operand set per variant: cswp, fadd, m_cswp or m_fadd.
 */
struct rds_atomic_args {
	rds_rdma_cookie_t cookie;
	uint64_t	local_addr;
	uint64_t	remote_addr;
	union {
		struct {
			uint64_t	compare;
			uint64_t	swap;
		} cswp;
		struct {
			uint64_t	add;
		} fadd;
		struct {
			uint64_t	compare;
			uint64_t	swap;
			uint64_t	compare_mask;
			uint64_t	swap_mask;
		} m_cswp;
		struct {
			uint64_t	add;
			uint64_t	nocarry_mask;
		} m_fadd;
	};
	uint64_t	flags;
	uint64_t	user_token;
};

/* Pairs a user_token with a status; the RDS_RDMA_* status codes follow. */
struct rds_rdma_notify {
	uint64_t	user_token;
	int32_t		status;
};

#define RDS_RDMA_SUCCESS	0
#define RDS_RDMA_REMOTE_ERROR	1
#define RDS_RDMA_CANCELED	2
#define RDS_RDMA_DROPPED	3
#define RDS_RDMA_OTHER_ERROR	4

/*
 * Common set of flags for all RDMA related structs
 */
#define RDS_RDMA_READWRITE	0x0001
#define RDS_RDMA_FENCE		0x0002	/* use FENCE for immediate send */
#define RDS_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
#define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
#define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
#define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
#define RDS_RDMA_SILENT		0x0040	/* Do not interrupt remote */

#endif /* _LINUX_RDS_H */