/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2008 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/*
 * Include this file if the application uses rdsv3 sockets.
 */

/*
 * This file contains definitions from the ofed rds.h and rds_rdma.h
 * header file.
 */
#ifndef _RDSV3_RDS_H
#define	_RDSV3_RDS_H

#include <sys/types.h>
#include <sys/socket.h>

#ifdef	__cplusplus
extern "C" {
#endif

#define	RDS_IB_ABI_VERSION		0x301

/* The RDS address and protocol families are both AF_INET_OFFLOAD. */
#define	AF_RDS				AF_INET_OFFLOAD
#define	PF_RDS				AF_INET_OFFLOAD

#define	SOL_RDS				272

/*
 * setsockopt/getsockopt for SOL_RDS
 */
#define	RDSV3_CANCEL_SENT_TO		1
#define	RDSV3_GET_MR			2
#define	RDSV3_FREE_MR			3
/* deprecated: RDS_BARRIER 4 */
#define	RDSV3_RECVERR			5
#define	RDSV3_CONG_MONITOR		6
#define	RDSV3_GET_MR_FOR_DEST		7

/*
 * Control message types for SOL_RDS.
 *
 * RDSV3_CMSG_RDMA_ARGS (sendmsg)
 *	Request a RDMA transfer to/from the specified
 *	memory ranges.
 *	The cmsg_data is a struct rdsv3_rdma_args.
 * RDSV3_CMSG_RDMA_DEST (recvmsg, sendmsg)
 *	Kernel informs application about intended
 *	source/destination of a RDMA transfer
 * RDSV3_CMSG_RDMA_MAP (sendmsg)
 *	Application asks kernel to map the given
 *	memory range into a IB MR, and send the
 *	R_Key along in an RDS extension header.
 *	The cmsg_data is a struct rdsv3_get_mr_args,
 *	the same as for the GET_MR setsockopt.
 * RDSV3_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDSV3_CMSG_CONG_UPDATE
 *	Carries a congestion update; see "Congestion monitoring"
 *	further down in this file.
 */
#define	RDSV3_CMSG_RDMA_ARGS		1
#define	RDSV3_CMSG_RDMA_DEST		2
#define	RDSV3_CMSG_RDMA_MAP		3
#define	RDSV3_CMSG_RDMA_STATUS		4
#define	RDSV3_CMSG_CONG_UPDATE		5

/* rds-info related */

/* RDSV3_INFO_FIRST and RDSV3_INFO_LAST bracket the valid info codes. */
#define	RDSV3_INFO_FIRST		10000
#define	RDSV3_INFO_COUNTERS		10000
#define	RDSV3_INFO_CONNECTIONS		10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
#define	RDSV3_INFO_SEND_MESSAGES	10003
#define	RDSV3_INFO_RETRANS_MESSAGES	10004
#define	RDSV3_INFO_RECV_MESSAGES	10005
#define	RDSV3_INFO_SOCKETS		10006
#define	RDSV3_INFO_TCP_SOCKETS		10007
#define	RDSV3_INFO_IB_CONNECTIONS	10008
#define	RDSV3_INFO_CONNECTION_STATS	10009
#define	RDSV3_INFO_IWARP_CONNECTIONS	10010
#define	RDSV3_INFO_LAST			10010

/*
 * NOTE(review): lenp and datap are presumably user addresses carried as
 * 64-bit integers so the layout does not depend on pointer width; confirm
 * against the consumer of this structure.
 */
struct rds_info_arg {
	uint64_t	lenp;
	uint64_t	datap;
};

/*
 * The rdsv3_info_* structures below are declared twice: byte-packed for
 * normal builds, and without any packing directives when __lock_lint is
 * defined.  The member lists of the two copies must be kept identical.
 */
#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_counter {
	uint8_t		name[32];
	uint64_t	value;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_counter {
	uint8_t		name[32];
	uint64_t	value;
};
#endif

/* Bit values for rdsv3_info_connection.flags. */
#define	RDSV3_INFO_CONNECTION_FLAG_SENDING	0x01
#define	RDSV3_INFO_CONNECTION_FLAG_CONNECTING	0x02
#define	RDSV3_INFO_CONNECTION_FLAG_CONNECTED	0x04

#define	TRANSNAMSIZ	16

/*
 * NOTE(review): transport[] is 15 bytes although TRANSNAMSIZ is 16.  The
 * structure is packed, so resizing the array would change its layout;
 * confirm the intent before touching it.
 */
#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_connection {
	uint64_t	next_tx_seq;
	uint64_t	next_rx_seq;
	uint32_t	laddr;			/* network order */
	uint32_t	faddr;			/* network order */
	uint8_t		transport[15];		/* null term ascii */
	uint8_t		flags;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_connection {
	uint64_t	next_tx_seq;
	uint64_t	next_rx_seq;
	uint32_t	laddr;			/* network order */
	uint32_t	faddr;			/* network order */
	uint8_t		transport[15];		/* null term ascii */
	uint8_t		flags;
};
#endif

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_flow {
	uint32_t	laddr;			/* network order */
	uint32_t	faddr;			/* network order */
	uint32_t	bytes;
	uint16_t	lport;			/* network order */
	uint16_t	fport;			/* network order */
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_flow {
	uint32_t	laddr;			/* network order */
	uint32_t	faddr;			/* network order */
	uint32_t	bytes;
	uint16_t	lport;			/* network order */
	uint16_t	fport;			/* network order */
};
#endif

/* Bit values for rdsv3_info_message.flags. */
#define	RDSV3_INFO_MESSAGE_FLAG_ACK		0x01
#define	RDSV3_INFO_MESSAGE_FLAG_FAST_ACK	0x02

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_message {
	uint64_t	seq;
	uint32_t	len;
	uint32_t	laddr;			/* network order */
	uint32_t	faddr;			/* network order */
	uint16_t	lport;			/* network order */
	uint16_t	fport;			/* network order */
	uint8_t		flags;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_message {
	uint64_t	seq;
	uint32_t	len;
	uint32_t	laddr;			/* network order */
	uint32_t	faddr;			/* network order */
	uint16_t	lport;			/* network order */
	uint16_t	fport;			/* network order */
	uint8_t		flags;
};
#endif

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_socket {
	uint32_t	sndbuf;
	uint32_t	bound_addr;		/* network order */
	uint32_t	connected_addr;		/* network order */
	uint16_t	bound_port;		/* network order */
	uint16_t	connected_port;		/* network order */
	uint32_t	rcvbuf;
	uint64_t	inum;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_socket {
	uint32_t	sndbuf;
	uint32_t	bound_addr;		/* network order */
	uint32_t	connected_addr;		/* network order */
	uint16_t	bound_port;		/* network order */
	uint16_t	connected_port;		/* network order */
	uint32_t	rcvbuf;
	uint64_t	inum;
};
#endif

/* Same members as rdsv3_info_socket, minus the trailing inum field. */
#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_socket_v1 {
	uint32_t	sndbuf;
	uint32_t	bound_addr;		/* network order */
	uint32_t	connected_addr;		/* network order */
	uint16_t	bound_port;		/* network order */
	uint16_t	connected_port;		/* network order */
	uint32_t	rcvbuf;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_socket_v1 {
	uint32_t	sndbuf;
	uint32_t	bound_addr;		/* network order */
	uint32_t	connected_addr;		/* network order */
	uint16_t	bound_port;		/* network order */
	uint16_t	connected_port;		/* network order */
	uint32_t	rcvbuf;
};
#endif

#define	RDS_IB_GID_LEN	16
struct rdsv3_info_rdma_connection {
	uint32_t	src_addr;		/* network order */
	uint32_t	dst_addr;		/* network order */
	uint8_t		src_gid[RDS_IB_GID_LEN];
	uint8_t		dst_gid[RDS_IB_GID_LEN];

	uint32_t	max_send_wr;
	uint32_t	max_recv_wr;
	uint32_t	max_send_sge;
	uint32_t	rdma_mr_max;
	uint32_t	rdma_mr_size;
};

/* Alternate spellings that map onto the rdma structure and its members. */
#define	rdsv3_info_ib_connection	rdsv3_info_rdma_connection
#define	rdma_fmr_max			rdma_mr_max
#define	rdma_fmr_size			rdma_mr_size

/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
 * level by exchanging a bitmap marking congested ports.
 * By default, a process sleeping in poll() is always woken
 * up when the congestion map is updated.
 * With explicit monitoring, an application can have more
 * fine-grained control.
 * The application installs a 64bit mask value in the socket,
 * where each bit corresponds to a group of ports.
 * When a congestion update arrives, RDS checks the set of
 * ports that are now uncongested against the list bit mask
 * installed in the socket, and if they overlap, we queue a
 * cong_notification on the socket.
 *
 * To install the congestion monitor bitmask, use RDSV3_CONG_MONITOR
 * with the 64bit mask.
 * Congestion updates are received via RDSV3_CMSG_CONG_UPDATE
 * control messages.
 *
 * The correspondence between bits and ports is
 *	1 << (portnum % 64)
 */
#define	RDSV3_CONG_MONITOR_SIZE	64
/*
 * The argument is parenthesized before the cast so that the whole port
 * expression is converted to unsigned int; without that, a compound
 * argument such as "c ? a : b" would have only its first operand cast and
 * a negative port could produce a negative bit index (and an undefined
 * shift in RDSV3_CONG_MONITOR_MASK).
 */
#define	RDSV3_CONG_MONITOR_BIT(port)	\
	(((unsigned int)(port)) % RDSV3_CONG_MONITOR_SIZE)
#define	RDSV3_CONG_MONITOR_MASK(port)	(1ULL << RDSV3_CONG_MONITOR_BIT(port))

/*
 * RDMA related types
 */

/*
 * This encapsulates a remote memory location.
 * In the current implementation, it contains the R_Key
 * of the remote memory region, and the offset into it
 * (so that the application does not have to worry about
 * alignment).
 */
typedef uint64_t	rdsv3_rdma_cookie_t;

struct rdsv3_iovec {
	uint64_t	addr;
	uint64_t	bytes;
};

struct rdsv3_get_mr_args {
	struct rdsv3_iovec	vec;
	uint64_t		cookie_addr;
	uint64_t		flags;
};

struct rdsv3_get_mr_for_dest_args {
	struct sockaddr_storage	dest_addr;
	struct rdsv3_iovec	vec;
	uint64_t		cookie_addr;
	uint64_t		flags;
};

struct rdsv3_free_mr_args {
	rdsv3_rdma_cookie_t	cookie;
	uint64_t		flags;
};

struct rdsv3_rdma_args {
	rdsv3_rdma_cookie_t	cookie;
	struct rdsv3_iovec	remote_vec;
	uint64_t		local_vec_addr;
	uint64_t		nr_local;
	uint64_t		flags;
	uint64_t		user_token;
};

struct rdsv3_rdma_notify {
	uint64_t	user_token;
	int32_t		status;
};

/*
 * RDMA completion status codes.
 * NOTE(review): presumably the values reported in
 * rdsv3_rdma_notify.status; confirm against the code that fills it in.
 */
#define	RDSV3_RDMA_SUCCESS	0
#define	RDSV3_RDMA_REMOTE_ERROR	1
#define	RDSV3_RDMA_CANCELED	2
#define	RDSV3_RDMA_DROPPED	3
#define	RDSV3_RDMA_OTHER_ERROR	4

/*
 * Common set of flags for all RDMA related structs
 */
#define	RDSV3_RDMA_READWRITE	0x0001
#define	RDSV3_RDMA_FENCE	0x0002	/* use FENCE for immediate send */
#define	RDSV3_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
#define	RDSV3_RDMA_USE_ONCE	0x0008	/* free MR after use */
#define	RDSV3_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
#define	RDSV3_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */

#ifdef	__cplusplus
}
#endif

#endif	/* _RDSV3_RDS_H */