/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2008 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the following
 * disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/*
 * Include this file if the application uses rdsv3 sockets.
 */

/*
 * This file contains definitions from the ofed rds.h and rds_rdma.h
 * header file.
 */
#ifndef	_RDSV3_RDS_H
#define	_RDSV3_RDS_H

#include <sys/types.h>

#ifdef	__cplusplus
extern "C" {
#endif

#define	RDS_IB_ABI_VERSION	0x301

#define	AF_RDS	AF_INET_OFFLOAD
#define	PF_RDS	AF_INET_OFFLOAD

#define	SOL_RDS	272

/*
 * setsockopt/getsockopt for SOL_RDS
 */
#define	RDSV3_CANCEL_SENT_TO	1
#define	RDSV3_GET_MR		2
#define	RDSV3_FREE_MR		3
/* deprecated: RDS_BARRIER 4 */
#define	RDSV3_RECVERR		5
#define	RDSV3_CONG_MONITOR	6

/*
 * Control message types for SOL_RDS.
 *
 * RDSV3_CMSG_RDMA_ARGS (sendmsg)
 *	Request a RDMA transfer to/from the specified
 *	memory ranges.
 *	The cmsg_data is a struct rdsv3_rdma_args.
 * RDSV3_CMSG_RDMA_DEST (recvmsg, sendmsg)
 *	Kernel informs application about intended
 *	source/destination of a RDMA transfer
 * RDSV3_CMSG_RDMA_MAP (sendmsg)
 *	Application asks kernel to map the given
 *	memory range into a IB MR, and send the
 *	R_Key along in an RDS extension header.
 *	The cmsg_data is a struct rdsv3_get_mr_args,
 *	the same as for the GET_MR setsockopt.
 * RDSV3_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDSV3_CMSG_CONG_UPDATE
 *	Carries a congestion update; see "Congestion monitoring"
 *	below.
 */
#define	RDSV3_CMSG_RDMA_ARGS	1
#define	RDSV3_CMSG_RDMA_DEST	2
#define	RDSV3_CMSG_RDMA_MAP	3
#define	RDSV3_CMSG_RDMA_STATUS	4
#define	RDSV3_CMSG_CONG_UPDATE	5

/*
 * RDMA related types
 */

/*
 * This encapsulates a remote memory location.
 * In the current implementation, it contains the R_Key
 * of the remote memory region, and the offset into it
 * (so that the application does not have to worry about
 * alignment).
 */
typedef uint64_t	rdsv3_rdma_cookie_t;

/* An address/length pair; both fields are 64 bits wide. */
struct rdsv3_iovec {
	uint64_t	addr;
	uint64_t	bytes;
};

/* Argument for RDSV3_GET_MR and RDSV3_CMSG_RDMA_MAP. */
struct rdsv3_get_mr_args {
	struct rdsv3_iovec	vec;
	uint64_t		cookie_addr;
	uint64_t		flags;	/* RDSV3_RDMA_* flags, see below */
};

struct rdsv3_free_mr_args {
	rdsv3_rdma_cookie_t	cookie;
	uint64_t		flags;	/* RDSV3_RDMA_* flags, see below */
};

/* The cmsg_data of a RDSV3_CMSG_RDMA_ARGS control message. */
struct rdsv3_rdma_args {
	rdsv3_rdma_cookie_t	cookie;
	struct rdsv3_iovec	remote_vec;
	uint64_t		local_vec_addr;
	uint64_t		nr_local;
	uint64_t		flags;	/* RDSV3_RDMA_* flags, see below */
	uint64_t		user_token;
};

struct rdsv3_rdma_notify {
	uint64_t	user_token;
	int32_t		status;		/* one of the status codes below */
};

#define	RDSV3_RDMA_SUCCESS	0
#define	RDSV3_RDMA_REMOTE_ERROR	1
#define	RDSV3_RDMA_CANCELED	2
#define	RDSV3_RDMA_DROPPED	3
#define	RDSV3_RDMA_OTHER_ERROR	4

/*
 * Common set of flags for all RDMA related structs
 */
#define	RDSV3_RDMA_READWRITE	0x0001
#define	RDSV3_RDMA_FENCE	0x0002	/* use FENCE for immediate send */
#define	RDSV3_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
#define	RDSV3_RDMA_USE_ONCE	0x0008	/* free MR after use */
#define	RDSV3_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
#define	RDSV3_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */

/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
 * level by exchanging a bitmap marking congested ports.
 * By default, a process sleeping in poll() is always woken
 * up when the congestion map is updated.
 * With explicit monitoring, an application can have more
 * fine-grained control.
 * The application installs a 64bit mask value in the socket,
 * where each bit corresponds to a group of ports.
 * When a congestion update arrives, RDS checks the set of
 * ports that are now uncongested against the bit mask
 * installed in the socket, and if they overlap, we queue a
 * cong_notification on the socket.
 *
 * To install the congestion monitor bitmask, use RDSV3_CONG_MONITOR
 * with the 64bit mask.
 * Congestion updates are received via RDSV3_CMSG_CONG_UPDATE
 * control messages.
 *
 * The correspondence between bits and ports is
 *	1 << (portnum % 64)
 *
 * The macro argument is parenthesized before the cast so that an
 * expression argument is converted as a whole, not just its first
 * operand.
 */
#define	RDSV3_CONG_MONITOR_SIZE	64
#define	RDSV3_CONG_MONITOR_BIT(port)	\
	(((unsigned int)(port)) % RDSV3_CONG_MONITOR_SIZE)
#define	RDSV3_CONG_MONITOR_MASK(port)	(1ULL << RDSV3_CONG_MONITOR_BIT(port))

/* rds-info related */

#define	RDSV3_INFO_FIRST		10000
#define	RDSV3_INFO_COUNTERS		10000
#define	RDSV3_INFO_CONNECTIONS		10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
#define	RDSV3_INFO_SEND_MESSAGES	10003
#define	RDSV3_INFO_RETRANS_MESSAGES	10004
#define	RDSV3_INFO_RECV_MESSAGES	10005
#define	RDSV3_INFO_SOCKETS		10006
#define	RDSV3_INFO_TCP_SOCKETS		10007
#define	RDSV3_INFO_IB_CONNECTIONS	10008
#define	RDSV3_INFO_CONNECTION_STATS	10009
#define	RDSV3_INFO_IWARP_CONNECTIONS	10010
#define	RDSV3_INFO_LAST			10010

/*
 * The rdsv3_info_* structures below are declared packed (no padding
 * between members).  When __lock_lint is defined the same members are
 * declared without the packing pragma and attribute.
 */

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_counter {
	uint8_t		name[32];
	uint64_t	value;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_counter {
	uint8_t		name[32];
	uint64_t	value;
};
#endif

#define	RDSV3_INFO_CONNECTION_FLAG_SENDING	0x01
#define	RDSV3_INFO_CONNECTION_FLAG_CONNECTING	0x02
#define	RDSV3_INFO_CONNECTION_FLAG_CONNECTED	0x04

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_connection {
	uint64_t	next_tx_seq;
	uint64_t	next_rx_seq;
	uint32_t	laddr;		/* network order */
	uint32_t	faddr;		/* network order */
	uint8_t		transport[15];	/* null term ascii */
	uint8_t		flags;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_connection {
	uint64_t	next_tx_seq;
	uint64_t	next_rx_seq;
	uint32_t	laddr;		/* network order */
	uint32_t	faddr;		/* network order */
	uint8_t		transport[15];	/* null term ascii */
	uint8_t		flags;
};
#endif

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_flow {
	uint32_t	laddr;		/* network order */
	uint32_t	faddr;		/* network order */
	uint32_t	bytes;
	uint16_t	lport;		/* network order */
	uint16_t	fport;		/* network order */
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_flow {
	uint32_t	laddr;		/* network order */
	uint32_t	faddr;		/* network order */
	uint32_t	bytes;
	uint16_t	lport;		/* network order */
	uint16_t	fport;		/* network order */
};
#endif

#define	RDSV3_INFO_MESSAGE_FLAG_ACK		0x01
#define	RDSV3_INFO_MESSAGE_FLAG_FAST_ACK	0x02

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_message {
	uint64_t	seq;
	uint32_t	len;
	uint32_t	laddr;		/* network order */
	uint32_t	faddr;		/* network order */
	uint16_t	lport;		/* network order */
	uint16_t	fport;		/* network order */
	uint8_t		flags;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_message {
	uint64_t	seq;
	uint32_t	len;
	uint32_t	laddr;		/* network order */
	uint32_t	faddr;		/* network order */
	uint16_t	lport;		/* network order */
	uint16_t	fport;		/* network order */
	uint8_t		flags;
};
#endif

#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_socket {
	uint32_t	sndbuf;
	uint32_t	bound_addr;	/* network order */
	uint32_t	connected_addr;	/* network order */
	uint16_t	bound_port;	/* network order */
	uint16_t	connected_port;	/* network order */
	uint32_t	rcvbuf;
	uint64_t	inum;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_socket {
	uint32_t	sndbuf;
	uint32_t	bound_addr;	/* network order */
	uint32_t	connected_addr;	/* network order */
	uint16_t	bound_port;	/* network order */
	uint16_t	connected_port;	/* network order */
	uint32_t	rcvbuf;
	uint64_t	inum;
};
#endif

/* Same members as struct rdsv3_info_socket, without the trailing inum. */
#ifndef __lock_lint
#pragma pack(1)
struct rdsv3_info_socket_v1 {
	uint32_t	sndbuf;
	uint32_t	bound_addr;	/* network order */
	uint32_t	connected_addr;	/* network order */
	uint16_t	bound_port;	/* network order */
	uint16_t	connected_port;	/* network order */
	uint32_t	rcvbuf;
} __attribute__((packed));
#pragma pack()
#else
struct rdsv3_info_socket_v1 {
	uint32_t	sndbuf;
	uint32_t	bound_addr;	/* network order */
	uint32_t	connected_addr;	/* network order */
	uint16_t	bound_port;	/* network order */
	uint16_t	connected_port;	/* network order */
	uint32_t	rcvbuf;
};
#endif

#define	RDS_IB_GID_LEN	16
struct rdsv3_info_rdma_connection {
	uint32_t	src_addr;	/* network order */
	uint32_t	dst_addr;	/* network order */
	uint8_t		src_gid[RDS_IB_GID_LEN];
	uint8_t		dst_gid[RDS_IB_GID_LEN];

	uint32_t	max_send_wr;
	uint32_t	max_recv_wr;
	uint32_t	max_send_sge;
	uint32_t	rdma_mr_max;
	uint32_t	rdma_mr_size;
};

/*
 * Alternate spellings: the "ib" struct tag and the "fmr" member names
 * resolve to the rdma/mr names declared above.
 */
#define	rdsv3_info_ib_connection	rdsv3_info_rdma_connection
#define	rdma_fmr_max			rdma_mr_max
#define	rdma_fmr_size			rdma_mr_size

#ifdef	__cplusplus
}
#endif

#endif	/* _RDSV3_RDS_H */