1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 2008 Oracle. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 58 /* 59 * Include this file if the application uses rdsv3 sockets. 60 */ 61 62 /* 63 * This file contains definitions from the ofed rds.h and rds_rdma.h 64 * header file. 65 */ 66 #ifndef _RDSV3_RDS_H 67 #define _RDSV3_RDS_H 68 69 #include <sys/types.h> 70 71 #ifdef __cplusplus 72 extern "C" { 73 #endif 74 75 #define RDS_IB_ABI_VERSION 0x301 76 77 #define AF_RDS AF_INET_OFFLOAD 78 #define PF_RDS AF_INET_OFFLOAD 79 80 #define SOL_RDS 272 81 82 /* 83 * setsockopt/getsockopt for SOL_RDS 84 */ 85 #define RDSV3_CANCEL_SENT_TO 1 86 #define RDSV3_GET_MR 2 87 #define RDSV3_FREE_MR 3 88 /* deprecated: RDS_BARRIER 4 */ 89 #define RDSV3_RECVERR 5 90 #define RDSV3_CONG_MONITOR 6 91 #define RDSV3_GET_MR_FOR_DEST 7 92 93 /* 94 * Control message types for SOL_RDS. 95 * 96 * RDS_CMSG_RDMA_ARGS (sendmsg) 97 * Request a RDMA transfer to/from the specified 98 * memory ranges. 99 * The cmsg_data is a struct rdsv3_rdma_args. 100 * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg) 101 * Kernel informs application about intended 102 * source/destination of a RDMA transfer 103 * RDS_CMSG_RDMA_MAP (sendmsg) 104 * Application asks kernel to map the given 105 * memory range into a IB MR, and send the 106 * R_Key along in an RDS extension header. 107 * The cmsg_data is a struct rdsv3_get_mr_args, 108 * the same as for the GET_MR setsockopt. 109 * RDS_CMSG_RDMA_STATUS (recvmsg) 110 * Returns the status of a completed RDMA operation. 111 */ 112 #define RDSV3_CMSG_RDMA_ARGS 1 113 #define RDSV3_CMSG_RDMA_DEST 2 114 #define RDSV3_CMSG_RDMA_MAP 3 115 #define RDSV3_CMSG_RDMA_STATUS 4 116 #define RDSV3_CMSG_CONG_UPDATE 5 117 118 /* rds-info related */ 119 120 #define RDSV3_INFO_FIRST 10000 121 #define RDSV3_INFO_COUNTERS 10000 122 #define RDSV3_INFO_CONNECTIONS 10001 123 /* 10002 aka RDS_INFO_FLOWS is deprecated */ 124 #define RDSV3_INFO_SEND_MESSAGES 10003 125 #define RDSV3_INFO_RETRANS_MESSAGES 10004 126 #define RDSV3_INFO_RECV_MESSAGES 10005 127 #define RDSV3_INFO_SOCKETS 10006 128 #define RDSV3_INFO_TCP_SOCKETS 10007 129 #define RDSV3_INFO_IB_CONNECTIONS 10008 130 #define RDSV3_INFO_CONNECTION_STATS 10009 131 #define RDSV3_INFO_IWARP_CONNECTIONS 10010 132 #define RDSV3_INFO_LAST 10010 133 134 struct rds_info_arg { 135 uint64_t lenp; 136 uint64_t datap; 137 }; 138 139 #ifndef __lock_lint 140 #pragma pack(1) 141 struct rdsv3_info_counter { 142 uint8_t name[32]; 143 uint64_t value; 144 } __attribute__((packed)); 145 #pragma pack() 146 #else 147 struct rdsv3_info_counter { 148 uint8_t name[32]; 149 uint64_t value; 150 }; 151 #endif 152 153 #define RDSV3_INFO_CONNECTION_FLAG_SENDING 0x01 154 #define RDSV3_INFO_CONNECTION_FLAG_CONNECTING 0x02 155 #define RDSV3_INFO_CONNECTION_FLAG_CONNECTED 0x04 156 157 #define TRANSNAMSIZ 16 158 159 #ifndef __lock_lint 160 #pragma pack(1) 161 struct rdsv3_info_connection { 162 uint64_t next_tx_seq; 163 uint64_t next_rx_seq; 164 uint32_t laddr; /* network order */ 165 uint32_t faddr; /* network order */ 166 uint8_t transport[15]; /* null term ascii */ 167 uint8_t flags; 168 } __attribute__((packed)); 169 #pragma pack() 170 #else 171 struct rdsv3_info_connection { 172 uint64_t next_tx_seq; 173 uint64_t next_rx_seq; 174 uint32_t laddr; /* network order */ 175 uint32_t faddr; /* network order */ 176 uint8_t transport[15]; /* null term ascii */ 177 uint8_t flags; 178 }; 179 #endif 180 181 #ifndef __lock_lint 182 #pragma pack(1) 183 struct rdsv3_info_flow { 184 uint32_t laddr; /* network order */ 185 uint32_t faddr; /* network order */ 186 uint32_t bytes; 187 uint16_t lport; /* network order */ 188 uint16_t fport; /* network order */ 189 } __attribute__((packed)); 190 #pragma pack() 191 #else 192 struct rdsv3_info_flow { 193 uint32_t laddr; /* network order */ 194 uint32_t faddr; /* network order */ 195 uint32_t bytes; 196 uint16_t lport; /* network order */ 197 uint16_t fport; /* network order */ 198 }; 199 #endif 200 201 #define RDSV3_INFO_MESSAGE_FLAG_ACK 0x01 202 #define RDSV3_INFO_MESSAGE_FLAG_FAST_ACK 0x02 203 204 #ifndef __lock_lint 205 #pragma pack(1) 206 struct rdsv3_info_message { 207 uint64_t seq; 208 uint32_t len; 209 uint32_t laddr; /* network order */ 210 uint32_t faddr; /* network order */ 211 uint16_t lport; /* network order */ 212 uint16_t fport; /* network order */ 213 uint8_t flags; 214 } __attribute__((packed)); 215 #pragma pack() 216 #else 217 struct rdsv3_info_message { 218 uint64_t seq; 219 uint32_t len; 220 uint32_t laddr; /* network order */ 221 uint32_t faddr; /* network order */ 222 uint16_t lport; /* network order */ 223 uint16_t fport; /* network order */ 224 uint8_t flags; 225 }; 226 #endif 227 228 #ifndef __lock_lint 229 #pragma pack(1) 230 struct rdsv3_info_socket { 231 uint32_t sndbuf; 232 uint32_t bound_addr; /* network order */ 233 uint32_t connected_addr; /* network order */ 234 uint16_t bound_port; /* network order */ 235 uint16_t connected_port; /* network order */ 236 uint32_t rcvbuf; 237 uint64_t inum; 238 } __attribute__((packed)); 239 #pragma pack() 240 #else 241 struct rdsv3_info_socket { 242 uint32_t sndbuf; 243 uint32_t bound_addr; /* network order */ 244 uint32_t connected_addr; /* network order */ 245 uint16_t bound_port; /* network order */ 246 uint16_t connected_port; /* network order */ 247 uint32_t rcvbuf; 248 uint64_t inum; 249 }; 250 #endif 251 252 #ifndef __lock_lint 253 #pragma pack(1) 254 struct rdsv3_info_socket_v1 { 255 uint32_t sndbuf; 256 uint32_t bound_addr; /* network order */ 257 uint32_t connected_addr; /* network order */ 258 uint16_t bound_port; /* network order */ 259 uint16_t connected_port; /* network order */ 260 uint32_t rcvbuf; 261 } __attribute__((packed)); 262 #pragma pack() 263 #else 264 struct rdsv3_info_socket_v1 { 265 uint32_t sndbuf; 266 uint32_t bound_addr; /* network order */ 267 uint32_t connected_addr; /* network order */ 268 uint16_t bound_port; /* network order */ 269 uint16_t connected_port; /* network order */ 270 uint32_t rcvbuf; 271 }; 272 #endif 273 274 #define RDS_IB_GID_LEN 16 275 struct rdsv3_info_rdma_connection { 276 uint32_t src_addr; /* network order */ 277 uint32_t dst_addr; /* network order */ 278 uint8_t src_gid[RDS_IB_GID_LEN]; 279 uint8_t dst_gid[RDS_IB_GID_LEN]; 280 281 uint32_t max_send_wr; 282 uint32_t max_recv_wr; 283 uint32_t max_send_sge; 284 uint32_t rdma_mr_max; 285 uint32_t rdma_mr_size; 286 }; 287 288 #define rdsv3_info_ib_connection rdsv3_info_rdma_connection 289 #define rdma_fmr_max rdma_mr_max 290 #define rdma_fmr_size rdma_mr_size 291 292 /* 293 * Congestion monitoring. 294 * Congestion control in RDS happens at the host connection 295 * level by exchanging a bitmap marking congested ports. 296 * By default, a process sleeping in poll() is always woken 297 * up when the congestion map is updated. 298 * With explicit monitoring, an application can have more 299 * fine-grained control. 300 * The application installs a 64bit mask value in the socket, 301 * where each bit corresponds to a group of ports. 302 * When a congestion update arrives, RDS checks the set of 303 * ports that are now uncongested against the list bit mask 304 * installed in the socket, and if they overlap, we queue a 305 * cong_notification on the socket. 306 * 307 * To install the congestion monitor bitmask, use RDS_CONG_MONITOR 308 * with the 64bit mask. 309 * Congestion updates are received via RDS_CMSG_CONG_UPDATE 310 * control messages. 311 * 312 * The correspondence between bits and ports is 313 * 1 << (portnum % 64) 314 */ 315 #define RDSV3_CONG_MONITOR_SIZE 64 316 #define RDSV3_CONG_MONITOR_BIT(port) \ 317 (((unsigned int) port) % RDSV3_CONG_MONITOR_SIZE) 318 #define RDSV3_CONG_MONITOR_MASK(port) (1ULL << RDSV3_CONG_MONITOR_BIT(port)) 319 320 /* 321 * RDMA related types 322 */ 323 324 /* 325 * This encapsulates a remote memory location. 326 * In the current implementation, it contains the R_Key 327 * of the remote memory region, and the offset into it 328 * (so that the application does not have to worry about 329 * alignment). 330 */ 331 typedef uint64_t rdsv3_rdma_cookie_t; 332 333 struct rdsv3_iovec { 334 uint64_t addr; 335 uint64_t bytes; 336 }; 337 338 struct rdsv3_get_mr_args { 339 struct rdsv3_iovec vec; 340 uint64_t cookie_addr; 341 uint64_t flags; 342 }; 343 344 #if 1 /* 1 at 1.5.1 */ 345 #include <sys/socket_impl.h> 346 struct rdsv3_get_mr_for_dest_args { 347 struct sockaddr_storage dest_addr; 348 struct rdsv3_iovec vec; 349 uint64_t cookie_addr; 350 uint64_t flags; 351 }; 352 #endif 353 354 struct rdsv3_free_mr_args { 355 rdsv3_rdma_cookie_t cookie; 356 uint64_t flags; 357 }; 358 359 struct rdsv3_rdma_args { 360 rdsv3_rdma_cookie_t cookie; 361 struct rdsv3_iovec remote_vec; 362 uint64_t local_vec_addr; 363 uint64_t nr_local; 364 uint64_t flags; 365 uint64_t user_token; 366 }; 367 368 struct rdsv3_rdma_notify { 369 uint64_t user_token; 370 int32_t status; 371 }; 372 373 #define RDSV3_RDMA_SUCCESS 0 374 #define RDSV3_RDMA_REMOTE_ERROR 1 375 #define RDSV3_RDMA_CANCELED 2 376 #define RDSV3_RDMA_DROPPED 3 377 #define RDSV3_RDMA_OTHER_ERROR 4 378 379 /* 380 * Common set of flags for all RDMA related structs 381 */ 382 #define RDSV3_RDMA_READWRITE 0x0001 383 #define RDSV3_RDMA_FENCE 0x0002 /* use FENCE for immediate send */ 384 #define RDSV3_RDMA_INVALIDATE 0x0004 /* invalidate R_Key after freeing MR */ 385 #define RDSV3_RDMA_USE_ONCE 0x0008 /* free MR after use */ 386 #define RDSV3_RDMA_DONTWAIT 0x0010 /* Don't wait in SET_BARRIER */ 387 #define RDSV3_RDMA_NOTIFY_ME 0x0020 /* Notify when operation completes */ 388 389 #ifdef __cplusplus 390 } 391 #endif 392 393 #endif /* _RDSV3_RDS_H */ 394