/* xref: /linux/include/uapi/linux/rds.h (revision 3289025aedc018f8fd9d0e37fb9efa0c6d531ffa) */
/*
 * Copyright (c) 2008 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#ifndef _LINUX_RDS_H
#define _LINUX_RDS_H

#include <linux/types.h>

/*
 * ABI version constant. The name suggests it belongs to the RDS
 * InfiniBand transport; nothing else in this header uses it.
 */
#define RDS_IB_ABI_VERSION		0x301

/* Socket option level for the RDS setsockopt/getsockopt option names. */
#define SOL_RDS				276

/*
 * setsockopt/getsockopt option names for level SOL_RDS.
 *
 * Value 4 was RDS_BARRIER and is deprecated; value 9 is not defined in
 * this header.
 */
#define RDS_CANCEL_SENT_TO		1
#define RDS_GET_MR			2
#define RDS_FREE_MR			3
/* deprecated: RDS_BARRIER 4 */
#define RDS_RECVERR			5
#define RDS_CONG_MONITOR		6
#define RDS_GET_MR_FOR_DEST		7
#define SO_RDS_TRANSPORT		8

/*
 * Socket option to tap receive path latency.
 *	Level SOL_RDS, option name SO_RDS_MSG_RXPATH_LATENCY.
 *	The option value is a struct rds_rx_trace_so.
 */
#define SO_RDS_MSG_RXPATH_LATENCY	10


/*
 * Supported values for SO_RDS_TRANSPORT.
 *
 * RDS_TRANS_COUNT is one past the highest transport value listed here.
 * RDS_TRANS_NONE is all-ones (~0, i.e. -1 as an int), so it cannot
 * collide with a real transport value.
 */
#define RDS_TRANS_IB	0
#define RDS_TRANS_IWARP	1
#define RDS_TRANS_TCP	2
#define RDS_TRANS_COUNT	3
#define RDS_TRANS_NONE	(~0)

/*
 * Control message types for SOL_RDS.
 *
 * RDS_CMSG_RDMA_ARGS (sendmsg)
 *	Request an RDMA transfer to/from the specified
 *	memory ranges.
 *	The cmsg_data is a struct rds_rdma_args.
 * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg)
 *	Kernel informs application about intended
 *	source/destination of an RDMA transfer.
 * RDS_CMSG_RDMA_MAP (sendmsg)
 *	Application asks kernel to map the given
 *	memory range into an IB MR, and send the
 *	R_Key along in an RDS extension header.
 *	The cmsg_data is a struct rds_get_mr_args,
 *	the same as for the GET_MR setsockopt.
 * RDS_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDS_CMSG_RXPATH_LATENCY (recvmsg)
 *	Returns RDS message latencies in various stages of the receive
 *	path in ns. It is set per socket using the
 *	SO_RDS_MSG_RXPATH_LATENCY socket option. Legitimate points are
 *	defined in enum rds_message_rxpath_latency. More points can be
 *	added in future. The cmsg_data is a struct rds_cmsg_rx_trace.
 *
 * Value 10 is not assigned in this header.
 */
#define RDS_CMSG_RDMA_ARGS		1
#define RDS_CMSG_RDMA_DEST		2
#define RDS_CMSG_RDMA_MAP		3
#define RDS_CMSG_RDMA_STATUS		4
#define RDS_CMSG_CONG_UPDATE		5
#define RDS_CMSG_ATOMIC_FADD		6
#define RDS_CMSG_ATOMIC_CSWP		7
#define RDS_CMSG_MASKED_ATOMIC_FADD	8
#define RDS_CMSG_MASKED_ATOMIC_CSWP	9
#define RDS_CMSG_RXPATH_LATENCY		11

/*
 * RDS_INFO_* identifiers. RDS_INFO_FIRST and RDS_INFO_LAST equal the
 * lowest and highest identifiers defined here, so they bracket the
 * valid range.
 * NOTE(review): presumably passed as getsockopt option names - confirm.
 */
#define RDS_INFO_FIRST			10000
#define RDS_INFO_COUNTERS		10000
#define RDS_INFO_CONNECTIONS		10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
#define RDS_INFO_SEND_MESSAGES		10003
#define RDS_INFO_RETRANS_MESSAGES	10004
#define RDS_INFO_RECV_MESSAGES		10005
#define RDS_INFO_SOCKETS		10006
#define RDS_INFO_TCP_SOCKETS		10007
#define RDS_INFO_IB_CONNECTIONS		10008
#define RDS_INFO_CONNECTION_STATS	10009
#define RDS_INFO_IWARP_CONNECTIONS	10010
#define RDS_INFO_LAST			10010

/*
 * A named 64-bit counter.
 *
 * The struct is packed, so it is exactly sizeof(name) + sizeof(value)
 * = 40 bytes, with value at byte offset 32 and no padding.
 * NOTE(review): presumably the record format returned for
 * RDS_INFO_COUNTERS - confirm against the implementation.
 */
struct rds_info_counter {
	uint8_t		name[32];
	uint64_t	value;
} __attribute__((packed));

/*
 * Single-bit connection state flags.
 * NOTE(review): presumably OR-ed into rds_info_connection.flags - confirm.
 */
#define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
#define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
#define RDS_INFO_CONNECTION_FLAG_CONNECTED	0x04

/* Size in bytes of the transport name buffer in struct rds_info_connection. */
#define TRANSNAMSIZ	16

130607ca46eSDavid Howells struct rds_info_connection {
131607ca46eSDavid Howells 	uint64_t	next_tx_seq;
132607ca46eSDavid Howells 	uint64_t	next_rx_seq;
133607ca46eSDavid Howells 	__be32		laddr;
134607ca46eSDavid Howells 	__be32		faddr;
135607ca46eSDavid Howells 	uint8_t	transport[TRANSNAMSIZ];		/* null term ascii */
136607ca46eSDavid Howells 	uint8_t	flags;
137607ca46eSDavid Howells } __attribute__((packed));
138607ca46eSDavid Howells 
/*
 * Single-bit message flags.
 * NOTE(review): presumably OR-ed into rds_info_message.flags - confirm.
 */
#define RDS_INFO_MESSAGE_FLAG_ACK		0x01
#define RDS_INFO_MESSAGE_FLAG_FAST_ACK		0x02

142607ca46eSDavid Howells struct rds_info_message {
143607ca46eSDavid Howells 	uint64_t	seq;
144607ca46eSDavid Howells 	uint32_t	len;
145607ca46eSDavid Howells 	__be32		laddr;
146607ca46eSDavid Howells 	__be32		faddr;
147607ca46eSDavid Howells 	__be16		lport;
148607ca46eSDavid Howells 	__be16		fport;
149607ca46eSDavid Howells 	uint8_t	flags;
150607ca46eSDavid Howells } __attribute__((packed));
151607ca46eSDavid Howells 
152607ca46eSDavid Howells struct rds_info_socket {
153607ca46eSDavid Howells 	uint32_t	sndbuf;
154607ca46eSDavid Howells 	__be32		bound_addr;
155607ca46eSDavid Howells 	__be32		connected_addr;
156607ca46eSDavid Howells 	__be16		bound_port;
157607ca46eSDavid Howells 	__be16		connected_port;
158607ca46eSDavid Howells 	uint32_t	rcvbuf;
159607ca46eSDavid Howells 	uint64_t	inum;
160607ca46eSDavid Howells } __attribute__((packed));
161607ca46eSDavid Howells 
162607ca46eSDavid Howells struct rds_info_tcp_socket {
163607ca46eSDavid Howells 	__be32          local_addr;
164607ca46eSDavid Howells 	__be16          local_port;
165607ca46eSDavid Howells 	__be32          peer_addr;
166607ca46eSDavid Howells 	__be16          peer_port;
167607ca46eSDavid Howells 	uint64_t       hdr_rem;
168607ca46eSDavid Howells 	uint64_t       data_rem;
169607ca46eSDavid Howells 	uint32_t       last_sent_nxt;
170607ca46eSDavid Howells 	uint32_t       last_expected_una;
171607ca46eSDavid Howells 	uint32_t       last_seen_una;
172607ca46eSDavid Howells } __attribute__((packed));
173607ca46eSDavid Howells 
174607ca46eSDavid Howells #define RDS_IB_GID_LEN	16
175607ca46eSDavid Howells struct rds_info_rdma_connection {
176607ca46eSDavid Howells 	__be32		src_addr;
177607ca46eSDavid Howells 	__be32		dst_addr;
178607ca46eSDavid Howells 	uint8_t		src_gid[RDS_IB_GID_LEN];
179607ca46eSDavid Howells 	uint8_t		dst_gid[RDS_IB_GID_LEN];
180607ca46eSDavid Howells 
181607ca46eSDavid Howells 	uint32_t	max_send_wr;
182607ca46eSDavid Howells 	uint32_t	max_recv_wr;
183607ca46eSDavid Howells 	uint32_t	max_send_sge;
184607ca46eSDavid Howells 	uint32_t	rdma_mr_max;
185607ca46eSDavid Howells 	uint32_t	rdma_mr_size;
186607ca46eSDavid Howells };
187607ca46eSDavid Howells 
/*
 * RDS message Receive Path Latency points.
 *
 * RDS_MSG_RX_DGRAM_TRACE_MAX is not a trace point: it is the number of
 * points and sizes the arrays in the two structs below.
 */
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
	RDS_MSG_RX_DGRAM_REASSEMBLE,
	RDS_MSG_RX_DGRAM_DELIVERED,
	RDS_MSG_RX_DGRAM_TRACE_MAX
};

/*
 * Option value for the SO_RDS_MSG_RXPATH_LATENCY socket option.
 *
 * Uses the same fixed-width uint*_t types as every other struct in this
 * header; the kernel-internal u8/u64 aliases are not available to
 * userspace includers.
 * NOTE(review): rx_traces is presumably the number of valid entries in
 * rx_trace_pos, each entry an enum rds_message_rxpath_latency value -
 * confirm against the implementation.
 */
struct rds_rx_trace_so {
	uint8_t rx_traces;
	uint8_t rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
 * cmsg_data for RDS_CMSG_RXPATH_LATENCY.
 *
 * rx_trace holds the latencies, in ns. On ABIs where uint64_t is 8-byte
 * aligned the compiler inserts padding between rx_trace_pos and
 * rx_trace; the struct is not packed.
 */
struct rds_cmsg_rx_trace {
	uint8_t rx_traces;
	uint8_t rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
	uint64_t rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
 * level by exchanging a bitmap marking congested ports.
 * By default, a process sleeping in poll() is always woken
 * up when the congestion map is updated.
 * With explicit monitoring, an application can have more
 * fine-grained control.
 * The application installs a 64bit mask value in the socket,
 * where each bit corresponds to a group of ports.
 * When a congestion update arrives, RDS checks the set of
 * ports that are now uncongested against the bit mask
 * installed in the socket, and if they overlap, we queue a
 * cong_notification on the socket.
 *
 * To install the congestion monitor bitmask, use RDS_CONG_MONITOR
 * with the 64bit mask.
 * Congestion updates are received via RDS_CMSG_CONG_UPDATE
 * control messages.
 *
 * The correspondence between bits and ports is
 *	1 << (portnum % 64)
 *
 * RDS_CONG_MONITOR_BIT(port) yields the bit index (0..63) for a port and
 * RDS_CONG_MONITOR_MASK(port) the corresponding 64-bit mask. The macro
 * argument is parenthesized so that the unsigned int cast applies to
 * the whole argument expression, not just its first operand.
 */
#define RDS_CONG_MONITOR_SIZE	64
#define RDS_CONG_MONITOR_BIT(port)  (((unsigned int)(port)) % RDS_CONG_MONITOR_SIZE)
#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))

/*
 * RDMA related types
 */

/*
 * This encapsulates a remote memory location.
 * In the current implementation, it contains the R_Key
 * of the remote memory region, and the offset into it
 * (so that the application does not have to worry about
 * alignment).
 * The cookie is a plain 64-bit integer.
 */
typedef uint64_t	rds_rdma_cookie_t;

/*
 * A memory range expressed as two 64-bit values, so the layout is the
 * same for 32-bit and 64-bit callers.
 * NOTE(review): addr is presumably a user virtual address and bytes the
 * length of the range - confirm.
 */
struct rds_iovec {
	uint64_t	addr;
	uint64_t	bytes;
};

252607ca46eSDavid Howells struct rds_get_mr_args {
253607ca46eSDavid Howells 	struct rds_iovec vec;
254607ca46eSDavid Howells 	uint64_t	cookie_addr;
255607ca46eSDavid Howells 	uint64_t	flags;
256607ca46eSDavid Howells };
257607ca46eSDavid Howells 
258607ca46eSDavid Howells struct rds_get_mr_for_dest_args {
259607ca46eSDavid Howells 	struct sockaddr_storage	dest_addr;
260607ca46eSDavid Howells 	struct rds_iovec 	vec;
261607ca46eSDavid Howells 	uint64_t		cookie_addr;
262607ca46eSDavid Howells 	uint64_t		flags;
263607ca46eSDavid Howells };
264607ca46eSDavid Howells 
265607ca46eSDavid Howells struct rds_free_mr_args {
266607ca46eSDavid Howells 	rds_rdma_cookie_t cookie;
267607ca46eSDavid Howells 	uint64_t	flags;
268607ca46eSDavid Howells };
269607ca46eSDavid Howells 
270607ca46eSDavid Howells struct rds_rdma_args {
271607ca46eSDavid Howells 	rds_rdma_cookie_t cookie;
272607ca46eSDavid Howells 	struct rds_iovec remote_vec;
273607ca46eSDavid Howells 	uint64_t	local_vec_addr;
274607ca46eSDavid Howells 	uint64_t	nr_local;
275607ca46eSDavid Howells 	uint64_t	flags;
276607ca46eSDavid Howells 	uint64_t	user_token;
277607ca46eSDavid Howells };
278607ca46eSDavid Howells 
279607ca46eSDavid Howells struct rds_atomic_args {
280607ca46eSDavid Howells 	rds_rdma_cookie_t cookie;
281607ca46eSDavid Howells 	uint64_t 	local_addr;
282607ca46eSDavid Howells 	uint64_t 	remote_addr;
283607ca46eSDavid Howells 	union {
284607ca46eSDavid Howells 		struct {
285607ca46eSDavid Howells 			uint64_t	compare;
286607ca46eSDavid Howells 			uint64_t	swap;
287607ca46eSDavid Howells 		} cswp;
288607ca46eSDavid Howells 		struct {
289607ca46eSDavid Howells 			uint64_t	add;
290607ca46eSDavid Howells 		} fadd;
291607ca46eSDavid Howells 		struct {
292607ca46eSDavid Howells 			uint64_t	compare;
293607ca46eSDavid Howells 			uint64_t	swap;
294607ca46eSDavid Howells 			uint64_t	compare_mask;
295607ca46eSDavid Howells 			uint64_t	swap_mask;
296607ca46eSDavid Howells 		} m_cswp;
297607ca46eSDavid Howells 		struct {
298607ca46eSDavid Howells 			uint64_t	add;
299607ca46eSDavid Howells 			uint64_t	nocarry_mask;
300607ca46eSDavid Howells 		} m_fadd;
301607ca46eSDavid Howells 	};
302607ca46eSDavid Howells 	uint64_t	flags;
303607ca46eSDavid Howells 	uint64_t	user_token;
304607ca46eSDavid Howells };
305607ca46eSDavid Howells 
/*
 * Completion notification: a 64-bit token plus a signed 32-bit status.
 * The struct is not packed, so ABIs that align uint64_t to 8 bytes add
 * trailing padding after status.
 * NOTE(review): presumably the cmsg_data of RDS_CMSG_RDMA_STATUS, with
 * status holding one of the RDS_RDMA_* status codes - confirm.
 */
struct rds_rdma_notify {
	uint64_t	user_token;
	int32_t		status;
};

/*
 * RDMA operation status codes; 0 is success, non-zero values name a
 * failure class.
 * NOTE(review): presumably the values reported in
 * rds_rdma_notify.status - confirm.
 */
#define RDS_RDMA_SUCCESS	0
#define RDS_RDMA_REMOTE_ERROR	1
#define RDS_RDMA_CANCELED	2
#define RDS_RDMA_DROPPED	3
#define RDS_RDMA_OTHER_ERROR	4

/*
 * Common set of flags for all RDMA related structs.
 * Each flag is a distinct single bit, so flags can be OR-ed together.
 */
#define RDS_RDMA_READWRITE	0x0001
#define RDS_RDMA_FENCE		0x0002	/* use FENCE for immediate send */
#define RDS_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
#define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
#define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
#define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
#define RDS_RDMA_SILENT		0x0040	/* Do not interrupt remote */

#endif /* _LINUX_RDS_H */