xref: /linux/drivers/infiniband/ulp/rtrs/rtrs-pri.h (revision c94cd9508b1335b949fd13ebd269313c65492df0)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * RDMA Transport Layer
4  *
5  * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
6  * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
7  * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
8  */
9 
10 #ifndef RTRS_PRI_H
11 #define RTRS_PRI_H
12 
13 #include <linux/uuid.h>
14 #include <rdma/rdma_cm.h>
15 #include <rdma/ib_verbs.h>
16 #include <rdma/ib.h>
17 
18 #include "rtrs.h"
19 
/* RTRS protocol version, split into its major and minor components. */
#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

/* String form of the version: stringified major, a dot, stringified minor. */
#define RTRS_PROTO_VER_STRING \
	__stringify(RTRS_PROTO_VER_MAJOR) "." __stringify(RTRS_PROTO_VER_MINOR)
25 
/*
 * Upper bound for sess_queue_depth.
 *
 * The IB immediate payload spans 2^28 values (MAX_IMM_PAYL_BITS) and the
 * minimum chunk size is taken to be 4096 (2^12). In theory the maximum
 * sess_queue_depth is therefore 65535 (2^16 - 1), because queue_depth in
 * struct rtrs_msg_conn_rsp is an le16 field. The practical maximum lies
 * somewhere between 1 and 65535 and depends on the system.
 *
 * NOTE(review): MIN_CHUNK_SIZE in this header is 8192, not 4096 - confirm
 * which minimum chunk size this reasoning is meant to use.
 */
#define MAX_SESS_QUEUE_DEPTH 65535
35 
/*
 * Field widths and masks for the 32-bit immediate value. rtrs_to_imm()
 * places the type field above the payload field.
 */
enum rtrs_imm_const {
	/* Field widths, in bits. */
	MAX_IMM_TYPE_BITS = 4,
	MAX_IMM_PAYL_BITS = 28,

	/* All-ones masks of the corresponding widths. */
	MAX_IMM_TYPE_MASK = (1 << MAX_IMM_TYPE_BITS) - 1,
	MAX_IMM_PAYL_MASK = (1 << MAX_IMM_PAYL_BITS) - 1,
};
42 
/* Values carried in the type field of the immediate data. */
enum rtrs_imm_type {
	/* IO messages */
	RTRS_IO_REQ_IMM       = 0,	/* client to server */
	RTRS_IO_RSP_IMM       = 1,	/* server to client */
	RTRS_IO_RSP_W_INV_IMM = 2,	/* server to client */

	/* HB: HeartBeat */
	RTRS_HB_MSG_IMM       = 8,
	RTRS_HB_ACK_IMM       = 9,

	/* One past the highest type value in use. */
	RTRS_LAST_IMM,
};
53 
54 enum {
55 	SERVICE_CON_QUEUE_DEPTH = 512,
56 
57 	MAX_PATHS_NUM = 128,
58 
59 	MIN_CHUNK_SIZE = 8192,
60 
61 	RTRS_HB_INTERVAL_MS = 5000,
62 	RTRS_HB_MISSED_MAX = 5,
63 
64 	RTRS_MAGIC = 0x1BBD,
65 	RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
66 };
67 
struct rtrs_ib_dev;

/*
 * Callback table referenced from struct rtrs_rdma_dev_pd. Both hooks receive
 * the struct rtrs_ib_dev they act on; only init can report a status.
 */
struct rtrs_rdma_dev_pd_ops {
	int  (*init)(struct rtrs_ib_dev *dev);
	void (*deinit)(struct rtrs_ib_dev *dev);
};
74 
75 struct rtrs_rdma_dev_pd {
76 	struct mutex		mutex;
77 	struct list_head	list;
78 	enum ib_pd_flags	pd_flags;
79 	const struct rtrs_rdma_dev_pd_ops *ops;
80 };
81 
82 struct rtrs_ib_dev {
83 	struct ib_device	 *ib_dev;
84 	struct ib_pd		 *ib_pd;
85 	struct kref		 ref;
86 	struct list_head	 entry;
87 	struct rtrs_rdma_dev_pd *pool;
88 	struct ib_event_handler	 event_handler;
89 };
90 
91 struct rtrs_con {
92 	struct rtrs_path	*path;
93 	struct ib_qp		*qp;
94 	struct ib_cq		*cq;
95 	struct rdma_cm_id	*cm_id;
96 	unsigned int		cid;
97 	int                     nr_cqe;
98 	atomic_t		wr_cnt;
99 	atomic_t		sq_wr_avail;
100 };
101 
102 struct rtrs_path {
103 	struct list_head	entry;
104 	struct sockaddr_storage dst_addr;
105 	struct sockaddr_storage src_addr;
106 	char			sessname[NAME_MAX];
107 	uuid_t			uuid;
108 	struct rtrs_con	**con;
109 	unsigned int		con_num;
110 	unsigned int		irq_con_num;
111 	unsigned int		recon_cnt;
112 	unsigned int		signal_interval;
113 	struct rtrs_ib_dev	*dev;
114 	int			dev_ref;
115 	struct ib_cqe		*hb_cqe;
116 	void			(*hb_err_handler)(struct rtrs_con *con);
117 	struct workqueue_struct *hb_wq;
118 	struct delayed_work	hb_dwork;
119 	unsigned int		hb_interval_ms;
120 	unsigned int		hb_missed_cnt;
121 	unsigned int		hb_missed_max;
122 	ktime_t			hb_last_sent;
123 	ktime_t			hb_cur_latency;
124 };
125 
126 /* rtrs information unit */
127 struct rtrs_iu {
128 	struct ib_cqe           cqe;
129 	dma_addr_t              dma_addr;
130 	void                    *buf;
131 	size_t                  size;
132 	enum dma_data_direction direction;
133 };
134 
/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:	Additional info request, client to server
 * @RTRS_MSG_INFO_RSP:	Additional info response, server to client
 * @RTRS_MSG_WRITE:	Client writes data to the server per RDMA
 * @RTRS_MSG_READ:	Client requests a data transfer from the server
 * @RTRS_MSG_RKEY_RSP:	Server refreshed the rkey for an rbuf
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ,	/* 0 */
	RTRS_MSG_INFO_RSP,	/* 1 */
	RTRS_MSG_WRITE,		/* 2 */
	RTRS_MSG_READ,		/* 3 */
	RTRS_MSG_RKEY_RSP,	/* 4 */
};
150 
/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F:	Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:	Send refreshed rkey in response.
 *
 * Each flag occupies its own bit, so the values can be OR-ed together.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F	= 1 << 0,
	RTRS_MSG_NEW_RKEY_F	= 1 << 1,
};
160 
161 /**
162  * struct rtrs_sg_desc - RDMA-Buffer entry description
163  * @addr:	Address of RDMA destination buffer
164  * @key:	Authorization rkey to write to the buffer
165  * @len:	Size of the buffer
166  */
167 struct rtrs_sg_desc {
168 	__le64			addr;
169 	__le32			key;
170 	__le32			len;
171 };
172 
173 /**
174  * struct rtrs_msg_conn_req - Client connection request to the server
175  * @magic:	   RTRS magic
176  * @version:	   RTRS protocol version
177  * @cid:	   Current connection id
178  * @cid_num:	   Number of connections per session
179  * @recon_cnt:	   Reconnections counter
180  * @sess_uuid:	   UUID of a session (path)
181  * @paths_uuid:	   UUID of a group of sessions (paths)
182  *
183  * NOTE: max size 56 bytes, see man rdma_connect().
184  */
185 struct rtrs_msg_conn_req {
186 	/* Is set to 0 by cma.c in case of AF_IB, do not touch that.
187 	 * see https://www.spinics.net/lists/linux-rdma/msg22397.html
188 	 */
189 	u8		__cma_version;
190 	/* On sender side that should be set to 0, or cma_save_ip_info()
191 	 * extract garbage and will fail.
192 	 */
193 	u8		__ip_version;
194 	__le16		magic;
195 	__le16		version;
196 	__le16		cid;
197 	__le16		cid_num;
198 	__le16		recon_cnt;
199 	uuid_t		sess_uuid;
200 	uuid_t		paths_uuid;
201 	u8		first_conn : 1;
202 	u8		reserved_bits : 7;
203 	u8		reserved[11];
204 };
205 
206 /**
207  * struct rtrs_msg_conn_rsp - Server connection response to the client
208  * @magic:	   RTRS magic
209  * @version:	   RTRS protocol version
210  * @errno:	   If rdma_accept() then 0, if rdma_reject() indicates error
211  * @queue_depth:   max inflight messages (queue-depth) in this session
212  * @max_io_size:   max io size server supports
213  * @max_hdr_size:  max msg header size server supports
214  *
215  * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
216  */
217 struct rtrs_msg_conn_rsp {
218 	__le16		magic;
219 	__le16		version;
220 	__le16		errno;
221 	__le16		queue_depth;
222 	__le32		max_io_size;
223 	__le32		max_hdr_size;
224 	__le32		flags;
225 	u8		reserved[36];
226 };
227 
228 /**
229  * struct rtrs_msg_info_req
230  * @type:		@RTRS_MSG_INFO_REQ
231  * @pathname:		Path name chosen by client
232  */
233 struct rtrs_msg_info_req {
234 	__le16		type;
235 	u8		pathname[NAME_MAX];
236 	u8		reserved[15];
237 };
238 
239 /**
240  * struct rtrs_msg_info_rsp
241  * @type:		@RTRS_MSG_INFO_RSP
242  * @sg_cnt:		Number of @desc entries
243  * @desc:		RDMA buffers where the client can write to server
244  */
245 struct rtrs_msg_info_rsp {
246 	__le16		type;
247 	__le16          sg_cnt;
248 	u8              reserved[4];
249 	struct rtrs_sg_desc desc[];
250 };
251 
252 /**
253  * struct rtrs_msg_rkey_rsp
254  * @type:		@RTRS_MSG_RKEY_RSP
255  * @buf_id:		RDMA buf_id of the new rkey
256  * @rkey:		new remote key for RDMA buffers id from server
257  */
258 struct rtrs_msg_rkey_rsp {
259 	__le16		type;
260 	__le16          buf_id;
261 	__le32		rkey;
262 };
263 
264 /**
265  * struct rtrs_msg_rdma_read - RDMA data transfer request from client
266  * @type:		always @RTRS_MSG_READ
267  * @usr_len:		length of user payload
268  * @sg_cnt:		number of @desc entries
269  * @desc:		RDMA buffers where the server can write the result to
270  */
271 struct rtrs_msg_rdma_read {
272 	__le16			type;
273 	__le16			usr_len;
274 	__le16			flags;
275 	__le16			sg_cnt;
276 	struct rtrs_sg_desc    desc[];
277 };
278 
279 /**
280  * struct_msg_rdma_write - Message transferred to server with RDMA-Write
281  * @type:		always @RTRS_MSG_WRITE
282  * @usr_len:		length of user payload
283  */
284 struct rtrs_msg_rdma_write {
285 	__le16			type;
286 	__le16			usr_len;
287 };
288 
289 /**
290  * struct_msg_rdma_hdr - header for read or write request
291  * @type:		@RTRS_MSG_WRITE | @RTRS_MSG_READ
292  */
293 struct rtrs_msg_rdma_hdr {
294 	__le16			type;
295 };
296 
297 /* rtrs.c */
298 
299 struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
300 			      struct ib_device *dev, enum dma_data_direction,
301 			      void (*done)(struct ib_cq *cq, struct ib_wc *wc));
302 void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
303 int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
304 int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
305 		      struct ib_send_wr *head);
306 int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
307 				struct ib_sge *sge, unsigned int num_sge,
308 				u32 rkey, u64 rdma_addr, u32 imm_data,
309 				enum ib_send_flags flags,
310 				struct ib_send_wr *head,
311 				struct ib_send_wr *tail);
312 
313 int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
314 
315 int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
316 		      u32 max_send_sge, int cq_vector, int nr_cqe,
317 		      u32 max_send_wr, u32 max_recv_wr,
318 		      enum ib_poll_context poll_ctx);
319 void rtrs_cq_qp_destroy(struct rtrs_con *con);
320 
/*
 * Heartbeat API, one call per path. The arguments of rtrs_init_hb() match
 * the hb_* members of struct rtrs_path by name.
 */
void rtrs_init_hb(struct rtrs_path *path,
		  struct ib_cqe *cqe,
		  unsigned int interval_ms,
		  unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_path *path);
void rtrs_stop_hb(struct rtrs_path *path);
void rtrs_send_hb_ack(struct rtrs_path *path);
328 
/* Setup and teardown of a struct rtrs_rdma_dev_pd pool. */
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

/*
 * Look up or add the struct rtrs_ib_dev for @ib_dev in @pool (as the name
 * says), and drop a reference to one.
 */
struct rtrs_ib_dev *
rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
			struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
336 
337 static inline u32 rtrs_to_imm(u32 type, u32 payload)
338 {
339 	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
340 	BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));
341 	return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
342 		(payload & MAX_IMM_PAYL_MASK);
343 }
344 
345 static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
346 {
347 	*payload = imm & MAX_IMM_PAYL_MASK;
348 	*type = imm >> MAX_IMM_PAYL_BITS;
349 }
350 
351 static inline u32 rtrs_to_io_req_imm(u32 addr)
352 {
353 	return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
354 }
355 
356 static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
357 {
358 	enum rtrs_imm_type type;
359 	u32 payload;
360 
361 	/* 9 bits for errno, 19 bits for msg_id */
362 	payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
363 	type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;
364 
365 	return rtrs_to_imm(type, payload);
366 }
367 
368 static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
369 {
370 	/* 9 bits for errno, 19 bits for msg_id */
371 	*msg_id = payload & 0x7ffff;
372 	*errno = -(int)((payload >> 19) & 0x1ff);
373 }
374 
/*
 * STAT_STORE_FUNC() - define the sysfs store handler <set_value>_store().
 *
 * The handler resolves the @type object that embeds the kobject as member
 * kobj_stats. It calls @reset(stats, true) when "1" is written and
 * @reset(stats, false) when "0" is written; any other input gives -EINVAL.
 * A non-zero result of @reset is returned as is, otherwise @count.
 */
#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
	int ret;							\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);				\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);				\
	else								\
		ret = -EINVAL;						\
									\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}
392 
/*
 * STAT_SHOW_FUNC() - define the sysfs show handler <get_value>_show().
 *
 * The handler resolves the @type object that embeds the kobject as member
 * kobj_stats and returns whatever @print(stats, page) returns.
 */
#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
				struct kobj_attribute *attr,		\
				char *page)				\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	return print(stats, page);					\
}
402 
/*
 * STAT_ATTR() - define <stat>_store(), <stat>_show() and the read-write
 * attribute <stat>_attr that refers to them. The caller supplies the
 * trailing semicolon.
 */
#define STAT_ATTR(type, stat, print, reset)				\
	STAT_STORE_FUNC(type, stat, reset)				\
	STAT_SHOW_FUNC(type, stat, print)				\
	static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
407 
408 #endif /* RTRS_PRI_H */
409