1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3 * RDMA Transport Layer
4 *
5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
8 */
9
10 #ifndef RTRS_PRI_H
11 #define RTRS_PRI_H
12
13 #include <linux/uuid.h>
14 #include <rdma/rdma_cm.h>
15 #include <rdma/ib_verbs.h>
16 #include <rdma/ib.h>
17
18 #include "rtrs.h"
19
/* RTRS protocol version, split into its major and minor component. */
#define RTRS_PROTO_VER_MAJOR	2
#define RTRS_PROTO_VER_MINOR	0

/* String literal of the form "<major>.<minor>", assembled by the preprocessor. */
#define RTRS_PROTO_VER_STRING				\
	__stringify(RTRS_PROTO_VER_MAJOR) "."		\
	__stringify(RTRS_PROTO_VER_MINOR)
25
/*
 * Upper bound for sess_queue_depth.
 *
 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
 * and the minimum chunk size is 4096 (2^12).
 * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory
 * since queue_depth in rtrs_msg_conn_rsp is defined as le16.
 * Therefore the practical max value of sess_queue_depth is
 * somewhere between 1 and 65535 and it depends on the system.
 *
 * NOTE(review): MIN_CHUNK_SIZE in this header is 8192, not 4096 -
 * confirm which value this derivation is meant to use.
 */
#define MAX_SESS_QUEUE_DEPTH 65535
35
/*
 * Split of a 32-bit immediate value into a type field and a payload field.
 * The widths are given in bits; each mask is the right-aligned all-ones
 * value of the corresponding width.
 */
enum rtrs_imm_const {
	/* field widths */
	MAX_IMM_TYPE_BITS	= 4,
	MAX_IMM_PAYL_BITS	= 28,

	/* masks derived from the widths */
	MAX_IMM_TYPE_MASK	= (1 << MAX_IMM_TYPE_BITS) - 1,
	MAX_IMM_PAYL_MASK	= (1 << MAX_IMM_PAYL_BITS) - 1,
};
42
/* Values carried in the type field of an immediate value. */
enum rtrs_imm_type {
	/* I/O traffic */
	RTRS_IO_REQ_IMM		= 0,	/* client to server */
	RTRS_IO_RSP_IMM		= 1,	/* server to client */
	RTRS_IO_RSP_W_INV_IMM	= 2,	/* server to client */

	/* HB: HeartBeat */
	RTRS_HB_MSG_IMM		= 8,
	RTRS_HB_ACK_IMM		= 9,

	/* one past the highest value in use; must stay the last enumerator */
	RTRS_LAST_IMM,
};
53
54 enum {
55 SERVICE_CON_QUEUE_DEPTH = 512,
56
57 MAX_PATHS_NUM = 128,
58
59 MIN_CHUNK_SIZE = 8192,
60
61 RTRS_HB_INTERVAL_MS = 5000,
62 RTRS_HB_MISSED_MAX = 5,
63
64 RTRS_MAGIC = 0x1BBD,
65 RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
66 };
67
struct rtrs_ib_dev;

/*
 * Callback table referenced from struct rtrs_rdma_dev_pd.
 * Both callbacks receive the struct rtrs_ib_dev they act on; @init can
 * report failure through its int return value, @deinit cannot.
 * NOTE(review): when exactly the callbacks are invoked is decided by the
 * implementation in rtrs.c - confirm there.
 */
struct rtrs_rdma_dev_pd_ops {
	int	(*init)(struct rtrs_ib_dev *dev);
	void	(*deinit)(struct rtrs_ib_dev *dev);
};
74
75 struct rtrs_rdma_dev_pd {
76 struct mutex mutex;
77 struct list_head list;
78 enum ib_pd_flags pd_flags;
79 const struct rtrs_rdma_dev_pd_ops *ops;
80 };
81
82 struct rtrs_ib_dev {
83 struct ib_device *ib_dev;
84 struct ib_pd *ib_pd;
85 struct kref ref;
86 struct list_head entry;
87 struct rtrs_rdma_dev_pd *pool;
88 struct ib_event_handler event_handler;
89 };
90
91 struct rtrs_con {
92 struct rtrs_path *path;
93 struct ib_qp *qp;
94 struct ib_cq *cq;
95 struct rdma_cm_id *cm_id;
96 unsigned int cid;
97 int nr_cqe;
98 atomic_t wr_cnt;
99 atomic_t sq_wr_avail;
100 };
101
102 struct rtrs_path {
103 struct list_head entry;
104 struct sockaddr_storage dst_addr;
105 struct sockaddr_storage src_addr;
106 char sessname[NAME_MAX];
107 uuid_t uuid;
108 struct rtrs_con **con;
109 unsigned int con_num;
110 unsigned int irq_con_num;
111 unsigned int recon_cnt;
112 unsigned int signal_interval;
113 struct rtrs_ib_dev *dev;
114 int dev_ref;
115 struct ib_cqe *hb_cqe;
116 void (*hb_err_handler)(struct rtrs_con *con);
117 struct workqueue_struct *hb_wq;
118 struct delayed_work hb_dwork;
119 unsigned int hb_interval_ms;
120 unsigned int hb_missed_cnt;
121 unsigned int hb_missed_max;
122 ktime_t hb_last_sent;
123 ktime_t hb_cur_latency;
124 };
125
126 /* rtrs information unit */
127 struct rtrs_iu {
128 struct ib_cqe cqe;
129 dma_addr_t dma_addr;
130 void *buf;
131 size_t size;
132 enum dma_data_direction direction;
133 };
134
/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:	Client additional info request to the server
 * @RTRS_MSG_INFO_RSP:	Server additional info response to the client
 * @RTRS_MSG_WRITE:	Client writes data per RDMA to server
 * @RTRS_MSG_READ:	Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP:	Server refreshed rkey for rbuf
 *
 * The numeric values are spelled out because they are stored in the
 * ``type`` member of the message structures.
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ	= 0,
	RTRS_MSG_INFO_RSP	= 1,
	RTRS_MSG_WRITE		= 2,
	RTRS_MSG_READ		= 3,
	RTRS_MSG_RKEY_RSP	= 4,
};
150
/**
 * enum rtrs_msg_flags - RTRS message flags (bit values, may be OR-ed).
 * @RTRS_MSG_NEED_INVAL_F:	Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:	Send refreshed rkey in response.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F	= (1 << 0),
	RTRS_MSG_NEW_RKEY_F	= (1 << 1),
};
160
161 /**
162 * struct rtrs_sg_desc - RDMA-Buffer entry description
163 * @addr: Address of RDMA destination buffer
164 * @key: Authorization rkey to write to the buffer
165 * @len: Size of the buffer
166 */
167 struct rtrs_sg_desc {
168 __le64 addr;
169 __le32 key;
170 __le32 len;
171 };
172
173 /**
174 * struct rtrs_msg_conn_req - Client connection request to the server
175 * @magic: RTRS magic
176 * @version: RTRS protocol version
177 * @cid: Current connection id
178 * @cid_num: Number of connections per session
179 * @recon_cnt: Reconnections counter
180 * @sess_uuid: UUID of a session (path)
181 * @paths_uuid: UUID of a group of sessions (paths)
182 * @first_conn: %1 if the connection request is the first for that session,
183 * otherwise %0
184 * NOTE: max size 56 bytes, see man rdma_connect().
185 */
186 struct rtrs_msg_conn_req {
187 /**
188 * @__cma_version: Is set to 0 by cma.c in case of AF_IB, do not touch
189 * that. See https://www.spinics.net/lists/linux-rdma/msg22397.html
190 */
191 u8 __cma_version;
192 /**
193 * @__ip_version: On sender side that should be set to 0, or
194 * cma_save_ip_info() extract garbage and will fail.
195 */
196 u8 __ip_version;
197 __le16 magic;
198 __le16 version;
199 __le16 cid;
200 __le16 cid_num;
201 __le16 recon_cnt;
202 uuid_t sess_uuid;
203 uuid_t paths_uuid;
204 u8 first_conn : 1;
205 /* private: */
206 u8 reserved_bits : 7;
207 u8 reserved[11];
208 };
209
210 /**
211 * struct rtrs_msg_conn_rsp - Server connection response to the client
212 * @magic: RTRS magic
213 * @version: RTRS protocol version
214 * @errno: If rdma_accept() then 0, if rdma_reject() indicates error
215 * @queue_depth: max inflight messages (queue-depth) in this session
216 * @max_io_size: max io size server supports
217 * @max_hdr_size: max msg header size server supports
218 * @flags: RTRS message flags for this message
219 *
220 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
221 */
222 struct rtrs_msg_conn_rsp {
223 __le16 magic;
224 __le16 version;
225 __le16 errno;
226 __le16 queue_depth;
227 __le32 max_io_size;
228 __le32 max_hdr_size;
229 __le32 flags;
230 /* private: */
231 u8 reserved[36];
232 };
233
234 /**
235 * struct rtrs_msg_info_req - client additional info request
236 * @type: @RTRS_MSG_INFO_REQ
237 * @pathname: Path name chosen by client
238 */
239 struct rtrs_msg_info_req {
240 __le16 type;
241 u8 pathname[NAME_MAX];
242 /* private: */
243 u8 reserved[15];
244 };
245
246 /**
247 * struct rtrs_msg_info_rsp - server additional info response
248 * @type: @RTRS_MSG_INFO_RSP
249 * @sg_cnt: Number of @desc entries
250 * @desc: RDMA buffers where the client can write to server
251 */
252 struct rtrs_msg_info_rsp {
253 __le16 type;
254 __le16 sg_cnt;
255 /* private: */
256 u8 reserved[4];
257 /* public: */
258 struct rtrs_sg_desc desc[];
259 };
260
261 /**
262 * struct rtrs_msg_rkey_rsp - server refreshed rkey response
263 * @type: @RTRS_MSG_RKEY_RSP
264 * @buf_id: RDMA buf_id of the new rkey
265 * @rkey: new remote key for RDMA buffers id from server
266 */
267 struct rtrs_msg_rkey_rsp {
268 __le16 type;
269 __le16 buf_id;
270 __le32 rkey;
271 };
272
273 /**
274 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
275 * @type: always @RTRS_MSG_READ
276 * @flags: RTRS message flags (enum rtrs_msg_flags)
277 * @usr_len: length of user payload
278 * @sg_cnt: number of @desc entries
279 * @desc: RDMA buffers where the server can write the result to
280 */
281 struct rtrs_msg_rdma_read {
282 __le16 type;
283 __le16 usr_len;
284 __le16 flags;
285 __le16 sg_cnt;
286 struct rtrs_sg_desc desc[];
287 };
288
289 /**
290 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
291 * @type: always @RTRS_MSG_WRITE
292 * @usr_len: length of user payload
293 */
294 struct rtrs_msg_rdma_write {
295 __le16 type;
296 __le16 usr_len;
297 };
298
299 /**
300 * struct rtrs_msg_rdma_hdr - header for read or write request
301 * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
302 */
303 struct rtrs_msg_rdma_hdr {
304 __le16 type;
305 };
306
307 /* rtrs.c */
308
309 struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
310 struct ib_device *dev, enum dma_data_direction,
311 void (*done)(struct ib_cq *cq, struct ib_wc *wc));
312 void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
313 int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
314 int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
315 struct ib_send_wr *head);
316 int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
317 struct ib_sge *sge, unsigned int num_sge,
318 u32 rkey, u64 rdma_addr, u32 imm_data,
319 enum ib_send_flags flags,
320 struct ib_send_wr *head,
321 struct ib_send_wr *tail);
322
323 int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
324
325 int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
326 u32 max_send_sge, int cq_vector, int nr_cqe,
327 u32 max_send_wr, u32 max_recv_wr,
328 enum ib_poll_context poll_ctx);
329 void rtrs_cq_qp_destroy(struct rtrs_con *con);
330
/*
 * Heartbeat (HB) control for a path, defined in rtrs.c.
 * rtrs_init_hb() takes the completion entry, the interval in milliseconds,
 * the missed-heartbeat limit, an error handler and a workqueue; struct
 * rtrs_path has a matching hb_* member for each of them.
 */
void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);

void rtrs_start_hb(struct rtrs_path *path);
void rtrs_stop_hb(struct rtrs_path *path);

void rtrs_send_hb_ack(struct rtrs_path *path);
338
/*
 * Device/PD pool management, defined in rtrs.c.
 * NOTE(review): rtrs_ib_dev_find_or_add() and rtrs_ib_dev_put() presumably
 * take and drop a reference on struct rtrs_ib_dev::ref; the meaning of the
 * int returned by rtrs_ib_dev_put() is not visible here - confirm against
 * rtrs.c.
 */
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
					    struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
346
/**
 * rtrs_to_imm() - pack a type and a payload into a 32-bit immediate value
 * @type:	message type; only the low MAX_IMM_TYPE_BITS bits are kept
 * @payload:	payload; only the low MAX_IMM_PAYL_BITS bits are kept
 *
 * Return: the type placed above the payload, i.e. shifted left by
 * MAX_IMM_PAYL_BITS, OR-ed with the masked payload.
 */
static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
	u32 type_part, payl_part;

	/* the two fields together must cover exactly 32 bits */
	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
	/* all defined immediate types must be representable in the type field */
	BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));

	type_part = (type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS;
	payl_part = payload & MAX_IMM_PAYL_MASK;

	return type_part | payl_part;
}
354
/**
 * rtrs_from_imm() - split a 32-bit immediate value into type and payload
 * @imm:	immediate value
 * @type:	receives the bits above MAX_IMM_PAYL_BITS
 * @payload:	receives the low MAX_IMM_PAYL_BITS bits
 */
static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
	const u32 payl_part = imm & MAX_IMM_PAYL_MASK;
	const u32 type_part = imm >> MAX_IMM_PAYL_BITS;

	/* payload is stored first, then type */
	*payload = payl_part;
	*type = type_part;
}
360
/**
 * rtrs_to_io_req_imm() - build the immediate value of an I/O request
 * @addr:	value placed in the payload field
 *
 * Return: rtrs_to_imm() of @addr tagged with %RTRS_IO_REQ_IMM.
 */
static inline u32 rtrs_to_io_req_imm(u32 addr)
{
	const u32 imm = rtrs_to_imm(RTRS_IO_REQ_IMM, addr);

	return imm;
}
365
/**
 * rtrs_to_io_rsp_imm() - build the immediate value of an I/O response
 * @msg_id:	message id; only the low 19 bits are kept
 * @errno:	error code; its absolute value is taken and only the low
 *		9 bits are kept
 * @w_inval:	selects %RTRS_IO_RSP_W_INV_IMM when true, %RTRS_IO_RSP_IMM
 *		otherwise
 *
 * Return: rtrs_to_imm() of the selected type and a payload that holds the
 * error bits above the message id bits.
 */
static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
	/* payload layout: 9 bits for errno, 19 bits for msg_id */
	enum {
		IO_RSP_MSG_ID_BITS	= 19,
		IO_RSP_MSG_ID_MASK	= 0x7ffff,
		IO_RSP_ERRNO_MASK	= 0x1ff,
	};
	u32 payload;

	payload = (abs(errno) & IO_RSP_ERRNO_MASK) << IO_RSP_MSG_ID_BITS |
		  (msg_id & IO_RSP_MSG_ID_MASK);

	if (w_inval)
		return rtrs_to_imm(RTRS_IO_RSP_W_INV_IMM, payload);

	return rtrs_to_imm(RTRS_IO_RSP_IMM, payload);
}
377
/**
 * rtrs_from_io_rsp_imm() - split the payload of an I/O response
 * @payload:	payload field of the immediate value
 * @msg_id:	receives the low 19 bits of @payload
 * @errno:	receives the negated value of the 9 bits above them, so the
 *		result is zero or negative
 */
static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
	/* payload layout: 9 bits for errno, 19 bits for msg_id */
	enum {
		IO_RSP_MSG_ID_BITS	= 19,
		IO_RSP_MSG_ID_MASK	= 0x7ffff,
		IO_RSP_ERRNO_MASK	= 0x1ff,
	};
	const u32 err_bits = (payload >> IO_RSP_MSG_ID_BITS) & IO_RSP_ERRNO_MASK;

	*msg_id = payload & IO_RSP_MSG_ID_MASK;
	*errno = -(int)err_bits;
}
384
/*
 * STAT_STORE_FUNC() - define a static sysfs store handler <set_value>_store.
 * @type:	structure type that embeds the kobject as member kobj_stats
 * @set_value:	name prefix of the generated function
 * @reset:	callable invoked as reset(stats, bool); a non-zero result is
 *		returned to the writer as is
 *
 * Writing "1" calls reset(stats, true), writing "0" calls
 * reset(stats, false); any other input yields -EINVAL. On success the
 * handler returns @count.
 */
#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
	int ret;							\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);				\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);				\
	else								\
		ret = -EINVAL;						\
									\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}
402
/*
 * STAT_SHOW_FUNC() - define a static sysfs show handler <get_value>_show.
 * @type:	structure type that embeds the kobject as member kobj_stats
 * @get_value:	name prefix of the generated function
 * @print:	callable invoked as print(stats, page); its result is the
 *		return value of the handler
 */
#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
				struct kobj_attribute *attr,		\
				char *page)				\
{									\
	return print(container_of(kobj, type, kobj_stats), page);	\
}
412
/*
 * STAT_ATTR() - define <stat>_show, <stat>_store and a read-write
 * struct kobj_attribute named <stat>_attr that refers to both.
 * The expansion ends without a semicolon; the user of the macro adds it.
 */
#define STAT_ATTR(type, stat, print, reset)				\
STAT_SHOW_FUNC(type, stat, print)					\
STAT_STORE_FUNC(type, stat, reset)					\
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
417
418 #endif /* RTRS_PRI_H */
419