1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 /* 3 * RDMA Transport Layer 4 * 5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. 6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. 7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. 8 */ 9 10 #ifndef RTRS_PRI_H 11 #define RTRS_PRI_H 12 13 #include <linux/uuid.h> 14 #include <rdma/rdma_cm.h> 15 #include <rdma/ib_verbs.h> 16 #include <rdma/ib.h> 17 18 #include "rtrs.h" 19 20 #define RTRS_PROTO_VER_MAJOR 2 21 #define RTRS_PROTO_VER_MINOR 0 22 23 #define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \ 24 __stringify(RTRS_PROTO_VER_MINOR) 25 26 /* 27 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) 28 * and the minimum chunk size is 4096 (2^12). 29 * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory 30 * since queue_depth in rtrs_msg_conn_rsp is defined as le16. 31 * Therefore the pratical max value of sess_queue_depth is 32 * somewhere between 1 and 65535 and it depends on the system. 33 */ 34 #define MAX_SESS_QUEUE_DEPTH 65535 35 36 enum rtrs_imm_const { 37 MAX_IMM_TYPE_BITS = 4, 38 MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1), 39 MAX_IMM_PAYL_BITS = 28, 40 MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1), 41 }; 42 43 enum rtrs_imm_type { 44 RTRS_IO_REQ_IMM = 0, /* client to server */ 45 RTRS_IO_RSP_IMM = 1, /* server to client */ 46 RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */ 47 48 RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */ 49 RTRS_HB_ACK_IMM = 9, 50 51 RTRS_LAST_IMM, 52 }; 53 54 enum { 55 SERVICE_CON_QUEUE_DEPTH = 512, 56 57 MAX_PATHS_NUM = 128, 58 59 MIN_CHUNK_SIZE = 8192, 60 61 RTRS_HB_INTERVAL_MS = 5000, 62 RTRS_HB_MISSED_MAX = 5, 63 64 RTRS_MAGIC = 0x1BBD, 65 RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR, 66 }; 67 68 struct rtrs_ib_dev; 69 70 struct rtrs_rdma_dev_pd_ops { 71 int (*init)(struct rtrs_ib_dev *dev); 72 void (*deinit)(struct rtrs_ib_dev *dev); 73 }; 74 75 struct rtrs_rdma_dev_pd { 76 struct mutex mutex; 77 struct list_head list; 78 enum ib_pd_flags pd_flags; 79 const struct rtrs_rdma_dev_pd_ops *ops; 80 }; 81 82 struct rtrs_ib_dev { 83 struct ib_device *ib_dev; 84 struct ib_pd *ib_pd; 85 struct kref ref; 86 struct list_head entry; 87 struct rtrs_rdma_dev_pd *pool; 88 struct ib_event_handler event_handler; 89 }; 90 91 struct rtrs_con { 92 struct rtrs_path *path; 93 struct ib_qp *qp; 94 struct ib_cq *cq; 95 struct rdma_cm_id *cm_id; 96 unsigned int cid; 97 int nr_cqe; 98 atomic_t wr_cnt; 99 atomic_t sq_wr_avail; 100 }; 101 102 struct rtrs_path { 103 struct list_head entry; 104 struct sockaddr_storage dst_addr; 105 struct sockaddr_storage src_addr; 106 char sessname[NAME_MAX]; 107 uuid_t uuid; 108 struct rtrs_con **con; 109 unsigned int con_num; 110 unsigned int irq_con_num; 111 unsigned int recon_cnt; 112 unsigned int signal_interval; 113 struct rtrs_ib_dev *dev; 114 int dev_ref; 115 struct ib_cqe *hb_cqe; 116 void (*hb_err_handler)(struct rtrs_con *con); 117 struct workqueue_struct *hb_wq; 118 struct delayed_work hb_dwork; 119 unsigned int hb_interval_ms; 120 unsigned int hb_missed_cnt; 121 unsigned int hb_missed_max; 122 ktime_t hb_last_sent; 123 ktime_t hb_cur_latency; 124 }; 125 126 /* rtrs information unit */ 127 struct rtrs_iu { 128 struct ib_cqe cqe; 129 dma_addr_t dma_addr; 130 void *buf; 131 size_t size; 132 enum dma_data_direction direction; 133 }; 134 135 /** 136 * enum rtrs_msg_types - RTRS message types, see also rtrs/README 137 * @RTRS_MSG_INFO_REQ: Client additional info request to the server 138 * @RTRS_MSG_INFO_RSP: Server additional info response to the client 139 * @RTRS_MSG_WRITE: Client writes data per RDMA to server 140 * @RTRS_MSG_READ: Client requests data transfer from server 141 * @RTRS_MSG_RKEY_RSP: Server refreshed rkey for rbuf 142 */ 143 enum rtrs_msg_types { 144 RTRS_MSG_INFO_REQ, 145 RTRS_MSG_INFO_RSP, 146 RTRS_MSG_WRITE, 147 RTRS_MSG_READ, 148 RTRS_MSG_RKEY_RSP, 149 }; 150 151 /** 152 * enum rtrs_msg_flags - RTRS message flags. 153 * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response. 154 * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response. 155 */ 156 enum rtrs_msg_flags { 157 RTRS_MSG_NEED_INVAL_F = 1 << 0, 158 RTRS_MSG_NEW_RKEY_F = 1 << 1, 159 }; 160 161 /** 162 * struct rtrs_sg_desc - RDMA-Buffer entry description 163 * @addr: Address of RDMA destination buffer 164 * @key: Authorization rkey to write to the buffer 165 * @len: Size of the buffer 166 */ 167 struct rtrs_sg_desc { 168 __le64 addr; 169 __le32 key; 170 __le32 len; 171 }; 172 173 /** 174 * struct rtrs_msg_conn_req - Client connection request to the server 175 * @magic: RTRS magic 176 * @version: RTRS protocol version 177 * @cid: Current connection id 178 * @cid_num: Number of connections per session 179 * @recon_cnt: Reconnections counter 180 * @sess_uuid: UUID of a session (path) 181 * @paths_uuid: UUID of a group of sessions (paths) 182 * @first_conn: %1 if the connection request is the first for that session, 183 * otherwise %0 184 * NOTE: max size 56 bytes, see man rdma_connect(). 185 */ 186 struct rtrs_msg_conn_req { 187 /** 188 * @__cma_version: Is set to 0 by cma.c in case of AF_IB, do not touch 189 * that. See https://www.spinics.net/lists/linux-rdma/msg22397.html 190 */ 191 u8 __cma_version; 192 /** 193 * @__ip_version: On sender side that should be set to 0, or 194 * cma_save_ip_info() extract garbage and will fail. 195 */ 196 u8 __ip_version; 197 __le16 magic; 198 __le16 version; 199 __le16 cid; 200 __le16 cid_num; 201 __le16 recon_cnt; 202 uuid_t sess_uuid; 203 uuid_t paths_uuid; 204 u8 first_conn : 1; 205 /* private: */ 206 u8 reserved_bits : 7; 207 u8 reserved[11]; 208 }; 209 210 /** 211 * struct rtrs_msg_conn_rsp - Server connection response to the client 212 * @magic: RTRS magic 213 * @version: RTRS protocol version 214 * @errno: If rdma_accept() then 0, if rdma_reject() indicates error 215 * @queue_depth: max inflight messages (queue-depth) in this session 216 * @max_io_size: max io size server supports 217 * @max_hdr_size: max msg header size server supports 218 * @flags: RTRS message flags for this message 219 * 220 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept(). 221 */ 222 struct rtrs_msg_conn_rsp { 223 __le16 magic; 224 __le16 version; 225 __le16 errno; 226 __le16 queue_depth; 227 __le32 max_io_size; 228 __le32 max_hdr_size; 229 __le32 flags; 230 /* private: */ 231 u8 reserved[36]; 232 }; 233 234 /** 235 * struct rtrs_msg_info_req - client additional info request 236 * @type: @RTRS_MSG_INFO_REQ 237 * @pathname: Path name chosen by client 238 */ 239 struct rtrs_msg_info_req { 240 __le16 type; 241 u8 pathname[NAME_MAX]; 242 /* private: */ 243 u8 reserved[15]; 244 }; 245 246 /** 247 * struct rtrs_msg_info_rsp - server additional info response 248 * @type: @RTRS_MSG_INFO_RSP 249 * @sg_cnt: Number of @desc entries 250 * @desc: RDMA buffers where the client can write to server 251 */ 252 struct rtrs_msg_info_rsp { 253 __le16 type; 254 __le16 sg_cnt; 255 /* private: */ 256 u8 reserved[4]; 257 /* public: */ 258 struct rtrs_sg_desc desc[]; 259 }; 260 261 /** 262 * struct rtrs_msg_rkey_rsp - server refreshed rkey response 263 * @type: @RTRS_MSG_RKEY_RSP 264 * @buf_id: RDMA buf_id of the new rkey 265 * @rkey: new remote key for RDMA buffers id from server 266 */ 267 struct rtrs_msg_rkey_rsp { 268 __le16 type; 269 __le16 buf_id; 270 __le32 rkey; 271 }; 272 273 /** 274 * struct rtrs_msg_rdma_read - RDMA data transfer request from client 275 * @type: always @RTRS_MSG_READ 276 * @flags: RTRS message flags (enum rtrs_msg_flags) 277 * @usr_len: length of user payload 278 * @sg_cnt: number of @desc entries 279 * @desc: RDMA buffers where the server can write the result to 280 */ 281 struct rtrs_msg_rdma_read { 282 __le16 type; 283 __le16 usr_len; 284 __le16 flags; 285 __le16 sg_cnt; 286 struct rtrs_sg_desc desc[]; 287 }; 288 289 /** 290 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write 291 * @type: always @RTRS_MSG_WRITE 292 * @usr_len: length of user payload 293 */ 294 struct rtrs_msg_rdma_write { 295 __le16 type; 296 __le16 usr_len; 297 }; 298 299 /** 300 * struct rtrs_msg_rdma_hdr - header for read or write request 301 * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ 302 */ 303 struct rtrs_msg_rdma_hdr { 304 __le16 type; 305 }; 306 307 /* rtrs.c */ 308 309 struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t, 310 struct ib_device *dev, enum dma_data_direction, 311 void (*done)(struct ib_cq *cq, struct ib_wc *wc)); 312 void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num); 313 int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); 314 int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, 315 struct ib_send_wr *head); 316 int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, 317 struct ib_sge *sge, unsigned int num_sge, 318 u32 rkey, u64 rdma_addr, u32 imm_data, 319 enum ib_send_flags flags, 320 struct ib_send_wr *head, 321 struct ib_send_wr *tail); 322 323 int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe); 324 325 int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con, 326 u32 max_send_sge, int cq_vector, int nr_cqe, 327 u32 max_send_wr, u32 max_recv_wr, 328 enum ib_poll_context poll_ctx); 329 void rtrs_cq_qp_destroy(struct rtrs_con *con); 330 331 void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe, 332 unsigned int interval_ms, unsigned int missed_max, 333 void (*err_handler)(struct rtrs_con *con), 334 struct workqueue_struct *wq); 335 void rtrs_start_hb(struct rtrs_path *path); 336 void rtrs_stop_hb(struct rtrs_path *path); 337 void rtrs_send_hb_ack(struct rtrs_path *path); 338 339 void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags, 340 struct rtrs_rdma_dev_pd *pool); 341 void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool); 342 343 struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev, 344 struct rtrs_rdma_dev_pd *pool); 345 int rtrs_ib_dev_put(struct rtrs_ib_dev *dev); 346 347 static inline u32 rtrs_to_imm(u32 type, u32 payload) 348 { 349 BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32); 350 BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS)); 351 return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) | 352 (payload & MAX_IMM_PAYL_MASK); 353 } 354 355 static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload) 356 { 357 *payload = imm & MAX_IMM_PAYL_MASK; 358 *type = imm >> MAX_IMM_PAYL_BITS; 359 } 360 361 static inline u32 rtrs_to_io_req_imm(u32 addr) 362 { 363 return rtrs_to_imm(RTRS_IO_REQ_IMM, addr); 364 } 365 366 static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval) 367 { 368 enum rtrs_imm_type type; 369 u32 payload; 370 371 /* 9 bits for errno, 19 bits for msg_id */ 372 payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff); 373 type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM; 374 375 return rtrs_to_imm(type, payload); 376 } 377 378 static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno) 379 { 380 /* 9 bits for errno, 19 bits for msg_id */ 381 *msg_id = payload & 0x7ffff; 382 *errno = -(int)((payload >> 19) & 0x1ff); 383 } 384 385 #define STAT_STORE_FUNC(type, set_value, reset) \ 386 static ssize_t set_value##_store(struct kobject *kobj, \ 387 struct kobj_attribute *attr, \ 388 const char *buf, size_t count) \ 389 { \ 390 int ret = -EINVAL; \ 391 type *stats = container_of(kobj, type, kobj_stats); \ 392 \ 393 if (sysfs_streq(buf, "1")) \ 394 ret = reset(stats, true); \ 395 else if (sysfs_streq(buf, "0")) \ 396 ret = reset(stats, false); \ 397 if (ret) \ 398 return ret; \ 399 \ 400 return count; \ 401 } 402 403 #define STAT_SHOW_FUNC(type, get_value, print) \ 404 static ssize_t get_value##_show(struct kobject *kobj, \ 405 struct kobj_attribute *attr, \ 406 char *page) \ 407 { \ 408 type *stats = container_of(kobj, type, kobj_stats); \ 409 \ 410 return print(stats, page); \ 411 } 412 413 #define STAT_ATTR(type, stat, print, reset) \ 414 STAT_STORE_FUNC(type, stat, reset) \ 415 STAT_SHOW_FUNC(type, stat, print) \ 416 static struct kobj_attribute stat##_attr = __ATTR_RW(stat) 417 418 #endif /* RTRS_PRI_H */ 419