/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * RDMA Transport Layer
 *
 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
 */
#ifndef RTRS_PRI_H
#define RTRS_PRI_H

#include <linux/uuid.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib.h>

#include "rtrs.h"

#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
			      __stringify(RTRS_PROTO_VER_MINOR)

/*
 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
 * and the minimum chunk size is 4096 (2^12).
 * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory,
 * since queue_depth in rtrs_msg_conn_rsp is defined as le16.
 * Therefore the practical max value of sess_queue_depth is
 * somewhere between 1 and 65535 and it depends on the system.
 */
#define MAX_SESS_QUEUE_DEPTH 65535

enum rtrs_imm_const {
	MAX_IMM_TYPE_BITS = 4,
	MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
	MAX_IMM_PAYL_BITS = 28,
	MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1),
};

enum rtrs_imm_type {
	RTRS_IO_REQ_IMM       = 0, /* client to server */
	RTRS_IO_RSP_IMM       = 1, /* server to client */
	RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */

	RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */
	RTRS_HB_ACK_IMM = 9,

	RTRS_LAST_IMM,
};

enum {
	SERVICE_CON_QUEUE_DEPTH = 512,

	MAX_PATHS_NUM = 128,

	MIN_CHUNK_SIZE = 8192,

	RTRS_HB_INTERVAL_MS = 5000,
	RTRS_HB_MISSED_MAX = 5,

	RTRS_MAGIC = 0x1BBD,
	RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
};

struct rtrs_ib_dev;

struct rtrs_rdma_dev_pd_ops {
	int (*init)(struct rtrs_ib_dev *dev);
	void (*deinit)(struct rtrs_ib_dev *dev);
};

struct rtrs_rdma_dev_pd {
	struct mutex mutex;
	struct list_head list;
	enum ib_pd_flags pd_flags;
	const struct rtrs_rdma_dev_pd_ops *ops;
};

struct rtrs_ib_dev {
	struct ib_device *ib_dev;
	struct ib_pd *ib_pd;
	struct kref ref;
	struct list_head entry;
	struct rtrs_rdma_dev_pd *pool;
	struct ib_event_handler event_handler;
};

struct rtrs_con {
	struct rtrs_path *path;
	struct ib_qp *qp;
	struct ib_cq *cq;
	struct rdma_cm_id *cm_id;
	unsigned int cid;
	int nr_cqe;
	atomic_t wr_cnt;
	atomic_t sq_wr_avail;
};

struct rtrs_path {
	struct list_head entry;
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	char sessname[NAME_MAX];
	uuid_t uuid;
	struct rtrs_con **con;
	unsigned int con_num;
	unsigned int irq_con_num;
	unsigned int recon_cnt;
	unsigned int signal_interval;
	struct rtrs_ib_dev *dev;
	int dev_ref;
	struct ib_cqe *hb_cqe;
	void (*hb_err_handler)(struct rtrs_con *con);
	struct workqueue_struct *hb_wq;
	struct delayed_work hb_dwork;
	unsigned int hb_interval_ms;
	unsigned int hb_missed_cnt;
	unsigned int hb_missed_max;
	ktime_t hb_last_sent;
	ktime_t hb_cur_latency;
};

/* rtrs information unit */
struct rtrs_iu {
	struct ib_cqe cqe;
	dma_addr_t dma_addr;
	void *buf;
	size_t size;
	enum dma_data_direction direction;
};
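/*
 * Illustrative sketch (not part of this header): how a transport user might
 * wire up the heartbeat machinery declared further below, using the
 * RTRS_HB_INTERVAL_MS and RTRS_HB_MISSED_MAX defaults from the enum above.
 * The names my_hb_done, my_hb_err_handler and my_hb_wq are hypothetical.
 *
 *	static struct ib_cqe my_hb_cqe = { .done = my_hb_done };
 *
 *	rtrs_init_hb(path, &my_hb_cqe, RTRS_HB_INTERVAL_MS,
 *		     RTRS_HB_MISSED_MAX, my_hb_err_handler, my_hb_wq);
 *	rtrs_start_hb(path);
 *	...
 *	rtrs_stop_hb(path);
 *
 * The heartbeat uses the RTRS_HB_MSG_IMM / RTRS_HB_ACK_IMM immediate types
 * defined above; hb_err_handler() is expected to be invoked once too many
 * acknowledgements in a row are missed (see hb_missed_cnt / hb_missed_max
 * in struct rtrs_path).
 */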
/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ: Client additional info request to the server
 * @RTRS_MSG_INFO_RSP: Server additional info response to the client
 * @RTRS_MSG_WRITE: Client writes data per RDMA to server
 * @RTRS_MSG_READ: Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP: Server refreshed rkey for rbuf
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ,
	RTRS_MSG_INFO_RSP,
	RTRS_MSG_WRITE,
	RTRS_MSG_READ,
	RTRS_MSG_RKEY_RSP,
};

/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F = 1 << 0,
	RTRS_MSG_NEW_RKEY_F = 1 << 1,
};

/**
 * struct rtrs_sg_desc - RDMA-Buffer entry description
 * @addr: Address of RDMA destination buffer
 * @key: Authorization rkey to write to the buffer
 * @len: Size of the buffer
 */
struct rtrs_sg_desc {
	__le64 addr;
	__le32 key;
	__le32 len;
};

/**
 * struct rtrs_msg_conn_req - Client connection request to the server
 * @magic: RTRS magic
 * @version: RTRS protocol version
 * @cid: Current connection id
 * @cid_num: Number of connections per session
 * @recon_cnt: Reconnections counter
 * @sess_uuid: UUID of a session (path)
 * @paths_uuid: UUID of a group of sessions (paths)
 *
 * NOTE: max size 56 bytes, see man rdma_connect().
 */
struct rtrs_msg_conn_req {
	/* Is set to 0 by cma.c in case of AF_IB, do not touch that.
	 * see https://www.spinics.net/lists/linux-rdma/msg22397.html
	 */
	u8 __cma_version;
	/* On sender side that should be set to 0, or cma_save_ip_info()
	 * extracts garbage and will fail.
	 */
	u8 __ip_version;
	__le16 magic;
	__le16 version;
	__le16 cid;
	__le16 cid_num;
	__le16 recon_cnt;
	uuid_t sess_uuid;
	uuid_t paths_uuid;
	u8 first_conn : 1;
	u8 reserved_bits : 7;
	u8 reserved[11];
};

/**
 * struct rtrs_msg_conn_rsp - Server connection response to the client
 * @magic: RTRS magic
 * @version: RTRS protocol version
 * @errno: If rdma_accept() then 0, if rdma_reject() indicates error
 * @queue_depth: max inflight messages (queue-depth) in this session
 * @max_io_size: max io size server supports
 * @max_hdr_size: max msg header size server supports
 * @flags: RTRS message flags
 *
 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
 */
struct rtrs_msg_conn_rsp {
	__le16 magic;
	__le16 version;
	__le16 errno;
	__le16 queue_depth;
	__le32 max_io_size;
	__le32 max_hdr_size;
	__le32 flags;
	u8 reserved[36];
};

/**
 * struct rtrs_msg_info_req
 * @type: @RTRS_MSG_INFO_REQ
 * @pathname: Path name chosen by client
 */
struct rtrs_msg_info_req {
	__le16 type;
	u8 pathname[NAME_MAX];
	u8 reserved[15];
};

/**
 * struct rtrs_msg_info_rsp
 * @type: @RTRS_MSG_INFO_RSP
 * @sg_cnt: Number of @desc entries
 * @desc: RDMA buffers where the client can write to server
 */
struct rtrs_msg_info_rsp {
	__le16 type;
	__le16 sg_cnt;
	u8 reserved[4];
	struct rtrs_sg_desc desc[];
};

/**
 * struct rtrs_msg_rkey_rsp
 * @type: @RTRS_MSG_RKEY_RSP
 * @buf_id: RDMA buf_id of the new rkey
 * @rkey: new remote key for RDMA buffers id from server
 */
struct rtrs_msg_rkey_rsp {
	__le16 type;
	__le16 buf_id;
	__le32 rkey;
};

/**
 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
 * @type: always @RTRS_MSG_READ
 * @usr_len: length of user payload
 * @sg_cnt: number of @desc entries
 * @desc: RDMA buffers where the server can write the result to
 */
struct rtrs_msg_rdma_read {
	__le16 type;
	__le16 usr_len;
	__le16 flags;
	__le16 sg_cnt;
	struct rtrs_sg_desc desc[];
};

/**
 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
 * @type: always @RTRS_MSG_WRITE
 * @usr_len: length of user payload
 */
struct rtrs_msg_rdma_write {
	__le16 type;
	__le16 usr_len;
};

/**
 * struct rtrs_msg_rdma_hdr - header for read or write request
 * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ
 */
struct rtrs_msg_rdma_hdr {
	__le16 type;
};
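/*
 * Illustrative sketch (not part of this header): the NOTEs above bound the
 * handshake messages by the RDMA CM private-data limits (56 bytes for
 * rdma_connect(), at most 136 bytes for rdma_accept()), which one could
 * assert at compile time, e.g.:
 *
 *	static_assert(sizeof(struct rtrs_msg_conn_req) <= 56);
 *	static_assert(sizeof(struct rtrs_msg_conn_rsp) <= 136);
 *
 * With the fields above, both structures add up to exactly 56 bytes:
 * conn_req is 2 * u8 + 5 * __le16 + 2 * uuid_t (16 bytes each) + 12 bytes
 * of bitfield/reserved space; conn_rsp is 4 * __le16 + 3 * __le32 + 36
 * reserved bytes.
 */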
/* rtrs.c */

struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
			      struct ib_device *dev, enum dma_data_direction,
			      void (*done)(struct ib_cq *cq, struct ib_wc *wc));
void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
		      struct ib_send_wr *head);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
				struct ib_sge *sge, unsigned int num_sge,
				u32 rkey, u64 rdma_addr, u32 imm_data,
				enum ib_send_flags flags,
				struct ib_send_wr *head,
				struct ib_send_wr *tail);

int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);

int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
		      u32 max_send_sge, int cq_vector, int nr_cqe,
		      u32 max_send_wr, u32 max_recv_wr,
		      enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);

void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_path *path);
void rtrs_stop_hb(struct rtrs_path *path);
void rtrs_send_hb_ack(struct rtrs_path *path);

void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
					    struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);

static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
	BUILD_BUG_ON(RTRS_LAST_IMM > (1 << MAX_IMM_TYPE_BITS));

	/* Type lives in the top MAX_IMM_TYPE_BITS, payload in the rest. */
	return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
	       (payload & MAX_IMM_PAYL_MASK);
}

static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
	*payload = imm & MAX_IMM_PAYL_MASK;
	*type = imm >> MAX_IMM_PAYL_BITS;
}

static inline u32 rtrs_to_io_req_imm(u32 addr)
{
	return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
}

static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
	enum rtrs_imm_type type;
	u32 payload;

	/* 9 bits for errno, 19 bits for msg_id */
	payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
	type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;

	return rtrs_to_imm(type, payload);
}

static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
	/* 9 bits for errno, 19 bits for msg_id */
	*msg_id = payload & 0x7ffff;
	*errno = -(int)((payload >> 19) & 0x1ff);
}

#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
				 struct kobj_attribute *attr,		\
				 const char *buf, size_t count)		\
{									\
	int ret = -EINVAL;						\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);				\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);				\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}

#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
				struct kobj_attribute *attr,		\
				char *page)				\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	return print(stats, page);					\
}

#define STAT_ATTR(type, stat, print, reset)				\
STAT_STORE_FUNC(type, stat, reset)					\
STAT_SHOW_FUNC(type, stat, print)					\
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)

#endif /* RTRS_PRI_H */
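/*
 * Illustrative sketch (not part of this header): STAT_ATTR() above stamps out
 * a sysfs show/store pair plus a kobj_attribute for one statistics counter.
 * The type passed in must embed a 'struct kobject kobj_stats' member, and the
 * print/reset callbacks are supplied by the caller. The names my_stats,
 * my_print and my_reset below are hypothetical:
 *
 *	struct my_stats {
 *		struct kobject kobj_stats;
 *		atomic_t reconnects;
 *	};
 *
 *	static ssize_t my_print(struct my_stats *s, char *page)
 *	{
 *		return sysfs_emit(page, "%d\n", atomic_read(&s->reconnects));
 *	}
 *
 *	static int my_reset(struct my_stats *s, bool enable)
 *	{
 *		if (!enable)
 *			return -EINVAL;
 *		atomic_set(&s->reconnects, 0);
 *		return 0;
 *	}
 *
 *	STAT_ATTR(struct my_stats, reconnects, my_print, my_reset);
 *
 * Reading the resulting "reconnects" attribute returns whatever my_print()
 * formats; writing "1" resets the counter, while "0" is rejected by this
 * particular my_reset().
 */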