1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ 2 /* 3 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the BSD-type 9 * license below: 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 18 * Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials provided 21 * with the distribution. 22 * 23 * Neither the name of the Network Appliance, Inc. nor the names of 24 * its contributors may be used to endorse or promote products 25 * derived from this software without specific prior written 26 * permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 * 40 * Author: Tom Tucker <tom@opengridcomputing.com> 41 */ 42 43 #ifndef SVC_RDMA_H 44 #define SVC_RDMA_H 45 #include <linux/llist.h> 46 #include <linux/sunrpc/xdr.h> 47 #include <linux/sunrpc/svcsock.h> 48 #include <linux/sunrpc/rpc_rdma.h> 49 #include <linux/sunrpc/rpc_rdma_cid.h> 50 #include <linux/sunrpc/svc_rdma_pcl.h> 51 #include <linux/sunrpc/rdma_rn.h> 52 53 #include <linux/percpu_counter.h> 54 #include <rdma/ib_verbs.h> 55 #include <rdma/rdma_cm.h> 56 57 /* Default and maximum inline threshold sizes */ 58 enum { 59 RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1, 60 RPCRDMA_DEF_INLINE_THRESH = 4096, 61 RPCRDMA_MAX_INLINE_THRESH = 65536 62 }; 63 64 /* RPC/RDMA parameters and stats */ 65 extern unsigned int svcrdma_ord; 66 extern unsigned int svcrdma_max_requests; 67 extern unsigned int svcrdma_max_bc_requests; 68 extern unsigned int svcrdma_max_req_size; 69 extern struct workqueue_struct *svcrdma_wq; 70 71 extern struct percpu_counter svcrdma_stat_read; 72 extern struct percpu_counter svcrdma_stat_recv; 73 extern struct percpu_counter svcrdma_stat_sq_starve; 74 extern struct percpu_counter svcrdma_stat_write; 75 76 struct svcxprt_rdma { 77 struct svc_xprt sc_xprt; /* SVC transport structure */ 78 struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ 79 struct list_head sc_accept_q; /* Conn. waiting accept */ 80 struct rpcrdma_notification sc_rn; /* removal notification */ 81 int sc_ord; /* RDMA read limit */ 82 int sc_max_send_sges; 83 bool sc_snd_w_inv; /* OK to use Send With Invalidate */ 84 85 atomic_t sc_sq_avail; /* SQEs ready to be consumed */ 86 unsigned int sc_sq_depth; /* Depth of SQ */ 87 atomic_t sc_sq_ticket_head; /* Next ticket to issue */ 88 atomic_t sc_sq_ticket_tail; /* Ticket currently serving */ 89 wait_queue_head_t sc_sq_ticket_wait; /* Ticket ordering waitlist */ 90 __be32 sc_fc_credits; /* Forward credits */ 91 u32 sc_max_requests; /* Max requests */ 92 u32 sc_max_bc_requests;/* Backward credits */ 93 int sc_max_req_size; /* Size of each RQ WR buf */ 94 u8 sc_port_num; 95 96 struct ib_pd *sc_pd; 97 98 spinlock_t sc_send_lock; 99 struct llist_head sc_send_ctxts; 100 spinlock_t sc_rw_ctxt_lock; 101 struct llist_head sc_rw_ctxts; 102 103 u32 sc_pending_recvs; 104 u32 sc_recv_batch; 105 struct list_head sc_rq_dto_q; 106 struct list_head sc_read_complete_q; 107 spinlock_t sc_rq_dto_lock; 108 struct ib_qp *sc_qp; 109 struct ib_cq *sc_rq_cq; 110 struct ib_cq *sc_sq_cq; 111 112 spinlock_t sc_lock; /* transport lock */ 113 114 wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ 115 unsigned long sc_flags; 116 struct work_struct sc_work; 117 118 struct llist_head sc_recv_ctxts; 119 120 atomic_t sc_completion_ids; 121 }; 122 /* sc_flags */ 123 #define RDMAXPRT_CONN_PENDING 3 124 125 static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp) 126 { 127 struct svc_xprt *xprt = rqstp->rq_xprt; 128 129 return container_of(xprt, struct svcxprt_rdma, sc_xprt); 130 } 131 132 /* 133 * Default connection parameters 134 */ 135 enum { 136 RPCRDMA_LISTEN_BACKLOG = 10, 137 RPCRDMA_MAX_REQUESTS = 128, 138 RPCRDMA_MAX_BC_REQUESTS = 2, 139 }; 140 141 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 142 143 /** 144 * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID 145 * @rdma: controlling transport 146 * @cid: completion ID to initialize 147 */ 148 static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, 149 struct rpc_rdma_cid *cid) 150 { 151 cid->ci_queue_id = rdma->sc_rq_cq->res.id; 152 cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 153 } 154 155 /** 156 * svc_rdma_send_cid_init - Initialize a Send Queue completion ID 157 * @rdma: controlling transport 158 * @cid: completion ID to initialize 159 */ 160 static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, 161 struct rpc_rdma_cid *cid) 162 { 163 cid->ci_queue_id = rdma->sc_sq_cq->res.id; 164 cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 165 } 166 167 /* 168 * A chunk context tracks all I/O for moving one Read or Write 169 * chunk. This is a set of rdma_rw's that handle data movement 170 * for all segments of one chunk. 171 */ 172 struct svc_rdma_chunk_ctxt { 173 struct rpc_rdma_cid cc_cid; 174 struct ib_cqe cc_cqe; 175 struct list_head cc_rwctxts; 176 ktime_t cc_posttime; 177 int cc_sqecount; 178 }; 179 180 struct svc_rdma_recv_ctxt { 181 struct llist_node rc_node; 182 struct list_head rc_list; 183 struct ib_recv_wr rc_recv_wr; 184 struct ib_cqe rc_cqe; 185 struct rpc_rdma_cid rc_cid; 186 struct ib_sge rc_recv_sge; 187 void *rc_recv_buf; 188 struct xdr_stream rc_stream; 189 u32 rc_byte_len; 190 u32 rc_inv_rkey; 191 __be32 rc_msgtype; 192 193 /* State for pulling a Read chunk */ 194 unsigned int rc_pageoff; 195 unsigned int rc_curpage; 196 unsigned int rc_readbytes; 197 struct xdr_buf rc_saved_arg; 198 struct svc_rdma_chunk_ctxt rc_cc; 199 200 struct svc_rdma_pcl rc_call_pcl; 201 202 struct svc_rdma_pcl rc_read_pcl; 203 struct svc_rdma_chunk *rc_cur_result_payload; 204 struct svc_rdma_pcl rc_write_pcl; 205 struct svc_rdma_pcl rc_reply_pcl; 206 207 unsigned int rc_page_count; 208 unsigned long rc_maxpages; 209 struct page *rc_pages[] __counted_by(rc_maxpages); 210 }; 211 212 /* 213 * State for sending a Write chunk. 214 * - Tracks progress of writing one chunk over all its segments 215 * - Stores arguments for the SGL constructor functions 216 */ 217 struct svc_rdma_write_info { 218 struct svcxprt_rdma *wi_rdma; 219 struct list_head wi_list; 220 221 const struct svc_rdma_chunk *wi_chunk; 222 223 /* write state of this chunk */ 224 unsigned int wi_seg_off; 225 unsigned int wi_seg_no; 226 227 /* SGL constructor arguments */ 228 const struct xdr_buf *wi_xdr; 229 unsigned char *wi_base; 230 unsigned int wi_next_off; 231 232 struct svc_rdma_chunk_ctxt wi_cc; 233 struct work_struct wi_work; 234 }; 235 236 struct svc_rdma_send_ctxt { 237 struct llist_node sc_node; 238 struct rpc_rdma_cid sc_cid; 239 struct work_struct sc_work; 240 241 struct svcxprt_rdma *sc_rdma; 242 struct ib_send_wr sc_send_wr; 243 struct ib_send_wr *sc_wr_chain; 244 int sc_sqecount; 245 struct ib_cqe sc_cqe; 246 struct xdr_buf sc_hdrbuf; 247 struct xdr_stream sc_stream; 248 249 struct list_head sc_write_info_list; 250 struct svc_rdma_write_info sc_reply_info; 251 252 void *sc_xprt_buf; 253 int sc_page_count; 254 int sc_cur_sge_no; 255 unsigned long sc_maxpages; 256 struct page **sc_pages; 257 struct ib_sge sc_sges[]; 258 }; 259 260 /* svc_rdma_backchannel.c */ 261 extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, 262 struct svc_rdma_recv_ctxt *rctxt); 263 264 /* svc_rdma_recvfrom.c */ 265 extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); 266 extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); 267 extern struct svc_rdma_recv_ctxt * 268 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma); 269 extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, 270 struct svc_rdma_recv_ctxt *ctxt); 271 extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); 272 extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); 273 extern int svc_rdma_recvfrom(struct svc_rqst *); 274 275 /* svc_rdma_rw.c */ 276 extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, 277 struct svc_rdma_chunk_ctxt *cc); 278 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); 279 extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, 280 struct svc_rdma_chunk_ctxt *cc); 281 extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, 282 struct svc_rdma_chunk_ctxt *cc, 283 enum dma_data_direction dir); 284 extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, 285 struct svc_rdma_send_ctxt *ctxt); 286 extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, 287 struct svc_rdma_send_ctxt *ctxt); 288 extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, 289 const struct svc_rdma_recv_ctxt *rctxt, 290 struct svc_rdma_send_ctxt *sctxt, 291 const struct xdr_buf *xdr); 292 extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, 293 const struct svc_rdma_pcl *write_pcl, 294 const struct svc_rdma_pcl *reply_pcl, 295 struct svc_rdma_send_ctxt *sctxt, 296 const struct xdr_buf *xdr); 297 extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, 298 struct svc_rqst *rqstp, 299 struct svc_rdma_recv_ctxt *head); 300 301 /* svc_rdma_sendto.c */ 302 extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); 303 extern struct svc_rdma_send_ctxt * 304 svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); 305 extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, 306 struct svc_rdma_send_ctxt *ctxt); 307 extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, 308 struct svc_rdma_send_ctxt *ctxt); 309 extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, 310 struct svc_rdma_send_ctxt *sctxt, 311 const struct svc_rdma_pcl *write_pcl, 312 const struct svc_rdma_pcl *reply_pcl, 313 const struct xdr_buf *xdr); 314 extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, 315 struct svc_rdma_send_ctxt *sctxt, 316 struct svc_rdma_recv_ctxt *rctxt, 317 int status); 318 extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail); 319 extern int svc_rdma_sq_wait(struct svcxprt_rdma *rdma, 320 const struct rpc_rdma_cid *cid, int sqecount); 321 extern int svc_rdma_post_send_err(struct svcxprt_rdma *rdma, 322 const struct rpc_rdma_cid *cid, 323 const struct ib_send_wr *bad_wr, 324 const struct ib_send_wr *first_wr, 325 int sqecount, int ret); 326 extern int svc_rdma_sendto(struct svc_rqst *); 327 extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, 328 unsigned int length); 329 330 /* svc_rdma_transport.c */ 331 extern struct svc_xprt_class svc_rdma_class; 332 #ifdef CONFIG_SUNRPC_BACKCHANNEL 333 extern struct svc_xprt_class svc_rdma_bc_class; 334 #endif 335 336 /* svc_rdma.c */ 337 extern int svc_rdma_init(void); 338 extern void svc_rdma_cleanup(void); 339 340 #endif 341