xref: /linux/include/linux/sunrpc/svc_rdma.h (revision d16f060f3ee297424c0aba047b1d49208adb9318)
1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
2 /*
3  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the BSD-type
9  * license below:
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  *      Redistributions of source code must retain the above copyright
16  *      notice, this list of conditions and the following disclaimer.
17  *
18  *      Redistributions in binary form must reproduce the above
19  *      copyright notice, this list of conditions and the following
20  *      disclaimer in the documentation and/or other materials provided
21  *      with the distribution.
22  *
23  *      Neither the name of the Network Appliance, Inc. nor the names of
24  *      its contributors may be used to endorse or promote products
25  *      derived from this software without specific prior written
26  *      permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39  *
40  * Author: Tom Tucker <tom@opengridcomputing.com>
41  */
42 
43 #ifndef SVC_RDMA_H
44 #define SVC_RDMA_H
45 #include <linux/llist.h>
46 #include <linux/sunrpc/xdr.h>
47 #include <linux/sunrpc/svcsock.h>
48 #include <linux/sunrpc/rpc_rdma.h>
49 #include <linux/sunrpc/rpc_rdma_cid.h>
50 #include <linux/sunrpc/svc_rdma_pcl.h>
51 #include <linux/sunrpc/rdma_rn.h>
52 
53 #include <linux/percpu_counter.h>
54 #include <rdma/ib_verbs.h>
55 #include <rdma/rdma_cm.h>
56 
57 /* Default and maximum inline threshold sizes */
/*
 * RPCRDMA_PULLUP_THRESH is derived rather than fixed: it is one half of
 * RPCRDMA_V1_DEF_INLINE_SIZE.
 * NOTE(review): the values are presumably byte counts -- confirm at the
 * points of use.
 */
58 enum {
59 	RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1,
60 	RPCRDMA_DEF_INLINE_THRESH = 4096,
61 	RPCRDMA_MAX_INLINE_THRESH = 65536
62 };
63 
64 /* RPC/RDMA parameters and stats */
/*
 * These objects are only declared here; their definitions are not part
 * of this header.
 */
65 extern unsigned int svcrdma_ord;
66 extern unsigned int svcrdma_max_requests;
67 extern unsigned int svcrdma_max_bc_requests;
68 extern unsigned int svcrdma_max_req_size;
69 extern struct workqueue_struct *svcrdma_wq;
70 
/* Statistics are kept in per-CPU counters (struct percpu_counter). */
71 extern struct percpu_counter svcrdma_stat_read;
72 extern struct percpu_counter svcrdma_stat_recv;
73 extern struct percpu_counter svcrdma_stat_sq_starve;
74 extern struct percpu_counter svcrdma_stat_write;
75 
/*
 * State for one server-side RPC/RDMA transport instance.
 *
 * The generic struct svc_xprt is embedded as sc_xprt, which is what lets
 * svc_rdma_rqst_rdma() recover the enclosing svcxprt_rdma from an
 * svc_rqst's rq_xprt pointer via container_of().
 */
76 struct svcxprt_rdma {
77 	struct svc_xprt      sc_xprt;		/* SVC transport structure */
78 	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
79 	struct list_head     sc_accept_q;	/* Conn. waiting accept */
80 	struct rpcrdma_notification sc_rn;	/* removal notification */
81 	int		     sc_ord;		/* RDMA read limit */
82 	int                  sc_max_send_sges;	/* NOTE(review): presumably the SGE limit per Send WR -- confirm */
83 	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
84 
85 	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
86 	unsigned int	     sc_sq_depth;	/* Depth of SQ */
87 	atomic_t	     sc_sq_ticket_head;	/* Next ticket to issue */
88 	atomic_t	     sc_sq_ticket_tail;	/* Ticket currently serving */
89 	wait_queue_head_t    sc_sq_ticket_wait;	/* Ticket ordering waitlist */
90 	__be32		     sc_fc_credits;	/* Forward credits */
91 	u32		     sc_max_requests;	/* Max requests */
92 	u32		     sc_max_bc_requests;/* Backward credits */
93 	int                  sc_max_req_size;	/* Size of each RQ WR buf */
94 	u8		     sc_port_num;
95 
96 	struct ib_pd         *sc_pd;
97 
	/*
	 * Lock-less lists (llist) of contexts, each paired with a spinlock.
	 * NOTE(review): presumably these cache free contexts, with
	 * svc_rdma_send_ctxt linked through its sc_node -- confirm in the
	 * .c files.
	 */
98 	spinlock_t	     sc_send_lock;
99 	struct llist_head    sc_send_ctxts;
100 	spinlock_t	     sc_rw_ctxt_lock;
101 	struct llist_head    sc_rw_ctxts;
102 
103 	u32		     sc_pending_recvs;
104 	u32		     sc_recv_batch;
105 	struct list_head     sc_rq_dto_q;
106 	struct list_head     sc_read_complete_q;
107 	spinlock_t	     sc_rq_dto_lock;
108 	struct ib_qp         *sc_qp;
109 	struct ib_cq         *sc_rq_cq;		/* CQ used for Receive Queue completion IDs */
110 	struct ib_cq         *sc_sq_cq;		/* CQ used for Send Queue completion IDs */
111 
112 	spinlock_t	     sc_lock;		/* transport lock */
113 
114 	wait_queue_head_t    sc_send_wait;	/* SQ exhaustion waitlist */
115 	unsigned long	     sc_flags;		/* see RDMAXPRT_* below */
116 	struct work_struct   sc_work;
117 
118 	struct llist_head    sc_recv_ctxts;	/* NOTE(review): presumably svc_rdma_recv_ctxt, via rc_node -- confirm */
119 
120 	atomic_t	     sc_completion_ids;	/* source of ci_completion_id values */
121 };
122 /* sc_flags */
/* NOTE(review): presumably a bit number rather than a mask -- confirm at the users. */
123 #define RDMAXPRT_CONN_PENDING	3
124 
/**
 * svc_rdma_rqst_rdma - Get the RDMA transport that owns an svc_rqst
 * @rqstp: request whose rq_xprt is embedded in a struct svcxprt_rdma
 *
 * Applies container_of() to @rqstp->rq_xprt, so the result is only
 * meaningful when that svc_xprt is the sc_xprt member of a
 * struct svcxprt_rdma. No check is made here.
 *
 * Return: the enclosing struct svcxprt_rdma
 */
125 static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp)
126 {
127 	struct svc_xprt *xprt = rqstp->rq_xprt;
128 
129 	return container_of(xprt, struct svcxprt_rdma, sc_xprt);
130 }
131 
132 /*
133  * Default connection parameters
 *
 * NOTE(review): RPCRDMA_MAX_REQUESTS and RPCRDMA_MAX_BC_REQUESTS are
 * presumably the initial values of svcrdma_max_requests and
 * svcrdma_max_bc_requests -- confirm where those are defined.
134  */
135 enum {
136 	RPCRDMA_LISTEN_BACKLOG	= 10,
137 	RPCRDMA_MAX_REQUESTS	= 128,
138 	RPCRDMA_MAX_BC_REQUESTS	= 2,
139 };
140 
/* The RDMA transport's payload limit is the generic RPCSVC_MAXPAYLOAD. */
141 #define RPCSVC_MAXPAYLOAD_RDMA	RPCSVC_MAXPAYLOAD
142 
143 /**
144  * svc_rdma_recv_cid_init - Initialize a Receive Queue completion ID
145  * @rdma: controlling transport
146  * @cid: completion ID to initialize
 *
 * The queue ID is the resource ID of the transport's sc_rq_cq. The
 * completion ID is the next value of the transport's sc_completion_ids
 * counter, which svc_rdma_send_cid_init() also draws from.
147  */
148 static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
149 					  struct rpc_rdma_cid *cid)
150 {
151 	cid->ci_queue_id = rdma->sc_rq_cq->res.id;
152 	cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
153 }
154 
155 /**
156  * svc_rdma_send_cid_init - Initialize a Send Queue completion ID
157  * @rdma: controlling transport
158  * @cid: completion ID to initialize
 *
 * The queue ID is the resource ID of the transport's sc_sq_cq. The
 * completion ID is the next value of the transport's sc_completion_ids
 * counter, which svc_rdma_recv_cid_init() also draws from.
159  */
160 static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
161 					  struct rpc_rdma_cid *cid)
162 {
163 	cid->ci_queue_id = rdma->sc_sq_cq->res.id;
164 	cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
165 }
166 
167 /*
168  * A chunk context tracks all I/O for moving one Read or Write
169  * chunk. This is a set of rdma_rw's that handle data movement
170  * for all segments of one chunk.
 *
 * Instances are embedded in svc_rdma_recv_ctxt (rc_cc) and in
 * svc_rdma_write_info (wi_cc).
171  */
172 struct svc_rdma_chunk_ctxt {
173 	struct rpc_rdma_cid	cc_cid;		/* completion ID */
174 	struct ib_cqe		cc_cqe;
175 	struct list_head	cc_rwctxts;	/* NOTE(review): presumably the rdma_rw contexts for this chunk -- confirm */
176 	ktime_t			cc_posttime;
177 	int			cc_sqecount;	/* NOTE(review): presumably SQEs consumed by this chunk -- confirm */
178 };
179 
/*
 * Receive context: holds one Receive WR (rc_recv_wr) with its single
 * SGE (rc_recv_sge) and buffer pointer, an xdr_stream, the chunk lists
 * (struct svc_rdma_pcl) associated with the message, and the state used
 * while pulling a Read chunk.
 *
 * rc_pages must remain the last member: it is a flexible array whose
 * element count is given by rc_maxpages (__counted_by).
 */
180 struct svc_rdma_recv_ctxt {
181 	struct llist_node	rc_node;	/* NOTE(review): presumably links into svcxprt_rdma::sc_recv_ctxts -- confirm */
182 	struct list_head	rc_list;
183 	struct ib_recv_wr	rc_recv_wr;
184 	struct ib_cqe		rc_cqe;
185 	struct rpc_rdma_cid	rc_cid;		/* completion ID */
186 	struct ib_sge		rc_recv_sge;
187 	void			*rc_recv_buf;
188 	struct xdr_stream	rc_stream;
189 	u32			rc_byte_len;
190 	u32			rc_inv_rkey;
191 	__be32			rc_msgtype;	/* big-endian (__be32) */
192 
193 	/* State for pulling a Read chunk */
194 	unsigned int		rc_pageoff;
195 	unsigned int		rc_curpage;
196 	unsigned int		rc_readbytes;
197 	struct xdr_buf		rc_saved_arg;
198 	struct svc_rdma_chunk_ctxt	rc_cc;
199 
200 	struct svc_rdma_pcl	rc_call_pcl;
201 
202 	struct svc_rdma_pcl	rc_read_pcl;
203 	struct svc_rdma_chunk	*rc_cur_result_payload;
204 	struct svc_rdma_pcl	rc_write_pcl;
205 	struct svc_rdma_pcl	rc_reply_pcl;
206 
207 	unsigned int		rc_page_count;
208 	unsigned long		rc_maxpages;	/* number of elements in rc_pages[] */
209 	struct page		*rc_pages[] __counted_by(rc_maxpages);
210 };
211 
212 /*
213  * State for sending a Write chunk.
214  *  - Tracks progress of writing one chunk over all its segments
215  *  - Stores arguments for the SGL constructor functions
 *
 * One instance is also embedded in svc_rdma_send_ctxt as sc_reply_info.
216  */
217 struct svc_rdma_write_info {
218 	struct svcxprt_rdma	*wi_rdma;	/* owning transport */
219 	struct list_head	wi_list;	/* NOTE(review): presumably links into svc_rdma_send_ctxt::sc_write_info_list -- confirm */
220 
221 	const struct svc_rdma_chunk	*wi_chunk;	/* chunk being written; not modified through this pointer */
222 
223 	/* write state of this chunk */
224 	unsigned int		wi_seg_off;
225 	unsigned int		wi_seg_no;
226 
227 	/* SGL constructor arguments */
228 	const struct xdr_buf	*wi_xdr;
229 	unsigned char		*wi_base;
230 	unsigned int		wi_next_off;
231 
232 	struct svc_rdma_chunk_ctxt	wi_cc;	/* I/O tracking for this chunk */
233 	struct work_struct	wi_work;
234 };
235 
/*
 * Send context: holds one Send WR (sc_send_wr) with its completion
 * entry and completion ID, an xdr_buf/xdr_stream pair for the header,
 * the Write chunk state attached to this Send, and the pages and SGEs
 * it uses.
 *
 * sc_sges must remain the last member: it is a flexible array.
 * NOTE(review): its element count is not recorded in a visible
 * __counted_by annotation; presumably bounded by
 * svcxprt_rdma::sc_max_send_sges -- confirm at the allocation site.
 */
236 struct svc_rdma_send_ctxt {
237 	struct llist_node	sc_node;	/* NOTE(review): presumably links into svcxprt_rdma::sc_send_ctxts -- confirm */
238 	struct rpc_rdma_cid	sc_cid;		/* completion ID */
239 	struct work_struct	sc_work;
240 
241 	struct svcxprt_rdma	*sc_rdma;	/* owning transport */
242 	struct ib_send_wr	sc_send_wr;
243 	struct ib_send_wr	*sc_wr_chain;
244 	int			sc_sqecount;
245 	struct ib_cqe		sc_cqe;
246 	struct xdr_buf		sc_hdrbuf;
247 	struct xdr_stream	sc_stream;
248 
249 	struct list_head	sc_write_info_list;
250 	struct svc_rdma_write_info sc_reply_info;	/* embedded, not a pointer */
251 
252 	void			*sc_xprt_buf;
253 	int			sc_page_count;
254 	int			sc_cur_sge_no;
255 	unsigned long		sc_maxpages;
256 	struct page		**sc_pages;
257 	struct ib_sge		sc_sges[];
258 };
259 
260 /* svc_rdma_backchannel.c */
/* NOTE(review): the label above presumably names the defining source file. */
261 extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
262 				     struct svc_rdma_recv_ctxt *rctxt);
263 
264 /* svc_rdma_recvfrom.c */
/*
 * Receive-side entry points. Every prototype names its parameters so
 * the declarations are self-describing.
 */
265 extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
266 extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
267 extern struct svc_rdma_recv_ctxt *
268 		svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma);
269 extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
270 				   struct svc_rdma_recv_ctxt *ctxt);
271 extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
272 extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
273 extern int svc_rdma_recvfrom(struct svc_rqst *rqstp);
274 
275 /* svc_rdma_rw.c */
/*
 * Chunk read/write entry points. svc_rdma_cc_init() is declared exactly
 * once here.
 */
276 extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
277 			     struct svc_rdma_chunk_ctxt *cc);
278 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
281 extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
282 				struct svc_rdma_chunk_ctxt *cc,
283 				enum dma_data_direction dir);
284 extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
285 					 struct svc_rdma_send_ctxt *ctxt);
286 extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
287 					 struct svc_rdma_send_ctxt *ctxt);
288 extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
289 				       const struct svc_rdma_recv_ctxt *rctxt,
290 				       struct svc_rdma_send_ctxt *sctxt,
291 				       const struct xdr_buf *xdr);
292 extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
293 					const struct svc_rdma_pcl *write_pcl,
294 					const struct svc_rdma_pcl *reply_pcl,
295 					struct svc_rdma_send_ctxt *sctxt,
296 					const struct xdr_buf *xdr);
297 extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
298 				      struct svc_rqst *rqstp,
299 				      struct svc_rdma_recv_ctxt *head);
300 
301 /* svc_rdma_sendto.c */
/*
 * Send-side entry points. Every prototype names its parameters so the
 * declarations are self-describing.
 */
302 extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma);
303 extern struct svc_rdma_send_ctxt *
304 		svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
305 extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
306 				   struct svc_rdma_send_ctxt *ctxt);
307 extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
308 			      struct svc_rdma_send_ctxt *ctxt);
309 extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
310 				  struct svc_rdma_send_ctxt *sctxt,
311 				  const struct svc_rdma_pcl *write_pcl,
312 				  const struct svc_rdma_pcl *reply_pcl,
313 				  const struct xdr_buf *xdr);
314 extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
315 				    struct svc_rdma_send_ctxt *sctxt,
316 				    struct svc_rdma_recv_ctxt *rctxt,
317 				    int status);
318 extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
319 extern int svc_rdma_sq_wait(struct svcxprt_rdma *rdma,
320 			    const struct rpc_rdma_cid *cid, int sqecount);
321 extern int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
322 				  const struct rpc_rdma_cid *cid,
323 				  const struct ib_send_wr *bad_wr,
324 				  const struct ib_send_wr *first_wr,
325 				  int sqecount, int ret);
326 extern int svc_rdma_sendto(struct svc_rqst *rqstp);
327 extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
328 				   unsigned int length);
329 
330 /* svc_rdma_transport.c */
/*
 * Transport classes. svc_rdma_bc_class is declared only when
 * CONFIG_SUNRPC_BACKCHANNEL is defined.
 */
331 extern struct svc_xprt_class svc_rdma_class;
332 #ifdef CONFIG_SUNRPC_BACKCHANNEL
333 extern struct svc_xprt_class svc_rdma_bc_class;
334 #endif
335 
336 /* svc_rdma.c */
/*
 * NOTE(review): presumably the one-time setup and teardown pair for the
 * RPC/RDMA server code; svc_rdma_init() reports a status as an int, and
 * svc_rdma_cleanup() returns nothing. Call sites are not visible here.
 */
337 extern int svc_rdma_init(void);
338 extern void svc_rdma_cleanup(void);
339 
340 #endif
341