xref: /linux/drivers/infiniband/sw/rxe/rxe_verbs.h (revision 071bf69a0220253a44acb8b2a27f7a262b9a46bf)
1 /*
2  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *	   Redistribution and use in source and binary forms, with or
12  *	   without modification, are permitted provided that the following
13  *	   conditions are met:
14  *
15  *	- Redistributions of source code must retain the above
16  *	  copyright notice, this list of conditions and the following
17  *	  disclaimer.
18  *
19  *	- Redistributions in binary form must reproduce the above
20  *	  copyright notice, this list of conditions and the following
21  *	  disclaimer in the documentation and/or other materials
22  *	  provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #ifndef RXE_VERBS_H
35 #define RXE_VERBS_H
36 
37 #include <linux/interrupt.h>
38 #include <rdma/rdma_user_rxe.h>
39 #include "rxe_pool.h"
40 #include "rxe_task.h"
41 
/*
 * Compare two 16-bit partition keys.
 *
 * Returns 1 only when all of the following hold, otherwise 0:
 *   - the low 15 bits of key1 are non-zero,
 *   - both keys have identical low 15 bits,
 *   - at least one of the keys has bit 15 (0x8000) set.
 */
static inline int pkey_match(u16 key1, u16 key2)
{
	u16 base1 = key1 & 0x7fff;
	u16 base2 = key2 & 0x7fff;

	if (!base1 || base1 != base2)
		return 0;

	/* one side or the other must carry the top bit */
	return ((key1 | key2) & 0x8000) ? 1 : 0;
}
48 
/*
 * Compare two psn values, taking only their low 24 bits into account.
 *
 * The unsigned difference is shifted left by 8, which throws away its
 * top 8 bits and moves bit 23 into the sign position of the s32 result.
 *
 * Return >0 if psn_a > psn_b
 *	   0 if psn_a == psn_b
 *	  <0 if psn_a < psn_b
 */
static inline int psn_compare(u32 psn_a, u32 psn_b)
{
	u32 delta = psn_a - psn_b;

	return (s32)(delta << 8);
}
60 
61 struct rxe_ucontext {
62 	struct rxe_pool_entry	pelem;
63 	struct ib_ucontext	ibuc;
64 };
65 
66 struct rxe_pd {
67 	struct rxe_pool_entry	pelem;
68 	struct ib_pd		ibpd;
69 };
70 
71 struct rxe_ah {
72 	struct rxe_pool_entry	pelem;
73 	struct ib_ah		ibah;
74 	struct rxe_pd		*pd;
75 	struct rxe_av		av;
76 };
77 
78 struct rxe_cqe {
79 	union {
80 		struct ib_wc		ibwc;
81 		struct ib_uverbs_wc	uibwc;
82 	};
83 };
84 
85 struct rxe_cq {
86 	struct rxe_pool_entry	pelem;
87 	struct ib_cq		ibcq;
88 	struct rxe_queue	*queue;
89 	spinlock_t		cq_lock;
90 	u8			notify;
91 	int			is_user;
92 	struct tasklet_struct	comp_task;
93 };
94 
/*
 * States named for a wqe.  Enumerators take the default values 0..4 in
 * the order listed; do not reorder without checking all users.
 */
enum wqe_state {
	wqe_state_posted,
	wqe_state_processing,
	wqe_state_pending,
	wqe_state_done,
	wqe_state_error,
};
102 
103 struct rxe_sq {
104 	int			max_wr;
105 	int			max_sge;
106 	int			max_inline;
107 	spinlock_t		sq_lock; /* guard queue */
108 	struct rxe_queue	*queue;
109 };
110 
111 struct rxe_rq {
112 	int			max_wr;
113 	int			max_sge;
114 	spinlock_t		producer_lock; /* guard queue producer */
115 	spinlock_t		consumer_lock; /* guard queue consumer */
116 	struct rxe_queue	*queue;
117 };
118 
119 struct rxe_srq {
120 	struct rxe_pool_entry	pelem;
121 	struct ib_srq		ibsrq;
122 	struct rxe_pd		*pd;
123 	struct rxe_rq		rq;
124 	u32			srq_num;
125 
126 	int			limit;
127 	int			error;
128 };
129 
/*
 * QP states.  The same enum is used for the state member of both
 * struct rxe_req_info and struct rxe_resp_info; DRAIN and DRAINED are
 * marked as applying to the req side only.
 */
enum rxe_qp_state {
	QP_STATE_RESET,
	QP_STATE_INIT,
	QP_STATE_READY,
	QP_STATE_DRAIN,		/* req only */
	QP_STATE_DRAINED,	/* req only */
	QP_STATE_ERROR,
};
138 
/*
 * Array of strings defined outside this header.
 * NOTE(review): presumably indexed by enum rxe_qp_state - confirm at the
 * definition site.
 */
extern char *rxe_qp_state_name[];
140 
141 struct rxe_req_info {
142 	enum rxe_qp_state	state;
143 	int			wqe_index;
144 	u32			psn;
145 	int			opcode;
146 	atomic_t		rd_atomic;
147 	int			wait_fence;
148 	int			need_rd_atomic;
149 	int			wait_psn;
150 	int			need_retry;
151 	int			noack_pkts;
152 	struct rxe_task		task;
153 };
154 
155 struct rxe_comp_info {
156 	u32			psn;
157 	int			opcode;
158 	int			timeout;
159 	int			timeout_retry;
160 	u32			retry_cnt;
161 	u32			rnr_retry;
162 	struct rxe_task		task;
163 };
164 
/* Values for the state member of struct resp_res. */
enum rdatm_res_state {
	rdatm_res_state_next,
	rdatm_res_state_new,
	rdatm_res_state_replay,
};
170 
171 struct resp_res {
172 	int			type;
173 	u32			first_psn;
174 	u32			last_psn;
175 	u32			cur_psn;
176 	enum rdatm_res_state	state;
177 
178 	union {
179 		struct {
180 			struct sk_buff	*skb;
181 		} atomic;
182 		struct {
183 			struct rxe_mem	*mr;
184 			u64		va_org;
185 			u32		rkey;
186 			u32		length;
187 			u64		va;
188 			u32		resid;
189 		} read;
190 	};
191 };
192 
193 struct rxe_resp_info {
194 	enum rxe_qp_state	state;
195 	u32			msn;
196 	u32			psn;
197 	int			opcode;
198 	int			drop_msg;
199 	int			goto_error;
200 	int			sent_psn_nak;
201 	enum ib_wc_status	status;
202 	u8			aeth_syndrome;
203 
204 	/* Receive only */
205 	struct rxe_recv_wqe	*wqe;
206 
207 	/* RDMA read / atomic only */
208 	u64			va;
209 	struct rxe_mem		*mr;
210 	u32			resid;
211 	u32			rkey;
212 	u64			atomic_orig;
213 
214 	/* SRQ only */
215 	struct {
216 		struct rxe_recv_wqe	wqe;
217 		struct ib_sge		sge[RXE_MAX_SGE];
218 	} srq_wqe;
219 
220 	/* Responder resources. It's a circular list where the oldest
221 	 * resource is dropped first.
222 	 */
223 	struct resp_res		*resources;
224 	unsigned int		res_head;
225 	unsigned int		res_tail;
226 	struct resp_res		*res;
227 	struct rxe_task		task;
228 };
229 
230 struct rxe_qp {
231 	struct rxe_pool_entry	pelem;
232 	struct ib_qp		ibqp;
233 	struct ib_qp_attr	attr;
234 	unsigned int		valid;
235 	unsigned int		mtu;
236 	int			is_user;
237 
238 	struct rxe_pd		*pd;
239 	struct rxe_srq		*srq;
240 	struct rxe_cq		*scq;
241 	struct rxe_cq		*rcq;
242 
243 	enum ib_sig_type	sq_sig_type;
244 
245 	struct rxe_sq		sq;
246 	struct rxe_rq		rq;
247 
248 	struct socket		*sk;
249 
250 	struct rxe_av		pri_av;
251 	struct rxe_av		alt_av;
252 
253 	/* list of mcast groups qp has joined (for cleanup) */
254 	struct list_head	grp_list;
255 	spinlock_t		grp_lock; /* guard grp_list */
256 
257 	struct sk_buff_head	req_pkts;
258 	struct sk_buff_head	resp_pkts;
259 	struct sk_buff_head	send_pkts;
260 
261 	struct rxe_req_info	req;
262 	struct rxe_comp_info	comp;
263 	struct rxe_resp_info	resp;
264 
265 	atomic_t		ssn;
266 	atomic_t		skb_out;
267 	int			need_req_skb;
268 
269 	/* Timer for retranmitting packet when ACKs have been lost. RC
270 	 * only. The requester sets it when it is not already
271 	 * started. The responder resets it whenever an ack is
272 	 * received.
273 	 */
274 	struct timer_list retrans_timer;
275 	u64 qp_timeout_jiffies;
276 
277 	/* Timer for handling RNR NAKS. */
278 	struct timer_list rnr_nak_timer;
279 
280 	spinlock_t		state_lock; /* guard requester and completer */
281 };
282 
/* Values for the state member of struct rxe_mem. */
enum rxe_mem_state {
	RXE_MEM_STATE_ZOMBIE,
	RXE_MEM_STATE_INVALID,
	RXE_MEM_STATE_FREE,
	RXE_MEM_STATE_VALID,
};
289 
/* Values for the type member of struct rxe_mem. */
enum rxe_mem_type {
	RXE_MEM_TYPE_NONE,
	RXE_MEM_TYPE_DMA,
	RXE_MEM_TYPE_MR,
	RXE_MEM_TYPE_FMR,
	RXE_MEM_TYPE_MW,
};
297 
/*
 * Number of struct rxe_phys_buf elements that fit in PAGE_SIZE bytes;
 * used as the array length of struct rxe_map.
 */
#define RXE_BUF_PER_MAP		(PAGE_SIZE / sizeof(struct rxe_phys_buf))
299 
300 struct rxe_phys_buf {
301 	u64      addr;
302 	u64      size;
303 };
304 
305 struct rxe_map {
306 	struct rxe_phys_buf	buf[RXE_BUF_PER_MAP];
307 };
308 
309 struct rxe_mem {
310 	struct rxe_pool_entry	pelem;
311 	union {
312 		struct ib_mr		ibmr;
313 		struct ib_mw		ibmw;
314 	};
315 
316 	struct rxe_pd		*pd;
317 	struct ib_umem		*umem;
318 
319 	u32			lkey;
320 	u32			rkey;
321 
322 	enum rxe_mem_state	state;
323 	enum rxe_mem_type	type;
324 	u64			va;
325 	u64			iova;
326 	size_t			length;
327 	u32			offset;
328 	int			access;
329 
330 	int			page_shift;
331 	int			page_mask;
332 	int			map_shift;
333 	int			map_mask;
334 
335 	u32			num_buf;
336 	u32			nbuf;
337 
338 	u32			max_buf;
339 	u32			num_map;
340 
341 	struct rxe_map		**map;
342 };
343 
344 struct rxe_mc_grp {
345 	struct rxe_pool_entry	pelem;
346 	spinlock_t		mcg_lock; /* guard group */
347 	struct rxe_dev		*rxe;
348 	struct list_head	qp_list;
349 	union ib_gid		mgid;
350 	int			num_qp;
351 	u32			qkey;
352 	u16			pkey;
353 };
354 
355 struct rxe_mc_elem {
356 	struct rxe_pool_entry	pelem;
357 	struct list_head	qp_list;
358 	struct list_head	grp_list;
359 	struct rxe_qp		*qp;
360 	struct rxe_mc_grp	*grp;
361 };
362 
363 struct rxe_port {
364 	struct ib_port_attr	attr;
365 	u16			*pkey_tbl;
366 	__be64			port_guid;
367 	__be64			subnet_prefix;
368 	spinlock_t		port_lock; /* guard port */
369 	unsigned int		mtu_cap;
370 	/* special QPs */
371 	u32			qp_smi_index;
372 	u32			qp_gsi_index;
373 };
374 
375 /* callbacks from rdma_rxe to network interface layer */
376 struct rxe_ifc_ops {
377 	void (*release)(struct rxe_dev *rxe);
378 	__be64 (*node_guid)(struct rxe_dev *rxe);
379 	__be64 (*port_guid)(struct rxe_dev *rxe);
380 	struct device *(*dma_device)(struct rxe_dev *rxe);
381 	int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
382 	int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
383 	int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
384 		       struct sk_buff *skb, u32 *crc);
385 	int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
386 		    struct sk_buff *skb);
387 	int (*loopback)(struct sk_buff *skb);
388 	struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
389 				       int paylen, struct rxe_pkt_info *pkt);
390 	char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
391 	enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
392 					   unsigned int port_num);
393 };
394 
395 struct rxe_dev {
396 	struct ib_device	ib_dev;
397 	struct ib_device_attr	attr;
398 	int			max_ucontext;
399 	int			max_inline_data;
400 	struct kref		ref_cnt;
401 	struct mutex	usdev_lock;
402 
403 	struct rxe_ifc_ops	*ifc_ops;
404 
405 	struct net_device	*ndev;
406 
407 	int			xmit_errors;
408 
409 	struct rxe_pool		uc_pool;
410 	struct rxe_pool		pd_pool;
411 	struct rxe_pool		ah_pool;
412 	struct rxe_pool		srq_pool;
413 	struct rxe_pool		qp_pool;
414 	struct rxe_pool		cq_pool;
415 	struct rxe_pool		mr_pool;
416 	struct rxe_pool		mw_pool;
417 	struct rxe_pool		mc_grp_pool;
418 	struct rxe_pool		mc_elem_pool;
419 
420 	spinlock_t		pending_lock; /* guard pending_mmaps */
421 	struct list_head	pending_mmaps;
422 
423 	spinlock_t		mmap_offset_lock; /* guard mmap_offset */
424 	int			mmap_offset;
425 
426 	struct rxe_port		port;
427 	struct list_head	list;
428 };
429 
430 static inline struct rxe_dev *to_rdev(struct ib_device *dev)
431 {
432 	return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
433 }
434 
435 static inline struct rxe_ucontext *to_ruc(struct ib_ucontext *uc)
436 {
437 	return uc ? container_of(uc, struct rxe_ucontext, ibuc) : NULL;
438 }
439 
440 static inline struct rxe_pd *to_rpd(struct ib_pd *pd)
441 {
442 	return pd ? container_of(pd, struct rxe_pd, ibpd) : NULL;
443 }
444 
445 static inline struct rxe_ah *to_rah(struct ib_ah *ah)
446 {
447 	return ah ? container_of(ah, struct rxe_ah, ibah) : NULL;
448 }
449 
450 static inline struct rxe_srq *to_rsrq(struct ib_srq *srq)
451 {
452 	return srq ? container_of(srq, struct rxe_srq, ibsrq) : NULL;
453 }
454 
455 static inline struct rxe_qp *to_rqp(struct ib_qp *qp)
456 {
457 	return qp ? container_of(qp, struct rxe_qp, ibqp) : NULL;
458 }
459 
460 static inline struct rxe_cq *to_rcq(struct ib_cq *cq)
461 {
462 	return cq ? container_of(cq, struct rxe_cq, ibcq) : NULL;
463 }
464 
465 static inline struct rxe_mem *to_rmr(struct ib_mr *mr)
466 {
467 	return mr ? container_of(mr, struct rxe_mem, ibmr) : NULL;
468 }
469 
470 static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
471 {
472 	return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL;
473 }
474 
/*
 * Device registration entry points, defined outside this header.  Both
 * take the rxe_dev to operate on and return an int.
 * NOTE(review): presumably 0 on success and a negative errno on
 * failure - confirm at the definitions.
 */
int rxe_register_device(struct rxe_dev *rxe);
int rxe_unregister_device(struct rxe_dev *rxe);

/*
 * Cleanup routine taking an untyped argument, defined outside this
 * header.
 * NOTE(review): presumably arg is a pool element belonging to a
 * multicast group - confirm at the definition.
 */
void rxe_mc_cleanup(void *arg);
479 
480 #endif /* RXE_VERBS_H */
481