xref: /freebsd/sys/dev/mlx5/mlx5_ib/mlx5_ib.h (revision eea7c61590ae8968b3f1f609cf0bc8633222a94f)
1 /*-
2  * Copyright (c) 2013-2020, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #ifndef MLX5_IB_H
29 #define MLX5_IB_H
30 
31 #include <linux/kernel.h>
32 #include <linux/sched.h>
33 #include <linux/printk.h>
34 #include <linux/netdevice.h>
35 #include <rdma/ib_verbs.h>
36 #include <rdma/ib_umem.h>
37 #include <rdma/ib_smi.h>
38 #include <dev/mlx5/cq.h>
39 #include <dev/mlx5/qp.h>
40 #include <dev/mlx5/srq.h>
41 #include <linux/types.h>
42 #include <dev/mlx5/mlx5_core/transobj.h>
43 #include <rdma/ib_user_verbs.h>
44 #include <rdma/mlx5-abi.h>
45 #include <rdma/uverbs_ioctl.h>
46 
/*
 * Driver logging helpers.  Each forwards to the matching pr_*() call and
 * prefixes the message with the IB device name, the calling function, the
 * source line and the pid of `current`.  `dev` must be a pointer whose
 * pointee has an `ib_dev.name` member.
 */
#define mlx5_ib_dbg(dev, format, ...)					\
	pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name,	\
	    __func__, __LINE__, current->pid, ##__VA_ARGS__)

#define mlx5_ib_err(dev, format, ...)					\
	pr_err("%s: ERR: %s:%d:(pid %d): " format, (dev)->ib_dev.name,	\
	    __func__, __LINE__, current->pid, ##__VA_ARGS__)

#define mlx5_ib_warn(dev, format, ...)					\
	pr_warn("%s: WARN: %s:%d:(pid %d): " format, (dev)->ib_dev.name, \
	    __func__, __LINE__, current->pid, ##__VA_ARGS__)
58 
/*
 * Evaluates to non-zero when member `fld` of `type` lies entirely within
 * the first `sz` bytes of the structure.
 */
#define field_avail(type, fld, sz)					\
	((sz) >= offsetof(type, fld) + sizeof(((type *)0)->fld))

#define MLX5_IB_DEFAULT_UIDX		0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK	__mlx5_mask(qpc, user_index)

#define MLX5_MKEY_PAGE_SHIFT_MASK	__mlx5_mask(mkc, log_page_size)
65 
/* Shift and mask constants for the mmap command field. */
enum {
	MLX5_IB_MMAP_CMD_SHIFT = 8,
	MLX5_IB_MMAP_CMD_MASK = 0xff,
};

/* Scatter-to-CQE selectors (32- and 64-byte variants, RES and REQ). */
enum {
	MLX5_RES_SCAT_DATA32_CQE = 0x01,
	MLX5_RES_SCAT_DATA64_CQE = 0x02,
	MLX5_REQ_SCAT_DATA32_CQE = 0x11,
	MLX5_REQ_SCAT_DATA64_CQE = 0x22,
};
77 
/* Latency classes, numbered consecutively from zero. */
enum mlx5_ib_latency_class {
	MLX5_IB_LATENCY_CLASS_LOW = 0,
	MLX5_IB_LATENCY_CLASS_MEDIUM = 1,
	MLX5_IB_LATENCY_CLASS_HIGH = 2,
	MLX5_IB_LATENCY_CLASS_FAST_PATH = 3
};

/* MAD_IFC option bits; each value is a distinct power of two. */
enum mlx5_ib_mad_ifc_flags {
	MLX5_MAD_IFC_IGNORE_MKEY = 1 << 0,
	MLX5_MAD_IFC_IGNORE_BKEY = 1 << 1,
	MLX5_MAD_IFC_NET_VIEW = 1 << 2,
};
90 
/* bfreg index used for cross-channel work. */
enum {
	MLX5_CROSS_CHANNEL_BFREG = 0,
};

/* CQE format versions. */
enum {
	MLX5_CQE_VERSION_V0 = 0,
	MLX5_CQE_VERSION_V1 = 1,
};
99 
100 enum {
101 	MLX5_IB_INVALID_UAR_INDEX	= BIT(31),
102 	MLX5_IB_INVALID_BFREG		= BIT(31),
103 };
104 
/* Kinds of user mmap entry; numbering starts at 1. */
enum mlx5_ib_mmap_type {
	MLX5_IB_MMAP_TYPE_MEMIC	 = 1,
	MLX5_IB_MMAP_TYPE_VAR,		/* 2 */
	MLX5_IB_MMAP_TYPE_UAR_WC,	/* 3 */
	MLX5_IB_MMAP_TYPE_UAR_NC,	/* 4 */
};
111 
112 struct mlx5_bfreg_info {
113 	u32 *sys_pages;
114 	int num_low_latency_bfregs;
115 	unsigned int *count;
116 
117 	/*
118 	 * protect bfreg allocation data structs
119 	 */
120 	struct mutex lock;
121 	u32 ver;
122 	u8 lib_uar_4k : 1;
123 	u8 lib_uar_dyn : 1;
124 	u32 num_sys_pages;
125 	u32 num_static_sys_pages;
126 	u32 total_num_bfregs;
127 	u32 num_dyn_bfregs;
128 };
129 
130 struct mlx5_ib_ucontext {
131 	struct ib_ucontext	ibucontext;
132 	struct list_head	db_page_list;
133 
134 	/* protect doorbell record alloc/free
135 	 */
136 	struct mutex		db_page_mutex;
137 	struct mlx5_bfreg_info	bfregi;
138 	u8			cqe_version;
139 	/* Transport Domain number */
140 	u32			tdn;
141 
142 	u64			lib_caps;
143 	u16			devx_uid;
144 };
145 
146 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
147 {
148 	return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext);
149 }
150 
151 struct mlx5_ib_pd {
152 	struct ib_pd		ibpd;
153 	u32			pdn;
154 	u16			uid;
155 };
156 
157 #define MLX5_IB_FLOW_MCAST_PRIO		(MLX5_BY_PASS_NUM_PRIOS - 1)
158 #define MLX5_IB_FLOW_LAST_PRIO		(MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1)
159 #if (MLX5_IB_FLOW_LAST_PRIO <= 0)
160 #error "Invalid number of bypass priorities"
161 #endif
162 #define MLX5_IB_FLOW_LEFTOVERS_PRIO	(MLX5_IB_FLOW_MCAST_PRIO + 1)
163 
164 #define MLX5_IB_NUM_FLOW_FT		(MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
165 #define MLX5_IB_NUM_SNIFFER_FTS		2
/* A flow table paired with a reference count. */
struct mlx5_ib_flow_prio {
	struct mlx5_flow_table *flow_table;
	unsigned int refcount;
};
170 
171 struct mlx5_ib_flow_handler {
172 	struct list_head		list;
173 	struct ib_flow			ibflow;
174 	struct mlx5_ib_flow_prio	*prio;
175 	struct mlx5_flow_rule	*rule;
176 };
177 
178 struct mlx5_ib_flow_db {
179 	struct mlx5_ib_flow_prio	prios[MLX5_IB_NUM_FLOW_FT];
180 	struct mlx5_ib_flow_prio	sniffer[MLX5_IB_NUM_SNIFFER_FTS];
181 	struct mlx5_flow_table		*lag_demux_ft;
182 	/* Protect flow steering bypass flow tables
183 	 * when add/del flow rules.
184 	 * only single add/removal of flow steering rule could be done
185 	 * simultaneously.
186 	 */
187 	struct mutex			lock;
188 };
189 
/*
 * These are macros rather than enumerators so that enum ib_send_flags and
 * enum ib_qp_type need not be duplicated for the low-level driver.
 */

/* UMR send flags, carved out of the reserved ib_send_flags range. */
#define MLX5_IB_SEND_UMR_UNREG			IB_SEND_RESERVED_START
#define MLX5_IB_SEND_UMR_FAIL_IF_FREE		(IB_SEND_RESERVED_START << 1)
#define MLX5_IB_SEND_UMR_UPDATE_MTT		(IB_SEND_RESERVED_START << 2)

#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION	(IB_SEND_RESERVED_START << 3)
#define MLX5_IB_SEND_UMR_UPDATE_PD		(IB_SEND_RESERVED_START << 4)
#define MLX5_IB_SEND_UMR_UPDATE_ACCESS		IB_SEND_RESERVED_END

/* Driver-private QP types and work request opcode, mapped onto reserved values. */
#define MLX5_IB_QPT_REG_UMR	IB_QPT_RESERVED1
/*
 * IB_QPT_GSI creates the software wrapper around GSI, while
 * MLX5_IB_QPT_HW_GSI creates the actual hardware QP.
 */
#define MLX5_IB_QPT_HW_GSI	IB_QPT_RESERVED2
#define MLX5_IB_QPT_DCI	IB_QPT_RESERVED3
#define MLX5_IB_QPT_DCT	IB_QPT_RESERVED4
#define MLX5_IB_WR_UMR		IB_WR_RESERVED1
211 
/*
 * Private QP creation flags, passed in ib_qp_init_attr.create_flags.
 *
 * They are for internal use by the mlx5_ib driver and rely on the range
 * reserved for that purpose in the ib_qp_create_flags enum.
 */
#define MLX5_IB_QP_CREATE_SQPN_QP1	IB_QP_CREATE_RESERVED_START
#define MLX5_IB_QP_CREATE_WC_TEST	(IB_QP_CREATE_RESERVED_START << 1)
219 
220 struct wr_list {
221 	u16	opcode;
222 	u16	next;
223 };
224 
225 struct mlx5_ib_wq {
226 	u64		       *wrid;
227 	u32		       *wr_data;
228 	struct wr_list	       *w_list;
229 	unsigned	       *wqe_head;
230 	u16		        unsig_count;
231 
232 	/* serialize post to the work queue
233 	 */
234 	spinlock_t		lock;
235 	int			wqe_cnt;
236 	int			max_post;
237 	int			max_gs;
238 	int			offset;
239 	int			wqe_shift;
240 	unsigned		head;
241 	unsigned		tail;
242 	u16			cur_post;
243 	u16			last_poll;
244 	void		       *qend;
245 };
246 
247 struct mlx5_ib_rwq {
248 	struct ib_wq		ibwq;
249 	struct mlx5_core_qp	core_qp;
250 	u32			rq_num_pas;
251 	u32			log_rq_stride;
252 	u32			log_rq_size;
253 	u32			rq_page_offset;
254 	u32			log_page_size;
255 	struct ib_umem		*umem;
256 	size_t			buf_size;
257 	unsigned int		page_shift;
258 	int			create_type;
259 	struct mlx5_db		db;
260 	u32			user_index;
261 	u32			wqe_count;
262 	u32			wqe_shift;
263 	int			wq_sig;
264 };
265 
/* QP creation types (presumably the values of create_type — confirm). */
enum {
	MLX5_QP_USER = 0,
	MLX5_QP_KERNEL = 1,
	MLX5_QP_EMPTY = 2
};

/* WQ creation types. */
enum {
	MLX5_WQ_USER = 0,
	MLX5_WQ_KERNEL = 1
};
276 
277 struct mlx5_ib_rwq_ind_table {
278 	struct ib_rwq_ind_table ib_rwq_ind_tbl;
279 	u32			rqtn;
280 	u16			uid;
281 };
282 
/*
 * Connect-IB can trigger up to four concurrent pagefaults per QP; each
 * enumerator names one of them and MLX5_IB_PAGEFAULT_CONTEXTS is the count.
 */
enum mlx5_ib_pagefault_context {
	MLX5_IB_PAGEFAULT_RESPONDER_READ = 0,
	MLX5_IB_PAGEFAULT_REQUESTOR_READ = 1,
	MLX5_IB_PAGEFAULT_RESPONDER_WRITE = 2,
	MLX5_IB_PAGEFAULT_REQUESTOR_WRITE = 3,
	MLX5_IB_PAGEFAULT_CONTEXTS = 4
};
294 
295 static inline enum mlx5_ib_pagefault_context
296 	mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
297 {
298 	return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
299 }
300 
301 struct mlx5_ib_pfault {
302 	struct work_struct	work;
303 	struct mlx5_pagefault	mpfault;
304 };
305 
306 struct mlx5_ib_ubuffer {
307 	struct ib_umem	       *umem;
308 	int			buf_size;
309 	u64			buf_addr;
310 };
311 
312 struct mlx5_ib_qp_base {
313 	struct mlx5_ib_qp	*container_mibqp;
314 	struct mlx5_core_qp	mqp;
315 	struct mlx5_ib_ubuffer	ubuffer;
316 };
317 
318 struct mlx5_ib_qp_trans {
319 	struct mlx5_ib_qp_base	base;
320 	u16			xrcdn;
321 	u8			alt_port;
322 	u8			atomic_rd_en;
323 	u8			resp_depth;
324 };
325 
326 struct mlx5_ib_rss_qp {
327 	u32	tirn;
328 };
329 
330 struct mlx5_ib_rq {
331 	struct mlx5_ib_qp_base base;
332 	struct mlx5_ib_wq	*rq;
333 	struct mlx5_ib_ubuffer	ubuffer;
334 	struct mlx5_db		*doorbell;
335 	u32			tirn;
336 	u8			state;
337 };
338 
339 struct mlx5_ib_sq {
340 	struct mlx5_ib_qp_base base;
341 	struct mlx5_ib_wq	*sq;
342 	struct mlx5_ib_ubuffer  ubuffer;
343 	struct mlx5_db		*doorbell;
344 	u32			tisn;
345 	u8			state;
346 };
347 
348 struct mlx5_ib_raw_packet_qp {
349 	struct mlx5_ib_sq sq;
350 	struct mlx5_ib_rq rq;
351 };
352 
353 struct mlx5_bf {
354 	int			buf_size;
355 	unsigned long		offset;
356 	struct mlx5_sq_bfreg   *bfreg;
357 	spinlock_t		lock32;
358 };
359 
360 struct mlx5_ib_dct {
361 	struct mlx5_core_dct    mdct;
362 	u32                     *in;
363 };
364 
365 struct mlx5_ib_qp {
366 	struct ib_qp		ibqp;
367 	union {
368 		struct mlx5_ib_qp_trans trans_qp;
369 		struct mlx5_ib_raw_packet_qp raw_packet_qp;
370 		struct mlx5_ib_rss_qp rss_qp;
371 		struct mlx5_ib_dct dct;
372 	};
373 	struct mlx5_buf		buf;
374 
375 	struct mlx5_db		db;
376 	struct mlx5_ib_wq	rq;
377 
378 	u8			sq_signal_bits;
379 	u8			fm_cache;
380 	struct mlx5_ib_wq	sq;
381 
382 	/* serialize qp state modifications
383 	 */
384 	struct mutex		mutex;
385 	u32			flags;
386 	u8			port;
387 	u8			state;
388 	int			wq_sig;
389 	int			scat_cqe;
390 	int			max_inline_data;
391 	struct mlx5_bf	        bf;
392 	int			has_rq;
393 
394 	/* only for user space QPs. For kernel
395 	 * we have it from the bf object
396 	 */
397 	int			bfregn;
398 
399 	int			create_type;
400 
401 	/* Store signature errors */
402 	bool			signature_en;
403 
404 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
405 	/*
406 	 * A flag that is true for QP's that are in a state that doesn't
407 	 * allow page faults, and shouldn't schedule any more faults.
408 	 */
409 	int                     disable_page_faults;
410 	/*
411 	 * The disable_page_faults_lock protects a QP's disable_page_faults
412 	 * field, allowing for a thread to atomically check whether the QP
413 	 * allows page faults, and if so schedule a page fault.
414 	 */
415 	spinlock_t              disable_page_faults_lock;
416 	struct mlx5_ib_pfault	pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
417 #endif
418 	struct list_head	qps_list;
419 	struct list_head	cq_recv_list;
420 	struct list_head	cq_send_list;
421 };
422 
423 struct mlx5_ib_cq_buf {
424 	struct mlx5_buf		buf;
425 	struct ib_umem		*umem;
426 	int			cqe_size;
427 	int			nent;
428 };
429 
430 enum mlx5_ib_qp_flags {
431 	MLX5_IB_QP_LSO                          = IB_QP_CREATE_IPOIB_UD_LSO,
432 	MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
433 	MLX5_IB_QP_CROSS_CHANNEL            = IB_QP_CREATE_CROSS_CHANNEL,
434 	MLX5_IB_QP_MANAGED_SEND             = IB_QP_CREATE_MANAGED_SEND,
435 	MLX5_IB_QP_MANAGED_RECV             = IB_QP_CREATE_MANAGED_RECV,
436 	MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 5,
437 	/* QP uses 1 as its source QP number */
438 	MLX5_IB_QP_SQPN_QP1			= 1 << 6,
439 	MLX5_IB_QP_CAP_SCATTER_FCS		= 1 << 7,
440 	MLX5_IB_QP_RSS				= 1 << 8,
441 	MLX5_IB_QP_UNDERLAY			= 1 << 10,
442 };
443 
444 struct mlx5_umr_wr {
445 	struct ib_send_wr		wr;
446 	union {
447 		u64			virt_addr;
448 		u64			offset;
449 	} target;
450 	struct ib_pd		       *pd;
451 	unsigned int			page_shift;
452 	unsigned int			npages;
453 	u32				length;
454 	int				access_flags;
455 	u32				mkey;
456 };
457 
458 static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
459 {
460 	return container_of(wr, struct mlx5_umr_wr, wr);
461 }
462 
/* Shared MR bookkeeping: an id and the umem behind it. */
struct mlx5_shared_mr_info {
	int		mr_id;
	struct ib_umem	*umem;
};
467 
468 struct mlx5_ib_cq {
469 	struct ib_cq		ibcq;
470 	struct mlx5_core_cq	mcq;
471 	struct mlx5_ib_cq_buf	buf;
472 	struct mlx5_db		db;
473 
474 	/* serialize access to the CQ
475 	 */
476 	spinlock_t		lock;
477 
478 	/* protect resize cq
479 	 */
480 	struct mutex		resize_mutex;
481 	struct mlx5_ib_cq_buf  *resize_buf;
482 	struct ib_umem	       *resize_umem;
483 	int			cqe_size;
484 	struct list_head	list_send_qp;
485 	struct list_head	list_recv_qp;
486 	u32			create_flags;
487 	struct list_head	wc_list;
488 	enum ib_cq_notify_flags notify_flags;
489 	struct work_struct	notify_work;
490 };
491 
492 struct mlx5_ib_wc {
493 	struct ib_wc wc;
494 	struct list_head list;
495 };
496 
497 struct mlx5_ib_srq {
498 	struct ib_srq		ibsrq;
499 	struct mlx5_core_srq	msrq;
500 	struct mlx5_buf		buf;
501 	struct mlx5_db		db;
502 	u64		       *wrid;
503 	/* protect SRQ hanlding
504 	 */
505 	spinlock_t		lock;
506 	int			head;
507 	int			tail;
508 	u16			wqe_ctr;
509 	struct ib_umem	       *umem;
510 	/* serialize arming a SRQ
511 	 */
512 	struct mutex		mutex;
513 	int			wq_sig;
514 };
515 
516 struct mlx5_ib_xrcd {
517 	struct ib_xrcd		ibxrcd;
518 	u32			xrcdn;
519 };
520 
/* MTT access bits. */
enum mlx5_ib_mtt_access_flags {
	MLX5_IB_MTT_READ = 1 << 0,
	MLX5_IB_MTT_WRITE = 1 << 1,
};
525 
526 struct mlx5_user_mmap_entry {
527 	struct rdma_user_mmap_entry rdma_entry;
528 	u8 mmap_flag;
529 	u64 address;
530 	u32 page_idx;
531 };
532 
/* Both MTT access bits (read and write) combined. */
#define MLX5_IB_MTT_PRESENT	(MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
534 
535 struct mlx5_ib_mr {
536 	struct ib_mr		ibmr;
537 	void			*descs;
538 	dma_addr_t		desc_map;
539 	int			ndescs;
540 	int			max_descs;
541 	int			desc_size;
542 	int			access_mode;
543 	struct mlx5_core_mkey	mmkey;
544 	struct ib_umem	       *umem;
545 	struct mlx5_shared_mr_info	*smr_info;
546 	struct list_head	list;
547 	int			order;
548 	int			umred;
549 	int			npages;
550 	struct mlx5_ib_dev     *dev;
551 	u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
552 	struct mlx5_core_sig_ctx    *sig;
553 	int			live;
554 	void			*descs_alloc;
555 	int			access_flags; /* Needed for rereg MR */
556 	struct mlx5_async_work	cb_work;
557 };
558 
559 struct mlx5_ib_mw {
560 	struct ib_mw		ibmw;
561 	struct mlx5_core_mkey	mmkey;
562 };
563 
564 struct mlx5_ib_devx_mr {
565 	struct mlx5_core_mkey	mmkey;
566 	int			ndescs;
567 };
568 
569 struct mlx5_ib_umr_context {
570 	struct ib_cqe		cqe;
571 	enum ib_wc_status	status;
572 	struct completion	done;
573 };
574 
575 struct umr_common {
576 	struct ib_pd	*pd;
577 	struct ib_cq	*cq;
578 	struct ib_qp	*qp;
579 	/* control access to UMR QP
580 	 */
581 	struct semaphore	sem;
582 };
583 
/* FMR states. */
enum {
	MLX5_FMR_INVALID = 0,
	MLX5_FMR_VALID = 1,
	MLX5_FMR_BUSY = 2,
};
589 
590 struct mlx5_cache_ent {
591 	struct list_head	head;
592 	/* sync access to the cahce entry
593 	 */
594 	spinlock_t		lock;
595 
596 
597 	struct dentry	       *dir;
598 	char                    name[4];
599 	u32                     order;
600 	u32			size;
601 	u32                     cur;
602 	u32                     miss;
603 	u32			limit;
604 
605 	struct dentry          *fsize;
606 	struct dentry          *fcur;
607 	struct dentry          *fmiss;
608 	struct dentry          *flimit;
609 
610 	struct mlx5_ib_dev     *dev;
611 	struct work_struct	work;
612 	struct delayed_work	dwork;
613 	int			pending;
614 };
615 
616 struct mlx5_mr_cache {
617 	struct workqueue_struct *wq;
618 	struct mlx5_cache_ent	ent[MAX_MR_CACHE_ENTRIES];
619 	int			stopped;
620 	struct dentry		*root;
621 	unsigned long		last_add;
622 };
623 
624 struct mlx5_ib_gsi_qp;
625 
626 struct mlx5_ib_port_resources {
627 	struct mlx5_ib_resources *devr;
628 	struct mlx5_ib_gsi_qp *gsi;
629 	struct work_struct pkey_change_work;
630 };
631 
632 struct mlx5_ib_resources {
633 	struct ib_cq	*c0;
634 	struct ib_xrcd	*x0;
635 	struct ib_xrcd	*x1;
636 	struct ib_pd	*p0;
637 	struct ib_srq	*s0;
638 	struct ib_srq	*s1;
639 	struct mlx5_ib_port_resources ports[2];
640 	/* Protects changes to the port resources */
641 	struct mutex	mutex;
642 };
643 
644 struct mlx5_ib_port {
645 	u16 q_cnt_id;
646 };
647 
648 struct mlx5_roce {
649 	/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
650 	 * netdev pointer
651 	 */
652 	rwlock_t		netdev_lock;
653 	struct ifnet		*netdev;
654 	struct notifier_block	nb;
655 	atomic_t		next_port;
656 };
657 
/*
 * X-macro selectors.  Every list entry is m(count, type, name, "name",
 * "description", ...):
 *   MLX5_IB_STATS_COUNT yields the count term (the lists use +1),
 *   MLX5_IB_STATS_VAR   yields a member declaration "type name;",
 *   MLX5_IB_STATS_DESC  yields the name string and the description string.
 */
#define	MLX5_IB_STATS_COUNT(a,...) a
#define	MLX5_IB_STATS_VAR(a,b,c,...) b c;
#define	MLX5_IB_STATS_DESC(a,b,c,d,e,...) d, e,

/*
 * Congestion control parameters.  The description strings are user visible;
 * spelling mistakes and a duplicated phrase in them have been corrected.
 * NOTE(review): np_cnp_prio_mode carries the same description as
 * np_cnp_prio; it is left unchanged because the intended wording cannot be
 * determined from this header.
 */
#define	MLX5_IB_CONG_PARAMS(m) \
  /* ECN RP */ \
  m(+1, u64, rp_clamp_tgt_rate, "rp_clamp_tgt_rate", "If set, whenever a CNP is processed, the target rate is updated to be the current rate") \
  m(+1, u64, rp_clamp_tgt_rate_ati, "rp_clamp_tgt_rate_ati", "If set, when receiving a CNP, the target rate should be updated if the transmission rate was increased due to the timer, and not only due to the byte counter") \
  m(+1, u64, rp_time_reset, "rp_time_reset", "Time in microseconds between rate increases if no CNPs are received") \
  m(+1, u64, rp_byte_reset, "rp_byte_reset", "Transmitted data in bytes between rate increases if no CNPs are received. A value of zero means disabled.") \
  m(+1, u64, rp_threshold, "rp_threshold", "The number of times rpByteStage or rpTimeStage can count before the RP rate control state machine advances states") \
  m(+1, u64, rp_ai_rate, "rp_ai_rate", "The rate, in Mbits per second, used to increase rpTargetRate in the active increase state") \
  m(+1, u64, rp_hai_rate, "rp_hai_rate", "The rate, in Mbits per second, used to increase rpTargetRate in the hyper increase state") \
  m(+1, u64, rp_min_dec_fac, "rp_min_dec_fac", "The minimum factor by which the current transmit rate can be changed when processing a CNP. Value is given as a percentage, [1 .. 100]") \
  m(+1, u64, rp_min_rate, "rp_min_rate", "The minimum value, in Mbits per second, for rate to limit") \
  m(+1, u64, rp_rate_to_set_on_first_cnp, "rp_rate_to_set_on_first_cnp", "The rate that is set for the flow when a rate limiter is allocated to it upon first CNP received, in Mbps. A value of zero means use full port speed") \
  m(+1, u64, rp_dce_tcp_g, "rp_dce_tcp_g", "Used to update the congestion estimator, alpha, once every dce_tcp_rtt microseconds") \
  m(+1, u64, rp_dce_tcp_rtt, "rp_dce_tcp_rtt", "The time between updates of the alpha value, in microseconds") \
  m(+1, u64, rp_rate_reduce_monitor_period, "rp_rate_reduce_monitor_period", "The minimum time between two consecutive rate reductions for a single flow") \
  m(+1, u64, rp_initial_alpha_value, "rp_initial_alpha_value", "The initial value of alpha to use when receiving the first CNP for a flow") \
  m(+1, u64, rp_gd, "rp_gd", "If a CNP is received, the flow rate is reduced at the beginning of the next rate_reduce_monitor_period interval") \
  /* ECN NP */ \
  m(+1, u64, np_cnp_dscp, "np_cnp_dscp", "The DiffServ Code Point of the generated CNP for this port") \
  m(+1, u64, np_cnp_prio_mode, "np_cnp_prio_mode", "The 802.1p priority value of the generated CNP for this port") \
  m(+1, u64, np_cnp_prio, "np_cnp_prio", "The 802.1p priority value of the generated CNP for this port")

/* Number of entries in MLX5_IB_CONG_PARAMS (a sum of the +1 terms). */
#define	MLX5_IB_CONG_PARAMS_NUM (0 MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_COUNT))
685 
/*
 * Congestion statistics.  Entries use the same m(count, type, name, "name",
 * "description") shape as the other MLX5_IB_CONG_* lists.
 */
#define	MLX5_IB_CONG_STATS(m)						\
	m(+1, u64, syndrome, "syndrome", "Syndrome number")		\
	m(+1, u64, rp_cur_flows, "rp_cur_flows", "Number of flows limited") \
	m(+1, u64, sum_flows, "sum_flows", "Sum of the number of flows limited over time") \
	m(+1, u64, rp_cnp_ignored, "rp_cnp_ignored", "Number of CNPs and CNMs ignored") \
	m(+1, u64, rp_cnp_handled, "rp_cnp_handled", "Number of CNPs and CNMs successfully handled") \
	m(+1, u64, time_stamp, "time_stamp", "Time stamp in microseconds") \
	m(+1, u64, accumulators_period, "accumulators_period", "The value of X variable for accumulating counters") \
	m(+1, u64, np_ecn_marked_roce_packets, "np_ecn_marked_roce_packets", "Number of ECN marked packets seen") \
	m(+1, u64, np_cnp_sent, "np_cnp_sent", "Number of CNPs sent")

/* Number of entries in MLX5_IB_CONG_STATS. */
#define	MLX5_IB_CONG_STATS_NUM (0 MLX5_IB_CONG_STATS(MLX5_IB_STATS_COUNT))
698 
/*
 * Per-priority enable switches for the reaction point (RP) and notification
 * point (NP), priorities 0..15.  Each entry carries three extra arguments
 * after the description: the protocol constant, the priority number and the
 * token `enable`.
 *
 * The final entry deliberately has no trailing line continuation, so the
 * macro body ends with the list and does not depend on a blank line
 * following it.
 */
#define	MLX5_IB_CONG_STATUS(m) \
  /* ECN RP */ \
  m(+1, u64, rp_0_enable, "rp_0_enable", "Enable reaction point, priority 0", MLX5_IB_RROCE_ECN_RP, 0, enable) \
  m(+1, u64, rp_1_enable, "rp_1_enable", "Enable reaction point, priority 1", MLX5_IB_RROCE_ECN_RP, 1, enable) \
  m(+1, u64, rp_2_enable, "rp_2_enable", "Enable reaction point, priority 2", MLX5_IB_RROCE_ECN_RP, 2, enable) \
  m(+1, u64, rp_3_enable, "rp_3_enable", "Enable reaction point, priority 3", MLX5_IB_RROCE_ECN_RP, 3, enable) \
  m(+1, u64, rp_4_enable, "rp_4_enable", "Enable reaction point, priority 4", MLX5_IB_RROCE_ECN_RP, 4, enable) \
  m(+1, u64, rp_5_enable, "rp_5_enable", "Enable reaction point, priority 5", MLX5_IB_RROCE_ECN_RP, 5, enable) \
  m(+1, u64, rp_6_enable, "rp_6_enable", "Enable reaction point, priority 6", MLX5_IB_RROCE_ECN_RP, 6, enable) \
  m(+1, u64, rp_7_enable, "rp_7_enable", "Enable reaction point, priority 7", MLX5_IB_RROCE_ECN_RP, 7, enable) \
  m(+1, u64, rp_8_enable, "rp_8_enable", "Enable reaction point, priority 8", MLX5_IB_RROCE_ECN_RP, 8, enable) \
  m(+1, u64, rp_9_enable, "rp_9_enable", "Enable reaction point, priority 9", MLX5_IB_RROCE_ECN_RP, 9, enable) \
  m(+1, u64, rp_10_enable, "rp_10_enable", "Enable reaction point, priority 10", MLX5_IB_RROCE_ECN_RP, 10, enable) \
  m(+1, u64, rp_11_enable, "rp_11_enable", "Enable reaction point, priority 11", MLX5_IB_RROCE_ECN_RP, 11, enable) \
  m(+1, u64, rp_12_enable, "rp_12_enable", "Enable reaction point, priority 12", MLX5_IB_RROCE_ECN_RP, 12, enable) \
  m(+1, u64, rp_13_enable, "rp_13_enable", "Enable reaction point, priority 13", MLX5_IB_RROCE_ECN_RP, 13, enable) \
  m(+1, u64, rp_14_enable, "rp_14_enable", "Enable reaction point, priority 14", MLX5_IB_RROCE_ECN_RP, 14, enable) \
  m(+1, u64, rp_15_enable, "rp_15_enable", "Enable reaction point, priority 15", MLX5_IB_RROCE_ECN_RP, 15, enable) \
  /* ECN NP */ \
  m(+1, u64, np_0_enable, "np_0_enable", "Enable notification point, priority 0", MLX5_IB_RROCE_ECN_NP, 0, enable) \
  m(+1, u64, np_1_enable, "np_1_enable", "Enable notification point, priority 1", MLX5_IB_RROCE_ECN_NP, 1, enable) \
  m(+1, u64, np_2_enable, "np_2_enable", "Enable notification point, priority 2", MLX5_IB_RROCE_ECN_NP, 2, enable) \
  m(+1, u64, np_3_enable, "np_3_enable", "Enable notification point, priority 3", MLX5_IB_RROCE_ECN_NP, 3, enable) \
  m(+1, u64, np_4_enable, "np_4_enable", "Enable notification point, priority 4", MLX5_IB_RROCE_ECN_NP, 4, enable) \
  m(+1, u64, np_5_enable, "np_5_enable", "Enable notification point, priority 5", MLX5_IB_RROCE_ECN_NP, 5, enable) \
  m(+1, u64, np_6_enable, "np_6_enable", "Enable notification point, priority 6", MLX5_IB_RROCE_ECN_NP, 6, enable) \
  m(+1, u64, np_7_enable, "np_7_enable", "Enable notification point, priority 7", MLX5_IB_RROCE_ECN_NP, 7, enable) \
  m(+1, u64, np_8_enable, "np_8_enable", "Enable notification point, priority 8", MLX5_IB_RROCE_ECN_NP, 8, enable) \
  m(+1, u64, np_9_enable, "np_9_enable", "Enable notification point, priority 9", MLX5_IB_RROCE_ECN_NP, 9, enable) \
  m(+1, u64, np_10_enable, "np_10_enable", "Enable notification point, priority 10", MLX5_IB_RROCE_ECN_NP, 10, enable) \
  m(+1, u64, np_11_enable, "np_11_enable", "Enable notification point, priority 11", MLX5_IB_RROCE_ECN_NP, 11, enable) \
  m(+1, u64, np_12_enable, "np_12_enable", "Enable notification point, priority 12", MLX5_IB_RROCE_ECN_NP, 12, enable) \
  m(+1, u64, np_13_enable, "np_13_enable", "Enable notification point, priority 13", MLX5_IB_RROCE_ECN_NP, 13, enable) \
  m(+1, u64, np_14_enable, "np_14_enable", "Enable notification point, priority 14", MLX5_IB_RROCE_ECN_NP, 14, enable) \
  m(+1, u64, np_15_enable, "np_15_enable", "Enable notification point, priority 15", MLX5_IB_RROCE_ECN_NP, 15, enable)

/* Number of entries in MLX5_IB_CONG_STATUS. */
#define	MLX5_IB_CONG_STATUS_NUM (0 MLX5_IB_CONG_STATUS(MLX5_IB_STATS_COUNT))
736 
737 struct mlx5_ib_congestion {
738 	struct sysctl_ctx_list ctx;
739 	struct sx lock;
740 	struct delayed_work dwork;
741 	union {
742 		u64	arg[1];
743 		struct {
744 			MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR)
745 			MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR)
746 			MLX5_IB_CONG_STATUS(MLX5_IB_STATS_VAR)
747 		};
748 	};
749 };
750 
751 struct mlx5_devx_event_table {
752 	/* serialize updating the event_xa */
753 	struct mutex event_xa_lock;
754 	struct xarray event_xa;
755 };
756 
757 struct mlx5_ib_dev {
758 	struct ib_device		ib_dev;
759 	struct mlx5_core_dev		*mdev;
760 	struct mlx5_roce		roce;
761 	MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
762 	int				num_ports;
763 	/* serialize update of capability mask
764 	 */
765 	struct mutex			cap_mask_mutex;
766 	u8				ib_active:1;
767 	u8				wc_support:1;
768 	struct umr_common		umrc;
769 	/* sync used page count stats
770 	 */
771 	struct mlx5_ib_resources	devr;
772 	struct mlx5_mr_cache		cache;
773 	struct timer_list		delay_timer;
774 	/* Prevents soft lock on massive reg MRs */
775 	struct mutex			slow_path_mutex;
776 	int				fill_delay;
777 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
778 	struct ib_odp_caps	odp_caps;
779 	/*
780 	 * Sleepable RCU that prevents destruction of MRs while they are still
781 	 * being used by a page fault handler.
782 	 */
783 	struct srcu_struct      mr_srcu;
784 #endif
785 	struct mlx5_ib_flow_db	flow_db;
786 	/* protect resources needed as part of reset flow */
787 	spinlock_t		reset_flow_resource_lock;
788 	struct list_head	qp_list;
789 	/* Array with num_ports elements */
790 	struct mlx5_ib_port	*port;
791 	struct mlx5_sq_bfreg	bfreg;
792 	struct mlx5_sq_bfreg	wc_bfreg;
793 	struct mlx5_sq_bfreg	fp_bfreg;
794 	struct mlx5_devx_event_table devx_event_table;
795 	struct mlx5_ib_congestion congestion;
796 
797 	struct mlx5_async_ctx	async_ctx;
798 };
799 
800 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
801 {
802 	return container_of(mcq, struct mlx5_ib_cq, mcq);
803 }
804 
805 static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
806 {
807 	return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd);
808 }
809 
810 static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
811 {
812 	return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
813 }
814 
815 static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata)
816 {
817 	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
818 		udata, struct mlx5_ib_ucontext, ibucontext);
819 
820 	return to_mdev(context->ibucontext.device);
821 }
822 
823 static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
824 {
825 	return container_of(ibcq, struct mlx5_ib_cq, ibcq);
826 }
827 
828 static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
829 {
830 	return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
831 }
832 
833 static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp)
834 {
835 	return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
836 }
837 
838 static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
839 {
840 	return container_of(mmkey, struct mlx5_ib_mr, mmkey);
841 }
842 
843 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
844 {
845 	return container_of(ibpd, struct mlx5_ib_pd, ibpd);
846 }
847 
848 static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq)
849 {
850 	return container_of(ibsrq, struct mlx5_ib_srq, ibsrq);
851 }
852 
853 static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
854 {
855 	return container_of(ibqp, struct mlx5_ib_qp, ibqp);
856 }
857 
858 static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq)
859 {
860 	return container_of(ibwq, struct mlx5_ib_rwq, ibwq);
861 }
862 
863 static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
864 {
865 	return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl);
866 }
867 
868 static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
869 {
870 	return container_of(msrq, struct mlx5_ib_srq, msrq);
871 }
872 
873 static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
874 {
875 	return container_of(ibmr, struct mlx5_ib_mr, ibmr);
876 }
877 
878 static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
879 {
880 	return container_of(ibmw, struct mlx5_ib_mw, ibmw);
881 }
882 
883 struct mlx5_ib_ah {
884 	struct ib_ah		ibah;
885 	struct mlx5_av		av;
886 };
887 
888 static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
889 {
890 	return container_of(ibah, struct mlx5_ib_ah, ibah);
891 }
892 
893 static inline struct mlx5_user_mmap_entry *
894 to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
895 {
896 	return container_of(rdma_entry,
897 		struct mlx5_user_mmap_entry, rdma_entry);
898 }
899 
900 int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
901 			struct mlx5_db *db);
902 void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
903 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
904 void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
905 void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
906 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
907 		 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
908 		 const void *in_mad, void *response_mad);
909 int mlx5_ib_create_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr, u32 flags,
910 				struct ib_udata *udata);
911 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
912 void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
/* Shared receive queue entry points (defined elsewhere). */
int	mlx5_ib_create_srq(struct ib_srq *srq,
	    struct ib_srq_init_attr *init_attr, struct ib_udata *udata);
int	mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
	    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int	mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
void	mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int	mlx5_ib_post_srq_recv(struct ib_srq *ibsrq,
	    const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr);
921 struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
922 				struct ib_qp_init_attr *init_attr,
923 				struct ib_udata *udata);
924 int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
925 		      int attr_mask, struct ib_udata *udata);
926 int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
927 		     struct ib_qp_init_attr *qp_init_attr);
928 int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
929 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
930 		      const struct ib_send_wr **bad_wr);
931 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
932 		      const struct ib_recv_wr **bad_wr);
933 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
934 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
935 			  void *buffer, u32 length,
936 			  struct mlx5_ib_qp_base *base);
937 int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
938 		      struct ib_udata *udata);
939 void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
940 int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
941 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
942 int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
943 int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
944 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
945 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
946 				  u64 virt_addr, int access_flags,
947 				  struct ib_udata *udata);
948 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
949 			       struct ib_udata *udata);
950 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
951 int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
952 		       int npages, int zap);
953 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
954 			  u64 length, u64 virt_addr, int access_flags,
955 			  struct ib_pd *pd, struct ib_udata *udata);
956 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
957 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
958 			       u32 max_num_sg, struct ib_udata *udata);
959 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
960 		      unsigned int *sg_offset);
961 int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
962 			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
963 			const struct ib_mad_hdr *in, size_t in_mad_size,
964 			struct ib_mad_hdr *out, size_t *out_mad_size,
965 			u16 *out_mad_pkey_index);
966 struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
967 				   struct ib_udata *udata);
968 int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
969 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
970 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
971 int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
972 					  struct ib_smp *out_mad);
973 int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
974 					 __be64 *sys_image_guid);
975 int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
976 				 u16 *max_pkeys);
977 int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev,
978 				 u32 *vendor_id);
979 int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc);
980 int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid);
981 int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
982 			    u16 *pkey);
983 int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
984 			    union ib_gid *gid);
985 int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
986 			    struct ib_port_attr *props);
987 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
988 		       struct ib_port_attr *props);
989 int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
990 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
991 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
992 			unsigned long max_page_shift,
993 			int *count, int *shift,
994 			int *ncont, int *order);
995 void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
996 			    int page_shift, size_t offset, size_t num_pages,
997 			    __be64 *pas, int access_flags);
998 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
999 			  int page_shift, __be64 *pas, int access_flags);
1000 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
1001 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
1002 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
1003 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
1004 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
1005 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1006 			    struct ib_mr_status *mr_status);
1007 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
1008 				struct ib_wq_init_attr *init_attr,
1009 				struct ib_udata *udata);
1010 void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
1011 int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
1012 		      u32 wq_attr_mask, struct ib_udata *udata);
1013 struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
1014 						      struct ib_rwq_ind_table_init_attr *init_attr,
1015 						      struct ib_udata *udata);
1016 int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
1017 
/*
 * On-demand paging interface.  When CONFIG_INFINIBAND_ON_DEMAND_PAGING is
 * defined the real entry points are declared; otherwise a subset of them
 * is provided as do-nothing inline stubs (the int-returning ones yield 0).
 */
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING

extern struct workqueue_struct *mlx5_ib_page_fault_wq;

void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
    struct mlx5_ib_pfault *pfault);
void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_invalidate_range(struct ib_umem *umem,
    unsigned long start, unsigned long end);

#else /* !CONFIG_INFINIBAND_ON_DEMAND_PAGING */

static inline void
mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
}

static inline void
mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
{
}

static inline int
mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
{
	return (0);
}

static inline void
mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
{
}

static inline int
mlx5_ib_odp_init(void)
{
	return (0);
}

static inline void
mlx5_ib_odp_cleanup(void)
{
}

static inline void
mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
{
}

static inline void
mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
{
}

#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
1048 
1049 int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
1050 			  u8 port, struct ifla_vf_info *info);
1051 int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
1052 			      u8 port, int state);
1053 int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
1054 			 u8 port, struct ifla_vf_stats *stats);
1055 int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
1056 			u64 guid, int type);
1057 
1058 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
1059 			       int index);
1060 int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
1061 			   int index, enum ib_gid_type *gid_type);
1062 
/*
 * GSI QP helper functions.  The signatures parallel the regular QP entry
 * points but take no ib_udata argument.
 */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
    struct ib_qp_init_attr *init_attr);
int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
    int attr_mask);
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
    int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
int mlx5_ib_gsi_post_send(struct ib_qp *qp,
    const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr);
int mlx5_ib_gsi_post_recv(struct ib_qp *qp,
    const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr);
void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
1077 
/* Work-completion generation on a CQ. */
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);

/* Release of bfreg number bfregn within the given mlx5_bfreg_info. */
void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev,
    struct mlx5_bfreg_info *bfregi, int bfregn);
1082 
1083 #if 1 /* IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) */
1084 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
1085 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
1086 void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
1087 void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
1088 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
1089 bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
1090 #else
1091 static inline int
1092 mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
1093 			   bool is_user) { return -EOPNOTSUPP; }
1094 static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
1095 static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {}
1096 static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {}
1097 static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
1098 					     int *dest_type)
1099 {
1100 	return false;
1101 }
1102 #endif
1103 
1104 static inline void init_query_mad(struct ib_smp *mad)
1105 {
1106 	mad->base_version  = 1;
1107 	mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
1108 	mad->class_version = 1;
1109 	mad->method	   = IB_MGMT_METHOD_GET;
1110 }
1111 
1112 static inline u8 convert_access(int acc)
1113 {
1114 	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
1115 	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
1116 	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
1117 	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
1118 	       MLX5_PERM_LOCAL_READ;
1119 }
1120 
1121 static inline int is_qp1(enum ib_qp_type qp_type)
1122 {
1123 	return qp_type == MLX5_IB_QPT_HW_GSI;
1124 }
1125 
/* MLX5_MAX_UMR_PAGES is 2^MLX5_MAX_UMR_SHIFT, i.e. 65536. */
#define	MLX5_MAX_UMR_SHIFT	16
#define	MLX5_MAX_UMR_PAGES	(1 << MLX5_MAX_UMR_SHIFT)
1128 
1129 static inline u32 check_cq_create_flags(u32 flags)
1130 {
1131 	/*
1132 	 * It returns non-zero value for unsupported CQ
1133 	 * create flags, otherwise it returns zero.
1134 	 */
1135 	return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
1136 			  IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
1137 }
1138 
1139 static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
1140 				     u32 *user_index)
1141 {
1142 	if (cqe_version) {
1143 		if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
1144 		    (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
1145 			return -EINVAL;
1146 		*user_index = cmd_uidx;
1147 	} else {
1148 		*user_index = MLX5_IB_DEFAULT_UIDX;
1149 	}
1150 
1151 	return 0;
1152 }
1153 
1154 static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
1155 				    struct mlx5_ib_create_qp *ucmd,
1156 				    int inlen,
1157 				    u32 *user_index)
1158 {
1159 	u8 cqe_version = ucontext->cqe_version;
1160 
1161 	if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
1162 	    !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
1163 		return 0;
1164 
1165 	if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
1166 	       !!cqe_version))
1167 		return -EINVAL;
1168 
1169 	return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
1170 }
1171 
1172 static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
1173 				     struct mlx5_ib_create_srq *ucmd,
1174 				     int inlen,
1175 				     u32 *user_index)
1176 {
1177 	u8 cqe_version = ucontext->cqe_version;
1178 
1179 	if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
1180 	    !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
1181 		return 0;
1182 
1183 	if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
1184 	       !!cqe_version))
1185 		return -EINVAL;
1186 
1187 	return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
1188 }
1189 
/* Congestion support setup and teardown for a device. */
void mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *dev);
int mlx5_ib_init_congestion(struct mlx5_ib_dev *dev);
1192 
1193 static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_support)
1194 {
1195 	return lib_support && MLX5_CAP_GEN(dev->mdev, uar_4k) ?
1196 				MLX5_UARS_IN_PAGE : 1;
1197 }
1198 
1199 static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
1200 				      struct mlx5_bfreg_info *bfregi)
1201 {
1202 	return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
1203 }
1204 
1205 int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
1206 			struct mlx5_bfreg_info *bfregi, u32 bfregn,
1207 			bool dyn_bfreg);
1208 
1209 #endif /* MLX5_IB_H */
1210