xref: /freebsd/sys/dev/qlnx/qlnxe/ecore_roce.h (revision e3514747256465c52c3b2aedc9795f52c0d3efe9)
1 /*
2  * Copyright (c) 2017-2018 Cavium, Inc.
3  * All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions
7  *  are met:
8  *
9  *  1. Redistributions of source code must retain the above copyright
10  *     notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  *  POSSIBILITY OF SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  *
29  */
30 
31 
32 #ifndef __ECORE_RDMA_H__
33 #define __ECORE_RDMA_H__
34 
35 #include "ecore_status.h"
36 #include "ecore.h"
37 #include "ecore_hsi_common.h"
38 #include "ecore_proto_if.h"
39 #include "ecore_roce_api.h"
40 #include "ecore_dev_api.h"
41 
42 /* Constants */
43 
44 /* HW/FW RoCE Limitations (internal. For external see ecore_rdma_api.h)
 * NOTE(review): this header includes ecore_roce_api.h, not ecore_rdma_api.h;
 * confirm which file actually holds the external limits.
 */
45 #define ECORE_RDMA_MAX_FMR                    (RDMA_MAX_TIDS) /* 2^17 - 1 */
46 #define ECORE_RDMA_MAX_P_KEY                  (1)
47 #define ECORE_RDMA_MAX_WQE                    (0x7FFF) /* 2^15 - 1 */
48 #define ECORE_RDMA_MAX_SRQ_WQE_ELEM           (0x7FFF) /* 2^15 - 1 */
49 #define ECORE_RDMA_PAGE_SIZE_CAPS             (0xFFFFF000) /* bits 12..31 set. TODO: > 4k?! */
50 #define ECORE_RDMA_ACK_DELAY                  (15) /* 131 milliseconds */
51 #define ECORE_RDMA_MAX_MR_SIZE                (0x10000000000ULL) /* 2^40 */
52 #define ECORE_RDMA_MAX_CQS                    (RDMA_MAX_CQS) /* 64k */
53 #define ECORE_RDMA_MAX_MRS                    (RDMA_MAX_TIDS) /* 2^17 - 1; same limit as ECORE_RDMA_MAX_FMR */
54 /* Add 1 for header element */
55 #define ECORE_RDMA_MAX_SRQ_ELEM_PER_WQE	      (RDMA_MAX_SGE_PER_RQ_WQE + 1)
56 #define ECORE_RDMA_MAX_SGE_PER_SRQ_WQE	      (RDMA_MAX_SGE_PER_RQ_WQE)
57 #define ECORE_RDMA_SRQ_WQE_ELEM_SIZE          (16) /* presumably bytes - confirm */
58 #define ECORE_RDMA_MAX_SRQS		      (32 * 1024) /* 32k */
59 
60 /* Configurable */
61 /* Max CQE is derived from u16/32 size, halved and decremented by 1 to handle
62  * wrap properly and then decremented by 1 again. The latter decrement comes
63  * from a requirement to create a chain that is bigger than what the user
64  * requested by one:
65  * The CQE size is 32 bytes but the FW writes in chunks of 64
66  * bytes, for performance purposes. Allocating an extra entry and telling the
67  * FW we have less prevents overwriting the first entry in case of a wrap i.e.
68  * when the FW writes the last entry and the application hasn't read the first
69  * one.
 *
 * Resulting values: 2^32 / 2 - 1 - 1 = 0x7FFFFFFE for the 32-bit case and
 * 2^16 / 2 - 1 - 1 = 0x7FFE for the 16-bit case.
70  */
71 #define ECORE_RDMA_MAX_CQE_32_BIT             (0x7FFFFFFF - 1) /* 2^31 - 2 */
72 #define ECORE_RDMA_MAX_CQE_16_BIT             (0x7FFF - 1) /* 2^15 - 2 */
73 
/* The two possible values of an RDMA toggle bit: cleared (0) or set (1).
 * NOTE(review): presumably paired with the toggle_bits bitmap in
 * struct ecore_rdma_info - confirm against the users of this enum.
 */
74 enum ecore_rdma_toggle_bit {
75 	ECORE_RDMA_TOGGLE_BIT_CLEAR = 0,
76 	ECORE_RDMA_TOGGLE_BIT_SET   = 1
77 };
78 
79 /* @@@TBD Currently we support only affiliated events
80    * enum ecore_rdma_unaffiliated_event_code {
81    * ECORE_RDMA_PORT_ACTIVE, // Link Up
82    * ECORE_RDMA_PORT_CHANGED, // SGID table has changed
83    * ECORE_RDMA_LOCAL_CATASTROPHIC_ERR, // Fatal device error
84    * ECORE_RDMA_PORT_ERR, // Link down
85    * };
86    */
87 
/* Size in chars of the name buffer embedded in struct ecore_bmap. */
88 #define QEDR_MAX_BMAP_NAME	(10)
/* Descriptor for a named bitmap. struct ecore_rdma_info holds one of these
 * per ID space (cq_map, pd_map, tid_map, ...). The bitmap storage itself is
 * referenced through a pointer, not embedded.
 */
89 struct ecore_bmap {
90 	u32           max_count;	/* presumably the number of bits in bitmap - confirm */
91 	unsigned long *bitmap;		/* bitmap storage, in unsigned long words */
92 	char name[QEDR_MAX_BMAP_NAME];	/* short label; NUL-termination not guaranteed by the type */
93 };
94 
95 /* Functions for enabling/disabling EDPM in RDMA PFs, according to the
96  * existence of QPs during a DCBx update, or according to BAR size.
 * NOTE(review): going by the names, ecore_roce_dpm_dcbx() covers the DCBx
 * update case and ecore_rdma_dpm_bar() the BAR size case - confirm against
 * the implementations. Both take the hwfn and a PTT window.
97  */
98 void ecore_roce_dpm_dcbx(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
99 void ecore_rdma_dpm_bar(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
100 
101 #ifdef CONFIG_ECORE_IWARP
102 
/* Sizes of the TX/RX sides of the iWARP LL2 SYN connection.
 * NOTE(review): units (entries vs. bytes) are not visible here - confirm.
 */
103 #define ECORE_IWARP_LL2_SYN_TX_SIZE	(128)
104 #define ECORE_IWARP_LL2_SYN_RX_SIZE	(256)
105 
/* Default TX/RX sizes for the iWARP LL2 OOO connection (same caveat on
 * units).
 */
106 #define ECORE_IWARP_LL2_OOO_DEF_TX_SIZE	(256)
107 #define ECORE_IWARP_LL2_OOO_DEF_RX_SIZE	(4096)
108 
109 #define ECORE_IWARP_MAX_SYN_PKT_SIZE	(128)
/* NOTE(review): presumably the "no handle" value for the u8 ll2_*_handle
 * fields of struct ecore_iwarp_info - confirm.
 */
110 #define ECORE_IWARP_HANDLE_INVAL	(0xff)
111 
/* Descriptor of a buffer used on the iWARP LL2 connections: a CPU pointer, a
 * DMA address and a size, plus an optional link to another descriptor of the
 * same type.
 */
112 struct ecore_iwarp_ll2_buff {
113 	struct ecore_iwarp_ll2_buff	*piggy_buf;	/* another buffer attached to this one */
114 	void 				*data;		/* CPU (virtual) address */
115 	dma_addr_t			data_phys_addr;	/* DMA address; presumably of the same memory as data */
116 	u32				buff_size;	/* presumably in bytes - confirm */
117 };
118 
/* List element that wraps an LL2 buffer together with per-packet data.
 * NOTE(review): presumably linked on mpa_buf_list / mpa_buf_pending_list of
 * struct ecore_iwarp_info - confirm against the list users.
 */
119 struct ecore_iwarp_ll2_mpa_buf {
120 	osal_list_entry_t		list_entry;	/* list linkage */
121 	struct ecore_iwarp_ll2_buff	*ll2_buf;	/* the buffer being described */
122 	struct unaligned_opaque_data	data;
123 	u16				tcp_payload_len;
124 	u8				placement_offset;
125 };
126 
127 /* In some cases an FPDU will arrive with only one byte of the header. In
128  * this case fpdu_length will be partial (containing only the higher byte)
129  * and incomplete_bytes will contain the invalid value below. */
130 #define ECORE_IWARP_INVALID_INCOMPLETE_BYTES 0xffff
131 
/* Tracking state for one FPDU. struct ecore_iwarp_info refers to these
 * through its partial_fpdus pointer.
 */
132 struct ecore_iwarp_fpdu {
133 	struct ecore_iwarp_ll2_buff 	*mpa_buf;
134 	dma_addr_t			pkt_hdr;	/* DMA address of the packet header */
135 	u8				pkt_hdr_size;
136 	dma_addr_t			mpa_frag;	/* DMA address of the MPA fragment */
137 	void				*mpa_frag_virt;	/* CPU address; presumably of the same fragment */
138 	u16				mpa_frag_len;
139 	u16				fpdu_length;	/* may be partial, see comment above */
140 	u16				incomplete_bytes; /* may be ECORE_IWARP_INVALID_INCOMPLETE_BYTES */
141 };
142 
/* iWARP-specific state. Embedded as the iwarp member of struct
 * ecore_rdma_info when CONFIG_ECORE_IWARP is defined.
 */
143 struct ecore_iwarp_info {
144 	osal_list_t			listen_list; /* ecore_iwarp_listener */
145 	osal_list_t			ep_list;     /* ecore_iwarp_ep */
146 	osal_list_t			ep_free_list;/* pre-allocated ep's */
147 	osal_list_t			mpa_buf_list;/* list of mpa_bufs */
148 	osal_list_t			mpa_buf_pending_list;
149 	osal_spinlock_t			iw_lock;
150 	osal_spinlock_t			qp_lock; /* for teardown races */
151 	struct iwarp_rxmit_stats_drv	stats;
152 	u32				rcv_wnd_scale;
153 	u16				max_mtu;
154 	u16				num_ooo_rx_bufs;
155 	u8				mac_addr[ETH_ALEN];
156 	u8				crc_needed;
157 	u8				tcp_flags;
	/* LL2 connection handles; NOTE(review): ECORE_IWARP_HANDLE_INVAL is
	 * presumably the "not set" value - confirm.
	 */
158 	u8				ll2_syn_handle;
159 	u8				ll2_ooo_handle;
160 	u8				ll2_mpa_handle;
161 	u8				peer2peer;
162 	u8				_pad;
163 	enum mpa_negotiation_mode	mpa_rev;
164 	enum mpa_rtr_type		rtr_type;
165 	struct ecore_iwarp_fpdu		*partial_fpdus;
166 	struct ecore_iwarp_ll2_mpa_buf  *mpa_bufs;
167 	u8				*mpa_intermediate_buf;
168 	u16				max_num_partial_fpdus; /* presumably the element count of partial_fpdus - confirm */
169 
170 	/* MPA statistics */
171 	u64				unalign_rx_comp;
172 };
173 #endif
174 
/*
 * Evaluates to 1 when the enable_dcqcn field of the RDMA PF parameters of the
 * given hwfn is non-zero, and to 0 otherwise.
 *
 * p_hwfn: pointer to the hwfn. The argument is parenthesized so that any
 * pointer-valued expression (e.g. &hwfn or base + i) expands correctly; it
 * is evaluated exactly once.
 */
#define IS_ECORE_DCQCN(p_hwfn)	\
	(!!((p_hwfn)->pf_params.rdma_pf_params.enable_dcqcn))
177 
/* RoCE-specific state. Embedded as the roce member of struct
 * ecore_rdma_info.
 */
178 struct ecore_roce_info {
179 	struct roce_events_stats	event_stats;
180 
181 	u8				dcqcn_enabled;		/* presumably a boolean flag - confirm */
182 	u8				dcqcn_reaction_point;	/* presumably a boolean flag - confirm */
183 };
184 
/* Top-level RDMA state: a lock, one ecore_bmap per ID space, event callbacks,
 * device/port attribute pointers, statistics, resource counters, and the
 * protocol-specific sub-state (roce always; iwarp only when
 * CONFIG_ECORE_IWARP is defined).
 */
185 struct ecore_rdma_info {
186 	osal_spinlock_t			lock;
187 
	/* ID bitmaps, one per resource type. */
188 	struct ecore_bmap		cq_map;
189 	struct ecore_bmap		pd_map;
190 	struct ecore_bmap		tid_map;
191 	struct ecore_bmap		srq_map;
192 	struct ecore_bmap		cid_map;
193 	struct ecore_bmap		tcp_cid_map;
194 	struct ecore_bmap		real_cid_map;
195 	struct ecore_bmap		dpi_map;
196 	struct ecore_bmap		toggle_bits;
197 	struct ecore_rdma_events	events;
198 	struct ecore_rdma_device	*dev;
199 	struct ecore_rdma_port		*port;
200 	u32				last_tid;
201 	u8				num_cnqs;
202 	struct rdma_sent_stats          rdma_sent_pstats;
203 	struct rdma_rcv_stats           rdma_rcv_tstats;
204 	u32				num_qps;
205 	u32				num_mrs;
206 	u32				num_srqs;
207 	u16				queue_zone_base;
208 	u16				max_queue_zones;
209 	enum protocol_type		proto;
210 	struct ecore_roce_info		roce;
211 #ifdef CONFIG_ECORE_IWARP
212 	struct ecore_iwarp_info		iwarp;
213 #endif
214 };
215 
216 #ifdef CONFIG_ECORE_IWARP
/* iWARP QP states; stored in the iwarp_state member of struct ecore_rdma_qp.
 * No explicit values are assigned, so the enumerators number from 0 in
 * declaration order.
 */
217 enum ecore_iwarp_qp_state {
218 	ECORE_IWARP_QP_STATE_IDLE,
219 	ECORE_IWARP_QP_STATE_RTS,
220 	ECORE_IWARP_QP_STATE_TERMINATE,
221 	ECORE_IWARP_QP_STATE_CLOSING,
222 	ECORE_IWARP_QP_STATE_ERROR,
223 };
224 #endif
225 
/* Driver-side state of one RDMA queue pair: identifiers and current state,
 * capability flags, addressing/path attributes, and the requester and
 * responder queue resources. The iWARP-only members exist only when
 * CONFIG_ECORE_IWARP is defined.
 */
226 struct ecore_rdma_qp {
227 	struct regpair qp_handle;
228 	struct regpair qp_handle_async;
229 	u32	qpid; /* iwarp: may differ from icid */
230 	u16	icid;
231 	enum ecore_roce_qp_state cur_state;
232 #ifdef CONFIG_ECORE_IWARP
233 	enum ecore_iwarp_qp_state iwarp_state;
234 #endif
235 	bool	use_srq;
236 	bool	signal_all;
237 	bool	fmr_and_reserved_lkey;
238 
239 	bool	incoming_rdma_read_en;
240 	bool	incoming_rdma_write_en;
241 	bool	incoming_atomic_en;
242 	bool	e2e_flow_control_en;
243 
244 	u16	pd;			/* Protection domain */
245 	u16	pkey;			/* Primary P_key index */
246 	u32	dest_qp;
247 	u16	mtu;
248 	u16	srq_id;
249 	u8	traffic_class_tos;	/* IPv6/GRH traffic class; IPv4 TOS */
250 	u8	hop_limit_ttl;		/* IPv6/GRH hop limit; IPv4 TTL */
251 	u16	dpi;
252 	u32	flow_label;		/* ignored in IPv4 */
253 	u16	vlan_id;
254 	u32	ack_timeout;
255 	u8	retry_cnt;
256 	u8	rnr_retry_cnt;
257 	u8	min_rnr_nak_timer;
258 	bool	sqd_async;
259 	union ecore_gid	sgid;		/* GRH SGID; IPv4/6 Source IP */
260 	union ecore_gid	dgid;		/* GRH DGID; IPv4/6 Destination IP */
261 	enum roce_mode roce_mode;
262 	u16	udp_src_port;		/* RoCEv2 only */
263 	u8	stats_queue;
264 
265 	/* requester */
266 	u8	max_rd_atomic_req;
267 	u32     sq_psn;
268 	u16	sq_cq_id; /* The cq to be associated with the send queue*/
269 	u16	sq_num_pages;
270 	dma_addr_t sq_pbl_ptr;
	/* orq: a buffer (CPU pointer, DMA address, page count), not a
	 * scalar.
	 */
271 	void	*orq;
272 	dma_addr_t orq_phys_addr;
273 	u8	orq_num_pages;
274 	bool	req_offloaded;
275 
276 	/* responder */
277 	u8	max_rd_atomic_resp;
278 	u32     rq_psn;
279 	u16	rq_cq_id; /* The cq to be associated with the receive queue */
280 	u16	rq_num_pages;
281 	dma_addr_t rq_pbl_ptr;
	/* irq: here a buffer (CPU pointer, DMA address, page count);
	 * presumably a queue and not an interrupt - confirm.
	 */
282 	void	*irq;
283 	dma_addr_t irq_phys_addr;
284 	u8	irq_num_pages;
285 	bool	resp_offloaded;
286 	u32	cq_prod;
287 
288 	u8	remote_mac_addr[6];
289 	u8	local_mac_addr[6];
290 
291 	void	*shared_queue;
292 	dma_addr_t shared_queue_phys_addr;
293 #ifdef CONFIG_ECORE_IWARP
294 	struct ecore_iwarp_ep *ep;
295 #endif
296 };
297 
298 #ifdef CONFIG_ECORE_IWARP
299 
/* States of an iWARP endpoint; stored in the state member of struct
 * ecore_iwarp_ep. No explicit values are assigned, so the enumerators number
 * from 0 in declaration order.
 */
300 enum ecore_iwarp_ep_state {
301 	ECORE_IWARP_EP_INIT,
302 	ECORE_IWARP_EP_MPA_REQ_RCVD,
303 	ECORE_IWARP_EP_ESTABLISHED,
304 	ECORE_IWARP_EP_CLOSED
305 };
306 
/* Layout of the output buffer referenced by async_output_virt in struct
 * ecore_iwarp_ep. The two members overlay the same storage; note that
 * mpa_response uses the MPA completion type and mpa_request the TCP
 * completion type.
 */
307 union async_output {
308 	struct iwarp_eqe_data_mpa_async_completion mpa_response;
309 	struct iwarp_eqe_data_tcp_async_completion mpa_request;
310 };
311 
312 /* Endpoint structure represents a TCP connection. This connection can be
313  * associated with a QP or not (in which case QP==NULL)
314  */
315 struct ecore_iwarp_ep {
316 	osal_list_entry_t		list_entry;
317 	int				sig;
318 	struct ecore_rdma_qp		*qp;	/* NULL when no QP is associated */
319 	enum ecore_iwarp_ep_state	state;
320 
321 	/* This contains entire buffer required for ep memories. This is the
322 	 * only one actually allocated and freed. The rest are pointers into
323 	 * this buffer
324 	 */
325 	void				*ep_buffer_virt;
326 	dma_addr_t			ep_buffer_phys;
327 
328 	/* Async EQE events contain only the ep pointer on the completion. The
329 	 * rest of the data is written to an output buffer pre-allocated by
330 	 * the driver. This buffer points to a location in the ep_buffer.
331 	 */
332 	union async_output		*async_output_virt;
333 	dma_addr_t			async_output_phys;
334 
335 	struct ecore_iwarp_cm_info	cm_info;
336 	enum tcp_connect_mode		connect_mode;
337 	enum mpa_rtr_type		rtr_type;
338 	enum mpa_negotiation_mode	mpa_rev;
339 	u32				tcp_cid;
340 	u32				cid;
341 	u8				remote_mac_addr[6];
342 	u8				local_mac_addr[6];
343 	u16				mss;
344 	bool				mpa_reply_processed;
345 
346 	/* The event_cb function is called for asynchronous events associated
347 	 * with the ep. It is initialized at different entry points depending
348 	 * on whether the ep is the tcp connection active side or passive side
349 	 * The cb_context is passed to the event_cb function.
350 	 */
351 	iwarp_event_handler		event_cb;
352 	void				*cb_context;
353 
354 	/* For Passive side - syn packet related data */
355 	struct ecore_iwarp_ll2_buff	*syn;
356 	u16				syn_ip_payload_length;
357 	dma_addr_t			syn_phy_addr;
358 };
359 
/* A listener entry; ecore_iwarp_info.listen_list is annotated as holding
 * these.
 */
360 struct ecore_iwarp_listener {
361 	osal_list_entry_t	list_entry;
362 
363 	/* The event_cb function is called for connection requests.
364 	 * The cb_context is passed to the event_cb function.
365 	 */
366 	iwarp_event_handler	event_cb;
367 	void			*cb_context;
368 	u32			max_backlog;
369 	u8			ip_version;
370 	u32			ip_addr[4];	/* 4 x u32 = 128 bits; presumably IPv4 uses a subset - confirm */
371 	u16			port;
372 	u16			vlan;
373 
374 };
375 
/* Handler for iWARP async events (declared only when CONFIG_ECORE_IWARP is
 * defined).
 *
 * p_hwfn:         hwfn the event belongs to
 * fw_event_code:  event code, as reported by firmware (per the name)
 * fw_handle:      handle associated with the event; NOTE(review): presumably
 *                 identifies the ep or QP - confirm against the
 *                 implementation
 * fw_return_code: return/status code accompanying the event
 */
376 void ecore_iwarp_async_event(struct ecore_hwfn *p_hwfn,
377 			     u8 fw_event_code,
378 			     struct regpair *fw_handle,
379 			     u8 fw_return_code);
380 
381 #endif /* CONFIG_ECORE_IWARP */
382 
/* Handler for RoCE async events.
 *
 * p_hwfn:        hwfn the event belongs to
 * fw_event_code: event code, as reported by firmware (per the name)
 * rdma_data:     event payload; which union member is valid presumably
 *                depends on fw_event_code - confirm against the
 *                implementation
 */
383 void ecore_roce_async_event(struct ecore_hwfn *p_hwfn,
384 			    u8 fw_event_code,
385 			    union rdma_eqe_data *rdma_data);
386 
387 #endif /*__ECORE_RDMA_H__*/
388