xref: /illumos-gate/usr/src/uts/common/io/qede/579xx/drivers/ecore/ecore_roce.h (revision 14b24e2b79293068c8e016a69ef1d872fb5e2fd5)
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, v.1,  (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 
/*
 * Copyright 2014-2017 Cavium, Inc.
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License, v.1,  (the "License").
 *
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the License at
 * http://opensource.org/licenses/CDDL-1.0
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
35 
36 #ifndef __ECORE_RDMA_H__
37 #define __ECORE_RDMA_H__
38 
39 #include "ecore_status.h"
40 #include "ecore.h"
41 #include "ecore_hsi_common.h"
42 #include "ecore_proto_if.h"
43 #include "ecore_roce_api.h"
44 #include "ecore_dev_api.h"
45 
/* Constants */

/* HW/FW RoCE Limitations (internal. For external see ecore_rdma_api.h) */
#define ECORE_RDMA_MAX_FMR                    (RDMA_MAX_TIDS) /* 2^17 - 1 */
#define ECORE_RDMA_MAX_P_KEY                  (1)
#define ECORE_RDMA_MAX_WQE                    (0x7FFF) /* 2^15 - 1 */
#define ECORE_RDMA_MAX_SRQ_WQE_ELEM           (0x7FFF) /* 2^15 - 1 */
/* Bitmask of supported page sizes: 4KB and every power of two above it.
 * TODO: > 4k?!
 */
#define ECORE_RDMA_PAGE_SIZE_CAPS             (0xFFFFF000)
/* IB-style ACK timeout exponent (4.096 usec * 2^value); 15 ~= 131 msec */
#define ECORE_RDMA_ACK_DELAY                  (15)
#define ECORE_RDMA_MAX_MR_SIZE                (0x10000000000ULL) /* 2^40 */
#define ECORE_RDMA_MAX_CQS                    (RDMA_MAX_CQS) /* 64k */
#define ECORE_RDMA_MAX_MRS                    (RDMA_MAX_TIDS) /* 2^17 - 1 */
/* Add 1 for header element */
#define ECORE_RDMA_MAX_SRQ_ELEM_PER_WQE	      (RDMA_MAX_SGE_PER_RQ_WQE + 1)
#define ECORE_RDMA_MAX_SGE_PER_SRQ_WQE	      (RDMA_MAX_SGE_PER_RQ_WQE)
#define ECORE_RDMA_SRQ_WQE_ELEM_SIZE          (16)
#define ECORE_RDMA_MAX_SRQS		      (32 * 1024) /* 32k */

/* Configurable */
/* Max CQE is derived from u16/32 size, halved and decremented by 1 to handle
 * wrap properly and then decremented by 1 again. The latter decrement comes
 * from a requirement to create a chain that is bigger than what the user
 * requested by one:
 * The CQE size is 32 bytes but the FW writes in chunks of 64
 * bytes, for performance purposes. Allocating an extra entry and telling the
 * FW we have less prevents overwriting the first entry in case of a wrap i.e.
 * when the FW writes the last entry and the application hasn't read the first
 * one.
 */
#define ECORE_RDMA_MAX_CQE_32_BIT             (0x7FFFFFFF - 1)
#define ECORE_RDMA_MAX_CQE_16_BIT             (0x7FFF - 1)
77 
/* Two-state value of a per-CQ toggle bit (allocated out of the
 * toggle_bits bitmap in struct ecore_rdma_info).
 */
enum ecore_rdma_toggle_bit {
	ECORE_RDMA_TOGGLE_BIT_CLEAR = 0,
	ECORE_RDMA_TOGGLE_BIT_SET   = 1
};
82 
/* @@@TBD Currently we support only affiliated events
   * enum ecore_rdma_unaffiliated_event_code {
   * ECORE_RDMA_PORT_ACTIVE, // Link Up
   * ECORE_RDMA_PORT_CHANGED, // SGID table has changed
   * ECORE_RDMA_LOCAL_CATASTROPHIC_ERR, // Fatal device error
   * ECORE_RDMA_PORT_ERR, // Link down
   * };
   */
91 
/* Maximum length (including the NUL terminator) of an ecore_bmap name. */
#define QEDR_MAX_BMAP_NAME	(10)

/* Simple ID allocator: a bitmap of max_count bits plus a short name that
 * is used in debug/error prints. Storage for 'bitmap' is owned by whoever
 * initializes the structure.
 */
struct ecore_bmap {
	u32           max_count;	/* number of valid bits in 'bitmap' */
	unsigned long *bitmap;		/* backing bit storage */
	char name[QEDR_MAX_BMAP_NAME];	/* tag for diagnostics */
};
98 
/* Functions for enabling/disabling EDPM (enhanced doorbell push mode) in
 * RDMA PFs, according to the existence of QPs during a DCBx update or
 * according to the doorbell BAR size.
 */
void ecore_roce_dpm_dcbx(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
void ecore_rdma_dpm_bar(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
104 
105 #ifdef CONFIG_ECORE_IWARP
106 
/* Number of pre-allocated endpoint structures (see ep_free_list). */
#define ECORE_IWARP_PREALLOC_CNT	(256)

/* LL2 ring sizes for the dedicated SYN-packet connection. */
#define ECORE_IWARP_LL2_SYN_TX_SIZE	(128)
#define ECORE_IWARP_LL2_SYN_RX_SIZE	(256)

/* LL2 ring sizes for the out-of-order (OOO) data connection. */
#define ECORE_IWARP_LL2_OOO_DEF_TX_SIZE	(256)
#define ECORE_IWARP_LL2_OOO_DEF_RX_SIZE	(4096)
#define ECORE_IWARP_LL2_OOO_MAX_RX_SIZE	(16384)

#define ECORE_IWARP_MAX_SYN_PKT_SIZE	(128)
/* Sentinel for an unused/invalid LL2 connection handle. */
#define ECORE_IWARP_HANDLE_INVAL	(0xff)
118 
/* A single DMA-able LL2 buffer. piggy_buf optionally links a second buffer
 * to this one (NOTE(review): presumably so both can be posted/reposted in
 * one operation — confirm against the LL2 completion handlers).
 */
struct ecore_iwarp_ll2_buff {
	struct ecore_iwarp_ll2_buff	*piggy_buf;	/* optional chained buffer */
	void 				*data;		/* virtual address of the buffer */
	dma_addr_t			data_phys_addr;	/* DMA address of 'data' */
	u32				buff_size;	/* size of 'data' in bytes */
};
125 
/* Wrapper tying an LL2 buffer that holds MPA data to the packet metadata
 * delivered with it; queued on mpa_buf_list / mpa_buf_pending_list.
 */
struct ecore_iwarp_ll2_mpa_buf {
	osal_list_entry_t		list_entry;	/* linkage for the mpa_buf lists */
	struct ecore_iwarp_ll2_buff	*ll2_buf;	/* underlying LL2 buffer */
	struct unaligned_opaque_data	data;		/* FW-provided packet metadata */
	u16				tcp_payload_len; /* TCP payload bytes in this buffer */
	u8				placement_offset; /* payload offset within the buffer */
};
133 
/* In some cases an FPDU will arrive with only one byte of its header. In
 * that case fpdu_length is partial (contains only the higher byte) and
 * incomplete_bytes carries this invalid-marker value instead of a count.
 */
#define ECORE_IWARP_INVALID_INCOMPLETE_BYTES 0xffff

/* Reassembly state for a partially received MPA FPDU on one connection. */
struct ecore_iwarp_fpdu {
	struct ecore_iwarp_ll2_buff 	*mpa_buf;	/* buffer holding the partial data */
	dma_addr_t			pkt_hdr;	/* DMA address of the packet header */
	u8				pkt_hdr_size;	/* header bytes at pkt_hdr */
	dma_addr_t			mpa_frag;	/* DMA address of the MPA fragment */
	void				*mpa_frag_virt;	/* virtual address of the fragment */
	u16				mpa_frag_len;	/* fragment length in bytes */
	u16				fpdu_length;	/* FPDU length (may be partial, see above) */
	u16				incomplete_bytes; /* bytes still missing, or INVALID marker */
};
149 
/* Per-hwfn iWARP state: connection/listener lists, the three LL2 connection
 * handles (SYN, OOO, MPA), negotiation defaults, and FPDU reassembly pools.
 */
struct ecore_iwarp_info {
	osal_list_t			listen_list; /* ecore_iwarp_listener */
	osal_list_t			ep_list;     /* ecore_iwarp_ep */
	osal_list_t			ep_free_list;/* pre-allocated ep's */
	osal_list_t			mpa_buf_list;/* list of mpa_bufs */
	osal_list_t			mpa_buf_pending_list;
	osal_spinlock_t			iw_lock;	/* protects the lists above */
	osal_spinlock_t			qp_lock; /* for teardown races */
	struct iwarp_rxmit_stats_drv	stats;
	u32				rcv_wnd_scale;
	u16				max_mtu;
	u16				num_ooo_rx_bufs;
	u8				mac_addr[ETH_ALEN];
	u8				crc_needed;
	u8				tcp_flags;
	u8				ll2_syn_handle;	/* LL2 handle for SYN packets */
	u8				ll2_ooo_handle;	/* LL2 handle for out-of-order data */
	u8				ll2_mpa_handle;	/* LL2 handle for MPA fragments */
	u8				peer2peer;
	u8				_pad;		/* explicit padding, keeps layout stable */
	enum mpa_negotiation_mode	mpa_rev;	/* default MPA revision */
	enum mpa_rtr_type		rtr_type;	/* default RTR type */
	struct ecore_iwarp_fpdu		*partial_fpdus;	/* FPDU reassembly state array */
	struct ecore_iwarp_ll2_mpa_buf  *mpa_bufs;	/* MPA buffer wrapper array */
	u8				*mpa_intermediate_buf; /* scratch for unaligned FPDUs */
	u16				max_num_partial_fpdus; /* size of partial_fpdus */

	/* MPA statistics */
	u64				unalign_rx_comp; /* count of unaligned RX completions */
};
180 #endif
181 
/* Evaluates to 1 iff DCQCN congestion control was requested for this PF via
 * the rdma_pf_params supplied at init time, else 0.
 * The argument is fully parenthesized so expressions such as
 * IS_ECORE_DCQCN(&hwfns[i]) or IS_ECORE_DCQCN(cond ? a : b) expand correctly
 * ('->' binds tighter than unary '&' and '?:').
 */
#define IS_ECORE_DCQCN(p_hwfn)	\
	(!!((p_hwfn)->pf_params.rdma_pf_params.enable_dcqcn))
184 
/* Per-hwfn RoCE-specific state: event statistics and the DCQCN role
 * configured for this PF.
 */
struct ecore_roce_info {
	struct roce_events_stats	event_stats;	/* FW-maintained RoCE event counters */

	u8				dcqcn_enabled;		/* DCQCN active on this PF */
	u8				dcqcn_reaction_point;	/* PF acts as DCQCN reaction point */
};
191 
/* Top-level per-hwfn RDMA state: resource-ID allocators, device/port
 * capability info, statistics, and the protocol-specific (RoCE/iWARP)
 * sub-structures.
 */
struct ecore_rdma_info {
	osal_spinlock_t			lock;	/* protects the bitmaps below */

	/* Resource-ID allocators */
	struct ecore_bmap		cq_map;
	struct ecore_bmap		pd_map;
	struct ecore_bmap		tid_map;
	struct ecore_bmap		srq_map;
	struct ecore_bmap		cid_map;
	struct ecore_bmap		tcp_cid_map;
	struct ecore_bmap		real_cid_map;
	struct ecore_bmap		dpi_map;
	struct ecore_bmap		toggle_bits;	/* see enum ecore_rdma_toggle_bit */
	struct ecore_rdma_events	events;		/* upper-layer event callbacks */
	struct ecore_rdma_device	*dev;		/* device capabilities/attributes */
	struct ecore_rdma_port		*port;		/* port attributes */
	u32				last_tid;	/* most recently allocated TID */
	u8				num_cnqs;	/* number of CNQs in use */
	struct rdma_sent_stats          rdma_sent_pstats;
	struct rdma_rcv_stats           rdma_rcv_tstats;
	u32				num_qps;	/* currently allocated QPs */
	u32				num_mrs;	/* currently allocated MRs */
	u32				num_srqs;	/* currently allocated SRQs */
	u16				queue_zone_base;
	u16				max_queue_zones;
	enum protocol_type		proto;		/* PROTOCOLID for this PF */
	struct ecore_roce_info		roce;		/* RoCE-only state */
#ifdef CONFIG_ECORE_IWARP
	struct ecore_iwarp_info		iwarp;		/* iWARP-only state */
#endif
	bool				active;		/* RDMA engine started */
	int				ref_cnt;	/* users of this info structure */
};
224 
225 #ifdef CONFIG_ECORE_IWARP
/* iWARP QP state machine (distinct from the RoCE cur_state kept in
 * struct ecore_rdma_qp).
 */
enum ecore_iwarp_qp_state {
	ECORE_IWARP_QP_STATE_IDLE,
	ECORE_IWARP_QP_STATE_RTS,
	ECORE_IWARP_QP_STATE_TERMINATE,
	ECORE_IWARP_QP_STATE_CLOSING,
	ECORE_IWARP_QP_STATE_ERROR,
};
233 #endif
234 
/* Driver-side representation of an RDMA queue pair: addressing, negotiated
 * attributes, and the DMA resources for the requester (SQ/ORQ) and
 * responder (RQ/IRQ) sides.
 */
struct ecore_rdma_qp {
	struct regpair qp_handle;	/* opaque handle passed to/from FW */
	struct regpair qp_handle_async;	/* handle reported in async events */
	u32	qpid; /* iwarp: may differ from icid */
	u16	icid;			/* internal connection id */
	enum ecore_roce_qp_state cur_state;
#ifdef CONFIG_ECORE_IWARP
	enum ecore_iwarp_qp_state iwarp_state;
#endif
	bool	use_srq;
	bool	signal_all;		/* generate a CQE for every SQ WQE */
	bool	fmr_and_reserved_lkey;

	bool	incoming_rdma_read_en;
	bool	incoming_rdma_write_en;
	bool	incoming_atomic_en;
	bool	e2e_flow_control_en;

	u16	pd;			/* Protection domain */
	u16	pkey;			/* Primary P_key index */
	u32	dest_qp;		/* remote QP number */
	u16	mtu;
	u16	srq_id;			/* valid only when use_srq is set */
	u8	traffic_class_tos;	/* IPv6/GRH traffic class; IPv4 TOS */
	u8	hop_limit_ttl;		/* IPv6/GRH hop limit; IPv4 TTL */
	u16	dpi;			/* doorbell page index */
	u32	flow_label;		/* ignored in IPv4 */
	u16	vlan_id;
	u32	ack_timeout;
	u8	retry_cnt;		/* transport retry count */
	u8	rnr_retry_cnt;		/* RNR NAK retry count */
	u8	min_rnr_nak_timer;
	bool	sqd_async;
	union ecore_gid	sgid;		/* GRH SGID; IPv4/6 Source IP */
	union ecore_gid	dgid;		/* GRH DGID; IPv4/6 Destination IP */
	enum roce_mode roce_mode;	/* RoCE v1 / v2 (IPv4 or IPv6) */
	u16	udp_src_port;		/* RoCEv2 only */
	u8	stats_queue;

	/* requester */
	u8	max_rd_atomic_req;	/* max outstanding outgoing RD/atomic */
	u32     sq_psn;
	u16	sq_cq_id; /* The cq to be associated with the send queue*/
	u16	sq_num_pages;
	dma_addr_t sq_pbl_ptr;		/* SQ page-buffer-list DMA address */
	void	*orq;			/* outstanding-read queue (virtual) */
	dma_addr_t orq_phys_addr;
	u8	orq_num_pages;
	bool	req_offloaded;		/* requester side created in FW */

	/* responder */
	u8	max_rd_atomic_resp;	/* max incoming RD/atomic it will serve */
	u32     rq_psn;
	u16	rq_cq_id; /* The cq to be associated with the receive queue */
	u16	rq_num_pages;
	dma_addr_t rq_pbl_ptr;		/* RQ page-buffer-list DMA address */
	void	*irq;			/* incoming-read queue (virtual) */
	dma_addr_t irq_phys_addr;
	u8	irq_num_pages;
	bool	resp_offloaded;		/* responder side created in FW */
	u32	cq_prod;

	u8	remote_mac_addr[6];
	u8	local_mac_addr[6];

	void	*shared_queue;		/* queue memory shared with FW */
	dma_addr_t shared_queue_phys_addr;
#ifdef CONFIG_ECORE_IWARP
	struct ecore_iwarp_ep *ep;	/* TCP endpoint backing this QP */
#endif
};
306 
307 #ifdef CONFIG_ECORE_IWARP
308 
/* Lifecycle of an iWARP TCP endpoint (struct ecore_iwarp_ep). */
enum ecore_iwarp_ep_state {
	ECORE_IWARP_EP_INIT,
	ECORE_IWARP_EP_MPA_REQ_RCVD,
	ECORE_IWARP_EP_MPA_OFFLOADED,
	ECORE_IWARP_EP_ESTABLISHED,
	ECORE_IWARP_EP_CLOSED
};
316 
/* Output buffer that FW fills for async MPA/TCP completions; which member
 * is valid depends on the event (response vs. request completion).
 */
union async_output {
	struct iwarp_eqe_data_mpa_async_completion mpa_response;
	struct iwarp_eqe_data_tcp_async_completion mpa_request;
};
321 
/* Endpoint structure represents a TCP connection. This connection can be
 * associated with a QP or not (in which case QP==NULL)
 */
struct ecore_iwarp_ep {
	osal_list_entry_t		list_entry;	/* linkage on ep_list / ep_free_list */
	int				sig;		/* sanity signature for debug checks */
	struct ecore_rdma_qp		*qp;		/* associated QP, or NULL */
	enum ecore_iwarp_ep_state	state;

	/* This contains entire buffer required for ep memories. This is the
	 * only one actually allocated and freed. The rest are pointers into
	 * this buffer
	 */
	void				*ep_buffer_virt;
	dma_addr_t			ep_buffer_phys;

	/* Async EQE events contain only the ep pointer on the completion. The
	 * rest of the data is written to an output buffer pre-allocated by
	 * the driver. This buffer points to a location in the ep_buffer.
	 */
	union async_output		*async_output_virt;
	dma_addr_t			async_output_phys;

	struct ecore_iwarp_cm_info	cm_info;	/* addresses/ports/private data */
	enum tcp_connect_mode		connect_mode;	/* active or passive side */
	enum mpa_rtr_type		rtr_type;
	enum mpa_negotiation_mode	mpa_rev;
	u32				tcp_cid;	/* CID of the TCP connection */
	u32				cid;		/* CID of the QP/iWARP connection */
	u8				remote_mac_addr[6];
	u8				local_mac_addr[6];
	u16				mss;
	bool				mpa_reply_processed;

	/* The event_cb function is called for asynchronous events associated
	 * with the ep. It is initialized at different entry points depending
	 * on whether the ep is the tcp connection active side or passive side
	 * The cb_context is passed to the event_cb function.
	 */
	iwarp_event_handler		event_cb;
	void				*cb_context;

	/* For Passive side - syn packet related data */
	struct ecore_iwarp_ll2_buff	*syn;
	u16				syn_ip_payload_length;
	dma_addr_t			syn_phy_addr;
};
369 
/* A passive-side listener: matches incoming SYNs against ip/port/vlan and
 * delivers connection requests through event_cb. Linked on listen_list.
 */
struct ecore_iwarp_listener {
	osal_list_entry_t	list_entry;	/* linkage on iwarp listen_list */

	/* The event_cb function is called for connection requests.
	 * The cb_context is passed to the event_cb function.
	 */
	iwarp_event_handler	event_cb;
	void			*cb_context;
	u32			max_backlog;	/* max pending connection requests */
	u8			ip_version;	/* 4 or 6; selects how ip_addr is read */
	u32			ip_addr[4];	/* IPv4 uses [0]; IPv6 uses all four */
	u16			port;		/* TCP port listened on */
	u16			vlan;
};
384 
/* Dispatch an iWARP async EQE to the relevant endpoint/QP handling code.
 * fw_handle carries the ep pointer the FW echoed back (see union
 * async_output for the accompanying data buffer).
 */
void ecore_iwarp_async_event(struct ecore_hwfn *p_hwfn,
			     u8 fw_event_code,
			     struct regpair *fw_handle,
			     u8 fw_return_code);
389 
390 #endif /* CONFIG_ECORE_IWARP */
391 
/* Dispatch a RoCE async EQE (QP/CQ/SRQ error events etc.) to the
 * upper-layer event callbacks registered in ecore_rdma_events.
 */
void ecore_roce_async_event(struct ecore_hwfn *p_hwfn,
			    u8 fw_event_code,
			    union rdma_eqe_data *rdma_data);

/* Allocate / free the per-hwfn ecore_rdma_info structure. The alloc may
 * fail with an ecore error status; free releases everything info_alloc set
 * up.
 */
enum _ecore_status_t ecore_rdma_info_alloc(struct ecore_hwfn *p_hwfn);
void ecore_rdma_info_free(struct ecore_hwfn *p_hwfn);
398 
399 #endif /*__ECORE_RDMA_H__*/
400