xref: /freebsd/sys/dev/ena/ena.h (revision 994297b01b98816bea1abf45ae4bac1bc69ee7a0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  *
32  */
33 
34 #ifndef ENA_H
35 #define ENA_H
36 
37 #include "opt_rss.h"
38 
39 #include "ena-com/ena_com.h"
40 #include "ena-com/ena_eth_com.h"
41 
/*
 * Driver version components and identification strings.
 * DRV_MODULE_VERSION joins the three components with dots
 * ("MAJOR.MINOR.SUBMINOR") unless it has already been defined.
 */
42 #define DRV_MODULE_VER_MAJOR	2
43 #define DRV_MODULE_VER_MINOR	5
44 #define DRV_MODULE_VER_SUBMINOR 0
45 
46 #define DRV_MODULE_NAME		"ena"
47 
48 #ifndef DRV_MODULE_VERSION
49 #define DRV_MODULE_VERSION				\
50 	__XSTRING(DRV_MODULE_VER_MAJOR) "."		\
51 	__XSTRING(DRV_MODULE_VER_MINOR) "."		\
52 	__XSTRING(DRV_MODULE_VER_SUBMINOR)
53 #endif
54 #define DEVICE_NAME	"Elastic Network Adapter (ENA)"
55 #define DEVICE_DESC	"ENA adapter"
56 
57 /*
 * Calculate DMA mask: a value with the low (x) bits set. The shift is done
 * on an unsigned long long and the width for ena cannot exceed 48, so it
 * is safe.
 */
58 #define ENA_DMA_BIT_MASK(x)		((1ULL << (x)) - 1ULL)
59 
60 /* 1 for AENQ + ADMIN */
61 #define	ENA_ADMIN_MSIX_VEC		1
/* Total vector count: the admin vector plus one per IO queue. */
62 #define	ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))
63 
64 #define	ENA_REG_BAR			0
65 #define	ENA_MEM_BAR			2
66 
67 #define	ENA_BUS_DMA_SEGS		32
68 
69 #define	ENA_DEFAULT_BUF_RING_SIZE	4096
70 
71 #define	ENA_DEFAULT_RING_SIZE		1024
72 #define	ENA_MIN_RING_SIZE		256
73 
74 /*
75  * Refill Rx queue when number of required descriptors is above
76  * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
77  */
78 #define	ENA_RX_REFILL_THRESH_DIVIDER	8
79 #define	ENA_RX_REFILL_THRESH_PACKET	256
80 
/* Size of the name[] buffer in struct ena_irq. */
81 #define	ENA_IRQNAME_SIZE		40
82 
/* Array length used for the per-packet buffer arrays declared in this header. */
83 #define	ENA_PKT_MAX_BUFS 		19
84 
/* The RSS table size is 1 << ENA_RX_RSS_TABLE_LOG_SIZE, i.e. 128 entries. */
85 #define	ENA_RX_RSS_TABLE_LOG_SIZE	7
86 #define	ENA_RX_RSS_TABLE_SIZE		(1 << ENA_RX_RSS_TABLE_LOG_SIZE)
87 
88 #define	ENA_HASH_KEY_SIZE		40
89 
90 #define	ENA_MAX_FRAME_LEN		10000
91 #define	ENA_MIN_FRAME_LEN 		60
92 
/* Two more than the largest per-packet buffer count (19 + 2 = 21). */
93 #define ENA_TX_RESUME_THRESH		(ENA_PKT_MAX_BUFS + 2)
94 
/*
 * NOTE(review): DB_THRESHOLD and TX_COMMIT are not documented here; the
 * names suggest doorbell/commit batching thresholds - confirm against the
 * datapath code that uses them.
 */
95 #define DB_THRESHOLD	64
96 
97 #define TX_COMMIT	32
98 /*
99  * TX budget for cleaning. It should be half of the RX budget to reduce the
100  * amount of TCP retransmissions.
101  */
102 #define TX_BUDGET	128
103 /* RX cleanup budget. -1 stands for infinity. */
104 #define RX_BUDGET	256
105 /*
106  * How many times we can repeat cleanup in the io irq handling routine if the
107  * RX or TX budget was depleted.
108  */
109 #define CLEAN_BUDGET	8
110 
/* NOTE(review): units of the two intervals below are not stated here - confirm. */
111 #define RX_IRQ_INTERVAL 20
112 #define TX_IRQ_INTERVAL 50
113 
114 #define	ENA_MIN_MTU		128
115 
116 #define	ENA_TSO_MAXSIZE		65536
117 
118 #define	ENA_MMIO_DISABLE_REG_READ	BIT(0)
119 
/*
 * Advance a ring index by one, wrapping back to 0 at the end of the ring.
 * The wrap is done with a mask, so the result is only correct when
 * ring_size is a power of two.
 */
120 #define	ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
121 
122 #define	ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
123 
/*
 * Mapping between a combined queue number q and the IO queue indices:
 * the Tx queue gets the even index 2q and the Rx queue the odd index 2q + 1.
 * The *_TO_COMBINED_IDX macros invert that mapping.
 */
124 #define	ENA_IO_TXQ_IDX(q)		(2 * (q))
125 #define	ENA_IO_RXQ_IDX(q)		(2 * (q) + 1)
126 #define	ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q)	((q) / 2)
127 #define	ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q)	(((q) - 1) / 2)
128 
/*
 * IRQ table layout: index 0 is the management interrupt and IO queue q
 * uses index ENA_IO_IRQ_FIRST_IDX + q.
 */
129 #define	ENA_MGMNT_IRQ_IDX		0
130 #define	ENA_IO_IRQ_FIRST_IDX		1
131 #define	ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))
132 
133 #define	ENA_MAX_NO_INTERRUPT_ITERATIONS	3
134 
135 /*
136  * ENA device should send a keep alive msg every 1 sec.
137  * We wait for 6 sec just to be on the safe side.
138  */
139 #define DEFAULT_KEEP_ALIVE_TO		(SBT_1S * 6)
140 
141 /* Time (in sbintime units, 5 sec) before concluding the transmitter is hung. */
142 #define DEFAULT_TX_CMP_TO		(SBT_1S * 5)
143 
144 /* Number of queues to check for missing Tx completions per timer tick */
145 #define DEFAULT_TX_MONITORED_QUEUES	(4)
146 
147 /* Max number of timed-out packets before device reset */
148 #define DEFAULT_TX_CMP_THRESHOLD	(128)
149 
150 /*
151  * Supported PCI vendor and device IDs.
 * NOTE(review): PF/VF presumably stand for physical/virtual function and
 * the *_RSERV0 IDs for reserved variants - confirm against the probe table.
152  */
153 #define	PCI_VENDOR_ID_AMAZON	0x1d0f
154 
155 #define	PCI_DEV_ID_ENA_PF		0x0ec2
156 #define	PCI_DEV_ID_ENA_PF_RSERV0	0x1ec2
157 #define	PCI_DEV_ID_ENA_VF		0xec20
158 #define	PCI_DEV_ID_ENA_VF_RSERV0	0xec21
159 
/*
 * Flags indicating current ENA driver state.
 *
 * Each enumerator is a bit index into the ena_state_t bitset kept in the
 * adapter. ENA_FLAGS_NUMBER is the number of flags and is used as the bitset
 * size, so it must stay the last enumerator: every flag index then stays
 * strictly below the declared set size. (It used to alias the last flag,
 * which declared the set one bit too small.)
 */
enum ena_flags_t {
	ENA_FLAG_DEVICE_RUNNING,
	ENA_FLAG_DEV_UP,
	ENA_FLAG_LINK_UP,
	ENA_FLAG_MSIX_ENABLED,
	ENA_FLAG_TRIGGER_RESET,
	ENA_FLAG_ONGOING_RESET,
	ENA_FLAG_DEV_UP_BEFORE_RESET,
	ENA_FLAG_RSS_ACTIVE,
	/* Keep last: count of the flags above. */
	ENA_FLAGS_NUMBER
};
174 
/* Bitset type for the driver state flags, sized by ENA_FLAGS_NUMBER. */
175 BITSET_DEFINE(_ena_state, ENA_FLAGS_NUMBER);
176 typedef struct _ena_state ena_state_t;
177 
/*
 * Accessors for the flags bitset stored in (adapter)->flags. `bit` is one
 * of the enum ena_flags_t values. Setting and clearing go through the
 * atomic bitset operations; zeroing and testing use the plain ones.
 */
178 #define ENA_FLAG_ZERO(adapter)		\
179 	BIT_ZERO(ENA_FLAGS_NUMBER, &(adapter)->flags)
180 #define ENA_FLAG_ISSET(bit, adapter)	\
181 	BIT_ISSET(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
182 #define ENA_FLAG_SET_ATOMIC(bit, adapter)	\
183 	BIT_SET_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
184 #define ENA_FLAG_CLEAR_ATOMIC(bit, adapter)	\
185 	BIT_CLR_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
186 
/*
 * One MSI-X table slot; struct ena_adapter keeps an array of these in
 * msix_entries. NOTE(review): presumably `entry` is the table index and
 * `vector` the assigned vector number - confirm against the MSI-X setup code.
 */
187 struct msix_entry {
188 	int entry;
189 	int vector;
190 };
191 
/*
 * A PCI vendor/device ID pair plus an index.
 * NOTE(review): presumably one row of the supported-device table used at
 * probe time - confirm against the table that instantiates it.
 */
192 typedef struct _ena_vendor_info_t {
193 	uint16_t vendor_id;
194 	uint16_t device_id;
195 	unsigned int index;
196 } ena_vendor_info_t;
197 
/*
 * Bookkeeping for one interrupt. struct ena_adapter holds one of these per
 * MSI-X vector in irq_tbl[].
 */
198 struct ena_irq {
199 	/* Interrupt resources */
200 	struct resource *res;
201 	driver_filter_t *handler;
202 	void *data;
203 	void *cookie;
204 	unsigned int vector;
205 	bool requested;
206 #ifdef RSS
	/* Only present in kernels built with the RSS option. */
207 	int cpu;
208 #endif
	/* Interrupt name; buffer holds ENA_IRQNAME_SIZE bytes. */
209 	char name[ENA_IRQNAME_SIZE];
210 };
211 
/*
 * An IO queue: ties one Tx ring and one Rx ring together with the
 * task/taskqueue pair used for their cleanup.
 */
212 struct ena_que {
213 	struct ena_adapter *adapter;
214 	struct ena_ring *tx_ring;
215 	struct ena_ring *rx_ring;
216 
217 	struct task cleanup_task;
218 	struct taskqueue *cleanup_tq;
219 
220 	uint32_t id;
221 #ifdef RSS
	/* CPU binding information; only present in kernels built with RSS. */
222 	int cpu;
223 	cpuset_t cpu_mask;
224 #endif
225 	int domain;
226 	struct sysctl_oid *oid;
227 };
228 
/*
 * Context carrying the Tx/Rx queue sizes, their maximums and the maximum
 * SGL sizes, together with the device handles they were derived for.
 * NOTE(review): presumably filled in by the queue size calculation during
 * attach - confirm which members are inputs and which are outputs.
 */
229 struct ena_calc_queue_size_ctx {
230 	struct ena_com_dev_get_features_ctx *get_feat_ctx;
231 	struct ena_com_dev *ena_dev;
232 	device_t pdev;
233 	uint32_t tx_queue_size;
234 	uint32_t rx_queue_size;
235 	uint32_t max_tx_queue_size;
236 	uint32_t max_rx_queue_size;
237 	uint16_t max_tx_sgl_size;
238 	uint16_t max_rx_sgl_size;
239 };
240 
241 #ifdef DEV_NETMAP
/*
 * Netmap-only Tx bookkeeping embedded in struct ena_tx_buffer: up to
 * ENA_PKT_MAX_BUFS buffer indices and DMA maps, plus a count of how many
 * are in use.
 */
242 struct ena_netmap_tx_info {
243 	uint32_t socket_buf_idx[ENA_PKT_MAX_BUFS];
244 	bus_dmamap_t map_seg[ENA_PKT_MAX_BUFS];
245 	unsigned int sockets_used;
246 };
247 #endif
248 
/*
 * Context of one Tx packet; struct ena_ring points at an array of these
 * through tx_buffer_info. Aligned to the cache line size.
 */
249 struct ena_tx_buffer {
250 	struct mbuf *mbuf;
251 	/* # of ena desc for this specific mbuf
252 	 * (includes data desc and metadata desc) */
253 	unsigned int tx_descs;
254 	/* # of buffers used by this mbuf */
255 	unsigned int num_of_bufs;
256 
257 	bus_dmamap_t dmamap;
258 
259 	/* Used to detect missing tx packets */
260 	struct bintime timestamp;
261 	bool print_once;
262 
263 #ifdef DEV_NETMAP
264 	struct ena_netmap_tx_info nm_info;
265 #endif /* DEV_NETMAP */
266 
	/* Room for up to ENA_PKT_MAX_BUFS buffer descriptors per packet. */
267 	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
268 } __aligned(CACHE_LINE_SIZE);
269 
/*
 * Context of one Rx packet buffer; struct ena_ring points at an array of
 * these through rx_buffer_info. Aligned to the cache line size.
 */
270 struct ena_rx_buffer {
271 	struct mbuf *mbuf;
272 	bus_dmamap_t map;
273 	struct ena_com_buf ena_buf;
274 #ifdef DEV_NETMAP
275 	uint32_t netmap_buf_idx;
276 #endif /* DEV_NETMAP */
277 } __aligned(CACHE_LINE_SIZE);
278 
/*
 * Tx statistics counters. Shares storage with struct ena_stats_rx through
 * an anonymous union in struct ena_ring.
 */
279 struct ena_stats_tx {
280 	counter_u64_t cnt;
281 	counter_u64_t bytes;
282 	counter_u64_t prepare_ctx_err;
283 	counter_u64_t dma_mapping_err;
284 	counter_u64_t doorbells;
285 	counter_u64_t missing_tx_comp;
286 	counter_u64_t bad_req_id;
287 	counter_u64_t collapse;
288 	counter_u64_t collapse_err;
289 	counter_u64_t queue_wakeup;
290 	counter_u64_t queue_stop;
291 	counter_u64_t llq_buffer_copy;
292 	counter_u64_t unmask_interrupt_num;
293 };
294 
/*
 * Rx statistics counters. Shares storage with struct ena_stats_tx through
 * an anonymous union in struct ena_ring.
 */
295 struct ena_stats_rx {
296 	counter_u64_t cnt;
297 	counter_u64_t bytes;
	/* Spelled "refil" in the member name; kept as-is because users reference it. */
298 	counter_u64_t refil_partial;
299 	counter_u64_t csum_bad;
300 	counter_u64_t mjum_alloc_fail;
301 	counter_u64_t mbuf_alloc_fail;
302 	counter_u64_t dma_mapping_err;
303 	counter_u64_t bad_desc_num;
304 	counter_u64_t bad_req_id;
305 	counter_u64_t empty_rx_ring;
306 	counter_u64_t csum_good;
307 };
308 
/*
 * State of a single ring. The same structure type is used for the Tx and
 * the Rx rings of the adapter; members that only make sense for one
 * direction are overlaid in anonymous unions, so only the member matching
 * the ring's direction may be accessed. Aligned to the cache line size.
 */
309 struct ena_ring {
310 	/* Holds the empty requests for TX/RX out of order completions */
311 	union {
312 		uint16_t *free_tx_ids;
313 		uint16_t *free_rx_ids;
314 	};
315 	struct ena_com_dev *ena_dev;
316 	struct ena_adapter *adapter;
317 	struct ena_com_io_cq *ena_com_io_cq;
318 	struct ena_com_io_sq *ena_com_io_sq;
319 
320 	uint16_t qid;
321 
322 	/* Determines if device will use LLQ or normal mode for TX */
323 	enum ena_admin_placement_policy_type tx_mem_queue_type;
324 	union {
325 		/* The maximum length the driver can push to the device (For LLQ) */
326 		uint8_t tx_max_header_size;
327 		/* The maximum (and default) mbuf size for the Rx descriptor. */
328 		uint16_t rx_mbuf_sz;
329 
330 	};
331 
332 	bool first_interrupt;
333 	uint16_t no_interrupt_event_cnt;
334 
335 	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
336 
	/* Back pointer to the queue that owns this ring. */
337 	struct ena_que *que;
338 	struct lro_ctrl lro;
339 
340 	uint16_t next_to_use;
341 	uint16_t next_to_clean;
342 
343 	union {
344 		struct ena_tx_buffer *tx_buffer_info; /* context of tx packet */
345 		struct ena_rx_buffer *rx_buffer_info; /* context of rx packet */
346 	};
347 	int ring_size; /* number of tx/rx_buffer_info's entries */
348 
349 	struct buf_ring *br; /* only for TX */
350 	uint32_t buf_ring_size;
351 
	/* Ring lock; taken through the ENA_RING_MTX_* macros. */
352 	struct mtx ring_mtx;
353 	char mtx_name[16];
354 
355 	struct {
356 		struct task enqueue_task;
357 		struct taskqueue *enqueue_tq;
358 	};
359 
360 	union {
361 		struct ena_stats_tx tx_stats;
362 		struct ena_stats_rx rx_stats;
363 	};
364 
365 	union {
366 		int empty_rx_queue;
367 		/* For Tx ring to indicate if it's running or not */
368 		bool running;
369 	};
370 
371 	/* How many packets are sent in one Tx loop, used for doorbells */
372 	uint32_t acum_pkts;
373 
374 	/* Used for LLQ */
375 	uint8_t *push_buf_intermediate_buf;
376 
377 #ifdef DEV_NETMAP
378 	bool initialized;
379 #endif /* DEV_NETMAP */
380 } __aligned(CACHE_LINE_SIZE);
381 
/* Device-level event counters; embedded in struct ena_adapter as dev_stats. */
382 struct ena_stats_dev {
383 	counter_u64_t wd_expired;
384 	counter_u64_t interface_up;
385 	counter_u64_t interface_down;
386 	counter_u64_t admin_q_pause;
387 };
388 
/*
 * Packet, byte and drop counters for each direction; embedded in
 * struct ena_adapter as hw_stats.
 */
389 struct ena_hw_stats {
390 	counter_u64_t rx_packets;
391 	counter_u64_t tx_packets;
392 
393 	counter_u64_t rx_bytes;
394 	counter_u64_t tx_bytes;
395 
396 	counter_u64_t rx_drops;
397 	counter_u64_t tx_drops;
398 };
399 
400 /*
 * Board specific private data structure.
 *
 * Holds the per-device driver state: OS handles and resources, queue and
 * ring arrays (each sized for ENA_MAX_NUM_IO_QUEUES), the interrupt table,
 * the timer/reset machinery and the statistics.
 */
401 struct ena_adapter {
402 	struct ena_com_dev *ena_dev;
403 
404 	/* OS defined structs */
405 	if_t ifp;
406 	device_t pdev;
407 	struct ifmedia	media;
408 
409 	/* OS resources */
410 	struct resource *memory;
411 	struct resource *registers;
412 	struct resource *msix;
413 	int msix_rid;
414 
415 	/* MSI-X */
416 	struct msix_entry *msix_entries;
417 	int msix_vecs;
418 
419 	/* DMA tags used throughout the driver adapter for Tx and Rx */
420 	bus_dma_tag_t tx_buf_tag;
421 	bus_dma_tag_t rx_buf_tag;
422 	int dma_width;
423 
424 	uint32_t max_mtu;
425 
426 	uint32_t num_io_queues;
427 	uint32_t max_num_io_queues;
428 
429 	uint32_t requested_tx_ring_size;
430 	uint32_t requested_rx_ring_size;
431 
432 	uint32_t max_tx_ring_size;
433 	uint32_t max_rx_ring_size;
434 
435 	uint16_t max_tx_sgl_size;
436 	uint16_t max_rx_sgl_size;
437 
438 	uint32_t tx_offload_cap;
439 
440 	uint32_t buf_ring_size;
441 
442 	/* RSS */
443 	int first_bind;
444 	struct ena_indir *rss_indir;
445 
446 	uint8_t mac_addr[ETHER_ADDR_LEN];
447 	/* mdio and phy */
448 
	/* Driver state bits (enum ena_flags_t); use the ENA_FLAG_* macros. */
449 	ena_state_t flags;
450 
451 	/* Queue will represent one TX and one RX ring */
452 	struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
453 	    __aligned(CACHE_LINE_SIZE);
454 
455 	/* TX */
456 	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
457 	    __aligned(CACHE_LINE_SIZE);
458 
459 	/* RX */
460 	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
461 	    __aligned(CACHE_LINE_SIZE);
462 
	/* One slot for the admin vector plus one per possible IO queue. */
463 	struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
464 
465 	/* Timer service */
466 	struct callout timer_service;
467 	sbintime_t keep_alive_timestamp;
468 	uint32_t next_monitored_tx_qid;
469 	struct task reset_task;
470 	struct taskqueue *reset_tq;
471 	int wd_active;
472 	sbintime_t keep_alive_timeout;
473 	sbintime_t missing_tx_timeout;
474 	uint32_t missing_tx_max_queues;
475 	uint32_t missing_tx_threshold;
476 	bool disable_meta_caching;
477 
478 	uint16_t eni_metrics_sample_interval;
479 	uint16_t eni_metrics_sample_interval_cnt;
480 
481 	/* Statistics */
482 	struct ena_stats_dev dev_stats;
483 	struct ena_hw_stats hw_stats;
484 	struct ena_admin_eni_stats eni_metrics;
485 
	/* Reason recorded by ena_trigger_reset() when a reset is requested. */
486 	enum ena_regs_reset_reason_types reset_reason;
487 };
488 
/* Per-ring mutex helpers; _ring is a pointer to a struct ena_ring. */
489 #define	ENA_RING_MTX_LOCK(_ring)		mtx_lock(&(_ring)->ring_mtx)
490 #define	ENA_RING_MTX_TRYLOCK(_ring)		mtx_trylock(&(_ring)->ring_mtx)
491 #define	ENA_RING_MTX_UNLOCK(_ring)		mtx_unlock(&(_ring)->ring_mtx)
492 #define ENA_RING_MTX_ASSERT(_ring)		\
493 	mtx_assert(&(_ring)->ring_mtx, MA_OWNED)
494 
/*
 * Helpers for the single, driver-wide sx lock ena_global_lock.
 * ENA_LOCK_LOCK() takes it exclusively and ENA_LOCK_ASSERT() checks that
 * it is exclusively held.
 */
495 #define ENA_LOCK_INIT()					\
496 	sx_init(&ena_global_lock,	"ENA global lock")
497 #define ENA_LOCK_DESTROY()		sx_destroy(&ena_global_lock)
498 #define ENA_LOCK_LOCK()			sx_xlock(&ena_global_lock)
499 #define ENA_LOCK_UNLOCK()		sx_unlock(&ena_global_lock)
500 #define ENA_LOCK_ASSERT()		sx_assert(&ena_global_lock, SA_XLOCKED)
501 
/*
 * Helpers for the adapter's timer_service callout. ENA_TIMER_RESET()
 * (re)arms it to call ena_timer_service() with the adapter as argument,
 * passing SBT_1S as both the sbt and the precision argument of
 * callout_reset_sbt().
 */
502 #define	ENA_TIMER_INIT(_adapter)					\
503 	callout_init(&(_adapter)->timer_service, true)
504 #define	ENA_TIMER_DRAIN(_adapter)					\
505 	callout_drain(&(_adapter)->timer_service)
506 #define	ENA_TIMER_RESET(_adapter)					\
507 	callout_reset_sbt(&(_adapter)->timer_service, SBT_1S, SBT_1S,	\
508 			ena_timer_service, (void*)(_adapter), 0)
509 
/*
 * Clamp _x to the range [min, max] by applying max_t() and then min_t()
 * in the given type; clamp_val() uses the type of its first argument.
 * min_t/max_t are not defined in this header.
 */
510 #define clamp_t(type, _x, min, max)	min_t(type, max_t(type, _x, min), max)
511 #define clamp_val(val, lo, hi)		clamp_t(__typeof(val), val, lo, hi)
512 
/* The lock behind the ENA_LOCK_* macros; defined outside this header. */
513 extern struct sx ena_global_lock;
514 
515 static inline int ena_mbuf_count(struct mbuf *mbuf)
516 {
517 	int count = 1;
518 
519 	while ((mbuf = mbuf->m_next) != NULL)
520 		++count;
521 
522 	return count;
523 }
524 
/*
 * Driver entry points shared between the ENA source files. Only the
 * prototypes are visible here; see the definitions for the exact contracts
 * (return values, required locking).
 */
525 int	ena_up(struct ena_adapter *adapter);
526 void	ena_down(struct ena_adapter *adapter);
527 int	ena_restore_device(struct ena_adapter *adapter);
528 void	ena_destroy_device(struct ena_adapter *adapter, bool graceful);
529 int	ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num);
530 int	ena_update_buf_ring_size(struct ena_adapter *adapter,
531     uint32_t new_buf_ring_size);
532 int	ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
533     uint32_t new_rx_size);
534 int	ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num);
535 
536 static inline void
537 ena_trigger_reset(struct ena_adapter *adapter,
538     enum ena_regs_reset_reason_types reset_reason)
539 {
540 	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
541 		adapter->reset_reason = reset_reason;
542 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
543 	}
544 }
545 
546 #endif /* !(ENA_H) */
547