xref: /illumos-gate/usr/src/uts/common/io/ena/ena.h (revision 4b9db4f6425b1a08fca4390f446072c4a6aae8d5)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 #ifndef	_ENA_H
17 #define	_ENA_H
18 
19 #include <sys/stdbool.h>
20 #include <sys/ddi.h>
21 #include <sys/sunddi.h>
22 #include <sys/types.h>
23 #include <sys/atomic.h>
24 #include <sys/list.h>
25 #include <sys/time.h>
26 #include <sys/modctl.h>
27 #include <sys/conf.h>
28 #include <sys/cpuvar.h>
29 #include <sys/pci.h>
30 #include <sys/sysmacros.h>
31 #include <sys/mac.h>
32 #include <sys/mac_ether.h>
33 #include <sys/mac_provider.h>
34 #include <sys/pattr.h>
35 #include <sys/strsun.h>
36 #include <sys/ethernet.h>
37 #include <sys/vlan.h>
38 #include <sys/utsname.h>
39 #include "ena_hw.h"
40 
41 /*
42  * AWS ENA Ethernet Driver
43  */
44 
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48 
49 #define	ENA_MODULE_NAME	"ena"
50 
51 /*
52  * The minimum supported ENA device controller version (currently 0.0.1).
53  */
54 #define	ENA_CTRL_MAJOR_VSN_MIN		0
55 #define	ENA_CTRL_MINOR_VSN_MIN		0
56 #define	ENA_CTRL_SUBMINOR_VSN_MIN	1
57 
58 #define	ENA_MODULE_VER_MAJOR	1	/* module version is 1.0.0 */
59 #define	ENA_MODULE_VER_MINOR	0
60 #define	ENA_MODULE_VER_SUBMINOR	0
61 
62 /*
63  * The Linux driver doesn't document what the specification version
64  * number controls or the contract around version changes. The best we
65  * can do is use the same version that they use and port version
66  * changes as they come (the last one was in 2018).
67  *
68  * common: ENA_COMMON_SPEC_VERSION_{MAJOR,MINOR}
69  */
70 #define	ENA_SPEC_VERSION_MAJOR	2	/* i.e. spec version 2.0 */
71 #define	ENA_SPEC_VERSION_MINOR	0
72 
73 
74 /* This represents BAR 0 (note the register number itself is 1, not 0). */
75 #define	ENA_REG_NUMBER	1
76 
77 /*
78  * A sentinel value passed as argument to ena_ring_rx() to indicate
79  * the Rx ring is being read in interrupt mode, not polling mode.
80  */
81 #define	ENA_INTERRUPT_MODE	-1
82 
83 #define	ENA_RX_BUF_IPHDR_ALIGNMENT	2
84 #define	ENA_ADMINQ_DEPTH		32
85 #define	ENA_AENQ_NUM_DESCS		32
86 
87 /* Convert milliseconds to nanoseconds (cf. MSEC2NSEC(), used below). */
88 #define	ENA_MS_TO_NS(ms)	((ms) * 1000000ul)
89 
90 /*
91  * The default amount of time we will wait for an admin command to complete,
92  * specified in nanoseconds. This can be overridden by hints received from the
93  * device. We default to half a second.
94  */
95 #define	ENA_ADMIN_CMD_DEF_TIMEOUT_NS	MSEC2NSEC(500)
96 
97 /*
98  * The interval of the watchdog timer, in nanoseconds (one second).
99  */
100 #define	ENA_WATCHDOG_INTERVAL_NS	MSEC2NSEC(1000)
101 
102 /*
103  * The device sends a keepalive message every second. If we don't see any for
104  * a while we will trigger a device reset. Other open source drivers use
105  * 6 seconds for this value, so do we. The value is in nanoseconds.
106  */
107 #define	ENA_DEVICE_KEEPALIVE_TIMEOUT_NS	MSEC2NSEC(6000)
108 
109 /*
110  * The number of consecutive times a TX queue needs to be seen as blocked by
111  * the watchdog timer before a reset is invoked. Since the watchdog interval
112  * is one second (ENA_WATCHDOG_INTERVAL_NS), this is approximately in seconds.
113  */
114 #define	ENA_TX_STALL_TIMEOUT		8
115 
116 /*
117  * In order to avoid rapidly sending basic stats requests to the controller, we
118  * impose a limit of one request every 10ms. The value is in nanoseconds and
119  * carries no trailing semicolon so that it can be used within expressions.
120  */
121 #define	ENA_BASIC_STATS_MINIMUM_INTERVAL_NS	MSEC2NSEC(10)
121 
122 /*
123  * Property names, with their _MIN/_MAX/_DEF limits where defined.
124  */
125 #define	ENA_PROP_RXQ_NUM_DESCS	"rx_queue_num_descs"
126 #define	ENA_PROP_RXQ_NUM_DESCS_MIN	64
127 
128 #define	ENA_PROP_TXQ_NUM_DESCS	"tx_queue_num_descs"
129 #define	ENA_PROP_TXQ_NUM_DESCS_MIN	64
130 
131 #define	ENA_PROP_RXQ_INTR_LIMIT	"rx_queue_intr_limit"
132 #define	ENA_PROP_RXQ_INTR_LIMIT_MIN	16
133 #define	ENA_PROP_RXQ_INTR_LIMIT_MAX	4096
134 #define	ENA_PROP_RXQ_INTR_LIMIT_DEF	256
135 
136 #define	ENA_DMA_BIT_MASK(x)	((1ULL << (x)) - 1ULL) /* low x bits; x < 64 */
137 #define	ENA_DMA_VERIFY_ADDR(ena, phys_addr)				\
138 	VERIFY3U(ENA_DMA_BIT_MASK((ena)->ena_dma_width) & (phys_addr), \
139 	    ==, (phys_addr)) /* phys_addr must fit within ena_dma_width bits */
140 
141 typedef struct ena_dma_conf {	/* argument to ena_dma_alloc() */
142 	size_t		edc_size;
143 	uint64_t	edc_align;
144 	int		edc_sgl;
145 	uchar_t		edc_endian;
146 	bool		edc_stream;
147 } ena_dma_conf_t;
148 
149 typedef struct ena_dma_buf {	/* see ena_dma_alloc()/ena_dma_free() */
150 	caddr_t			edb_va;
151 	size_t			edb_len;
152 	/*
153 	 * The length given by the DMA engine, kept around for debugging
154 	 * purposes.
155 	 */
156 	size_t			edb_real_len;
157 	size_t			edb_used_len;
158 	ddi_acc_handle_t	edb_acc_hdl;
159 	ddi_dma_handle_t	edb_dma_hdl;	/* handle used by ENA_DMA_SYNC() */
160 	const ddi_dma_cookie_t	*edb_cookie;
161 } ena_dma_buf_t;
162 
163 /*
164  * We always sync the entire range, and therefore expect success.
165  */
166 #ifdef DEBUG
167 #define	ENA_DMA_SYNC(buf, flag)					\
168 	ASSERT0(ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
169 #else  /* DEBUG */
170 #define	ENA_DMA_SYNC(buf, flag)					\
171 	((void)ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
172 #endif /* DEBUG */
173 
174 typedef void (*ena_aenq_hdlr_t)(void *data, enahw_aenq_desc_t *desc);
175 
176 typedef struct ena_aenq {	/* embedded in ena_t as ena_aenq */
177 	enahw_aenq_desc_t	*eaenq_descs;
178 	ena_dma_buf_t		eaenq_dma;
179 	ena_aenq_hdlr_t		eaenq_hdlrs[ENAHW_AENQ_GROUPS_ARR_NUM];
180 	uint16_t		eaenq_num_descs;
181 	uint16_t		eaenq_head;
182 	uint8_t			eaenq_phase;
183 } ena_aenq_t;
184 
185 typedef struct ena_admin_sq {	/* embedded in ena_adminq_t as ea_sq */
186 	enahw_cmd_desc_t	*eas_entries;
187 	ena_dma_buf_t		eas_dma;
188 	uint32_t		*eas_dbaddr; /* presumably the doorbell; confirm */
189 	uint16_t		eas_tail;
190 	uint8_t			eas_phase;
191 } ena_admin_sq_t;
192 
193 typedef struct ena_admin_cq {	/* embedded in ena_adminq_t as ea_cq */
194 	enahw_resp_desc_t	*eac_entries;
195 	ena_dma_buf_t		eac_dma;
196 	uint16_t		eac_head;
197 	uint8_t			eac_phase;
198 } ena_admin_cq_t;
199 
200 /*
201  * The command context is used to track outstanding requests and match
202  * them to device responses.
203  */
204 typedef struct ena_cmd_ctx {
205 	list_node_t		ectx_node; /* presumably for ea_cmd_ctxs_{free,used} */
206 
207 	/*
208 	 * The index into ea_cmd_ctxs where this ctx lives. Used as
209 	 * the command ID value in the command descriptor. This allows
210 	 * us to match a response to its associated context.
211 	 */
212 	uint16_t		ectx_id;
213 
214 	/* Is the command pending? */
215 	bool			ectx_pending;
216 
217 	/* The type of command associated with this context. */
218 	enahw_cmd_opcode_t	ectx_cmd_opcode;
219 
220 	/*
221 	 * The location to copy the full response to. This is
222 	 * specified by the caller of the command during
223 	 * submission.
224 	 */
225 	enahw_resp_desc_t	*ectx_resp;
226 } ena_cmd_ctx_t;
227 
228 /*
229  * The admin queue, the queue through which commands are sent to the
230  * device.
231  *
232  * WO: Write Once (at initialization)
233  *
234  * In general, only a single lock needs to be held in order to access
235  * the different parts of the admin queue:
236  *
237  *  sq_lock: Any data dealing with submitting admin commands, which
238  *  includes acquiring a command context.
239  *
240  *  cq_lock: Any data dealing with reading command responses.
241  *
242  *  stat_lock: For accessing statistics.
243  *
244  * In some cases, the ectx_lock/stat_lock may be held in tandem with
245  * either the SQ or CQ lock. In that case, the SQ/CQ lock is always
246  * entered first.
247  */
248 typedef struct ena_adminq {
249 	kmutex_t		ea_sq_lock;	/* WO */
250 	kmutex_t		ea_cq_lock;	/* WO */
251 	kmutex_t		ea_stat_lock;	/* WO */
252 
253 	hrtime_t		ea_cmd_timeout_ns; /* WO */
254 
255 	uint16_t		ea_qlen;	/* WO */
256 	bool			ea_poll_mode;	/* WO */
257 
258 	ena_cmd_ctx_t		*ea_cmd_ctxs;	  /* WO */
259 	list_t			ea_cmd_ctxs_free; /* ea_sq_lock */
260 	list_t			ea_cmd_ctxs_used; /* ea_sq_lock */
261 	uint16_t		ea_pending_cmds; /* ea_sq_lock */
262 	ena_admin_sq_t		ea_sq; /* ea_sq_lock */
263 	ena_admin_cq_t		ea_cq; /* ea_cq_lock */
264 
265 	/* ea_stat_lock */
266 	struct ena_adminq_stats {
267 		uint64_t cmds_fail;
268 		uint64_t cmds_submitted;
269 		uint64_t cmds_success;
270 		uint64_t queue_full;
271 	} ea_stats;
272 } ena_adminq_t;
273 
274 /*
275  * Cache of the last set of value hints received from the device. See the
276  * definition of enahw_device_hints_t in ena_hw.h for more detail on the
277  * purpose of each.
278  */
279 typedef struct ena_hints {	/* stored in ena_t as ena_device_hints */
280 	uint16_t		eh_mmio_read_timeout;
281 	uint16_t		eh_keep_alive_timeout;
282 	uint16_t		eh_tx_comp_timeout;
283 	uint16_t		eh_missed_tx_reset_threshold;
284 	uint16_t		eh_admin_comp_timeout;
285 	uint16_t		eh_max_tx_sgl;
286 	uint16_t		eh_max_rx_sgl;
287 } ena_hints_t;
288 
289 typedef enum ena_attach_seq {
290 	ENA_ATTACH_PCI = 1,	 /* PCI config space */
291 	ENA_ATTACH_REGS,	 /* BAR mapping */
292 	ENA_ATTACH_DEV_INIT,	 /* ENA device initialization */
293 	ENA_ATTACH_READ_CONF,	 /* Read driver conf file */
294 	ENA_ATTACH_DEV_CFG,	 /* Set any needed device config */
295 	ENA_ATTACH_INTR_ALLOC,	 /* interrupt handles allocated */
296 	ENA_ATTACH_INTR_HDLRS,	 /* intr handlers set */
297 	ENA_ATTACH_TXQS_ALLOC,	 /* Tx Queues allocated */
298 	ENA_ATTACH_RXQS_ALLOC,	 /* Rx Queues allocated */
299 	ENA_ATTACH_MAC_REGISTER, /* registered with mac */
300 	ENA_ATTACH_INTRS_ENABLE, /* interrupts are enabled */
301 	ENA_ATTACH_END		 /* sentinel; not an attach step */
302 } ena_attach_seq_t;
303 
304 #define	ENA_ATTACH_SEQ_FIRST	(ENA_ATTACH_PCI)
305 #define	ENA_ATTACH_NUM_ENTRIES	(ENA_ATTACH_END - 1) /* steps start at 1 */
306 
307 struct ena;	/* forward declaration; defined below as ena_t */
308 typedef bool (*ena_attach_fn_t)(struct ena *);
309 typedef void (*ena_cleanup_fn_t)(struct ena *, bool);
310 
311 typedef struct ena_attach_desc {	/* describes one ena_attach_seq_t step */
312 	ena_attach_seq_t ead_seq;
313 	const char *ead_name;
314 	ena_attach_fn_t ead_attach_fn;
315 	bool ead_attach_hard_fail;
316 	ena_cleanup_fn_t ead_cleanup_fn;
317 } ena_attach_desc_t;
318 
319 typedef enum {
320 	ENA_TCB_NONE,
321 	ENA_TCB_COPY
322 } ena_tcb_type_t;
323 
324 /*
325  * The TCB is used to track information relating to the Tx of a
326  * packet. At the moment we support copy only (ENA_TCB_COPY).
327  */
328 typedef struct ena_tx_control_block {
329 	/*
330 	 * The index into et_tcbs where this tcb lives. Used as the request ID
331 	 * value in the Tx descriptor. This allows us to match a response to
332 	 * its associated TCB.
333 	 */
334 	uint16_t	etcb_id;
335 	mblk_t		*etcb_mp;
336 	ena_tcb_type_t	etcb_type;
337 	ena_dma_buf_t	etcb_dma;
338 } ena_tx_control_block_t;
339 
340 typedef enum ena_txq_state {	/* each non-zero value is a distinct bit */
341 	ENA_TXQ_STATE_NONE		= 0,
342 	ENA_TXQ_STATE_HOST_ALLOC	= 1 << 0,
343 	ENA_TXQ_STATE_CQ_CREATED	= 1 << 1,
344 	ENA_TXQ_STATE_SQ_CREATED	= 1 << 2,
345 	ENA_TXQ_STATE_READY		= 1 << 3, /* TxQ ready and waiting */
346 	ENA_TXQ_STATE_RUNNING		= 1 << 4, /* intrs enabled */
347 } ena_txq_state_t;
348 
349 typedef struct ena_txq_stat {
350 	/* Number of times mac_ether_offload_info() has failed. */
351 	kstat_named_t	ets_hck_meoifail;
352 
353 	/*
354 	 * Total number of times the ring was blocked due to
355 	 * insufficient descriptors, or unblocked due to recycling
356 	 * descriptors.
357 	 */
358 	kstat_named_t	ets_blocked;
359 	kstat_named_t	ets_unblocked;
360 
361 	/* The total number of descriptors that have been recycled. */
362 	kstat_named_t	ets_recycled;
363 
364 	/*
365 	 * Number of bytes and packets that have been _submitted_ to
366 	 * the device.
367 	 */
368 	kstat_named_t	ets_bytes;
369 	kstat_named_t	ets_packets;
370 } ena_txq_stat_t;
371 
372 /*
373  * A transmit queue, made up of a Submission Queue (SQ) and Completion
374  * Queue (CQ) to form a logical descriptor ring for sending packets.
375  *
376  * Write Once (WO)
377  *
378  *   This value is written once, before the datapath is activated, in
379  *   a function which is controlled by mac(9E). Some values may be
380  *   written earlier, during ena attach, like et_ena and
381  *   et_sq_num_descs.
382  *
383  * Tx Mutex (TM) -- et_lock
384  *
385  *   This value is protected by the Tx queue's mutex. Some values may
386  *   be initialized in a WO path, but also continually updated as part
387  *   of normal datapath operation, such as et_sq_avail_descs. These
388  *   values need mutex protection.
389  */
390 typedef struct ena_txq {
391 	kmutex_t		et_lock; /* WO */
392 
393 	struct ena		*et_ena; /* WO */
394 	uint_t			et_txqs_idx; /* WO */
395 	mac_ring_handle_t	et_mrh;	 /* WO */
396 	uint64_t		et_m_gen_num; /* TM */
397 	ena_txq_state_t		et_state; /* WO */
398 	uint16_t		et_intr_vector; /* WO */
399 
400 	enahw_tx_desc_t		*et_sq_descs; /* TM */
401 	ena_dma_buf_t		et_sq_dma;    /* WO */
402 
403 	/* Is the Tx queue currently in a blocked state? */
404 	bool			et_blocked; /* TM */
405 
406 	/*
407 	 * The number of descriptors owned by this ring. This value
408 	 * never changes after initialization.
409 	 */
410 	uint16_t		et_sq_num_descs;   /* WO */
411 
412 	/*
413 	 * The number of descriptors currently available for Tx
414 	 * submission. When this value reaches zero the ring must
415 	 * block until the device notifies us of freed descriptors.
416 	 */
417 	uint16_t		et_sq_avail_descs; /* TM */
418 
419 	/*
420 	 * The current tail index of the queue (the first free
421 	 * descriptor for host Tx submission). After initialization,
422 	 * this value only increments, relying on unsigned wrap
423 	 * around. The ENA device seems to expect this behavior,
424 	 * performing its own modulo on the value for the purposes of
425 	 * indexing, much like the driver code needs to do in order to
426 	 * access the proper TCB entry.
427 	 */
428 	uint16_t		et_sq_tail_idx;  /* TM */
429 
430 	/*
431 	 * The phase is used to know which CQ descriptors may be
432 	 * reclaimed. This is explained further in ena.c.
433 	 */
434 	uint16_t		et_sq_phase; /* TM */
435 	uint16_t		et_sq_hw_idx; /* WO */
436 
437 	/*
438 	 * The "doorbell" address is how the host indicates to the
439 	 * device which descriptors are ready for Tx processing.
440 	 */
441 	uint32_t		*et_sq_db_addr; /* WO */
442 
443 	/*
444 	 * The TCBs track host Tx information, like a pointer to the
445 	 * mblk being submitted. The TCBs currently available for use are
446 	 * maintained in a free list.
447 	 */
448 	ena_tx_control_block_t	*et_tcbs;    /* TM */
449 	ena_tx_control_block_t	**et_tcbs_freelist; /* TM */
450 	uint16_t		et_tcbs_freelist_size; /* TM */
451 
452 	enahw_tx_cdesc_t	*et_cq_descs; /* TM */
453 	ena_dma_buf_t		et_cq_dma;    /* WO */
454 	uint16_t		et_cq_num_descs; /* WO */
455 	uint16_t		et_cq_head_idx; /* TM */
456 	uint16_t		et_cq_phase;	/* TM */
457 	uint16_t		et_cq_hw_idx;	/* WO */
458 
459 	/*
460 	 * This address is used to control the CQ interrupts.
461 	 */
462 	uint32_t		*et_cq_unmask_addr; /* WO */
463 	uint32_t		*et_cq_numa_addr;   /* WO (currently unused) */
464 
465 	/*
466 	 * This is used to detect transmit stalls and invoke a reset. The
467 	 * watchdog increments this counter when it sees that the TX
468 	 * ring is still blocked, and if it exceeds the threshold then the
469 	 * device is assumed to have stalled and needs to be reset. The
470 	 * threshold is presumably ENA_TX_STALL_TIMEOUT; confirm in the watchdog.
471 	 */
472 	uint32_t		et_stall_watchdog; /* TM */
473 
474 	/*
475 	 * This mutex protects the Tx queue stats. This mutex may be
476 	 * entered while et_lock is held, but et_lock is not required
477 	 * to access/modify the stats. However, if both locks are
478 	 * held, then et_lock must be entered first.
479 	 */
480 	kmutex_t		et_stat_lock;
481 	ena_txq_stat_t		et_stat;	/* et_stat_lock */
482 	kstat_t			*et_kstat;
483 } ena_txq_t;
483 
484 typedef enum ena_rxq_state {	/* each non-zero value is a distinct bit */
485 	ENA_RXQ_STATE_NONE		= 0,
486 	ENA_RXQ_STATE_HOST_ALLOC	= 1 << 0,
487 	ENA_RXQ_STATE_CQ_CREATED	= 1 << 1,
488 	ENA_RXQ_STATE_SQ_CREATED	= 1 << 2,
489 	ENA_RXQ_STATE_SQ_FILLED		= 1 << 3,
490 	ENA_RXQ_STATE_READY		= 1 << 4, /* RxQ ready and waiting */
491 	ENA_RXQ_STATE_RUNNING		= 1 << 5, /* intrs enabled */
492 } ena_rxq_state_t;
493 
494 typedef struct ena_rx_ctrl_block {	/* element type of er_rcbs */
495 	ena_dma_buf_t	ercb_dma;
496 	uint8_t		ercb_offset;
497 	uint16_t	ercb_length;
498 } ena_rx_ctrl_block_t;
499 
500 typedef enum {	/* type of er_mode in ena_rxq_t */
501 	ENA_RXQ_MODE_POLLING	= 1,
502 	ENA_RXQ_MODE_INTR	= 2,
503 } ena_rxq_mode_t;
504 
505 typedef struct ena_rxq_stat_t {	/* NOTE(review): tag has _t, unlike ena_txq_stat */
506 	/* The total number of packets/bytes received on this queue. */
507 	kstat_named_t	ers_packets;
508 	kstat_named_t	ers_bytes;
509 
510 	/*
511 	 * At this time we expect all incoming frames to fit in a
512 	 * single buffer/descriptor. In the rare event that the
513 	 * device doesn't cooperate this stat is incremented.
514 	 */
515 	kstat_named_t	ers_multi_desc;
516 
517 	/*
518 	 * The total number of times we failed to allocate a new mblk
519 	 * for an incoming frame.
520 	 */
521 	kstat_named_t	ers_allocb_fail;
522 
523 	/*
524 	 * The total number of times the Rx interrupt handler reached
525 	 * its maximum limit for number of packets to process in a
526 	 * single interrupt. If you see this number increase
527 	 * continuously at a steady rate, then it may be an indication
528 	 * the driver is not entering polling mode.
529 	 */
530 	kstat_named_t	ers_intr_limit;
531 
532 	/*
533 	 * The total number of times the device detected an incorrect
534 	 * IPv4 header checksum.
535 	 */
536 	kstat_named_t	ers_hck_ipv4_err;
537 
538 	/*
539 	 * The total number of times the device detected an incorrect
540 	 * L4/ULP checksum.
541 	 */
542 	kstat_named_t	ers_hck_l4_err;
543 } ena_rxq_stat_t;
544 
545 /*
546  * A receive queue, made up of a Submission Queue (SQ) and Completion
547  * Queue (CQ) to form a logical descriptor ring for receiving packets.
548  *
549  * Write Once (WO)
550  *
551  *   This value is written once, before the datapath is activated, in
552  *   a function which is controlled by mac(9E).
553  *
554  * Rx Mutex (RM) -- er_lock
555  *
556  *   This value is protected by the Rx queue's mutex. Some values may
557  *   be initialized in a WO path, but also continually updated as part
558  *   of normal datapath operation, such as er_sq_avail_descs. These
559  *   values need mutex protection.
560  */
561 typedef struct ena_rxq {
562 	kmutex_t		er_lock;
563 
564 	struct ena		*er_ena; /* WO */
565 	uint_t			er_rxqs_idx; /* WO */
566 	mac_ring_handle_t	er_mrh;	 /* WO */
567 	uint64_t		er_m_gen_num; /* WO; NOTE(review): Tx side is TM */
568 	ena_rxq_state_t		er_state; /* WO */
569 	uint16_t		er_intr_vector; /* WO */
570 	ena_rxq_mode_t		er_mode;	/* RM */
571 	uint16_t		er_intr_limit;	/* RM */
572 
573 	enahw_rx_desc_t		*er_sq_descs; /* RM */
574 	ena_dma_buf_t		er_sq_dma;    /* WO */
575 	uint16_t		er_sq_num_descs;   /* WO */
576 	uint16_t		er_sq_avail_descs; /* RM */
577 	uint16_t		er_sq_tail_idx;  /* RM */
578 	uint16_t		er_sq_phase; /* RM */
579 	uint16_t		er_sq_hw_idx;	/* WO */
580 	uint32_t		*er_sq_db_addr; /* WO */
581 
582 	enahw_rx_cdesc_t	*er_cq_descs; /* RM */
583 	ena_dma_buf_t		er_cq_dma;    /* WO */
584 	uint16_t		er_cq_num_descs; /* WO */
585 	uint16_t		er_cq_head_idx;	 /* RM */
586 	uint16_t		er_cq_phase;	 /* RM */
587 	uint16_t		er_cq_hw_idx;	 /* WO */
588 	uint32_t		*er_cq_unmask_addr; /* WO */
589 	uint32_t		*er_cq_numa_addr;    /* WO (currently unused) */
590 
591 	ena_rx_ctrl_block_t	*er_rcbs; /* RM */
592 
593 	kmutex_t		er_stat_lock;
594 	ena_rxq_stat_t		er_stat; /* presumably er_stat_lock; confirm */
595 	kstat_t			*er_kstat;
596 } ena_rxq_t;
597 
598 typedef struct ena_device_stat {	/* device-wide; see ena_device_stat in ena_t */
599 	kstat_named_t	eds_reset_forced;
600 	kstat_named_t	eds_reset_error;
601 	kstat_named_t	eds_reset_fatal;
602 	kstat_named_t	eds_reset_keepalive;
603 	kstat_named_t	eds_reset_txstall;
604 } ena_device_stat_t;
605 
606 /*
607  * These are stats based on enahw_resp_basic_stats_t and data that accompanies
608  * the asynchronous keepalive event.
609  */
610 typedef struct ena_basic_stat {	/* cf. ena_admin_get_basic_stats() */
611 	kstat_named_t	ebs_tx_bytes;
612 	kstat_named_t	ebs_tx_pkts;
613 	kstat_named_t	ebs_tx_drops;
614 
615 	kstat_named_t	ebs_rx_bytes;
616 	kstat_named_t	ebs_rx_pkts;
617 	kstat_named_t	ebs_rx_drops;
618 	kstat_named_t	ebs_rx_overruns;
619 } ena_basic_stat_t;
620 
621 /* These are stats based on enahw_resp_eni_stats_t. */
622 typedef struct ena_extended_stat {	/* cf. ena_admin_get_eni_stats() */
623 	kstat_named_t	ees_bw_in_exceeded;
624 	kstat_named_t	ees_bw_out_exceeded;
625 	kstat_named_t	ees_pps_exceeded;
626 	kstat_named_t	ees_conns_exceeded;
627 	kstat_named_t	ees_linklocal_exceeded;
628 } ena_extended_stat_t;
629 
630 /* These stats monitor which AENQ handlers have been called. */
631 typedef struct ena_aenq_stat {	/* stored in ena_t as ena_aenq_stat */
632 	kstat_named_t	eaes_default;
633 	kstat_named_t	eaes_link_change;
634 	kstat_named_t	eaes_notification;
635 	kstat_named_t	eaes_keep_alive;
636 	kstat_named_t	eaes_request_reset;
637 	kstat_named_t	eaes_fatal_error;
638 	kstat_named_t	eaes_warning;
639 } ena_aenq_stat_t;
640 
641 #ifdef DEBUG
642 typedef struct ena_reg {	/* element type of ena_reg[] in ena_t */
643 	const char	*er_name;
644 	const uint16_t	er_offset;
645 	uint32_t	er_value;
646 } ena_reg_t;
647 #endif /* DEBUG */
648 
649 #define	ENA_STATE_UNKNOWN	0x00u	/* presumably values for ena_state */
650 #define	ENA_STATE_INITIALIZED	0x01u
651 #define	ENA_STATE_STARTED	0x02u
652 #define	ENA_STATE_ERROR		0x04u
653 #define	ENA_STATE_RESETTING	0x08u
654 
655 /*
656  * This structure contains the per-instance (PF or VF) state of the
657  * device.
658  */
659 typedef struct ena {
660 	dev_info_t		*ena_dip;
661 	int			ena_instance;
662 
663 #ifdef DEBUG
664 	/*
665 	 * In debug kernels, the registers are cached here at various points
666 	 * for easy inspection via mdb(1).
667 	 */
668 	ena_reg_t		ena_reg[ENAHW_NUM_REGS];
669 #endif /* DEBUG */
670 
671 	/*
672 	 * Global lock, used to synchronize administration changes to
673 	 * the ena_t. This lock should not be held in the datapath.
674 	 */
675 	kmutex_t		ena_lock;
676 	ena_attach_seq_t	ena_attach_seq;
677 
678 	/*
679 	 * We use atomic ops for ena_state so that datapath consumers
680 	 * do not need to enter ena_lock.
681 	 */
682 	uint32_t		ena_state;
683 
684 	/*
685 	 * The reason for the last device reset.
686 	 */
687 	enahw_reset_reason_t	ena_reset_reason;
688 
689 	/*
690 	 * Watchdog
691 	 */
692 	kmutex_t		ena_watchdog_lock;
693 	ddi_periodic_t		ena_watchdog_periodic;
694 	uint64_t		ena_watchdog_last_keepalive;
695 
696 	/*
697 	 * PCI config space and BAR handle.
698 	 */
699 	ddi_acc_handle_t	ena_pci_hdl;
700 	off_t			ena_reg_size;
701 	caddr_t			ena_reg_base;
702 	ddi_device_acc_attr_t	ena_reg_attr;
703 	ddi_acc_handle_t	ena_reg_hdl;
704 
705 	/*
706 	 * Vendor information.
707 	 */
708 	uint16_t		ena_pci_vid;
709 	uint16_t		ena_pci_did;
710 	uint8_t			ena_pci_rev;
711 	uint16_t		ena_pci_svid;
712 	uint16_t		ena_pci_sdid;
713 
714 	/*
715 	 * Device and controller versions.
716 	 */
717 	uint32_t		ena_dev_major_vsn;
718 	uint32_t		ena_dev_minor_vsn;
719 	uint32_t		ena_ctrl_major_vsn;
720 	uint32_t		ena_ctrl_minor_vsn;
721 	uint32_t		ena_ctrl_subminor_vsn;
722 	uint32_t		ena_ctrl_impl_id;
723 
724 	/*
725 	 * Interrupts
726 	 */
727 	int			ena_num_intrs;
728 	ddi_intr_handle_t	*ena_intr_handles;
729 	size_t			ena_intr_handles_sz;
730 	int			ena_intr_caps;
731 	uint_t			ena_intr_pri;
732 
733 	mac_handle_t		ena_mh;
734 
735 	size_t			ena_page_sz;
736 
737 	/*
738 	 * The MTU and data layer frame sizes.
739 	 */
740 	uint32_t		ena_mtu;
741 	uint32_t		ena_max_frame_hdr;
742 	uint32_t		ena_max_frame_total;
743 
744 	/* The size (in bytes) of the Rx/Tx data buffers. */
745 	uint32_t		ena_tx_buf_sz;
746 	uint32_t		ena_rx_buf_sz;
747 
748 	/*
749 	 * The maximum number of Scatter Gather List segments the
750 	 * device can address.
751 	 */
752 	uint8_t			ena_tx_sgl_max_sz;
753 	uint8_t			ena_rx_sgl_max_sz;
754 
755 	/* The number of descriptors per Rx/Tx queue. */
756 	uint16_t		ena_rxq_num_descs;
757 	uint16_t		ena_txq_num_descs;
758 
759 	/*
760 	 * The maximum number of frames which may be read per Rx
761 	 * interrupt.
762 	 */
763 	uint16_t		ena_rxq_intr_limit;
764 
765 	/* The Rx/Tx data queues (rings). */
766 	ena_rxq_t		*ena_rxqs;
767 	uint16_t		ena_num_rxqs;
768 	ena_txq_t		*ena_txqs;
769 	uint16_t		ena_num_txqs;
770 
771 	/* These statistics are device-wide. */
772 	kstat_t			*ena_device_kstat;
773 	ena_device_stat_t	ena_device_stat;
774 	hrtime_t		ena_device_basic_stat_last_update;
775 	kmutex_t		ena_device_basic_stat_lock;
776 	kstat_t			*ena_device_basic_kstat;
777 	kstat_t			*ena_device_extended_kstat;
778 
779 	/*
780 	 * This tracks AENQ-related stats, it is implicitly
781 	 * device-wide.
782 	 */
783 	ena_aenq_stat_t		ena_aenq_stat;
784 	kstat_t			*ena_aenq_kstat;
785 
786 	/*
787 	 * The Admin Queue, through which all device commands are
788 	 * sent.
789 	 */
790 	ena_adminq_t		ena_aq;
791 
792 	ena_aenq_t		ena_aenq;
793 	ena_dma_buf_t		ena_host_info;
794 
795 	/*
796 	 * Hardware info
797 	 */
798 	ena_hints_t		ena_device_hints;
799 	uint32_t		ena_supported_features;
800 	uint32_t		ena_capabilities;
801 	uint8_t			ena_dma_width;	/* see ENA_DMA_VERIFY_ADDR() */
802 	bool			ena_link_autoneg;
803 	link_duplex_t		ena_link_duplex;
804 	uint64_t		ena_link_speed_mbits;
805 	enahw_link_speeds_t	ena_link_speeds;
806 	link_state_t		ena_link_state;
807 	uint32_t		ena_aenq_supported_groups;
808 	uint32_t		ena_aenq_enabled_groups;
809 
810 	uint32_t		ena_tx_max_sq_num;
811 	uint32_t		ena_tx_max_sq_num_descs;
812 	uint32_t		ena_tx_max_cq_num;
813 	uint32_t		ena_tx_max_cq_num_descs;
814 	uint16_t		ena_tx_max_desc_per_pkt;
815 	uint32_t		ena_tx_max_hdr_len;
816 
817 	uint32_t		ena_rx_max_sq_num;
818 	uint32_t		ena_rx_max_sq_num_descs;
819 	uint32_t		ena_rx_max_cq_num;
820 	uint32_t		ena_rx_max_cq_num_descs;
821 	uint16_t		ena_rx_max_desc_per_pkt;
822 
823 	/* This is calculated from the Rx/Tx queue nums. */
824 	uint16_t		ena_max_io_queues;
825 
826 	/* Hardware Offloads */
827 	bool			ena_tx_l3_ipv4_csum;
828 
829 	bool			ena_tx_l4_ipv4_part_csum;
830 	bool			ena_tx_l4_ipv4_full_csum;
831 	bool			ena_tx_l4_ipv4_lso;
832 
833 	bool			ena_tx_l4_ipv6_part_csum;
834 	bool			ena_tx_l4_ipv6_full_csum;
835 	bool			ena_tx_l4_ipv6_lso;
836 
837 	bool			ena_rx_l3_ipv4_csum;
838 	bool			ena_rx_l4_ipv4_csum;
839 	bool			ena_rx_l4_ipv6_csum;
840 	bool			ena_rx_hash;
841 
842 	uint32_t		ena_max_mtu;
843 	uint8_t			ena_mac_addr[ETHERADDRL];
844 } ena_t;
845 
846 /*
847  * Misc
848  */
849 extern bool ena_reset(ena_t *, const enahw_reset_reason_t);
850 extern bool ena_is_feat_avail(ena_t *, const enahw_feature_id_t);
851 extern bool ena_is_cap_avail(ena_t *, const enahw_capability_id_t);
852 extern void ena_update_hints(ena_t *, enahw_device_hints_t *);
853 
854 /*
855  * Logging functions.
856  */
857 extern bool ena_debug;
858 extern void ena_err(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
859 extern void ena_dbg(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
860 extern void ena_panic(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
861 extern void ena_trigger_reset(ena_t *, enahw_reset_reason_t);
862 
863 /*
864  * Hardware access.
865  */
866 extern uint32_t ena_hw_bar_read32(const ena_t *, const uint16_t);
867 extern uint32_t ena_hw_abs_read32(const ena_t *, uint32_t *);
868 extern void ena_hw_bar_write32(const ena_t *, const uint16_t, const uint32_t);
869 extern void ena_hw_abs_write32(const ena_t *, uint32_t *, const uint32_t);
870 extern const char *enahw_reset_reason(enahw_reset_reason_t);
871 #ifdef DEBUG
872 extern void ena_init_regcache(ena_t *);
873 extern void ena_update_regcache(ena_t *);
874 #else /* DEBUG */
875 #define	ena_init_regcache(x)	/* expands to nothing in non-DEBUG builds */
876 #define	ena_update_regcache(x)
877 #endif /* DEBUG */
878 
879 /*
880  * Watchdog
881  */
882 extern void ena_enable_watchdog(ena_t *);
883 extern void ena_disable_watchdog(ena_t *);
884 
885 /*
886  * Stats
887  */
888 extern void ena_stat_device_cleanup(ena_t *);
889 extern bool ena_stat_device_init(ena_t *);
890 
891 extern void ena_stat_device_basic_cleanup(ena_t *);
892 extern bool ena_stat_device_basic_init(ena_t *);
893 
894 extern void ena_stat_device_extended_cleanup(ena_t *);
895 extern bool ena_stat_device_extended_init(ena_t *);
896 
897 extern void ena_stat_aenq_cleanup(ena_t *);
898 extern bool ena_stat_aenq_init(ena_t *);
899 
900 extern void ena_stat_rxq_cleanup(ena_rxq_t *);
901 extern bool ena_stat_rxq_init(ena_rxq_t *);
902 extern void ena_stat_txq_cleanup(ena_txq_t *);
903 extern bool ena_stat_txq_init(ena_txq_t *);
904 
905 /*
906  * DMA
907  */
908 extern bool ena_dma_alloc(ena_t *, ena_dma_buf_t *, ena_dma_conf_t *,
909     size_t);
910 extern void ena_dma_free(ena_dma_buf_t *);
911 extern void ena_dma_bzero(ena_dma_buf_t *);
912 extern void ena_set_dma_addr(const ena_t *, const uint64_t, enahw_addr_t *);
913 extern void ena_set_dma_addr_values(const ena_t *, const uint64_t, uint32_t *,
914     uint16_t *);
915 
916 /*
917  * Interrupts
918  */
919 extern bool ena_intr_add_handlers(ena_t *);
920 extern void ena_intr_remove_handlers(ena_t *, bool);
921 extern void ena_tx_intr_work(ena_txq_t *);
922 extern void ena_rx_intr_work(ena_rxq_t *);
923 extern bool ena_intrs_disable(ena_t *);
924 extern bool ena_intrs_enable(ena_t *);
925 
926 /*
927  * MAC
928  */
929 extern bool ena_mac_register(ena_t *);
930 extern int ena_mac_unregister(ena_t *);
931 extern void ena_ring_tx_stop(mac_ring_driver_t);
932 extern int ena_ring_tx_start(mac_ring_driver_t, uint64_t);
933 extern mblk_t *ena_ring_tx(void *, mblk_t *);
934 extern void ena_ring_rx_stop(mac_ring_driver_t);
935 extern int ena_ring_rx_start(mac_ring_driver_t rh, uint64_t gen_num);
936 extern int ena_m_stat(void *, uint_t, uint64_t *);
937 extern mblk_t *ena_ring_rx_poll(void *, int);
938 extern int ena_ring_rx_stat(mac_ring_driver_t, uint_t, uint64_t *);
939 extern int ena_ring_tx_stat(mac_ring_driver_t, uint_t, uint64_t *);
940 
941 /*
942  * Admin API
943  */
944 extern int ena_admin_submit_cmd(ena_t *, enahw_cmd_desc_t *,
945     enahw_resp_desc_t *, ena_cmd_ctx_t **);
946 extern int ena_admin_poll_for_resp(ena_t *, ena_cmd_ctx_t *);
947 extern void ena_free_host_info(ena_t *);
948 extern bool ena_init_host_info(ena_t *);
949 extern void ena_create_cmd_ctx(ena_t *);
950 extern void ena_release_all_cmd_ctx(ena_t *);
951 extern int ena_create_cq(ena_t *, uint16_t, uint64_t, bool, uint32_t,
952     uint16_t *, uint32_t **, uint32_t **);
953 extern int ena_destroy_cq(ena_t *, uint16_t);
954 extern int ena_create_sq(ena_t *, uint16_t, uint64_t, bool, uint16_t,
955     uint16_t *, uint32_t **);
956 extern int ena_destroy_sq(ena_t *, uint16_t, bool);
957 extern int ena_set_feature(ena_t *, enahw_cmd_desc_t *,
958     enahw_resp_desc_t *, const enahw_feature_id_t, const uint8_t);
959 extern int ena_get_feature(ena_t *, enahw_resp_desc_t *,
960     const enahw_feature_id_t, const uint8_t);
961 extern int ena_admin_get_basic_stats(ena_t *, enahw_resp_desc_t *);
962 extern int ena_admin_get_eni_stats(ena_t *, enahw_resp_desc_t *);
963 extern int enahw_resp_status_to_errno(ena_t *, enahw_resp_status_t);
964 
965 /*
966  * Async event queue
967  */
968 extern bool ena_aenq_init(ena_t *);
969 extern bool ena_aenq_configure(ena_t *);
970 extern void ena_aenq_enable(ena_t *);
971 extern void ena_aenq_work(ena_t *);
972 extern void ena_aenq_free(ena_t *);
973 
974 /*
975  * Rx/Tx allocations
976  */
977 extern bool ena_alloc_rxq(ena_rxq_t *);
978 extern void ena_cleanup_rxq(ena_rxq_t *, bool);
979 extern bool ena_alloc_txq(ena_txq_t *);
980 extern void ena_cleanup_txq(ena_txq_t *, bool);
981 
982 #ifdef __cplusplus
983 }
984 #endif
985 
986 #endif	/* _ENA_H */
987