xref: /illumos-gate/usr/src/uts/common/io/igc/igc.h (revision 34bbc83afbf22a6f8e504cb99d76c97c017cb5f4)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 #ifndef _IGC_H
17 #define	_IGC_H
18 
19 /*
20  * Primary illumos igc(4D) header file.
21  */
22 
23 #include <sys/types.h>
24 #include <sys/mac_provider.h>
25 #include <sys/mac_ether.h>
26 #include <sys/vlan.h>
27 #include <sys/dlpi.h>
28 #include <sys/pattr.h>
29 #include <sys/list.h>
30 
31 #include <core/igc_hw.h>
32 #include <core/igc_api.h>
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 
38 /*
39  * The name of our module as presented to MAC, kstats, etc.
40  */
41 #define	IGC_MOD_NAME	"igc"
42 
43 /*
44  * The igc hardware appears to use BAR 0, which is regs[1].
45  */
46 #define	IGC_PCI_BAR	1	/* the regs[] index from the comment above, not the BAR number */
47 
48 /*
49  * Maximum number of RX and TX rings that it appears the hardware supports. The
50  * strict maximum segment size that the device can take is basically 9 KiB
51  * (9216). However, we limit this to 9k so we don't have to worry about the
52  * margin or related bits. The I225/6 datasheet that we have access to doesn't
53  * explicitly state the maximum MTU. Various drivers and the I210 (which has a
54  * rather similar MAC) do have similar values. Our assumption is that this
55  * allows for us to still receive VLAN tagged packets and that we can set the
56  * margin appropriately for mac.
57  */
58 #define	IGC_MAX_RX_RINGS_I225	4
59 #define	IGC_MAX_TX_RINGS_I225	4
60 #define	IGC_MAX_MTU_I225	9216	/* NOTE(review): this is the full 9 KiB, yet the text above says we limit to "9k" -- confirm which is intended */
61 
62 /*
63  * These are the default auto-negotiation values the device supports which is
64  * 10/100 Half and Full duplex and then 1000/2500 full duplex.
65  */
66 #define	IGC_DEFAULT_ADV	IGC_ALL_SPEED_DUPLEX_2500	/* not defined in this file; presumably from the core/ headers included above */
67 
68 /*
69  * This is the default PAUSE frame time that we use. This value comes from
70  * igb/e1000g and is 858 usec.
71  */
72 #define	IGC_FC_PAUSE_TIME	0x0680	/* 1664 decimal, in whatever units the hardware uses for pause time */
73 
74 /*
75  * Default values for ring sizes and related. We'll let an interrupt drain up to
76  * half the ring by default. These are all things that could reasonably be made
77  * into dladm private properties of the driver. We picked the 256 byte bind
78  * threshold for rx mostly by surveying others. For tx, we picked 512 as that's
79  * what igb, ixgbe, and e1000g use today, though i40e and qede use 256. These
80  * numbers are pretty arbitrary.
81  */
82 #define	IGC_DEF_RX_RING_SIZE	512	/* presumably a count of descriptors -- confirm */
83 #define	IGC_DEF_TX_RING_SIZE	512	/* presumably a count of descriptors -- confirm */
84 #define	IGC_DEF_RX_RING_INTR_LIMIT	256	/* half of IGC_DEF_RX_RING_SIZE, per the comment above */
85 #define	IGC_DEF_RX_BIND		256	/* rx bind threshold, in bytes */
86 #define	IGC_DEF_TX_BIND		512	/* tx bind threshold; presumably bytes like the rx one */
87 
88 /*
89  * These numbers deal with the tx ring, blocking, recycling, and notification
90  * thresholds. The first thing we need to pick is how many descriptors we
91  * require before we tell MAC that the ring is blocked.  This number is picked
92  * somewhat arbitrarily.  Because we could always fall back to a copy, this
93  * could be as small as 2 (context and related) descriptors; however, the driver
94  * can chain a fair bit together so we basically chose 4, which is a bit less
95  * than 1% of the default ring size.  We picked a default recycle threshold
96  * check during tx of 32, which is about 6.25% of the default ring size.
97  *
98  * We opt to keep a two descriptor gap as that's what igb has always done and
99  * other drivers we've surveyed do the same.
100  */
101 #define	IGC_DEF_TX_NOTIFY_MIN	4	/* descriptors required before the ring is reported blocked to MAC */
102 #define	IGC_DEF_TX_RECYCLE_MIN	32	/* recycle threshold checked during tx */
103 #define	IGC_DEF_TX_GAP		2	/* descriptor gap that is always kept */
104 
105 /*
106  * This is the maximum number of cookies that we'll use in a transmit. This
107  * number has been used across the igb/e1000g drivers over the years and comes
108  * from the idea of taking a maximum sized LSO packet (64 KiB) plus its header
109  * data, and dividing that by a 4 KiB page size, plus an extra descriptor in
110  * case things end up split across pages.
111  */
112 #define	IGC_MAX_TX_COOKIES	18	/* 64 KiB / 4 KiB is 16; presumably +1 for headers and +1 extra -- confirm */
113 
114 /*
115  * Extra alignment that we use to offset RX buffers so that way IP's header is
116  * 4-byte aligned.
117  */
118 #define	IGC_RX_BUF_IP_ALIGN	2	/* presumably a byte count -- confirm */
119 
120 /*
121  * The buffer sizes that hardware uses for rx and tx are required to be 1 KiB
122  * aligned.
123  */
124 #define	IGC_BUF_ALIGN	0x400	/* 0x400 == 1024 bytes == 1 KiB */
125 
126 /*
127  * This value is used to indicate that we're grabbing the ring from the
128  * interrupt and therefore should only take a single pass.
129  */
130 #define	IGC_RX_POLL_INTR	-1	/* presumably given as the int argument of igc_ring_rx() -- confirm */
131 
132 /*
133  * This is a value in microseconds that hardware will guarantee as a gap between
134  * interrupts. This value is just a borrowed default from other drivers.
135  */
136 #define	IGC_DEF_EITR	200	/* microseconds */
137 
138 /*
139  * Because we never use the offset and address for syncing, we want to cast the
140  * DMA sync call to void, but let's be paranoid on debug.
141  */
142 #ifdef	DEBUG
143 #define	IGC_DMA_SYNC(buf, flag)		ASSERT0(ddi_dma_sync((buf)->idb_hdl, \
144 					    0, 0, flag))
145 #else	/* !DEBUG: the return value is deliberately discarded */
146 #define	IGC_DMA_SYNC(buf, flag)		(void) ddi_dma_sync((buf)->idb_hdl, \
147 					    0, 0, flag)
148 #endif	/* DEBUG */
149 
150 typedef enum igc_attach {	/* Bit flags (each 1 << n) kept in igc_t's igc_attach member; presumably one per completed setup step -- confirm */
151 	IGC_ATTACH_REGS		= 1 << 0,
152 	IGC_ATTACH_INTR_ALLOC	= 1 << 1,
153 	IGC_ATTACH_MUTEX	= 1 << 2,
154 	IGC_ATTACH_INTR_HANDLER	= 1 << 3,
155 	IGC_ATTACH_LED		= 1 << 4,
156 	IGC_ATTACH_STATS	= 1 << 5,
157 	IGC_ATTACH_MAC		= 1 << 6,
158 	IGC_ATTACH_INTR_EN	= 1 << 7,
159 	/*
160 	 * The rest of these represent state that is allocated and transformed
161 	 * after the device's mc_start(9E) entry point, igc_m_start(), is called
162 	 * by MAC.
163 	 */
164 	IGC_ATTACH_MAC_START	= 1 << 8,
165 	IGC_ATTACH_RX_DATA	= 1 << 9,
166 	IGC_ATTACH_TX_DATA	= 1 << 10
167 } igc_attach_t;
168 
169 /*
170  * Hardware-specific limits. An instance is embedded in igc_t as igc_limits.
171  */
172 typedef struct igc_limits {
173 	uint32_t il_max_rx_rings;	/* presumably IGC_MAX_RX_RINGS_I225 on I225 parts -- confirm */
174 	uint32_t il_max_tx_rings;	/* presumably IGC_MAX_TX_RINGS_I225 on I225 parts -- confirm */
175 	uint32_t il_max_mtu;	/* presumably IGC_MAX_MTU_I225 on I225 parts -- confirm */
176 } igc_limits_t;
177 
178 typedef struct igc_dma_buffer {	/* DMA bookkeeping embedded in rx/tx buffers and in the rx/tx descriptor rings */
179 	caddr_t idb_va;	/* presumably the virtual address of the DMA memory -- confirm */
180 	ddi_acc_handle_t idb_acc;	/* DDI access handle */
181 	ddi_dma_handle_t idb_hdl;	/* DMA handle; IGC_DMA_SYNC() passes this to ddi_dma_sync() */
182 	size_t idb_size;	/* NOTE(review): presumably the usable size, versus idb_alloc_len below -- confirm */
183 	size_t idb_alloc_len;	/* NOTE(review): presumably the length actually allocated -- confirm */
184 } igc_dma_buffer_t;
185 
186 typedef struct igc_rx_buffer {	/* One rx buffer; igc_rx_ring_t keeps these in its arena, work list, and free list */
187 	struct igc_rx_ring *irb_ring;	/* pointer to an rx ring, presumably the one that owns this buffer -- confirm */
188 	mblk_t *irb_mp;	/* mblk_t associated with this buffer */
189 	igc_dma_buffer_t irb_dma;	/* DMA state for this buffer */
190 	frtn_t irb_free_rtn;	/* presumably the free routine run when irb_mp is freed -- confirm */
191 	bool irb_loaned;	/* presumably true while the buffer is loaned out -- confirm */
192 } igc_rx_buffer_t;
193 
194 typedef enum igc_rx_ring_flags {	/* Bit flags stored in igc_rx_ring_t's irr_flags member */
195 	/*
196 	 * Indicates we're currently polling and therefore shouldn't process an
197 	 * interrupt in case we're racing.
198 	 */
199 	IGC_RXR_F_POLL	= 1 << 0
200 } igc_rx_ring_flags_t;
201 
202 typedef struct igc_rx_stats {	/* Named kstats for one rx ring; embedded in igc_rx_ring_t as irr_stat */
203 	kstat_named_t irs_rbytes;
204 	kstat_named_t irs_ipackets;
205 	kstat_named_t irs_desc_error;
206 	kstat_named_t irs_copy_nomem;
207 	kstat_named_t irs_bind_nobuf;
208 	kstat_named_t irs_bind_nomp;
209 	kstat_named_t irs_nbind;	/* presumably frames received by binding (cf. IGC_DEF_RX_BIND) -- confirm */
210 	kstat_named_t irs_ncopy;	/* presumably frames received by copying -- confirm */
211 	kstat_named_t irs_ixsm;
212 	kstat_named_t irs_l3cksum_err;
213 	kstat_named_t irs_l4cksum_err;
214 	kstat_named_t irs_hcksum_miss;
215 	kstat_named_t irs_hcksum_hit;
216 } igc_rx_stats_t;
217 
218 typedef struct igc_rx_ring {	/* State for one rx ring; igc_t's igc_rx_rings points at these */
219 	struct igc *irr_igc;	/* pointer to an igc_t, presumably the owning device -- confirm */
220 	igc_rx_ring_flags_t irr_flags;	/* igc_rx_ring_flags_t bits, i.e. IGC_RXR_F_POLL */
221 	/*
222 	 * The ring's index on the device and the corresponding index that
223 	 * should be used for manipulating it in the EIMS, which generally is
224 	 * just which single MSI-X it has.
225 	 */
226 	uint32_t irr_idx;
227 	uint32_t irr_intr_idx;
228 	mac_ring_handle_t irr_rh;	/* MAC's handle for this ring */
229 	kmutex_t irr_lock;	/* NOTE(review): exactly which members this protects is not stated here */
230 
231 	/*
232 	 * Stats for the ring, along with the current mac generation, which is
233 	 * needed for receiving data.
234 	 */
235 	uint64_t irr_gen;
236 	igc_rx_stats_t irr_stat;
237 	kstat_t *irr_kstat;
238 
239 	/*
240 	 * Data for the rx descriptor ring itself.
241 	 */
242 	igc_dma_buffer_t irr_desc_dma;
243 	union igc_adv_rx_desc *irr_ring;
244 	uint32_t irr_next;	/* presumably the next descriptor index to examine -- confirm */
245 
246 	/*
247 	 * RX descriptors and related. The arena contains every allocated rx
248 	 * buffer. The rx buffers are split between the work list and the free
249 	 * list. The work list is 1:1 mapped to the descriptor ring. The free
250 	 * list contains extra buffers. The total number of buffers is static
251 	 * and is set to igc_rx_nbuf. igc_rx_ndesc go into the work list and
252 	 * then the remaining ones are in the free list.
253 	 */
254 	igc_rx_buffer_t *irr_arena;
255 	igc_rx_buffer_t **irr_work_list;
256 	igc_rx_buffer_t **irr_free_list;
257 	kmutex_t irr_free_lock;	/* presumably guards the free list and irr_nfree -- confirm */
258 	kcondvar_t irr_free_cv;	/* presumably signalled as buffers return to the free list -- confirm */
259 	uint32_t irr_nfree;	/* presumably the number of entries on the free list -- confirm */
260 } igc_rx_ring_t;
261 
262 typedef struct igc_tx_buffer {	/* One tx buffer; igc_tx_ring_t keeps these in its arena, work list, and free list */
263 	list_node_t itb_node;	/* list linkage; presumably for igc_tx_ring_t's itr_free_list -- confirm */
264 	mblk_t *itb_mp;	/* mblk_t associated with this buffer */
265 	igc_dma_buffer_t itb_dma;	/* DMA state for this buffer */
266 	ddi_dma_handle_t itb_bind_hdl;	/* a second DMA handle; presumably the one used when itb_bind is true -- confirm */
267 	/*
268 	 * This flag indicates that this is the first tx buffer for a packet and
269 	 * therefore its last descriptor for the packet is valid. See 'TX Data
270 	 * Path Design' in the theory statement for more information.
271 	 */
272 	bool itb_first;
273 	/*
274 	 * When set to true this tx buffer is being used to represent DMA
275 	 * binding. Otherwise, it's being used to represent copying.
276 	 */
277 	bool itb_bind;
278 	/*
279 	 * This indicates the last descriptor used for an entire packet and
280 	 * therefore what we will garbage collect.
281 	 */
282 	uint32_t itb_last_desc;
283 	/*
284 	 * This tracks how much data is currently valid in the buffer.
285 	 */
286 	size_t itb_len;
287 } igc_tx_buffer_t;
288 
289 /*
290  * This represents data that we have saved and goes into the tx context
291  * descriptor. If the information has changed, then we likely need to reset the
292  * context descriptor. An instance is embedded in igc_tx_ring_t as itr_tx_ctx.
293  */
294 typedef struct igc_tx_context_data {
295 	uint8_t itc_l2hlen;	/* presumably the layer 2 header length in bytes -- confirm */
296 	uint8_t itc_l3hlen;	/* presumably the layer 3 header length in bytes -- confirm */
297 	uint8_t itc_l4hlen;	/* presumably the layer 4 header length in bytes -- confirm */
298 	uint8_t itc_l4proto;
299 	uint16_t itc_l3proto;
300 	uint32_t itc_mss;
301 	uint32_t itc_cksum;	/* NOTE(review): encoding (flags vs. value) is not visible here */
302 	uint32_t itc_lso;	/* NOTE(review): encoding (flags vs. value) is not visible here */
303 } igc_tx_context_data_t;
304 
305 typedef struct igc_tx_stats {	/* Named kstats for one tx ring; embedded in igc_tx_ring_t as itr_stat */
306 	kstat_named_t its_obytes;
307 	kstat_named_t its_opackets;
308 	kstat_named_t its_bad_meo;
309 	kstat_named_t its_ring_full;
310 	kstat_named_t its_no_tx_bufs;
311 	kstat_named_t its_tx_copy;	/* presumably transmits done by copying -- confirm */
312 	kstat_named_t its_tx_bind;	/* presumably transmits done by DMA binding -- confirm */
313 	kstat_named_t its_tx_bind_fail;
314 } igc_tx_stats_t;
315 
316 typedef struct igc_tx_ring {	/* State for one tx ring; igc_t's igc_tx_rings points at these */
317 	struct igc *itr_igc;	/* pointer to an igc_t, presumably the owning device -- confirm */
318 	uint32_t itr_idx;	/* presumably the ring index, as with igc_rx_ring_t's irr_idx -- confirm */
319 	uint32_t itr_intr_idx;	/* presumably the interrupt index, as with irr_intr_idx -- confirm */
320 	mac_ring_handle_t itr_rh;	/* MAC's handle for this ring */
321 	kmutex_t itr_lock;	/* NOTE(review): exactly which members this protects is not stated here */
322 
323 	/*
324 	 * Stats for the ring.
325 	 */
326 	igc_tx_stats_t itr_stat;
327 	kstat_t *itr_kstat;
328 
329 	/*
330 	 * Data for the TX descriptors.
331 	 */
332 	igc_dma_buffer_t itr_desc_dma;
333 	union igc_adv_tx_desc *itr_ring;
334 	uint32_t itr_ring_head;
335 	uint32_t itr_ring_tail;
336 	uint32_t itr_ring_free;	/* presumably the number of free descriptors -- confirm */
337 	bool itr_mac_blocked;	/* presumably set once MAC was told the ring is blocked -- confirm */
338 	bool itr_recycle;
339 	igc_tx_context_data_t itr_tx_ctx;	/* saved tx context descriptor data */
340 
341 	/*
342 	 * Transmit Buffers
343 	 */
344 	igc_tx_buffer_t *itr_arena;
345 	igc_tx_buffer_t **itr_work_list;
346 	list_t itr_free_list;
347 
348 } igc_tx_ring_t;
349 
350 typedef struct igc_addr {	/* Element type of igc_t's igc_ucast and igc_mcast */
351 	uint8_t ia_mac[ETHERADDRL];	/* ETHERADDRL bytes of address */
352 	bool ia_valid;	/* presumably whether ia_mac currently holds an address in use -- confirm */
353 } igc_addr_t;
354 
355 /*
356  * Running counters that are used for MAC. These are named after the
357  * corresponding hardware registers. An instance is embedded in igc_t as igc_stats.
358  */
359 typedef struct igc_stats {
360 	kstat_named_t is_crcerrs;
361 	kstat_named_t is_algnerrc;
362 	kstat_named_t is_mpc;
363 	kstat_named_t is_scc;
364 	kstat_named_t is_ecol;
365 	kstat_named_t is_mcc;
366 	kstat_named_t is_latecol;
367 	kstat_named_t is_colc;
368 	kstat_named_t is_rerc;
369 	kstat_named_t is_dc;
370 	kstat_named_t is_tncrs;
371 	kstat_named_t is_htdpmc;
372 	kstat_named_t is_rlec;
373 	kstat_named_t is_xonrxc;
374 	kstat_named_t is_xontxc;
375 	kstat_named_t is_xoffrxc;
376 	kstat_named_t is_xofftxc;
377 	kstat_named_t is_fcruc;
378 	kstat_named_t is_prc64;
379 	kstat_named_t is_prc127;
380 	kstat_named_t is_prc255;
381 	kstat_named_t is_prc1023;
382 	kstat_named_t is_prc1522;
383 	kstat_named_t is_gprc;
384 	kstat_named_t is_bprc;
385 	kstat_named_t is_mprc;
386 	kstat_named_t is_gptc;
387 	kstat_named_t is_gorc;
388 	kstat_named_t is_gotc;
389 	kstat_named_t is_rnbc;
390 	kstat_named_t is_ruc;
391 	kstat_named_t is_rfc;
392 	kstat_named_t is_roc;
393 	kstat_named_t is_rjc;
394 	kstat_named_t is_mgtprc;
395 	kstat_named_t is_mgtpdc;
396 	kstat_named_t is_mgtptc;
397 	kstat_named_t is_tor;
398 	kstat_named_t is_tot;
399 	kstat_named_t is_tpr;
400 	kstat_named_t is_tpt;
401 	kstat_named_t is_ptc64;
402 	kstat_named_t is_ptc127;
403 	kstat_named_t is_ptc255;
404 	kstat_named_t is_ptc511;
405 	kstat_named_t is_ptc1023;
406 	kstat_named_t is_ptc1522;
407 	kstat_named_t is_mptc;
408 	kstat_named_t is_bptc;
409 	kstat_named_t is_tsctc;
410 	kstat_named_t is_iac;
411 	kstat_named_t is_rxdmtc;
412 } igc_stats_t;
413 
414 typedef struct igc {	/* Top-level driver state; rings point back here through irr_igc / itr_igc */
415 	dev_info_t *igc_dip;
416 	igc_attach_t igc_attach;	/* igc_attach_t bit flags */
417 	/*
418 	 * Register access settings.
419 	 */
420 	ddi_acc_handle_t igc_cfgspace;
421 	caddr_t igc_regs_base;
422 	off_t igc_regs_size;
423 	ddi_acc_handle_t igc_regs_hdl;
424 	/*
425 	 * Interrupt Management
426 	 */
427 	uint_t igc_intr_pri;
428 	int igc_intr_cap;
429 	uint_t igc_intr_type;
430 	size_t igc_intr_size;	/* presumably the allocation size of igc_intr_handles -- confirm */
431 	int igc_nintrs;
432 	ddi_intr_handle_t *igc_intr_handles;
433 	uint32_t igc_eims;
434 	/*
435 	 * Common code structures.
436 	 */
437 	struct igc_hw igc_hw;
438 	/*
439 	 * Limits and device-specific data. All data in this section after the
440 	 * igc_lock is protected by it.
441 	 */
442 	igc_limits_t igc_limits;
443 	uint32_t igc_nrx_rings;
444 	uint32_t igc_ntx_rings;
445 	uint32_t igc_rx_ndesc;	/* number of rx buffers placed on a ring's work list (see igc_rx_ring_t) */
446 	uint32_t igc_tx_ndesc;
447 	uint32_t igc_rx_nbuf;	/* total number of rx buffers (see igc_rx_ring_t); presumably per ring -- confirm */
448 	uint32_t igc_tx_nbuf;
449 	uint32_t igc_rx_nfree;
450 	uint32_t igc_rx_intr_nframes;	/* presumably defaults to IGC_DEF_RX_RING_INTR_LIMIT -- confirm */
451 	uint32_t igc_rx_bind_thresh;	/* presumably defaults to IGC_DEF_RX_BIND -- confirm */
452 	uint32_t igc_tx_bind_thresh;	/* presumably defaults to IGC_DEF_TX_BIND -- confirm */
453 	uint32_t igc_tx_notify_thresh;	/* presumably defaults to IGC_DEF_TX_NOTIFY_MIN -- confirm */
454 	uint32_t igc_tx_recycle_thresh;	/* presumably defaults to IGC_DEF_TX_RECYCLE_MIN -- confirm */
455 	uint32_t igc_tx_gap;	/* presumably defaults to IGC_DEF_TX_GAP -- confirm */
456 	uint32_t igc_eitr;	/* presumably defaults to IGC_DEF_EITR -- confirm */
457 
458 	kmutex_t igc_lock;	/* per the comment above, protects the members that follow in this section */
459 	uint32_t igc_mtu;
460 	uint32_t igc_max_frame;
461 	uint32_t igc_rx_buf_size;
462 	uint32_t igc_tx_buf_size;
463 	uint16_t igc_nucast;	/* presumably the number of entries in igc_ucast -- confirm */
464 	uint16_t igc_nmcast;	/* presumably the number of entries in igc_mcast -- confirm */
465 	igc_addr_t *igc_ucast;
466 	igc_addr_t *igc_mcast;
467 	ether_addr_t *igc_mcast_raw;
468 	link_state_t igc_link_state;
469 	link_duplex_t igc_link_duplex;
470 	uint16_t igc_link_speed;
471 	mac_led_mode_t igc_led_mode;
472 	bool igc_promisc;
473 
474 	/*
475 	 * Ring structures.
476 	 */
477 	igc_rx_ring_t *igc_rx_rings;
478 	igc_tx_ring_t *igc_tx_rings;
479 
480 	/*
481 	 * GLDv3 glue
482 	 */
483 	mac_handle_t igc_mac_hdl;
484 	mac_group_handle_t igc_rxg_hdl;
485 
486 	/*
487 	 * LED register values.
488 	 */
489 	uint32_t igc_ledctl;
490 	uint32_t igc_ledctl_on;
491 	uint32_t igc_ledctl_off;
492 	uint32_t igc_ledctl_blink;
493 
494 	/*
495 	 * Stats
496 	 */
497 	kstat_t *igc_ksp;
498 	igc_stats_t igc_stats;
499 
500 	/*
501 	 * PHY Information
502 	 */
503 	uint16_t igc_phy_ctrl;
504 	uint16_t igc_phy_status;
505 	uint16_t igc_phy_an_adv;
506 	uint16_t igc_phy_an_exp;
507 	uint16_t igc_phy_lp;
508 	uint16_t igc_phy_1000t_ctrl;
509 	uint16_t igc_phy_1000t_status;
510 	uint16_t igc_phy_ext_status;
511 	uint16_t igc_phy_mmd_ctrl;
512 	uint16_t igc_phy_mmd_sts;
513 } igc_t;
514 
515 /*
516  * Register read and write functions.
517  */
518 extern uint32_t igc_read32(igc_t *igc, uint32_t);	/* unnamed argument is presumably the register offset -- confirm */
519 extern void igc_write32(igc_t *igc, uint32_t, uint32_t);	/* unnamed arguments are presumably offset, then value -- confirm */
520 
521 /*
522  * Misc. functions related to updating and initializing hardware state.
523  */
524 extern void igc_hw_buf_update(igc_t *);
525 extern bool igc_hw_common_init(igc_t *);	/* NOTE(review): presumably true on success -- confirm */
526 extern void igc_multicast_sync(igc_t *);
527 extern void igc_hw_intr_enable(igc_t *igc);
528 extern void igc_hw_intr_disable(igc_t *igc);
529 
530 /*
531  * Buffer, data allocation, and rings.
532  */
533 extern bool igc_rx_data_alloc(igc_t *);	/* NOTE(review): presumably true on success -- confirm */
534 extern void igc_rx_data_free(igc_t *);
535 extern void igc_rx_hw_init(igc_t *);
536 extern mblk_t *igc_ring_rx(igc_rx_ring_t *, int);	/* the int is presumably a poll limit or IGC_RX_POLL_INTR -- confirm */
537 extern void igc_rx_drain(igc_t *);
538 extern mblk_t *igc_ring_tx(void *, mblk_t *);	/* the void * is presumably an igc_tx_ring_t -- confirm */
539 extern void igc_tx_recycle(igc_t *, igc_tx_ring_t *);
540 
541 extern bool igc_tx_data_alloc(igc_t *);	/* NOTE(review): presumably true on success -- confirm */
542 extern void igc_tx_data_free(igc_t *);
543 extern void igc_tx_hw_init(igc_t *);
544 
545 /*
546  * Stats related functions.
547  */
548 extern bool igc_stats_init(igc_t *);	/* NOTE(review): presumably true on success -- confirm */
549 extern void igc_stats_fini(igc_t *);
550 extern bool igc_rx_ring_stats_init(igc_t *, igc_rx_ring_t *);
551 extern void igc_rx_ring_stats_fini(igc_rx_ring_t *);
552 extern bool igc_tx_ring_stats_init(igc_t *, igc_tx_ring_t *);
553 extern void igc_tx_ring_stats_fini(igc_tx_ring_t *);
554 extern void igc_stats_update_u64(igc_t *, kstat_named_t *, uint32_t);	/* the uint32_t is presumably a register offset -- confirm */
555 
556 /*
557  * MAC registration related APIs.
558  */
559 extern bool igc_mac_register(igc_t *);	/* NOTE(review): presumably true on success -- confirm */
560 
561 #ifdef __cplusplus
562 }
563 #endif
564 
565 #endif /* _IGC_H */
566