/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2024 Oxide Comptuer Company */ #ifndef _IGC_H #define _IGC_H /* * Primary illumos igc(4D) header file. */ #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * The name of our module for MAC, kstats, etc. */ #define IGC_MOD_NAME "igc" /* * The igc hardware appears to use BAR 0, which is regs[1]. */ #define IGC_PCI_BAR 1 /* * Maximum number of RX and TX rings that it appears the hardware supports. The * strict maximum segment size that the device can take is basically 9 KiB * (9216). However, we limit this to 9k so we don't have to worry about the * margin or related bits. The I225/6 datasheet that we have access to doesn't * explicitly state the maximum MTU. Various drivers and the I210 (which has a * rather similar MAC) do have similar values. Our assumption is that this * allows for us to still receive VLAN tagged packets and that we can set the * margin appropriately for mac. */ #define IGC_MAX_RX_RINGS_I225 4 #define IGC_MAX_TX_RINGS_I225 4 #define IGC_MAX_MTU_I225 9216 /* * These are the default auto-negotiation values the device supports which is * 10/100 Half and Full duplex and then 1000/2500 full duplex. */ #define IGC_DEFAULT_ADV IGC_ALL_SPEED_DUPLEX_2500 /* * This is the default PAUSE frame time that we use. This value comes from * igb/e1000g and is 858 usec. */ #define IGC_FC_PAUSE_TIME 0x0680 /* * Default values for ring sizes and related. We'll let an interrupt drain up to * half the ring by default. These are all things that could reasonably be made * into dladm private properties of the driver. We picked the 256 byte bind * threshold for rx mostly by surveying others. For tx, we picked 512 as that's * what igb, ixgbe, and e1000g use today, though i40e and qede use 256. These * numbers are pretty arbitrary. */ #define IGC_DEF_RX_RING_SIZE 512 #define IGC_DEF_TX_RING_SIZE 512 #define IGC_DEF_RX_RING_INTR_LIMIT 256 #define IGC_DEF_RX_BIND 256 #define IGC_DEF_TX_BIND 512 /* * These numbers deal with the tx ring, blocking, recycling, and notification * thresholds. The first thing we need to pick is how many descriptors we * require before we tell MAC that the ring is blocked. This number is picked * somewhat arbitrarily. Because we could always fall back to a copy, this * could be as small as 2 (context and related) descriptors; however, the driver * can chain a fair bit together so we basically chose 4, which is a bit less * than 1% of the default ring size. We picked a default recycle threshold * check during tx of 32, which is about 6.25% of the default ring size. * * We opt to keep a two descriptor gap as that's what igb has always done and * other drivers we've surveyed do the same. */ #define IGC_DEF_TX_NOTIFY_MIN 4 #define IGC_DEF_TX_RECYCLE_MIN 32 #define IGC_DEF_TX_GAP 2 /* * This is the maximum number of cookies that we'll use in a transmit. This * number has been used across the igb/e1000g drivers over the years and comes * from the idea of taking a maximum sized LSO packet (64 KiB) plus its header * data, and dividing that by a 4 KiB page size, plus an extra descriptor in * case things end up split across pages. */ #define IGC_MAX_TX_COOKIES 18 /* * Extra alignment that we use to offset RX buffers so that way IP's header is * 4-byte aligned. */ #define IGC_RX_BUF_IP_ALIGN 2 /* * The buffer sizes that hardware uses for rx and tx are required to be 1 KiB * aligned. */ #define IGC_BUF_ALIGN 0x400 /* * This value is used to indicate that we're grabbing the ring from the * interrupt and therefore should only take a single pass. */ #define IGC_RX_POLL_INTR -1 /* * This is a value in microseconds that hardware will guarantee as a gap between * interrupts. This value is just a borrowed default from other drivers. */ #define IGC_DEF_EITR 200 /* * Because we never use the offset and address for syncing, we want to cast the * DMA sync call to void, but lets be paranoid on debug. */ #ifdef DEBUG #define IGC_DMA_SYNC(buf, flag) ASSERT0(ddi_dma_sync((buf)->idb_hdl, \ 0, 0, flag)) #else #define IGC_DMA_SYNC(buf, flag) (void) ddi_dma_sync((buf)->idb_hdl, \ 0, 0, flag) #endif /* DEBUG */ typedef enum igc_attach { IGC_ATTACH_REGS = 1 << 0, IGC_ATTACH_INTR_ALLOC = 1 << 1, IGC_ATTACH_MUTEX = 1 << 2, IGC_ATTACH_INTR_HANDLER = 1 << 3, IGC_ATTACH_LED = 1 << 4, IGC_ATTACH_STATS = 1 << 5, IGC_ATTACH_MAC = 1 << 6, IGC_ATTACH_INTR_EN = 1 << 7, /* * The rest of these represent state that is allocated and transformed * after the device's mc_start(9E) entry point, igc_m_start(), is called * by MAC. */ IGC_ATTACH_MAC_START = 1 << 8, IGC_ATTACH_RX_DATA = 1 << 9, IGC_ATTACH_TX_DATA = 1 << 10 } igc_attach_t; /* * Hardware-specific limits. */ typedef struct igc_limits { uint32_t il_max_rx_rings; uint32_t il_max_tx_rings; uint32_t il_max_mtu; } igc_limits_t; typedef struct igc_dma_buffer { caddr_t idb_va; ddi_acc_handle_t idb_acc; ddi_dma_handle_t idb_hdl; size_t idb_size; size_t idb_alloc_len; } igc_dma_buffer_t; typedef struct igc_rx_buffer { struct igc_rx_ring *irb_ring; mblk_t *irb_mp; igc_dma_buffer_t irb_dma; frtn_t irb_free_rtn; bool irb_loaned; } igc_rx_buffer_t; typedef enum igc_rx_ring_flags { /* * Indicates we're currently polling and therefore shouldn't process an * interrupt in case we're racing. */ IGC_RXR_F_POLL = 1 << 0 } igc_rx_ring_flags_t; typedef struct igc_rx_stats { kstat_named_t irs_rbytes; kstat_named_t irs_ipackets; kstat_named_t irs_desc_error; kstat_named_t irs_copy_nomem; kstat_named_t irs_bind_nobuf; kstat_named_t irs_bind_nomp; kstat_named_t irs_nbind; kstat_named_t irs_ncopy; kstat_named_t irs_ixsm; kstat_named_t irs_l3cksum_err; kstat_named_t irs_l4cksum_err; kstat_named_t irs_hcksum_miss; kstat_named_t irs_hcksum_hit; } igc_rx_stats_t; typedef struct igc_rx_ring { struct igc *irr_igc; igc_rx_ring_flags_t irr_flags; /* * The ring's index on the device and the corresponding index that * should be used for manipulating it in the EIMS, which generally is * just which single MSI-X it has. */ uint32_t irr_idx; uint32_t irr_intr_idx; mac_ring_handle_t irr_rh; kmutex_t irr_lock; /* * Stats for the ring, along with the current mac generation, which is * needed for receiving data. */ uint64_t irr_gen; igc_rx_stats_t irr_stat; kstat_t *irr_kstat; /* * Data for the rx descriptor ring itself. */ igc_dma_buffer_t irr_desc_dma; union igc_adv_rx_desc *irr_ring; uint32_t irr_next; /* * RX descriptors and related. The arena contains every allocated rx * buffer. The rx buffers are split between the work list and the free * list. The work list is 1:1 mapped to the descriptor ring. The free * list contains extra buffers. The total number of buffers is static * and is set to igc_rx_nbuf. igc_rx_ndesc go into the work list and * then the remaining ones are in the free list. */ igc_rx_buffer_t *irr_arena; igc_rx_buffer_t **irr_work_list; igc_rx_buffer_t **irr_free_list; kmutex_t irr_free_lock; kcondvar_t irr_free_cv; uint32_t irr_nfree; } igc_rx_ring_t; typedef struct igc_tx_buffer { list_node_t itb_node; mblk_t *itb_mp; igc_dma_buffer_t itb_dma; ddi_dma_handle_t itb_bind_hdl; /* * This flag indicates that this is the first tx buffer for a packet and * therefore its last descriptor for the packet is valid. See 'TX Data * Path Design' in the theory statement for more information. */ bool itb_first; /* * When set to true this tx buffer is being used to represent DMA * binding. Othewrise, it's being used to represent copying. */ bool itb_bind; /* * This indicates the last descriptor used for an entire packet and * therefore what we will garbage collect. */ uint32_t itb_last_desc; /* * This tracks how much data is currently valid in the buffer. */ size_t itb_len; } igc_tx_buffer_t; /* * This represents data that we have saved and goes into the tx context * descriptor. If the information has changed, then we likely need to reset the * context descriptor. */ typedef struct igc_tx_context_data { uint8_t itc_l2hlen; uint8_t itc_l3hlen; uint8_t itc_l4hlen; uint8_t itc_l4proto; uint16_t itc_l3proto; uint32_t itc_mss; uint32_t itc_cksum; uint32_t itc_lso; } igc_tx_context_data_t; typedef struct igc_tx_stats { kstat_named_t its_obytes; kstat_named_t its_opackets; kstat_named_t its_bad_meo; kstat_named_t its_ring_full; kstat_named_t its_no_tx_bufs; kstat_named_t its_tx_copy; kstat_named_t its_tx_bind; kstat_named_t its_tx_bind_fail; } igc_tx_stats_t; typedef struct igc_tx_ring { struct igc *itr_igc; uint32_t itr_idx; uint32_t itr_intr_idx; mac_ring_handle_t itr_rh; kmutex_t itr_lock; /* * Stats for the ring. */ igc_tx_stats_t itr_stat; kstat_t *itr_kstat; /* * Data for the TX descriptors. */ igc_dma_buffer_t itr_desc_dma; union igc_adv_tx_desc *itr_ring; uint32_t itr_ring_head; uint32_t itr_ring_tail; uint32_t itr_ring_free; bool itr_mac_blocked; bool itr_recycle; igc_tx_context_data_t itr_tx_ctx; /* * Transmit Buffers */ igc_tx_buffer_t *itr_arena; igc_tx_buffer_t **itr_work_list; list_t itr_free_list; } igc_tx_ring_t; typedef struct igc_addr { uint8_t ia_mac[ETHERADDRL]; bool ia_valid; } igc_addr_t; /* * Running counters that are used for MAC. These are named after the * corresponding hardware registers. */ typedef struct igc_stats { kstat_named_t is_crcerrs; kstat_named_t is_algnerrc; kstat_named_t is_mpc; kstat_named_t is_scc; kstat_named_t is_ecol; kstat_named_t is_mcc; kstat_named_t is_latecol; kstat_named_t is_colc; kstat_named_t is_rerc; kstat_named_t is_dc; kstat_named_t is_tncrs; kstat_named_t is_htdpmc; kstat_named_t is_rlec; kstat_named_t is_xonrxc; kstat_named_t is_xontxc; kstat_named_t is_xoffrxc; kstat_named_t is_xofftxc; kstat_named_t is_fcruc; kstat_named_t is_prc64; kstat_named_t is_prc127; kstat_named_t is_prc255; kstat_named_t is_prc1023; kstat_named_t is_prc1522; kstat_named_t is_gprc; kstat_named_t is_bprc; kstat_named_t is_mprc; kstat_named_t is_gptc; kstat_named_t is_gorc; kstat_named_t is_gotc; kstat_named_t is_rnbc; kstat_named_t is_ruc; kstat_named_t is_rfc; kstat_named_t is_roc; kstat_named_t is_rjc; kstat_named_t is_mgtprc; kstat_named_t is_mgtpdc; kstat_named_t is_mgtptc; kstat_named_t is_tor; kstat_named_t is_tot; kstat_named_t is_tpr; kstat_named_t is_tpt; kstat_named_t is_ptc64; kstat_named_t is_ptc127; kstat_named_t is_ptc255; kstat_named_t is_ptc511; kstat_named_t is_ptc1023; kstat_named_t is_ptc1522; kstat_named_t is_mptc; kstat_named_t is_bptc; kstat_named_t is_tsctc; kstat_named_t is_iac; kstat_named_t is_rxdmtc; } igc_stats_t; typedef struct igc { dev_info_t *igc_dip; igc_attach_t igc_attach; /* * Register access settings. */ ddi_acc_handle_t igc_cfgspace; caddr_t igc_regs_base; off_t igc_regs_size; ddi_acc_handle_t igc_regs_hdl; /* * Interrupt Management */ uint_t igc_intr_pri; int igc_intr_cap; uint_t igc_intr_type; size_t igc_intr_size; int igc_nintrs; ddi_intr_handle_t *igc_intr_handles; uint32_t igc_eims; /* * Common code structures. */ struct igc_hw igc_hw; /* * Limits and device-specific data. All data in this section after the * igc_lock is protected by it. */ igc_limits_t igc_limits; uint32_t igc_nrx_rings; uint32_t igc_ntx_rings; uint32_t igc_rx_ndesc; uint32_t igc_tx_ndesc; uint32_t igc_rx_nbuf; uint32_t igc_tx_nbuf; uint32_t igc_rx_nfree; uint32_t igc_rx_intr_nframes; uint32_t igc_rx_bind_thresh; uint32_t igc_tx_bind_thresh; uint32_t igc_tx_notify_thresh; uint32_t igc_tx_recycle_thresh; uint32_t igc_tx_gap; uint32_t igc_eitr; kmutex_t igc_lock; uint32_t igc_mtu; uint32_t igc_max_frame; uint32_t igc_rx_buf_size; uint32_t igc_tx_buf_size; uint16_t igc_nucast; uint16_t igc_nmcast; igc_addr_t *igc_ucast; igc_addr_t *igc_mcast; ether_addr_t *igc_mcast_raw; link_state_t igc_link_state; link_duplex_t igc_link_duplex; uint16_t igc_link_speed; mac_led_mode_t igc_led_mode; bool igc_promisc; /* * Ring structures. */ igc_rx_ring_t *igc_rx_rings; igc_tx_ring_t *igc_tx_rings; /* * GLDv3 glue */ mac_handle_t igc_mac_hdl; mac_group_handle_t igc_rxg_hdl; /* * LED register values. */ uint32_t igc_ledctl; uint32_t igc_ledctl_on; uint32_t igc_ledctl_off; uint32_t igc_ledctl_blink; /* * Stats */ kstat_t *igc_ksp; igc_stats_t igc_stats; /* * PHY Information */ uint16_t igc_phy_ctrl; uint16_t igc_phy_status; uint16_t igc_phy_an_adv; uint16_t igc_phy_an_exp; uint16_t igc_phy_lp; uint16_t igc_phy_1000t_ctrl; uint16_t igc_phy_1000t_status; uint16_t igc_phy_ext_status; uint16_t igc_phy_mmd_ctrl; uint16_t igc_phy_mmd_sts; } igc_t; /* * Register read and write functions. */ extern uint32_t igc_read32(igc_t *igc, uint32_t); extern void igc_write32(igc_t *igc, uint32_t, uint32_t); /* * Misc. functions related to updating and initializing hardware state. */ extern void igc_hw_buf_update(igc_t *); extern bool igc_hw_common_init(igc_t *); extern void igc_multicast_sync(igc_t *); extern void igc_hw_intr_enable(igc_t *igc); extern void igc_hw_intr_disable(igc_t *igc); /* * Buffer, data allocation, and rings. */ extern bool igc_rx_data_alloc(igc_t *); extern void igc_rx_data_free(igc_t *); extern void igc_rx_hw_init(igc_t *); extern mblk_t *igc_ring_rx(igc_rx_ring_t *, int); extern void igc_rx_drain(igc_t *); extern mblk_t *igc_ring_tx(void *, mblk_t *); extern void igc_tx_recycle(igc_t *, igc_tx_ring_t *); extern bool igc_tx_data_alloc(igc_t *); extern void igc_tx_data_free(igc_t *); extern void igc_tx_hw_init(igc_t *); /* * Stats related functions. */ extern bool igc_stats_init(igc_t *); extern void igc_stats_fini(igc_t *); extern bool igc_rx_ring_stats_init(igc_t *, igc_rx_ring_t *); extern void igc_rx_ring_stats_fini(igc_rx_ring_t *); extern bool igc_tx_ring_stats_init(igc_t *, igc_tx_ring_t *); extern void igc_tx_ring_stats_fini(igc_tx_ring_t *); extern void igc_stats_update_u64(igc_t *, kstat_named_t *, uint32_t); /* * MAC registration related APIs. */ extern bool igc_mac_register(igc_t *); #ifdef __cplusplus } #endif #endif /* _IGC_H */