1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Comptuer Company 14 */ 15 16 #ifndef _IGC_H 17 #define _IGC_H 18 19 /* 20 * Primary illumos igc(4D) header file. 21 */ 22 23 #include <sys/types.h> 24 #include <sys/mac_provider.h> 25 #include <sys/mac_ether.h> 26 #include <sys/vlan.h> 27 #include <sys/dlpi.h> 28 #include <sys/pattr.h> 29 #include <sys/list.h> 30 31 #include <core/igc_hw.h> 32 #include <core/igc_api.h> 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 38 /* 39 * The name of our module for MAC, kstats, etc. 40 */ 41 #define IGC_MOD_NAME "igc" 42 43 /* 44 * The igc hardware appears to use BAR 0, which is regs[1]. 45 */ 46 #define IGC_PCI_BAR 1 47 48 /* 49 * Maximum number of RX and TX rings that it appears the hardware supports. The 50 * strict maximum segment size that the device can take is basically 9 KiB 51 * (9216). However, we limit this to 9k so we don't have to worry about the 52 * margin or related bits. The I225/6 datasheet that we have access to doesn't 53 * explicitly state the maximum MTU. Various drivers and the I210 (which has a 54 * rather similar MAC) do have similar values. Our assumption is that this 55 * allows for us to still receive VLAN tagged packets and that we can set the 56 * margin appropriately for mac. 57 */ 58 #define IGC_MAX_RX_RINGS_I225 4 59 #define IGC_MAX_TX_RINGS_I225 4 60 #define IGC_MAX_MTU_I225 9216 61 62 /* 63 * These are the default auto-negotiation values the device supports which is 64 * 10/100 Half and Full duplex and then 1000/2500 full duplex. 65 */ 66 #define IGC_DEFAULT_ADV IGC_ALL_SPEED_DUPLEX_2500 67 68 /* 69 * This is the default PAUSE frame time that we use. This value comes from 70 * igb/e1000g and is 858 usec. 71 */ 72 #define IGC_FC_PAUSE_TIME 0x0680 73 74 /* 75 * Default values for ring sizes and related. We'll let an interrupt drain up to 76 * half the ring by default. These are all things that could reasonably be made 77 * into dladm private properties of the driver. We picked the 256 byte bind 78 * threshold for rx mostly by surveying others. For tx, we picked 512 as that's 79 * what igb, ixgbe, and e1000g use today, though i40e and qede use 256. These 80 * numbers are pretty arbitrary. 81 */ 82 #define IGC_DEF_RX_RING_SIZE 512 83 #define IGC_DEF_TX_RING_SIZE 512 84 #define IGC_DEF_RX_RING_INTR_LIMIT 256 85 #define IGC_DEF_RX_BIND 256 86 #define IGC_DEF_TX_BIND 512 87 88 /* 89 * These numbers deal with the tx ring, blocking, recycling, and notification 90 * thresholds. The first thing we need to pick is how many descriptors we 91 * require before we tell MAC that the ring is blocked. This number is picked 92 * somewhat arbitrarily. Because we could always fall back to a copy, this 93 * could be as small as 2 (context and related) descriptors; however, the driver 94 * can chain a fair bit together so we basically chose 4, which is a bit less 95 * than 1% of the default ring size. We picked a default recycle threshold 96 * check during tx of 32, which is about 6.25% of the default ring size. 97 * 98 * We opt to keep a two descriptor gap as that's what igb has always done and 99 * other drivers we've surveyed do the same. 100 */ 101 #define IGC_DEF_TX_NOTIFY_MIN 4 102 #define IGC_DEF_TX_RECYCLE_MIN 32 103 #define IGC_DEF_TX_GAP 2 104 105 /* 106 * This is the maximum number of cookies that we'll use in a transmit. This 107 * number has been used across the igb/e1000g drivers over the years and comes 108 * from the idea of taking a maximum sized LSO packet (64 KiB) plus its header 109 * data, and dividing that by a 4 KiB page size, plus an extra descriptor in 110 * case things end up split across pages. 111 */ 112 #define IGC_MAX_TX_COOKIES 18 113 114 /* 115 * Extra alignment that we use to offset RX buffers so that way IP's header is 116 * 4-byte aligned. 117 */ 118 #define IGC_RX_BUF_IP_ALIGN 2 119 120 /* 121 * The buffer sizes that hardware uses for rx and tx are required to be 1 KiB 122 * aligned. 123 */ 124 #define IGC_BUF_ALIGN 0x400 125 126 /* 127 * This value is used to indicate that we're grabbing the ring from the 128 * interrupt and therefore should only take a single pass. 129 */ 130 #define IGC_RX_POLL_INTR -1 131 132 /* 133 * This is a value in microseconds that hardware will guarantee as a gap between 134 * interrupts. This value is just a borrowed default from other drivers. 135 */ 136 #define IGC_DEF_EITR 200 137 138 /* 139 * Because we never use the offset and address for syncing, we want to cast the 140 * DMA sync call to void, but lets be paranoid on debug. 141 */ 142 #ifdef DEBUG 143 #define IGC_DMA_SYNC(buf, flag) ASSERT0(ddi_dma_sync((buf)->idb_hdl, \ 144 0, 0, flag)) 145 #else 146 #define IGC_DMA_SYNC(buf, flag) (void) ddi_dma_sync((buf)->idb_hdl, \ 147 0, 0, flag) 148 #endif /* DEBUG */ 149 150 typedef enum igc_attach { 151 IGC_ATTACH_REGS = 1 << 0, 152 IGC_ATTACH_INTR_ALLOC = 1 << 1, 153 IGC_ATTACH_MUTEX = 1 << 2, 154 IGC_ATTACH_INTR_HANDLER = 1 << 3, 155 IGC_ATTACH_LED = 1 << 4, 156 IGC_ATTACH_STATS = 1 << 5, 157 IGC_ATTACH_MAC = 1 << 6, 158 IGC_ATTACH_INTR_EN = 1 << 7, 159 /* 160 * The rest of these represent state that is allocated and transformed 161 * after the device's mc_start(9E) entry point, igc_m_start(), is called 162 * by MAC. 163 */ 164 IGC_ATTACH_MAC_START = 1 << 8, 165 IGC_ATTACH_RX_DATA = 1 << 9, 166 IGC_ATTACH_TX_DATA = 1 << 10 167 } igc_attach_t; 168 169 /* 170 * Hardware-specific limits. 171 */ 172 typedef struct igc_limits { 173 uint32_t il_max_rx_rings; 174 uint32_t il_max_tx_rings; 175 uint32_t il_max_mtu; 176 } igc_limits_t; 177 178 typedef struct igc_dma_buffer { 179 caddr_t idb_va; 180 ddi_acc_handle_t idb_acc; 181 ddi_dma_handle_t idb_hdl; 182 size_t idb_size; 183 size_t idb_alloc_len; 184 } igc_dma_buffer_t; 185 186 typedef struct igc_rx_buffer { 187 struct igc_rx_ring *irb_ring; 188 mblk_t *irb_mp; 189 igc_dma_buffer_t irb_dma; 190 frtn_t irb_free_rtn; 191 bool irb_loaned; 192 } igc_rx_buffer_t; 193 194 typedef enum igc_rx_ring_flags { 195 /* 196 * Indicates we're currently polling and therefore shouldn't process an 197 * interrupt in case we're racing. 198 */ 199 IGC_RXR_F_POLL = 1 << 0 200 } igc_rx_ring_flags_t; 201 202 typedef struct igc_rx_stats { 203 kstat_named_t irs_rbytes; 204 kstat_named_t irs_ipackets; 205 kstat_named_t irs_desc_error; 206 kstat_named_t irs_copy_nomem; 207 kstat_named_t irs_bind_nobuf; 208 kstat_named_t irs_bind_nomp; 209 kstat_named_t irs_nbind; 210 kstat_named_t irs_ncopy; 211 kstat_named_t irs_ixsm; 212 kstat_named_t irs_l3cksum_err; 213 kstat_named_t irs_l4cksum_err; 214 kstat_named_t irs_hcksum_miss; 215 kstat_named_t irs_hcksum_hit; 216 } igc_rx_stats_t; 217 218 typedef struct igc_rx_ring { 219 struct igc *irr_igc; 220 igc_rx_ring_flags_t irr_flags; 221 /* 222 * The ring's index on the device and the corresponding index that 223 * should be used for manipulating it in the EIMS, which generally is 224 * just which single MSI-X it has. 225 */ 226 uint32_t irr_idx; 227 uint32_t irr_intr_idx; 228 mac_ring_handle_t irr_rh; 229 kmutex_t irr_lock; 230 231 /* 232 * Stats for the ring, along with the current mac generation, which is 233 * needed for receiving data. 234 */ 235 uint64_t irr_gen; 236 igc_rx_stats_t irr_stat; 237 kstat_t *irr_kstat; 238 239 /* 240 * Data for the rx descriptor ring itself. 241 */ 242 igc_dma_buffer_t irr_desc_dma; 243 union igc_adv_rx_desc *irr_ring; 244 uint32_t irr_next; 245 246 /* 247 * RX descriptors and related. The arena contains every allocated rx 248 * buffer. The rx buffers are split between the work list and the free 249 * list. The work list is 1:1 mapped to the descriptor ring. The free 250 * list contains extra buffers. The total number of buffers is static 251 * and is set to igc_rx_nbuf. igc_rx_ndesc go into the work list and 252 * then the remaining ones are in the free list. 253 */ 254 igc_rx_buffer_t *irr_arena; 255 igc_rx_buffer_t **irr_work_list; 256 igc_rx_buffer_t **irr_free_list; 257 kmutex_t irr_free_lock; 258 kcondvar_t irr_free_cv; 259 uint32_t irr_nfree; 260 } igc_rx_ring_t; 261 262 typedef struct igc_tx_buffer { 263 list_node_t itb_node; 264 mblk_t *itb_mp; 265 igc_dma_buffer_t itb_dma; 266 ddi_dma_handle_t itb_bind_hdl; 267 /* 268 * This flag indicates that this is the first tx buffer for a packet and 269 * therefore its last descriptor for the packet is valid. See 'TX Data 270 * Path Design' in the theory statement for more information. 271 */ 272 bool itb_first; 273 /* 274 * When set to true this tx buffer is being used to represent DMA 275 * binding. Othewrise, it's being used to represent copying. 276 */ 277 bool itb_bind; 278 /* 279 * This indicates the last descriptor used for an entire packet and 280 * therefore what we will garbage collect. 281 */ 282 uint32_t itb_last_desc; 283 /* 284 * This tracks how much data is currently valid in the buffer. 285 */ 286 size_t itb_len; 287 } igc_tx_buffer_t; 288 289 /* 290 * This represents data that we have saved and goes into the tx context 291 * descriptor. If the information has changed, then we likely need to reset the 292 * context descriptor. 293 */ 294 typedef struct igc_tx_context_data { 295 uint8_t itc_l2hlen; 296 uint8_t itc_l3hlen; 297 uint8_t itc_l4hlen; 298 uint8_t itc_l4proto; 299 uint16_t itc_l3proto; 300 uint32_t itc_mss; 301 uint32_t itc_cksum; 302 uint32_t itc_lso; 303 } igc_tx_context_data_t; 304 305 typedef struct igc_tx_stats { 306 kstat_named_t its_obytes; 307 kstat_named_t its_opackets; 308 kstat_named_t its_bad_meo; 309 kstat_named_t its_ring_full; 310 kstat_named_t its_no_tx_bufs; 311 kstat_named_t its_tx_copy; 312 kstat_named_t its_tx_bind; 313 kstat_named_t its_tx_bind_fail; 314 } igc_tx_stats_t; 315 316 typedef struct igc_tx_ring { 317 struct igc *itr_igc; 318 uint32_t itr_idx; 319 uint32_t itr_intr_idx; 320 mac_ring_handle_t itr_rh; 321 kmutex_t itr_lock; 322 323 /* 324 * Stats for the ring. 325 */ 326 igc_tx_stats_t itr_stat; 327 kstat_t *itr_kstat; 328 329 /* 330 * Data for the TX descriptors. 331 */ 332 igc_dma_buffer_t itr_desc_dma; 333 union igc_adv_tx_desc *itr_ring; 334 uint32_t itr_ring_head; 335 uint32_t itr_ring_tail; 336 uint32_t itr_ring_free; 337 bool itr_mac_blocked; 338 bool itr_recycle; 339 igc_tx_context_data_t itr_tx_ctx; 340 341 /* 342 * Transmit Buffers 343 */ 344 igc_tx_buffer_t *itr_arena; 345 igc_tx_buffer_t **itr_work_list; 346 list_t itr_free_list; 347 348 } igc_tx_ring_t; 349 350 typedef struct igc_addr { 351 uint8_t ia_mac[ETHERADDRL]; 352 bool ia_valid; 353 } igc_addr_t; 354 355 /* 356 * Running counters that are used for MAC. These are named after the 357 * corresponding hardware registers. 358 */ 359 typedef struct igc_stats { 360 kstat_named_t is_crcerrs; 361 kstat_named_t is_algnerrc; 362 kstat_named_t is_mpc; 363 kstat_named_t is_scc; 364 kstat_named_t is_ecol; 365 kstat_named_t is_mcc; 366 kstat_named_t is_latecol; 367 kstat_named_t is_colc; 368 kstat_named_t is_rerc; 369 kstat_named_t is_dc; 370 kstat_named_t is_tncrs; 371 kstat_named_t is_htdpmc; 372 kstat_named_t is_rlec; 373 kstat_named_t is_xonrxc; 374 kstat_named_t is_xontxc; 375 kstat_named_t is_xoffrxc; 376 kstat_named_t is_xofftxc; 377 kstat_named_t is_fcruc; 378 kstat_named_t is_prc64; 379 kstat_named_t is_prc127; 380 kstat_named_t is_prc255; 381 kstat_named_t is_prc1023; 382 kstat_named_t is_prc1522; 383 kstat_named_t is_gprc; 384 kstat_named_t is_bprc; 385 kstat_named_t is_mprc; 386 kstat_named_t is_gptc; 387 kstat_named_t is_gorc; 388 kstat_named_t is_gotc; 389 kstat_named_t is_rnbc; 390 kstat_named_t is_ruc; 391 kstat_named_t is_rfc; 392 kstat_named_t is_roc; 393 kstat_named_t is_rjc; 394 kstat_named_t is_mgtprc; 395 kstat_named_t is_mgtpdc; 396 kstat_named_t is_mgtptc; 397 kstat_named_t is_tor; 398 kstat_named_t is_tot; 399 kstat_named_t is_tpr; 400 kstat_named_t is_tpt; 401 kstat_named_t is_ptc64; 402 kstat_named_t is_ptc127; 403 kstat_named_t is_ptc255; 404 kstat_named_t is_ptc511; 405 kstat_named_t is_ptc1023; 406 kstat_named_t is_ptc1522; 407 kstat_named_t is_mptc; 408 kstat_named_t is_bptc; 409 kstat_named_t is_tsctc; 410 kstat_named_t is_iac; 411 kstat_named_t is_rxdmtc; 412 } igc_stats_t; 413 414 typedef struct igc { 415 dev_info_t *igc_dip; 416 igc_attach_t igc_attach; 417 /* 418 * Register access settings. 419 */ 420 ddi_acc_handle_t igc_cfgspace; 421 caddr_t igc_regs_base; 422 off_t igc_regs_size; 423 ddi_acc_handle_t igc_regs_hdl; 424 /* 425 * Interrupt Management 426 */ 427 uint_t igc_intr_pri; 428 int igc_intr_cap; 429 uint_t igc_intr_type; 430 size_t igc_intr_size; 431 int igc_nintrs; 432 ddi_intr_handle_t *igc_intr_handles; 433 uint32_t igc_eims; 434 /* 435 * Common code structures. 436 */ 437 struct igc_hw igc_hw; 438 /* 439 * Limits and device-specific data. All data in this section after the 440 * igc_lock is protected by it. 441 */ 442 igc_limits_t igc_limits; 443 uint32_t igc_nrx_rings; 444 uint32_t igc_ntx_rings; 445 uint32_t igc_rx_ndesc; 446 uint32_t igc_tx_ndesc; 447 uint32_t igc_rx_nbuf; 448 uint32_t igc_tx_nbuf; 449 uint32_t igc_rx_nfree; 450 uint32_t igc_rx_intr_nframes; 451 uint32_t igc_rx_bind_thresh; 452 uint32_t igc_tx_bind_thresh; 453 uint32_t igc_tx_notify_thresh; 454 uint32_t igc_tx_recycle_thresh; 455 uint32_t igc_tx_gap; 456 uint32_t igc_eitr; 457 458 kmutex_t igc_lock; 459 uint32_t igc_mtu; 460 uint32_t igc_max_frame; 461 uint32_t igc_rx_buf_size; 462 uint32_t igc_tx_buf_size; 463 uint16_t igc_nucast; 464 uint16_t igc_nmcast; 465 igc_addr_t *igc_ucast; 466 igc_addr_t *igc_mcast; 467 ether_addr_t *igc_mcast_raw; 468 link_state_t igc_link_state; 469 link_duplex_t igc_link_duplex; 470 uint16_t igc_link_speed; 471 mac_led_mode_t igc_led_mode; 472 bool igc_promisc; 473 474 /* 475 * Ring structures. 476 */ 477 igc_rx_ring_t *igc_rx_rings; 478 igc_tx_ring_t *igc_tx_rings; 479 480 /* 481 * GLDv3 glue 482 */ 483 mac_handle_t igc_mac_hdl; 484 mac_group_handle_t igc_rxg_hdl; 485 486 /* 487 * LED register values. 488 */ 489 uint32_t igc_ledctl; 490 uint32_t igc_ledctl_on; 491 uint32_t igc_ledctl_off; 492 uint32_t igc_ledctl_blink; 493 494 /* 495 * Stats 496 */ 497 kstat_t *igc_ksp; 498 igc_stats_t igc_stats; 499 500 /* 501 * PHY Information 502 */ 503 uint16_t igc_phy_ctrl; 504 uint16_t igc_phy_status; 505 uint16_t igc_phy_an_adv; 506 uint16_t igc_phy_an_exp; 507 uint16_t igc_phy_lp; 508 uint16_t igc_phy_1000t_ctrl; 509 uint16_t igc_phy_1000t_status; 510 uint16_t igc_phy_ext_status; 511 uint16_t igc_phy_mmd_ctrl; 512 uint16_t igc_phy_mmd_sts; 513 } igc_t; 514 515 /* 516 * Register read and write functions. 517 */ 518 extern uint32_t igc_read32(igc_t *igc, uint32_t); 519 extern void igc_write32(igc_t *igc, uint32_t, uint32_t); 520 521 /* 522 * Misc. functions related to updating and initializing hardware state. 523 */ 524 extern void igc_hw_buf_update(igc_t *); 525 extern bool igc_hw_common_init(igc_t *); 526 extern void igc_multicast_sync(igc_t *); 527 extern void igc_hw_intr_enable(igc_t *igc); 528 extern void igc_hw_intr_disable(igc_t *igc); 529 530 /* 531 * Buffer, data allocation, and rings. 532 */ 533 extern bool igc_rx_data_alloc(igc_t *); 534 extern void igc_rx_data_free(igc_t *); 535 extern void igc_rx_hw_init(igc_t *); 536 extern mblk_t *igc_ring_rx(igc_rx_ring_t *, int); 537 extern void igc_rx_drain(igc_t *); 538 extern mblk_t *igc_ring_tx(void *, mblk_t *); 539 extern void igc_tx_recycle(igc_t *, igc_tx_ring_t *); 540 541 extern bool igc_tx_data_alloc(igc_t *); 542 extern void igc_tx_data_free(igc_t *); 543 extern void igc_tx_hw_init(igc_t *); 544 545 /* 546 * Stats related functions. 547 */ 548 extern bool igc_stats_init(igc_t *); 549 extern void igc_stats_fini(igc_t *); 550 extern bool igc_rx_ring_stats_init(igc_t *, igc_rx_ring_t *); 551 extern void igc_rx_ring_stats_fini(igc_rx_ring_t *); 552 extern bool igc_tx_ring_stats_init(igc_t *, igc_tx_ring_t *); 553 extern void igc_tx_ring_stats_fini(igc_tx_ring_t *); 554 extern void igc_stats_update_u64(igc_t *, kstat_named_t *, uint32_t); 555 556 /* 557 * MAC registration related APIs. 558 */ 559 extern bool igc_mac_register(igc_t *); 560 561 #ifdef __cplusplus 562 } 563 #endif 564 565 #endif /* _IGC_H */ 566