/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

#ifndef _ENA_H
#define _ENA_H

#include <sys/stdbool.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/pci.h>
#include <sys/sysmacros.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/mac_provider.h>
#include <sys/pattr.h>
#include <sys/strsun.h>
#include <sys/ethernet.h>
#include <sys/vlan.h>
#include <sys/utsname.h>
#include "ena_hw.h"

/*
 * AWS ENA Ethernet Driver
 */

#ifdef __cplusplus
extern "C" {
#endif

#define ENA_MODULE_NAME "ena"

/*
 * The minimum supported ENA device controller version.
 */
#define ENA_CTRL_MAJOR_VSN_MIN 0
#define ENA_CTRL_MINOR_VSN_MIN 0
#define ENA_CTRL_SUBMINOR_VSN_MIN 1

#define ENA_MODULE_VER_MAJOR 1
#define ENA_MODULE_VER_MINOR 0
#define ENA_MODULE_VER_SUBMINOR 0

/*
 * The Linux driver doesn't document what the specification version
 * number controls or the contract around version changes. The best we
 * can do is use the same version that they use and port version
 * changes as they come (the last one was in 2018).
 *
 * common: ENA_COMMON_SPEC_VERSION_{MAJOR,MINOR}
 */
#define ENA_SPEC_VERSION_MAJOR 2
#define ENA_SPEC_VERSION_MINOR 0

/* This represents BAR 0. */
#define ENA_REG_NUMBER 1

/*
 * A sentinel value passed as an argument to ena_ring_rx() to indicate
 * that the Rx ring is being read in interrupt mode, not polling mode.
 */
#define ENA_INTERRUPT_MODE -1

#define ENA_RX_BUF_IPHDR_ALIGNMENT 2
#define ENA_ADMINQ_DEPTH 32
#define ENA_AENQ_NUM_DESCS 32

/* Convert milliseconds to nanoseconds. */
#define ENA_MS_TO_NS(ms) ((ms) * 1000000ul)

/*
 * The default amount of time we will wait for an admin command to complete,
 * specified in nanoseconds. This can be overridden by hints received from the
 * device. We default to half a second.
 */
#define ENA_ADMIN_CMD_DEF_TIMEOUT_NS MSEC2NSEC(500)

/*
 * The interval of the watchdog timer, in nanoseconds.
 */
#define ENA_WATCHDOG_INTERVAL_NS MSEC2NSEC(1000)

/*
 * The device sends a keepalive message every second. If we don't see any for
 * a while we will trigger a device reset. Other open source drivers use
 * 6 seconds for this value, so we do too.
 */
#define ENA_DEVICE_KEEPALIVE_TIMEOUT_NS MSEC2NSEC(6000)

/*
 * The number of consecutive times a Tx queue needs to be seen as blocked by
 * the watchdog timer before a reset is invoked. Since the watchdog interval
 * is one second, this is approximately the stall time in seconds.
 */
#define ENA_TX_STALL_TIMEOUT 8

/*
 * In order to avoid rapidly sending basic stats requests to the controller, we
 * impose a limit of one request every 10ms.
 */
#define ENA_BASIC_STATS_MINIMUM_INTERVAL_NS MSEC2NSEC(10)
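
/*
 * Illustrative sketch only (not driver code): the interval above is
 * intended for use in expressions, which is why its definition has no
 * trailing semicolon. Given an ena_t *ena, a basic stats fetch might
 * be rate-limited roughly as follows, using the cached timestamp in
 * ena_device_basic_stat_last_update:
 *
 *	hrtime_t now = gethrtime();
 *	if (now - ena->ena_device_basic_stat_last_update <
 *	    ENA_BASIC_STATS_MINIMUM_INTERVAL_NS) {
 *		serve the previously cached values instead;
 *	}
 */
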
/*
 * Property macros.
 */
#define ENA_PROP_RXQ_NUM_DESCS "rx_queue_num_descs"
#define ENA_PROP_RXQ_NUM_DESCS_MIN 64

#define ENA_PROP_TXQ_NUM_DESCS "tx_queue_num_descs"
#define ENA_PROP_TXQ_NUM_DESCS_MIN 64

#define ENA_PROP_RXQ_INTR_LIMIT "rx_queue_intr_limit"
#define ENA_PROP_RXQ_INTR_LIMIT_MIN 16
#define ENA_PROP_RXQ_INTR_LIMIT_MAX 4096
#define ENA_PROP_RXQ_INTR_LIMIT_DEF 256

#define ENA_DMA_BIT_MASK(x) ((1ULL << (x)) - 1ULL)
#define ENA_DMA_VERIFY_ADDR(ena, phys_addr) \
	VERIFY3U(ENA_DMA_BIT_MASK((ena)->ena_dma_width) & (phys_addr), \
	    ==, (phys_addr))
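
/*
 * Illustrative sketch only: ENA_DMA_BIT_MASK(n) yields a mask covering
 * an n-bit address space, e.g. ENA_DMA_BIT_MASK(32) == 0xFFFFFFFF.
 * ENA_DMA_VERIFY_ADDR() asserts that a physical address fits within
 * the DMA width reported by the device: masking the address must
 * leave it unchanged.
 *
 *	uint64_t phys = edb->edb_cookie->dmac_laddress;
 *	ENA_DMA_VERIFY_ADDR(ena, phys);
 */
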
typedef struct ena_dma_conf {
	size_t edc_size;
	uint64_t edc_align;
	int edc_sgl;
	uchar_t edc_endian;
	bool edc_stream;
} ena_dma_conf_t;

typedef struct ena_dma_buf {
	caddr_t edb_va;
	size_t edb_len;
	/*
	 * The length given by the DMA engine, kept around for
	 * debugging purposes.
	 */
	size_t edb_real_len;
	size_t edb_used_len;
	ddi_acc_handle_t edb_acc_hdl;
	ddi_dma_handle_t edb_dma_hdl;
	const ddi_dma_cookie_t *edb_cookie;
} ena_dma_buf_t;

/*
 * We always sync the entire range, and therefore expect success.
 */
#ifdef DEBUG
#define ENA_DMA_SYNC(buf, flag) \
	ASSERT0(ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#else /* DEBUG */
#define ENA_DMA_SYNC(buf, flag) \
	((void)ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#endif

typedef void (*ena_aenq_hdlr_t)(void *data, enahw_aenq_desc_t *desc);

typedef struct ena_aenq {
	enahw_aenq_desc_t *eaenq_descs;
	ena_dma_buf_t eaenq_dma;
	ena_aenq_hdlr_t eaenq_hdlrs[ENAHW_AENQ_GROUPS_ARR_NUM];
	uint16_t eaenq_num_descs;
	uint16_t eaenq_head;
	uint8_t eaenq_phase;
} ena_aenq_t;

typedef struct ena_admin_sq {
	enahw_cmd_desc_t *eas_entries;
	ena_dma_buf_t eas_dma;
	uint32_t *eas_dbaddr;
	uint16_t eas_tail;
	uint8_t eas_phase;
} ena_admin_sq_t;

typedef struct ena_admin_cq {
	enahw_resp_desc_t *eac_entries;
	ena_dma_buf_t eac_dma;
	uint16_t eac_head;
	uint8_t eac_phase;
} ena_admin_cq_t;

/*
 * The command context is used to track outstanding requests and match
 * them to device responses.
 */
typedef struct ena_cmd_ctx {
	list_node_t ectx_node;

	/*
	 * The index into ea_cmd_ctxs where this ctx lives. Used as
	 * the command ID value in the command descriptor. This allows
	 * us to match a response to its associated context.
	 */
	uint16_t ectx_id;

	/* Is the command pending? */
	bool ectx_pending;

	/* The type of command associated with this context. */
	enahw_cmd_opcode_t ectx_cmd_opcode;

	/*
	 * The location to copy the full response to. This is
	 * specified by the caller of the command during
	 * submission.
	 */
	enahw_resp_desc_t *ectx_resp;
} ena_cmd_ctx_t;

/*
 * The admin queue, the queue through which commands are sent to the
 * device.
 *
 * WO: Write Once (at initialization)
 *
 * In general, only a single lock needs to be held in order to access
 * the different parts of the admin queue:
 *
 * sq_lock: Any data dealing with submitting admin commands, which
 * includes acquiring a command context.
 *
 * cq_lock: Any data dealing with reading command responses.
 *
 * stat_lock: For accessing statistics.
 *
 * In some cases, the ectx_lock/stat_lock may be held in tandem with
 * either the SQ or CQ lock. In that case, the SQ/CQ lock is always
 * entered first.
 */
typedef struct ena_adminq {
	kmutex_t ea_sq_lock; /* WO */
	kmutex_t ea_cq_lock; /* WO */
	kmutex_t ea_stat_lock; /* WO */

	hrtime_t ea_cmd_timeout_ns; /* WO */

	uint16_t ea_qlen; /* WO */
	bool ea_poll_mode; /* WO */

	ena_cmd_ctx_t *ea_cmd_ctxs; /* WO */
	list_t ea_cmd_ctxs_free; /* ea_sq_lock */
	list_t ea_cmd_ctxs_used; /* ea_sq_lock */
	uint16_t ea_pending_cmds; /* ea_sq_lock */
	ena_admin_sq_t ea_sq; /* ea_sq_lock */
	ena_admin_cq_t ea_cq; /* ea_cq_lock */

	/* ea_stat_lock */
	struct ena_adminq_stats {
		uint64_t cmds_fail;
		uint64_t cmds_submitted;
		uint64_t cmds_success;
		uint64_t queue_full;
	} ea_stats;
} ena_adminq_t;

/*
 * Cache of the last set of value hints received from the device. See the
 * definition of enahw_device_hints_t in ena_hw.h for more detail on the
 * purpose of each.
 */
typedef struct ena_hints {
	uint16_t eh_mmio_read_timeout;
	uint16_t eh_keep_alive_timeout;
	uint16_t eh_tx_comp_timeout;
	uint16_t eh_missed_tx_reset_threshold;
	uint16_t eh_admin_comp_timeout;
	uint16_t eh_max_tx_sgl;
	uint16_t eh_max_rx_sgl;
} ena_hints_t;

typedef enum ena_attach_seq {
	ENA_ATTACH_PCI = 1, /* PCI config space */
	ENA_ATTACH_REGS, /* BAR mapping */
	ENA_ATTACH_DEV_INIT, /* ENA device initialization */
	ENA_ATTACH_READ_CONF, /* Read driver conf file */
	ENA_ATTACH_DEV_CFG, /* Set any needed device config */
	ENA_ATTACH_INTR_ALLOC, /* interrupt handles allocated */
	ENA_ATTACH_INTR_HDLRS, /* intr handlers set */
	ENA_ATTACH_TXQS_ALLOC, /* Tx Queues allocated */
	ENA_ATTACH_RXQS_ALLOC, /* Rx Queues allocated */
	ENA_ATTACH_MAC_REGISTER, /* registered with mac */
	ENA_ATTACH_INTRS_ENABLE, /* interrupts are enabled */
	ENA_ATTACH_END
} ena_attach_seq_t;

#define ENA_ATTACH_SEQ_FIRST (ENA_ATTACH_PCI)
#define ENA_ATTACH_NUM_ENTRIES (ENA_ATTACH_END - 1)

struct ena;
typedef bool (*ena_attach_fn_t)(struct ena *);
typedef void (*ena_cleanup_fn_t)(struct ena *, bool);

typedef struct ena_attach_desc {
	ena_attach_seq_t ead_seq;
	const char *ead_name;
	ena_attach_fn_t ead_attach_fn;
	bool ead_attach_hard_fail;
	ena_cleanup_fn_t ead_cleanup_fn;
} ena_attach_desc_t;
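
/*
 * Illustrative sketch only: attach proceeds by walking a table of
 * ena_attach_desc_t entries in ena_attach_seq_t order, recording the
 * last completed stage in ena_attach_seq. On failure (or on detach)
 * the completed stages are unwound in reverse by calling each stage's
 * cleanup function. Assuming a hypothetical table[] of descriptors:
 *
 *	for (uint_t i = 0; i < ENA_ATTACH_NUM_ENTRIES; i++) {
 *		const ena_attach_desc_t *d = &table[i];
 *		if (!d->ead_attach_fn(ena))
 *			unwind completed stages and fail;
 *		ena->ena_attach_seq = d->ead_seq;
 *	}
 */
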
typedef enum {
	ENA_TCB_NONE,
	ENA_TCB_COPY
} ena_tcb_type_t;

/*
 * The TCB is used to track information relating to the Tx of a
 * packet. At the moment we support copy only.
 */
typedef struct ena_tx_control_block {
	/*
	 * The index into et_tcbs where this tcb lives. Used as the request ID
	 * value in the Tx descriptor. This allows us to match a response to
	 * its associated TCB.
	 */
	uint16_t etcb_id;
	mblk_t *etcb_mp;
	ena_tcb_type_t etcb_type;
	ena_dma_buf_t etcb_dma;
} ena_tx_control_block_t;

typedef enum ena_txq_state {
	ENA_TXQ_STATE_NONE = 0,
	ENA_TXQ_STATE_HOST_ALLOC = 1 << 0,
	ENA_TXQ_STATE_CQ_CREATED = 1 << 1,
	ENA_TXQ_STATE_SQ_CREATED = 1 << 2,
	ENA_TXQ_STATE_READY = 1 << 3, /* TxQ ready and waiting */
	ENA_TXQ_STATE_RUNNING = 1 << 4, /* intrs enabled */
} ena_txq_state_t;

typedef struct ena_txq_stat {
	/* Number of times mac_ether_offload_info() has failed. */
	kstat_named_t ets_hck_meoifail;

	/*
	 * Total number of times the ring was blocked due to
	 * insufficient descriptors, or unblocked due to recycling
	 * descriptors.
	 */
	kstat_named_t ets_blocked;
	kstat_named_t ets_unblocked;

	/* The total number of descriptors that have been recycled. */
	kstat_named_t ets_recycled;

	/*
	 * Number of bytes and packets that have been _submitted_ to
	 * the device.
	 */
	kstat_named_t ets_bytes;
	kstat_named_t ets_packets;
} ena_txq_stat_t;

/*
 * A transmit queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for sending packets.
 *
 * Write Once (WO)
 *
 * This value is written once, before the datapath is activated, in
 * a function which is controlled by mac(9E). Some values may be
 * written earlier, during ena attach, like et_ena and
 * et_sq_num_descs.
 *
 * Tx Mutex (TM) -- et_lock
 *
 * This value is protected by the Tx queue's mutex. Some values may
 * be initialized in a WO path, but also continually updated as part
 * of normal datapath operation, such as et_sq_avail_descs. These
 * values need mutex protection.
 */
typedef struct ena_txq {
	kmutex_t et_lock; /* WO */

	struct ena *et_ena; /* WO */
	uint_t et_txqs_idx; /* WO */
	mac_ring_handle_t et_mrh; /* WO */
	uint64_t et_m_gen_num; /* TM */
	ena_txq_state_t et_state; /* WO */
	uint16_t et_intr_vector; /* WO */

	enahw_tx_desc_t *et_sq_descs; /* TM */
	ena_dma_buf_t et_sq_dma; /* WO */

	/* Is the Tx queue currently in a blocked state? */
	bool et_blocked; /* TM */

	/*
	 * The number of descriptors owned by this ring. This value
	 * never changes after initialization.
	 */
	uint16_t et_sq_num_descs; /* WO */

	/*
	 * The number of descriptors currently available for Tx
	 * submission. When this value reaches zero the ring must
	 * block until the device notifies us of freed descriptors.
	 */
	uint16_t et_sq_avail_descs; /* TM */

	/*
	 * The current tail index of the queue (the first free
	 * descriptor for host Tx submission). After initialization,
	 * this value only increments, relying on unsigned wrap
	 * around. The ENA device seems to expect this behavior,
	 * performing its own modulo on the value for the purposes of
	 * indexing, much like the driver code needs to do in order to
	 * access the proper TCB entry (see the sketch following this
	 * structure).
	 */
	uint16_t et_sq_tail_idx; /* TM */

	/*
	 * The phase is used to know which CQ descriptors may be
	 * reclaimed. This is explained further in ena.c.
	 */
	uint16_t et_sq_phase; /* TM */
	uint16_t et_sq_hw_idx; /* WO */

	/*
	 * The "doorbell" address is how the host indicates to the
	 * device which descriptors are ready for Tx processing.
	 */
	uint32_t *et_sq_db_addr; /* WO */

	/*
	 * The TCBs track host Tx information, like a pointer to the
	 * mblk being submitted. The TCBs currently available for use
	 * are maintained in a free list.
	 */
	ena_tx_control_block_t *et_tcbs; /* TM */
	ena_tx_control_block_t **et_tcbs_freelist; /* TM */
	uint16_t et_tcbs_freelist_size; /* TM */

	enahw_tx_cdesc_t *et_cq_descs; /* TM */
	ena_dma_buf_t et_cq_dma; /* WO */
	uint16_t et_cq_num_descs; /* WO */
	uint16_t et_cq_head_idx; /* TM */
	uint16_t et_cq_phase; /* TM */
	uint16_t et_cq_hw_idx; /* WO */

	/*
	 * This address is used to control the CQ interrupts.
	 */
	uint32_t *et_cq_unmask_addr; /* WO */
	uint32_t *et_cq_numa_addr; /* WO (currently unused) */

	/*
	 * This is used to detect transmit stalls and invoke a reset. The
	 * watchdog increments this counter when it sees that the Tx
	 * ring is still blocked, and if it exceeds the threshold then the
	 * device is assumed to have stalled and needs to be reset.
	 */
	uint32_t et_stall_watchdog; /* TM */

	/*
	 * This mutex protects the Tx queue stats. This mutex may be
	 * entered while et_lock is held, but et_lock is not required
	 * to access/modify the stats. However, if both locks are
	 * held, then et_lock must be entered first.
	 */
	kmutex_t et_stat_lock;
	ena_txq_stat_t et_stat;
	kstat_t *et_kstat;
} ena_txq_t;
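
/*
 * Illustrative sketch only: et_sq_tail_idx wraps naturally as an
 * unsigned value, so both the device and the driver reduce it modulo
 * the ring size when indexing. Looking up the entry for the next free
 * descriptor might look roughly like this:
 *
 *	uint16_t modulo = txq->et_sq_tail_idx % txq->et_sq_num_descs;
 *	enahw_tx_desc_t *desc = &txq->et_sq_descs[modulo];
 */
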
typedef enum ena_rxq_state {
	ENA_RXQ_STATE_NONE = 0,
	ENA_RXQ_STATE_HOST_ALLOC = 1 << 0,
	ENA_RXQ_STATE_CQ_CREATED = 1 << 1,
	ENA_RXQ_STATE_SQ_CREATED = 1 << 2,
	ENA_RXQ_STATE_SQ_FILLED = 1 << 3,
	ENA_RXQ_STATE_READY = 1 << 4, /* RxQ ready and waiting */
	ENA_RXQ_STATE_RUNNING = 1 << 5, /* intrs enabled */
} ena_rxq_state_t;

typedef struct ena_rx_ctrl_block {
	ena_dma_buf_t ercb_dma;
	uint8_t ercb_offset;
	uint16_t ercb_length;
} ena_rx_ctrl_block_t;

typedef enum {
	ENA_RXQ_MODE_POLLING = 1,
	ENA_RXQ_MODE_INTR = 2,
} ena_rxq_mode_t;

typedef struct ena_rxq_stat {
	/* The total number of packets/bytes received on this queue. */
	kstat_named_t ers_packets;
	kstat_named_t ers_bytes;

	/*
	 * At this time we expect all incoming frames to fit in a
	 * single buffer/descriptor. In the rare event that the
	 * device doesn't cooperate, this stat is incremented.
	 */
	kstat_named_t ers_multi_desc;

	/*
	 * The total number of times we failed to allocate a new mblk
	 * for an incoming frame.
	 */
	kstat_named_t ers_allocb_fail;

	/*
	 * The total number of times the Rx interrupt handler reached
	 * its maximum limit for number of packets to process in a
	 * single interrupt. If you see this number increase
	 * continuously at a steady rate, then it may be an indication
	 * that the driver is not entering polling mode.
	 */
	kstat_named_t ers_intr_limit;

	/*
	 * The total number of times the device detected an incorrect
	 * IPv4 header checksum.
	 */
	kstat_named_t ers_hck_ipv4_err;

	/*
	 * The total number of times the device detected an incorrect
	 * L4/ULP checksum.
	 */
	kstat_named_t ers_hck_l4_err;
} ena_rxq_stat_t;
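
/*
 * Illustrative sketch only: the same Rx worker serves both modes.
 * mac(9E) invokes the poll entry point with a byte budget, while the
 * interrupt path uses the ENA_INTERRUPT_MODE sentinel defined above,
 * in which case the ena_rxq_intr_limit packet limit applies instead
 * (and drives ers_intr_limit when reached):
 *
 *	mblk_t *chain = ena_ring_rx_poll(rxq, poll_bytes);
 *	while from the interrupt handler: ena_rx_intr_work(rxq);
 */
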
/*
 * A receive queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for receiving packets.
 *
 * Write Once (WO)
 *
 * This value is written once, before the datapath is activated, in
 * a function which is controlled by mac(9E).
 *
 * Rx Mutex (RM) -- er_lock
 *
 * This value is protected by the Rx queue's mutex. Some values may
 * be initialized in a WO path, but also continually updated as part
 * of normal datapath operation, such as er_sq_avail_descs. These
 * values need mutex protection.
 */
typedef struct ena_rxq {
	kmutex_t er_lock;

	struct ena *er_ena; /* WO */
	uint_t er_rxqs_idx; /* WO */
	mac_ring_handle_t er_mrh; /* WO */
	uint64_t er_m_gen_num; /* WO */
	ena_rxq_state_t er_state; /* WO */
	uint16_t er_intr_vector; /* WO */
	ena_rxq_mode_t er_mode; /* RM */
	uint16_t er_intr_limit; /* RM */

	enahw_rx_desc_t *er_sq_descs; /* RM */
	ena_dma_buf_t er_sq_dma; /* WO */
	uint16_t er_sq_num_descs; /* WO */
	uint16_t er_sq_avail_descs; /* RM */
	uint16_t er_sq_tail_idx; /* RM */
	uint16_t er_sq_phase; /* RM */
	uint16_t er_sq_hw_idx; /* WO */
	uint32_t *er_sq_db_addr; /* WO */

	enahw_rx_cdesc_t *er_cq_descs; /* RM */
	ena_dma_buf_t er_cq_dma; /* WO */
	uint16_t er_cq_num_descs; /* WO */
	uint16_t er_cq_head_idx; /* RM */
	uint16_t er_cq_phase; /* RM */
	uint16_t er_cq_hw_idx; /* WO */
	uint32_t *er_cq_unmask_addr; /* WO */
	uint32_t *er_cq_numa_addr; /* WO (currently unused) */

	ena_rx_ctrl_block_t *er_rcbs; /* RM */

	kmutex_t er_stat_lock;
	ena_rxq_stat_t er_stat;
	kstat_t *er_kstat;
} ena_rxq_t;

typedef struct ena_device_stat {
	kstat_named_t eds_reset_forced;
	kstat_named_t eds_reset_error;
	kstat_named_t eds_reset_fatal;
	kstat_named_t eds_reset_keepalive;
	kstat_named_t eds_reset_txstall;
} ena_device_stat_t;

/*
 * These are stats based on enahw_resp_basic_stats_t and data that accompanies
 * the asynchronous keepalive event.
 */
typedef struct ena_basic_stat {
	kstat_named_t ebs_tx_bytes;
	kstat_named_t ebs_tx_pkts;
	kstat_named_t ebs_tx_drops;

	kstat_named_t ebs_rx_bytes;
	kstat_named_t ebs_rx_pkts;
	kstat_named_t ebs_rx_drops;
	kstat_named_t ebs_rx_overruns;
} ena_basic_stat_t;

/* These are stats based on enahw_resp_eni_stats_t. */
typedef struct ena_extended_stat {
	kstat_named_t ees_bw_in_exceeded;
	kstat_named_t ees_bw_out_exceeded;
	kstat_named_t ees_pps_exceeded;
	kstat_named_t ees_conns_exceeded;
	kstat_named_t ees_linklocal_exceeded;
} ena_extended_stat_t;

/* These stats monitor which AENQ handlers have been called. */
typedef struct ena_aenq_stat {
	kstat_named_t eaes_default;
	kstat_named_t eaes_link_change;
	kstat_named_t eaes_notification;
	kstat_named_t eaes_keep_alive;
	kstat_named_t eaes_request_reset;
	kstat_named_t eaes_fatal_error;
	kstat_named_t eaes_warning;
} ena_aenq_stat_t;

#ifdef DEBUG
typedef struct ena_reg {
	const char *er_name;
	const uint16_t er_offset;
	uint32_t er_value;
} ena_reg_t;
#endif

#define ENA_STATE_UNKNOWN 0x00u
#define ENA_STATE_INITIALIZED 0x01u
#define ENA_STATE_STARTED 0x02u
#define ENA_STATE_ERROR 0x04u
#define ENA_STATE_RESETTING 0x08u
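
/*
 * Illustrative sketch only: ena_state holds a bit field of the
 * ENA_STATE_* values above and, as noted in the structure below, is
 * manipulated with atomic operations rather than under ena_lock:
 *
 *	atomic_or_32(&ena->ena_state, ENA_STATE_STARTED);
 *	if ((ena->ena_state & ENA_STATE_RESETTING) != 0)
 *		bail out of the datapath;
 */
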
/*
 * This structure contains the per-instance (PF or VF) state of the
 * device.
 */
typedef struct ena {
	dev_info_t *ena_dip;
	int ena_instance;

#ifdef DEBUG
	/*
	 * In debug kernels, the registers are cached here at various points
	 * for easy inspection via mdb(1).
	 */
	ena_reg_t ena_reg[ENAHW_NUM_REGS];
#endif

	/*
	 * Global lock, used to synchronize administration changes to
	 * the ena_t. This lock should not be held in the datapath.
	 */
	kmutex_t ena_lock;
	ena_attach_seq_t ena_attach_seq;

	/*
	 * We use atomic ops for ena_state so that datapath consumers
	 * do not need to enter ena_lock.
	 */
	uint32_t ena_state;

	/*
	 * The reason for the last device reset.
	 */
	enahw_reset_reason_t ena_reset_reason;

	/*
	 * Watchdog
	 */
	kmutex_t ena_watchdog_lock;
	ddi_periodic_t ena_watchdog_periodic;
	uint64_t ena_watchdog_last_keepalive;

	/*
	 * PCI config space and BAR handle.
	 */
	ddi_acc_handle_t ena_pci_hdl;
	off_t ena_reg_size;
	caddr_t ena_reg_base;
	ddi_device_acc_attr_t ena_reg_attr;
	ddi_acc_handle_t ena_reg_hdl;

	/*
	 * Vendor information.
	 */
	uint16_t ena_pci_vid;
	uint16_t ena_pci_did;
	uint8_t ena_pci_rev;
	uint16_t ena_pci_svid;
	uint16_t ena_pci_sdid;

	/*
	 * Device and controller versions.
	 */
	uint32_t ena_dev_major_vsn;
	uint32_t ena_dev_minor_vsn;
	uint32_t ena_ctrl_major_vsn;
	uint32_t ena_ctrl_minor_vsn;
	uint32_t ena_ctrl_subminor_vsn;
	uint32_t ena_ctrl_impl_id;

	/*
	 * Interrupts
	 */
	int ena_num_intrs;
	ddi_intr_handle_t *ena_intr_handles;
	size_t ena_intr_handles_sz;
	int ena_intr_caps;
	uint_t ena_intr_pri;

	mac_handle_t ena_mh;

	size_t ena_page_sz;

	/*
	 * The MTU and data layer frame sizes.
	 */
	uint32_t ena_mtu;
	uint32_t ena_max_frame_hdr;
	uint32_t ena_max_frame_total;

	/* The size (in bytes) of the Rx/Tx data buffers. */
	uint32_t ena_tx_buf_sz;
	uint32_t ena_rx_buf_sz;

	/*
	 * The maximum number of Scatter Gather List segments the
	 * device can address.
	 */
	uint8_t ena_tx_sgl_max_sz;
	uint8_t ena_rx_sgl_max_sz;

	/* The number of descriptors per Rx/Tx queue. */
	uint16_t ena_rxq_num_descs;
	uint16_t ena_txq_num_descs;

	/*
	 * The maximum number of frames which may be read per Rx
	 * interrupt.
	 */
	uint16_t ena_rxq_intr_limit;

	/* The Rx/Tx data queues (rings). */
	ena_rxq_t *ena_rxqs;
	uint16_t ena_num_rxqs;
	ena_txq_t *ena_txqs;
	uint16_t ena_num_txqs;

	/* These statistics are device-wide. */
	kstat_t *ena_device_kstat;
	ena_device_stat_t ena_device_stat;
	hrtime_t ena_device_basic_stat_last_update;
	kmutex_t ena_device_basic_stat_lock;
	kstat_t *ena_device_basic_kstat;
	kstat_t *ena_device_extended_kstat;

	/*
	 * This tracks AENQ-related stats; it is implicitly
	 * device-wide.
	 */
	ena_aenq_stat_t ena_aenq_stat;
	kstat_t *ena_aenq_kstat;

	/*
	 * The Admin Queue, through which all device commands are
	 * sent.
	 */
	ena_adminq_t ena_aq;

	ena_aenq_t ena_aenq;
	ena_dma_buf_t ena_host_info;

	/*
	 * Hardware info
	 */
	ena_hints_t ena_device_hints;
	uint32_t ena_supported_features;
	uint32_t ena_capabilities;
	uint8_t ena_dma_width;
	bool ena_link_autoneg;
	link_duplex_t ena_link_duplex;
	uint64_t ena_link_speed_mbits;
	enahw_link_speeds_t ena_link_speeds;
	link_state_t ena_link_state;
	uint32_t ena_aenq_supported_groups;
	uint32_t ena_aenq_enabled_groups;

	uint32_t ena_tx_max_sq_num;
	uint32_t ena_tx_max_sq_num_descs;
	uint32_t ena_tx_max_cq_num;
	uint32_t ena_tx_max_cq_num_descs;
	uint16_t ena_tx_max_desc_per_pkt;
	uint32_t ena_tx_max_hdr_len;

	uint32_t ena_rx_max_sq_num;
	uint32_t ena_rx_max_sq_num_descs;
	uint32_t ena_rx_max_cq_num;
	uint32_t ena_rx_max_cq_num_descs;
	uint16_t ena_rx_max_desc_per_pkt;

	/* This is calculated from the Rx/Tx queue nums. */
	uint16_t ena_max_io_queues;

	/* Hardware Offloads */
	bool ena_tx_l3_ipv4_csum;

	bool ena_tx_l4_ipv4_part_csum;
	bool ena_tx_l4_ipv4_full_csum;
	bool ena_tx_l4_ipv4_lso;

	bool ena_tx_l4_ipv6_part_csum;
	bool ena_tx_l4_ipv6_full_csum;
	bool ena_tx_l4_ipv6_lso;

	bool ena_rx_l3_ipv4_csum;
	bool ena_rx_l4_ipv4_csum;
	bool ena_rx_l4_ipv6_csum;
	bool ena_rx_hash;

	uint32_t ena_max_mtu;
	uint8_t ena_mac_addr[ETHERADDRL];
} ena_t;

/*
 * Misc
 */
extern bool ena_reset(ena_t *, const enahw_reset_reason_t);
extern bool ena_is_feat_avail(ena_t *, const enahw_feature_id_t);
extern bool ena_is_cap_avail(ena_t *, const enahw_capability_id_t);
extern void ena_update_hints(ena_t *, enahw_device_hints_t *);

/*
 * Logging functions.
 */
extern bool ena_debug;
extern void ena_err(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_dbg(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_panic(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_trigger_reset(ena_t *, enahw_reset_reason_t);
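
/*
 * Illustrative sketch only: these take an ena_t for instance context
 * and otherwise behave like cmn_err(9F)-style formatted logging
 * (assuming ena_dbg() output is gated on the ena_debug flag above):
 *
 *	ena_err(ena, "failed to create Tx CQ %u: %d", idx, ret);
 */
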
/*
 * Hardware access.
 */
extern uint32_t ena_hw_bar_read32(const ena_t *, const uint16_t);
extern uint32_t ena_hw_abs_read32(const ena_t *, uint32_t *);
extern void ena_hw_bar_write32(const ena_t *, const uint16_t, const uint32_t);
extern void ena_hw_abs_write32(const ena_t *, uint32_t *, const uint32_t);
extern const char *enahw_reset_reason(enahw_reset_reason_t);
#ifdef DEBUG
extern void ena_init_regcache(ena_t *);
extern void ena_update_regcache(ena_t *);
#else
#define ena_init_regcache(x)
#define ena_update_regcache(x)
#endif

/*
 * Watchdog
 */
extern void ena_enable_watchdog(ena_t *);
extern void ena_disable_watchdog(ena_t *);

/*
 * Stats
 */
extern void ena_stat_device_cleanup(ena_t *);
extern bool ena_stat_device_init(ena_t *);

extern void ena_stat_device_basic_cleanup(ena_t *);
extern bool ena_stat_device_basic_init(ena_t *);

extern void ena_stat_device_extended_cleanup(ena_t *);
extern bool ena_stat_device_extended_init(ena_t *);

extern void ena_stat_aenq_cleanup(ena_t *);
extern bool ena_stat_aenq_init(ena_t *);

extern void ena_stat_rxq_cleanup(ena_rxq_t *);
extern bool ena_stat_rxq_init(ena_rxq_t *);
extern void ena_stat_txq_cleanup(ena_txq_t *);
extern bool ena_stat_txq_init(ena_txq_t *);

/*
 * DMA
 */
extern bool ena_dma_alloc(ena_t *, ena_dma_buf_t *, ena_dma_conf_t *, size_t);
extern void ena_dma_free(ena_dma_buf_t *);
extern void ena_dma_bzero(ena_dma_buf_t *);
extern void ena_set_dma_addr(const ena_t *, const uint64_t, enahw_addr_t *);
extern void ena_set_dma_addr_values(const ena_t *, const uint64_t, uint32_t *,
    uint16_t *);

/*
 * Interrupts
 */
extern bool ena_intr_add_handlers(ena_t *);
extern void ena_intr_remove_handlers(ena_t *, bool);
extern void ena_tx_intr_work(ena_txq_t *);
extern void ena_rx_intr_work(ena_rxq_t *);
extern bool ena_intrs_disable(ena_t *);
extern bool ena_intrs_enable(ena_t *);

/*
 * MAC
 */
extern bool ena_mac_register(ena_t *);
extern int ena_mac_unregister(ena_t *);
extern void ena_ring_tx_stop(mac_ring_driver_t);
extern int ena_ring_tx_start(mac_ring_driver_t, uint64_t);
extern mblk_t *ena_ring_tx(void *, mblk_t *);
extern void ena_ring_rx_stop(mac_ring_driver_t);
extern int ena_ring_rx_start(mac_ring_driver_t, uint64_t);
extern int ena_m_stat(void *, uint_t, uint64_t *);
extern mblk_t *ena_ring_rx_poll(void *, int);
extern int ena_ring_rx_stat(mac_ring_driver_t, uint_t, uint64_t *);
extern int ena_ring_tx_stat(mac_ring_driver_t, uint_t, uint64_t *);

/*
 * Admin API
 */
extern int ena_admin_submit_cmd(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, ena_cmd_ctx_t **);
extern int ena_admin_poll_for_resp(ena_t *, ena_cmd_ctx_t *);
extern void ena_free_host_info(ena_t *);
extern bool ena_init_host_info(ena_t *);
extern void ena_create_cmd_ctx(ena_t *);
extern void ena_release_all_cmd_ctx(ena_t *);
extern int ena_create_cq(ena_t *, uint16_t, uint64_t, bool, uint32_t,
    uint16_t *, uint32_t **, uint32_t **);
extern int ena_destroy_cq(ena_t *, uint16_t);
extern int ena_create_sq(ena_t *, uint16_t, uint64_t, bool, uint16_t,
    uint16_t *, uint32_t **);
extern int ena_destroy_sq(ena_t *, uint16_t, bool);
extern int ena_set_feature(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, const enahw_feature_id_t, const uint8_t);
extern int ena_get_feature(ena_t *, enahw_resp_desc_t *,
    const enahw_feature_id_t, const uint8_t);
extern int ena_admin_get_basic_stats(ena_t *, enahw_resp_desc_t *);
extern int ena_admin_get_eni_stats(ena_t *, enahw_resp_desc_t *);
extern int enahw_resp_status_to_errno(ena_t *, enahw_resp_status_t);
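
/*
 * Illustrative sketch only: an admin command is submitted and then,
 * when the queue is in polled mode, waited upon via its command
 * context (command construction and error handling elided):
 *
 *	enahw_cmd_desc_t cmd;
 *	enahw_resp_desc_t resp;
 *	ena_cmd_ctx_t *ctx = NULL;
 *	bzero(&cmd, sizeof (cmd));
 *	fill in cmd for the desired opcode;
 *	if (ena_admin_submit_cmd(ena, &cmd, &resp, &ctx) == 0)
 *		ret = ena_admin_poll_for_resp(ena, ctx);
 */
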
/*
 * Async event queue
 */
extern bool ena_aenq_init(ena_t *);
extern bool ena_aenq_configure(ena_t *);
extern void ena_aenq_enable(ena_t *);
extern void ena_aenq_work(ena_t *);
extern void ena_aenq_free(ena_t *);

/*
 * Rx/Tx allocations
 */
extern bool ena_alloc_rxq(ena_rxq_t *);
extern void ena_cleanup_rxq(ena_rxq_t *, bool);
extern bool ena_alloc_txq(ena_txq_t *);
extern void ena_cleanup_txq(ena_txq_t *, bool);

#ifdef __cplusplus
}
#endif

#endif /* _ENA_H */