/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

#ifndef _ENA_H
#define _ENA_H

#include <sys/stdbool.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/pci.h>
#include <sys/sysmacros.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/mac_provider.h>
#include <sys/pattr.h>
#include <sys/strsun.h>
#include <sys/ethernet.h>
#include <sys/vlan.h>
#include <sys/utsname.h>
#include "ena_hw.h"

/*
 * AWS ENA Ethernet Driver
 */

#ifdef __cplusplus
extern "C" {
#endif

#define ENA_MODULE_NAME	"ena"

/*
 * The minimum supported ENA device controller version.
 */
#define ENA_CTRL_MAJOR_VSN_MIN		0
#define ENA_CTRL_MINOR_VSN_MIN		0
#define ENA_CTRL_SUBMINOR_VSN_MIN	1

#define ENA_MODULE_VER_MAJOR	1
#define ENA_MODULE_VER_MINOR	0
#define ENA_MODULE_VER_SUBMINOR	0

/*
 * The Linux driver doesn't document what the specification version
 * number controls or the contract around version changes. The best we
 * can do is use the same version that they use and port version
 * changes as they come (the last one was in 2018).
 *
 * common: ENA_COMMON_SPEC_VERSION_{MAJOR,MINOR}
 */
#define ENA_SPEC_VERSION_MAJOR	2
#define ENA_SPEC_VERSION_MINOR	0

/* This represents BAR 0. */
#define ENA_REG_NUMBER	1

/*
 * A sentinel value passed as argument to ena_ring_rx() to indicate
 * the Rx ring is being read in interrupt mode, not polling mode.
 */
#define ENA_INTERRUPT_MODE	-1

#define ENA_RX_BUF_IPHDR_ALIGNMENT	2
#define ENA_ADMINQ_DEPTH		32
#define ENA_AENQ_NUM_DESCS		32

/* Convert milliseconds to nanoseconds. */
#define ENA_MS_TO_NS(ms)	((ms) * 1000000ul)

/*
 * The default amount of time we will wait for an admin command to
 * complete, specified in nanoseconds. This can be overridden by hints
 * received from the device. We default to half a second.
 */
#define ENA_ADMIN_CMD_DEF_TIMEOUT_NS	MSEC2NSEC(500)

/*
 * The interval of the watchdog timer, in nanoseconds.
 */
#define ENA_WATCHDOG_INTERVAL_NS	MSEC2NSEC(1000)

/*
 * The device sends a keepalive message every second. If we don't see
 * any for a while we will trigger a device reset. Other open source
 * drivers use 6 seconds for this value, so we do the same.
 */
#define ENA_DEVICE_KEEPALIVE_TIMEOUT_NS	MSEC2NSEC(6000)

/*
 * The number of consecutive times a Tx queue needs to be seen as
 * blocked by the watchdog timer before a reset is invoked. Since the
 * watchdog interval is one second, this value is approximately a
 * number of seconds.
 */
#define ENA_TX_STALL_TIMEOUT	8
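
/*
 * An illustrative sketch (not code from this driver) of how the
 * nanosecond timeout values above line up with gethrtime(9F), which
 * also returns nanoseconds:
 *
 *	hrtime_t deadline = gethrtime() + ENA_ADMIN_CMD_DEF_TIMEOUT_NS;
 *
 *	while (gethrtime() < deadline) {
 *		(check for a completion, then delay briefly)
 *	}
 */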

/*
 * In order to avoid rapidly sending basic stats requests to the
 * controller, we impose a limit of one request every 10ms.
 */
#define ENA_BASIC_STATS_MINIMUM_INTERVAL_NS	MSEC2NSEC(10)

/*
 * Property macros.
 */
#define ENA_PROP_RXQ_NUM_DESCS	"rx_queue_num_descs"
#define ENA_PROP_RXQ_NUM_DESCS_MIN	64

#define ENA_PROP_TXQ_NUM_DESCS	"tx_queue_num_descs"
#define ENA_PROP_TXQ_NUM_DESCS_MIN	64

#define ENA_PROP_RXQ_INTR_LIMIT	"rx_queue_intr_limit"
#define ENA_PROP_RXQ_INTR_LIMIT_MIN	16
#define ENA_PROP_RXQ_INTR_LIMIT_MAX	4096
#define ENA_PROP_RXQ_INTR_LIMIT_DEF	256

#define ENA_DMA_BIT_MASK(x)	((1ULL << (x)) - 1ULL)
#define ENA_DMA_VERIFY_ADDR(ena, phys_addr)				\
	VERIFY3U(ENA_DMA_BIT_MASK((ena)->ena_dma_width) & (phys_addr),	\
	    ==, (phys_addr))

typedef struct ena_dma_conf {
	size_t		edc_size;
	uint64_t	edc_align;
	int		edc_sgl;
	uchar_t		edc_endian;
	bool		edc_stream;
} ena_dma_conf_t;

typedef struct ena_dma_buf {
	caddr_t		edb_va;
	size_t		edb_len;

	/*
	 * The length given by the DMA engine, kept around for
	 * debugging purposes.
	 */
	size_t		edb_real_len;
	size_t		edb_used_len;
	ddi_acc_handle_t edb_acc_hdl;
	ddi_dma_handle_t edb_dma_hdl;
	const ddi_dma_cookie_t *edb_cookie;
} ena_dma_buf_t;

/*
 * We always sync the entire range, and therefore expect success.
 */
#ifdef DEBUG
#define ENA_DMA_SYNC(buf, flag)	\
	ASSERT0(ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#else /* DEBUG */
#define ENA_DMA_SYNC(buf, flag)	\
	((void)ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#endif

typedef void (*ena_aenq_hdlr_t)(void *data, enahw_aenq_desc_t *desc);

typedef struct ena_aenq {
	enahw_aenq_desc_t	*eaenq_descs;
	ena_dma_buf_t		eaenq_dma;
	ena_aenq_hdlr_t		eaenq_hdlrs[ENAHW_AENQ_GROUPS_ARR_NUM];
	uint16_t		eaenq_num_descs;
	uint16_t		eaenq_head;
	uint8_t			eaenq_phase;
} ena_aenq_t;

typedef struct ena_admin_sq {
	enahw_cmd_desc_t	*eas_entries;
	ena_dma_buf_t		eas_dma;
	uint32_t		*eas_dbaddr;
	uint16_t		eas_tail;
	uint8_t			eas_phase;
} ena_admin_sq_t;

typedef struct ena_admin_cq {
	enahw_resp_desc_t	*eac_entries;
	ena_dma_buf_t		eac_dma;
	uint16_t		eac_head;
	uint8_t			eac_phase;
} ena_admin_cq_t;

/*
 * The command context is used to track outstanding requests and match
 * them to device responses.
 */
typedef struct ena_cmd_ctx {
	list_node_t		ectx_node;

	/*
	 * The index into ea_cmd_ctxs where this ctx lives. Used as
	 * the command ID value in the command descriptor. This allows
	 * us to match a response to its associated context.
	 */
	uint16_t		ectx_id;

	/* Is the command pending? */
	bool			ectx_pending;

	/* The type of command associated with this context. */
	enahw_cmd_opcode_t	ectx_cmd_opcode;

	/*
	 * The location to copy the full response to. This is
	 * specified by the caller of the command during
	 * submission.
	 */
	enahw_resp_desc_t	*ectx_resp;
} ena_cmd_ctx_t;
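
/*
 * A sketch of the round trip described above (the descriptor field
 * names here are hypothetical, for illustration only): the context's
 * index is stashed in the command descriptor as its command ID, and
 * the ID echoed back in the response leads us to the same context:
 *
 *	ena_cmd_ctx_t *ctx = list_remove_head(&aq->ea_cmd_ctxs_free);
 *	cmd->ecd_id = ctx->ectx_id;		(submission side)
 *	...
 *	ctx = &aq->ea_cmd_ctxs[resp->erd_id];	(completion side)
 */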

/*
 * The admin queue, the queue through which commands are sent to the
 * device.
 *
 * WO: Write Once (at initialization)
 *
 * In general, only a single lock needs to be held in order to access
 * the different parts of the admin queue:
 *
 * sq_lock: Any data dealing with submitting admin commands, which
 * includes acquiring a command context.
 *
 * cq_lock: Any data dealing with reading command responses.
 *
 * stat_lock: For accessing statistics.
 *
 * In some cases, the stat_lock may be held in tandem with either the
 * SQ or CQ lock. In that case, the SQ/CQ lock is always entered
 * first.
 */
typedef struct ena_adminq {
	kmutex_t		ea_sq_lock;	/* WO */
	kmutex_t		ea_cq_lock;	/* WO */
	kmutex_t		ea_stat_lock;	/* WO */

	hrtime_t		ea_cmd_timeout_ns;	/* WO */

	uint16_t		ea_qlen;	/* WO */
	bool			ea_poll_mode;	/* WO */

	ena_cmd_ctx_t		*ea_cmd_ctxs;		/* WO */
	list_t			ea_cmd_ctxs_free;	/* ea_sq_lock */
	list_t			ea_cmd_ctxs_used;	/* ea_sq_lock */
	uint16_t		ea_pending_cmds;	/* ea_sq_lock */
	ena_admin_sq_t		ea_sq;	/* ea_sq_lock */
	ena_admin_cq_t		ea_cq;	/* ea_cq_lock */

	/* ea_stat_lock */
	struct ena_adminq_stats {
		uint64_t	cmds_fail;
		uint64_t	cmds_submitted;
		uint64_t	cmds_success;
		uint64_t	queue_full;
	} ea_stats;
} ena_adminq_t;

/*
 * Cache of the last set of value hints received from the device. See
 * the definition of enahw_device_hints_t in ena_hw.h for more detail
 * on the purpose of each.
 */
typedef struct ena_hints {
	uint16_t	eh_mmio_read_timeout;
	uint16_t	eh_keep_alive_timeout;
	uint16_t	eh_tx_comp_timeout;
	uint16_t	eh_missed_tx_reset_threshold;
	uint16_t	eh_admin_comp_timeout;
	uint16_t	eh_max_tx_sgl;
	uint16_t	eh_max_rx_sgl;
} ena_hints_t;

typedef enum ena_attach_seq {
	ENA_ATTACH_PCI = 1,		/* PCI config space */
	ENA_ATTACH_REGS,		/* BAR mapping */
	ENA_ATTACH_DEV_INIT,		/* ENA device initialization */
	ENA_ATTACH_READ_CONF,		/* Read driver conf file */
	ENA_ATTACH_DEV_CFG,		/* Set any needed device config */
	ENA_ATTACH_INTR_ALLOC,		/* interrupt handles allocated */
	ENA_ATTACH_INTR_HDLRS,		/* intr handlers set */
	ENA_ATTACH_TXQS_ALLOC,		/* Tx Queues allocated */
	ENA_ATTACH_RXQS_ALLOC,		/* Rx Queues allocated */
	ENA_ATTACH_MAC_REGISTER,	/* registered with mac */
	ENA_ATTACH_INTRS_ENABLE,	/* interrupts are enabled */
	ENA_ATTACH_END
} ena_attach_seq_t;

#define ENA_ATTACH_SEQ_FIRST	(ENA_ATTACH_PCI)
#define ENA_ATTACH_NUM_ENTRIES	(ENA_ATTACH_END - 1)

struct ena;
typedef bool (*ena_attach_fn_t)(struct ena *);
typedef void (*ena_cleanup_fn_t)(struct ena *, bool);

typedef struct ena_attach_desc {
	ena_attach_seq_t	ead_seq;
	const char		*ead_name;
	ena_attach_fn_t		ead_attach_fn;
	bool			ead_attach_hard_fail;
	ena_cleanup_fn_t	ead_cleanup_fn;
} ena_attach_desc_t;
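
/*
 * A minimal sketch of how a table of these descriptors can drive
 * attach and cleanup (illustrative only; the actual logic lives in
 * ena.c):
 *
 *	for (i = 0; i < ENA_ATTACH_NUM_ENTRIES; i++) {
 *		ena_attach_desc_t *d = &table[i];
 *
 *		if (!d->ead_attach_fn(ena) && d->ead_attach_hard_fail)
 *			goto cleanup;	(unwind in reverse order)
 *	}
 */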

typedef enum {
	ENA_TCB_NONE,
	ENA_TCB_COPY
} ena_tcb_type_t;

/*
 * The TCB is used to track information relating to the Tx of a
 * packet. At the moment we support copy only.
 */
typedef struct ena_tx_control_block {
	mblk_t		*etcb_mp;
	ena_tcb_type_t	etcb_type;
	ena_dma_buf_t	etcb_dma;
} ena_tx_control_block_t;

typedef enum ena_txq_state {
	ENA_TXQ_STATE_NONE	= 0,
	ENA_TXQ_STATE_HOST_ALLOC	= 1 << 0,
	ENA_TXQ_STATE_CQ_CREATED	= 1 << 1,
	ENA_TXQ_STATE_SQ_CREATED	= 1 << 2,
	ENA_TXQ_STATE_READY	= 1 << 3,	/* TxQ ready and waiting */
	ENA_TXQ_STATE_RUNNING	= 1 << 4,	/* intrs enabled */
} ena_txq_state_t;

typedef struct ena_txq_stat {
	/* Number of times mac_ether_offload_info() has failed. */
	kstat_named_t	ets_hck_meoifail;

	/*
	 * Total number of times the ring was blocked due to
	 * insufficient descriptors, or unblocked due to recycling
	 * descriptors.
	 */
	kstat_named_t	ets_blocked;
	kstat_named_t	ets_unblocked;

	/* The total number of descriptors that have been recycled. */
	kstat_named_t	ets_recycled;

	/*
	 * Number of bytes and packets that have been _submitted_ to
	 * the device.
	 */
	kstat_named_t	ets_bytes;
	kstat_named_t	ets_packets;
} ena_txq_stat_t;

/*
 * A transmit queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for sending packets.
 *
 * Write Once (WO)
 *
 *    This value is written once, before the datapath is activated, in
 *    a function which is controlled by mac(9E). Some values may be
 *    written earlier, during ena attach, like et_ena and
 *    et_sq_num_descs.
 *
 * Tx Mutex (TM) -- et_lock
 *
 *    This value is protected by the Tx queue's mutex. Some values may
 *    be initialized in a WO path, but also continually updated as
 *    part of normal datapath operation, such as et_sq_avail_descs.
 *    These values need mutex protection.
 */
typedef struct ena_txq {
	kmutex_t		et_lock;	/* WO */

	struct ena		*et_ena;	/* WO */
	uint_t			et_txqs_idx;	/* WO */
	mac_ring_handle_t	et_mrh;		/* WO */
	uint64_t		et_m_gen_num;	/* TM */
	ena_txq_state_t		et_state;	/* WO */
	uint16_t		et_intr_vector;	/* WO */

	enahw_tx_desc_t		*et_sq_descs;	/* TM */
	ena_dma_buf_t		et_sq_dma;	/* WO */

	/* Is the Tx queue currently in a blocked state? */
	bool			et_blocked;	/* TM */

	/*
	 * The number of descriptors owned by this ring. This value
	 * never changes after initialization.
	 */
	uint16_t		et_sq_num_descs;	/* WO */

	/*
	 * The number of descriptors currently available for Tx
	 * submission. When this value reaches zero the ring must
	 * block until the device notifies us of freed descriptors.
	 */
	uint16_t		et_sq_avail_descs;	/* TM */

	/*
	 * The current tail index of the queue (the first free
	 * descriptor for host Tx submission). After initialization,
	 * this value only increments, relying on unsigned wrap
	 * around. The ENA device seems to expect this behavior,
	 * performing its own modulo on the value for the purposes of
	 * indexing, much like the driver code needs to do in order to
	 * access the proper TCB entry.
	 */
	uint16_t		et_sq_tail_idx;	/* TM */

	/*
	 * The phase is used to know which CQ descriptors may be
	 * reclaimed. This is explained further in ena.c.
	 */
	uint16_t		et_sq_phase;	/* TM */
	uint16_t		et_sq_hw_idx;	/* WO */

	/*
	 * The "doorbell" address is how the host indicates to the
	 * device which descriptors are ready for Tx processing.
	 */
	uint32_t		*et_sq_db_addr;	/* WO */

	/*
	 * The TCBs track host Tx information, like a pointer to the
	 * mblk being submitted. Currently we maintain a 1:1 mapping
	 * of SQ descriptors to TCBs as Tx is copy only.
	 */
	ena_tx_control_block_t	*et_tcbs;	/* TM */

	enahw_tx_cdesc_t	*et_cq_descs;	/* TM */
	ena_dma_buf_t		et_cq_dma;	/* WO */
	uint16_t		et_cq_num_descs;	/* WO */
	uint16_t		et_cq_head_idx;	/* TM */
	uint16_t		et_cq_phase;	/* TM */
	uint16_t		et_cq_hw_idx;	/* WO */

	/*
	 * This address is used to control the CQ interrupts.
	 */
	uint32_t		*et_cq_unmask_addr;	/* WO */
	uint32_t		*et_cq_numa_addr;	/* WO (currently unused) */

	/*
	 * This is used to detect Tx stalls. The watchdog increments
	 * this counter each time it sees that the Tx ring is still
	 * blocked; if the counter exceeds ENA_TX_STALL_TIMEOUT, the
	 * device is assumed to have stalled and a reset is invoked.
	 */
	uint32_t		et_stall_watchdog;	/* TM */

	/*
	 * This mutex protects the Tx queue stats. This mutex may be
	 * entered while et_lock is held, but et_lock is not required
	 * to access/modify the stats. However, if both locks are
	 * held, then et_lock must be entered first.
	 */
	kmutex_t		et_stat_lock;
	ena_txq_stat_t		et_stat;
	kstat_t			*et_kstat;
} ena_txq_t;
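
/*
 * A sketch of the index arithmetic described for et_sq_tail_idx above
 * (illustrative, not the driver's actual code): the monotonically
 * increasing tail is reduced modulo the ring size whenever a slot is
 * accessed:
 *
 *	uint16_t slot = txq->et_sq_tail_idx % txq->et_sq_num_descs;
 *	ena_tx_control_block_t *tcb = &txq->et_tcbs[slot];
 *	txq->et_sq_tail_idx++;	(16-bit unsigned wrap is intentional)
 */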

typedef enum ena_rxq_state {
	ENA_RXQ_STATE_NONE	= 0,
	ENA_RXQ_STATE_HOST_ALLOC	= 1 << 0,
	ENA_RXQ_STATE_CQ_CREATED	= 1 << 1,
	ENA_RXQ_STATE_SQ_CREATED	= 1 << 2,
	ENA_RXQ_STATE_SQ_FILLED	= 1 << 3,
	ENA_RXQ_STATE_READY	= 1 << 4,	/* RxQ ready and waiting */
	ENA_RXQ_STATE_RUNNING	= 1 << 5,	/* intrs enabled */
} ena_rxq_state_t;

typedef struct ena_rx_ctrl_block {
	ena_dma_buf_t	ercb_dma;
	uint8_t		ercb_offset;
	uint16_t	ercb_length;
} ena_rx_ctrl_block_t;

typedef enum {
	ENA_RXQ_MODE_POLLING	= 1,
	ENA_RXQ_MODE_INTR	= 2,
} ena_rxq_mode_t;

typedef struct ena_rxq_stat {
	/* The total number of packets/bytes received on this queue. */
	kstat_named_t	ers_packets;
	kstat_named_t	ers_bytes;

	/*
	 * At this time we expect all incoming frames to fit in a
	 * single buffer/descriptor. In the rare event that the
	 * device doesn't cooperate, this stat is incremented.
	 */
	kstat_named_t	ers_multi_desc;

	/*
	 * The total number of times we failed to allocate a new mblk
	 * for an incoming frame.
	 */
	kstat_named_t	ers_allocb_fail;

	/*
	 * The total number of times the Rx interrupt handler reached
	 * its maximum limit for number of packets to process in a
	 * single interrupt. If you see this number increase
	 * continuously at a steady rate, then it may be an indication
	 * the driver is not entering polling mode.
	 */
	kstat_named_t	ers_intr_limit;

	/*
	 * The total number of times the device detected an incorrect
	 * IPv4 header checksum.
	 */
	kstat_named_t	ers_hck_ipv4_err;

	/*
	 * The total number of times the device detected an incorrect
	 * L4/ULP checksum.
	 */
	kstat_named_t	ers_hck_l4_err;
} ena_rxq_stat_t;
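
/*
 * An illustrative sketch of how ers_intr_limit is counted (not the
 * driver's actual code): the Rx interrupt handler stops after
 * er_intr_limit frames and notes that the limit was hit:
 *
 *	if (++nframes >= rxq->er_intr_limit) {
 *		rxq->er_stat.ers_intr_limit.value.ui64++;
 *		break;
 *	}
 */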

/*
 * A receive queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for receiving packets.
 *
 * Write Once (WO)
 *
 *    This value is written once, before the datapath is activated, in
 *    a function which is controlled by mac(9E).
 *
 * Rx Mutex (RM) -- er_lock
 *
 *    This value is protected by the Rx queue's mutex. Some values may
 *    be initialized in a WO path, but also continually updated as
 *    part of normal datapath operation, such as er_sq_avail_descs.
 *    These values need mutex protection.
 */
typedef struct ena_rxq {
	kmutex_t		er_lock;

	struct ena		*er_ena;	/* WO */
	uint_t			er_rxqs_idx;	/* WO */
	mac_ring_handle_t	er_mrh;		/* WO */
	uint64_t		er_m_gen_num;	/* WO */
	ena_rxq_state_t		er_state;	/* WO */
	uint16_t		er_intr_vector;	/* WO */
	ena_rxq_mode_t		er_mode;	/* RM */
	uint16_t		er_intr_limit;	/* RM */

	enahw_rx_desc_t		*er_sq_descs;	/* RM */
	ena_dma_buf_t		er_sq_dma;	/* WO */
	uint16_t		er_sq_num_descs;	/* WO */
	uint16_t		er_sq_avail_descs;	/* RM */
	uint16_t		er_sq_tail_idx;	/* RM */
	uint16_t		er_sq_phase;	/* RM */
	uint16_t		er_sq_hw_idx;	/* WO */
	uint32_t		*er_sq_db_addr;	/* WO */

	enahw_rx_cdesc_t	*er_cq_descs;	/* RM */
	ena_dma_buf_t		er_cq_dma;	/* WO */
	uint16_t		er_cq_num_descs;	/* WO */
	uint16_t		er_cq_head_idx;	/* RM */
	uint16_t		er_cq_phase;	/* RM */
	uint16_t		er_cq_hw_idx;	/* WO */
	uint32_t		*er_cq_unmask_addr;	/* WO */
	uint32_t		*er_cq_numa_addr;	/* WO (currently unused) */

	ena_rx_ctrl_block_t	*er_rcbs;	/* RM */

	kmutex_t		er_stat_lock;
	ena_rxq_stat_t		er_stat;
	kstat_t			*er_kstat;
} ena_rxq_t;

typedef struct ena_device_stat {
	kstat_named_t	eds_reset_forced;
	kstat_named_t	eds_reset_error;
	kstat_named_t	eds_reset_fatal;
	kstat_named_t	eds_reset_keepalive;
	kstat_named_t	eds_reset_txstall;
} ena_device_stat_t;

/*
 * These are stats based on enahw_resp_basic_stats_t and data that
 * accompanies the asynchronous keepalive event.
 */
typedef struct ena_basic_stat {
	kstat_named_t	ebs_tx_bytes;
	kstat_named_t	ebs_tx_pkts;
	kstat_named_t	ebs_tx_drops;

	kstat_named_t	ebs_rx_bytes;
	kstat_named_t	ebs_rx_pkts;
	kstat_named_t	ebs_rx_drops;
	kstat_named_t	ebs_rx_overruns;
} ena_basic_stat_t;

/* These are stats based on enahw_resp_eni_stats_t. */
typedef struct ena_extended_stat {
	kstat_named_t	ees_bw_in_exceeded;
	kstat_named_t	ees_bw_out_exceeded;
	kstat_named_t	ees_pps_exceeded;
	kstat_named_t	ees_conns_exceeded;
	kstat_named_t	ees_linklocal_exceeded;
} ena_extended_stat_t;

/* These stats monitor which AENQ handlers have been called. */
typedef struct ena_aenq_stat {
	kstat_named_t	eaes_default;
	kstat_named_t	eaes_link_change;
	kstat_named_t	eaes_notification;
	kstat_named_t	eaes_keep_alive;
	kstat_named_t	eaes_request_reset;
	kstat_named_t	eaes_fatal_error;
	kstat_named_t	eaes_warning;
} ena_aenq_stat_t;

#ifdef DEBUG
typedef struct ena_reg {
	const char	*er_name;
	const uint16_t	er_offset;
	uint32_t	er_value;
} ena_reg_t;
#endif

#define ENA_STATE_UNKNOWN	0x00u
#define ENA_STATE_INITIALIZED	0x01u
#define ENA_STATE_STARTED	0x02u
#define ENA_STATE_ERROR		0x04u
#define ENA_STATE_RESETTING	0x08u
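
/*
 * The state values above form a bit field. As an illustrative sketch
 * (not necessarily the driver's exact code), the flags can be set and
 * cleared with the atomic(9F) routines so that datapath readers need
 * no lock:
 *
 *	atomic_or_32(&ena->ena_state, ENA_STATE_STARTED);
 *	atomic_and_32(&ena->ena_state, ~ENA_STATE_STARTED);
 */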

/*
 * This structure contains the per-instance (PF or VF) state of the
 * device.
 */
typedef struct ena {
	dev_info_t		*ena_dip;
	int			ena_instance;

#ifdef DEBUG
	/*
	 * In debug kernels, the registers are cached here at various
	 * points for easy inspection via mdb(1).
	 */
	ena_reg_t		ena_reg[ENAHW_NUM_REGS];
#endif

	/*
	 * Global lock, used to synchronize administrative changes to
	 * the ena_t. This lock should not be held in the datapath.
	 */
	kmutex_t		ena_lock;
	ena_attach_seq_t	ena_attach_seq;

	/*
	 * We use atomic ops for ena_state so that datapath consumers
	 * do not need to enter ena_lock.
	 */
	uint32_t		ena_state;

	/*
	 * The reason for the last device reset.
	 */
	enahw_reset_reason_t	ena_reset_reason;

	/*
	 * Watchdog
	 */
	kmutex_t		ena_watchdog_lock;
	ddi_periodic_t		ena_watchdog_periodic;
	uint64_t		ena_watchdog_last_keepalive;

	/*
	 * PCI config space and BAR handle.
	 */
	ddi_acc_handle_t	ena_pci_hdl;
	off_t			ena_reg_size;
	caddr_t			ena_reg_base;
	ddi_device_acc_attr_t	ena_reg_attr;
	ddi_acc_handle_t	ena_reg_hdl;

	/*
	 * Vendor information.
	 */
	uint16_t		ena_pci_vid;
	uint16_t		ena_pci_did;
	uint8_t			ena_pci_rev;
	uint16_t		ena_pci_svid;
	uint16_t		ena_pci_sdid;

	/*
	 * Device and controller versions.
	 */
	uint32_t		ena_dev_major_vsn;
	uint32_t		ena_dev_minor_vsn;
	uint32_t		ena_ctrl_major_vsn;
	uint32_t		ena_ctrl_minor_vsn;
	uint32_t		ena_ctrl_subminor_vsn;
	uint32_t		ena_ctrl_impl_id;

	/*
	 * Interrupts
	 */
	int			ena_num_intrs;
	ddi_intr_handle_t	*ena_intr_handles;
	size_t			ena_intr_handles_sz;
	int			ena_intr_caps;
	uint_t			ena_intr_pri;

	mac_handle_t		ena_mh;

	size_t			ena_page_sz;

	/*
	 * The MTU and data layer frame sizes.
	 */
	uint32_t		ena_mtu;
	uint32_t		ena_max_frame_hdr;
	uint32_t		ena_max_frame_total;

	/* The size (in bytes) of the Rx/Tx data buffers. */
	uint32_t		ena_tx_buf_sz;
	uint32_t		ena_rx_buf_sz;

	/*
	 * The maximum number of Scatter Gather List segments the
	 * device can address.
	 */
	uint8_t			ena_tx_sgl_max_sz;
	uint8_t			ena_rx_sgl_max_sz;

	/* The number of descriptors per Rx/Tx queue. */
	uint16_t		ena_rxq_num_descs;
	uint16_t		ena_txq_num_descs;

	/*
	 * The maximum number of frames which may be read per Rx
	 * interrupt.
	 */
	uint16_t		ena_rxq_intr_limit;

	/* The Rx/Tx data queues (rings). */
	ena_rxq_t		*ena_rxqs;
	uint16_t		ena_num_rxqs;
	ena_txq_t		*ena_txqs;
	uint16_t		ena_num_txqs;

	/* These statistics are device-wide. */
	kstat_t			*ena_device_kstat;
	ena_device_stat_t	ena_device_stat;
	hrtime_t		ena_device_basic_stat_last_update;
	kmutex_t		ena_device_basic_stat_lock;
	kstat_t			*ena_device_basic_kstat;
	kstat_t			*ena_device_extended_kstat;

	/*
	 * This tracks AENQ-related stats; it is implicitly
	 * device-wide.
	 */
	ena_aenq_stat_t		ena_aenq_stat;
	kstat_t			*ena_aenq_kstat;

	/*
	 * The Admin Queue, through which all device commands are
	 * sent.
	 */
	ena_adminq_t		ena_aq;

	ena_aenq_t		ena_aenq;
	ena_dma_buf_t		ena_host_info;

	/*
	 * Hardware info
	 */
	ena_hints_t		ena_device_hints;
	uint32_t		ena_supported_features;
	uint32_t		ena_capabilities;
	uint8_t			ena_dma_width;
	bool			ena_link_autoneg;
	link_duplex_t		ena_link_duplex;
	uint64_t		ena_link_speed_mbits;
	enahw_link_speeds_t	ena_link_speeds;
	link_state_t		ena_link_state;
	uint32_t		ena_aenq_supported_groups;
	uint32_t		ena_aenq_enabled_groups;

	uint32_t		ena_tx_max_sq_num;
	uint32_t		ena_tx_max_sq_num_descs;
	uint32_t		ena_tx_max_cq_num;
	uint32_t		ena_tx_max_cq_num_descs;
	uint16_t		ena_tx_max_desc_per_pkt;
	uint32_t		ena_tx_max_hdr_len;

	uint32_t		ena_rx_max_sq_num;
	uint32_t		ena_rx_max_sq_num_descs;
	uint32_t		ena_rx_max_cq_num;
	uint32_t		ena_rx_max_cq_num_descs;
	uint16_t		ena_rx_max_desc_per_pkt;

	/* This is calculated from the Rx/Tx queue numbers. */
	uint16_t		ena_max_io_queues;

	/* Hardware Offloads */
	bool			ena_tx_l3_ipv4_csum;

	bool			ena_tx_l4_ipv4_part_csum;
	bool			ena_tx_l4_ipv4_full_csum;
	bool			ena_tx_l4_ipv4_lso;

	bool			ena_tx_l4_ipv6_part_csum;
	bool			ena_tx_l4_ipv6_full_csum;
	bool			ena_tx_l4_ipv6_lso;

	bool			ena_rx_l3_ipv4_csum;
	bool			ena_rx_l4_ipv4_csum;
	bool			ena_rx_l4_ipv6_csum;
	bool			ena_rx_hash;

	uint32_t		ena_max_mtu;
	uint8_t			ena_mac_addr[ETHERADDRL];
} ena_t;
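
/*
 * A note on the frame sizing fields above (a conventional reading,
 * not a statement of this driver's exact arithmetic):
 * ena_max_frame_hdr would cover the Ethernet header plus any VLAN
 * tag, giving a total on-wire frame of roughly
 *
 *	ena_max_frame_total == ena_max_frame_hdr + ena_mtu
 *
 * with Rx buffers sized to hold at least ena_max_frame_total bytes
 * plus the ENA_RX_BUF_IPHDR_ALIGNMENT shim.
 */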

/*
 * Misc
 */
extern bool ena_reset(ena_t *, const enahw_reset_reason_t);
extern bool ena_is_feat_avail(ena_t *, const enahw_feature_id_t);
extern bool ena_is_cap_avail(ena_t *, const enahw_capability_id_t);
extern void ena_update_hints(ena_t *, enahw_device_hints_t *);

/*
 * Logging functions.
 */
extern bool ena_debug;
extern void ena_err(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_dbg(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_panic(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_trigger_reset(ena_t *, enahw_reset_reason_t);

/*
 * Hardware access.
 */
extern uint32_t ena_hw_bar_read32(const ena_t *, const uint16_t);
extern uint32_t ena_hw_abs_read32(const ena_t *, uint32_t *);
extern void ena_hw_bar_write32(const ena_t *, const uint16_t, const uint32_t);
extern void ena_hw_abs_write32(const ena_t *, uint32_t *, const uint32_t);
extern const char *enahw_reset_reason(enahw_reset_reason_t);
#ifdef DEBUG
extern void ena_init_regcache(ena_t *);
extern void ena_update_regcache(ena_t *);
#else
#define ena_init_regcache(x)
#define ena_update_regcache(x)
#endif

/*
 * Watchdog
 */
extern void ena_enable_watchdog(ena_t *);
extern void ena_disable_watchdog(ena_t *);

/*
 * Stats
 */
extern void ena_stat_device_cleanup(ena_t *);
extern bool ena_stat_device_init(ena_t *);

extern void ena_stat_device_basic_cleanup(ena_t *);
extern bool ena_stat_device_basic_init(ena_t *);

extern void ena_stat_device_extended_cleanup(ena_t *);
extern bool ena_stat_device_extended_init(ena_t *);

extern void ena_stat_aenq_cleanup(ena_t *);
extern bool ena_stat_aenq_init(ena_t *);

extern void ena_stat_rxq_cleanup(ena_rxq_t *);
extern bool ena_stat_rxq_init(ena_rxq_t *);
extern void ena_stat_txq_cleanup(ena_txq_t *);
extern bool ena_stat_txq_init(ena_txq_t *);

/*
 * DMA
 */
extern bool ena_dma_alloc(ena_t *, ena_dma_buf_t *, ena_dma_conf_t *, size_t);
extern void ena_dma_free(ena_dma_buf_t *);
extern void ena_dma_bzero(ena_dma_buf_t *);
extern void ena_set_dma_addr(const ena_t *, const uint64_t, enahw_addr_t *);
extern void ena_set_dma_addr_values(const ena_t *, const uint64_t, uint32_t *,
    uint16_t *);
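
/*
 * An illustrative use of the DMA interfaces above (a sketch under
 * assumed values; the alignment and access flags here are examples,
 * not requirements):
 *
 *	ena_dma_conf_t conf = {
 *		.edc_size = size,
 *		.edc_align = 8,
 *		.edc_sgl = 1,
 *		.edc_endian = DDI_NEVERSWAP_ACC,
 *		.edc_stream = false,
 *	};
 *	ena_dma_buf_t buf;
 *
 *	if (!ena_dma_alloc(ena, &buf, &conf, size))
 *		(fail);
 *	ENA_DMA_SYNC(buf, DDI_DMA_SYNC_FORDEV);
 *	...
 *	ena_dma_free(&buf);
 */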

/*
 * Interrupts
 */
extern bool ena_intr_add_handlers(ena_t *);
extern void ena_intr_remove_handlers(ena_t *, bool);
extern void ena_tx_intr_work(ena_txq_t *);
extern void ena_rx_intr_work(ena_rxq_t *);
extern bool ena_intrs_disable(ena_t *);
extern bool ena_intrs_enable(ena_t *);

/*
 * MAC
 */
extern bool ena_mac_register(ena_t *);
extern int ena_mac_unregister(ena_t *);
extern void ena_ring_tx_stop(mac_ring_driver_t);
extern int ena_ring_tx_start(mac_ring_driver_t, uint64_t);
extern mblk_t *ena_ring_tx(void *, mblk_t *);
extern void ena_ring_rx_stop(mac_ring_driver_t);
extern int ena_ring_rx_start(mac_ring_driver_t, uint64_t);
extern int ena_m_stat(void *, uint_t, uint64_t *);
extern mblk_t *ena_ring_rx_poll(void *, int);
extern int ena_ring_rx_stat(mac_ring_driver_t, uint_t, uint64_t *);
extern int ena_ring_tx_stat(mac_ring_driver_t, uint_t, uint64_t *);

/*
 * Admin API
 */
extern int ena_admin_submit_cmd(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, ena_cmd_ctx_t **);
extern int ena_admin_poll_for_resp(ena_t *, ena_cmd_ctx_t *);
extern void ena_free_host_info(ena_t *);
extern bool ena_init_host_info(ena_t *);
extern void ena_create_cmd_ctx(ena_t *);
extern void ena_release_all_cmd_ctx(ena_t *);
extern int ena_create_cq(ena_t *, uint16_t, uint64_t, bool, uint32_t,
    uint16_t *, uint32_t **, uint32_t **);
extern int ena_destroy_cq(ena_t *, uint16_t);
extern int ena_create_sq(ena_t *, uint16_t, uint64_t, bool, uint16_t,
    uint16_t *, uint32_t **);
extern int ena_destroy_sq(ena_t *, uint16_t, bool);
extern int ena_set_feature(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, const enahw_feature_id_t, const uint8_t);
extern int ena_get_feature(ena_t *, enahw_resp_desc_t *,
    const enahw_feature_id_t, const uint8_t);
extern int ena_admin_get_basic_stats(ena_t *, enahw_resp_desc_t *);
extern int ena_admin_get_eni_stats(ena_t *, enahw_resp_desc_t *);
extern int enahw_resp_status_to_errno(ena_t *, enahw_resp_status_t);

/*
 * Async event queue
 */
extern bool ena_aenq_init(ena_t *);
extern bool ena_aenq_configure(ena_t *);
extern void ena_aenq_enable(ena_t *);
extern void ena_aenq_work(ena_t *);
extern void ena_aenq_free(ena_t *);

/*
 * Rx/Tx allocations
 */
extern bool ena_alloc_rxq(ena_rxq_t *);
extern void ena_cleanup_rxq(ena_rxq_t *, bool);
extern bool ena_alloc_txq(ena_txq_t *);
extern void ena_cleanup_txq(ena_txq_t *, bool);

#ifdef __cplusplus
}
#endif

#endif /* _ENA_H */