/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015-2024 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include "ena.h"
#include "ena_datapath.h"
#include "ena_rss.h"
#include "ena_sysctl.h"

#ifdef DEV_NETMAP
#include "ena_netmap.h"
#endif /* DEV_NETMAP */

/*********************************************************
 *  Function prototypes
 *********************************************************/
static int ena_probe(device_t);
static void ena_intr_msix_mgmnt(void *);
static void ena_free_pci_resources(struct ena_adapter *);
static int ena_change_mtu(if_t, int);
static inline void ena_alloc_counters(counter_u64_t *, int);
static inline void ena_free_counters(counter_u64_t *, int);
static inline void ena_reset_counters(counter_u64_t *, int);
static void ena_init_io_rings_common(struct ena_adapter *, struct ena_ring *,
    uint16_t);
static void ena_init_io_rings_basic(struct ena_adapter *);
static void ena_init_io_rings_advanced(struct ena_adapter *);
static void ena_init_io_rings(struct ena_adapter *);
static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
static void ena_free_all_io_rings_resources(struct ena_adapter *);
static int ena_setup_tx_dma_tag(struct ena_adapter *);
static int ena_free_tx_dma_tag(struct ena_adapter *);
static int ena_setup_rx_dma_tag(struct ena_adapter *);
static int ena_free_rx_dma_tag(struct ena_adapter *);
static void ena_release_all_tx_dmamap(struct ena_ring *);
static int ena_setup_tx_resources(struct ena_adapter *, int);
static void ena_free_tx_resources(struct ena_adapter *, int);
static int ena_setup_all_tx_resources(struct ena_adapter *);
static void ena_free_all_tx_resources(struct ena_adapter *);
static int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
static void ena_free_rx_resources(struct ena_adapter *, unsigned int);
static int ena_setup_all_rx_resources(struct ena_adapter *);
static void ena_free_all_rx_resources(struct ena_adapter *);
static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
    struct ena_rx_buffer *);
static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
    struct ena_rx_buffer *);
static void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
static void ena_refill_all_rx_bufs(struct ena_adapter *);
static void ena_free_all_rx_bufs(struct ena_adapter *);
static void ena_free_tx_bufs(struct ena_adapter *, unsigned int);
static void ena_free_all_tx_bufs(struct ena_adapter *);
static void ena_destroy_all_tx_queues(struct ena_adapter *);
static void ena_destroy_all_rx_queues(struct ena_adapter *);
static void ena_destroy_all_io_queues(struct ena_adapter *);
static int ena_create_io_queues(struct ena_adapter *);
static int ena_handle_msix(void *);
static int ena_enable_msix(struct ena_adapter *);
static void ena_setup_mgmnt_intr(struct ena_adapter *);
static int ena_setup_io_intr(struct ena_adapter *);
static int ena_request_mgmnt_irq(struct ena_adapter *);
static int ena_request_io_irq(struct ena_adapter *);
static void ena_free_mgmnt_irq(struct ena_adapter *);
static void ena_free_io_irq(struct ena_adapter *);
static void ena_free_irqs(struct ena_adapter *);
static void ena_disable_msix(struct ena_adapter *);
static void ena_unmask_all_io_irqs(struct ena_adapter *);
static int ena_up_complete(struct ena_adapter *);
static uint64_t ena_get_counter(if_t, ift_counter);
static int ena_media_change(if_t);
static void ena_media_status(if_t, struct ifmediareq *);
static void ena_init(void *);
static int ena_ioctl(if_t, u_long, caddr_t);
static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
static void ena_update_host_info(struct ena_admin_host_info *, if_t);
static void ena_update_hwassist(struct ena_adapter *);
static void ena_setup_ifnet(device_t, struct ena_adapter *,
    struct ena_com_dev_get_features_ctx *);
static int ena_enable_wc(device_t, struct resource *);
static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
    struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
static int ena_map_llq_mem_bar(device_t, struct ena_com_dev *);
static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
    struct ena_com_dev_get_features_ctx *);
static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
static void ena_config_host_info(struct ena_com_dev *, device_t);
static int ena_attach(device_t);
static int ena_detach(device_t);
static int ena_device_init(struct ena_adapter *, device_t,
    struct ena_com_dev_get_features_ctx *, int *);
static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
static void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *);
static int ena_copy_eni_metrics(struct ena_adapter *);
static int ena_copy_srd_metrics(struct ena_adapter *);
static int ena_copy_customer_metrics(struct ena_adapter *);
static void ena_timer_service(void *);

static char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME
    " v" ENA_DRV_MODULE_VERSION;

static ena_vendor_info_t ena_vendor_info_array[] = {
	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0 },
	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0 },
	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0 },
	{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0 },
	/* Last entry */
	{ 0, 0, 0 }
};

struct sx ena_global_lock;

/*
 * Contains pointers to event handlers, e.g. link state change.
 */
static struct ena_aenq_handlers aenq_handlers;

void
ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error != 0)
		return;
	*(bus_addr_t *)arg = segs[0].ds_addr;
}

int
ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma,
    int mapflags, bus_size_t alignment, int domain)
{
	struct ena_adapter *adapter = device_get_softc(dmadev);
	device_t pdev = adapter->pdev;
	uint32_t maxsize;
	uint64_t dma_space_addr;
	int error;

	maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;

	dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
	if (unlikely(dma_space_addr == 0))
		dma_space_addr = BUS_SPACE_MAXADDR;

	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
	    alignment, 0,	/* alignment, bounds */
	    dma_space_addr,	/* lowaddr of exclusion window */
	    BUS_SPACE_MAXADDR,	/* highaddr of exclusion window */
	    NULL, NULL,		/* filter, filterarg */
	    maxsize,		/* maxsize */
	    1,			/* nsegments */
	    maxsize,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockarg */
	    &dma->tag);
	if (unlikely(error != 0)) {
		ena_log(pdev, ERR, "bus_dma_tag_create failed: %d\n", error);
		goto fail_tag;
	}

	error = bus_dma_tag_set_domain(dma->tag, domain);
	if (unlikely(error != 0)) {
		ena_log(pdev, ERR, "bus_dma_tag_set_domain failed: %d\n",
		    error);
		goto fail_map_create;
	}

	error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr,
	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
	if (unlikely(error != 0)) {
		ena_log(pdev, ERR, "bus_dmamem_alloc(%ju) failed: %d\n",
		    (uintmax_t)size, error);
		goto fail_map_create;
	}

	dma->paddr = 0;
	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size,
	    ena_dmamap_callback, &dma->paddr, mapflags);
	if (unlikely((error != 0) || (dma->paddr == 0))) {
		ena_log(pdev, ERR, "bus_dmamap_load failed: %d\n", error);
		goto fail_map_load;
	}

	bus_dmamap_sync(dma->tag, dma->map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	return (0);

fail_map_load:
	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
fail_map_create:
	bus_dma_tag_destroy(dma->tag);
fail_tag:
	dma->tag = NULL;
	dma->vaddr = NULL;
	dma->paddr = 0;

	return (error);
}
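
/*
 * Typical usage of ena_dma_alloc() (illustrative sketch only; the
 * alignment and domain arguments below are placeholders, not driver
 * constants):
 *
 *	ena_mem_handle_t mem;
 *
 *	if (ena_dma_alloc(adapter->pdev, len, &mem, 0, 8, 0) != 0)
 *		return (ENOMEM);
 *	... the CPU uses mem.vaddr, the device is handed mem.paddr ...
 *
 * Teardown mirrors the fail_* labels above:
 *
 *	bus_dmamap_unload(mem.tag, mem.map);
 *	bus_dmamem_free(mem.tag, mem.vaddr, mem.map);
 *	bus_dma_tag_destroy(mem.tag);
 */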

static void
ena_free_pci_resources(struct ena_adapter *adapter)
{
	device_t pdev = adapter->pdev;

	if (adapter->memory != NULL) {
		bus_release_resource(pdev, SYS_RES_MEMORY,
		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
	}

	if (adapter->registers != NULL) {
		bus_release_resource(pdev, SYS_RES_MEMORY,
		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
	}

	if (adapter->msix != NULL) {
		bus_release_resource(pdev, SYS_RES_MEMORY, adapter->msix_rid,
		    adapter->msix);
	}
}

static int
ena_probe(device_t dev)
{
	ena_vendor_info_t *ent;
	uint16_t pci_vendor_id = 0;
	uint16_t pci_device_id = 0;

	pci_vendor_id = pci_get_vendor(dev);
	pci_device_id = pci_get_device(dev);

	ent = ena_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id)) {
			ena_log_raw(DBG, "vendor=%x device=%x\n",
			    pci_vendor_id, pci_device_id);

			device_set_desc(dev, ENA_DEVICE_DESC);
			return (BUS_PROBE_DEFAULT);
		}

		ent++;
	}

	return (ENXIO);
}

static int
ena_change_mtu(if_t ifp, int new_mtu)
{
	struct ena_adapter *adapter = if_getsoftc(ifp);
	device_t pdev = adapter->pdev;
	int rc;

	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
		ena_log(pdev, ERR,
		    "Invalid MTU setting. new_mtu: %d max mtu: %d min mtu: %d\n",
		    new_mtu, adapter->max_mtu, ENA_MIN_MTU);
		return (EINVAL);
	}

	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
	if (likely(rc == 0)) {
		ena_log(pdev, DBG, "set MTU to %d\n", new_mtu);
		if_setmtu(ifp, new_mtu);
	} else {
		ena_log(pdev, ERR, "Failed to set MTU to %d\n", new_mtu);
	}

	return (rc);
}

static inline void
ena_alloc_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		*begin = counter_u64_alloc(M_WAITOK);
}

static inline void
ena_free_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		counter_u64_free(*begin);
}

static inline void
ena_reset_counters(counter_u64_t *begin, int size)
{
	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);

	for (; begin < end; ++begin)
		counter_u64_zero(*begin);
}
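
/*
 * The three helpers above treat a statistics structure as a flat array of
 * counter_u64_t, so callers pass the struct address and its size, e.g.:
 *
 *	ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
 *	    sizeof(txr->tx_stats));
 *
 * This is only valid as long as the stats structures consist solely of
 * counter_u64_t members.
 */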

static void
ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
    uint16_t qid)
{
	ring->qid = qid;
	ring->adapter = adapter;
	ring->ena_dev = adapter->ena_dev;
	atomic_store_8(&ring->first_interrupt, 0);
	ring->no_interrupt_event_cnt = 0;
}

static void
ena_init_io_rings_basic(struct ena_adapter *adapter)
{
	struct ena_com_dev *ena_dev;
	struct ena_ring *txr, *rxr;
	struct ena_que *que;
	int i;

	ena_dev = adapter->ena_dev;

	for (i = 0; i < adapter->num_io_queues; i++) {
		txr = &adapter->tx_ring[i];
		rxr = &adapter->rx_ring[i];

		/* TX/RX common ring state */
		ena_init_io_rings_common(adapter, txr, i);
		ena_init_io_rings_common(adapter, rxr, i);

		/* TX specific ring state */
		txr->tx_max_header_size = ena_dev->tx_max_header_size;
		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;

		que = &adapter->que[i];
		que->adapter = adapter;
		que->id = i;
		que->tx_ring = txr;
		que->rx_ring = rxr;

		txr->que = que;
		rxr->que = que;

		rxr->empty_rx_queue = 0;
		rxr->rx_mbuf_sz = ena_mbuf_sz;
	}
}

static void
ena_init_io_rings_advanced(struct ena_adapter *adapter)
{
	struct ena_ring *txr, *rxr;
	int i;

	for (i = 0; i < adapter->num_io_queues; i++) {
		txr = &adapter->tx_ring[i];
		rxr = &adapter->rx_ring[i];

		/* Allocate a buf ring */
		txr->buf_ring_size = adapter->buf_ring_size;
		txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
		    M_WAITOK, &txr->ring_mtx);

		/* Allocate Tx statistics. */
		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
		    sizeof(txr->tx_stats));
		txr->tx_last_cleanup_ticks = ticks;

		/* Allocate Rx statistics. */
		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
		    sizeof(rxr->rx_stats));

		/* Initialize locks */
		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(adapter->pdev), i);
		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(adapter->pdev), i);

		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
	}
}

static void
ena_init_io_rings(struct ena_adapter *adapter)
{
	/*
	 * IO rings initialization can be divided into two steps:
	 * 1. Initialize variables and fields with initial values and copy
	 *    them from adapter/ena_dev (basic)
	 * 2. Allocate mutex, counters and buf_ring (advanced)
	 */
	ena_init_io_rings_basic(adapter);
	ena_init_io_rings_advanced(adapter);
}

static void
ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
{
	struct ena_ring *txr = &adapter->tx_ring[qid];
	struct ena_ring *rxr = &adapter->rx_ring[qid];

	ena_free_counters((counter_u64_t *)&txr->tx_stats,
	    sizeof(txr->tx_stats));
	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
	    sizeof(rxr->rx_stats));

	ENA_RING_MTX_LOCK(txr);
	drbr_free(txr->br, M_DEVBUF);
	ENA_RING_MTX_UNLOCK(txr);

	mtx_destroy(&txr->ring_mtx);
}

static void
ena_free_all_io_rings_resources(struct ena_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_io_queues; i++)
		ena_free_io_ring_resources(adapter, i);
}

static int
ena_setup_tx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	/* Create DMA tag for Tx buffers */
	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
	    1, 0,				  /* alignment, bounds */
	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
	    BUS_SPACE_MAXADDR,			  /* highaddr of excl window */
	    NULL, NULL,				  /* filter, filterarg */
	    ENA_TSO_MAXSIZE,			  /* maxsize */
	    adapter->max_tx_sgl_size - 1,	  /* nsegments */
	    ENA_TSO_MAXSIZE,			  /* maxsegsize */
	    0,					  /* flags */
	    NULL,				  /* lockfunc */
	    NULL,				  /* lockfuncarg */
	    &adapter->tx_buf_tag);

	return (ret);
}
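
/*
 * A note on the Tx tag parameters above: passing the device DMA mask as
 * lowaddr makes busdma bounce any buffer that falls above the
 * device-addressable range; maxsize/maxsegsize are bounded by
 * ENA_TSO_MAXSIZE since a TSO frame is the largest mapping the Tx path will
 * request. nsegments is max_tx_sgl_size - 1, presumably leaving one device
 * descriptor for the pushed/copied packet header.
 */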

static int
ena_free_tx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);

	if (likely(ret == 0))
		adapter->tx_buf_tag = NULL;

	return (ret);
}

static int
ena_setup_rx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	/* Create DMA tag for Rx buffers */
	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */
	    1, 0,				  /* alignment, bounds */
	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
	    BUS_SPACE_MAXADDR,			  /* highaddr of excl window */
	    NULL, NULL,				  /* filter, filterarg */
	    ena_mbuf_sz,			  /* maxsize */
	    adapter->max_rx_sgl_size,		  /* nsegments */
	    ena_mbuf_sz,			  /* maxsegsize */
	    0,					  /* flags */
	    NULL,				  /* lockfunc */
	    NULL,				  /* lockarg */
	    &adapter->rx_buf_tag);

	return (ret);
}

static int
ena_free_rx_dma_tag(struct ena_adapter *adapter)
{
	int ret;

	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);

	if (likely(ret == 0))
		adapter->rx_buf_tag = NULL;

	return (ret);
}

int
validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id, int tx_req_id_rc)
{
	struct ena_adapter *adapter = tx_ring->adapter;
	enum ena_regs_reset_reason_types reset_reason =
	    ENA_REGS_RESET_INV_TX_REQ_ID;

	if (unlikely(tx_req_id_rc != 0)) {
		if (tx_req_id_rc == ENA_COM_FAULT) {
			reset_reason = ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED;
			ena_log(adapter->pdev, ERR,
			    "TX descriptor malformed. req_id %hu qid %hu\n",
			    req_id, tx_ring->qid);
		} else if (tx_req_id_rc == ENA_COM_INVAL) {
			ena_log_nm(adapter->pdev, WARN,
			    "Invalid req_id %hu in qid %hu\n",
			    req_id, tx_ring->qid);
			counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
		}

		ena_trigger_reset(adapter, reset_reason);
		return (EFAULT);
	}

	return (0);
}
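
/*
 * validate_tx_req_id() maps ena_com errors to reset reasons as follows:
 *
 *	ENA_COM_FAULT -> ENA_REGS_RESET_TX_DESCRIPTOR_MALFORMED
 *	ENA_COM_INVAL -> ENA_REGS_RESET_INV_TX_REQ_ID (the default)
 *
 * In both cases a device reset is scheduled and EFAULT is returned to the
 * caller.
 */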

static void
ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
{
	struct ena_adapter *adapter = tx_ring->adapter;
	struct ena_tx_buffer *tx_info;
	bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
	int i;
#ifdef DEV_NETMAP
	struct ena_netmap_tx_info *nm_info;
	int j;
#endif /* DEV_NETMAP */

	for (i = 0; i < tx_ring->ring_size; ++i) {
		tx_info = &tx_ring->tx_buffer_info[i];
#ifdef DEV_NETMAP
		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
			nm_info = &tx_info->nm_info;
			for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
				if (nm_info->map_seg[j] != NULL) {
					bus_dmamap_destroy(tx_tag,
					    nm_info->map_seg[j]);
					nm_info->map_seg[j] = NULL;
				}
			}
		}
#endif /* DEV_NETMAP */
		if (tx_info->dmamap != NULL) {
			bus_dmamap_destroy(tx_tag, tx_info->dmamap);
			tx_info->dmamap = NULL;
		}
	}
}

/**
 * ena_setup_tx_resources - allocate Tx resources (Descriptors)
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Returns 0 on success, otherwise on failure.
 **/
static int
ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
{
	device_t pdev = adapter->pdev;
	char thread_name[MAXCOMLEN + 1];
	struct ena_que *que = &adapter->que[qid];
	struct ena_ring *tx_ring = que->tx_ring;
	cpuset_t *cpu_mask = NULL;
	int size, i, err;
#ifdef DEV_NETMAP
	bus_dmamap_t *map;
	int j;

	ena_netmap_reset_tx_ring(adapter, qid);
#endif /* DEV_NETMAP */

	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;

	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (unlikely(tx_ring->tx_buffer_info == NULL))
		return (ENOMEM);

	size = sizeof(uint16_t) * tx_ring->ring_size;
	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (unlikely(tx_ring->free_tx_ids == NULL))
		goto err_buf_info_free;

	size = tx_ring->tx_max_header_size;
	tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
		goto err_tx_ids_free;

	/* Req id stack for TX OOO completions */
	for (i = 0; i < tx_ring->ring_size; i++)
		tx_ring->free_tx_ids[i] = i;

	/* Reset TX statistics. */
	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
	    sizeof(tx_ring->tx_stats));

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
	tx_ring->acum_pkts = 0;

	/* Make sure that drbr is empty */
	ENA_RING_MTX_LOCK(tx_ring);
	drbr_flush(adapter->ifp, tx_ring->br);
	ENA_RING_MTX_UNLOCK(tx_ring);

	/* ... and create the buffer DMA maps */
	for (i = 0; i < tx_ring->ring_size; i++) {
		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
		    &tx_ring->tx_buffer_info[i].dmamap);
		if (unlikely(err != 0)) {
			ena_log(pdev, ERR,
			    "Unable to create Tx DMA map for buffer %d\n", i);
			goto err_map_release;
		}

#ifdef DEV_NETMAP
		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
			map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
				err = bus_dmamap_create(adapter->tx_buf_tag, 0,
				    &map[j]);
				if (unlikely(err != 0)) {
					ena_log(pdev, ERR,
					    "Unable to create Tx DMA map for buffer %d %d\n",
					    i, j);
					goto err_map_release;
				}
			}
		}
#endif /* DEV_NETMAP */
	}

	/* Allocate taskqueues */
	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
	if (unlikely(tx_ring->enqueue_tq == NULL)) {
		ena_log(pdev, ERR,
		    "Unable to create taskqueue for enqueue task\n");
		i = tx_ring->ring_size;
		goto err_map_release;
	}

	tx_ring->running = true;

#ifdef RSS
	cpu_mask = &que->cpu_mask;
	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
	    device_get_nameunit(adapter->pdev), que->cpu);
#else
	snprintf(thread_name, sizeof(thread_name), "%s txeq %d",
	    device_get_nameunit(adapter->pdev), que->id);
#endif
	taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
	    cpu_mask, "%s", thread_name);

	return (0);

err_map_release:
	ena_release_all_tx_dmamap(tx_ring);
err_tx_ids_free:
	free(tx_ring->free_tx_ids, M_DEVBUF);
	tx_ring->free_tx_ids = NULL;
err_buf_info_free:
	free(tx_ring->tx_buffer_info, M_DEVBUF);
	tx_ring->tx_buffer_info = NULL;

	return (ENOMEM);
}
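
/*
 * Each Tx ring gets its own single-threaded taskqueue above. Mbufs that
 * cannot be transmitted inline are staged on the ring's drbr and later
 * drained by the enqueue_task (ena_deferred_mq_start(), from
 * ena_datapath.h), whose thread is optionally pinned to the queue's CPU
 * when RSS is enabled.
 */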

/**
 * ena_free_tx_resources - Free Tx Resources per Queue
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Free all transmit software resources
 **/
static void
ena_free_tx_resources(struct ena_adapter *adapter, int qid)
{
	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
#ifdef DEV_NETMAP
	struct ena_netmap_tx_info *nm_info;
	int j;
#endif /* DEV_NETMAP */

	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
	    NULL))
		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);

	taskqueue_free(tx_ring->enqueue_tq);

	ENA_RING_MTX_LOCK(tx_ring);
	/* Flush buffer ring, */
	drbr_flush(adapter->ifp, tx_ring->br);

	/* Free buffer DMA maps, */
	for (int i = 0; i < tx_ring->ring_size; i++) {
		bus_dmamap_sync(adapter->tx_buf_tag,
		    tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->tx_buf_tag,
		    tx_ring->tx_buffer_info[i].dmamap);
		bus_dmamap_destroy(adapter->tx_buf_tag,
		    tx_ring->tx_buffer_info[i].dmamap);

#ifdef DEV_NETMAP
		if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) {
			nm_info = &tx_ring->tx_buffer_info[i].nm_info;
			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
				if (nm_info->socket_buf_idx[j] != 0) {
					bus_dmamap_sync(adapter->tx_buf_tag,
					    nm_info->map_seg[j],
					    BUS_DMASYNC_POSTWRITE);
					ena_netmap_unload(adapter,
					    nm_info->map_seg[j]);
				}
				bus_dmamap_destroy(adapter->tx_buf_tag,
				    nm_info->map_seg[j]);
				nm_info->socket_buf_idx[j] = 0;
			}
		}
#endif /* DEV_NETMAP */

		m_freem(tx_ring->tx_buffer_info[i].mbuf);
		tx_ring->tx_buffer_info[i].mbuf = NULL;
	}
	ENA_RING_MTX_UNLOCK(tx_ring);

	/* And free allocated memory. */
	free(tx_ring->tx_buffer_info, M_DEVBUF);
	tx_ring->tx_buffer_info = NULL;

	free(tx_ring->free_tx_ids, M_DEVBUF);
	tx_ring->free_tx_ids = NULL;

	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
	tx_ring->push_buf_intermediate_buf = NULL;
}
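
/*
 * The cancel/drain loop above relies on taskqueue_cancel() returning
 * non-zero while the enqueue task is still running; draining and retrying
 * guarantees the task can no longer touch the ring before its taskqueue
 * and buffers are freed.
 */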

/**
 * ena_setup_all_tx_resources - allocate all queues Tx resources
 * @adapter: network interface device structure
 *
 * Returns 0 on success, otherwise on failure.
 **/
static int
ena_setup_all_tx_resources(struct ena_adapter *adapter)
{
	int i, rc;

	for (i = 0; i < adapter->num_io_queues; i++) {
		rc = ena_setup_tx_resources(adapter, i);
		if (rc != 0) {
			ena_log(adapter->pdev, ERR,
			    "Allocation for Tx Queue %u failed\n", i);
			goto err_setup_tx;
		}
	}

	return (0);

err_setup_tx:
	/* Rewind the index freeing the rings as we go */
	while (i--)
		ena_free_tx_resources(adapter, i);
	return (rc);
}

/**
 * ena_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: network interface device structure
 *
 * Free all transmit software resources
 **/
static void
ena_free_all_tx_resources(struct ena_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_io_queues; i++)
		ena_free_tx_resources(adapter, i);
}

/**
 * ena_setup_rx_resources - allocate Rx resources (Descriptors)
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Returns 0 on success, otherwise on failure.
 **/
static int
ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
{
	device_t pdev = adapter->pdev;
	struct ena_que *que = &adapter->que[qid];
	struct ena_ring *rx_ring = que->rx_ring;
	int size, err, i;

	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;

#ifdef DEV_NETMAP
	ena_netmap_reset_rx_ring(adapter, qid);
	rx_ring->initialized = false;
#endif /* DEV_NETMAP */

	/*
	 * Alloc extra element so in rx path
	 * we can always prefetch rx_info + 1
	 */
	size += sizeof(struct ena_rx_buffer);

	rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

	size = sizeof(uint16_t) * rx_ring->ring_size;
	rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);

	for (i = 0; i < rx_ring->ring_size; i++)
		rx_ring->free_rx_ids[i] = i;

	/* Reset RX statistics. */
	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
	    sizeof(rx_ring->rx_stats));

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	/* ... and create the buffer DMA maps */
	for (i = 0; i < rx_ring->ring_size; i++) {
		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
		    &(rx_ring->rx_buffer_info[i].map));
		if (err != 0) {
			ena_log(pdev, ERR,
			    "Unable to create Rx DMA map for buffer %d\n", i);
			goto err_buf_info_unmap;
		}
	}

	/* Create LRO for the ring */
	if ((if_getcapenable(adapter->ifp) & IFCAP_LRO) != 0) {
		int err = tcp_lro_init(&rx_ring->lro);
		if (err != 0) {
			ena_log(pdev, ERR, "LRO[%d] Initialization failed!\n",
			    qid);
		} else {
			ena_log(pdev, DBG, "RX Soft LRO[%d] Initialized\n",
			    qid);
			rx_ring->lro.ifp = adapter->ifp;
		}
	}

	return (0);

err_buf_info_unmap:
	while (i--) {
		bus_dmamap_destroy(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map);
	}

	free(rx_ring->free_rx_ids, M_DEVBUF);
	rx_ring->free_rx_ids = NULL;
	free(rx_ring->rx_buffer_info, M_DEVBUF);
	rx_ring->rx_buffer_info = NULL;
	return (ENOMEM);
}

/**
 * ena_free_rx_resources - Free Rx Resources
 * @adapter: network interface device structure
 * @qid: queue index
 *
 * Free all receive software resources
 **/
static void
ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
{
	struct ena_ring *rx_ring = &adapter->rx_ring[qid];

	/* Free buffer DMA maps, */
	for (int i = 0; i < rx_ring->ring_size; i++) {
		bus_dmamap_sync(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
		m_freem(rx_ring->rx_buffer_info[i].mbuf);
		rx_ring->rx_buffer_info[i].mbuf = NULL;
		bus_dmamap_unload(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map);
		bus_dmamap_destroy(adapter->rx_buf_tag,
		    rx_ring->rx_buffer_info[i].map);
	}

	/* free LRO resources, */
	tcp_lro_free(&rx_ring->lro);

	/* free allocated memory */
	free(rx_ring->rx_buffer_info, M_DEVBUF);
	rx_ring->rx_buffer_info = NULL;

	free(rx_ring->free_rx_ids, M_DEVBUF);
	rx_ring->free_rx_ids = NULL;
}

/**
 * ena_setup_all_rx_resources - allocate all queues Rx resources
 * @adapter: network interface device structure
 *
 * Returns 0 on success, otherwise on failure.
 **/
static int
ena_setup_all_rx_resources(struct ena_adapter *adapter)
{
	int i, rc = 0;

	for (i = 0; i < adapter->num_io_queues; i++) {
		rc = ena_setup_rx_resources(adapter, i);
		if (rc != 0) {
			ena_log(adapter->pdev, ERR,
			    "Allocation for Rx Queue %u failed\n", i);
			goto err_setup_rx;
		}
	}
	return (0);

err_setup_rx:
	/* rewind the index freeing the rings as we go */
	while (i--)
		ena_free_rx_resources(adapter, i);
	return (rc);
}

/**
 * ena_free_all_rx_resources - Free Rx resources for all queues
 * @adapter: network interface device structure
 *
 * Free all receive software resources
 **/
static void
ena_free_all_rx_resources(struct ena_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_io_queues; i++)
		ena_free_rx_resources(adapter, i);
}

static inline int
ena_alloc_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
    struct ena_rx_buffer *rx_info)
{
	device_t pdev = adapter->pdev;
	struct ena_com_buf *ena_buf;
	bus_dma_segment_t segs[1];
	int nsegs, error;
	int mlen;

	/* if previous allocated frag is not used */
	if (unlikely(rx_info->mbuf != NULL))
		return (0);

	/* Get mbuf using UMA allocator */
	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
	    rx_ring->rx_mbuf_sz);

	if (unlikely(rx_info->mbuf == NULL)) {
		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
		rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (unlikely(rx_info->mbuf == NULL)) {
			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
			return (ENOMEM);
		}
		mlen = MCLBYTES;
	} else {
		mlen = rx_ring->rx_mbuf_sz;
	}
	/* Set mbuf length */
	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;

	/* Map packets for DMA */
	ena_log(pdev, DBG,
	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
	    adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	if (unlikely((error != 0) || (nsegs != 1))) {
		ena_log(pdev, WARN,
		    "failed to map mbuf, error: %d, nsegs: %d\n", error,
		    nsegs);
		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
		goto exit;
	}

	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
	    BUS_DMASYNC_PREREAD);

	ena_buf = &rx_info->ena_buf;
	ena_buf->paddr = segs[0].ds_addr;
	ena_buf->len = mlen;

	ena_log(pdev, DBG,
	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);

	return (0);

exit:
	m_freem(rx_info->mbuf);
	rx_info->mbuf = NULL;
	return (EFAULT);
}

static void
ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
    struct ena_rx_buffer *rx_info)
{
	if (rx_info->mbuf == NULL) {
		ena_log(adapter->pdev, WARN,
		    "Trying to free unallocated buffer\n");
		return;
	}

	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
	    BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
	m_freem(rx_info->mbuf);
	rx_info->mbuf = NULL;
}
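
/*
 * Rx buffer allocation above is two-tier: ena_alloc_rx_mbuf() first tries
 * a jumbo cluster of rx_mbuf_sz bytes and, on failure, falls back to a
 * standard MCLBYTES cluster, bumping mjum_alloc_fail or mbuf_alloc_fail
 * respectively so the per-ring stats show which allocation tier is failing.
 */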

/**
 * ena_refill_rx_bufs - Refills ring with descriptors
 * @rx_ring: the ring which we want to feed with free descriptors
 * @num: number of descriptors to refill
 *
 * Refills the ring with newly allocated DMA-mapped mbufs for receiving
 **/
int
ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
{
	struct ena_adapter *adapter = rx_ring->adapter;
	device_t pdev = adapter->pdev;
	uint16_t next_to_use, req_id;
	uint32_t i;
	int rc;

	ena_log_io(adapter->pdev, DBG, "refill qid: %d\n", rx_ring->qid);

	next_to_use = rx_ring->next_to_use;

	for (i = 0; i < num; i++) {
		struct ena_rx_buffer *rx_info;

		ena_log_io(pdev, DBG, "RX buffer - next to use: %d\n",
		    next_to_use);

		req_id = rx_ring->free_rx_ids[next_to_use];
		rx_info = &rx_ring->rx_buffer_info[req_id];
#ifdef DEV_NETMAP
		if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
			rc = ena_netmap_alloc_rx_slot(adapter, rx_ring,
			    rx_info);
		else
#endif /* DEV_NETMAP */
			rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
		if (unlikely(rc != 0)) {
			ena_log_io(pdev, WARN,
			    "failed to alloc buffer for rx queue %d\n",
			    rx_ring->qid);
			break;
		}
		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
		    &rx_info->ena_buf, req_id);
		if (unlikely(rc != 0)) {
			ena_log_io(pdev, WARN,
			    "failed to add buffer for rx queue %d\n",
			    rx_ring->qid);
			break;
		}
		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
		    rx_ring->ring_size);
	}

	if (unlikely(i < num)) {
		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
		ena_log_io(pdev, WARN,
		    "refilled rx qid %d with only %d mbufs (from %d)\n",
		    rx_ring->qid, i, num);
	}

	if (likely(i != 0))
		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);

	rx_ring->next_to_use = next_to_use;
	return (i);
}
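
/*
 * ena_refill_rx_bufs() returns the number of descriptors actually posted,
 * which may be less than requested; a partial refill bumps refil_partial,
 * and the doorbell is rung once, after the loop, to submit whatever was
 * queued.
 */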

int
ena_update_buf_ring_size(struct ena_adapter *adapter,
    uint32_t new_buf_ring_size)
{
	uint32_t old_buf_ring_size;
	int rc = 0;
	bool dev_was_up;

	old_buf_ring_size = adapter->buf_ring_size;
	adapter->buf_ring_size = new_buf_ring_size;

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	ena_down(adapter);

	/* Reconfigure buf ring for all Tx rings. */
	ena_free_all_io_rings_resources(adapter);
	ena_init_io_rings_advanced(adapter);
	if (dev_was_up) {
		/*
		 * If ena_up() fails, it's not because of the recent buf_ring
		 * size change. Because of that, we just want to revert the
		 * old drbr value and trigger the reset because something
		 * else had to go wrong.
		 */
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			ena_log(adapter->pdev, ERR,
			    "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
			    new_buf_ring_size, old_buf_ring_size);

			/* Revert old size and trigger the reset */
			adapter->buf_ring_size = old_buf_ring_size;
			ena_free_all_io_rings_resources(adapter);
			ena_init_io_rings_advanced(adapter);

			ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
			    adapter);
			ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
		}
	}

	return (rc);
}

int
ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
    uint32_t new_rx_size)
{
	uint32_t old_tx_size, old_rx_size;
	int rc = 0;
	bool dev_was_up;

	old_tx_size = adapter->requested_tx_ring_size;
	old_rx_size = adapter->requested_rx_ring_size;
	adapter->requested_tx_ring_size = new_tx_size;
	adapter->requested_rx_ring_size = new_rx_size;

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	ena_down(adapter);

	/* Configure queues with new size. */
	ena_init_io_rings_basic(adapter);
	if (dev_was_up) {
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			ena_log(adapter->pdev, ERR,
			    "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
			    new_tx_size, new_rx_size, old_tx_size,
			    old_rx_size);

			/* Revert old size. */
			adapter->requested_tx_ring_size = old_tx_size;
			adapter->requested_rx_ring_size = old_rx_size;
			ena_init_io_rings_basic(adapter);

			/* And try again. */
			rc = ena_up(adapter);
			if (unlikely(rc != 0)) {
				ena_log(adapter->pdev, ERR,
				    "Failed to revert old queue sizes. Triggering device reset.\n");
				/*
				 * If we've failed again, something had to go
				 * wrong. After reset, the device should try
				 * to go up.
				 */
				ENA_FLAG_SET_ATOMIC(
				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
				ena_trigger_reset(adapter,
				    ENA_REGS_RESET_OS_TRIGGER);
			}
		}
	}

	return (rc);
}
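
/*
 * Both update helpers above follow the driver's reconfiguration pattern:
 * bring the interface down, apply the new setting, and bring it back up if
 * it was previously up. On failure the old value is restored and, if the
 * interface still cannot be recovered, a reset is triggered with
 * ENA_REGS_RESET_OS_TRIGGER. The CPU- and queue-count helpers below follow
 * the same scheme.
 */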

static void
ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
{
	ena_free_all_io_rings_resources(adapter);
	/* Force indirection table to be reinitialized */
	ena_com_rss_destroy(adapter->ena_dev);

	adapter->num_io_queues = num;
	ena_init_io_rings(adapter);
}

int
ena_update_base_cpu(struct ena_adapter *adapter, int new_num)
{
	int old_num;
	int rc = 0;
	bool dev_was_up;

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	old_num = adapter->irq_cpu_base;

	ena_down(adapter);

	adapter->irq_cpu_base = new_num;

	if (dev_was_up) {
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			ena_log(adapter->pdev, ERR,
			    "Failed to configure device with %d as the IRQ "
			    "base CPU. Reverting to previous value: %d\n",
			    new_num, old_num);

			adapter->irq_cpu_base = old_num;

			rc = ena_up(adapter);
			if (unlikely(rc != 0)) {
				ena_log(adapter->pdev, ERR,
				    "Failed to revert to previous setup. "
				    "Triggering device reset.\n");
				ENA_FLAG_SET_ATOMIC(
				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
				ena_trigger_reset(adapter,
				    ENA_REGS_RESET_OS_TRIGGER);
			}
		}
	}
	return (rc);
}

int
ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num)
{
	uint32_t old_num;
	int rc = 0;
	bool dev_was_up;

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	old_num = adapter->irq_cpu_stride;

	ena_down(adapter);

	adapter->irq_cpu_stride = new_num;

	if (dev_was_up) {
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			ena_log(adapter->pdev, ERR,
			    "Failed to configure device with %d as the IRQ "
			    "CPU stride. Reverting to previous value: %d\n",
			    new_num, old_num);

			adapter->irq_cpu_stride = old_num;

			rc = ena_up(adapter);
			if (unlikely(rc != 0)) {
				ena_log(adapter->pdev, ERR,
				    "Failed to revert to previous setup. "
				    "Triggering device reset.\n");
				ENA_FLAG_SET_ATOMIC(
				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
				ena_trigger_reset(adapter,
				    ENA_REGS_RESET_OS_TRIGGER);
			}
		}
	}
	return (rc);
}

/* Caller should sanitize new_num */
int
ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
{
	uint32_t old_num;
	int rc = 0;
	bool dev_was_up;

	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
	old_num = adapter->num_io_queues;
	ena_down(adapter);

	ena_update_io_rings(adapter, new_num);

	if (dev_was_up) {
		rc = ena_up(adapter);
		if (unlikely(rc != 0)) {
			ena_log(adapter->pdev, ERR,
			    "Failed to configure device with %u IO queues. "
			    "Reverting to previous value: %u\n",
			    new_num, old_num);

			ena_update_io_rings(adapter, old_num);

			rc = ena_up(adapter);
			if (unlikely(rc != 0)) {
				ena_log(adapter->pdev, ERR,
				    "Failed to revert to previous setup IO "
				    "queues. Triggering device reset.\n");
				ENA_FLAG_SET_ATOMIC(
				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
				ena_trigger_reset(adapter,
				    ENA_REGS_RESET_OS_TRIGGER);
			}
		}
	}

	return (rc);
}
" 1426 "Allocated %d buffers from: %d\n", 1427 i, rc, bufs_num); 1428 #ifdef DEV_NETMAP 1429 rx_ring->initialized = true; 1430 #endif /* DEV_NETMAP */ 1431 } 1432 } 1433 1434 static void 1435 ena_free_all_rx_bufs(struct ena_adapter *adapter) 1436 { 1437 int i; 1438 1439 for (i = 0; i < adapter->num_io_queues; i++) 1440 ena_free_rx_bufs(adapter, i); 1441 } 1442 1443 /** 1444 * ena_free_tx_bufs - Free Tx Buffers per Queue 1445 * @adapter: network interface device structure 1446 * @qid: queue index 1447 **/ 1448 static void 1449 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid) 1450 { 1451 bool print_once = true; 1452 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 1453 1454 ENA_RING_MTX_LOCK(tx_ring); 1455 for (int i = 0; i < tx_ring->ring_size; i++) { 1456 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; 1457 1458 if (tx_info->mbuf == NULL) 1459 continue; 1460 1461 if (print_once) { 1462 ena_log(adapter->pdev, WARN, 1463 "free uncompleted tx mbuf qid %d idx 0x%x\n", qid, 1464 i); 1465 print_once = false; 1466 } else { 1467 ena_log(adapter->pdev, DBG, 1468 "free uncompleted tx mbuf qid %d idx 0x%x\n", qid, 1469 i); 1470 } 1471 1472 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 1473 BUS_DMASYNC_POSTWRITE); 1474 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 1475 1476 m_free(tx_info->mbuf); 1477 tx_info->mbuf = NULL; 1478 } 1479 ENA_RING_MTX_UNLOCK(tx_ring); 1480 } 1481 1482 static void 1483 ena_free_all_tx_bufs(struct ena_adapter *adapter) 1484 { 1485 for (int i = 0; i < adapter->num_io_queues; i++) 1486 ena_free_tx_bufs(adapter, i); 1487 } 1488 1489 static void 1490 ena_destroy_all_tx_queues(struct ena_adapter *adapter) 1491 { 1492 uint16_t ena_qid; 1493 int i; 1494 1495 for (i = 0; i < adapter->num_io_queues; i++) { 1496 ena_qid = ENA_IO_TXQ_IDX(i); 1497 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1498 } 1499 } 1500 1501 static void 1502 ena_destroy_all_rx_queues(struct ena_adapter *adapter) 1503 { 1504 uint16_t ena_qid; 1505 int i; 1506 1507 for (i = 0; i < adapter->num_io_queues; i++) { 1508 ena_qid = ENA_IO_RXQ_IDX(i); 1509 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1510 } 1511 } 1512 1513 static void 1514 ena_destroy_all_io_queues(struct ena_adapter *adapter) 1515 { 1516 struct ena_que *queue; 1517 int i; 1518 1519 for (i = 0; i < adapter->num_io_queues; i++) { 1520 queue = &adapter->que[i]; 1521 while (taskqueue_cancel(queue->cleanup_tq, &queue->cleanup_task, NULL)) 1522 taskqueue_drain(queue->cleanup_tq, &queue->cleanup_task); 1523 taskqueue_free(queue->cleanup_tq); 1524 } 1525 1526 ena_destroy_all_tx_queues(adapter); 1527 ena_destroy_all_rx_queues(adapter); 1528 } 1529 1530 static int 1531 ena_create_io_queues(struct ena_adapter *adapter) 1532 { 1533 struct ena_com_dev *ena_dev = adapter->ena_dev; 1534 struct ena_com_create_io_ctx ctx; 1535 struct ena_ring *ring; 1536 struct ena_que *queue; 1537 uint16_t ena_qid; 1538 uint32_t msix_vector; 1539 cpuset_t *cpu_mask = NULL; 1540 int rc, i; 1541 1542 /* Create TX queues */ 1543 for (i = 0; i < adapter->num_io_queues; i++) { 1544 msix_vector = ENA_IO_IRQ_IDX(i); 1545 ena_qid = ENA_IO_TXQ_IDX(i); 1546 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1547 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1548 ctx.queue_size = adapter->requested_tx_ring_size; 1549 ctx.msix_vector = msix_vector; 1550 ctx.qid = ena_qid; 1551 ctx.numa_node = adapter->que[i].domain; 1552 1553 rc = ena_com_create_io_queue(ena_dev, &ctx); 1554 if (rc != 0) { 1555 ena_log(adapter->pdev, ERR, 1556 
"Failed to create io TX queue #%d rc: %d\n", i, rc); 1557 goto err_tx; 1558 } 1559 ring = &adapter->tx_ring[i]; 1560 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1561 &ring->ena_com_io_sq, &ring->ena_com_io_cq); 1562 if (rc != 0) { 1563 ena_log(adapter->pdev, ERR, 1564 "Failed to get TX queue handlers. TX queue num" 1565 " %d rc: %d\n", 1566 i, rc); 1567 ena_com_destroy_io_queue(ena_dev, ena_qid); 1568 goto err_tx; 1569 } 1570 1571 if (ctx.numa_node >= 0) { 1572 ena_com_update_numa_node(ring->ena_com_io_cq, 1573 ctx.numa_node); 1574 } 1575 } 1576 1577 /* Create RX queues */ 1578 for (i = 0; i < adapter->num_io_queues; i++) { 1579 msix_vector = ENA_IO_IRQ_IDX(i); 1580 ena_qid = ENA_IO_RXQ_IDX(i); 1581 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1582 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1583 ctx.queue_size = adapter->requested_rx_ring_size; 1584 ctx.msix_vector = msix_vector; 1585 ctx.qid = ena_qid; 1586 ctx.numa_node = adapter->que[i].domain; 1587 1588 rc = ena_com_create_io_queue(ena_dev, &ctx); 1589 if (unlikely(rc != 0)) { 1590 ena_log(adapter->pdev, ERR, 1591 "Failed to create io RX queue[%d] rc: %d\n", i, rc); 1592 goto err_rx; 1593 } 1594 1595 ring = &adapter->rx_ring[i]; 1596 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1597 &ring->ena_com_io_sq, &ring->ena_com_io_cq); 1598 if (unlikely(rc != 0)) { 1599 ena_log(adapter->pdev, ERR, 1600 "Failed to get RX queue handlers. RX queue num" 1601 " %d rc: %d\n", 1602 i, rc); 1603 ena_com_destroy_io_queue(ena_dev, ena_qid); 1604 goto err_rx; 1605 } 1606 1607 if (ctx.numa_node >= 0) { 1608 ena_com_update_numa_node(ring->ena_com_io_cq, 1609 ctx.numa_node); 1610 } 1611 } 1612 1613 for (i = 0; i < adapter->num_io_queues; i++) { 1614 queue = &adapter->que[i]; 1615 1616 NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue); 1617 queue->cleanup_tq = taskqueue_create_fast("ena cleanup", 1618 M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq); 1619 1620 #ifdef RSS 1621 cpu_mask = &queue->cpu_mask; 1622 #endif 1623 taskqueue_start_threads_cpuset(&queue->cleanup_tq, 1, PI_NET, 1624 cpu_mask, "%s queue %d cleanup", 1625 device_get_nameunit(adapter->pdev), i); 1626 } 1627 1628 return (0); 1629 1630 err_rx: 1631 while (i--) 1632 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); 1633 i = adapter->num_io_queues; 1634 err_tx: 1635 while (i--) 1636 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); 1637 1638 return (ENXIO); 1639 } 1640 1641 /********************************************************************* 1642 * 1643 * MSIX & Interrupt Service routine 1644 * 1645 **********************************************************************/ 1646 1647 /** 1648 * ena_handle_msix - MSIX Interrupt Handler for admin/async queue 1649 * @arg: interrupt number 1650 **/ 1651 static void 1652 ena_intr_msix_mgmnt(void *arg) 1653 { 1654 struct ena_adapter *adapter = (struct ena_adapter *)arg; 1655 1656 ena_com_admin_q_comp_intr_handler(adapter->ena_dev); 1657 if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) 1658 ena_com_aenq_intr_handler(adapter->ena_dev, arg); 1659 } 1660 1661 /** 1662 * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx 1663 * @arg: queue 1664 **/ 1665 static int 1666 ena_handle_msix(void *arg) 1667 { 1668 struct ena_que *queue = arg; 1669 struct ena_adapter *adapter = queue->adapter; 1670 if_t ifp = adapter->ifp; 1671 1672 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 1673 return (FILTER_STRAY); 1674 1675 taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task); 1676 1677 return 

/*********************************************************************
 *
 *  MSIX & Interrupt Service routine
 *
 **********************************************************************/

/**
 * ena_intr_msix_mgmnt - MSIX Interrupt Handler for admin/async queue
 * @arg: the adapter, passed as the interrupt argument
 **/
static void
ena_intr_msix_mgmnt(void *arg)
{
	struct ena_adapter *adapter = (struct ena_adapter *)arg;

	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
}

/**
 * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
 * @arg: queue
 **/
static int
ena_handle_msix(void *arg)
{
	struct ena_que *queue = arg;
	struct ena_adapter *adapter = queue->adapter;
	if_t ifp = adapter->ifp;

	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);

	return (FILTER_HANDLED);
}

static int
ena_enable_msix(struct ena_adapter *adapter)
{
	device_t dev = adapter->pdev;
	int msix_vecs, msix_req;
	int i, rc = 0;

	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
		ena_log(dev, ERR, "Error, MSI-X is already enabled\n");
		return (EINVAL);
	}

	/* Reserve the max MSI-X vectors we might need */
	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);

	adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	ena_log(dev, DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);

	for (i = 0; i < msix_vecs; i++) {
		adapter->msix_entries[i].entry = i;
		/* Vectors must start from 1 */
		adapter->msix_entries[i].vector = i + 1;
	}

	msix_req = msix_vecs;
	rc = pci_alloc_msix(dev, &msix_vecs);
	if (unlikely(rc != 0)) {
		ena_log(dev, ERR, "Failed to enable MSIX, vectors %d rc %d\n",
		    msix_vecs, rc);

		rc = ENOSPC;
		goto err_msix_free;
	}

	if (msix_vecs != msix_req) {
		if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
			ena_log(dev, ERR,
			    "Not enough MSI-X vectors allocated: %d\n",
			    msix_vecs);
			pci_release_msi(dev);
			rc = ENOSPC;
			goto err_msix_free;
		}
		ena_log(dev, ERR,
		    "Enabled only %d MSI-X (out of %d); reduce "
		    "the number of queues\n",
		    msix_vecs, msix_req);
	}

	adapter->msix_vecs = msix_vecs;
	ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);

	return (0);

err_msix_free:
	free(adapter->msix_entries, M_DEVBUF);
	adapter->msix_entries = NULL;

	return (rc);
}
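
/*
 * Vector layout assumed throughout the interrupt code: the entry at
 * ENA_MGMNT_IRQ_IDX carries the admin completion and AENQ interrupt, while
 * entries from ENA_IO_IRQ_FIRST_IDX upwards serve the IO queue pairs. This
 * is why a grant of only ENA_ADMIN_MSIX_VEC vectors is treated as failure
 * above.
 */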

static void
ena_setup_mgmnt_intr(struct ena_adapter *adapter)
{
	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, ENA_IRQNAME_SIZE,
	    "ena-mgmnt@pci:%s", device_get_nameunit(adapter->pdev));
	/*
	 * Handler is NULL on purpose; it will be set
	 * when the mgmnt interrupt is acquired.
	 */
	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
}

static int
ena_setup_io_intr(struct ena_adapter *adapter)
{
#ifdef RSS
	int num_buckets = rss_getnumbuckets();
	static int last_bind = 0;
	int cur_bind;
	int idx;
#endif
	int irq_idx;

	if (adapter->msix_entries == NULL)
		return (EINVAL);

#ifdef RSS
	if (adapter->first_bind < 0) {
		adapter->first_bind = last_bind;
		last_bind = (last_bind + adapter->num_io_queues) % num_buckets;
	}
	cur_bind = adapter->first_bind;
#endif

	for (int i = 0; i < adapter->num_io_queues; i++) {
		irq_idx = ENA_IO_IRQ_IDX(i);

		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
		adapter->irq_tbl[irq_idx].vector =
		    adapter->msix_entries[irq_idx].vector;
		ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n",
		    adapter->msix_entries[irq_idx].vector);

		if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) {
			adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
			    (unsigned)(adapter->irq_cpu_base +
			    i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus;
			CPU_SETOF(adapter->que[i].cpu,
			    &adapter->que[i].cpu_mask);
		}

#ifdef RSS
		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
		    rss_getcpu(cur_bind);
		cur_bind = (cur_bind + 1) % num_buckets;
		CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask);

		for (idx = 0; idx < MAXMEMDOM; ++idx) {
			if (CPU_ISSET(adapter->que[i].cpu,
			    &cpuset_domain[idx]))
				break;
		}
		adapter->que[i].domain = idx;
#else
		adapter->que[i].domain = -1;
#endif
	}

	return (0);
}
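
/*
 * CPU affinity selection above: with an explicit irq_cpu_base, queue i is
 * bound to (irq_cpu_base + i * irq_cpu_stride) % mp_ncpus. Under RSS the
 * binding instead walks the RSS bucket-to-CPU map via rss_getcpu(), and
 * the queue's NUMA domain is derived from the chosen CPU.
 */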
ENA_IO_IRQ_FIRST_IDX; i--) { 1924 irq = &adapter->irq_tbl[i]; 1925 rcc = 0; 1926 1927 /* Once we have entered the err: section and irq->requested is true, 1928 we free both the interrupt and its resources */ 1929 if (irq->requested) { 1930 rcc = bus_teardown_intr(adapter->pdev, irq->res, 1931 irq->cookie); 1932 if (unlikely(rcc != 0)) 1933 ena_log(pdev, ERR, 1934 "could not release irq: %d, error: %d\n", 1935 irq->vector, rcc); 1936 } 1937 1938 /* If we entered the err: section without irq->requested set, we know 1939 it was bus_alloc_resource_any() that needs cleanup, provided 1940 res is not NULL. If res is NULL, no work is needed in 1941 this iteration */ 1942 rcc = 0; 1943 if (irq->res != NULL) { 1944 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1945 irq->vector, irq->res); 1946 } 1947 if (unlikely(rcc != 0)) 1948 ena_log(pdev, ERR, 1949 "dev has no parent while releasing res for irq: %d\n", 1950 irq->vector); 1951 irq->requested = false; 1952 irq->res = NULL; 1953 } 1954 1955 return (rc); 1956 } 1957 1958 static void 1959 ena_free_mgmnt_irq(struct ena_adapter *adapter) 1960 { 1961 device_t pdev = adapter->pdev; 1962 struct ena_irq *irq; 1963 int rc; 1964 1965 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 1966 if (irq->requested) { 1967 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector); 1968 rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie); 1969 if (unlikely(rc != 0)) 1970 ena_log(pdev, ERR, "failed to tear down irq: %d\n", 1971 irq->vector); 1972 irq->requested = 0; 1973 } 1974 1975 if (irq->res != NULL) { 1976 ena_log(pdev, DBG, "release resource irq: %d\n", irq->vector); 1977 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1978 irq->vector, irq->res); 1979 irq->res = NULL; 1980 if (unlikely(rc != 0)) 1981 ena_log(pdev, ERR, 1982 "dev has no parent while releasing res for irq: %d\n", 1983 irq->vector); 1984 } 1985 } 1986 1987 static void 1988 ena_free_io_irq(struct ena_adapter *adapter) 1989 { 1990 device_t pdev = adapter->pdev; 1991 struct ena_irq *irq; 1992 int rc; 1993 1994 for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { 1995 irq = &adapter->irq_tbl[i]; 1996 if (irq->requested) { 1997 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector); 1998 rc = bus_teardown_intr(adapter->pdev, irq->res, 1999 irq->cookie); 2000 if (unlikely(rc != 0)) { 2001 ena_log(pdev, ERR, 2002 "failed to tear down irq: %d\n", 2003 irq->vector); 2004 } 2005 irq->requested = 0; 2006 } 2007 2008 if (irq->res != NULL) { 2009 ena_log(pdev, DBG, "release resource irq: %d\n", 2010 irq->vector); 2011 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 2012 irq->vector, irq->res); 2013 irq->res = NULL; 2014 if (unlikely(rc != 0)) { 2015 ena_log(pdev, ERR, 2016 "dev has no parent while releasing res for irq: %d\n", 2017 irq->vector); 2018 } 2019 } 2020 } 2021 } 2022 2023 static void 2024 ena_free_irqs(struct ena_adapter *adapter) 2025 { 2026 ena_free_io_irq(adapter); 2027 ena_free_mgmnt_irq(adapter); 2028 ena_disable_msix(adapter); 2029 } 2030 2031 static void 2032 ena_disable_msix(struct ena_adapter *adapter) 2033 { 2034 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) { 2035 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter); 2036 pci_release_msi(adapter->pdev); 2037 } 2038 2039 adapter->msix_vecs = 0; 2040 free(adapter->msix_entries, M_DEVBUF); 2041 adapter->msix_entries = NULL; 2042 } 2043 2044 static void 2045 ena_unmask_all_io_irqs(struct ena_adapter *adapter) 2046 { 2047 struct ena_com_io_cq *io_cq; 2048 struct ena_eth_io_intr_reg intr_reg; 2049 struct ena_ring *tx_ring; 2050
uint16_t ena_qid; 2051 int i; 2052 2053 /* Unmask interrupts for all queues */ 2054 for (i = 0; i < adapter->num_io_queues; i++) { 2055 ena_qid = ENA_IO_TXQ_IDX(i); 2056 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 2057 ena_com_update_intr_reg(&intr_reg, 0, 0, true, false); 2058 tx_ring = &adapter->tx_ring[i]; 2059 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1); 2060 ena_com_unmask_intr(io_cq, &intr_reg); 2061 } 2062 } 2063 2064 static int 2065 ena_up_complete(struct ena_adapter *adapter) 2066 { 2067 int rc; 2068 2069 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 2070 rc = ena_rss_configure(adapter); 2071 if (rc != 0) { 2072 ena_log(adapter->pdev, ERR, 2073 "Failed to configure RSS\n"); 2074 return (rc); 2075 } 2076 } 2077 2078 rc = ena_change_mtu(adapter->ifp, if_getmtu(adapter->ifp)); 2079 if (unlikely(rc != 0)) 2080 return (rc); 2081 2082 ena_refill_all_rx_bufs(adapter); 2083 ena_reset_counters((counter_u64_t *)&adapter->hw_stats, 2084 sizeof(adapter->hw_stats)); 2085 2086 return (0); 2087 } 2088 2089 static void 2090 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size, int new_rx_size) 2091 { 2092 int i; 2093 2094 for (i = 0; i < adapter->num_io_queues; i++) { 2095 adapter->tx_ring[i].ring_size = new_tx_size; 2096 adapter->rx_ring[i].ring_size = new_rx_size; 2097 } 2098 } 2099 2100 static int 2101 create_queues_with_size_backoff(struct ena_adapter *adapter) 2102 { 2103 device_t pdev = adapter->pdev; 2104 int rc; 2105 uint32_t cur_rx_ring_size, cur_tx_ring_size; 2106 uint32_t new_rx_ring_size, new_tx_ring_size; 2107 2108 /* 2109 * Current queue sizes might be set to smaller than the requested 2110 * ones due to past queue allocation failures. 2111 */ 2112 set_io_rings_size(adapter, adapter->requested_tx_ring_size, 2113 adapter->requested_rx_ring_size); 2114 2115 while (1) { 2116 /* Allocate transmit descriptors */ 2117 rc = ena_setup_all_tx_resources(adapter); 2118 if (unlikely(rc != 0)) { 2119 ena_log(pdev, ERR, "err_setup_tx\n"); 2120 goto err_setup_tx; 2121 } 2122 2123 /* Allocate receive descriptors */ 2124 rc = ena_setup_all_rx_resources(adapter); 2125 if (unlikely(rc != 0)) { 2126 ena_log(pdev, ERR, "err_setup_rx\n"); 2127 goto err_setup_rx; 2128 } 2129 2130 /* Create IO queues for Rx & Tx */ 2131 rc = ena_create_io_queues(adapter); 2132 if (unlikely(rc != 0)) { 2133 ena_log(pdev, ERR, "create IO queues failed\n"); 2134 goto err_io_que; 2135 } 2136 2137 return (0); 2138 2139 err_io_que: 2140 ena_free_all_rx_resources(adapter); 2141 err_setup_rx: 2142 ena_free_all_tx_resources(adapter); 2143 err_setup_tx: 2144 /* 2145 * Lower the ring size if ENOMEM. Otherwise, return the 2146 * error straightaway. 2147 */ 2148 if (unlikely(rc != ENOMEM)) { 2149 ena_log(pdev, ERR, 2150 "Queue creation failed with error code: %d\n", rc); 2151 return (rc); 2152 } 2153 2154 cur_tx_ring_size = adapter->tx_ring[0].ring_size; 2155 cur_rx_ring_size = adapter->rx_ring[0].ring_size; 2156 2157 ena_log(pdev, ERR, 2158 "Not enough memory to create queues with sizes TX=%d, RX=%d\n", 2159 cur_tx_ring_size, cur_rx_ring_size); 2160 2161 new_tx_ring_size = cur_tx_ring_size; 2162 new_rx_ring_size = cur_rx_ring_size; 2163 2164 /* 2165 * Decrease the size of a larger queue, or decrease both if they 2166 * are the same size. 
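 * For example (illustrative sizes): TX=1024/RX=256 halves only TX to
 * 512, a further ENOMEM at TX=512/RX=256 halves TX again to 256, and
 * once the sizes are equal at 256/256 the next failure halves both
 * to 128.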
2167 */ 2168 if (cur_rx_ring_size <= cur_tx_ring_size) 2169 new_tx_ring_size = cur_tx_ring_size / 2; 2170 if (cur_rx_ring_size >= cur_tx_ring_size) 2171 new_rx_ring_size = cur_rx_ring_size / 2; 2172 2173 if (new_tx_ring_size < ENA_MIN_RING_SIZE || 2174 new_rx_ring_size < ENA_MIN_RING_SIZE) { 2175 ena_log(pdev, ERR, 2176 "Queue creation failed with the smallest possible queue size " 2177 "of %d for both queues. Not retrying with smaller queues\n", 2178 ENA_MIN_RING_SIZE); 2179 return (rc); 2180 } 2181 2182 ena_log(pdev, INFO, 2183 "Retrying queue creation with sizes TX=%d, RX=%d\n", 2184 new_tx_ring_size, new_rx_ring_size); 2185 2186 set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size); 2187 } 2188 } 2189 2190 int 2191 ena_up(struct ena_adapter *adapter) 2192 { 2193 int rc = 0; 2194 2195 ENA_LOCK_ASSERT(); 2196 2197 if (unlikely(device_is_attached(adapter->pdev) == 0)) { 2198 ena_log(adapter->pdev, ERR, "device is not attached!\n"); 2199 return (ENXIO); 2200 } 2201 2202 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 2203 return (0); 2204 2205 ena_log(adapter->pdev, INFO, "device is going UP\n"); 2206 2207 /* setup interrupts for IO queues */ 2208 rc = ena_setup_io_intr(adapter); 2209 if (unlikely(rc != 0)) { 2210 ena_log(adapter->pdev, ERR, "error setting up IO interrupt\n"); 2211 goto error; 2212 } 2213 rc = ena_request_io_irq(adapter); 2214 if (unlikely(rc != 0)) { 2215 ena_log(adapter->pdev, ERR, "error requesting IO IRQ\n"); 2216 goto error; 2217 } 2218 2219 ena_log(adapter->pdev, INFO, 2220 "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, LLQ is %s\n", 2221 adapter->num_io_queues, 2222 adapter->requested_rx_ring_size, 2223 adapter->requested_tx_ring_size, 2224 (adapter->ena_dev->tx_mem_queue_type == 2225 ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED"); 2226 2227 rc = create_queues_with_size_backoff(adapter); 2228 if (unlikely(rc != 0)) { 2229 ena_log(adapter->pdev, ERR, 2230 "error creating queues with size backoff\n"); 2231 goto err_create_queues_with_backoff; 2232 } 2233 2234 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) 2235 if_link_state_change(adapter->ifp, LINK_STATE_UP); 2236 2237 rc = ena_up_complete(adapter); 2238 if (unlikely(rc != 0)) 2239 goto err_up_complete; 2240 2241 counter_u64_add(adapter->dev_stats.interface_up, 1); 2242 2243 ena_update_hwassist(adapter); 2244 2245 if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 2246 2247 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter); 2248 2249 ena_unmask_all_io_irqs(adapter); 2250 2251 return (0); 2252 2253 err_up_complete: 2254 ena_destroy_all_io_queues(adapter); 2255 ena_free_all_rx_resources(adapter); 2256 ena_free_all_tx_resources(adapter); 2257 err_create_queues_with_backoff: 2258 ena_free_io_irq(adapter); 2259 error: 2260 return (rc); 2261 } 2262 2263 static uint64_t 2264 ena_get_counter(if_t ifp, ift_counter cnt) 2265 { 2266 struct ena_adapter *adapter; 2267 struct ena_hw_stats *stats; 2268 2269 adapter = if_getsoftc(ifp); 2270 stats = &adapter->hw_stats; 2271 2272 switch (cnt) { 2273 case IFCOUNTER_IPACKETS: 2274 return (counter_u64_fetch(stats->rx_packets)); 2275 case IFCOUNTER_OPACKETS: 2276 return (counter_u64_fetch(stats->tx_packets)); 2277 case IFCOUNTER_IBYTES: 2278 return (counter_u64_fetch(stats->rx_bytes)); 2279 case IFCOUNTER_OBYTES: 2280 return (counter_u64_fetch(stats->tx_bytes)); 2281 case IFCOUNTER_IQDROPS: 2282 return (counter_u64_fetch(stats->rx_drops)); 2283 case IFCOUNTER_OQDROPS: 2284 return (counter_u64_fetch(stats->tx_drops)); 2285 default: 2286 return (if_get_counter_default(ifp, cnt)); 2287 } 2288 } 2289 2290 static int 2291 ena_media_change(if_t ifp) 2292 { 2293 /* Media change is not supported by the firmware */ 2294 return (0); 2295 } 2296 2297 static void 2298 ena_media_status(if_t ifp, struct ifmediareq *ifmr) 2299 { 2300 struct ena_adapter *adapter = if_getsoftc(ifp); 2301 ena_log(adapter->pdev, DBG, "Media status update\n"); 2302 2303 ENA_LOCK_LOCK(); 2304 2305 ifmr->ifm_status = IFM_AVALID; 2306 ifmr->ifm_active = IFM_ETHER; 2307 2308 if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) { 2309 ENA_LOCK_UNLOCK(); 2310 ena_log(adapter->pdev, INFO, "Link is down\n"); 2311 return; 2312 } 2313 2314 ifmr->ifm_status |= IFM_ACTIVE; 2315 ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX; 2316 2317 ENA_LOCK_UNLOCK(); 2318 } 2319 2320 static void 2321 ena_init(void *arg) 2322 { 2323 struct ena_adapter *adapter = (struct ena_adapter *)arg; 2324 2325 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) { 2326 ENA_LOCK_LOCK(); 2327 ena_up(adapter); 2328 ENA_LOCK_UNLOCK(); 2329 } 2330 } 2331 2332 static int 2333 ena_ioctl(if_t ifp, u_long command, caddr_t data) 2334 { 2335 struct ena_adapter *adapter; 2336 struct ifreq *ifr; 2337 int rc; 2338 2339 adapter = if_getsoftc(ifp); 2340 ifr = (struct ifreq *)data; 2341 2342 /* 2343 * Acquire the lock to prevent the up and down routines from running in parallel.
2344 */ 2345 rc = 0; 2346 switch (command) { 2347 case SIOCSIFMTU: 2348 if (if_getmtu(ifp) == ifr->ifr_mtu) 2349 break; 2350 ENA_LOCK_LOCK(); 2351 ena_down(adapter); 2352 2353 ena_change_mtu(ifp, ifr->ifr_mtu); 2354 2355 rc = ena_up(adapter); 2356 ENA_LOCK_UNLOCK(); 2357 break; 2358 2359 case SIOCSIFFLAGS: 2360 if ((if_getflags(ifp) & IFF_UP) != 0) { 2361 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 2362 if ((if_getflags(ifp) & (IFF_PROMISC | 2363 IFF_ALLMULTI)) != 0) { 2364 ena_log(adapter->pdev, INFO, 2365 "ioctl promisc/allmulti\n"); 2366 } 2367 } else { 2368 ENA_LOCK_LOCK(); 2369 rc = ena_up(adapter); 2370 ENA_LOCK_UNLOCK(); 2371 } 2372 } else { 2373 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 2374 ENA_LOCK_LOCK(); 2375 ena_down(adapter); 2376 ENA_LOCK_UNLOCK(); 2377 } 2378 } 2379 break; 2380 2381 case SIOCADDMULTI: 2382 case SIOCDELMULTI: 2383 break; 2384 2385 case SIOCSIFMEDIA: 2386 case SIOCGIFMEDIA: 2387 rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command); 2388 break; 2389 2390 case SIOCSIFCAP: 2391 { 2392 int reinit = 0; 2393 2394 if (ifr->ifr_reqcap != if_getcapenable(ifp)) { 2395 if_setcapenable(ifp, ifr->ifr_reqcap); 2396 reinit = 1; 2397 } 2398 2399 if ((reinit != 0) && 2400 ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) { 2401 ENA_LOCK_LOCK(); 2402 ena_down(adapter); 2403 rc = ena_up(adapter); 2404 ENA_LOCK_UNLOCK(); 2405 } 2406 } 2407 2408 break; 2409 default: 2410 rc = ether_ioctl(ifp, command, data); 2411 break; 2412 } 2413 2414 return (rc); 2415 } 2416 2417 static int 2418 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat) 2419 { 2420 int caps = 0; 2421 2422 if ((feat->offload.tx & 2423 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | 2424 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK | 2425 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0) 2426 caps |= IFCAP_TXCSUM; 2427 2428 if ((feat->offload.tx & 2429 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK | 2430 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0) 2431 caps |= IFCAP_TXCSUM_IPV6; 2432 2433 if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0) 2434 caps |= IFCAP_TSO4; 2435 2436 if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0) 2437 caps |= IFCAP_TSO6; 2438 2439 if ((feat->offload.rx_supported & 2440 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK | 2441 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0) 2442 caps |= IFCAP_RXCSUM; 2443 2444 if ((feat->offload.rx_supported & 2445 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0) 2446 caps |= IFCAP_RXCSUM_IPV6; 2447 2448 caps |= IFCAP_LRO | IFCAP_JUMBO_MTU; 2449 2450 return (caps); 2451 } 2452 2453 static void 2454 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp) 2455 { 2456 host_info->supported_network_features[0] = (uint32_t)if_getcapabilities(ifp); 2457 } 2458 2459 static void 2460 ena_update_hwassist(struct ena_adapter *adapter) 2461 { 2462 if_t ifp = adapter->ifp; 2463 uint32_t feat = adapter->tx_offload_cap; 2464 int cap = if_getcapenable(ifp); 2465 int flags = 0; 2466 2467 if_clearhwassist(ifp); 2468 2469 if ((cap & IFCAP_TXCSUM) != 0) { 2470 if ((feat & 2471 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0) 2472 flags |= CSUM_IP; 2473 if ((feat & 2474 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | 2475 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0) 2476 flags |= CSUM_IP_UDP | CSUM_IP_TCP; 2477 } 2478 2479 if ((cap & IFCAP_TXCSUM_IPV6) != 0) 
2480 flags |= CSUM_IP6_UDP | CSUM_IP6_TCP; 2481 2482 if ((cap & IFCAP_TSO4) != 0) 2483 flags |= CSUM_IP_TSO; 2484 2485 if ((cap & IFCAP_TSO6) != 0) 2486 flags |= CSUM_IP6_TSO; 2487 2488 if_sethwassistbits(ifp, flags, 0); 2489 } 2490 2491 static void 2492 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter, 2493 struct ena_com_dev_get_features_ctx *feat) 2494 { 2495 if_t ifp; 2496 int caps = 0; 2497 2498 ifp = adapter->ifp = if_gethandle(IFT_ETHER); 2499 if_initname(ifp, device_get_name(pdev), device_get_unit(pdev)); 2500 if_setdev(ifp, pdev); 2501 if_setsoftc(ifp, adapter); 2502 2503 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 2504 if_setinitfn(ifp, ena_init); 2505 if_settransmitfn(ifp, ena_mq_start); 2506 if_setqflushfn(ifp, ena_qflush); 2507 if_setioctlfn(ifp, ena_ioctl); 2508 if_setgetcounterfn(ifp, ena_get_counter); 2509 2510 if_setsendqlen(ifp, adapter->requested_tx_ring_size); 2511 if_setsendqready(ifp); 2512 if_setmtu(ifp, ETHERMTU); 2513 if_setbaudrate(ifp, 0); 2514 /* Zeroize capabilities... */ 2515 if_setcapabilities(ifp, 0); 2516 if_setcapenable(ifp, 0); 2517 /* check hardware support */ 2518 caps = ena_get_dev_offloads(feat); 2519 /* ... and set them */ 2520 if_setcapabilitiesbit(ifp, caps, 0); 2521 2522 /* TSO parameters */ 2523 if_sethwtsomax(ifp, ENA_TSO_MAXSIZE - 2524 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); 2525 if_sethwtsomaxsegcount(ifp, adapter->max_tx_sgl_size - 1); 2526 if_sethwtsomaxsegsize(ifp, ENA_TSO_MAXSIZE); 2527 2528 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 2529 if_setcapenable(ifp, if_getcapabilities(ifp)); 2530 2531 /* 2532 * Specify the media types supported by this adapter and register 2533 * callbacks to update media and link information 2534 */ 2535 ifmedia_init(&adapter->media, IFM_IMASK, ena_media_change, 2536 ena_media_status); 2537 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 2538 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 2539 2540 ether_ifattach(ifp, adapter->mac_addr); 2541 } 2542 2543 void 2544 ena_down(struct ena_adapter *adapter) 2545 { 2546 int rc; 2547 2548 ENA_LOCK_ASSERT(); 2549 2550 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 2551 return; 2552 2553 ena_log(adapter->pdev, INFO, "device is going DOWN\n"); 2554 2555 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter); 2556 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2557 2558 ena_free_io_irq(adapter); 2559 2560 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) { 2561 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 2562 if (unlikely(rc != 0)) 2563 ena_log(adapter->pdev, ERR, "Device reset failed\n"); 2564 } 2565 2566 ena_destroy_all_io_queues(adapter); 2567 2568 ena_free_all_tx_bufs(adapter); 2569 ena_free_all_rx_bufs(adapter); 2570 ena_free_all_tx_resources(adapter); 2571 ena_free_all_rx_resources(adapter); 2572 2573 counter_u64_add(adapter->dev_stats.interface_down, 1); 2574 } 2575 2576 static uint32_t 2577 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev, 2578 struct ena_com_dev_get_features_ctx *get_feat_ctx) 2579 { 2580 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 2581 2582 /* Regular queues capabilities */ 2583 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2584 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2585 &get_feat_ctx->max_queue_ext.max_queue_ext; 2586 io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num, 2587 max_queue_ext->max_rx_cq_num); 2588 2589 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 2590 
io_tx_cq_num = max_queue_ext->max_tx_cq_num; 2591 } else { 2592 struct ena_admin_queue_feature_desc *max_queues = 2593 &get_feat_ctx->max_queues; 2594 io_tx_sq_num = max_queues->max_sq_num; 2595 io_tx_cq_num = max_queues->max_cq_num; 2596 io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); 2597 } 2598 2599 /* In case of LLQ use the llq fields for the tx SQ/CQ */ 2600 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2601 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2602 2603 max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES); 2604 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num); 2605 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num); 2606 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num); 2607 /* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */ 2608 max_num_io_queues = min_t(uint32_t, max_num_io_queues, 2609 pci_msix_count(pdev) - 1); 2610 #ifdef RSS 2611 max_num_io_queues = min_t(uint32_t, max_num_io_queues, 2612 rss_getnumbuckets()); 2613 #endif 2614 2615 return (max_num_io_queues); 2616 } 2617 2618 static int 2619 ena_enable_wc(device_t pdev, struct resource *res) 2620 { 2621 #if defined(__i386) || defined(__amd64) || defined(__aarch64__) 2622 vm_offset_t va; 2623 vm_size_t len; 2624 int rc; 2625 2626 va = (vm_offset_t)rman_get_virtual(res); 2627 len = rman_get_size(res); 2628 /* Enable write combining */ 2629 rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING); 2630 if (unlikely(rc != 0)) { 2631 ena_log(pdev, ERR, "pmap_change_attr failed, %d\n", rc); 2632 return (rc); 2633 } 2634 2635 return (0); 2636 #endif 2637 return (EOPNOTSUPP); 2638 } 2639 2640 static int 2641 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev, 2642 struct ena_admin_feature_llq_desc *llq, 2643 struct ena_llq_configurations *llq_default_configurations) 2644 { 2645 int rc; 2646 uint32_t llq_feature_mask; 2647 2648 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2649 if (!(ena_dev->supported_features & llq_feature_mask)) { 2650 ena_log(pdev, WARN, 2651 "LLQ is not supported. Fallback to host mode policy.\n"); 2652 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2653 return (0); 2654 } 2655 2656 if (ena_dev->mem_bar == NULL) { 2657 ena_log(pdev, WARN, 2658 "LLQ is advertised as supported but device doesn't expose mem bar.\n"); 2659 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2660 return (0); 2661 } 2662 2663 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2664 if (unlikely(rc != 0)) { 2665 ena_log(pdev, WARN, 2666 "Failed to configure the device mode. " 2667 "Fallback to host mode policy.\n"); 2668 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2669 } 2670 2671 return (0); 2672 } 2673 2674 static int 2675 ena_map_llq_mem_bar(device_t pdev, struct ena_com_dev *ena_dev) 2676 { 2677 struct ena_adapter *adapter = device_get_softc(pdev); 2678 int rc, rid; 2679 2680 /* Try to allocate resources for LLQ bar */ 2681 rid = PCIR_BAR(ENA_MEM_BAR); 2682 adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid, 2683 RF_ACTIVE); 2684 if (unlikely(adapter->memory == NULL)) { 2685 ena_log(pdev, WARN, 2686 "Unable to allocate LLQ bar resource. 
LLQ mode won't be used.\n"); 2687 return (0); 2688 } 2689 2690 /* Enable write combining for better LLQ performance */ 2691 rc = ena_enable_wc(adapter->pdev, adapter->memory); 2692 if (unlikely(rc != 0)) { 2693 ena_log(pdev, ERR, "failed to enable write combining.\n"); 2694 return (rc); 2695 } 2696 2697 /* 2698 * Save virtual address of the device's memory region 2699 * for the ena_com layer. 2700 */ 2701 ena_dev->mem_bar = rman_get_virtual(adapter->memory); 2702 2703 return (0); 2704 } 2705 2706 static inline void 2707 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2708 struct ena_admin_feature_llq_desc *llq) 2709 { 2710 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2711 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2712 llq_config->llq_num_decs_before_header = 2713 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2714 if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 2715 0 && ena_force_large_llq_header) { 2716 llq_config->llq_ring_entry_size = 2717 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2718 llq_config->llq_ring_entry_size_value = 256; 2719 } else { 2720 llq_config->llq_ring_entry_size = 2721 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2722 llq_config->llq_ring_entry_size_value = 128; 2723 } 2724 } 2725 2726 static int 2727 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) 2728 { 2729 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 2730 struct ena_com_dev *ena_dev = ctx->ena_dev; 2731 uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE; 2732 uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE; 2733 uint32_t max_tx_queue_size; 2734 uint32_t max_rx_queue_size; 2735 2736 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2737 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2738 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 2739 max_rx_queue_size = min_t(uint32_t, 2740 max_queue_ext->max_rx_cq_depth, 2741 max_queue_ext->max_rx_sq_depth); 2742 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 2743 2744 if (ena_dev->tx_mem_queue_type == 2745 ENA_ADMIN_PLACEMENT_POLICY_DEV) 2746 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2747 llq->max_llq_depth); 2748 else 2749 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2750 max_queue_ext->max_tx_sq_depth); 2751 2752 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2753 max_queue_ext->max_per_packet_tx_descs); 2754 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2755 max_queue_ext->max_per_packet_rx_descs); 2756 } else { 2757 struct ena_admin_queue_feature_desc *max_queues = 2758 &ctx->get_feat_ctx->max_queues; 2759 max_rx_queue_size = min_t(uint32_t, max_queues->max_cq_depth, 2760 max_queues->max_sq_depth); 2761 max_tx_queue_size = max_queues->max_cq_depth; 2762 2763 if (ena_dev->tx_mem_queue_type == 2764 ENA_ADMIN_PLACEMENT_POLICY_DEV) 2765 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2766 llq->max_llq_depth); 2767 else 2768 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2769 max_queues->max_sq_depth); 2770 2771 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2772 max_queues->max_packet_tx_descs); 2773 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2774 max_queues->max_packet_rx_descs); 2775 } 2776 2777 /* round down to the nearest power of 2 */ 2778 max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1); 2779 max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1); 2780 2781 /* 2782 * When forcing large headers, we multiply the entry size by 2, 2783 * and therefore divide the 
queue size by 2, leaving the amount 2784 * of memory used by the queues unchanged. 2785 */ 2786 if (ena_force_large_llq_header) { 2787 if ((llq->entry_size_ctrl_supported & 2788 ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 && 2789 ena_dev->tx_mem_queue_type == 2790 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2791 max_tx_queue_size /= 2; 2792 ena_log(ctx->pdev, INFO, 2793 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 2794 max_tx_queue_size); 2795 } else { 2796 ena_log(ctx->pdev, WARN, 2797 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 2798 } 2799 } 2800 2801 tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, 2802 max_tx_queue_size); 2803 rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, 2804 max_rx_queue_size); 2805 2806 tx_queue_size = 1 << (flsl(tx_queue_size) - 1); 2807 rx_queue_size = 1 << (flsl(rx_queue_size) - 1); 2808 2809 ctx->max_tx_queue_size = max_tx_queue_size; 2810 ctx->max_rx_queue_size = max_rx_queue_size; 2811 ctx->tx_queue_size = tx_queue_size; 2812 ctx->rx_queue_size = rx_queue_size; 2813 2814 return (0); 2815 } 2816 2817 static void 2818 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev) 2819 { 2820 struct ena_admin_host_info *host_info; 2821 uintptr_t rid; 2822 int rc; 2823 2824 /* Allocate only the host info */ 2825 rc = ena_com_allocate_host_info(ena_dev); 2826 if (unlikely(rc != 0)) { 2827 ena_log(dev, ERR, "Cannot allocate host info\n"); 2828 return; 2829 } 2830 2831 host_info = ena_dev->host_attr.host_info; 2832 2833 if (pci_get_id(dev, PCI_ID_RID, &rid) == 0) 2834 host_info->bdf = rid; 2835 host_info->os_type = ENA_ADMIN_OS_FREEBSD; 2836 host_info->kernel_ver = osreldate; 2837 2838 sprintf(host_info->kernel_ver_str, "%d", osreldate); 2839 host_info->os_dist = 0; 2840 strncpy(host_info->os_dist_str, osrelease, 2841 sizeof(host_info->os_dist_str) - 1); 2842 2843 host_info->driver_version = (ENA_DRV_MODULE_VER_MAJOR) | 2844 (ENA_DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 2845 (ENA_DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 2846 host_info->num_cpus = mp_ncpus; 2847 host_info->driver_supported_features = 2848 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 2849 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 2850 2851 rc = ena_com_set_host_attributes(ena_dev); 2852 if (unlikely(rc != 0)) { 2853 if (rc == EOPNOTSUPP) 2854 ena_log(dev, WARN, "Cannot set host attributes\n"); 2855 else 2856 ena_log(dev, ERR, "Cannot set host attributes\n"); 2857 2858 goto err; 2859 } 2860 2861 return; 2862 2863 err: 2864 ena_com_delete_host_info(ena_dev); 2865 } 2866 2867 static int 2868 ena_device_init(struct ena_adapter *adapter, device_t pdev, 2869 struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active) 2870 { 2871 struct ena_llq_configurations llq_config; 2872 struct ena_com_dev *ena_dev = adapter->ena_dev; 2873 bool readless_supported; 2874 uint32_t aenq_groups; 2875 int dma_width; 2876 int rc; 2877 2878 rc = ena_com_mmio_reg_read_request_init(ena_dev); 2879 if (unlikely(rc != 0)) { 2880 ena_log(pdev, ERR, "failed to init mmio read less\n"); 2881 return (rc); 2882 } 2883 2884 /* 2885 * The PCIe configuration space revision ID indicates whether MMIO 2886 * register read is disabled 2887 */ 2888 readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ); 2889 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 2890 2891 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 2892 if (unlikely(rc != 0)) { 2893 ena_log(pdev, ERR, "Cannot reset
device\n"); 2894 goto err_mmio_read_less; 2895 } 2896 2897 rc = ena_com_validate_version(ena_dev); 2898 if (unlikely(rc != 0)) { 2899 ena_log(pdev, ERR, "device version is too low\n"); 2900 goto err_mmio_read_less; 2901 } 2902 2903 dma_width = ena_com_get_dma_width(ena_dev); 2904 if (unlikely(dma_width < 0)) { 2905 ena_log(pdev, ERR, "Invalid dma width value %d", dma_width); 2906 rc = dma_width; 2907 goto err_mmio_read_less; 2908 } 2909 adapter->dma_width = dma_width; 2910 2911 /* ENA admin level init */ 2912 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 2913 if (unlikely(rc != 0)) { 2914 ena_log(pdev, ERR, 2915 "Can not initialize ena admin queue with device\n"); 2916 goto err_mmio_read_less; 2917 } 2918 2919 /* 2920 * To enable the msix interrupts the driver needs to know the number 2921 * of queues. So the driver uses polling mode to retrieve this 2922 * information 2923 */ 2924 ena_com_set_admin_polling_mode(ena_dev, true); 2925 2926 ena_config_host_info(ena_dev, pdev); 2927 2928 /* Get Device Attributes */ 2929 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 2930 if (unlikely(rc != 0)) { 2931 ena_log(pdev, ERR, 2932 "Cannot get attribute for ena device rc: %d\n", rc); 2933 goto err_admin_init; 2934 } 2935 2936 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 2937 BIT(ENA_ADMIN_FATAL_ERROR) | 2938 BIT(ENA_ADMIN_WARNING) | 2939 BIT(ENA_ADMIN_NOTIFICATION) | 2940 BIT(ENA_ADMIN_KEEP_ALIVE) | 2941 BIT(ENA_ADMIN_CONF_NOTIFICATIONS); 2942 2943 aenq_groups &= get_feat_ctx->aenq.supported_groups; 2944 rc = ena_com_set_aenq_config(ena_dev, aenq_groups); 2945 if (unlikely(rc != 0)) { 2946 ena_log(pdev, ERR, "Cannot configure aenq groups rc: %d\n", rc); 2947 goto err_admin_init; 2948 } 2949 2950 *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); 2951 2952 set_default_llq_configurations(&llq_config, &get_feat_ctx->llq); 2953 2954 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq, 2955 &llq_config); 2956 if (unlikely(rc != 0)) { 2957 ena_log(pdev, ERR, "Failed to set placement policy\n"); 2958 goto err_admin_init; 2959 } 2960 2961 return (0); 2962 2963 err_admin_init: 2964 ena_com_delete_host_info(ena_dev); 2965 ena_com_admin_destroy(ena_dev); 2966 err_mmio_read_less: 2967 ena_com_mmio_reg_read_request_destroy(ena_dev); 2968 2969 return (rc); 2970 } 2971 2972 static int 2973 ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) 2974 { 2975 struct ena_com_dev *ena_dev = adapter->ena_dev; 2976 int rc; 2977 2978 rc = ena_enable_msix(adapter); 2979 if (unlikely(rc != 0)) { 2980 ena_log(adapter->pdev, ERR, "Error with MSI-X enablement\n"); 2981 return (rc); 2982 } 2983 2984 ena_setup_mgmnt_intr(adapter); 2985 2986 rc = ena_request_mgmnt_irq(adapter); 2987 if (unlikely(rc != 0)) { 2988 ena_log(adapter->pdev, ERR, "Cannot setup mgmnt queue intr\n"); 2989 goto err_disable_msix; 2990 } 2991 2992 ena_com_set_admin_polling_mode(ena_dev, false); 2993 2994 ena_com_admin_aenq_enable(ena_dev); 2995 2996 return (0); 2997 2998 err_disable_msix: 2999 ena_disable_msix(adapter); 3000 3001 return (rc); 3002 } 3003 3004 /* Function called on ENA_ADMIN_KEEP_ALIVE event */ 3005 static void 3006 ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) 3007 { 3008 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 3009 struct ena_admin_aenq_keep_alive_desc *desc; 3010 sbintime_t stime; 3011 uint64_t rx_drops; 3012 uint64_t tx_drops; 3013 3014 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; 3015 3016 rx_drops = 
((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low; 3017 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low; 3018 counter_u64_zero(adapter->hw_stats.rx_drops); 3019 counter_u64_add(adapter->hw_stats.rx_drops, rx_drops); 3020 counter_u64_zero(adapter->hw_stats.tx_drops); 3021 counter_u64_add(adapter->hw_stats.tx_drops, tx_drops); 3022 3023 stime = getsbinuptime(); 3024 atomic_store_rel_64(&adapter->keep_alive_timestamp, stime); 3025 } 3026 3027 /* Check for keep alive expiration */ 3028 static void 3029 check_for_missing_keep_alive(struct ena_adapter *adapter) 3030 { 3031 sbintime_t timestamp, time; 3032 enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; 3033 3034 if (adapter->wd_active == 0) 3035 return; 3036 3037 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3038 return; 3039 3040 timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp); 3041 time = getsbinuptime() - timestamp; 3042 if (unlikely(time > adapter->keep_alive_timeout)) { 3043 ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n"); 3044 if (ena_com_aenq_has_keep_alive(adapter->ena_dev)) 3045 reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT; 3046 3047 ena_trigger_reset(adapter, reset_reason); 3048 } 3049 } 3050 3051 /* Check if admin queue is enabled */ 3052 static void 3053 check_for_admin_com_state(struct ena_adapter *adapter) 3054 { 3055 enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_ADMIN_TO; 3056 if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) { 3057 ena_log(adapter->pdev, ERR, 3058 "ENA admin queue is not in running state!\n"); 3059 counter_u64_add(adapter->dev_stats.admin_q_pause, 1); 3060 if (ena_com_get_missing_admin_interrupt(adapter->ena_dev)) 3061 reset_reason = ENA_REGS_RESET_MISSING_ADMIN_INTERRUPT; 3062 3063 ena_trigger_reset(adapter, reset_reason); 3064 } 3065 } 3066 3067 static int 3068 check_for_rx_interrupt_queue(struct ena_adapter *adapter, 3069 struct ena_ring *rx_ring) 3070 { 3071 if (likely(atomic_load_8(&rx_ring->first_interrupt))) 3072 return (0); 3073 3074 if (ena_com_cq_empty(rx_ring->ena_com_io_cq)) 3075 return (0); 3076 3077 rx_ring->no_interrupt_event_cnt++; 3078 3079 if (rx_ring->no_interrupt_event_cnt == 3080 ENA_MAX_NO_INTERRUPT_ITERATIONS) { 3081 ena_log(adapter->pdev, ERR, 3082 "Potential MSI-X issue on Rx side Queue = %d. Reset the device\n", 3083 rx_ring->qid); 3084 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT); 3085 return (EIO); 3086 } 3087 3088 return (0); 3089 } 3090 3091 static int 3092 check_missing_comp_in_tx_queue(struct ena_adapter *adapter, 3093 struct ena_ring *tx_ring) 3094 { 3095 uint32_t missed_tx = 0, new_missed_tx = 0; 3096 device_t pdev = adapter->pdev; 3097 struct bintime curtime, time; 3098 struct ena_tx_buffer *tx_buf; 3099 int time_since_last_cleanup; 3100 int missing_tx_comp_to; 3101 sbintime_t time_offset; 3102 int i, rc = 0; 3103 3104 getbinuptime(&curtime); 3105 3106 for (i = 0; i < tx_ring->ring_size; i++) { 3107 tx_buf = &tx_ring->tx_buffer_info[i]; 3108 3109 if (bintime_isset(&tx_buf->timestamp) == 0) 3110 continue; 3111 3112 time = curtime; 3113 bintime_sub(&time, &tx_buf->timestamp); 3114 time_offset = bttosbt(time); 3115 3116 if (unlikely(!atomic_load_8(&tx_ring->first_interrupt) && 3117 time_offset > 2 * adapter->missing_tx_timeout)) { 3118 /* 3119 * If after the grace period the interrupt is still not 3120 * received, we schedule a reset. 3121 */ 3122 ena_log(pdev, ERR, 3123 "Potential MSI-X issue on Tx side Queue = %d. " 3124 "Reset the device\n", 3125 tx_ring->qid); 3126 ena_trigger_reset(adapter, 3127 ENA_REGS_RESET_MISS_INTERRUPT); 3128 return (EIO); 3129 } 3130 3131 /* Check again if packet is still waiting */ 3132 if (unlikely(time_offset > adapter->missing_tx_timeout)) { 3133 3134 if (tx_buf->print_once) { 3135 time_since_last_cleanup = TICKS_2_MSEC(ticks - 3136 tx_ring->tx_last_cleanup_ticks); 3137 missing_tx_comp_to = sbttoms( 3138 adapter->missing_tx_timeout); 3139 ena_log(pdev, WARN, 3140 "Found a Tx that wasn't completed on time, qid %d, index %d. " 3141 "%d msecs have passed since last cleanup. Missing Tx timeout value %d msecs.\n", 3142 tx_ring->qid, i, time_since_last_cleanup, 3143 missing_tx_comp_to); 3144 /* Add new TX completions which are missed */ 3145 new_missed_tx++; 3146 } 3147 3148 tx_buf->print_once = false; 3149 missed_tx++; 3150 } 3151 } 3152 /* Check whether the number of missing TX completions in this TX ring has passed the threshold */ 3153 if (unlikely(missed_tx > adapter->missing_tx_threshold)) { 3154 ena_log(pdev, ERR, 3155 "The number of lost Tx completions is above the threshold " 3156 "(%d > %d). Reset the device\n", 3157 missed_tx, adapter->missing_tx_threshold); 3158 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL); 3159 rc = EIO; 3160 } 3161 /* Add the newly discovered missing TX completions */ 3162 counter_u64_add(tx_ring->tx_stats.missing_tx_comp, new_missed_tx); 3163 3164 return (rc); 3165 } 3166 3167 /* 3168 * Check for TX which were not completed on time. 3169 * Timeout is defined by "missing_tx_timeout". 3170 * A reset will be performed if the number of incomplete 3171 * transactions exceeds "missing_tx_threshold". 3172 */ 3173 static void 3174 check_for_missing_completions(struct ena_adapter *adapter) 3175 { 3176 struct ena_ring *tx_ring; 3177 struct ena_ring *rx_ring; 3178 int i, budget, rc; 3179 3180 /* Make sure another context isn't turning the device on or off in parallel */ 3181 rmb(); 3182 3183 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3184 return; 3185 3186 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) 3187 return; 3188 3189 if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3190 return; 3191 3192 budget = adapter->missing_tx_max_queues; 3193 3194 for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) { 3195 tx_ring = &adapter->tx_ring[i]; 3196 rx_ring = &adapter->rx_ring[i]; 3197 3198 rc = check_missing_comp_in_tx_queue(adapter, tx_ring); 3199 if (unlikely(rc != 0)) 3200 return; 3201 3202 rc = check_for_rx_interrupt_queue(adapter, rx_ring); 3203 if (unlikely(rc != 0)) 3204 return; 3205 3206 budget--; 3207 if (budget == 0) { 3208 i++; 3209 break; 3210 } 3211 } 3212 3213 adapter->next_monitored_tx_qid = i % adapter->num_io_queues; 3214 } 3215 3216 /* trigger rx cleanup after 2 consecutive detections */ 3217 #define EMPTY_RX_REFILL 2 3218 /* For the rare case where the device runs out of Rx descriptors and the 3219 * msix handler failed to refill new Rx descriptors (due to a lack of memory 3220 * for example). 3221 * This case will lead to a deadlock: 3222 * The device won't send interrupts, since all the new Rx packets will be dropped. 3223 * The MSI-X handler won't allocate new Rx descriptors, so the device won't be 3224 * able to send new packets.
3225 * 3226 * When such a situation is detected - execute rx cleanup task in another thread 3227 */ 3228 static void 3229 check_for_empty_rx_ring(struct ena_adapter *adapter) 3230 { 3231 struct ena_ring *rx_ring; 3232 int i, refill_required; 3233 3234 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3235 return; 3236 3237 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) 3238 return; 3239 3240 for (i = 0; i < adapter->num_io_queues; i++) { 3241 rx_ring = &adapter->rx_ring[i]; 3242 3243 refill_required = ena_com_free_q_entries( 3244 rx_ring->ena_com_io_sq); 3245 if (unlikely(refill_required == (rx_ring->ring_size - 1))) { 3246 rx_ring->empty_rx_queue++; 3247 3248 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { 3249 counter_u64_add(rx_ring->rx_stats.empty_rx_ring, 3250 1); 3251 3252 ena_log(adapter->pdev, WARN, 3253 "Rx ring %d is stalled. Triggering the refill function\n", 3254 i); 3255 3256 taskqueue_enqueue(rx_ring->que->cleanup_tq, 3257 &rx_ring->que->cleanup_task); 3258 rx_ring->empty_rx_queue = 0; 3259 } 3260 } else { 3261 rx_ring->empty_rx_queue = 0; 3262 } 3263 } 3264 } 3265 3266 static void 3267 ena_update_hints(struct ena_adapter *adapter, 3268 struct ena_admin_ena_hw_hints *hints) 3269 { 3270 struct ena_com_dev *ena_dev = adapter->ena_dev; 3271 3272 if (hints->admin_completion_tx_timeout) 3273 ena_dev->admin_queue.completion_timeout = 3274 hints->admin_completion_tx_timeout * 1000; 3275 3276 if (hints->mmio_read_timeout) 3277 /* convert to usec */ 3278 ena_dev->mmio_read.reg_read_to = hints->mmio_read_timeout * 1000; 3279 3280 if (hints->missed_tx_completion_count_threshold_to_reset) 3281 adapter->missing_tx_threshold = 3282 hints->missed_tx_completion_count_threshold_to_reset; 3283 3284 if (hints->missing_tx_completion_timeout) { 3285 if (hints->missing_tx_completion_timeout == 3286 ENA_HW_HINTS_NO_TIMEOUT) 3287 adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3288 else 3289 adapter->missing_tx_timeout = SBT_1MS * 3290 hints->missing_tx_completion_timeout; 3291 } 3292 3293 if (hints->driver_watchdog_timeout) { 3294 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3295 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3296 else 3297 adapter->keep_alive_timeout = SBT_1MS * 3298 hints->driver_watchdog_timeout; 3299 } 3300 } 3301 3302 /** 3303 * ena_copy_eni_metrics - Get and copy ENI metrics from the HW. 3304 * @adapter: ENA device adapter 3305 * 3306 * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics 3307 * and other error codes on failure. 3308 * 3309 * This function can possibly cause a race with other calls to the admin queue. 3310 * Because of that, the caller should either lock this function or make sure 3311 * that there is no race in the current context. 
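 * In this file the only caller is ena_metrics_task(), which already
 * serializes the call by holding the global ENA_LOCK.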
3312 */ 3313 static int 3314 ena_copy_eni_metrics(struct ena_adapter *adapter) 3315 { 3316 static bool print_once = true; 3317 int rc; 3318 3319 rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics); 3320 3321 if (rc != 0) { 3322 if (rc == ENA_COM_UNSUPPORTED) { 3323 if (print_once) { 3324 ena_log(adapter->pdev, WARN, 3325 "Retrieving ENI metrics is not supported.\n"); 3326 print_once = false; 3327 } else { 3328 ena_log(adapter->pdev, DBG, 3329 "Retrieving ENI metrics is not supported.\n"); 3330 } 3331 } else { 3332 ena_log(adapter->pdev, ERR, 3333 "Failed to get ENI metrics: %d\n", rc); 3334 } 3335 } 3336 3337 return (rc); 3338 } 3339 3340 static int 3341 ena_copy_srd_metrics(struct ena_adapter *adapter) 3342 { 3343 return ena_com_get_ena_srd_info(adapter->ena_dev, &adapter->ena_srd_info); 3344 } 3345 3346 static int 3347 ena_copy_customer_metrics(struct ena_adapter *adapter) 3348 { 3349 struct ena_com_dev *dev; 3350 u32 supported_metrics_count; 3351 int rc, len; 3352 3353 dev = adapter->ena_dev; 3354 3355 supported_metrics_count = ena_com_get_customer_metric_count(dev); 3356 len = supported_metrics_count * sizeof(u64); 3357 3358 /* Fill the data buffer */ 3359 rc = ena_com_get_customer_metrics(adapter->ena_dev, 3360 (char *)(adapter->customer_metrics_array), len); 3361 3362 return (rc); 3363 } 3364 3365 static void 3366 ena_timer_service(void *data) 3367 { 3368 struct ena_adapter *adapter = (struct ena_adapter *)data; 3369 struct ena_admin_host_info *host_info = 3370 adapter->ena_dev->host_attr.host_info; 3371 3372 check_for_missing_keep_alive(adapter); 3373 3374 check_for_admin_com_state(adapter); 3375 3376 check_for_missing_completions(adapter); 3377 3378 check_for_empty_rx_ring(adapter); 3379 3380 /* 3381 * User-controlled update of the ENA metrics. 3382 * If the delay was set to 0, then the stats shouldn't be updated at 3383 * all. 3384 * Otherwise, wait 'metrics_sample_interval' seconds before 3385 * updating the stats. 3386 * As the timer service is executed every second, it's enough to increment 3387 * the appropriate counter each time the timer service is executed. 3388 */ 3389 if ((adapter->metrics_sample_interval != 0) && 3390 (++adapter->metrics_sample_interval_cnt >= 3391 adapter->metrics_sample_interval)) { 3392 taskqueue_enqueue(adapter->metrics_tq, &adapter->metrics_task); 3393 adapter->metrics_sample_interval_cnt = 0; 3394 } 3395 3396 3397 if (host_info != NULL) 3398 ena_update_host_info(host_info, adapter->ifp); 3399 3400 if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { 3401 /* 3402 * Timeout when validating version indicates that the device 3403 * became unresponsive. If that happens, skip the reset and 3404 * reschedule the timer service, so the reset can be retried later. 3405 */ 3406 if (ena_com_validate_version(adapter->ena_dev) == 3407 ENA_COM_TIMER_EXPIRED) { 3408 ena_log(adapter->pdev, WARN, 3409 "FW unresponsive, skipping reset\n"); 3410 ENA_TIMER_RESET(adapter); 3411 return; 3412 } 3413 ena_log(adapter->pdev, WARN, "Trigger reset is on\n"); 3414 taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task); 3415 return; 3416 } 3417 3418 /* 3419 * Schedule another timeout one second from now.
3420 */ 3421 ENA_TIMER_RESET(adapter); 3422 } 3423 3424 void 3425 ena_destroy_device(struct ena_adapter *adapter, bool graceful) 3426 { 3427 if_t ifp = adapter->ifp; 3428 struct ena_com_dev *ena_dev = adapter->ena_dev; 3429 bool dev_up; 3430 3431 if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) 3432 return; 3433 3434 if (!graceful) 3435 if_link_state_change(ifp, LINK_STATE_DOWN); 3436 3437 ENA_TIMER_DRAIN(adapter); 3438 3439 dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 3440 if (dev_up) 3441 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 3442 3443 if (!graceful) 3444 ena_com_set_admin_running_state(ena_dev, false); 3445 3446 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3447 ena_down(adapter); 3448 3449 /* 3450 * Stop the device from sending AENQ events (if the device was up, and 3451 * the trigger reset was on, ena_down already performs device reset) 3452 */ 3453 if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up)) 3454 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 3455 3456 ena_free_mgmnt_irq(adapter); 3457 3458 ena_disable_msix(adapter); 3459 3460 /* 3461 * IO rings resources should be freed because `ena_restore_device()` 3462 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX 3463 * vectors. The amount of MSIX vectors after destroy-restore may be 3464 * different than before. Therefore, IO rings resources should be 3465 * established from scratch each time. 3466 */ 3467 ena_free_all_io_rings_resources(adapter); 3468 3469 ena_com_abort_admin_commands(ena_dev); 3470 3471 ena_com_wait_for_abort_completion(ena_dev); 3472 3473 ena_com_admin_destroy(ena_dev); 3474 3475 ena_com_mmio_reg_read_request_destroy(ena_dev); 3476 3477 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3478 3479 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter); 3480 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3481 } 3482 3483 static int 3484 ena_device_validate_params(struct ena_adapter *adapter, 3485 struct ena_com_dev_get_features_ctx *get_feat_ctx) 3486 { 3487 if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr, 3488 ETHER_ADDR_LEN) != 0) { 3489 ena_log(adapter->pdev, ERR, "Error, mac addresses differ\n"); 3490 return (EINVAL); 3491 } 3492 3493 if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) { 3494 ena_log(adapter->pdev, ERR, 3495 "Error, device max mtu is smaller than ifp MTU\n"); 3496 return (EINVAL); 3497 } 3498 3499 return 0; 3500 } 3501 3502 int 3503 ena_restore_device(struct ena_adapter *adapter) 3504 { 3505 struct ena_com_dev_get_features_ctx get_feat_ctx; 3506 struct ena_com_dev *ena_dev = adapter->ena_dev; 3507 if_t ifp = adapter->ifp; 3508 device_t dev = adapter->pdev; 3509 int wd_active; 3510 int rc; 3511 3512 ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3513 3514 rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active); 3515 if (rc != 0) { 3516 ena_log(dev, ERR, "Cannot initialize device\n"); 3517 goto err; 3518 } 3519 /* 3520 * Only enable WD if it was enabled before reset, so it won't override 3521 * value set by the user by the sysctl. 
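 * In other words: if the user disabled the watchdog (wd_active == 0),
 * the wd_active value reported by the device is ignored; otherwise the
 * device-reported value is adopted.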
3522 */ 3523 if (adapter->wd_active != 0) 3524 adapter->wd_active = wd_active; 3525 3526 rc = ena_device_validate_params(adapter, &get_feat_ctx); 3527 if (rc != 0) { 3528 ena_log(dev, ERR, "Validation of device parameters failed\n"); 3529 goto err_device_destroy; 3530 } 3531 3532 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3533 /* Make sure we don't have a race with the AENQ link state handler */ 3534 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) 3535 if_link_state_change(ifp, LINK_STATE_UP); 3536 3537 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 3538 if (rc != 0) { 3539 ena_log(dev, ERR, "Enable MSI-X failed\n"); 3540 goto err_device_destroy; 3541 } 3542 3543 /* 3544 * The effective number of MSI-X vectors in use should be the same as before 3545 * `ena_destroy_device()`, if possible, or as close to it as possible if fewer 3546 * vectors are available. 3547 */ 3548 if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues) 3549 adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC; 3550 3551 /* Re-initialize rings basic information */ 3552 ena_init_io_rings(adapter); 3553 3554 /* If the interface was up before the reset, bring it up */ 3555 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) { 3556 rc = ena_up(adapter); 3557 if (rc != 0) { 3558 ena_log(dev, ERR, "Failed to create I/O queues\n"); 3559 goto err_disable_msix; 3560 } 3561 } 3562 3563 /* Indicate that the device is running again and ready to work */ 3564 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3565 3566 /* 3567 * As the AENQ handlers weren't executed during reset because 3568 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the 3569 * timestamp must be updated again. That will prevent the next reset 3570 * from being caused by a missing keep-alive. 3571 */ 3572 adapter->keep_alive_timestamp = getsbinuptime(); 3573 ENA_TIMER_RESET(adapter); 3574 3575 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 3576 3577 return (rc); 3578 3579 err_disable_msix: 3580 ena_free_mgmnt_irq(adapter); 3581 ena_disable_msix(adapter); 3582 err_device_destroy: 3583 ena_com_abort_admin_commands(ena_dev); 3584 ena_com_wait_for_abort_completion(ena_dev); 3585 ena_com_admin_destroy(ena_dev); 3586 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); 3587 ena_com_mmio_reg_read_request_destroy(ena_dev); 3588 err: 3589 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3590 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3591 ena_log(dev, ERR, "Reset attempt failed.
Cannot reset the device\n"); 3592 3593 return (rc); 3594 } 3595 3596 static void 3597 ena_metrics_task(void *arg, int pending) 3598 { 3599 struct ena_adapter *adapter = (struct ena_adapter *)arg; 3600 3601 ENA_LOCK_LOCK(); 3602 3603 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 3604 (void)ena_copy_customer_metrics(adapter); 3605 else if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS)) 3606 (void)ena_copy_eni_metrics(adapter); 3607 3608 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3609 (void)ena_copy_srd_metrics(adapter); 3610 3611 ENA_LOCK_UNLOCK(); 3612 } 3613 3614 static void 3615 ena_reset_task(void *arg, int pending) 3616 { 3617 struct ena_adapter *adapter = (struct ena_adapter *)arg; 3618 3619 ENA_LOCK_LOCK(); 3620 if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { 3621 ena_destroy_device(adapter, false); 3622 ena_restore_device(adapter); 3623 3624 ena_log(adapter->pdev, INFO, 3625 "Device reset completed successfully. Driver info: %s\n", 3626 ena_version); 3627 } 3628 ENA_LOCK_UNLOCK(); 3629 } 3630 3631 static void 3632 ena_free_stats(struct ena_adapter *adapter) 3633 { 3634 ena_free_counters((counter_u64_t *)&adapter->hw_stats, 3635 sizeof(struct ena_hw_stats)); 3636 ena_free_counters((counter_u64_t *)&adapter->dev_stats, 3637 sizeof(struct ena_stats_dev)); 3638 3639 } 3640 /** 3641 * ena_attach - Device Initialization Routine 3642 * @pdev: device information struct 3643 * 3644 * Returns 0 on success, or an error code on failure. 3645 * 3646 * ena_attach initializes an adapter identified by a device structure. 3647 * The OS initialization, configuration of the adapter private structure, 3648 * and a hardware reset occur. 3649 **/ 3650 static int 3651 ena_attach(device_t pdev) 3652 { 3653 struct ena_com_dev_get_features_ctx get_feat_ctx; 3654 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 3655 static int version_printed; 3656 struct ena_adapter *adapter; 3657 struct ena_com_dev *ena_dev = NULL; 3658 uint32_t max_num_io_queues; 3659 int msix_rid; 3660 int rid, rc; 3661 3662 adapter = device_get_softc(pdev); 3663 adapter->pdev = pdev; 3664 adapter->first_bind = -1; 3665 3666 /* 3667 * Set up the timer service - the driver is responsible for avoiding 3668 * concurrency, as the callout won't be using any locking inside. 3669 */ 3670 ENA_TIMER_INIT(adapter); 3671 adapter->keep_alive_timeout = ENA_DEFAULT_KEEP_ALIVE_TO; 3672 adapter->missing_tx_timeout = ENA_DEFAULT_TX_CMP_TO; 3673 adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES; 3674 adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD; 3675 3676 adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED; 3677 adapter->irq_cpu_stride = 0; 3678 3679 #ifdef RSS 3680 adapter->rss_enabled = 1; 3681 #endif 3682 3683 if (version_printed++ == 0) 3684 ena_log(pdev, INFO, "%s\n", ena_version); 3685 3686 /* Allocate memory for ena_dev structure */ 3687 ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF, 3688 M_WAITOK | M_ZERO); 3689 3690 adapter->ena_dev = ena_dev; 3691 ena_dev->dmadev = pdev; 3692 3693 rid = PCIR_BAR(ENA_REG_BAR); 3694 adapter->memory = NULL; 3695 adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid, 3696 RF_ACTIVE); 3697 if (unlikely(adapter->registers == NULL)) { 3698 ena_log(pdev, ERR, 3699 "unable to allocate bus resource: registers!\n"); 3700 rc = ENOMEM; 3701 goto err_dev_free; 3702 } 3703 3704 /* The MSI-X vector table may reside on BAR0 together with the registers, or on BAR1.
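 * pci_msix_table_bar() below returns the rid of the BAR that holds the
 * MSI-X table, so a dedicated mapping is created only when that BAR
 * differs from the registers BAR.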
*/ 3705 msix_rid = pci_msix_table_bar(pdev); 3706 if (msix_rid != rid) { 3707 adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, 3708 &msix_rid, RF_ACTIVE); 3709 if (unlikely(adapter->msix == NULL)) { 3710 ena_log(pdev, ERR, 3711 "unable to allocate bus resource: msix!\n"); 3712 rc = ENOMEM; 3713 goto err_pci_free; 3714 } 3715 adapter->msix_rid = msix_rid; 3716 } 3717 3718 ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF, 3719 M_WAITOK | M_ZERO); 3720 3721 /* Store register resources */ 3722 ((struct ena_bus *)(ena_dev->bus))->reg_bar_t = rman_get_bustag( 3723 adapter->registers); 3724 ((struct ena_bus *)(ena_dev->bus))->reg_bar_h = rman_get_bushandle( 3725 adapter->registers); 3726 3727 if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) { 3728 ena_log(pdev, ERR, "failed to pmap registers bar\n"); 3729 rc = ENXIO; 3730 goto err_bus_free; 3731 } 3732 3733 rc = ena_map_llq_mem_bar(pdev, ena_dev); 3734 if (unlikely(rc != 0)) { 3735 ena_log(pdev, ERR, "Failed to map ENA mem bar"); 3736 goto err_bus_free; 3737 } 3738 3739 /* Initially clear all the flags */ 3740 ENA_FLAG_ZERO(adapter); 3741 3742 /* Device initialization */ 3743 rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active); 3744 if (unlikely(rc != 0)) { 3745 ena_log(pdev, ERR, "ENA device init failed! (err: %d)\n", rc); 3746 rc = ENXIO; 3747 goto err_bus_free; 3748 } 3749 3750 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 3751 adapter->disable_meta_caching = !!( 3752 get_feat_ctx.llq.accel_mode.u.get.supported_flags & 3753 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 3754 3755 adapter->keep_alive_timestamp = getsbinuptime(); 3756 3757 adapter->tx_offload_cap = get_feat_ctx.offload.tx; 3758 3759 memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr, 3760 ETHER_ADDR_LEN); 3761 3762 calc_queue_ctx.pdev = pdev; 3763 calc_queue_ctx.ena_dev = ena_dev; 3764 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 3765 3766 /* Calculate initial and maximum IO queue number and size */ 3767 max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, 3768 &get_feat_ctx); 3769 rc = ena_calc_io_queue_size(&calc_queue_ctx); 3770 if (unlikely((rc != 0) || (max_num_io_queues <= 0))) { 3771 rc = EFAULT; 3772 goto err_com_free; 3773 } 3774 3775 adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; 3776 adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; 3777 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 3778 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 3779 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 3780 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 3781 3782 adapter->max_num_io_queues = max_num_io_queues; 3783 3784 adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE; 3785 3786 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 3787 3788 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3789 3790 /* set up dma tags for rx and tx buffers */ 3791 rc = ena_setup_tx_dma_tag(adapter); 3792 if (unlikely(rc != 0)) { 3793 ena_log(pdev, ERR, "Failed to create TX DMA tag\n"); 3794 goto err_com_free; 3795 } 3796 3797 rc = ena_setup_rx_dma_tag(adapter); 3798 if (unlikely(rc != 0)) { 3799 ena_log(pdev, ERR, "Failed to create RX DMA tag\n"); 3800 goto err_tx_tag_free; 3801 } 3802 3803 /* 3804 * The amount of requested MSIX vectors is equal to 3805 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant 3806 * number of admin queue interrupts. 
	/* Calculate initial and maximum IO queue number and size */
	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
	    &get_feat_ctx);
	rc = ena_calc_io_queue_size(&calc_queue_ctx);
	if (unlikely((rc != 0) || (max_num_io_queues <= 0))) {
		rc = EFAULT;
		goto err_com_free;
	}

	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;

	adapter->max_num_io_queues = max_num_io_queues;

	adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;

	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;

	adapter->reset_reason = ENA_REGS_RESET_NORMAL;

	/* set up dma tags for rx and tx buffers */
	rc = ena_setup_tx_dma_tag(adapter);
	if (unlikely(rc != 0)) {
		ena_log(pdev, ERR, "Failed to create TX DMA tag\n");
		goto err_com_free;
	}

	rc = ena_setup_rx_dma_tag(adapter);
	if (unlikely(rc != 0)) {
		ena_log(pdev, ERR, "Failed to create RX DMA tag\n");
		goto err_tx_tag_free;
	}

	/*
	 * The amount of requested MSIX vectors is equal to
	 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a
	 * constant number of admin queue interrupts. The former is initially
	 * determined by HW capabilities (see `ena_calc_max_io_queue_num()`)
	 * but may not be achieved if there are not enough system resources.
	 * By default, the number of effectively used IO queues is the same,
	 * but later on it can be limited by the user via the sysctl
	 * interface.
	 */
	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
	if (unlikely(rc != 0)) {
		ena_log(pdev, ERR,
		    "Failed to enable and set the admin interrupts\n");
		goto err_io_free;
	}
	/* By default all of allocated MSIX vectors are actively used */
	adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;

	/* initialize rings basic information */
	ena_init_io_rings(adapter);

	rc = ena_com_allocate_customer_metrics_buffer(ena_dev);
	if (unlikely(rc != 0)) {
		ena_log(pdev, ERR,
		    "Failed to allocate customer metrics buffer.\n");
		goto err_msix_free;
	}

	rc = ena_sysctl_allocate_customer_metrics_buffer(adapter);
	if (unlikely(rc != 0)) {
		ena_log(pdev, ERR,
		    "Failed to allocate sysctl customer metrics buffer.\n");
		goto err_metrics_buffer_destroy;
	}

	/* Initialize statistics */
	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
	    sizeof(struct ena_stats_dev));
	ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
	    sizeof(struct ena_hw_stats));
	ena_sysctl_add_nodes(adapter);

	/* setup network interface */
	ena_setup_ifnet(pdev, adapter, &get_feat_ctx);

	/* Initialize reset task queue */
	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, "%s rstq",
	    device_get_nameunit(adapter->pdev));

	/* Initialize metrics task queue */
	TASK_INIT(&adapter->metrics_task, 0, ena_metrics_task, adapter);
	adapter->metrics_tq = taskqueue_create("ena_metrics_enqueue",
	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->metrics_tq);
	taskqueue_start_threads(&adapter->metrics_tq, 1, PI_NET, "%s metricsq",
	    device_get_nameunit(adapter->pdev));

#ifdef DEV_NETMAP
	rc = ena_netmap_attach(adapter);
	if (rc != 0) {
		ena_log(pdev, ERR, "netmap attach failed: %d\n", rc);
		goto err_detach;
	}
#endif /* DEV_NETMAP */

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);

	/* Run the timer service */
	ENA_TIMER_RESET(adapter);

	return (0);
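	/*
	 * Error unwind: each label below undoes everything acquired after
	 * the failure point that jumps to it, then falls through to the
	 * next label, releasing resources in reverse order of acquisition.
	 */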
#ifdef DEV_NETMAP
err_detach:
	ether_ifdetach(adapter->ifp);
	free(adapter->customer_metrics_array, M_DEVBUF);
#endif /* DEV_NETMAP */
err_metrics_buffer_destroy:
	ena_com_delete_customer_metrics_buffer(ena_dev);
err_msix_free:
	ena_free_stats(adapter);
	ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
	ena_free_mgmnt_irq(adapter);
	ena_disable_msix(adapter);
err_io_free:
	ena_free_all_io_rings_resources(adapter);
	ena_free_rx_dma_tag(adapter);
err_tx_tag_free:
	ena_free_tx_dma_tag(adapter);
err_com_free:
	ena_com_admin_destroy(ena_dev);
	ena_com_delete_host_info(ena_dev);
	ena_com_mmio_reg_read_request_destroy(ena_dev);
err_bus_free:
	free(ena_dev->bus, M_DEVBUF);
err_pci_free:
	ena_free_pci_resources(adapter);
err_dev_free:
	free(ena_dev, M_DEVBUF);

	return (rc);
}

/**
 * ena_detach - Device Removal Routine
 * @pdev: device information struct
 *
 * ena_detach is called by the device subsystem to alert the driver
 * that it should release a PCI device.
 **/
static int
ena_detach(device_t pdev)
{
	struct ena_adapter *adapter = device_get_softc(pdev);
	struct ena_com_dev *ena_dev = adapter->ena_dev;
	int rc;

	/* Make sure VLANs are not using the driver */
	if (if_vlantrunkinuse(adapter->ifp)) {
		ena_log(adapter->pdev, ERR, "VLAN is in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	/* Stop timer service */
	ENA_LOCK_LOCK();
	ENA_TIMER_DRAIN(adapter);
	ENA_LOCK_UNLOCK();
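	/*
	 * taskqueue_cancel() returns non-zero while the task is still
	 * running; drain the queue and retry until the cancellation
	 * sticks, so no task can be executing once the queue is freed.
	 */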
	/* Release metrics task */
	while (taskqueue_cancel(adapter->metrics_tq, &adapter->metrics_task,
	    NULL))
		taskqueue_drain(adapter->metrics_tq, &adapter->metrics_task);
	taskqueue_free(adapter->metrics_tq);

	/* Release reset task */
	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
	taskqueue_free(adapter->reset_tq);

	ENA_LOCK_LOCK();
	ena_down(adapter);
	ena_destroy_device(adapter, true);
	ENA_LOCK_UNLOCK();

	/* Restore unregistered sysctl queue nodes. */
	ena_sysctl_update_queue_node_nb(adapter, adapter->num_io_queues,
	    adapter->max_num_io_queues);

#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */

	ena_free_stats(adapter);

	rc = ena_free_rx_dma_tag(adapter);
	if (unlikely(rc != 0))
		ena_log(adapter->pdev, WARN,
		    "Unmapped RX DMA tag associations\n");

	rc = ena_free_tx_dma_tag(adapter);
	if (unlikely(rc != 0))
		ena_log(adapter->pdev, WARN,
		    "Unmapped TX DMA tag associations\n");

	ena_free_irqs(adapter);

	ena_free_pci_resources(adapter);

	if (adapter->rss_indir != NULL)
		free(adapter->rss_indir, M_DEVBUF);

	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
		ena_com_rss_destroy(ena_dev);

	ena_com_delete_host_info(ena_dev);

	free(adapter->customer_metrics_array, M_DEVBUF);

	ena_com_delete_customer_metrics_buffer(ena_dev);

	if_free(adapter->ifp);

	free(ena_dev->bus, M_DEVBUF);

	free(ena_dev, M_DEVBUF);

	return (bus_generic_detach(pdev));
}

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/

/**
 * ena_update_on_link_change:
 * Notify the network interface about the change in link status
 **/
static void
ena_update_on_link_change(void *adapter_data,
    struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_aenq_link_change_desc *aenq_desc;
	int status;
	if_t ifp;

	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
	ifp = adapter->ifp;
	status = aenq_desc->flags &
	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;

	if (status != 0) {
		ena_log(adapter->pdev, INFO, "link is UP\n");
		ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
		if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
			if_link_state_change(ifp, LINK_STATE_UP);
	} else {
		ena_log(adapter->pdev, INFO, "link is DOWN\n");
		if_link_state_change(ifp, LINK_STATE_DOWN);
		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
	}
}

static void
ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_ena_hw_hints *hints;

	ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
	    adapter->ena_dev, "Invalid group(%x) expected %x\n",
	    aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints =
		    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		ena_log(adapter->pdev, ERR,
		    "Invalid AENQ notification syndrome %d\n",
		    aenq_e->aenq_common_desc.syndrome);
	}
}

static void
ena_lock_init(void *arg)
{
	ENA_LOCK_INIT();
}
SYSINIT(ena_lock_init, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_init, NULL);

static void
ena_lock_uninit(void *arg)
{
	ENA_LOCK_DESTROY();
}
SYSUNINIT(ena_lock_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_uninit, NULL);

/**
 * This handler will be called for an unknown event group or for events
 * with unimplemented handlers.
 **/
static void
unimplemented_aenq_handler(void *adapter_data,
    struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;

	ena_log(adapter->pdev, ERR,
	    "Unknown event was received or event with unimplemented handler\n");
}

static void
ena_conf_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_aenq_conf_notifications_desc *desc;
	u64 bitmap, bit;

	desc = (struct ena_admin_aenq_conf_notifications_desc *)aenq_e;
	bitmap = desc->notifications_bitmap;

	if (bitmap == 0) {
		ena_log(adapter->pdev, INFO,
		    "Empty configuration notification bitmap\n");
		return;
	}

	/*
	 * ffsll() returns the 1-based index of the least significant set
	 * bit, or 0 once the bitmap is empty; log each notification code
	 * and clear its bit until none remain.
	 */
	for (bit = ffsll(bitmap); bit != 0; bit = ffsll(bitmap)) {
		bit--;
		ena_log(adapter->pdev, INFO,
		    "Sub-optimal configuration notification code: %" PRIu64
		    ". Refer to AWS ENA documentation for additional details and mitigation options.\n",
		    bit + 1);
		/* Clear the processed bit (64-bit shift for bits above 31) */
		bitmap &= ~(1ULL << bit);
	}
}
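/*
 * AENQ dispatch table consumed by the ena_com interrupt path: handlers are
 * indexed by event group, and groups without a dedicated entry fall back to
 * unimplemented_aenq_handler.
 */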
static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
		[ENA_ADMIN_CONF_NOTIFICATIONS] = ena_conf_notification,
	},
	.unimplemented_handler = unimplemented_aenq_handler
};

/*********************************************************************
 * FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ena_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ena_probe),
	DEVMETHOD(device_attach, ena_attach),
	DEVMETHOD(device_detach, ena_detach),

	DEVMETHOD_END
};

static driver_t ena_driver = {
	"ena",
	ena_methods,
	sizeof(struct ena_adapter),
};

DRIVER_MODULE(ena, pci, ena_driver, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
    nitems(ena_vendor_info_array) - 1);
MODULE_DEPEND(ena, pci, 1, 1, 1);
MODULE_DEPEND(ena, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(ena, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************/