1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include "opt_rss.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/bus.h> 36 #include <sys/endian.h> 37 #include <sys/eventhandler.h> 38 #include <sys/kernel.h> 39 #include <sys/kthread.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/rman.h> 44 #include <sys/smp.h> 45 #include <sys/socket.h> 46 #include <sys/sockio.h> 47 #include <sys/sysctl.h> 48 #include <sys/taskqueue.h> 49 #include <sys/time.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 54 #include <machine/atomic.h> 55 #include <machine/bus.h> 56 #include <machine/in_cksum.h> 57 #include <machine/resource.h> 58 59 #include <dev/pci/pcireg.h> 60 #include <dev/pci/pcivar.h> 61 62 #include <net/bpf.h> 63 #include <net/ethernet.h> 64 #include <net/if.h> 65 #include <net/if_arp.h> 66 #include <net/if_dl.h> 67 #include <net/if_media.h> 68 #include <net/if_types.h> 69 #include <net/if_var.h> 70 #include <net/if_vlan_var.h> 71 #include <netinet/in.h> 72 #include <netinet/in_systm.h> 73 #include <netinet/if_ether.h> 74 #include <netinet/ip.h> 75 #include <netinet/ip6.h> 76 #include <netinet/tcp.h> 77 #include <netinet/udp.h> 78 79 #include "ena.h" 80 #include "ena_datapath.h" 81 #include "ena_rss.h" 82 #include "ena_sysctl.h" 83 84 #ifdef DEV_NETMAP 85 #include "ena_netmap.h" 86 #endif /* DEV_NETMAP */ 87 88 /********************************************************* 89 * Function prototypes 90 *********************************************************/ 91 static int ena_probe(device_t); 92 static void ena_intr_msix_mgmnt(void *); 93 static void ena_free_pci_resources(struct ena_adapter *); 94 static int ena_change_mtu(if_t, int); 95 static inline void ena_alloc_counters(counter_u64_t *, int); 96 static inline void ena_free_counters(counter_u64_t *, int); 97 static inline void ena_reset_counters(counter_u64_t *, int); 98 static void ena_init_io_rings_common(struct ena_adapter *, struct ena_ring *, 99 uint16_t); 100 static void ena_init_io_rings_basic(struct ena_adapter *); 101 static void ena_init_io_rings_advanced(struct ena_adapter *); 102 static void ena_init_io_rings(struct ena_adapter *); 103 static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int); 104 static void ena_free_all_io_rings_resources(struct ena_adapter *); 105 static int ena_setup_tx_dma_tag(struct ena_adapter *); 106 static int ena_free_tx_dma_tag(struct ena_adapter *); 107 static int ena_setup_rx_dma_tag(struct ena_adapter *); 108 static int ena_free_rx_dma_tag(struct ena_adapter *); 109 static void ena_release_all_tx_dmamap(struct ena_ring *); 110 static int ena_setup_tx_resources(struct ena_adapter *, int); 111 static void ena_free_tx_resources(struct ena_adapter *, int); 112 static int ena_setup_all_tx_resources(struct ena_adapter *); 113 static void ena_free_all_tx_resources(struct ena_adapter *); 114 static int ena_setup_rx_resources(struct ena_adapter *, unsigned int); 115 static void ena_free_rx_resources(struct ena_adapter *, unsigned int); 116 static int ena_setup_all_rx_resources(struct ena_adapter *); 117 static void ena_free_all_rx_resources(struct ena_adapter *); 118 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *, 119 struct ena_rx_buffer *); 120 static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *, 121 struct ena_rx_buffer *); 122 static void ena_free_rx_bufs(struct ena_adapter *, unsigned int); 123 static void ena_refill_all_rx_bufs(struct ena_adapter *); 124 static void ena_free_all_rx_bufs(struct ena_adapter *); 125 static void ena_free_tx_bufs(struct ena_adapter *, unsigned int); 126 static void ena_free_all_tx_bufs(struct ena_adapter *); 127 static void ena_destroy_all_tx_queues(struct ena_adapter *); 128 static void ena_destroy_all_rx_queues(struct ena_adapter *); 129 static void ena_destroy_all_io_queues(struct ena_adapter *); 130 static int ena_create_io_queues(struct ena_adapter *); 131 static int ena_handle_msix(void *); 132 static int ena_enable_msix(struct ena_adapter *); 133 static void ena_setup_mgmnt_intr(struct ena_adapter *); 134 static int ena_setup_io_intr(struct ena_adapter *); 135 static int ena_request_mgmnt_irq(struct ena_adapter *); 136 static int ena_request_io_irq(struct ena_adapter *); 137 static void ena_free_mgmnt_irq(struct ena_adapter *); 138 static void ena_free_io_irq(struct ena_adapter *); 139 static void ena_free_irqs(struct ena_adapter *); 140 static void ena_disable_msix(struct ena_adapter *); 141 static void ena_unmask_all_io_irqs(struct ena_adapter *); 142 static int ena_up_complete(struct ena_adapter *); 143 static uint64_t ena_get_counter(if_t, ift_counter); 144 static int ena_media_change(if_t); 145 static void ena_media_status(if_t, struct ifmediareq *); 146 static void ena_init(void *); 147 static int ena_ioctl(if_t, u_long, caddr_t); 148 static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *); 149 static void ena_update_host_info(struct ena_admin_host_info *, if_t); 150 static void ena_update_hwassist(struct ena_adapter *); 151 static int ena_setup_ifnet(device_t, struct ena_adapter *, 152 struct ena_com_dev_get_features_ctx *); 153 static int ena_enable_wc(device_t, struct resource *); 154 static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *, 155 struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *); 156 static int ena_map_llq_mem_bar(device_t, struct ena_com_dev *); 157 static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *, 158 struct ena_com_dev_get_features_ctx *); 159 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *); 160 static void ena_config_host_info(struct ena_com_dev *, device_t); 161 static int ena_attach(device_t); 162 static int ena_detach(device_t); 163 static int ena_device_init(struct ena_adapter *, device_t, 164 struct ena_com_dev_get_features_ctx *, int *); 165 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *); 166 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *); 167 static void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *); 168 static int ena_copy_eni_metrics(struct ena_adapter *); 169 static int ena_copy_srd_metrics(struct ena_adapter *); 170 static int ena_copy_customer_metrics(struct ena_adapter *); 171 static void ena_timer_service(void *); 172 173 static char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME 174 " v" ENA_DRV_MODULE_VERSION; 175 176 static ena_vendor_info_t ena_vendor_info_array[] = { 177 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0 }, 178 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0 }, 179 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0 }, 180 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0 }, 181 /* Last entry */ 182 { 0, 0, 0 } 183 }; 184 185 struct sx ena_global_lock; 186 187 /* 188 * Contains pointers to event handlers, e.g. link state chage. 189 */ 190 static struct ena_aenq_handlers aenq_handlers; 191 192 void 193 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 194 { 195 if (error != 0) 196 return; 197 *(bus_addr_t *)arg = segs[0].ds_addr; 198 } 199 200 int 201 ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma, 202 int mapflags, bus_size_t alignment, int domain) 203 { 204 struct ena_adapter *adapter = device_get_softc(dmadev); 205 device_t pdev = adapter->pdev; 206 uint32_t maxsize; 207 uint64_t dma_space_addr; 208 int error; 209 210 maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE; 211 212 dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width); 213 if (unlikely(dma_space_addr == 0)) 214 dma_space_addr = BUS_SPACE_MAXADDR; 215 216 error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */ 217 alignment, 0, /* alignment, bounds */ 218 dma_space_addr, /* lowaddr of exclusion window */ 219 BUS_SPACE_MAXADDR, /* highaddr of exclusion window */ 220 NULL, NULL, /* filter, filterarg */ 221 maxsize, /* maxsize */ 222 1, /* nsegments */ 223 maxsize, /* maxsegsize */ 224 BUS_DMA_ALLOCNOW, /* flags */ 225 NULL, /* lockfunc */ 226 NULL, /* lockarg */ 227 &dma->tag); 228 if (unlikely(error != 0)) { 229 ena_log(pdev, ERR, "bus_dma_tag_create failed: %d\n", error); 230 goto fail_tag; 231 } 232 233 error = bus_dma_tag_set_domain(dma->tag, domain); 234 if (unlikely(error != 0)) { 235 ena_log(pdev, ERR, "bus_dma_tag_set_domain failed: %d\n", 236 error); 237 goto fail_map_create; 238 } 239 240 error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, 241 BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map); 242 if (unlikely(error != 0)) { 243 ena_log(pdev, ERR, "bus_dmamem_alloc(%ju) failed: %d\n", 244 (uintmax_t)size, error); 245 goto fail_map_create; 246 } 247 248 dma->paddr = 0; 249 error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, 250 ena_dmamap_callback, &dma->paddr, mapflags); 251 if (unlikely((error != 0) || (dma->paddr == 0))) { 252 ena_log(pdev, ERR, "bus_dmamap_load failed: %d\n", error); 253 goto fail_map_load; 254 } 255 256 bus_dmamap_sync(dma->tag, dma->map, 257 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 258 259 return (0); 260 261 fail_map_load: 262 bus_dmamem_free(dma->tag, dma->vaddr, dma->map); 263 fail_map_create: 264 bus_dma_tag_destroy(dma->tag); 265 fail_tag: 266 dma->tag = NULL; 267 dma->vaddr = NULL; 268 dma->paddr = 0; 269 270 return (error); 271 } 272 273 static void 274 ena_free_pci_resources(struct ena_adapter *adapter) 275 { 276 device_t pdev = adapter->pdev; 277 278 if (adapter->memory != NULL) { 279 bus_release_resource(pdev, SYS_RES_MEMORY, 280 PCIR_BAR(ENA_MEM_BAR), adapter->memory); 281 } 282 283 if (adapter->registers != NULL) { 284 bus_release_resource(pdev, SYS_RES_MEMORY, 285 PCIR_BAR(ENA_REG_BAR), adapter->registers); 286 } 287 288 if (adapter->msix != NULL) { 289 bus_release_resource(pdev, SYS_RES_MEMORY, adapter->msix_rid, 290 adapter->msix); 291 } 292 } 293 294 static int 295 ena_probe(device_t dev) 296 { 297 ena_vendor_info_t *ent; 298 uint16_t pci_vendor_id = 0; 299 uint16_t pci_device_id = 0; 300 301 pci_vendor_id = pci_get_vendor(dev); 302 pci_device_id = pci_get_device(dev); 303 304 ent = ena_vendor_info_array; 305 while (ent->vendor_id != 0) { 306 if ((pci_vendor_id == ent->vendor_id) && 307 (pci_device_id == ent->device_id)) { 308 ena_log_raw(DBG, "vendor=%x device=%x\n", pci_vendor_id, 309 pci_device_id); 310 311 device_set_desc(dev, ENA_DEVICE_DESC); 312 return (BUS_PROBE_DEFAULT); 313 } 314 315 ent++; 316 } 317 318 return (ENXIO); 319 } 320 321 static int 322 ena_change_mtu(if_t ifp, int new_mtu) 323 { 324 struct ena_adapter *adapter = if_getsoftc(ifp); 325 device_t pdev = adapter->pdev; 326 int rc; 327 328 if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { 329 ena_log(pdev, ERR, "Invalid MTU setting. new_mtu: %d max mtu: %d min mtu: %d\n", 330 new_mtu, adapter->max_mtu, ENA_MIN_MTU); 331 return (EINVAL); 332 } 333 334 rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); 335 if (likely(rc == 0)) { 336 ena_log(pdev, DBG, "set MTU to %d\n", new_mtu); 337 if_setmtu(ifp, new_mtu); 338 } else { 339 ena_log(pdev, ERR, "Failed to set MTU to %d\n", new_mtu); 340 } 341 342 return (rc); 343 } 344 345 static inline void 346 ena_alloc_counters(counter_u64_t *begin, int size) 347 { 348 counter_u64_t *end = (counter_u64_t *)((char *)begin + size); 349 350 for (; begin < end; ++begin) 351 *begin = counter_u64_alloc(M_WAITOK); 352 } 353 354 static inline void 355 ena_free_counters(counter_u64_t *begin, int size) 356 { 357 counter_u64_t *end = (counter_u64_t *)((char *)begin + size); 358 359 for (; begin < end; ++begin) 360 counter_u64_free(*begin); 361 } 362 363 static inline void 364 ena_reset_counters(counter_u64_t *begin, int size) 365 { 366 counter_u64_t *end = (counter_u64_t *)((char *)begin + size); 367 368 for (; begin < end; ++begin) 369 counter_u64_zero(*begin); 370 } 371 372 static void 373 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring, 374 uint16_t qid) 375 { 376 ring->qid = qid; 377 ring->adapter = adapter; 378 ring->ena_dev = adapter->ena_dev; 379 atomic_store_8(&ring->first_interrupt, 0); 380 ring->no_interrupt_event_cnt = 0; 381 } 382 383 static void 384 ena_init_io_rings_basic(struct ena_adapter *adapter) 385 { 386 struct ena_com_dev *ena_dev; 387 struct ena_ring *txr, *rxr; 388 struct ena_que *que; 389 int i; 390 391 ena_dev = adapter->ena_dev; 392 393 for (i = 0; i < adapter->num_io_queues; i++) { 394 txr = &adapter->tx_ring[i]; 395 rxr = &adapter->rx_ring[i]; 396 397 /* TX/RX common ring state */ 398 ena_init_io_rings_common(adapter, txr, i); 399 ena_init_io_rings_common(adapter, rxr, i); 400 401 /* TX specific ring state */ 402 txr->tx_max_header_size = ena_dev->tx_max_header_size; 403 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; 404 405 que = &adapter->que[i]; 406 que->adapter = adapter; 407 que->id = i; 408 que->tx_ring = txr; 409 que->rx_ring = rxr; 410 411 txr->que = que; 412 rxr->que = que; 413 414 rxr->empty_rx_queue = 0; 415 rxr->rx_mbuf_sz = ena_mbuf_sz; 416 } 417 } 418 419 static void 420 ena_init_io_rings_advanced(struct ena_adapter *adapter) 421 { 422 struct ena_ring *txr, *rxr; 423 int i; 424 425 for (i = 0; i < adapter->num_io_queues; i++) { 426 txr = &adapter->tx_ring[i]; 427 rxr = &adapter->rx_ring[i]; 428 429 /* Allocate a buf ring */ 430 txr->buf_ring_size = adapter->buf_ring_size; 431 txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF, M_WAITOK, 432 &txr->ring_mtx); 433 434 /* Allocate Tx statistics. */ 435 ena_alloc_counters((counter_u64_t *)&txr->tx_stats, 436 sizeof(txr->tx_stats)); 437 txr->tx_last_cleanup_ticks = ticks; 438 439 /* Allocate Rx statistics. */ 440 ena_alloc_counters((counter_u64_t *)&rxr->rx_stats, 441 sizeof(rxr->rx_stats)); 442 443 /* Initialize locks */ 444 snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)", 445 device_get_nameunit(adapter->pdev), i); 446 snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)", 447 device_get_nameunit(adapter->pdev), i); 448 449 mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF); 450 } 451 } 452 453 static void 454 ena_init_io_rings(struct ena_adapter *adapter) 455 { 456 /* 457 * IO rings initialization can be divided into the 2 steps: 458 * 1. Initialize variables and fields with initial values and copy 459 * them from adapter/ena_dev (basic) 460 * 2. Allocate mutex, counters and buf_ring (advanced) 461 */ 462 ena_init_io_rings_basic(adapter); 463 ena_init_io_rings_advanced(adapter); 464 } 465 466 static void 467 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid) 468 { 469 struct ena_ring *txr = &adapter->tx_ring[qid]; 470 struct ena_ring *rxr = &adapter->rx_ring[qid]; 471 472 ena_free_counters((counter_u64_t *)&txr->tx_stats, 473 sizeof(txr->tx_stats)); 474 ena_free_counters((counter_u64_t *)&rxr->rx_stats, 475 sizeof(rxr->rx_stats)); 476 477 ENA_RING_MTX_LOCK(txr); 478 drbr_free(txr->br, M_DEVBUF); 479 ENA_RING_MTX_UNLOCK(txr); 480 481 mtx_destroy(&txr->ring_mtx); 482 } 483 484 static void 485 ena_free_all_io_rings_resources(struct ena_adapter *adapter) 486 { 487 int i; 488 489 for (i = 0; i < adapter->num_io_queues; i++) 490 ena_free_io_ring_resources(adapter, i); 491 } 492 493 static int 494 ena_setup_tx_dma_tag(struct ena_adapter *adapter) 495 { 496 int ret; 497 498 /* Create DMA tag for Tx buffers */ 499 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), 500 1, 0, /* alignment, bounds */ 501 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */ 502 BUS_SPACE_MAXADDR, /* highaddr of excl window */ 503 NULL, NULL, /* filter, filterarg */ 504 ENA_TSO_MAXSIZE, /* maxsize */ 505 adapter->max_tx_sgl_size - 1, /* nsegments */ 506 ENA_TSO_MAXSIZE, /* maxsegsize */ 507 0, /* flags */ 508 NULL, /* lockfunc */ 509 NULL, /* lockfuncarg */ 510 &adapter->tx_buf_tag); 511 512 return (ret); 513 } 514 515 static int 516 ena_free_tx_dma_tag(struct ena_adapter *adapter) 517 { 518 int ret; 519 520 ret = bus_dma_tag_destroy(adapter->tx_buf_tag); 521 522 if (likely(ret == 0)) 523 adapter->tx_buf_tag = NULL; 524 525 return (ret); 526 } 527 528 static int 529 ena_setup_rx_dma_tag(struct ena_adapter *adapter) 530 { 531 int ret; 532 533 /* Create DMA tag for Rx buffers*/ 534 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */ 535 1, 0, /* alignment, bounds */ 536 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */ 537 BUS_SPACE_MAXADDR, /* highaddr of excl window */ 538 NULL, NULL, /* filter, filterarg */ 539 ena_mbuf_sz, /* maxsize */ 540 adapter->max_rx_sgl_size, /* nsegments */ 541 ena_mbuf_sz, /* maxsegsize */ 542 0, /* flags */ 543 NULL, /* lockfunc */ 544 NULL, /* lockarg */ 545 &adapter->rx_buf_tag); 546 547 return (ret); 548 } 549 550 static int 551 ena_free_rx_dma_tag(struct ena_adapter *adapter) 552 { 553 int ret; 554 555 ret = bus_dma_tag_destroy(adapter->rx_buf_tag); 556 557 if (likely(ret == 0)) 558 adapter->rx_buf_tag = NULL; 559 560 return (ret); 561 } 562 563 static void 564 ena_release_all_tx_dmamap(struct ena_ring *tx_ring) 565 { 566 struct ena_adapter *adapter = tx_ring->adapter; 567 struct ena_tx_buffer *tx_info; 568 bus_dma_tag_t tx_tag = adapter->tx_buf_tag; 569 int i; 570 #ifdef DEV_NETMAP 571 struct ena_netmap_tx_info *nm_info; 572 int j; 573 #endif /* DEV_NETMAP */ 574 575 for (i = 0; i < tx_ring->ring_size; ++i) { 576 tx_info = &tx_ring->tx_buffer_info[i]; 577 #ifdef DEV_NETMAP 578 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) { 579 nm_info = &tx_info->nm_info; 580 for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) { 581 if (nm_info->map_seg[j] != NULL) { 582 bus_dmamap_destroy(tx_tag, 583 nm_info->map_seg[j]); 584 nm_info->map_seg[j] = NULL; 585 } 586 } 587 } 588 #endif /* DEV_NETMAP */ 589 if (tx_info->dmamap != NULL) { 590 bus_dmamap_destroy(tx_tag, tx_info->dmamap); 591 tx_info->dmamap = NULL; 592 } 593 } 594 } 595 596 /** 597 * ena_setup_tx_resources - allocate Tx resources (Descriptors) 598 * @adapter: network interface device structure 599 * @qid: queue index 600 * 601 * Returns 0 on success, otherwise on failure. 602 **/ 603 static int 604 ena_setup_tx_resources(struct ena_adapter *adapter, int qid) 605 { 606 device_t pdev = adapter->pdev; 607 char thread_name[MAXCOMLEN + 1]; 608 struct ena_que *que = &adapter->que[qid]; 609 struct ena_ring *tx_ring = que->tx_ring; 610 cpuset_t *cpu_mask = NULL; 611 int size, i, err; 612 #ifdef DEV_NETMAP 613 bus_dmamap_t *map; 614 int j; 615 616 ena_netmap_reset_tx_ring(adapter, qid); 617 #endif /* DEV_NETMAP */ 618 619 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; 620 621 tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); 622 if (unlikely(tx_ring->tx_buffer_info == NULL)) 623 return (ENOMEM); 624 625 size = sizeof(uint16_t) * tx_ring->ring_size; 626 tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); 627 if (unlikely(tx_ring->free_tx_ids == NULL)) 628 goto err_buf_info_free; 629 630 size = tx_ring->tx_max_header_size; 631 tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF, 632 M_NOWAIT | M_ZERO); 633 if (unlikely(tx_ring->push_buf_intermediate_buf == NULL)) 634 goto err_tx_ids_free; 635 636 /* Req id stack for TX OOO completions */ 637 for (i = 0; i < tx_ring->ring_size; i++) 638 tx_ring->free_tx_ids[i] = i; 639 640 /* Reset TX statistics. */ 641 ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats, 642 sizeof(tx_ring->tx_stats)); 643 644 tx_ring->next_to_use = 0; 645 tx_ring->next_to_clean = 0; 646 tx_ring->acum_pkts = 0; 647 648 /* Make sure that drbr is empty */ 649 ENA_RING_MTX_LOCK(tx_ring); 650 drbr_flush(adapter->ifp, tx_ring->br); 651 ENA_RING_MTX_UNLOCK(tx_ring); 652 653 /* ... and create the buffer DMA maps */ 654 for (i = 0; i < tx_ring->ring_size; i++) { 655 err = bus_dmamap_create(adapter->tx_buf_tag, 0, 656 &tx_ring->tx_buffer_info[i].dmamap); 657 if (unlikely(err != 0)) { 658 ena_log(pdev, ERR, 659 "Unable to create Tx DMA map for buffer %d\n", i); 660 goto err_map_release; 661 } 662 663 #ifdef DEV_NETMAP 664 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) { 665 map = tx_ring->tx_buffer_info[i].nm_info.map_seg; 666 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) { 667 err = bus_dmamap_create(adapter->tx_buf_tag, 0, 668 &map[j]); 669 if (unlikely(err != 0)) { 670 ena_log(pdev, ERR, 671 "Unable to create Tx DMA for buffer %d %d\n", 672 i, j); 673 goto err_map_release; 674 } 675 } 676 } 677 #endif /* DEV_NETMAP */ 678 } 679 680 /* Allocate taskqueues */ 681 TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring); 682 tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT, 683 taskqueue_thread_enqueue, &tx_ring->enqueue_tq); 684 if (unlikely(tx_ring->enqueue_tq == NULL)) { 685 ena_log(pdev, ERR, 686 "Unable to create taskqueue for enqueue task\n"); 687 i = tx_ring->ring_size; 688 goto err_map_release; 689 } 690 691 tx_ring->running = true; 692 693 #ifdef RSS 694 cpu_mask = &que->cpu_mask; 695 snprintf(thread_name, sizeof(thread_name), "%s txeq %d", 696 device_get_nameunit(adapter->pdev), que->cpu); 697 #else 698 snprintf(thread_name, sizeof(thread_name), "%s txeq %d", 699 device_get_nameunit(adapter->pdev), que->id); 700 #endif 701 taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET, 702 cpu_mask, "%s", thread_name); 703 704 return (0); 705 706 err_map_release: 707 ena_release_all_tx_dmamap(tx_ring); 708 err_tx_ids_free: 709 free(tx_ring->free_tx_ids, M_DEVBUF); 710 tx_ring->free_tx_ids = NULL; 711 err_buf_info_free: 712 free(tx_ring->tx_buffer_info, M_DEVBUF); 713 tx_ring->tx_buffer_info = NULL; 714 715 return (ENOMEM); 716 } 717 718 /** 719 * ena_free_tx_resources - Free Tx Resources per Queue 720 * @adapter: network interface device structure 721 * @qid: queue index 722 * 723 * Free all transmit software resources 724 **/ 725 static void 726 ena_free_tx_resources(struct ena_adapter *adapter, int qid) 727 { 728 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 729 #ifdef DEV_NETMAP 730 struct ena_netmap_tx_info *nm_info; 731 int j; 732 #endif /* DEV_NETMAP */ 733 734 while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task, NULL)) 735 taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 736 737 taskqueue_free(tx_ring->enqueue_tq); 738 739 ENA_RING_MTX_LOCK(tx_ring); 740 /* Flush buffer ring, */ 741 drbr_flush(adapter->ifp, tx_ring->br); 742 743 /* Free buffer DMA maps, */ 744 for (int i = 0; i < tx_ring->ring_size; i++) { 745 bus_dmamap_sync(adapter->tx_buf_tag, 746 tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE); 747 bus_dmamap_unload(adapter->tx_buf_tag, 748 tx_ring->tx_buffer_info[i].dmamap); 749 bus_dmamap_destroy(adapter->tx_buf_tag, 750 tx_ring->tx_buffer_info[i].dmamap); 751 752 #ifdef DEV_NETMAP 753 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) { 754 nm_info = &tx_ring->tx_buffer_info[i].nm_info; 755 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) { 756 if (nm_info->socket_buf_idx[j] != 0) { 757 bus_dmamap_sync(adapter->tx_buf_tag, 758 nm_info->map_seg[j], 759 BUS_DMASYNC_POSTWRITE); 760 ena_netmap_unload(adapter, 761 nm_info->map_seg[j]); 762 } 763 bus_dmamap_destroy(adapter->tx_buf_tag, 764 nm_info->map_seg[j]); 765 nm_info->socket_buf_idx[j] = 0; 766 } 767 } 768 #endif /* DEV_NETMAP */ 769 770 m_freem(tx_ring->tx_buffer_info[i].mbuf); 771 tx_ring->tx_buffer_info[i].mbuf = NULL; 772 } 773 ENA_RING_MTX_UNLOCK(tx_ring); 774 775 /* And free allocated memory. */ 776 free(tx_ring->tx_buffer_info, M_DEVBUF); 777 tx_ring->tx_buffer_info = NULL; 778 779 free(tx_ring->free_tx_ids, M_DEVBUF); 780 tx_ring->free_tx_ids = NULL; 781 782 free(tx_ring->push_buf_intermediate_buf, M_DEVBUF); 783 tx_ring->push_buf_intermediate_buf = NULL; 784 } 785 786 /** 787 * ena_setup_all_tx_resources - allocate all queues Tx resources 788 * @adapter: network interface device structure 789 * 790 * Returns 0 on success, otherwise on failure. 791 **/ 792 static int 793 ena_setup_all_tx_resources(struct ena_adapter *adapter) 794 { 795 int i, rc; 796 797 for (i = 0; i < adapter->num_io_queues; i++) { 798 rc = ena_setup_tx_resources(adapter, i); 799 if (rc != 0) { 800 ena_log(adapter->pdev, ERR, 801 "Allocation for Tx Queue %u failed\n", i); 802 goto err_setup_tx; 803 } 804 } 805 806 return (0); 807 808 err_setup_tx: 809 /* Rewind the index freeing the rings as we go */ 810 while (i--) 811 ena_free_tx_resources(adapter, i); 812 return (rc); 813 } 814 815 /** 816 * ena_free_all_tx_resources - Free Tx Resources for All Queues 817 * @adapter: network interface device structure 818 * 819 * Free all transmit software resources 820 **/ 821 static void 822 ena_free_all_tx_resources(struct ena_adapter *adapter) 823 { 824 int i; 825 826 for (i = 0; i < adapter->num_io_queues; i++) 827 ena_free_tx_resources(adapter, i); 828 } 829 830 /** 831 * ena_setup_rx_resources - allocate Rx resources (Descriptors) 832 * @adapter: network interface device structure 833 * @qid: queue index 834 * 835 * Returns 0 on success, otherwise on failure. 836 **/ 837 static int 838 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid) 839 { 840 device_t pdev = adapter->pdev; 841 struct ena_que *que = &adapter->que[qid]; 842 struct ena_ring *rx_ring = que->rx_ring; 843 int size, err, i; 844 845 size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size; 846 847 #ifdef DEV_NETMAP 848 ena_netmap_reset_rx_ring(adapter, qid); 849 rx_ring->initialized = false; 850 #endif /* DEV_NETMAP */ 851 852 /* 853 * Alloc extra element so in rx path 854 * we can always prefetch rx_info + 1 855 */ 856 size += sizeof(struct ena_rx_buffer); 857 858 rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); 859 860 size = sizeof(uint16_t) * rx_ring->ring_size; 861 rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK); 862 863 for (i = 0; i < rx_ring->ring_size; i++) 864 rx_ring->free_rx_ids[i] = i; 865 866 /* Reset RX statistics. */ 867 ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats, 868 sizeof(rx_ring->rx_stats)); 869 870 rx_ring->next_to_clean = 0; 871 rx_ring->next_to_use = 0; 872 873 /* ... and create the buffer DMA maps */ 874 for (i = 0; i < rx_ring->ring_size; i++) { 875 err = bus_dmamap_create(adapter->rx_buf_tag, 0, 876 &(rx_ring->rx_buffer_info[i].map)); 877 if (err != 0) { 878 ena_log(pdev, ERR, 879 "Unable to create Rx DMA map for buffer %d\n", i); 880 goto err_buf_info_unmap; 881 } 882 } 883 884 /* Create LRO for the ring */ 885 if ((if_getcapenable(adapter->ifp) & IFCAP_LRO) != 0) { 886 int err = tcp_lro_init(&rx_ring->lro); 887 if (err != 0) { 888 ena_log(pdev, ERR, "LRO[%d] Initialization failed!\n", 889 qid); 890 } else { 891 ena_log(pdev, DBG, "RX Soft LRO[%d] Initialized\n", 892 qid); 893 rx_ring->lro.ifp = adapter->ifp; 894 } 895 } 896 897 return (0); 898 899 err_buf_info_unmap: 900 while (i--) { 901 bus_dmamap_destroy(adapter->rx_buf_tag, 902 rx_ring->rx_buffer_info[i].map); 903 } 904 905 free(rx_ring->free_rx_ids, M_DEVBUF); 906 rx_ring->free_rx_ids = NULL; 907 free(rx_ring->rx_buffer_info, M_DEVBUF); 908 rx_ring->rx_buffer_info = NULL; 909 return (ENOMEM); 910 } 911 912 /** 913 * ena_free_rx_resources - Free Rx Resources 914 * @adapter: network interface device structure 915 * @qid: queue index 916 * 917 * Free all receive software resources 918 **/ 919 static void 920 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid) 921 { 922 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 923 924 /* Free buffer DMA maps, */ 925 for (int i = 0; i < rx_ring->ring_size; i++) { 926 bus_dmamap_sync(adapter->rx_buf_tag, 927 rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD); 928 m_freem(rx_ring->rx_buffer_info[i].mbuf); 929 rx_ring->rx_buffer_info[i].mbuf = NULL; 930 bus_dmamap_unload(adapter->rx_buf_tag, 931 rx_ring->rx_buffer_info[i].map); 932 bus_dmamap_destroy(adapter->rx_buf_tag, 933 rx_ring->rx_buffer_info[i].map); 934 } 935 936 /* free LRO resources, */ 937 tcp_lro_free(&rx_ring->lro); 938 939 /* free allocated memory */ 940 free(rx_ring->rx_buffer_info, M_DEVBUF); 941 rx_ring->rx_buffer_info = NULL; 942 943 free(rx_ring->free_rx_ids, M_DEVBUF); 944 rx_ring->free_rx_ids = NULL; 945 } 946 947 /** 948 * ena_setup_all_rx_resources - allocate all queues Rx resources 949 * @adapter: network interface device structure 950 * 951 * Returns 0 on success, otherwise on failure. 952 **/ 953 static int 954 ena_setup_all_rx_resources(struct ena_adapter *adapter) 955 { 956 int i, rc = 0; 957 958 for (i = 0; i < adapter->num_io_queues; i++) { 959 rc = ena_setup_rx_resources(adapter, i); 960 if (rc != 0) { 961 ena_log(adapter->pdev, ERR, 962 "Allocation for Rx Queue %u failed\n", i); 963 goto err_setup_rx; 964 } 965 } 966 return (0); 967 968 err_setup_rx: 969 /* rewind the index freeing the rings as we go */ 970 while (i--) 971 ena_free_rx_resources(adapter, i); 972 return (rc); 973 } 974 975 /** 976 * ena_free_all_rx_resources - Free Rx resources for all queues 977 * @adapter: network interface device structure 978 * 979 * Free all receive software resources 980 **/ 981 static void 982 ena_free_all_rx_resources(struct ena_adapter *adapter) 983 { 984 int i; 985 986 for (i = 0; i < adapter->num_io_queues; i++) 987 ena_free_rx_resources(adapter, i); 988 } 989 990 static inline int 991 ena_alloc_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring, 992 struct ena_rx_buffer *rx_info) 993 { 994 device_t pdev = adapter->pdev; 995 struct ena_com_buf *ena_buf; 996 bus_dma_segment_t segs[1]; 997 int nsegs, error; 998 int mlen; 999 1000 /* if previous allocated frag is not used */ 1001 if (unlikely(rx_info->mbuf != NULL)) 1002 return (0); 1003 1004 /* Get mbuf using UMA allocator */ 1005 rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, 1006 rx_ring->rx_mbuf_sz); 1007 1008 if (unlikely(rx_info->mbuf == NULL)) { 1009 counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1); 1010 rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 1011 if (unlikely(rx_info->mbuf == NULL)) { 1012 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); 1013 return (ENOMEM); 1014 } 1015 mlen = MCLBYTES; 1016 } else { 1017 mlen = rx_ring->rx_mbuf_sz; 1018 } 1019 /* Set mbuf length*/ 1020 rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen; 1021 1022 /* Map packets for DMA */ 1023 ena_log(pdev, DBG, 1024 "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n", 1025 adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len); 1026 error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map, 1027 rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); 1028 if (unlikely((error != 0) || (nsegs != 1))) { 1029 ena_log(pdev, WARN, 1030 "failed to map mbuf, error: %d, nsegs: %d\n", error, nsegs); 1031 counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1); 1032 goto exit; 1033 } 1034 1035 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD); 1036 1037 ena_buf = &rx_info->ena_buf; 1038 ena_buf->paddr = segs[0].ds_addr; 1039 ena_buf->len = mlen; 1040 1041 ena_log(pdev, DBG, 1042 "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n", 1043 rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr); 1044 1045 return (0); 1046 1047 exit: 1048 m_freem(rx_info->mbuf); 1049 rx_info->mbuf = NULL; 1050 return (EFAULT); 1051 } 1052 1053 static void 1054 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring, 1055 struct ena_rx_buffer *rx_info) 1056 { 1057 if (rx_info->mbuf == NULL) { 1058 ena_log(adapter->pdev, WARN, 1059 "Trying to free unallocated buffer\n"); 1060 return; 1061 } 1062 1063 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 1064 BUS_DMASYNC_POSTREAD); 1065 bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map); 1066 m_freem(rx_info->mbuf); 1067 rx_info->mbuf = NULL; 1068 } 1069 1070 /** 1071 * ena_refill_rx_bufs - Refills ring with descriptors 1072 * @rx_ring: the ring which we want to feed with free descriptors 1073 * @num: number of descriptors to refill 1074 * Refills the ring with newly allocated DMA-mapped mbufs for receiving 1075 **/ 1076 int 1077 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num) 1078 { 1079 struct ena_adapter *adapter = rx_ring->adapter; 1080 device_t pdev = adapter->pdev; 1081 uint16_t next_to_use, req_id; 1082 uint32_t i; 1083 int rc; 1084 1085 ena_log_io(adapter->pdev, DBG, "refill qid: %d\n", rx_ring->qid); 1086 1087 next_to_use = rx_ring->next_to_use; 1088 1089 for (i = 0; i < num; i++) { 1090 struct ena_rx_buffer *rx_info; 1091 1092 ena_log_io(pdev, DBG, "RX buffer - next to use: %d\n", 1093 next_to_use); 1094 1095 req_id = rx_ring->free_rx_ids[next_to_use]; 1096 rx_info = &rx_ring->rx_buffer_info[req_id]; 1097 #ifdef DEV_NETMAP 1098 if (ena_rx_ring_in_netmap(adapter, rx_ring->qid)) 1099 rc = ena_netmap_alloc_rx_slot(adapter, rx_ring, 1100 rx_info); 1101 else 1102 #endif /* DEV_NETMAP */ 1103 rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info); 1104 if (unlikely(rc != 0)) { 1105 ena_log_io(pdev, WARN, 1106 "failed to alloc buffer for rx queue %d\n", 1107 rx_ring->qid); 1108 break; 1109 } 1110 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, 1111 &rx_info->ena_buf, req_id); 1112 if (unlikely(rc != 0)) { 1113 ena_log_io(pdev, WARN, 1114 "failed to add buffer for rx queue %d\n", 1115 rx_ring->qid); 1116 break; 1117 } 1118 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, 1119 rx_ring->ring_size); 1120 } 1121 1122 if (unlikely(i < num)) { 1123 counter_u64_add(rx_ring->rx_stats.refil_partial, 1); 1124 ena_log_io(pdev, WARN, 1125 "refilled rx qid %d with only %d mbufs (from %d)\n", 1126 rx_ring->qid, i, num); 1127 } 1128 1129 if (likely(i != 0)) 1130 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); 1131 1132 rx_ring->next_to_use = next_to_use; 1133 return (i); 1134 } 1135 1136 int 1137 ena_update_buf_ring_size(struct ena_adapter *adapter, 1138 uint32_t new_buf_ring_size) 1139 { 1140 uint32_t old_buf_ring_size; 1141 int rc = 0; 1142 bool dev_was_up; 1143 1144 old_buf_ring_size = adapter->buf_ring_size; 1145 adapter->buf_ring_size = new_buf_ring_size; 1146 1147 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1148 ena_down(adapter); 1149 1150 /* Reconfigure buf ring for all Tx rings. */ 1151 ena_free_all_io_rings_resources(adapter); 1152 ena_init_io_rings_advanced(adapter); 1153 if (dev_was_up) { 1154 /* 1155 * If ena_up() fails, it's not because of recent buf_ring size 1156 * changes. Because of that, we just want to revert old drbr 1157 * value and trigger the reset because something else had to 1158 * go wrong. 1159 */ 1160 rc = ena_up(adapter); 1161 if (unlikely(rc != 0)) { 1162 ena_log(adapter->pdev, ERR, 1163 "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n", 1164 new_buf_ring_size, old_buf_ring_size); 1165 1166 /* Revert old size and trigger the reset */ 1167 adapter->buf_ring_size = old_buf_ring_size; 1168 ena_free_all_io_rings_resources(adapter); 1169 ena_init_io_rings_advanced(adapter); 1170 1171 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, 1172 adapter); 1173 ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER); 1174 } 1175 } 1176 1177 return (rc); 1178 } 1179 1180 int 1181 ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size, 1182 uint32_t new_rx_size) 1183 { 1184 uint32_t old_tx_size, old_rx_size; 1185 int rc = 0; 1186 bool dev_was_up; 1187 1188 old_tx_size = adapter->requested_tx_ring_size; 1189 old_rx_size = adapter->requested_rx_ring_size; 1190 adapter->requested_tx_ring_size = new_tx_size; 1191 adapter->requested_rx_ring_size = new_rx_size; 1192 1193 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1194 ena_down(adapter); 1195 1196 /* Configure queues with new size. */ 1197 ena_init_io_rings_basic(adapter); 1198 if (dev_was_up) { 1199 rc = ena_up(adapter); 1200 if (unlikely(rc != 0)) { 1201 ena_log(adapter->pdev, ERR, 1202 "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n", 1203 new_tx_size, new_rx_size, old_tx_size, old_rx_size); 1204 1205 /* Revert old size. */ 1206 adapter->requested_tx_ring_size = old_tx_size; 1207 adapter->requested_rx_ring_size = old_rx_size; 1208 ena_init_io_rings_basic(adapter); 1209 1210 /* And try again. */ 1211 rc = ena_up(adapter); 1212 if (unlikely(rc != 0)) { 1213 ena_log(adapter->pdev, ERR, 1214 "Failed to revert old queue sizes. Triggering device reset.\n"); 1215 /* 1216 * If we've failed again, something had to go 1217 * wrong. After reset, the device should try to 1218 * go up 1219 */ 1220 ENA_FLAG_SET_ATOMIC( 1221 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 1222 ena_trigger_reset(adapter, 1223 ENA_REGS_RESET_OS_TRIGGER); 1224 } 1225 } 1226 } 1227 1228 return (rc); 1229 } 1230 1231 static void 1232 ena_update_io_rings(struct ena_adapter *adapter, uint32_t num) 1233 { 1234 ena_free_all_io_rings_resources(adapter); 1235 /* Force indirection table to be reinitialized */ 1236 ena_com_rss_destroy(adapter->ena_dev); 1237 1238 adapter->num_io_queues = num; 1239 ena_init_io_rings(adapter); 1240 } 1241 1242 int 1243 ena_update_base_cpu(struct ena_adapter *adapter, int new_num) 1244 { 1245 int old_num; 1246 int rc = 0; 1247 bool dev_was_up; 1248 1249 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1250 old_num = adapter->irq_cpu_base; 1251 1252 ena_down(adapter); 1253 1254 adapter->irq_cpu_base = new_num; 1255 1256 if (dev_was_up) { 1257 rc = ena_up(adapter); 1258 if (unlikely(rc != 0)) { 1259 ena_log(adapter->pdev, ERR, 1260 "Failed to configure device %d IRQ base CPU. " 1261 "Reverting to previous value: %d\n", 1262 new_num, old_num); 1263 1264 adapter->irq_cpu_base = old_num; 1265 1266 rc = ena_up(adapter); 1267 if (unlikely(rc != 0)) { 1268 ena_log(adapter->pdev, ERR, 1269 "Failed to revert to previous setup." 1270 "Triggering device reset.\n"); 1271 ENA_FLAG_SET_ATOMIC( 1272 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 1273 ena_trigger_reset(adapter, 1274 ENA_REGS_RESET_OS_TRIGGER); 1275 } 1276 } 1277 } 1278 return (rc); 1279 } 1280 1281 int 1282 ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num) 1283 { 1284 uint32_t old_num; 1285 int rc = 0; 1286 bool dev_was_up; 1287 1288 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1289 old_num = adapter->irq_cpu_stride; 1290 1291 ena_down(adapter); 1292 1293 adapter->irq_cpu_stride = new_num; 1294 1295 if (dev_was_up) { 1296 rc = ena_up(adapter); 1297 if (unlikely(rc != 0)) { 1298 ena_log(adapter->pdev, ERR, 1299 "Failed to configure device %d IRQ CPU stride. " 1300 "Reverting to previous value: %d\n", 1301 new_num, old_num); 1302 1303 adapter->irq_cpu_stride = old_num; 1304 1305 rc = ena_up(adapter); 1306 if (unlikely(rc != 0)) { 1307 ena_log(adapter->pdev, ERR, 1308 "Failed to revert to previous setup." 1309 "Triggering device reset.\n"); 1310 ENA_FLAG_SET_ATOMIC( 1311 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 1312 ena_trigger_reset(adapter, 1313 ENA_REGS_RESET_OS_TRIGGER); 1314 } 1315 } 1316 } 1317 return (rc); 1318 } 1319 1320 /* Caller should sanitize new_num */ 1321 int 1322 ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num) 1323 { 1324 uint32_t old_num; 1325 int rc = 0; 1326 bool dev_was_up; 1327 1328 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1329 old_num = adapter->num_io_queues; 1330 ena_down(adapter); 1331 1332 ena_update_io_rings(adapter, new_num); 1333 1334 if (dev_was_up) { 1335 rc = ena_up(adapter); 1336 if (unlikely(rc != 0)) { 1337 ena_log(adapter->pdev, ERR, 1338 "Failed to configure device with %u IO queues. " 1339 "Reverting to previous value: %u\n", 1340 new_num, old_num); 1341 1342 ena_update_io_rings(adapter, old_num); 1343 1344 rc = ena_up(adapter); 1345 if (unlikely(rc != 0)) { 1346 ena_log(adapter->pdev, ERR, 1347 "Failed to revert to previous setup IO " 1348 "queues. Triggering device reset.\n"); 1349 ENA_FLAG_SET_ATOMIC( 1350 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 1351 ena_trigger_reset(adapter, 1352 ENA_REGS_RESET_OS_TRIGGER); 1353 } 1354 } 1355 } 1356 1357 return (rc); 1358 } 1359 1360 static void 1361 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid) 1362 { 1363 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 1364 unsigned int i; 1365 1366 for (i = 0; i < rx_ring->ring_size; i++) { 1367 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; 1368 1369 if (rx_info->mbuf != NULL) 1370 ena_free_rx_mbuf(adapter, rx_ring, rx_info); 1371 #ifdef DEV_NETMAP 1372 if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) && 1373 (if_getcapenable(adapter->ifp) & IFCAP_NETMAP)) { 1374 if (rx_info->netmap_buf_idx != 0) 1375 ena_netmap_free_rx_slot(adapter, rx_ring, 1376 rx_info); 1377 } 1378 #endif /* DEV_NETMAP */ 1379 } 1380 } 1381 1382 /** 1383 * ena_refill_all_rx_bufs - allocate all queues Rx buffers 1384 * @adapter: network interface device structure 1385 * 1386 */ 1387 static void 1388 ena_refill_all_rx_bufs(struct ena_adapter *adapter) 1389 { 1390 struct ena_ring *rx_ring; 1391 int i, rc, bufs_num; 1392 1393 for (i = 0; i < adapter->num_io_queues; i++) { 1394 rx_ring = &adapter->rx_ring[i]; 1395 bufs_num = rx_ring->ring_size - 1; 1396 rc = ena_refill_rx_bufs(rx_ring, bufs_num); 1397 if (unlikely(rc != bufs_num)) 1398 ena_log_io(adapter->pdev, WARN, 1399 "refilling Queue %d failed. " 1400 "Allocated %d buffers from: %d\n", 1401 i, rc, bufs_num); 1402 #ifdef DEV_NETMAP 1403 rx_ring->initialized = true; 1404 #endif /* DEV_NETMAP */ 1405 } 1406 } 1407 1408 static void 1409 ena_free_all_rx_bufs(struct ena_adapter *adapter) 1410 { 1411 int i; 1412 1413 for (i = 0; i < adapter->num_io_queues; i++) 1414 ena_free_rx_bufs(adapter, i); 1415 } 1416 1417 /** 1418 * ena_free_tx_bufs - Free Tx Buffers per Queue 1419 * @adapter: network interface device structure 1420 * @qid: queue index 1421 **/ 1422 static void 1423 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid) 1424 { 1425 bool print_once = true; 1426 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 1427 1428 ENA_RING_MTX_LOCK(tx_ring); 1429 for (int i = 0; i < tx_ring->ring_size; i++) { 1430 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; 1431 1432 if (tx_info->mbuf == NULL) 1433 continue; 1434 1435 if (print_once) { 1436 ena_log(adapter->pdev, WARN, 1437 "free uncompleted tx mbuf qid %d idx 0x%x\n", qid, 1438 i); 1439 print_once = false; 1440 } else { 1441 ena_log(adapter->pdev, DBG, 1442 "free uncompleted tx mbuf qid %d idx 0x%x\n", qid, 1443 i); 1444 } 1445 1446 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 1447 BUS_DMASYNC_POSTWRITE); 1448 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 1449 1450 m_free(tx_info->mbuf); 1451 tx_info->mbuf = NULL; 1452 } 1453 ENA_RING_MTX_UNLOCK(tx_ring); 1454 } 1455 1456 static void 1457 ena_free_all_tx_bufs(struct ena_adapter *adapter) 1458 { 1459 for (int i = 0; i < adapter->num_io_queues; i++) 1460 ena_free_tx_bufs(adapter, i); 1461 } 1462 1463 static void 1464 ena_destroy_all_tx_queues(struct ena_adapter *adapter) 1465 { 1466 uint16_t ena_qid; 1467 int i; 1468 1469 for (i = 0; i < adapter->num_io_queues; i++) { 1470 ena_qid = ENA_IO_TXQ_IDX(i); 1471 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1472 } 1473 } 1474 1475 static void 1476 ena_destroy_all_rx_queues(struct ena_adapter *adapter) 1477 { 1478 uint16_t ena_qid; 1479 int i; 1480 1481 for (i = 0; i < adapter->num_io_queues; i++) { 1482 ena_qid = ENA_IO_RXQ_IDX(i); 1483 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1484 } 1485 } 1486 1487 static void 1488 ena_destroy_all_io_queues(struct ena_adapter *adapter) 1489 { 1490 struct ena_que *queue; 1491 int i; 1492 1493 for (i = 0; i < adapter->num_io_queues; i++) { 1494 queue = &adapter->que[i]; 1495 while (taskqueue_cancel(queue->cleanup_tq, &queue->cleanup_task, NULL)) 1496 taskqueue_drain(queue->cleanup_tq, &queue->cleanup_task); 1497 taskqueue_free(queue->cleanup_tq); 1498 } 1499 1500 ena_destroy_all_tx_queues(adapter); 1501 ena_destroy_all_rx_queues(adapter); 1502 } 1503 1504 static int 1505 ena_create_io_queues(struct ena_adapter *adapter) 1506 { 1507 struct ena_com_dev *ena_dev = adapter->ena_dev; 1508 struct ena_com_create_io_ctx ctx; 1509 struct ena_ring *ring; 1510 struct ena_que *queue; 1511 uint16_t ena_qid; 1512 uint32_t msix_vector; 1513 cpuset_t *cpu_mask = NULL; 1514 int rc, i; 1515 1516 /* Create TX queues */ 1517 for (i = 0; i < adapter->num_io_queues; i++) { 1518 msix_vector = ENA_IO_IRQ_IDX(i); 1519 ena_qid = ENA_IO_TXQ_IDX(i); 1520 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 1521 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 1522 ctx.queue_size = adapter->requested_tx_ring_size; 1523 ctx.msix_vector = msix_vector; 1524 ctx.qid = ena_qid; 1525 ctx.numa_node = adapter->que[i].domain; 1526 1527 rc = ena_com_create_io_queue(ena_dev, &ctx); 1528 if (rc != 0) { 1529 ena_log(adapter->pdev, ERR, 1530 "Failed to create io TX queue #%d rc: %d\n", i, rc); 1531 goto err_tx; 1532 } 1533 ring = &adapter->tx_ring[i]; 1534 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1535 &ring->ena_com_io_sq, &ring->ena_com_io_cq); 1536 if (rc != 0) { 1537 ena_log(adapter->pdev, ERR, 1538 "Failed to get TX queue handlers. TX queue num" 1539 " %d rc: %d\n", 1540 i, rc); 1541 ena_com_destroy_io_queue(ena_dev, ena_qid); 1542 goto err_tx; 1543 } 1544 1545 if (ctx.numa_node >= 0) { 1546 ena_com_update_numa_node(ring->ena_com_io_cq, 1547 ctx.numa_node); 1548 } 1549 } 1550 1551 /* Create RX queues */ 1552 for (i = 0; i < adapter->num_io_queues; i++) { 1553 msix_vector = ENA_IO_IRQ_IDX(i); 1554 ena_qid = ENA_IO_RXQ_IDX(i); 1555 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1556 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1557 ctx.queue_size = adapter->requested_rx_ring_size; 1558 ctx.msix_vector = msix_vector; 1559 ctx.qid = ena_qid; 1560 ctx.numa_node = adapter->que[i].domain; 1561 1562 rc = ena_com_create_io_queue(ena_dev, &ctx); 1563 if (unlikely(rc != 0)) { 1564 ena_log(adapter->pdev, ERR, 1565 "Failed to create io RX queue[%d] rc: %d\n", i, rc); 1566 goto err_rx; 1567 } 1568 1569 ring = &adapter->rx_ring[i]; 1570 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1571 &ring->ena_com_io_sq, &ring->ena_com_io_cq); 1572 if (unlikely(rc != 0)) { 1573 ena_log(adapter->pdev, ERR, 1574 "Failed to get RX queue handlers. RX queue num" 1575 " %d rc: %d\n", 1576 i, rc); 1577 ena_com_destroy_io_queue(ena_dev, ena_qid); 1578 goto err_rx; 1579 } 1580 1581 if (ctx.numa_node >= 0) { 1582 ena_com_update_numa_node(ring->ena_com_io_cq, 1583 ctx.numa_node); 1584 } 1585 } 1586 1587 for (i = 0; i < adapter->num_io_queues; i++) { 1588 queue = &adapter->que[i]; 1589 1590 NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue); 1591 queue->cleanup_tq = taskqueue_create_fast("ena cleanup", 1592 M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq); 1593 1594 #ifdef RSS 1595 cpu_mask = &queue->cpu_mask; 1596 #endif 1597 taskqueue_start_threads_cpuset(&queue->cleanup_tq, 1, PI_NET, 1598 cpu_mask, "%s queue %d cleanup", 1599 device_get_nameunit(adapter->pdev), i); 1600 } 1601 1602 return (0); 1603 1604 err_rx: 1605 while (i--) 1606 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); 1607 i = adapter->num_io_queues; 1608 err_tx: 1609 while (i--) 1610 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); 1611 1612 return (ENXIO); 1613 } 1614 1615 /********************************************************************* 1616 * 1617 * MSIX & Interrupt Service routine 1618 * 1619 **********************************************************************/ 1620 1621 /** 1622 * ena_handle_msix - MSIX Interrupt Handler for admin/async queue 1623 * @arg: interrupt number 1624 **/ 1625 static void 1626 ena_intr_msix_mgmnt(void *arg) 1627 { 1628 struct ena_adapter *adapter = (struct ena_adapter *)arg; 1629 1630 ena_com_admin_q_comp_intr_handler(adapter->ena_dev); 1631 if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) 1632 ena_com_aenq_intr_handler(adapter->ena_dev, arg); 1633 } 1634 1635 /** 1636 * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx 1637 * @arg: queue 1638 **/ 1639 static int 1640 ena_handle_msix(void *arg) 1641 { 1642 struct ena_que *queue = arg; 1643 struct ena_adapter *adapter = queue->adapter; 1644 if_t ifp = adapter->ifp; 1645 1646 if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) 1647 return (FILTER_STRAY); 1648 1649 taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task); 1650 1651 return (FILTER_HANDLED); 1652 } 1653 1654 static int 1655 ena_enable_msix(struct ena_adapter *adapter) 1656 { 1657 device_t dev = adapter->pdev; 1658 int msix_vecs, msix_req; 1659 int i, rc = 0; 1660 1661 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) { 1662 ena_log(dev, ERR, "Error, MSI-X is already enabled\n"); 1663 return (EINVAL); 1664 } 1665 1666 /* Reserved the max msix vectors we might need */ 1667 msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues); 1668 1669 adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry), 1670 M_DEVBUF, M_WAITOK | M_ZERO); 1671 1672 ena_log(dev, DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs); 1673 1674 for (i = 0; i < msix_vecs; i++) { 1675 adapter->msix_entries[i].entry = i; 1676 /* Vectors must start from 1 */ 1677 adapter->msix_entries[i].vector = i + 1; 1678 } 1679 1680 msix_req = msix_vecs; 1681 rc = pci_alloc_msix(dev, &msix_vecs); 1682 if (unlikely(rc != 0)) { 1683 ena_log(dev, ERR, "Failed to enable MSIX, vectors %d rc %d\n", 1684 msix_vecs, rc); 1685 1686 rc = ENOSPC; 1687 goto err_msix_free; 1688 } 1689 1690 if (msix_vecs != msix_req) { 1691 if (msix_vecs == ENA_ADMIN_MSIX_VEC) { 1692 ena_log(dev, ERR, 1693 "Not enough number of MSI-x allocated: %d\n", 1694 msix_vecs); 1695 pci_release_msi(dev); 1696 rc = ENOSPC; 1697 goto err_msix_free; 1698 } 1699 ena_log(dev, ERR, 1700 "Enable only %d MSI-x (out of %d), reduce " 1701 "the number of queues\n", 1702 msix_vecs, msix_req); 1703 } 1704 1705 adapter->msix_vecs = msix_vecs; 1706 ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter); 1707 1708 return (0); 1709 1710 err_msix_free: 1711 free(adapter->msix_entries, M_DEVBUF); 1712 adapter->msix_entries = NULL; 1713 1714 return (rc); 1715 } 1716 1717 static void 1718 ena_setup_mgmnt_intr(struct ena_adapter *adapter) 1719 { 1720 snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, ENA_IRQNAME_SIZE, 1721 "ena-mgmnt@pci:%s", device_get_nameunit(adapter->pdev)); 1722 /* 1723 * Handler is NULL on purpose, it will be set 1724 * when mgmnt interrupt is acquired 1725 */ 1726 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL; 1727 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; 1728 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = 1729 adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector; 1730 } 1731 1732 static int 1733 ena_setup_io_intr(struct ena_adapter *adapter) 1734 { 1735 #ifdef RSS 1736 int num_buckets = rss_getnumbuckets(); 1737 static int last_bind = 0; 1738 int cur_bind; 1739 int idx; 1740 #endif 1741 int irq_idx; 1742 1743 if (adapter->msix_entries == NULL) 1744 return (EINVAL); 1745 1746 #ifdef RSS 1747 if (adapter->first_bind < 0) { 1748 adapter->first_bind = last_bind; 1749 last_bind = (last_bind + adapter->num_io_queues) % num_buckets; 1750 } 1751 cur_bind = adapter->first_bind; 1752 #endif 1753 1754 for (int i = 0; i < adapter->num_io_queues; i++) { 1755 irq_idx = ENA_IO_IRQ_IDX(i); 1756 1757 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, 1758 "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i); 1759 adapter->irq_tbl[irq_idx].handler = ena_handle_msix; 1760 adapter->irq_tbl[irq_idx].data = &adapter->que[i]; 1761 adapter->irq_tbl[irq_idx].vector = 1762 adapter->msix_entries[irq_idx].vector; 1763 ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n", 1764 adapter->msix_entries[irq_idx].vector); 1765 1766 if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) { 1767 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu = 1768 (unsigned)(adapter->irq_cpu_base + 1769 i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus; 1770 CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask); 1771 } 1772 1773 #ifdef RSS 1774 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu = 1775 rss_getcpu(cur_bind); 1776 cur_bind = (cur_bind + 1) % num_buckets; 1777 CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask); 1778 1779 for (idx = 0; idx < MAXMEMDOM; ++idx) { 1780 if (CPU_ISSET(adapter->que[i].cpu, &cpuset_domain[idx])) 1781 break; 1782 } 1783 adapter->que[i].domain = idx; 1784 #else 1785 adapter->que[i].domain = -1; 1786 #endif 1787 } 1788 1789 return (0); 1790 } 1791 1792 static int 1793 ena_request_mgmnt_irq(struct ena_adapter *adapter) 1794 { 1795 device_t pdev = adapter->pdev; 1796 struct ena_irq *irq; 1797 unsigned long flags; 1798 int rc, rcc; 1799 1800 flags = RF_ACTIVE | RF_SHAREABLE; 1801 1802 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 1803 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ, 1804 &irq->vector, flags); 1805 1806 if (unlikely(irq->res == NULL)) { 1807 ena_log(pdev, ERR, "could not allocate irq vector: %d\n", 1808 irq->vector); 1809 return (ENXIO); 1810 } 1811 1812 rc = bus_setup_intr(adapter->pdev, irq->res, 1813 INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt, irq->data, 1814 &irq->cookie); 1815 if (unlikely(rc != 0)) { 1816 ena_log(pdev, ERR, 1817 "failed to register interrupt handler for irq %ju: %d\n", 1818 rman_get_start(irq->res), rc); 1819 goto err_res_free; 1820 } 1821 irq->requested = true; 1822 1823 return (rc); 1824 1825 err_res_free: 1826 ena_log(pdev, INFO, "releasing resource for irq %d\n", irq->vector); 1827 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector, 1828 irq->res); 1829 if (unlikely(rcc != 0)) 1830 ena_log(pdev, ERR, 1831 "dev has no parent while releasing res for irq: %d\n", 1832 irq->vector); 1833 irq->res = NULL; 1834 1835 return (rc); 1836 } 1837 1838 static int 1839 ena_request_io_irq(struct ena_adapter *adapter) 1840 { 1841 device_t pdev = adapter->pdev; 1842 struct ena_irq *irq; 1843 unsigned long flags = 0; 1844 int rc = 0, i, rcc; 1845 1846 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) { 1847 ena_log(pdev, ERR, 1848 "failed to request I/O IRQ: MSI-X is not enabled\n"); 1849 return (EINVAL); 1850 } else { 1851 flags = RF_ACTIVE | RF_SHAREABLE; 1852 } 1853 1854 for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { 1855 irq = &adapter->irq_tbl[i]; 1856 1857 if (unlikely(irq->requested)) 1858 continue; 1859 1860 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ, 1861 &irq->vector, flags); 1862 if (unlikely(irq->res == NULL)) { 1863 rc = ENOMEM; 1864 ena_log(pdev, ERR, 1865 "could not allocate irq vector: %d\n", irq->vector); 1866 goto err; 1867 } 1868 1869 rc = bus_setup_intr(adapter->pdev, irq->res, 1870 INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL, irq->data, 1871 &irq->cookie); 1872 if (unlikely(rc != 0)) { 1873 ena_log(pdev, ERR, 1874 "failed to register interrupt handler for irq %ju: %d\n", 1875 rman_get_start(irq->res), rc); 1876 goto err; 1877 } 1878 irq->requested = true; 1879 1880 if (adapter->rss_enabled || adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) { 1881 rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu); 1882 if (unlikely(rc != 0)) { 1883 ena_log(pdev, ERR, 1884 "failed to bind interrupt handler for irq %ju to cpu %d: %d\n", 1885 rman_get_start(irq->res), irq->cpu, rc); 1886 goto err; 1887 } 1888 1889 ena_log(pdev, INFO, "queue %d - cpu %d\n", 1890 i - ENA_IO_IRQ_FIRST_IDX, irq->cpu); 1891 } 1892 } 1893 return (rc); 1894 1895 err: 1896 1897 for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) { 1898 irq = &adapter->irq_tbl[i]; 1899 rcc = 0; 1900 1901 /* Once we entered err: section and irq->requested is true we 1902 free both intr and resources */ 1903 if (irq->requested) { 1904 rcc = bus_teardown_intr(adapter->pdev, irq->res, 1905 irq->cookie); 1906 if (unlikely(rcc != 0)) 1907 ena_log(pdev, ERR, 1908 "could not release irq: %d, error: %d\n", 1909 irq->vector, rcc); 1910 } 1911 1912 /* If we entered err: section without irq->requested set we know 1913 it was bus_alloc_resource_any() that needs cleanup, provided 1914 res is not NULL. In case res is NULL no work in needed in 1915 this iteration */ 1916 rcc = 0; 1917 if (irq->res != NULL) { 1918 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1919 irq->vector, irq->res); 1920 } 1921 if (unlikely(rcc != 0)) 1922 ena_log(pdev, ERR, 1923 "dev has no parent while releasing res for irq: %d\n", 1924 irq->vector); 1925 irq->requested = false; 1926 irq->res = NULL; 1927 } 1928 1929 return (rc); 1930 } 1931 1932 static void 1933 ena_free_mgmnt_irq(struct ena_adapter *adapter) 1934 { 1935 device_t pdev = adapter->pdev; 1936 struct ena_irq *irq; 1937 int rc; 1938 1939 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 1940 if (irq->requested) { 1941 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector); 1942 rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie); 1943 if (unlikely(rc != 0)) 1944 ena_log(pdev, ERR, "failed to tear down irq: %d\n", 1945 irq->vector); 1946 irq->requested = 0; 1947 } 1948 1949 if (irq->res != NULL) { 1950 ena_log(pdev, DBG, "release resource irq: %d\n", irq->vector); 1951 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1952 irq->vector, irq->res); 1953 irq->res = NULL; 1954 if (unlikely(rc != 0)) 1955 ena_log(pdev, ERR, 1956 "dev has no parent while releasing res for irq: %d\n", 1957 irq->vector); 1958 } 1959 } 1960 1961 static void 1962 ena_free_io_irq(struct ena_adapter *adapter) 1963 { 1964 device_t pdev = adapter->pdev; 1965 struct ena_irq *irq; 1966 int rc; 1967 1968 for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { 1969 irq = &adapter->irq_tbl[i]; 1970 if (irq->requested) { 1971 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector); 1972 rc = bus_teardown_intr(adapter->pdev, irq->res, 1973 irq->cookie); 1974 if (unlikely(rc != 0)) { 1975 ena_log(pdev, ERR, 1976 "failed to tear down irq: %d\n", 1977 irq->vector); 1978 } 1979 irq->requested = 0; 1980 } 1981 1982 if (irq->res != NULL) { 1983 ena_log(pdev, DBG, "release resource irq: %d\n", 1984 irq->vector); 1985 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1986 irq->vector, irq->res); 1987 irq->res = NULL; 1988 if (unlikely(rc != 0)) { 1989 ena_log(pdev, ERR, 1990 "dev has no parent while releasing res for irq: %d\n", 1991 irq->vector); 1992 } 1993 } 1994 } 1995 } 1996 1997 static void 1998 ena_free_irqs(struct ena_adapter *adapter) 1999 { 2000 ena_free_io_irq(adapter); 2001 ena_free_mgmnt_irq(adapter); 2002 ena_disable_msix(adapter); 2003 } 2004 2005 static void 2006 ena_disable_msix(struct ena_adapter *adapter) 2007 { 2008 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) { 2009 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter); 2010 pci_release_msi(adapter->pdev); 2011 } 2012 2013 adapter->msix_vecs = 0; 2014 free(adapter->msix_entries, M_DEVBUF); 2015 adapter->msix_entries = NULL; 2016 } 2017 2018 static void 2019 ena_unmask_all_io_irqs(struct ena_adapter *adapter) 2020 { 2021 struct ena_com_io_cq *io_cq; 2022 struct ena_eth_io_intr_reg intr_reg; 2023 struct ena_ring *tx_ring; 2024 uint16_t ena_qid; 2025 int i; 2026 2027 /* Unmask interrupts for all queues */ 2028 for (i = 0; i < adapter->num_io_queues; i++) { 2029 ena_qid = ENA_IO_TXQ_IDX(i); 2030 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 2031 ena_com_update_intr_reg(&intr_reg, 0, 0, true, false); 2032 tx_ring = &adapter->tx_ring[i]; 2033 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1); 2034 ena_com_unmask_intr(io_cq, &intr_reg); 2035 } 2036 } 2037 2038 static int 2039 ena_up_complete(struct ena_adapter *adapter) 2040 { 2041 int rc; 2042 2043 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 2044 rc = ena_rss_configure(adapter); 2045 if (rc != 0) { 2046 ena_log(adapter->pdev, ERR, 2047 "Failed to configure RSS\n"); 2048 return (rc); 2049 } 2050 } 2051 2052 rc = ena_change_mtu(adapter->ifp, if_getmtu(adapter->ifp)); 2053 if (unlikely(rc != 0)) 2054 return (rc); 2055 2056 ena_refill_all_rx_bufs(adapter); 2057 ena_reset_counters((counter_u64_t *)&adapter->hw_stats, 2058 sizeof(adapter->hw_stats)); 2059 2060 return (0); 2061 } 2062 2063 static void 2064 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size, int new_rx_size) 2065 { 2066 int i; 2067 2068 for (i = 0; i < adapter->num_io_queues; i++) { 2069 adapter->tx_ring[i].ring_size = new_tx_size; 2070 adapter->rx_ring[i].ring_size = new_rx_size; 2071 } 2072 } 2073 2074 static int 2075 create_queues_with_size_backoff(struct ena_adapter *adapter) 2076 { 2077 device_t pdev = adapter->pdev; 2078 int rc; 2079 uint32_t cur_rx_ring_size, cur_tx_ring_size; 2080 uint32_t new_rx_ring_size, new_tx_ring_size; 2081 2082 /* 2083 * Current queue sizes might be set to smaller than the requested 2084 * ones due to past queue allocation failures. 2085 */ 2086 set_io_rings_size(adapter, adapter->requested_tx_ring_size, 2087 adapter->requested_rx_ring_size); 2088 2089 while (1) { 2090 /* Allocate transmit descriptors */ 2091 rc = ena_setup_all_tx_resources(adapter); 2092 if (unlikely(rc != 0)) { 2093 ena_log(pdev, ERR, "err_setup_tx\n"); 2094 goto err_setup_tx; 2095 } 2096 2097 /* Allocate receive descriptors */ 2098 rc = ena_setup_all_rx_resources(adapter); 2099 if (unlikely(rc != 0)) { 2100 ena_log(pdev, ERR, "err_setup_rx\n"); 2101 goto err_setup_rx; 2102 } 2103 2104 /* Create IO queues for Rx & Tx */ 2105 rc = ena_create_io_queues(adapter); 2106 if (unlikely(rc != 0)) { 2107 ena_log(pdev, ERR, "create IO queues failed\n"); 2108 goto err_io_que; 2109 } 2110 2111 return (0); 2112 2113 err_io_que: 2114 ena_free_all_rx_resources(adapter); 2115 err_setup_rx: 2116 ena_free_all_tx_resources(adapter); 2117 err_setup_tx: 2118 /* 2119 * Lower the ring size if ENOMEM. Otherwise, return the 2120 * error straightaway. 2121 */ 2122 if (unlikely(rc != ENOMEM)) { 2123 ena_log(pdev, ERR, 2124 "Queue creation failed with error code: %d\n", rc); 2125 return (rc); 2126 } 2127 2128 cur_tx_ring_size = adapter->tx_ring[0].ring_size; 2129 cur_rx_ring_size = adapter->rx_ring[0].ring_size; 2130 2131 ena_log(pdev, ERR, 2132 "Not enough memory to create queues with sizes TX=%d, RX=%d\n", 2133 cur_tx_ring_size, cur_rx_ring_size); 2134 2135 new_tx_ring_size = cur_tx_ring_size; 2136 new_rx_ring_size = cur_rx_ring_size; 2137 2138 /* 2139 * Decrease the size of a larger queue, or decrease both if they 2140 * are the same size. 2141 */ 2142 if (cur_rx_ring_size <= cur_tx_ring_size) 2143 new_tx_ring_size = cur_tx_ring_size / 2; 2144 if (cur_rx_ring_size >= cur_tx_ring_size) 2145 new_rx_ring_size = cur_rx_ring_size / 2; 2146 2147 if (new_tx_ring_size < ENA_MIN_RING_SIZE || 2148 new_rx_ring_size < ENA_MIN_RING_SIZE) { 2149 ena_log(pdev, ERR, 2150 "Queue creation failed with the smallest possible queue size" 2151 "of %d for both queues. Not retrying with smaller queues\n", 2152 ENA_MIN_RING_SIZE); 2153 return (rc); 2154 } 2155 2156 ena_log(pdev, INFO, 2157 "Retrying queue creation with sizes TX=%d, RX=%d\n", 2158 new_tx_ring_size, new_rx_ring_size); 2159 2160 set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size); 2161 } 2162 } 2163 2164 int 2165 ena_up(struct ena_adapter *adapter) 2166 { 2167 int rc = 0; 2168 2169 ENA_LOCK_ASSERT(); 2170 2171 if (unlikely(device_is_attached(adapter->pdev) == 0)) { 2172 ena_log(adapter->pdev, ERR, "device is not attached!\n"); 2173 return (ENXIO); 2174 } 2175 2176 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 2177 return (0); 2178 2179 ena_log(adapter->pdev, INFO, "device is going UP\n"); 2180 2181 /* setup interrupts for IO queues */ 2182 rc = ena_setup_io_intr(adapter); 2183 if (unlikely(rc != 0)) { 2184 ena_log(adapter->pdev, ERR, "error setting up IO interrupt\n"); 2185 goto error; 2186 } 2187 rc = ena_request_io_irq(adapter); 2188 if (unlikely(rc != 0)) { 2189 ena_log(adapter->pdev, ERR, "err_req_irq\n"); 2190 goto error; 2191 } 2192 2193 ena_log(adapter->pdev, INFO, 2194 "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, LLQ is %s\n", 2195 adapter->num_io_queues, 2196 adapter->requested_rx_ring_size, 2197 adapter->requested_tx_ring_size, 2198 (adapter->ena_dev->tx_mem_queue_type == 2199 ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED"); 2200 2201 rc = create_queues_with_size_backoff(adapter); 2202 if (unlikely(rc != 0)) { 2203 ena_log(adapter->pdev, ERR, 2204 "error creating queues with size backoff\n"); 2205 goto err_create_queues_with_backoff; 2206 } 2207 2208 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) 2209 if_link_state_change(adapter->ifp, LINK_STATE_UP); 2210 2211 rc = ena_up_complete(adapter); 2212 if (unlikely(rc != 0)) 2213 goto err_up_complete; 2214 2215 counter_u64_add(adapter->dev_stats.interface_up, 1); 2216 2217 ena_update_hwassist(adapter); 2218 2219 if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 2220 2221 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter); 2222 2223 ena_unmask_all_io_irqs(adapter); 2224 2225 return (0); 2226 2227 err_up_complete: 2228 ena_destroy_all_io_queues(adapter); 2229 ena_free_all_rx_resources(adapter); 2230 ena_free_all_tx_resources(adapter); 2231 err_create_queues_with_backoff: 2232 ena_free_io_irq(adapter); 2233 error: 2234 return (rc); 2235 } 2236 2237 static uint64_t 2238 ena_get_counter(if_t ifp, ift_counter cnt) 2239 { 2240 struct ena_adapter *adapter; 2241 struct ena_hw_stats *stats; 2242 2243 adapter = if_getsoftc(ifp); 2244 stats = &adapter->hw_stats; 2245 2246 switch (cnt) { 2247 case IFCOUNTER_IPACKETS: 2248 return (counter_u64_fetch(stats->rx_packets)); 2249 case IFCOUNTER_OPACKETS: 2250 return (counter_u64_fetch(stats->tx_packets)); 2251 case IFCOUNTER_IBYTES: 2252 return (counter_u64_fetch(stats->rx_bytes)); 2253 case IFCOUNTER_OBYTES: 2254 return (counter_u64_fetch(stats->tx_bytes)); 2255 case IFCOUNTER_IQDROPS: 2256 return (counter_u64_fetch(stats->rx_drops)); 2257 case IFCOUNTER_OQDROPS: 2258 return (counter_u64_fetch(stats->tx_drops)); 2259 default: 2260 return (if_get_counter_default(ifp, cnt)); 2261 } 2262 } 2263 2264 static int 2265 ena_media_change(if_t ifp) 2266 { 2267 /* Media Change is not supported by firmware */ 2268 return (0); 2269 } 2270 2271 static void 2272 ena_media_status(if_t ifp, struct ifmediareq *ifmr) 2273 { 2274 struct ena_adapter *adapter = if_getsoftc(ifp); 2275 ena_log(adapter->pdev, DBG, "Media status update\n"); 2276 2277 ENA_LOCK_LOCK(); 2278 2279 ifmr->ifm_status = IFM_AVALID; 2280 ifmr->ifm_active = IFM_ETHER; 2281 2282 if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) { 2283 ENA_LOCK_UNLOCK(); 2284 ena_log(adapter->pdev, INFO, "Link is down\n"); 2285 return; 2286 } 2287 2288 ifmr->ifm_status |= IFM_ACTIVE; 2289 ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX; 2290 2291 ENA_LOCK_UNLOCK(); 2292 } 2293 2294 static void 2295 ena_init(void *arg) 2296 { 2297 struct ena_adapter *adapter = (struct ena_adapter *)arg; 2298 2299 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) { 2300 ENA_LOCK_LOCK(); 2301 ena_up(adapter); 2302 ENA_LOCK_UNLOCK(); 2303 } 2304 } 2305 2306 static int 2307 ena_ioctl(if_t ifp, u_long command, caddr_t data) 2308 { 2309 struct ena_adapter *adapter; 2310 struct ifreq *ifr; 2311 int rc; 2312 2313 adapter = if_getsoftc(ifp); 2314 ifr = (struct ifreq *)data; 2315 2316 /* 2317 * Acquiring lock to prevent from running up and down routines parallel. 2318 */ 2319 rc = 0; 2320 switch (command) { 2321 case SIOCSIFMTU: 2322 if (if_getmtu(ifp) == ifr->ifr_mtu) 2323 break; 2324 ENA_LOCK_LOCK(); 2325 ena_down(adapter); 2326 2327 ena_change_mtu(ifp, ifr->ifr_mtu); 2328 2329 rc = ena_up(adapter); 2330 ENA_LOCK_UNLOCK(); 2331 break; 2332 2333 case SIOCSIFFLAGS: 2334 if ((if_getflags(ifp) & IFF_UP) != 0) { 2335 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 2336 if ((if_getflags(ifp) & (IFF_PROMISC | 2337 IFF_ALLMULTI)) != 0) { 2338 ena_log(adapter->pdev, INFO, 2339 "ioctl promisc/allmulti\n"); 2340 } 2341 } else { 2342 ENA_LOCK_LOCK(); 2343 rc = ena_up(adapter); 2344 ENA_LOCK_UNLOCK(); 2345 } 2346 } else { 2347 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 2348 ENA_LOCK_LOCK(); 2349 ena_down(adapter); 2350 ENA_LOCK_UNLOCK(); 2351 } 2352 } 2353 break; 2354 2355 case SIOCADDMULTI: 2356 case SIOCDELMULTI: 2357 break; 2358 2359 case SIOCSIFMEDIA: 2360 case SIOCGIFMEDIA: 2361 rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command); 2362 break; 2363 2364 case SIOCSIFCAP: 2365 { 2366 int reinit = 0; 2367 2368 if (ifr->ifr_reqcap != if_getcapenable(ifp)) { 2369 if_setcapenable(ifp, ifr->ifr_reqcap); 2370 reinit = 1; 2371 } 2372 2373 if ((reinit != 0) && 2374 ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) { 2375 ENA_LOCK_LOCK(); 2376 ena_down(adapter); 2377 rc = ena_up(adapter); 2378 ENA_LOCK_UNLOCK(); 2379 } 2380 } 2381 2382 break; 2383 default: 2384 rc = ether_ioctl(ifp, command, data); 2385 break; 2386 } 2387 2388 return (rc); 2389 } 2390 2391 static int 2392 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat) 2393 { 2394 int caps = 0; 2395 2396 if ((feat->offload.tx & 2397 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | 2398 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK | 2399 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0) 2400 caps |= IFCAP_TXCSUM; 2401 2402 if ((feat->offload.tx & 2403 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK | 2404 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0) 2405 caps |= IFCAP_TXCSUM_IPV6; 2406 2407 if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0) 2408 caps |= IFCAP_TSO4; 2409 2410 if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0) 2411 caps |= IFCAP_TSO6; 2412 2413 if ((feat->offload.rx_supported & 2414 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK | 2415 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0) 2416 caps |= IFCAP_RXCSUM; 2417 2418 if ((feat->offload.rx_supported & 2419 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0) 2420 caps |= IFCAP_RXCSUM_IPV6; 2421 2422 caps |= IFCAP_LRO | IFCAP_JUMBO_MTU; 2423 2424 return (caps); 2425 } 2426 2427 static void 2428 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp) 2429 { 2430 host_info->supported_network_features[0] = (uint32_t)if_getcapabilities(ifp); 2431 } 2432 2433 static void 2434 ena_update_hwassist(struct ena_adapter *adapter) 2435 { 2436 if_t ifp = adapter->ifp; 2437 uint32_t feat = adapter->tx_offload_cap; 2438 int cap = if_getcapenable(ifp); 2439 int flags = 0; 2440 2441 if_clearhwassist(ifp); 2442 2443 if ((cap & IFCAP_TXCSUM) != 0) { 2444 if ((feat & 2445 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0) 2446 flags |= CSUM_IP; 2447 if ((feat & 2448 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | 2449 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0) 2450 flags |= CSUM_IP_UDP | CSUM_IP_TCP; 2451 } 2452 2453 if ((cap & IFCAP_TXCSUM_IPV6) != 0) 2454 flags |= CSUM_IP6_UDP | CSUM_IP6_TCP; 2455 2456 if ((cap & IFCAP_TSO4) != 0) 2457 flags |= CSUM_IP_TSO; 2458 2459 if ((cap & IFCAP_TSO6) != 0) 2460 flags |= CSUM_IP6_TSO; 2461 2462 if_sethwassistbits(ifp, flags, 0); 2463 } 2464 2465 static int 2466 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter, 2467 struct ena_com_dev_get_features_ctx *feat) 2468 { 2469 if_t ifp; 2470 int caps = 0; 2471 2472 ifp = adapter->ifp = if_gethandle(IFT_ETHER); 2473 if (unlikely(ifp == NULL)) { 2474 ena_log(pdev, ERR, "can not allocate ifnet structure\n"); 2475 return (ENXIO); 2476 } 2477 if_initname(ifp, device_get_name(pdev), device_get_unit(pdev)); 2478 if_setdev(ifp, pdev); 2479 if_setsoftc(ifp, adapter); 2480 2481 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 2482 if_setinitfn(ifp, ena_init); 2483 if_settransmitfn(ifp, ena_mq_start); 2484 if_setqflushfn(ifp, ena_qflush); 2485 if_setioctlfn(ifp, ena_ioctl); 2486 if_setgetcounterfn(ifp, ena_get_counter); 2487 2488 if_setsendqlen(ifp, adapter->requested_tx_ring_size); 2489 if_setsendqready(ifp); 2490 if_setmtu(ifp, ETHERMTU); 2491 if_setbaudrate(ifp, 0); 2492 /* Zeroize capabilities... */ 2493 if_setcapabilities(ifp, 0); 2494 if_setcapenable(ifp, 0); 2495 /* check hardware support */ 2496 caps = ena_get_dev_offloads(feat); 2497 /* ... and set them */ 2498 if_setcapabilitiesbit(ifp, caps, 0); 2499 2500 /* TSO parameters */ 2501 if_sethwtsomax(ifp, ENA_TSO_MAXSIZE - 2502 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); 2503 if_sethwtsomaxsegcount(ifp, adapter->max_tx_sgl_size - 1); 2504 if_sethwtsomaxsegsize(ifp, ENA_TSO_MAXSIZE); 2505 2506 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 2507 if_setcapenable(ifp, if_getcapabilities(ifp)); 2508 2509 /* 2510 * Specify the media types supported by this adapter and register 2511 * callbacks to update media and link information 2512 */ 2513 ifmedia_init(&adapter->media, IFM_IMASK, ena_media_change, 2514 ena_media_status); 2515 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 2516 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 2517 2518 ether_ifattach(ifp, adapter->mac_addr); 2519 2520 return (0); 2521 } 2522 2523 void 2524 ena_down(struct ena_adapter *adapter) 2525 { 2526 int rc; 2527 2528 ENA_LOCK_ASSERT(); 2529 2530 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 2531 return; 2532 2533 ena_log(adapter->pdev, INFO, "device is going DOWN\n"); 2534 2535 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter); 2536 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2537 2538 ena_free_io_irq(adapter); 2539 2540 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) { 2541 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 2542 if (unlikely(rc != 0)) 2543 ena_log(adapter->pdev, ERR, "Device reset failed\n"); 2544 } 2545 2546 ena_destroy_all_io_queues(adapter); 2547 2548 ena_free_all_tx_bufs(adapter); 2549 ena_free_all_rx_bufs(adapter); 2550 ena_free_all_tx_resources(adapter); 2551 ena_free_all_rx_resources(adapter); 2552 2553 counter_u64_add(adapter->dev_stats.interface_down, 1); 2554 } 2555 2556 static uint32_t 2557 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev, 2558 struct ena_com_dev_get_features_ctx *get_feat_ctx) 2559 { 2560 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 2561 2562 /* Regular queues capabilities */ 2563 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2564 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2565 &get_feat_ctx->max_queue_ext.max_queue_ext; 2566 io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num, 2567 max_queue_ext->max_rx_cq_num); 2568 2569 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 2570 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 2571 } else { 2572 struct ena_admin_queue_feature_desc *max_queues = 2573 &get_feat_ctx->max_queues; 2574 io_tx_sq_num = max_queues->max_sq_num; 2575 io_tx_cq_num = max_queues->max_cq_num; 2576 io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); 2577 } 2578 2579 /* In case of LLQ use the llq fields for the tx SQ/CQ */ 2580 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2581 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2582 2583 max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES); 2584 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num); 2585 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num); 2586 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num); 2587 /* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */ 2588 max_num_io_queues = min_t(uint32_t, max_num_io_queues, 2589 pci_msix_count(pdev) - 1); 2590 #ifdef RSS 2591 max_num_io_queues = min_t(uint32_t, max_num_io_queues, 2592 rss_getnumbuckets()); 2593 #endif 2594 2595 return (max_num_io_queues); 2596 } 2597 2598 static int 2599 ena_enable_wc(device_t pdev, struct resource *res) 2600 { 2601 #if defined(__i386) || defined(__amd64) || defined(__aarch64__) 2602 vm_offset_t va; 2603 vm_size_t len; 2604 int rc; 2605 2606 va = (vm_offset_t)rman_get_virtual(res); 2607 len = rman_get_size(res); 2608 /* Enable write combining */ 2609 rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING); 2610 if (unlikely(rc != 0)) { 2611 ena_log(pdev, ERR, "pmap_change_attr failed, %d\n", rc); 2612 return (rc); 2613 } 2614 2615 return (0); 2616 #endif 2617 return (EOPNOTSUPP); 2618 } 2619 2620 static int 2621 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev, 2622 struct ena_admin_feature_llq_desc *llq, 2623 struct ena_llq_configurations *llq_default_configurations) 2624 { 2625 int rc; 2626 uint32_t llq_feature_mask; 2627 2628 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2629 if (!(ena_dev->supported_features & llq_feature_mask)) { 2630 ena_log(pdev, WARN, 2631 "LLQ is not supported. Fallback to host mode policy.\n"); 2632 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2633 return (0); 2634 } 2635 2636 if (ena_dev->mem_bar == NULL) { 2637 ena_log(pdev, WARN, 2638 "LLQ is advertised as supported but device doesn't expose mem bar.\n"); 2639 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2640 return (0); 2641 } 2642 2643 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2644 if (unlikely(rc != 0)) { 2645 ena_log(pdev, WARN, 2646 "Failed to configure the device mode. " 2647 "Fallback to host mode policy.\n"); 2648 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2649 } 2650 2651 return (0); 2652 } 2653 2654 static int 2655 ena_map_llq_mem_bar(device_t pdev, struct ena_com_dev *ena_dev) 2656 { 2657 struct ena_adapter *adapter = device_get_softc(pdev); 2658 int rc, rid; 2659 2660 /* Try to allocate resources for LLQ bar */ 2661 rid = PCIR_BAR(ENA_MEM_BAR); 2662 adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid, 2663 RF_ACTIVE); 2664 if (unlikely(adapter->memory == NULL)) { 2665 ena_log(pdev, WARN, 2666 "Unable to allocate LLQ bar resource. LLQ mode won't be used.\n"); 2667 return (0); 2668 } 2669 2670 /* Enable write combining for better LLQ performance */ 2671 rc = ena_enable_wc(adapter->pdev, adapter->memory); 2672 if (unlikely(rc != 0)) { 2673 ena_log(pdev, ERR, "failed to enable write combining.\n"); 2674 return (rc); 2675 } 2676 2677 /* 2678 * Save virtual address of the device's memory region 2679 * for the ena_com layer. 2680 */ 2681 ena_dev->mem_bar = rman_get_virtual(adapter->memory); 2682 2683 return (0); 2684 } 2685 2686 static inline void 2687 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2688 struct ena_admin_feature_llq_desc *llq) 2689 { 2690 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2691 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2692 llq_config->llq_num_decs_before_header = 2693 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2694 if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 2695 0 && ena_force_large_llq_header) { 2696 llq_config->llq_ring_entry_size = 2697 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2698 llq_config->llq_ring_entry_size_value = 256; 2699 } else { 2700 llq_config->llq_ring_entry_size = 2701 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2702 llq_config->llq_ring_entry_size_value = 128; 2703 } 2704 } 2705 2706 static int 2707 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) 2708 { 2709 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 2710 struct ena_com_dev *ena_dev = ctx->ena_dev; 2711 uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE; 2712 uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE; 2713 uint32_t max_tx_queue_size; 2714 uint32_t max_rx_queue_size; 2715 2716 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2717 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2718 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 2719 max_rx_queue_size = min_t(uint32_t, 2720 max_queue_ext->max_rx_cq_depth, 2721 max_queue_ext->max_rx_sq_depth); 2722 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 2723 2724 if (ena_dev->tx_mem_queue_type == 2725 ENA_ADMIN_PLACEMENT_POLICY_DEV) 2726 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2727 llq->max_llq_depth); 2728 else 2729 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2730 max_queue_ext->max_tx_sq_depth); 2731 2732 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2733 max_queue_ext->max_per_packet_tx_descs); 2734 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2735 max_queue_ext->max_per_packet_rx_descs); 2736 } else { 2737 struct ena_admin_queue_feature_desc *max_queues = 2738 &ctx->get_feat_ctx->max_queues; 2739 max_rx_queue_size = min_t(uint32_t, max_queues->max_cq_depth, 2740 max_queues->max_sq_depth); 2741 max_tx_queue_size = max_queues->max_cq_depth; 2742 2743 if (ena_dev->tx_mem_queue_type == 2744 ENA_ADMIN_PLACEMENT_POLICY_DEV) 2745 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2746 llq->max_llq_depth); 2747 else 2748 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2749 max_queues->max_sq_depth); 2750 2751 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2752 max_queues->max_packet_tx_descs); 2753 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2754 max_queues->max_packet_rx_descs); 2755 } 2756 2757 /* round down to the nearest power of 2 */ 2758 max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1); 2759 max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1); 2760 2761 /* 2762 * When forcing large headers, we multiply the entry size by 2, 2763 * and therefore divide the queue size by 2, leaving the amount 2764 * of memory used by the queues unchanged. 2765 */ 2766 if (ena_force_large_llq_header) { 2767 if ((llq->entry_size_ctrl_supported & 2768 ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 && 2769 ena_dev->tx_mem_queue_type == 2770 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2771 max_tx_queue_size /= 2; 2772 ena_log(ctx->pdev, INFO, 2773 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 2774 max_tx_queue_size); 2775 } else { 2776 ena_log(ctx->pdev, WARN, 2777 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 2778 } 2779 } 2780 2781 tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, 2782 max_tx_queue_size); 2783 rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, 2784 max_rx_queue_size); 2785 2786 tx_queue_size = 1 << (flsl(tx_queue_size) - 1); 2787 rx_queue_size = 1 << (flsl(rx_queue_size) - 1); 2788 2789 ctx->max_tx_queue_size = max_tx_queue_size; 2790 ctx->max_rx_queue_size = max_rx_queue_size; 2791 ctx->tx_queue_size = tx_queue_size; 2792 ctx->rx_queue_size = rx_queue_size; 2793 2794 return (0); 2795 } 2796 2797 static void 2798 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev) 2799 { 2800 struct ena_admin_host_info *host_info; 2801 uintptr_t rid; 2802 int rc; 2803 2804 /* Allocate only the host info */ 2805 rc = ena_com_allocate_host_info(ena_dev); 2806 if (unlikely(rc != 0)) { 2807 ena_log(dev, ERR, "Cannot allocate host info\n"); 2808 return; 2809 } 2810 2811 host_info = ena_dev->host_attr.host_info; 2812 2813 if (pci_get_id(dev, PCI_ID_RID, &rid) == 0) 2814 host_info->bdf = rid; 2815 host_info->os_type = ENA_ADMIN_OS_FREEBSD; 2816 host_info->kernel_ver = osreldate; 2817 2818 sprintf(host_info->kernel_ver_str, "%d", osreldate); 2819 host_info->os_dist = 0; 2820 strncpy(host_info->os_dist_str, osrelease, 2821 sizeof(host_info->os_dist_str) - 1); 2822 2823 host_info->driver_version = (ENA_DRV_MODULE_VER_MAJOR) | 2824 (ENA_DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 2825 (ENA_DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 2826 host_info->num_cpus = mp_ncpus; 2827 host_info->driver_supported_features = 2828 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 2829 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 2830 2831 rc = ena_com_set_host_attributes(ena_dev); 2832 if (unlikely(rc != 0)) { 2833 if (rc == EOPNOTSUPP) 2834 ena_log(dev, WARN, "Cannot set host attributes\n"); 2835 else 2836 ena_log(dev, ERR, "Cannot set host attributes\n"); 2837 2838 goto err; 2839 } 2840 2841 return; 2842 2843 err: 2844 ena_com_delete_host_info(ena_dev); 2845 } 2846 2847 static int 2848 ena_device_init(struct ena_adapter *adapter, device_t pdev, 2849 struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active) 2850 { 2851 struct ena_llq_configurations llq_config; 2852 struct ena_com_dev *ena_dev = adapter->ena_dev; 2853 bool readless_supported; 2854 uint32_t aenq_groups; 2855 int dma_width; 2856 int rc; 2857 2858 rc = ena_com_mmio_reg_read_request_init(ena_dev); 2859 if (unlikely(rc != 0)) { 2860 ena_log(pdev, ERR, "failed to init mmio read less\n"); 2861 return (rc); 2862 } 2863 2864 /* 2865 * The PCIe configuration space revision id indicate if mmio reg 2866 * read is disabled 2867 */ 2868 readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ); 2869 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 2870 2871 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 2872 if (unlikely(rc != 0)) { 2873 ena_log(pdev, ERR, "Can not reset device\n"); 2874 goto err_mmio_read_less; 2875 } 2876 2877 rc = ena_com_validate_version(ena_dev); 2878 if (unlikely(rc != 0)) { 2879 ena_log(pdev, ERR, "device version is too low\n"); 2880 goto err_mmio_read_less; 2881 } 2882 2883 dma_width = ena_com_get_dma_width(ena_dev); 2884 if (unlikely(dma_width < 0)) { 2885 ena_log(pdev, ERR, "Invalid dma width value %d", dma_width); 2886 rc = dma_width; 2887 goto err_mmio_read_less; 2888 } 2889 adapter->dma_width = dma_width; 2890 2891 /* ENA admin level init */ 2892 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 2893 if (unlikely(rc != 0)) { 2894 ena_log(pdev, ERR, 2895 "Can not initialize ena admin queue with device\n"); 2896 goto err_mmio_read_less; 2897 } 2898 2899 /* 2900 * To enable the msix interrupts the driver needs to know the number 2901 * of queues. So the driver uses polling mode to retrieve this 2902 * information 2903 */ 2904 ena_com_set_admin_polling_mode(ena_dev, true); 2905 2906 ena_config_host_info(ena_dev, pdev); 2907 2908 /* Get Device Attributes */ 2909 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 2910 if (unlikely(rc != 0)) { 2911 ena_log(pdev, ERR, 2912 "Cannot get attribute for ena device rc: %d\n", rc); 2913 goto err_admin_init; 2914 } 2915 2916 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 2917 BIT(ENA_ADMIN_FATAL_ERROR) | 2918 BIT(ENA_ADMIN_WARNING) | 2919 BIT(ENA_ADMIN_NOTIFICATION) | 2920 BIT(ENA_ADMIN_KEEP_ALIVE); 2921 2922 aenq_groups &= get_feat_ctx->aenq.supported_groups; 2923 rc = ena_com_set_aenq_config(ena_dev, aenq_groups); 2924 if (unlikely(rc != 0)) { 2925 ena_log(pdev, ERR, "Cannot configure aenq groups rc: %d\n", rc); 2926 goto err_admin_init; 2927 } 2928 2929 *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); 2930 2931 set_default_llq_configurations(&llq_config, &get_feat_ctx->llq); 2932 2933 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq, 2934 &llq_config); 2935 if (unlikely(rc != 0)) { 2936 ena_log(pdev, ERR, "Failed to set placement policy\n"); 2937 goto err_admin_init; 2938 } 2939 2940 return (0); 2941 2942 err_admin_init: 2943 ena_com_delete_host_info(ena_dev); 2944 ena_com_admin_destroy(ena_dev); 2945 err_mmio_read_less: 2946 ena_com_mmio_reg_read_request_destroy(ena_dev); 2947 2948 return (rc); 2949 } 2950 2951 static int 2952 ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) 2953 { 2954 struct ena_com_dev *ena_dev = adapter->ena_dev; 2955 int rc; 2956 2957 rc = ena_enable_msix(adapter); 2958 if (unlikely(rc != 0)) { 2959 ena_log(adapter->pdev, ERR, "Error with MSI-X enablement\n"); 2960 return (rc); 2961 } 2962 2963 ena_setup_mgmnt_intr(adapter); 2964 2965 rc = ena_request_mgmnt_irq(adapter); 2966 if (unlikely(rc != 0)) { 2967 ena_log(adapter->pdev, ERR, "Cannot setup mgmnt queue intr\n"); 2968 goto err_disable_msix; 2969 } 2970 2971 ena_com_set_admin_polling_mode(ena_dev, false); 2972 2973 ena_com_admin_aenq_enable(ena_dev); 2974 2975 return (0); 2976 2977 err_disable_msix: 2978 ena_disable_msix(adapter); 2979 2980 return (rc); 2981 } 2982 2983 /* Function called on ENA_ADMIN_KEEP_ALIVE event */ 2984 static void 2985 ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) 2986 { 2987 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 2988 struct ena_admin_aenq_keep_alive_desc *desc; 2989 sbintime_t stime; 2990 uint64_t rx_drops; 2991 uint64_t tx_drops; 2992 2993 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; 2994 2995 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low; 2996 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low; 2997 counter_u64_zero(adapter->hw_stats.rx_drops); 2998 counter_u64_add(adapter->hw_stats.rx_drops, rx_drops); 2999 counter_u64_zero(adapter->hw_stats.tx_drops); 3000 counter_u64_add(adapter->hw_stats.tx_drops, tx_drops); 3001 3002 stime = getsbinuptime(); 3003 atomic_store_rel_64(&adapter->keep_alive_timestamp, stime); 3004 } 3005 3006 /* Check for keep alive expiration */ 3007 static void 3008 check_for_missing_keep_alive(struct ena_adapter *adapter) 3009 { 3010 sbintime_t timestamp, time; 3011 3012 if (adapter->wd_active == 0) 3013 return; 3014 3015 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3016 return; 3017 3018 timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp); 3019 time = getsbinuptime() - timestamp; 3020 if (unlikely(time > adapter->keep_alive_timeout)) { 3021 ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n"); 3022 counter_u64_add(adapter->dev_stats.wd_expired, 1); 3023 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 3024 } 3025 } 3026 3027 /* Check if admin queue is enabled */ 3028 static void 3029 check_for_admin_com_state(struct ena_adapter *adapter) 3030 { 3031 if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) { 3032 ena_log(adapter->pdev, ERR, 3033 "ENA admin queue is not in running state!\n"); 3034 counter_u64_add(adapter->dev_stats.admin_q_pause, 1); 3035 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 3036 } 3037 } 3038 3039 static int 3040 check_for_rx_interrupt_queue(struct ena_adapter *adapter, 3041 struct ena_ring *rx_ring) 3042 { 3043 if (likely(atomic_load_8(&rx_ring->first_interrupt))) 3044 return (0); 3045 3046 if (ena_com_cq_empty(rx_ring->ena_com_io_cq)) 3047 return (0); 3048 3049 rx_ring->no_interrupt_event_cnt++; 3050 3051 if (rx_ring->no_interrupt_event_cnt == 3052 ENA_MAX_NO_INTERRUPT_ITERATIONS) { 3053 ena_log(adapter->pdev, ERR, 3054 "Potential MSIX issue on Rx side Queue = %d. Reset the device\n", 3055 rx_ring->qid); 3056 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT); 3057 return (EIO); 3058 } 3059 3060 return (0); 3061 } 3062 3063 static int 3064 check_missing_comp_in_tx_queue(struct ena_adapter *adapter, 3065 struct ena_ring *tx_ring) 3066 { 3067 device_t pdev = adapter->pdev; 3068 struct bintime curtime, time; 3069 struct ena_tx_buffer *tx_buf; 3070 int time_since_last_cleanup; 3071 int missing_tx_comp_to; 3072 sbintime_t time_offset; 3073 uint32_t missed_tx = 0; 3074 int i, rc = 0; 3075 3076 getbinuptime(&curtime); 3077 3078 for (i = 0; i < tx_ring->ring_size; i++) { 3079 tx_buf = &tx_ring->tx_buffer_info[i]; 3080 3081 if (bintime_isset(&tx_buf->timestamp) == 0) 3082 continue; 3083 3084 time = curtime; 3085 bintime_sub(&time, &tx_buf->timestamp); 3086 time_offset = bttosbt(time); 3087 3088 if (unlikely(!atomic_load_8(&tx_ring->first_interrupt) && 3089 time_offset > 2 * adapter->missing_tx_timeout)) { 3090 /* 3091 * If after graceful period interrupt is still not 3092 * received, we schedule a reset. 3093 */ 3094 ena_log(pdev, ERR, 3095 "Potential MSIX issue on Tx side Queue = %d. " 3096 "Reset the device\n", 3097 tx_ring->qid); 3098 ena_trigger_reset(adapter, 3099 ENA_REGS_RESET_MISS_INTERRUPT); 3100 return (EIO); 3101 } 3102 3103 /* Check again if packet is still waiting */ 3104 if (unlikely(time_offset > adapter->missing_tx_timeout)) { 3105 3106 if (tx_buf->print_once) { 3107 time_since_last_cleanup = TICKS_2_MSEC(ticks - 3108 tx_ring->tx_last_cleanup_ticks); 3109 missing_tx_comp_to = sbttoms( 3110 adapter->missing_tx_timeout); 3111 ena_log(pdev, WARN, 3112 "Found a Tx that wasn't completed on time, qid %d, index %d. " 3113 "%d msecs have passed since last cleanup. Missing Tx timeout value %d msecs.\n", 3114 tx_ring->qid, i, time_since_last_cleanup, 3115 missing_tx_comp_to); 3116 } 3117 3118 tx_buf->print_once = false; 3119 missed_tx++; 3120 } 3121 } 3122 3123 if (unlikely(missed_tx > adapter->missing_tx_threshold)) { 3124 ena_log(pdev, ERR, 3125 "The number of lost tx completion is above the threshold " 3126 "(%d > %d). Reset the device\n", 3127 missed_tx, adapter->missing_tx_threshold); 3128 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL); 3129 rc = EIO; 3130 } 3131 3132 counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx); 3133 3134 return (rc); 3135 } 3136 3137 /* 3138 * Check for TX which were not completed on time. 3139 * Timeout is defined by "missing_tx_timeout". 3140 * Reset will be performed if number of incompleted 3141 * transactions exceeds "missing_tx_threshold". 3142 */ 3143 static void 3144 check_for_missing_completions(struct ena_adapter *adapter) 3145 { 3146 struct ena_ring *tx_ring; 3147 struct ena_ring *rx_ring; 3148 int i, budget, rc; 3149 3150 /* Make sure the driver doesn't turn the device in other process */ 3151 rmb(); 3152 3153 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3154 return; 3155 3156 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) 3157 return; 3158 3159 if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3160 return; 3161 3162 budget = adapter->missing_tx_max_queues; 3163 3164 for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) { 3165 tx_ring = &adapter->tx_ring[i]; 3166 rx_ring = &adapter->rx_ring[i]; 3167 3168 rc = check_missing_comp_in_tx_queue(adapter, tx_ring); 3169 if (unlikely(rc != 0)) 3170 return; 3171 3172 rc = check_for_rx_interrupt_queue(adapter, rx_ring); 3173 if (unlikely(rc != 0)) 3174 return; 3175 3176 budget--; 3177 if (budget == 0) { 3178 i++; 3179 break; 3180 } 3181 } 3182 3183 adapter->next_monitored_tx_qid = i % adapter->num_io_queues; 3184 } 3185 3186 /* trigger rx cleanup after 2 consecutive detections */ 3187 #define EMPTY_RX_REFILL 2 3188 /* For the rare case where the device runs out of Rx descriptors and the 3189 * msix handler failed to refill new Rx descriptors (due to a lack of memory 3190 * for example). 3191 * This case will lead to a deadlock: 3192 * The device won't send interrupts since all the new Rx packets will be dropped 3193 * The msix handler won't allocate new Rx descriptors so the device won't be 3194 * able to send new packets. 3195 * 3196 * When such a situation is detected - execute rx cleanup task in another thread 3197 */ 3198 static void 3199 check_for_empty_rx_ring(struct ena_adapter *adapter) 3200 { 3201 struct ena_ring *rx_ring; 3202 int i, refill_required; 3203 3204 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3205 return; 3206 3207 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) 3208 return; 3209 3210 for (i = 0; i < adapter->num_io_queues; i++) { 3211 rx_ring = &adapter->rx_ring[i]; 3212 3213 refill_required = ena_com_free_q_entries( 3214 rx_ring->ena_com_io_sq); 3215 if (unlikely(refill_required == (rx_ring->ring_size - 1))) { 3216 rx_ring->empty_rx_queue++; 3217 3218 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { 3219 counter_u64_add(rx_ring->rx_stats.empty_rx_ring, 3220 1); 3221 3222 ena_log(adapter->pdev, WARN, 3223 "Rx ring %d is stalled. Triggering the refill function\n", 3224 i); 3225 3226 taskqueue_enqueue(rx_ring->que->cleanup_tq, 3227 &rx_ring->que->cleanup_task); 3228 rx_ring->empty_rx_queue = 0; 3229 } 3230 } else { 3231 rx_ring->empty_rx_queue = 0; 3232 } 3233 } 3234 } 3235 3236 static void 3237 ena_update_hints(struct ena_adapter *adapter, 3238 struct ena_admin_ena_hw_hints *hints) 3239 { 3240 struct ena_com_dev *ena_dev = adapter->ena_dev; 3241 3242 if (hints->admin_completion_tx_timeout) 3243 ena_dev->admin_queue.completion_timeout = 3244 hints->admin_completion_tx_timeout * 1000; 3245 3246 if (hints->mmio_read_timeout) 3247 /* convert to usec */ 3248 ena_dev->mmio_read.reg_read_to = hints->mmio_read_timeout * 1000; 3249 3250 if (hints->missed_tx_completion_count_threshold_to_reset) 3251 adapter->missing_tx_threshold = 3252 hints->missed_tx_completion_count_threshold_to_reset; 3253 3254 if (hints->missing_tx_completion_timeout) { 3255 if (hints->missing_tx_completion_timeout == 3256 ENA_HW_HINTS_NO_TIMEOUT) 3257 adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3258 else 3259 adapter->missing_tx_timeout = SBT_1MS * 3260 hints->missing_tx_completion_timeout; 3261 } 3262 3263 if (hints->driver_watchdog_timeout) { 3264 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3265 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3266 else 3267 adapter->keep_alive_timeout = SBT_1MS * 3268 hints->driver_watchdog_timeout; 3269 } 3270 } 3271 3272 /** 3273 * ena_copy_eni_metrics - Get and copy ENI metrics from the HW. 3274 * @adapter: ENA device adapter 3275 * 3276 * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics 3277 * and other error codes on failure. 3278 * 3279 * This function can possibly cause a race with other calls to the admin queue. 3280 * Because of that, the caller should either lock this function or make sure 3281 * that there is no race in the current context. 3282 */ 3283 static int 3284 ena_copy_eni_metrics(struct ena_adapter *adapter) 3285 { 3286 static bool print_once = true; 3287 int rc; 3288 3289 rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics); 3290 3291 if (rc != 0) { 3292 if (rc == ENA_COM_UNSUPPORTED) { 3293 if (print_once) { 3294 ena_log(adapter->pdev, WARN, 3295 "Retrieving ENI metrics is not supported.\n"); 3296 print_once = false; 3297 } else { 3298 ena_log(adapter->pdev, DBG, 3299 "Retrieving ENI metrics is not supported.\n"); 3300 } 3301 } else { 3302 ena_log(adapter->pdev, ERR, 3303 "Failed to get ENI metrics: %d\n", rc); 3304 } 3305 } 3306 3307 return (rc); 3308 } 3309 3310 static int 3311 ena_copy_srd_metrics(struct ena_adapter *adapter) 3312 { 3313 return ena_com_get_ena_srd_info(adapter->ena_dev, &adapter->ena_srd_info); 3314 } 3315 3316 static int 3317 ena_copy_customer_metrics(struct ena_adapter *adapter) 3318 { 3319 struct ena_com_dev *dev; 3320 u32 supported_metrics_count; 3321 int rc, len; 3322 3323 dev = adapter->ena_dev; 3324 3325 supported_metrics_count = ena_com_get_customer_metric_count(dev); 3326 len = supported_metrics_count * sizeof(u64); 3327 3328 /* Fill the data buffer */ 3329 rc = ena_com_get_customer_metrics(adapter->ena_dev, 3330 (char *)(adapter->customer_metrics_array), len); 3331 3332 return (rc); 3333 } 3334 3335 static void 3336 ena_timer_service(void *data) 3337 { 3338 struct ena_adapter *adapter = (struct ena_adapter *)data; 3339 struct ena_admin_host_info *host_info = 3340 adapter->ena_dev->host_attr.host_info; 3341 3342 check_for_missing_keep_alive(adapter); 3343 3344 check_for_admin_com_state(adapter); 3345 3346 check_for_missing_completions(adapter); 3347 3348 check_for_empty_rx_ring(adapter); 3349 3350 /* 3351 * User controller update of the ENA metrics. 3352 * If the delay was set to 0, then the stats shouldn't be updated at 3353 * all. 3354 * Otherwise, wait 'metrics_sample_interval' seconds, before 3355 * updating stats. 3356 * As timer service is executed every second, it's enough to increment 3357 * appropriate counter each time the timer service is executed. 3358 */ 3359 if ((adapter->metrics_sample_interval != 0) && 3360 (++adapter->metrics_sample_interval_cnt >= 3361 adapter->metrics_sample_interval)) { 3362 taskqueue_enqueue(adapter->metrics_tq, &adapter->metrics_task); 3363 adapter->metrics_sample_interval_cnt = 0; 3364 } 3365 3366 3367 if (host_info != NULL) 3368 ena_update_host_info(host_info, adapter->ifp); 3369 3370 if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { 3371 /* 3372 * Timeout when validating version indicates that the device 3373 * became unresponsive. If that happens skip the reset and 3374 * reschedule timer service, so the reset can be retried later. 3375 */ 3376 if (ena_com_validate_version(adapter->ena_dev) == 3377 ENA_COM_TIMER_EXPIRED) { 3378 ena_log(adapter->pdev, WARN, 3379 "FW unresponsive, skipping reset\n"); 3380 ENA_TIMER_RESET(adapter); 3381 return; 3382 } 3383 ena_log(adapter->pdev, WARN, "Trigger reset is on\n"); 3384 taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task); 3385 return; 3386 } 3387 3388 /* 3389 * Schedule another timeout one second from now. 3390 */ 3391 ENA_TIMER_RESET(adapter); 3392 } 3393 3394 void 3395 ena_destroy_device(struct ena_adapter *adapter, bool graceful) 3396 { 3397 if_t ifp = adapter->ifp; 3398 struct ena_com_dev *ena_dev = adapter->ena_dev; 3399 bool dev_up; 3400 3401 if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) 3402 return; 3403 3404 if (!graceful) 3405 if_link_state_change(ifp, LINK_STATE_DOWN); 3406 3407 ENA_TIMER_DRAIN(adapter); 3408 3409 dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 3410 if (dev_up) 3411 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 3412 3413 if (!graceful) 3414 ena_com_set_admin_running_state(ena_dev, false); 3415 3416 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3417 ena_down(adapter); 3418 3419 /* 3420 * Stop the device from sending AENQ events (if the device was up, and 3421 * the trigger reset was on, ena_down already performs device reset) 3422 */ 3423 if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up)) 3424 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 3425 3426 ena_free_mgmnt_irq(adapter); 3427 3428 ena_disable_msix(adapter); 3429 3430 /* 3431 * IO rings resources should be freed because `ena_restore_device()` 3432 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX 3433 * vectors. The amount of MSIX vectors after destroy-restore may be 3434 * different than before. Therefore, IO rings resources should be 3435 * established from scratch each time. 3436 */ 3437 ena_free_all_io_rings_resources(adapter); 3438 3439 ena_com_abort_admin_commands(ena_dev); 3440 3441 ena_com_wait_for_abort_completion(ena_dev); 3442 3443 ena_com_admin_destroy(ena_dev); 3444 3445 ena_com_mmio_reg_read_request_destroy(ena_dev); 3446 3447 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3448 3449 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter); 3450 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3451 } 3452 3453 static int 3454 ena_device_validate_params(struct ena_adapter *adapter, 3455 struct ena_com_dev_get_features_ctx *get_feat_ctx) 3456 { 3457 if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr, 3458 ETHER_ADDR_LEN) != 0) { 3459 ena_log(adapter->pdev, ERR, "Error, mac addresses differ\n"); 3460 return (EINVAL); 3461 } 3462 3463 if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) { 3464 ena_log(adapter->pdev, ERR, 3465 "Error, device max mtu is smaller than ifp MTU\n"); 3466 return (EINVAL); 3467 } 3468 3469 return 0; 3470 } 3471 3472 int 3473 ena_restore_device(struct ena_adapter *adapter) 3474 { 3475 struct ena_com_dev_get_features_ctx get_feat_ctx; 3476 struct ena_com_dev *ena_dev = adapter->ena_dev; 3477 if_t ifp = adapter->ifp; 3478 device_t dev = adapter->pdev; 3479 int wd_active; 3480 int rc; 3481 3482 ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3483 3484 rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active); 3485 if (rc != 0) { 3486 ena_log(dev, ERR, "Cannot initialize device\n"); 3487 goto err; 3488 } 3489 /* 3490 * Only enable WD if it was enabled before reset, so it won't override 3491 * value set by the user by the sysctl. 3492 */ 3493 if (adapter->wd_active != 0) 3494 adapter->wd_active = wd_active; 3495 3496 rc = ena_device_validate_params(adapter, &get_feat_ctx); 3497 if (rc != 0) { 3498 ena_log(dev, ERR, "Validation of device parameters failed\n"); 3499 goto err_device_destroy; 3500 } 3501 3502 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3503 /* Make sure we don't have a race with AENQ Links state handler */ 3504 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) 3505 if_link_state_change(ifp, LINK_STATE_UP); 3506 3507 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 3508 if (rc != 0) { 3509 ena_log(dev, ERR, "Enable MSI-X failed\n"); 3510 goto err_device_destroy; 3511 } 3512 3513 /* 3514 * Effective value of used MSIX vectors should be the same as before 3515 * `ena_destroy_device()`, if possible, or closest to it if less vectors 3516 * are available. 3517 */ 3518 if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues) 3519 adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC; 3520 3521 /* Re-initialize rings basic information */ 3522 ena_init_io_rings(adapter); 3523 3524 /* If the interface was up before the reset bring it up */ 3525 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) { 3526 rc = ena_up(adapter); 3527 if (rc != 0) { 3528 ena_log(dev, ERR, "Failed to create I/O queues\n"); 3529 goto err_disable_msix; 3530 } 3531 } 3532 3533 /* Indicate that device is running again and ready to work */ 3534 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3535 3536 /* 3537 * As the AENQ handlers weren't executed during reset because 3538 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the 3539 * timestamp must be updated again That will prevent next reset 3540 * caused by missing keep alive. 3541 */ 3542 adapter->keep_alive_timestamp = getsbinuptime(); 3543 ENA_TIMER_RESET(adapter); 3544 3545 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 3546 3547 return (rc); 3548 3549 err_disable_msix: 3550 ena_free_mgmnt_irq(adapter); 3551 ena_disable_msix(adapter); 3552 err_device_destroy: 3553 ena_com_abort_admin_commands(ena_dev); 3554 ena_com_wait_for_abort_completion(ena_dev); 3555 ena_com_admin_destroy(ena_dev); 3556 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); 3557 ena_com_mmio_reg_read_request_destroy(ena_dev); 3558 err: 3559 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3560 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3561 ena_log(dev, ERR, "Reset attempt failed. Can not reset the device\n"); 3562 3563 return (rc); 3564 } 3565 3566 static void 3567 ena_metrics_task(void *arg, int pending) 3568 { 3569 struct ena_adapter *adapter = (struct ena_adapter *)arg; 3570 3571 ENA_LOCK_LOCK(); 3572 3573 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 3574 (void)ena_copy_customer_metrics(adapter); 3575 else if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS)) 3576 (void)ena_copy_eni_metrics(adapter); 3577 3578 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 3579 (void)ena_copy_srd_metrics(adapter); 3580 3581 ENA_LOCK_UNLOCK(); 3582 } 3583 3584 static void 3585 ena_reset_task(void *arg, int pending) 3586 { 3587 struct ena_adapter *adapter = (struct ena_adapter *)arg; 3588 3589 ENA_LOCK_LOCK(); 3590 if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { 3591 ena_destroy_device(adapter, false); 3592 ena_restore_device(adapter); 3593 3594 ena_log(adapter->pdev, INFO, 3595 "Device reset completed successfully, Driver info: %s\n", 3596 ena_version); 3597 } 3598 ENA_LOCK_UNLOCK(); 3599 } 3600 3601 static void 3602 ena_free_stats(struct ena_adapter *adapter) 3603 { 3604 ena_free_counters((counter_u64_t *)&adapter->hw_stats, 3605 sizeof(struct ena_hw_stats)); 3606 ena_free_counters((counter_u64_t *)&adapter->dev_stats, 3607 sizeof(struct ena_stats_dev)); 3608 3609 } 3610 /** 3611 * ena_attach - Device Initialization Routine 3612 * @pdev: device information struct 3613 * 3614 * Returns 0 on success, otherwise on failure. 3615 * 3616 * ena_attach initializes an adapter identified by a device structure. 3617 * The OS initialization, configuring of the adapter private structure, 3618 * and a hardware reset occur. 3619 **/ 3620 static int 3621 ena_attach(device_t pdev) 3622 { 3623 struct ena_com_dev_get_features_ctx get_feat_ctx; 3624 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 3625 static int version_printed; 3626 struct ena_adapter *adapter; 3627 struct ena_com_dev *ena_dev = NULL; 3628 uint32_t max_num_io_queues; 3629 int msix_rid; 3630 int rid, rc; 3631 3632 adapter = device_get_softc(pdev); 3633 adapter->pdev = pdev; 3634 adapter->first_bind = -1; 3635 3636 /* 3637 * Set up the timer service - driver is responsible for avoiding 3638 * concurrency, as the callout won't be using any locking inside. 3639 */ 3640 ENA_TIMER_INIT(adapter); 3641 adapter->keep_alive_timeout = ENA_DEFAULT_KEEP_ALIVE_TO; 3642 adapter->missing_tx_timeout = ENA_DEFAULT_TX_CMP_TO; 3643 adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES; 3644 adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD; 3645 3646 adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED; 3647 adapter->irq_cpu_stride = 0; 3648 3649 #ifdef RSS 3650 adapter->rss_enabled = 1; 3651 #endif 3652 3653 if (version_printed++ == 0) 3654 ena_log(pdev, INFO, "%s\n", ena_version); 3655 3656 /* Allocate memory for ena_dev structure */ 3657 ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF, 3658 M_WAITOK | M_ZERO); 3659 3660 adapter->ena_dev = ena_dev; 3661 ena_dev->dmadev = pdev; 3662 3663 rid = PCIR_BAR(ENA_REG_BAR); 3664 adapter->memory = NULL; 3665 adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid, 3666 RF_ACTIVE); 3667 if (unlikely(adapter->registers == NULL)) { 3668 ena_log(pdev, ERR, 3669 "unable to allocate bus resource: registers!\n"); 3670 rc = ENOMEM; 3671 goto err_dev_free; 3672 } 3673 3674 /* MSIx vector table may reside on BAR0 with registers or on BAR1. */ 3675 msix_rid = pci_msix_table_bar(pdev); 3676 if (msix_rid != rid) { 3677 adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, 3678 &msix_rid, RF_ACTIVE); 3679 if (unlikely(adapter->msix == NULL)) { 3680 ena_log(pdev, ERR, 3681 "unable to allocate bus resource: msix!\n"); 3682 rc = ENOMEM; 3683 goto err_pci_free; 3684 } 3685 adapter->msix_rid = msix_rid; 3686 } 3687 3688 ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF, 3689 M_WAITOK | M_ZERO); 3690 3691 /* Store register resources */ 3692 ((struct ena_bus *)(ena_dev->bus))->reg_bar_t = rman_get_bustag( 3693 adapter->registers); 3694 ((struct ena_bus *)(ena_dev->bus))->reg_bar_h = rman_get_bushandle( 3695 adapter->registers); 3696 3697 if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) { 3698 ena_log(pdev, ERR, "failed to pmap registers bar\n"); 3699 rc = ENXIO; 3700 goto err_bus_free; 3701 } 3702 3703 rc = ena_map_llq_mem_bar(pdev, ena_dev); 3704 if (unlikely(rc != 0)) { 3705 ena_log(pdev, ERR, "Failed to map ENA mem bar"); 3706 goto err_bus_free; 3707 } 3708 3709 /* Initially clear all the flags */ 3710 ENA_FLAG_ZERO(adapter); 3711 3712 /* Device initialization */ 3713 rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active); 3714 if (unlikely(rc != 0)) { 3715 ena_log(pdev, ERR, "ENA device init failed! (err: %d)\n", rc); 3716 rc = ENXIO; 3717 goto err_bus_free; 3718 } 3719 3720 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 3721 adapter->disable_meta_caching = !!( 3722 get_feat_ctx.llq.accel_mode.u.get.supported_flags & 3723 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 3724 3725 adapter->keep_alive_timestamp = getsbinuptime(); 3726 3727 adapter->tx_offload_cap = get_feat_ctx.offload.tx; 3728 3729 memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr, 3730 ETHER_ADDR_LEN); 3731 3732 calc_queue_ctx.pdev = pdev; 3733 calc_queue_ctx.ena_dev = ena_dev; 3734 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 3735 3736 /* Calculate initial and maximum IO queue number and size */ 3737 max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, 3738 &get_feat_ctx); 3739 rc = ena_calc_io_queue_size(&calc_queue_ctx); 3740 if (unlikely((rc != 0) || (max_num_io_queues <= 0))) { 3741 rc = EFAULT; 3742 goto err_com_free; 3743 } 3744 3745 adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; 3746 adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; 3747 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 3748 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 3749 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 3750 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 3751 3752 adapter->max_num_io_queues = max_num_io_queues; 3753 3754 adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE; 3755 3756 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 3757 3758 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3759 3760 /* set up dma tags for rx and tx buffers */ 3761 rc = ena_setup_tx_dma_tag(adapter); 3762 if (unlikely(rc != 0)) { 3763 ena_log(pdev, ERR, "Failed to create TX DMA tag\n"); 3764 goto err_com_free; 3765 } 3766 3767 rc = ena_setup_rx_dma_tag(adapter); 3768 if (unlikely(rc != 0)) { 3769 ena_log(pdev, ERR, "Failed to create RX DMA tag\n"); 3770 goto err_tx_tag_free; 3771 } 3772 3773 /* 3774 * The amount of requested MSIX vectors is equal to 3775 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant 3776 * number of admin queue interrupts. The former is initially determined 3777 * by HW capabilities (see `ena_calc_max_io_queue_num())` but may not be 3778 * achieved if there are not enough system resources. By default, the 3779 * number of effectively used IO queues is the same but later on it can 3780 * be limited by the user using sysctl interface. 3781 */ 3782 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 3783 if (unlikely(rc != 0)) { 3784 ena_log(pdev, ERR, 3785 "Failed to enable and set the admin interrupts\n"); 3786 goto err_io_free; 3787 } 3788 /* By default all of allocated MSIX vectors are actively used */ 3789 adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC; 3790 3791 /* initialize rings basic information */ 3792 ena_init_io_rings(adapter); 3793 3794 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 3795 if (rc) { 3796 ena_log(pdev, ERR, "Failed to allocate customer metrics buffer.\n"); 3797 goto err_msix_free; 3798 } 3799 3800 rc = ena_sysctl_allocate_customer_metrics_buffer(adapter); 3801 if (unlikely(rc)){ 3802 ena_log(pdev, ERR, "Failed to allocate sysctl customer metrics buffer.\n"); 3803 goto err_metrics_buffer_destroy; 3804 } 3805 3806 /* Initialize statistics */ 3807 ena_alloc_counters((counter_u64_t *)&adapter->dev_stats, 3808 sizeof(struct ena_stats_dev)); 3809 ena_alloc_counters((counter_u64_t *)&adapter->hw_stats, 3810 sizeof(struct ena_hw_stats)); 3811 ena_sysctl_add_nodes(adapter); 3812 3813 /* setup network interface */ 3814 rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx); 3815 if (unlikely(rc != 0)) { 3816 ena_log(pdev, ERR, "Error with network interface setup\n"); 3817 goto err_customer_metrics_alloc; 3818 } 3819 3820 /* Initialize reset task queue */ 3821 TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter); 3822 adapter->reset_tq = taskqueue_create("ena_reset_enqueue", 3823 M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq); 3824 taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, "%s rstq", 3825 device_get_nameunit(adapter->pdev)); 3826 3827 /* Initialize metrics task queue */ 3828 TASK_INIT(&adapter->metrics_task, 0, ena_metrics_task, adapter); 3829 adapter->metrics_tq = taskqueue_create("ena_metrics_enqueue", 3830 M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->metrics_tq); 3831 taskqueue_start_threads(&adapter->metrics_tq, 1, PI_NET, "%s metricsq", 3832 device_get_nameunit(adapter->pdev)); 3833 3834 #ifdef DEV_NETMAP 3835 rc = ena_netmap_attach(adapter); 3836 if (rc != 0) { 3837 ena_log(pdev, ERR, "netmap attach failed: %d\n", rc); 3838 goto err_detach; 3839 } 3840 #endif /* DEV_NETMAP */ 3841 3842 /* Tell the stack that the interface is not active */ 3843 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 3844 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3845 3846 /* Run the timer service */ 3847 ENA_TIMER_RESET(adapter); 3848 3849 return (0); 3850 3851 #ifdef DEV_NETMAP 3852 err_detach: 3853 ether_ifdetach(adapter->ifp); 3854 #endif /* DEV_NETMAP */ 3855 err_customer_metrics_alloc: 3856 free(adapter->customer_metrics_array, M_DEVBUF); 3857 err_metrics_buffer_destroy: 3858 ena_com_delete_customer_metrics_buffer(ena_dev); 3859 err_msix_free: 3860 ena_free_stats(adapter); 3861 ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR); 3862 ena_free_mgmnt_irq(adapter); 3863 ena_disable_msix(adapter); 3864 err_io_free: 3865 ena_free_all_io_rings_resources(adapter); 3866 ena_free_rx_dma_tag(adapter); 3867 err_tx_tag_free: 3868 ena_free_tx_dma_tag(adapter); 3869 err_com_free: 3870 ena_com_admin_destroy(ena_dev); 3871 ena_com_delete_host_info(ena_dev); 3872 ena_com_mmio_reg_read_request_destroy(ena_dev); 3873 err_bus_free: 3874 free(ena_dev->bus, M_DEVBUF); 3875 err_pci_free: 3876 ena_free_pci_resources(adapter); 3877 err_dev_free: 3878 free(ena_dev, M_DEVBUF); 3879 3880 return (rc); 3881 } 3882 3883 /** 3884 * ena_detach - Device Removal Routine 3885 * @pdev: device information struct 3886 * 3887 * ena_detach is called by the device subsystem to alert the driver 3888 * that it should release a PCI device. 3889 **/ 3890 static int 3891 ena_detach(device_t pdev) 3892 { 3893 struct ena_adapter *adapter = device_get_softc(pdev); 3894 struct ena_com_dev *ena_dev = adapter->ena_dev; 3895 int rc; 3896 3897 /* Make sure VLANS are not using driver */ 3898 if (if_vlantrunkinuse(adapter->ifp)) { 3899 ena_log(adapter->pdev, ERR, "VLAN is in use, detach first\n"); 3900 return (EBUSY); 3901 } 3902 3903 ether_ifdetach(adapter->ifp); 3904 3905 /* Stop timer service */ 3906 ENA_LOCK_LOCK(); 3907 ENA_TIMER_DRAIN(adapter); 3908 ENA_LOCK_UNLOCK(); 3909 3910 /* Release metrics task */ 3911 while (taskqueue_cancel(adapter->metrics_tq, &adapter->metrics_task, NULL)) 3912 taskqueue_drain(adapter->metrics_tq, &adapter->metrics_task); 3913 taskqueue_free(adapter->metrics_tq); 3914 3915 /* Release reset task */ 3916 while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL)) 3917 taskqueue_drain(adapter->reset_tq, &adapter->reset_task); 3918 taskqueue_free(adapter->reset_tq); 3919 3920 ENA_LOCK_LOCK(); 3921 ena_down(adapter); 3922 ena_destroy_device(adapter, true); 3923 ENA_LOCK_UNLOCK(); 3924 3925 /* Restore unregistered sysctl queue nodes. */ 3926 ena_sysctl_update_queue_node_nb(adapter, adapter->num_io_queues, 3927 adapter->max_num_io_queues); 3928 3929 #ifdef DEV_NETMAP 3930 netmap_detach(adapter->ifp); 3931 #endif /* DEV_NETMAP */ 3932 3933 ena_free_stats(adapter); 3934 3935 rc = ena_free_rx_dma_tag(adapter); 3936 if (unlikely(rc != 0)) 3937 ena_log(adapter->pdev, WARN, 3938 "Unmapped RX DMA tag associations\n"); 3939 3940 rc = ena_free_tx_dma_tag(adapter); 3941 if (unlikely(rc != 0)) 3942 ena_log(adapter->pdev, WARN, 3943 "Unmapped TX DMA tag associations\n"); 3944 3945 ena_free_irqs(adapter); 3946 3947 ena_free_pci_resources(adapter); 3948 3949 if (adapter->rss_indir != NULL) 3950 free(adapter->rss_indir, M_DEVBUF); 3951 3952 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) 3953 ena_com_rss_destroy(ena_dev); 3954 3955 ena_com_delete_host_info(ena_dev); 3956 3957 free(adapter->customer_metrics_array, M_DEVBUF); 3958 3959 ena_com_delete_customer_metrics_buffer(ena_dev); 3960 3961 if_free(adapter->ifp); 3962 3963 free(ena_dev->bus, M_DEVBUF); 3964 3965 free(ena_dev, M_DEVBUF); 3966 3967 return (bus_generic_detach(pdev)); 3968 } 3969 3970 /****************************************************************************** 3971 ******************************** AENQ Handlers ******************************* 3972 *****************************************************************************/ 3973 /** 3974 * ena_update_on_link_change: 3975 * Notify the network interface about the change in link status 3976 **/ 3977 static void 3978 ena_update_on_link_change(void *adapter_data, 3979 struct ena_admin_aenq_entry *aenq_e) 3980 { 3981 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 3982 struct ena_admin_aenq_link_change_desc *aenq_desc; 3983 int status; 3984 if_t ifp; 3985 3986 aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e; 3987 ifp = adapter->ifp; 3988 status = aenq_desc->flags & 3989 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; 3990 3991 if (status != 0) { 3992 ena_log(adapter->pdev, INFO, "link is UP\n"); 3993 ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter); 3994 if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter)) 3995 if_link_state_change(ifp, LINK_STATE_UP); 3996 } else { 3997 ena_log(adapter->pdev, INFO, "link is DOWN\n"); 3998 if_link_state_change(ifp, LINK_STATE_DOWN); 3999 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter); 4000 } 4001 } 4002 4003 static void 4004 ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) 4005 { 4006 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 4007 struct ena_admin_ena_hw_hints *hints; 4008 4009 ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION, 4010 adapter->ena_dev, "Invalid group(%x) expected %x\n", 4011 aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION); 4012 4013 switch (aenq_e->aenq_common_desc.syndrome) { 4014 case ENA_ADMIN_UPDATE_HINTS: 4015 hints = 4016 (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4); 4017 ena_update_hints(adapter, hints); 4018 break; 4019 default: 4020 ena_log(adapter->pdev, ERR, 4021 "Invalid aenq notification link state %d\n", 4022 aenq_e->aenq_common_desc.syndrome); 4023 } 4024 } 4025 4026 static void 4027 ena_lock_init(void *arg) 4028 { 4029 ENA_LOCK_INIT(); 4030 } 4031 SYSINIT(ena_lock_init, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_init, NULL); 4032 4033 static void 4034 ena_lock_uninit(void *arg) 4035 { 4036 ENA_LOCK_DESTROY(); 4037 } 4038 SYSUNINIT(ena_lock_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_uninit, NULL); 4039 4040 /** 4041 * This handler will called for unknown event group or unimplemented handlers 4042 **/ 4043 static void 4044 unimplemented_aenq_handler(void *adapter_data, 4045 struct ena_admin_aenq_entry *aenq_e) 4046 { 4047 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 4048 4049 ena_log(adapter->pdev, ERR, 4050 "Unknown event was received or event with unimplemented handler\n"); 4051 } 4052 4053 static struct ena_aenq_handlers aenq_handlers = { 4054 .handlers = { 4055 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, 4056 [ENA_ADMIN_NOTIFICATION] = ena_notification, 4057 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, 4058 }, 4059 .unimplemented_handler = unimplemented_aenq_handler 4060 }; 4061 4062 /********************************************************************* 4063 * FreeBSD Device Interface Entry Points 4064 *********************************************************************/ 4065 4066 static device_method_t ena_methods[] = { /* Device interface */ 4067 DEVMETHOD(device_probe, ena_probe), 4068 DEVMETHOD(device_attach, ena_attach), 4069 DEVMETHOD(device_detach, ena_detach), DEVMETHOD_END 4070 }; 4071 4072 static driver_t ena_driver = { 4073 "ena", 4074 ena_methods, 4075 sizeof(struct ena_adapter), 4076 }; 4077 4078 DRIVER_MODULE(ena, pci, ena_driver, 0, 0); 4079 MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array, 4080 nitems(ena_vendor_info_array) - 1); 4081 MODULE_DEPEND(ena, pci, 1, 1, 1); 4082 MODULE_DEPEND(ena, ether, 1, 1, 1); 4083 #ifdef DEV_NETMAP 4084 MODULE_DEPEND(ena, netmap, 1, 1, 1); 4085 #endif /* DEV_NETMAP */ 4086 4087 /*********************************************************************/ 4088