1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 */ 30 #include <sys/cdefs.h> 31 #include "opt_rss.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/bus.h> 36 #include <sys/endian.h> 37 #include <sys/eventhandler.h> 38 #include <sys/kernel.h> 39 #include <sys/kthread.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/module.h> 43 #include <sys/rman.h> 44 #include <sys/smp.h> 45 #include <sys/socket.h> 46 #include <sys/sockio.h> 47 #include <sys/sysctl.h> 48 #include <sys/taskqueue.h> 49 #include <sys/time.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 54 #include <machine/atomic.h> 55 #include <machine/bus.h> 56 #include <machine/in_cksum.h> 57 #include <machine/resource.h> 58 59 #include <dev/pci/pcireg.h> 60 #include <dev/pci/pcivar.h> 61 62 #include <net/bpf.h> 63 #include <net/ethernet.h> 64 #include <net/if.h> 65 #include <net/if_arp.h> 66 #include <net/if_dl.h> 67 #include <net/if_media.h> 68 #include <net/if_types.h> 69 #include <net/if_var.h> 70 #include <net/if_vlan_var.h> 71 #include <netinet/in.h> 72 #include <netinet/in_systm.h> 73 #include <netinet/if_ether.h> 74 #include <netinet/ip.h> 75 #include <netinet/ip6.h> 76 #include <netinet/tcp.h> 77 #include <netinet/udp.h> 78 79 #include "ena.h" 80 #include "ena_datapath.h" 81 #include "ena_rss.h" 82 #include "ena_sysctl.h" 83 84 #ifdef DEV_NETMAP 85 #include "ena_netmap.h" 86 #endif /* DEV_NETMAP */ 87 88 /********************************************************* 89 * Function prototypes 90 *********************************************************/ 91 static int ena_probe(device_t); 92 static void ena_intr_msix_mgmnt(void *); 93 static void ena_free_pci_resources(struct ena_adapter *); 94 static int ena_change_mtu(if_t, int); 95 static inline void ena_alloc_counters(counter_u64_t *, int); 96 static inline void ena_free_counters(counter_u64_t *, int); 97 static inline void ena_reset_counters(counter_u64_t *, int); 98 static void ena_init_io_rings_common(struct ena_adapter *, struct ena_ring *, 99 
uint16_t); 100 static void ena_init_io_rings_basic(struct ena_adapter *); 101 static void ena_init_io_rings_advanced(struct ena_adapter *); 102 static void ena_init_io_rings(struct ena_adapter *); 103 static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int); 104 static void ena_free_all_io_rings_resources(struct ena_adapter *); 105 static int ena_setup_tx_dma_tag(struct ena_adapter *); 106 static int ena_free_tx_dma_tag(struct ena_adapter *); 107 static int ena_setup_rx_dma_tag(struct ena_adapter *); 108 static int ena_free_rx_dma_tag(struct ena_adapter *); 109 static void ena_release_all_tx_dmamap(struct ena_ring *); 110 static int ena_setup_tx_resources(struct ena_adapter *, int); 111 static void ena_free_tx_resources(struct ena_adapter *, int); 112 static int ena_setup_all_tx_resources(struct ena_adapter *); 113 static void ena_free_all_tx_resources(struct ena_adapter *); 114 static int ena_setup_rx_resources(struct ena_adapter *, unsigned int); 115 static void ena_free_rx_resources(struct ena_adapter *, unsigned int); 116 static int ena_setup_all_rx_resources(struct ena_adapter *); 117 static void ena_free_all_rx_resources(struct ena_adapter *); 118 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *, 119 struct ena_rx_buffer *); 120 static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *, 121 struct ena_rx_buffer *); 122 static void ena_free_rx_bufs(struct ena_adapter *, unsigned int); 123 static void ena_refill_all_rx_bufs(struct ena_adapter *); 124 static void ena_free_all_rx_bufs(struct ena_adapter *); 125 static void ena_free_tx_bufs(struct ena_adapter *, unsigned int); 126 static void ena_free_all_tx_bufs(struct ena_adapter *); 127 static void ena_destroy_all_tx_queues(struct ena_adapter *); 128 static void ena_destroy_all_rx_queues(struct ena_adapter *); 129 static void ena_destroy_all_io_queues(struct ena_adapter *); 130 static int ena_create_io_queues(struct ena_adapter *); 131 static int 
ena_handle_msix(void *); 132 static int ena_enable_msix(struct ena_adapter *); 133 static void ena_setup_mgmnt_intr(struct ena_adapter *); 134 static int ena_setup_io_intr(struct ena_adapter *); 135 static int ena_request_mgmnt_irq(struct ena_adapter *); 136 static int ena_request_io_irq(struct ena_adapter *); 137 static void ena_free_mgmnt_irq(struct ena_adapter *); 138 static void ena_free_io_irq(struct ena_adapter *); 139 static void ena_free_irqs(struct ena_adapter *); 140 static void ena_disable_msix(struct ena_adapter *); 141 static void ena_unmask_all_io_irqs(struct ena_adapter *); 142 static int ena_up_complete(struct ena_adapter *); 143 static uint64_t ena_get_counter(if_t, ift_counter); 144 static int ena_media_change(if_t); 145 static void ena_media_status(if_t, struct ifmediareq *); 146 static void ena_init(void *); 147 static int ena_ioctl(if_t, u_long, caddr_t); 148 static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *); 149 static void ena_update_host_info(struct ena_admin_host_info *, if_t); 150 static void ena_update_hwassist(struct ena_adapter *); 151 static int ena_setup_ifnet(device_t, struct ena_adapter *, 152 struct ena_com_dev_get_features_ctx *); 153 static int ena_enable_wc(device_t, struct resource *); 154 static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *, 155 struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *); 156 static int ena_map_llq_mem_bar(device_t, struct ena_com_dev *); 157 static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *, 158 struct ena_com_dev_get_features_ctx *); 159 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *); 160 static void ena_config_host_info(struct ena_com_dev *, device_t); 161 static int ena_attach(device_t); 162 static int ena_detach(device_t); 163 static int ena_device_init(struct ena_adapter *, device_t, 164 struct ena_com_dev_get_features_ctx *, int *); 165 static int 
ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *); 166 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *); 167 static void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *); 168 static int ena_copy_eni_metrics(struct ena_adapter *); 169 static int ena_copy_customer_metrics(struct ena_adapter *); 170 static void ena_timer_service(void *); 171 172 static char ena_version[] = ENA_DEVICE_NAME ENA_DRV_MODULE_NAME 173 " v" ENA_DRV_MODULE_VERSION; 174 175 static ena_vendor_info_t ena_vendor_info_array[] = { 176 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0 }, 177 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0 }, 178 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0 }, 179 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0 }, 180 /* Last entry */ 181 { 0, 0, 0 } 182 }; 183 184 struct sx ena_global_lock; 185 186 /* 187 * Contains pointers to event handlers, e.g. link state chage. 188 */ 189 static struct ena_aenq_handlers aenq_handlers; 190 191 void 192 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 193 { 194 if (error != 0) 195 return; 196 *(bus_addr_t *)arg = segs[0].ds_addr; 197 } 198 199 int 200 ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma, 201 int mapflags, bus_size_t alignment, int domain) 202 { 203 struct ena_adapter *adapter = device_get_softc(dmadev); 204 device_t pdev = adapter->pdev; 205 uint32_t maxsize; 206 uint64_t dma_space_addr; 207 int error; 208 209 maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE; 210 211 dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width); 212 if (unlikely(dma_space_addr == 0)) 213 dma_space_addr = BUS_SPACE_MAXADDR; 214 215 error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */ 216 alignment, 0, /* alignment, bounds */ 217 dma_space_addr, /* lowaddr of exclusion window */ 218 BUS_SPACE_MAXADDR, /* highaddr of exclusion window */ 219 NULL, NULL, /* filter, filterarg */ 220 maxsize, /* maxsize */ 221 1, /* nsegments 
*/ 222 maxsize, /* maxsegsize */ 223 BUS_DMA_ALLOCNOW, /* flags */ 224 NULL, /* lockfunc */ 225 NULL, /* lockarg */ 226 &dma->tag); 227 if (unlikely(error != 0)) { 228 ena_log(pdev, ERR, "bus_dma_tag_create failed: %d\n", error); 229 goto fail_tag; 230 } 231 232 error = bus_dma_tag_set_domain(dma->tag, domain); 233 if (unlikely(error != 0)) { 234 ena_log(pdev, ERR, "bus_dma_tag_set_domain failed: %d\n", 235 error); 236 goto fail_map_create; 237 } 238 239 error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, 240 BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map); 241 if (unlikely(error != 0)) { 242 ena_log(pdev, ERR, "bus_dmamem_alloc(%ju) failed: %d\n", 243 (uintmax_t)size, error); 244 goto fail_map_create; 245 } 246 247 dma->paddr = 0; 248 error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, 249 ena_dmamap_callback, &dma->paddr, mapflags); 250 if (unlikely((error != 0) || (dma->paddr == 0))) { 251 ena_log(pdev, ERR, "bus_dmamap_load failed: %d\n", error); 252 goto fail_map_load; 253 } 254 255 bus_dmamap_sync(dma->tag, dma->map, 256 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 257 258 return (0); 259 260 fail_map_load: 261 bus_dmamem_free(dma->tag, dma->vaddr, dma->map); 262 fail_map_create: 263 bus_dma_tag_destroy(dma->tag); 264 fail_tag: 265 dma->tag = NULL; 266 dma->vaddr = NULL; 267 dma->paddr = 0; 268 269 return (error); 270 } 271 272 static void 273 ena_free_pci_resources(struct ena_adapter *adapter) 274 { 275 device_t pdev = adapter->pdev; 276 277 if (adapter->memory != NULL) { 278 bus_release_resource(pdev, SYS_RES_MEMORY, 279 PCIR_BAR(ENA_MEM_BAR), adapter->memory); 280 } 281 282 if (adapter->registers != NULL) { 283 bus_release_resource(pdev, SYS_RES_MEMORY, 284 PCIR_BAR(ENA_REG_BAR), adapter->registers); 285 } 286 287 if (adapter->msix != NULL) { 288 bus_release_resource(pdev, SYS_RES_MEMORY, adapter->msix_rid, 289 adapter->msix); 290 } 291 } 292 293 static int 294 ena_probe(device_t dev) 295 { 296 ena_vendor_info_t *ent; 297 uint16_t pci_vendor_id 
= 0; 298 uint16_t pci_device_id = 0; 299 300 pci_vendor_id = pci_get_vendor(dev); 301 pci_device_id = pci_get_device(dev); 302 303 ent = ena_vendor_info_array; 304 while (ent->vendor_id != 0) { 305 if ((pci_vendor_id == ent->vendor_id) && 306 (pci_device_id == ent->device_id)) { 307 ena_log_raw(DBG, "vendor=%x device=%x\n", pci_vendor_id, 308 pci_device_id); 309 310 device_set_desc(dev, ENA_DEVICE_DESC); 311 return (BUS_PROBE_DEFAULT); 312 } 313 314 ent++; 315 } 316 317 return (ENXIO); 318 } 319 320 static int 321 ena_change_mtu(if_t ifp, int new_mtu) 322 { 323 struct ena_adapter *adapter = if_getsoftc(ifp); 324 device_t pdev = adapter->pdev; 325 int rc; 326 327 if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { 328 ena_log(pdev, ERR, "Invalid MTU setting. new_mtu: %d max mtu: %d min mtu: %d\n", 329 new_mtu, adapter->max_mtu, ENA_MIN_MTU); 330 return (EINVAL); 331 } 332 333 rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); 334 if (likely(rc == 0)) { 335 ena_log(pdev, DBG, "set MTU to %d\n", new_mtu); 336 if_setmtu(ifp, new_mtu); 337 } else { 338 ena_log(pdev, ERR, "Failed to set MTU to %d\n", new_mtu); 339 } 340 341 return (rc); 342 } 343 344 static inline void 345 ena_alloc_counters(counter_u64_t *begin, int size) 346 { 347 counter_u64_t *end = (counter_u64_t *)((char *)begin + size); 348 349 for (; begin < end; ++begin) 350 *begin = counter_u64_alloc(M_WAITOK); 351 } 352 353 static inline void 354 ena_free_counters(counter_u64_t *begin, int size) 355 { 356 counter_u64_t *end = (counter_u64_t *)((char *)begin + size); 357 358 for (; begin < end; ++begin) 359 counter_u64_free(*begin); 360 } 361 362 static inline void 363 ena_reset_counters(counter_u64_t *begin, int size) 364 { 365 counter_u64_t *end = (counter_u64_t *)((char *)begin + size); 366 367 for (; begin < end; ++begin) 368 counter_u64_zero(*begin); 369 } 370 371 static void 372 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring, 373 uint16_t qid) 374 { 375 ring->qid 
= qid; 376 ring->adapter = adapter; 377 ring->ena_dev = adapter->ena_dev; 378 atomic_store_8(&ring->first_interrupt, 0); 379 ring->no_interrupt_event_cnt = 0; 380 } 381 382 static void 383 ena_init_io_rings_basic(struct ena_adapter *adapter) 384 { 385 struct ena_com_dev *ena_dev; 386 struct ena_ring *txr, *rxr; 387 struct ena_que *que; 388 int i; 389 390 ena_dev = adapter->ena_dev; 391 392 for (i = 0; i < adapter->num_io_queues; i++) { 393 txr = &adapter->tx_ring[i]; 394 rxr = &adapter->rx_ring[i]; 395 396 /* TX/RX common ring state */ 397 ena_init_io_rings_common(adapter, txr, i); 398 ena_init_io_rings_common(adapter, rxr, i); 399 400 /* TX specific ring state */ 401 txr->tx_max_header_size = ena_dev->tx_max_header_size; 402 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; 403 404 que = &adapter->que[i]; 405 que->adapter = adapter; 406 que->id = i; 407 que->tx_ring = txr; 408 que->rx_ring = rxr; 409 410 txr->que = que; 411 rxr->que = que; 412 413 rxr->empty_rx_queue = 0; 414 rxr->rx_mbuf_sz = ena_mbuf_sz; 415 } 416 } 417 418 static void 419 ena_init_io_rings_advanced(struct ena_adapter *adapter) 420 { 421 struct ena_ring *txr, *rxr; 422 int i; 423 424 for (i = 0; i < adapter->num_io_queues; i++) { 425 txr = &adapter->tx_ring[i]; 426 rxr = &adapter->rx_ring[i]; 427 428 /* Allocate a buf ring */ 429 txr->buf_ring_size = adapter->buf_ring_size; 430 txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF, M_WAITOK, 431 &txr->ring_mtx); 432 433 /* Allocate Tx statistics. */ 434 ena_alloc_counters((counter_u64_t *)&txr->tx_stats, 435 sizeof(txr->tx_stats)); 436 txr->tx_last_cleanup_ticks = ticks; 437 438 /* Allocate Rx statistics. 
*/ 439 ena_alloc_counters((counter_u64_t *)&rxr->rx_stats, 440 sizeof(rxr->rx_stats)); 441 442 /* Initialize locks */ 443 snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)", 444 device_get_nameunit(adapter->pdev), i); 445 snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)", 446 device_get_nameunit(adapter->pdev), i); 447 448 mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF); 449 } 450 } 451 452 static void 453 ena_init_io_rings(struct ena_adapter *adapter) 454 { 455 /* 456 * IO rings initialization can be divided into the 2 steps: 457 * 1. Initialize variables and fields with initial values and copy 458 * them from adapter/ena_dev (basic) 459 * 2. Allocate mutex, counters and buf_ring (advanced) 460 */ 461 ena_init_io_rings_basic(adapter); 462 ena_init_io_rings_advanced(adapter); 463 } 464 465 static void 466 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid) 467 { 468 struct ena_ring *txr = &adapter->tx_ring[qid]; 469 struct ena_ring *rxr = &adapter->rx_ring[qid]; 470 471 ena_free_counters((counter_u64_t *)&txr->tx_stats, 472 sizeof(txr->tx_stats)); 473 ena_free_counters((counter_u64_t *)&rxr->rx_stats, 474 sizeof(rxr->rx_stats)); 475 476 ENA_RING_MTX_LOCK(txr); 477 drbr_free(txr->br, M_DEVBUF); 478 ENA_RING_MTX_UNLOCK(txr); 479 480 mtx_destroy(&txr->ring_mtx); 481 } 482 483 static void 484 ena_free_all_io_rings_resources(struct ena_adapter *adapter) 485 { 486 int i; 487 488 for (i = 0; i < adapter->num_io_queues; i++) 489 ena_free_io_ring_resources(adapter, i); 490 } 491 492 static int 493 ena_setup_tx_dma_tag(struct ena_adapter *adapter) 494 { 495 int ret; 496 497 /* Create DMA tag for Tx buffers */ 498 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), 499 1, 0, /* alignment, bounds */ 500 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */ 501 BUS_SPACE_MAXADDR, /* highaddr of excl window */ 502 NULL, NULL, /* filter, filterarg */ 503 ENA_TSO_MAXSIZE, /* maxsize */ 504 adapter->max_tx_sgl_size - 
1, /* nsegments */ 505 ENA_TSO_MAXSIZE, /* maxsegsize */ 506 0, /* flags */ 507 NULL, /* lockfunc */ 508 NULL, /* lockfuncarg */ 509 &adapter->tx_buf_tag); 510 511 return (ret); 512 } 513 514 static int 515 ena_free_tx_dma_tag(struct ena_adapter *adapter) 516 { 517 int ret; 518 519 ret = bus_dma_tag_destroy(adapter->tx_buf_tag); 520 521 if (likely(ret == 0)) 522 adapter->tx_buf_tag = NULL; 523 524 return (ret); 525 } 526 527 static int 528 ena_setup_rx_dma_tag(struct ena_adapter *adapter) 529 { 530 int ret; 531 532 /* Create DMA tag for Rx buffers*/ 533 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */ 534 1, 0, /* alignment, bounds */ 535 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */ 536 BUS_SPACE_MAXADDR, /* highaddr of excl window */ 537 NULL, NULL, /* filter, filterarg */ 538 ena_mbuf_sz, /* maxsize */ 539 adapter->max_rx_sgl_size, /* nsegments */ 540 ena_mbuf_sz, /* maxsegsize */ 541 0, /* flags */ 542 NULL, /* lockfunc */ 543 NULL, /* lockarg */ 544 &adapter->rx_buf_tag); 545 546 return (ret); 547 } 548 549 static int 550 ena_free_rx_dma_tag(struct ena_adapter *adapter) 551 { 552 int ret; 553 554 ret = bus_dma_tag_destroy(adapter->rx_buf_tag); 555 556 if (likely(ret == 0)) 557 adapter->rx_buf_tag = NULL; 558 559 return (ret); 560 } 561 562 static void 563 ena_release_all_tx_dmamap(struct ena_ring *tx_ring) 564 { 565 struct ena_adapter *adapter = tx_ring->adapter; 566 struct ena_tx_buffer *tx_info; 567 bus_dma_tag_t tx_tag = adapter->tx_buf_tag; 568 int i; 569 #ifdef DEV_NETMAP 570 struct ena_netmap_tx_info *nm_info; 571 int j; 572 #endif /* DEV_NETMAP */ 573 574 for (i = 0; i < tx_ring->ring_size; ++i) { 575 tx_info = &tx_ring->tx_buffer_info[i]; 576 #ifdef DEV_NETMAP 577 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) { 578 nm_info = &tx_info->nm_info; 579 for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) { 580 if (nm_info->map_seg[j] != NULL) { 581 bus_dmamap_destroy(tx_tag, 582 nm_info->map_seg[j]); 583 nm_info->map_seg[j] = 
NULL; 584 } 585 } 586 } 587 #endif /* DEV_NETMAP */ 588 if (tx_info->dmamap != NULL) { 589 bus_dmamap_destroy(tx_tag, tx_info->dmamap); 590 tx_info->dmamap = NULL; 591 } 592 } 593 } 594 595 /** 596 * ena_setup_tx_resources - allocate Tx resources (Descriptors) 597 * @adapter: network interface device structure 598 * @qid: queue index 599 * 600 * Returns 0 on success, otherwise on failure. 601 **/ 602 static int 603 ena_setup_tx_resources(struct ena_adapter *adapter, int qid) 604 { 605 device_t pdev = adapter->pdev; 606 char thread_name[MAXCOMLEN + 1]; 607 struct ena_que *que = &adapter->que[qid]; 608 struct ena_ring *tx_ring = que->tx_ring; 609 cpuset_t *cpu_mask = NULL; 610 int size, i, err; 611 #ifdef DEV_NETMAP 612 bus_dmamap_t *map; 613 int j; 614 615 ena_netmap_reset_tx_ring(adapter, qid); 616 #endif /* DEV_NETMAP */ 617 618 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; 619 620 tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); 621 if (unlikely(tx_ring->tx_buffer_info == NULL)) 622 return (ENOMEM); 623 624 size = sizeof(uint16_t) * tx_ring->ring_size; 625 tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); 626 if (unlikely(tx_ring->free_tx_ids == NULL)) 627 goto err_buf_info_free; 628 629 size = tx_ring->tx_max_header_size; 630 tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF, 631 M_NOWAIT | M_ZERO); 632 if (unlikely(tx_ring->push_buf_intermediate_buf == NULL)) 633 goto err_tx_ids_free; 634 635 /* Req id stack for TX OOO completions */ 636 for (i = 0; i < tx_ring->ring_size; i++) 637 tx_ring->free_tx_ids[i] = i; 638 639 /* Reset TX statistics. */ 640 ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats, 641 sizeof(tx_ring->tx_stats)); 642 643 tx_ring->next_to_use = 0; 644 tx_ring->next_to_clean = 0; 645 tx_ring->acum_pkts = 0; 646 647 /* Make sure that drbr is empty */ 648 ENA_RING_MTX_LOCK(tx_ring); 649 drbr_flush(adapter->ifp, tx_ring->br); 650 ENA_RING_MTX_UNLOCK(tx_ring); 651 652 /* ... 
and create the buffer DMA maps */ 653 for (i = 0; i < tx_ring->ring_size; i++) { 654 err = bus_dmamap_create(adapter->tx_buf_tag, 0, 655 &tx_ring->tx_buffer_info[i].dmamap); 656 if (unlikely(err != 0)) { 657 ena_log(pdev, ERR, 658 "Unable to create Tx DMA map for buffer %d\n", i); 659 goto err_map_release; 660 } 661 662 #ifdef DEV_NETMAP 663 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) { 664 map = tx_ring->tx_buffer_info[i].nm_info.map_seg; 665 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) { 666 err = bus_dmamap_create(adapter->tx_buf_tag, 0, 667 &map[j]); 668 if (unlikely(err != 0)) { 669 ena_log(pdev, ERR, 670 "Unable to create Tx DMA for buffer %d %d\n", 671 i, j); 672 goto err_map_release; 673 } 674 } 675 } 676 #endif /* DEV_NETMAP */ 677 } 678 679 /* Allocate taskqueues */ 680 TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring); 681 tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT, 682 taskqueue_thread_enqueue, &tx_ring->enqueue_tq); 683 if (unlikely(tx_ring->enqueue_tq == NULL)) { 684 ena_log(pdev, ERR, 685 "Unable to create taskqueue for enqueue task\n"); 686 i = tx_ring->ring_size; 687 goto err_map_release; 688 } 689 690 tx_ring->running = true; 691 692 #ifdef RSS 693 cpu_mask = &que->cpu_mask; 694 snprintf(thread_name, sizeof(thread_name), "%s txeq %d", 695 device_get_nameunit(adapter->pdev), que->cpu); 696 #else 697 snprintf(thread_name, sizeof(thread_name), "%s txeq %d", 698 device_get_nameunit(adapter->pdev), que->id); 699 #endif 700 taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET, 701 cpu_mask, "%s", thread_name); 702 703 return (0); 704 705 err_map_release: 706 ena_release_all_tx_dmamap(tx_ring); 707 err_tx_ids_free: 708 free(tx_ring->free_tx_ids, M_DEVBUF); 709 tx_ring->free_tx_ids = NULL; 710 err_buf_info_free: 711 free(tx_ring->tx_buffer_info, M_DEVBUF); 712 tx_ring->tx_buffer_info = NULL; 713 714 return (ENOMEM); 715 } 716 717 /** 718 * ena_free_tx_resources - Free Tx Resources per Queue 719 * 
@adapter: network interface device structure 720 * @qid: queue index 721 * 722 * Free all transmit software resources 723 **/ 724 static void 725 ena_free_tx_resources(struct ena_adapter *adapter, int qid) 726 { 727 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 728 #ifdef DEV_NETMAP 729 struct ena_netmap_tx_info *nm_info; 730 int j; 731 #endif /* DEV_NETMAP */ 732 733 while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task, NULL)) 734 taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task); 735 736 taskqueue_free(tx_ring->enqueue_tq); 737 738 ENA_RING_MTX_LOCK(tx_ring); 739 /* Flush buffer ring, */ 740 drbr_flush(adapter->ifp, tx_ring->br); 741 742 /* Free buffer DMA maps, */ 743 for (int i = 0; i < tx_ring->ring_size; i++) { 744 bus_dmamap_sync(adapter->tx_buf_tag, 745 tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE); 746 bus_dmamap_unload(adapter->tx_buf_tag, 747 tx_ring->tx_buffer_info[i].dmamap); 748 bus_dmamap_destroy(adapter->tx_buf_tag, 749 tx_ring->tx_buffer_info[i].dmamap); 750 751 #ifdef DEV_NETMAP 752 if (if_getcapenable(adapter->ifp) & IFCAP_NETMAP) { 753 nm_info = &tx_ring->tx_buffer_info[i].nm_info; 754 for (j = 0; j < ENA_PKT_MAX_BUFS; j++) { 755 if (nm_info->socket_buf_idx[j] != 0) { 756 bus_dmamap_sync(adapter->tx_buf_tag, 757 nm_info->map_seg[j], 758 BUS_DMASYNC_POSTWRITE); 759 ena_netmap_unload(adapter, 760 nm_info->map_seg[j]); 761 } 762 bus_dmamap_destroy(adapter->tx_buf_tag, 763 nm_info->map_seg[j]); 764 nm_info->socket_buf_idx[j] = 0; 765 } 766 } 767 #endif /* DEV_NETMAP */ 768 769 m_freem(tx_ring->tx_buffer_info[i].mbuf); 770 tx_ring->tx_buffer_info[i].mbuf = NULL; 771 } 772 ENA_RING_MTX_UNLOCK(tx_ring); 773 774 /* And free allocated memory. 
*/ 775 free(tx_ring->tx_buffer_info, M_DEVBUF); 776 tx_ring->tx_buffer_info = NULL; 777 778 free(tx_ring->free_tx_ids, M_DEVBUF); 779 tx_ring->free_tx_ids = NULL; 780 781 free(tx_ring->push_buf_intermediate_buf, M_DEVBUF); 782 tx_ring->push_buf_intermediate_buf = NULL; 783 } 784 785 /** 786 * ena_setup_all_tx_resources - allocate all queues Tx resources 787 * @adapter: network interface device structure 788 * 789 * Returns 0 on success, otherwise on failure. 790 **/ 791 static int 792 ena_setup_all_tx_resources(struct ena_adapter *adapter) 793 { 794 int i, rc; 795 796 for (i = 0; i < adapter->num_io_queues; i++) { 797 rc = ena_setup_tx_resources(adapter, i); 798 if (rc != 0) { 799 ena_log(adapter->pdev, ERR, 800 "Allocation for Tx Queue %u failed\n", i); 801 goto err_setup_tx; 802 } 803 } 804 805 return (0); 806 807 err_setup_tx: 808 /* Rewind the index freeing the rings as we go */ 809 while (i--) 810 ena_free_tx_resources(adapter, i); 811 return (rc); 812 } 813 814 /** 815 * ena_free_all_tx_resources - Free Tx Resources for All Queues 816 * @adapter: network interface device structure 817 * 818 * Free all transmit software resources 819 **/ 820 static void 821 ena_free_all_tx_resources(struct ena_adapter *adapter) 822 { 823 int i; 824 825 for (i = 0; i < adapter->num_io_queues; i++) 826 ena_free_tx_resources(adapter, i); 827 } 828 829 /** 830 * ena_setup_rx_resources - allocate Rx resources (Descriptors) 831 * @adapter: network interface device structure 832 * @qid: queue index 833 * 834 * Returns 0 on success, otherwise on failure. 
835 **/ 836 static int 837 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid) 838 { 839 device_t pdev = adapter->pdev; 840 struct ena_que *que = &adapter->que[qid]; 841 struct ena_ring *rx_ring = que->rx_ring; 842 int size, err, i; 843 844 size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size; 845 846 #ifdef DEV_NETMAP 847 ena_netmap_reset_rx_ring(adapter, qid); 848 rx_ring->initialized = false; 849 #endif /* DEV_NETMAP */ 850 851 /* 852 * Alloc extra element so in rx path 853 * we can always prefetch rx_info + 1 854 */ 855 size += sizeof(struct ena_rx_buffer); 856 857 rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); 858 859 size = sizeof(uint16_t) * rx_ring->ring_size; 860 rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK); 861 862 for (i = 0; i < rx_ring->ring_size; i++) 863 rx_ring->free_rx_ids[i] = i; 864 865 /* Reset RX statistics. */ 866 ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats, 867 sizeof(rx_ring->rx_stats)); 868 869 rx_ring->next_to_clean = 0; 870 rx_ring->next_to_use = 0; 871 872 /* ... 
and create the buffer DMA maps */ 873 for (i = 0; i < rx_ring->ring_size; i++) { 874 err = bus_dmamap_create(adapter->rx_buf_tag, 0, 875 &(rx_ring->rx_buffer_info[i].map)); 876 if (err != 0) { 877 ena_log(pdev, ERR, 878 "Unable to create Rx DMA map for buffer %d\n", i); 879 goto err_buf_info_unmap; 880 } 881 } 882 883 /* Create LRO for the ring */ 884 if ((if_getcapenable(adapter->ifp) & IFCAP_LRO) != 0) { 885 int err = tcp_lro_init(&rx_ring->lro); 886 if (err != 0) { 887 ena_log(pdev, ERR, "LRO[%d] Initialization failed!\n", 888 qid); 889 } else { 890 ena_log(pdev, DBG, "RX Soft LRO[%d] Initialized\n", 891 qid); 892 rx_ring->lro.ifp = adapter->ifp; 893 } 894 } 895 896 return (0); 897 898 err_buf_info_unmap: 899 while (i--) { 900 bus_dmamap_destroy(adapter->rx_buf_tag, 901 rx_ring->rx_buffer_info[i].map); 902 } 903 904 free(rx_ring->free_rx_ids, M_DEVBUF); 905 rx_ring->free_rx_ids = NULL; 906 free(rx_ring->rx_buffer_info, M_DEVBUF); 907 rx_ring->rx_buffer_info = NULL; 908 return (ENOMEM); 909 } 910 911 /** 912 * ena_free_rx_resources - Free Rx Resources 913 * @adapter: network interface device structure 914 * @qid: queue index 915 * 916 * Free all receive software resources 917 **/ 918 static void 919 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid) 920 { 921 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 922 923 /* Free buffer DMA maps, */ 924 for (int i = 0; i < rx_ring->ring_size; i++) { 925 bus_dmamap_sync(adapter->rx_buf_tag, 926 rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD); 927 m_freem(rx_ring->rx_buffer_info[i].mbuf); 928 rx_ring->rx_buffer_info[i].mbuf = NULL; 929 bus_dmamap_unload(adapter->rx_buf_tag, 930 rx_ring->rx_buffer_info[i].map); 931 bus_dmamap_destroy(adapter->rx_buf_tag, 932 rx_ring->rx_buffer_info[i].map); 933 } 934 935 /* free LRO resources, */ 936 tcp_lro_free(&rx_ring->lro); 937 938 /* free allocated memory */ 939 free(rx_ring->rx_buffer_info, M_DEVBUF); 940 rx_ring->rx_buffer_info = NULL; 941 942 
free(rx_ring->free_rx_ids, M_DEVBUF); 943 rx_ring->free_rx_ids = NULL; 944 } 945 946 /** 947 * ena_setup_all_rx_resources - allocate all queues Rx resources 948 * @adapter: network interface device structure 949 * 950 * Returns 0 on success, otherwise on failure. 951 **/ 952 static int 953 ena_setup_all_rx_resources(struct ena_adapter *adapter) 954 { 955 int i, rc = 0; 956 957 for (i = 0; i < adapter->num_io_queues; i++) { 958 rc = ena_setup_rx_resources(adapter, i); 959 if (rc != 0) { 960 ena_log(adapter->pdev, ERR, 961 "Allocation for Rx Queue %u failed\n", i); 962 goto err_setup_rx; 963 } 964 } 965 return (0); 966 967 err_setup_rx: 968 /* rewind the index freeing the rings as we go */ 969 while (i--) 970 ena_free_rx_resources(adapter, i); 971 return (rc); 972 } 973 974 /** 975 * ena_free_all_rx_resources - Free Rx resources for all queues 976 * @adapter: network interface device structure 977 * 978 * Free all receive software resources 979 **/ 980 static void 981 ena_free_all_rx_resources(struct ena_adapter *adapter) 982 { 983 int i; 984 985 for (i = 0; i < adapter->num_io_queues; i++) 986 ena_free_rx_resources(adapter, i); 987 } 988 989 static inline int 990 ena_alloc_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring, 991 struct ena_rx_buffer *rx_info) 992 { 993 device_t pdev = adapter->pdev; 994 struct ena_com_buf *ena_buf; 995 bus_dma_segment_t segs[1]; 996 int nsegs, error; 997 int mlen; 998 999 /* if previous allocated frag is not used */ 1000 if (unlikely(rx_info->mbuf != NULL)) 1001 return (0); 1002 1003 /* Get mbuf using UMA allocator */ 1004 rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, 1005 rx_ring->rx_mbuf_sz); 1006 1007 if (unlikely(rx_info->mbuf == NULL)) { 1008 counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1); 1009 rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 1010 if (unlikely(rx_info->mbuf == NULL)) { 1011 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); 1012 return (ENOMEM); 1013 } 1014 mlen = MCLBYTES; 
1015 } else { 1016 mlen = rx_ring->rx_mbuf_sz; 1017 } 1018 /* Set mbuf length*/ 1019 rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen; 1020 1021 /* Map packets for DMA */ 1022 ena_log(pdev, DBG, 1023 "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n", 1024 adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len); 1025 error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map, 1026 rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); 1027 if (unlikely((error != 0) || (nsegs != 1))) { 1028 ena_log(pdev, WARN, 1029 "failed to map mbuf, error: %d, nsegs: %d\n", error, nsegs); 1030 counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1); 1031 goto exit; 1032 } 1033 1034 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD); 1035 1036 ena_buf = &rx_info->ena_buf; 1037 ena_buf->paddr = segs[0].ds_addr; 1038 ena_buf->len = mlen; 1039 1040 ena_log(pdev, DBG, 1041 "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n", 1042 rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr); 1043 1044 return (0); 1045 1046 exit: 1047 m_freem(rx_info->mbuf); 1048 rx_info->mbuf = NULL; 1049 return (EFAULT); 1050 } 1051 1052 static void 1053 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring, 1054 struct ena_rx_buffer *rx_info) 1055 { 1056 if (rx_info->mbuf == NULL) { 1057 ena_log(adapter->pdev, WARN, 1058 "Trying to free unallocated buffer\n"); 1059 return; 1060 } 1061 1062 bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, 1063 BUS_DMASYNC_POSTREAD); 1064 bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map); 1065 m_freem(rx_info->mbuf); 1066 rx_info->mbuf = NULL; 1067 } 1068 1069 /** 1070 * ena_refill_rx_bufs - Refills ring with descriptors 1071 * @rx_ring: the ring which we want to feed with free descriptors 1072 * @num: number of descriptors to refill 1073 * Refills the ring with newly allocated DMA-mapped mbufs for receiving 1074 **/ 1075 int 1076 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num) 1077 { 1078 
struct ena_adapter *adapter = rx_ring->adapter; 1079 device_t pdev = adapter->pdev; 1080 uint16_t next_to_use, req_id; 1081 uint32_t i; 1082 int rc; 1083 1084 ena_log_io(adapter->pdev, DBG, "refill qid: %d\n", rx_ring->qid); 1085 1086 next_to_use = rx_ring->next_to_use; 1087 1088 for (i = 0; i < num; i++) { 1089 struct ena_rx_buffer *rx_info; 1090 1091 ena_log_io(pdev, DBG, "RX buffer - next to use: %d\n", 1092 next_to_use); 1093 1094 req_id = rx_ring->free_rx_ids[next_to_use]; 1095 rx_info = &rx_ring->rx_buffer_info[req_id]; 1096 #ifdef DEV_NETMAP 1097 if (ena_rx_ring_in_netmap(adapter, rx_ring->qid)) 1098 rc = ena_netmap_alloc_rx_slot(adapter, rx_ring, 1099 rx_info); 1100 else 1101 #endif /* DEV_NETMAP */ 1102 rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info); 1103 if (unlikely(rc != 0)) { 1104 ena_log_io(pdev, WARN, 1105 "failed to alloc buffer for rx queue %d\n", 1106 rx_ring->qid); 1107 break; 1108 } 1109 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, 1110 &rx_info->ena_buf, req_id); 1111 if (unlikely(rc != 0)) { 1112 ena_log_io(pdev, WARN, 1113 "failed to add buffer for rx queue %d\n", 1114 rx_ring->qid); 1115 break; 1116 } 1117 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, 1118 rx_ring->ring_size); 1119 } 1120 1121 if (unlikely(i < num)) { 1122 counter_u64_add(rx_ring->rx_stats.refil_partial, 1); 1123 ena_log_io(pdev, WARN, 1124 "refilled rx qid %d with only %d mbufs (from %d)\n", 1125 rx_ring->qid, i, num); 1126 } 1127 1128 if (likely(i != 0)) 1129 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); 1130 1131 rx_ring->next_to_use = next_to_use; 1132 return (i); 1133 } 1134 1135 int 1136 ena_update_buf_ring_size(struct ena_adapter *adapter, 1137 uint32_t new_buf_ring_size) 1138 { 1139 uint32_t old_buf_ring_size; 1140 int rc = 0; 1141 bool dev_was_up; 1142 1143 old_buf_ring_size = adapter->buf_ring_size; 1144 adapter->buf_ring_size = new_buf_ring_size; 1145 1146 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1147 ena_down(adapter); 1148 1149 
/* Reconfigure buf ring for all Tx rings. */ 1150 ena_free_all_io_rings_resources(adapter); 1151 ena_init_io_rings_advanced(adapter); 1152 if (dev_was_up) { 1153 /* 1154 * If ena_up() fails, it's not because of recent buf_ring size 1155 * changes. Because of that, we just want to revert old drbr 1156 * value and trigger the reset because something else had to 1157 * go wrong. 1158 */ 1159 rc = ena_up(adapter); 1160 if (unlikely(rc != 0)) { 1161 ena_log(adapter->pdev, ERR, 1162 "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n", 1163 new_buf_ring_size, old_buf_ring_size); 1164 1165 /* Revert old size and trigger the reset */ 1166 adapter->buf_ring_size = old_buf_ring_size; 1167 ena_free_all_io_rings_resources(adapter); 1168 ena_init_io_rings_advanced(adapter); 1169 1170 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, 1171 adapter); 1172 ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER); 1173 } 1174 } 1175 1176 return (rc); 1177 } 1178 1179 int 1180 ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size, 1181 uint32_t new_rx_size) 1182 { 1183 uint32_t old_tx_size, old_rx_size; 1184 int rc = 0; 1185 bool dev_was_up; 1186 1187 old_tx_size = adapter->requested_tx_ring_size; 1188 old_rx_size = adapter->requested_rx_ring_size; 1189 adapter->requested_tx_ring_size = new_tx_size; 1190 adapter->requested_rx_ring_size = new_rx_size; 1191 1192 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1193 ena_down(adapter); 1194 1195 /* Configure queues with new size. */ 1196 ena_init_io_rings_basic(adapter); 1197 if (dev_was_up) { 1198 rc = ena_up(adapter); 1199 if (unlikely(rc != 0)) { 1200 ena_log(adapter->pdev, ERR, 1201 "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n", 1202 new_tx_size, new_rx_size, old_tx_size, old_rx_size); 1203 1204 /* Revert old size. 
/*
 * ena_update_io_rings - rebuild the IO rings for a new number of queues
 * @adapter: network interface device structure
 * @num: value stored into adapter->num_io_queues
 *
 * Frees the resources of the existing IO rings, destroys the RSS state,
 * records the new queue count and re-initializes the IO rings.
 */
static void
ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
{
	/*
	 * Runs before num_io_queues is overwritten below.
	 * NOTE(review): presumably so the rings are released using the old
	 * queue count - confirm against ena_free_all_io_rings_resources().
	 */
	ena_free_all_io_rings_resources(adapter);
	/* Force indirection table to be reinitialized */
	ena_com_rss_destroy(adapter->ena_dev);

	adapter->num_io_queues = num;
	ena_init_io_rings(adapter);
}
1269 "Triggering device reset.\n"); 1270 ENA_FLAG_SET_ATOMIC( 1271 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 1272 ena_trigger_reset(adapter, 1273 ENA_REGS_RESET_OS_TRIGGER); 1274 } 1275 } 1276 } 1277 return (rc); 1278 } 1279 1280 int 1281 ena_update_cpu_stride(struct ena_adapter *adapter, uint32_t new_num) 1282 { 1283 uint32_t old_num; 1284 int rc = 0; 1285 bool dev_was_up; 1286 1287 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1288 old_num = adapter->irq_cpu_stride; 1289 1290 ena_down(adapter); 1291 1292 adapter->irq_cpu_stride = new_num; 1293 1294 if (dev_was_up) { 1295 rc = ena_up(adapter); 1296 if (unlikely(rc != 0)) { 1297 ena_log(adapter->pdev, ERR, 1298 "Failed to configure device %d IRQ CPU stride. " 1299 "Reverting to previous value: %d\n", 1300 new_num, old_num); 1301 1302 adapter->irq_cpu_stride = old_num; 1303 1304 rc = ena_up(adapter); 1305 if (unlikely(rc != 0)) { 1306 ena_log(adapter->pdev, ERR, 1307 "Failed to revert to previous setup." 1308 "Triggering device reset.\n"); 1309 ENA_FLAG_SET_ATOMIC( 1310 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 1311 ena_trigger_reset(adapter, 1312 ENA_REGS_RESET_OS_TRIGGER); 1313 } 1314 } 1315 } 1316 return (rc); 1317 } 1318 1319 /* Caller should sanitize new_num */ 1320 int 1321 ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num) 1322 { 1323 uint32_t old_num; 1324 int rc = 0; 1325 bool dev_was_up; 1326 1327 dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 1328 old_num = adapter->num_io_queues; 1329 ena_down(adapter); 1330 1331 ena_update_io_rings(adapter, new_num); 1332 1333 if (dev_was_up) { 1334 rc = ena_up(adapter); 1335 if (unlikely(rc != 0)) { 1336 ena_log(adapter->pdev, ERR, 1337 "Failed to configure device with %u IO queues. 
" 1338 "Reverting to previous value: %u\n", 1339 new_num, old_num); 1340 1341 ena_update_io_rings(adapter, old_num); 1342 1343 rc = ena_up(adapter); 1344 if (unlikely(rc != 0)) { 1345 ena_log(adapter->pdev, ERR, 1346 "Failed to revert to previous setup IO " 1347 "queues. Triggering device reset.\n"); 1348 ENA_FLAG_SET_ATOMIC( 1349 ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 1350 ena_trigger_reset(adapter, 1351 ENA_REGS_RESET_OS_TRIGGER); 1352 } 1353 } 1354 } 1355 1356 return (rc); 1357 } 1358 1359 static void 1360 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid) 1361 { 1362 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 1363 unsigned int i; 1364 1365 for (i = 0; i < rx_ring->ring_size; i++) { 1366 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; 1367 1368 if (rx_info->mbuf != NULL) 1369 ena_free_rx_mbuf(adapter, rx_ring, rx_info); 1370 #ifdef DEV_NETMAP 1371 if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) && 1372 (if_getcapenable(adapter->ifp) & IFCAP_NETMAP)) { 1373 if (rx_info->netmap_buf_idx != 0) 1374 ena_netmap_free_rx_slot(adapter, rx_ring, 1375 rx_info); 1376 } 1377 #endif /* DEV_NETMAP */ 1378 } 1379 } 1380 1381 /** 1382 * ena_refill_all_rx_bufs - allocate all queues Rx buffers 1383 * @adapter: network interface device structure 1384 * 1385 */ 1386 static void 1387 ena_refill_all_rx_bufs(struct ena_adapter *adapter) 1388 { 1389 struct ena_ring *rx_ring; 1390 int i, rc, bufs_num; 1391 1392 for (i = 0; i < adapter->num_io_queues; i++) { 1393 rx_ring = &adapter->rx_ring[i]; 1394 bufs_num = rx_ring->ring_size - 1; 1395 rc = ena_refill_rx_bufs(rx_ring, bufs_num); 1396 if (unlikely(rc != bufs_num)) 1397 ena_log_io(adapter->pdev, WARN, 1398 "refilling Queue %d failed. 
" 1399 "Allocated %d buffers from: %d\n", 1400 i, rc, bufs_num); 1401 #ifdef DEV_NETMAP 1402 rx_ring->initialized = true; 1403 #endif /* DEV_NETMAP */ 1404 } 1405 } 1406 1407 static void 1408 ena_free_all_rx_bufs(struct ena_adapter *adapter) 1409 { 1410 int i; 1411 1412 for (i = 0; i < adapter->num_io_queues; i++) 1413 ena_free_rx_bufs(adapter, i); 1414 } 1415 1416 /** 1417 * ena_free_tx_bufs - Free Tx Buffers per Queue 1418 * @adapter: network interface device structure 1419 * @qid: queue index 1420 **/ 1421 static void 1422 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid) 1423 { 1424 bool print_once = true; 1425 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 1426 1427 ENA_RING_MTX_LOCK(tx_ring); 1428 for (int i = 0; i < tx_ring->ring_size; i++) { 1429 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; 1430 1431 if (tx_info->mbuf == NULL) 1432 continue; 1433 1434 if (print_once) { 1435 ena_log(adapter->pdev, WARN, 1436 "free uncompleted tx mbuf qid %d idx 0x%x\n", qid, 1437 i); 1438 print_once = false; 1439 } else { 1440 ena_log(adapter->pdev, DBG, 1441 "free uncompleted tx mbuf qid %d idx 0x%x\n", qid, 1442 i); 1443 } 1444 1445 bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, 1446 BUS_DMASYNC_POSTWRITE); 1447 bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); 1448 1449 m_free(tx_info->mbuf); 1450 tx_info->mbuf = NULL; 1451 } 1452 ENA_RING_MTX_UNLOCK(tx_ring); 1453 } 1454 1455 static void 1456 ena_free_all_tx_bufs(struct ena_adapter *adapter) 1457 { 1458 for (int i = 0; i < adapter->num_io_queues; i++) 1459 ena_free_tx_bufs(adapter, i); 1460 } 1461 1462 static void 1463 ena_destroy_all_tx_queues(struct ena_adapter *adapter) 1464 { 1465 uint16_t ena_qid; 1466 int i; 1467 1468 for (i = 0; i < adapter->num_io_queues; i++) { 1469 ena_qid = ENA_IO_TXQ_IDX(i); 1470 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1471 } 1472 } 1473 1474 static void 1475 ena_destroy_all_rx_queues(struct ena_adapter *adapter) 1476 { 1477 
/*
 * ena_destroy_all_io_queues - stop the cleanup tasks and destroy IO queues
 * @adapter: network interface device structure
 *
 * For every IO queue the cleanup task is cancelled (draining it whenever the
 * cancel attempt reports failure) and the per-queue taskqueue is freed.
 * Afterwards all Tx and then all Rx device queues are destroyed.
 */
static void
ena_destroy_all_io_queues(struct ena_adapter *adapter)
{
	struct ena_que *queue;
	int i;

	for (i = 0; i < adapter->num_io_queues; i++) {
		queue = &adapter->que[i];
		/*
		 * A non-zero return from taskqueue_cancel() means the task
		 * could not be cancelled; wait for it with taskqueue_drain()
		 * and try again until the cancel succeeds.
		 */
		while (taskqueue_cancel(queue->cleanup_tq, &queue->cleanup_task, NULL))
			taskqueue_drain(queue->cleanup_tq, &queue->cleanup_task);
		taskqueue_free(queue->cleanup_tq);
	}

	/* The device queues go away only after the cleanup tasks are gone. */
	ena_destroy_all_tx_queues(adapter);
	ena_destroy_all_rx_queues(adapter);
}
TX queue num" 1538 " %d rc: %d\n", 1539 i, rc); 1540 ena_com_destroy_io_queue(ena_dev, ena_qid); 1541 goto err_tx; 1542 } 1543 1544 if (ctx.numa_node >= 0) { 1545 ena_com_update_numa_node(ring->ena_com_io_cq, 1546 ctx.numa_node); 1547 } 1548 } 1549 1550 /* Create RX queues */ 1551 for (i = 0; i < adapter->num_io_queues; i++) { 1552 msix_vector = ENA_IO_IRQ_IDX(i); 1553 ena_qid = ENA_IO_RXQ_IDX(i); 1554 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 1555 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 1556 ctx.queue_size = adapter->requested_rx_ring_size; 1557 ctx.msix_vector = msix_vector; 1558 ctx.qid = ena_qid; 1559 ctx.numa_node = adapter->que[i].domain; 1560 1561 rc = ena_com_create_io_queue(ena_dev, &ctx); 1562 if (unlikely(rc != 0)) { 1563 ena_log(adapter->pdev, ERR, 1564 "Failed to create io RX queue[%d] rc: %d\n", i, rc); 1565 goto err_rx; 1566 } 1567 1568 ring = &adapter->rx_ring[i]; 1569 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 1570 &ring->ena_com_io_sq, &ring->ena_com_io_cq); 1571 if (unlikely(rc != 0)) { 1572 ena_log(adapter->pdev, ERR, 1573 "Failed to get RX queue handlers. 
/**
 * ena_intr_msix_mgmnt - MSIX Interrupt Handler for admin/async queue
 * @arg: the adapter (struct ena_adapter *) this interrupt was registered for
 *
 * Always runs the admin queue completion handler. The AENQ handler is run
 * only while ENA_FLAG_DEVICE_RUNNING is set on the adapter.
 **/
static void
ena_intr_msix_mgmnt(void *arg)
{
	struct ena_adapter *adapter = (struct ena_adapter *)arg;

	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
}
IFF_DRV_RUNNING) == 0)) 1646 return (FILTER_STRAY); 1647 1648 taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task); 1649 1650 return (FILTER_HANDLED); 1651 } 1652 1653 static int 1654 ena_enable_msix(struct ena_adapter *adapter) 1655 { 1656 device_t dev = adapter->pdev; 1657 int msix_vecs, msix_req; 1658 int i, rc = 0; 1659 1660 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) { 1661 ena_log(dev, ERR, "Error, MSI-X is already enabled\n"); 1662 return (EINVAL); 1663 } 1664 1665 /* Reserved the max msix vectors we might need */ 1666 msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues); 1667 1668 adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry), 1669 M_DEVBUF, M_WAITOK | M_ZERO); 1670 1671 ena_log(dev, DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs); 1672 1673 for (i = 0; i < msix_vecs; i++) { 1674 adapter->msix_entries[i].entry = i; 1675 /* Vectors must start from 1 */ 1676 adapter->msix_entries[i].vector = i + 1; 1677 } 1678 1679 msix_req = msix_vecs; 1680 rc = pci_alloc_msix(dev, &msix_vecs); 1681 if (unlikely(rc != 0)) { 1682 ena_log(dev, ERR, "Failed to enable MSIX, vectors %d rc %d\n", 1683 msix_vecs, rc); 1684 1685 rc = ENOSPC; 1686 goto err_msix_free; 1687 } 1688 1689 if (msix_vecs != msix_req) { 1690 if (msix_vecs == ENA_ADMIN_MSIX_VEC) { 1691 ena_log(dev, ERR, 1692 "Not enough number of MSI-x allocated: %d\n", 1693 msix_vecs); 1694 pci_release_msi(dev); 1695 rc = ENOSPC; 1696 goto err_msix_free; 1697 } 1698 ena_log(dev, ERR, 1699 "Enable only %d MSI-x (out of %d), reduce " 1700 "the number of queues\n", 1701 msix_vecs, msix_req); 1702 } 1703 1704 adapter->msix_vecs = msix_vecs; 1705 ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter); 1706 1707 return (0); 1708 1709 err_msix_free: 1710 free(adapter->msix_entries, M_DEVBUF); 1711 adapter->msix_entries = NULL; 1712 1713 return (rc); 1714 } 1715 1716 static void 1717 ena_setup_mgmnt_intr(struct ena_adapter *adapter) 1718 { 1719 
snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, ENA_IRQNAME_SIZE, 1720 "ena-mgmnt@pci:%s", device_get_nameunit(adapter->pdev)); 1721 /* 1722 * Handler is NULL on purpose, it will be set 1723 * when mgmnt interrupt is acquired 1724 */ 1725 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL; 1726 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; 1727 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = 1728 adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector; 1729 } 1730 1731 static int 1732 ena_setup_io_intr(struct ena_adapter *adapter) 1733 { 1734 #ifdef RSS 1735 int num_buckets = rss_getnumbuckets(); 1736 static int last_bind = 0; 1737 int cur_bind; 1738 int idx; 1739 #endif 1740 int irq_idx; 1741 1742 if (adapter->msix_entries == NULL) 1743 return (EINVAL); 1744 1745 #ifdef RSS 1746 if (adapter->first_bind < 0) { 1747 adapter->first_bind = last_bind; 1748 last_bind = (last_bind + adapter->num_io_queues) % num_buckets; 1749 } 1750 cur_bind = adapter->first_bind; 1751 #endif 1752 1753 for (int i = 0; i < adapter->num_io_queues; i++) { 1754 irq_idx = ENA_IO_IRQ_IDX(i); 1755 1756 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, 1757 "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i); 1758 adapter->irq_tbl[irq_idx].handler = ena_handle_msix; 1759 adapter->irq_tbl[irq_idx].data = &adapter->que[i]; 1760 adapter->irq_tbl[irq_idx].vector = 1761 adapter->msix_entries[irq_idx].vector; 1762 ena_log(adapter->pdev, DBG, "ena_setup_io_intr vector: %d\n", 1763 adapter->msix_entries[irq_idx].vector); 1764 1765 if (adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) { 1766 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu = 1767 (unsigned)(adapter->irq_cpu_base + 1768 i * adapter->irq_cpu_stride) % (unsigned)mp_ncpus; 1769 CPU_SETOF(adapter->que[i].cpu, &adapter->que[i].cpu_mask); 1770 } 1771 1772 #ifdef RSS 1773 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu = 1774 rss_getcpu(cur_bind); 1775 cur_bind = (cur_bind + 1) % num_buckets; 1776 CPU_SETOF(adapter->que[i].cpu, 
&adapter->que[i].cpu_mask); 1777 1778 for (idx = 0; idx < MAXMEMDOM; ++idx) { 1779 if (CPU_ISSET(adapter->que[i].cpu, &cpuset_domain[idx])) 1780 break; 1781 } 1782 adapter->que[i].domain = idx; 1783 #else 1784 adapter->que[i].domain = -1; 1785 #endif 1786 } 1787 1788 return (0); 1789 } 1790 1791 static int 1792 ena_request_mgmnt_irq(struct ena_adapter *adapter) 1793 { 1794 device_t pdev = adapter->pdev; 1795 struct ena_irq *irq; 1796 unsigned long flags; 1797 int rc, rcc; 1798 1799 flags = RF_ACTIVE | RF_SHAREABLE; 1800 1801 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 1802 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ, 1803 &irq->vector, flags); 1804 1805 if (unlikely(irq->res == NULL)) { 1806 ena_log(pdev, ERR, "could not allocate irq vector: %d\n", 1807 irq->vector); 1808 return (ENXIO); 1809 } 1810 1811 rc = bus_setup_intr(adapter->pdev, irq->res, 1812 INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt, irq->data, 1813 &irq->cookie); 1814 if (unlikely(rc != 0)) { 1815 ena_log(pdev, ERR, 1816 "failed to register interrupt handler for irq %ju: %d\n", 1817 rman_get_start(irq->res), rc); 1818 goto err_res_free; 1819 } 1820 irq->requested = true; 1821 1822 return (rc); 1823 1824 err_res_free: 1825 ena_log(pdev, INFO, "releasing resource for irq %d\n", irq->vector); 1826 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector, 1827 irq->res); 1828 if (unlikely(rcc != 0)) 1829 ena_log(pdev, ERR, 1830 "dev has no parent while releasing res for irq: %d\n", 1831 irq->vector); 1832 irq->res = NULL; 1833 1834 return (rc); 1835 } 1836 1837 static int 1838 ena_request_io_irq(struct ena_adapter *adapter) 1839 { 1840 device_t pdev = adapter->pdev; 1841 struct ena_irq *irq; 1842 unsigned long flags = 0; 1843 int rc = 0, i, rcc; 1844 1845 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) { 1846 ena_log(pdev, ERR, 1847 "failed to request I/O IRQ: MSI-X is not enabled\n"); 1848 return (EINVAL); 1849 } else { 1850 flags = RF_ACTIVE | 
RF_SHAREABLE; 1851 } 1852 1853 for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { 1854 irq = &adapter->irq_tbl[i]; 1855 1856 if (unlikely(irq->requested)) 1857 continue; 1858 1859 irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ, 1860 &irq->vector, flags); 1861 if (unlikely(irq->res == NULL)) { 1862 rc = ENOMEM; 1863 ena_log(pdev, ERR, 1864 "could not allocate irq vector: %d\n", irq->vector); 1865 goto err; 1866 } 1867 1868 rc = bus_setup_intr(adapter->pdev, irq->res, 1869 INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL, irq->data, 1870 &irq->cookie); 1871 if (unlikely(rc != 0)) { 1872 ena_log(pdev, ERR, 1873 "failed to register interrupt handler for irq %ju: %d\n", 1874 rman_get_start(irq->res), rc); 1875 goto err; 1876 } 1877 irq->requested = true; 1878 1879 if (adapter->rss_enabled || adapter->irq_cpu_base > ENA_BASE_CPU_UNSPECIFIED) { 1880 rc = bus_bind_intr(adapter->pdev, irq->res, irq->cpu); 1881 if (unlikely(rc != 0)) { 1882 ena_log(pdev, ERR, 1883 "failed to bind interrupt handler for irq %ju to cpu %d: %d\n", 1884 rman_get_start(irq->res), irq->cpu, rc); 1885 goto err; 1886 } 1887 1888 ena_log(pdev, INFO, "queue %d - cpu %d\n", 1889 i - ENA_IO_IRQ_FIRST_IDX, irq->cpu); 1890 } 1891 } 1892 return (rc); 1893 1894 err: 1895 1896 for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) { 1897 irq = &adapter->irq_tbl[i]; 1898 rcc = 0; 1899 1900 /* Once we entered err: section and irq->requested is true we 1901 free both intr and resources */ 1902 if (irq->requested) { 1903 rcc = bus_teardown_intr(adapter->pdev, irq->res, 1904 irq->cookie); 1905 if (unlikely(rcc != 0)) 1906 ena_log(pdev, ERR, 1907 "could not release irq: %d, error: %d\n", 1908 irq->vector, rcc); 1909 } 1910 1911 /* If we entered err: section without irq->requested set we know 1912 it was bus_alloc_resource_any() that needs cleanup, provided 1913 res is not NULL. 
In case res is NULL no work in needed in 1914 this iteration */ 1915 rcc = 0; 1916 if (irq->res != NULL) { 1917 rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1918 irq->vector, irq->res); 1919 } 1920 if (unlikely(rcc != 0)) 1921 ena_log(pdev, ERR, 1922 "dev has no parent while releasing res for irq: %d\n", 1923 irq->vector); 1924 irq->requested = false; 1925 irq->res = NULL; 1926 } 1927 1928 return (rc); 1929 } 1930 1931 static void 1932 ena_free_mgmnt_irq(struct ena_adapter *adapter) 1933 { 1934 device_t pdev = adapter->pdev; 1935 struct ena_irq *irq; 1936 int rc; 1937 1938 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 1939 if (irq->requested) { 1940 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector); 1941 rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie); 1942 if (unlikely(rc != 0)) 1943 ena_log(pdev, ERR, "failed to tear down irq: %d\n", 1944 irq->vector); 1945 irq->requested = 0; 1946 } 1947 1948 if (irq->res != NULL) { 1949 ena_log(pdev, DBG, "release resource irq: %d\n", irq->vector); 1950 rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1951 irq->vector, irq->res); 1952 irq->res = NULL; 1953 if (unlikely(rc != 0)) 1954 ena_log(pdev, ERR, 1955 "dev has no parent while releasing res for irq: %d\n", 1956 irq->vector); 1957 } 1958 } 1959 1960 static void 1961 ena_free_io_irq(struct ena_adapter *adapter) 1962 { 1963 device_t pdev = adapter->pdev; 1964 struct ena_irq *irq; 1965 int rc; 1966 1967 for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { 1968 irq = &adapter->irq_tbl[i]; 1969 if (irq->requested) { 1970 ena_log(pdev, DBG, "tear down irq: %d\n", irq->vector); 1971 rc = bus_teardown_intr(adapter->pdev, irq->res, 1972 irq->cookie); 1973 if (unlikely(rc != 0)) { 1974 ena_log(pdev, ERR, 1975 "failed to tear down irq: %d\n", 1976 irq->vector); 1977 } 1978 irq->requested = 0; 1979 } 1980 1981 if (irq->res != NULL) { 1982 ena_log(pdev, DBG, "release resource irq: %d\n", 1983 irq->vector); 1984 rc = 
bus_release_resource(adapter->pdev, SYS_RES_IRQ, 1985 irq->vector, irq->res); 1986 irq->res = NULL; 1987 if (unlikely(rc != 0)) { 1988 ena_log(pdev, ERR, 1989 "dev has no parent while releasing res for irq: %d\n", 1990 irq->vector); 1991 } 1992 } 1993 } 1994 } 1995 1996 static void 1997 ena_free_irqs(struct ena_adapter *adapter) 1998 { 1999 ena_free_io_irq(adapter); 2000 ena_free_mgmnt_irq(adapter); 2001 ena_disable_msix(adapter); 2002 } 2003 2004 static void 2005 ena_disable_msix(struct ena_adapter *adapter) 2006 { 2007 if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) { 2008 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter); 2009 pci_release_msi(adapter->pdev); 2010 } 2011 2012 adapter->msix_vecs = 0; 2013 free(adapter->msix_entries, M_DEVBUF); 2014 adapter->msix_entries = NULL; 2015 } 2016 2017 static void 2018 ena_unmask_all_io_irqs(struct ena_adapter *adapter) 2019 { 2020 struct ena_com_io_cq *io_cq; 2021 struct ena_eth_io_intr_reg intr_reg; 2022 struct ena_ring *tx_ring; 2023 uint16_t ena_qid; 2024 int i; 2025 2026 /* Unmask interrupts for all queues */ 2027 for (i = 0; i < adapter->num_io_queues; i++) { 2028 ena_qid = ENA_IO_TXQ_IDX(i); 2029 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; 2030 ena_com_update_intr_reg(&intr_reg, 0, 0, true, false); 2031 tx_ring = &adapter->tx_ring[i]; 2032 counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1); 2033 ena_com_unmask_intr(io_cq, &intr_reg); 2034 } 2035 } 2036 2037 static int 2038 ena_up_complete(struct ena_adapter *adapter) 2039 { 2040 int rc; 2041 2042 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 2043 rc = ena_rss_configure(adapter); 2044 if (rc != 0) { 2045 ena_log(adapter->pdev, ERR, 2046 "Failed to configure RSS\n"); 2047 return (rc); 2048 } 2049 } 2050 2051 rc = ena_change_mtu(adapter->ifp, if_getmtu(adapter->ifp)); 2052 if (unlikely(rc != 0)) 2053 return (rc); 2054 2055 ena_refill_all_rx_bufs(adapter); 2056 ena_reset_counters((counter_u64_t *)&adapter->hw_stats, 2057 
sizeof(adapter->hw_stats)); 2058 2059 return (0); 2060 } 2061 2062 static void 2063 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size, int new_rx_size) 2064 { 2065 int i; 2066 2067 for (i = 0; i < adapter->num_io_queues; i++) { 2068 adapter->tx_ring[i].ring_size = new_tx_size; 2069 adapter->rx_ring[i].ring_size = new_rx_size; 2070 } 2071 } 2072 2073 static int 2074 create_queues_with_size_backoff(struct ena_adapter *adapter) 2075 { 2076 device_t pdev = adapter->pdev; 2077 int rc; 2078 uint32_t cur_rx_ring_size, cur_tx_ring_size; 2079 uint32_t new_rx_ring_size, new_tx_ring_size; 2080 2081 /* 2082 * Current queue sizes might be set to smaller than the requested 2083 * ones due to past queue allocation failures. 2084 */ 2085 set_io_rings_size(adapter, adapter->requested_tx_ring_size, 2086 adapter->requested_rx_ring_size); 2087 2088 while (1) { 2089 /* Allocate transmit descriptors */ 2090 rc = ena_setup_all_tx_resources(adapter); 2091 if (unlikely(rc != 0)) { 2092 ena_log(pdev, ERR, "err_setup_tx\n"); 2093 goto err_setup_tx; 2094 } 2095 2096 /* Allocate receive descriptors */ 2097 rc = ena_setup_all_rx_resources(adapter); 2098 if (unlikely(rc != 0)) { 2099 ena_log(pdev, ERR, "err_setup_rx\n"); 2100 goto err_setup_rx; 2101 } 2102 2103 /* Create IO queues for Rx & Tx */ 2104 rc = ena_create_io_queues(adapter); 2105 if (unlikely(rc != 0)) { 2106 ena_log(pdev, ERR, "create IO queues failed\n"); 2107 goto err_io_que; 2108 } 2109 2110 return (0); 2111 2112 err_io_que: 2113 ena_free_all_rx_resources(adapter); 2114 err_setup_rx: 2115 ena_free_all_tx_resources(adapter); 2116 err_setup_tx: 2117 /* 2118 * Lower the ring size if ENOMEM. Otherwise, return the 2119 * error straightaway. 
2120 */ 2121 if (unlikely(rc != ENOMEM)) { 2122 ena_log(pdev, ERR, 2123 "Queue creation failed with error code: %d\n", rc); 2124 return (rc); 2125 } 2126 2127 cur_tx_ring_size = adapter->tx_ring[0].ring_size; 2128 cur_rx_ring_size = adapter->rx_ring[0].ring_size; 2129 2130 ena_log(pdev, ERR, 2131 "Not enough memory to create queues with sizes TX=%d, RX=%d\n", 2132 cur_tx_ring_size, cur_rx_ring_size); 2133 2134 new_tx_ring_size = cur_tx_ring_size; 2135 new_rx_ring_size = cur_rx_ring_size; 2136 2137 /* 2138 * Decrease the size of a larger queue, or decrease both if they 2139 * are the same size. 2140 */ 2141 if (cur_rx_ring_size <= cur_tx_ring_size) 2142 new_tx_ring_size = cur_tx_ring_size / 2; 2143 if (cur_rx_ring_size >= cur_tx_ring_size) 2144 new_rx_ring_size = cur_rx_ring_size / 2; 2145 2146 if (new_tx_ring_size < ENA_MIN_RING_SIZE || 2147 new_rx_ring_size < ENA_MIN_RING_SIZE) { 2148 ena_log(pdev, ERR, 2149 "Queue creation failed with the smallest possible queue size" 2150 "of %d for both queues. 
Not retrying with smaller queues\n", 2151 ENA_MIN_RING_SIZE); 2152 return (rc); 2153 } 2154 2155 ena_log(pdev, INFO, 2156 "Retrying queue creation with sizes TX=%d, RX=%d\n", 2157 new_tx_ring_size, new_rx_ring_size); 2158 2159 set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size); 2160 } 2161 } 2162 2163 int 2164 ena_up(struct ena_adapter *adapter) 2165 { 2166 int rc = 0; 2167 2168 ENA_LOCK_ASSERT(); 2169 2170 if (unlikely(device_is_attached(adapter->pdev) == 0)) { 2171 ena_log(adapter->pdev, ERR, "device is not attached!\n"); 2172 return (ENXIO); 2173 } 2174 2175 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 2176 return (0); 2177 2178 ena_log(adapter->pdev, INFO, "device is going UP\n"); 2179 2180 /* setup interrupts for IO queues */ 2181 rc = ena_setup_io_intr(adapter); 2182 if (unlikely(rc != 0)) { 2183 ena_log(adapter->pdev, ERR, "error setting up IO interrupt\n"); 2184 goto error; 2185 } 2186 rc = ena_request_io_irq(adapter); 2187 if (unlikely(rc != 0)) { 2188 ena_log(adapter->pdev, ERR, "err_req_irq\n"); 2189 goto error; 2190 } 2191 2192 ena_log(adapter->pdev, INFO, 2193 "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, LLQ is %s\n", 2194 adapter->num_io_queues, 2195 adapter->requested_rx_ring_size, 2196 adapter->requested_tx_ring_size, 2197 (adapter->ena_dev->tx_mem_queue_type == 2198 ENA_ADMIN_PLACEMENT_POLICY_DEV) ? 
"ENABLED" : "DISABLED"); 2199 2200 rc = create_queues_with_size_backoff(adapter); 2201 if (unlikely(rc != 0)) { 2202 ena_log(adapter->pdev, ERR, 2203 "error creating queues with size backoff\n"); 2204 goto err_create_queues_with_backoff; 2205 } 2206 2207 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) 2208 if_link_state_change(adapter->ifp, LINK_STATE_UP); 2209 2210 rc = ena_up_complete(adapter); 2211 if (unlikely(rc != 0)) 2212 goto err_up_complete; 2213 2214 counter_u64_add(adapter->dev_stats.interface_up, 1); 2215 2216 ena_update_hwassist(adapter); 2217 2218 if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 2219 2220 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter); 2221 2222 ena_unmask_all_io_irqs(adapter); 2223 2224 return (0); 2225 2226 err_up_complete: 2227 ena_destroy_all_io_queues(adapter); 2228 ena_free_all_rx_resources(adapter); 2229 ena_free_all_tx_resources(adapter); 2230 err_create_queues_with_backoff: 2231 ena_free_io_irq(adapter); 2232 error: 2233 return (rc); 2234 } 2235 2236 static uint64_t 2237 ena_get_counter(if_t ifp, ift_counter cnt) 2238 { 2239 struct ena_adapter *adapter; 2240 struct ena_hw_stats *stats; 2241 2242 adapter = if_getsoftc(ifp); 2243 stats = &adapter->hw_stats; 2244 2245 switch (cnt) { 2246 case IFCOUNTER_IPACKETS: 2247 return (counter_u64_fetch(stats->rx_packets)); 2248 case IFCOUNTER_OPACKETS: 2249 return (counter_u64_fetch(stats->tx_packets)); 2250 case IFCOUNTER_IBYTES: 2251 return (counter_u64_fetch(stats->rx_bytes)); 2252 case IFCOUNTER_OBYTES: 2253 return (counter_u64_fetch(stats->tx_bytes)); 2254 case IFCOUNTER_IQDROPS: 2255 return (counter_u64_fetch(stats->rx_drops)); 2256 case IFCOUNTER_OQDROPS: 2257 return (counter_u64_fetch(stats->tx_drops)); 2258 default: 2259 return (if_get_counter_default(ifp, cnt)); 2260 } 2261 } 2262 2263 static int 2264 ena_media_change(if_t ifp) 2265 { 2266 /* Media Change is not supported by firmware */ 2267 return (0); 2268 } 2269 2270 static void 2271 ena_media_status(if_t ifp, 
struct ifmediareq *ifmr) 2272 { 2273 struct ena_adapter *adapter = if_getsoftc(ifp); 2274 ena_log(adapter->pdev, DBG, "Media status update\n"); 2275 2276 ENA_LOCK_LOCK(); 2277 2278 ifmr->ifm_status = IFM_AVALID; 2279 ifmr->ifm_active = IFM_ETHER; 2280 2281 if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) { 2282 ENA_LOCK_UNLOCK(); 2283 ena_log(adapter->pdev, INFO, "Link is down\n"); 2284 return; 2285 } 2286 2287 ifmr->ifm_status |= IFM_ACTIVE; 2288 ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX; 2289 2290 ENA_LOCK_UNLOCK(); 2291 } 2292 2293 static void 2294 ena_init(void *arg) 2295 { 2296 struct ena_adapter *adapter = (struct ena_adapter *)arg; 2297 2298 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) { 2299 ENA_LOCK_LOCK(); 2300 ena_up(adapter); 2301 ENA_LOCK_UNLOCK(); 2302 } 2303 } 2304 2305 static int 2306 ena_ioctl(if_t ifp, u_long command, caddr_t data) 2307 { 2308 struct ena_adapter *adapter; 2309 struct ifreq *ifr; 2310 int rc; 2311 2312 adapter = if_getsoftc(ifp); 2313 ifr = (struct ifreq *)data; 2314 2315 /* 2316 * Acquiring lock to prevent from running up and down routines parallel. 
2317 */ 2318 rc = 0; 2319 switch (command) { 2320 case SIOCSIFMTU: 2321 if (if_getmtu(ifp) == ifr->ifr_mtu) 2322 break; 2323 ENA_LOCK_LOCK(); 2324 ena_down(adapter); 2325 2326 ena_change_mtu(ifp, ifr->ifr_mtu); 2327 2328 rc = ena_up(adapter); 2329 ENA_LOCK_UNLOCK(); 2330 break; 2331 2332 case SIOCSIFFLAGS: 2333 if ((if_getflags(ifp) & IFF_UP) != 0) { 2334 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 2335 if ((if_getflags(ifp) & (IFF_PROMISC | 2336 IFF_ALLMULTI)) != 0) { 2337 ena_log(adapter->pdev, INFO, 2338 "ioctl promisc/allmulti\n"); 2339 } 2340 } else { 2341 ENA_LOCK_LOCK(); 2342 rc = ena_up(adapter); 2343 ENA_LOCK_UNLOCK(); 2344 } 2345 } else { 2346 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { 2347 ENA_LOCK_LOCK(); 2348 ena_down(adapter); 2349 ENA_LOCK_UNLOCK(); 2350 } 2351 } 2352 break; 2353 2354 case SIOCADDMULTI: 2355 case SIOCDELMULTI: 2356 break; 2357 2358 case SIOCSIFMEDIA: 2359 case SIOCGIFMEDIA: 2360 rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command); 2361 break; 2362 2363 case SIOCSIFCAP: 2364 { 2365 int reinit = 0; 2366 2367 if (ifr->ifr_reqcap != if_getcapenable(ifp)) { 2368 if_setcapenable(ifp, ifr->ifr_reqcap); 2369 reinit = 1; 2370 } 2371 2372 if ((reinit != 0) && 2373 ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) { 2374 ENA_LOCK_LOCK(); 2375 ena_down(adapter); 2376 rc = ena_up(adapter); 2377 ENA_LOCK_UNLOCK(); 2378 } 2379 } 2380 2381 break; 2382 default: 2383 rc = ether_ioctl(ifp, command, data); 2384 break; 2385 } 2386 2387 return (rc); 2388 } 2389 2390 static int 2391 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat) 2392 { 2393 int caps = 0; 2394 2395 if ((feat->offload.tx & 2396 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | 2397 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK | 2398 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0) 2399 caps |= IFCAP_TXCSUM; 2400 2401 if ((feat->offload.tx & 2402 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK | 2403 
ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0) 2404 caps |= IFCAP_TXCSUM_IPV6; 2405 2406 if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0) 2407 caps |= IFCAP_TSO4; 2408 2409 if ((feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0) 2410 caps |= IFCAP_TSO6; 2411 2412 if ((feat->offload.rx_supported & 2413 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK | 2414 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0) 2415 caps |= IFCAP_RXCSUM; 2416 2417 if ((feat->offload.rx_supported & 2418 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0) 2419 caps |= IFCAP_RXCSUM_IPV6; 2420 2421 caps |= IFCAP_LRO | IFCAP_JUMBO_MTU; 2422 2423 return (caps); 2424 } 2425 2426 static void 2427 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp) 2428 { 2429 host_info->supported_network_features[0] = (uint32_t)if_getcapabilities(ifp); 2430 } 2431 2432 static void 2433 ena_update_hwassist(struct ena_adapter *adapter) 2434 { 2435 if_t ifp = adapter->ifp; 2436 uint32_t feat = adapter->tx_offload_cap; 2437 int cap = if_getcapenable(ifp); 2438 int flags = 0; 2439 2440 if_clearhwassist(ifp); 2441 2442 if ((cap & IFCAP_TXCSUM) != 0) { 2443 if ((feat & 2444 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0) 2445 flags |= CSUM_IP; 2446 if ((feat & 2447 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | 2448 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0) 2449 flags |= CSUM_IP_UDP | CSUM_IP_TCP; 2450 } 2451 2452 if ((cap & IFCAP_TXCSUM_IPV6) != 0) 2453 flags |= CSUM_IP6_UDP | CSUM_IP6_TCP; 2454 2455 if ((cap & IFCAP_TSO4) != 0) 2456 flags |= CSUM_IP_TSO; 2457 2458 if ((cap & IFCAP_TSO6) != 0) 2459 flags |= CSUM_IP6_TSO; 2460 2461 if_sethwassistbits(ifp, flags, 0); 2462 } 2463 2464 static int 2465 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter, 2466 struct ena_com_dev_get_features_ctx *feat) 2467 { 2468 if_t ifp; 2469 int caps = 0; 2470 2471 ifp = 
adapter->ifp = if_gethandle(IFT_ETHER); 2472 if (unlikely(ifp == NULL)) { 2473 ena_log(pdev, ERR, "can not allocate ifnet structure\n"); 2474 return (ENXIO); 2475 } 2476 if_initname(ifp, device_get_name(pdev), device_get_unit(pdev)); 2477 if_setdev(ifp, pdev); 2478 if_setsoftc(ifp, adapter); 2479 2480 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 2481 if_setinitfn(ifp, ena_init); 2482 if_settransmitfn(ifp, ena_mq_start); 2483 if_setqflushfn(ifp, ena_qflush); 2484 if_setioctlfn(ifp, ena_ioctl); 2485 if_setgetcounterfn(ifp, ena_get_counter); 2486 2487 if_setsendqlen(ifp, adapter->requested_tx_ring_size); 2488 if_setsendqready(ifp); 2489 if_setmtu(ifp, ETHERMTU); 2490 if_setbaudrate(ifp, 0); 2491 /* Zeroize capabilities... */ 2492 if_setcapabilities(ifp, 0); 2493 if_setcapenable(ifp, 0); 2494 /* check hardware support */ 2495 caps = ena_get_dev_offloads(feat); 2496 /* ... and set them */ 2497 if_setcapabilitiesbit(ifp, caps, 0); 2498 2499 /* TSO parameters */ 2500 if_sethwtsomax(ifp, ENA_TSO_MAXSIZE - 2501 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); 2502 if_sethwtsomaxsegcount(ifp, adapter->max_tx_sgl_size - 1); 2503 if_sethwtsomaxsegsize(ifp, ENA_TSO_MAXSIZE); 2504 2505 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 2506 if_setcapenable(ifp, if_getcapabilities(ifp)); 2507 2508 /* 2509 * Specify the media types supported by this adapter and register 2510 * callbacks to update media and link information 2511 */ 2512 ifmedia_init(&adapter->media, IFM_IMASK, ena_media_change, 2513 ena_media_status); 2514 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 2515 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 2516 2517 ether_ifattach(ifp, adapter->mac_addr); 2518 2519 return (0); 2520 } 2521 2522 void 2523 ena_down(struct ena_adapter *adapter) 2524 { 2525 int rc; 2526 2527 ENA_LOCK_ASSERT(); 2528 2529 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 2530 return; 2531 2532 ena_log(adapter->pdev, INFO, "device is going DOWN\n"); 2533 2534 
ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter); 2535 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2536 2537 ena_free_io_irq(adapter); 2538 2539 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) { 2540 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 2541 if (unlikely(rc != 0)) 2542 ena_log(adapter->pdev, ERR, "Device reset failed\n"); 2543 } 2544 2545 ena_destroy_all_io_queues(adapter); 2546 2547 ena_free_all_tx_bufs(adapter); 2548 ena_free_all_rx_bufs(adapter); 2549 ena_free_all_tx_resources(adapter); 2550 ena_free_all_rx_resources(adapter); 2551 2552 counter_u64_add(adapter->dev_stats.interface_down, 1); 2553 } 2554 2555 static uint32_t 2556 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev, 2557 struct ena_com_dev_get_features_ctx *get_feat_ctx) 2558 { 2559 uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 2560 2561 /* Regular queues capabilities */ 2562 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2563 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2564 &get_feat_ctx->max_queue_ext.max_queue_ext; 2565 io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num, 2566 max_queue_ext->max_rx_cq_num); 2567 2568 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 2569 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 2570 } else { 2571 struct ena_admin_queue_feature_desc *max_queues = 2572 &get_feat_ctx->max_queues; 2573 io_tx_sq_num = max_queues->max_sq_num; 2574 io_tx_cq_num = max_queues->max_cq_num; 2575 io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); 2576 } 2577 2578 /* In case of LLQ use the llq fields for the tx SQ/CQ */ 2579 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 2580 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 2581 2582 max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES); 2583 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num); 2584 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num); 
2585 max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num); 2586 /* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */ 2587 max_num_io_queues = min_t(uint32_t, max_num_io_queues, 2588 pci_msix_count(pdev) - 1); 2589 #ifdef RSS 2590 max_num_io_queues = min_t(uint32_t, max_num_io_queues, 2591 rss_getnumbuckets()); 2592 #endif 2593 2594 return (max_num_io_queues); 2595 } 2596 2597 static int 2598 ena_enable_wc(device_t pdev, struct resource *res) 2599 { 2600 #if defined(__i386) || defined(__amd64) || defined(__aarch64__) 2601 vm_offset_t va; 2602 vm_size_t len; 2603 int rc; 2604 2605 va = (vm_offset_t)rman_get_virtual(res); 2606 len = rman_get_size(res); 2607 /* Enable write combining */ 2608 rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING); 2609 if (unlikely(rc != 0)) { 2610 ena_log(pdev, ERR, "pmap_change_attr failed, %d\n", rc); 2611 return (rc); 2612 } 2613 2614 return (0); 2615 #endif 2616 return (EOPNOTSUPP); 2617 } 2618 2619 static int 2620 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev, 2621 struct ena_admin_feature_llq_desc *llq, 2622 struct ena_llq_configurations *llq_default_configurations) 2623 { 2624 int rc; 2625 uint32_t llq_feature_mask; 2626 2627 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 2628 if (!(ena_dev->supported_features & llq_feature_mask)) { 2629 ena_log(pdev, WARN, 2630 "LLQ is not supported. Fallback to host mode policy.\n"); 2631 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2632 return (0); 2633 } 2634 2635 if (ena_dev->mem_bar == NULL) { 2636 ena_log(pdev, WARN, 2637 "LLQ is advertised as supported but device doesn't expose mem bar.\n"); 2638 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2639 return (0); 2640 } 2641 2642 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 2643 if (unlikely(rc != 0)) { 2644 ena_log(pdev, WARN, 2645 "Failed to configure the device mode. 
" 2646 "Fallback to host mode policy.\n"); 2647 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2648 } 2649 2650 return (0); 2651 } 2652 2653 static int 2654 ena_map_llq_mem_bar(device_t pdev, struct ena_com_dev *ena_dev) 2655 { 2656 struct ena_adapter *adapter = device_get_softc(pdev); 2657 int rc, rid; 2658 2659 /* Try to allocate resources for LLQ bar */ 2660 rid = PCIR_BAR(ENA_MEM_BAR); 2661 adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid, 2662 RF_ACTIVE); 2663 if (unlikely(adapter->memory == NULL)) { 2664 ena_log(pdev, WARN, 2665 "Unable to allocate LLQ bar resource. LLQ mode won't be used.\n"); 2666 return (0); 2667 } 2668 2669 /* Enable write combining for better LLQ performance */ 2670 rc = ena_enable_wc(adapter->pdev, adapter->memory); 2671 if (unlikely(rc != 0)) { 2672 ena_log(pdev, ERR, "failed to enable write combining.\n"); 2673 return (rc); 2674 } 2675 2676 /* 2677 * Save virtual address of the device's memory region 2678 * for the ena_com layer. 
2679 */ 2680 ena_dev->mem_bar = rman_get_virtual(adapter->memory); 2681 2682 return (0); 2683 } 2684 2685 static inline void 2686 set_default_llq_configurations(struct ena_llq_configurations *llq_config, 2687 struct ena_admin_feature_llq_desc *llq) 2688 { 2689 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 2690 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 2691 llq_config->llq_num_decs_before_header = 2692 ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 2693 if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 2694 0 && ena_force_large_llq_header) { 2695 llq_config->llq_ring_entry_size = 2696 ENA_ADMIN_LIST_ENTRY_SIZE_256B; 2697 llq_config->llq_ring_entry_size_value = 256; 2698 } else { 2699 llq_config->llq_ring_entry_size = 2700 ENA_ADMIN_LIST_ENTRY_SIZE_128B; 2701 llq_config->llq_ring_entry_size_value = 128; 2702 } 2703 } 2704 2705 static int 2706 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) 2707 { 2708 struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; 2709 struct ena_com_dev *ena_dev = ctx->ena_dev; 2710 uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE; 2711 uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE; 2712 uint32_t max_tx_queue_size; 2713 uint32_t max_rx_queue_size; 2714 2715 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 2716 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 2717 &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; 2718 max_rx_queue_size = min_t(uint32_t, 2719 max_queue_ext->max_rx_cq_depth, 2720 max_queue_ext->max_rx_sq_depth); 2721 max_tx_queue_size = max_queue_ext->max_tx_cq_depth; 2722 2723 if (ena_dev->tx_mem_queue_type == 2724 ENA_ADMIN_PLACEMENT_POLICY_DEV) 2725 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2726 llq->max_llq_depth); 2727 else 2728 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2729 max_queue_ext->max_tx_sq_depth); 2730 2731 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2732 
max_queue_ext->max_per_packet_tx_descs); 2733 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2734 max_queue_ext->max_per_packet_rx_descs); 2735 } else { 2736 struct ena_admin_queue_feature_desc *max_queues = 2737 &ctx->get_feat_ctx->max_queues; 2738 max_rx_queue_size = min_t(uint32_t, max_queues->max_cq_depth, 2739 max_queues->max_sq_depth); 2740 max_tx_queue_size = max_queues->max_cq_depth; 2741 2742 if (ena_dev->tx_mem_queue_type == 2743 ENA_ADMIN_PLACEMENT_POLICY_DEV) 2744 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2745 llq->max_llq_depth); 2746 else 2747 max_tx_queue_size = min_t(uint32_t, max_tx_queue_size, 2748 max_queues->max_sq_depth); 2749 2750 ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2751 max_queues->max_packet_tx_descs); 2752 ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, 2753 max_queues->max_packet_rx_descs); 2754 } 2755 2756 /* round down to the nearest power of 2 */ 2757 max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1); 2758 max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1); 2759 2760 /* 2761 * When forcing large headers, we multiply the entry size by 2, 2762 * and therefore divide the queue size by 2, leaving the amount 2763 * of memory used by the queues unchanged. 
2764 */ 2765 if (ena_force_large_llq_header) { 2766 if ((llq->entry_size_ctrl_supported & 2767 ENA_ADMIN_LIST_ENTRY_SIZE_256B) != 0 && 2768 ena_dev->tx_mem_queue_type == 2769 ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2770 max_tx_queue_size /= 2; 2771 ena_log(ctx->pdev, INFO, 2772 "Forcing large headers and decreasing maximum Tx queue size to %d\n", 2773 max_tx_queue_size); 2774 } else { 2775 ena_log(ctx->pdev, WARN, 2776 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n"); 2777 } 2778 } 2779 2780 tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, 2781 max_tx_queue_size); 2782 rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, 2783 max_rx_queue_size); 2784 2785 tx_queue_size = 1 << (flsl(tx_queue_size) - 1); 2786 rx_queue_size = 1 << (flsl(rx_queue_size) - 1); 2787 2788 ctx->max_tx_queue_size = max_tx_queue_size; 2789 ctx->max_rx_queue_size = max_rx_queue_size; 2790 ctx->tx_queue_size = tx_queue_size; 2791 ctx->rx_queue_size = rx_queue_size; 2792 2793 return (0); 2794 } 2795 2796 static void 2797 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev) 2798 { 2799 struct ena_admin_host_info *host_info; 2800 uintptr_t rid; 2801 int rc; 2802 2803 /* Allocate only the host info */ 2804 rc = ena_com_allocate_host_info(ena_dev); 2805 if (unlikely(rc != 0)) { 2806 ena_log(dev, ERR, "Cannot allocate host info\n"); 2807 return; 2808 } 2809 2810 host_info = ena_dev->host_attr.host_info; 2811 2812 if (pci_get_id(dev, PCI_ID_RID, &rid) == 0) 2813 host_info->bdf = rid; 2814 host_info->os_type = ENA_ADMIN_OS_FREEBSD; 2815 host_info->kernel_ver = osreldate; 2816 2817 sprintf(host_info->kernel_ver_str, "%d", osreldate); 2818 host_info->os_dist = 0; 2819 strncpy(host_info->os_dist_str, osrelease, 2820 sizeof(host_info->os_dist_str) - 1); 2821 2822 host_info->driver_version = (ENA_DRV_MODULE_VER_MAJOR) | 2823 (ENA_DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 2824 (ENA_DRV_MODULE_VER_SUBMINOR << 
ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); 2825 host_info->num_cpus = mp_ncpus; 2826 host_info->driver_supported_features = 2827 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 2828 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 2829 2830 rc = ena_com_set_host_attributes(ena_dev); 2831 if (unlikely(rc != 0)) { 2832 if (rc == EOPNOTSUPP) 2833 ena_log(dev, WARN, "Cannot set host attributes\n"); 2834 else 2835 ena_log(dev, ERR, "Cannot set host attributes\n"); 2836 2837 goto err; 2838 } 2839 2840 return; 2841 2842 err: 2843 ena_com_delete_host_info(ena_dev); 2844 } 2845 2846 static int 2847 ena_device_init(struct ena_adapter *adapter, device_t pdev, 2848 struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active) 2849 { 2850 struct ena_llq_configurations llq_config; 2851 struct ena_com_dev *ena_dev = adapter->ena_dev; 2852 bool readless_supported; 2853 uint32_t aenq_groups; 2854 int dma_width; 2855 int rc; 2856 2857 rc = ena_com_mmio_reg_read_request_init(ena_dev); 2858 if (unlikely(rc != 0)) { 2859 ena_log(pdev, ERR, "failed to init mmio read less\n"); 2860 return (rc); 2861 } 2862 2863 /* 2864 * The PCIe configuration space revision id indicate if mmio reg 2865 * read is disabled 2866 */ 2867 readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ); 2868 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 2869 2870 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 2871 if (unlikely(rc != 0)) { 2872 ena_log(pdev, ERR, "Can not reset device\n"); 2873 goto err_mmio_read_less; 2874 } 2875 2876 rc = ena_com_validate_version(ena_dev); 2877 if (unlikely(rc != 0)) { 2878 ena_log(pdev, ERR, "device version is too low\n"); 2879 goto err_mmio_read_less; 2880 } 2881 2882 dma_width = ena_com_get_dma_width(ena_dev); 2883 if (unlikely(dma_width < 0)) { 2884 ena_log(pdev, ERR, "Invalid dma width value %d", dma_width); 2885 rc = dma_width; 2886 goto err_mmio_read_less; 2887 } 2888 adapter->dma_width = dma_width; 2889 2890 /* ENA admin level init */ 2891 rc 
= ena_com_admin_init(ena_dev, &aenq_handlers); 2892 if (unlikely(rc != 0)) { 2893 ena_log(pdev, ERR, 2894 "Can not initialize ena admin queue with device\n"); 2895 goto err_mmio_read_less; 2896 } 2897 2898 /* 2899 * To enable the msix interrupts the driver needs to know the number 2900 * of queues. So the driver uses polling mode to retrieve this 2901 * information 2902 */ 2903 ena_com_set_admin_polling_mode(ena_dev, true); 2904 2905 ena_config_host_info(ena_dev, pdev); 2906 2907 /* Get Device Attributes */ 2908 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 2909 if (unlikely(rc != 0)) { 2910 ena_log(pdev, ERR, 2911 "Cannot get attribute for ena device rc: %d\n", rc); 2912 goto err_admin_init; 2913 } 2914 2915 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 2916 BIT(ENA_ADMIN_FATAL_ERROR) | 2917 BIT(ENA_ADMIN_WARNING) | 2918 BIT(ENA_ADMIN_NOTIFICATION) | 2919 BIT(ENA_ADMIN_KEEP_ALIVE); 2920 2921 aenq_groups &= get_feat_ctx->aenq.supported_groups; 2922 rc = ena_com_set_aenq_config(ena_dev, aenq_groups); 2923 if (unlikely(rc != 0)) { 2924 ena_log(pdev, ERR, "Cannot configure aenq groups rc: %d\n", rc); 2925 goto err_admin_init; 2926 } 2927 2928 *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); 2929 2930 set_default_llq_configurations(&llq_config, &get_feat_ctx->llq); 2931 2932 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq, 2933 &llq_config); 2934 if (unlikely(rc != 0)) { 2935 ena_log(pdev, ERR, "Failed to set placement policy\n"); 2936 goto err_admin_init; 2937 } 2938 2939 return (0); 2940 2941 err_admin_init: 2942 ena_com_delete_host_info(ena_dev); 2943 ena_com_admin_destroy(ena_dev); 2944 err_mmio_read_less: 2945 ena_com_mmio_reg_read_request_destroy(ena_dev); 2946 2947 return (rc); 2948 } 2949 2950 static int 2951 ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) 2952 { 2953 struct ena_com_dev *ena_dev = adapter->ena_dev; 2954 int rc; 2955 2956 rc = ena_enable_msix(adapter); 2957 if (unlikely(rc != 0)) { 2958 
ena_log(adapter->pdev, ERR, "Error with MSI-X enablement\n"); 2959 return (rc); 2960 } 2961 2962 ena_setup_mgmnt_intr(adapter); 2963 2964 rc = ena_request_mgmnt_irq(adapter); 2965 if (unlikely(rc != 0)) { 2966 ena_log(adapter->pdev, ERR, "Cannot setup mgmnt queue intr\n"); 2967 goto err_disable_msix; 2968 } 2969 2970 ena_com_set_admin_polling_mode(ena_dev, false); 2971 2972 ena_com_admin_aenq_enable(ena_dev); 2973 2974 return (0); 2975 2976 err_disable_msix: 2977 ena_disable_msix(adapter); 2978 2979 return (rc); 2980 } 2981 2982 /* Function called on ENA_ADMIN_KEEP_ALIVE event */ 2983 static void 2984 ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) 2985 { 2986 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 2987 struct ena_admin_aenq_keep_alive_desc *desc; 2988 sbintime_t stime; 2989 uint64_t rx_drops; 2990 uint64_t tx_drops; 2991 2992 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e; 2993 2994 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low; 2995 tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low; 2996 counter_u64_zero(adapter->hw_stats.rx_drops); 2997 counter_u64_add(adapter->hw_stats.rx_drops, rx_drops); 2998 counter_u64_zero(adapter->hw_stats.tx_drops); 2999 counter_u64_add(adapter->hw_stats.tx_drops, tx_drops); 3000 3001 stime = getsbinuptime(); 3002 atomic_store_rel_64(&adapter->keep_alive_timestamp, stime); 3003 } 3004 3005 /* Check for keep alive expiration */ 3006 static void 3007 check_for_missing_keep_alive(struct ena_adapter *adapter) 3008 { 3009 sbintime_t timestamp, time; 3010 3011 if (adapter->wd_active == 0) 3012 return; 3013 3014 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3015 return; 3016 3017 timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp); 3018 time = getsbinuptime() - timestamp; 3019 if (unlikely(time > adapter->keep_alive_timeout)) { 3020 ena_log(adapter->pdev, ERR, "Keep alive watchdog timeout.\n"); 3021 
counter_u64_add(adapter->dev_stats.wd_expired, 1); 3022 ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO); 3023 } 3024 } 3025 3026 /* Check if admin queue is enabled */ 3027 static void 3028 check_for_admin_com_state(struct ena_adapter *adapter) 3029 { 3030 if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) == false)) { 3031 ena_log(adapter->pdev, ERR, 3032 "ENA admin queue is not in running state!\n"); 3033 counter_u64_add(adapter->dev_stats.admin_q_pause, 1); 3034 ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO); 3035 } 3036 } 3037 3038 static int 3039 check_for_rx_interrupt_queue(struct ena_adapter *adapter, 3040 struct ena_ring *rx_ring) 3041 { 3042 if (likely(atomic_load_8(&rx_ring->first_interrupt))) 3043 return (0); 3044 3045 if (ena_com_cq_empty(rx_ring->ena_com_io_cq)) 3046 return (0); 3047 3048 rx_ring->no_interrupt_event_cnt++; 3049 3050 if (rx_ring->no_interrupt_event_cnt == 3051 ENA_MAX_NO_INTERRUPT_ITERATIONS) { 3052 ena_log(adapter->pdev, ERR, 3053 "Potential MSIX issue on Rx side Queue = %d. 
Reset the device\n", 3054 rx_ring->qid); 3055 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT); 3056 return (EIO); 3057 } 3058 3059 return (0); 3060 } 3061 3062 static int 3063 check_missing_comp_in_tx_queue(struct ena_adapter *adapter, 3064 struct ena_ring *tx_ring) 3065 { 3066 device_t pdev = adapter->pdev; 3067 struct bintime curtime, time; 3068 struct ena_tx_buffer *tx_buf; 3069 int time_since_last_cleanup; 3070 int missing_tx_comp_to; 3071 sbintime_t time_offset; 3072 uint32_t missed_tx = 0; 3073 int i, rc = 0; 3074 3075 getbinuptime(&curtime); 3076 3077 for (i = 0; i < tx_ring->ring_size; i++) { 3078 tx_buf = &tx_ring->tx_buffer_info[i]; 3079 3080 if (bintime_isset(&tx_buf->timestamp) == 0) 3081 continue; 3082 3083 time = curtime; 3084 bintime_sub(&time, &tx_buf->timestamp); 3085 time_offset = bttosbt(time); 3086 3087 if (unlikely(!atomic_load_8(&tx_ring->first_interrupt) && 3088 time_offset > 2 * adapter->missing_tx_timeout)) { 3089 /* 3090 * If after graceful period interrupt is still not 3091 * received, we schedule a reset. 3092 */ 3093 ena_log(pdev, ERR, 3094 "Potential MSIX issue on Tx side Queue = %d. " 3095 "Reset the device\n", 3096 tx_ring->qid); 3097 ena_trigger_reset(adapter, 3098 ENA_REGS_RESET_MISS_INTERRUPT); 3099 return (EIO); 3100 } 3101 3102 /* Check again if packet is still waiting */ 3103 if (unlikely(time_offset > adapter->missing_tx_timeout)) { 3104 3105 if (tx_buf->print_once) { 3106 time_since_last_cleanup = TICKS_2_MSEC(ticks - 3107 tx_ring->tx_last_cleanup_ticks); 3108 missing_tx_comp_to = sbttoms( 3109 adapter->missing_tx_timeout); 3110 ena_log(pdev, WARN, 3111 "Found a Tx that wasn't completed on time, qid %d, index %d. " 3112 "%d msecs have passed since last cleanup. 
Missing Tx timeout value %d msecs.\n", 3113 tx_ring->qid, i, time_since_last_cleanup, 3114 missing_tx_comp_to); 3115 } 3116 3117 tx_buf->print_once = false; 3118 missed_tx++; 3119 } 3120 } 3121 3122 if (unlikely(missed_tx > adapter->missing_tx_threshold)) { 3123 ena_log(pdev, ERR, 3124 "The number of lost tx completion is above the threshold " 3125 "(%d > %d). Reset the device\n", 3126 missed_tx, adapter->missing_tx_threshold); 3127 ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL); 3128 rc = EIO; 3129 } 3130 3131 counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx); 3132 3133 return (rc); 3134 } 3135 3136 /* 3137 * Check for TX which were not completed on time. 3138 * Timeout is defined by "missing_tx_timeout". 3139 * Reset will be performed if number of incompleted 3140 * transactions exceeds "missing_tx_threshold". 3141 */ 3142 static void 3143 check_for_missing_completions(struct ena_adapter *adapter) 3144 { 3145 struct ena_ring *tx_ring; 3146 struct ena_ring *rx_ring; 3147 int i, budget, rc; 3148 3149 /* Make sure the driver doesn't turn the device in other process */ 3150 rmb(); 3151 3152 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3153 return; 3154 3155 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) 3156 return; 3157 3158 if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3159 return; 3160 3161 budget = adapter->missing_tx_max_queues; 3162 3163 for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) { 3164 tx_ring = &adapter->tx_ring[i]; 3165 rx_ring = &adapter->rx_ring[i]; 3166 3167 rc = check_missing_comp_in_tx_queue(adapter, tx_ring); 3168 if (unlikely(rc != 0)) 3169 return; 3170 3171 rc = check_for_rx_interrupt_queue(adapter, rx_ring); 3172 if (unlikely(rc != 0)) 3173 return; 3174 3175 budget--; 3176 if (budget == 0) { 3177 i++; 3178 break; 3179 } 3180 } 3181 3182 adapter->next_monitored_tx_qid = i % adapter->num_io_queues; 3183 } 3184 3185 /* trigger rx cleanup after 2 consecutive detections */ 3186 
#define EMPTY_RX_REFILL 2 3187 /* For the rare case where the device runs out of Rx descriptors and the 3188 * msix handler failed to refill new Rx descriptors (due to a lack of memory 3189 * for example). 3190 * This case will lead to a deadlock: 3191 * The device won't send interrupts since all the new Rx packets will be dropped 3192 * The msix handler won't allocate new Rx descriptors so the device won't be 3193 * able to send new packets. 3194 * 3195 * When such a situation is detected - execute rx cleanup task in another thread 3196 */ 3197 static void 3198 check_for_empty_rx_ring(struct ena_adapter *adapter) 3199 { 3200 struct ena_ring *rx_ring; 3201 int i, refill_required; 3202 3203 if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3204 return; 3205 3206 if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) 3207 return; 3208 3209 for (i = 0; i < adapter->num_io_queues; i++) { 3210 rx_ring = &adapter->rx_ring[i]; 3211 3212 refill_required = ena_com_free_q_entries( 3213 rx_ring->ena_com_io_sq); 3214 if (unlikely(refill_required == (rx_ring->ring_size - 1))) { 3215 rx_ring->empty_rx_queue++; 3216 3217 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { 3218 counter_u64_add(rx_ring->rx_stats.empty_rx_ring, 3219 1); 3220 3221 ena_log(adapter->pdev, WARN, 3222 "Rx ring %d is stalled. 
Triggering the refill function\n", 3223 i); 3224 3225 taskqueue_enqueue(rx_ring->que->cleanup_tq, 3226 &rx_ring->que->cleanup_task); 3227 rx_ring->empty_rx_queue = 0; 3228 } 3229 } else { 3230 rx_ring->empty_rx_queue = 0; 3231 } 3232 } 3233 } 3234 3235 static void 3236 ena_update_hints(struct ena_adapter *adapter, 3237 struct ena_admin_ena_hw_hints *hints) 3238 { 3239 struct ena_com_dev *ena_dev = adapter->ena_dev; 3240 3241 if (hints->admin_completion_tx_timeout) 3242 ena_dev->admin_queue.completion_timeout = 3243 hints->admin_completion_tx_timeout * 1000; 3244 3245 if (hints->mmio_read_timeout) 3246 /* convert to usec */ 3247 ena_dev->mmio_read.reg_read_to = hints->mmio_read_timeout * 1000; 3248 3249 if (hints->missed_tx_completion_count_threshold_to_reset) 3250 adapter->missing_tx_threshold = 3251 hints->missed_tx_completion_count_threshold_to_reset; 3252 3253 if (hints->missing_tx_completion_timeout) { 3254 if (hints->missing_tx_completion_timeout == 3255 ENA_HW_HINTS_NO_TIMEOUT) 3256 adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3257 else 3258 adapter->missing_tx_timeout = SBT_1MS * 3259 hints->missing_tx_completion_timeout; 3260 } 3261 3262 if (hints->driver_watchdog_timeout) { 3263 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3264 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3265 else 3266 adapter->keep_alive_timeout = SBT_1MS * 3267 hints->driver_watchdog_timeout; 3268 } 3269 } 3270 3271 /** 3272 * ena_copy_eni_metrics - Get and copy ENI metrics from the HW. 3273 * @adapter: ENA device adapter 3274 * 3275 * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics 3276 * and other error codes on failure. 3277 * 3278 * This function can possibly cause a race with other calls to the admin queue. 3279 * Because of that, the caller should either lock this function or make sure 3280 * that there is no race in the current context. 
3281 */ 3282 static int 3283 ena_copy_eni_metrics(struct ena_adapter *adapter) 3284 { 3285 static bool print_once = true; 3286 int rc; 3287 3288 rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics); 3289 3290 if (rc != 0) { 3291 if (rc == ENA_COM_UNSUPPORTED) { 3292 if (print_once) { 3293 ena_log(adapter->pdev, WARN, 3294 "Retrieving ENI metrics is not supported.\n"); 3295 print_once = false; 3296 } else { 3297 ena_log(adapter->pdev, DBG, 3298 "Retrieving ENI metrics is not supported.\n"); 3299 } 3300 } else { 3301 ena_log(adapter->pdev, ERR, 3302 "Failed to get ENI metrics: %d\n", rc); 3303 } 3304 } 3305 3306 return (rc); 3307 } 3308 3309 static int 3310 ena_copy_customer_metrics(struct ena_adapter *adapter) 3311 { 3312 struct ena_com_dev *dev; 3313 u32 supported_metrics_count; 3314 int rc, len; 3315 3316 dev = adapter->ena_dev; 3317 3318 supported_metrics_count = ena_com_get_customer_metric_count(dev); 3319 len = supported_metrics_count * sizeof(u64); 3320 3321 /* Fill the data buffer */ 3322 rc = ena_com_get_customer_metrics(adapter->ena_dev, 3323 (char *)(adapter->customer_metrics_array), len); 3324 3325 return (rc); 3326 } 3327 3328 static void 3329 ena_timer_service(void *data) 3330 { 3331 struct ena_adapter *adapter = (struct ena_adapter *)data; 3332 struct ena_admin_host_info *host_info = 3333 adapter->ena_dev->host_attr.host_info; 3334 3335 check_for_missing_keep_alive(adapter); 3336 3337 check_for_admin_com_state(adapter); 3338 3339 check_for_missing_completions(adapter); 3340 3341 check_for_empty_rx_ring(adapter); 3342 3343 /* 3344 * User controller update of the ENA metrics. 3345 * If the delay was set to 0, then the stats shouldn't be updated at 3346 * all. 3347 * Otherwise, wait 'metrics_sample_interval' seconds, before 3348 * updating stats. 3349 * As timer service is executed every second, it's enough to increment 3350 * appropriate counter each time the timer service is executed. 
3351 */ 3352 if ((adapter->metrics_sample_interval != 0) && 3353 (++adapter->metrics_sample_interval_cnt >= 3354 adapter->metrics_sample_interval)) { 3355 taskqueue_enqueue(adapter->metrics_tq, &adapter->metrics_task); 3356 adapter->metrics_sample_interval_cnt = 0; 3357 } 3358 3359 3360 if (host_info != NULL) 3361 ena_update_host_info(host_info, adapter->ifp); 3362 3363 if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { 3364 /* 3365 * Timeout when validating version indicates that the device 3366 * became unresponsive. If that happens skip the reset and 3367 * reschedule timer service, so the reset can be retried later. 3368 */ 3369 if (ena_com_validate_version(adapter->ena_dev) == 3370 ENA_COM_TIMER_EXPIRED) { 3371 ena_log(adapter->pdev, WARN, 3372 "FW unresponsive, skipping reset\n"); 3373 ENA_TIMER_RESET(adapter); 3374 return; 3375 } 3376 ena_log(adapter->pdev, WARN, "Trigger reset is on\n"); 3377 taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task); 3378 return; 3379 } 3380 3381 /* 3382 * Schedule another timeout one second from now. 
3383 */ 3384 ENA_TIMER_RESET(adapter); 3385 } 3386 3387 void 3388 ena_destroy_device(struct ena_adapter *adapter, bool graceful) 3389 { 3390 if_t ifp = adapter->ifp; 3391 struct ena_com_dev *ena_dev = adapter->ena_dev; 3392 bool dev_up; 3393 3394 if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) 3395 return; 3396 3397 if (!graceful) 3398 if_link_state_change(ifp, LINK_STATE_DOWN); 3399 3400 ENA_TIMER_DRAIN(adapter); 3401 3402 dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter); 3403 if (dev_up) 3404 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 3405 3406 if (!graceful) 3407 ena_com_set_admin_running_state(ena_dev, false); 3408 3409 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) 3410 ena_down(adapter); 3411 3412 /* 3413 * Stop the device from sending AENQ events (if the device was up, and 3414 * the trigger reset was on, ena_down already performs device reset) 3415 */ 3416 if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up)) 3417 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 3418 3419 ena_free_mgmnt_irq(adapter); 3420 3421 ena_disable_msix(adapter); 3422 3423 /* 3424 * IO rings resources should be freed because `ena_restore_device()` 3425 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX 3426 * vectors. The amount of MSIX vectors after destroy-restore may be 3427 * different than before. Therefore, IO rings resources should be 3428 * established from scratch each time. 
3429 */ 3430 ena_free_all_io_rings_resources(adapter); 3431 3432 ena_com_abort_admin_commands(ena_dev); 3433 3434 ena_com_wait_for_abort_completion(ena_dev); 3435 3436 ena_com_admin_destroy(ena_dev); 3437 3438 ena_com_mmio_reg_read_request_destroy(ena_dev); 3439 3440 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3441 3442 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter); 3443 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3444 } 3445 3446 static int 3447 ena_device_validate_params(struct ena_adapter *adapter, 3448 struct ena_com_dev_get_features_ctx *get_feat_ctx) 3449 { 3450 if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr, 3451 ETHER_ADDR_LEN) != 0) { 3452 ena_log(adapter->pdev, ERR, "Error, mac addresses differ\n"); 3453 return (EINVAL); 3454 } 3455 3456 if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) { 3457 ena_log(adapter->pdev, ERR, 3458 "Error, device max mtu is smaller than ifp MTU\n"); 3459 return (EINVAL); 3460 } 3461 3462 return 0; 3463 } 3464 3465 int 3466 ena_restore_device(struct ena_adapter *adapter) 3467 { 3468 struct ena_com_dev_get_features_ctx get_feat_ctx; 3469 struct ena_com_dev *ena_dev = adapter->ena_dev; 3470 if_t ifp = adapter->ifp; 3471 device_t dev = adapter->pdev; 3472 int wd_active; 3473 int rc; 3474 3475 ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3476 3477 rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active); 3478 if (rc != 0) { 3479 ena_log(dev, ERR, "Cannot initialize device\n"); 3480 goto err; 3481 } 3482 /* 3483 * Only enable WD if it was enabled before reset, so it won't override 3484 * value set by the user by the sysctl. 
3485 */ 3486 if (adapter->wd_active != 0) 3487 adapter->wd_active = wd_active; 3488 3489 rc = ena_device_validate_params(adapter, &get_feat_ctx); 3490 if (rc != 0) { 3491 ena_log(dev, ERR, "Validation of device parameters failed\n"); 3492 goto err_device_destroy; 3493 } 3494 3495 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3496 /* Make sure we don't have a race with AENQ Links state handler */ 3497 if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) 3498 if_link_state_change(ifp, LINK_STATE_UP); 3499 3500 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 3501 if (rc != 0) { 3502 ena_log(dev, ERR, "Enable MSI-X failed\n"); 3503 goto err_device_destroy; 3504 } 3505 3506 /* 3507 * Effective value of used MSIX vectors should be the same as before 3508 * `ena_destroy_device()`, if possible, or closest to it if less vectors 3509 * are available. 3510 */ 3511 if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues) 3512 adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC; 3513 3514 /* Re-initialize rings basic information */ 3515 ena_init_io_rings(adapter); 3516 3517 /* If the interface was up before the reset bring it up */ 3518 if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) { 3519 rc = ena_up(adapter); 3520 if (rc != 0) { 3521 ena_log(dev, ERR, "Failed to create I/O queues\n"); 3522 goto err_disable_msix; 3523 } 3524 } 3525 3526 /* Indicate that device is running again and ready to work */ 3527 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3528 3529 /* 3530 * As the AENQ handlers weren't executed during reset because 3531 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the 3532 * timestamp must be updated again That will prevent next reset 3533 * caused by missing keep alive. 
3534 */ 3535 adapter->keep_alive_timestamp = getsbinuptime(); 3536 ENA_TIMER_RESET(adapter); 3537 3538 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); 3539 3540 return (rc); 3541 3542 err_disable_msix: 3543 ena_free_mgmnt_irq(adapter); 3544 ena_disable_msix(adapter); 3545 err_device_destroy: 3546 ena_com_abort_admin_commands(ena_dev); 3547 ena_com_wait_for_abort_completion(ena_dev); 3548 ena_com_admin_destroy(ena_dev); 3549 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); 3550 ena_com_mmio_reg_read_request_destroy(ena_dev); 3551 err: 3552 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3553 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter); 3554 ena_log(dev, ERR, "Reset attempt failed. Can not reset the device\n"); 3555 3556 return (rc); 3557 } 3558 3559 static void 3560 ena_metrics_task(void *arg, int pending) 3561 { 3562 struct ena_adapter *adapter = (struct ena_adapter *)arg; 3563 3564 ENA_LOCK_LOCK(); 3565 3566 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_CUSTOMER_METRICS)) 3567 (void)ena_copy_customer_metrics(adapter); 3568 else if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENI_STATS)) 3569 (void)ena_copy_eni_metrics(adapter); 3570 3571 ENA_LOCK_UNLOCK(); 3572 } 3573 3574 static void 3575 ena_reset_task(void *arg, int pending) 3576 { 3577 struct ena_adapter *adapter = (struct ena_adapter *)arg; 3578 3579 ENA_LOCK_LOCK(); 3580 if (likely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { 3581 ena_destroy_device(adapter, false); 3582 ena_restore_device(adapter); 3583 3584 ena_log(adapter->pdev, INFO, 3585 "Device reset completed successfully, Driver info: %s\n", 3586 ena_version); 3587 } 3588 ENA_LOCK_UNLOCK(); 3589 } 3590 3591 static void 3592 ena_free_stats(struct ena_adapter *adapter) 3593 { 3594 ena_free_counters((counter_u64_t *)&adapter->hw_stats, 3595 sizeof(struct ena_hw_stats)); 3596 ena_free_counters((counter_u64_t *)&adapter->dev_stats, 3597 sizeof(struct ena_stats_dev)); 3598 3599 } 3600 /** 3601 * 
ena_attach - Device Initialization Routine 3602 * @pdev: device information struct 3603 * 3604 * Returns 0 on success, otherwise on failure. 3605 * 3606 * ena_attach initializes an adapter identified by a device structure. 3607 * The OS initialization, configuring of the adapter private structure, 3608 * and a hardware reset occur. 3609 **/ 3610 static int 3611 ena_attach(device_t pdev) 3612 { 3613 struct ena_com_dev_get_features_ctx get_feat_ctx; 3614 struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; 3615 static int version_printed; 3616 struct ena_adapter *adapter; 3617 struct ena_com_dev *ena_dev = NULL; 3618 uint32_t max_num_io_queues; 3619 int msix_rid; 3620 int rid, rc; 3621 3622 adapter = device_get_softc(pdev); 3623 adapter->pdev = pdev; 3624 adapter->first_bind = -1; 3625 3626 /* 3627 * Set up the timer service - driver is responsible for avoiding 3628 * concurrency, as the callout won't be using any locking inside. 3629 */ 3630 ENA_TIMER_INIT(adapter); 3631 adapter->keep_alive_timeout = ENA_DEFAULT_KEEP_ALIVE_TO; 3632 adapter->missing_tx_timeout = ENA_DEFAULT_TX_CMP_TO; 3633 adapter->missing_tx_max_queues = ENA_DEFAULT_TX_MONITORED_QUEUES; 3634 adapter->missing_tx_threshold = ENA_DEFAULT_TX_CMP_THRESHOLD; 3635 3636 adapter->irq_cpu_base = ENA_BASE_CPU_UNSPECIFIED; 3637 adapter->irq_cpu_stride = 0; 3638 3639 #ifdef RSS 3640 adapter->rss_enabled = 1; 3641 #endif 3642 3643 if (version_printed++ == 0) 3644 ena_log(pdev, INFO, "%s\n", ena_version); 3645 3646 /* Allocate memory for ena_dev structure */ 3647 ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF, 3648 M_WAITOK | M_ZERO); 3649 3650 adapter->ena_dev = ena_dev; 3651 ena_dev->dmadev = pdev; 3652 3653 rid = PCIR_BAR(ENA_REG_BAR); 3654 adapter->memory = NULL; 3655 adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid, 3656 RF_ACTIVE); 3657 if (unlikely(adapter->registers == NULL)) { 3658 ena_log(pdev, ERR, 3659 "unable to allocate bus resource: registers!\n"); 3660 rc = ENOMEM; 
3661 goto err_dev_free; 3662 } 3663 3664 /* MSIx vector table may reside on BAR0 with registers or on BAR1. */ 3665 msix_rid = pci_msix_table_bar(pdev); 3666 if (msix_rid != rid) { 3667 adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, 3668 &msix_rid, RF_ACTIVE); 3669 if (unlikely(adapter->msix == NULL)) { 3670 ena_log(pdev, ERR, 3671 "unable to allocate bus resource: msix!\n"); 3672 rc = ENOMEM; 3673 goto err_pci_free; 3674 } 3675 adapter->msix_rid = msix_rid; 3676 } 3677 3678 ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF, 3679 M_WAITOK | M_ZERO); 3680 3681 /* Store register resources */ 3682 ((struct ena_bus *)(ena_dev->bus))->reg_bar_t = rman_get_bustag( 3683 adapter->registers); 3684 ((struct ena_bus *)(ena_dev->bus))->reg_bar_h = rman_get_bushandle( 3685 adapter->registers); 3686 3687 if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) { 3688 ena_log(pdev, ERR, "failed to pmap registers bar\n"); 3689 rc = ENXIO; 3690 goto err_bus_free; 3691 } 3692 3693 rc = ena_map_llq_mem_bar(pdev, ena_dev); 3694 if (unlikely(rc != 0)) { 3695 ena_log(pdev, ERR, "Failed to map ENA mem bar"); 3696 goto err_bus_free; 3697 } 3698 3699 /* Initially clear all the flags */ 3700 ENA_FLAG_ZERO(adapter); 3701 3702 /* Device initialization */ 3703 rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active); 3704 if (unlikely(rc != 0)) { 3705 ena_log(pdev, ERR, "ENA device init failed! 
(err: %d)\n", rc); 3706 rc = ENXIO; 3707 goto err_bus_free; 3708 } 3709 3710 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 3711 adapter->disable_meta_caching = !!( 3712 get_feat_ctx.llq.accel_mode.u.get.supported_flags & 3713 BIT(ENA_ADMIN_DISABLE_META_CACHING)); 3714 3715 adapter->keep_alive_timestamp = getsbinuptime(); 3716 3717 adapter->tx_offload_cap = get_feat_ctx.offload.tx; 3718 3719 memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr, 3720 ETHER_ADDR_LEN); 3721 3722 calc_queue_ctx.pdev = pdev; 3723 calc_queue_ctx.ena_dev = ena_dev; 3724 calc_queue_ctx.get_feat_ctx = &get_feat_ctx; 3725 3726 /* Calculate initial and maximum IO queue number and size */ 3727 max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, 3728 &get_feat_ctx); 3729 rc = ena_calc_io_queue_size(&calc_queue_ctx); 3730 if (unlikely((rc != 0) || (max_num_io_queues <= 0))) { 3731 rc = EFAULT; 3732 goto err_com_free; 3733 } 3734 3735 adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; 3736 adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; 3737 adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; 3738 adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; 3739 adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; 3740 adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; 3741 3742 adapter->max_num_io_queues = max_num_io_queues; 3743 3744 adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE; 3745 3746 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu; 3747 3748 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3749 3750 /* set up dma tags for rx and tx buffers */ 3751 rc = ena_setup_tx_dma_tag(adapter); 3752 if (unlikely(rc != 0)) { 3753 ena_log(pdev, ERR, "Failed to create TX DMA tag\n"); 3754 goto err_com_free; 3755 } 3756 3757 rc = ena_setup_rx_dma_tag(adapter); 3758 if (unlikely(rc != 0)) { 3759 ena_log(pdev, ERR, "Failed to create RX DMA tag\n"); 3760 goto err_tx_tag_free; 3761 } 3762 3763 /* 3764 * The amount of 
requested MSIX vectors is equal to 3765 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant 3766 * number of admin queue interrupts. The former is initially determined 3767 * by HW capabilities (see `ena_calc_max_io_queue_num())` but may not be 3768 * achieved if there are not enough system resources. By default, the 3769 * number of effectively used IO queues is the same but later on it can 3770 * be limited by the user using sysctl interface. 3771 */ 3772 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 3773 if (unlikely(rc != 0)) { 3774 ena_log(pdev, ERR, 3775 "Failed to enable and set the admin interrupts\n"); 3776 goto err_io_free; 3777 } 3778 /* By default all of allocated MSIX vectors are actively used */ 3779 adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC; 3780 3781 /* initialize rings basic information */ 3782 ena_init_io_rings(adapter); 3783 3784 rc = ena_com_allocate_customer_metrics_buffer(ena_dev); 3785 if (rc) { 3786 ena_log(pdev, ERR, "Failed to allocate customer metrics buffer.\n"); 3787 goto err_msix_free; 3788 } 3789 3790 rc = ena_sysctl_allocate_customer_metrics_buffer(adapter); 3791 if (unlikely(rc)){ 3792 ena_log(pdev, ERR, "Failed to allocate sysctl customer metrics buffer.\n"); 3793 goto err_metrics_buffer_destroy; 3794 } 3795 3796 /* Initialize statistics */ 3797 ena_alloc_counters((counter_u64_t *)&adapter->dev_stats, 3798 sizeof(struct ena_stats_dev)); 3799 ena_alloc_counters((counter_u64_t *)&adapter->hw_stats, 3800 sizeof(struct ena_hw_stats)); 3801 ena_sysctl_add_nodes(adapter); 3802 3803 /* setup network interface */ 3804 rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx); 3805 if (unlikely(rc != 0)) { 3806 ena_log(pdev, ERR, "Error with network interface setup\n"); 3807 goto err_customer_metrics_alloc; 3808 } 3809 3810 /* Initialize reset task queue */ 3811 TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter); 3812 adapter->reset_tq = taskqueue_create("ena_reset_enqueue", 3813 
M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq); 3814 taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, "%s rstq", 3815 device_get_nameunit(adapter->pdev)); 3816 3817 /* Initialize metrics task queue */ 3818 TASK_INIT(&adapter->metrics_task, 0, ena_metrics_task, adapter); 3819 adapter->metrics_tq = taskqueue_create("ena_metrics_enqueue", 3820 M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->metrics_tq); 3821 taskqueue_start_threads(&adapter->metrics_tq, 1, PI_NET, "%s metricsq", 3822 device_get_nameunit(adapter->pdev)); 3823 3824 #ifdef DEV_NETMAP 3825 rc = ena_netmap_attach(adapter); 3826 if (rc != 0) { 3827 ena_log(pdev, ERR, "netmap attach failed: %d\n", rc); 3828 goto err_detach; 3829 } 3830 #endif /* DEV_NETMAP */ 3831 3832 /* Tell the stack that the interface is not active */ 3833 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 3834 ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter); 3835 3836 /* Run the timer service */ 3837 ENA_TIMER_RESET(adapter); 3838 3839 return (0); 3840 3841 #ifdef DEV_NETMAP 3842 err_detach: 3843 ether_ifdetach(adapter->ifp); 3844 #endif /* DEV_NETMAP */ 3845 err_customer_metrics_alloc: 3846 free(adapter->customer_metrics_array, M_DEVBUF); 3847 err_metrics_buffer_destroy: 3848 ena_com_delete_customer_metrics_buffer(ena_dev); 3849 err_msix_free: 3850 ena_free_stats(adapter); 3851 ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR); 3852 ena_free_mgmnt_irq(adapter); 3853 ena_disable_msix(adapter); 3854 err_io_free: 3855 ena_free_all_io_rings_resources(adapter); 3856 ena_free_rx_dma_tag(adapter); 3857 err_tx_tag_free: 3858 ena_free_tx_dma_tag(adapter); 3859 err_com_free: 3860 ena_com_admin_destroy(ena_dev); 3861 ena_com_delete_host_info(ena_dev); 3862 ena_com_mmio_reg_read_request_destroy(ena_dev); 3863 err_bus_free: 3864 free(ena_dev->bus, M_DEVBUF); 3865 err_pci_free: 3866 ena_free_pci_resources(adapter); 3867 err_dev_free: 3868 free(ena_dev, M_DEVBUF); 3869 3870 return (rc); 
3871 } 3872 3873 /** 3874 * ena_detach - Device Removal Routine 3875 * @pdev: device information struct 3876 * 3877 * ena_detach is called by the device subsystem to alert the driver 3878 * that it should release a PCI device. 3879 **/ 3880 static int 3881 ena_detach(device_t pdev) 3882 { 3883 struct ena_adapter *adapter = device_get_softc(pdev); 3884 struct ena_com_dev *ena_dev = adapter->ena_dev; 3885 int rc; 3886 3887 /* Make sure VLANS are not using driver */ 3888 if (if_vlantrunkinuse(adapter->ifp)) { 3889 ena_log(adapter->pdev, ERR, "VLAN is in use, detach first\n"); 3890 return (EBUSY); 3891 } 3892 3893 ether_ifdetach(adapter->ifp); 3894 3895 /* Stop timer service */ 3896 ENA_LOCK_LOCK(); 3897 ENA_TIMER_DRAIN(adapter); 3898 ENA_LOCK_UNLOCK(); 3899 3900 /* Release metrics task */ 3901 while (taskqueue_cancel(adapter->metrics_tq, &adapter->metrics_task, NULL)) 3902 taskqueue_drain(adapter->metrics_tq, &adapter->metrics_task); 3903 taskqueue_free(adapter->metrics_tq); 3904 3905 /* Release reset task */ 3906 while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL)) 3907 taskqueue_drain(adapter->reset_tq, &adapter->reset_task); 3908 taskqueue_free(adapter->reset_tq); 3909 3910 ENA_LOCK_LOCK(); 3911 ena_down(adapter); 3912 ena_destroy_device(adapter, true); 3913 ENA_LOCK_UNLOCK(); 3914 3915 /* Restore unregistered sysctl queue nodes. 
*/ 3916 ena_sysctl_update_queue_node_nb(adapter, adapter->num_io_queues, 3917 adapter->max_num_io_queues); 3918 3919 #ifdef DEV_NETMAP 3920 netmap_detach(adapter->ifp); 3921 #endif /* DEV_NETMAP */ 3922 3923 ena_free_stats(adapter); 3924 3925 rc = ena_free_rx_dma_tag(adapter); 3926 if (unlikely(rc != 0)) 3927 ena_log(adapter->pdev, WARN, 3928 "Unmapped RX DMA tag associations\n"); 3929 3930 rc = ena_free_tx_dma_tag(adapter); 3931 if (unlikely(rc != 0)) 3932 ena_log(adapter->pdev, WARN, 3933 "Unmapped TX DMA tag associations\n"); 3934 3935 ena_free_irqs(adapter); 3936 3937 ena_free_pci_resources(adapter); 3938 3939 if (adapter->rss_indir != NULL) 3940 free(adapter->rss_indir, M_DEVBUF); 3941 3942 if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) 3943 ena_com_rss_destroy(ena_dev); 3944 3945 ena_com_delete_host_info(ena_dev); 3946 3947 free(adapter->customer_metrics_array, M_DEVBUF); 3948 3949 ena_com_delete_customer_metrics_buffer(ena_dev); 3950 3951 if_free(adapter->ifp); 3952 3953 free(ena_dev->bus, M_DEVBUF); 3954 3955 free(ena_dev, M_DEVBUF); 3956 3957 return (bus_generic_detach(pdev)); 3958 } 3959 3960 /****************************************************************************** 3961 ******************************** AENQ Handlers ******************************* 3962 *****************************************************************************/ 3963 /** 3964 * ena_update_on_link_change: 3965 * Notify the network interface about the change in link status 3966 **/ 3967 static void 3968 ena_update_on_link_change(void *adapter_data, 3969 struct ena_admin_aenq_entry *aenq_e) 3970 { 3971 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 3972 struct ena_admin_aenq_link_change_desc *aenq_desc; 3973 int status; 3974 if_t ifp; 3975 3976 aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e; 3977 ifp = adapter->ifp; 3978 status = aenq_desc->flags & 3979 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; 3980 3981 if (status != 0) { 3982 
ena_log(adapter->pdev, INFO, "link is UP\n"); 3983 ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter); 3984 if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter)) 3985 if_link_state_change(ifp, LINK_STATE_UP); 3986 } else { 3987 ena_log(adapter->pdev, INFO, "link is DOWN\n"); 3988 if_link_state_change(ifp, LINK_STATE_DOWN); 3989 ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter); 3990 } 3991 } 3992 3993 static void 3994 ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) 3995 { 3996 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 3997 struct ena_admin_ena_hw_hints *hints; 3998 3999 ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION, 4000 adapter->ena_dev, "Invalid group(%x) expected %x\n", 4001 aenq_e->aenq_common_desc.group, ENA_ADMIN_NOTIFICATION); 4002 4003 switch (aenq_e->aenq_common_desc.syndrome) { 4004 case ENA_ADMIN_UPDATE_HINTS: 4005 hints = 4006 (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4); 4007 ena_update_hints(adapter, hints); 4008 break; 4009 default: 4010 ena_log(adapter->pdev, ERR, 4011 "Invalid aenq notification link state %d\n", 4012 aenq_e->aenq_common_desc.syndrome); 4013 } 4014 } 4015 4016 static void 4017 ena_lock_init(void *arg) 4018 { 4019 ENA_LOCK_INIT(); 4020 } 4021 SYSINIT(ena_lock_init, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_init, NULL); 4022 4023 static void 4024 ena_lock_uninit(void *arg) 4025 { 4026 ENA_LOCK_DESTROY(); 4027 } 4028 SYSUNINIT(ena_lock_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, ena_lock_uninit, NULL); 4029 4030 /** 4031 * This handler will called for unknown event group or unimplemented handlers 4032 **/ 4033 static void 4034 unimplemented_aenq_handler(void *adapter_data, 4035 struct ena_admin_aenq_entry *aenq_e) 4036 { 4037 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; 4038 4039 ena_log(adapter->pdev, ERR, 4040 "Unknown event was received or event with unimplemented handler\n"); 4041 } 4042 4043 static struct ena_aenq_handlers aenq_handlers = { 
4044 .handlers = { 4045 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, 4046 [ENA_ADMIN_NOTIFICATION] = ena_notification, 4047 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, 4048 }, 4049 .unimplemented_handler = unimplemented_aenq_handler 4050 }; 4051 4052 /********************************************************************* 4053 * FreeBSD Device Interface Entry Points 4054 *********************************************************************/ 4055 4056 static device_method_t ena_methods[] = { /* Device interface */ 4057 DEVMETHOD(device_probe, ena_probe), 4058 DEVMETHOD(device_attach, ena_attach), 4059 DEVMETHOD(device_detach, ena_detach), DEVMETHOD_END 4060 }; 4061 4062 static driver_t ena_driver = { 4063 "ena", 4064 ena_methods, 4065 sizeof(struct ena_adapter), 4066 }; 4067 4068 DRIVER_MODULE(ena, pci, ena_driver, 0, 0); 4069 MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array, 4070 nitems(ena_vendor_info_array) - 1); 4071 MODULE_DEPEND(ena, pci, 1, 1, 1); 4072 MODULE_DEPEND(ena, ether, 1, 1, 1); 4073 #ifdef DEV_NETMAP 4074 MODULE_DEPEND(ena, netmap, 1, 1, 1); 4075 #endif /* DEV_NETMAP */ 4076 4077 /*********************************************************************/ 4078