xref: /freebsd/sys/dev/ena/ena.c (revision 8a272653d9fbd9fc37691c9aad6a05089b4ecb4d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_rss.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/kthread.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/module.h>
44 #include <sys/rman.h>
45 #include <sys/smp.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/taskqueue.h>
50 #include <sys/time.h>
51 #include <sys/eventhandler.h>
52 
53 #include <machine/bus.h>
54 #include <machine/resource.h>
55 #include <machine/in_cksum.h>
56 
57 #include <net/bpf.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 #ifdef RSS
67 #include <net/rss_config.h>
68 #endif
69 
70 #include <netinet/in_systm.h>
71 #include <netinet/in.h>
72 #include <netinet/if_ether.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip6.h>
75 #include <netinet/tcp.h>
76 #include <netinet/udp.h>
77 
78 #include <dev/pci/pcivar.h>
79 #include <dev/pci/pcireg.h>
80 
81 #include <vm/vm.h>
82 #include <vm/pmap.h>
83 
84 #include "ena_datapath.h"
85 #include "ena.h"
86 #include "ena_sysctl.h"
87 
88 #ifdef DEV_NETMAP
89 #include "ena_netmap.h"
90 #endif /* DEV_NETMAP */
91 
92 /*********************************************************
93  *  Function prototypes
94  *********************************************************/
95 static int	ena_probe(device_t);
96 static void	ena_intr_msix_mgmnt(void *);
97 static void	ena_free_pci_resources(struct ena_adapter *);
98 static int	ena_change_mtu(if_t, int);
99 static inline void ena_alloc_counters(counter_u64_t *, int);
100 static inline void ena_free_counters(counter_u64_t *, int);
101 static inline void ena_reset_counters(counter_u64_t *, int);
102 static void	ena_init_io_rings_common(struct ena_adapter *,
103     struct ena_ring *, uint16_t);
104 static void	ena_init_io_rings_basic(struct ena_adapter *);
105 static void	ena_init_io_rings_advanced(struct ena_adapter *);
106 static void	ena_init_io_rings(struct ena_adapter *);
107 static void	ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
108 static void	ena_free_all_io_rings_resources(struct ena_adapter *);
109 static int	ena_setup_tx_dma_tag(struct ena_adapter *);
110 static int	ena_free_tx_dma_tag(struct ena_adapter *);
111 static int	ena_setup_rx_dma_tag(struct ena_adapter *);
112 static int	ena_free_rx_dma_tag(struct ena_adapter *);
113 static void	ena_release_all_tx_dmamap(struct ena_ring *);
114 static int	ena_setup_tx_resources(struct ena_adapter *, int);
115 static void	ena_free_tx_resources(struct ena_adapter *, int);
116 static int	ena_setup_all_tx_resources(struct ena_adapter *);
117 static void	ena_free_all_tx_resources(struct ena_adapter *);
118 static int	ena_setup_rx_resources(struct ena_adapter *, unsigned int);
119 static void	ena_free_rx_resources(struct ena_adapter *, unsigned int);
120 static int	ena_setup_all_rx_resources(struct ena_adapter *);
121 static void	ena_free_all_rx_resources(struct ena_adapter *);
122 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
123     struct ena_rx_buffer *);
124 static void	ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
125     struct ena_rx_buffer *);
126 static void	ena_free_rx_bufs(struct ena_adapter *, unsigned int);
127 static void	ena_refill_all_rx_bufs(struct ena_adapter *);
128 static void	ena_free_all_rx_bufs(struct ena_adapter *);
129 static void	ena_free_tx_bufs(struct ena_adapter *, unsigned int);
130 static void	ena_free_all_tx_bufs(struct ena_adapter *);
131 static void	ena_destroy_all_tx_queues(struct ena_adapter *);
132 static void	ena_destroy_all_rx_queues(struct ena_adapter *);
133 static void	ena_destroy_all_io_queues(struct ena_adapter *);
134 static int	ena_create_io_queues(struct ena_adapter *);
135 static int	ena_handle_msix(void *);
136 static int	ena_enable_msix(struct ena_adapter *);
137 static void	ena_setup_mgmnt_intr(struct ena_adapter *);
138 static int	ena_setup_io_intr(struct ena_adapter *);
139 static int	ena_request_mgmnt_irq(struct ena_adapter *);
140 static int	ena_request_io_irq(struct ena_adapter *);
141 static void	ena_free_mgmnt_irq(struct ena_adapter *);
142 static void	ena_free_io_irq(struct ena_adapter *);
143 static void	ena_free_irqs(struct ena_adapter*);
144 static void	ena_disable_msix(struct ena_adapter *);
145 static void	ena_unmask_all_io_irqs(struct ena_adapter *);
146 static int	ena_rss_configure(struct ena_adapter *);
147 static int	ena_up_complete(struct ena_adapter *);
148 static uint64_t	ena_get_counter(if_t, ift_counter);
149 static int	ena_media_change(if_t);
150 static void	ena_media_status(if_t, struct ifmediareq *);
151 static void	ena_init(void *);
152 static int	ena_ioctl(if_t, u_long, caddr_t);
153 static int	ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
154 static void	ena_update_host_info(struct ena_admin_host_info *, if_t);
155 static void	ena_update_hwassist(struct ena_adapter *);
156 static int	ena_setup_ifnet(device_t, struct ena_adapter *,
157     struct ena_com_dev_get_features_ctx *);
158 static int	ena_enable_wc(struct resource *);
159 static int	ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
160     struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
161 static uint32_t	ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
162     struct ena_com_dev_get_features_ctx *);
163 static int	ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
164 static int	ena_rss_init_default(struct ena_adapter *);
165 static void	ena_rss_init_default_deferred(void *);
166 static void	ena_config_host_info(struct ena_com_dev *, device_t);
167 static int	ena_attach(device_t);
168 static int	ena_detach(device_t);
169 static int	ena_device_init(struct ena_adapter *, device_t,
170     struct ena_com_dev_get_features_ctx *, int *);
171 static int	ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
172 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
173 static void	unimplemented_aenq_handler(void *,
174     struct ena_admin_aenq_entry *);
175 static int	ena_copy_eni_metrics(struct ena_adapter *);
176 static void	ena_timer_service(void *);
177 
178 static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
179 
180 static ena_vendor_info_t ena_vendor_info_array[] = {
181     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
182     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF_RSERV0, 0},
183     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
184     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF_RSERV0, 0},
185     /* Last entry */
186     { 0, 0, 0 }
187 };
188 
189 /*
190  * Contains pointers to event handlers, e.g. link state change.
191  */
192 static struct ena_aenq_handlers aenq_handlers;
193 
194 void
195 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
196 {
197 	if (error != 0)
198 		return;
199 	*(bus_addr_t *) arg = segs[0].ds_addr;
200 }
201 
202 int
203 ena_dma_alloc(device_t dmadev, bus_size_t size,
204     ena_mem_handle_t *dma, int mapflags, bus_size_t alignment)
205 {
206 	struct ena_adapter* adapter = device_get_softc(dmadev);
207 	uint32_t maxsize;
208 	uint64_t dma_space_addr;
209 	int error;
210 
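	/* Round the allocation size up to a multiple of PAGE_SIZE. */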
211 	maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
212 
213 	dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
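	/* A mask of zero is invalid here; fall back to the full address space. */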
214 	if (unlikely(dma_space_addr == 0))
215 		dma_space_addr = BUS_SPACE_MAXADDR;
216 
217 	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
218 	    alignment, 0,     /* alignment, bounds 		*/
219 	    dma_space_addr,   /* lowaddr of exclusion window	*/
220 	    BUS_SPACE_MAXADDR,/* highaddr of exclusion window	*/
221 	    NULL, NULL,	      /* filter, filterarg 		*/
222 	    maxsize,	      /* maxsize 			*/
223 	    1,		      /* nsegments 			*/
224 	    maxsize,	      /* maxsegsize 			*/
225 	    BUS_DMA_ALLOCNOW, /* flags 				*/
226 	    NULL,	      /* lockfunc 			*/
227 	    NULL,	      /* lockarg 			*/
228 	    &dma->tag);
229 	if (unlikely(error != 0)) {
230 		ena_trace(NULL, ENA_ALERT, "bus_dma_tag_create failed: %d\n", error);
231 		goto fail_tag;
232 	}
233 
234 	error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
235 	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
236 	if (unlikely(error != 0)) {
237 		ena_trace(NULL, ENA_ALERT, "bus_dmamem_alloc(%ju) failed: %d\n",
238 		    (uintmax_t)size, error);
239 		goto fail_map_create;
240 	}
241 
242 	dma->paddr = 0;
243 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
244 	    size, ena_dmamap_callback, &dma->paddr, mapflags);
245 	if (unlikely((error != 0) || (dma->paddr == 0))) {
246 		ena_trace(NULL, ENA_ALERT, "bus_dmamap_load failed: %d\n", error);
247 		goto fail_map_load;
248 	}
249 
250 	bus_dmamap_sync(dma->tag, dma->map,
251 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
252 
253 	return (0);
254 
255 fail_map_load:
256 	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
257 fail_map_create:
258 	bus_dma_tag_destroy(dma->tag);
259 fail_tag:
260 	dma->tag = NULL;
261 	dma->vaddr = NULL;
262 	dma->paddr = 0;
263 
264 	return (error);
265 }
266 
267 /*
268  * This function should generate a unique key for the whole driver.
269  * If the key was already generated in a previous call (for example,
270  * for another adapter), then it should be returned instead.
271  */
272 void
273 ena_rss_key_fill(void *key, size_t size)
274 {
275 	static bool key_generated;
276 	static uint8_t default_key[ENA_HASH_KEY_SIZE];
277 
278 	KASSERT(size <= ENA_HASH_KEY_SIZE, ("Requested more bytes than ENA RSS key can hold"));
279 
280 	if (!key_generated) {
281 		arc4random_buf(default_key, ENA_HASH_KEY_SIZE);
282 		key_generated = true;
283 	}
284 
285 	memcpy(key, default_key, size);
286 }
287 
288 static void
289 ena_free_pci_resources(struct ena_adapter *adapter)
290 {
291 	device_t pdev = adapter->pdev;
292 
293 	if (adapter->memory != NULL) {
294 		bus_release_resource(pdev, SYS_RES_MEMORY,
295 		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
296 	}
297 
298 	if (adapter->registers != NULL) {
299 		bus_release_resource(pdev, SYS_RES_MEMORY,
300 		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
301 	}
302 
303 	if (adapter->msix != NULL) {
304 		bus_release_resource(pdev, SYS_RES_MEMORY,
305 		    adapter->msix_rid, adapter->msix);
306 	}
307 }
308 
309 static int
310 ena_probe(device_t dev)
311 {
312 	ena_vendor_info_t *ent;
313 	char		adapter_name[60];
314 	uint16_t	pci_vendor_id = 0;
315 	uint16_t	pci_device_id = 0;
316 
317 	pci_vendor_id = pci_get_vendor(dev);
318 	pci_device_id = pci_get_device(dev);
319 
320 	ent = ena_vendor_info_array;
321 	while (ent->vendor_id != 0) {
322 		if ((pci_vendor_id == ent->vendor_id) &&
323 		    (pci_device_id == ent->device_id)) {
324 			ena_trace(NULL, ENA_DBG, "vendor=%x device=%x\n",
325 			    pci_vendor_id, pci_device_id);
326 
327 			snprintf(adapter_name, sizeof(adapter_name), DEVICE_DESC);
328 			device_set_desc_copy(dev, adapter_name);
329 			return (BUS_PROBE_DEFAULT);
330 		}
331 
332 		ent++;
333 
334 	}
335 
336 	return (ENXIO);
337 }
338 
339 static int
340 ena_change_mtu(if_t ifp, int new_mtu)
341 {
342 	struct ena_adapter *adapter = if_getsoftc(ifp);
343 	int rc;
344 
345 	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
346 		device_printf(adapter->pdev, "Invalid MTU setting. "
347 		    "new_mtu: %d max mtu: %d min mtu: %d\n",
348 		    new_mtu, adapter->max_mtu, ENA_MIN_MTU);
349 		return (EINVAL);
350 	}
351 
352 	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
353 	if (likely(rc == 0)) {
354 		ena_trace(NULL, ENA_DBG, "set MTU to %d\n", new_mtu);
355 		if_setmtu(ifp, new_mtu);
356 	} else {
357 		device_printf(adapter->pdev, "Failed to set MTU to %d\n",
358 		    new_mtu);
359 	}
360 
361 	return (rc);
362 }
363 
364 static inline void
365 ena_alloc_counters(counter_u64_t *begin, int size)
366 {
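	/* 'size' is in bytes; the stats struct is treated as an array of counters. */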
367 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
368 
369 	for (; begin < end; ++begin)
370 		*begin = counter_u64_alloc(M_WAITOK);
371 }
372 
373 static inline void
374 ena_free_counters(counter_u64_t *begin, int size)
375 {
376 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
377 
378 	for (; begin < end; ++begin)
379 		counter_u64_free(*begin);
380 }
381 
382 static inline void
383 ena_reset_counters(counter_u64_t *begin, int size)
384 {
385 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
386 
387 	for (; begin < end; ++begin)
388 		counter_u64_zero(*begin);
389 }
390 
391 static void
392 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
393     uint16_t qid)
394 {
395 
396 	ring->qid = qid;
397 	ring->adapter = adapter;
398 	ring->ena_dev = adapter->ena_dev;
399 	ring->first_interrupt = false;
400 	ring->no_interrupt_event_cnt = 0;
401 }
402 
403 static void
404 ena_init_io_rings_basic(struct ena_adapter *adapter)
405 {
406 	struct ena_com_dev *ena_dev;
407 	struct ena_ring *txr, *rxr;
408 	struct ena_que *que;
409 	int i;
410 
411 	ena_dev = adapter->ena_dev;
412 
413 	for (i = 0; i < adapter->num_io_queues; i++) {
414 		txr = &adapter->tx_ring[i];
415 		rxr = &adapter->rx_ring[i];
416 
417 		/* TX/RX common ring state */
418 		ena_init_io_rings_common(adapter, txr, i);
419 		ena_init_io_rings_common(adapter, rxr, i);
420 
421 		/* TX specific ring state */
422 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
423 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
424 
425 		que = &adapter->que[i];
426 		que->adapter = adapter;
427 		que->id = i;
428 		que->tx_ring = txr;
429 		que->rx_ring = rxr;
430 
431 		txr->que = que;
432 		rxr->que = que;
433 
434 		rxr->empty_rx_queue = 0;
435 		rxr->rx_mbuf_sz = ena_mbuf_sz;
436 	}
437 }
438 
439 static void
440 ena_init_io_rings_advanced(struct ena_adapter *adapter)
441 {
442 	struct ena_ring *txr, *rxr;
443 	int i;
444 
445 	for (i = 0; i < adapter->num_io_queues; i++) {
446 		txr = &adapter->tx_ring[i];
447 		rxr = &adapter->rx_ring[i];
448 
449 		/* Allocate a buf ring */
450 		txr->buf_ring_size = adapter->buf_ring_size;
451 		txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
452 		    M_WAITOK, &txr->ring_mtx);
453 
454 		/* Allocate Tx statistics. */
455 		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
456 		    sizeof(txr->tx_stats));
457 
458 		/* Allocate Rx statistics. */
459 		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
460 		    sizeof(rxr->rx_stats));
461 
462 		/* Initialize locks */
463 		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
464 		    device_get_nameunit(adapter->pdev), i);
465 		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
466 		    device_get_nameunit(adapter->pdev), i);
467 
468 		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
469 	}
470 }
471 
472 static void
473 ena_init_io_rings(struct ena_adapter *adapter)
474 {
475 	/*
476 	 * IO rings initialization can be divided into 2 steps:
477 	 *   1. Initialize variables and fields with initial values and copy
478 	 *      them from adapter/ena_dev (basic)
479 	 *   2. Allocate mutex, counters and buf_ring (advanced)
480 	 */
481 	ena_init_io_rings_basic(adapter);
482 	ena_init_io_rings_advanced(adapter);
483 }
484 
485 static void
486 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
487 {
488 	struct ena_ring *txr = &adapter->tx_ring[qid];
489 	struct ena_ring *rxr = &adapter->rx_ring[qid];
490 
491 	ena_free_counters((counter_u64_t *)&txr->tx_stats,
492 	    sizeof(txr->tx_stats));
493 	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
494 	    sizeof(rxr->rx_stats));
495 
496 	ENA_RING_MTX_LOCK(txr);
497 	drbr_free(txr->br, M_DEVBUF);
498 	ENA_RING_MTX_UNLOCK(txr);
499 
500 	mtx_destroy(&txr->ring_mtx);
501 }
502 
503 static void
504 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
505 {
506 	int i;
507 
508 	for (i = 0; i < adapter->num_io_queues; i++)
509 		ena_free_io_ring_resources(adapter, i);
510 
511 }
512 
513 static int
514 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
515 {
516 	int ret;
517 
518 	/* Create DMA tag for Tx buffers */
519 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
520 	    1, 0,				  /* alignment, bounds 	     */
521 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
522 	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
523 	    NULL, NULL,				  /* filter, filterarg 	     */
524 	    ENA_TSO_MAXSIZE,			  /* maxsize 		     */
525 	    adapter->max_tx_sgl_size - 1,	  /* nsegments 		     */
526 	    ENA_TSO_MAXSIZE,			  /* maxsegsize 	     */
527 	    0,					  /* flags 		     */
528 	    NULL,				  /* lockfunc 		     */
529 	    NULL,				  /* lockfuncarg 	     */
530 	    &adapter->tx_buf_tag);
531 
532 	return (ret);
533 }
534 
535 static int
536 ena_free_tx_dma_tag(struct ena_adapter *adapter)
537 {
538 	int ret;
539 
540 	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
541 
542 	if (likely(ret == 0))
543 		adapter->tx_buf_tag = NULL;
544 
545 	return (ret);
546 }
547 
548 static int
549 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
550 {
551 	int ret;
552 
553 	/* Create DMA tag for Rx buffers*/
554 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
555 	    1, 0,				  /* alignment, bounds 	     */
556 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
557 	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
558 	    NULL, NULL,				  /* filter, filterarg 	     */
559 	    ena_mbuf_sz,			  /* maxsize 		     */
560 	    adapter->max_rx_sgl_size,		  /* nsegments 		     */
561 	    ena_mbuf_sz,			  /* maxsegsize 	     */
562 	    0,					  /* flags 		     */
563 	    NULL,				  /* lockfunc 		     */
564 	    NULL,				  /* lockarg 		     */
565 	    &adapter->rx_buf_tag);
566 
567 	return (ret);
568 }
569 
570 static int
571 ena_free_rx_dma_tag(struct ena_adapter *adapter)
572 {
573 	int ret;
574 
575 	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
576 
577 	if (likely(ret == 0))
578 		adapter->rx_buf_tag = NULL;
579 
580 	return (ret);
581 }
582 
583 static void
584 ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
585 {
586 	struct ena_adapter *adapter = tx_ring->adapter;
587 	struct ena_tx_buffer *tx_info;
588 	bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
589 	int i;
590 #ifdef DEV_NETMAP
591 	struct ena_netmap_tx_info *nm_info;
592 	int j;
593 #endif /* DEV_NETMAP */
594 
595 	for (i = 0; i < tx_ring->ring_size; ++i) {
596 		tx_info = &tx_ring->tx_buffer_info[i];
597 #ifdef DEV_NETMAP
598 		if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
599 			nm_info = &tx_info->nm_info;
600 			for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
601 				if (nm_info->map_seg[j] != NULL) {
602 					bus_dmamap_destroy(tx_tag,
603 					    nm_info->map_seg[j]);
604 					nm_info->map_seg[j] = NULL;
605 				}
606 			}
607 		}
608 #endif /* DEV_NETMAP */
609 		if (tx_info->dmamap != NULL) {
610 			bus_dmamap_destroy(tx_tag, tx_info->dmamap);
611 			tx_info->dmamap = NULL;
612 		}
613 	}
614 }
615 
616 /**
617  * ena_setup_tx_resources - allocate Tx resources (Descriptors)
618  * @adapter: network interface device structure
619  * @qid: queue index
620  *
621  * Returns 0 on success, or an error code on failure.
622  **/
623 static int
624 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
625 {
626 	struct ena_que *que = &adapter->que[qid];
627 	struct ena_ring *tx_ring = que->tx_ring;
628 	int size, i, err;
629 #ifdef DEV_NETMAP
630 	bus_dmamap_t *map;
631 	int j;
632 
633 	ena_netmap_reset_tx_ring(adapter, qid);
634 #endif /* DEV_NETMAP */
635 
636 	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
637 
638 	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
639 	if (unlikely(tx_ring->tx_buffer_info == NULL))
640 		return (ENOMEM);
641 
642 	size = sizeof(uint16_t) * tx_ring->ring_size;
643 	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
644 	if (unlikely(tx_ring->free_tx_ids == NULL))
645 		goto err_buf_info_free;
646 
647 	size = tx_ring->tx_max_header_size;
648 	tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
649 	    M_NOWAIT | M_ZERO);
650 	if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
651 		goto err_tx_ids_free;
652 
653 	/* Req id stack for TX OOO completions */
654 	for (i = 0; i < tx_ring->ring_size; i++)
655 		tx_ring->free_tx_ids[i] = i;
656 
657 	/* Reset TX statistics. */
658 	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
659 	    sizeof(tx_ring->tx_stats));
660 
661 	tx_ring->next_to_use = 0;
662 	tx_ring->next_to_clean = 0;
663 	tx_ring->acum_pkts = 0;
664 
665 	/* Make sure that drbr is empty */
666 	ENA_RING_MTX_LOCK(tx_ring);
667 	drbr_flush(adapter->ifp, tx_ring->br);
668 	ENA_RING_MTX_UNLOCK(tx_ring);
669 
670 	/* ... and create the buffer DMA maps */
671 	for (i = 0; i < tx_ring->ring_size; i++) {
672 		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
673 		    &tx_ring->tx_buffer_info[i].dmamap);
674 		if (unlikely(err != 0)) {
675 			ena_trace(NULL, ENA_ALERT,
676 			    "Unable to create Tx DMA map for buffer %d\n",
677 			    i);
678 			goto err_map_release;
679 		}
680 
681 #ifdef DEV_NETMAP
682 		if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
683 			map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
684 			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
685 				err = bus_dmamap_create(adapter->tx_buf_tag, 0,
686 				    &map[j]);
687 				if (unlikely(err != 0)) {
688 					ena_trace(NULL, ENA_ALERT, "Unable to create "
689 					    "Tx DMA for buffer %d %d\n", i, j);
690 					goto err_map_release;
691 				}
692 			}
693 		}
694 #endif /* DEV_NETMAP */
695 	}
696 
697 	/* Allocate taskqueues */
698 	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
699 	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
700 	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
701 	if (unlikely(tx_ring->enqueue_tq == NULL)) {
702 		ena_trace(NULL, ENA_ALERT,
703 		    "Unable to create taskqueue for enqueue task\n");
704 		i = tx_ring->ring_size;
705 		goto err_map_release;
706 	}
707 
708 	tx_ring->running = true;
709 
710 	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
711 	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
712 
713 	return (0);
714 
715 err_map_release:
716 	ena_release_all_tx_dmamap(tx_ring);
717 err_tx_ids_free:
718 	free(tx_ring->free_tx_ids, M_DEVBUF);
719 	tx_ring->free_tx_ids = NULL;
720 err_buf_info_free:
721 	free(tx_ring->tx_buffer_info, M_DEVBUF);
722 	tx_ring->tx_buffer_info = NULL;
723 
724 	return (ENOMEM);
725 }
726 
727 /**
728  * ena_free_tx_resources - Free Tx Resources per Queue
729  * @adapter: network interface device structure
730  * @qid: queue index
731  *
732  * Free all transmit software resources
733  **/
734 static void
735 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
736 {
737 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
738 #ifdef DEV_NETMAP
739 	struct ena_netmap_tx_info *nm_info;
740 	int j;
741 #endif /* DEV_NETMAP */
742 
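	/* Cancel the pending enqueue task; drain it if it is already running. */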
743 	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
744 	    NULL))
745 		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
746 
747 	taskqueue_free(tx_ring->enqueue_tq);
748 
749 	ENA_RING_MTX_LOCK(tx_ring);
750 	/* Flush buffer ring, */
751 	drbr_flush(adapter->ifp, tx_ring->br);
752 
753 	/* Free buffer DMA maps, */
754 	for (int i = 0; i < tx_ring->ring_size; i++) {
755 		bus_dmamap_sync(adapter->tx_buf_tag,
756 		    tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
757 		bus_dmamap_unload(adapter->tx_buf_tag,
758 		    tx_ring->tx_buffer_info[i].dmamap);
759 		bus_dmamap_destroy(adapter->tx_buf_tag,
760 		    tx_ring->tx_buffer_info[i].dmamap);
761 
762 #ifdef DEV_NETMAP
763 		if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
764 			nm_info = &tx_ring->tx_buffer_info[i].nm_info;
765 			for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
766 				if (nm_info->socket_buf_idx[j] != 0) {
767 					bus_dmamap_sync(adapter->tx_buf_tag,
768 					    nm_info->map_seg[j],
769 					    BUS_DMASYNC_POSTWRITE);
770 					ena_netmap_unload(adapter,
771 					    nm_info->map_seg[j]);
772 				}
773 				bus_dmamap_destroy(adapter->tx_buf_tag,
774 				    nm_info->map_seg[j]);
775 				nm_info->socket_buf_idx[j] = 0;
776 			}
777 		}
778 #endif /* DEV_NETMAP */
779 
780 		m_freem(tx_ring->tx_buffer_info[i].mbuf);
781 		tx_ring->tx_buffer_info[i].mbuf = NULL;
782 	}
783 	ENA_RING_MTX_UNLOCK(tx_ring);
784 
785 	/* And free allocated memory. */
786 	free(tx_ring->tx_buffer_info, M_DEVBUF);
787 	tx_ring->tx_buffer_info = NULL;
788 
789 	free(tx_ring->free_tx_ids, M_DEVBUF);
790 	tx_ring->free_tx_ids = NULL;
791 
792 	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
793 	tx_ring->push_buf_intermediate_buf = NULL;
794 }
795 
796 /**
797  * ena_setup_all_tx_resources - allocate all queues Tx resources
798  * @adapter: network interface device structure
799  *
800  * Returns 0 on success, or an error code on failure.
801  **/
802 static int
803 ena_setup_all_tx_resources(struct ena_adapter *adapter)
804 {
805 	int i, rc;
806 
807 	for (i = 0; i < adapter->num_io_queues; i++) {
808 		rc = ena_setup_tx_resources(adapter, i);
809 		if (rc != 0) {
810 			device_printf(adapter->pdev,
811 			    "Allocation for Tx Queue %u failed\n", i);
812 			goto err_setup_tx;
813 		}
814 	}
815 
816 	return (0);
817 
818 err_setup_tx:
819 	/* Rewind the index freeing the rings as we go */
820 	while (i--)
821 		ena_free_tx_resources(adapter, i);
822 	return (rc);
823 }
824 
825 /**
826  * ena_free_all_tx_resources - Free Tx Resources for All Queues
827  * @adapter: network interface device structure
828  *
829  * Free all transmit software resources
830  **/
831 static void
832 ena_free_all_tx_resources(struct ena_adapter *adapter)
833 {
834 	int i;
835 
836 	for (i = 0; i < adapter->num_io_queues; i++)
837 		ena_free_tx_resources(adapter, i);
838 }
839 
840 /**
841  * ena_setup_rx_resources - allocate Rx resources (Descriptors)
842  * @adapter: network interface device structure
843  * @qid: queue index
844  *
845  * Returns 0 on success, or an error code on failure.
846  **/
847 static int
848 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
849 {
850 	struct ena_que *que = &adapter->que[qid];
851 	struct ena_ring *rx_ring = que->rx_ring;
852 	int size, err, i;
853 
854 	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
855 
856 #ifdef DEV_NETMAP
857 	ena_netmap_reset_rx_ring(adapter, qid);
858 	rx_ring->initialized = false;
859 #endif /* DEV_NETMAP */
860 
861 	/*
862 	 * Allocate an extra element so that in the rx path
863 	 * we can always prefetch rx_info + 1.
864 	 */
865 	size += sizeof(struct ena_rx_buffer);
866 
867 	rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
868 
869 	size = sizeof(uint16_t) * rx_ring->ring_size;
870 	rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
871 
872 	for (i = 0; i < rx_ring->ring_size; i++)
873 		rx_ring->free_rx_ids[i] = i;
874 
875 	/* Reset RX statistics. */
876 	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
877 	    sizeof(rx_ring->rx_stats));
878 
879 	rx_ring->next_to_clean = 0;
880 	rx_ring->next_to_use = 0;
881 
882 	/* ... and create the buffer DMA maps */
883 	for (i = 0; i < rx_ring->ring_size; i++) {
884 		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
885 		    &(rx_ring->rx_buffer_info[i].map));
886 		if (err != 0) {
887 			ena_trace(NULL, ENA_ALERT,
888 			    "Unable to create Rx DMA map for buffer %d\n", i);
889 			goto err_buf_info_unmap;
890 		}
891 	}
892 
893 	/* Create LRO for the ring */
894 	if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
895 		int err = tcp_lro_init(&rx_ring->lro);
896 		if (err != 0) {
897 			device_printf(adapter->pdev,
898 			    "LRO[%d] Initialization failed!\n", qid);
899 		} else {
900 			ena_trace(NULL, ENA_INFO,
901 			    "RX Soft LRO[%d] Initialized\n", qid);
902 			rx_ring->lro.ifp = adapter->ifp;
903 		}
904 	}
905 
906 	return (0);
907 
908 err_buf_info_unmap:
909 	while (i--) {
910 		bus_dmamap_destroy(adapter->rx_buf_tag,
911 		    rx_ring->rx_buffer_info[i].map);
912 	}
913 
914 	free(rx_ring->free_rx_ids, M_DEVBUF);
915 	rx_ring->free_rx_ids = NULL;
916 	free(rx_ring->rx_buffer_info, M_DEVBUF);
917 	rx_ring->rx_buffer_info = NULL;
918 	return (ENOMEM);
919 }
920 
921 /**
922  * ena_free_rx_resources - Free Rx Resources
923  * @adapter: network interface device structure
924  * @qid: queue index
925  *
926  * Free all receive software resources
927  **/
928 static void
929 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
930 {
931 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
932 
933 	/* Free buffer DMA maps, */
934 	for (int i = 0; i < rx_ring->ring_size; i++) {
935 		bus_dmamap_sync(adapter->rx_buf_tag,
936 		    rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
937 		m_freem(rx_ring->rx_buffer_info[i].mbuf);
938 		rx_ring->rx_buffer_info[i].mbuf = NULL;
939 		bus_dmamap_unload(adapter->rx_buf_tag,
940 		    rx_ring->rx_buffer_info[i].map);
941 		bus_dmamap_destroy(adapter->rx_buf_tag,
942 		    rx_ring->rx_buffer_info[i].map);
943 	}
944 
945 	/* free LRO resources, */
946 	tcp_lro_free(&rx_ring->lro);
947 
948 	/* free allocated memory */
949 	free(rx_ring->rx_buffer_info, M_DEVBUF);
950 	rx_ring->rx_buffer_info = NULL;
951 
952 	free(rx_ring->free_rx_ids, M_DEVBUF);
953 	rx_ring->free_rx_ids = NULL;
954 }
955 
956 /**
957  * ena_setup_all_rx_resources - allocate all queues Rx resources
958  * @adapter: network interface device structure
959  *
960  * Returns 0 on success, or an error code on failure.
961  **/
962 static int
963 ena_setup_all_rx_resources(struct ena_adapter *adapter)
964 {
965 	int i, rc = 0;
966 
967 	for (i = 0; i < adapter->num_io_queues; i++) {
968 		rc = ena_setup_rx_resources(adapter, i);
969 		if (rc != 0) {
970 			device_printf(adapter->pdev,
971 			    "Allocation for Rx Queue %u failed\n", i);
972 			goto err_setup_rx;
973 		}
974 	}
975 	return (0);
976 
977 err_setup_rx:
978 	/* rewind the index freeing the rings as we go */
979 	while (i--)
980 		ena_free_rx_resources(adapter, i);
981 	return (rc);
982 }
983 
984 /**
985  * ena_free_all_rx_resources - Free Rx resources for all queues
986  * @adapter: network interface device structure
987  *
988  * Free all receive software resources
989  **/
990 static void
991 ena_free_all_rx_resources(struct ena_adapter *adapter)
992 {
993 	int i;
994 
995 	for (i = 0; i < adapter->num_io_queues; i++)
996 		ena_free_rx_resources(adapter, i);
997 }
998 
999 static inline int
1000 ena_alloc_rx_mbuf(struct ena_adapter *adapter,
1001     struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
1002 {
1003 	struct ena_com_buf *ena_buf;
1004 	bus_dma_segment_t segs[1];
1005 	int nsegs, error;
1006 	int mlen;
1007 
1008 	/* if the previously allocated frag is not yet used */
1009 	if (unlikely(rx_info->mbuf != NULL))
1010 		return (0);
1011 
1012 	/* Get mbuf using UMA allocator */
1013 	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1014 	    rx_ring->rx_mbuf_sz);
1015 
1016 	if (unlikely(rx_info->mbuf == NULL)) {
1017 		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
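		/* Fall back to a standard (MCLBYTES) cluster mbuf. */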
1018 		rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1019 		if (unlikely(rx_info->mbuf == NULL)) {
1020 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1021 			return (ENOMEM);
1022 		}
1023 		mlen = MCLBYTES;
1024 	} else {
1025 		mlen = rx_ring->rx_mbuf_sz;
1026 	}
1027 	/* Set mbuf length */
1028 	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
1029 
1030 	/* Map packets for DMA */
1031 	ena_trace(NULL, ENA_DBG | ENA_RSC | ENA_RXPTH,
1032 	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
1033 	    adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
1034 	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
1035 	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
1036 	if (unlikely((error != 0) || (nsegs != 1))) {
1037 		ena_trace(NULL, ENA_WARNING, "failed to map mbuf, error: %d, "
1038 		    "nsegs: %d\n", error, nsegs);
1039 		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
1040 		goto exit;
1041 
1042 	}
1043 
1044 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
1045 
1046 	ena_buf = &rx_info->ena_buf;
1047 	ena_buf->paddr = segs[0].ds_addr;
1048 	ena_buf->len = mlen;
1049 
1050 	ena_trace(NULL, ENA_DBG | ENA_RSC | ENA_RXPTH,
1051 	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
1052 	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);
1053 
1054 	return (0);
1055 
1056 exit:
1057 	m_freem(rx_info->mbuf);
1058 	rx_info->mbuf = NULL;
1059 	return (EFAULT);
1060 }
1061 
1062 static void
1063 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1064     struct ena_rx_buffer *rx_info)
1065 {
1066 
1067 	if (rx_info->mbuf == NULL) {
1068 		ena_trace(NULL, ENA_WARNING, "Trying to free unallocated buffer\n");
1069 		return;
1070 	}
1071 
1072 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1073 	    BUS_DMASYNC_POSTREAD);
1074 	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1075 	m_freem(rx_info->mbuf);
1076 	rx_info->mbuf = NULL;
1077 }
1078 
1079 /**
1080  * ena_refill_rx_bufs - Refills ring with descriptors
1081  * @rx_ring: the ring which we want to feed with free descriptors
1082  * @num: number of descriptors to refill
1083  * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1084  **/
1085 int
1086 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1087 {
1088 	struct ena_adapter *adapter = rx_ring->adapter;
1089 	uint16_t next_to_use, req_id;
1090 	uint32_t i;
1091 	int rc;
1092 
1093 	ena_trace(NULL, ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d\n",
1094 	    rx_ring->qid);
1095 
1096 	next_to_use = rx_ring->next_to_use;
1097 
1098 	for (i = 0; i < num; i++) {
1099 		struct ena_rx_buffer *rx_info;
1100 
1101 		ena_trace(NULL, ENA_DBG | ENA_RXPTH | ENA_RSC,
1102 		    "RX buffer - next to use: %d\n", next_to_use);
1103 
1104 		req_id = rx_ring->free_rx_ids[next_to_use];
1105 		rx_info = &rx_ring->rx_buffer_info[req_id];
1106 #ifdef DEV_NETMAP
1107 		if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
1108 			rc = ena_netmap_alloc_rx_slot(adapter, rx_ring, rx_info);
1109 		else
1110 #endif /* DEV_NETMAP */
1111 			rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1112 		if (unlikely(rc != 0)) {
1113 			ena_trace(NULL, ENA_WARNING,
1114 			    "failed to alloc buffer for rx queue %d\n",
1115 			    rx_ring->qid);
1116 			break;
1117 		}
1118 		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1119 		    &rx_info->ena_buf, req_id);
1120 		if (unlikely(rc != 0)) {
1121 			ena_trace(NULL, ENA_WARNING,
1122 			    "failed to add buffer for rx queue %d\n",
1123 			    rx_ring->qid);
1124 			break;
1125 		}
1126 		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1127 		    rx_ring->ring_size);
1128 	}
1129 
1130 	if (unlikely(i < num)) {
1131 		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1132 		ena_trace(NULL, ENA_WARNING,
1133 		     "refilled rx qid %d with only %d mbufs (from %d)\n",
1134 		     rx_ring->qid, i, num);
1135 	}
1136 
1137 	if (likely(i != 0))
1138 		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1139 
1140 	rx_ring->next_to_use = next_to_use;
1141 	return (i);
1142 }
1143 
1144 int
1145 ena_update_buf_ring_size(struct ena_adapter *adapter,
1146     uint32_t new_buf_ring_size)
1147 {
1148 	uint32_t old_buf_ring_size;
1149 	int rc = 0;
1150 	bool dev_was_up;
1151 
1152 	ENA_LOCK_LOCK(adapter);
1153 
1154 	old_buf_ring_size = adapter->buf_ring_size;
1155 	adapter->buf_ring_size = new_buf_ring_size;
1156 
1157 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1158 	ena_down(adapter);
1159 
1160 	/* Reconfigure buf ring for all Tx rings. */
1161 	ena_free_all_io_rings_resources(adapter);
1162 	ena_init_io_rings_advanced(adapter);
1163 	if (dev_was_up) {
1164 		/*
1165 		 * If ena_up() fails, it is not because of the recent buf_ring
1166 		 * size change. In that case, we just revert to the old drbr
1167 		 * value and trigger the reset, because something else must
1168 		 * have gone wrong.
1169 		 */
1170 		rc = ena_up(adapter);
1171 		if (unlikely(rc != 0)) {
1172 			device_printf(adapter->pdev,
1173 			    "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
1174 			    new_buf_ring_size, old_buf_ring_size);
1175 
1176 			/* Revert old size and trigger the reset */
1177 			adapter->buf_ring_size = old_buf_ring_size;
1178 			ena_free_all_io_rings_resources(adapter);
1179 			ena_init_io_rings_advanced(adapter);
1180 
1181 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
1182 			    adapter);
1183 			ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
1184 
1185 		}
1186 	}
1187 
1188 	ENA_LOCK_UNLOCK(adapter);
1189 
1190 	return (rc);
1191 }
1192 
1193 int
1194 ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
1195     uint32_t new_rx_size)
1196 {
1197 	uint32_t old_tx_size, old_rx_size;
1198 	int rc = 0;
1199 	bool dev_was_up;
1200 
1201 	ENA_LOCK_LOCK(adapter);
1202 
1203 	old_tx_size = adapter->requested_tx_ring_size;
1204 	old_rx_size = adapter->requested_rx_ring_size;
1205 	adapter->requested_tx_ring_size = new_tx_size;
1206 	adapter->requested_rx_ring_size = new_rx_size;
1207 
1208 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1209 	ena_down(adapter);
1210 
1211 	/* Configure queues with new size. */
1212 	ena_init_io_rings_basic(adapter);
1213 	if (dev_was_up) {
1214 		rc = ena_up(adapter);
1215 		if (unlikely(rc != 0)) {
1216 			device_printf(adapter->pdev,
1217 			    "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
1218 			    new_tx_size, new_rx_size, old_tx_size, old_rx_size);
1219 
1220 			/* Revert old size. */
1221 			adapter->requested_tx_ring_size = old_tx_size;
1222 			adapter->requested_rx_ring_size = old_rx_size;
1223 			ena_init_io_rings_basic(adapter);
1224 
1225 			/* And try again. */
1226 			rc = ena_up(adapter);
1227 			if (unlikely(rc != 0)) {
1228 				device_printf(adapter->pdev,
1229 				    "Failed to revert old queue sizes. Triggering device reset.\n");
1230 				/*
1231 				 * If we've failed again, something else must
1232 				 * have gone wrong. After the reset, the device
1233 				 * should try to go up.
1234 				 */
1235 				ENA_FLAG_SET_ATOMIC(
1236 				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1237 				ena_trigger_reset(adapter,
1238 				    ENA_REGS_RESET_OS_TRIGGER);
1239 			}
1240 		}
1241 	}
1242 
1243 	ENA_LOCK_UNLOCK(adapter);
1244 
1245 	return (rc);
1246 }
1247 
1248 static void
1249 ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
1250 {
1251 	ena_free_all_io_rings_resources(adapter);
1252 	/* Force indirection table to be reinitialized */
1253 	ena_com_rss_destroy(adapter->ena_dev);
1254 
1255 	adapter->num_io_queues = num;
1256 	ena_init_io_rings(adapter);
1257 }
1258 
1259 /* Caller should sanitize new_num */
1260 int
1261 ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
1262 {
1263 	uint32_t old_num;
1264 	int rc = 0;
1265 	bool dev_was_up;
1266 
1267 	ENA_LOCK_LOCK(adapter);
1268 
1269 	dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
1270 	old_num = adapter->num_io_queues;
1271 	ena_down(adapter);
1272 
1273 	ena_update_io_rings(adapter, new_num);
1274 
1275 	if (dev_was_up) {
1276 		rc = ena_up(adapter);
1277 		if (unlikely(rc != 0)) {
1278 			device_printf(adapter->pdev,
1279 			    "Failed to configure device with %u IO queues. "
1280 			    "Reverting to previous value: %u\n",
1281 			    new_num, old_num);
1282 
1283 			ena_update_io_rings(adapter, old_num);
1284 
1285 			rc = ena_up(adapter);
1286 			if (unlikely(rc != 0)) {
1287 				device_printf(adapter->pdev,
1288 				    "Failed to revert to previous setup IO "
1289 				    "queues. Triggering device reset.\n");
1290 				ENA_FLAG_SET_ATOMIC(
1291 				    ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
1292 				ena_trigger_reset(adapter,
1293 				    ENA_REGS_RESET_OS_TRIGGER);
1294 			}
1295 		}
1296 	}
1297 
1298 	ENA_LOCK_UNLOCK(adapter);
1299 
1300 	return (rc);
1301 }
1302 
1303 static void
1304 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1305 {
1306 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1307 	unsigned int i;
1308 
1309 	for (i = 0; i < rx_ring->ring_size; i++) {
1310 		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1311 
1312 		if (rx_info->mbuf != NULL)
1313 			ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1314 #ifdef DEV_NETMAP
1315 		if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
1316 		    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1317 			if (rx_info->netmap_buf_idx != 0)
1318 				ena_netmap_free_rx_slot(adapter, rx_ring,
1319 				    rx_info);
1320 		}
1321 #endif /* DEV_NETMAP */
1322 	}
1323 }
1324 
1325 /**
1326  * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1327  * @adapter: network interface device structure
1328  *
1329  */
1330 static void
1331 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1332 {
1333 	struct ena_ring *rx_ring;
1334 	int i, rc, bufs_num;
1335 
1336 	for (i = 0; i < adapter->num_io_queues; i++) {
1337 		rx_ring = &adapter->rx_ring[i];
1338 		bufs_num = rx_ring->ring_size - 1;
1339 		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1340 		if (unlikely(rc != bufs_num))
1341 			ena_trace(NULL, ENA_WARNING, "refilling Queue %d failed. "
1342 			    "Allocated %d buffers from: %d\n", i, rc, bufs_num);
1343 #ifdef DEV_NETMAP
1344 		rx_ring->initialized = true;
1345 #endif /* DEV_NETMAP */
1346 	}
1347 }
1348 
1349 static void
1350 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1351 {
1352 	int i;
1353 
1354 	for (i = 0; i < adapter->num_io_queues; i++)
1355 		ena_free_rx_bufs(adapter, i);
1356 }
1357 
1358 /**
1359  * ena_free_tx_bufs - Free Tx Buffers per Queue
1360  * @adapter: network interface device structure
1361  * @qid: queue index
1362  **/
1363 static void
1364 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1365 {
1366 	bool print_once = true;
1367 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1368 
1369 	ENA_RING_MTX_LOCK(tx_ring);
1370 	for (int i = 0; i < tx_ring->ring_size; i++) {
1371 		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1372 
1373 		if (tx_info->mbuf == NULL)
1374 			continue;
1375 
1376 		if (print_once) {
1377 			device_printf(adapter->pdev,
1378 			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
1379 			    qid, i);
1380 			print_once = false;
1381 		} else {
1382 			ena_trace(NULL, ENA_DBG,
1383 			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
1384 			     qid, i);
1385 		}
1386 
1387 		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1388 		    BUS_DMASYNC_POSTWRITE);
1389 		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1390 
1391 		m_free(tx_info->mbuf);
1392 		tx_info->mbuf = NULL;
1393 	}
1394 	ENA_RING_MTX_UNLOCK(tx_ring);
1395 }
1396 
1397 static void
1398 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1399 {
1400 
1401 	for (int i = 0; i < adapter->num_io_queues; i++)
1402 		ena_free_tx_bufs(adapter, i);
1403 }
1404 
1405 static void
1406 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1407 {
1408 	uint16_t ena_qid;
1409 	int i;
1410 
1411 	for (i = 0; i < adapter->num_io_queues; i++) {
1412 		ena_qid = ENA_IO_TXQ_IDX(i);
1413 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1414 	}
1415 }
1416 
1417 static void
1418 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1419 {
1420 	uint16_t ena_qid;
1421 	int i;
1422 
1423 	for (i = 0; i < adapter->num_io_queues; i++) {
1424 		ena_qid = ENA_IO_RXQ_IDX(i);
1425 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1426 	}
1427 }
1428 
1429 static void
1430 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1431 {
1432 	struct ena_que *queue;
1433 	int i;
1434 
1435 	for (i = 0; i < adapter->num_io_queues; i++) {
1436 		queue = &adapter->que[i];
1437 		while (taskqueue_cancel(queue->cleanup_tq,
1438 		    &queue->cleanup_task, NULL))
1439 			taskqueue_drain(queue->cleanup_tq,
1440 			    &queue->cleanup_task);
1441 		taskqueue_free(queue->cleanup_tq);
1442 	}
1443 
1444 	ena_destroy_all_tx_queues(adapter);
1445 	ena_destroy_all_rx_queues(adapter);
1446 }
1447 
1448 static int
1449 ena_create_io_queues(struct ena_adapter *adapter)
1450 {
1451 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1452 	struct ena_com_create_io_ctx ctx;
1453 	struct ena_ring *ring;
1454 	struct ena_que *queue;
1455 	uint16_t ena_qid;
1456 	uint32_t msix_vector;
1457 	int rc, i;
1458 
1459 	/* Create TX queues */
1460 	for (i = 0; i < adapter->num_io_queues; i++) {
1461 		msix_vector = ENA_IO_IRQ_IDX(i);
1462 		ena_qid = ENA_IO_TXQ_IDX(i);
1463 		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1464 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1465 		ctx.queue_size = adapter->requested_tx_ring_size;
1466 		ctx.msix_vector = msix_vector;
1467 		ctx.qid = ena_qid;
1468 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1469 		if (rc != 0) {
1470 			device_printf(adapter->pdev,
1471 			    "Failed to create io TX queue #%d rc: %d\n", i, rc);
1472 			goto err_tx;
1473 		}
1474 		ring = &adapter->tx_ring[i];
1475 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1476 		    &ring->ena_com_io_sq,
1477 		    &ring->ena_com_io_cq);
1478 		if (rc != 0) {
1479 			device_printf(adapter->pdev,
1480 			    "Failed to get TX queue handlers. TX queue num"
1481 			    " %d rc: %d\n", i, rc);
1482 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1483 			goto err_tx;
1484 		}
1485 	}
1486 
1487 	/* Create RX queues */
1488 	for (i = 0; i < adapter->num_io_queues; i++) {
1489 		msix_vector = ENA_IO_IRQ_IDX(i);
1490 		ena_qid = ENA_IO_RXQ_IDX(i);
1491 		ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1492 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1493 		ctx.queue_size = adapter->requested_rx_ring_size;
1494 		ctx.msix_vector = msix_vector;
1495 		ctx.qid = ena_qid;
1496 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1497 		if (unlikely(rc != 0)) {
1498 			device_printf(adapter->pdev,
1499 			    "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1500 			goto err_rx;
1501 		}
1502 
1503 		ring = &adapter->rx_ring[i];
1504 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1505 		    &ring->ena_com_io_sq,
1506 		    &ring->ena_com_io_cq);
1507 		if (unlikely(rc != 0)) {
1508 			device_printf(adapter->pdev,
1509 			    "Failed to get RX queue handlers. RX queue num"
1510 			    " %d rc: %d\n", i, rc);
1511 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1512 			goto err_rx;
1513 		}
1514 	}
1515 
1516 	for (i = 0; i < adapter->num_io_queues; i++) {
1517 		queue = &adapter->que[i];
1518 
1519 		NET_TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
1520 		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
1521 		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
1522 
1523 		taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
1524 		    "%s queue %d cleanup",
1525 		    device_get_nameunit(adapter->pdev), i);
1526 	}
1527 
1528 	return (0);
1529 
1530 err_rx:
1531 	while (i--)
1532 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
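	/* All TX queues were created, so rewind and destroy every one of them. */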
1533 	i = adapter->num_io_queues;
1534 err_tx:
1535 	while (i--)
1536 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1537 
1538 	return (ENXIO);
1539 }
1540 
1541 /*********************************************************************
1542  *
1543  *  MSIX & Interrupt Service routine
1544  *
1545  **********************************************************************/
1546 
1547 /**
1548  * ena_intr_msix_mgmnt - MSI-X Interrupt Handler for admin/async queue
1549  * @arg: network adapter
1550  **/
1551 static void
1552 ena_intr_msix_mgmnt(void *arg)
1553 {
1554 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
1555 
1556 	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1557 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1558 		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1559 }
1560 
1561 /**
1562  * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1563  * @arg: queue
1564  **/
1565 static int
1566 ena_handle_msix(void *arg)
1567 {
1568 	struct ena_que *queue = arg;
1569 	struct ena_adapter *adapter = queue->adapter;
1570 	if_t ifp = adapter->ifp;
1571 
1572 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1573 		return (FILTER_STRAY);
1574 
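	/* Defer the actual cleanup to the queue's taskqueue thread. */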
1575 	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1576 
1577 	return (FILTER_HANDLED);
1578 }
1579 
1580 static int
1581 ena_enable_msix(struct ena_adapter *adapter)
1582 {
1583 	device_t dev = adapter->pdev;
1584 	int msix_vecs, msix_req;
1585 	int i, rc = 0;
1586 
1587 	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1588 		device_printf(dev, "Error, MSI-X is already enabled\n");
1589 		return (EINVAL);
1590 	}
1591 
1592 	/* Reserve the maximum number of MSI-X vectors we might need */
1593 	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1594 
1595 	adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1596 	    M_DEVBUF, M_WAITOK | M_ZERO);
1597 
1598 	ena_trace(NULL, ENA_DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1599 
1600 	for (i = 0; i < msix_vecs; i++) {
1601 		adapter->msix_entries[i].entry = i;
1602 		/* Vectors must start from 1 */
1603 		adapter->msix_entries[i].vector = i + 1;
1604 	}
1605 
1606 	msix_req = msix_vecs;
1607 	rc = pci_alloc_msix(dev, &msix_vecs);
1608 	if (unlikely(rc != 0)) {
1609 		device_printf(dev,
1610 		    "Failed to enable MSIX, vectors %d rc %d\n", msix_vecs, rc);
1611 
1612 		rc = ENOSPC;
1613 		goto err_msix_free;
1614 	}
1615 
1616 	if (msix_vecs != msix_req) {
1617 		if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1618 			device_printf(dev,
1619 			    "Not enough number of MSI-x allocated: %d\n",
1620 			    msix_vecs);
1621 			pci_release_msi(dev);
1622 			rc = ENOSPC;
1623 			goto err_msix_free;
1624 		}
1625 		device_printf(dev, "Enable only %d MSI-x (out of %d), reduce "
1626 		    "the number of queues\n", msix_vecs, msix_req);
1627 	}
1628 
1629 	adapter->msix_vecs = msix_vecs;
1630 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1631 
1632 	return (0);
1633 
1634 err_msix_free:
1635 	free(adapter->msix_entries, M_DEVBUF);
1636 	adapter->msix_entries = NULL;
1637 
1638 	return (rc);
1639 }
1640 
1641 static void
1642 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1643 {
1644 
1645 	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1646 	    ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1647 	    device_get_nameunit(adapter->pdev));
1648 	/*
1649 	 * Handler is NULL on purpose; it will be set
1650 	 * when the mgmnt interrupt is acquired.
1651 	 */
1652 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1653 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1654 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1655 	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1656 }
1657 
1658 static int
1659 ena_setup_io_intr(struct ena_adapter *adapter)
1660 {
1661 	static int last_bind_cpu = -1;
1662 	int irq_idx;
1663 
1664 	if (adapter->msix_entries == NULL)
1665 		return (EINVAL);
1666 
1667 	for (int i = 0; i < adapter->num_io_queues; i++) {
1668 		irq_idx = ENA_IO_IRQ_IDX(i);
1669 
1670 		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1671 		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1672 		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1673 		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1674 		adapter->irq_tbl[irq_idx].vector =
1675 		    adapter->msix_entries[irq_idx].vector;
1676 		ena_trace(NULL, ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1677 		    adapter->msix_entries[irq_idx].vector);
1678 
1679 		/*
1680 		 * We want to bind rings to the corresponding cpu
1681 		 * using something similar to the RSS round-robin technique.
1682 		 */
1683 		if (unlikely(last_bind_cpu < 0))
1684 			last_bind_cpu = CPU_FIRST();
1685 		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1686 		    last_bind_cpu;
1687 		last_bind_cpu = CPU_NEXT(last_bind_cpu);
1688 	}
1689 
1690 	return (0);
1691 }
1692 
1693 static int
1694 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1695 {
1696 	struct ena_irq *irq;
1697 	unsigned long flags;
1698 	int rc, rcc;
1699 
1700 	flags = RF_ACTIVE | RF_SHAREABLE;
1701 
1702 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1703 	irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1704 	    &irq->vector, flags);
1705 
1706 	if (unlikely(irq->res == NULL)) {
1707 		device_printf(adapter->pdev, "could not allocate "
1708 		    "irq vector: %d\n", irq->vector);
1709 		return (ENXIO);
1710 	}
1711 
1712 	rc = bus_setup_intr(adapter->pdev, irq->res,
1713 	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt,
1714 	    irq->data, &irq->cookie);
1715 	if (unlikely(rc != 0)) {
1716 		device_printf(adapter->pdev, "failed to register "
1717 		    "interrupt handler for irq %ju: %d\n",
1718 		    rman_get_start(irq->res), rc);
1719 		goto err_res_free;
1720 	}
1721 	irq->requested = true;
1722 
1723 	return (rc);
1724 
1725 err_res_free:
1726 	ena_trace(NULL, ENA_INFO | ENA_ADMQ, "releasing resource for irq %d\n",
1727 	    irq->vector);
1728 	rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1729 	    irq->vector, irq->res);
1730 	if (unlikely(rcc != 0))
1731 		device_printf(adapter->pdev, "dev has no parent while "
1732 		    "releasing res for irq: %d\n", irq->vector);
1733 	irq->res = NULL;
1734 
1735 	return (rc);
1736 }
1737 
1738 static int
1739 ena_request_io_irq(struct ena_adapter *adapter)
1740 {
1741 	struct ena_irq *irq;
1742 	unsigned long flags = 0;
1743 	int rc = 0, i, rcc;
1744 
1745 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
1746 		device_printf(adapter->pdev,
1747 		    "failed to request I/O IRQ: MSI-X is not enabled\n");
1748 		return (EINVAL);
1749 	} else {
1750 		flags = RF_ACTIVE | RF_SHAREABLE;
1751 	}
1752 
1753 	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1754 		irq = &adapter->irq_tbl[i];
1755 
1756 		if (unlikely(irq->requested))
1757 			continue;
1758 
1759 		irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1760 		    &irq->vector, flags);
1761 		if (unlikely(irq->res == NULL)) {
1762 			rc = ENOMEM;
1763 			device_printf(adapter->pdev, "could not allocate "
1764 			    "irq vector: %d\n", irq->vector);
1765 			goto err;
1766 		}
1767 
1768 		rc = bus_setup_intr(adapter->pdev, irq->res,
1769 		    INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
1770 		    irq->data, &irq->cookie);
1771 		 if (unlikely(rc != 0)) {
1772 			device_printf(adapter->pdev, "failed to register "
1773 			    "interrupt handler for irq %ju: %d\n",
1774 			    rman_get_start(irq->res), rc);
1775 			goto err;
1776 		}
1777 		irq->requested = true;
1778 
1779 		ena_trace(NULL, ENA_INFO, "queue %d - cpu %d\n",
1780 		    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1781 	}
1782 
1783 	return (rc);
1784 
1785 err:
1786 
1787 	for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
1788 		irq = &adapter->irq_tbl[i];
1789 		rcc = 0;
1790 
1791 		/* Once we have entered the err: section and irq->requested is
1792 		   true, free both the interrupt and the resource. */
1793 		if (irq->requested)
1794 			rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1795 		if (unlikely(rcc != 0))
1796 			device_printf(adapter->pdev, "could not release"
1797 			    " irq: %d, error: %d\n", irq->vector, rcc);
1798 
1799 		/* If we entered the err: section without irq->requested set, we
1800 		   know it was bus_alloc_resource_any() that needs cleanup,
1801 		   provided res is not NULL. In case res is NULL, no work is
1802 		   needed in this iteration. */
1803 		rcc = 0;
1804 		if (irq->res != NULL) {
1805 			rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1806 			    irq->vector, irq->res);
1807 		}
1808 		if (unlikely(rcc != 0))
1809 			device_printf(adapter->pdev, "dev has no parent while "
1810 			    "releasing res for irq: %d\n", irq->vector);
1811 		irq->requested = false;
1812 		irq->res = NULL;
1813 	}
1814 
1815 	return (rc);
1816 }
1817 
1818 static void
1819 ena_free_mgmnt_irq(struct ena_adapter *adapter)
1820 {
1821 	struct ena_irq *irq;
1822 	int rc;
1823 
1824 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1825 	if (irq->requested) {
1826 		ena_trace(NULL, ENA_INFO | ENA_ADMQ, "tear down irq: %d\n",
1827 		    irq->vector);
1828 		rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1829 		if (unlikely(rc != 0))
1830 			device_printf(adapter->pdev, "failed to tear "
1831 			    "down irq: %d\n", irq->vector);
1832 		irq->requested = 0;
1833 	}
1834 
1835 	if (irq->res != NULL) {
1836 		ena_trace(NULL, ENA_INFO | ENA_ADMQ, "release resource irq: %d\n",
1837 		    irq->vector);
1838 		rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1839 		    irq->vector, irq->res);
1840 		irq->res = NULL;
1841 		if (unlikely(rc != 0))
1842 			device_printf(adapter->pdev, "dev has no parent while "
1843 			    "releasing res for irq: %d\n", irq->vector);
1844 	}
1845 }
1846 
1847 static void
1848 ena_free_io_irq(struct ena_adapter *adapter)
1849 {
1850 	struct ena_irq *irq;
1851 	int rc;
1852 
1853 	for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1854 		irq = &adapter->irq_tbl[i];
1855 		if (irq->requested) {
1856 			ena_trace(NULL, ENA_INFO | ENA_IOQ, "tear down irq: %d\n",
1857 			    irq->vector);
1858 			rc = bus_teardown_intr(adapter->pdev, irq->res,
1859 			    irq->cookie);
1860 			if (unlikely(rc != 0)) {
1861 				device_printf(adapter->pdev, "failed to tear "
1862 				    "down irq: %d\n", irq->vector);
1863 			}
1864 			irq->requested = 0;
1865 		}
1866 
1867 		if (irq->res != NULL) {
1868 			ena_trace(NULL, ENA_INFO | ENA_IOQ, "release resource irq: %d\n",
1869 			    irq->vector);
1870 			rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1871 			    irq->vector, irq->res);
1872 			irq->res = NULL;
1873 			if (unlikely(rc != 0)) {
1874 				device_printf(adapter->pdev, "dev has no parent"
1875 				    " while releasing res for irq: %d\n",
1876 				    irq->vector);
1877 			}
1878 		}
1879 	}
1880 }
1881 
1882 static void
1883 ena_free_irqs(struct ena_adapter* adapter)
1884 {
1885 
1886 	ena_free_io_irq(adapter);
1887 	ena_free_mgmnt_irq(adapter);
1888 	ena_disable_msix(adapter);
1889 }
1890 
1891 static void
1892 ena_disable_msix(struct ena_adapter *adapter)
1893 {
1894 
1895 	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1896 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1897 		pci_release_msi(adapter->pdev);
1898 	}
1899 
1900 	adapter->msix_vecs = 0;
1901 	if (adapter->msix_entries != NULL)
1902 		free(adapter->msix_entries, M_DEVBUF);
1903 	adapter->msix_entries = NULL;
1904 }
1905 
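/*
 * ena_com_update_intr_reg(&intr_reg, 0, 0, true) encodes zero Rx/Tx
 * moderation delay together with the unmask flag; writing that value to each
 * IO completion queue re-arms its interrupt once the queues are ready.
 */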
1906 static void
1907 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
1908 {
1909 	struct ena_com_io_cq* io_cq;
1910 	struct ena_eth_io_intr_reg intr_reg;
1911 	uint16_t ena_qid;
1912 	int i;
1913 
1914 	/* Unmask interrupts for all queues */
1915 	for (i = 0; i < adapter->num_io_queues; i++) {
1916 		ena_qid = ENA_IO_TXQ_IDX(i);
1917 		io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1918 		ena_com_update_intr_reg(&intr_reg, 0, 0, true);
1919 		ena_com_unmask_intr(io_cq, &intr_reg);
1920 	}
1921 }
1922 
1923 /* Configure the Rx forwarding */
1924 static int
1925 ena_rss_configure(struct ena_adapter *adapter)
1926 {
1927 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1928 	int rc;
1929 
1930 	/* In case the RSS table was destroyed */
1931 	if (!ena_dev->rss.tbl_log_size) {
1932 		rc = ena_rss_init_default(adapter);
1933 		if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
1934 			device_printf(adapter->pdev,
1935 			    "WARNING: RSS was not properly re-initialized,"
1936 			    " it will affect bandwidth\n");
1937 			ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
1938 			return (rc);
1939 		}
1940 	}
1941 
1942 	/* Set indirect table */
1943 	rc = ena_com_indirect_table_set(ena_dev);
1944 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1945 		return (rc);
1946 
1947 	/* Configure hash function (if supported) */
1948 	rc = ena_com_set_hash_function(ena_dev);
1949 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1950 		return (rc);
1951 
1952 	/* Configure hash inputs (if supported) */
1953 	rc = ena_com_set_hash_ctrl(ena_dev);
1954 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
1955 		return (rc);
1956 
1957 	return (0);
1958 }
1959 
1960 static int
1961 ena_up_complete(struct ena_adapter *adapter)
1962 {
1963 	int rc;
1964 
1965 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1966 		rc = ena_rss_configure(adapter);
1967 		if (rc != 0) {
1968 			device_printf(adapter->pdev,
1969 			    "Failed to configure RSS\n");
1970 			return (rc);
1971 		}
1972 	}
1973 
1974 	rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
1975 	if (unlikely(rc != 0))
1976 		return (rc);
1977 
1978 	ena_refill_all_rx_bufs(adapter);
1979 	ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
1980 	    sizeof(adapter->hw_stats));
1981 
1982 	return (0);
1983 }
1984 
1985 static void
1986 set_io_rings_size(struct ena_adapter *adapter, int new_tx_size,
1987     int new_rx_size)
1988 {
1989 	int i;
1990 
1991 	for (i = 0; i < adapter->num_io_queues; i++) {
1992 		adapter->tx_ring[i].ring_size = new_tx_size;
1993 		adapter->rx_ring[i].ring_size = new_rx_size;
1994 	}
1995 }
1996 
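/*
 * Try to create the IO queues at the requested ring sizes. On ENOMEM, the
 * sizes are halved (see the backoff logic at the bottom of the loop) until
 * either the allocation succeeds or ENA_MIN_RING_SIZE would be crossed.
 */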
1997 static int
1998 create_queues_with_size_backoff(struct ena_adapter *adapter)
1999 {
2000 	int rc;
2001 	uint32_t cur_rx_ring_size, cur_tx_ring_size;
2002 	uint32_t new_rx_ring_size, new_tx_ring_size;
2003 
2004 	/*
2005 	 * The current queue sizes might be smaller than the requested ones
2006 	 * due to past queue allocation failures.
2007 	 */
2008 	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2009 	    adapter->requested_rx_ring_size);
2010 
2011 	while (1) {
2012 		/* Allocate transmit descriptors */
2013 		rc = ena_setup_all_tx_resources(adapter);
2014 		if (unlikely(rc != 0)) {
2015 			ena_trace(NULL, ENA_ALERT, "err_setup_tx\n");
2016 			goto err_setup_tx;
2017 		}
2018 
2019 		/* Allocate receive descriptors */
2020 		rc = ena_setup_all_rx_resources(adapter);
2021 		if (unlikely(rc != 0)) {
2022 			ena_trace(NULL, ENA_ALERT, "err_setup_rx\n");
2023 			goto err_setup_rx;
2024 		}
2025 
2026 		/* Create IO queues for Rx & Tx */
2027 		rc = ena_create_io_queues(adapter);
2028 		if (unlikely(rc != 0)) {
2029 			ena_trace(NULL, ENA_ALERT,
2030 			    "create IO queues failed\n");
2031 			goto err_io_que;
2032 		}
2033 
2034 		return (0);
2035 
2036 err_io_que:
2037 		ena_free_all_rx_resources(adapter);
2038 err_setup_rx:
2039 		ena_free_all_tx_resources(adapter);
2040 err_setup_tx:
2041 		/*
2042 		 * Lower the ring size if ENOMEM. Otherwise, return the
2043 		 * error straightaway.
2044 		 */
2045 		if (unlikely(rc != ENOMEM)) {
2046 			ena_trace(NULL, ENA_ALERT,
2047 			    "Queue creation failed with error code: %d\n", rc);
2048 			return (rc);
2049 		}
2050 
2051 		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2052 		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2053 
2054 		device_printf(adapter->pdev,
2055 		    "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2056 		    cur_tx_ring_size, cur_rx_ring_size);
2057 
2058 		new_tx_ring_size = cur_tx_ring_size;
2059 		new_rx_ring_size = cur_rx_ring_size;
2060 
2061 		/*
2062 		 * Decrease the size of a larger queue, or decrease both if they are
2063 		 * the same size.
2064 		 */
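		/*
		 * For example, with TX=1024 and RX=512 only the TX ring is
		 * halved (to 512); once both rings are 512, both are halved
		 * to 256, and so on.
		 */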
2065 		if (cur_rx_ring_size <= cur_tx_ring_size)
2066 			new_tx_ring_size = cur_tx_ring_size / 2;
2067 		if (cur_rx_ring_size >= cur_tx_ring_size)
2068 			new_rx_ring_size = cur_rx_ring_size / 2;
2069 
2070 		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2071 		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2072 			device_printf(adapter->pdev,
2073 			    "Queue creation failed with the smallest possible queue size "
2074 			    "of %d for both queues. Not retrying with smaller queues\n",
2075 			    ENA_MIN_RING_SIZE);
2076 			return (rc);
2077 		}
2078 
2079 		set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
2080 	}
2081 }
2082 
2083 int
2084 ena_up(struct ena_adapter *adapter)
2085 {
2086 	int rc = 0;
2087 
2088 	if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2089 		device_printf(adapter->pdev, "device is not attached!\n");
2090 		return (ENXIO);
2091 	}
2092 
2093 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2094 		return (0);
2095 
2096 	device_printf(adapter->pdev, "device is going UP\n");
2097 
2098 	/* setup interrupts for IO queues */
2099 	rc = ena_setup_io_intr(adapter);
2100 	if (unlikely(rc != 0)) {
2101 		ena_trace(NULL, ENA_ALERT, "error setting up IO interrupt\n");
2102 		goto error;
2103 	}
2104 	rc = ena_request_io_irq(adapter);
2105 	if (unlikely(rc != 0)) {
2106 		ena_trace(NULL, ENA_ALERT, "err_req_irq\n");
2107 		goto error;
2108 	}
2109 
2110 	device_printf(adapter->pdev,
2111 	    "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, "
2112 	    "LLQ is %s\n",
2113 	    adapter->num_io_queues,
2114 	    adapter->requested_rx_ring_size,
2115 	    adapter->requested_tx_ring_size,
2116 	    (adapter->ena_dev->tx_mem_queue_type ==
2117 	        ENA_ADMIN_PLACEMENT_POLICY_DEV) ?  "ENABLED" : "DISABLED");
2118 
2119 	rc = create_queues_with_size_backoff(adapter);
2120 	if (unlikely(rc != 0)) {
2121 		ena_trace(NULL, ENA_ALERT,
2122 		    "error creating queues with size backoff\n");
2123 		goto err_create_queues_with_backoff;
2124 	}
2125 
2126 	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2127 		if_link_state_change(adapter->ifp, LINK_STATE_UP);
2128 
2129 	rc = ena_up_complete(adapter);
2130 	if (unlikely(rc != 0))
2131 		goto err_up_complete;
2132 
2133 	counter_u64_add(adapter->dev_stats.interface_up, 1);
2134 
2135 	ena_update_hwassist(adapter);
2136 
2137 	if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
2138 		IFF_DRV_OACTIVE);
2139 
2140 	/* Activate the timer service only if the device is running.
2141 	 * If this flag is not set, it means that the driver is being
2142 	 * reset and the timer service will be activated afterwards.
2143 	 */
2144 	if (ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) {
2145 		callout_reset_sbt(&adapter->timer_service, SBT_1S,
2146 			SBT_1S, ena_timer_service, (void *)adapter, 0);
2147 	}
2148 
2149 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2150 
2151 	ena_unmask_all_io_irqs(adapter);
2152 
2153 	return (0);
2154 
2155 err_up_complete:
2156 	ena_destroy_all_io_queues(adapter);
2157 	ena_free_all_rx_resources(adapter);
2158 	ena_free_all_tx_resources(adapter);
2159 err_create_queues_with_backoff:
2160 	ena_free_io_irq(adapter);
2161 error:
2162 	return (rc);
2163 }
2164 
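/*
 * if_get_counter callback. The drop counters are refreshed from keep-alive
 * AENQ events (see ena_keep_alive_wd()), while the packet and byte counters
 * are maintained by the datapath.
 */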
2165 static uint64_t
2166 ena_get_counter(if_t ifp, ift_counter cnt)
2167 {
2168 	struct ena_adapter *adapter;
2169 	struct ena_hw_stats *stats;
2170 
2171 	adapter = if_getsoftc(ifp);
2172 	stats = &adapter->hw_stats;
2173 
2174 	switch (cnt) {
2175 	case IFCOUNTER_IPACKETS:
2176 		return (counter_u64_fetch(stats->rx_packets));
2177 	case IFCOUNTER_OPACKETS:
2178 		return (counter_u64_fetch(stats->tx_packets));
2179 	case IFCOUNTER_IBYTES:
2180 		return (counter_u64_fetch(stats->rx_bytes));
2181 	case IFCOUNTER_OBYTES:
2182 		return (counter_u64_fetch(stats->tx_bytes));
2183 	case IFCOUNTER_IQDROPS:
2184 		return (counter_u64_fetch(stats->rx_drops));
2185 	case IFCOUNTER_OQDROPS:
2186 		return (counter_u64_fetch(stats->tx_drops));
2187 	default:
2188 		return (if_get_counter_default(ifp, cnt));
2189 	}
2190 }
2191 
2192 static int
2193 ena_media_change(if_t ifp)
2194 {
2195 	/* Media Change is not supported by firmware */
2196 	return (0);
2197 }
2198 
2199 static void
2200 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2201 {
2202 	struct ena_adapter *adapter = if_getsoftc(ifp);
2203 	ena_trace(NULL, ENA_DBG, "enter\n");
2204 
2205 	ENA_LOCK_LOCK(adapter);
2206 
2207 	ifmr->ifm_status = IFM_AVALID;
2208 	ifmr->ifm_active = IFM_ETHER;
2209 
2210 	if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2211 		ENA_LOCK_UNLOCK(adapter);
2212 		ena_trace(NULL, ENA_INFO, "Link is down\n");
2213 		return;
2214 	}
2215 
2216 	ifmr->ifm_status |= IFM_ACTIVE;
2217 	ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2218 
2219 	ENA_LOCK_UNLOCK(adapter);
2220 }
2221 
2222 static void
2223 ena_init(void *arg)
2224 {
2225 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
2226 
2227 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2228 		ENA_LOCK_LOCK(adapter);
2229 		ena_up(adapter);
2230 		ENA_LOCK_UNLOCK(adapter);
2231 	}
2232 }
2233 
2234 static int
2235 ena_ioctl(if_t ifp, u_long command, caddr_t data)
2236 {
2237 	struct ena_adapter *adapter;
2238 	struct ifreq *ifr;
2239 	int rc;
2240 
2241 	adapter = ifp->if_softc;
2242 	ifr = (struct ifreq *)data;
2243 
2244 	/*
2245 	 * Acquire the lock to prevent the up and down routines from running in parallel.
2246 	 */
2247 	rc = 0;
2248 	switch (command) {
2249 	case SIOCSIFMTU:
2250 		if (ifp->if_mtu == ifr->ifr_mtu)
2251 			break;
2252 		ENA_LOCK_LOCK(adapter);
2253 		ena_down(adapter);
2254 
2255 		ena_change_mtu(ifp, ifr->ifr_mtu);
2256 
2257 		rc = ena_up(adapter);
2258 		ENA_LOCK_UNLOCK(adapter);
2259 		break;
2260 
2261 	case SIOCSIFFLAGS:
2262 		if ((ifp->if_flags & IFF_UP) != 0) {
2263 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2264 				if ((ifp->if_flags & (IFF_PROMISC |
2265 				    IFF_ALLMULTI)) != 0) {
2266 					device_printf(adapter->pdev,
2267 					    "ioctl promisc/allmulti\n");
2268 				}
2269 			} else {
2270 				ENA_LOCK_LOCK(adapter);
2271 				rc = ena_up(adapter);
2272 				ENA_LOCK_UNLOCK(adapter);
2273 			}
2274 		} else {
2275 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2276 				ENA_LOCK_LOCK(adapter);
2277 				ena_down(adapter);
2278 				ENA_LOCK_UNLOCK(adapter);
2279 			}
2280 		}
2281 		break;
2282 
2283 	case SIOCADDMULTI:
2284 	case SIOCDELMULTI:
2285 		break;
2286 
2287 	case SIOCSIFMEDIA:
2288 	case SIOCGIFMEDIA:
2289 		rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2290 		break;
2291 
2292 	case SIOCSIFCAP:
2293 		{
2294 			int reinit = 0;
2295 
2296 			if (ifr->ifr_reqcap != ifp->if_capenable) {
2297 				ifp->if_capenable = ifr->ifr_reqcap;
2298 				reinit = 1;
2299 			}
2300 
2301 			if ((reinit != 0) &&
2302 			    ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2303 				ENA_LOCK_LOCK(adapter);
2304 				ena_down(adapter);
2305 				rc = ena_up(adapter);
2306 				ENA_LOCK_UNLOCK(adapter);
2307 			}
2308 		}
2309 
2310 		break;
2311 	default:
2312 		rc = ether_ioctl(ifp, command, data);
2313 		break;
2314 	}
2315 
2316 	return (rc);
2317 }
2318 
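/*
 * Translate the device's offload feature bits into IFCAP_* capabilities.
 * LRO and jumbo MTU are always advertised, as they do not depend on any
 * device offload feature.
 */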
2319 static int
2320 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2321 {
2322 	int caps = 0;
2323 
2324 	if ((feat->offload.tx &
2325 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2326 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2327 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2328 		caps |= IFCAP_TXCSUM;
2329 
2330 	if ((feat->offload.tx &
2331 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2332 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2333 		caps |= IFCAP_TXCSUM_IPV6;
2334 
2335 	if ((feat->offload.tx &
2336 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2337 		caps |= IFCAP_TSO4;
2338 
2339 	if ((feat->offload.tx &
2340 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2341 		caps |= IFCAP_TSO6;
2342 
2343 	if ((feat->offload.rx_supported &
2344 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2345 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2346 		caps |= IFCAP_RXCSUM;
2347 
2348 	if ((feat->offload.rx_supported &
2349 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2350 		caps |= IFCAP_RXCSUM_IPV6;
2351 
2352 	caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2353 
2354 	return (caps);
2355 }
2356 
2357 static void
2358 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2359 {
2360 
2361 	host_info->supported_network_features[0] =
2362 	    (uint32_t)if_getcapabilities(ifp);
2363 }
2364 
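/*
 * Translate the enabled ifnet capabilities into CSUM_* hwassist flags,
 * honoring what the device advertised in tx_offload_cap; e.g. IFCAP_TXCSUM
 * yields CSUM_IP only if the device reported IPv4 L3 checksum offload.
 */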
2365 static void
2366 ena_update_hwassist(struct ena_adapter *adapter)
2367 {
2368 	if_t ifp = adapter->ifp;
2369 	uint32_t feat = adapter->tx_offload_cap;
2370 	int cap = if_getcapenable(ifp);
2371 	int flags = 0;
2372 
2373 	if_clearhwassist(ifp);
2374 
2375 	if ((cap & IFCAP_TXCSUM) != 0) {
2376 		if ((feat &
2377 		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2378 			flags |= CSUM_IP;
2379 		if ((feat &
2380 		    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2381 		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2382 			flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2383 	}
2384 
2385 	if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2386 		flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2387 
2388 	if ((cap & IFCAP_TSO4) != 0)
2389 		flags |= CSUM_IP_TSO;
2390 
2391 	if ((cap & IFCAP_TSO6) != 0)
2392 		flags |= CSUM_IP6_TSO;
2393 
2394 	if_sethwassistbits(ifp, flags, 0);
2395 }
2396 
2397 static int
2398 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2399     struct ena_com_dev_get_features_ctx *feat)
2400 {
2401 	if_t ifp;
2402 	int caps = 0;
2403 
2404 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2405 	if (unlikely(ifp == NULL)) {
2406 		ena_trace(NULL, ENA_ALERT, "can not allocate ifnet structure\n");
2407 		return (ENXIO);
2408 	}
2409 	if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2410 	if_setdev(ifp, pdev);
2411 	if_setsoftc(ifp, adapter);
2412 
2413 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
2414 	    IFF_KNOWSEPOCH);
2415 	if_setinitfn(ifp, ena_init);
2416 	if_settransmitfn(ifp, ena_mq_start);
2417 	if_setqflushfn(ifp, ena_qflush);
2418 	if_setioctlfn(ifp, ena_ioctl);
2419 	if_setgetcounterfn(ifp, ena_get_counter);
2420 
2421 	if_setsendqlen(ifp, adapter->requested_tx_ring_size);
2422 	if_setsendqready(ifp);
2423 	if_setmtu(ifp, ETHERMTU);
2424 	if_setbaudrate(ifp, 0);
2425 	/* Zeroize capabilities... */
2426 	if_setcapabilities(ifp, 0);
2427 	if_setcapenable(ifp, 0);
2428 	/* check hardware support */
2429 	caps = ena_get_dev_offloads(feat);
2430 	/* ... and set them */
2431 	if_setcapabilitiesbit(ifp, caps, 0);
2432 
2433 	/* TSO parameters */
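	/*
	 * The stack may pass TSO chains of up to ENA_TSO_MAXSIZE bytes, less
	 * the Ethernet/VLAN header, split across at most max_tx_sgl_size - 1
	 * segments (one SGL entry is kept in reserve, e.g. for the pushed
	 * header).
	 */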
2434 	ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2435 	    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2436 	ifp->if_hw_tsomaxsegcount = adapter->max_tx_sgl_size - 1;
2437 	ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2438 
2439 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2440 	if_setcapenable(ifp, if_getcapabilities(ifp));
2441 
2442 	/*
2443 	 * Specify the media types supported by this adapter and register
2444 	 * callbacks to update media and link information
2445 	 */
2446 	ifmedia_init(&adapter->media, IFM_IMASK,
2447 	    ena_media_change, ena_media_status);
2448 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2449 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2450 
2451 	ether_ifattach(ifp, adapter->mac_addr);
2452 
2453 	return (0);
2454 }
2455 
2456 void
2457 ena_down(struct ena_adapter *adapter)
2458 {
2459 	int rc;
2460 
2461 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
2462 		return;
2463 
2464 	device_printf(adapter->pdev, "device is going DOWN\n");
2465 
2466 	callout_drain(&adapter->timer_service);
2467 
2468 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2469 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
2470 		IFF_DRV_RUNNING);
2471 
2472 	ena_free_io_irq(adapter);
2473 
2474 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2475 		rc = ena_com_dev_reset(adapter->ena_dev,
2476 			adapter->reset_reason);
2477 		if (unlikely(rc != 0))
2478 			device_printf(adapter->pdev,
2479 				"Device reset failed\n");
2480 	}
2481 
2482 	ena_destroy_all_io_queues(adapter);
2483 
2484 	ena_free_all_tx_bufs(adapter);
2485 	ena_free_all_rx_bufs(adapter);
2486 	ena_free_all_tx_resources(adapter);
2487 	ena_free_all_rx_resources(adapter);
2488 
2489 	counter_u64_add(adapter->dev_stats.interface_down, 1);
2490 }
2491 
2492 static uint32_t
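/*
 * The effective maximum is the smallest of: the number of CPUs, the driver
 * limit ENA_MAX_NUM_IO_QUEUES, the device's Rx/Tx SQ/CQ limits, and the
 * available MSI-X vectors minus the one reserved for management. E.g., a
 * device advertising 32 queues on an 8-CPU instance with 9 MSI-X vectors
 * ends up with min(8, 32, 9 - 1) = 8 IO queues.
 */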
2493 ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
2494     struct ena_com_dev_get_features_ctx *get_feat_ctx)
2495 {
2496 	uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
2497 
2498 	/* Regular queues capabilities */
2499 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2500 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2501 		    &get_feat_ctx->max_queue_ext.max_queue_ext;
2502 		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
2503 			max_queue_ext->max_rx_cq_num);
2504 
2505 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
2506 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
2507 	} else {
2508 		struct ena_admin_queue_feature_desc *max_queues =
2509 		    &get_feat_ctx->max_queues;
2510 		io_tx_sq_num = max_queues->max_sq_num;
2511 		io_tx_cq_num = max_queues->max_cq_num;
2512 		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
2513 	}
2514 
2515 	/* In case of LLQ, use the LLQ fields for the TX SQ/CQ */
2516 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2517 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
2518 
2519 	max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2520 	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
2521 	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
2522 	max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
2523 	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2524 	max_num_io_queues = min_t(uint32_t, max_num_io_queues,
2525 	    pci_msix_count(pdev) - 1);
2526 
2527 	return (max_num_io_queues);
2528 }
2529 
2530 static int
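/*
 * Map the LLQ BAR as write-combining so that bursts of descriptor and header
 * writes can be merged by the CPU instead of being issued as individual
 * uncached transactions. Architectures without pmap_change_attr() support
 * return EOPNOTSUPP, which aborts the LLQ setup.
 */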
2531 ena_enable_wc(struct resource *res)
2532 {
2533 #if defined(__i386) || defined(__amd64) || defined(__aarch64__)
2534 	vm_offset_t va;
2535 	vm_size_t len;
2536 	int rc;
2537 
2538 	va = (vm_offset_t)rman_get_virtual(res);
2539 	len = rman_get_size(res);
2540 	/* Enable write combining */
2541 	rc = pmap_change_attr(va, len, VM_MEMATTR_WRITE_COMBINING);
2542 	if (unlikely(rc != 0)) {
2543 		ena_trace(NULL, ENA_ALERT, "pmap_change_attr failed, %d\n", rc);
2544 		return (rc);
2545 	}
2546 
2547 	return (0);
2548 #endif
2549 	return (EOPNOTSUPP);
2550 }
2551 
2552 static int
2553 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
2554     struct ena_admin_feature_llq_desc *llq,
2555     struct ena_llq_configurations *llq_default_configurations)
2556 {
2557 	struct ena_adapter *adapter = device_get_softc(pdev);
2558 	int rc, rid;
2559 	uint32_t llq_feature_mask;
2560 
2561 	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
2562 	if (!(ena_dev->supported_features & llq_feature_mask)) {
2563 		device_printf(pdev,
2564 		    "LLQ is not supported. Fallback to host mode policy.\n");
2565 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2566 		return (0);
2567 	}
2568 
2569 	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
2570 	if (unlikely(rc != 0)) {
2571 		device_printf(pdev, "Failed to configure the device mode. "
2572 		    "Fallback to host mode policy.\n");
2573 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2574 		return (0);
2575 	}
2576 
2577 	/* Nothing to configure, exit */
2578 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
2579 		return (0);
2580 
2581 	/* Try to allocate resources for LLQ bar */
2582 	rid = PCIR_BAR(ENA_MEM_BAR);
2583 	adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
2584 	    &rid, RF_ACTIVE);
2585 	if (unlikely(adapter->memory == NULL)) {
2586 		device_printf(pdev, "unable to allocate LLQ bar resource. "
2587 		    "Fallback to host mode policy.\n");
2588 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2589 		return (0);
2590 	}
2591 
2592 	/* Enable write combining for better LLQ performance */
2593 	rc = ena_enable_wc(adapter->memory);
2594 	if (unlikely(rc != 0)) {
2595 		device_printf(pdev, "failed to enable write combining.\n");
2596 		return (rc);
2597 	}
2598 
2599 	/*
2600 	 * Save virtual address of the device's memory region
2601 	 * for the ena_com layer.
2602 	 */
2603 	ena_dev->mem_bar = rman_get_virtual(adapter->memory);
2604 
2605 	return (0);
2606 }
2607 
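/*
 * Default LLQ layout: packet headers are pushed inline with the descriptors,
 * using 128-byte ring entries that hold multiple descriptors each, with two
 * descriptors placed ahead of the header.
 */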
2608 static inline void
2609 set_default_llq_configurations(struct ena_llq_configurations *llq_config)
2610 {
2611 	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
2612 	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
2613 	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
2614 	llq_config->llq_num_decs_before_header =
2615 	    ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
2616 	llq_config->llq_ring_entry_size_value = 128;
2617 }
2618 
2619 static int
2620 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
2621 {
2622 	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
2623 	struct ena_com_dev *ena_dev = ctx->ena_dev;
2624 	uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
2625 	uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
2626 	uint32_t max_tx_queue_size;
2627 	uint32_t max_rx_queue_size;
2628 
2629 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2630 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2631 		    &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
2632 		max_rx_queue_size = min_t(uint32_t,
2633 		    max_queue_ext->max_rx_cq_depth,
2634 		    max_queue_ext->max_rx_sq_depth);
2635 		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2636 
2637 		if (ena_dev->tx_mem_queue_type ==
2638 		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2639 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2640 			    llq->max_llq_depth);
2641 		else
2642 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2643 			    max_queue_ext->max_tx_sq_depth);
2644 
2645 		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2646 		    max_queue_ext->max_per_packet_tx_descs);
2647 		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2648 		    max_queue_ext->max_per_packet_rx_descs);
2649 	} else {
2650 		struct ena_admin_queue_feature_desc *max_queues =
2651 		    &ctx->get_feat_ctx->max_queues;
2652 		max_rx_queue_size = min_t(uint32_t,
2653 		    max_queues->max_cq_depth,
2654 		    max_queues->max_sq_depth);
2655 		max_tx_queue_size = max_queues->max_cq_depth;
2656 
2657 		if (ena_dev->tx_mem_queue_type ==
2658 		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
2659 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2660 			    llq->max_llq_depth);
2661 		else
2662 			max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
2663 			    max_queues->max_sq_depth);
2664 
2665 		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2666 		    max_queues->max_packet_tx_descs);
2667 		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2668 		    max_queues->max_packet_rx_descs);
2669 	}
2670 
2671 	/* round down to the nearest power of 2 */
2672 	max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
2673 	max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
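	/* E.g., a device limit of 1000 entries is rounded down to 512. */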
2674 
2675 	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2676 	    max_tx_queue_size);
2677 	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2678 	    max_rx_queue_size);
2679 
2680 	tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
2681 	rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
2682 
2683 	ctx->max_tx_queue_size = max_tx_queue_size;
2684 	ctx->max_rx_queue_size = max_rx_queue_size;
2685 	ctx->tx_queue_size = tx_queue_size;
2686 	ctx->rx_queue_size = rx_queue_size;
2687 
2688 	return (0);
2689 }
2690 
2691 static int
2692 ena_rss_init_default(struct ena_adapter *adapter)
2693 {
2694 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2695 	device_t dev = adapter->pdev;
2696 	int qid, rc, i;
2697 
2698 	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
2699 	if (unlikely(rc != 0)) {
2700 		device_printf(dev, "Cannot init indirect table\n");
2701 		return (rc);
2702 	}
2703 
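	/*
	 * Fill the indirection table round-robin; e.g. with 4 IO queues the
	 * entries map to Rx queues 0,1,2,3,0,1,2,3,...
	 */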
2704 	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
2705 		qid = i % adapter->num_io_queues;
2706 		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
2707 		    ENA_IO_RXQ_IDX(qid));
2708 		if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2709 			device_printf(dev, "Cannot fill indirect table\n");
2710 			goto err_rss_destroy;
2711 		}
2712 	}
2713 
2714 #ifdef RSS
2715 	uint8_t rss_algo = rss_gethashalgo();
2716 	if (rss_algo == RSS_HASH_TOEPLITZ) {
2717 		uint8_t hash_key[RSS_KEYSIZE];
2718 
2719 		rss_getkey(hash_key);
2720 		rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ,
2721 		    hash_key, RSS_KEYSIZE, 0xFFFFFFFF);
2722 	} else
2723 #endif
2724 	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
2725 	    ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
2726 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2727 		device_printf(dev, "Cannot fill hash function\n");
2728 		goto err_rss_destroy;
2729 	}
2730 
2731 	rc = ena_com_set_default_hash_ctrl(ena_dev);
2732 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
2733 		device_printf(dev, "Cannot fill hash control\n");
2734 		goto err_rss_destroy;
2735 	}
2736 
2737 	return (0);
2738 
2739 err_rss_destroy:
2740 	ena_com_rss_destroy(ena_dev);
2741 	return (rc);
2742 }
2743 
2744 static void
2745 ena_rss_init_default_deferred(void *arg)
2746 {
2747 	struct ena_adapter *adapter;
2748 	devclass_t dc;
2749 	int max;
2750 	int rc;
2751 
2752 	dc = devclass_find("ena");
2753 	if (unlikely(dc == NULL)) {
2754 		ena_trace(NULL, ENA_ALERT, "No devclass ena\n");
2755 		return;
2756 	}
2757 
2758 	max = devclass_get_maxunit(dc);
2759 	while (max-- >= 0) {
2760 		adapter = devclass_get_softc(dc, max);
2761 		if (adapter != NULL) {
2762 			rc = ena_rss_init_default(adapter);
2763 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
2764 			if (unlikely(rc != 0)) {
2765 				device_printf(adapter->pdev,
2766 				    "WARNING: RSS was not properly initialized,"
2767 				    " it will affect bandwidth\n");
2768 				ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
2769 			}
2770 		}
2771 	}
2772 }
2773 SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
2774 
2775 static void
2776 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
2777 {
2778 	struct ena_admin_host_info *host_info;
2779 	uintptr_t rid;
2780 	int rc;
2781 
2782 	/* Allocate only the host info */
2783 	rc = ena_com_allocate_host_info(ena_dev);
2784 	if (unlikely(rc != 0)) {
2785 		ena_trace(NULL, ENA_ALERT, "Cannot allocate host info\n");
2786 		return;
2787 	}
2788 
2789 	host_info = ena_dev->host_attr.host_info;
2790 
2791 	if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
2792 		host_info->bdf = rid;
2793 	host_info->os_type = ENA_ADMIN_OS_FREEBSD;
2794 	host_info->kernel_ver = osreldate;
2795 
2796 	sprintf(host_info->kernel_ver_str, "%d", osreldate);
2797 	host_info->os_dist = 0;
2798 	strncpy(host_info->os_dist_str, osrelease,
2799 	    sizeof(host_info->os_dist_str) - 1);
2800 
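	/*
	 * Pack the driver version into a single word, with the minor and
	 * sub-minor numbers shifted per the ENA_ADMIN_HOST_INFO_*_SHIFT
	 * definitions; e.g., assuming the usual 8/16-bit shifts, version
	 * 2.3.1 is reported as 0x00010302.
	 */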
2801 	host_info->driver_version =
2802 		(DRV_MODULE_VER_MAJOR) |
2803 		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2804 		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
2805 	host_info->num_cpus = mp_ncpus;
2806 	host_info->driver_supported_features =
2807 	    ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK;
2808 
2809 	rc = ena_com_set_host_attributes(ena_dev);
2810 	if (unlikely(rc != 0)) {
2811 		if (rc == EOPNOTSUPP)
2812 			ena_trace(NULL, ENA_WARNING, "Cannot set host attributes\n");
2813 		else
2814 			ena_trace(NULL, ENA_ALERT, "Cannot set host attributes\n");
2815 
2816 		goto err;
2817 	}
2818 
2819 	return;
2820 
2821 err:
2822 	ena_com_delete_host_info(ena_dev);
2823 }
2824 
2825 static int
2826 ena_device_init(struct ena_adapter *adapter, device_t pdev,
2827     struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
2828 {
2829 	struct ena_com_dev* ena_dev = adapter->ena_dev;
2830 	bool readless_supported;
2831 	uint32_t aenq_groups;
2832 	int dma_width;
2833 	int rc;
2834 
2835 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
2836 	if (unlikely(rc != 0)) {
2837 		device_printf(pdev, "failed to init mmio read less\n");
2838 		return (rc);
2839 	}
2840 
2841 	/*
2842 	 * The PCIe configuration space revision ID indicates whether MMIO
2843 	 * register read is disabled.
2844 	 */
2845 	readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
2846 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
2847 
2848 	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
2849 	if (unlikely(rc != 0)) {
2850 		device_printf(pdev, "Can not reset device\n");
2851 		goto err_mmio_read_less;
2852 	}
2853 
2854 	rc = ena_com_validate_version(ena_dev);
2855 	if (unlikely(rc != 0)) {
2856 		device_printf(pdev, "device version is too low\n");
2857 		goto err_mmio_read_less;
2858 	}
2859 
2860 	dma_width = ena_com_get_dma_width(ena_dev);
2861 	if (unlikely(dma_width < 0)) {
2862 		device_printf(pdev, "Invalid dma width value %d\n", dma_width);
2863 		rc = dma_width;
2864 		goto err_mmio_read_less;
2865 	}
2866 	adapter->dma_width = dma_width;
2867 
2868 	/* ENA admin level init */
2869 	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
2870 	if (unlikely(rc != 0)) {
2871 		device_printf(pdev,
2872 		    "Can not initialize ena admin queue with device\n");
2873 		goto err_mmio_read_less;
2874 	}
2875 
2876 	/*
2877 	 * To enable the MSI-X interrupts, the driver needs to know the number
2878 	 * of queues, so it uses polling mode to retrieve this
2879 	 * information.
2880 	 */
2881 	ena_com_set_admin_polling_mode(ena_dev, true);
2882 
2883 	ena_config_host_info(ena_dev, pdev);
2884 
2885 	/* Get Device Attributes */
2886 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
2887 	if (unlikely(rc != 0)) {
2888 		device_printf(pdev,
2889 		    "Cannot get attribute for ena device rc: %d\n", rc);
2890 		goto err_admin_init;
2891 	}
2892 
2893 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
2894 	    BIT(ENA_ADMIN_FATAL_ERROR) |
2895 	    BIT(ENA_ADMIN_WARNING) |
2896 	    BIT(ENA_ADMIN_NOTIFICATION) |
2897 	    BIT(ENA_ADMIN_KEEP_ALIVE);
2898 
2899 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
2900 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
2901 	if (unlikely(rc != 0)) {
2902 		device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
2903 		goto err_admin_init;
2904 	}
2905 
2906 	*wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
2907 
2908 	return (0);
2909 
2910 err_admin_init:
2911 	ena_com_delete_host_info(ena_dev);
2912 	ena_com_admin_destroy(ena_dev);
2913 err_mmio_read_less:
2914 	ena_com_mmio_reg_read_request_destroy(ena_dev);
2915 
2916 	return (rc);
2917 }
2918 
2919 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
2920 {
2921 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2922 	int rc;
2923 
2924 	rc = ena_enable_msix(adapter);
2925 	if (unlikely(rc != 0)) {
2926 		device_printf(adapter->pdev, "Error with MSI-X enablement\n");
2927 		return (rc);
2928 	}
2929 
2930 	ena_setup_mgmnt_intr(adapter);
2931 
2932 	rc = ena_request_mgmnt_irq(adapter);
2933 	if (unlikely(rc != 0)) {
2934 		device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
2935 		goto err_disable_msix;
2936 	}
2937 
2938 	ena_com_set_admin_polling_mode(ena_dev, false);
2939 
2940 	ena_com_admin_aenq_enable(ena_dev);
2941 
2942 	return (0);
2943 
2944 err_disable_msix:
2945 	ena_disable_msix(adapter);
2946 
2947 	return (rc);
2948 }
2949 
2950 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
2951 static void ena_keep_alive_wd(void *adapter_data,
2952     struct ena_admin_aenq_entry *aenq_e)
2953 {
2954 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
2955 	struct ena_admin_aenq_keep_alive_desc *desc;
2956 	sbintime_t stime;
2957 	uint64_t rx_drops;
2958 	uint64_t tx_drops;
2959 
2960 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
2961 
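	/*
	 * The descriptor carries the absolute drop totals split into two
	 * 32-bit halves; reassemble them and mirror them into the counters,
	 * zeroing first since the device reports totals, not deltas.
	 */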
2962 	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
2963 	tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
2964 	counter_u64_zero(adapter->hw_stats.rx_drops);
2965 	counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
2966 	counter_u64_zero(adapter->hw_stats.tx_drops);
2967 	counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
2968 
2969 	stime = getsbinuptime();
2970 	atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
2971 }
2972 
2973 /* Check for keep alive expiration */
2974 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
2975 {
2976 	sbintime_t timestamp, time;
2977 
2978 	if (adapter->wd_active == 0)
2979 		return;
2980 
2981 	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
2982 		return;
2983 
2984 	timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
2985 	time = getsbinuptime() - timestamp;
2986 	if (unlikely(time > adapter->keep_alive_timeout)) {
2987 		device_printf(adapter->pdev,
2988 		    "Keep alive watchdog timeout.\n");
2989 		counter_u64_add(adapter->dev_stats.wd_expired, 1);
2990 		ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
2991 	}
2992 }
2993 
2994 /* Check if admin queue is enabled */
2995 static void check_for_admin_com_state(struct ena_adapter *adapter)
2996 {
2997 	if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
2998 	    false)) {
2999 		device_printf(adapter->pdev,
3000 		    "ENA admin queue is not in running state!\n");
3001 		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3002 		ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
3003 	}
3004 }
3005 
3006 static int
3007 check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3008     struct ena_ring *rx_ring)
3009 {
3010 	if (likely(rx_ring->first_interrupt))
3011 		return (0);
3012 
3013 	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3014 		return (0);
3015 
3016 	rx_ring->no_interrupt_event_cnt++;
3017 
3018 	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3019 		device_printf(adapter->pdev, "Potential MSIX issue on Rx side "
3020 		    "Queue = %d. Reset the device\n", rx_ring->qid);
3021 		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3022 		return (EIO);
3023 	}
3024 
3025 	return (0);
3026 }
3027 
3028 static int
3029 check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3030     struct ena_ring *tx_ring)
3031 {
3032 	struct bintime curtime, time;
3033 	struct ena_tx_buffer *tx_buf;
3034 	sbintime_t time_offset;
3035 	uint32_t missed_tx = 0;
3036 	int i, rc = 0;
3037 
3038 	getbinuptime(&curtime);
3039 
3040 	for (i = 0; i < tx_ring->ring_size; i++) {
3041 		tx_buf = &tx_ring->tx_buffer_info[i];
3042 
3043 		if (bintime_isset(&tx_buf->timestamp) == 0)
3044 			continue;
3045 
3046 		time = curtime;
3047 		bintime_sub(&time, &tx_buf->timestamp);
3048 		time_offset = bttosbt(time);
3049 
3050 		if (unlikely(!tx_ring->first_interrupt &&
3051 		    time_offset > 2 * adapter->missing_tx_timeout)) {
3052 			/*
3053 			 * If, after the grace period, the interrupt is still
3054 			 * not received, schedule a reset.
3055 			 */
3056 			device_printf(adapter->pdev,
3057 			    "Potential MSIX issue on Tx side Queue = %d. "
3058 			    "Reset the device\n", tx_ring->qid);
3059 			ena_trigger_reset(adapter,
3060 			    ENA_REGS_RESET_MISS_INTERRUPT);
3061 			return (EIO);
3062 		}
3063 
3064 		/* Check again if packet is still waiting */
3065 		if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3066 
3067 			if (!tx_buf->print_once)
3068 				ena_trace(NULL, ENA_WARNING, "Found a Tx that wasn't "
3069 				    "completed on time, qid %d, index %d.\n",
3070 				    tx_ring->qid, i);
3071 
3072 			tx_buf->print_once = true;
3073 			missed_tx++;
3074 		}
3075 	}
3076 
3077 	if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3078 		device_printf(adapter->pdev,
3079 		    "The number of lost tx completions is above the threshold "
3080 		    "(%d > %d). Reset the device\n",
3081 		    missed_tx, adapter->missing_tx_threshold);
3082 		ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
3083 		rc = EIO;
3084 	}
3085 
3086 	counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3087 
3088 	return (rc);
3089 }
3090 
3091 /*
3092  * Check for TX completions which were not received on time.
3093  * The timeout is defined by "missing_tx_timeout".
3094  * A reset will be performed if the number of incomplete
3095  * transactions exceeds "missing_tx_threshold".
3096  */
3097 static void
3098 check_for_missing_completions(struct ena_adapter *adapter)
3099 {
3100 	struct ena_ring *tx_ring;
3101 	struct ena_ring *rx_ring;
3102 	int i, budget, rc;
3103 
3104 	/* Make sure another context isn't bringing the device down concurrently */
3105 	rmb();
3106 
3107 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3108 		return;
3109 
3110 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3111 		return;
3112 
3113 	if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3114 		return;
3115 
3116 	budget = adapter->missing_tx_max_queues;
3117 
3118 	for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
3119 		tx_ring = &adapter->tx_ring[i];
3120 		rx_ring = &adapter->rx_ring[i];
3121 
3122 		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3123 		if (unlikely(rc != 0))
3124 			return;
3125 
3126 		rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3127 		if (unlikely(rc != 0))
3128 			return;
3129 
3130 		budget--;
3131 		if (budget == 0) {
3132 			i++;
3133 			break;
3134 		}
3135 	}
3136 
3137 	adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
3138 }
3139 
3140 /* trigger rx cleanup after 2 consecutive detections */
3141 #define EMPTY_RX_REFILL 2
3142 /* Handle the rare case where the device runs out of Rx descriptors and the
3143  * MSI-X handler failed to refill new ones (due to a lack of memory,
3144  * for example).
3145  * This case leads to a deadlock:
3146  * the device won't send interrupts since all new Rx packets will be dropped,
3147  * and the MSI-X handler won't allocate new Rx descriptors, so the device
3148  * won't be able to send new packets.
3149  *
3150  * When such a situation is detected, execute the Rx cleanup task in another thread.
3151  */
3152 static void
3153 check_for_empty_rx_ring(struct ena_adapter *adapter)
3154 {
3155 	struct ena_ring *rx_ring;
3156 	int i, refill_required;
3157 
3158 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3159 		return;
3160 
3161 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3162 		return;
3163 
3164 	for (i = 0; i < adapter->num_io_queues; i++) {
3165 		rx_ring = &adapter->rx_ring[i];
3166 
3167 		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3168 		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3169 			rx_ring->empty_rx_queue++;
3170 
3171 			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL)	{
3172 				counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3173 				    1);
3174 
3175 				device_printf(adapter->pdev,
3176 				    "trigger refill for ring %d\n", i);
3177 
3178 				taskqueue_enqueue(rx_ring->que->cleanup_tq,
3179 				    &rx_ring->que->cleanup_task);
3180 				rx_ring->empty_rx_queue = 0;
3181 			}
3182 		} else {
3183 			rx_ring->empty_rx_queue = 0;
3184 		}
3185 	}
3186 }
3187 
3188 static void ena_update_hints(struct ena_adapter *adapter,
3189 			     struct ena_admin_ena_hw_hints *hints)
3190 {
3191 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3192 
3193 	if (hints->admin_completion_tx_timeout)
3194 		ena_dev->admin_queue.completion_timeout =
3195 		    hints->admin_completion_tx_timeout * 1000;
3196 
3197 	if (hints->mmio_read_timeout)
3198 		/* convert to usec */
3199 		ena_dev->mmio_read.reg_read_to =
3200 		    hints->mmio_read_timeout * 1000;
3201 
3202 	if (hints->missed_tx_completion_count_threshold_to_reset)
3203 		adapter->missing_tx_threshold =
3204 		    hints->missed_tx_completion_count_threshold_to_reset;
3205 
3206 	if (hints->missing_tx_completion_timeout) {
3207 		if (hints->missing_tx_completion_timeout ==
3208 		     ENA_HW_HINTS_NO_TIMEOUT)
3209 			adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3210 		else
3211 			adapter->missing_tx_timeout =
3212 			    SBT_1MS * hints->missing_tx_completion_timeout;
3213 	}
3214 
3215 	if (hints->driver_watchdog_timeout) {
3216 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3217 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3218 		else
3219 			adapter->keep_alive_timeout =
3220 			    SBT_1MS * hints->driver_watchdog_timeout;
3221 	}
3222 }
3223 
3224 /**
3225  * ena_copy_eni_metrics - Get and copy ENI metrics from the HW.
3226  * @adapter: ENA device adapter
3227  *
3228  * Returns 0 on success, EOPNOTSUPP if the current HW doesn't support those
3229  * metrics, and other error codes on failure.
3230  *
3231  * This function can possibly cause a race with other calls to the admin queue.
3232  * Because of that, the caller should either lock this function or make sure
3233  * that there is no race in the current context.
3234  */
3235 static int
3236 ena_copy_eni_metrics(struct ena_adapter *adapter)
3237 {
3238 	static bool print_once = true;
3239 	int rc;
3240 
3241 	rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics);
3242 
3243 	if (rc != 0) {
3244 		if (rc == ENA_COM_UNSUPPORTED) {
3245 			if (print_once) {
3246 				device_printf(adapter->pdev,
3247 				    "Retrieving ENI metrics is not supported.\n");
3248 				print_once = false;
3249 			} else {
3250 				ena_trace(NULL, ENA_DBG,
3251 				    "Retrieving ENI metrics is not supported.\n");
3252 			}
3253 		} else {
3254 			device_printf(adapter->pdev,
3255 			    "Failed to get ENI metrics: %d\n", rc);
3256 		}
3257 	}
3258 
3259 	return (rc);
3260 }
3261 
3262 static void
3263 ena_timer_service(void *data)
3264 {
3265 	struct ena_adapter *adapter = (struct ena_adapter *)data;
3266 	struct ena_admin_host_info *host_info =
3267 	    adapter->ena_dev->host_attr.host_info;
3268 
3269 	check_for_missing_keep_alive(adapter);
3270 
3271 	check_for_admin_com_state(adapter);
3272 
3273 	check_for_missing_completions(adapter);
3274 
3275 	check_for_empty_rx_ring(adapter);
3276 
3277 	/*
3278 	 * User-controlled update of the ENI metrics.
3279 	 * If the delay was set to 0, then the stats shouldn't be updated at
3280 	 * all.
3281 	 * Otherwise, wait 'eni_metrics_sample_interval' seconds before
3282 	 * updating the stats.
3283 	 * As the timer service is executed every second, it's enough to
3284 	 * increment the appropriate counter each time the timer service runs.
3285 	 */
3286 	if ((adapter->eni_metrics_sample_interval != 0) &&
3287 	    (++adapter->eni_metrics_sample_interval_cnt >=
3288 	     adapter->eni_metrics_sample_interval)) {
3289 		/*
3290 		 * There is no race with other admin queue calls, as:
3291 		 *   - Timer service runs after interface is up, so all
3292 		 *     configuration calls to the admin queue are finished.
3293 		 *   - After interface is up, the driver doesn't use (at least
3294 		 *     for now) other functions writing to the admin queue.
3295 		 *
3296 		 * It may change in the future, so in that situation, the lock
3297 		 * will be needed. ENA_LOCK_*() cannot be used for that purpose,
3298 		 * as callout ena_timer_service is protected by them. It could
3299 		 * lead to the deadlock if callout_drain() would hold the lock
3300 		 * before ena_copy_eni_metrics() was executed. It's advised to
3301 		 * use separate lock in that situation which will be used only
3302 		 * for the admin queue.
3303 		 */
3304 		(void)ena_copy_eni_metrics(adapter);
3305 		adapter->eni_metrics_sample_interval_cnt = 0;
3306 	}
3307 
3308 
3309 	if (host_info != NULL)
3310 		ena_update_host_info(host_info, adapter->ifp);
3311 
3312 	if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3313 		device_printf(adapter->pdev, "Trigger reset is on\n");
3314 		taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3315 		return;
3316 	}
3317 
3318 	/*
3319 	 * Schedule another timeout one second from now.
3320 	 */
3321 	callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
3322 }
3323 
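/*
 * Tear the device down to a state from which ena_restore_device() can bring
 * it back up: queues, IRQs, MSI-X, the admin queue and the MMIO read request
 * are all released, while ENA_FLAG_DEV_UP_BEFORE_RESET records whether the
 * interface should be re-created on restore.
 */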
3324 void
3325 ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3326 {
3327 	if_t ifp = adapter->ifp;
3328 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3329 	bool dev_up;
3330 
3331 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
3332 		return;
3333 
3334 	if_link_state_change(ifp, LINK_STATE_DOWN);
3335 
3336 	callout_drain(&adapter->timer_service);
3337 
3338 	dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
3339 	if (dev_up)
3340 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3341 
3342 	if (!graceful)
3343 		ena_com_set_admin_running_state(ena_dev, false);
3344 
3345 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3346 		ena_down(adapter);
3347 
3348 	/*
3349 	 * Stop the device from sending AENQ events (if the device was up and
3350 	 * trigger reset was on, ena_down has already performed the device reset)
3351 	 */
3352 	if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
3353 		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3354 
3355 	ena_free_mgmnt_irq(adapter);
3356 
3357 	ena_disable_msix(adapter);
3358 
3359 	/*
3360 	 * IO rings resources should be freed because `ena_restore_device()`
3361 	 * calls (not directly) `ena_enable_msix()`, which re-allocates MSIX
3362 	 * vectors. The amount of MSIX vectors after destroy-restore may be
3363 	 * different than before. Therefore, IO rings resources should be
3364 	 * established from scratch each time.
3365 	 */
3366 	ena_free_all_io_rings_resources(adapter);
3367 
3368 	ena_com_abort_admin_commands(ena_dev);
3369 
3370 	ena_com_wait_for_abort_completion(ena_dev);
3371 
3372 	ena_com_admin_destroy(ena_dev);
3373 
3374 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3375 
3376 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3377 
3378 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3379 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3380 }
3381 
3382 static int
3383 ena_device_validate_params(struct ena_adapter *adapter,
3384     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3385 {
3386 
3387 	if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
3388 	    ETHER_ADDR_LEN) != 0) {
3389 		device_printf(adapter->pdev,
3390 		    "Error, MAC addresses are different\n");
3391 		return (EINVAL);
3392 	}
3393 
3394 	if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
3395 		device_printf(adapter->pdev,
3396 		    "Error, device max mtu is smaller than ifp MTU\n");
3397 		return (EINVAL);
3398 	}
3399 
3400 	return (0);
3401 }
3402 
3403 int
3404 ena_restore_device(struct ena_adapter *adapter)
3405 {
3406 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3407 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3408 	if_t ifp = adapter->ifp;
3409 	device_t dev = adapter->pdev;
3410 	int wd_active;
3411 	int rc;
3412 
3413 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3414 
3415 	rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
3416 	if (rc != 0) {
3417 		device_printf(dev, "Cannot initialize device\n");
3418 		goto err;
3419 	}
3420 	/*
3421 	 * Only enable the WD if it was enabled before the reset, so it won't
3422 	 * override the value set by the user via the sysctl.
3423 	 */
3424 	if (adapter->wd_active != 0)
3425 		adapter->wd_active = wd_active;
3426 
3427 	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3428 	if (rc != 0) {
3429 		device_printf(dev, "Validation of device parameters failed\n");
3430 		goto err_device_destroy;
3431 	}
3432 
3433 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3434 	/* Make sure we don't have a race with the AENQ link state handler */
3435 	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
3436 		if_link_state_change(ifp, LINK_STATE_UP);
3437 
3438 	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3439 	if (rc != 0) {
3440 		device_printf(dev, "Enable MSI-X failed\n");
3441 		goto err_device_destroy;
3442 	}
3443 
3444 	/*
3445 	 * Effective value of used MSIX vectors should be the same as before
3446 	 * `ena_destroy_device()`, if possible, or closest to it if less vectors
3447 	 * are available.
3448 	 */
3449 	if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
3450 		adapter->num_io_queues =
3451 		    adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
3452 
3453 	/* Re-initialize rings basic information */
3454 	ena_init_io_rings(adapter);
3455 
3456 	/* If the interface was up before the reset bring it up */
3457 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3458 		rc = ena_up(adapter);
3459 		if (rc != 0) {
3460 			device_printf(dev, "Failed to create I/O queues\n");
3461 			goto err_disable_msix;
3462 		}
3463 	}
3464 
3465 	/* Indicate that device is running again and ready to work */
3466 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3467 
3468 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
3469 		/*
3470 		 * As the AENQ handlers weren't executed during reset because
3471 		 * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
3472 		 * timestamp must be updated again. That will prevent the next
3473 		 * reset caused by a missing keep alive.
3474 		 */
3475 		adapter->keep_alive_timestamp = getsbinuptime();
3476 		callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
3477 		    ena_timer_service, (void *)adapter, 0);
3478 	}
3479 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
3480 
3481 	device_printf(dev,
3482 	    "Device reset completed successfully, Driver info: %s\n", ena_version);
3483 
3484 	return (rc);
3485 
3486 err_disable_msix:
3487 	ena_free_mgmnt_irq(adapter);
3488 	ena_disable_msix(adapter);
3489 err_device_destroy:
3490 	ena_com_abort_admin_commands(ena_dev);
3491 	ena_com_wait_for_abort_completion(ena_dev);
3492 	ena_com_admin_destroy(ena_dev);
3493 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3494 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3495 err:
3496 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3497 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
3498 	device_printf(dev, "Reset attempt failed. Can not reset the device\n");
3499 
3500 	return (rc);
3501 }
3502 
3503 static void
3504 ena_reset_task(void *arg, int pending)
3505 {
3506 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
3507 
3508 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3509 		device_printf(adapter->pdev,
3510 		    "device reset scheduled but trigger_reset is off\n");
3511 		return;
3512 	}
3513 
3514 	ENA_LOCK_LOCK(adapter);
3515 	ena_destroy_device(adapter, false);
3516 	ena_restore_device(adapter);
3517 	ENA_LOCK_UNLOCK(adapter);
3518 }
3519 
3520 /**
3521  * ena_attach - Device Initialization Routine
3522  * @pdev: device information struct
3523  *
3524  * Returns 0 on success, an error code otherwise.
3525  *
3526  * ena_attach initializes an adapter identified by a device structure.
3527  * The OS initialization, configuring of the adapter private structure,
3528  * and a hardware reset occur.
3529  **/
3530 static int
3531 ena_attach(device_t pdev)
3532 {
3533 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3534 	struct ena_llq_configurations llq_config;
3535 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3536 	static int version_printed;
3537 	struct ena_adapter *adapter;
3538 	struct ena_com_dev *ena_dev = NULL;
3539 	uint32_t max_num_io_queues;
3540 	int msix_rid;
3541 	int rid, rc;
3542 
3543 	adapter = device_get_softc(pdev);
3544 	adapter->pdev = pdev;
3545 
3546 	ENA_LOCK_INIT(adapter);
3547 
3548 	/*
3549 	 * Set up the timer service - the driver is responsible for avoiding
3550 	 * concurrency, as the callout won't be using any locking inside.
3551 	 */
3552 	callout_init(&adapter->timer_service, true);
3553 	adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
3554 	adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
3555 	adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
3556 	adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
3557 
3558 	if (version_printed++ == 0)
3559 		device_printf(pdev, "%s\n", ena_version);
3560 
3561 	/* Allocate memory for ena_dev structure */
3562 	ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
3563 	    M_WAITOK | M_ZERO);
3564 
3565 	adapter->ena_dev = ena_dev;
3566 	ena_dev->dmadev = pdev;
3567 
3568 	rid = PCIR_BAR(ENA_REG_BAR);
3569 	adapter->memory = NULL;
3570 	adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3571 	    &rid, RF_ACTIVE);
3572 	if (unlikely(adapter->registers == NULL)) {
3573 		device_printf(pdev,
3574 		    "unable to allocate bus resource: registers!\n");
3575 		rc = ENOMEM;
3576 		goto err_dev_free;
3577 	}
3578 
3579 	/* MSI-X vector table may reside on BAR0 with registers or on BAR1. */
3580 	msix_rid = pci_msix_table_bar(pdev);
3581 	if (msix_rid != rid) {
3582 		adapter->msix = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3583 		    &msix_rid, RF_ACTIVE);
3584 		if (unlikely(adapter->msix == NULL)) {
3585 			device_printf(pdev,
3586 			    "unable to allocate bus resource: msix!\n");
3587 			rc = ENOMEM;
3588 			goto err_pci_free;
3589 		}
3590 		adapter->msix_rid = msix_rid;
3591 	}
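	/*
	 * pci_msix_table_bar() reports which BAR holds the MSI-X table; a
	 * second mapping was needed above only when that BAR differs from
	 * the register BAR, and msix_rid is saved so the resource can be
	 * released on detach.
	 */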
3592 
3593 	ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3594 	    M_WAITOK | M_ZERO);
3595 
3596 	/* Store register resources */
3597 	((struct ena_bus *)(ena_dev->bus))->reg_bar_t =
3598 	    rman_get_bustag(adapter->registers);
3599 	((struct ena_bus *)(ena_dev->bus))->reg_bar_h =
3600 	    rman_get_bushandle(adapter->registers);
3601 
3602 	if (unlikely(((struct ena_bus *)(ena_dev->bus))->reg_bar_h == 0)) {
3603 		device_printf(pdev, "failed to map the registers BAR\n");
3604 		rc = ENXIO;
3605 		goto err_bus_free;
3606 	}
3607 
3608 	ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3609 
3610 	/* Initially clear all the flags */
3611 	ENA_FLAG_ZERO(adapter);
3612 
3613 	/* Device initialization */
3614 	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
3615 	if (unlikely(rc != 0)) {
3616 		device_printf(pdev, "ENA device init failed! (err: %d)\n", rc);
3617 		rc = ENXIO;
3618 		goto err_bus_free;
3619 	}
3620 
3621 	set_default_llq_configurations(&llq_config);
3622 
3623 	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
3624 	     &llq_config);
3625 	if (unlikely(rc != 0)) {
3626 		device_printf(pdev, "failed to set placement policy\n");
3627 		goto err_com_free;
3628 	}
3629 
3630 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3631 		adapter->disable_meta_caching =
3632 		    !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
3633 		    BIT(ENA_ADMIN_DISABLE_META_CACHING));
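	/*
	 * When the device-preferred (LLQ) placement policy is in effect,
	 * the accelerated-mode capabilities read above tell the driver
	 * whether caching of TX metadata descriptors must be disabled.
	 */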
3634 
3635 	adapter->keep_alive_timestamp = getsbinuptime();
3636 
3637 	adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3638 
3639 	memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3640 	    ETHER_ADDR_LEN);
3641 
3642 	calc_queue_ctx.pdev = pdev;
3643 	calc_queue_ctx.ena_dev = ena_dev;
3644 	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
3645 
3646 	/* Calculate initial and maximum IO queue number and size */
3647 	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
3648 	    &get_feat_ctx);
3649 	rc = ena_calc_io_queue_size(&calc_queue_ctx);
3650 	if (unlikely((rc != 0) || (max_num_io_queues == 0))) {
3651 		rc = EFAULT;
3652 		goto err_com_free;
3653 	}
3654 
3655 	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
3656 	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
3657 	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
3658 	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
3659 	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3660 	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3661 
3662 	adapter->max_num_io_queues = max_num_io_queues;
3663 
3664 	adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
3665 
3666 	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
3667 
3668 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3669 
3670 	/* set up dma tags for rx and tx buffers */
3671 	rc = ena_setup_tx_dma_tag(adapter);
3672 	if (unlikely(rc != 0)) {
3673 		device_printf(pdev, "Failed to create TX DMA tag\n");
3674 		goto err_com_free;
3675 	}
3676 
3677 	rc = ena_setup_rx_dma_tag(adapter);
3678 	if (unlikely(rc != 0)) {
3679 		device_printf(pdev, "Failed to create RX DMA tag\n");
3680 		goto err_tx_tag_free;
3681 	}
3682 
3683 	/*
3684 	 * The amount of requested MSIX vectors is equal to
3685 	 * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant
3686 	 * number of admin queue interrupts. The former is initially determined
3687 	 * by HW capabilities (see `ena_calc_max_io_queue_num())` but may not be
3688 	 * by HW capabilities (see `ena_calc_max_io_queue_num()`) but may not be
3689 	 * number of effectively used IO queues is the same but later on it can
3690 	 * be limited by the user using sysctl interface.
3691 	 */
3692 	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3693 	if (unlikely(rc != 0)) {
3694 		device_printf(pdev,
3695 		    "Failed to enable and set the admin interrupts\n");
3696 		goto err_io_free;
3697 	}
3698 	/* By default, all allocated MSI-X vectors are actively used */
3699 	adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
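	/*
	 * msix_vecs was negotiated as one vector per IO queue plus
	 * ENA_ADMIN_MSIX_VEC management vector(s), so the subtraction above
	 * recovers the number of IO queues that can actually be serviced.
	 */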
3700 
3701 	/* initialize rings basic information */
3702 	ena_init_io_rings(adapter);
3703 
3704 	/* setup network interface */
3705 	rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
3706 	if (unlikely(rc != 0)) {
3707 		device_printf(pdev, "Error with network interface setup\n");
3708 		goto err_msix_free;
3709 	}
3710 
3711 	/* Initialize reset task queue */
3712 	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
3713 	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
3714 	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
3715 	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET,
3716 	    "%s rstq", device_get_nameunit(adapter->pdev));
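	/*
	 * The reset task gets a dedicated single-threaded taskqueue running
	 * at PI_NET priority so that device recovery never executes in the
	 * interrupt or timer context that requested it.
	 */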
3717 
3718 	/* Initialize statistics */
3719 	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
3720 	    sizeof(struct ena_stats_dev));
3721 	ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
3722 	    sizeof(struct ena_hw_stats));
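	/*
	 * Both stats structures are assumed to be laid out as contiguous
	 * counter_u64_t fields, which is why the raw struct sizes can be
	 * handed straight to the counter alloc/free helpers.
	 */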
3723 	ena_sysctl_add_nodes(adapter);
3724 
3725 #ifdef DEV_NETMAP
3726 	rc = ena_netmap_attach(adapter);
3727 	if (rc != 0) {
3728 		device_printf(pdev, "netmap attach failed: %d\n", rc);
3729 		goto err_detach;
3730 	}
3731 #endif /* DEV_NETMAP */
3732 
3733 	/* Tell the stack that the interface is not active */
3734 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
3735 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
3736 
3737 	return (0);
3738 
3739 #ifdef DEV_NETMAP
3740 err_detach:
3741 	ether_ifdetach(adapter->ifp);
3742 #endif /* DEV_NETMAP */
3743 err_msix_free:
3744 	ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
3745 	ena_free_mgmnt_irq(adapter);
3746 	ena_disable_msix(adapter);
3747 err_io_free:
3748 	ena_free_all_io_rings_resources(adapter);
3749 	ena_free_rx_dma_tag(adapter);
3750 err_tx_tag_free:
3751 	ena_free_tx_dma_tag(adapter);
3752 err_com_free:
3753 	ena_com_admin_destroy(ena_dev);
3754 	ena_com_delete_host_info(ena_dev);
3755 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3756 err_bus_free:
3757 	free(ena_dev->bus, M_DEVBUF);
3758 err_pci_free:
3759 	ena_free_pci_resources(adapter);
3760 err_dev_free:
3761 	free(ena_dev, M_DEVBUF);
3762 
3763 	return (rc);
3764 }
3765 
3766 /**
3767  * ena_detach - Device Removal Routine
3768  * @pdev: device information struct
3769  *
3770  * ena_detach is called by the device subsystem to alert the driver
3771  * that it should release a PCI device.
3772  **/
3773 static int
3774 ena_detach(device_t pdev)
3775 {
3776 	struct ena_adapter *adapter = device_get_softc(pdev);
3777 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3778 	int rc;
3779 
3780 	/* Make sure VLANs are not using the driver */
3781 	if (adapter->ifp->if_vlantrunk != NULL) {
3782 		device_printf(adapter->pdev, "VLAN is in use, detach first\n");
3783 		return (EBUSY);
3784 	}
3785 
3786 	ether_ifdetach(adapter->ifp);
3787 
3788 	/* Stop timer service */
3789 	ENA_LOCK_LOCK(adapter);
3790 	callout_drain(&adapter->timer_service);
3791 	ENA_LOCK_UNLOCK(adapter);
3792 
3793 	/* Release reset task */
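	/*
	 * taskqueue_cancel() returns non-zero while the task is still
	 * running; retrying the cancel with a drain in between guarantees
	 * the task is neither queued nor executing once the loop exits.
	 */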
3794 	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
3795 		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
3796 	taskqueue_free(adapter->reset_tq);
3797 
3798 	ENA_LOCK_LOCK(adapter);
3799 	ena_down(adapter);
3800 	ena_destroy_device(adapter, true);
3801 	ENA_LOCK_UNLOCK(adapter);
3802 
3803 #ifdef DEV_NETMAP
3804 	netmap_detach(adapter->ifp);
3805 #endif /* DEV_NETMAP */
3806 
3807 	ena_free_counters((counter_u64_t *)&adapter->hw_stats,
3808 	    sizeof(struct ena_hw_stats));
3809 	ena_free_counters((counter_u64_t *)&adapter->dev_stats,
3810 	    sizeof(struct ena_stats_dev));
3811 
3812 	rc = ena_free_rx_dma_tag(adapter);
3813 	if (unlikely(rc != 0))
3814 		device_printf(adapter->pdev,
3815 		    "Failed to free RX DMA tag: DMA maps still in use\n");
3816 
3817 	rc = ena_free_tx_dma_tag(adapter);
3818 	if (unlikely(rc != 0))
3819 		device_printf(adapter->pdev,
3820 		    "Failed to free TX DMA tag: DMA maps still in use\n");
3821 
3822 	ena_free_irqs(adapter);
3823 
3824 	ena_free_pci_resources(adapter);
3825 
3826 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
3827 		ena_com_rss_destroy(ena_dev);
3828 
3829 	ena_com_delete_host_info(ena_dev);
3830 
3831 	ENA_LOCK_DESTROY(adapter);
3832 
3833 	if_free(adapter->ifp);
3834 
3835 	free(ena_dev->bus, M_DEVBUF);
3836 
3837 	free(ena_dev, M_DEVBUF);
3840 
3841 	return (bus_generic_detach(pdev));
3842 }
3843 
3844 /******************************************************************************
3845  ******************************** AENQ Handlers *******************************
3846  *****************************************************************************/
3847 /**
3848  * ena_update_on_link_change:
3849  * Notify the network interface about the change in link status
3850  **/
3851 static void
3852 ena_update_on_link_change(void *adapter_data,
3853     struct ena_admin_aenq_entry *aenq_e)
3854 {
3855 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3856 	struct ena_admin_aenq_link_change_desc *aenq_desc;
3857 	int status;
3858 	if_t ifp;
3859 
3860 	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3861 	ifp = adapter->ifp;
3862 	status = aenq_desc->flags &
3863 	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3864 
3865 	if (status != 0) {
3866 		device_printf(adapter->pdev, "link is UP\n");
3867 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3868 		if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
3869 			if_link_state_change(ifp, LINK_STATE_UP);
3870 	} else {
3871 		device_printf(adapter->pdev, "link is DOWN\n");
3872 		if_link_state_change(ifp, LINK_STATE_DOWN);
3873 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
3874 	}
3875 }
3876 
3877 static void
3878 ena_notification(void *adapter_data, struct ena_admin_aenq_entry *aenq_e)
3879 {
3880 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3881 	struct ena_admin_ena_hw_hints *hints;
3882 
3883 	ENA_WARN(NULL, aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
3884 	    "Invalid group (%x), expected %x\n", aenq_e->aenq_common_desc.group,
3885 	    ENA_ADMIN_NOTIFICATION);
3886 
3887 	switch (aenq_e->aenq_common_desc.syndrome) {
3888 	case ENA_ADMIN_UPDATE_HINTS:
3889 		hints =
3890 		    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
3891 		ena_update_hints(adapter, hints);
3892 		break;
3893 	default:
3894 		device_printf(adapter->pdev,
3895 		    "Invalid AENQ notification syndrome %d\n",
3896 		    aenq_e->aenq_common_desc.syndrome);
3897 	}
3898 }
3899 
3900 /**
3901  * This handler is called for an unknown event group or an unimplemented handler
3902  **/
3903 static void
3904 unimplemented_aenq_handler(void *adapter_data,
3905     struct ena_admin_aenq_entry *aenq_e)
3906 {
3907 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3908 
3909 	device_printf(adapter->pdev,
3910 	    "Received an unknown event or an event with an unimplemented handler\n");
3911 }
3912 
3913 static struct ena_aenq_handlers aenq_handlers = {
3914     .handlers = {
3915 	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3916 	    [ENA_ADMIN_NOTIFICATION] = ena_notification,
3917 	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3918     },
3919     .unimplemented_handler = unimplemented_aenq_handler
3920 };
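/*
 * The ena_com layer dispatches each asynchronous event by indexing the
 * table above with the event's group id, falling back to
 * unimplemented_aenq_handler for any group without an entry.
 */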
3921 
3922 /*********************************************************************
3923  *  FreeBSD Device Interface Entry Points
3924  *********************************************************************/
3925 
3926 static device_method_t ena_methods[] = {
3927     /* Device interface */
3928     DEVMETHOD(device_probe, ena_probe),
3929     DEVMETHOD(device_attach, ena_attach),
3930     DEVMETHOD(device_detach, ena_detach),
3931     DEVMETHOD_END
3932 };
3933 
3934 static driver_t ena_driver = {
3935     "ena", ena_methods, sizeof(struct ena_adapter),
3936 };
3937 
3938 devclass_t ena_devclass;
3939 DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
3940 MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
3941     nitems(ena_vendor_info_array) - 1);
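/*
 * The "U16:vendor;U16:device" descriptor lets devmatch(8) parse
 * ena_vendor_info_array as 16-bit vendor/device id pairs; the
 * terminating sentinel entry is excluded via nitems() - 1.
 */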
3942 MODULE_DEPEND(ena, pci, 1, 1, 1);
3943 MODULE_DEPEND(ena, ether, 1, 1, 1);
3944 #ifdef DEV_NETMAP
3945 MODULE_DEPEND(ena, netmap, 1, 1, 1);
3946 #endif /* DEV_NETMAP */
3947 
3948 /*********************************************************************/
3949