xref: /freebsd/sys/dev/ena/ena.c (revision 0bd5d367989b3d2de0e8d8ceaa2e31d3f0d96536)
1 /*-
2  * BSD LICENSE
3  *
4  * Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rman.h>
43 #include <sys/smp.h>
44 #include <sys/socket.h>
45 #include <sys/sockio.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 #include <sys/time.h>
49 #include <sys/eventhandler.h>
50 
51 #include <machine/bus.h>
52 #include <machine/resource.h>
53 #include <machine/in_cksum.h>
54 
55 #include <net/bpf.h>
56 #include <net/ethernet.h>
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_vlan_var.h>
64 
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip6.h>
70 #include <netinet/tcp.h>
71 #include <netinet/udp.h>
72 
73 #include <dev/pci/pcivar.h>
74 #include <dev/pci/pcireg.h>
75 
76 #include <vm/vm.h>
77 #include <vm/pmap.h>
78 
79 #include "ena.h"
80 #include "ena_sysctl.h"
81 
/*********************************************************
 *  Function prototypes
 *
 *  Forward declarations of the file-local (static) functions,
 *  so that they can be referenced ahead of their definitions.
 *********************************************************/
static int	ena_probe(device_t);
static void	ena_intr_msix_mgmnt(void *);
static void	ena_free_pci_resources(struct ena_adapter *);
static int	ena_change_mtu(if_t, int);
static inline void ena_alloc_counters(counter_u64_t *, int);
static inline void ena_free_counters(counter_u64_t *, int);
static inline void ena_reset_counters(counter_u64_t *, int);
static void	ena_init_io_rings_common(struct ena_adapter *,
    struct ena_ring *, uint16_t);
static void	ena_init_io_rings(struct ena_adapter *);
static void	ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
static void	ena_free_all_io_rings_resources(struct ena_adapter *);
static int	ena_setup_tx_dma_tag(struct ena_adapter *);
static int	ena_free_tx_dma_tag(struct ena_adapter *);
static int	ena_setup_rx_dma_tag(struct ena_adapter *);
static int	ena_free_rx_dma_tag(struct ena_adapter *);
static int	ena_setup_tx_resources(struct ena_adapter *, int);
static void	ena_free_tx_resources(struct ena_adapter *, int);
static int	ena_setup_all_tx_resources(struct ena_adapter *);
static void	ena_free_all_tx_resources(struct ena_adapter *);
static inline int validate_rx_req_id(struct ena_ring *, uint16_t);
static int	ena_setup_rx_resources(struct ena_adapter *, unsigned int);
static void	ena_free_rx_resources(struct ena_adapter *, unsigned int);
static int	ena_setup_all_rx_resources(struct ena_adapter *);
static void	ena_free_all_rx_resources(struct ena_adapter *);
static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
    struct ena_rx_buffer *);
static void	ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
    struct ena_rx_buffer *);
static int	ena_refill_rx_bufs(struct ena_ring *, uint32_t);
static void	ena_free_rx_bufs(struct ena_adapter *, unsigned int);
static void	ena_refill_all_rx_bufs(struct ena_adapter *);
static void	ena_free_all_rx_bufs(struct ena_adapter *);
static void	ena_free_tx_bufs(struct ena_adapter *, unsigned int);
static void	ena_free_all_tx_bufs(struct ena_adapter *);
static void	ena_destroy_all_tx_queues(struct ena_adapter *);
static void	ena_destroy_all_rx_queues(struct ena_adapter *);
static void	ena_destroy_all_io_queues(struct ena_adapter *);
static int	ena_create_io_queues(struct ena_adapter *);
static int	ena_tx_cleanup(struct ena_ring *);
static int	ena_rx_cleanup(struct ena_ring *);
static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
    struct ena_com_rx_ctx *, uint16_t *);
static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
    struct mbuf *);
static void	ena_cleanup(void *arg, int pending);
static int	ena_handle_msix(void *);
static int	ena_enable_msix(struct ena_adapter *);
static void	ena_setup_mgmnt_intr(struct ena_adapter *);
static int	ena_setup_io_intr(struct ena_adapter *);
static int	ena_request_mgmnt_irq(struct ena_adapter *);
static int	ena_request_io_irq(struct ena_adapter *);
static void	ena_free_mgmnt_irq(struct ena_adapter *);
static void	ena_free_io_irq(struct ena_adapter *);
static void	ena_free_irqs(struct ena_adapter*);
static void	ena_disable_msix(struct ena_adapter *);
static void	ena_unmask_all_io_irqs(struct ena_adapter *);
static int	ena_rss_configure(struct ena_adapter *);
static int	ena_up_complete(struct ena_adapter *);
static int	ena_up(struct ena_adapter *);
static void	ena_down(struct ena_adapter *);
static uint64_t	ena_get_counter(if_t, ift_counter);
static int	ena_media_change(if_t);
static void	ena_media_status(if_t, struct ifmediareq *);
static void	ena_init(void *);
static int	ena_ioctl(if_t, u_long, caddr_t);
static int	ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
static void	ena_update_host_info(struct ena_admin_host_info *, if_t);
static void	ena_update_hwassist(struct ena_adapter *);
static int	ena_setup_ifnet(device_t, struct ena_adapter *,
    struct ena_com_dev_get_features_ctx *);
static void	ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
static int	ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
    struct mbuf **mbuf);
static void	ena_dmamap_llq(void *, bus_dma_segment_t *, int, int);
static int	ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
static void	ena_start_xmit(struct ena_ring *);
static int	ena_mq_start(if_t, struct mbuf *);
static void	ena_deferred_mq_start(void *, int);
static void	ena_qflush(if_t);
static int	ena_enable_wc(struct resource *);
static int	ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
    struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
static int	ena_calc_io_queue_num(struct ena_adapter *,
    struct ena_com_dev_get_features_ctx *);
static int	ena_calc_queue_size(struct ena_adapter *,
    struct ena_calc_queue_size_ctx *);
static int	ena_handle_updated_queues(struct ena_adapter *,
    struct ena_com_dev_get_features_ctx *);
static int	ena_rss_init_default(struct ena_adapter *);
static void	ena_rss_init_default_deferred(void *);
static void	ena_config_host_info(struct ena_com_dev *, device_t);
static int	ena_attach(device_t);
static int	ena_detach(device_t);
static int	ena_device_init(struct ena_adapter *, device_t,
    struct ena_com_dev_get_features_ctx *, int *);
static int	ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *,
    int);
static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
static void	unimplemented_aenq_handler(void *,
    struct ena_admin_aenq_entry *);
static void	ena_timer_service(void *);
190 
/*
 * Version string built at compile time by concatenating the DEVICE_NAME,
 * DRV_MODULE_NAME and DRV_MODULE_VERSION macros.
 */
static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;

/*
 * PCI vendor/device ID pairs that ena_probe() accepts. The table is walked
 * until an entry with vendor_id == 0 is found, so the all-zero entry must
 * stay last.
 */
static ena_vendor_info_t ena_vendor_info_array[] = {
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0},
    /* Last entry */
    { 0, 0, 0 }
};

/*
 * Contains pointers to event handlers, e.g. link state change.
 */
static struct ena_aenq_handlers aenq_handlers;
206 
207 void
208 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
209 {
210 	if (error != 0)
211 		return;
212 	*(bus_addr_t *) arg = segs[0].ds_addr;
213 }
214 
215 int
216 ena_dma_alloc(device_t dmadev, bus_size_t size,
217     ena_mem_handle_t *dma , int mapflags)
218 {
219 	struct ena_adapter* adapter = device_get_softc(dmadev);
220 	uint32_t maxsize;
221 	uint64_t dma_space_addr;
222 	int error;
223 
224 	maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
225 
226 	dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
227 	if (unlikely(dma_space_addr == 0))
228 		dma_space_addr = BUS_SPACE_MAXADDR;
229 
230 	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
231 	    8, 0,	      /* alignment, bounds 		*/
232 	    dma_space_addr,   /* lowaddr of exclusion window	*/
233 	    BUS_SPACE_MAXADDR,/* highaddr of exclusion window	*/
234 	    NULL, NULL,	      /* filter, filterarg 		*/
235 	    maxsize,	      /* maxsize 			*/
236 	    1,		      /* nsegments 			*/
237 	    maxsize,	      /* maxsegsize 			*/
238 	    BUS_DMA_ALLOCNOW, /* flags 				*/
239 	    NULL,	      /* lockfunc 			*/
240 	    NULL,	      /* lockarg 			*/
241 	    &dma->tag);
242 	if (unlikely(error != 0)) {
243 		ena_trace(ENA_ALERT, "bus_dma_tag_create failed: %d\n", error);
244 		goto fail_tag;
245 	}
246 
247 	error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
248 	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
249 	if (unlikely(error != 0)) {
250 		ena_trace(ENA_ALERT, "bus_dmamem_alloc(%ju) failed: %d\n",
251 		    (uintmax_t)size, error);
252 		goto fail_map_create;
253 	}
254 
255 	dma->paddr = 0;
256 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
257 	    size, ena_dmamap_callback, &dma->paddr, mapflags);
258 	if (unlikely((error != 0) || (dma->paddr == 0))) {
259 		ena_trace(ENA_ALERT, ": bus_dmamap_load failed: %d\n", error);
260 		goto fail_map_load;
261 	}
262 
263 	bus_dmamap_sync(dma->tag, dma->map,
264 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
265 
266 	return (0);
267 
268 fail_map_load:
269 	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
270 fail_map_create:
271 	bus_dma_tag_destroy(dma->tag);
272 fail_tag:
273 	dma->tag = NULL;
274 	dma->vaddr = NULL;
275 	dma->paddr = 0;
276 
277 	return (error);
278 }
279 
280 static void
281 ena_free_pci_resources(struct ena_adapter *adapter)
282 {
283 	device_t pdev = adapter->pdev;
284 
285 	if (adapter->memory != NULL) {
286 		bus_release_resource(pdev, SYS_RES_MEMORY,
287 		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
288 	}
289 
290 	if (adapter->registers != NULL) {
291 		bus_release_resource(pdev, SYS_RES_MEMORY,
292 		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
293 	}
294 }
295 
296 static int
297 ena_probe(device_t dev)
298 {
299 	ena_vendor_info_t *ent;
300 	char		adapter_name[60];
301 	uint16_t	pci_vendor_id = 0;
302 	uint16_t	pci_device_id = 0;
303 
304 	pci_vendor_id = pci_get_vendor(dev);
305 	pci_device_id = pci_get_device(dev);
306 
307 	ent = ena_vendor_info_array;
308 	while (ent->vendor_id != 0) {
309 		if ((pci_vendor_id == ent->vendor_id) &&
310 		    (pci_device_id == ent->device_id)) {
311 			ena_trace(ENA_DBG, "vendor=%x device=%x\n",
312 			    pci_vendor_id, pci_device_id);
313 
314 			sprintf(adapter_name, DEVICE_DESC);
315 			device_set_desc_copy(dev, adapter_name);
316 			return (BUS_PROBE_DEFAULT);
317 		}
318 
319 		ent++;
320 
321 	}
322 
323 	return (ENXIO);
324 }
325 
326 static int
327 ena_change_mtu(if_t ifp, int new_mtu)
328 {
329 	struct ena_adapter *adapter = if_getsoftc(ifp);
330 	int rc;
331 
332 	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
333 		device_printf(adapter->pdev, "Invalid MTU setting. "
334 		    "new_mtu: %d max mtu: %d min mtu: %d\n",
335 		    new_mtu, adapter->max_mtu, ENA_MIN_MTU);
336 		return (EINVAL);
337 	}
338 
339 	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
340 	if (likely(rc == 0)) {
341 		ena_trace(ENA_DBG, "set MTU to %d\n", new_mtu);
342 		if_setmtu(ifp, new_mtu);
343 	} else {
344 		device_printf(adapter->pdev, "Failed to set MTU to %d\n",
345 		    new_mtu);
346 	}
347 
348 	return (rc);
349 }
350 
351 static inline void
352 ena_alloc_counters(counter_u64_t *begin, int size)
353 {
354 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
355 
356 	for (; begin < end; ++begin)
357 		*begin = counter_u64_alloc(M_WAITOK);
358 }
359 
360 static inline void
361 ena_free_counters(counter_u64_t *begin, int size)
362 {
363 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
364 
365 	for (; begin < end; ++begin)
366 		counter_u64_free(*begin);
367 }
368 
369 static inline void
370 ena_reset_counters(counter_u64_t *begin, int size)
371 {
372 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
373 
374 	for (; begin < end; ++begin)
375 		counter_u64_zero(*begin);
376 }
377 
378 static void
379 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
380     uint16_t qid)
381 {
382 
383 	ring->qid = qid;
384 	ring->adapter = adapter;
385 	ring->ena_dev = adapter->ena_dev;
386 	ring->first_interrupt = false;
387 	ring->no_interrupt_event_cnt = 0;
388 }
389 
390 static void
391 ena_init_io_rings(struct ena_adapter *adapter)
392 {
393 	struct ena_com_dev *ena_dev;
394 	struct ena_ring *txr, *rxr;
395 	struct ena_que *que;
396 	int i;
397 
398 	ena_dev = adapter->ena_dev;
399 
400 	for (i = 0; i < adapter->num_queues; i++) {
401 		txr = &adapter->tx_ring[i];
402 		rxr = &adapter->rx_ring[i];
403 
404 		/* TX/RX common ring state */
405 		ena_init_io_rings_common(adapter, txr, i);
406 		ena_init_io_rings_common(adapter, rxr, i);
407 
408 		/* TX specific ring state */
409 		txr->ring_size = adapter->tx_ring_size;
410 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
411 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
412 		txr->smoothed_interval =
413 		    ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
414 
415 		/* Allocate a buf ring */
416 		txr->buf_ring_size = adapter->buf_ring_size;
417 		txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
418 		    M_WAITOK, &txr->ring_mtx);
419 
420 		/* Alloc TX statistics. */
421 		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
422 		    sizeof(txr->tx_stats));
423 
424 		/* RX specific ring state */
425 		rxr->ring_size = adapter->rx_ring_size;
426 		rxr->smoothed_interval =
427 		    ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
428 
429 		/* Alloc RX statistics. */
430 		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
431 		    sizeof(rxr->rx_stats));
432 
433 		/* Initialize locks */
434 		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
435 		    device_get_nameunit(adapter->pdev), i);
436 		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
437 		    device_get_nameunit(adapter->pdev), i);
438 
439 		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
440 
441 		que = &adapter->que[i];
442 		que->adapter = adapter;
443 		que->id = i;
444 		que->tx_ring = txr;
445 		que->rx_ring = rxr;
446 
447 		txr->que = que;
448 		rxr->que = que;
449 
450 		rxr->empty_rx_queue = 0;
451 	}
452 }
453 
454 static void
455 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
456 {
457 	struct ena_ring *txr = &adapter->tx_ring[qid];
458 	struct ena_ring *rxr = &adapter->rx_ring[qid];
459 
460 	ena_free_counters((counter_u64_t *)&txr->tx_stats,
461 	    sizeof(txr->tx_stats));
462 	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
463 	    sizeof(rxr->rx_stats));
464 
465 	ENA_RING_MTX_LOCK(txr);
466 	drbr_free(txr->br, M_DEVBUF);
467 	ENA_RING_MTX_UNLOCK(txr);
468 
469 	mtx_destroy(&txr->ring_mtx);
470 }
471 
472 static void
473 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
474 {
475 	int i;
476 
477 	for (i = 0; i < adapter->num_queues; i++)
478 		ena_free_io_ring_resources(adapter, i);
479 
480 }
481 
482 static int
483 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
484 {
485 	int ret;
486 
487 	/* Create DMA tag for Tx buffers */
488 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
489 	    1, 0,				  /* alignment, bounds 	     */
490 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
491 	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
492 	    NULL, NULL,				  /* filter, filterarg 	     */
493 	    ENA_TSO_MAXSIZE,			  /* maxsize 		     */
494 	    adapter->max_tx_sgl_size - 1,	  /* nsegments 		     */
495 	    ENA_TSO_MAXSIZE,			  /* maxsegsize 	     */
496 	    0,					  /* flags 		     */
497 	    NULL,				  /* lockfunc 		     */
498 	    NULL,				  /* lockfuncarg 	     */
499 	    &adapter->tx_buf_tag);
500 
501 	return (ret);
502 }
503 
504 static int
505 ena_free_tx_dma_tag(struct ena_adapter *adapter)
506 {
507 	int ret;
508 
509 	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
510 
511 	if (likely(ret == 0))
512 		adapter->tx_buf_tag = NULL;
513 
514 	return (ret);
515 }
516 
517 static int
518 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
519 {
520 	int ret;
521 
522 	/* Create DMA tag for Rx buffers*/
523 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent   */
524 	    1, 0,				  /* alignment, bounds 	     */
525 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window  */
526 	    BUS_SPACE_MAXADDR, 			  /* highaddr of excl window */
527 	    NULL, NULL,				  /* filter, filterarg 	     */
528 	    MJUM16BYTES,			  /* maxsize 		     */
529 	    adapter->max_rx_sgl_size,		  /* nsegments 		     */
530 	    MJUM16BYTES,			  /* maxsegsize 	     */
531 	    0,					  /* flags 		     */
532 	    NULL,				  /* lockfunc 		     */
533 	    NULL,				  /* lockarg 		     */
534 	    &adapter->rx_buf_tag);
535 
536 	return (ret);
537 }
538 
539 static int
540 ena_free_rx_dma_tag(struct ena_adapter *adapter)
541 {
542 	int ret;
543 
544 	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
545 
546 	if (likely(ret == 0))
547 		adapter->rx_buf_tag = NULL;
548 
549 	return (ret);
550 }
551 
552 /**
553  * ena_setup_tx_resources - allocate Tx resources (Descriptors)
554  * @adapter: network interface device structure
555  * @qid: queue index
556  *
557  * Returns 0 on success, otherwise on failure.
558  **/
559 static int
560 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
561 {
562 	struct ena_que *que = &adapter->que[qid];
563 	struct ena_ring *tx_ring = que->tx_ring;
564 	int size, i, err;
565 
566 	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
567 
568 	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
569 	if (unlikely(tx_ring->tx_buffer_info == NULL))
570 		return (ENOMEM);
571 
572 	size = sizeof(uint16_t) * tx_ring->ring_size;
573 	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
574 	if (unlikely(tx_ring->free_tx_ids == NULL))
575 		goto err_buf_info_free;
576 
577 	size = tx_ring->tx_max_header_size;
578 	tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
579 	    M_NOWAIT | M_ZERO);
580 	if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
581 		goto err_tx_ids_free;
582 
583 	/* Req id stack for TX OOO completions */
584 	for (i = 0; i < tx_ring->ring_size; i++)
585 		tx_ring->free_tx_ids[i] = i;
586 
587 	/* Reset TX statistics. */
588 	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
589 	    sizeof(tx_ring->tx_stats));
590 
591 	tx_ring->next_to_use = 0;
592 	tx_ring->next_to_clean = 0;
593 	tx_ring->acum_pkts = 0;
594 
595 	/* Make sure that drbr is empty */
596 	ENA_RING_MTX_LOCK(tx_ring);
597 	drbr_flush(adapter->ifp, tx_ring->br);
598 	ENA_RING_MTX_UNLOCK(tx_ring);
599 
600 	/* ... and create the buffer DMA maps */
601 	for (i = 0; i < tx_ring->ring_size; i++) {
602 		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
603 		    &tx_ring->tx_buffer_info[i].map_head);
604 		if (unlikely(err != 0)) {
605 			ena_trace(ENA_ALERT,
606 			    "Unable to create Tx DMA map_head for buffer %d\n",
607 			    i);
608 			goto err_buf_info_unmap;
609 		}
610 		tx_ring->tx_buffer_info[i].seg_mapped = false;
611 
612 		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
613 		    &tx_ring->tx_buffer_info[i].map_seg);
614 		if (unlikely(err != 0)) {
615 			ena_trace(ENA_ALERT,
616 			    "Unable to create Tx DMA map_seg for buffer %d\n",
617 			    i);
618 			goto err_buf_info_head_unmap;
619 		}
620 		tx_ring->tx_buffer_info[i].head_mapped = false;
621 	}
622 
623 	/* Allocate taskqueues */
624 	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
625 	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
626 	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
627 	if (unlikely(tx_ring->enqueue_tq == NULL)) {
628 		ena_trace(ENA_ALERT,
629 		    "Unable to create taskqueue for enqueue task\n");
630 		i = tx_ring->ring_size;
631 		goto err_buf_info_unmap;
632 	}
633 
634 	tx_ring->running = true;
635 
636 	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
637 	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
638 
639 	return (0);
640 
641 err_buf_info_head_unmap:
642 	bus_dmamap_destroy(adapter->tx_buf_tag,
643 	    tx_ring->tx_buffer_info[i].map_head);
644 err_buf_info_unmap:
645 	while (i--) {
646 		bus_dmamap_destroy(adapter->tx_buf_tag,
647 		    tx_ring->tx_buffer_info[i].map_head);
648 		bus_dmamap_destroy(adapter->tx_buf_tag,
649 		    tx_ring->tx_buffer_info[i].map_seg);
650 	}
651 	free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
652 err_tx_ids_free:
653 	free(tx_ring->free_tx_ids, M_DEVBUF);
654 	tx_ring->free_tx_ids = NULL;
655 err_buf_info_free:
656 	free(tx_ring->tx_buffer_info, M_DEVBUF);
657 	tx_ring->tx_buffer_info = NULL;
658 
659 	return (ENOMEM);
660 }
661 
662 /**
663  * ena_free_tx_resources - Free Tx Resources per Queue
664  * @adapter: network interface device structure
665  * @qid: queue index
666  *
667  * Free all transmit software resources
668  **/
669 static void
670 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
671 {
672 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
673 
674 	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
675 	    NULL))
676 		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
677 
678 	taskqueue_free(tx_ring->enqueue_tq);
679 
680 	ENA_RING_MTX_LOCK(tx_ring);
681 	/* Flush buffer ring, */
682 	drbr_flush(adapter->ifp, tx_ring->br);
683 
684 	/* Free buffer DMA maps, */
685 	for (int i = 0; i < tx_ring->ring_size; i++) {
686 		if (tx_ring->tx_buffer_info[i].head_mapped == true) {
687 			bus_dmamap_sync(adapter->tx_buf_tag,
688 			    tx_ring->tx_buffer_info[i].map_head,
689 			    BUS_DMASYNC_POSTWRITE);
690 			bus_dmamap_unload(adapter->tx_buf_tag,
691 			    tx_ring->tx_buffer_info[i].map_head);
692 			tx_ring->tx_buffer_info[i].head_mapped = false;
693 		}
694 		bus_dmamap_destroy(adapter->tx_buf_tag,
695 		    tx_ring->tx_buffer_info[i].map_head);
696 
697 		if (tx_ring->tx_buffer_info[i].seg_mapped == true) {
698 			bus_dmamap_sync(adapter->tx_buf_tag,
699 			    tx_ring->tx_buffer_info[i].map_seg,
700 			    BUS_DMASYNC_POSTWRITE);
701 			bus_dmamap_unload(adapter->tx_buf_tag,
702 			    tx_ring->tx_buffer_info[i].map_seg);
703 			tx_ring->tx_buffer_info[i].seg_mapped = false;
704 		}
705 		bus_dmamap_destroy(adapter->tx_buf_tag,
706 		    tx_ring->tx_buffer_info[i].map_seg);
707 
708 		m_freem(tx_ring->tx_buffer_info[i].mbuf);
709 		tx_ring->tx_buffer_info[i].mbuf = NULL;
710 	}
711 	ENA_RING_MTX_UNLOCK(tx_ring);
712 
713 	/* And free allocated memory. */
714 	free(tx_ring->tx_buffer_info, M_DEVBUF);
715 	tx_ring->tx_buffer_info = NULL;
716 
717 	free(tx_ring->free_tx_ids, M_DEVBUF);
718 	tx_ring->free_tx_ids = NULL;
719 
720 	ENA_MEM_FREE(adapter->ena_dev->dmadev,
721 	    tx_ring->push_buf_intermediate_buf);
722 	tx_ring->push_buf_intermediate_buf = NULL;
723 }
724 
725 /**
726  * ena_setup_all_tx_resources - allocate all queues Tx resources
727  * @adapter: network interface device structure
728  *
729  * Returns 0 on success, otherwise on failure.
730  **/
731 static int
732 ena_setup_all_tx_resources(struct ena_adapter *adapter)
733 {
734 	int i, rc;
735 
736 	for (i = 0; i < adapter->num_queues; i++) {
737 		rc = ena_setup_tx_resources(adapter, i);
738 		if (rc != 0) {
739 			device_printf(adapter->pdev,
740 			    "Allocation for Tx Queue %u failed\n", i);
741 			goto err_setup_tx;
742 		}
743 	}
744 
745 	return (0);
746 
747 err_setup_tx:
748 	/* Rewind the index freeing the rings as we go */
749 	while (i--)
750 		ena_free_tx_resources(adapter, i);
751 	return (rc);
752 }
753 
754 /**
755  * ena_free_all_tx_resources - Free Tx Resources for All Queues
756  * @adapter: network interface device structure
757  *
758  * Free all transmit software resources
759  **/
760 static void
761 ena_free_all_tx_resources(struct ena_adapter *adapter)
762 {
763 	int i;
764 
765 	for (i = 0; i < adapter->num_queues; i++)
766 		ena_free_tx_resources(adapter, i);
767 }
768 
769 static inline int
770 validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
771 {
772 	if (likely(req_id < rx_ring->ring_size))
773 		return (0);
774 
775 	device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
776 	    req_id);
777 	counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
778 
779 	/* Trigger device reset */
780 	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter))) {
781 		rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
782 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter);
783 	}
784 
785 	return (EFAULT);
786 }
787 
788 /**
789  * ena_setup_rx_resources - allocate Rx resources (Descriptors)
790  * @adapter: network interface device structure
791  * @qid: queue index
792  *
793  * Returns 0 on success, otherwise on failure.
794  **/
795 static int
796 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
797 {
798 	struct ena_que *que = &adapter->que[qid];
799 	struct ena_ring *rx_ring = que->rx_ring;
800 	int size, err, i;
801 
802 	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
803 
804 	/*
805 	 * Alloc extra element so in rx path
806 	 * we can always prefetch rx_info + 1
807 	 */
808 	size += sizeof(struct ena_rx_buffer);
809 
810 	rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
811 
812 	size = sizeof(uint16_t) * rx_ring->ring_size;
813 	rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
814 
815 	for (i = 0; i < rx_ring->ring_size; i++)
816 		rx_ring->free_rx_ids[i] = i;
817 
818 	/* Reset RX statistics. */
819 	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
820 	    sizeof(rx_ring->rx_stats));
821 
822 	rx_ring->next_to_clean = 0;
823 	rx_ring->next_to_use = 0;
824 
825 	/* ... and create the buffer DMA maps */
826 	for (i = 0; i < rx_ring->ring_size; i++) {
827 		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
828 		    &(rx_ring->rx_buffer_info[i].map));
829 		if (err != 0) {
830 			ena_trace(ENA_ALERT,
831 			    "Unable to create Rx DMA map for buffer %d\n", i);
832 			goto err_buf_info_unmap;
833 		}
834 	}
835 
836 	/* Create LRO for the ring */
837 	if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
838 		int err = tcp_lro_init(&rx_ring->lro);
839 		if (err != 0) {
840 			device_printf(adapter->pdev,
841 			    "LRO[%d] Initialization failed!\n", qid);
842 		} else {
843 			ena_trace(ENA_INFO,
844 			    "RX Soft LRO[%d] Initialized\n", qid);
845 			rx_ring->lro.ifp = adapter->ifp;
846 		}
847 	}
848 
849 	return (0);
850 
851 err_buf_info_unmap:
852 	while (i--) {
853 		bus_dmamap_destroy(adapter->rx_buf_tag,
854 		    rx_ring->rx_buffer_info[i].map);
855 	}
856 
857 	free(rx_ring->free_rx_ids, M_DEVBUF);
858 	rx_ring->free_rx_ids = NULL;
859 	free(rx_ring->rx_buffer_info, M_DEVBUF);
860 	rx_ring->rx_buffer_info = NULL;
861 	return (ENOMEM);
862 }
863 
864 /**
865  * ena_free_rx_resources - Free Rx Resources
866  * @adapter: network interface device structure
867  * @qid: queue index
868  *
869  * Free all receive software resources
870  **/
871 static void
872 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
873 {
874 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
875 
876 	/* Free buffer DMA maps, */
877 	for (int i = 0; i < rx_ring->ring_size; i++) {
878 		bus_dmamap_sync(adapter->rx_buf_tag,
879 		    rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
880 		m_freem(rx_ring->rx_buffer_info[i].mbuf);
881 		rx_ring->rx_buffer_info[i].mbuf = NULL;
882 		bus_dmamap_unload(adapter->rx_buf_tag,
883 		    rx_ring->rx_buffer_info[i].map);
884 		bus_dmamap_destroy(adapter->rx_buf_tag,
885 		    rx_ring->rx_buffer_info[i].map);
886 	}
887 
888 	/* free LRO resources, */
889 	tcp_lro_free(&rx_ring->lro);
890 
891 	/* free allocated memory */
892 	free(rx_ring->rx_buffer_info, M_DEVBUF);
893 	rx_ring->rx_buffer_info = NULL;
894 
895 	free(rx_ring->free_rx_ids, M_DEVBUF);
896 	rx_ring->free_rx_ids = NULL;
897 }
898 
899 /**
900  * ena_setup_all_rx_resources - allocate all queues Rx resources
901  * @adapter: network interface device structure
902  *
903  * Returns 0 on success, otherwise on failure.
904  **/
905 static int
906 ena_setup_all_rx_resources(struct ena_adapter *adapter)
907 {
908 	int i, rc = 0;
909 
910 	for (i = 0; i < adapter->num_queues; i++) {
911 		rc = ena_setup_rx_resources(adapter, i);
912 		if (rc != 0) {
913 			device_printf(adapter->pdev,
914 			    "Allocation for Rx Queue %u failed\n", i);
915 			goto err_setup_rx;
916 		}
917 	}
918 	return (0);
919 
920 err_setup_rx:
921 	/* rewind the index freeing the rings as we go */
922 	while (i--)
923 		ena_free_rx_resources(adapter, i);
924 	return (rc);
925 }
926 
927 /**
928  * ena_free_all_rx_resources - Free Rx resources for all queues
929  * @adapter: network interface device structure
930  *
931  * Free all receive software resources
932  **/
933 static void
934 ena_free_all_rx_resources(struct ena_adapter *adapter)
935 {
936 	int i;
937 
938 	for (i = 0; i < adapter->num_queues; i++)
939 		ena_free_rx_resources(adapter, i);
940 }
941 
942 static inline int
943 ena_alloc_rx_mbuf(struct ena_adapter *adapter,
944     struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
945 {
946 	struct ena_com_buf *ena_buf;
947 	bus_dma_segment_t segs[1];
948 	int nsegs, error;
949 	int mlen;
950 
951 	/* if previous allocated frag is not used */
952 	if (unlikely(rx_info->mbuf != NULL))
953 		return (0);
954 
955 	/* Get mbuf using UMA allocator */
956 	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
957 
958 	if (unlikely(rx_info->mbuf == NULL)) {
959 		counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
960 		rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
961 		if (unlikely(rx_info->mbuf == NULL)) {
962 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
963 			return (ENOMEM);
964 		}
965 		mlen = MCLBYTES;
966 	} else {
967 		mlen = MJUM16BYTES;
968 	}
969 	/* Set mbuf length*/
970 	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
971 
972 	/* Map packets for DMA */
973 	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
974 	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
975 	    adapter->rx_buf_tag,rx_info->mbuf, rx_info->mbuf->m_len);
976 	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
977 	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
978 	if (unlikely((error != 0) || (nsegs != 1))) {
979 		ena_trace(ENA_WARNING, "failed to map mbuf, error: %d, "
980 		    "nsegs: %d\n", error, nsegs);
981 		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
982 		goto exit;
983 
984 	}
985 
986 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
987 
988 	ena_buf = &rx_info->ena_buf;
989 	ena_buf->paddr = segs[0].ds_addr;
990 	ena_buf->len = mlen;
991 
992 	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
993 	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
994 	    rx_info->mbuf, rx_info,ena_buf->len, (uintmax_t)ena_buf->paddr);
995 
996 	return (0);
997 
998 exit:
999 	m_freem(rx_info->mbuf);
1000 	rx_info->mbuf = NULL;
1001 	return (EFAULT);
1002 }
1003 
1004 static void
1005 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1006     struct ena_rx_buffer *rx_info)
1007 {
1008 
1009 	if (rx_info->mbuf == NULL) {
1010 		ena_trace(ENA_WARNING, "Trying to free unallocated buffer\n");
1011 		return;
1012 	}
1013 
1014 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1015 	    BUS_DMASYNC_POSTREAD);
1016 	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1017 	m_freem(rx_info->mbuf);
1018 	rx_info->mbuf = NULL;
1019 }
1020 
1021 /**
1022  * ena_refill_rx_bufs - Refills ring with descriptors
1023  * @rx_ring: the ring which we want to feed with free descriptors
1024  * @num: number of descriptors to refill
1025  * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1026  **/
1027 static int
1028 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1029 {
1030 	struct ena_adapter *adapter = rx_ring->adapter;
1031 	uint16_t next_to_use, req_id;
1032 	uint32_t i;
1033 	int rc;
1034 
1035 	ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d\n",
1036 	    rx_ring->qid);
1037 
1038 	next_to_use = rx_ring->next_to_use;
1039 
1040 	for (i = 0; i < num; i++) {
1041 		struct ena_rx_buffer *rx_info;
1042 
1043 		ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC,
1044 		    "RX buffer - next to use: %d\n", next_to_use);
1045 
1046 		req_id = rx_ring->free_rx_ids[next_to_use];
1047 		rx_info = &rx_ring->rx_buffer_info[req_id];
1048 
1049 		rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1050 		if (unlikely(rc != 0)) {
1051 			ena_trace(ENA_WARNING,
1052 			    "failed to alloc buffer for rx queue %d\n",
1053 			    rx_ring->qid);
1054 			break;
1055 		}
1056 		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1057 		    &rx_info->ena_buf, req_id);
1058 		if (unlikely(rc != 0)) {
1059 			ena_trace(ENA_WARNING,
1060 			    "failed to add buffer for rx queue %d\n",
1061 			    rx_ring->qid);
1062 			break;
1063 		}
1064 		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1065 		    rx_ring->ring_size);
1066 	}
1067 
1068 	if (unlikely(i < num)) {
1069 		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1070 		ena_trace(ENA_WARNING,
1071 		     "refilled rx qid %d with only %d mbufs (from %d)\n",
1072 		     rx_ring->qid, i, num);
1073 	}
1074 
1075 	if (likely(i != 0)) {
1076 		wmb();
1077 		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1078 	}
1079 	rx_ring->next_to_use = next_to_use;
1080 	return (i);
1081 }
1082 
1083 static void
1084 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1085 {
1086 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1087 	unsigned int i;
1088 
1089 	for (i = 0; i < rx_ring->ring_size; i++) {
1090 		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1091 
1092 		if (rx_info->mbuf != NULL)
1093 			ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1094 	}
1095 }
1096 
1097 /**
1098  * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1099  * @adapter: network interface device structure
1100  *
1101  */
1102 static void
1103 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1104 {
1105 	struct ena_ring *rx_ring;
1106 	int i, rc, bufs_num;
1107 
1108 	for (i = 0; i < adapter->num_queues; i++) {
1109 		rx_ring = &adapter->rx_ring[i];
1110 		bufs_num = rx_ring->ring_size - 1;
1111 		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1112 
1113 		if (unlikely(rc != bufs_num))
1114 			ena_trace(ENA_WARNING, "refilling Queue %d failed. "
1115 			    "Allocated %d buffers from: %d\n", i, rc, bufs_num);
1116 	}
1117 }
1118 
1119 static void
1120 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1121 {
1122 	int i;
1123 
1124 	for (i = 0; i < adapter->num_queues; i++)
1125 		ena_free_rx_bufs(adapter, i);
1126 }
1127 
1128 /**
1129  * ena_free_tx_bufs - Free Tx Buffers per Queue
1130  * @adapter: network interface device structure
1131  * @qid: queue index
1132  **/
1133 static void
1134 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1135 {
1136 	bool print_once = true;
1137 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1138 
1139 	ENA_RING_MTX_LOCK(tx_ring);
1140 	for (int i = 0; i < tx_ring->ring_size; i++) {
1141 		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1142 
1143 		if (tx_info->mbuf == NULL)
1144 			continue;
1145 
1146 		if (print_once) {
1147 			device_printf(adapter->pdev,
1148 			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
1149 			    qid, i);
1150 			print_once = false;
1151 		} else {
1152 			ena_trace(ENA_DBG,
1153 			    "free uncompleted tx mbuf qid %d idx 0x%x\n",
1154 			     qid, i);
1155 		}
1156 
1157 		if (tx_info->head_mapped == true) {
1158 			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_head,
1159 			    BUS_DMASYNC_POSTWRITE);
1160 			bus_dmamap_unload(adapter->tx_buf_tag,
1161 			    tx_info->map_head);
1162 			tx_info->head_mapped = false;
1163 		}
1164 
1165 		if (tx_info->seg_mapped == true) {
1166 			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_seg,
1167 			    BUS_DMASYNC_POSTWRITE);
1168 			bus_dmamap_unload(adapter->tx_buf_tag,
1169 			    tx_info->map_seg);
1170 			tx_info->seg_mapped = false;
1171 		}
1172 
1173 		m_free(tx_info->mbuf);
1174 		tx_info->mbuf = NULL;
1175 	}
1176 	ENA_RING_MTX_UNLOCK(tx_ring);
1177 }
1178 
1179 static void
1180 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1181 {
1182 
1183 	for (int i = 0; i < adapter->num_queues; i++)
1184 		ena_free_tx_bufs(adapter, i);
1185 }
1186 
1187 static void
1188 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1189 {
1190 	uint16_t ena_qid;
1191 	int i;
1192 
1193 	for (i = 0; i < adapter->num_queues; i++) {
1194 		ena_qid = ENA_IO_TXQ_IDX(i);
1195 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1196 	}
1197 }
1198 
1199 static void
1200 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1201 {
1202 	uint16_t ena_qid;
1203 	int i;
1204 
1205 	for (i = 0; i < adapter->num_queues; i++) {
1206 		ena_qid = ENA_IO_RXQ_IDX(i);
1207 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1208 	}
1209 }
1210 
1211 static void
1212 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1213 {
1214 	struct ena_que *queue;
1215 	int i;
1216 
1217 	for (i = 0; i < adapter->num_queues; i++) {
1218 		queue = &adapter->que[i];
1219 		while (taskqueue_cancel(queue->cleanup_tq,
1220 		    &queue->cleanup_task, NULL))
1221 			taskqueue_drain(queue->cleanup_tq,
1222 			    &queue->cleanup_task);
1223 		taskqueue_free(queue->cleanup_tq);
1224 	}
1225 
1226 	ena_destroy_all_tx_queues(adapter);
1227 	ena_destroy_all_rx_queues(adapter);
1228 }
1229 
1230 static inline int
1231 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
1232 {
1233 	struct ena_adapter *adapter = tx_ring->adapter;
1234 	struct ena_tx_buffer *tx_info = NULL;
1235 
1236 	if (likely(req_id < tx_ring->ring_size)) {
1237 		tx_info = &tx_ring->tx_buffer_info[req_id];
1238 		if (tx_info->mbuf != NULL)
1239 			return (0);
1240 		device_printf(adapter->pdev,
1241 		    "tx_info doesn't have valid mbuf\n");
1242 	}
1243 
1244 	device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
1245 	counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
1246 
1247 	/* Trigger device reset */
1248 	adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
1249 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
1250 
1251 	return (EFAULT);
1252 }
1253 
1254 static int
1255 ena_create_io_queues(struct ena_adapter *adapter)
1256 {
1257 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1258 	struct ena_com_create_io_ctx ctx;
1259 	struct ena_ring *ring;
1260 	struct ena_que *queue;
1261 	uint16_t ena_qid;
1262 	uint32_t msix_vector;
1263 	int rc, i;
1264 
1265 	/* Create TX queues */
1266 	for (i = 0; i < adapter->num_queues; i++) {
1267 		msix_vector = ENA_IO_IRQ_IDX(i);
1268 		ena_qid = ENA_IO_TXQ_IDX(i);
1269 		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1270 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1271 		ctx.queue_size = adapter->tx_ring_size;
1272 		ctx.msix_vector = msix_vector;
1273 		ctx.qid = ena_qid;
1274 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1275 		if (rc != 0) {
1276 			device_printf(adapter->pdev,
1277 			    "Failed to create io TX queue #%d rc: %d\n", i, rc);
1278 			goto err_tx;
1279 		}
1280 		ring = &adapter->tx_ring[i];
1281 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1282 		    &ring->ena_com_io_sq,
1283 		    &ring->ena_com_io_cq);
1284 		if (rc != 0) {
1285 			device_printf(adapter->pdev,
1286 			    "Failed to get TX queue handlers. TX queue num"
1287 			    " %d rc: %d\n", i, rc);
1288 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1289 			goto err_tx;
1290 		}
1291 	}
1292 
1293 	/* Create RX queues */
1294 	for (i = 0; i < adapter->num_queues; i++) {
1295 		msix_vector = ENA_IO_IRQ_IDX(i);
1296 		ena_qid = ENA_IO_RXQ_IDX(i);
1297 		ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1298 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1299 		ctx.queue_size = adapter->rx_ring_size;
1300 		ctx.msix_vector = msix_vector;
1301 		ctx.qid = ena_qid;
1302 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1303 		if (unlikely(rc != 0)) {
1304 			device_printf(adapter->pdev,
1305 			    "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1306 			goto err_rx;
1307 		}
1308 
1309 		ring = &adapter->rx_ring[i];
1310 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1311 		    &ring->ena_com_io_sq,
1312 		    &ring->ena_com_io_cq);
1313 		if (unlikely(rc != 0)) {
1314 			device_printf(adapter->pdev,
1315 			    "Failed to get RX queue handlers. RX queue num"
1316 			    " %d rc: %d\n", i, rc);
1317 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1318 			goto err_rx;
1319 		}
1320 	}
1321 
1322 	for (i = 0; i < adapter->num_queues; i++) {
1323 		queue = &adapter->que[i];
1324 
1325 		TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
1326 		queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
1327 		    M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
1328 
1329 		taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
1330 		    "%s queue %d cleanup",
1331 		    device_get_nameunit(adapter->pdev), i);
1332 	}
1333 
1334 	return (0);
1335 
1336 err_rx:
1337 	while (i--)
1338 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1339 	i = adapter->num_queues;
1340 err_tx:
1341 	while (i--)
1342 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1343 
1344 	return (ENXIO);
1345 }
1346 
1347 /**
1348  * ena_tx_cleanup - clear sent packets and corresponding descriptors
1349  * @tx_ring: ring for which we want to clean packets
1350  *
1351  * Once packets are sent, we ask the device in a loop for no longer used
1352  * descriptors. We find the related mbuf chain in a map (index in an array)
1353  * and free it, then update ring state.
1354  * This is performed in "endless" loop, updating ring pointers every
1355  * TX_COMMIT. The first check of free descriptor is performed before the actual
1356  * loop, then repeated at the loop end.
1357  **/
1358 static int
1359 ena_tx_cleanup(struct ena_ring *tx_ring)
1360 {
1361 	struct ena_adapter *adapter;
1362 	struct ena_com_io_cq* io_cq;
1363 	uint16_t next_to_clean;
1364 	uint16_t req_id;
1365 	uint16_t ena_qid;
1366 	unsigned int total_done = 0;
1367 	int rc;
1368 	int commit = TX_COMMIT;
1369 	int budget = TX_BUDGET;
1370 	int work_done;
1371 	bool above_thresh;
1372 
1373 	adapter = tx_ring->que->adapter;
1374 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1375 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1376 	next_to_clean = tx_ring->next_to_clean;
1377 
1378 	do {
1379 		struct ena_tx_buffer *tx_info;
1380 		struct mbuf *mbuf;
1381 
1382 		rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
1383 		if (unlikely(rc != 0))
1384 			break;
1385 
1386 		rc = validate_tx_req_id(tx_ring, req_id);
1387 		if (unlikely(rc != 0))
1388 			break;
1389 
1390 		tx_info = &tx_ring->tx_buffer_info[req_id];
1391 
1392 		mbuf = tx_info->mbuf;
1393 
1394 		tx_info->mbuf = NULL;
1395 		bintime_clear(&tx_info->timestamp);
1396 
1397 		/* Map is no longer required */
1398 		if (tx_info->head_mapped == true) {
1399 			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_head,
1400 			    BUS_DMASYNC_POSTWRITE);
1401 			bus_dmamap_unload(adapter->tx_buf_tag,
1402 			    tx_info->map_head);
1403 			tx_info->head_mapped = false;
1404 		}
1405 		if (tx_info->seg_mapped == true) {
1406 			bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_seg,
1407 			    BUS_DMASYNC_POSTWRITE);
1408 			bus_dmamap_unload(adapter->tx_buf_tag,
1409 			    tx_info->map_seg);
1410 			tx_info->seg_mapped = false;
1411 		}
1412 
1413 		ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n",
1414 		    tx_ring->qid, mbuf);
1415 
1416 		m_freem(mbuf);
1417 
1418 		total_done += tx_info->tx_descs;
1419 
1420 		tx_ring->free_tx_ids[next_to_clean] = req_id;
1421 		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1422 		    tx_ring->ring_size);
1423 
1424 		if (unlikely(--commit == 0)) {
1425 			commit = TX_COMMIT;
1426 			/* update ring state every TX_COMMIT descriptor */
1427 			tx_ring->next_to_clean = next_to_clean;
1428 			ena_com_comp_ack(
1429 			    &adapter->ena_dev->io_sq_queues[ena_qid],
1430 			    total_done);
1431 			ena_com_update_dev_comp_head(io_cq);
1432 			total_done = 0;
1433 		}
1434 	} while (likely(--budget));
1435 
1436 	work_done = TX_BUDGET - budget;
1437 
1438 	ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n",
1439 	tx_ring->qid, work_done);
1440 
1441 	/* If there is still something to commit update ring state */
1442 	if (likely(commit != TX_COMMIT)) {
1443 		tx_ring->next_to_clean = next_to_clean;
1444 		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
1445 		    total_done);
1446 		ena_com_update_dev_comp_head(io_cq);
1447 	}
1448 
1449 	/*
1450 	 * Need to make the rings circular update visible to
1451 	 * ena_xmit_mbuf() before checking for tx_ring->running.
1452 	 */
1453 	mb();
1454 
1455 	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1456 	    ENA_TX_RESUME_THRESH);
1457 	if (unlikely(!tx_ring->running && above_thresh)) {
1458 		ENA_RING_MTX_LOCK(tx_ring);
1459 		above_thresh =
1460 		    ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1461 		    ENA_TX_RESUME_THRESH);
1462 		if (!tx_ring->running && above_thresh) {
1463 			tx_ring->running = true;
1464 			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
1465 			taskqueue_enqueue(tx_ring->enqueue_tq,
1466 			    &tx_ring->enqueue_task);
1467 		}
1468 		ENA_RING_MTX_UNLOCK(tx_ring);
1469 	}
1470 
1471 	return (work_done);
1472 }
1473 
1474 static void
1475 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1476     struct mbuf *mbuf)
1477 {
1478 	struct ena_adapter *adapter = rx_ring->adapter;
1479 
1480 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1481 		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
1482 
1483 		if (ena_rx_ctx->frag &&
1484 		    (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
1485 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1486 			return;
1487 		}
1488 
1489 		switch (ena_rx_ctx->l3_proto) {
1490 		case ENA_ETH_IO_L3_PROTO_IPV4:
1491 			switch (ena_rx_ctx->l4_proto) {
1492 			case ENA_ETH_IO_L4_PROTO_TCP:
1493 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
1494 				break;
1495 			case ENA_ETH_IO_L4_PROTO_UDP:
1496 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
1497 				break;
1498 			default:
1499 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
1500 			}
1501 			break;
1502 		case ENA_ETH_IO_L3_PROTO_IPV6:
1503 			switch (ena_rx_ctx->l4_proto) {
1504 			case ENA_ETH_IO_L4_PROTO_TCP:
1505 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
1506 				break;
1507 			case ENA_ETH_IO_L4_PROTO_UDP:
1508 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
1509 				break;
1510 			default:
1511 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
1512 			}
1513 			break;
1514 		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
1515 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1516 			break;
1517 		default:
1518 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1519 		}
1520 	} else {
1521 		mbuf->m_pkthdr.flowid = rx_ring->qid;
1522 		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1523 	}
1524 }
1525 
1526 /**
1527  * ena_rx_mbuf - assemble mbuf from descriptors
1528  * @rx_ring: ring for which we want to clean packets
1529  * @ena_bufs: buffer info
1530  * @ena_rx_ctx: metadata for this packet(s)
1531  * @next_to_clean: ring pointer, will be updated only upon success
1532  *
1533  **/
1534 static struct mbuf*
1535 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
1536     struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
1537 {
1538 	struct mbuf *mbuf;
1539 	struct ena_rx_buffer *rx_info;
1540 	struct ena_adapter *adapter;
1541 	unsigned int descs = ena_rx_ctx->descs;
1542 	int rc;
1543 	uint16_t ntc, len, req_id, buf = 0;
1544 
1545 	ntc = *next_to_clean;
1546 	adapter = rx_ring->adapter;
1547 
1548 	len = ena_bufs[buf].len;
1549 	req_id = ena_bufs[buf].req_id;
1550 	rc = validate_rx_req_id(rx_ring, req_id);
1551 	if (unlikely(rc != 0))
1552 		return (NULL);
1553 
1554 	rx_info = &rx_ring->rx_buffer_info[req_id];
1555 	if (unlikely(rx_info->mbuf == NULL)) {
1556 		device_printf(adapter->pdev, "NULL mbuf in rx_info");
1557 		return (NULL);
1558 	}
1559 
1560 	ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n",
1561 	    rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
1562 
1563 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1564 	    BUS_DMASYNC_POSTREAD);
1565 	mbuf = rx_info->mbuf;
1566 	mbuf->m_flags |= M_PKTHDR;
1567 	mbuf->m_pkthdr.len = len;
1568 	mbuf->m_len = len;
1569 	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
1570 
1571 	/* Fill mbuf with hash key and it's interpretation for optimization */
1572 	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
1573 
1574 	ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n",
1575 	    mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
1576 
1577 	/* DMA address is not needed anymore, unmap it */
1578 	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1579 
1580 	rx_info->mbuf = NULL;
1581 	rx_ring->free_rx_ids[ntc] = req_id;
1582 	ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1583 
1584 	/*
1585 	 * While we have more than 1 descriptors for one rcvd packet, append
1586 	 * other mbufs to the main one
1587 	 */
1588 	while (--descs) {
1589 		++buf;
1590 		len = ena_bufs[buf].len;
1591 		req_id = ena_bufs[buf].req_id;
1592 		rc = validate_rx_req_id(rx_ring, req_id);
1593 		if (unlikely(rc != 0)) {
1594 			/*
1595 			 * If the req_id is invalid, then the device will be
1596 			 * reset. In that case we must free all mbufs that
1597 			 * were already gathered.
1598 			 */
1599 			m_freem(mbuf);
1600 			return (NULL);
1601 		}
1602 		rx_info = &rx_ring->rx_buffer_info[req_id];
1603 
1604 		if (unlikely(rx_info->mbuf == NULL)) {
1605 			device_printf(adapter->pdev, "NULL mbuf in rx_info");
1606 			/*
1607 			 * If one of the required mbufs was not allocated yet,
1608 			 * we can break there.
1609 			 * All earlier used descriptors will be reallocated
1610 			 * later and not used mbufs can be reused.
1611 			 * The next_to_clean pointer will not be updated in case
1612 			 * of an error, so caller should advance it manually
1613 			 * in error handling routine to keep it up to date
1614 			 * with hw ring.
1615 			 */
1616 			m_freem(mbuf);
1617 			return (NULL);
1618 		}
1619 
1620 		bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
1621 		    BUS_DMASYNC_POSTREAD);
1622 		if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
1623 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1624 			ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p\n",
1625 			    mbuf);
1626 		}
1627 
1628 		ena_trace(ENA_DBG | ENA_RXPTH,
1629 		    "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len);
1630 
1631 		/* Free already appended mbuf, it won't be useful anymore */
1632 		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1633 		m_freem(rx_info->mbuf);
1634 		rx_info->mbuf = NULL;
1635 
1636 		rx_ring->free_rx_ids[ntc] = req_id;
1637 		ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1638 	}
1639 
1640 	*next_to_clean = ntc;
1641 
1642 	return (mbuf);
1643 }
1644 
1645 /**
1646  * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
1647  **/
1648 static inline void
1649 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1650     struct mbuf *mbuf)
1651 {
1652 
1653 	/* if IP and error */
1654 	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1655 	    ena_rx_ctx->l3_csum_err)) {
1656 		/* ipv4 checksum error */
1657 		mbuf->m_pkthdr.csum_flags = 0;
1658 		counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1659 		ena_trace(ENA_DBG, "RX IPv4 header checksum error\n");
1660 		return;
1661 	}
1662 
1663 	/* if TCP/UDP */
1664 	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1665 	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
1666 		if (ena_rx_ctx->l4_csum_err) {
1667 			/* TCP/UDP checksum error */
1668 			mbuf->m_pkthdr.csum_flags = 0;
1669 			counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1670 			ena_trace(ENA_DBG, "RX L4 checksum error\n");
1671 		} else {
1672 			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1673 			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1674 		}
1675 	}
1676 }
1677 
1678 /**
1679  * ena_rx_cleanup - handle rx irq
1680  * @arg: ring for which irq is being handled
1681  **/
1682 static int
1683 ena_rx_cleanup(struct ena_ring *rx_ring)
1684 {
1685 	struct ena_adapter *adapter;
1686 	struct mbuf *mbuf;
1687 	struct ena_com_rx_ctx ena_rx_ctx;
1688 	struct ena_com_io_cq* io_cq;
1689 	struct ena_com_io_sq* io_sq;
1690 	if_t ifp;
1691 	uint16_t ena_qid;
1692 	uint16_t next_to_clean;
1693 	uint32_t refill_required;
1694 	uint32_t refill_threshold;
1695 	uint32_t do_if_input = 0;
1696 	unsigned int qid;
1697 	int rc, i;
1698 	int budget = RX_BUDGET;
1699 
1700 	adapter = rx_ring->que->adapter;
1701 	ifp = adapter->ifp;
1702 	qid = rx_ring->que->id;
1703 	ena_qid = ENA_IO_RXQ_IDX(qid);
1704 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1705 	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1706 	next_to_clean = rx_ring->next_to_clean;
1707 
1708 	ena_trace(ENA_DBG, "rx: qid %d\n", qid);
1709 
1710 	do {
1711 		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1712 		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
1713 		ena_rx_ctx.descs = 0;
1714 		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
1715 		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
1716 		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
1717 
1718 		if (unlikely(rc != 0))
1719 			goto error;
1720 
1721 		if (unlikely(ena_rx_ctx.descs == 0))
1722 			break;
1723 
1724 		ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
1725 		    "descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1726 		    rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1727 		    ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1728 
1729 		/* Receive mbuf from the ring */
1730 		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
1731 		    &ena_rx_ctx, &next_to_clean);
1732 		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
1733 		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
1734 		/* Exit if we failed to retrieve a buffer */
1735 		if (unlikely(mbuf == NULL)) {
1736 			for (i = 0; i < ena_rx_ctx.descs; ++i) {
1737 				rx_ring->free_rx_ids[next_to_clean] =
1738 				    rx_ring->ena_bufs[i].req_id;
1739 				next_to_clean =
1740 				    ENA_RX_RING_IDX_NEXT(next_to_clean,
1741 				    rx_ring->ring_size);
1742 
1743 			}
1744 			break;
1745 		}
1746 
1747 		if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
1748 		    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
1749 			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
1750 		}
1751 
1752 		counter_enter();
1753 		counter_u64_add_protected(rx_ring->rx_stats.bytes,
1754 		    mbuf->m_pkthdr.len);
1755 		counter_u64_add_protected(adapter->hw_stats.rx_bytes,
1756 		    mbuf->m_pkthdr.len);
1757 		counter_exit();
1758 		/*
1759 		 * LRO is only for IP/TCP packets and TCP checksum of the packet
1760 		 * should be computed by hardware.
1761 		 */
1762 		do_if_input = 1;
1763 		if (((ifp->if_capenable & IFCAP_LRO) != 0)  &&
1764 		    ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
1765 		    (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
1766 			/*
1767 			 * Send to the stack if:
1768 			 *  - LRO not enabled, or
1769 			 *  - no LRO resources, or
1770 			 *  - lro enqueue fails
1771 			 */
1772 			if ((rx_ring->lro.lro_cnt != 0) &&
1773 			    (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
1774 					do_if_input = 0;
1775 		}
1776 		if (do_if_input != 0) {
1777 			ena_trace(ENA_DBG | ENA_RXPTH,
1778 			    "calling if_input() with mbuf %p\n", mbuf);
1779 			(*ifp->if_input)(ifp, mbuf);
1780 		}
1781 
1782 		counter_enter();
1783 		counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
1784 		counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
1785 		counter_exit();
1786 	} while (--budget);
1787 
1788 	rx_ring->next_to_clean = next_to_clean;
1789 
1790 	refill_required = ena_com_free_desc(io_sq);
1791 	refill_threshold = min_t(int,
1792 	    rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1793 	    ENA_RX_REFILL_THRESH_PACKET);
1794 
1795 	if (refill_required > refill_threshold) {
1796 		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1797 		ena_refill_rx_bufs(rx_ring, refill_required);
1798 	}
1799 
1800 	tcp_lro_flush_all(&rx_ring->lro);
1801 
1802 	return (RX_BUDGET - budget);
1803 
1804 error:
1805 	counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
1806 
1807 	/* Too many desc from the device. Trigger reset */
1808 	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
1809 		adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1810 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
1811 	}
1812 
1813 	return (0);
1814 }
1815 
1816 /*********************************************************************
1817  *
1818  *  MSIX & Interrupt Service routine
1819  *
1820  **********************************************************************/
1821 
1822 /**
1823  * ena_handle_msix - MSIX Interrupt Handler for admin/async queue
1824  * @arg: interrupt number
1825  **/
1826 static void
1827 ena_intr_msix_mgmnt(void *arg)
1828 {
1829 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
1830 
1831 	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1832 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
1833 		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1834 }
1835 
1836 static void
1837 ena_cleanup(void *arg, int pending)
1838 {
1839 	struct ena_que	*que = arg;
1840 	struct ena_adapter *adapter = que->adapter;
1841 	if_t ifp = adapter->ifp;
1842 	struct ena_ring *tx_ring;
1843 	struct ena_ring *rx_ring;
1844 	struct ena_com_io_cq* io_cq;
1845 	struct ena_eth_io_intr_reg intr_reg;
1846 	int qid, ena_qid;
1847 	int txc, rxc, i;
1848 
1849 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1850 		return;
1851 
1852 	ena_trace(ENA_DBG, "MSI-X TX/RX routine\n");
1853 
1854 	tx_ring = que->tx_ring;
1855 	rx_ring = que->rx_ring;
1856 	qid = que->id;
1857 	ena_qid = ENA_IO_TXQ_IDX(qid);
1858 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1859 
1860 	tx_ring->first_interrupt = true;
1861 	rx_ring->first_interrupt = true;
1862 
1863 	for (i = 0; i < CLEAN_BUDGET; ++i) {
1864 		rxc = ena_rx_cleanup(rx_ring);
1865 		txc = ena_tx_cleanup(tx_ring);
1866 
1867 		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1868 			return;
1869 
1870 		if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
1871 		       break;
1872 	}
1873 
1874 	/* Signal that work is done and unmask interrupt */
1875 	ena_com_update_intr_reg(&intr_reg,
1876 	    RX_IRQ_INTERVAL,
1877 	    TX_IRQ_INTERVAL,
1878 	    true);
1879 	ena_com_unmask_intr(io_cq, &intr_reg);
1880 }
1881 
1882 /**
1883  * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1884  * @arg: queue
1885  **/
1886 static int
1887 ena_handle_msix(void *arg)
1888 {
1889 	struct ena_que *queue = arg;
1890 	struct ena_adapter *adapter = queue->adapter;
1891 	if_t ifp = adapter->ifp;
1892 
1893 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
1894 		return (FILTER_STRAY);
1895 
1896 	taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
1897 
1898 	return (FILTER_HANDLED);
1899 }
1900 
1901 static int
1902 ena_enable_msix(struct ena_adapter *adapter)
1903 {
1904 	device_t dev = adapter->pdev;
1905 	int msix_vecs, msix_req;
1906 	int i, rc = 0;
1907 
1908 	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
1909 		device_printf(dev, "Error, MSI-X is already enabled\n");
1910 		return (EINVAL);
1911 	}
1912 
1913 	/* Reserved the max msix vectors we might need */
1914 	msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_queues);
1915 
1916 	adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
1917 	    M_DEVBUF, M_WAITOK | M_ZERO);
1918 
1919 	ena_trace(ENA_DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
1920 
1921 	for (i = 0; i < msix_vecs; i++) {
1922 		adapter->msix_entries[i].entry = i;
1923 		/* Vectors must start from 1 */
1924 		adapter->msix_entries[i].vector = i + 1;
1925 	}
1926 
1927 	msix_req = msix_vecs;
1928 	rc = pci_alloc_msix(dev, &msix_vecs);
1929 	if (unlikely(rc != 0)) {
1930 		device_printf(dev,
1931 		    "Failed to enable MSIX, vectors %d rc %d\n", msix_vecs, rc);
1932 
1933 		rc = ENOSPC;
1934 		goto err_msix_free;
1935 	}
1936 
1937 	if (msix_vecs != msix_req) {
1938 		if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
1939 			device_printf(dev,
1940 			    "Not enough number of MSI-x allocated: %d\n",
1941 			    msix_vecs);
1942 			pci_release_msi(dev);
1943 			rc = ENOSPC;
1944 			goto err_msix_free;
1945 		}
1946 		device_printf(dev, "Enable only %d MSI-x (out of %d), reduce "
1947 		    "the number of queues\n", msix_vecs, msix_req);
1948 		adapter->num_queues = msix_vecs - ENA_ADMIN_MSIX_VEC;
1949 	}
1950 
1951 	adapter->msix_vecs = msix_vecs;
1952 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
1953 
1954 	return (0);
1955 
1956 err_msix_free:
1957 	free(adapter->msix_entries, M_DEVBUF);
1958 	adapter->msix_entries = NULL;
1959 
1960 	return (rc);
1961 }
1962 
1963 static void
1964 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1965 {
1966 
1967 	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1968 	    ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1969 	    device_get_nameunit(adapter->pdev));
1970 	/*
1971 	 * Handler is NULL on purpose, it will be set
1972 	 * when mgmnt interrupt is acquired
1973 	 */
1974 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1975 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1976 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1977 	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1978 }
1979 
1980 static int
1981 ena_setup_io_intr(struct ena_adapter *adapter)
1982 {
1983 	static int last_bind_cpu = -1;
1984 	int irq_idx;
1985 
1986 	if (adapter->msix_entries == NULL)
1987 		return (EINVAL);
1988 
1989 	for (int i = 0; i < adapter->num_queues; i++) {
1990 		irq_idx = ENA_IO_IRQ_IDX(i);
1991 
1992 		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1993 		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1994 		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1995 		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1996 		adapter->irq_tbl[irq_idx].vector =
1997 		    adapter->msix_entries[irq_idx].vector;
1998 		ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1999 		    adapter->msix_entries[irq_idx].vector);
2000 
2001 		/*
2002 		 * We want to bind rings to the corresponding cpu
2003 		 * using something similar to the RSS round-robin technique.
2004 		 */
2005 		if (unlikely(last_bind_cpu < 0))
2006 			last_bind_cpu = CPU_FIRST();
2007 		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
2008 		    last_bind_cpu;
2009 		last_bind_cpu = CPU_NEXT(last_bind_cpu);
2010 	}
2011 
2012 	return (0);
2013 }
2014 
2015 static int
2016 ena_request_mgmnt_irq(struct ena_adapter *adapter)
2017 {
2018 	struct ena_irq *irq;
2019 	unsigned long flags;
2020 	int rc, rcc;
2021 
2022 	flags = RF_ACTIVE | RF_SHAREABLE;
2023 
2024 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2025 	irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
2026 	    &irq->vector, flags);
2027 
2028 	if (unlikely(irq->res == NULL)) {
2029 		device_printf(adapter->pdev, "could not allocate "
2030 		    "irq vector: %d\n", irq->vector);
2031 		return (ENXIO);
2032 	}
2033 
2034 	rc = bus_setup_intr(adapter->pdev, irq->res,
2035 	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt,
2036 	    irq->data, &irq->cookie);
2037 	if (unlikely(rc != 0)) {
2038 		device_printf(adapter->pdev, "failed to register "
2039 		    "interrupt handler for irq %ju: %d\n",
2040 		    rman_get_start(irq->res), rc);
2041 		goto err_res_free;
2042 	}
2043 	irq->requested = true;
2044 
2045 	return (rc);
2046 
2047 err_res_free:
2048 	ena_trace(ENA_INFO | ENA_ADMQ, "releasing resource for irq %d\n",
2049 	    irq->vector);
2050 	rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2051 	    irq->vector, irq->res);
2052 	if (unlikely(rcc != 0))
2053 		device_printf(adapter->pdev, "dev has no parent while "
2054 		    "releasing res for irq: %d\n", irq->vector);
2055 	irq->res = NULL;
2056 
2057 	return (rc);
2058 }
2059 
2060 static int
2061 ena_request_io_irq(struct ena_adapter *adapter)
2062 {
2063 	struct ena_irq *irq;
2064 	unsigned long flags = 0;
2065 	int rc = 0, i, rcc;
2066 
2067 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
2068 		device_printf(adapter->pdev,
2069 		    "failed to request I/O IRQ: MSI-X is not enabled\n");
2070 		return (EINVAL);
2071 	} else {
2072 		flags = RF_ACTIVE | RF_SHAREABLE;
2073 	}
2074 
2075 	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
2076 		irq = &adapter->irq_tbl[i];
2077 
2078 		if (unlikely(irq->requested))
2079 			continue;
2080 
2081 		irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
2082 		    &irq->vector, flags);
2083 		if (unlikely(irq->res == NULL)) {
2084 			rc = ENOMEM;
2085 			device_printf(adapter->pdev, "could not allocate "
2086 			    "irq vector: %d\n", irq->vector);
2087 			goto err;
2088 		}
2089 
2090 		rc = bus_setup_intr(adapter->pdev, irq->res,
2091 		    INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
2092 		    irq->data, &irq->cookie);
2093 		 if (unlikely(rc != 0)) {
2094 			device_printf(adapter->pdev, "failed to register "
2095 			    "interrupt handler for irq %ju: %d\n",
2096 			    rman_get_start(irq->res), rc);
2097 			goto err;
2098 		}
2099 		irq->requested = true;
2100 
2101 		ena_trace(ENA_INFO, "queue %d - cpu %d\n",
2102 		    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
2103 	}
2104 
2105 	return (rc);
2106 
2107 err:
2108 
2109 	for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
2110 		irq = &adapter->irq_tbl[i];
2111 		rcc = 0;
2112 
2113 		/* Once we entered err: section and irq->requested is true we
2114 		   free both intr and resources */
2115 		if (irq->requested)
2116 			rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
2117 		if (unlikely(rcc != 0))
2118 			device_printf(adapter->pdev, "could not release"
2119 			    " irq: %d, error: %d\n", irq->vector, rcc);
2120 
2121 		/* If we entred err: section without irq->requested set we know
2122 		   it was bus_alloc_resource_any() that needs cleanup, provided
2123 		   res is not NULL. In case res is NULL no work in needed in
2124 		   this iteration */
2125 		rcc = 0;
2126 		if (irq->res != NULL) {
2127 			rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2128 			    irq->vector, irq->res);
2129 		}
2130 		if (unlikely(rcc != 0))
2131 			device_printf(adapter->pdev, "dev has no parent while "
2132 			    "releasing res for irq: %d\n", irq->vector);
2133 		irq->requested = false;
2134 		irq->res = NULL;
2135 	}
2136 
2137 	return (rc);
2138 }
2139 
2140 static void
2141 ena_free_mgmnt_irq(struct ena_adapter *adapter)
2142 {
2143 	struct ena_irq *irq;
2144 	int rc;
2145 
2146 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2147 	if (irq->requested) {
2148 		ena_trace(ENA_INFO | ENA_ADMQ, "tear down irq: %d\n",
2149 		    irq->vector);
2150 		rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
2151 		if (unlikely(rc != 0))
2152 			device_printf(adapter->pdev, "failed to tear "
2153 			    "down irq: %d\n", irq->vector);
2154 		irq->requested = 0;
2155 	}
2156 
2157 	if (irq->res != NULL) {
2158 		ena_trace(ENA_INFO | ENA_ADMQ, "release resource irq: %d\n",
2159 		    irq->vector);
2160 		rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2161 		    irq->vector, irq->res);
2162 		irq->res = NULL;
2163 		if (unlikely(rc != 0))
2164 			device_printf(adapter->pdev, "dev has no parent while "
2165 			    "releasing res for irq: %d\n", irq->vector);
2166 	}
2167 }
2168 
2169 static void
2170 ena_free_io_irq(struct ena_adapter *adapter)
2171 {
2172 	struct ena_irq *irq;
2173 	int rc;
2174 
2175 	for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
2176 		irq = &adapter->irq_tbl[i];
2177 		if (irq->requested) {
2178 			ena_trace(ENA_INFO | ENA_IOQ, "tear down irq: %d\n",
2179 			    irq->vector);
2180 			rc = bus_teardown_intr(adapter->pdev, irq->res,
2181 			    irq->cookie);
2182 			if (unlikely(rc != 0)) {
2183 				device_printf(adapter->pdev, "failed to tear "
2184 				    "down irq: %d\n", irq->vector);
2185 			}
2186 			irq->requested = 0;
2187 		}
2188 
2189 		if (irq->res != NULL) {
2190 			ena_trace(ENA_INFO | ENA_IOQ, "release resource irq: %d\n",
2191 			    irq->vector);
2192 			rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
2193 			    irq->vector, irq->res);
2194 			irq->res = NULL;
2195 			if (unlikely(rc != 0)) {
2196 				device_printf(adapter->pdev, "dev has no parent"
2197 				    " while releasing res for irq: %d\n",
2198 				    irq->vector);
2199 			}
2200 		}
2201 	}
2202 }
2203 
/*
 * Release every interrupt owned by the adapter: first the IO vectors, then
 * the management vector, and finally the MSI-X allocation itself.
 */
static void
ena_free_irqs(struct ena_adapter *adapter)
{

	ena_free_io_irq(adapter);
	ena_free_mgmnt_irq(adapter);
	ena_disable_msix(adapter);
}
2212 
2213 static void
2214 ena_disable_msix(struct ena_adapter *adapter)
2215 {
2216 
2217 	if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
2218 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
2219 		pci_release_msi(adapter->pdev);
2220 	}
2221 
2222 	adapter->msix_vecs = 0;
2223 	if (adapter->msix_entries != NULL)
2224 		free(adapter->msix_entries, M_DEVBUF);
2225 	adapter->msix_entries = NULL;
2226 }
2227 
2228 static void
2229 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2230 {
2231 	struct ena_com_io_cq* io_cq;
2232 	struct ena_eth_io_intr_reg intr_reg;
2233 	uint16_t ena_qid;
2234 	int i;
2235 
2236 	/* Unmask interrupts for all queues */
2237 	for (i = 0; i < adapter->num_queues; i++) {
2238 		ena_qid = ENA_IO_TXQ_IDX(i);
2239 		io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2240 		ena_com_update_intr_reg(&intr_reg, 0, 0, true);
2241 		ena_com_unmask_intr(io_cq, &intr_reg);
2242 	}
2243 }
2244 
2245 /* Configure the Rx forwarding */
2246 static int
2247 ena_rss_configure(struct ena_adapter *adapter)
2248 {
2249 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2250 	int rc;
2251 
2252 	/* Set indirect table */
2253 	rc = ena_com_indirect_table_set(ena_dev);
2254 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2255 		return (rc);
2256 
2257 	/* Configure hash function (if supported) */
2258 	rc = ena_com_set_hash_function(ena_dev);
2259 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2260 		return (rc);
2261 
2262 	/* Configure hash inputs (if supported) */
2263 	rc = ena_com_set_hash_ctrl(ena_dev);
2264 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2265 		return (rc);
2266 
2267 	return (0);
2268 }
2269 
2270 static int
2271 ena_up_complete(struct ena_adapter *adapter)
2272 {
2273 	int rc;
2274 
2275 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
2276 		rc = ena_rss_configure(adapter);
2277 		if (rc != 0)
2278 			return (rc);
2279 	}
2280 
2281 	rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
2282 	if (unlikely(rc != 0))
2283 		return (rc);
2284 
2285 	ena_refill_all_rx_bufs(adapter);
2286 	ena_reset_counters((counter_u64_t *)&adapter->hw_stats,
2287 	    sizeof(adapter->hw_stats));
2288 
2289 	return (0);
2290 }
2291 
2292 static int
2293 ena_up(struct ena_adapter *adapter)
2294 {
2295 	int rc = 0;
2296 
2297 	if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2298 		device_printf(adapter->pdev, "device is not attached!\n");
2299 		return (ENXIO);
2300 	}
2301 
2302 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2303 		device_printf(adapter->pdev, "device is going UP\n");
2304 
2305 		/* setup interrupts for IO queues */
2306 		rc = ena_setup_io_intr(adapter);
2307 		if (unlikely(rc != 0)) {
2308 			ena_trace(ENA_ALERT, "error setting up IO interrupt\n");
2309 			goto error;
2310 		}
2311 		rc = ena_request_io_irq(adapter);
2312 		if (unlikely(rc != 0)) {
2313 			ena_trace(ENA_ALERT, "err_req_irq\n");
2314 			goto error;
2315 		}
2316 
2317 		/* allocate transmit descriptors */
2318 		rc = ena_setup_all_tx_resources(adapter);
2319 		if (unlikely(rc != 0)) {
2320 			ena_trace(ENA_ALERT, "err_setup_tx\n");
2321 			goto err_setup_tx;
2322 		}
2323 
2324 		/* allocate receive descriptors */
2325 		rc = ena_setup_all_rx_resources(adapter);
2326 		if (unlikely(rc != 0)) {
2327 			ena_trace(ENA_ALERT, "err_setup_rx\n");
2328 			goto err_setup_rx;
2329 		}
2330 
2331 		/* create IO queues for Rx & Tx */
2332 		rc = ena_create_io_queues(adapter);
2333 		if (unlikely(rc != 0)) {
2334 			ena_trace(ENA_ALERT,
2335 			    "create IO queues failed\n");
2336 			goto err_io_que;
2337 		}
2338 
2339 		if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
2340 			if_link_state_change(adapter->ifp, LINK_STATE_UP);
2341 
2342 		rc = ena_up_complete(adapter);
2343 		if (unlikely(rc != 0))
2344 			goto err_up_complete;
2345 
2346 		counter_u64_add(adapter->dev_stats.interface_up, 1);
2347 
2348 		ena_update_hwassist(adapter);
2349 
2350 		if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
2351 		    IFF_DRV_OACTIVE);
2352 
2353 		callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
2354 		    ena_timer_service, (void *)adapter, 0);
2355 
2356 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2357 
2358 		ena_unmask_all_io_irqs(adapter);
2359 	}
2360 
2361 	return (0);
2362 
2363 err_up_complete:
2364 	ena_destroy_all_io_queues(adapter);
2365 err_io_que:
2366 	ena_free_all_rx_resources(adapter);
2367 err_setup_rx:
2368 	ena_free_all_tx_resources(adapter);
2369 err_setup_tx:
2370 	ena_free_io_irq(adapter);
2371 error:
2372 	return (rc);
2373 }
2374 
2375 static uint64_t
2376 ena_get_counter(if_t ifp, ift_counter cnt)
2377 {
2378 	struct ena_adapter *adapter;
2379 	struct ena_hw_stats *stats;
2380 
2381 	adapter = if_getsoftc(ifp);
2382 	stats = &adapter->hw_stats;
2383 
2384 	switch (cnt) {
2385 	case IFCOUNTER_IPACKETS:
2386 		return (counter_u64_fetch(stats->rx_packets));
2387 	case IFCOUNTER_OPACKETS:
2388 		return (counter_u64_fetch(stats->tx_packets));
2389 	case IFCOUNTER_IBYTES:
2390 		return (counter_u64_fetch(stats->rx_bytes));
2391 	case IFCOUNTER_OBYTES:
2392 		return (counter_u64_fetch(stats->tx_bytes));
2393 	case IFCOUNTER_IQDROPS:
2394 		return (counter_u64_fetch(stats->rx_drops));
2395 	default:
2396 		return (if_get_counter_default(ifp, cnt));
2397 	}
2398 }
2399 
2400 static int
2401 ena_media_change(if_t ifp)
2402 {
2403 	/* Media Change is not supported by firmware */
2404 	return (0);
2405 }
2406 
2407 static void
2408 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2409 {
2410 	struct ena_adapter *adapter = if_getsoftc(ifp);
2411 	ena_trace(ENA_DBG, "enter\n");
2412 
2413 	mtx_lock(&adapter->global_mtx);
2414 
2415 	ifmr->ifm_status = IFM_AVALID;
2416 	ifmr->ifm_active = IFM_ETHER;
2417 
2418 	if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
2419 		mtx_unlock(&adapter->global_mtx);
2420 		ena_trace(ENA_INFO, "Link is down\n");
2421 		return;
2422 	}
2423 
2424 	ifmr->ifm_status |= IFM_ACTIVE;
2425 	ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
2426 
2427 	mtx_unlock(&adapter->global_mtx);
2428 }
2429 
2430 static void
2431 ena_init(void *arg)
2432 {
2433 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
2434 
2435 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2436 		sx_xlock(&adapter->ioctl_sx);
2437 		ena_up(adapter);
2438 		sx_unlock(&adapter->ioctl_sx);
2439 	}
2440 }
2441 
2442 static int
2443 ena_ioctl(if_t ifp, u_long command, caddr_t data)
2444 {
2445 	struct ena_adapter *adapter;
2446 	struct ifreq *ifr;
2447 	int rc;
2448 
2449 	adapter = ifp->if_softc;
2450 	ifr = (struct ifreq *)data;
2451 
2452 	/*
2453 	 * Acquiring lock to prevent from running up and down routines parallel.
2454 	 */
2455 	rc = 0;
2456 	switch (command) {
2457 	case SIOCSIFMTU:
2458 		if (ifp->if_mtu == ifr->ifr_mtu)
2459 			break;
2460 		sx_xlock(&adapter->ioctl_sx);
2461 		ena_down(adapter);
2462 
2463 		ena_change_mtu(ifp, ifr->ifr_mtu);
2464 
2465 		rc = ena_up(adapter);
2466 		sx_unlock(&adapter->ioctl_sx);
2467 		break;
2468 
2469 	case SIOCSIFFLAGS:
2470 		if ((ifp->if_flags & IFF_UP) != 0) {
2471 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2472 				if ((ifp->if_flags & (IFF_PROMISC |
2473 				    IFF_ALLMULTI)) != 0) {
2474 					device_printf(adapter->pdev,
2475 					    "ioctl promisc/allmulti\n");
2476 				}
2477 			} else {
2478 				sx_xlock(&adapter->ioctl_sx);
2479 				rc = ena_up(adapter);
2480 				sx_unlock(&adapter->ioctl_sx);
2481 			}
2482 		} else {
2483 			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
2484 				sx_xlock(&adapter->ioctl_sx);
2485 				ena_down(adapter);
2486 				sx_unlock(&adapter->ioctl_sx);
2487 			}
2488 		}
2489 		break;
2490 
2491 	case SIOCADDMULTI:
2492 	case SIOCDELMULTI:
2493 		break;
2494 
2495 	case SIOCSIFMEDIA:
2496 	case SIOCGIFMEDIA:
2497 		rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2498 		break;
2499 
2500 	case SIOCSIFCAP:
2501 		{
2502 			int reinit = 0;
2503 
2504 			if (ifr->ifr_reqcap != ifp->if_capenable) {
2505 				ifp->if_capenable = ifr->ifr_reqcap;
2506 				reinit = 1;
2507 			}
2508 
2509 			if ((reinit != 0) &&
2510 			    ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
2511 				sx_xlock(&adapter->ioctl_sx);
2512 				ena_down(adapter);
2513 				rc = ena_up(adapter);
2514 				sx_unlock(&adapter->ioctl_sx);
2515 			}
2516 		}
2517 
2518 		break;
2519 	default:
2520 		rc = ether_ioctl(ifp, command, data);
2521 		break;
2522 	}
2523 
2524 	return (rc);
2525 }
2526 
2527 static int
2528 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2529 {
2530 	int caps = 0;
2531 
2532 	if ((feat->offload.tx &
2533 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2534 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2535 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2536 		caps |= IFCAP_TXCSUM;
2537 
2538 	if ((feat->offload.tx &
2539 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2540 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2541 		caps |= IFCAP_TXCSUM_IPV6;
2542 
2543 	if ((feat->offload.tx &
2544 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2545 		caps |= IFCAP_TSO4;
2546 
2547 	if ((feat->offload.tx &
2548 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2549 		caps |= IFCAP_TSO6;
2550 
2551 	if ((feat->offload.rx_supported &
2552 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2553 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2554 		caps |= IFCAP_RXCSUM;
2555 
2556 	if ((feat->offload.rx_supported &
2557 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2558 		caps |= IFCAP_RXCSUM_IPV6;
2559 
2560 	caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2561 
2562 	return (caps);
2563 }
2564 
2565 static void
2566 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2567 {
2568 
2569 	host_info->supported_network_features[0] =
2570 	    (uint32_t)if_getcapabilities(ifp);
2571 }
2572 
2573 static void
2574 ena_update_hwassist(struct ena_adapter *adapter)
2575 {
2576 	if_t ifp = adapter->ifp;
2577 	uint32_t feat = adapter->tx_offload_cap;
2578 	int cap = if_getcapenable(ifp);
2579 	int flags = 0;
2580 
2581 	if_clearhwassist(ifp);
2582 
2583 	if ((cap & IFCAP_TXCSUM) != 0) {
2584 		if ((feat &
2585 		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2586 			flags |= CSUM_IP;
2587 		if ((feat &
2588 		    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2589 		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2590 			flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2591 	}
2592 
2593 	if ((cap & IFCAP_TXCSUM_IPV6) != 0)
2594 		flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2595 
2596 	if ((cap & IFCAP_TSO4) != 0)
2597 		flags |= CSUM_IP_TSO;
2598 
2599 	if ((cap & IFCAP_TSO6) != 0)
2600 		flags |= CSUM_IP6_TSO;
2601 
2602 	if_sethwassistbits(ifp, flags, 0);
2603 }
2604 
2605 static int
2606 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2607     struct ena_com_dev_get_features_ctx *feat)
2608 {
2609 	if_t ifp;
2610 	int caps = 0;
2611 
2612 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2613 	if (unlikely(ifp == NULL)) {
2614 		ena_trace(ENA_ALERT, "can not allocate ifnet structure\n");
2615 		return (ENXIO);
2616 	}
2617 	if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2618 	if_setdev(ifp, pdev);
2619 	if_setsoftc(ifp, adapter);
2620 
2621 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2622 	if_setinitfn(ifp, ena_init);
2623 	if_settransmitfn(ifp, ena_mq_start);
2624 	if_setqflushfn(ifp, ena_qflush);
2625 	if_setioctlfn(ifp, ena_ioctl);
2626 	if_setgetcounterfn(ifp, ena_get_counter);
2627 
2628 	if_setsendqlen(ifp, adapter->tx_ring_size);
2629 	if_setsendqready(ifp);
2630 	if_setmtu(ifp, ETHERMTU);
2631 	if_setbaudrate(ifp, 0);
2632 	/* Zeroize capabilities... */
2633 	if_setcapabilities(ifp, 0);
2634 	if_setcapenable(ifp, 0);
2635 	/* check hardware support */
2636 	caps = ena_get_dev_offloads(feat);
2637 	/* ... and set them */
2638 	if_setcapabilitiesbit(ifp, caps, 0);
2639 
2640 	/* TSO parameters */
2641 	ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2642 	    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2643 	ifp->if_hw_tsomaxsegcount = adapter->max_tx_sgl_size - 1;
2644 	ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2645 
2646 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2647 	if_setcapenable(ifp, if_getcapabilities(ifp));
2648 
2649 	/*
2650 	 * Specify the media types supported by this adapter and register
2651 	 * callbacks to update media and link information
2652 	 */
2653 	ifmedia_init(&adapter->media, IFM_IMASK,
2654 	    ena_media_change, ena_media_status);
2655 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2656 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2657 
2658 	ether_ifattach(ifp, adapter->mac_addr);
2659 
2660 	return (0);
2661 }
2662 
2663 static void
2664 ena_down(struct ena_adapter *adapter)
2665 {
2666 	int rc;
2667 
2668 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
2669 		device_printf(adapter->pdev, "device is going DOWN\n");
2670 
2671 		callout_drain(&adapter->timer_service);
2672 
2673 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
2674 		if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
2675 		    IFF_DRV_RUNNING);
2676 
2677 		ena_free_io_irq(adapter);
2678 
2679 		if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
2680 			rc = ena_com_dev_reset(adapter->ena_dev,
2681 			    adapter->reset_reason);
2682 			if (unlikely(rc != 0))
2683 				device_printf(adapter->pdev,
2684 				    "Device reset failed\n");
2685 		}
2686 
2687 		ena_destroy_all_io_queues(adapter);
2688 
2689 		ena_free_all_tx_bufs(adapter);
2690 		ena_free_all_rx_bufs(adapter);
2691 		ena_free_all_tx_resources(adapter);
2692 		ena_free_all_rx_resources(adapter);
2693 
2694 		counter_u64_add(adapter->dev_stats.interface_down, 1);
2695 	}
2696 }
2697 
2698 static void
2699 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf)
2700 {
2701 	struct ena_com_tx_meta *ena_meta;
2702 	struct ether_vlan_header *eh;
2703 	struct mbuf *mbuf_next;
2704 	u32 mss;
2705 	bool offload;
2706 	uint16_t etype;
2707 	int ehdrlen;
2708 	struct ip *ip;
2709 	int iphlen;
2710 	struct tcphdr *th;
2711 	int offset;
2712 
2713 	offload = false;
2714 	ena_meta = &ena_tx_ctx->ena_meta;
2715 	mss = mbuf->m_pkthdr.tso_segsz;
2716 
2717 	if (mss != 0)
2718 		offload = true;
2719 
2720 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2721 		offload = true;
2722 
2723 	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
2724 		offload = true;
2725 
2726 	if (!offload) {
2727 		ena_tx_ctx->meta_valid = 0;
2728 		return;
2729 	}
2730 
2731 	/* Determine where frame payload starts. */
2732 	eh = mtod(mbuf, struct ether_vlan_header *);
2733 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2734 		etype = ntohs(eh->evl_proto);
2735 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2736 	} else {
2737 		etype = ntohs(eh->evl_encap_proto);
2738 		ehdrlen = ETHER_HDR_LEN;
2739 	}
2740 
2741 	mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
2742 	ip = (struct ip *)(mtodo(mbuf_next, offset));
2743 	iphlen = ip->ip_hl << 2;
2744 
2745 	mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
2746 	th = (struct tcphdr *)(mtodo(mbuf_next, offset));
2747 
2748 	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2749 		ena_tx_ctx->l3_csum_enable = 1;
2750 	}
2751 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
2752 		ena_tx_ctx->tso_enable = 1;
2753 		ena_meta->l4_hdr_len = (th->th_off);
2754 	}
2755 
2756 	switch (etype) {
2757 	case ETHERTYPE_IP:
2758 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2759 		if ((ip->ip_off & htons(IP_DF)) != 0)
2760 			ena_tx_ctx->df = 1;
2761 		break;
2762 	case ETHERTYPE_IPV6:
2763 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2764 
2765 	default:
2766 		break;
2767 	}
2768 
2769 	if (ip->ip_p == IPPROTO_TCP) {
2770 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2771 		if ((mbuf->m_pkthdr.csum_flags &
2772 		    (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
2773 			ena_tx_ctx->l4_csum_enable = 1;
2774 		else
2775 			ena_tx_ctx->l4_csum_enable = 0;
2776 	} else if (ip->ip_p == IPPROTO_UDP) {
2777 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2778 		if ((mbuf->m_pkthdr.csum_flags &
2779 		    (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
2780 			ena_tx_ctx->l4_csum_enable = 1;
2781 		else
2782 			ena_tx_ctx->l4_csum_enable = 0;
2783 	} else {
2784 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
2785 		ena_tx_ctx->l4_csum_enable = 0;
2786 	}
2787 
2788 	ena_meta->mss = mss;
2789 	ena_meta->l3_hdr_len = iphlen;
2790 	ena_meta->l3_hdr_offset = ehdrlen;
2791 	ena_tx_ctx->meta_valid = 1;
2792 }
2793 
2794 static int
2795 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2796 {
2797 	struct ena_adapter *adapter;
2798 	struct mbuf *collapsed_mbuf;
2799 	int num_frags;
2800 
2801 	adapter = tx_ring->adapter;
2802 	num_frags = ena_mbuf_count(*mbuf);
2803 
2804 	/* One segment must be reserved for configuration descriptor. */
2805 	if (num_frags < adapter->max_tx_sgl_size)
2806 		return (0);
2807 	counter_u64_add(tx_ring->tx_stats.collapse, 1);
2808 
2809 	collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
2810 	    adapter->max_tx_sgl_size - 1);
2811 	if (unlikely(collapsed_mbuf == NULL)) {
2812 		counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
2813 		return (ENOMEM);
2814 	}
2815 
2816 	/* If mbuf was collapsed succesfully, original mbuf is released. */
2817 	*mbuf = collapsed_mbuf;
2818 
2819 	return (0);
2820 }
2821 
2822 static void
2823 ena_dmamap_llq(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2824 {
2825 	struct ena_com_buf *ena_buf = arg;
2826 
2827 	if (unlikely(error != 0)) {
2828 		ena_buf->paddr = 0;
2829 		return;
2830 	}
2831 
2832 	KASSERT(nseg == 1, ("Invalid num of segments for LLQ dma"));
2833 
2834 	ena_buf->paddr = segs->ds_addr;
2835 	ena_buf->len = segs->ds_len;
2836 }
2837 
2838 static int
2839 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
2840     struct mbuf *mbuf, void **push_hdr, u16 *header_len)
2841 {
2842 	struct ena_adapter *adapter = tx_ring->adapter;
2843 	struct ena_com_buf *ena_buf;
2844 	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
2845 	uint32_t mbuf_head_len, frag_len;
2846 	uint16_t push_len = 0;
2847 	uint16_t delta = 0;
2848 	int i, rc, nsegs;
2849 
2850 	mbuf_head_len = mbuf->m_len;
2851 	tx_info->mbuf = mbuf;
2852 	ena_buf = tx_info->bufs;
2853 
2854 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2855 		/*
2856 		 * When the device is LLQ mode, the driver will copy
2857 		 * the header into the device memory space.
2858 		 * the ena_com layer assumes the header is in a linear
2859 		 * memory space.
2860 		 * This assumption might be wrong since part of the header
2861 		 * can be in the fragmented buffers.
2862 		 * First check if header fits in the mbuf. If not, copy it to
2863 		 * separate buffer that will be holding linearized data.
2864 		 */
2865 		push_len = min_t(uint32_t, mbuf->m_pkthdr.len,
2866 		    tx_ring->tx_max_header_size);
2867 		*header_len = push_len;
2868 		/* If header is in linear space, just point into mbuf's data. */
2869 		if (likely(push_len <= mbuf_head_len)) {
2870 			*push_hdr = mbuf->m_data;
2871 		/*
2872 		 * Otherwise, copy whole portion of header from multiple mbufs
2873 		 * to intermediate buffer.
2874 		 */
2875 		} else {
2876 			m_copydata(mbuf, 0, push_len,
2877 			    tx_ring->push_buf_intermediate_buf);
2878 			*push_hdr = tx_ring->push_buf_intermediate_buf;
2879 
2880 			counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
2881 			delta = push_len - mbuf_head_len;
2882 		}
2883 
2884 		ena_trace(ENA_DBG | ENA_TXPTH,
2885 		    "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
2886 		    mbuf, *push_hdr, push_len);
2887 
2888 		/*
2889 		* If header was in linear memory space, map for the dma rest of the data
2890 		* in the first mbuf of the mbuf chain.
2891 		*/
2892 		if (mbuf_head_len > push_len) {
2893 			rc = bus_dmamap_load(adapter->tx_buf_tag,
2894 			    tx_info->map_head,
2895 			mbuf->m_data + push_len, mbuf_head_len - push_len,
2896 			ena_dmamap_llq, ena_buf, BUS_DMA_NOWAIT);
2897 			if (unlikely((rc != 0) || (ena_buf->paddr == 0)))
2898 				goto single_dma_error;
2899 
2900 			ena_buf++;
2901 			tx_info->num_of_bufs++;
2902 
2903 			tx_info->head_mapped = true;
2904 		}
2905 		mbuf = mbuf->m_next;
2906 	} else {
2907 		*push_hdr = NULL;
2908 		/*
2909 		* header_len is just a hint for the device. Because FreeBSD is not
2910 		* giving us information about packet header length and it is not
2911 		* guaranteed that all packet headers will be in the 1st mbuf, setting
2912 		* header_len to 0 is making the device ignore this value and resolve
2913 		* header on it's own.
2914 		*/
2915 		*header_len = 0;
2916 	}
2917 
2918 	/*
2919 	 * If header is in non linear space (delta > 0), then skip mbufs
2920 	 * containing header and map the last one containing both header and the
2921 	 * packet data.
2922 	 * The first segment is already counted in.
2923 	 * If LLQ is not supported, the loop will be skipped.
2924 	 */
2925 	while (delta > 0) {
2926 		frag_len = mbuf->m_len;
2927 
2928 		/*
2929 		 * If whole segment contains header just move to the
2930 		 * next one and reduce delta.
2931 		 */
2932 		if (unlikely(delta >= frag_len)) {
2933 			delta -= frag_len;
2934 		} else {
2935 			/*
2936 			 * Map rest of the packet data that was contained in
2937 			 * the mbuf.
2938 			 */
2939 			rc = bus_dmamap_load(adapter->tx_buf_tag,
2940 			    tx_info->map_head, mbuf->m_data + delta,
2941 			    frag_len - delta, ena_dmamap_llq, ena_buf,
2942 			    BUS_DMA_NOWAIT);
2943 			if (unlikely((rc != 0) || (ena_buf->paddr == 0)))
2944 				goto single_dma_error;
2945 
2946 			ena_buf++;
2947 			tx_info->num_of_bufs++;
2948 			tx_info->head_mapped = true;
2949 
2950 			delta = 0;
2951 		}
2952 
2953 		mbuf = mbuf->m_next;
2954 	}
2955 
2956 	if (mbuf == NULL) {
2957 		return (0);
2958 	}
2959 
2960 	/* Map rest of the mbufs */
2961 	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->map_seg, mbuf,
2962 	    segs, &nsegs, BUS_DMA_NOWAIT);
2963 	if (unlikely((rc != 0) || (nsegs == 0))) {
2964 		ena_trace(ENA_WARNING,
2965 		    "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
2966 		goto dma_error;
2967 	}
2968 
2969 	for (i = 0; i < nsegs; i++) {
2970 		ena_buf->len = segs[i].ds_len;
2971 		ena_buf->paddr = segs[i].ds_addr;
2972 		ena_buf++;
2973 	}
2974 	tx_info->num_of_bufs += nsegs;
2975 	tx_info->seg_mapped = true;
2976 
2977 	return (0);
2978 
2979 dma_error:
2980 	if (tx_info->head_mapped == true)
2981 		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map_head);
2982 single_dma_error:
2983 	counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
2984 	tx_info->mbuf = NULL;
2985 	return (rc);
2986 }
2987 
2988 static int
2989 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2990 {
2991 	struct ena_adapter *adapter;
2992 	struct ena_tx_buffer *tx_info;
2993 	struct ena_com_tx_ctx ena_tx_ctx;
2994 	struct ena_com_dev *ena_dev;
2995 	struct ena_com_io_sq* io_sq;
2996 	void *push_hdr;
2997 	uint16_t next_to_use;
2998 	uint16_t req_id;
2999 	uint16_t ena_qid;
3000 	uint16_t header_len;
3001 	int rc;
3002 	int nb_hw_desc;
3003 
3004 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
3005 	adapter = tx_ring->que->adapter;
3006 	ena_dev = adapter->ena_dev;
3007 	io_sq = &ena_dev->io_sq_queues[ena_qid];
3008 
3009 	rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
3010 	if (unlikely(rc != 0)) {
3011 		ena_trace(ENA_WARNING,
3012 		    "Failed to collapse mbuf! err: %d\n", rc);
3013 		return (rc);
3014 	}
3015 
3016 	ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
3017 
3018 	next_to_use = tx_ring->next_to_use;
3019 	req_id = tx_ring->free_tx_ids[next_to_use];
3020 	tx_info = &tx_ring->tx_buffer_info[req_id];
3021 	tx_info->num_of_bufs = 0;
3022 
3023 	rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
3024 	if (unlikely(rc != 0)) {
3025 		ena_trace(ENA_WARNING, "Failed to map TX mbuf\n");
3026 		return (rc);
3027 	}
3028 	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
3029 	ena_tx_ctx.ena_bufs = tx_info->bufs;
3030 	ena_tx_ctx.push_header = push_hdr;
3031 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
3032 	ena_tx_ctx.req_id = req_id;
3033 	ena_tx_ctx.header_len = header_len;
3034 
3035 	/* Set flags and meta data */
3036 	ena_tx_csum(&ena_tx_ctx, *mbuf);
3037 
3038 	if (tx_ring->acum_pkts == DB_THRESHOLD ||
3039 	    ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
3040 		ena_trace(ENA_DBG | ENA_TXPTH,
3041 		    "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
3042 		    tx_ring->que->id);
3043 		wmb();
3044 		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3045 		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
3046 		tx_ring->acum_pkts = 0;
3047 	}
3048 
3049 	/* Prepare the packet's descriptors and send them to device */
3050 	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
3051 	if (unlikely(rc != 0)) {
3052 		if (likely(rc == ENA_COM_NO_MEM)) {
3053 			ena_trace(ENA_DBG | ENA_TXPTH,
3054 			    "tx ring[%d] if out of space\n", tx_ring->que->id);
3055 		} else {
3056 			device_printf(adapter->pdev,
3057 			    "failed to prepare tx bufs\n");
3058 		}
3059 		counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
3060 		goto dma_error;
3061 	}
3062 
3063 	counter_enter();
3064 	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
3065 	counter_u64_add_protected(tx_ring->tx_stats.bytes,
3066 	    (*mbuf)->m_pkthdr.len);
3067 
3068 	counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
3069 	counter_u64_add_protected(adapter->hw_stats.tx_bytes,
3070 	    (*mbuf)->m_pkthdr.len);
3071 	counter_exit();
3072 
3073 	tx_info->tx_descs = nb_hw_desc;
3074 	getbinuptime(&tx_info->timestamp);
3075 	tx_info->print_once = true;
3076 
3077 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
3078 	    tx_ring->ring_size);
3079 
3080 	/* stop the queue when no more space available, the packet can have up
3081 	 * to sgl_size + 2. one for the meta descriptor and one for header
3082 	 * (if the header is larger than tx_max_header_size).
3083 	 */
3084 	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3085 	    adapter->max_tx_sgl_size + 2))) {
3086 		ena_trace(ENA_DBG | ENA_TXPTH, "Stop queue %d\n",
3087 		    tx_ring->que->id);
3088 
3089 		tx_ring->running = false;
3090 		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
3091 
3092 		/* There is a rare condition where this function decides to
3093 		 * stop the queue but meanwhile tx_cleanup() updates
3094 		 * next_to_completion and terminates.
3095 		 * The queue will remain stopped forever.
3096 		 * To solve this issue this function performs mb(), checks
3097 		 * the wakeup condition and wakes up the queue if needed.
3098 		 */
3099 		mb();
3100 
3101 		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3102 		    ENA_TX_RESUME_THRESH)) {
3103 			tx_ring->running = true;
3104 			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
3105 		}
3106 	}
3107 
3108 	if (tx_info->head_mapped == true)
3109 		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_head,
3110 		    BUS_DMASYNC_PREWRITE);
3111 	if (tx_info->seg_mapped == true)
3112 		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map_seg,
3113 		    BUS_DMASYNC_PREWRITE);
3114 
3115 	return (0);
3116 
3117 dma_error:
3118 	tx_info->mbuf = NULL;
3119 	if (tx_info->seg_mapped == true) {
3120 		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map_seg);
3121 		tx_info->seg_mapped = false;
3122 	}
3123 	if (tx_info->head_mapped == true) {
3124 		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map_head);
3125 		tx_info->head_mapped = false;
3126 	}
3127 
3128 	return (rc);
3129 }
3130 
3131 static void
3132 ena_start_xmit(struct ena_ring *tx_ring)
3133 {
3134 	struct mbuf *mbuf;
3135 	struct ena_adapter *adapter = tx_ring->adapter;
3136 	struct ena_com_io_sq* io_sq;
3137 	int ena_qid;
3138 	int ret = 0;
3139 
3140 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
3141 		return;
3142 
3143 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
3144 		return;
3145 
3146 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
3147 	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
3148 
3149 	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
3150 		ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
3151 		    " header csum flags %#jx\n",
3152 		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
3153 
3154 		if (unlikely(!tx_ring->running)) {
3155 			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
3156 			break;
3157 		}
3158 
3159 		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
3160 			if (ret == ENA_COM_NO_MEM) {
3161 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
3162 			} else if (ret == ENA_COM_NO_SPACE) {
3163 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
3164 			} else {
3165 				m_freem(mbuf);
3166 				drbr_advance(adapter->ifp, tx_ring->br);
3167 			}
3168 
3169 			break;
3170 		}
3171 
3172 		drbr_advance(adapter->ifp, tx_ring->br);
3173 
3174 		if (unlikely((if_getdrvflags(adapter->ifp) &
3175 		    IFF_DRV_RUNNING) == 0))
3176 			return;
3177 
3178 		tx_ring->acum_pkts++;
3179 
3180 		BPF_MTAP(adapter->ifp, mbuf);
3181 	}
3182 
3183 	if (likely(tx_ring->acum_pkts != 0)) {
3184 		wmb();
3185 		/* Trigger the dma engine */
3186 		ena_com_write_sq_doorbell(io_sq);
3187 		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
3188 		tx_ring->acum_pkts = 0;
3189 	}
3190 
3191 	if (unlikely(!tx_ring->running))
3192 		taskqueue_enqueue(tx_ring->que->cleanup_tq,
3193 		    &tx_ring->que->cleanup_task);
3194 }
3195 
3196 static void
3197 ena_deferred_mq_start(void *arg, int pending)
3198 {
3199 	struct ena_ring *tx_ring = (struct ena_ring *)arg;
3200 	struct ifnet *ifp = tx_ring->adapter->ifp;
3201 
3202 	while (!drbr_empty(ifp, tx_ring->br) &&
3203 	    tx_ring->running &&
3204 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
3205 		ENA_RING_MTX_LOCK(tx_ring);
3206 		ena_start_xmit(tx_ring);
3207 		ENA_RING_MTX_UNLOCK(tx_ring);
3208 	}
3209 }
3210 
3211 static int
3212 ena_mq_start(if_t ifp, struct mbuf *m)
3213 {
3214 	struct ena_adapter *adapter = ifp->if_softc;
3215 	struct ena_ring *tx_ring;
3216 	int ret, is_drbr_empty;
3217 	uint32_t i;
3218 
3219 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
3220 		return (ENODEV);
3221 
3222 	/* Which queue to use */
3223 	/*
3224 	 * If everything is setup correctly, it should be the
3225 	 * same bucket that the current CPU we're on is.
3226 	 * It should improve performance.
3227 	 */
3228 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
3229 		i = m->m_pkthdr.flowid % adapter->num_queues;
3230 	} else {
3231 		i = curcpu % adapter->num_queues;
3232 	}
3233 	tx_ring = &adapter->tx_ring[i];
3234 
3235 	/* Check if drbr is empty before putting packet */
3236 	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
3237 	ret = drbr_enqueue(ifp, tx_ring->br, m);
3238 	if (unlikely(ret != 0)) {
3239 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
3240 		return (ret);
3241 	}
3242 
3243 	if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
3244 		ena_start_xmit(tx_ring);
3245 		ENA_RING_MTX_UNLOCK(tx_ring);
3246 	} else {
3247 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
3248 	}
3249 
3250 	return (0);
3251 }
3252 
3253 static void
3254 ena_qflush(if_t ifp)
3255 {
3256 	struct ena_adapter *adapter = ifp->if_softc;
3257 	struct ena_ring *tx_ring = adapter->tx_ring;
3258 	int i;
3259 
3260 	for(i = 0; i < adapter->num_queues; ++i, ++tx_ring)
3261 		if (!drbr_empty(ifp, tx_ring->br)) {
3262 			ENA_RING_MTX_LOCK(tx_ring);
3263 			drbr_flush(ifp, tx_ring->br);
3264 			ENA_RING_MTX_UNLOCK(tx_ring);
3265 		}
3266 
3267 	if_qflush(ifp);
3268 }
3269 
3270 static int
3271 ena_calc_io_queue_num(struct ena_adapter *adapter,
3272     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3273 {
3274 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3275 	int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num;
3276 
3277 	/* Regular queues capabilities */
3278 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3279 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3280 		    &get_feat_ctx->max_queue_ext.max_queue_ext;
3281 		io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
3282 			max_queue_ext->max_rx_cq_num);
3283 
3284 		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3285 		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3286 	} else {
3287 		struct ena_admin_queue_feature_desc *max_queues =
3288 		    &get_feat_ctx->max_queues;
3289 		io_tx_sq_num = max_queues->max_sq_num;
3290 		io_tx_cq_num = max_queues->max_cq_num;
3291 		io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
3292 	}
3293 
3294 	/* In case of LLQ use the llq fields for the tx SQ/CQ */
3295 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3296 		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3297 
3298 	io_queue_num = min_t(int, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
3299 	io_queue_num = min_t(int, io_queue_num, io_rx_num);
3300 	io_queue_num = min_t(int, io_queue_num, io_tx_sq_num);
3301 	io_queue_num = min_t(int, io_queue_num, io_tx_cq_num);
3302 	/* 1 IRQ for for mgmnt and 1 IRQ for each TX/RX pair */
3303 	io_queue_num = min_t(int, io_queue_num,
3304 	    pci_msix_count(adapter->pdev) - 1);
3305 
3306 	return (io_queue_num);
3307 }
3308 
3309 static int
3310 ena_enable_wc(struct resource *res)
3311 {
3312 #if defined(__i386) || defined(__amd64)
3313 	vm_offset_t va;
3314 	vm_size_t len;
3315 	int rc;
3316 
3317 	va = (vm_offset_t)rman_get_virtual(res);
3318 	len = rman_get_size(res);
3319 	/* Enable write combining */
3320 	rc = pmap_change_attr(va, len, PAT_WRITE_COMBINING);
3321 	if (unlikely(rc != 0)) {
3322 		ena_trace(ENA_ALERT, "pmap_change_attr failed, %d\n", rc);
3323 		return (rc);
3324 	}
3325 
3326 	return (0);
3327 #endif
3328 	return (EOPNOTSUPP);
3329 }
3330 
3331 static int
3332 ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
3333     struct ena_admin_feature_llq_desc *llq,
3334     struct ena_llq_configurations *llq_default_configurations)
3335 {
3336 	struct ena_adapter *adapter = device_get_softc(pdev);
3337 	int rc, rid;
3338 	uint32_t llq_feature_mask;
3339 
3340 	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3341 	if (!(ena_dev->supported_features & llq_feature_mask)) {
3342 		device_printf(pdev,
3343 		    "LLQ is not supported. Fallback to host mode policy.\n");
3344 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3345 		return (0);
3346 	}
3347 
3348 	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3349 	if (unlikely(rc != 0)) {
3350 		device_printf(pdev, "Failed to configure the device mode. "
3351 		    "Fallback to host mode policy.\n");
3352 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3353 		return (0);
3354 	}
3355 
3356 	/* Nothing to config, exit */
3357 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
3358 		return (0);
3359 
3360 	/* Try to allocate resources for LLQ bar */
3361 	rid = PCIR_BAR(ENA_MEM_BAR);
3362 	adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
3363 	    &rid, RF_ACTIVE);
3364 	if (unlikely(adapter->memory == NULL)) {
3365 		device_printf(pdev, "unable to allocate LLQ bar resource. "
3366 		    "Fallback to host mode policy.\n");
3367 		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3368 		return (0);
3369 	}
3370 
3371 	/* Enable write combining for better LLQ performance */
3372 	rc = ena_enable_wc(adapter->memory);
3373 	if (unlikely(rc != 0)) {
3374 		device_printf(pdev, "failed to enable write combining.\n");
3375 		return (rc);
3376 	}
3377 
3378 	/*
3379 	 * Save virtual address of the device's memory region
3380 	 * for the ena_com layer.
3381 	 */
3382 	ena_dev->mem_bar = rman_get_virtual(adapter->memory);
3383 
3384 	return (0);
3385 }
3386 
3387 static inline
3388 void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
3389 {
3390 	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3391 	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3392 	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3393 	llq_config->llq_num_decs_before_header =
3394 	    ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3395 	llq_config->llq_ring_entry_size_value = 128;
3396 }
3397 
3398 static int
3399 ena_calc_queue_size(struct ena_adapter *adapter,
3400     struct ena_calc_queue_size_ctx *ctx)
3401 {
3402 	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
3403 	struct ena_com_dev *ena_dev = ctx->ena_dev;
3404 	uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
3405 	uint32_t rx_queue_size = adapter->rx_ring_size;
3406 
3407 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3408 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3409 		    &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
3410 		rx_queue_size = min_t(uint32_t, rx_queue_size,
3411 		    max_queue_ext->max_rx_cq_depth);
3412 		rx_queue_size = min_t(uint32_t, rx_queue_size,
3413 		    max_queue_ext->max_rx_sq_depth);
3414 		tx_queue_size = min_t(uint32_t, tx_queue_size,
3415 		    max_queue_ext->max_tx_cq_depth);
3416 
3417 		if (ena_dev->tx_mem_queue_type ==
3418 		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
3419 			tx_queue_size = min_t(uint32_t, tx_queue_size,
3420 			    llq->max_llq_depth);
3421 		else
3422 			tx_queue_size = min_t(uint32_t, tx_queue_size,
3423 			    max_queue_ext->max_tx_sq_depth);
3424 
3425 		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3426 		    max_queue_ext->max_per_packet_rx_descs);
3427 		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3428 		    max_queue_ext->max_per_packet_tx_descs);
3429 	} else {
3430 		struct ena_admin_queue_feature_desc *max_queues =
3431 		    &ctx->get_feat_ctx->max_queues;
3432 		rx_queue_size = min_t(uint32_t, rx_queue_size,
3433 		    max_queues->max_cq_depth);
3434 		rx_queue_size = min_t(uint32_t, rx_queue_size,
3435 		    max_queues->max_sq_depth);
3436 		tx_queue_size = min_t(uint32_t, tx_queue_size,
3437 		    max_queues->max_cq_depth);
3438 
3439 		if (ena_dev->tx_mem_queue_type ==
3440 		    ENA_ADMIN_PLACEMENT_POLICY_DEV)
3441 			tx_queue_size = min_t(uint32_t, tx_queue_size,
3442 			    llq->max_llq_depth);
3443 		else
3444 			tx_queue_size = min_t(uint32_t, tx_queue_size,
3445 			    max_queues->max_sq_depth);
3446 
3447 		ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3448 		    max_queues->max_packet_tx_descs);
3449 		ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3450 		    max_queues->max_packet_rx_descs);
3451 	}
3452 
3453 	/* round down to the nearest power of 2 */
3454 	rx_queue_size = 1 << (fls(rx_queue_size) - 1);
3455 	tx_queue_size = 1 << (fls(tx_queue_size) - 1);
3456 
3457 	if (unlikely(rx_queue_size == 0 || tx_queue_size == 0)) {
3458 		device_printf(ctx->pdev, "Invalid queue size\n");
3459 		return (EFAULT);
3460 	}
3461 
3462 	ctx->rx_queue_size = rx_queue_size;
3463 	ctx->tx_queue_size = tx_queue_size;
3464 
3465 	return (0);
3466 }
3467 
3468 static int
3469 ena_handle_updated_queues(struct ena_adapter *adapter,
3470     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3471 {
3472 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3473 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
3474 	device_t pdev = adapter->pdev;
3475 	bool are_queues_changed = false;
3476 	int io_queue_num, rc;
3477 
3478 	calc_queue_ctx.ena_dev = ena_dev;
3479 	calc_queue_ctx.get_feat_ctx = get_feat_ctx;
3480 	calc_queue_ctx.pdev = pdev;
3481 
3482 	io_queue_num = ena_calc_io_queue_num(adapter, get_feat_ctx);
3483 	rc = ena_calc_queue_size(adapter, &calc_queue_ctx);
3484 	if (unlikely(rc != 0 || io_queue_num <= 0))
3485 		return EFAULT;
3486 
3487 	if (adapter->tx_ring->buf_ring_size != adapter->buf_ring_size)
3488 		are_queues_changed = true;
3489 
3490 	if (unlikely(adapter->tx_ring_size > calc_queue_ctx.tx_queue_size ||
3491 	    adapter->rx_ring_size > calc_queue_ctx.rx_queue_size)) {
3492 		device_printf(pdev,
3493 		    "Not enough resources to allocate requested queue sizes "
3494 		    "(TX,RX)=(%d,%d), falling back to queue sizes "
3495 		    "(TX,RX)=(%d,%d)\n",
3496 		    adapter->tx_ring_size,
3497 		    adapter->rx_ring_size,
3498 		    calc_queue_ctx.tx_queue_size,
3499 		    calc_queue_ctx.rx_queue_size);
3500 		adapter->tx_ring_size = calc_queue_ctx.tx_queue_size;
3501 		adapter->rx_ring_size = calc_queue_ctx.rx_queue_size;
3502 		adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
3503 		adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
3504 		are_queues_changed = true;
3505 	}
3506 
3507 	if (unlikely(adapter->num_queues > io_queue_num)) {
3508 		device_printf(pdev,
3509 		    "Not enough resources to allocate %d queues, "
3510 		    "falling back to %d queues\n",
3511 		    adapter->num_queues, io_queue_num);
3512 		adapter->num_queues = io_queue_num;
3513 		if (ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)) {
3514 			ena_com_rss_destroy(ena_dev);
3515 			rc = ena_rss_init_default(adapter);
3516 			if (unlikely(rc != 0) && (rc != EOPNOTSUPP)) {
3517 				device_printf(pdev, "Cannot init RSS rc: %d\n",
3518 				    rc);
3519 				return (rc);
3520 			}
3521 		}
3522 		are_queues_changed = true;
3523 	}
3524 
3525 	if (unlikely(are_queues_changed)) {
3526 		ena_free_all_io_rings_resources(adapter);
3527 		ena_init_io_rings(adapter);
3528 	}
3529 
3530 	return (0);
3531 }
3532 
3533 static int
3534 ena_rss_init_default(struct ena_adapter *adapter)
3535 {
3536 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3537 	device_t dev = adapter->pdev;
3538 	int qid, rc, i;
3539 
3540 	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3541 	if (unlikely(rc != 0)) {
3542 		device_printf(dev, "Cannot init indirect table\n");
3543 		return (rc);
3544 	}
3545 
3546 	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3547 		qid = i % adapter->num_queues;
3548 		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3549 		    ENA_IO_RXQ_IDX(qid));
3550 		if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3551 			device_printf(dev, "Cannot fill indirect table\n");
3552 			goto err_rss_destroy;
3553 		}
3554 	}
3555 
3556 	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3557 	    ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3558 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3559 		device_printf(dev, "Cannot fill hash function\n");
3560 		goto err_rss_destroy;
3561 	}
3562 
3563 	rc = ena_com_set_default_hash_ctrl(ena_dev);
3564 	if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3565 		device_printf(dev, "Cannot fill hash control\n");
3566 		goto err_rss_destroy;
3567 	}
3568 
3569 	return (0);
3570 
3571 err_rss_destroy:
3572 	ena_com_rss_destroy(ena_dev);
3573 	return (rc);
3574 }
3575 
3576 static void
3577 ena_rss_init_default_deferred(void *arg)
3578 {
3579 	struct ena_adapter *adapter;
3580 	devclass_t dc;
3581 	int max;
3582 	int rc;
3583 
3584 	dc = devclass_find("ena");
3585 	if (unlikely(dc == NULL)) {
3586 		ena_trace(ENA_ALERT, "No devclass ena\n");
3587 		return;
3588 	}
3589 
3590 	max = devclass_get_maxunit(dc);
3591 	while (max-- >= 0) {
3592 		adapter = devclass_get_softc(dc, max);
3593 		if (adapter != NULL) {
3594 			rc = ena_rss_init_default(adapter);
3595 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
3596 			if (unlikely(rc != 0)) {
3597 				device_printf(adapter->pdev,
3598 				    "WARNING: RSS was not properly initialized,"
3599 				    " it will affect bandwidth\n");
3600 				ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
3601 			}
3602 		}
3603 	}
3604 }
3605 SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
3606 
3607 static void
3608 ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
3609 {
3610 	struct ena_admin_host_info *host_info;
3611 	uintptr_t rid;
3612 	int rc;
3613 
3614 	/* Allocate only the host info */
3615 	rc = ena_com_allocate_host_info(ena_dev);
3616 	if (unlikely(rc != 0)) {
3617 		ena_trace(ENA_ALERT, "Cannot allocate host info\n");
3618 		return;
3619 	}
3620 
3621 	host_info = ena_dev->host_attr.host_info;
3622 
3623 	if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
3624 		host_info->bdf = rid;
3625 	host_info->os_type = ENA_ADMIN_OS_FREEBSD;
3626 	host_info->kernel_ver = osreldate;
3627 
3628 	sprintf(host_info->kernel_ver_str, "%d", osreldate);
3629 	host_info->os_dist = 0;
3630 	strncpy(host_info->os_dist_str, osrelease,
3631 	    sizeof(host_info->os_dist_str) - 1);
3632 
3633 	host_info->driver_version =
3634 		(DRV_MODULE_VER_MAJOR) |
3635 		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3636 		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
3637 	host_info->num_cpus = mp_ncpus;
3638 
3639 	rc = ena_com_set_host_attributes(ena_dev);
3640 	if (unlikely(rc != 0)) {
3641 		if (rc == EOPNOTSUPP)
3642 			ena_trace(ENA_WARNING, "Cannot set host attributes\n");
3643 		else
3644 			ena_trace(ENA_ALERT, "Cannot set host attributes\n");
3645 
3646 		goto err;
3647 	}
3648 
3649 	return;
3650 
3651 err:
3652 	ena_com_delete_host_info(ena_dev);
3653 }
3654 
3655 static int
3656 ena_device_init(struct ena_adapter *adapter, device_t pdev,
3657     struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
3658 {
3659 	struct ena_com_dev* ena_dev = adapter->ena_dev;
3660 	bool readless_supported;
3661 	uint32_t aenq_groups;
3662 	int dma_width;
3663 	int rc;
3664 
3665 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
3666 	if (unlikely(rc != 0)) {
3667 		device_printf(pdev, "failed to init mmio read less\n");
3668 		return (rc);
3669 	}
3670 
3671 	/*
3672 	 * The PCIe configuration space revision id indicate if mmio reg
3673 	 * read is disabled
3674 	 */
3675 	readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
3676 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3677 
3678 	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3679 	if (unlikely(rc != 0)) {
3680 		device_printf(pdev, "Can not reset device\n");
3681 		goto err_mmio_read_less;
3682 	}
3683 
3684 	rc = ena_com_validate_version(ena_dev);
3685 	if (unlikely(rc != 0)) {
3686 		device_printf(pdev, "device version is too low\n");
3687 		goto err_mmio_read_less;
3688 	}
3689 
3690 	dma_width = ena_com_get_dma_width(ena_dev);
3691 	if (unlikely(dma_width < 0)) {
3692 		device_printf(pdev, "Invalid dma width value %d", dma_width);
3693 		rc = dma_width;
3694 		goto err_mmio_read_less;
3695 	}
3696 	adapter->dma_width = dma_width;
3697 
3698 	/* ENA admin level init */
3699 	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3700 	if (unlikely(rc != 0)) {
3701 		device_printf(pdev,
3702 		    "Can not initialize ena admin queue with device\n");
3703 		goto err_mmio_read_less;
3704 	}
3705 
3706 	/*
3707 	 * To enable the msix interrupts the driver needs to know the number
3708 	 * of queues. So the driver uses polling mode to retrieve this
3709 	 * information
3710 	 */
3711 	ena_com_set_admin_polling_mode(ena_dev, true);
3712 
3713 	ena_config_host_info(ena_dev, pdev);
3714 
3715 	/* Get Device Attributes */
3716 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3717 	if (unlikely(rc != 0)) {
3718 		device_printf(pdev,
3719 		    "Cannot get attribute for ena device rc: %d\n", rc);
3720 		goto err_admin_init;
3721 	}
3722 
3723 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3724 	    BIT(ENA_ADMIN_FATAL_ERROR) |
3725 	    BIT(ENA_ADMIN_WARNING) |
3726 	    BIT(ENA_ADMIN_NOTIFICATION) |
3727 	    BIT(ENA_ADMIN_KEEP_ALIVE);
3728 
3729 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
3730 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3731 	if (unlikely(rc != 0)) {
3732 		device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
3733 		goto err_admin_init;
3734 	}
3735 
3736 	*wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3737 
3738 	return (0);
3739 
3740 err_admin_init:
3741 	ena_com_delete_host_info(ena_dev);
3742 	ena_com_admin_destroy(ena_dev);
3743 err_mmio_read_less:
3744 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3745 
3746 	return (rc);
3747 }
3748 
3749 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
3750     int io_vectors)
3751 {
3752 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3753 	int rc;
3754 
3755 	rc = ena_enable_msix(adapter);
3756 	if (unlikely(rc != 0)) {
3757 		device_printf(adapter->pdev, "Error with MSI-X enablement\n");
3758 		return (rc);
3759 	}
3760 
3761 	ena_setup_mgmnt_intr(adapter);
3762 
3763 	rc = ena_request_mgmnt_irq(adapter);
3764 	if (unlikely(rc != 0)) {
3765 		device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
3766 		goto err_disable_msix;
3767 	}
3768 
3769 	ena_com_set_admin_polling_mode(ena_dev, false);
3770 
3771 	ena_com_admin_aenq_enable(ena_dev);
3772 
3773 	return (0);
3774 
3775 err_disable_msix:
3776 	ena_disable_msix(adapter);
3777 
3778 	return (rc);
3779 }
3780 
3781 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
3782 static void ena_keep_alive_wd(void *adapter_data,
3783     struct ena_admin_aenq_entry *aenq_e)
3784 {
3785 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3786 	struct ena_admin_aenq_keep_alive_desc *desc;
3787 	sbintime_t stime;
3788 	uint64_t rx_drops;
3789 
3790 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3791 
3792 	rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
3793 	counter_u64_zero(adapter->hw_stats.rx_drops);
3794 	counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
3795 
3796 	stime = getsbinuptime();
3797 	atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
3798 }
3799 
3800 /* Check for keep alive expiration */
3801 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3802 {
3803 	sbintime_t timestamp, time;
3804 
3805 	if (adapter->wd_active == 0)
3806 		return;
3807 
3808 	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3809 		return;
3810 
3811 	timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
3812 	time = getsbinuptime() - timestamp;
3813 	if (unlikely(time > adapter->keep_alive_timeout)) {
3814 		device_printf(adapter->pdev,
3815 		    "Keep alive watchdog timeout.\n");
3816 		counter_u64_add(adapter->dev_stats.wd_expired, 1);
3817 		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3818 			adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3819 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3820 		}
3821 	}
3822 }
3823 
3824 /* Check if admin queue is enabled */
3825 static void check_for_admin_com_state(struct ena_adapter *adapter)
3826 {
3827 	if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
3828 	    false)) {
3829 		device_printf(adapter->pdev,
3830 		    "ENA admin queue is not in running state!\n");
3831 		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3832 		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3833 			adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3834 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3835 		}
3836 	}
3837 }
3838 
3839 static int
3840 check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3841     struct ena_ring *rx_ring)
3842 {
3843 	if (likely(rx_ring->first_interrupt))
3844 		return (0);
3845 
3846 	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3847 		return (0);
3848 
3849 	rx_ring->no_interrupt_event_cnt++;
3850 
3851 	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3852 		device_printf(adapter->pdev, "Potential MSIX issue on Rx side "
3853 		    "Queue = %d. Reset the device\n", rx_ring->qid);
3854 		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3855 			adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3856 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3857 		}
3858 		return (EIO);
3859 	}
3860 
3861 	return (0);
3862 }
3863 
3864 static int
3865 check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3866     struct ena_ring *tx_ring)
3867 {
3868 	struct bintime curtime, time;
3869 	struct ena_tx_buffer *tx_buf;
3870 	sbintime_t time_offset;
3871 	uint32_t missed_tx = 0;
3872 	int i, rc = 0;
3873 
3874 	getbinuptime(&curtime);
3875 
3876 	for (i = 0; i < tx_ring->ring_size; i++) {
3877 		tx_buf = &tx_ring->tx_buffer_info[i];
3878 
3879 		if (bintime_isset(&tx_buf->timestamp) == 0)
3880 			continue;
3881 
3882 		time = curtime;
3883 		bintime_sub(&time, &tx_buf->timestamp);
3884 		time_offset = bttosbt(time);
3885 
3886 		if (unlikely(!tx_ring->first_interrupt &&
3887 		    time_offset > 2 * adapter->missing_tx_timeout)) {
3888 			/*
3889 			 * If after graceful period interrupt is still not
3890 			 * received, we schedule a reset.
3891 			 */
3892 			device_printf(adapter->pdev,
3893 			    "Potential MSIX issue on Tx side Queue = %d. "
3894 			    "Reset the device\n", tx_ring->qid);
3895 			if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET,
3896 			    adapter))) {
3897 				adapter->reset_reason =
3898 				    ENA_REGS_RESET_MISS_INTERRUPT;
3899 				ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET,
3900 				    adapter);
3901 			}
3902 			return (EIO);
3903 		}
3904 
3905 		/* Check again if packet is still waiting */
3906 		if (unlikely(time_offset > adapter->missing_tx_timeout)) {
3907 
3908 			if (!tx_buf->print_once)
3909 				ena_trace(ENA_WARNING, "Found a Tx that wasn't "
3910 				    "completed on time, qid %d, index %d.\n",
3911 				    tx_ring->qid, i);
3912 
3913 			tx_buf->print_once = true;
3914 			missed_tx++;
3915 		}
3916 	}
3917 
3918 	if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
3919 		device_printf(adapter->pdev,
3920 		    "The number of lost tx completion is above the threshold "
3921 		    "(%d > %d). Reset the device\n",
3922 		    missed_tx, adapter->missing_tx_threshold);
3923 		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
3924 			adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
3925 			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
3926 		}
3927 		rc = EIO;
3928 	}
3929 
3930 	counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
3931 
3932 	return (rc);
3933 }
3934 
3935 /*
3936  * Check for TX which were not completed on time.
3937  * Timeout is defined by "missing_tx_timeout".
3938  * Reset will be performed if number of incompleted
3939  * transactions exceeds "missing_tx_threshold".
3940  */
3941 static void
3942 check_for_missing_completions(struct ena_adapter *adapter)
3943 {
3944 	struct ena_ring *tx_ring;
3945 	struct ena_ring *rx_ring;
3946 	int i, budget, rc;
3947 
3948 	/* Make sure the driver doesn't turn the device in other process */
3949 	rmb();
3950 
3951 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
3952 		return;
3953 
3954 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
3955 		return;
3956 
3957 	if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3958 		return;
3959 
3960 	budget = adapter->missing_tx_max_queues;
3961 
3962 	for (i = adapter->next_monitored_tx_qid; i < adapter->num_queues; i++) {
3963 		tx_ring = &adapter->tx_ring[i];
3964 		rx_ring = &adapter->rx_ring[i];
3965 
3966 		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3967 		if (unlikely(rc != 0))
3968 			return;
3969 
3970 		rc = check_for_rx_interrupt_queue(adapter, rx_ring);
3971 		if (unlikely(rc != 0))
3972 			return;
3973 
3974 		budget--;
3975 		if (budget == 0) {
3976 			i++;
3977 			break;
3978 		}
3979 	}
3980 
3981 	adapter->next_monitored_tx_qid = i % adapter->num_queues;
3982 }
3983 
3984 /* trigger rx cleanup after 2 consecutive detections */
3985 #define EMPTY_RX_REFILL 2
3986 /* For the rare case where the device runs out of Rx descriptors and the
3987  * msix handler failed to refill new Rx descriptors (due to a lack of memory
3988  * for example).
3989  * This case will lead to a deadlock:
3990  * The device won't send interrupts since all the new Rx packets will be dropped
3991  * The msix handler won't allocate new Rx descriptors so the device won't be
3992  * able to send new packets.
3993  *
3994  * When such a situation is detected - execute rx cleanup task in another thread
3995  */
3996 static void
3997 check_for_empty_rx_ring(struct ena_adapter *adapter)
3998 {
3999 	struct ena_ring *rx_ring;
4000 	int i, refill_required;
4001 
4002 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
4003 		return;
4004 
4005 	if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
4006 		return;
4007 
4008 	for (i = 0; i < adapter->num_queues; i++) {
4009 		rx_ring = &adapter->rx_ring[i];
4010 
4011 		refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
4012 		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
4013 			rx_ring->empty_rx_queue++;
4014 
4015 			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL)	{
4016 				counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
4017 				    1);
4018 
4019 				device_printf(adapter->pdev,
4020 				    "trigger refill for ring %d\n", i);
4021 
4022 				taskqueue_enqueue(rx_ring->que->cleanup_tq,
4023 				    &rx_ring->que->cleanup_task);
4024 				rx_ring->empty_rx_queue = 0;
4025 			}
4026 		} else {
4027 			rx_ring->empty_rx_queue = 0;
4028 		}
4029 	}
4030 }
4031 
4032 static void ena_update_hints(struct ena_adapter *adapter,
4033 			     struct ena_admin_ena_hw_hints *hints)
4034 {
4035 	struct ena_com_dev *ena_dev = adapter->ena_dev;
4036 
4037 	if (hints->admin_completion_tx_timeout)
4038 		ena_dev->admin_queue.completion_timeout =
4039 		    hints->admin_completion_tx_timeout * 1000;
4040 
4041 	if (hints->mmio_read_timeout)
4042 		/* convert to usec */
4043 		ena_dev->mmio_read.reg_read_to =
4044 		    hints->mmio_read_timeout * 1000;
4045 
4046 	if (hints->missed_tx_completion_count_threshold_to_reset)
4047 		adapter->missing_tx_threshold =
4048 		    hints->missed_tx_completion_count_threshold_to_reset;
4049 
4050 	if (hints->missing_tx_completion_timeout) {
4051 		if (hints->missing_tx_completion_timeout ==
4052 		     ENA_HW_HINTS_NO_TIMEOUT)
4053 			adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
4054 		else
4055 			adapter->missing_tx_timeout =
4056 			    SBT_1MS * hints->missing_tx_completion_timeout;
4057 	}
4058 
4059 	if (hints->driver_watchdog_timeout) {
4060 		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
4061 			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
4062 		else
4063 			adapter->keep_alive_timeout =
4064 			    SBT_1MS * hints->driver_watchdog_timeout;
4065 	}
4066 }
4067 
4068 static void
4069 ena_timer_service(void *data)
4070 {
4071 	struct ena_adapter *adapter = (struct ena_adapter *)data;
4072 	struct ena_admin_host_info *host_info =
4073 	    adapter->ena_dev->host_attr.host_info;
4074 
4075 	check_for_missing_keep_alive(adapter);
4076 
4077 	check_for_admin_com_state(adapter);
4078 
4079 	check_for_missing_completions(adapter);
4080 
4081 	check_for_empty_rx_ring(adapter);
4082 
4083 	if (host_info != NULL)
4084 		ena_update_host_info(host_info, adapter->ifp);
4085 
4086 	if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
4087 		device_printf(adapter->pdev, "Trigger reset is on\n");
4088 		taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
4089 		return;
4090 	}
4091 
4092 	/*
4093 	 * Schedule another timeout one second from now.
4094 	 */
4095 	callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
4096 }
4097 
4098 static void
4099 ena_destroy_device(struct ena_adapter *adapter, bool graceful)
4100 {
4101 	if_t ifp = adapter->ifp;
4102 	struct ena_com_dev *ena_dev = adapter->ena_dev;
4103 	bool dev_up;
4104 
4105 	if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
4106 		return;
4107 
4108 	if_link_state_change(ifp, LINK_STATE_DOWN);
4109 
4110 	callout_drain(&adapter->timer_service);
4111 
4112 	dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
4113 	if (dev_up)
4114 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
4115 	else
4116 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
4117 
4118 	if (!graceful)
4119 		ena_com_set_admin_running_state(ena_dev, false);
4120 
4121 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
4122 		ena_down(adapter);
4123 
4124 	/*
4125 	 * Stop the device from sending AENQ events (if the device was up, and
4126 	 * the trigger reset was on, ena_down already performs device reset)
4127 	 */
4128 	if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
4129 		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
4130 
4131 	ena_free_mgmnt_irq(adapter);
4132 
4133 	ena_disable_msix(adapter);
4134 
4135 	ena_com_abort_admin_commands(ena_dev);
4136 
4137 	ena_com_wait_for_abort_completion(ena_dev);
4138 
4139 	ena_com_admin_destroy(ena_dev);
4140 
4141 	ena_com_mmio_reg_read_request_destroy(ena_dev);
4142 
4143 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4144 
4145 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
4146 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4147 }
4148 
4149 static int
4150 ena_device_validate_params(struct ena_adapter *adapter,
4151     struct ena_com_dev_get_features_ctx *get_feat_ctx)
4152 {
4153 
4154 	if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
4155 	    ETHER_ADDR_LEN) != 0) {
4156 		device_printf(adapter->pdev,
4157 		    "Error, mac address are different\n");
4158 		return (EINVAL);
4159 	}
4160 
4161 	if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
4162 		device_printf(adapter->pdev,
4163 		    "Error, device max mtu is smaller than ifp MTU\n");
4164 		return (EINVAL);
4165 	}
4166 
4167 	return 0;
4168 }
4169 
4170 static int
4171 ena_restore_device(struct ena_adapter *adapter)
4172 {
4173 	struct ena_com_dev_get_features_ctx get_feat_ctx;
4174 	struct ena_com_dev *ena_dev = adapter->ena_dev;
4175 	if_t ifp = adapter->ifp;
4176 	device_t dev = adapter->pdev;
4177 	int wd_active;
4178 	int rc;
4179 
4180 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
4181 
4182 	rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
4183 	if (rc != 0) {
4184 		device_printf(dev, "Cannot initialize device\n");
4185 		goto err;
4186 	}
4187 	/*
4188 	 * Only enable WD if it was enabled before reset, so it won't override
4189 	 * value set by the user by the sysctl.
4190 	 */
4191 	if (adapter->wd_active != 0)
4192 		adapter->wd_active = wd_active;
4193 
4194 	rc = ena_device_validate_params(adapter, &get_feat_ctx);
4195 	if (rc != 0) {
4196 		device_printf(dev, "Validation of device parameters failed\n");
4197 		goto err_device_destroy;
4198 	}
4199 
4200 	rc = ena_handle_updated_queues(adapter, &get_feat_ctx);
4201 	if (rc != 0)
4202 		goto err_device_destroy;
4203 
4204 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
4205 	/* Make sure we don't have a race with AENQ Links state handler */
4206 	if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
4207 		if_link_state_change(ifp, LINK_STATE_UP);
4208 
4209 	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
4210 	    adapter->num_queues);
4211 	if (rc != 0) {
4212 		device_printf(dev, "Enable MSI-X failed\n");
4213 		goto err_device_destroy;
4214 	}
4215 
4216 	/* If the interface was up before the reset bring it up */
4217 	if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
4218 		rc = ena_up(adapter);
4219 		if (rc != 0) {
4220 			device_printf(dev, "Failed to create I/O queues\n");
4221 			goto err_disable_msix;
4222 		}
4223 	}
4224 
4225 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4226 	callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
4227 	    ena_timer_service, (void *)adapter, 0);
4228 
4229 	device_printf(dev,
4230 	    "Device reset completed successfully, Driver info: %s\n", ena_version);
4231 
4232 	return (rc);
4233 
4234 err_disable_msix:
4235 	ena_free_mgmnt_irq(adapter);
4236 	ena_disable_msix(adapter);
4237 err_device_destroy:
4238 	ena_com_abort_admin_commands(ena_dev);
4239 	ena_com_wait_for_abort_completion(ena_dev);
4240 	ena_com_admin_destroy(ena_dev);
4241 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
4242 	ena_com_mmio_reg_read_request_destroy(ena_dev);
4243 err:
4244 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4245 	ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
4246 	device_printf(dev, "Reset attempt failed. Can not reset the device\n");
4247 
4248 	return (rc);
4249 }
4250 
4251 static void
4252 ena_reset_task(void *arg, int pending)
4253 {
4254 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
4255 
4256 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
4257 		device_printf(adapter->pdev,
4258 		    "device reset scheduled but trigger_reset is off\n");
4259 		return;
4260 	}
4261 
4262 	sx_xlock(&adapter->ioctl_sx);
4263 	ena_destroy_device(adapter, false);
4264 	ena_restore_device(adapter);
4265 	sx_unlock(&adapter->ioctl_sx);
4266 }
4267 
4268 /**
4269  * ena_attach - Device Initialization Routine
4270  * @pdev: device information struct
4271  *
4272  * Returns 0 on success, otherwise on failure.
4273  *
4274  * ena_attach initializes an adapter identified by a device structure.
4275  * The OS initialization, configuring of the adapter private structure,
4276  * and a hardware reset occur.
4277  **/
4278 static int
4279 ena_attach(device_t pdev)
4280 {
4281 	struct ena_com_dev_get_features_ctx get_feat_ctx;
4282 	struct ena_llq_configurations llq_config;
4283 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
4284 	static int version_printed;
4285 	struct ena_adapter *adapter;
4286 	struct ena_com_dev *ena_dev = NULL;
4287 	const char *queue_type_str;
4288 	int io_queue_num;
4289 	int rid, rc;
4290 
4291 	adapter = device_get_softc(pdev);
4292 	adapter->pdev = pdev;
4293 
4294 	mtx_init(&adapter->global_mtx, "ENA global mtx", NULL, MTX_DEF);
4295 	sx_init(&adapter->ioctl_sx, "ENA ioctl sx");
4296 
4297 	/* Set up the timer service */
4298 	callout_init_mtx(&adapter->timer_service, &adapter->global_mtx, 0);
4299 	adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
4300 	adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
4301 	adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
4302 	adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
4303 
4304 	if (version_printed++ == 0)
4305 		device_printf(pdev, "%s\n", ena_version);
4306 
4307 	/* Allocate memory for ena_dev structure */
4308 	ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
4309 	    M_WAITOK | M_ZERO);
4310 
4311 	adapter->ena_dev = ena_dev;
4312 	ena_dev->dmadev = pdev;
4313 
4314 	rid = PCIR_BAR(ENA_REG_BAR);
4315 	adapter->memory = NULL;
4316 	adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
4317 	    &rid, RF_ACTIVE);
4318 	if (unlikely(adapter->registers == NULL)) {
4319 		device_printf(pdev,
4320 		    "unable to allocate bus resource: registers!\n");
4321 		rc = ENOMEM;
4322 		goto err_dev_free;
4323 	}
4324 
4325 	ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
4326 	    M_WAITOK | M_ZERO);
4327 
4328 	/* Store register resources */
4329 	((struct ena_bus*)(ena_dev->bus))->reg_bar_t =
4330 	    rman_get_bustag(adapter->registers);
4331 	((struct ena_bus*)(ena_dev->bus))->reg_bar_h =
4332 	    rman_get_bushandle(adapter->registers);
4333 
4334 	if (unlikely(((struct ena_bus*)(ena_dev->bus))->reg_bar_h == 0)) {
4335 		device_printf(pdev, "failed to pmap registers bar\n");
4336 		rc = ENXIO;
4337 		goto err_bus_free;
4338 	}
4339 
4340 	ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
4341 
4342 	/* Initially clear all the flags */
4343 	ENA_FLAG_ZERO(adapter);
4344 
4345 	/* Device initialization */
4346 	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
4347 	if (unlikely(rc != 0)) {
4348 		device_printf(pdev, "ENA device init failed! (err: %d)\n", rc);
4349 		rc = ENXIO;
4350 		goto err_bus_free;
4351 	}
4352 
4353 	set_default_llq_configurations(&llq_config);
4354 
4355 #if defined(__arm__) || defined(__aarch64__)
4356 	/*
4357 	 * Force LLQ disable, as the driver is not supporting WC enablement
4358 	 * on the ARM architecture. Using LLQ without WC would affect
4359 	 * performance in a negative way.
4360 	 */
4361 	ena_dev->supported_features &= ~(1 << ENA_ADMIN_LLQ);
4362 #endif
4363 	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
4364 	     &llq_config);
4365 	if (unlikely(rc != 0)) {
4366 		device_printf(pdev, "failed to set placement policy\n");
4367 		goto err_com_free;
4368 	}
4369 
4370 	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
4371 		queue_type_str = "Regular";
4372 	else
4373 		queue_type_str = "Low Latency";
4374 	device_printf(pdev, "Placement policy: %s\n", queue_type_str);
4375 
4376 	adapter->keep_alive_timestamp = getsbinuptime();
4377 
4378 	adapter->tx_offload_cap = get_feat_ctx.offload.tx;
4379 
4380 	memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
4381 	    ETHER_ADDR_LEN);
4382 
4383 	calc_queue_ctx.ena_dev = ena_dev;
4384 	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
4385 	calc_queue_ctx.pdev = pdev;
4386 
4387 	/* calculate IO queue number to create */
4388 	io_queue_num = ena_calc_io_queue_num(adapter, &get_feat_ctx);
4389 
4390 	ENA_ASSERT(io_queue_num > 0, "Invalid queue number: %d\n",
4391 	    io_queue_num);
4392 	adapter->num_queues = io_queue_num;
4393 
4394 	adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
4395 	// Set the requested Rx ring size
4396 	adapter->rx_ring_size = ENA_DEFAULT_RING_SIZE;
4397 	/* calculatre ring sizes */
4398 	rc = ena_calc_queue_size(adapter, &calc_queue_ctx);
4399 	if (unlikely((rc != 0) || (io_queue_num <= 0))) {
4400 		rc = EFAULT;
4401 		goto err_com_free;
4402 	}
4403 
4404 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4405 
4406 	adapter->tx_ring_size = calc_queue_ctx.tx_queue_size;
4407 	adapter->rx_ring_size = calc_queue_ctx.rx_queue_size;
4408 
4409 	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
4410 	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
4411 
4412 	adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
4413 
4414 	/* set up dma tags for rx and tx buffers */
4415 	rc = ena_setup_tx_dma_tag(adapter);
4416 	if (unlikely(rc != 0)) {
4417 		device_printf(pdev, "Failed to create TX DMA tag\n");
4418 		goto err_com_free;
4419 	}
4420 
4421 	rc = ena_setup_rx_dma_tag(adapter);
4422 	if (unlikely(rc != 0)) {
4423 		device_printf(pdev, "Failed to create RX DMA tag\n");
4424 		goto err_tx_tag_free;
4425 	}
4426 
4427 	/* initialize rings basic information */
4428 	device_printf(pdev,
4429 	    "Creating %d io queues. Rx queue size: %d, Tx queue size: %d\n",
4430 	    io_queue_num,
4431 	    calc_queue_ctx.rx_queue_size,
4432 	    calc_queue_ctx.tx_queue_size);
4433 	ena_init_io_rings(adapter);
4434 
4435 	rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
4436 	if (unlikely(rc != 0)) {
4437 		device_printf(pdev,
4438 		    "Failed to enable and set the admin interrupts\n");
4439 		goto err_io_free;
4440 	}
4441 
4442 	/* setup network interface */
4443 	rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
4444 	if (unlikely(rc != 0)) {
4445 		device_printf(pdev, "Error with network interface setup\n");
4446 		goto err_msix_free;
4447 	}
4448 
4449 	/* Initialize reset task queue */
4450 	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
4451 	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
4452 	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
4453 	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET,
4454 	    "%s rstq", device_get_nameunit(adapter->pdev));
4455 
4456 	/* Initialize statistics */
4457 	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
4458 	    sizeof(struct ena_stats_dev));
4459 	ena_alloc_counters((counter_u64_t *)&adapter->hw_stats,
4460 	    sizeof(struct ena_hw_stats));
4461 	ena_sysctl_add_nodes(adapter);
4462 
4463 	/* Tell the stack that the interface is not active */
4464 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
4465 	ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
4466 
4467 	return (0);
4468 
4469 err_msix_free:
4470 	ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
4471 	ena_free_mgmnt_irq(adapter);
4472 	ena_disable_msix(adapter);
4473 err_io_free:
4474 	ena_free_all_io_rings_resources(adapter);
4475 	ena_free_rx_dma_tag(adapter);
4476 err_tx_tag_free:
4477 	ena_free_tx_dma_tag(adapter);
4478 err_com_free:
4479 	ena_com_admin_destroy(ena_dev);
4480 	ena_com_delete_host_info(ena_dev);
4481 	ena_com_mmio_reg_read_request_destroy(ena_dev);
4482 err_bus_free:
4483 	free(ena_dev->bus, M_DEVBUF);
4484 	ena_free_pci_resources(adapter);
4485 err_dev_free:
4486 	free(ena_dev, M_DEVBUF);
4487 
4488 	return (rc);
4489 }
4490 
4491 /**
4492  * ena_detach - Device Removal Routine
4493  * @pdev: device information struct
4494  *
4495  * ena_detach is called by the device subsystem to alert the driver
4496  * that it should release a PCI device.
4497  **/
4498 static int
4499 ena_detach(device_t pdev)
4500 {
4501 	struct ena_adapter *adapter = device_get_softc(pdev);
4502 	struct ena_com_dev *ena_dev = adapter->ena_dev;
4503 	int rc;
4504 
4505 	/* Make sure VLANS are not using driver */
4506 	if (adapter->ifp->if_vlantrunk != NULL) {
4507 		device_printf(adapter->pdev ,"VLAN is in use, detach first\n");
4508 		return (EBUSY);
4509 	}
4510 
4511 	ether_ifdetach(adapter->ifp);
4512 
4513 	/* Free reset task and callout */
4514 	callout_drain(&adapter->timer_service);
4515 	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
4516 		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
4517 	taskqueue_free(adapter->reset_tq);
4518 
4519 	sx_xlock(&adapter->ioctl_sx);
4520 	ena_down(adapter);
4521 	ena_destroy_device(adapter, true);
4522 	sx_unlock(&adapter->ioctl_sx);
4523 
4524 	ena_free_all_io_rings_resources(adapter);
4525 
4526 	ena_free_counters((counter_u64_t *)&adapter->hw_stats,
4527 	    sizeof(struct ena_hw_stats));
4528 	ena_free_counters((counter_u64_t *)&adapter->dev_stats,
4529 	    sizeof(struct ena_stats_dev));
4530 
4531 	rc = ena_free_rx_dma_tag(adapter);
4532 	if (unlikely(rc != 0))
4533 		device_printf(adapter->pdev,
4534 		    "Unmapped RX DMA tag associations\n");
4535 
4536 	rc = ena_free_tx_dma_tag(adapter);
4537 	if (unlikely(rc != 0))
4538 		device_printf(adapter->pdev,
4539 		    "Unmapped TX DMA tag associations\n");
4540 
4541 	ena_free_irqs(adapter);
4542 
4543 	ena_free_pci_resources(adapter);
4544 
4545 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
4546 		ena_com_rss_destroy(ena_dev);
4547 
4548 	ena_com_delete_host_info(ena_dev);
4549 
4550 	mtx_destroy(&adapter->global_mtx);
4551 	sx_destroy(&adapter->ioctl_sx);
4552 
4553 	if_free(adapter->ifp);
4554 
4555 	if (ena_dev->bus != NULL)
4556 		free(ena_dev->bus, M_DEVBUF);
4557 
4558 	if (ena_dev != NULL)
4559 		free(ena_dev, M_DEVBUF);
4560 
4561 	return (bus_generic_detach(pdev));
4562 }
4563 
4564 /******************************************************************************
4565  ******************************** AENQ Handlers *******************************
4566  *****************************************************************************/
4567 /**
4568  * ena_update_on_link_change:
4569  * Notify the network interface about the change in link status
4570  **/
4571 static void
4572 ena_update_on_link_change(void *adapter_data,
4573     struct ena_admin_aenq_entry *aenq_e)
4574 {
4575 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4576 	struct ena_admin_aenq_link_change_desc *aenq_desc;
4577 	int status;
4578 	if_t ifp;
4579 
4580 	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
4581 	ifp = adapter->ifp;
4582 	status = aenq_desc->flags &
4583 	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4584 
4585 	if (status != 0) {
4586 		device_printf(adapter->pdev, "link is UP\n");
4587 		ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
4588 		if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
4589 			if_link_state_change(ifp, LINK_STATE_UP);
4590 	} else {
4591 		device_printf(adapter->pdev, "link is DOWN\n");
4592 		if_link_state_change(ifp, LINK_STATE_DOWN);
4593 		ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
4594 	}
4595 }
4596 
4597 static void ena_notification(void *adapter_data,
4598     struct ena_admin_aenq_entry *aenq_e)
4599 {
4600 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4601 	struct ena_admin_ena_hw_hints *hints;
4602 
4603 	ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4604 	    "Invalid group(%x) expected %x\n",	aenq_e->aenq_common_desc.group,
4605 	    ENA_ADMIN_NOTIFICATION);
4606 
4607 	switch (aenq_e->aenq_common_desc.syndrom) {
4608 	case ENA_ADMIN_UPDATE_HINTS:
4609 		hints =
4610 		    (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
4611 		ena_update_hints(adapter, hints);
4612 		break;
4613 	default:
4614 		device_printf(adapter->pdev,
4615 		    "Invalid aenq notification link state %d\n",
4616 		    aenq_e->aenq_common_desc.syndrom);
4617 	}
4618 }
4619 
4620 /**
4621  * This handler will called for unknown event group or unimplemented handlers
4622  **/
4623 static void
4624 unimplemented_aenq_handler(void *adapter_data,
4625     struct ena_admin_aenq_entry *aenq_e)
4626 {
4627 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4628 
4629 	device_printf(adapter->pdev,
4630 	    "Unknown event was received or event with unimplemented handler\n");
4631 }
4632 
/*
 * AENQ handler table.  The .handlers array is indexed by event group and
 * registers the link change, notification and keep-alive handlers;
 * .unimplemented_handler is the fallback for groups without an entry.
 * NOTE(review): the dispatch code that consumes this table is not visible in
 * this part of the file - confirm the fallback semantics there.
 */
static struct ena_aenq_handlers aenq_handlers = {
    .handlers = {
	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
	    [ENA_ADMIN_NOTIFICATION] = ena_notification,
	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
    },
    .unimplemented_handler = unimplemented_aenq_handler
};
4641 
4642 /*********************************************************************
4643  *  FreeBSD Device Interface Entry Points
4644  *********************************************************************/
4645 
/*
 * Device method table: binds the probe, attach and detach device methods to
 * the driver's implementations.  DEVMETHOD_END terminates the list.
 */
static device_method_t ena_methods[] = {
    /* Device interface */
    DEVMETHOD(device_probe, ena_probe),
    DEVMETHOD(device_attach, ena_attach),
    DEVMETHOD(device_detach, ena_detach),
    DEVMETHOD_END
};
4653 
/*
 * Driver description: driver name, method table and softc size.  The softc is
 * a struct ena_adapter, which is what ena_attach/ena_detach obtain through
 * device_get_softc().
 */
static driver_t ena_driver = {
    "ena", ena_methods, sizeof(struct ena_adapter),
};

/*
 * Register the driver on the pci bus, export its PNP match table and declare
 * the module dependencies on pci and ether.
 * NOTE(review): the "- 1" in the PNP entry count presumably skips a
 * terminating sentinel entry of ena_vendor_info_array - confirm against the
 * array definition.
 */
devclass_t ena_devclass;
DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
    nitems(ena_vendor_info_array) - 1);
MODULE_DEPEND(ena, pci, 1, 1, 1);
MODULE_DEPEND(ena, ether, 1, 1, 1);
4664 
4665 /*********************************************************************/
4666