xref: /freebsd/sys/dev/ena/ena.c (revision 9086e0e06819bdf7290dc15cc04985fe8e66a711)
1 /*-
2  * BSD LICENSE
3  *
4  * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/rman.h>
43 #include <sys/smp.h>
44 #include <sys/socket.h>
45 #include <sys/sockio.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 #include <sys/time.h>
49 #include <sys/eventhandler.h>
50 
51 #include <machine/bus.h>
52 #include <machine/resource.h>
53 #include <machine/in_cksum.h>
54 
55 #include <net/bpf.h>
56 #include <net/ethernet.h>
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/rss_config.h>
63 #include <net/if_types.h>
64 #include <net/if_vlan_var.h>
65 
66 #include <netinet/in_rss.h>
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip6.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcivar.h>
76 #include <dev/pci/pcireg.h>
77 
78 #include "ena.h"
79 #include "ena_sysctl.h"
80 
81 /*********************************************************
82  *  Function prototypes
83  *********************************************************/
84 static int	ena_probe(device_t);
85 static void	ena_intr_msix_mgmnt(void *);
86 static int	ena_allocate_pci_resources(struct ena_adapter *);
87 static void	ena_free_pci_resources(struct ena_adapter *);
88 static int	ena_change_mtu(if_t, int);
89 static inline void ena_alloc_counters(counter_u64_t *, int);
90 static inline void ena_free_counters(counter_u64_t *, int);
91 static inline void ena_reset_counters(counter_u64_t *, int);
92 static void	ena_init_io_rings_common(struct ena_adapter *,
93     struct ena_ring *, uint16_t);
94 static int	ena_init_io_rings(struct ena_adapter *);
95 static void	ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
96 static void	ena_free_all_io_rings_resources(struct ena_adapter *);
97 static int	ena_setup_tx_dma_tag(struct ena_adapter *);
98 static int	ena_free_tx_dma_tag(struct ena_adapter *);
99 static int	ena_setup_rx_dma_tag(struct ena_adapter *);
100 static int	ena_free_rx_dma_tag(struct ena_adapter *);
101 static int	ena_setup_tx_resources(struct ena_adapter *, int);
102 static void	ena_free_tx_resources(struct ena_adapter *, int);
103 static int	ena_setup_all_tx_resources(struct ena_adapter *);
104 static void	ena_free_all_tx_resources(struct ena_adapter *);
105 static int	ena_setup_rx_resources(struct ena_adapter *, unsigned int);
106 static void	ena_free_rx_resources(struct ena_adapter *, unsigned int);
107 static int	ena_setup_all_rx_resources(struct ena_adapter *);
108 static void	ena_free_all_rx_resources(struct ena_adapter *);
109 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
110     struct ena_rx_buffer *);
111 static void	ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
112     struct ena_rx_buffer *);
113 static int	ena_refill_rx_bufs(struct ena_ring *, uint32_t);
114 static void	ena_free_rx_bufs(struct ena_adapter *, unsigned int);
115 static void	ena_refill_all_rx_bufs(struct ena_adapter *);
116 static void	ena_free_all_rx_bufs(struct ena_adapter *);
117 static void	ena_free_tx_bufs(struct ena_adapter *, unsigned int);
118 static void	ena_free_all_tx_bufs(struct ena_adapter *);
119 static void	ena_destroy_all_tx_queues(struct ena_adapter *);
120 static void	ena_destroy_all_rx_queues(struct ena_adapter *);
121 static void	ena_destroy_all_io_queues(struct ena_adapter *);
122 static int	ena_create_io_queues(struct ena_adapter *);
123 static int	ena_tx_cleanup(struct ena_ring *);
124 static int	ena_rx_cleanup(struct ena_ring *);
125 static int	validate_tx_req_id(struct ena_ring *, uint16_t);
126 static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
127     struct mbuf *);
128 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
129     struct ena_com_rx_ctx *, uint16_t *);
130 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
131     struct mbuf *);
132 static void	ena_handle_msix(void *);
133 static int	ena_enable_msix(struct ena_adapter *);
134 static void	ena_setup_mgmnt_intr(struct ena_adapter *);
135 static void	ena_setup_io_intr(struct ena_adapter *);
136 static int	ena_request_mgmnt_irq(struct ena_adapter *);
137 static int	ena_request_io_irq(struct ena_adapter *);
138 static void	ena_free_mgmnt_irq(struct ena_adapter *);
139 static void	ena_free_io_irq(struct ena_adapter *);
140 static void	ena_free_irqs(struct ena_adapter *);
141 static void	ena_disable_msix(struct ena_adapter *);
142 static void	ena_unmask_all_io_irqs(struct ena_adapter *);
143 static int	ena_rss_configure(struct ena_adapter *);
144 static void	ena_update_hw_stats(void *, int);
145 static int	ena_up_complete(struct ena_adapter *);
146 static int	ena_up(struct ena_adapter *);
147 static void	ena_down(struct ena_adapter *);
148 static uint64_t	ena_get_counter(if_t, ift_counter);
149 static int	ena_media_change(if_t);
150 static void	ena_media_status(if_t, struct ifmediareq *);
151 static void	ena_init(void *);
152 static int	ena_ioctl(if_t, u_long, caddr_t);
153 static int	ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
154 static void	ena_update_host_info(struct ena_admin_host_info *, if_t);
155 static void	ena_update_hwassist(struct ena_adapter *);
156 static int	ena_setup_ifnet(device_t, struct ena_adapter *,
157     struct ena_com_dev_get_features_ctx *);
158 static void	ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
159 static int	ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
160 static void	ena_start_xmit(struct ena_ring *);
161 static int	ena_mq_start(if_t, struct mbuf *);
162 static void	ena_deferred_mq_start(void *, int);
163 static void	ena_qflush(if_t);
164 static int	ena_calc_io_queue_num(struct ena_adapter *,
165     struct ena_com_dev_get_features_ctx *);
166 static int	ena_calc_queue_size(struct ena_adapter *, uint16_t *,
167     uint16_t *, struct ena_com_dev_get_features_ctx *);
168 static int	ena_rss_init_default(struct ena_adapter *);
169 static void	ena_rss_init_default_deferred(void *);
170 static void	ena_config_host_info(struct ena_com_dev *);
171 static int	ena_attach(device_t);
172 static int	ena_detach(device_t);
173 static int	ena_device_init(struct ena_adapter *, device_t,
174     struct ena_com_dev_get_features_ctx *, int *);
175 static int	ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *,
176     int);
177 static void	ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
178 static void	unimplemented_aenq_handler(void *,
179     struct ena_admin_aenq_entry *);
180 static void	ena_timer_service(void *);
181 
182 static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
183 
184 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD, 0, "ENA driver parameters");
185 
186 /*
187  * Tunable number of buffers in the buf-ring (drbr)
188  */
189 static int ena_buf_ring_size = 4096;
190 SYSCTL_INT(_hw_ena, OID_AUTO, buf_ring_size, CTLFLAG_RWTUN,
191     &ena_buf_ring_size, 0, "Size of the bufring");
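/*
 * Illustrative usage (an assumption based on standard FreeBSD tunable
 * handling, not something this file enforces): as a CTLFLAG_RWTUN knob the
 * value can be preset before the driver attaches, e.g. in /boot/loader.conf:
 *
 *	hw.ena.buf_ring_size="2048"
 *
 * Rings that have already been allocated keep the size they were created
 * with; a new value is picked up the next time the buf rings are set up.
 */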
192 
194 static ena_vendor_info_t ena_vendor_info_array[] = {
195     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
196     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
197     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
198     { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0},
199     /* Last entry */
200     { 0, 0, 0 }
201 };
202 
203 /*
204  * Contains pointers to event handlers, e.g. link state change.
205  */
206 static struct ena_aenq_handlers aenq_handlers;
207 
208 void
209 ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
210 {
211 	if (error)
212 		return;
213 	*(bus_addr_t *) arg = segs[0].ds_addr;
214 	return;
215 }
216 
217 int
218 ena_dma_alloc(device_t dmadev, bus_size_t size,
219     ena_mem_handle_t *dma, int mapflags)
220 {
221 	struct ena_adapter* adapter = device_get_softc(dmadev);
222 	uint32_t maxsize = ((size - 1)/PAGE_SIZE + 1) * PAGE_SIZE;
223 	uint64_t dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
224 	int error;
225 
226 	if (dma_space_addr == 0)
227 		dma_space_addr = BUS_SPACE_MAXADDR;
228 	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
229 	    8, 0,	      /* alignment, bounds */
230 	    dma_space_addr,   /* lowaddr */
231 	    dma_space_addr,   /* highaddr */
232 	    NULL, NULL,	      /* filter, filterarg */
233 	    maxsize,	      /* maxsize */
234 	    1,		      /* nsegments */
235 	    maxsize,	      /* maxsegsize */
236 	    BUS_DMA_ALLOCNOW, /* flags */
237 	    NULL,	      /* lockfunc */
238 	    NULL,	      /* lockarg */
239 	    &dma->tag);
240 	if (error) {
241 		device_printf(dmadev,
242 		"%s: bus_dma_tag_create failed: %d\n",
243 		__func__, error);
244 		goto fail_tag;
245 	}
246 
247 	error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
248 	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
249 	if (error) {
250 		device_printf(dmadev,
251 		"%s: bus_dmamem_alloc(%ju) failed: %d\n",
252 		__func__, (uintmax_t)size, error);
253 		goto fail_map_create;
254 	}
255 
256 	dma->paddr = 0;
257 	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
258 	    size, ena_dmamap_callback, &dma->paddr, mapflags);
259 	if (error || dma->paddr == 0) {
260 		device_printf(dmadev,
261 		"%s: bus_dmamap_load failed: %d\n",
262 		__func__, error);
263 		goto fail_map_load;
264 	}
265 
266 	return (0);
267 
268 fail_map_load:
269 	bus_dmamap_unload(dma->tag, dma->map);
270 fail_map_create:
271 	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
272 	bus_dma_tag_destroy(dma->tag);
273 fail_tag:
274 	dma->tag = NULL;
275 
276 	return (error);
277 }
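/*
 * Minimal usage sketch for ena_dma_alloc() (illustrative only; it assumes the
 * device_t passed in is the ENA PCI device, since the function derives the
 * DMA width from its softc). On success, mem.vaddr holds the zeroed kernel
 * mapping and mem.paddr the device-visible bus address; teardown mirrors the
 * error path above:
 *
 *	ena_mem_handle_t mem;
 *
 *	if (ena_dma_alloc(adapter->pdev, 4096, &mem, 0) != 0)
 *		return (ENOMEM);
 *	...
 *	bus_dmamap_unload(mem.tag, mem.map);
 *	bus_dmamem_free(mem.tag, mem.vaddr, mem.map);
 *	bus_dma_tag_destroy(mem.tag);
 */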
278 
279 static int
280 ena_allocate_pci_resources(struct ena_adapter* adapter)
281 {
282 	device_t pdev = adapter->pdev;
283 	int rid;
284 
285 	rid = PCIR_BAR(ENA_REG_BAR);
286 	adapter->memory = NULL;
287 	adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
288 	    &rid, RF_ACTIVE);
289 	if (adapter->registers == NULL) {
290 		device_printf(pdev, "Unable to allocate bus resource: "
291 		    "registers\n");
292 		return (ENXIO);
293 	}
294 
295 	return (0);
296 }
297 
298 static void
299 ena_free_pci_resources(struct ena_adapter *adapter)
300 {
301 	device_t pdev = adapter->pdev;
302 
303 	if (adapter->memory != NULL) {
304 		bus_release_resource(pdev, SYS_RES_MEMORY,
305 		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
306 	}
307 
308 	if (adapter->registers != NULL) {
309 		bus_release_resource(pdev, SYS_RES_MEMORY,
310 		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
311 	}
312 
313 	return;
314 }
315 
316 static int
317 ena_probe(device_t dev)
318 {
319 	ena_vendor_info_t *ent;
320 	char		adapter_name[60];
321 	uint16_t	pci_vendor_id = 0;
322 	uint16_t	pci_device_id = 0;
323 
324 	pci_vendor_id = pci_get_vendor(dev);
325 	pci_device_id = pci_get_device(dev);
326 
327 	ent = ena_vendor_info_array;
328 	while (ent->vendor_id != 0) {
329 		if ((pci_vendor_id == ent->vendor_id) &&
330 		    (pci_device_id == ent->device_id)) {
331 			ena_trace(ENA_DBG, "vendor=%x device=%x ",
332 			    pci_vendor_id, pci_device_id);
333 
334 			sprintf(adapter_name, DEVICE_DESC);
335 			device_set_desc_copy(dev, adapter_name);
336 			return (BUS_PROBE_DEFAULT);
337 		}
338 
339 		ent++;
340 
341 	}
342 
343 	return (ENXIO);
344 }
345 
346 static int
347 ena_change_mtu(if_t ifp, int new_mtu)
348 {
349 	struct ena_adapter *adapter = if_getsoftc(ifp);
350 	struct ena_com_dev_get_features_ctx get_feat_ctx;
351 	int rc, old_mtu, max_frame;
352 
353 	rc = ena_com_get_dev_attr_feat(adapter->ena_dev, &get_feat_ctx);
354 	if (rc) {
355 		device_printf(adapter->pdev,
356 		    "Cannot get attribute for ena device\n");
357 		return (ENXIO);
358 	}
359 
360 	/* Save old MTU in case the change fails */
361 	old_mtu = if_getmtu(ifp);
362 
363 	/* Change MTU and calculate max frame */
364 	if_setmtu(ifp, new_mtu);
365 	max_frame = ETHER_MAX_FRAME(ifp, ETHERTYPE_VLAN, 1);
366 
367 	if ((new_mtu < ENA_MIN_FRAME_LEN) ||
368 	    (new_mtu > get_feat_ctx.dev_attr.max_mtu) ||
369 	    (max_frame > ENA_MAX_FRAME_LEN)) {
370 		device_printf(adapter->pdev, "Invalid MTU setting. "
371 		    "new_mtu: %d\n", new_mtu);
372 		goto error;
373 	}
374 
375 	rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
376 	if (rc != 0)
377 		goto error;
378 
379 	return (0);
380 error:
381 	if_setmtu(ifp, old_mtu);
382 	return (EINVAL);
383 }
384 
385 static inline void
386 ena_alloc_counters(counter_u64_t *begin, int size)
387 {
388 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
389 
390 	for (; begin < end; ++begin)
391 		*begin = counter_u64_alloc(M_WAITOK);
392 }
393 
394 static inline void
395 ena_free_counters(counter_u64_t *begin, int size)
396 {
397 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
398 
399 	for (; begin < end; ++begin)
400 		counter_u64_free(*begin);
401 }
402 
403 static inline void
404 ena_reset_counters(counter_u64_t *begin, int size)
405 {
406 	counter_u64_t *end = (counter_u64_t *)((char *)begin + size);
407 
408 	for (; begin < end; ++begin)
409 		counter_u64_zero(*begin);
410 }
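/*
 * The three helpers above walk a statistics structure as a flat array of
 * counter_u64_t, so they are only valid for structs in which every member is
 * a counter_u64_t. Typical call (as used for the ring stats below):
 *
 *	ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
 *	    sizeof(txr->tx_stats));
 */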
411 
412 static void
413 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
414     uint16_t qid)
415 {
416 
417 	ring->qid = qid;
418 	ring->adapter = adapter;
419 	ring->ena_dev = adapter->ena_dev;
420 }
421 
422 static int
423 ena_init_io_rings(struct ena_adapter *adapter)
424 {
425 	struct ena_com_dev *ena_dev;
426 	struct ena_ring *txr, *rxr;
427 	struct ena_que *que;
428 	int i;
429 	int rc;
430 
431 	ena_dev = adapter->ena_dev;
432 
433 	for (i = 0; i < adapter->num_queues; i++) {
434 		txr = &adapter->tx_ring[i];
435 		rxr = &adapter->rx_ring[i];
436 
437 		/* TX/RX common ring state */
438 		ena_init_io_rings_common(adapter, txr, i);
439 		ena_init_io_rings_common(adapter, rxr, i);
440 
441 		/* TX specific ring state */
442 		txr->ring_size = adapter->tx_ring_size;
443 		txr->tx_max_header_size = ena_dev->tx_max_header_size;
444 		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
445 		txr->smoothed_interval =
446 		    ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
447 
448 		/* Allocate a buf ring */
449 		txr->br = buf_ring_alloc(ena_buf_ring_size, M_DEVBUF,
450 		    M_WAITOK, &txr->ring_mtx);
451 		if (txr->br == NULL) {
452 			device_printf(adapter->pdev,
453 			    "Error while setting up bufring\n");
454 			rc = ENOMEM;
455 			goto err_bufr_free;
456 		}
457 
458 		/* Alloc TX statistics. */
459 		ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
460 		    sizeof(txr->tx_stats));
461 
462 		/* RX specific ring state */
463 		rxr->ring_size = adapter->rx_ring_size;
464 		rxr->rx_small_copy_len = adapter->small_copy_len;
465 		rxr->smoothed_interval =
466 		    ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
467 
468 		/* Alloc RX statistics. */
469 		ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
470 		    sizeof(rxr->rx_stats));
471 
472 		/* Initialize locks */
473 		snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)",
474 		    device_get_nameunit(adapter->pdev), i);
475 		snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)",
476 		    device_get_nameunit(adapter->pdev), i);
477 
478 		mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
479 		mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF);
480 
481 		que = &adapter->que[i];
482 		que->adapter = adapter;
483 		que->id = i;
484 		que->tx_ring = txr;
485 		que->rx_ring = rxr;
486 
487 		txr->que = que;
488 		rxr->que = que;
489 	}
490 
491 	return (0);
492 
493 err_bufr_free:
494 	while (i--)
495 		ena_free_io_ring_resources(adapter, i);
496 
497 	return (rc);
498 }
499 
500 static void
501 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
502 {
503 	struct ena_ring *txr = &adapter->tx_ring[qid];
504 	struct ena_ring *rxr = &adapter->rx_ring[qid];
505 
506 	ena_free_counters((counter_u64_t *)&txr->tx_stats,
507 	    sizeof(txr->tx_stats));
508 	ena_free_counters((counter_u64_t *)&rxr->rx_stats,
509 	    sizeof(rxr->rx_stats));
510 
511 	mtx_destroy(&txr->ring_mtx);
512 	mtx_destroy(&rxr->ring_mtx);
513 
514 	drbr_free(txr->br, M_DEVBUF);
515 
516 }
517 
518 static void
519 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
520 {
521 	int i;
522 
523 	for (i = 0; i < adapter->num_queues; i++)
524 		ena_free_io_ring_resources(adapter, i);
525 
526 }
527 
528 static int
529 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
530 {
531 	int ret;
532 
533 	/* Create DMA tag for Tx buffers */
534 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
535 	    1, 0,				  /* alignment, bounds 	*/
536 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr 		*/
537 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* highaddr 		*/
538 	    NULL, NULL,				  /* filter, filterarg 	*/
539 	    ENA_TSO_MAXSIZE,			  /* maxsize 		*/
540 	    adapter->max_tx_sgl_size,		  /* nsegments 		*/
541 	    ENA_TSO_MAXSIZE,			  /* maxsegsize 	*/
542 	    0,					  /* flags 		*/
543 	    NULL,				  /* lockfunc 		*/
544 	    NULL,				  /* lockfuncarg 	*/
545 	    &adapter->tx_buf_tag);
546 
547 	if (ret != 0)
548 		device_printf(adapter->pdev, "Unable to create Tx DMA tag\n");
549 
550 	return (ret);
551 }
552 
553 static int
554 ena_free_tx_dma_tag(struct ena_adapter *adapter)
555 {
556 	int ret;
557 
558 	ret = bus_dma_tag_destroy(adapter->tx_buf_tag);
559 
560 	if (ret == 0)
561 		adapter->tx_buf_tag = NULL;
562 
563 	return (ret);
564 }
565 
566 static int
567 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
568 {
569 	int ret;
570 
571 	/* Create DMA tag for Rx buffers*/
572 	ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */
573 	    1, 0,				  /* alignment, bounds 	*/
574 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr 		*/
575 	    ENA_DMA_BIT_MASK(adapter->dma_width), /* highaddr 		*/
576 	    NULL, NULL,				  /* filter, filterarg 	*/
577 	    MJUM16BYTES,			  /* maxsize 		*/
578 	    1,					  /* nsegments 		*/
579 	    MJUM16BYTES,			  /* maxsegsize 	*/
580 	    0,					  /* flags 		*/
581 	    NULL,				  /* lockfunc 		*/
582 	    NULL,				  /* lockarg 		*/
583 	    &adapter->rx_buf_tag);
584 
585 	if (ret != 0)
586 		device_printf(adapter->pdev, "Unable to create Rx DMA tag\n");
587 
588 	return (ret);
589 }
590 
591 static int
592 ena_free_rx_dma_tag(struct ena_adapter *adapter)
593 {
594 	int ret;
595 
596 	ret = bus_dma_tag_destroy(adapter->rx_buf_tag);
597 
598 	if (ret == 0)
599 		adapter->rx_buf_tag = NULL;
600 
601 	return (ret);
602 }
603 
604 
605 /**
606  * ena_setup_tx_resources - allocate Tx resources (Descriptors)
607  * @adapter: network interface device structure
608  * @qid: queue index
609  *
610  * Returns 0 on success, or a non-zero error code on failure.
611  **/
612 static int
613 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
614 {
615 	struct ena_que *que = &adapter->que[qid];
616 	struct ena_ring *tx_ring = que->tx_ring;
617 	int size, i, err;
618 #ifdef	RSS
619 	cpuset_t cpu_mask;
620 #endif
621 
622 	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
623 
624 	tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
625 	if (!tx_ring->tx_buffer_info)
626 		goto err_tx_buffer_info;
627 
628 	size = sizeof(uint16_t) * tx_ring->ring_size;
629 	tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
630 	if (!tx_ring->free_tx_ids)
631 		goto err_tx_reqs;
632 
633 	/* Req id stack for TX OOO completions */
634 	for (i = 0; i < tx_ring->ring_size; i++)
635 		tx_ring->free_tx_ids[i] = i;
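	/*
	 * A free id is popped from this stack when a packet is submitted for
	 * transmission and pushed back in ena_tx_cleanup() when its
	 * completion arrives, which may happen out of order.
	 */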
636 
637 	/* Reset TX statistics. */
638 	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
639 	    sizeof(tx_ring->tx_stats));
640 
641 	tx_ring->next_to_use = 0;
642 	tx_ring->next_to_clean = 0;
643 
644 	/* Make sure that drbr is empty */
645 	drbr_flush(adapter->ifp, tx_ring->br);
646 
647 	/* ... and create the buffer DMA maps */
648 	for (i = 0; i < tx_ring->ring_size; i++) {
649 		err = bus_dmamap_create(adapter->tx_buf_tag, 0,
650 		    &tx_ring->tx_buffer_info[i].map);
651 		if (err != 0) {
652 			device_printf(adapter->pdev,
653 			    "Unable to create Tx DMA map for buffer %d\n", i);
654 			goto err_tx_map;
655 		}
656 	}
657 
658 	/* Allocate taskqueues */
659 	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
660 	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
661 	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
662 	if (tx_ring->enqueue_tq == NULL) {
663 		device_printf(adapter->pdev,
664 		    "Unable to create taskqueue for enqueue task\n");
665 		i = tx_ring->ring_size;
666 		goto err_tx_map;
667 	}
668 
669 	/* RSS set cpu for thread */
670 #ifdef RSS
671 	CPU_SETOF(que->cpu, &cpu_mask);
672 	taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
673 	    &cpu_mask, "%s tx_ring enq (bucket %d)",
674 	    device_get_nameunit(adapter->pdev), que->cpu);
675 #else /* RSS */
676 	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
677 	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
678 #endif /* RSS */
679 
680 	return (0);
681 
682 err_tx_map:
683 	while (i--) {
684 		bus_dmamap_destroy(adapter->tx_buf_tag,
685 		    tx_ring->tx_buffer_info[i].map);
686 	}
687 	ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->free_tx_ids);
688 err_tx_reqs:
689 	ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->tx_buffer_info);
690 err_tx_buffer_info:
691 	return (ENOMEM);
692 }
693 
694 /**
695  * ena_free_tx_resources - Free Tx Resources per Queue
696  * @adapter: network interface device structure
697  * @qid: queue index
698  *
699  * Free all transmit software resources
700  **/
701 static void
702 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
703 {
704 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
705 
706 	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
707 	    NULL))
708 		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
709 
710 	taskqueue_free(tx_ring->enqueue_tq);
711 
712 	/* Flush buffer ring, */
713 	drbr_flush(adapter->ifp, tx_ring->br);
714 
715 	/* Free buffer DMA maps, */
716 	ENA_RING_MTX_LOCK(tx_ring);
717 	for (int i = 0; i < tx_ring->ring_size; i++) {
718 		m_freem(tx_ring->tx_buffer_info[i].mbuf);
719 		tx_ring->tx_buffer_info[i].mbuf = NULL;
720 		bus_dmamap_unload(adapter->tx_buf_tag,
721 		    tx_ring->tx_buffer_info[i].map);
722 		bus_dmamap_destroy(adapter->tx_buf_tag,
723 		    tx_ring->tx_buffer_info[i].map);
724 	}
725 	ENA_RING_MTX_UNLOCK(tx_ring);
726 
727 	/* And free allocated memory. */
728 	ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->tx_buffer_info);
729 	tx_ring->tx_buffer_info = NULL;
730 
731 	ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->free_tx_ids);
732 	tx_ring->free_tx_ids = NULL;
733 }
734 
735 /**
736  * ena_setup_all_tx_resources - allocate all queues Tx resources
737  * @adapter: network interface device structure
738  *
739  * Returns 0 on success, or a non-zero error code on failure.
740  **/
741 static int
742 ena_setup_all_tx_resources(struct ena_adapter *adapter)
743 {
744 	int i, rc;
745 
746 	for (i = 0; i < adapter->num_queues; i++) {
747 		rc = ena_setup_tx_resources(adapter, i);
748 		if (!rc)
749 			continue;
750 
751 		device_printf(adapter->pdev,
752 		    "Allocation for Tx Queue %u failed\n", i);
753 		goto err_setup_tx;
754 	}
755 
756 	return (0);
757 
758 err_setup_tx:
759 	/* Rewind the index freeing the rings as we go */
760 	while (i--)
761 		ena_free_tx_resources(adapter, i);
762 	return (rc);
763 }
764 
765 /**
766  * ena_free_all_tx_resources - Free Tx Resources for All Queues
767  * @adapter: network interface device structure
768  *
769  * Free all transmit software resources
770  **/
771 static void
772 ena_free_all_tx_resources(struct ena_adapter *adapter)
773 {
774 	int i;
775 
776 	for (i = 0; i < adapter->num_queues; i++)
777 		ena_free_tx_resources(adapter, i);
778 
779 	return;
780 }
781 
782 /**
783  * ena_setup_rx_resources - allocate Rx resources (Descriptors)
784  * @adapter: network interface device structure
785  * @qid: queue index
786  *
787  * Returns 0 on success, or a non-zero error code on failure.
788  **/
789 static int
790 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
791 {
792 	struct ena_que *que = &adapter->que[qid];
793 	struct ena_ring *rx_ring = que->rx_ring;
794 	int size, err, i;
795 #ifdef	RSS
796 	cpuset_t cpu_mask;
797 #endif
798 
799 	size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
800 
801 	/*
802 	 * Allocate an extra element so that in the rx path
803 	 * we can always prefetch rx_info + 1.
804 	 */
805 	size += sizeof(struct ena_rx_buffer);
806 
807 	rx_ring->rx_buffer_info = ENA_MEM_ALLOC(adapter->ena_dev->dmadev, size);
808 	if (!rx_ring->rx_buffer_info)
809 		return (ENOMEM);
810 
811 	/* Reset RX statistics. */
812 	ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats,
813 	    sizeof(rx_ring->rx_stats));
814 
815 	rx_ring->next_to_clean = 0;
816 	rx_ring->next_to_use = 0;
817 
818 	/* ... and create the buffer DMA maps */
819 	for (i = 0; i < rx_ring->ring_size; i++) {
820 		err = bus_dmamap_create(adapter->rx_buf_tag, 0,
821 		    &(rx_ring->rx_buffer_info[i].map));
822 		if (err != 0) {
823 			device_printf(adapter->pdev,
824 			    "Unable to create Rx DMA map for buffer %d\n", i);
825 			goto err_rx_dma;
826 		}
827 	}
828 
829 	/* Create LRO for the ring */
830 	if (adapter->ifp->if_capenable & IFCAP_LRO) {
831 		int err = tcp_lro_init(&rx_ring->lro);
832 		if (err) {
833 			device_printf(adapter->pdev,
834 			    "LRO[%d] Initialization failed!\n", qid);
835 		} else {
836 			ena_trace(ENA_INFO,
837 			    "RX Soft LRO[%d] Initialized\n", qid);
838 			rx_ring->lro.ifp = adapter->ifp;
839 		}
840 	}
841 
842 	return (0);
843 
844 err_rx_dma:
845 	while (i--) {
846 		bus_dmamap_destroy(adapter->rx_buf_tag,
847 		    rx_ring->rx_buffer_info[i].map);
848 	}
849 
850 	ENA_MEM_FREE(adapter->ena_dev->dmadev, rx_ring->rx_buffer_info);
851 	rx_ring->rx_buffer_info = NULL;
852 	ena_trace(ENA_ALERT, "RX resource allocation fail");
853 	return (ENOMEM);
854 }
855 
856 /**
857  * ena_free_rx_resources - Free Rx Resources
858  * @adapter: network interface device structure
859  * @qid: queue index
860  *
861  * Free all receive software resources
862  **/
863 static void
864 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
865 {
866 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
867 
868 	ena_trace(ENA_INFO, "%s qid %d\n", __func__, qid);
869 
870 	/* Free buffer DMA maps, */
871 	for (int i = 0; i < rx_ring->ring_size; i++) {
872 		m_freem(rx_ring->rx_buffer_info[i].mbuf);
873 		rx_ring->rx_buffer_info[i].mbuf = NULL;
874 		bus_dmamap_unload(adapter->rx_buf_tag,
875 		    rx_ring->rx_buffer_info[i].map);
876 		bus_dmamap_destroy(adapter->rx_buf_tag,
877 		    rx_ring->rx_buffer_info[i].map);
878 	}
879 
880 	/* free LRO resources, */
881 	tcp_lro_free(&rx_ring->lro);
882 
883 	/* free allocated memory */
884 	ENA_MEM_FREE(adapter->ena_dev->dmadev, rx_ring->rx_buffer_info);
885 	rx_ring->rx_buffer_info = NULL;
886 
887 	return;
888 }
889 
890 /**
891  * ena_setup_all_rx_resources - allocate all queues Rx resources
892  * @adapter: network interface device structure
893  *
894  * Returns 0 on success, or a non-zero error code on failure.
895  **/
896 static int
897 ena_setup_all_rx_resources(struct ena_adapter *adapter)
898 {
899 	int i, rc = 0;
900 
901 	for (i = 0; i < adapter->num_queues; i++) {
902 		rc = ena_setup_rx_resources(adapter, i);
903 		if (!rc)
904 			continue;
905 
906 		device_printf(adapter->pdev,
907 		    "Allocation for Rx Queue %u failed\n", i);
908 		goto err_setup_rx;
909 	}
910 	return (0);
911 
912 err_setup_rx:
913 	/* rewind the index freeing the rings as we go */
914 	while (i--)
915 		ena_free_rx_resources(adapter, i);
916 	return (rc);
917 }
918 
919 /**
920  * ena_free_all_rx_resources - Free Rx resources for all queues
921  * @adapter: network interface device structure
922  *
923  * Free all receive software resources
924  **/
925 static void
926 ena_free_all_rx_resources(struct ena_adapter *adapter)
927 {
928 	int i;
929 
930 	for (i = 0; i < adapter->num_queues; i++)
931 		ena_free_rx_resources(adapter, i);
932 
933 	return;
934 }
935 
936 static inline int
937 ena_alloc_rx_mbuf(struct ena_adapter *adapter,
938     struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
939 {
940 	struct ena_com_buf *ena_buf;
941 	bus_dma_segment_t segs[1];
942 	int nsegs, error;
943 
944 	/* if the previously allocated frag has not been used yet */
945 	if (rx_info->mbuf != NULL)
946 		return (0);
947 
948 	ENA_RING_MTX_LOCK(rx_ring);
949 	/* Get mbuf using UMA allocator */
950 	rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
951 	ENA_RING_MTX_UNLOCK(rx_ring);
952 
953 	if (!rx_info->mbuf) {
954 		counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
955 		return (ENOMEM);
956 	}
957 	/* Set mbuf length */
958 	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = MJUM16BYTES;
959 
960 	/* Map packets for DMA */
961 	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
962 	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d",
963 	    adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len);
964 	error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
965 	    rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
966 	if (error || (nsegs != 1)) {
967 		device_printf(adapter->pdev, "failed to map mbuf, error: %d, "
968 		    "nsegs: %d\n", error, nsegs);
969 		counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
970 		goto exit;
971 
972 	}
973 
974 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
975 
976 	ena_buf = &rx_info->ena_buf;
977 	ena_buf->paddr = segs[0].ds_addr;
978 	ena_buf->len = MJUM16BYTES;
979 
980 	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
981 	    "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
982 	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);
983 
984 	return (0);
985 
986 exit:
987 	m_freem(rx_info->mbuf);
988 	rx_info->mbuf = NULL;
989 	return (EFAULT);
990 }
991 
992 static void
993 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
994     struct ena_rx_buffer *rx_info)
995 {
996 
997 	if (!rx_info->mbuf)
998 		return;
999 
1000 	bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
1001 	m_freem(rx_info->mbuf);
1002 	rx_info->mbuf = NULL;
1003 
1004 	return;
1005 }
1006 
1007 
1008 /**
1009  * ena_refill_rx_bufs - Refills ring with descriptors
1010  * @rx_ring: the ring which we want to feed with free descriptors
1011  * @num: number of descriptors to refill
1012  * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1013  **/
1014 static int
1015 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1016 {
1017 	struct ena_adapter *adapter = rx_ring->adapter;
1018 	uint16_t next_to_use;
1019 	uint32_t i;
1020 	int rc;
1021 
1022 	ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d",
1023 	    rx_ring->qid);
1024 
1025 	next_to_use = rx_ring->next_to_use;
1026 
1027 	for (i = 0; i < num; i++) {
1028 		ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC,
1029 		    "RX buffer - next to use: %d", next_to_use);
1030 
1031 		struct ena_rx_buffer *rx_info =
1032 		    &rx_ring->rx_buffer_info[next_to_use];
1033 
1034 		rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1035 		if (rc != 0) {
1036 			device_printf(adapter->pdev,
1037 			    "failed to alloc buffer for rx queue\n");
1038 			break;
1039 		}
1040 		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1041 		    &rx_info->ena_buf, next_to_use);
1042 		if (unlikely(rc)) {
1043 			device_printf(adapter->pdev,
1044 			    "failed to add buffer for rx queue %d\n",
1045 			    rx_ring->qid);
1046 			break;
1047 		}
1048 		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1049 		    rx_ring->ring_size);
1050 	}
1051 
1052 	if (i < num) {
1053 		counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1054 		device_printf(adapter->pdev,
1055 		    "refilled rx queue %d with %d pages only\n",
1056 		    rx_ring->qid, i);
1057 	}
1058 
1059 	if (i != 0) {
1060 		wmb();
1061 		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1062 	}
1063 	rx_ring->next_to_use = next_to_use;
1064 	return (i);
1065 }
1066 
1067 static void
1068 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1069 {
1070 	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1071 	unsigned int i;
1072 
1073 	for (i = 0; i < rx_ring->ring_size; i++) {
1074 		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1075 
1076 		if (rx_info->mbuf)
1077 			ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1078 	}
1079 
1080 	return;
1081 }
1082 
1083 /**
1084  * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1085  * @adapter: network interface device structure
1086  *
1087  */
1088 static void
1089 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1090 {
1091 	struct ena_ring *rx_ring;
1092 	int i, rc, bufs_num;
1093 
1094 	for (i = 0; i < adapter->num_queues; i++) {
1095 		rx_ring = &adapter->rx_ring[i];
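		/*
		 * Leave one descriptor unused; a completely full submission
		 * queue could not be told apart from an empty one (a common
		 * ring convention, noted here as an explanatory assumption).
		 */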
1096 		bufs_num = rx_ring->ring_size - 1;
1097 		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1098 
1099 		if (unlikely(rc != bufs_num))
1100 			device_printf(adapter->pdev,
1101 			    "refilling Queue %d failed. allocated %d buffers"
1102 			    " from: %d\n", i, rc, bufs_num);
1103 	}
1104 }
1105 
1106 static void
1107 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1108 {
1109 	int i;
1110 
1111 	for (i = 0; i < adapter->num_queues; i++)
1112 		ena_free_rx_bufs(adapter, i);
1113 	return;
1114 }
1115 
1116 /**
1117  * ena_free_tx_bufs - Free Tx Buffers per Queue
1118  * @adapter: network interface device structure
1119  * @qid: queue index
1120  **/
1121 static void
1122 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1123 {
1124 	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1125 
1126 	ENA_RING_MTX_LOCK(tx_ring);
1127 	for (int i = 0; i < tx_ring->ring_size; i++) {
1128 		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1129 
1130 		if (tx_info->mbuf == NULL)
1131 			continue;
1132 
1133 		ena_trace(ENA_DBG | ENA_TXPTH | ENA_RSC,
1134 		    "free uncompleted Tx mbufs qid[%d] idx: 0x%x", qid, i);
1135 
1136 		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
1137 		m_free(tx_info->mbuf);
1138 		tx_info->mbuf = NULL;
1139 	}
1140 	ENA_RING_MTX_UNLOCK(tx_ring);
1141 
1142 	return;
1143 }
1144 
1145 static void
1146 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1147 {
1148 
1149 	for (int i = 0; i < adapter->num_queues; i++)
1150 		ena_free_tx_bufs(adapter, i);
1151 
1152 	return;
1153 }
1154 
1155 static void
1156 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1157 {
1158 	uint16_t ena_qid;
1159 	int i;
1160 
1161 	for (i = 0; i < adapter->num_queues; i++) {
1162 		ena_qid = ENA_IO_TXQ_IDX(i);
1163 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1164 	}
1165 }
1166 
1167 static void
1168 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1169 {
1170 	uint16_t ena_qid;
1171 	int i;
1172 
1173 	for (i = 0; i < adapter->num_queues; i++) {
1174 		ena_qid = ENA_IO_RXQ_IDX(i);
1175 		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1176 	}
1177 }
1178 
1179 static void
1180 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1181 {
1182 	ena_destroy_all_tx_queues(adapter);
1183 	ena_destroy_all_rx_queues(adapter);
1184 }
1185 
1186 static int
1187 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
1188 {
1189 	struct ena_tx_buffer *tx_info = NULL;
1190 
1191 	if (likely(req_id < tx_ring->ring_size)) {
1192 		tx_info = &tx_ring->tx_buffer_info[req_id];
1193 		if (tx_info->mbuf)
1194 			return (0);
1195 	}
1196 
1197 	counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
1198 
1199 	return (EFAULT);
1200 }
1201 
1202 static int
1203 ena_create_io_queues(struct ena_adapter *adapter)
1204 {
1205 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1206 	struct ena_com_create_io_ctx ctx;
1207 	struct ena_ring *ring;
1208 	uint16_t ena_qid;
1209 	uint32_t msix_vector;
1210 	int rc, i;
1211 
1212 	/* Create TX queues */
1213 	for (i = 0; i < adapter->num_queues; i++) {
1214 		msix_vector = ENA_IO_IRQ_IDX(i);
1215 		ena_qid = ENA_IO_TXQ_IDX(i);
1216 		ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1217 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1218 		ctx.queue_size = adapter->tx_ring_size;
1219 		ctx.msix_vector = msix_vector;
1220 		ctx.qid = ena_qid;
1221 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1222 		if (rc) {
1223 			device_printf(adapter->pdev,
1224 			    "Failed to create io TX queue #%d rc: %d\n", i, rc);
1225 			goto err_tx;
1226 		}
1227 		ring = &adapter->tx_ring[i];
1228 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1229 		    &ring->ena_com_io_sq,
1230 		    &ring->ena_com_io_cq);
1231 		if (rc) {
1232 			device_printf(adapter->pdev,
1233 			    "Failed to get TX queue handlers. TX queue num"
1234 			    " %d rc: %d\n", i, rc);
1235 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1236 			goto err_tx;
1237 		}
1238 	}
1239 
1240 	/* Create RX queues */
1241 	for (i = 0; i < adapter->num_queues; i++) {
1242 		msix_vector = ENA_IO_IRQ_IDX(i);
1243 		ena_qid = ENA_IO_RXQ_IDX(i);
1244 		ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1245 		ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1246 		ctx.queue_size = adapter->rx_ring_size;
1247 		ctx.msix_vector = msix_vector;
1248 		ctx.qid = ena_qid;
1249 		rc = ena_com_create_io_queue(ena_dev, &ctx);
1250 		if (rc) {
1251 			device_printf(adapter->pdev,
1252 			    "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1253 			goto err_rx;
1254 		}
1255 
1256 		ring = &adapter->rx_ring[i];
1257 		rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1258 		    &ring->ena_com_io_sq,
1259 		    &ring->ena_com_io_cq);
1260 		if (rc) {
1261 			device_printf(adapter->pdev,
1262 			    "Failed to get RX queue handlers. RX queue num"
1263 			    " %d rc: %d\n", i, rc);
1264 			ena_com_destroy_io_queue(ena_dev, ena_qid);
1265 			goto err_rx;
1266 		}
1267 	}
1268 
1269 	return (0);
1270 
1271 err_rx:
1272 	while (i--)
1273 		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1274 	i = adapter->num_queues;
1275 err_tx:
1276 	while (i--)
1277 		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1278 
1279 	return (ENXIO);
1280 }
1281 
1282 /**
1283  * ena_tx_cleanup - clear sent packets and corresponding descriptors
1284  * @tx_ring: ring for which we want to clean packets
1285  *
1286  * Once packets are sent, we ask the device in a loop for descriptors that
1287  * are no longer in use. We find the related mbuf chain in a map (an index
1288  * into an array) and free it, then update the ring state.
1289  * This is performed in an "endless" loop, updating the ring pointers every
1290  * TX_COMMIT descriptors. The first check for free descriptors is performed
1291  * before the actual loop, and it is then repeated at the end of each pass.
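 * For example (illustrative numbers only; the real values are the TX_BUDGET
 * and TX_COMMIT constants): with a budget of 128 and a commit interval of 32,
 * up to 128 completions are reaped per call, and next_to_clean together with
 * the completion ack is pushed to the device after every 32 of them, plus
 * one final update for any remainder.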
1292  **/
1293 static int
1294 ena_tx_cleanup(struct ena_ring *tx_ring)
1295 {
1296 	struct ena_adapter *adapter;
1297 	struct ena_com_io_cq* io_cq;
1298 	uint16_t next_to_clean;
1299 	uint16_t req_id;
1300 	uint16_t ena_qid;
1301 	unsigned int total_done = 0;
1302 	int rc;
1303 	int commit = TX_COMMIT;
1304 	int budget = TX_BUDGET;
1305 	int work_done;
1306 
1307 	adapter = tx_ring->que->adapter;
1308 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1309 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1310 	next_to_clean = tx_ring->next_to_clean;
1311 
1312 	do {
1313 		struct ena_tx_buffer *tx_info;
1314 		struct mbuf *mbuf;
1315 
1316 		rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
1317 		if (rc != 0)
1318 			break;
1319 
1320 		rc = validate_tx_req_id(tx_ring, req_id);
1321 		if (rc)
1322 			break;
1323 
1324 		tx_info = &tx_ring->tx_buffer_info[req_id];
1325 
1326 		mbuf = tx_info->mbuf;
1327 
1328 		tx_info->mbuf = NULL;
1329 		bintime_clear(&tx_info->timestamp);
1330 
1331 		if (tx_info->num_of_bufs != 0) {
1332 			/* Map is no longer required */
1333 			bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
1334 		}
1335 
1336 		m_freem(mbuf);
1337 
1338 		total_done += tx_info->tx_descs;
1339 
1340 		tx_ring->free_tx_ids[next_to_clean] = req_id;
1341 		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1342 		    tx_ring->ring_size);
1343 
1344 		if (--commit == 0) {
1345 			commit = TX_COMMIT;
1346 			/* update ring state every TX_COMMIT descriptor */
1347 			tx_ring->next_to_clean = next_to_clean;
1348 			ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], total_done);
1349 			ena_com_update_dev_comp_head(io_cq);
1350 			total_done = 0;
1351 		}
1352 	} while (--budget);
1353 
1354 	work_done = TX_BUDGET - budget;
1355 
1356 	/* If there is still something to commit update ring state */
1357 	if (commit != TX_COMMIT) {
1358 		tx_ring->next_to_clean = next_to_clean;
1359 		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], total_done);
1360 		ena_com_update_dev_comp_head(io_cq);
1361 	}
1362 
1363 	taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
1364 
1365 	return (work_done);
1366 }
1367 
1368 static void
1369 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1370     struct mbuf *mbuf)
1371 {
1372 	struct ena_adapter *adapter = rx_ring->adapter;
1373 
1374 	if (adapter->rss_support) {
1375 		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
1376 
1377 		if (ena_rx_ctx->frag &&
1378 		    ena_rx_ctx->l3_proto != ENA_ETH_IO_L4_PROTO_UNKNOWN) {
1379 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1380 			return;
1381 		}
1382 
1383 		switch (ena_rx_ctx->l3_proto) {
1384 		case ENA_ETH_IO_L3_PROTO_IPV4:
1385 			switch (ena_rx_ctx->l4_proto) {
1386 			case ENA_ETH_IO_L4_PROTO_TCP:
1387 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
1388 				break;
1389 			case ENA_ETH_IO_L4_PROTO_UDP:
1390 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
1391 				break;
1392 			default:
1393 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
1394 			}
1395 			break;
1396 		case ENA_ETH_IO_L3_PROTO_IPV6:
1397 			switch (ena_rx_ctx->l4_proto) {
1398 			case ENA_ETH_IO_L4_PROTO_TCP:
1399 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
1400 				break;
1401 			case ENA_ETH_IO_L4_PROTO_UDP:
1402 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
1403 				break;
1404 			default:
1405 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
1406 			}
1407 			break;
1408 		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
1409 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1410 			break;
1411 		default:
1412 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1413 		}
1414 	} else {
1415 		mbuf->m_pkthdr.flowid = rx_ring->qid;
1416 		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1417 	}
1418 }
1419 
1420 /**
1421  * ena_rx_mbuf - assemble mbuf from descriptors
1422  * @rx_ring: ring for which we want to clean packets
1423  * @ena_bufs: buffer info
1424  * @ena_rx_ctx: metadata for this packet(s)
1425  * @next_to_clean: ring pointer
1426  *
1427  **/
1428 static struct mbuf*
1429 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
1430     struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
1431 {
1432 	struct mbuf *mbuf;
1433 	struct ena_rx_buffer *rx_info;
1434 	struct ena_adapter *adapter;
1435 	unsigned int len, buf = 0;
1436 	unsigned int descs = ena_rx_ctx->descs;
1437 
1438 	adapter = rx_ring->adapter;
1439 	rx_info = &rx_ring->rx_buffer_info[*next_to_clean];
1440 
1441 	ENA_ASSERT(rx_info->mbuf, "Invalid alloc frag buffer\n");
1442 
1443 	len = ena_bufs[0].len;
1444 	ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx",
1445 	    rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
1446 
1447 	mbuf = rx_info->mbuf;
1448 	mbuf->m_flags |= M_PKTHDR;
1449 	mbuf->m_pkthdr.len = len;
1450 	mbuf->m_len = len;
1451 	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
1452 
1453 	/* Fill mbuf with the hash key and its interpretation for optimization */
1454 	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
1455 
1456 	ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d",
1457 	    mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
1458 
1459 	/* DMA address is not needed anymore, unmap it */
1460 	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1461 
1462 	rx_info->mbuf = NULL;
1463 	*next_to_clean = ENA_RX_RING_IDX_NEXT(*next_to_clean,
1464 	    rx_ring->ring_size);
1465 
1466 	/*
1467 	 * While we have more than 1 descriptors for one rcvd packet, append
1468 	 * other mbufs to the main one
1469 	 */
1470 	while (--descs) {
1471 		rx_info = &rx_ring->rx_buffer_info[*next_to_clean];
1472 		len = ena_bufs[++buf].len;
1473 
1474 		if (!m_append(mbuf, len, rx_info->mbuf->m_data)) {
1475 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1476 			ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p",
1477 			    mbuf);
1478 		}
1479 		/* Free already appended mbuf, it won't be useful anymore */
1480 		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
1481 		m_freem(rx_info->mbuf);
1482 		rx_info->mbuf = NULL;
1483 
1484 		*next_to_clean = ENA_RX_RING_IDX_NEXT(*next_to_clean,
1485 		    rx_ring->ring_size);
1486 	}
1487 
1488 	return (mbuf);
1489 }
1490 
1491 /**
1492  * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
1493  **/
1494 static inline void
1495 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1496     struct mbuf *mbuf)
1497 {
1498 
1499 	/* if IPv4 and the header checksum is wrong */
1500 	if ((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1501 	    (ena_rx_ctx->l3_csum_err)) {
1502 		/* ipv4 checksum error */
1503 		mbuf->m_pkthdr.csum_flags = 0;
1504 		counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1505 		return;
1506 	}
1507 
1508 	/* if TCP/UDP */
1509 	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1510 	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
1511 		if (ena_rx_ctx->l4_csum_err) {
1512 			/* TCP/UDP checksum error */
1513 			mbuf->m_pkthdr.csum_flags = 0;
1514 			counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1515 		} else {
1516 			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1517 			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1518 		}
1519 	}
1520 
1521 	return;
1522 }
1523 
1524 /**
1525  * ena_rx_cleanup - handle rx irq
1526  * @rx_ring: ring for which the irq is being handled
1527  **/
1528 static int
1529 ena_rx_cleanup(struct ena_ring *rx_ring)
1530 {
1531 	struct ena_adapter *adapter;
1532 	struct mbuf *mbuf;
1533 	struct ena_com_rx_ctx ena_rx_ctx;
1534 	struct ena_com_io_cq* io_cq;
1535 	struct ena_com_io_sq* io_sq;
1536 	/* struct ena_eth_io_intr_reg intr_reg; */
1537 	if_t ifp;
1538 	uint16_t ena_qid;
1539 	uint16_t next_to_clean;
1540 	uint32_t refill_required;
1541 	uint32_t refill_threshold;
1542 	uint32_t do_if_input = 0;
1543 	unsigned int qid;
1544 	int rc;
1545 	int budget = RX_BUDGET;
1546 
1547 	adapter = rx_ring->que->adapter;
1548 	ifp = adapter->ifp;
1549 	qid = rx_ring->que->id;
1550 	ena_qid = ENA_IO_RXQ_IDX(qid);
1551 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1552 	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1553 	next_to_clean = rx_ring->next_to_clean;
1554 
1555 	do {
1556 		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1557 		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
1558 		ena_rx_ctx.descs = 0;
1559 		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
1560 
1561 		if (unlikely(rc))
1562 			goto error;
1563 
1564 		if (unlikely(ena_rx_ctx.descs == 0))
1565 			break;
1566 
1567 		/* Receive mbuf from the ring */
1568 		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
1569 		    &ena_rx_ctx, &next_to_clean);
1570 
1571 		/* Exit if we failed to retrieve a buffer */
1572 		if (unlikely(!mbuf)) {
1573 			next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean,
1574 			    ena_rx_ctx.descs, rx_ring->ring_size);
1575 			break;
1576 		}
1577 		ena_trace(ENA_DBG | ENA_RXPTH, "Rx: %d bytes",
1578 		    mbuf->m_pkthdr.len);
1579 
1580 		if ((ifp->if_capenable & IFCAP_RXCSUM) ||
1581 		    (ifp->if_capenable & IFCAP_RXCSUM_IPV6)) {
1582 			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
1583 		}
1584 
1585 		counter_u64_add(rx_ring->rx_stats.bytes, mbuf->m_pkthdr.len);
1586 		/*
1587 		 * LRO is only for IP/TCP packets and TCP checksum of the packet
1588 		 * should be computed by hardware.
1589 		 */
1590 		do_if_input = 1;
1591 		if ((ifp->if_capenable & IFCAP_LRO) &&
1592 		    (mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) &&
1593 		    ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP) {
1594 			/*
1595 			 * Send to the stack if:
1596 			 *  - LRO not enabled, or
1597 			 *  - no LRO resources, or
1598 			 *  - lro enqueue fails
1599 			 */
1600 			if (rx_ring->lro.lro_cnt != 0 &&
1601 			    tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)
1602 				do_if_input = 0;
1603 		}
1604 		if (do_if_input) {
1605 			ena_trace(ENA_DBG | ENA_RXPTH, "calling if_input() with mbuf %p",
1606 			    mbuf);
1607 			(*ifp->if_input)(ifp, mbuf);
1608 		}
1609 
1610 		counter_u64_add(rx_ring->rx_stats.cnt, 1);
1611 	} while (--budget);
1612 
1613 	rx_ring->next_to_clean = next_to_clean;
1614 
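	/*
	 * Batch the refill: replenish the RX submission queue only after more
	 * than ring_size / ENA_RX_REFILL_THRESH_DEVIDER descriptors have been
	 * consumed, so the doorbell is not written for every single buffer.
	 */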
1615 	refill_required = ena_com_sq_empty_space(io_sq);
1616 	refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DEVIDER;
1617 
1618 	if (refill_required > refill_threshold) {
1619 		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1620 		ena_refill_rx_bufs(rx_ring, refill_required);
1621 	}
1622 
1623 	tcp_lro_flush_all(&rx_ring->lro);
1624 
1625 	return (RX_BUDGET - budget);
1626 
1627 error:
1628 	counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
1629 	return (RX_BUDGET - budget);
1630 }
1631 
1632 /*********************************************************************
1633  *
1634  *  MSIX & Interrupt Service routine
1635  *
1636  **********************************************************************/
1637 
1638 /**
1639  * ena_intr_msix_mgmnt - MSI-X interrupt handler for the admin/async queue
1640  * @arg: the adapter (struct ena_adapter *)
1641  **/
1642 static void
1643 ena_intr_msix_mgmnt(void *arg)
1644 {
1645 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
1646 
1647 	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1648 	if (likely(adapter->running))
1649 		ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1650 }
1651 
1652 /**
1653  * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
1654  * @arg: the queue (struct ena_que *) whose interrupt is being handled
1655  **/
1656 static void
1657 ena_handle_msix(void *arg)
1658 {
1659 	struct ena_que	*que = arg;
1660 	struct ena_adapter *adapter = que->adapter;
1661 	if_t ifp = adapter->ifp;
1662 	struct ena_ring *tx_ring;
1663 	struct ena_ring *rx_ring;
1664 	struct ena_com_io_cq* io_cq;
1665 	struct ena_eth_io_intr_reg intr_reg;
1666 	int qid, ena_qid;
1667 	int txc, rxc, i;
1668 
1669 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1670 		return;
1671 
1672 	ena_trace(ENA_DBG, "MSI-X TX/RX routine");
1673 
1674 	tx_ring = que->tx_ring;
1675 	rx_ring = que->rx_ring;
1676 	qid = que->id;
1677 	ena_qid = ENA_IO_TXQ_IDX(qid);
1678 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1679 
1680 	for (i = 0; i < CLEAN_BUDGET; ++i) {
1681 		rxc = ena_rx_cleanup(rx_ring);
1682 
1683 		/* Protection from calling ena_tx_cleanup from ena_start_xmit */
1684 		ENA_RING_MTX_LOCK(tx_ring);
1685 		txc = ena_tx_cleanup(tx_ring);
1686 		ENA_RING_MTX_UNLOCK(tx_ring);
1687 
1688 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1689 			return;
1690 
1691 		if (txc != TX_BUDGET && rxc != RX_BUDGET)
1692 			break;
1693 	}
1694 
1695 	/* Signal that work is done and unmask interrupt */
1696 	ena_com_update_intr_reg(&intr_reg,
1697 	    RX_IRQ_INTERVAL,
1698 	    TX_IRQ_INTERVAL,
1699 	    true);
1700 	ena_com_unmask_intr(io_cq, &intr_reg);
1701 }
1702 
1703 static int
1704 ena_enable_msix(struct ena_adapter *adapter)
1705 {
1706 	device_t dev = adapter->pdev;
1707 	int i, msix_vecs, rc = 0;
1708 
1709 	/* Reserve the max MSI-X vectors we might need */
1710 	msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_queues);
1711 
1712 	adapter->msix_entries = ENA_MEM_ALLOC(adapter->ena_dev->dmadev,
1713 	    msix_vecs * sizeof(struct msix_entry));
1714 	if (!adapter->msix_entries) {
1715 		device_printf(dev,
1716 		    "Failed to allocate msix_entries, vectors %d\n", msix_vecs);
1717 		rc = ENOMEM;
1718 		goto error;
1719 	}
1720 	device_printf(dev, "Allocated msix_entries, vectors (cnt: %d)\n",
1721 	    msix_vecs);
1722 
1723 	for (i = 0; i < msix_vecs; i++) {
1724 		adapter->msix_entries[i].entry = i;
1725 		/* Vectors must start from 1 */
1726 		adapter->msix_entries[i].vector = i + 1;
1727 	}
1728 
1729 	rc = pci_alloc_msix(dev, &msix_vecs);
1730 	if (rc != 0) {
1731 		device_printf(dev,
1732 		    "Failed to enable MSIX, vectors %d rc %d\n", msix_vecs, rc);
1733 		ENA_MEM_FREE(adapter->ena_dev->dmadev, adapter->msix_entries);
1734 		adapter->msix_entries = NULL;
1735 		rc = ENOSPC;
1736 		goto error;
1737 	}
1738 
1739 	adapter->msix_vecs = msix_vecs;
1740 	adapter->msix_enabled = true;
1741 
1742 error:
1743 	return (rc);
1744 }
1745 
1746 static void
1747 ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1748 {
1749 
1750 	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1751 	    ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1752 	    device_get_nameunit(adapter->pdev));
1753 	/*
1754 	 * The handler is NULL on purpose; it will be set
1755 	 * when the mgmnt interrupt is acquired.
1756 	 */
1757 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL;
1758 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1759 	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1760 	    adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
1761 
1762 	return;
1763 }
1764 
1765 static void
1766 ena_setup_io_intr(struct ena_adapter *adapter)
1767 {
1768 	static int last_bind_cpu = -1;
1769 	int irq_idx;
1770 	ena_trace(ENA_DBG, "enter");
1771 
1772 	for (int i = 0; i < adapter->num_queues; i++) {
1773 		irq_idx = ENA_IO_IRQ_IDX(i);
1774 
1775 		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1776 		    "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i);
1777 		adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1778 		adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1779 		adapter->irq_tbl[irq_idx].vector =
1780 		    adapter->msix_entries[irq_idx].vector;
1781 		ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1782 		    adapter->msix_entries[irq_idx].vector);
1783 #ifdef	RSS
1784 		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1785 		    rss_getcpu(i % rss_getnumbuckets());
1786 #else
1787 		/*
1788 		 * We still want to bind rings to the corresponding cpu
1789 		 * using something similar to the RSS round-robin technique.
1790 		 */
1791 		if (last_bind_cpu < 0)
1792 			last_bind_cpu = CPU_FIRST();
1793 		adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1794 		    last_bind_cpu;
1795 		last_bind_cpu = CPU_NEXT(last_bind_cpu);
1796 #endif
1797 	}
1798 
1799 	return;
1800 }
1801 
1802 static int
1803 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1804 {
1805 	struct ena_irq *irq;
1806 	unsigned long flags;
1807 	int rc, rcc;
1808 
1809 	flags = RF_ACTIVE | RF_SHAREABLE;
1810 
1811 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1812 	irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1813 	    &irq->vector, flags);
1814 
1815 	if (irq->res == NULL) {
1816 		device_printf(adapter->pdev, "could not allocate "
1817 		    "irq vector: %d\n", irq->vector);
1818 		rc = ENXIO;
1819 		goto exit_res;
1820 	}
1821 
1822 	if ((rc = bus_activate_resource(adapter->pdev, SYS_RES_IRQ, irq->vector,
1823 	    irq->res)) != 0) {
1824 		device_printf(adapter->pdev, "could not activate "
1825 		    "irq vector: %d\n", irq->vector);
1826 		goto exit_intr;
1827 	}
1828 
1829 	if ((rc = bus_setup_intr(adapter->pdev, irq->res,
1830 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
1831 	    ena_intr_msix_mgmnt, irq->data, &irq->cookie)) != 0) {
1832 		device_printf(adapter->pdev, "failed to register "
1833 		    "interrupt handler for irq %ju: %d\n",
1834 		    rman_get_start(irq->res), rc);
1835 		goto exit_intr;
1836 	}
1837 	irq->requested = true;
1838 
1839 	return (rc);
1840 
1841 exit_intr:
1842 	device_printf(adapter->pdev, "exit_intr: releasing resource"
1843 	    " for irq %d\n", irq->vector);
1844 	rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1845 	    irq->vector, irq->res);
1846 	if (rcc)
1847 		device_printf(adapter->pdev, "dev has no parent while "
1848 		    "releasing res for irq: %d\n", irq->vector);
1849 	irq->res = NULL;
1850 
1851 exit_res:
1852 	return (rc);
1853 }
1854 
1855 static int
1856 ena_request_io_irq(struct ena_adapter *adapter)
1857 {
1858 	struct ena_irq *irq;
1859 	unsigned long flags = 0;
1860 	int rc = 0, i, rcc;
1861 
1862 	if (!adapter->msix_enabled) {
1863 		device_printf(adapter->pdev, "failed to request irq\n");
1864 		return (EINVAL);
1865 	} else {
1866 		flags = RF_ACTIVE | RF_SHAREABLE;
1867 	}
1868 
1869 	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1870 		irq = &adapter->irq_tbl[i];
1871 
1872 		if (irq->requested)
1873 			continue;
1874 
1875 		irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
1876 		    &irq->vector, flags);
1877 		if (irq->res == NULL) {
1878 			device_printf(adapter->pdev, "could not allocate "
1879 			    "irq vector: %d\n", irq->vector);
1880 			rc = ENOMEM;
1881 			goto err;
1882 		}
1883 		if ((rc = bus_setup_intr(adapter->pdev, irq->res,
1884 			    INTR_TYPE_NET | INTR_MPSAFE, NULL, irq->handler,
1885 			    irq->data, &irq->cookie)) != 0) {
1886 			device_printf(adapter->pdev, "failed to register "
1887 			    "interrupt handler for irq %ju: %d\n",
1888 			    rman_get_start(irq->res), rc);
1889 			goto err;
1890 		}
1891 		irq->requested = true;
1892 
1893 #ifdef	RSS
1894 		device_printf(adapter->pdev, "queue %d - RSS bucket %d\n",
1895 		    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1896 #else
1897 		device_printf(adapter->pdev, "queue %d - cpu %d\n",
1898 		    i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
1899 #endif
1900 	}
1901 
1902 	return (rc);
1903 
1904 err:
1905 
1906 	for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) {
1907 		irq = &adapter->irq_tbl[i];
1908 		rcc = 0;
1909 
1910 		/* Once we enter the err: section and irq->requested is true,
1911 		   we free both the interrupt and its resources */
1912 		if (irq->requested == true)
1913 			rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1914 		if (rcc)
1915 			device_printf(adapter->pdev, "could not release"
1916 			    " irq: %d, error: %d\n", irq->vector, rcc);
1917 
1918 		/* If we entered the err: section without irq->requested set,
1919 		   we know it was bus_alloc_resource_any() that needs cleanup,
1920 		   provided res is not NULL. In case res is NULL, no work is
1921 		   needed in this iteration */
1922 		rcc = 0;
1923 		if (irq->res != NULL) {
1924 			rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1925 			    irq->vector, irq->res);
1926 		}
1927 		if (rcc)
1928 			device_printf(adapter->pdev, "dev has no parent while "
1929 			    "releasing res for irq: %d\n", irq->vector);
1930 		irq->requested = false;
1931 		irq->res = NULL;
1932 	}
1933 
1934 	return (rc);
1935 }
1936 
1937 static void
1938 ena_free_mgmnt_irq(struct ena_adapter *adapter)
1939 {
1940 	struct ena_irq *irq;
1941 	int rc;
1942 
1943 	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1944 	if (irq->requested) {
1945 		ena_trace(ENA_INFO | ENA_ADMQ, "tear down irq: %d\n",
1946 		    irq->vector);
1947 		rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie);
1948 		if (rc)
1949 			device_printf(adapter->pdev, "failed to tear "
1950 			    "down irq: %d\n", irq->vector);
1951 		irq->requested = 0;
1952 	}
1953 
1954 	if (irq->res != NULL) {
1955 		ena_trace(ENA_INFO | ENA_ADMQ, "release resource irq: %d\n",
1956 		    irq->vector);
1957 		rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1958 		    irq->vector, irq->res);
1959 		irq->res = NULL;
1960 		if (rc)
1961 			device_printf(adapter->pdev, "dev has no parent while "
1962 			    "releasing res for irq: %d\n", irq->vector);
1963 	}
1964 
1965 	return;
1966 }
1967 
1968 static void
1969 ena_free_io_irq(struct ena_adapter *adapter)
1970 {
1971 	struct ena_irq *irq;
1972 	int rc;
1973 
1974 	for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
1975 		irq = &adapter->irq_tbl[i];
1976 		if (irq->requested) {
1977 			ena_trace(ENA_INFO | ENA_IOQ, "tear down irq: %d\n",
1978 			    irq->vector);
1979 			rc = bus_teardown_intr(adapter->pdev, irq->res,
1980 			    irq->cookie);
1981 			if (rc) {
1982 				device_printf(adapter->pdev, "failed to tear "
1983 				    "down irq: %d\n", irq->vector);
1984 			}
1985 			irq->requested = 0;
1986 		}
1987 
1988 		if (irq->res != NULL) {
1989 			ena_trace(ENA_INFO | ENA_IOQ, "release resource irq: %d\n",
1990 			    irq->vector);
1991 			rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ,
1992 			    irq->vector, irq->res);
1993 			irq->res = NULL;
1994 			if (rc) {
1995 				device_printf(adapter->pdev, "dev has no parent"
1996 				    " while releasing res for irq: %d\n",
1997 				    irq->vector);
1998 			}
1999 		}
2000 	}
2001 
2002 	return;
2003 }
2004 
2005 static void
2006 ena_free_irqs(struct ena_adapter* adapter)
2007 {
2008 
2009 	ena_free_io_irq(adapter);
2010 	ena_free_mgmnt_irq(adapter);
2011 	ena_disable_msix(adapter);
2012 }
2013 
2014 static void
2015 ena_disable_msix(struct ena_adapter *adapter)
2016 {
2017 
2018 	pci_release_msi(adapter->pdev);
2019 
2020 	adapter->msix_vecs = 0;
2021 	ENA_MEM_FREE(adapter->ena_dev->dmadev, adapter->msix_entries);
2022 	adapter->msix_entries = NULL;
2023 }
2024 
2025 static void
2026 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2027 {
2028 	struct ena_com_io_cq* io_cq;
2029 	struct ena_eth_io_intr_reg intr_reg;
2030 	uint16_t ena_qid;
2031 	int i;
2032 
2033 	/* Unmask interrupts for all queues */
2034 	for (i = 0; i < adapter->num_queues; i++) {
2035 		ena_qid = ENA_IO_TXQ_IDX(i);
2036 		io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2037 		ena_com_update_intr_reg(&intr_reg, 0, 0, true);
2038 		ena_com_unmask_intr(io_cq, &intr_reg);
2039 	}
2040 }
2041 
2042 /* Configure the Rx forwarding */
2043 static int ena_rss_configure(struct ena_adapter *adapter)
2044 {
2045 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2046 	int rc;
2047 
2048 	/* Set indirect table */
2049 	rc = ena_com_indirect_table_set(ena_dev);
2050 	if (unlikely(rc && (rc != EPERM)))
2051 		return (rc);
2052 
2053 	/* Configure hash function (if supported) */
2054 	rc = ena_com_set_hash_function(ena_dev);
2055 	if (unlikely(rc && (rc != EPERM)))
2056 		return (rc);
2057 
2058 	/* Configure hash inputs (if supported) */
2059 	rc = ena_com_set_hash_ctrl(ena_dev);
2060 	if (unlikely(rc && (rc != EPERM)))
2061 		return (rc);
2062 
2063 	return (0);
2064 }
2065 
2066 static void
2067 ena_update_hw_stats(void *arg, int pending)
2068 {
2069 	struct ena_adapter *adapter = arg;
2070 	int rc;
2071 
2072 	for (;;) {
2073 		if (!adapter->up)
2074 			return;
2075 
2076 		rc = ena_update_stats_counters(adapter);
2077 		if (rc)
2078 			ena_trace(ENA_WARNING,
2079 			    "Error updating stats counters, rc = %d", rc);
2080 
2081 		pause("ena update hw stats", hz);
2082 	}
2083 }
2084 
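/*
 * Final stage of bringing the interface up: program RSS (when supported),
 * re-apply the MTU, pre-fill all RX rings and unmask the IO interrupts.
 */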
2085 static int
2086 ena_up_complete(struct ena_adapter *adapter)
2087 {
2088 	int rc;
2089 
2090 	if (adapter->rss_support) {
2091 		rc = ena_rss_configure(adapter);
2092 		if (rc)
2093 			return (rc);
2094 	}
2095 
2096 	ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
2097 	ena_refill_all_rx_bufs(adapter);
2098 	ena_unmask_all_io_irqs(adapter);
2099 
2100 	return (0);
2101 }
2102 
2103 static int
2104 ena_up(struct ena_adapter *adapter)
2105 {
2106 	int rc = 0;
2107 
2108 	if (!device_is_attached(adapter->pdev)) {
2109 		device_printf(adapter->pdev, "device is not attached!\n");
2110 		return (ENXIO);
2111 	}
2112 
2113 	if (!adapter->running) {
2114 		device_printf(adapter->pdev, "device is not running!\n");
2115 		return (ENXIO);
2116 	}
2117 
2118 	if (!adapter->up) {
2119 		device_printf(adapter->pdev, "device is going UP\n");
2120 
2121 		/* setup interrupts for IO queues */
2122 		ena_setup_io_intr(adapter);
2123 		rc = ena_request_io_irq(adapter);
2124 		if (rc) {
2125 			ena_trace(ENA_ALERT, "err_req_irq");
2126 			goto err_req_irq;
2127 		}
2128 
2129 		/* allocate transmit descriptors */
2130 		rc = ena_setup_all_tx_resources(adapter);
2131 		if (rc) {
2132 			ena_trace(ENA_ALERT, "err_setup_tx");
2133 			goto err_setup_tx;
2134 		}
2135 
2136 		/* allocate receive descriptors */
2137 		rc = ena_setup_all_rx_resources(adapter);
2138 		if (rc) {
2139 			ena_trace(ENA_ALERT, "err_setup_rx");
2140 			goto err_setup_rx;
2141 		}
2142 
2143 		/* create IO queues for Rx & Tx */
2144 		rc = ena_create_io_queues(adapter);
2145 		if (rc) {
2146 			ena_trace(ENA_ALERT,
2147 			    "create IO queues failed");
2148 			goto err_io_que;
2149 		}
2150 
2151 		if (adapter->link_status)
2152 			if_link_state_change(adapter->ifp, LINK_STATE_UP);
2153 
2154 		rc = ena_up_complete(adapter);
2155 		if (rc)
2156 			goto err_up_complete;
2157 
2158 		counter_u64_add(adapter->dev_stats.interface_up, 1);
2159 
2160 		ena_update_hwassist(adapter);
2161 
2162 		if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
2163 		    IFF_DRV_OACTIVE);
2164 
2165 		callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
2166 		    ena_timer_service, (void *)adapter, 0);
2167 
2168 		taskqueue_enqueue(adapter->stats_tq, &adapter->stats_task);
2169 
2170 		adapter->up = true;
2171 	}
2172 
2173 	return (0);
2174 
2175 err_up_complete:
2176 	ena_destroy_all_io_queues(adapter);
2177 err_io_que:
2178 	ena_free_all_rx_resources(adapter);
2179 err_setup_rx:
2180 	ena_free_all_tx_resources(adapter);
2181 err_setup_tx:
2182 	ena_free_io_irq(adapter);
2183 err_req_irq:
2184 	return (rc);
2185 }
2186 
2187 int
2188 ena_update_stats_counters(struct ena_adapter *adapter)
2189 {
2190 	struct ena_admin_basic_stats ena_stats;
2191 	struct ena_hw_stats *stats = &adapter->hw_stats;
2192 	int rc = 0;
2193 
2194 	if (!adapter->up)
2195 		return (rc);
2196 
2197 	rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats);
2198 	if (rc)
2199 		return (rc);
2200 
2201 	stats->tx_bytes = ((uint64_t)ena_stats.tx_bytes_high << 32) |
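	/* The device reports each 64-bit counter as two 32-bit halves. */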
2202 		ena_stats.tx_bytes_low;
2203 	stats->rx_bytes = ((uint64_t)ena_stats.rx_bytes_high << 32) |
2204 		ena_stats.rx_bytes_low;
2205 
2206 	stats->rx_packets = ((uint64_t)ena_stats.rx_pkts_high << 32) |
2207 		ena_stats.rx_pkts_low;
2208 	stats->tx_packets = ((uint64_t)ena_stats.tx_pkts_high << 32) |
2209 		ena_stats.tx_pkts_low;
2210 
2211 	stats->rx_drops = ((uint64_t)ena_stats.rx_drops_high << 32) |
2212 		ena_stats.rx_drops_low;
2213 
2214 	return (0);
2215 }
2216 
2217 static uint64_t
2218 ena_get_counter(if_t ifp, ift_counter cnt)
2219 {
2220 	struct ena_adapter *adapter;
2221 	struct ena_hw_stats *stats;
2222 
2223 	adapter = if_getsoftc(ifp);
2224 	stats = &adapter->hw_stats;
2225 
2226 	switch (cnt) {
2227 	case IFCOUNTER_IPACKETS:
2228 		return (stats->rx_packets);
2229 	case IFCOUNTER_OPACKETS:
2230 		return (stats->tx_packets);
2231 	case IFCOUNTER_IBYTES:
2232 		return (stats->rx_bytes);
2233 	case IFCOUNTER_OBYTES:
2234 		return (stats->tx_bytes);
2235 	case IFCOUNTER_IQDROPS:
2236 		return (stats->rx_drops);
2237 	default:
2238 		return (if_get_counter_default(ifp, cnt));
2239 	}
2240 }
2241 
2242 static int
2243 ena_media_change(if_t ifp)
2244 {
2245 	/* Media Change is not supported by firmware */
2246 	return (0);
2247 }
2248 
2249 static void
2250 ena_media_status(if_t ifp, struct ifmediareq *ifmr)
2251 {
2252 	struct ena_adapter *adapter = if_getsoftc(ifp);
2253 	ena_trace(ENA_DBG, "enter");
2254 
2255 	ENA_DEV_LOCK;
2256 
2257 	ifmr->ifm_status = IFM_AVALID;
2258 	ifmr->ifm_active = IFM_ETHER;
2259 
2260 	if (!adapter->link_status) {
2261 		ENA_DEV_UNLOCK;
2262 		ena_trace(ENA_WARNING, "link_status = false");
2263 		return;
2264 	}
2265 
2266 	ifmr->ifm_status |= IFM_ACTIVE;
2267 	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
2268 
2269 	ENA_DEV_UNLOCK;
2270 
2271 	return;
2272 }
2273 
2274 static void
2275 ena_init(void *arg)
2276 {
2277 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
2278 
2279 	if (adapter->up == false)
2280 		ena_up(adapter);
2281 
2282 	return;
2283 }
2284 
2285 static int
2286 ena_ioctl(if_t ifp, u_long command, caddr_t data)
2287 {
2288 	struct ena_adapter *adapter;
2289 	struct ifreq *ifr;
2290 	int rc;
2291 
2292 	adapter = ifp->if_softc;
2293 	ifr = (struct ifreq *)data;
2294 
2295 	/*
2296 	 * Acquire a lock to prevent the up and down routines from running in parallel.
2297 	 */
2298 	rc = 0;
2299 	switch (command) {
2300 	case SIOCSIFMTU:
2301 		sx_xlock(&adapter->ioctl_sx);
2302 		ena_down(adapter);
2303 
2304 		ena_change_mtu(ifp, ifr->ifr_mtu);
2305 
2306 		rc = ena_up(adapter);
2307 		sx_unlock(&adapter->ioctl_sx);
2308 		break;
2309 
2310 	case SIOCSIFFLAGS:
2311 		if (ifp->if_flags & IFF_UP) {
2312 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2313 				if (ifp->if_flags & (IFF_PROMISC |
2314 				    IFF_ALLMULTI)) {
2315 					device_printf(adapter->pdev,
2316 					    "ioctl promisc/allmulti\n");
2317 				}
2318 			} else {
2319 				sx_xlock(&adapter->ioctl_sx);
2320 				rc = ena_up(adapter);
2321 				sx_unlock(&adapter->ioctl_sx);
2322 			}
2323 		} else {
2324 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2325 				sx_xlock(&adapter->ioctl_sx);
2326 				ena_down(adapter);
2327 				sx_unlock(&adapter->ioctl_sx);
2328 			}
2329 		}
2330 		break;
2331 
2332 	case SIOCADDMULTI:
2333 	case SIOCDELMULTI:
2334 		break;
2335 
2336 	case SIOCSIFMEDIA:
2337 	case SIOCGIFMEDIA:
2338 		rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
2339 		break;
2340 
2341 	case SIOCSIFCAP:
2342 		{
2343 			int reinit = 0;
2344 
2345 			if (ifr->ifr_reqcap != ifp->if_capenable) {
2346 				ifp->if_capenable = ifr->ifr_reqcap;
2347 				reinit = 1;
2348 			}
2349 
2350 			if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2351 				sx_xlock(&adapter->ioctl_sx);
2352 				ena_down(adapter);
2353 				rc = ena_up(adapter);
2354 				sx_unlock(&adapter->ioctl_sx);
2355 			}
2356 		}
2357 
2358 		break;
2359 	default:
2360 		rc = ether_ioctl(ifp, command, data);
2361 		break;
2362 	}
2363 
2364 	return (rc);
2365 }
2366 
2367 static int
2368 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2369 {
2370 	int caps = 0;
2371 
2372 	if (feat->offload.tx &
2373 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2374 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2375 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK))
2376 		caps |= IFCAP_TXCSUM;
2377 
2378 	if (feat->offload.tx &
2379 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2380 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK))
2381 		caps |= IFCAP_TXCSUM_IPV6;
2382 
2383 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
2384 		caps |= IFCAP_TSO4;
2385 
2386 	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
2387 		caps |= IFCAP_TSO6;
2388 
2389 	if (feat->offload.rx_supported &
2390 	    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2391 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK))
2392 		caps |= IFCAP_RXCSUM;
2393 
2394 	if (feat->offload.rx_supported &
2395 	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
2396 		caps |= IFCAP_RXCSUM_IPV6;
2397 
2398 	caps |= IFCAP_LRO | IFCAP_JUMBO_MTU;
2399 
2400 	return (caps);
2401 }
2402 
2403 static void
2404 ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp)
2405 {
2406 
2407 	host_info->supported_network_features[0] =
2408 	    (uint32_t)if_getcapabilities(ifp);
2409 }
2410 
2411 static void
2412 ena_update_hwassist(struct ena_adapter *adapter)
2413 {
2414 	if_t ifp = adapter->ifp;
2415 	uint32_t feat = adapter->tx_offload_cap;
2416 	int cap = if_getcapenable(ifp);
2417 	int flags = 0;
2418 
2419 	if_clearhwassist(ifp);
2420 
2421 	if (cap & IFCAP_TXCSUM) {
2422 		if (feat & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)
2423 			flags |= CSUM_IP;
2424 		if (feat &
2425 		    (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2426 		    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK))
2427 			flags |= CSUM_IP_UDP | CSUM_IP_TCP;
2428 	}
2429 
2430 	if (cap & IFCAP_TXCSUM_IPV6)
2431 		flags |= CSUM_IP6_UDP | CSUM_IP6_TCP;
2432 
2433 	if (cap & IFCAP_TSO4)
2434 		flags |= CSUM_IP_TSO;
2435 
2436 	if (cap & IFCAP_TSO6)
2437 		flags |= CSUM_IP6_TSO;
2438 
2439 	if_sethwassistbits(ifp, flags, 0);
2440 }
2441 
2442 static int
2443 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2444     struct ena_com_dev_get_features_ctx *feat)
2445 {
2446 	if_t ifp;
2447 	int caps = 0;
2448 
2449 	ena_trace(ENA_DBG, "enter");
2450 
2451 	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
2452 	if (ifp == NULL) {
2453 		device_printf(pdev, "cannot allocate ifnet structure\n");
2454 		return (ENXIO);
2455 	}
2456 	if_initname(ifp, device_get_name(pdev), device_get_unit(pdev));
2457 	if_setdev(ifp, pdev);
2458 	if_setsoftc(ifp, adapter);
2459 
2460 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2461 	if_setinitfn(ifp, ena_init);
2462 	if_settransmitfn(ifp, ena_mq_start);
2463 	if_setqflushfn(ifp, ena_qflush);
2464 	if_setioctlfn(ifp, ena_ioctl);
2465 	if_setgetcounterfn(ifp, ena_get_counter);
2466 
2467 	if_setsendqlen(ifp, adapter->tx_ring_size);
2468 	if_setsendqready(ifp);
2469 	if_setmtu(ifp, ETHERMTU);
2470 	if_setbaudrate(ifp, 0);
2471 	/* Zeroize capabilities... */
2472 	if_setcapabilities(ifp, 0);
2473 	if_setcapenable(ifp, 0);
2474 	/* check hardware support */
2475 	caps = ena_get_dev_offloads(feat);
2476 	/* ... and set them */
2477 	if_setcapabilitiesbit(ifp, caps, 0);
2478 
2479 	/* TSO parameters */
2480 	ifp->if_hw_tsomax = ENA_TSO_MAXSIZE;
2481 	ifp->if_hw_tsomaxsegcount = ENA_TSO_NSEGS;
2482 	ifp->if_hw_tsomaxsegsize = MCLBYTES;
2483 
2484 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2485 	if_setcapenable(ifp, if_getcapabilities(ifp));
2486 
2487 	/*
2488 	 * Specify the media types supported by this adapter and register
2489 	 * callbacks to update media and link information
2490 	 */
2491 	ifmedia_init(&adapter->media, IFM_IMASK,
2492 	    ena_media_change, ena_media_status);
2493 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2494 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2495 
2496 	ether_ifattach(ifp, adapter->mac_addr);
2497 
2498 	return (0);
2499 }
2500 
2501 static void
2502 ena_down(struct ena_adapter *adapter)
2503 {
2504 
2505 	if (adapter->up) {
2506 		device_printf(adapter->pdev, "device is going DOWN\n");
2507 
2508 		callout_drain(&adapter->timer_service);
2509 
2510 		adapter->up = false;
2511 		if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
2512 		    IFF_DRV_RUNNING);
2513 
2514 		/* Drain task responsible for updating hw stats */
2515 		while (taskqueue_cancel(adapter->stats_tq, &adapter->stats_task, NULL))
2516 			taskqueue_drain(adapter->stats_tq, &adapter->stats_task);
2517 
2518 		ena_free_io_irq(adapter);
2519 
2520 		ena_destroy_all_io_queues(adapter);
2521 
2522 		ena_free_all_tx_bufs(adapter);
2523 		ena_free_all_rx_bufs(adapter);
2524 		ena_free_all_tx_resources(adapter);
2525 		ena_free_all_rx_resources(adapter);
2526 
2527 		counter_u64_add(adapter->dev_stats.interface_down, 1);
2528 	}
2529 
2530 	return;
2531 }
2532 
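/*
 * Examine the outgoing mbuf and fill in the TX context's checksum and TSO
 * metadata (L3/L4 protocol, header lengths and offsets, MSS) so that the
 * device can perform the requested offloads.
 */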
2533 static void
2534 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf)
2535 {
2536 	struct ena_com_tx_meta *ena_meta;
2537 	struct ether_vlan_header *eh;
2538 	u32 mss;
2539 	bool offload;
2540 	uint16_t etype;
2541 	int ehdrlen;
2542 	struct ip *ip;
2543 	int iphlen;
2544 	struct tcphdr *th;
2545 
2546 	offload = false;
2547 	ena_meta = &ena_tx_ctx->ena_meta;
2548 	mss = mbuf->m_pkthdr.tso_segsz;
2549 
2550 	if (mss != 0)
2551 		offload = true;
2552 
2553 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
2554 		offload = true;
2555 
2556 	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
2557 		offload = true;
2558 
2559 	if (offload == false) {
2560 		ena_tx_ctx->meta_valid = 0;
2561 		return;
2562 	}
2563 
2564 	/* Determine where frame payload starts. */
2565 	eh = mtod(mbuf, struct ether_vlan_header *);
2566 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2567 		etype = ntohs(eh->evl_proto);
2568 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2569 	} else {
2570 		etype = ntohs(eh->evl_encap_proto);
2571 		ehdrlen = ETHER_HDR_LEN;
2572 	}
2573 
2574 	ip = (struct ip *)(mbuf->m_data + ehdrlen);
2575 	iphlen = ip->ip_hl << 2;
2576 	th = (struct tcphdr *)((caddr_t)ip + iphlen);
2577 
2578 	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
2579 		ena_tx_ctx->l3_csum_enable = 1;
2580 	}
2581 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
2582 		ena_tx_ctx->tso_enable = 1;
2583 		ena_meta->l4_hdr_len = (th->th_off);
2584 	}
2585 
2586 	switch (etype) {
2587 	case ETHERTYPE_IP:
2588 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2589 		if ((ip->ip_off & htons(IP_DF)) != 0)
2590 			ena_tx_ctx->df = 1;
2591 		break;
2592 	case ETHERTYPE_IPV6:
2593 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2594 		break;
2595 	default:
2596 		break;
2597 	}
2598 
2599 	if (ip->ip_p == IPPROTO_TCP) {
2600 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2601 		if (mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
2602 		    ena_tx_ctx->l4_csum_enable = 1;
2603 		else
2604 		    ena_tx_ctx->l4_csum_enable = 0;
2605 	} else if (ip->ip_p == IPPROTO_UDP) {
2606 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2607 		if (mbuf->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
2608 		    ena_tx_ctx->l4_csum_enable = 1;
2609 		else
2610 		    ena_tx_ctx->l4_csum_enable = 0;
2611 	} else {
2612 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
2613 		ena_tx_ctx->l4_csum_enable = 0;
2614 	}
2615 
2616 	ena_meta->mss = mss;
2617 	ena_meta->l3_hdr_len = iphlen;
2618 	ena_meta->l3_hdr_offset = ehdrlen;
2619 	ena_tx_ctx->meta_valid = 1;
2620 }
2621 
2622 static int
2623 ena_check_and_defragment_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2624 {
2625 	struct ena_adapter *adapter;
2626 	struct mbuf *defrag_mbuf;
2627 	int num_frags;
2628 
2629 	adapter = tx_ring->adapter;
2630 	num_frags = ena_mbuf_count(*mbuf);
2631 
2632 	/* One segment must be reserved for configuration descriptor. */
2633 	if (num_frags < adapter->max_tx_sgl_size)
2634 		return (0);
2635 	counter_u64_add(tx_ring->tx_stats.defragment, 1);
2636 
2637 	defrag_mbuf = m_defrag(*mbuf, M_NOWAIT);
2638 	if (defrag_mbuf == NULL) {
2639 		counter_u64_add(tx_ring->tx_stats.defragment_err, 1);
2640 		return (ENOMEM);
2641 	}
2642 
2643 	/* If the mbuf was defragmented successfully, the original mbuf is released. */
2644 	*mbuf = defrag_mbuf;
2645 
2646 	return (0);
2647 }
2648 
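/*
 * Post a single mbuf chain to the TX ring: defragment it if it carries
 * too many segments, DMA-map it, fill in the TX context (including the
 * offload metadata) and hand the descriptors over to the device.
 */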
2649 static int
2650 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2651 {
2652 	struct ena_adapter *adapter;
2653 	struct ena_tx_buffer *tx_info;
2654 	struct ena_com_tx_ctx ena_tx_ctx;
2655 	struct ena_com_dev *ena_dev;
2656 	struct ena_com_buf *ena_buf;
2657 	struct ena_com_io_sq* io_sq;
2658 	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
2659 	void *push_hdr;
2660 	uint16_t next_to_use;
2661 	uint16_t req_id;
2662 	uint16_t push_len;
2663 	uint16_t ena_qid;
2664 	uint32_t len, nsegs, header_len;
2665 	int i, rc;
2666 	int nb_hw_desc;
2667 
2668 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
2669 	adapter = tx_ring->que->adapter;
2670 	ena_dev = adapter->ena_dev;
2671 	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
2672 
2673 	ENA_ASSERT(*mbuf, "mbuf is NULL\n");
2674 
2675 	rc = ena_check_and_defragment_mbuf(tx_ring, mbuf);
2676 	if (rc) {
2677 		ena_trace(ENA_WARNING,
2678 		    "Failed to defragment mbuf! err: %d", rc);
2679 		return (rc);
2680 	}
2681 
2682 	next_to_use = tx_ring->next_to_use;
2683 	req_id = tx_ring->free_tx_ids[next_to_use];
2684 	tx_info = &tx_ring->tx_buffer_info[req_id];
2685 
2686 	tx_info->mbuf = *mbuf;
2687 	tx_info->num_of_bufs = 0;
2688 
2689 	ena_buf = tx_info->bufs;
2690 	len = (*mbuf)->m_len;
2691 
2692 	ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", (*mbuf)->m_pkthdr.len);
2693 
2694 	push_len = 0;
2695 	header_len = min_t(uint32_t, len, tx_ring->tx_max_header_size);
2696 	push_hdr = NULL;
2697 
2698 	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->map,
2699 	    *mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
2700 
2701 	if (rc || (nsegs == 0)) {
2702 		ena_trace(ENA_WARNING,
2703 		    "dmamap load failed! err: %d nsegs: %d", rc, nsegs);
2704 		counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
2705 		tx_info->mbuf = NULL;
2706 		if (rc == ENOMEM)
2707 			return (ENA_COM_NO_MEM);
2708 		else
2709 			return (ENA_COM_INVAL);
2710 	}
2711 
2712 	for (i = 0; i < nsegs; i++) {
2713 		ena_buf->len = segs[i].ds_len;
2714 		ena_buf->paddr = segs[i].ds_addr;
2715 		ena_buf++;
2716 	}
2717 	tx_info->num_of_bufs = nsegs;
2718 
2719 	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2720 	ena_tx_ctx.ena_bufs = tx_info->bufs;
2721 	ena_tx_ctx.push_header = push_hdr;
2722 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2723 	ena_tx_ctx.req_id = req_id;
2724 	ena_tx_ctx.header_len = header_len;
2725 
2726 	/* Set flags and meta data */
2727 	ena_tx_csum(&ena_tx_ctx, *mbuf);
2728 	/* Prepare the packet's descriptors and send them to device */
2729 	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
2730 	if (rc != 0) {
2731 		ena_trace(ENA_WARNING, "failed to prepare tx bufs\n");
2732 		counter_enter();
2733 		counter_u64_add_protected(tx_ring->tx_stats.queue_stop, 1);
2734 		counter_u64_add_protected(tx_ring->tx_stats.prepare_ctx_err, 1);
2735 		counter_exit();
2736 		goto dma_error;
2737 	}
2738 
2739 	counter_enter();
2740 	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
2741 	counter_u64_add_protected(tx_ring->tx_stats.bytes,  (*mbuf)->m_pkthdr.len);
2742 	counter_exit();
2743 
2744 	tx_info->tx_descs = nb_hw_desc;
2745 	getbinuptime(&tx_info->timestamp);
2746 	tx_info->print_once = true;
2747 
2748 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
2749 	    tx_ring->ring_size);
2750 
2751 	bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map, BUS_DMASYNC_PREWRITE);
2752 
2753 	return (0);
2754 
2755 dma_error:
2756 	tx_info->mbuf = NULL;
2757 	bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
2758 
2759 	return (rc);
2760 }
2761 
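/*
 * Drain the TX queue's buf ring, posting packets to the device and
 * writing the doorbell every DB_THRESHOLD packets to amortize its cost.
 */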
2762 static void
2763 ena_start_xmit(struct ena_ring *tx_ring)
2764 {
2765 	struct mbuf *mbuf;
2766 	struct ena_adapter *adapter = tx_ring->adapter;
2767 	struct ena_com_io_sq* io_sq;
2768 	int ena_qid;
2769 	int acum_pkts = 0;
2770 	int ret = 0;
2771 
2772 	if ((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2773 		return;
2774 
2775 	if (!adapter->link_status)
2776 		return;
2777 
2778 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
2779 	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
2780 
2781 	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
2782 		ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
2783 		    " header csum flags %#jx",
2784 		    mbuf, mbuf->m_flags, mbuf->m_pkthdr.csum_flags);
2785 
2786 		if (ena_com_sq_empty_space(io_sq) < ENA_TX_CLEANUP_TRESHOLD)
2787 			ena_tx_cleanup(tx_ring);
2788 
2789 		if ((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0) {
2790 			if (ret == ENA_COM_NO_MEM) {
2791 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
2792 			} else if (ret == ENA_COM_NO_SPACE) {
2793 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
2794 			} else {
2795 				m_freem(mbuf);
2796 				drbr_advance(adapter->ifp, tx_ring->br);
2797 			}
2798 
2799 			break;
2800 		}
2801 
2802 		if ((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2803 			return;
2804 
2805 		drbr_advance(adapter->ifp, tx_ring->br);
2806 		acum_pkts++;
2807 
2808 		BPF_MTAP(adapter->ifp, mbuf);
2809 
2810 		if (acum_pkts == DB_THRESHOLD) {
2811 			acum_pkts = 0;
2812 			wmb();
2813 			/* Trigger the dma engine */
2814 			ena_com_write_sq_doorbell(io_sq);
2815 			counter_u64_add(tx_ring->tx_stats.doorbells, 1);
2816 		}
2817 
2818 	}
2819 
2820 	if (acum_pkts) {
2821 		wmb();
2822 		/* Trigger the dma engine */
2823 		ena_com_write_sq_doorbell(io_sq);
2824 		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
2825 	}
2826 
2827 	if (ena_com_sq_empty_space(io_sq) < ENA_TX_CLEANUP_TRESHOLD)
2828 		ena_tx_cleanup(tx_ring);
2829 }
2830 
2831 static void
2832 ena_deferred_mq_start(void *arg, int pending)
2833 {
2834 	struct ena_ring *tx_ring = (struct ena_ring *)arg;
2835 	struct ifnet *ifp = tx_ring->adapter->ifp;
2836 
2837 	while (drbr_empty(ifp, tx_ring->br) == FALSE &&
2838 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
2839 		ENA_RING_MTX_LOCK(tx_ring);
2840 		ena_start_xmit(tx_ring);
2841 		ENA_RING_MTX_UNLOCK(tx_ring);
2842 	}
2843 }
2844 
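/*
 * if_transmit callback: pick a TX queue based on the mbuf's flow id
 * (falling back to the current CPU), enqueue the mbuf on that queue's buf
 * ring and either transmit directly or defer to the queue's taskqueue.
 */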
2845 static int
2846 ena_mq_start(if_t ifp, struct mbuf *m)
2847 {
2848 	struct ena_adapter *adapter = ifp->if_softc;
2849 	struct ena_ring *tx_ring;
2850 	int ret, is_drbr_empty;
2851 	uint32_t i;
2852 
2853 	if ((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2854 		return (ENODEV);
2855 
2856 	/* Which queue to use */
2857 	/*
2858 	 * If everything is set up correctly, it should be the same bucket
2859 	 * as the one the current CPU is in; that should improve performance.
2861 	 */
2862 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
2863 #ifdef	RSS
2864 		if (rss_hash2bucket(m->m_pkthdr.flowid,
2865 		    M_HASHTYPE_GET(m), &i) == 0) {
2866 			i = i % adapter->num_queues;
2867 
2868 		} else
2869 #endif
2870 		{
2871 			i = m->m_pkthdr.flowid % adapter->num_queues;
2872 		}
2873 	} else {
2874 		i = curcpu % adapter->num_queues;
2875 	}
2876 	tx_ring = &adapter->tx_ring[i];
2877 
2878 	/* Check if drbr is empty before putting packet */
2879 	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
2880 	ret = drbr_enqueue(ifp, tx_ring->br, m);
2881 	if (ret) {
2882 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
2883 		return (ret);
2884 	}
2885 
2886 	if (is_drbr_empty && ENA_RING_MTX_TRYLOCK(tx_ring)) {
2887 		ena_start_xmit(tx_ring);
2888 		ENA_RING_MTX_UNLOCK(tx_ring);
2889 	} else {
2890 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
2891 	}
2892 
2893 	return (0);
2894 }
2895 
2896 static void
2897 ena_qflush(if_t ifp)
2898 {
2899 	struct ena_adapter *adapter = ifp->if_softc;
2900 	struct ena_ring *tx_ring = adapter->tx_ring;
2901 	int i;
2902 
2903 	for (i = 0; i < adapter->num_queues; ++i, ++tx_ring)
2904 		if (drbr_empty(ifp, tx_ring->br) == FALSE) {
2905 			ENA_RING_MTX_LOCK(tx_ring);
2906 			drbr_flush(ifp, tx_ring->br);
2907 			ENA_RING_MTX_UNLOCK(tx_ring);
2908 		}
2909 
2910 	if_qflush(ifp);
2911 
2912 	return;
2913 }
2914 
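/*
 * The number of IO queues is bounded by the number of CPUs, the device's
 * SQ/CQ limits, the available MSI-X vectors (minus one for management)
 * and, when RSS is compiled in, the number of RSS buckets.
 */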
2915 static int ena_calc_io_queue_num(struct ena_adapter *adapter,
2916     struct ena_com_dev_get_features_ctx *get_feat_ctx)
2917 {
2918 	int io_sq_num, io_cq_num, io_queue_num;
2919 
2920 	io_sq_num = get_feat_ctx->max_queues.max_sq_num;
2921 	io_cq_num = get_feat_ctx->max_queues.max_cq_num;
2922 
2923 	io_queue_num = min_t(int, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
2924 	io_queue_num = min_t(int, io_queue_num, io_sq_num);
2925 	io_queue_num = min_t(int, io_queue_num, io_cq_num);
2926 	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
2927 	io_queue_num = min_t(int, io_queue_num,
2928 	    pci_msix_count(adapter->pdev) - 1);
2929 #ifdef	RSS
2930 	io_queue_num = min_t(int, io_queue_num, rss_getnumbuckets());
2931 #endif
2932 
2933 	return (io_queue_num);
2934 }
2935 
2936 static int ena_calc_queue_size(struct ena_adapter *adapter,
2937     uint16_t *max_tx_sgl_size,  uint16_t *max_rx_sgl_size,
2938     struct ena_com_dev_get_features_ctx *feat)
2939 {
2940 	uint32_t queue_size = ENA_DEFAULT_RING_SIZE;
2943 
2944 	queue_size = min_t(uint32_t, queue_size,
2945 	    feat->max_queues.max_cq_depth);
2946 	queue_size = min_t(uint32_t, queue_size,
2947 	    feat->max_queues.max_sq_depth);
2948 
2949 	/* round down to the nearest power of 2 */
2950 	if (!powerof2(queue_size))
2951 		queue_size = 1 << (fls(queue_size) - 1);
2961 
2962 	if (unlikely(!queue_size)) {
2963 		device_printf(adapter->pdev, "Invalid queue size\n");
2964 		return (ENA_COM_FAULT);
2965 	}
2966 
2967 	*max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2968 	    feat->max_queues.max_packet_tx_descs);
2969 	*max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
2970 	    feat->max_queues.max_packet_rx_descs);
2971 
2972 	return (queue_size);
2973 }
2974 
2975 static int ena_rss_init_default(struct ena_adapter *adapter)
2976 {
2977 	struct ena_com_dev *ena_dev = adapter->ena_dev;
2978 	device_t dev = adapter->pdev;
2979 	int qid, rc, i;
2980 
2981 	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
2982 	if (unlikely(rc)) {
2983 		device_printf(dev, "Cannot init RSS\n");
2984 		goto err_rss_init;
2985 	}
2986 
2987 	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
2988 #ifdef	RSS
2989 		qid = rss_get_indirection_to_bucket(i);
2990 		qid = qid % adapter->num_queues;
2991 #else
2992 		qid = i % adapter->num_queues;
2993 #endif
2994 		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
2995 						       ENA_IO_RXQ_IDX(qid));
2996 		if (unlikely(rc && (rc != EPERM))) {
2997 			device_printf(dev, "Cannot fill indirect table\n");
2998 			goto err_fill_indir;
2999 		}
3000 	}
3001 
3002 	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3003 					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3004 	if (unlikely(rc && (rc != EPERM))) {
3005 		device_printf(dev, "Cannot fill hash function\n");
3006 		goto err_fill_indir;
3007 	}
3008 
3009 	rc = ena_com_set_default_hash_ctrl(ena_dev);
3010 	if (unlikely(rc && (rc != EPERM))) {
3011 		device_printf(dev, "Cannot fill hash control\n");
3012 		goto err_fill_indir;
3013 	}
3014 
3015 	return (0);
3016 
3017 err_fill_indir:
3018 	ena_com_rss_destroy(ena_dev);
3019 err_rss_init:
3020 	return (rc);
3021 }
3022 
3023 static void
3024 ena_rss_init_default_deferred(void *arg)
3025 {
3026 	struct ena_adapter *adapter;
3027 	devclass_t dc;
3028 	int max;
3029 	int rc;
3030 
3031 	dc = devclass_find("ena");
3032 	if (dc == NULL) {
3033 		ena_trace(ENA_DBG, "No devclass ena\n");
3034 		return;
3035 	}
3036 
3037 	max = devclass_get_maxunit(dc);
3038 	while (max-- >= 0) {
3039 		adapter = devclass_get_softc(dc, max);
3040 		if (adapter != NULL) {
3041 			rc = ena_rss_init_default(adapter);
3042 			adapter->rss_support = true;
3043 			if (rc) {
3044 				device_printf(adapter->pdev,
3045 				    "WARNING: RSS was not properly initialized,"
3046 				    " it will affect bandwidth\n");
3047 				adapter->rss_support = false;
3048 			}
3049 		}
3050 	}
3051 }
3052 SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
3053 
3054 static void ena_config_host_info(struct ena_com_dev *ena_dev)
3055 {
3056 	struct ena_admin_host_info *host_info;
3057 	int rc;
3058 
3059 	/* Allocate only the host info */
3060 	rc = ena_com_allocate_host_info(ena_dev);
3061 	if (rc) {
3062 		ena_trace(ENA_ALERT, "Cannot allocate host info\n");
3063 		return;
3064 	}
3065 
3066 	host_info = ena_dev->host_attr.host_info;
3067 
3068 	host_info->os_type = ENA_ADMIN_OS_FREEBSD;
3069 	host_info->kernel_ver = osreldate;
3070 
3071 	sprintf(host_info->kernel_ver_str, "%d", osreldate);
3072 	host_info->os_dist = 0;
3073 	strncpy(host_info->os_dist_str, osrelease,
3074 	    sizeof(host_info->os_dist_str) - 1);
3075 
3076 	host_info->driver_version =
3077 		(DRV_MODULE_VER_MAJOR) |
3078 		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3079 		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
3080 
3081 	rc = ena_com_set_host_attributes(ena_dev);
3082 	if (rc) {
3083 		if (rc == EPERM)
3084 			ena_trace(ENA_WARNING, "Cannot set host attributes\n");
3085 		else
3086 			ena_trace(ENA_ALERT, "Cannot set host attributes\n");
3087 
3088 		goto err;
3089 	}
3090 
3091 	return;
3092 
3093 err:
3094 	ena_com_delete_host_info(ena_dev);
3095 }
3096 
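/*
 * Bring the device's admin layer up: reset the device, validate the
 * firmware version, initialize the admin queue in polling mode, fetch the
 * device attributes and configure the AENQ groups.
 */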
3097 static int
3098 ena_device_init(struct ena_adapter *adapter, device_t pdev,
3099 	struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
3100 {
3101 	struct ena_com_dev* ena_dev = adapter->ena_dev;
3102 	bool readless_supported;
3103 	uint32_t aenq_groups;
3104 	int dma_width;
3105 	int rc;
3106 
3107 	rc = ena_com_mmio_reg_read_request_init(ena_dev);
3108 	if (rc) {
3109 		device_printf(pdev, "failed to init mmio read less\n");
3110 		return (rc);
3111 	}
3112 
3113 	/*
3114 	 * The PCIe configuration space revision id indicates whether MMIO
3115 	 * register read is disabled.
3116 	 */
3117 	readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ);
3118 	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3119 
3120 	rc = ena_com_dev_reset(ena_dev);
3121 	if (rc) {
3122 		device_printf(pdev, "Can not reset device\n");
3123 		goto err_mmio_read_less;
3124 	}
3125 
3126 	rc = ena_com_validate_version(ena_dev);
3127 	if (rc) {
3128 		device_printf(pdev, "device version is too low\n");
3129 		goto err_mmio_read_less;
3130 	}
3131 
3132 	dma_width = ena_com_get_dma_width(ena_dev);
3133 	if (dma_width < 0) {
3134 		device_printf(pdev, "Invalid dma width value %d\n", dma_width);
3135 		rc = dma_width;
3136 		goto err_mmio_read_less;
3137 	}
3138 	adapter->dma_width = dma_width;
3139 
3140 	/* ENA admin level init */
3141 	rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
3142 	if (rc) {
3143 		device_printf(pdev,
3144 		    "Can not initialize ena admin queue with device\n");
3145 		goto err_mmio_read_less;
3146 	}
3147 
3148 	/*
3149 	 * To enable MSI-X interrupts, the driver needs to know the number
3150 	 * of queues, so it uses polling mode to retrieve this
3151 	 * information.
3152 	 */
3153 	ena_com_set_admin_polling_mode(ena_dev, true);
3154 
3155 	ena_config_host_info(ena_dev);
3156 
3157 	/* Get Device Attributes */
3158 	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3159 	if (rc) {
3160 		device_printf(pdev,
3161 		    "Cannot get attribute for ena device rc: %d\n", rc);
3162 		goto err_admin_init;
3163 	}
3164 
3165 	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3166 	    BIT(ENA_ADMIN_FATAL_ERROR) |
3167 	    BIT(ENA_ADMIN_WARNING) |
3168 	    BIT(ENA_ADMIN_NOTIFICATION) |
3169 	    BIT(ENA_ADMIN_KEEP_ALIVE);
3170 
3171 	aenq_groups &= get_feat_ctx->aenq.supported_groups;
3172 	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3173 	if (rc) {
3174 		device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
3175 		goto err_admin_init;
3176 	}
3177 
3178 	*wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3179 
3180 	return (0);
3181 
3182 err_admin_init:
3183 	ena_com_delete_host_info(ena_dev);
3184 	ena_com_admin_destroy(ena_dev);
3185 err_mmio_read_less:
3186 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3187 
3188 	return (rc);
3189 }
3190 
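/*
 * Enable MSI-X, hook up the management interrupt and switch the admin
 * queue from polling mode to interrupt mode.
 */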
3191 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
3192     int io_vectors)
3193 {
3194 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3195 	int rc;
3196 
3197 	rc = ena_enable_msix(adapter);
3198 	if (rc) {
3199 		device_printf(adapter->pdev, "Error with MSI-X enablement\n");
3200 		return (rc);
3201 	}
3202 
3203 	ena_setup_mgmnt_intr(adapter);
3204 
3205 	rc = ena_request_mgmnt_irq(adapter);
3206 	if (rc) {
3207 		device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
3208 		goto err_disable_msix;
3209 	}
3210 
3211 	ena_com_set_admin_polling_mode(ena_dev, false);
3212 
3213 	ena_com_admin_aenq_enable(ena_dev);
3214 
3215 	return (0);
3216 
3217 err_disable_msix:
3218 	ena_disable_msix(adapter);
3219 
3220 	return (rc);
3221 }
3222 
3223 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
3224 static void ena_keep_alive_wd(void *adapter_data,
3225     struct ena_admin_aenq_entry *aenq_e)
3226 {
3227 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3228 	sbintime_t stime;
3229 
3230 	stime = getsbinuptime();
3231 	atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
3232 }
3233 
3234 /* Check for keep alive expiration */
3235 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3236 {
3237 	sbintime_t timestamp, time;
3238 
3239 	if (adapter->wd_active == 0)
3240 		return;
3241 
3242 	if (adapter->keep_alive_timeout == 0)
3243 		return;
3244 
3245 	timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
3246 	time = getsbinuptime() - timestamp;
3247 	if (unlikely(time > adapter->keep_alive_timeout)) {
3248 		device_printf(adapter->pdev,
3249 		    "Keep alive watchdog timeout.\n");
3250 		counter_u64_add(adapter->dev_stats.wd_expired, 1);
3251 		adapter->trigger_reset = true;
3252 	}
3253 }
3254 
3255 /* Check if admin queue is enabled */
3256 static void check_for_admin_com_state(struct ena_adapter *adapter)
3257 {
3258 	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3259 		device_printf(adapter->pdev,
3260 		    "ENA admin queue is not in running state!\n");
3261 		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3262 		adapter->trigger_reset = true;
3263 	}
3264 }
3265 
3266 /*
3267  * Check for TX transactions which were not completed on time.
3268  * The timeout is defined by "missing_tx_timeout".
3269  * A reset will be performed if the number of incomplete
3270  * transactions exceeds "missing_tx_threshold".
3271  */
3272 static void check_for_missing_tx_completions(struct ena_adapter *adapter)
3273 {
3274 	struct ena_ring *tx_ring;
3275 	struct ena_tx_buffer *tx_info;
3276 	struct bintime curtime, time;
3277 	int i, j, budget, missed_tx;
3278 
3279 	/* Make sure the device isn't being turned on/off by another process */
3280 	rmb();
3281 
3282 	if (!adapter->up)
3283 		return;
3284 
3285 	if (adapter->trigger_reset)
3286 		return;
3287 
3288 	if (adapter->missing_tx_timeout == 0)
3289 		return;
3290 
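	/* Scan at most missing_tx_max_queues rings per call, continuing in
	   the next call from the ring after the last one examined. */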
3291 	budget = adapter->missing_tx_max_queues;
3292 	getbinuptime(&curtime);
3293 
3294 	for (i = adapter->next_monitored_tx_qid; i < adapter->num_queues; i++) {
3295 		tx_ring = &adapter->tx_ring[i];
3296 
3297 		missed_tx = 0;
3298 
3299 		for (j = 0; j < tx_ring->ring_size; j++) {
3300 			tx_info = &tx_ring->tx_buffer_info[j];
3301 
3302 			if (!bintime_isset(&tx_info->timestamp))
3303 				continue;
3304 
3305 			time = curtime;
3306 			bintime_sub(&time, &tx_info->timestamp);
3307 
3308 			/* Check again if packet is still waiting */
3309 			if (bintime_isset(&tx_info->timestamp) && unlikely(
3310 			    bttosbt(time) > adapter->missing_tx_timeout)) {
3311 				if (tx_info->print_once)
3312 					device_printf(adapter->pdev,
3313 					    "Found a Tx that wasn't completed "
3314 					    "on time, qid %d, index %d.\n",
3315 					    tx_ring->qid, j);
3316 
3317 				tx_info->print_once = false;
3318 				missed_tx++;
3319 
3320 				if (unlikely(missed_tx >
3321 				    adapter->missing_tx_threshold)) {
3322 					device_printf(adapter->pdev,
3323 					    "The number of lost tx completion "
3324 					    "is above the threshold (%d > %d). "
3325 					    "Reset the device\n", missed_tx,
3326 					    adapter->missing_tx_threshold);
3327 					adapter->trigger_reset = true;
3328 					return;
3329 				}
3330 			}
3331 		}
3332 
3333 		budget--;
3334 		if (!budget) {
3335 			i++;
3336 			break;
3337 		}
3338 	}
3339 
3340 	adapter->next_monitored_tx_qid = i % adapter->num_queues;
3341 }
3342 
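/*
 * Periodic (1 Hz) watchdog: check the keep-alive messages, the admin
 * queue state and missing TX completions, and schedule the reset task
 * when the device needs to be reset.
 */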
3344 static void
3345 ena_timer_service(void *data)
3346 {
3347 	struct ena_adapter *adapter = (struct ena_adapter *)data;
3348 	struct ena_admin_host_info *host_info =
3349 	    adapter->ena_dev->host_attr.host_info;
3350 
3351 	check_for_missing_keep_alive(adapter);
3352 
3353 	check_for_admin_com_state(adapter);
3354 
3355 	check_for_missing_tx_completions(adapter);
3356 
3357 	if (host_info)
3358 		ena_update_host_info(host_info, adapter->ifp);
3359 
3360 	if (unlikely(adapter->trigger_reset)) {
3361 		device_printf(adapter->pdev, "Trigger reset is on\n");
3362 		taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
3363 		return;
3364 	}
3365 
3366 	/*
3367 	 * Schedule another timeout one second from now.
3368 	 */
3369 	callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
3370 }
3371 
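/*
 * Reset task: tear the device down (IO path, interrupts, admin queue),
 * reinitialize it and bring the interface back up if it was up before
 * the reset.
 */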
3372 static void
3373 ena_reset_task(void *arg, int pending)
3374 {
3375 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3376 	struct ena_adapter *adapter = (struct ena_adapter *)arg;
3377 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3378 	bool dev_up;
3379 	int rc;
3380 
3381 	if (unlikely(!adapter->trigger_reset)) {
3382 		device_printf(adapter->pdev,
3383 		    "device reset scheduled but trigger_reset is off\n");
3384 		return;
3385 	}
3386 
3387 	sx_xlock(&adapter->ioctl_sx);
3388 
3389 	callout_drain(&adapter->timer_service);
3390 
3391 	dev_up = adapter->up;
3392 
3393 	ena_com_set_admin_running_state(ena_dev, false);
3394 	ena_free_mgmnt_irq(adapter);
3395 	ena_down(adapter);
3396 	ena_com_dev_reset(ena_dev);
3397 	ena_disable_msix(adapter);
3398 	ena_com_abort_admin_commands(ena_dev);
3399 	ena_com_wait_for_abort_completion(ena_dev);
3400 	ena_com_admin_destroy(ena_dev);
3401 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3402 
3403 	adapter->trigger_reset = false;
3404 
3405 	/* Finished destroy part. Restart the device */
3406 	rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx,
3407 	    &adapter->wd_active);
3408 	if (rc) {
3409 		device_printf(adapter->pdev,
3410 		    "ENA device init failed! (err: %d)\n", rc);
3411 		goto err_dev_free;
3412 	}
3413 
3414 	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
3415 	    adapter->num_queues);
3416 	if (rc) {
3417 		device_printf(adapter->pdev, "Enable MSI-X failed\n");
3418 		goto err_com_free;
3419 	}
3420 
3421 	/* If the interface was up before the reset, bring it up */
3422 	if (dev_up) {
3423 		rc = ena_up(adapter);
3424 		if (rc) {
3425 			device_printf(adapter->pdev,
3426 			    "Failed to create I/O queues\n");
3427 			goto err_msix_free;
3428 		}
3429 	}
3430 
3431 	callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
3432 	    ena_timer_service, (void *)adapter, 0);
3433 
3434 	sx_unlock(&adapter->ioctl_sx);
3435 
3436 	return;
3437 
3438 err_msix_free:
3439 	ena_com_dev_reset(ena_dev);
3440 	ena_free_mgmnt_irq(adapter);
3441 	ena_disable_msix(adapter);
3442 err_com_free:
3443 	ena_com_admin_destroy(ena_dev);
3444 err_dev_free:
3445 	device_printf(adapter->pdev, "ENA reset failed!\n");
3446 	adapter->running = false;
3447 	sx_unlock(&adapter->ioctl_sx);
3448 }
3449 
3450 /**
3451  * ena_attach - Device Initialization Routine
3452  * @pdev: device information struct
3453  *
3454  * Returns 0 on success, an error code otherwise.
3455  *
3456  * ena_attach initializes an adapter identified by a device structure.
3457  * The OS initialization, configuring of the adapter private structure,
3458  * and a hardware reset occur.
3459  **/
3460 static int
3461 ena_attach(device_t pdev)
3462 {
3463 	struct ena_com_dev_get_features_ctx get_feat_ctx;
3464 	static int version_printed;
3465 	struct ena_adapter *adapter;
3466 	struct ena_com_dev *ena_dev = NULL;
3467 	uint16_t tx_sgl_size = 0;
3468 	uint16_t rx_sgl_size = 0;
3469 	int io_queue_num;
3470 	int queue_size;
3471 	int rc;
3472 	struct sysctl_ctx_list *ctx;
3473 	struct sysctl_oid_list *children;
3474 
3475 	adapter = device_get_softc(pdev);
3476 	adapter->pdev = pdev;
3477 	ctx = device_get_sysctl_ctx(pdev);
3478 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(pdev));
3479 
3480 	mtx_init(&adapter->global_mtx, "ENA global mtx", NULL, MTX_DEF);
3481 	sx_init(&adapter->ioctl_sx, "ENA ioctl sx");
3482 
3483 	/* Sysctl calls for Watchdog service */
3484 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "wd_active",
3485 	    CTLFLAG_RWTUN, &adapter->wd_active, 0,
3486 	    "Watchdog is active");
3487 
3488 	SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "keep_alive_timeout",
3489 	    CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
3490 	    "Timeout for Keep Alive messages");
3491 
3492 	SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "missing_tx_timeout",
3493 	    CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
3494 	    "Timeout for TX completion");
3495 
3496 	SYSCTL_ADD_U32(ctx, children, OID_AUTO, "missing_tx_max_queues",
3497 	    CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
3498 	    "Number of TX queues to check per run");
3499 
3500 	SYSCTL_ADD_U32(ctx, children, OID_AUTO, "missing_tx_threshold",
3501 	    CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
3502 	    "Max number of timed-out packets");
3503 
3504 	/* Set up the timer service */
3505 	callout_init_mtx(&adapter->timer_service, &adapter->global_mtx, 0);
3506 	adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
3507 	adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
3508 	adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
3509 	adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
3510 
3511 	if (version_printed++ == 0)
3512 		device_printf(pdev, "%s\n", ena_version);
3513 
3514 	rc = ena_allocate_pci_resources(adapter);
3515 	if (rc) {
3516 		device_printf(pdev, "PCI resource allocation failed!\n");
3517 		ena_free_pci_resources(adapter);
3518 		goto err_pci_res;
3519 	}
3520 
3521 	/* Allocate memory for ena_dev structure */
3522 	ena_dev = ENA_MEM_ALLOC(pdev, sizeof(struct ena_com_dev));
3523 	if (!ena_dev) {
3524 		device_printf(pdev, "allocating ena_dev failed\n");
3525 		rc = ENOMEM;
3526 		goto err_select_region;
3527 	}
3528 
3529 	adapter->ena_dev = ena_dev;
3530 	ena_dev->dmadev = pdev;
3531 	ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3532 	    M_WAITOK | M_ZERO);
3533 
3534 	/* Store register resources */
3535 	((struct ena_bus*)(ena_dev->bus))->reg_bar_t =
3536 	    rman_get_bustag(adapter->registers);
3537 	((struct ena_bus*)(ena_dev->bus))->reg_bar_h =
3538 	    rman_get_bushandle(adapter->registers);
3539 
3540 	if (((struct ena_bus*)(ena_dev->bus))->reg_bar_h == 0) {
3541 		device_printf(pdev, "failed to map registers bar\n");
3542 		rc = ENXIO;
3543 		goto err_dev_free;
3544 	}
3545 
3546 	ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3547 
3548 	/* Device initialization */
3549 	rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
3550 	if (rc) {
3551 		device_printf(pdev, "ENA device init failed! (err: %d)\n", rc);
3552 		rc = ENXIO;
3553 		goto err_bus_free;
3554 	}
3555 
3556 	adapter->keep_alive_timestamp = getsbinuptime();
3557 
3558 	adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3559 
3560 	/* Make sure that the interface is not up */
3561 	adapter->up = false;
3562 
3563 	memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3564 	    ETHER_ADDR_LEN);
3565 
3566 	adapter->small_copy_len =
3567 	    ENA_DEFAULT_SMALL_PACKET_LEN;
3568 
3569 	/* calculate IO queue number to create */
3570 	io_queue_num = ena_calc_io_queue_num(adapter, &get_feat_ctx);
3571 
3572 	ENA_ASSERT(io_queue_num > 0, "Invalid queue number: %d\n",
3573 	    io_queue_num);
3574 	adapter->num_queues = io_queue_num;
3575 
3576 	/* calculate ring sizes */
3577 	queue_size = ena_calc_queue_size(adapter, &tx_sgl_size,
3578 	    &rx_sgl_size, &get_feat_ctx);
3579 	if ((queue_size <= 0) || (io_queue_num <= 0)) {
3580 		rc = ENA_COM_FAULT;
3581 		goto err_com_free;
3582 	}
3583 
3584 	adapter->tx_ring_size = queue_size;
3585 	adapter->rx_ring_size = queue_size;
3586 
3587 	adapter->max_tx_sgl_size = tx_sgl_size;
3588 	adapter->max_rx_sgl_size = rx_sgl_size;
3589 
3590 	/* set up dma tags for rx and tx buffers */
3591 	rc = ena_setup_tx_dma_tag(adapter);
3592 	if (rc)
3593 		goto dma_tx_err;
3594 
3595 	rc = ena_setup_rx_dma_tag(adapter);
3596 	if (rc)
3597 		goto dma_rx_err;
3598 
3599 	/* initialize rings basic information */
3600 	device_printf(pdev, "initialize %d io queues\n", io_queue_num);
3601 	rc = ena_init_io_rings(adapter);
3602 	if (rc) {
3603 		device_printf(pdev, "Error with initialization of IO rings\n");
3604 		goto err_io_init;
3605 	}
3606 
3607 	/* setup network interface */
3608 	rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
3609 	if (rc) {
3610 		device_printf(pdev, "Error with network interface setup\n");
3611 		goto err_com_free;
3612 	}
3613 
3614 	rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
3615 	if (rc) {
3616 		device_printf(pdev,
3617 		    "Failed to enable and set the admin interrupts\n");
3618 		goto err_ifp_free;
3619 	}
3620 
3621 	/* Initialize reset task queue */
3622 	TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter);
3623 	adapter->reset_tq = taskqueue_create("ena_reset_enqueue",
3624 	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq);
3625 	if (adapter->reset_tq == NULL) {
3626 		device_printf(adapter->pdev, "Unable to create reset task queue\n");
3627 		rc = ENOMEM;
3628 		goto err_reset_tq;
3629 	}
3630 	taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET,
3631 	    "%s rstq", device_get_nameunit(adapter->pdev));
3632 
3633 	/* Initialize task queue responsible for updating hw stats */
3634 	TASK_INIT(&adapter->stats_task, 0, ena_update_hw_stats, adapter);
3635 	adapter->stats_tq = taskqueue_create_fast("ena_stats_update",
3636 	    M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->stats_tq);
3637 	if (adapter->stats_tq == NULL) {
3638 		device_printf(adapter->pdev, "Unable to create hw stats taskqueue\n");
3639 		rc = ENOMEM;
3640 		goto err_stats_tq;
3641 	}
3642 	taskqueue_start_threads(&adapter->stats_tq, 1, PI_REALTIME,
3643 	    "%s stats tq", device_get_nameunit(adapter->pdev));
3644 
3645 	/* Initialize statistics */
3646 	ena_alloc_counters((counter_u64_t *)&adapter->dev_stats,
3647 	    sizeof(struct ena_stats_dev));
3648 	ena_update_stats_counters(adapter);
3649 	ena_sysctl_add_nodes(adapter);
3650 
3651 	/* Tell the stack that the interface is not active */
3652 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
3653 
3654 	adapter->running = true;
3655 	return (0);
3656 
3657 err_stats_tq:
3658 	taskqueue_free(adapter->reset_tq);
3659 err_reset_tq:
3660 	ena_free_mgmnt_irq(adapter);
3661 	ena_disable_msix(adapter);
3662 err_ifp_free:
3663 	if_detach(adapter->ifp);
3664 	if_free(adapter->ifp);
3665 err_com_free:
3666 	ena_free_all_io_rings_resources(adapter);
3667 err_io_init:
3668 	ena_free_rx_dma_tag(adapter);
3669 dma_rx_err:
3670 	ena_free_tx_dma_tag(adapter);
3671 dma_tx_err:
3672 	ena_com_admin_destroy(ena_dev);
3673 	ena_com_delete_host_info(ena_dev);
3674 err_bus_free:
3675 	free(ena_dev->bus, M_DEVBUF);
3676 err_dev_free:
3677 	free(ena_dev, M_DEVBUF);
3678 err_select_region:
3679 	ena_free_pci_resources(adapter);
3680 err_pci_res:
3681 	return (rc);
3682 }
3683 
3684 /**
3685  * ena_detach - Device Removal Routine
3686  * @pdev: device information struct
3687  *
3688  * ena_detach is called by the device subsystem to alert the driver
3689  * that it should release a PCI device.
3690  **/
3691 static int
3692 ena_detach(device_t pdev)
3693 {
3694 	struct ena_adapter *adapter = device_get_softc(pdev);
3695 	struct ena_com_dev *ena_dev = adapter->ena_dev;
3696 	int rc;
3697 
3698 	/* Make sure VLANS are not using driver */
3699 	if (adapter->ifp->if_vlantrunk != NULL) {
3700 		device_printf(adapter->pdev, "VLAN is in use, detach first\n");
3701 		return (EBUSY);
3702 	}
3703 
3704 	/* Free reset task and callout */
3705 	callout_drain(&adapter->timer_service);
3706 	while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
3707 		taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
3708 	taskqueue_free(adapter->reset_tq);
3709 
3710 	sx_xlock(&adapter->ioctl_sx);
3711 	ena_down(adapter);
3712 	sx_unlock(&adapter->ioctl_sx);
3713 
3714 	taskqueue_free(adapter->stats_tq);
3715 
3716 	if (adapter->ifp != NULL) {
3717 		ether_ifdetach(adapter->ifp);
3718 		if_free(adapter->ifp);
3719 	}
3720 
3721 	ena_free_all_io_rings_resources(adapter);
3722 
3723 	ena_free_counters((counter_u64_t *)&adapter->dev_stats,
3724 	    sizeof(struct ena_stats_dev));
3725 
3726 	if (adapter->rss_support)
3727 		ena_com_rss_destroy(ena_dev);
3728 
3729 	rc = ena_free_rx_dma_tag(adapter);
3730 	if (rc != 0)
3731 		device_printf(adapter->pdev,
3732 		    "Failed to free RX DMA tag; mappings still in use\n");
3733 
3734 	rc = ena_free_tx_dma_tag(adapter);
3735 	if (rc != 0)
3736 		device_printf(adapter->pdev,
3737 		    "Failed to free TX DMA tag; mappings still in use\n");
3738 
3739 	/* Reset the device only if the device is running. */
3740 	if (adapter->running)
3741 		ena_com_dev_reset(ena_dev);
3742 
3743 	ena_com_delete_host_info(ena_dev);
3744 
3745 	ena_com_admin_destroy(ena_dev);
3746 
3747 	ena_free_irqs(adapter);
3748 
3749 	ena_com_mmio_reg_read_request_destroy(ena_dev);
3750 
3751 	ena_free_pci_resources(adapter);
3752 
3753 	mtx_destroy(&adapter->global_mtx);
3754 	sx_destroy(&adapter->ioctl_sx);
3755 
3756 	if (ena_dev->bus != NULL)
3757 		free(ena_dev->bus, M_DEVBUF);
3758 
3759 	if (ena_dev != NULL)
3760 		free(ena_dev, M_DEVBUF);
3761 
3762 	return (bus_generic_detach(pdev));
3763 }
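
/*
 * A note on the cancel/drain loop in ena_detach() above:
 * taskqueue_cancel(9) returns non-zero while the task is currently
 * executing and therefore cannot be removed from the queue; draining
 * waits for that instance to finish, and the loop retries in case the
 * task re-enqueued itself in the meantime.  Generic shape of the
 * idiom:
 */
#if 0
	while (taskqueue_cancel(tq, &task, NULL) != 0)
		taskqueue_drain(tq, &task);
	taskqueue_free(tq);
#endif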
3764 
3765 /******************************************************************************
3766  ******************************** AENQ Handlers *******************************
3767  *****************************************************************************/
3768 /**
3769  * ena_update_on_link_change:
3770  * Notify the network stack about a change in link status
3771  **/
3772 static void
3773 ena_update_on_link_change(void *adapter_data,
3774     struct ena_admin_aenq_entry *aenq_e)
3775 {
3776 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3777 	struct ena_admin_aenq_link_change_desc *aenq_desc;
3778 	int status;
3779 	if_t ifp;
3780 
3781 	aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3782 	ifp = adapter->ifp;
3783 	status = aenq_desc->flags &
3784 	    ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3785 
3786 	if (status != 0) {
3787 		device_printf(adapter->pdev, "link is UP\n");
3788 		if_link_state_change(ifp, LINK_STATE_UP);
3789 	} else {
3790 		device_printf(adapter->pdev, "link is DOWN\n");
3791 		if_link_state_change(ifp, LINK_STATE_DOWN);
3792 	}
3796 
3797 	adapter->link_status = status;
3798 
3799 	return;
3800 }
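
/*
 * The ENA admin definitions expose descriptor fields as MASK (and,
 * for multi-bit fields, SHIFT) constants; link status is a single bit
 * of the flags word, which is why the masked value above is simply
 * tested against zero.  Generic decode shape, with a hypothetical
 * EXAMPLE_FIELD:
 */
#if 0
	val = (desc->flags & ENA_ADMIN_EXAMPLE_FIELD_MASK) >>
	    ENA_ADMIN_EXAMPLE_FIELD_SHIFT;
#endif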
3801 
3802 /**
3803  * This handler is called for any unknown event group or unimplemented handler.
3804  **/
3805 static void
3806 unimplemented_aenq_handler(void *data,
3807     struct ena_admin_aenq_entry *aenq_e)
3808 {
3809 	return;
3810 }
3811 
3812 static struct ena_aenq_handlers aenq_handlers = {
3813     .handlers = {
3814 	    [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3815 	    [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3816     },
3817     .unimplemented_handler = unimplemented_aenq_handler
3818 };
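
/*
 * The AENQ group number indexes the handlers array above, and ena_com
 * falls back to .unimplemented_handler for groups without an entry.
 * A hedged sketch of that lookup (the real one lives in the shared
 * ena_com code and may differ in detail):
 */
#if 0
static ena_aenq_handler
example_get_handler(struct ena_aenq_handlers *h, uint16_t group)
{
	if (group >= ENA_MAX_HANDLERS || h->handlers[group] == NULL)
		return (h->unimplemented_handler);
	return (h->handlers[group]);
}
#endif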
3819 
3820 /*********************************************************************
3821  *  FreeBSD Device Interface Entry Points
3822  *********************************************************************/
3823 
3824 static device_method_t ena_methods[] = {
3825     /* Device interface */
3826     DEVMETHOD(device_probe, ena_probe),
3827     DEVMETHOD(device_attach, ena_attach),
3828     DEVMETHOD(device_detach, ena_detach),
3829     DEVMETHOD_END
3830 };
3831 
3832 static driver_t ena_driver = {
3833     "ena", ena_methods, sizeof(struct ena_adapter),
3834 };
3835 
3836 devclass_t ena_devclass;
3837 DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
3838 MODULE_DEPEND(ena, pci, 1, 1, 1);
3839 MODULE_DEPEND(ena, ether, 1, 1, 1);
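
/*
 * DRIVER_MODULE() registers ena_driver on the pci bus, and the
 * MODULE_DEPEND() lines make the kernel linker pull in pci and ether
 * support first.  The driver attaches automatically when a matching
 * device probes; the module can also be loaded by hand, e.g.:
 *
 *	# kldload if_ena
 */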
3840 
3841 /*********************************************************************/
3842