xref: /freebsd/sys/dev/ena/ena_datapath.c (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_rss.h"
34 #include "ena.h"
35 #include "ena_datapath.h"
36 #ifdef DEV_NETMAP
37 #include "ena_netmap.h"
38 #endif /* DEV_NETMAP */
39 #ifdef RSS
40 #include <net/rss_config.h>
41 #endif /* RSS */
42 
43 #include <netinet6/ip6_var.h>
44 
45 /*********************************************************************
46  *  Static functions prototypes
47  *********************************************************************/
48 
49 static int	ena_tx_cleanup(struct ena_ring *);
50 static int	ena_rx_cleanup(struct ena_ring *);
51 static inline int ena_get_tx_req_id(struct ena_ring *tx_ring,
52     struct ena_com_io_cq *io_cq, uint16_t *req_id);
53 static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
54     struct mbuf *);
55 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
56     struct ena_com_rx_ctx *, uint16_t *);
57 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
58     struct mbuf *);
59 static void	ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool);
60 static int	ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
61     struct mbuf **mbuf);
62 static int	ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
63 static void	ena_start_xmit(struct ena_ring *);
64 
65 /*********************************************************************
66  *  Global functions
67  *********************************************************************/
68 
69 void
70 ena_cleanup(void *arg, int pending)
71 {
72 	struct ena_que	*que = arg;
73 	struct ena_adapter *adapter = que->adapter;
74 	if_t ifp = adapter->ifp;
75 	struct ena_ring *tx_ring;
76 	struct ena_ring *rx_ring;
77 	struct ena_com_io_cq* io_cq;
78 	struct ena_eth_io_intr_reg intr_reg;
79 	int qid, ena_qid;
80 	int txc, rxc, i;
81 
82 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
83 		return;
84 
85 	ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
86 
87 	tx_ring = que->tx_ring;
88 	rx_ring = que->rx_ring;
89 	qid = que->id;
90 	ena_qid = ENA_IO_TXQ_IDX(qid);
91 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
92 
93 	tx_ring->first_interrupt = true;
94 	rx_ring->first_interrupt = true;
95 
96 	for (i = 0; i < CLEAN_BUDGET; ++i) {
97 		rxc = ena_rx_cleanup(rx_ring);
98 		txc = ena_tx_cleanup(tx_ring);
99 
100 		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
101 			return;
102 
103 		if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
104 		       break;
105 	}
106 
107 	/* Signal that work is done and unmask interrupt */
108 	ena_com_update_intr_reg(&intr_reg,
109 	    RX_IRQ_INTERVAL,
110 	    TX_IRQ_INTERVAL,
111 	    true);
112 	counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
113 	ena_com_unmask_intr(io_cq, &intr_reg);
114 }
115 
116 void
117 ena_deferred_mq_start(void *arg, int pending)
118 {
119 	struct ena_ring *tx_ring = (struct ena_ring *)arg;
120 	struct ifnet *ifp = tx_ring->adapter->ifp;
121 
122 	while (!drbr_empty(ifp, tx_ring->br) &&
123 	    tx_ring->running &&
124 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
125 		ENA_RING_MTX_LOCK(tx_ring);
126 		ena_start_xmit(tx_ring);
127 		ENA_RING_MTX_UNLOCK(tx_ring);
128 	}
129 }
130 
131 int
132 ena_mq_start(if_t ifp, struct mbuf *m)
133 {
134 	struct ena_adapter *adapter = ifp->if_softc;
135 	struct ena_ring *tx_ring;
136 	int ret, is_drbr_empty;
137 	uint32_t i;
138 #ifdef RSS
139 	uint32_t bucket_id;
140 #endif
141 
142 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
143 		return (ENODEV);
144 
145 	/* Which queue to use */
146 	/*
147 	 * If everything is setup correctly, it should be the
148 	 * same bucket that the current CPU we're on is.
149 	 * It should improve performance.
150 	 */
151 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
152 #ifdef RSS
153 		if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
154 		    &bucket_id) == 0)
155 			i = bucket_id % adapter->num_io_queues;
156 		else
157 #endif
158 			i = m->m_pkthdr.flowid % adapter->num_io_queues;
159 	} else {
160 		i = curcpu % adapter->num_io_queues;
161 	}
162 	tx_ring = &adapter->tx_ring[i];
163 
164 	/* Check if drbr is empty before putting packet */
165 	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
166 	ret = drbr_enqueue(ifp, tx_ring->br, m);
167 	if (unlikely(ret != 0)) {
168 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
169 		return (ret);
170 	}
171 
172 	if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
173 		ena_start_xmit(tx_ring);
174 		ENA_RING_MTX_UNLOCK(tx_ring);
175 	} else {
176 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
177 	}
178 
179 	return (0);
180 }
181 
182 void
183 ena_qflush(if_t ifp)
184 {
185 	struct ena_adapter *adapter = ifp->if_softc;
186 	struct ena_ring *tx_ring = adapter->tx_ring;
187 	int i;
188 
189 	for(i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
190 		if (!drbr_empty(ifp, tx_ring->br)) {
191 			ENA_RING_MTX_LOCK(tx_ring);
192 			drbr_flush(ifp, tx_ring->br);
193 			ENA_RING_MTX_UNLOCK(tx_ring);
194 		}
195 
196 	if_qflush(ifp);
197 }
198 
199 /*********************************************************************
200  *  Static functions
201  *********************************************************************/
202 
203 static inline int
204 ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq,
205     uint16_t *req_id)
206 {
207 	struct ena_adapter *adapter = tx_ring->adapter;
208 	int rc;
209 
210 	rc = ena_com_tx_comp_req_id_get(io_cq, req_id);
211 	if (rc == ENA_COM_TRY_AGAIN)
212 		return (EAGAIN);
213 
214 	if (unlikely(rc != 0)) {
215 		ena_log(adapter->pdev, ERR, "Invalid req_id: %hu\n", *req_id);
216 		counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
217 		goto err;
218 	}
219 
220 	if (tx_ring->tx_buffer_info[*req_id].mbuf != NULL)
221 		return (0);
222 
223 	ena_log(adapter->pdev, ERR, "tx_info doesn't have valid mbuf\n");
224 err:
225 	ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
226 
227 	return (EFAULT);
228 }
229 
230 /**
231  * ena_tx_cleanup - clear sent packets and corresponding descriptors
232  * @tx_ring: ring for which we want to clean packets
233  *
234  * Once packets are sent, we ask the device in a loop for no longer used
235  * descriptors. We find the related mbuf chain in a map (index in an array)
236  * and free it, then update ring state.
237  * This is performed in "endless" loop, updating ring pointers every
238  * TX_COMMIT. The first check of free descriptor is performed before the actual
239  * loop, then repeated at the loop end.
240  **/
241 static int
242 ena_tx_cleanup(struct ena_ring *tx_ring)
243 {
244 	struct ena_adapter *adapter;
245 	struct ena_com_io_cq* io_cq;
246 	uint16_t next_to_clean;
247 	uint16_t req_id;
248 	uint16_t ena_qid;
249 	unsigned int total_done = 0;
250 	int rc;
251 	int commit = TX_COMMIT;
252 	int budget = TX_BUDGET;
253 	int work_done;
254 	bool above_thresh;
255 
256 	adapter = tx_ring->que->adapter;
257 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
258 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
259 	next_to_clean = tx_ring->next_to_clean;
260 
261 #ifdef DEV_NETMAP
262 	if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
263 		return (0);
264 #endif /* DEV_NETMAP */
265 
266 	do {
267 		struct ena_tx_buffer *tx_info;
268 		struct mbuf *mbuf;
269 
270 		rc = ena_get_tx_req_id(tx_ring, io_cq, &req_id);
271 		if (unlikely(rc != 0))
272 			break;
273 
274 		tx_info = &tx_ring->tx_buffer_info[req_id];
275 
276 		mbuf = tx_info->mbuf;
277 
278 		tx_info->mbuf = NULL;
279 		bintime_clear(&tx_info->timestamp);
280 
281 		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
282 		    BUS_DMASYNC_POSTWRITE);
283 		bus_dmamap_unload(adapter->tx_buf_tag,
284 		    tx_info->dmamap);
285 
286 		ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n",
287 		    tx_ring->qid, mbuf);
288 
289 		m_freem(mbuf);
290 
291 		total_done += tx_info->tx_descs;
292 
293 		tx_ring->free_tx_ids[next_to_clean] = req_id;
294 		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
295 		    tx_ring->ring_size);
296 
297 		if (unlikely(--commit == 0)) {
298 			commit = TX_COMMIT;
299 			/* update ring state every TX_COMMIT descriptor */
300 			tx_ring->next_to_clean = next_to_clean;
301 			ena_com_comp_ack(
302 			    &adapter->ena_dev->io_sq_queues[ena_qid],
303 			    total_done);
304 			ena_com_update_dev_comp_head(io_cq);
305 			total_done = 0;
306 		}
307 	} while (likely(--budget));
308 
309 	work_done = TX_BUDGET - budget;
310 
311 	ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n",
312 	    tx_ring->qid, work_done);
313 
314 	/* If there is still something to commit update ring state */
315 	if (likely(commit != TX_COMMIT)) {
316 		tx_ring->next_to_clean = next_to_clean;
317 		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
318 		    total_done);
319 		ena_com_update_dev_comp_head(io_cq);
320 	}
321 
322 	/*
323 	 * Need to make the rings circular update visible to
324 	 * ena_xmit_mbuf() before checking for tx_ring->running.
325 	 */
326 	mb();
327 
328 	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
329 	    ENA_TX_RESUME_THRESH);
330 	if (unlikely(!tx_ring->running && above_thresh)) {
331 		ENA_RING_MTX_LOCK(tx_ring);
332 		above_thresh =
333 		    ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
334 		    ENA_TX_RESUME_THRESH);
335 		if (!tx_ring->running && above_thresh) {
336 			tx_ring->running = true;
337 			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
338 			taskqueue_enqueue(tx_ring->enqueue_tq,
339 			    &tx_ring->enqueue_task);
340 		}
341 		ENA_RING_MTX_UNLOCK(tx_ring);
342 	}
343 
344 	return (work_done);
345 }
346 
347 static void
348 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
349     struct mbuf *mbuf)
350 {
351 	struct ena_adapter *adapter = rx_ring->adapter;
352 
353 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
354 		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
355 
356 #ifdef RSS
357 		/*
358 		 * Hardware and software RSS are in agreement only when both are
359 		 * configured to Toeplitz algorithm.  This driver configures
360 		 * that algorithm only when software RSS is enabled and uses it.
361 		 */
362 		if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ &&
363 		    ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) {
364 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
365 			return;
366 		}
367 #endif
368 
369 		if (ena_rx_ctx->frag &&
370 		    (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
371 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
372 			return;
373 		}
374 
375 		switch (ena_rx_ctx->l3_proto) {
376 		case ENA_ETH_IO_L3_PROTO_IPV4:
377 			switch (ena_rx_ctx->l4_proto) {
378 			case ENA_ETH_IO_L4_PROTO_TCP:
379 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
380 				break;
381 			case ENA_ETH_IO_L4_PROTO_UDP:
382 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
383 				break;
384 			default:
385 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
386 			}
387 			break;
388 		case ENA_ETH_IO_L3_PROTO_IPV6:
389 			switch (ena_rx_ctx->l4_proto) {
390 			case ENA_ETH_IO_L4_PROTO_TCP:
391 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
392 				break;
393 			case ENA_ETH_IO_L4_PROTO_UDP:
394 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
395 				break;
396 			default:
397 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
398 			}
399 			break;
400 		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
401 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
402 			break;
403 		default:
404 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
405 		}
406 	} else {
407 		mbuf->m_pkthdr.flowid = rx_ring->qid;
408 		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
409 	}
410 }
411 
412 /**
413  * ena_rx_mbuf - assemble mbuf from descriptors
414  * @rx_ring: ring for which we want to clean packets
415  * @ena_bufs: buffer info
416  * @ena_rx_ctx: metadata for this packet(s)
417  * @next_to_clean: ring pointer, will be updated only upon success
418  *
419  **/
420 static struct mbuf*
421 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
422     struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
423 {
424 	struct mbuf *mbuf;
425 	struct ena_rx_buffer *rx_info;
426 	struct ena_adapter *adapter;
427 	device_t pdev;
428 	unsigned int descs = ena_rx_ctx->descs;
429 	uint16_t ntc, len, req_id, buf = 0;
430 
431 	ntc = *next_to_clean;
432 	adapter = rx_ring->adapter;
433 	pdev = adapter->pdev;
434 
435 	len = ena_bufs[buf].len;
436 	req_id = ena_bufs[buf].req_id;
437 	rx_info = &rx_ring->rx_buffer_info[req_id];
438 	if (unlikely(rx_info->mbuf == NULL)) {
439 		ena_log(pdev, ERR, "NULL mbuf in rx_info");
440 		return (NULL);
441 	}
442 
443 	ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info,
444 	    rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
445 
446 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
447 	    BUS_DMASYNC_POSTREAD);
448 	mbuf = rx_info->mbuf;
449 	mbuf->m_flags |= M_PKTHDR;
450 	mbuf->m_pkthdr.len = len;
451 	mbuf->m_len = len;
452 	/* Only for the first segment the data starts at specific offset */
453 	mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset);
454 	ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset);
455 	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
456 
457 	/* Fill mbuf with hash key and it's interpretation for optimization */
458 	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
459 
460 	ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf,
461 	    mbuf->m_flags, mbuf->m_pkthdr.len);
462 
463 	/* DMA address is not needed anymore, unmap it */
464 	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
465 
466 	rx_info->mbuf = NULL;
467 	rx_ring->free_rx_ids[ntc] = req_id;
468 	ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
469 
470 	/*
471 	 * While we have more than 1 descriptors for one rcvd packet, append
472 	 * other mbufs to the main one
473 	 */
474 	while (--descs) {
475 		++buf;
476 		len = ena_bufs[buf].len;
477 		req_id = ena_bufs[buf].req_id;
478 		rx_info = &rx_ring->rx_buffer_info[req_id];
479 
480 		if (unlikely(rx_info->mbuf == NULL)) {
481 			ena_log(pdev, ERR, "NULL mbuf in rx_info");
482 			/*
483 			 * If one of the required mbufs was not allocated yet,
484 			 * we can break there.
485 			 * All earlier used descriptors will be reallocated
486 			 * later and not used mbufs can be reused.
487 			 * The next_to_clean pointer will not be updated in case
488 			 * of an error, so caller should advance it manually
489 			 * in error handling routine to keep it up to date
490 			 * with hw ring.
491 			 */
492 			m_freem(mbuf);
493 			return (NULL);
494 		}
495 
496 		bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
497 		    BUS_DMASYNC_POSTREAD);
498 		if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
499 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
500 			ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n",
501 			    mbuf);
502 		}
503 
504 		ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n",
505 		    mbuf->m_pkthdr.len);
506 
507 		/* Free already appended mbuf, it won't be useful anymore */
508 		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
509 		m_freem(rx_info->mbuf);
510 		rx_info->mbuf = NULL;
511 
512 		rx_ring->free_rx_ids[ntc] = req_id;
513 		ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
514 	}
515 
516 	*next_to_clean = ntc;
517 
518 	return (mbuf);
519 }
520 
521 /**
522  * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
523  **/
524 static inline void
525 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
526     struct mbuf *mbuf)
527 {
528 	device_t pdev = rx_ring->adapter->pdev;
529 
530 	/* if IP and error */
531 	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
532 	    ena_rx_ctx->l3_csum_err)) {
533 		/* ipv4 checksum error */
534 		mbuf->m_pkthdr.csum_flags = 0;
535 		counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
536 		ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n");
537 		return;
538 	}
539 
540 	/* if TCP/UDP */
541 	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
542 	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
543 		if (ena_rx_ctx->l4_csum_err) {
544 			/* TCP/UDP checksum error */
545 			mbuf->m_pkthdr.csum_flags = 0;
546 			counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
547 			ena_log_io(pdev, DBG, "RX L4 checksum error\n");
548 		} else {
549 			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
550 			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
551 			counter_u64_add(rx_ring->rx_stats.csum_good, 1);
552 		}
553 	}
554 }
555 
/**
 * ena_rx_cleanup - handle rx irq
 * @rx_ring: ring for which irq is being handled
 *
 * Fetches up to RX_BUDGET packets from the completion queue, builds an mbuf
 * for each of them and passes it either to LRO or to the if_input routine
 * of the interface.  Afterwards the ring is refilled if enough entries are
 * free and all LRO entries are flushed.
 *
 * Returns the number of loop iterations that ran to completion
 * (RX_BUDGET - budget), or 0 when netmap took care of the interrupt or
 * ena_com_rx_pkt() reported an error.
 **/
static int
ena_rx_cleanup(struct ena_ring *rx_ring)
{
	struct ena_adapter *adapter;
	device_t pdev;
	struct mbuf *mbuf;
	struct ena_com_rx_ctx ena_rx_ctx;
	struct ena_com_io_cq* io_cq;
	struct ena_com_io_sq* io_sq;
	enum ena_regs_reset_reason_types reset_reason;
	if_t ifp;
	uint16_t ena_qid;
	uint16_t next_to_clean;
	uint32_t refill_required;
	uint32_t refill_threshold;
	uint32_t do_if_input = 0;
	unsigned int qid;
	int rc, i;
	int budget = RX_BUDGET;
#ifdef DEV_NETMAP
	int done;
#endif /* DEV_NETMAP */

	adapter = rx_ring->que->adapter;
	pdev = adapter->pdev;
	ifp = adapter->ifp;
	qid = rx_ring->que->id;
	ena_qid = ENA_IO_RXQ_IDX(qid);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
	/* Work on a local copy; the ring field is written back after the loop */
	next_to_clean = rx_ring->next_to_clean;

#ifdef DEV_NETMAP
	/* Anything but NM_IRQ_PASS means there is nothing left to do here */
	if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	ena_log_io(pdev, DBG, "rx: qid %d\n", qid);

	do {
		/* Reset the per-packet context before asking for a packet */
		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
		ena_rx_ctx.descs = 0;
		ena_rx_ctx.pkt_offset = 0;

		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
		if (unlikely(rc != 0)) {
			/*
			 * Any error triggers a reset; only the counter and
			 * the reset reason depend on the error code.
			 */
			if (rc == ENA_COM_NO_SPACE) {
				counter_u64_add(rx_ring->rx_stats.bad_desc_num,
				    1);
				reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
			} else {
				counter_u64_add(rx_ring->rx_stats.bad_req_id,
				    1);
				reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
			}
			ena_trigger_reset(adapter, reset_reason);
			return (0);
		}

		/* No descriptors reported: nothing more to receive */
		if (unlikely(ena_rx_ctx.descs == 0))
			break;

		ena_log_io(pdev, DBG, "rx: q %d got packet from ena. "
		    "descs #: %d l3 proto %d l4 proto %d hash: %x\n",
		    rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
		    ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

		/* Receive mbuf from the ring */
		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
		    &ena_rx_ctx, &next_to_clean);
		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
		/* Exit if we failed to retrieve a buffer */
		if (unlikely(mbuf == NULL)) {
			/*
			 * ena_rx_mbuf() did not move next_to_clean, so recycle
			 * the req_ids of all descriptors of this packet and
			 * advance the pointer here.
			 */
			for (i = 0; i < ena_rx_ctx.descs; ++i) {
				rx_ring->free_rx_ids[next_to_clean] =
				    rx_ring->ena_bufs[i].req_id;
				next_to_clean =
				    ENA_RX_RING_IDX_NEXT(next_to_clean,
				    rx_ring->ring_size);

			}
			break;
		}

		/* Checksum results are used only if Rx csum offload is enabled */
		if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
		    ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
		}

		/* Byte counters are updated before the mbuf is handed over */
		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.bytes,
		    mbuf->m_pkthdr.len);
		counter_u64_add_protected(adapter->hw_stats.rx_bytes,
		    mbuf->m_pkthdr.len);
		counter_exit();
		/*
		 * LRO is only for IP/TCP packets and TCP checksum of the packet
		 * should be computed by hardware.
		 */
		do_if_input = 1;
		if (((ifp->if_capenable & IFCAP_LRO) != 0)  &&
		    ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
		    (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
			/*
			 * Send to the stack if:
			 *  - LRO not enabled, or
			 *  - no LRO resources, or
			 *  - lro enqueue fails
			 */
			if ((rx_ring->lro.lro_cnt != 0) &&
			    (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
					do_if_input = 0;
		}
		if (do_if_input != 0) {
			ena_log_io(pdev, DBG, "calling if_input() with mbuf %p\n",
			    mbuf);
			(*ifp->if_input)(ifp, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
		counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
		counter_exit();
	} while (--budget);

	rx_ring->next_to_clean = next_to_clean;

	/*
	 * Refill only when the number of free entries exceeds the threshold,
	 * which is the smaller of a fraction of the ring size and
	 * ENA_RX_REFILL_THRESH_PACKET.
	 */
	refill_required = ena_com_free_q_entries(io_sq);
	refill_threshold = min_t(int,
	    rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
	    ENA_RX_REFILL_THRESH_PACKET);

	if (refill_required > refill_threshold) {
		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
		ena_refill_rx_bufs(rx_ring, refill_required);
	}

	tcp_lro_flush_all(&rx_ring->lro);

	return (RX_BUDGET - budget);
}
705 
706 static void
707 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
708     bool disable_meta_caching)
709 {
710 	struct ena_com_tx_meta *ena_meta;
711 	struct ether_vlan_header *eh;
712 	struct mbuf *mbuf_next;
713 	u32 mss;
714 	bool offload;
715 	uint16_t etype;
716 	int ehdrlen;
717 	struct ip *ip;
718 	int ipproto;
719 	int iphlen;
720 	struct tcphdr *th;
721 	int offset;
722 
723 	offload = false;
724 	ena_meta = &ena_tx_ctx->ena_meta;
725 	mss = mbuf->m_pkthdr.tso_segsz;
726 
727 	if (mss != 0)
728 		offload = true;
729 
730 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
731 		offload = true;
732 
733 	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
734 		offload = true;
735 
736 	if ((mbuf->m_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0)
737 		offload = true;
738 
739 	if (!offload) {
740 		if (disable_meta_caching) {
741 			memset(ena_meta, 0, sizeof(*ena_meta));
742 			ena_tx_ctx->meta_valid = 1;
743 		} else {
744 			ena_tx_ctx->meta_valid = 0;
745 		}
746 		return;
747 	}
748 
749 	/* Determine where frame payload starts. */
750 	eh = mtod(mbuf, struct ether_vlan_header *);
751 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
752 		etype = ntohs(eh->evl_proto);
753 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
754 	} else {
755 		etype = ntohs(eh->evl_encap_proto);
756 		ehdrlen = ETHER_HDR_LEN;
757 	}
758 
759 	mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
760 
761 	switch (etype) {
762 	case ETHERTYPE_IP:
763 		ip = (struct ip *)(mtodo(mbuf_next, offset));
764 		iphlen = ip->ip_hl << 2;
765 		ipproto = ip->ip_p;
766 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
767 		if ((ip->ip_off & htons(IP_DF)) != 0)
768 			ena_tx_ctx->df = 1;
769 		break;
770 	case ETHERTYPE_IPV6:
771 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
772 		iphlen = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &ipproto);
773 		iphlen -= ehdrlen;
774 		ena_tx_ctx->df = 1;
775 		break;
776 	default:
777 		iphlen = 0;
778 		ipproto = 0;
779 		break;
780 	}
781 
782 	mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
783 	th = (struct tcphdr *)(mtodo(mbuf_next, offset));
784 
785 	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
786 		ena_tx_ctx->l3_csum_enable = 1;
787 	}
788 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
789 		ena_tx_ctx->tso_enable = 1;
790 		ena_meta->l4_hdr_len = (th->th_off);
791 	}
792 
793 	if (ipproto == IPPROTO_TCP) {
794 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
795 		if ((mbuf->m_pkthdr.csum_flags &
796 		    (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
797 			ena_tx_ctx->l4_csum_enable = 1;
798 		else
799 			ena_tx_ctx->l4_csum_enable = 0;
800 	} else if (ipproto == IPPROTO_UDP) {
801 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
802 		if ((mbuf->m_pkthdr.csum_flags &
803 		    (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
804 			ena_tx_ctx->l4_csum_enable = 1;
805 		else
806 			ena_tx_ctx->l4_csum_enable = 0;
807 	} else {
808 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
809 		ena_tx_ctx->l4_csum_enable = 0;
810 	}
811 
812 	ena_meta->mss = mss;
813 	ena_meta->l3_hdr_len = iphlen;
814 	ena_meta->l3_hdr_offset = ehdrlen;
815 	ena_tx_ctx->meta_valid = 1;
816 }
817 
818 static int
819 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
820 {
821 	struct ena_adapter *adapter;
822 	struct mbuf *collapsed_mbuf;
823 	int num_frags;
824 
825 	adapter = tx_ring->adapter;
826 	num_frags = ena_mbuf_count(*mbuf);
827 
828 	/* One segment must be reserved for configuration descriptor. */
829 	if (num_frags < adapter->max_tx_sgl_size)
830 		return (0);
831 
832 	if ((num_frags == adapter->max_tx_sgl_size) &&
833 	    ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size))
834 		return (0);
835 
836 	counter_u64_add(tx_ring->tx_stats.collapse, 1);
837 
838 	collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
839 	    adapter->max_tx_sgl_size - 1);
840 	if (unlikely(collapsed_mbuf == NULL)) {
841 		counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
842 		return (ENOMEM);
843 	}
844 
845 	/* If mbuf was collapsed succesfully, original mbuf is released. */
846 	*mbuf = collapsed_mbuf;
847 
848 	return (0);
849 }
850 
851 static int
852 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
853     struct mbuf *mbuf, void **push_hdr, u16 *header_len)
854 {
855 	struct ena_adapter *adapter = tx_ring->adapter;
856 	struct ena_com_buf *ena_buf;
857 	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
858 	size_t iseg = 0;
859 	uint32_t mbuf_head_len;
860 	uint16_t offset;
861 	int rc, nsegs;
862 
863 	mbuf_head_len = mbuf->m_len;
864 	tx_info->mbuf = mbuf;
865 	ena_buf = tx_info->bufs;
866 
867 	/*
868 	 * For easier maintaining of the DMA map, map the whole mbuf even if
869 	 * the LLQ is used. The descriptors will be filled using the segments.
870 	 */
871 	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf,
872 	    segs, &nsegs, BUS_DMA_NOWAIT);
873 	if (unlikely((rc != 0) || (nsegs == 0))) {
874 		ena_log_io(adapter->pdev, WARN,
875 		    "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
876 		goto dma_error;
877 	}
878 
879 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
880 		/*
881 		 * When the device is LLQ mode, the driver will copy
882 		 * the header into the device memory space.
883 		 * the ena_com layer assumes the header is in a linear
884 		 * memory space.
885 		 * This assumption might be wrong since part of the header
886 		 * can be in the fragmented buffers.
887 		 * First check if header fits in the mbuf. If not, copy it to
888 		 * separate buffer that will be holding linearized data.
889 		 */
890 		*header_len = min_t(uint32_t, mbuf->m_pkthdr.len, tx_ring->tx_max_header_size);
891 
892 		/* If header is in linear space, just point into mbuf's data. */
893 		if (likely(*header_len <= mbuf_head_len)) {
894 			*push_hdr = mbuf->m_data;
895 		/*
896 		 * Otherwise, copy whole portion of header from multiple mbufs
897 		 * to intermediate buffer.
898 		 */
899 		} else {
900 			m_copydata(mbuf, 0, *header_len, tx_ring->push_buf_intermediate_buf);
901 			*push_hdr = tx_ring->push_buf_intermediate_buf;
902 
903 			counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
904 		}
905 
906 		ena_log_io(adapter->pdev, DBG, "mbuf: %p ""header_buf->vaddr: %p "
907 		    "push_len: %d\n", mbuf, *push_hdr, *header_len);
908 
909 		/* If packet is fitted in LLQ header, no need for DMA segments. */
910 		if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
911 			return (0);
912 		} else {
913 			offset = tx_ring->tx_max_header_size;
914 			/*
915 			 * As Header part is mapped to LLQ header, we can skip it and just
916 			 * map the residuum of the mbuf to DMA Segments.
917 			 */
918 			while (offset > 0) {
919 				if (offset >= segs[iseg].ds_len) {
920 					offset -= segs[iseg].ds_len;
921 				} else {
922 					ena_buf->paddr = segs[iseg].ds_addr + offset;
923 					ena_buf->len = segs[iseg].ds_len - offset;
924 					ena_buf++;
925 					tx_info->num_of_bufs++;
926 					offset = 0;
927 				}
928 				iseg++;
929 			}
930 		}
931 	} else {
932 		*push_hdr = NULL;
933 		/*
934 		* header_len is just a hint for the device. Because FreeBSD is not
935 		* giving us information about packet header length and it is not
936 		* guaranteed that all packet headers will be in the 1st mbuf, setting
937 		* header_len to 0 is making the device ignore this value and resolve
938 		* header on it's own.
939 		*/
940 		*header_len = 0;
941 	}
942 
943 	/* Map rest of the mbuf */
944 	while (iseg < nsegs) {
945 		ena_buf->paddr = segs[iseg].ds_addr;
946 		ena_buf->len = segs[iseg].ds_len;
947 		ena_buf++;
948 		iseg++;
949 		tx_info->num_of_bufs++;
950 	}
951 
952 	return (0);
953 
954 dma_error:
955 	counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
956 	tx_info->mbuf = NULL;
957 	return (rc);
958 }
959 
960 static int
961 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
962 {
963 	struct ena_adapter *adapter;
964 	device_t pdev;
965 	struct ena_tx_buffer *tx_info;
966 	struct ena_com_tx_ctx ena_tx_ctx;
967 	struct ena_com_dev *ena_dev;
968 	struct ena_com_io_sq* io_sq;
969 	void *push_hdr;
970 	uint16_t next_to_use;
971 	uint16_t req_id;
972 	uint16_t ena_qid;
973 	uint16_t header_len;
974 	int rc;
975 	int nb_hw_desc;
976 
977 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
978 	adapter = tx_ring->que->adapter;
979 	pdev = adapter->pdev;
980 	ena_dev = adapter->ena_dev;
981 	io_sq = &ena_dev->io_sq_queues[ena_qid];
982 
983 	rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
984 	if (unlikely(rc != 0)) {
985 		ena_log_io(pdev, WARN, "Failed to collapse mbuf! err: %d\n",
986 		    rc);
987 		return (rc);
988 	}
989 
990 	ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
991 
992 	next_to_use = tx_ring->next_to_use;
993 	req_id = tx_ring->free_tx_ids[next_to_use];
994 	tx_info = &tx_ring->tx_buffer_info[req_id];
995 	tx_info->num_of_bufs = 0;
996 
997 	ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev,
998 	    "mbuf isn't NULL for req_id %d\n", req_id);
999 
1000 	rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
1001 	if (unlikely(rc != 0)) {
1002 		ena_log_io(pdev, WARN, "Failed to map TX mbuf\n");
1003 		return (rc);
1004 	}
1005 	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
1006 	ena_tx_ctx.ena_bufs = tx_info->bufs;
1007 	ena_tx_ctx.push_header = push_hdr;
1008 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
1009 	ena_tx_ctx.req_id = req_id;
1010 	ena_tx_ctx.header_len = header_len;
1011 
1012 	/* Set flags and meta data */
1013 	ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching);
1014 
1015 	if (tx_ring->acum_pkts == DB_THRESHOLD ||
1016 	    ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
1017 		ena_log_io(pdev, DBG,
1018 		    "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
1019 		    tx_ring->que->id);
1020 		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
1021 		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
1022 		tx_ring->acum_pkts = 0;
1023 	}
1024 
1025 	/* Prepare the packet's descriptors and send them to device */
1026 	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
1027 	if (unlikely(rc != 0)) {
1028 		if (likely(rc == ENA_COM_NO_MEM)) {
1029 			ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n",
1030 			    tx_ring->que->id);
1031 		} else {
1032 			ena_log(pdev, ERR, "failed to prepare tx bufs\n");
1033 			ena_trigger_reset(adapter,
1034 			    ENA_REGS_RESET_DRIVER_INVALID_STATE);
1035 		}
1036 		counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
1037 		goto dma_error;
1038 	}
1039 
1040 	counter_enter();
1041 	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
1042 	counter_u64_add_protected(tx_ring->tx_stats.bytes,
1043 	    (*mbuf)->m_pkthdr.len);
1044 
1045 	counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
1046 	counter_u64_add_protected(adapter->hw_stats.tx_bytes,
1047 	    (*mbuf)->m_pkthdr.len);
1048 	counter_exit();
1049 
1050 	tx_info->tx_descs = nb_hw_desc;
1051 	getbinuptime(&tx_info->timestamp);
1052 	tx_info->print_once = true;
1053 
1054 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
1055 	    tx_ring->ring_size);
1056 
1057 	/* stop the queue when no more space available, the packet can have up
1058 	 * to sgl_size + 2. one for the meta descriptor and one for header
1059 	 * (if the header is larger than tx_max_header_size).
1060 	 */
1061 	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1062 	    adapter->max_tx_sgl_size + 2))) {
1063 		ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id);
1064 
1065 		tx_ring->running = false;
1066 		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
1067 
1068 		/* There is a rare condition where this function decides to
1069 		 * stop the queue but meanwhile tx_cleanup() updates
1070 		 * next_to_completion and terminates.
1071 		 * The queue will remain stopped forever.
1072 		 * To solve this issue this function performs mb(), checks
1073 		 * the wakeup condition and wakes up the queue if needed.
1074 		 */
1075 		mb();
1076 
1077 		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1078 		    ENA_TX_RESUME_THRESH)) {
1079 			tx_ring->running = true;
1080 			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
1081 		}
1082 	}
1083 
1084 	bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1085 	    BUS_DMASYNC_PREWRITE);
1086 
1087 	return (0);
1088 
1089 dma_error:
1090 	tx_info->mbuf = NULL;
1091 	bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1092 
1093 	return (rc);
1094 }
1095 
1096 static void
1097 ena_start_xmit(struct ena_ring *tx_ring)
1098 {
1099 	struct mbuf *mbuf;
1100 	struct ena_adapter *adapter = tx_ring->adapter;
1101 	struct ena_com_io_sq* io_sq;
1102 	int ena_qid;
1103 	int ret = 0;
1104 
1105 	ENA_RING_MTX_ASSERT(tx_ring);
1106 
1107 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1108 		return;
1109 
1110 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
1111 		return;
1112 
1113 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1114 	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1115 
1116 	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
1117 		ena_log_io(adapter->pdev, DBG,
1118 		    "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n",
1119 		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
1120 
1121 		if (unlikely(!tx_ring->running)) {
1122 			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1123 			break;
1124 		}
1125 
1126 		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
1127 			if (ret == ENA_COM_NO_MEM) {
1128 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1129 			} else if (ret == ENA_COM_NO_SPACE) {
1130 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1131 			} else {
1132 				m_freem(mbuf);
1133 				drbr_advance(adapter->ifp, tx_ring->br);
1134 			}
1135 
1136 			break;
1137 		}
1138 
1139 		drbr_advance(adapter->ifp, tx_ring->br);
1140 
1141 		if (unlikely((if_getdrvflags(adapter->ifp) &
1142 		    IFF_DRV_RUNNING) == 0))
1143 			return;
1144 
1145 		tx_ring->acum_pkts++;
1146 
1147 		BPF_MTAP(adapter->ifp, mbuf);
1148 	}
1149 
1150 	if (likely(tx_ring->acum_pkts != 0)) {
1151 		/* Trigger the dma engine */
1152 		ena_com_write_sq_doorbell(io_sq);
1153 		counter_u64_add(tx_ring->tx_stats.doorbells, 1);
1154 		tx_ring->acum_pkts = 0;
1155 	}
1156 
1157 	if (unlikely(!tx_ring->running))
1158 		taskqueue_enqueue(tx_ring->que->cleanup_tq,
1159 		    &tx_ring->que->cleanup_task);
1160 }
1161