xref: /freebsd/sys/dev/ena/ena_datapath.c (revision 8aac90f18aef7c9eea906c3ff9a001ca7b94f375)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 #include "opt_rss.h"
32 #include "ena.h"
33 #include "ena_datapath.h"
34 #ifdef DEV_NETMAP
35 #include "ena_netmap.h"
36 #endif /* DEV_NETMAP */
37 #ifdef RSS
38 #include <net/rss_config.h>
39 #endif /* RSS */
40 
41 #include <netinet6/ip6_var.h>
42 
43 /*********************************************************************
44  *  Static functions prototypes
45  *********************************************************************/
46 
47 static int ena_tx_cleanup(struct ena_ring *);
48 static int ena_rx_cleanup(struct ena_ring *);
49 static inline int ena_get_tx_req_id(struct ena_ring *tx_ring,
50     struct ena_com_io_cq *io_cq, uint16_t *req_id);
51 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
52     struct mbuf *);
53 static struct mbuf *ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
54     struct ena_com_rx_ctx *, uint16_t *);
55 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
56     struct mbuf *);
57 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool);
58 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
59     struct mbuf **mbuf);
60 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
61 static void ena_start_xmit(struct ena_ring *);
62 
63 /*********************************************************************
64  *  Global functions
65  *********************************************************************/
66 
67 void
68 ena_cleanup(void *arg, int pending)
69 {
70 	struct ena_que *que = arg;
71 	struct ena_adapter *adapter = que->adapter;
72 	if_t ifp = adapter->ifp;
73 	struct ena_ring *tx_ring;
74 	struct ena_ring *rx_ring;
75 	struct ena_com_io_cq *io_cq;
76 	struct ena_eth_io_intr_reg intr_reg;
77 	int qid, ena_qid;
78 	int txc, rxc, i;
79 
80 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
81 		return;
82 
83 	ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
84 
85 	tx_ring = que->tx_ring;
86 	rx_ring = que->rx_ring;
87 	qid = que->id;
88 	ena_qid = ENA_IO_TXQ_IDX(qid);
89 	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
90 
91 	atomic_store_8(&tx_ring->first_interrupt, 1);
92 	atomic_store_8(&rx_ring->first_interrupt, 1);
93 
94 	for (i = 0; i < ENA_CLEAN_BUDGET; ++i) {
95 		rxc = ena_rx_cleanup(rx_ring);
96 		txc = ena_tx_cleanup(tx_ring);
97 
98 		if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
99 			return;
100 
101 		if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET))
102 			break;
103 	}
104 
105 	/* Signal that work is done and unmask interrupt */
106 	ena_com_update_intr_reg(&intr_reg, ENA_RX_IRQ_INTERVAL,
107 	    ENA_TX_IRQ_INTERVAL, true, false);
108 	counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1);
109 	ena_com_unmask_intr(io_cq, &intr_reg);
110 }
111 
112 void
113 ena_deferred_mq_start(void *arg, int pending)
114 {
115 	struct ena_ring *tx_ring = (struct ena_ring *)arg;
116 	if_t ifp = tx_ring->adapter->ifp;
117 
118 	while (!drbr_empty(ifp, tx_ring->br) && tx_ring->running &&
119 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
120 		ENA_RING_MTX_LOCK(tx_ring);
121 		ena_start_xmit(tx_ring);
122 		ENA_RING_MTX_UNLOCK(tx_ring);
123 	}
124 }
125 
126 int
127 ena_mq_start(if_t ifp, struct mbuf *m)
128 {
129 	struct ena_adapter *adapter = if_getsoftc(ifp);
130 	struct ena_ring *tx_ring;
131 	int ret, is_drbr_empty;
132 	uint32_t i;
133 #ifdef RSS
134 	uint32_t bucket_id;
135 #endif
136 
137 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
138 		return (ENODEV);
139 
140 	/* Which queue to use */
141 	/*
142 	 * If everything is setup correctly, it should be the
143 	 * same bucket that the current CPU we're on is.
144 	 * It should improve performance.
145 	 */
146 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
147 #ifdef RSS
148 		if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
149 		    &bucket_id) == 0)
150 			i = bucket_id % adapter->num_io_queues;
151 		else
152 #endif
153 			i = m->m_pkthdr.flowid % adapter->num_io_queues;
154 	} else {
155 		i = curcpu % adapter->num_io_queues;
156 	}
157 	tx_ring = &adapter->tx_ring[i];
158 
159 	/* Check if drbr is empty before putting packet */
160 	is_drbr_empty = drbr_empty(ifp, tx_ring->br);
161 	ret = drbr_enqueue(ifp, tx_ring->br, m);
162 	if (unlikely(ret != 0)) {
163 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
164 		return (ret);
165 	}
166 
167 	if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
168 		ena_start_xmit(tx_ring);
169 		ENA_RING_MTX_UNLOCK(tx_ring);
170 	} else {
171 		taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
172 	}
173 
174 	return (0);
175 }
176 
177 void
178 ena_qflush(if_t ifp)
179 {
180 	struct ena_adapter *adapter = if_getsoftc(ifp);
181 	struct ena_ring *tx_ring = adapter->tx_ring;
182 	int i;
183 
184 	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
185 		if (!drbr_empty(ifp, tx_ring->br)) {
186 			ENA_RING_MTX_LOCK(tx_ring);
187 			drbr_flush(ifp, tx_ring->br);
188 			ENA_RING_MTX_UNLOCK(tx_ring);
189 		}
190 
191 	if_qflush(ifp);
192 }
193 
194 /*********************************************************************
195  *  Static functions
196  *********************************************************************/
197 
198 static inline int
199 ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq,
200     uint16_t *req_id)
201 {
202 	struct ena_adapter *adapter = tx_ring->adapter;
203 	int rc;
204 
205 	rc = ena_com_tx_comp_req_id_get(io_cq, req_id);
206 	if (rc == ENA_COM_TRY_AGAIN)
207 		return (EAGAIN);
208 
209 	if (unlikely(rc != 0)) {
210 		ena_log(adapter->pdev, ERR, "Invalid req_id %hu in qid %hu\n",
211 		    *req_id, tx_ring->qid);
212 		counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
213 		goto err;
214 	}
215 
216 	if (tx_ring->tx_buffer_info[*req_id].mbuf != NULL)
217 		return (0);
218 
219 	ena_log(adapter->pdev, ERR,
220 	    "tx_info doesn't have valid mbuf. req_id %hu qid %hu\n",
221 	    *req_id, tx_ring->qid);
222 err:
223 	ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
224 
225 	return (EFAULT);
226 }
227 
/**
 * ena_tx_cleanup - clear sent packets and corresponding descriptors
 * @tx_ring: ring for which we want to clean packets
 *
 * Once packets are sent, we ask the device in a loop for no longer used
 * descriptors. We find the related mbuf chain in a map (index in an array)
 * and free it, then update ring state.
 * The loop handles at most ENA_TX_BUDGET completions per call and ends
 * early as soon as ena_get_tx_req_id() fails to deliver a valid completion.
 * The ring state is published and the completions are acknowledged every
 * ENA_TX_COMMIT packets, and once more after the loop if something is still
 * uncommitted.
 *
 * Returns the number of completions handled in this call. When built with
 * DEV_NETMAP, 0 is returned if netmap_tx_irq() does not return NM_IRQ_PASS.
 **/
static int
ena_tx_cleanup(struct ena_ring *tx_ring)
{
	struct ena_adapter *adapter;
	struct ena_com_io_cq *io_cq;
	uint16_t next_to_clean;
	uint16_t req_id;
	uint16_t ena_qid;
	unsigned int total_done = 0;
	int rc;
	int commit = ENA_TX_COMMIT;
	int budget = ENA_TX_BUDGET;
	int work_done;
	bool above_thresh;

	adapter = tx_ring->que->adapter;
	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	/* Work on a local copy; the ring field is updated on commit only. */
	next_to_clean = tx_ring->next_to_clean;

#ifdef DEV_NETMAP
	if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	do {
		struct ena_tx_buffer *tx_info;
		struct mbuf *mbuf;

		/* Stop on the first missing or invalid completion. */
		rc = ena_get_tx_req_id(tx_ring, io_cq, &req_id);
		if (unlikely(rc != 0))
			break;

		tx_info = &tx_ring->tx_buffer_info[req_id];

		mbuf = tx_info->mbuf;

		/* Detach the mbuf from the slot and clear its timestamp. */
		tx_info->mbuf = NULL;
		bintime_clear(&tx_info->timestamp);

		bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);

		ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n",
		    tx_ring->qid, mbuf);

		m_freem(mbuf);

		/* Descriptors to be acknowledged with the next commit. */
		total_done += tx_info->tx_descs;

		/* Hand the request id back to the pool of free ids. */
		tx_ring->free_tx_ids[next_to_clean] = req_id;
		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
		    tx_ring->ring_size);

		if (unlikely(--commit == 0)) {
			commit = ENA_TX_COMMIT;
			/* update ring state every ENA_TX_COMMIT descriptor */
			tx_ring->next_to_clean = next_to_clean;
			ena_com_comp_ack(
			    &adapter->ena_dev->io_sq_queues[ena_qid],
			    total_done);
			total_done = 0;
		}
	} while (likely(--budget));

	/* budget is decremented only for successfully handled completions. */
	work_done = ENA_TX_BUDGET - budget;

	ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n",
	    tx_ring->qid, work_done);

	/* If there is still something to commit update ring state */
	if (likely(commit != ENA_TX_COMMIT)) {
		tx_ring->next_to_clean = next_to_clean;
		ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
		    total_done);
	}

	/*
	 * Need to make the rings circular update visible to
	 * ena_xmit_mbuf() before checking for tx_ring->running.
	 */
	mb();

	/*
	 * Restart a stopped ring once the submission queue has room for
	 * ENA_TX_RESUME_THRESH again. The unlocked test is only a hint; the
	 * condition is evaluated again with the ring mutex held before the
	 * ring is marked running and the enqueue task is scheduled.
	 */
	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
	    ENA_TX_RESUME_THRESH);
	if (unlikely(!tx_ring->running && above_thresh)) {
		ENA_RING_MTX_LOCK(tx_ring);
		above_thresh = ena_com_sq_have_enough_space(
		    tx_ring->ena_com_io_sq, ENA_TX_RESUME_THRESH);
		if (!tx_ring->running && above_thresh) {
			tx_ring->running = true;
			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
			taskqueue_enqueue(tx_ring->enqueue_tq,
			    &tx_ring->enqueue_task);
		}
		ENA_RING_MTX_UNLOCK(tx_ring);
	}

	/* Remember the tick count of this cleanup run. */
	tx_ring->tx_last_cleanup_ticks = ticks;

	return (work_done);
}
342 
343 static void
344 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
345     struct mbuf *mbuf)
346 {
347 	struct ena_adapter *adapter = rx_ring->adapter;
348 
349 	if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
350 		mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
351 
352 #ifdef RSS
353 		/*
354 		 * Hardware and software RSS are in agreement only when both are
355 		 * configured to Toeplitz algorithm.  This driver configures
356 		 * that algorithm only when software RSS is enabled and uses it.
357 		 */
358 		if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ &&
359 		    ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) {
360 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
361 			return;
362 		}
363 #endif
364 
365 		if (ena_rx_ctx->frag &&
366 		    (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
367 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
368 			return;
369 		}
370 
371 		switch (ena_rx_ctx->l3_proto) {
372 		case ENA_ETH_IO_L3_PROTO_IPV4:
373 			switch (ena_rx_ctx->l4_proto) {
374 			case ENA_ETH_IO_L4_PROTO_TCP:
375 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
376 				break;
377 			case ENA_ETH_IO_L4_PROTO_UDP:
378 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
379 				break;
380 			default:
381 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
382 			}
383 			break;
384 		case ENA_ETH_IO_L3_PROTO_IPV6:
385 			switch (ena_rx_ctx->l4_proto) {
386 			case ENA_ETH_IO_L4_PROTO_TCP:
387 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
388 				break;
389 			case ENA_ETH_IO_L4_PROTO_UDP:
390 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
391 				break;
392 			default:
393 				M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
394 			}
395 			break;
396 		case ENA_ETH_IO_L3_PROTO_UNKNOWN:
397 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
398 			break;
399 		default:
400 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
401 		}
402 	} else {
403 		mbuf->m_pkthdr.flowid = rx_ring->qid;
404 		M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
405 	}
406 }
407 
408 /**
409  * ena_rx_mbuf - assemble mbuf from descriptors
410  * @rx_ring: ring for which we want to clean packets
411  * @ena_bufs: buffer info
412  * @ena_rx_ctx: metadata for this packet(s)
413  * @next_to_clean: ring pointer, will be updated only upon success
414  *
415  **/
416 static struct mbuf *
417 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
418     struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
419 {
420 	struct mbuf *mbuf;
421 	struct ena_rx_buffer *rx_info;
422 	struct ena_adapter *adapter;
423 	device_t pdev;
424 	unsigned int descs = ena_rx_ctx->descs;
425 	uint16_t ntc, len, req_id, buf = 0;
426 
427 	ntc = *next_to_clean;
428 	adapter = rx_ring->adapter;
429 	pdev = adapter->pdev;
430 
431 	len = ena_bufs[buf].len;
432 	req_id = ena_bufs[buf].req_id;
433 	rx_info = &rx_ring->rx_buffer_info[req_id];
434 	if (unlikely(rx_info->mbuf == NULL)) {
435 		ena_log(pdev, ERR, "NULL mbuf in rx_info");
436 		return (NULL);
437 	}
438 
439 	ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info,
440 	    rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
441 
442 	bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
443 	    BUS_DMASYNC_POSTREAD);
444 	mbuf = rx_info->mbuf;
445 	mbuf->m_flags |= M_PKTHDR;
446 	mbuf->m_pkthdr.len = len;
447 	mbuf->m_len = len;
448 	/* Only for the first segment the data starts at specific offset */
449 	mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset);
450 	ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset);
451 	mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
452 
453 	/* Fill mbuf with hash key and it's interpretation for optimization */
454 	ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
455 
456 	ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf,
457 	    mbuf->m_flags, mbuf->m_pkthdr.len);
458 
459 	/* DMA address is not needed anymore, unmap it */
460 	bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
461 
462 	rx_info->mbuf = NULL;
463 	rx_ring->free_rx_ids[ntc] = req_id;
464 	ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
465 
466 	/*
467 	 * While we have more than 1 descriptors for one rcvd packet, append
468 	 * other mbufs to the main one
469 	 */
470 	while (--descs) {
471 		++buf;
472 		len = ena_bufs[buf].len;
473 		req_id = ena_bufs[buf].req_id;
474 		rx_info = &rx_ring->rx_buffer_info[req_id];
475 
476 		if (unlikely(rx_info->mbuf == NULL)) {
477 			ena_log(pdev, ERR, "NULL mbuf in rx_info");
478 			/*
479 			 * If one of the required mbufs was not allocated yet,
480 			 * we can break there.
481 			 * All earlier used descriptors will be reallocated
482 			 * later and not used mbufs can be reused.
483 			 * The next_to_clean pointer will not be updated in case
484 			 * of an error, so caller should advance it manually
485 			 * in error handling routine to keep it up to date
486 			 * with hw ring.
487 			 */
488 			m_freem(mbuf);
489 			return (NULL);
490 		}
491 
492 		bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
493 		    BUS_DMASYNC_POSTREAD);
494 		if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
495 			counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
496 			ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n",
497 			    mbuf);
498 		}
499 
500 		ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n",
501 		    mbuf->m_pkthdr.len);
502 
503 		/* Free already appended mbuf, it won't be useful anymore */
504 		bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
505 		m_freem(rx_info->mbuf);
506 		rx_info->mbuf = NULL;
507 
508 		rx_ring->free_rx_ids[ntc] = req_id;
509 		ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
510 	}
511 
512 	*next_to_clean = ntc;
513 
514 	return (mbuf);
515 }
516 
517 /**
518  * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
519  **/
520 static inline void
521 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
522     struct mbuf *mbuf)
523 {
524 	device_t pdev = rx_ring->adapter->pdev;
525 
526 	/* if IP and error */
527 	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
528 	    ena_rx_ctx->l3_csum_err)) {
529 		/* ipv4 checksum error */
530 		mbuf->m_pkthdr.csum_flags = 0;
531 		counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
532 		ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n");
533 		return;
534 	}
535 
536 	/* if TCP/UDP */
537 	if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
538 	    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
539 		if (ena_rx_ctx->l4_csum_err) {
540 			/* TCP/UDP checksum error */
541 			mbuf->m_pkthdr.csum_flags = 0;
542 			counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
543 			ena_log_io(pdev, DBG, "RX L4 checksum error\n");
544 		} else {
545 			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
546 			mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
547 			counter_u64_add(rx_ring->rx_stats.csum_good, 1);
548 		}
549 	}
550 }
551 
/**
 * ena_rx_cleanup - handle rx irq
 * @rx_ring: ring for which irq is being handled
 *
 * Pulls up to ENA_RX_BUDGET packets from the completion queue, builds an
 * mbuf for each of them with ena_rx_mbuf() and passes it to LRO or to
 * if_input(). Afterwards the ring pointer is stored, Rx buffers are refilled
 * if enough submission queue entries are free, and LRO is flushed.
 *
 * Returns the number of packets processed. 0 is returned if ena_com_rx_pkt()
 * fails (a device reset is triggered in that case) and, when built with
 * DEV_NETMAP, if netmap_rx_irq() does not return NM_IRQ_PASS.
 **/
static int
ena_rx_cleanup(struct ena_ring *rx_ring)
{
	struct ena_adapter *adapter;
	device_t pdev;
	struct mbuf *mbuf;
	struct ena_com_rx_ctx ena_rx_ctx;
	struct ena_com_io_cq *io_cq;
	struct ena_com_io_sq *io_sq;
	enum ena_regs_reset_reason_types reset_reason;
	if_t ifp;
	uint16_t ena_qid;
	uint16_t next_to_clean;
	uint32_t refill_required;
	uint32_t refill_threshold;
	uint32_t do_if_input = 0;
	unsigned int qid;
	int rc, i;
	int budget = ENA_RX_BUDGET;
#ifdef DEV_NETMAP
	int done;
#endif /* DEV_NETMAP */

	adapter = rx_ring->que->adapter;
	pdev = adapter->pdev;
	ifp = adapter->ifp;
	qid = rx_ring->que->id;
	ena_qid = ENA_IO_RXQ_IDX(qid);
	io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
	io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
	/* Work on a local copy; it is stored back after the loop. */
	next_to_clean = rx_ring->next_to_clean;

#ifdef DEV_NETMAP
	if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
		return (0);
#endif /* DEV_NETMAP */

	ena_log_io(pdev, DBG, "rx: qid %d\n", qid);

	do {
		/* Set up a fresh context for the next packet. */
		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
		ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
		ena_rx_ctx.descs = 0;
		ena_rx_ctx.pkt_offset = 0;

		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
		rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
		if (unlikely(rc != 0)) {
			/*
			 * Any failure ends in a device reset; only the
			 * counter and the reset reason depend on the error.
			 */
			if (rc == ENA_COM_NO_SPACE) {
				counter_u64_add(rx_ring->rx_stats.bad_desc_num,
				    1);
				reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
			} else {
				counter_u64_add(rx_ring->rx_stats.bad_req_id,
				    1);
				reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
			}
			ena_trigger_reset(adapter, reset_reason);
			return (0);
		}

		/* No descriptors means there is nothing more to receive. */
		if (unlikely(ena_rx_ctx.descs == 0))
			break;

		ena_log_io(pdev, DBG,
		    "rx: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
		    rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
		    ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

		/* Receive mbuf from the ring */
		mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, &ena_rx_ctx,
		    &next_to_clean);
		bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
		    io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
		/* Exit if we failed to retrieve a buffer */
		if (unlikely(mbuf == NULL)) {
			/*
			 * ena_rx_mbuf() did not advance next_to_clean, so
			 * return the request ids of all descriptors of this
			 * packet to the free list and advance it here.
			 */
			for (i = 0; i < ena_rx_ctx.descs; ++i) {
				rx_ring->free_rx_ids[next_to_clean] =
				    rx_ring->ena_bufs[i].req_id;
				next_to_clean = ENA_RX_RING_IDX_NEXT(
				    next_to_clean, rx_ring->ring_size);
			}
			break;
		}

		/* Report the checksum state if Rx checksum offload is on. */
		if (((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) ||
		    ((if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) != 0)) {
			ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.bytes,
		    mbuf->m_pkthdr.len);
		counter_u64_add_protected(adapter->hw_stats.rx_bytes,
		    mbuf->m_pkthdr.len);
		counter_exit();
		/*
		 * LRO is only for IP/TCP packets and TCP checksum of the packet
		 * should be computed by hardware.
		 */
		do_if_input = 1;
		if (((if_getcapenable(ifp) & IFCAP_LRO) != 0)  &&
		    ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
		    (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
			/*
			 * Send to the stack if:
			 *  - LRO not enabled, or
			 *  - no LRO resources, or
			 *  - lro enqueue fails
			 */
			if ((rx_ring->lro.lro_cnt != 0) &&
			    (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
				do_if_input = 0;
		}
		if (do_if_input != 0) {
			ena_log_io(pdev, DBG,
			    "calling if_input() with mbuf %p\n", mbuf);
			if_input(ifp, mbuf);
		}

		counter_enter();
		counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
		counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
		counter_exit();
	} while (--budget);

	rx_ring->next_to_clean = next_to_clean;

	/*
	 * Refill only if the number of free submission queue entries exceeds
	 * the smaller of ring_size / ENA_RX_REFILL_THRESH_DIVIDER and
	 * ENA_RX_REFILL_THRESH_PACKET.
	 */
	refill_required = ena_com_free_q_entries(io_sq);
	refill_threshold = min_t(int,
	    rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
	    ENA_RX_REFILL_THRESH_PACKET);

	if (refill_required > refill_threshold) {
		ena_refill_rx_bufs(rx_ring, refill_required);
	}

	/* Push everything LRO still holds up to the stack. */
	tcp_lro_flush_all(&rx_ring->lro);

	/* budget is decremented only for fully processed packets. */
	return (ENA_RX_BUDGET - budget);
}
698 
699 static void
700 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
701     bool disable_meta_caching)
702 {
703 	struct ena_com_tx_meta *ena_meta;
704 	struct ether_vlan_header *eh;
705 	struct mbuf *mbuf_next;
706 	u32 mss;
707 	bool offload;
708 	uint16_t etype;
709 	int ehdrlen;
710 	struct ip *ip;
711 	int ipproto;
712 	int iphlen;
713 	struct tcphdr *th;
714 	int offset;
715 
716 	offload = false;
717 	ena_meta = &ena_tx_ctx->ena_meta;
718 	mss = mbuf->m_pkthdr.tso_segsz;
719 
720 	if (mss != 0)
721 		offload = true;
722 
723 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
724 		offload = true;
725 
726 	if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
727 		offload = true;
728 
729 	if ((mbuf->m_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0)
730 		offload = true;
731 
732 	if (!offload) {
733 		if (disable_meta_caching) {
734 			memset(ena_meta, 0, sizeof(*ena_meta));
735 			ena_tx_ctx->meta_valid = 1;
736 		} else {
737 			ena_tx_ctx->meta_valid = 0;
738 		}
739 		return;
740 	}
741 
742 	/* Determine where frame payload starts. */
743 	eh = mtod(mbuf, struct ether_vlan_header *);
744 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
745 		etype = ntohs(eh->evl_proto);
746 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
747 	} else {
748 		etype = ntohs(eh->evl_encap_proto);
749 		ehdrlen = ETHER_HDR_LEN;
750 	}
751 
752 	mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
753 
754 	switch (etype) {
755 	case ETHERTYPE_IP:
756 		ip = (struct ip *)(mtodo(mbuf_next, offset));
757 		iphlen = ip->ip_hl << 2;
758 		ipproto = ip->ip_p;
759 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
760 		if ((ip->ip_off & htons(IP_DF)) != 0)
761 			ena_tx_ctx->df = 1;
762 		break;
763 	case ETHERTYPE_IPV6:
764 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
765 		iphlen = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &ipproto);
766 		iphlen -= ehdrlen;
767 		ena_tx_ctx->df = 1;
768 		break;
769 	default:
770 		iphlen = 0;
771 		ipproto = 0;
772 		break;
773 	}
774 
775 	mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
776 	th = (struct tcphdr *)(mtodo(mbuf_next, offset));
777 
778 	if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
779 		ena_tx_ctx->l3_csum_enable = 1;
780 	}
781 	if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
782 		ena_tx_ctx->tso_enable = 1;
783 		ena_meta->l4_hdr_len = (th->th_off);
784 	}
785 
786 	if (ipproto == IPPROTO_TCP) {
787 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
788 		if ((mbuf->m_pkthdr.csum_flags &
789 		    (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
790 			ena_tx_ctx->l4_csum_enable = 1;
791 		else
792 			ena_tx_ctx->l4_csum_enable = 0;
793 	} else if (ipproto == IPPROTO_UDP) {
794 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
795 		if ((mbuf->m_pkthdr.csum_flags &
796 		    (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
797 			ena_tx_ctx->l4_csum_enable = 1;
798 		else
799 			ena_tx_ctx->l4_csum_enable = 0;
800 	} else {
801 		ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
802 		ena_tx_ctx->l4_csum_enable = 0;
803 	}
804 
805 	ena_meta->mss = mss;
806 	ena_meta->l3_hdr_len = iphlen;
807 	ena_meta->l3_hdr_offset = ehdrlen;
808 	ena_tx_ctx->meta_valid = 1;
809 }
810 
811 static int
812 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
813 {
814 	struct ena_adapter *adapter;
815 	struct mbuf *collapsed_mbuf;
816 	int num_frags;
817 
818 	adapter = tx_ring->adapter;
819 	num_frags = ena_mbuf_count(*mbuf);
820 
821 	/* One segment must be reserved for configuration descriptor. */
822 	if (num_frags < adapter->max_tx_sgl_size)
823 		return (0);
824 
825 	if ((num_frags == adapter->max_tx_sgl_size) &&
826 	    ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size))
827 		return (0);
828 
829 	counter_u64_add(tx_ring->tx_stats.collapse, 1);
830 
831 	collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
832 	    adapter->max_tx_sgl_size - 1);
833 	if (unlikely(collapsed_mbuf == NULL)) {
834 		counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
835 		return (ENOMEM);
836 	}
837 
838 	/* If mbuf was collapsed succesfully, original mbuf is released. */
839 	*mbuf = collapsed_mbuf;
840 
841 	return (0);
842 }
843 
844 static int
845 ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
846     struct mbuf *mbuf, void **push_hdr, u16 *header_len)
847 {
848 	struct ena_adapter *adapter = tx_ring->adapter;
849 	struct ena_com_buf *ena_buf;
850 	bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
851 	size_t iseg = 0;
852 	uint32_t mbuf_head_len;
853 	uint16_t offset;
854 	int rc, nsegs;
855 
856 	mbuf_head_len = mbuf->m_len;
857 	tx_info->mbuf = mbuf;
858 	ena_buf = tx_info->bufs;
859 
860 	/*
861 	 * For easier maintaining of the DMA map, map the whole mbuf even if
862 	 * the LLQ is used. The descriptors will be filled using the segments.
863 	 */
864 	rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag,
865 	    tx_info->dmamap, mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
866 	if (unlikely((rc != 0) || (nsegs == 0))) {
867 		ena_log_io(adapter->pdev, WARN,
868 		    "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
869 		goto dma_error;
870 	}
871 
872 	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
873 		/*
874 		 * When the device is LLQ mode, the driver will copy
875 		 * the header into the device memory space.
876 		 * the ena_com layer assumes the header is in a linear
877 		 * memory space.
878 		 * This assumption might be wrong since part of the header
879 		 * can be in the fragmented buffers.
880 		 * First check if header fits in the mbuf. If not, copy it to
881 		 * separate buffer that will be holding linearized data.
882 		 */
883 		*header_len = min_t(uint32_t, mbuf->m_pkthdr.len,
884 		    tx_ring->tx_max_header_size);
885 
886 		/* If header is in linear space, just point into mbuf's data. */
887 		if (likely(*header_len <= mbuf_head_len)) {
888 			*push_hdr = mbuf->m_data;
889 		/*
890 		 * Otherwise, copy whole portion of header from multiple
891 		 * mbufs to intermediate buffer.
892 		 */
893 		} else {
894 			m_copydata(mbuf, 0, *header_len,
895 			    tx_ring->push_buf_intermediate_buf);
896 			*push_hdr = tx_ring->push_buf_intermediate_buf;
897 
898 			counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
899 		}
900 
901 		ena_log_io(adapter->pdev, DBG,
902 		    "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
903 		    mbuf, *push_hdr, *header_len);
904 
905 		/* If packet is fitted in LLQ header, no need for DMA segments. */
906 		if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
907 			return (0);
908 		} else {
909 			offset = tx_ring->tx_max_header_size;
910 			/*
911 			 * As Header part is mapped to LLQ header, we can skip
912 			 * it and just map the residuum of the mbuf to DMA
913 			 * Segments.
914 			 */
915 			while (offset > 0) {
916 				if (offset >= segs[iseg].ds_len) {
917 					offset -= segs[iseg].ds_len;
918 				} else {
919 					ena_buf->paddr = segs[iseg].ds_addr +
920 					    offset;
921 					ena_buf->len = segs[iseg].ds_len -
922 					    offset;
923 					ena_buf++;
924 					tx_info->num_of_bufs++;
925 					offset = 0;
926 				}
927 				iseg++;
928 			}
929 		}
930 	} else {
931 		*push_hdr = NULL;
932 		/*
933 		 * header_len is just a hint for the device. Because FreeBSD is
934 		 * not giving us information about packet header length and it
935 		 * is not guaranteed that all packet headers will be in the 1st
936 		 * mbuf, setting header_len to 0 is making the device ignore
937 		 * this value and resolve header on it's own.
938 		 */
939 		*header_len = 0;
940 	}
941 
942 	/* Map rest of the mbuf */
943 	while (iseg < nsegs) {
944 		ena_buf->paddr = segs[iseg].ds_addr;
945 		ena_buf->len = segs[iseg].ds_len;
946 		ena_buf++;
947 		iseg++;
948 		tx_info->num_of_bufs++;
949 	}
950 
951 	return (0);
952 
953 dma_error:
954 	counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
955 	tx_info->mbuf = NULL;
956 	return (rc);
957 }
958 
959 static int
960 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
961 {
962 	struct ena_adapter *adapter;
963 	device_t pdev;
964 	struct ena_tx_buffer *tx_info;
965 	struct ena_com_tx_ctx ena_tx_ctx;
966 	struct ena_com_dev *ena_dev;
967 	struct ena_com_io_sq *io_sq;
968 	void *push_hdr;
969 	uint16_t next_to_use;
970 	uint16_t req_id;
971 	uint16_t ena_qid;
972 	uint16_t header_len;
973 	int rc;
974 	int nb_hw_desc;
975 
976 	ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
977 	adapter = tx_ring->que->adapter;
978 	pdev = adapter->pdev;
979 	ena_dev = adapter->ena_dev;
980 	io_sq = &ena_dev->io_sq_queues[ena_qid];
981 
982 	rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
983 	if (unlikely(rc != 0)) {
984 		ena_log_io(pdev, WARN, "Failed to collapse mbuf! err: %d\n",
985 		    rc);
986 		return (rc);
987 	}
988 
989 	ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
990 
991 	next_to_use = tx_ring->next_to_use;
992 	req_id = tx_ring->free_tx_ids[next_to_use];
993 	tx_info = &tx_ring->tx_buffer_info[req_id];
994 	tx_info->num_of_bufs = 0;
995 
996 	ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev,
997 	    "mbuf isn't NULL for req_id %d\n", req_id);
998 
999 	rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
1000 	if (unlikely(rc != 0)) {
1001 		ena_log_io(pdev, WARN, "Failed to map TX mbuf\n");
1002 		return (rc);
1003 	}
1004 	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
1005 	ena_tx_ctx.ena_bufs = tx_info->bufs;
1006 	ena_tx_ctx.push_header = push_hdr;
1007 	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
1008 	ena_tx_ctx.req_id = req_id;
1009 	ena_tx_ctx.header_len = header_len;
1010 
1011 	/* Set flags and meta data */
1012 	ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching);
1013 
1014 	if (tx_ring->acum_pkts == ENA_DB_THRESHOLD ||
1015 	    ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
1016 		ena_log_io(pdev, DBG,
1017 		    "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
1018 		    tx_ring->que->id);
1019 		ena_ring_tx_doorbell(tx_ring);
1020 	}
1021 
1022 	/* Prepare the packet's descriptors and send them to device */
1023 	rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
1024 	if (unlikely(rc != 0)) {
1025 		if (likely(rc == ENA_COM_NO_MEM)) {
1026 			ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n",
1027 			    tx_ring->que->id);
1028 		} else {
1029 			ena_log(pdev, ERR, "failed to prepare tx bufs\n");
1030 			ena_trigger_reset(adapter,
1031 			    ENA_REGS_RESET_DRIVER_INVALID_STATE);
1032 		}
1033 		counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
1034 		goto dma_error;
1035 	}
1036 
1037 	counter_enter();
1038 	counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
1039 	counter_u64_add_protected(tx_ring->tx_stats.bytes,
1040 	    (*mbuf)->m_pkthdr.len);
1041 
1042 	counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
1043 	counter_u64_add_protected(adapter->hw_stats.tx_bytes,
1044 	    (*mbuf)->m_pkthdr.len);
1045 	counter_exit();
1046 
1047 	tx_info->tx_descs = nb_hw_desc;
1048 	getbinuptime(&tx_info->timestamp);
1049 	tx_info->print_once = true;
1050 
1051 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
1052 	    tx_ring->ring_size);
1053 
1054 	/* stop the queue when no more space available, the packet can have up
1055 	 * to sgl_size + 2. one for the meta descriptor and one for header
1056 	 * (if the header is larger than tx_max_header_size).
1057 	 */
1058 	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1059 	    adapter->max_tx_sgl_size + 2))) {
1060 		ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id);
1061 
1062 		tx_ring->running = false;
1063 		counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
1064 
1065 		/* There is a rare condition where this function decides to
1066 		 * stop the queue but meanwhile tx_cleanup() updates
1067 		 * next_to_completion and terminates.
1068 		 * The queue will remain stopped forever.
1069 		 * To solve this issue this function performs mb(), checks
1070 		 * the wakeup condition and wakes up the queue if needed.
1071 		 */
1072 		mb();
1073 
1074 		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1075 		    ENA_TX_RESUME_THRESH)) {
1076 			tx_ring->running = true;
1077 			counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
1078 		}
1079 	}
1080 
1081 	bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
1082 	    BUS_DMASYNC_PREWRITE);
1083 
1084 	return (0);
1085 
1086 dma_error:
1087 	tx_info->mbuf = NULL;
1088 	bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
1089 
1090 	return (rc);
1091 }
1092 
1093 static void
1094 ena_start_xmit(struct ena_ring *tx_ring)
1095 {
1096 	struct mbuf *mbuf;
1097 	struct ena_adapter *adapter = tx_ring->adapter;
1098 	int ret = 0;
1099 
1100 	ENA_RING_MTX_ASSERT(tx_ring);
1101 
1102 	if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1103 		return;
1104 
1105 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
1106 		return;
1107 
1108 	while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
1109 		ena_log_io(adapter->pdev, DBG,
1110 		    "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n",
1111 		    mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
1112 
1113 		if (unlikely(!tx_ring->running)) {
1114 			drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1115 			break;
1116 		}
1117 
1118 		if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
1119 			if (ret == ENA_COM_NO_MEM) {
1120 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1121 			} else if (ret == ENA_COM_NO_SPACE) {
1122 				drbr_putback(adapter->ifp, tx_ring->br, mbuf);
1123 			} else {
1124 				m_freem(mbuf);
1125 				drbr_advance(adapter->ifp, tx_ring->br);
1126 			}
1127 
1128 			break;
1129 		}
1130 
1131 		drbr_advance(adapter->ifp, tx_ring->br);
1132 
1133 		if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
1134 			return;
1135 
1136 		tx_ring->acum_pkts++;
1137 
1138 		BPF_MTAP(adapter->ifp, mbuf);
1139 	}
1140 
1141 	if (likely(tx_ring->acum_pkts != 0)) {
1142 		/* Trigger the dma engine */
1143 		ena_ring_tx_doorbell(tx_ring);
1144 	}
1145 
1146 	if (unlikely(!tx_ring->running))
1147 		taskqueue_enqueue(tx_ring->que->cleanup_tq,
1148 		    &tx_ring->que->cleanup_task);
1149 }
1150