1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2024 Google LLC
5  *
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * 3. Neither the name of the copyright holder nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software without
18  *    specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
24  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_inet6.h"
33 
34 #include "gve.h"
35 #include "gve_dqo.h"
36 
37 static void
38 gve_unmap_packet(struct gve_tx_ring *tx,
39     struct gve_tx_pending_pkt_dqo *pending_pkt)
40 {
41 	bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
42 	    BUS_DMASYNC_POSTWRITE);
43 	bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
44 }
45 
46 static void
47 gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo *pending_pkt)
48 {
49 	pending_pkt->qpl_buf_head = -1;
50 	pending_pkt->num_qpl_bufs = 0;
51 }
52 
53 static void
54 gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
55 {
56 	struct gve_tx_pending_pkt_dqo *pending_pkt;
57 	int i;
58 
59 	for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
60 		pending_pkt = &tx->dqo.pending_pkts[i];
61 		if (!pending_pkt->mbuf)
62 			continue;
63 
64 		if (gve_is_qpl(tx->com.priv))
65 			gve_clear_qpl_pending_pkt(pending_pkt);
66 		else
67 			gve_unmap_packet(tx, pending_pkt);
68 
69 		m_freem(pending_pkt->mbuf);
70 		pending_pkt->mbuf = NULL;
71 	}
72 }
73 
74 void
75 gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
76 {
77 	struct gve_tx_ring *tx = &priv->tx[i];
78 	int j;
79 
80 	if (tx->dqo.desc_ring != NULL) {
81 		gve_dma_free_coherent(&tx->desc_ring_mem);
82 		tx->dqo.desc_ring = NULL;
83 	}
84 
85 	if (tx->dqo.compl_ring != NULL) {
86 		gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
87 		tx->dqo.compl_ring = NULL;
88 	}
89 
90 	if (tx->dqo.pending_pkts != NULL) {
91 		gve_free_tx_mbufs_dqo(tx);
92 
93 		if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
94 			for (j = 0; j < tx->dqo.num_pending_pkts; j++)
95 				if (tx->dqo.pending_pkts[j].state !=
96 				    GVE_PACKET_STATE_UNALLOCATED)
97 					bus_dmamap_destroy(tx->dqo.buf_dmatag,
98 					    tx->dqo.pending_pkts[j].dmamap);
99 		}
100 
101 		free(tx->dqo.pending_pkts, M_GVE);
102 		tx->dqo.pending_pkts = NULL;
103 	}
104 
105 	if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
106 		bus_dma_tag_destroy(tx->dqo.buf_dmatag);
107 
108 	if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
109 		free(tx->dqo.qpl_bufs, M_GVE);
110 		tx->dqo.qpl_bufs = NULL;
111 	}
112 }
113 
114 static int
115 gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
116 {
117 	struct gve_priv *priv = tx->com.priv;
118 	int err;
119 	int j;
120 
121 	/*
122 	 * DMA tag for mapping Tx mbufs
123 	 * The maxsize, nsegments, and maxsegsize params should match
124 	 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
125 	 */
126 	err = bus_dma_tag_create(
127 	    bus_get_dma_tag(priv->dev),	/* parent */
128 	    1, 0,			/* alignment, bounds */
129 	    BUS_SPACE_MAXADDR,		/* lowaddr */
130 	    BUS_SPACE_MAXADDR,		/* highaddr */
131 	    NULL, NULL,			/* filter, filterarg */
132 	    GVE_TSO_MAXSIZE_DQO,	/* maxsize */
133 	    GVE_TX_MAX_DATA_DESCS_DQO,	/* nsegments */
134 	    GVE_TX_MAX_BUF_SIZE_DQO,	/* maxsegsize */
135 	    BUS_DMA_ALLOCNOW,		/* flags */
136 	    NULL,			/* lockfunc */
137 	    NULL,			/* lockarg */
138 	    &tx->dqo.buf_dmatag);
139 	if (err != 0) {
140 		device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
141 		    __func__, err);
142 		return (err);
143 	}
144 
145 	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
146 		err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
147 		    &tx->dqo.pending_pkts[j].dmamap);
148 		if (err != 0) {
149 			device_printf(priv->dev,
150 			    "err in creating pending pkt dmamap %d: %d",
151 			    j, err);
152 			return (err);
153 		}
154 		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
155 	}
156 
157 	return (0);
158 }
159 
160 int
161 gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
162 {
163 	struct gve_tx_ring *tx = &priv->tx[i];
164 	uint16_t num_pending_pkts;
165 	int err;
166 
167 	/* Descriptor ring */
168 	err = gve_dma_alloc_coherent(priv,
169 	    sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
170 	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
171 	if (err != 0) {
172 		device_printf(priv->dev,
173 		    "Failed to alloc desc ring for tx ring %d", i);
174 		goto abort;
175 	}
176 	tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;
177 
178 	/* Completion ring */
179 	err = gve_dma_alloc_coherent(priv,
180 	    sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
181 	    CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
182 	if (err != 0) {
183 		device_printf(priv->dev,
184 		    "Failed to alloc compl ring for tx ring %d", i);
185 		goto abort;
186 	}
187 	tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;
188 
189 	/*
190 	 * pending_pkts array
191 	 *
192 	 * The max number of pending packets determines the maximum number of
193 	 * descriptors which may be written to the completion queue.
194 	 *
195 	 * We must set the number small enough to make sure we never overrun the
196 	 * completion queue.
197 	 */
198 	num_pending_pkts = priv->tx_desc_cnt;
199 	/*
200 	 * Reserve space for descriptor completions, which will be reported at
201 	 * most every GVE_TX_MIN_RE_INTERVAL packets.
202 	 */
203 	num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;
204 
205 	tx->dqo.num_pending_pkts = num_pending_pkts;
206 	tx->dqo.pending_pkts = malloc(
207 	    sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
208 	    M_GVE, M_WAITOK | M_ZERO);
209 
210 	if (gve_is_qpl(priv)) {
211 		int qpl_buf_cnt;
212 
213 		tx->com.qpl = &priv->qpls[i];
214 		qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
215 		    tx->com.qpl->num_pages;
216 
217 		tx->dqo.qpl_bufs = malloc(
218 		    sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
219 		    M_GVE, M_WAITOK | M_ZERO);
220 	} else {
221 		err = gve_tx_alloc_rda_fields_dqo(tx);
		if (err != 0)
			goto abort;
	}
222 	return (0);
223 
224 abort:
225 	gve_tx_free_ring_dqo(priv, i);
226 	return (err);
227 }
228 
229 static void
230 gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
231     struct gve_tx_metadata_dqo *metadata)
232 {
233 	uint32_t hash = mbuf->m_pkthdr.flowid;
234 	uint16_t path_hash;
235 
236 	metadata->version = GVE_TX_METADATA_VERSION_DQO;
237 	if (hash) {
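		/* Fold the 32-bit flowid into a non-zero 15-bit path hash. */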
238 		path_hash = hash ^ (hash >> 16);
239 
240 		path_hash &= (1 << 15) - 1;
241 		if (__predict_false(path_hash == 0))
242 			path_hash = ~path_hash;
243 
244 		metadata->path_hash = path_hash;
245 	}
246 }
247 
248 static void
249 gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
250     uint32_t *desc_idx, uint32_t len, uint64_t addr,
251     int16_t compl_tag, bool eop, bool csum_enabled)
252 {
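	/*
	 * A buffer longer than GVE_TX_MAX_BUF_SIZE_DQO is split across
	 * multiple packet descriptors; end_of_packet is set only on the
	 * descriptor carrying the final bytes of the packet.
	 */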
253 	while (len > 0) {
254 		struct gve_tx_pkt_desc_dqo *desc =
255 		    &tx->dqo.desc_ring[*desc_idx].pkt;
256 		uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
257 		bool cur_eop = eop && cur_len == len;
258 
259 		*desc = (struct gve_tx_pkt_desc_dqo){
260 			.buf_addr = htole64(addr),
261 			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
262 			.end_of_packet = cur_eop,
263 			.checksum_offload_enable = csum_enabled,
264 			.compl_tag = htole16(compl_tag),
265 			.buf_size = cur_len,
266 		};
267 
268 		addr += cur_len;
269 		len -= cur_len;
270 		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
271 	}
272 }
273 
274 static void
275 gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
276     const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
277     int header_len)
278 {
279 	*desc = (struct gve_tx_tso_context_desc_dqo){
280 		.header_len = header_len,
281 		.cmd_dtype = {
282 			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
283 			.tso = 1,
284 		},
285 		.flex0 = metadata->bytes[0],
286 		.flex5 = metadata->bytes[5],
287 		.flex6 = metadata->bytes[6],
288 		.flex7 = metadata->bytes[7],
289 		.flex8 = metadata->bytes[8],
290 		.flex9 = metadata->bytes[9],
291 		.flex10 = metadata->bytes[10],
292 		.flex11 = metadata->bytes[11],
293 	};
294 	desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
295 	desc->mss = mbuf->m_pkthdr.tso_segsz;
296 }
297 
298 static void
299 gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
300     const struct gve_tx_metadata_dqo *metadata)
301 {
302 	*desc = (struct gve_tx_general_context_desc_dqo){
303 		.flex0 = metadata->bytes[0],
304 		.flex1 = metadata->bytes[1],
305 		.flex2 = metadata->bytes[2],
306 		.flex3 = metadata->bytes[3],
307 		.flex4 = metadata->bytes[4],
308 		.flex5 = metadata->bytes[5],
309 		.flex6 = metadata->bytes[6],
310 		.flex7 = metadata->bytes[7],
311 		.flex8 = metadata->bytes[8],
312 		.flex9 = metadata->bytes[9],
313 		.flex10 = metadata->bytes[10],
314 		.flex11 = metadata->bytes[11],
315 		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
316 	};
317 }
318 
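/*
 * Ensure the first mbuf of the chain holds at least `len` contiguous bytes,
 * pulling data up if necessary; on failure the enclosing function returns
 * EINVAL.
 */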
319 #define PULLUP_HDR(m, len)				\
320 do {							\
321 	if (__predict_false((m)->m_len < (len))) {	\
322 		(m) = m_pullup((m), (len));		\
323 		if ((m) == NULL)			\
324 			return (EINVAL);		\
325 	}						\
326 } while (0)
327 
328 static int
329 gve_prep_tso(struct mbuf *mbuf, int *header_len)
330 {
331 	uint8_t l3_off, l4_off = 0;
332 	struct ether_header *eh;
333 	struct tcphdr *th;
334 	u_short csum;
335 
336 	PULLUP_HDR(mbuf, sizeof(*eh));
337 	eh = mtod(mbuf, struct ether_header *);
338 	KASSERT(eh->ether_type != ETHERTYPE_VLAN,
339 	    ("VLAN-tagged packets not supported"));
340 	l3_off = ETHER_HDR_LEN;
341 
342 #ifdef INET6
343 	if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
344 		struct ip6_hdr *ip6;
345 
346 		PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
347 		ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
348 		l4_off = l3_off + sizeof(struct ip6_hdr);
349 		csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
350 		    /*csum=*/0);
351 	} else
352 #endif
353 	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
354 		struct ip *ip;
355 
356 		PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
357 		ip = (struct ip *)(mtodo(mbuf, l3_off));
358 		l4_off = l3_off + (ip->ip_hl << 2);
359 		csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
360 		    htons(IPPROTO_TCP));
361 	}
362 
363 	PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr));
364 	th = (struct tcphdr *)(mtodo(mbuf, l4_off));
365 	*header_len = l4_off + (th->th_off << 2);
366 
367 	/*
368 	 * Hardware requires the th->th_sum to not include the TCP payload,
369 	 * hence we recompute the csum with it excluded.
370 	 */
371 	th->th_sum = csum;
372 
373 	return (0);
374 }
375 
376 static int
377 gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
378     bool is_tso, uint32_t *desc_idx)
379 {
380 	struct gve_tx_general_context_desc_dqo *gen_desc;
381 	struct gve_tx_tso_context_desc_dqo *tso_desc;
382 	struct gve_tx_metadata_dqo metadata;
383 	int header_len;
384 	int err;
385 
386 	metadata = (struct gve_tx_metadata_dqo){0};
387 	gve_extract_tx_metadata_dqo(mbuf, &metadata);
388 
389 	if (is_tso) {
390 		err = gve_prep_tso(mbuf, &header_len);
391 		if (__predict_false(err)) {
392 			counter_enter();
393 			counter_u64_add_protected(
394 			    tx->stats.tx_delayed_pkt_tsoerr, 1);
395 			counter_exit();
396 			return (err);
397 		}
398 
399 		tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
400 		gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);
401 
402 		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
403 		counter_enter();
404 		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
405 		counter_exit();
406 	}
407 
408 	gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
409 	gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
410 	*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
411 	return (0);
412 }
413 
414 static int
415 gve_map_mbuf_dqo(struct gve_tx_ring *tx,
416     struct mbuf **mbuf, bus_dmamap_t dmamap,
417     bus_dma_segment_t *segs, int *nsegs, int attempt)
418 {
419 	struct mbuf *m_new = NULL;
420 	int err;
421 
422 	err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
423 	    *mbuf, segs, nsegs, BUS_DMA_NOWAIT);
424 
425 	switch (err) {
426 	case __predict_true(0):
427 		break;
428 	case EFBIG:
429 		if (__predict_false(attempt > 0))
430 			goto abort;
431 
432 		counter_enter();
433 		counter_u64_add_protected(
434 		    tx->stats.tx_mbuf_collapse, 1);
435 		counter_exit();
436 
437 		/* Try m_collapse before m_defrag */
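		/*
		 * m_collapse tries to shrink the chain in place and is
		 * cheaper than m_defrag, which copies the entire chain
		 * into a new one.
		 */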
438 		m_new = m_collapse(*mbuf, M_NOWAIT,
439 		    GVE_TX_MAX_DATA_DESCS_DQO);
440 		if (m_new == NULL) {
441 			counter_enter();
442 			counter_u64_add_protected(
443 			    tx->stats.tx_mbuf_defrag, 1);
444 			counter_exit();
445 			m_new = m_defrag(*mbuf, M_NOWAIT);
446 		}
447 
448 		if (__predict_false(m_new == NULL)) {
449 			counter_enter();
450 			counter_u64_add_protected(
451 			    tx->stats.tx_mbuf_defrag_err, 1);
452 			counter_exit();
453 
454 			m_freem(*mbuf);
455 			*mbuf = NULL;
456 			err = ENOMEM;
457 			goto abort;
458 		} else {
459 			*mbuf = m_new;
460 			return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
461 			    segs, nsegs, ++attempt));
462 		}
463 	case ENOMEM:
464 		counter_enter();
465 		counter_u64_add_protected(
466 		    tx->stats.tx_mbuf_dmamap_enomem_err, 1);
467 		counter_exit();
468 		goto abort;
469 	default:
470 		goto abort;
471 	}
472 
473 	return (0);
474 
475 abort:
476 	counter_enter();
477 	counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
478 	counter_exit();
479 	return (err);
480 }
481 
482 static uint32_t
483 num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
484 {
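	/*
	 * At most desc_mask (ring size - 1) slots are ever used so that a
	 * completely full ring remains distinguishable from an empty one.
	 */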
485 	uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
486 	    tx->dqo.desc_mask;
487 
488 	return (tx->dqo.desc_mask - num_used);
489 }
490 
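/*
 * Free pending packets live on two lockless lists: a consumer list touched
 * only by the transmit path, and a producer list onto which freed packets
 * are pushed with a CAS. When the consumer list runs dry, the transmit path
 * steals the entire producer list with an atomic swap.
 */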
491 static struct gve_tx_pending_pkt_dqo *
492 gve_alloc_pending_packet(struct gve_tx_ring *tx)
493 {
494 	int32_t index = tx->dqo.free_pending_pkts_csm;
495 	struct gve_tx_pending_pkt_dqo *pending_pkt;
496 
497 	/*
498 	 * No pending packets available in the consumer list,
499 	 * try to steal the producer list.
500 	 */
501 	if (__predict_false(index == -1)) {
502 		tx->dqo.free_pending_pkts_csm = atomic_swap_32(
503 		    &tx->dqo.free_pending_pkts_prd, -1);
504 
505 		index = tx->dqo.free_pending_pkts_csm;
506 		if (__predict_false(index == -1))
507 			return (NULL);
508 	}
509 
510 	pending_pkt = &tx->dqo.pending_pkts[index];
511 
512 	/* Remove pending_pkt from the consumer list */
513 	tx->dqo.free_pending_pkts_csm = pending_pkt->next;
514 	pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;
515 
516 	return (pending_pkt);
517 }
518 
519 static void
520 gve_free_pending_packet(struct gve_tx_ring *tx,
521     struct gve_tx_pending_pkt_dqo *pending_pkt)
522 {
523 	int index = pending_pkt - tx->dqo.pending_pkts;
524 	int32_t old_head;
525 
526 	pending_pkt->state = GVE_PACKET_STATE_FREE;
527 
528 	/* Add pending_pkt to the producer list */
529 	while (true) {
530 		old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);
531 
532 		pending_pkt->next = old_head;
533 		if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
534 		    old_head, index))
535 			break;
536 	}
537 }
538 
539 /*
540  * Has the side-effect of retrieving the value of the last desc index
541  * processed by the NIC. hw_tx_head is written to by the completions-processing
542  * taskqueue upon receiving descriptor-completions.
543  */
544 static bool
545 gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
546 {
547 	if (needed_descs <= num_avail_desc_ring_slots(tx))
548 		return (true);
549 
550 	tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
551 	if (needed_descs > num_avail_desc_ring_slots(tx)) {
552 		counter_enter();
553 		counter_u64_add_protected(
554 		    tx->stats.tx_delayed_pkt_nospace_descring, 1);
555 		counter_exit();
556 		return (false);
557 	}
558 
559 	return (true);
560 }
561 
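/*
 * Sets the report_event bit on the last descriptor of the packet if at
 * least GVE_TX_MIN_RE_INTERVAL descriptors have been posted since the
 * previous report event; the resulting descriptor completion is what lets
 * the completions-processing taskqueue advance hw_tx_head.
 */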
562 static void
563 gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
564 {
565 	uint32_t last_report_event_interval;
566 	uint32_t last_desc_idx;
567 
568 	last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
569 	last_report_event_interval =
570 	    (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;
571 
572 	if (__predict_false(last_report_event_interval >=
573 	    GVE_TX_MIN_RE_INTERVAL)) {
574 		tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
575 		tx->dqo.last_re_idx = last_desc_idx;
576 	}
577 }
578 
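/*
 * qpl_bufs_produced is advanced when completed packets have their QPL
 * buffers reaped; the cached copy is refreshed only when the fast-path
 * check fails.
 */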
579 static bool
580 gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
581 {
582 	uint32_t available = tx->dqo.qpl_bufs_produced_cached -
583 	    tx->dqo.qpl_bufs_consumed;
584 
585 	if (__predict_true(available >= num_bufs))
586 		return (true);
587 
588 	tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
589 	    &tx->dqo.qpl_bufs_produced);
590 	available = tx->dqo.qpl_bufs_produced_cached -
591 	    tx->dqo.qpl_bufs_consumed;
592 
593 	if (__predict_true(available >= num_bufs))
594 		return (true);
595 	return (false);
596 }
597 
598 static int32_t
599 gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
600 {
601 	int32_t buf = tx->dqo.free_qpl_bufs_csm;
602 
603 	if (__predict_false(buf == -1)) {
604 		tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
605 		    &tx->dqo.free_qpl_bufs_prd, -1);
606 		buf = tx->dqo.free_qpl_bufs_csm;
607 		if (__predict_false(buf == -1))
608 			return (-1);
609 	}
610 
611 	tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
612 	tx->dqo.qpl_bufs_consumed++;
613 	return (buf);
614 }
615 
616 /*
617  * Tx buffer i corresponds to
618  * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
619  * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
620  */
621 static void
622 gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
623     int32_t index, void **va, bus_addr_t *dma_addr)
624 {
625 	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
626 	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
627 	    GVE_TX_BUF_SHIFT_DQO;
628 
629 	*va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
630 	*dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
631 }
632 
633 static struct gve_dma_handle *
634 gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
635 {
636 	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
637 
638 	return (&tx->com.qpl->dmas[page_id]);
639 }
640 
641 static void
642 gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
643     struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
644     bool csum_enabled, int16_t completion_tag,
645     uint32_t *desc_idx)
646 {
647 	int32_t pkt_len = mbuf->m_pkthdr.len;
648 	struct gve_dma_handle *dma;
649 	uint32_t copy_offset = 0;
650 	int32_t prev_buf = -1;
651 	uint32_t copy_len;
652 	bus_addr_t addr;
653 	int32_t buf;
654 	void *va;
655 
656 	MPASS(pkt->num_qpl_bufs == 0);
657 	MPASS(pkt->qpl_buf_head == -1);
658 
659 	while (copy_offset < pkt_len) {
660 		buf = gve_tx_alloc_qpl_buf(tx);
661 		/* We already checked for availability */
662 		MPASS(buf != -1);
663 
664 		gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
665 		copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
666 		m_copydata(mbuf, copy_offset, copy_len, va);
667 		copy_offset += copy_len;
668 
669 		dma = gve_get_page_dma_handle(tx, buf);
670 		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);
671 
672 		gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
673 		    copy_len, addr, completion_tag,
674 		    /*eop=*/copy_offset == pkt_len,
675 		    csum_enabled);
676 
677 		/* Link all the qpl bufs for a packet */
678 		if (prev_buf == -1)
679 			pkt->qpl_buf_head = buf;
680 		else
681 			tx->dqo.qpl_bufs[prev_buf] = buf;
682 
683 		prev_buf = buf;
684 		pkt->num_qpl_bufs++;
685 	}
686 
687 	tx->dqo.qpl_bufs[buf] = -1;
688 }
689 
690 int
691 gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
692 {
693 	uint32_t desc_idx = tx->dqo.desc_tail;
694 	struct gve_tx_pending_pkt_dqo *pkt;
695 	int total_descs_needed;
696 	int16_t completion_tag;
697 	bool has_csum_flag;
698 	int csum_flags;
699 	bool is_tso;
700 	int nsegs;
701 	int err;
702 
703 	csum_flags = mbuf->m_pkthdr.csum_flags;
704 	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
705 	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
706 	is_tso = csum_flags & CSUM_TSO;
707 
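	/*
	 * In the QPL copy path every GVE_TX_BUF_SIZE_DQO bytes of the frame
	 * consume one QPL buffer and hence one packet descriptor.
	 */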
708 	nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
709 	/* Check if we have enough room in the desc ring */
710 	total_descs_needed = 1 +     /* general_ctx_desc */
711 	    nsegs +		     /* pkt_desc */
712 	    (is_tso ? 1 : 0);        /* tso_ctx_desc */
713 	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
714 		return (ENOBUFS);
715 
716 	if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
717 		counter_enter();
718 		counter_u64_add_protected(
719 		    tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
720 		counter_exit();
721 		return (ENOBUFS);
722 	}
723 
724 	pkt = gve_alloc_pending_packet(tx);
725 	if (pkt == NULL) {
726 		counter_enter();
727 		counter_u64_add_protected(
728 		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
729 		counter_exit();
730 		return (ENOBUFS);
731 	}
732 	completion_tag = pkt - tx->dqo.pending_pkts;
733 	pkt->mbuf = mbuf;
734 
735 	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
736 	if (err)
737 		goto abort;
738 
739 	gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
740 	    has_csum_flag, completion_tag, &desc_idx);
741 
742 	/* Remember the index of the last desc written */
743 	tx->dqo.desc_tail = desc_idx;
744 
745 	/*
746 	 * Request a descriptor completion on the last descriptor of the
747 	 * packet if we are allowed to by the HW enforced interval.
748 	 */
749 	gve_tx_request_desc_compl(tx, desc_idx);
750 
751 	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
752 	return (0);
753 
754 abort:
755 	pkt->mbuf = NULL;
756 	gve_free_pending_packet(tx, pkt);
757 	return (err);
758 }
759 
760 int
761 gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
762 {
763 	bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
764 	uint32_t desc_idx = tx->dqo.desc_tail;
765 	struct gve_tx_pending_pkt_dqo *pkt;
766 	struct mbuf *mbuf = *mbuf_ptr;
767 	int total_descs_needed;
768 	int16_t completion_tag;
769 	bool has_csum_flag;
770 	int csum_flags;
771 	bool is_tso;
772 	int nsegs;
773 	int err;
774 	int i;
775 
776 	csum_flags = mbuf->m_pkthdr.csum_flags;
777 	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
778 	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
779 	is_tso = csum_flags & CSUM_TSO;
780 
781 	/*
782 	 * This mbuf might end up needing more than 1 pkt desc.
783 	 * The actual number, `nsegs`, is known only after the
784 	 * expensive gve_map_mbuf_dqo call. The check below exists
785 	 * to fail early when the desc ring is really full.
786 	 */
787 	total_descs_needed = 1 +     /* general_ctx_desc */
788 	    1 +			     /* pkt_desc */
789 	    (is_tso ? 1 : 0);        /* tso_ctx_desc */
790 	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
791 		return (ENOBUFS);
792 
793 	pkt = gve_alloc_pending_packet(tx);
794 	if (pkt == NULL) {
795 		counter_enter();
796 		counter_u64_add_protected(
797 		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
798 		counter_exit();
799 		return (ENOBUFS);
800 	}
801 	completion_tag = pkt - tx->dqo.pending_pkts;
802 
803 	err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
804 	    segs, &nsegs, /*attempt=*/0);
805 	if (err)
806 		goto abort;
807 	mbuf = *mbuf_ptr;  /* gve_map_mbuf_dqo might replace the mbuf chain */
808 	pkt->mbuf = mbuf;
809 
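	/* Re-check ring room now that the real number of segments is known. */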
810 	total_descs_needed = 1 + /* general_ctx_desc */
811 	    nsegs +              /* pkt_desc */
812 	    (is_tso ? 1 : 0);    /* tso_ctx_desc */
813 	if (__predict_false(
814 	    !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
815 		err = ENOBUFS;
816 		goto abort_with_dma;
817 	}
818 
819 	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
820 	if (err)
821 		goto abort_with_dma;
822 
823 	bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
824 	for (i = 0; i < nsegs; i++) {
825 		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
826 		    segs[i].ds_len, segs[i].ds_addr,
827 		    completion_tag, /*eop=*/i == (nsegs - 1),
828 		    has_csum_flag);
829 	}
830 
831 	/* Remember the index of the last desc written */
832 	tx->dqo.desc_tail = desc_idx;
833 
834 	/*
835 	 * Request a descriptor completion on the last descriptor of the
836 	 * packet if we are allowed to by the HW enforced interval.
837 	 */
838 	gve_tx_request_desc_compl(tx, desc_idx);
839 
840 	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
841 	return (0);
842 
843 abort_with_dma:
844 	gve_unmap_packet(tx, pkt);
845 abort:
846 	pkt->mbuf = NULL;
847 	gve_free_pending_packet(tx, pkt);
848 	return (err);
849 }
850 
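/*
 * Walks the packet's linked list of QPL buffers, syncs each backing page,
 * and splices the whole list back onto the lockless producer free list.
 */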
851 static void
852 gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
853     struct gve_tx_pending_pkt_dqo *pkt)
854 {
855 	int32_t buf = pkt->qpl_buf_head;
856 	struct gve_dma_handle *dma;
857 	int32_t qpl_buf_tail;
858 	int32_t old_head;
859 	int i;
860 
861 	for (i = 0; i < pkt->num_qpl_bufs; i++) {
862 		dma = gve_get_page_dma_handle(tx, buf);
863 		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
864 		qpl_buf_tail = buf;
865 		buf = tx->dqo.qpl_bufs[buf];
866 	}
867 	MPASS(buf == -1);
868 	buf = qpl_buf_tail;
869 
870 	while (true) {
871 		old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
872 		tx->dqo.qpl_bufs[buf] = old_head;
873 
874 		/*
875 		 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
876 		 * is visible only after the linked list from this pkt is
877 		 * attached above to old_head.
878 		 */
879 		if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
880 		    old_head, pkt->qpl_buf_head))
881 			break;
882 	}
883 	/*
884 	 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
885 	 * visible only after the update to dqo.free_qpl_bufs_prd above.
886 	 */
887 	atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);
888 
889 	gve_clear_qpl_pending_pkt(pkt);
890 }
891 
892 static uint64_t
893 gve_handle_packet_completion(struct gve_priv *priv,
894     struct gve_tx_ring *tx, uint16_t compl_tag)
895 {
896 	struct gve_tx_pending_pkt_dqo *pending_pkt;
897 	int32_t pkt_len;
898 
899 	if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
900 		device_printf(priv->dev, "Invalid TX completion tag: %d\n",
901 		    compl_tag);
902 		return (0);
903 	}
904 
905 	pending_pkt = &tx->dqo.pending_pkts[compl_tag];
906 
907 	/* Packet is allocated but not pending data completion. */
908 	if (__predict_false(pending_pkt->state !=
909 	    GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
910 		device_printf(priv->dev,
911 		    "No pending data completion: %d\n", compl_tag);
912 		return (0);
913 	}
914 
915 	pkt_len = pending_pkt->mbuf->m_pkthdr.len;
916 
917 	if (gve_is_qpl(priv))
918 		gve_reap_qpl_bufs_dqo(tx, pending_pkt);
919 	else
920 		gve_unmap_packet(tx, pending_pkt);
921 
922 	m_freem(pending_pkt->mbuf);
923 	pending_pkt->mbuf = NULL;
924 	gve_free_pending_packet(tx, pending_pkt);
925 	return (pkt_len);
926 }
927 
928 int
929 gve_tx_intr_dqo(void *arg)
930 {
931 	struct gve_tx_ring *tx = arg;
932 	struct gve_priv *priv = tx->com.priv;
933 	struct gve_ring_com *com = &tx->com;
934 
935 	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
936 		return (FILTER_STRAY);
937 
938 	/* Interrupts are automatically masked */
939 	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
940 	return (FILTER_HANDLED);
941 }
942 
943 static void
944 gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
945 {
946 	struct gve_ring_com *com = &tx->com;
947 	int i;
948 
949 	for (i = 0; i < com->priv->tx_desc_cnt; i++)
950 		tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};
951 
952 	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
953 	    BUS_DMASYNC_PREWRITE);
954 }
955 
956 static void
957 gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
958 {
959 	struct gve_ring_com *com = &tx->com;
960 	int entries;
961 	int i;
962 
963 	entries = com->priv->tx_desc_cnt;
964 	for (i = 0; i < entries; i++)
965 		tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};
966 
967 	bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
968 	    BUS_DMASYNC_PREWRITE);
969 }
970 
971 void
972 gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
973 {
974 	struct gve_tx_ring *tx = &priv->tx[i];
975 	int j;
976 
977 	tx->dqo.desc_head = 0;
978 	tx->dqo.desc_tail = 0;
979 	tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
980 	tx->dqo.last_re_idx = 0;
981 
982 	tx->dqo.compl_head = 0;
983 	tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
984 	atomic_store_32(&tx->dqo.hw_tx_head, 0);
985 	tx->dqo.cur_gen_bit = 0;
986 
987 	gve_free_tx_mbufs_dqo(tx);
988 
989 	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
990 		if (gve_is_qpl(tx->com.priv))
991 			gve_clear_qpl_pending_pkt(&tx->dqo.pending_pkts[j]);
992 		tx->dqo.pending_pkts[j].next =
993 		    (j == tx->dqo.num_pending_pkts - 1) ? -1 : j + 1;
994 		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
995 	}
996 	tx->dqo.free_pending_pkts_csm = 0;
997 	atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);
998 
999 	if (gve_is_qpl(priv)) {
1000 		int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
1001 		    tx->com.qpl->num_pages;
1002 
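		/*
		 * Rebuild the singly linked free list of QPL buffers: each
		 * entry points to the next and the last is terminated by -1.
		 */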
1003 		for (j = 0; j < qpl_buf_cnt - 1; j++)
1004 			tx->dqo.qpl_bufs[j] = j + 1;
1005 		tx->dqo.qpl_bufs[j] = -1;
1006 
1007 		tx->dqo.free_qpl_bufs_csm = 0;
1008 		atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
1009 		atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
1010 		tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
1011 		tx->dqo.qpl_bufs_consumed = 0;
1012 	}
1013 
1014 	gve_tx_clear_desc_ring_dqo(tx);
1015 	gve_tx_clear_compl_ring_dqo(tx);
1016 }
1017 
1018 static bool
1019 gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
1020 {
1021 	struct gve_tx_compl_desc_dqo *compl_desc;
1022 	uint64_t bytes_done = 0;
1023 	uint64_t pkts_done = 0;
1024 	uint16_t compl_tag;
1025 	int work_done = 0;
1026 	uint16_t tx_head;
1027 	uint16_t type;
1028 
1029 	while (work_done < budget) {
1030 		bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
1031 		    BUS_DMASYNC_POSTREAD);
1032 
1033 		compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
1034 		if (compl_desc->generation == tx->dqo.cur_gen_bit)
1035 			break;
1036 
1037 		/*
1038 		 * Prevent generation bit from being read after the rest of the
1039 		 * descriptor.
1040 		 */
1041 		atomic_thread_fence_acq();
1042 		type = compl_desc->type;
1043 
1044 		if (type == GVE_COMPL_TYPE_DQO_DESC) {
1045 			/* This is the last descriptor fetched by HW plus one */
1046 			tx_head = le16toh(compl_desc->tx_head);
1047 			atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
1048 		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
1049 			compl_tag = le16toh(compl_desc->completion_tag);
1050 			bytes_done += gve_handle_packet_completion(priv,
1051 			    tx, compl_tag);
1052 			pkts_done++;
1053 		}
1054 
1055 		tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
1056 		    tx->dqo.compl_mask;
1057 		/* Flip the generation bit when we wrap around */
1058 		tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
1059 		work_done++;
1060 	}
1061 
1062 	/*
1063 	 * Waking the xmit taskqueue has to occur after room has been made in
1064 	 * the queue.
1065 	 */
1066 	atomic_thread_fence_seq_cst();
1067 	if (atomic_load_bool(&tx->stopped) && work_done) {
1068 		atomic_store_bool(&tx->stopped, false);
1069 		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
1070 	}
1071 
1072 	tx->done += work_done; /* tx->done is just a sysctl counter */
1073 	counter_enter();
1074 	counter_u64_add_protected(tx->stats.tbytes, bytes_done);
1075 	counter_u64_add_protected(tx->stats.tpackets, pkts_done);
1076 	counter_exit();
1077 
1078 	return (work_done == budget);
1079 }
1080 
1081 void
gve_tx_cleanup_tq_dqo(void * arg,int pending)1082 gve_tx_cleanup_tq_dqo(void *arg, int pending)
1083 {
1084 	struct gve_tx_ring *tx = arg;
1085 	struct gve_priv *priv = tx->com.priv;
1086 
1087 	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
1088 		return;
1089 
1090 	if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
1091 		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
1092 		return;
1093 	}
1094 
1095 	gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
1096 	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
1097 }
1098