xref: /freebsd/sys/dev/gve/gve_tx_dqo.c (revision 46fce000843215ff3d574d1c24fc24771975973e)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2024 Google LLC
5  *
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * 3. Neither the name of the copyright holder nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software without
18  *    specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
24  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_inet6.h"
33 
34 #include "gve.h"
35 #include "gve_dqo.h"
36 
37 static void
gve_unmap_packet(struct gve_tx_ring * tx,struct gve_tx_pending_pkt_dqo * pending_pkt)38 gve_unmap_packet(struct gve_tx_ring *tx,
39     struct gve_tx_pending_pkt_dqo *pending_pkt)
40 {
41 	bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
42 	    BUS_DMASYNC_POSTWRITE);
43 	bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
44 }
45 
46 static void
gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo * pending_pkt)47 gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo *pending_pkt)
48 {
49 	pending_pkt->qpl_buf_head = -1;
50 	pending_pkt->num_qpl_bufs = 0;
51 }
52 
53 static void
gve_free_tx_mbufs_dqo(struct gve_tx_ring * tx)54 gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
55 {
56 	struct gve_tx_pending_pkt_dqo *pending_pkt;
57 	int i;
58 
59 	for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
60 		pending_pkt = &tx->dqo.pending_pkts[i];
61 		if (!pending_pkt->mbuf)
62 			continue;
63 
64 		if (gve_is_qpl(tx->com.priv))
65 			gve_clear_qpl_pending_pkt(pending_pkt);
66 		else
67 			gve_unmap_packet(tx, pending_pkt);
68 
69 		m_freem(pending_pkt->mbuf);
70 		pending_pkt->mbuf = NULL;
71 	}
72 }
73 
74 void
gve_tx_free_ring_dqo(struct gve_priv * priv,int i)75 gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
76 {
77 	struct gve_tx_ring *tx = &priv->tx[i];
78 	struct gve_ring_com *com = &tx->com;
79 	int j;
80 
81 	if (tx->dqo.desc_ring != NULL) {
82 		gve_dma_free_coherent(&tx->desc_ring_mem);
83 		tx->dqo.desc_ring = NULL;
84 	}
85 
86 	if (tx->dqo.compl_ring != NULL) {
87 		gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
88 		tx->dqo.compl_ring = NULL;
89 	}
90 
91 	if (tx->dqo.pending_pkts != NULL) {
92 		gve_free_tx_mbufs_dqo(tx);
93 
94 		if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
95 			for (j = 0; j < tx->dqo.num_pending_pkts; j++)
96 				if (tx->dqo.pending_pkts[j].state !=
97 				    GVE_PACKET_STATE_UNALLOCATED)
98 					bus_dmamap_destroy(tx->dqo.buf_dmatag,
99 					    tx->dqo.pending_pkts[j].dmamap);
100 		}
101 
102 		free(tx->dqo.pending_pkts, M_GVE);
103 		tx->dqo.pending_pkts = NULL;
104 	}
105 
106 	if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
107 		bus_dma_tag_destroy(tx->dqo.buf_dmatag);
108 
109 	if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
110 		free(tx->dqo.qpl_bufs, M_GVE);
111 		tx->dqo.qpl_bufs = NULL;
112 	}
113 
114 	if (com->qpl != NULL) {
115 		gve_free_qpl(priv, com->qpl);
116 		com->qpl = NULL;
117 	}
118 }
119 
120 static int
gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring * tx)121 gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
122 {
123 	struct gve_priv *priv = tx->com.priv;
124 	int err;
125 	int j;
126 
127 	/*
128 	 * DMA tag for mapping Tx mbufs
129 	 * The maxsize, nsegments, and maxsegsize params should match
130 	 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
131 	 */
132 	err = bus_dma_tag_create(
133 	    bus_get_dma_tag(priv->dev),	/* parent */
134 	    1, 0,			/* alignment, bounds */
135 	    BUS_SPACE_MAXADDR,		/* lowaddr */
136 	    BUS_SPACE_MAXADDR,		/* highaddr */
137 	    NULL, NULL,			/* filter, filterarg */
138 	    GVE_TSO_MAXSIZE_DQO,	/* maxsize */
139 	    GVE_TX_MAX_DATA_DESCS_DQO,	/* nsegments */
140 	    GVE_TX_MAX_BUF_SIZE_DQO,	/* maxsegsize */
141 	    BUS_DMA_ALLOCNOW,		/* flags */
142 	    NULL,			/* lockfunc */
143 	    NULL,			/* lockarg */
144 	    &tx->dqo.buf_dmatag);
145 	if (err != 0) {
146 		device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
147 		    __func__, err);
148 		return (err);
149 	}
150 
151 	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
152 		err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
153 		    &tx->dqo.pending_pkts[j].dmamap);
154 		if (err != 0) {
155 			device_printf(priv->dev,
156 			    "err in creating pending pkt dmamap %d: %d",
157 			    j, err);
158 			return (err);
159 		}
160 		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
161 	}
162 
163 	return (0);
164 }
165 
166 int
gve_tx_alloc_ring_dqo(struct gve_priv * priv,int i)167 gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
168 {
169 	struct gve_tx_ring *tx = &priv->tx[i];
170 	uint16_t num_pending_pkts;
171 	int err;
172 
173 	/* Descriptor ring */
174 	err = gve_dma_alloc_coherent(priv,
175 	    sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
176 	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
177 	if (err != 0) {
178 		device_printf(priv->dev,
179 		    "Failed to alloc desc ring for tx ring %d", i);
180 		goto abort;
181 	}
182 	tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;
183 
184 	/* Completion ring */
185 	err = gve_dma_alloc_coherent(priv,
186 	    sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
187 	    CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
188 	if (err != 0) {
189 		device_printf(priv->dev,
190 		    "Failed to alloc compl ring for tx ring %d", i);
191 		goto abort;
192 	}
193 	tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;
194 
195 	/*
196 	 * pending_pkts array
197 	 *
198 	 * The max number of pending packets determines the maximum number of
199 	 * descriptors which maybe written to the completion queue.
200 	 *
201 	 * We must set the number small enough to make sure we never overrun the
202 	 * completion queue.
203 	 */
204 	num_pending_pkts = priv->tx_desc_cnt;
205 	/*
206 	 * Reserve space for descriptor completions, which will be reported at
207 	 * most every GVE_TX_MIN_RE_INTERVAL packets.
208 	 */
209 	num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;
210 
211 	tx->dqo.num_pending_pkts = num_pending_pkts;
212 	tx->dqo.pending_pkts = malloc(
213 	    sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
214 	    M_GVE, M_WAITOK | M_ZERO);
215 
216 	if (gve_is_qpl(priv)) {
217 		int qpl_buf_cnt;
218 
219 		tx->com.qpl = gve_alloc_qpl(priv, i, GVE_TX_NUM_QPL_PAGES_DQO,
220 		    /*single_kva*/false);
221 		if (tx->com.qpl == NULL) {
222 			device_printf(priv->dev,
223 			    "Failed to alloc QPL for tx ring %d", i);
224 			err = ENOMEM;
225 			goto abort;
226 		}
227 
228 		qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
229 		    tx->com.qpl->num_pages;
230 
231 		tx->dqo.qpl_bufs = malloc(
232 		    sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
233 		    M_GVE, M_WAITOK | M_ZERO);
234 	} else
235 		gve_tx_alloc_rda_fields_dqo(tx);
236 	return (0);
237 
238 abort:
239 	gve_tx_free_ring_dqo(priv, i);
240 	return (err);
241 }
242 
243 static void
gve_extract_tx_metadata_dqo(const struct mbuf * mbuf,struct gve_tx_metadata_dqo * metadata)244 gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
245     struct gve_tx_metadata_dqo *metadata)
246 {
247 	uint32_t hash = mbuf->m_pkthdr.flowid;
248 	uint16_t path_hash;
249 
250 	metadata->version = GVE_TX_METADATA_VERSION_DQO;
251 	if (hash) {
252 		path_hash = hash ^ (hash >> 16);
253 
254 		path_hash &= (1 << 15) - 1;
255 		if (__predict_false(path_hash == 0))
256 			path_hash = ~path_hash;
257 
258 		metadata->path_hash = path_hash;
259 	}
260 }
261 
262 static void
gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring * tx,uint32_t * desc_idx,uint32_t len,uint64_t addr,int16_t compl_tag,bool eop,bool csum_enabled)263 gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
264     uint32_t *desc_idx, uint32_t len, uint64_t addr,
265     int16_t compl_tag, bool eop, bool csum_enabled)
266 {
267 	while (len > 0) {
268 		struct gve_tx_pkt_desc_dqo *desc =
269 		    &tx->dqo.desc_ring[*desc_idx].pkt;
270 		uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
271 		bool cur_eop = eop && cur_len == len;
272 
273 		*desc = (struct gve_tx_pkt_desc_dqo){
274 			.buf_addr = htole64(addr),
275 			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
276 			.end_of_packet = cur_eop,
277 			.checksum_offload_enable = csum_enabled,
278 			.compl_tag = htole16(compl_tag),
279 			.buf_size = cur_len,
280 		};
281 
282 		addr += cur_len;
283 		len -= cur_len;
284 		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
285 	}
286 }
287 
288 static void
gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo * desc,const struct mbuf * mbuf,const struct gve_tx_metadata_dqo * metadata,int header_len)289 gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
290     const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
291     int header_len)
292 {
293 	*desc = (struct gve_tx_tso_context_desc_dqo){
294 		.header_len = header_len,
295 		.cmd_dtype = {
296 			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
297 			.tso = 1,
298 		},
299 		.flex0 = metadata->bytes[0],
300 		.flex5 = metadata->bytes[5],
301 		.flex6 = metadata->bytes[6],
302 		.flex7 = metadata->bytes[7],
303 		.flex8 = metadata->bytes[8],
304 		.flex9 = metadata->bytes[9],
305 		.flex10 = metadata->bytes[10],
306 		.flex11 = metadata->bytes[11],
307 	};
308 	desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
309 	desc->mss = mbuf->m_pkthdr.tso_segsz;
310 }
311 
312 static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo * desc,const struct gve_tx_metadata_dqo * metadata)313 gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
314     const struct gve_tx_metadata_dqo *metadata)
315 {
316 	*desc = (struct gve_tx_general_context_desc_dqo){
317 		.flex0 = metadata->bytes[0],
318 		.flex1 = metadata->bytes[1],
319 		.flex2 = metadata->bytes[2],
320 		.flex3 = metadata->bytes[3],
321 		.flex4 = metadata->bytes[4],
322 		.flex5 = metadata->bytes[5],
323 		.flex6 = metadata->bytes[6],
324 		.flex7 = metadata->bytes[7],
325 		.flex8 = metadata->bytes[8],
326 		.flex9 = metadata->bytes[9],
327 		.flex10 = metadata->bytes[10],
328 		.flex11 = metadata->bytes[11],
329 		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
330 	};
331 }
332 
333 #define PULLUP_HDR(m, len)				\
334 do {							\
335 	if (__predict_false((m)->m_len < (len))) {	\
336 		(m) = m_pullup((m), (len));		\
337 		if ((m) == NULL)			\
338 			return (EINVAL);		\
339 	}						\
340 } while (0)
341 
342 static int
gve_prep_tso(struct mbuf * mbuf,int * header_len)343 gve_prep_tso(struct mbuf *mbuf, int *header_len)
344 {
345 	uint8_t l3_off, l4_off = 0;
346 	struct ether_header *eh;
347 	struct tcphdr *th;
348 	u_short csum;
349 
350 	PULLUP_HDR(mbuf, sizeof(*eh));
351 	eh = mtod(mbuf, struct ether_header *);
352 	KASSERT(eh->ether_type != ETHERTYPE_VLAN,
353 	    ("VLAN-tagged packets not supported"));
354 	l3_off = ETHER_HDR_LEN;
355 
356 #ifdef INET6
357 	if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
358 		struct ip6_hdr *ip6;
359 
360 		PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
361 		ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
362 		l4_off = l3_off + sizeof(struct ip6_hdr);
363 		csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
364 		    /*csum=*/0);
365 	} else
366 #endif
367 	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
368 		struct ip *ip;
369 
370 		PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
371 		ip = (struct ip *)(mtodo(mbuf, l3_off));
372 		l4_off = l3_off + (ip->ip_hl << 2);
373 		csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
374 		    htons(IPPROTO_TCP));
375 	}
376 
377 	PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr *));
378 	th = (struct tcphdr *)(mtodo(mbuf, l4_off));
379 	*header_len = l4_off + (th->th_off << 2);
380 
381 	/*
382 	 * Hardware requires the th->th_sum to not include the TCP payload,
383 	 * hence we recompute the csum with it excluded.
384 	 */
385 	th->th_sum = csum;
386 
387 	return (0);
388 }
389 
390 static int
gve_tx_fill_ctx_descs(struct gve_tx_ring * tx,struct mbuf * mbuf,bool is_tso,uint32_t * desc_idx)391 gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
392     bool is_tso, uint32_t *desc_idx)
393 {
394 	struct gve_tx_general_context_desc_dqo *gen_desc;
395 	struct gve_tx_tso_context_desc_dqo *tso_desc;
396 	struct gve_tx_metadata_dqo metadata;
397 	int header_len;
398 	int err;
399 
400 	metadata = (struct gve_tx_metadata_dqo){0};
401 	gve_extract_tx_metadata_dqo(mbuf, &metadata);
402 
403 	if (is_tso) {
404 		err = gve_prep_tso(mbuf, &header_len);
405 		if (__predict_false(err)) {
406 			counter_enter();
407 			counter_u64_add_protected(
408 			    tx->stats.tx_delayed_pkt_tsoerr, 1);
409 			counter_exit();
410 			return (err);
411 		}
412 
413 		tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
414 		gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);
415 
416 		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
417 		counter_enter();
418 		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
419 		counter_exit();
420 	}
421 
422 	gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
423 	gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
424 	*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
425 	return (0);
426 }
427 
428 static int
gve_map_mbuf_dqo(struct gve_tx_ring * tx,struct mbuf ** mbuf,bus_dmamap_t dmamap,bus_dma_segment_t * segs,int * nsegs,int attempt)429 gve_map_mbuf_dqo(struct gve_tx_ring *tx,
430     struct mbuf **mbuf, bus_dmamap_t dmamap,
431     bus_dma_segment_t *segs, int *nsegs, int attempt)
432 {
433 	struct mbuf *m_new = NULL;
434 	int err;
435 
436 	err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
437 	    *mbuf, segs, nsegs, BUS_DMA_NOWAIT);
438 
439 	switch (err) {
440 	case __predict_true(0):
441 		break;
442 	case EFBIG:
443 		if (__predict_false(attempt > 0))
444 			goto abort;
445 
446 		counter_enter();
447 		counter_u64_add_protected(
448 		    tx->stats.tx_mbuf_collapse, 1);
449 		counter_exit();
450 
451 		/* Try m_collapse before m_defrag */
452 		m_new = m_collapse(*mbuf, M_NOWAIT,
453 		    GVE_TX_MAX_DATA_DESCS_DQO);
454 		if (m_new == NULL) {
455 			counter_enter();
456 			counter_u64_add_protected(
457 			    tx->stats.tx_mbuf_defrag, 1);
458 			counter_exit();
459 			m_new = m_defrag(*mbuf, M_NOWAIT);
460 		}
461 
462 		if (__predict_false(m_new == NULL)) {
463 			counter_enter();
464 			counter_u64_add_protected(
465 			    tx->stats.tx_mbuf_defrag_err, 1);
466 			counter_exit();
467 
468 			m_freem(*mbuf);
469 			*mbuf = NULL;
470 			err = ENOMEM;
471 			goto abort;
472 		} else {
473 			*mbuf = m_new;
474 			return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
475 			    segs, nsegs, ++attempt));
476 		}
477 	case ENOMEM:
478 		counter_enter();
479 		counter_u64_add_protected(
480 		    tx->stats.tx_mbuf_dmamap_enomem_err, 1);
481 		counter_exit();
482 		goto abort;
483 	default:
484 		goto abort;
485 	}
486 
487 	return (0);
488 
489 abort:
490 	counter_enter();
491 	counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
492 	counter_exit();
493 	return (err);
494 }
495 
496 static uint32_t
num_avail_desc_ring_slots(const struct gve_tx_ring * tx)497 num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
498 {
499 	uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
500 	    tx->dqo.desc_mask;
501 
502 	return (tx->dqo.desc_mask - num_used);
503 }
504 
505 static struct gve_tx_pending_pkt_dqo *
gve_alloc_pending_packet(struct gve_tx_ring * tx)506 gve_alloc_pending_packet(struct gve_tx_ring *tx)
507 {
508 	int32_t index = tx->dqo.free_pending_pkts_csm;
509 	struct gve_tx_pending_pkt_dqo *pending_pkt;
510 
511 	/*
512 	 * No pending packets available in the consumer list,
513 	 * try to steal the producer list.
514 	 */
515 	if (__predict_false(index == -1)) {
516 		tx->dqo.free_pending_pkts_csm = atomic_swap_32(
517 		    &tx->dqo.free_pending_pkts_prd, -1);
518 
519 		index = tx->dqo.free_pending_pkts_csm;
520 		if (__predict_false(index == -1))
521 			return (NULL);
522 	}
523 
524 	pending_pkt = &tx->dqo.pending_pkts[index];
525 
526 	/* Remove pending_pkt from the consumer list */
527 	tx->dqo.free_pending_pkts_csm = pending_pkt->next;
528 	pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;
529 
530 	gve_set_timestamp(&pending_pkt->enqueue_time_sec);
531 
532 	return (pending_pkt);
533 }
534 
535 static void
gve_free_pending_packet(struct gve_tx_ring * tx,struct gve_tx_pending_pkt_dqo * pending_pkt)536 gve_free_pending_packet(struct gve_tx_ring *tx,
537     struct gve_tx_pending_pkt_dqo *pending_pkt)
538 {
539 	int index = pending_pkt - tx->dqo.pending_pkts;
540 	int32_t old_head;
541 
542 	pending_pkt->state = GVE_PACKET_STATE_FREE;
543 
544 	gve_invalidate_timestamp(&pending_pkt->enqueue_time_sec);
545 
546 	/* Add pending_pkt to the producer list */
547 	while (true) {
548 		old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);
549 
550 		pending_pkt->next = old_head;
551 		if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
552 		    old_head, index))
553 			break;
554 	}
555 }
556 
557 /*
558  * Has the side-effect of retrieving the value of the last desc index
559  * processed by the NIC. hw_tx_head is written to by the completions-processing
560  * taskqueue upon receiving descriptor-completions.
561  */
562 static bool
gve_tx_has_desc_room_dqo(struct gve_tx_ring * tx,int needed_descs)563 gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
564 {
565 	if (needed_descs <= num_avail_desc_ring_slots(tx))
566 		return (true);
567 
568 	tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
569 	if (needed_descs > num_avail_desc_ring_slots(tx)) {
570 		counter_enter();
571 		counter_u64_add_protected(
572 		    tx->stats.tx_delayed_pkt_nospace_descring, 1);
573 		counter_exit();
574 		return (false);
575 	}
576 
577 	return (0);
578 }
579 
580 static void
gve_tx_request_desc_compl(struct gve_tx_ring * tx,uint32_t desc_idx)581 gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
582 {
583 	uint32_t last_report_event_interval;
584 	uint32_t last_desc_idx;
585 
586 	last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
587 	last_report_event_interval =
588 	    (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;
589 
590 	if (__predict_false(last_report_event_interval >=
591 	    GVE_TX_MIN_RE_INTERVAL)) {
592 		tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
593 		tx->dqo.last_re_idx = last_desc_idx;
594 	}
595 }
596 
597 static bool
gve_tx_have_enough_qpl_bufs(struct gve_tx_ring * tx,int num_bufs)598 gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
599 {
600 	uint32_t available = tx->dqo.qpl_bufs_produced_cached -
601 	    tx->dqo.qpl_bufs_consumed;
602 
603 	if (__predict_true(available >= num_bufs))
604 		return (true);
605 
606 	tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
607 	    &tx->dqo.qpl_bufs_produced);
608 	available = tx->dqo.qpl_bufs_produced_cached -
609 	    tx->dqo.qpl_bufs_consumed;
610 
611 	if (__predict_true(available >= num_bufs))
612 		return (true);
613 	return (false);
614 }
615 
616 static int32_t
gve_tx_alloc_qpl_buf(struct gve_tx_ring * tx)617 gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
618 {
619 	int32_t buf = tx->dqo.free_qpl_bufs_csm;
620 
621 	if (__predict_false(buf == -1)) {
622 		tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
623 		    &tx->dqo.free_qpl_bufs_prd, -1);
624 		buf = tx->dqo.free_qpl_bufs_csm;
625 		if (__predict_false(buf == -1))
626 			return (-1);
627 	}
628 
629 	tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
630 	tx->dqo.qpl_bufs_consumed++;
631 	return (buf);
632 }
633 
634 /*
635  * Tx buffer i corresponds to
636  * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
637  * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
638  */
639 static void
gve_tx_buf_get_addr_dqo(struct gve_tx_ring * tx,int32_t index,void ** va,bus_addr_t * dma_addr)640 gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
641     int32_t index, void **va, bus_addr_t *dma_addr)
642 {
643 	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
644 	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
645 	    GVE_TX_BUF_SHIFT_DQO;
646 
647 	*va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
648 	*dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
649 }
650 
651 static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_tx_ring * tx,int32_t index)652 gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
653 {
654 	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
655 
656 	return (&tx->com.qpl->dmas[page_id]);
657 }
658 
659 static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring * tx,struct mbuf * mbuf,struct gve_tx_pending_pkt_dqo * pkt,bool csum_enabled,int16_t completion_tag,uint32_t * desc_idx)660 gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
661     struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
662     bool csum_enabled, int16_t completion_tag,
663     uint32_t *desc_idx)
664 {
665 	int32_t pkt_len = mbuf->m_pkthdr.len;
666 	struct gve_dma_handle *dma;
667 	uint32_t copy_offset = 0;
668 	int32_t prev_buf = -1;
669 	uint32_t copy_len;
670 	bus_addr_t addr;
671 	int32_t buf;
672 	void *va;
673 
674 	MPASS(pkt->num_qpl_bufs == 0);
675 	MPASS(pkt->qpl_buf_head == -1);
676 
677 	while (copy_offset < pkt_len) {
678 		buf = gve_tx_alloc_qpl_buf(tx);
679 		/* We already checked for availability */
680 		MPASS(buf != -1);
681 
682 		gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
683 		copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
684 		m_copydata(mbuf, copy_offset, copy_len, va);
685 		copy_offset += copy_len;
686 
687 		dma = gve_get_page_dma_handle(tx, buf);
688 		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);
689 
690 		gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
691 		    copy_len, addr, completion_tag,
692 		    /*eop=*/copy_offset == pkt_len,
693 		    csum_enabled);
694 
695 		/* Link all the qpl bufs for a packet */
696 		if (prev_buf == -1)
697 			pkt->qpl_buf_head = buf;
698 		else
699 			tx->dqo.qpl_bufs[prev_buf] = buf;
700 
701 		prev_buf = buf;
702 		pkt->num_qpl_bufs++;
703 	}
704 
705 	tx->dqo.qpl_bufs[buf] = -1;
706 }
707 
708 int
gve_xmit_dqo_qpl(struct gve_tx_ring * tx,struct mbuf * mbuf)709 gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
710 {
711 	uint32_t desc_idx = tx->dqo.desc_tail;
712 	struct gve_tx_pending_pkt_dqo *pkt;
713 	int total_descs_needed;
714 	int16_t completion_tag;
715 	bool has_csum_flag;
716 	int csum_flags;
717 	bool is_tso;
718 	int nsegs;
719 	int err;
720 
721 	csum_flags = mbuf->m_pkthdr.csum_flags;
722 	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
723 	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
724 	is_tso = csum_flags & CSUM_TSO;
725 
726 	nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
727 	/* Check if we have enough room in the desc ring */
728 	total_descs_needed = 1 +     /* general_ctx_desc */
729 	    nsegs +		     /* pkt_desc */
730 	    (is_tso ? 1 : 0);        /* tso_ctx_desc */
731 	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
732 		return (ENOBUFS);
733 
734 	if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
735 		counter_enter();
736 		counter_u64_add_protected(
737 		    tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
738 		counter_exit();
739 		return (ENOBUFS);
740 	}
741 
742 	pkt = gve_alloc_pending_packet(tx);
743 	if (pkt == NULL) {
744 		counter_enter();
745 		counter_u64_add_protected(
746 		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
747 		counter_exit();
748 		return (ENOBUFS);
749 	}
750 	completion_tag = pkt - tx->dqo.pending_pkts;
751 	pkt->mbuf = mbuf;
752 
753 	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
754 	if (err)
755 		goto abort;
756 
757 	gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
758 	    has_csum_flag, completion_tag, &desc_idx);
759 
760 	/* Remember the index of the last desc written */
761 	tx->dqo.desc_tail = desc_idx;
762 
763 	/*
764 	 * Request a descriptor completion on the last descriptor of the
765 	 * packet if we are allowed to by the HW enforced interval.
766 	 */
767 	gve_tx_request_desc_compl(tx, desc_idx);
768 
769 	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
770 	return (0);
771 
772 abort:
773 	pkt->mbuf = NULL;
774 	gve_free_pending_packet(tx, pkt);
775 	return (err);
776 }
777 
778 int
gve_xmit_dqo(struct gve_tx_ring * tx,struct mbuf ** mbuf_ptr)779 gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
780 {
781 	bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
782 	uint32_t desc_idx = tx->dqo.desc_tail;
783 	struct gve_tx_pending_pkt_dqo *pkt;
784 	struct mbuf *mbuf = *mbuf_ptr;
785 	int total_descs_needed;
786 	int16_t completion_tag;
787 	bool has_csum_flag;
788 	int csum_flags;
789 	bool is_tso;
790 	int nsegs;
791 	int err;
792 	int i;
793 
794 	csum_flags = mbuf->m_pkthdr.csum_flags;
795 	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
796 	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
797 	is_tso = csum_flags & CSUM_TSO;
798 
799 	/*
800 	 * This mbuf might end up needing more than 1 pkt desc.
801 	 * The actual number, `nsegs` is known only after the
802 	 * expensive gve_map_mbuf_dqo call. This check beneath
803 	 * exists to fail early when the desc ring is really full.
804 	 */
805 	total_descs_needed = 1 +     /* general_ctx_desc */
806 	    1 +			     /* pkt_desc */
807 	    (is_tso ? 1 : 0);        /* tso_ctx_desc */
808 	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
809 		return (ENOBUFS);
810 
811 	pkt = gve_alloc_pending_packet(tx);
812 	if (pkt == NULL) {
813 		counter_enter();
814 		counter_u64_add_protected(
815 		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
816 		counter_exit();
817 		return (ENOBUFS);
818 	}
819 	completion_tag = pkt - tx->dqo.pending_pkts;
820 
821 	err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
822 	    segs, &nsegs, /*attempt=*/0);
823 	if (err)
824 		goto abort;
825 	mbuf = *mbuf_ptr;  /* gve_map_mbuf_dqo might replace the mbuf chain */
826 	pkt->mbuf = mbuf;
827 
828 	total_descs_needed = 1 + /* general_ctx_desc */
829 	    nsegs +              /* pkt_desc */
830 	    (is_tso ? 1 : 0);    /* tso_ctx_desc */
831 	if (__predict_false(
832 	    !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
833 		err = ENOBUFS;
834 		goto abort_with_dma;
835 	}
836 
837 	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
838 	if (err)
839 		goto abort_with_dma;
840 
841 	bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
842 	for (i = 0; i < nsegs; i++) {
843 		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
844 		    segs[i].ds_len, segs[i].ds_addr,
845 		    completion_tag, /*eop=*/i == (nsegs - 1),
846 		    has_csum_flag);
847 	}
848 
849 	/* Remember the index of the last desc written */
850 	tx->dqo.desc_tail = desc_idx;
851 
852 	/*
853 	 * Request a descriptor completion on the last descriptor of the
854 	 * packet if we are allowed to by the HW enforced interval.
855 	 */
856 	gve_tx_request_desc_compl(tx, desc_idx);
857 
858 	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
859 	return (0);
860 
861 abort_with_dma:
862 	gve_unmap_packet(tx, pkt);
863 abort:
864 	pkt->mbuf = NULL;
865 	gve_free_pending_packet(tx, pkt);
866 	return (err);
867 }
868 
869 static void
gve_reap_qpl_bufs_dqo(struct gve_tx_ring * tx,struct gve_tx_pending_pkt_dqo * pkt)870 gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
871     struct gve_tx_pending_pkt_dqo *pkt)
872 {
873 	int32_t buf = pkt->qpl_buf_head;
874 	struct gve_dma_handle *dma;
875 	int32_t qpl_buf_tail;
876 	int32_t old_head;
877 	int i;
878 
879 	for (i = 0; i < pkt->num_qpl_bufs; i++) {
880 		dma = gve_get_page_dma_handle(tx, buf);
881 		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
882 		qpl_buf_tail = buf;
883 		buf = tx->dqo.qpl_bufs[buf];
884 	}
885 	MPASS(buf == -1);
886 	buf = qpl_buf_tail;
887 
888 	while (true) {
889 		old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
890 		tx->dqo.qpl_bufs[buf] = old_head;
891 
892 		/*
893 		 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
894 		 * is visible only after the linked list from this pkt is
895 		 * attached above to old_head.
896 		 */
897 		if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
898 		    old_head, pkt->qpl_buf_head))
899 			break;
900 	}
901 	/*
902 	 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
903 	 * visible only adter the update to dqo.free_qpl_bufs_prd above.
904 	 */
905 	atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);
906 
907 	gve_clear_qpl_pending_pkt(pkt);
908 }
909 
910 static uint64_t
gve_handle_packet_completion(struct gve_priv * priv,struct gve_tx_ring * tx,uint16_t compl_tag)911 gve_handle_packet_completion(struct gve_priv *priv,
912     struct gve_tx_ring *tx, uint16_t compl_tag)
913 {
914 	struct gve_tx_pending_pkt_dqo *pending_pkt;
915 	int32_t pkt_len;
916 
917 	if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
918 		device_printf(priv->dev, "Invalid TX completion tag: %d\n",
919 		    compl_tag);
920 		return (0);
921 	}
922 
923 	pending_pkt = &tx->dqo.pending_pkts[compl_tag];
924 
925 	/* Packet is allocated but not pending data completion. */
926 	if (__predict_false(pending_pkt->state !=
927 	    GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
928 		device_printf(priv->dev,
929 		    "No pending data completion: %d\n", compl_tag);
930 		return (0);
931 	}
932 
933 	pkt_len = pending_pkt->mbuf->m_pkthdr.len;
934 
935 	if (gve_is_qpl(priv))
936 		gve_reap_qpl_bufs_dqo(tx, pending_pkt);
937 	else
938 		gve_unmap_packet(tx, pending_pkt);
939 
940 	m_freem(pending_pkt->mbuf);
941 	pending_pkt->mbuf = NULL;
942 	gve_free_pending_packet(tx, pending_pkt);
943 	return (pkt_len);
944 }
945 
946 int
gve_check_tx_timeout_dqo(struct gve_priv * priv,struct gve_tx_ring * tx)947 gve_check_tx_timeout_dqo(struct gve_priv *priv, struct gve_tx_ring *tx)
948 {
949 	struct gve_tx_pending_pkt_dqo *pending_pkt;
950 	int num_timeouts;
951 	uint16_t pkt_idx;
952 
953 	num_timeouts = 0;
954 	for (pkt_idx = 0; pkt_idx < tx->dqo.num_pending_pkts; pkt_idx++) {
955 		pending_pkt = &tx->dqo.pending_pkts[pkt_idx];
956 
957 		if (!gve_timestamp_valid(&pending_pkt->enqueue_time_sec))
958 			continue;
959 
960 		if (__predict_false(
961 		    gve_seconds_since(&pending_pkt->enqueue_time_sec) >
962 		    GVE_TX_TIMEOUT_PKT_SEC))
963 			num_timeouts += 1;
964 	}
965 
966 	return (num_timeouts);
967 }
968 
969 int
gve_tx_intr_dqo(void * arg)970 gve_tx_intr_dqo(void *arg)
971 {
972 	struct gve_tx_ring *tx = arg;
973 	struct gve_priv *priv = tx->com.priv;
974 	struct gve_ring_com *com = &tx->com;
975 
976 	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
977 		return (FILTER_STRAY);
978 
979 	/* Interrupts are automatically masked */
980 	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
981 	return (FILTER_HANDLED);
982 }
983 
984 static void
gve_tx_clear_desc_ring_dqo(struct gve_tx_ring * tx)985 gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
986 {
987 	struct gve_ring_com *com = &tx->com;
988 	int i;
989 
990 	for (i = 0; i < com->priv->tx_desc_cnt; i++)
991 		tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};
992 
993 	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
994 	    BUS_DMASYNC_PREWRITE);
995 }
996 
997 static void
gve_tx_clear_compl_ring_dqo(struct gve_tx_ring * tx)998 gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
999 {
1000 	struct gve_ring_com *com = &tx->com;
1001 	int entries;
1002 	int i;
1003 
1004 	entries = com->priv->tx_desc_cnt;
1005 	for (i = 0; i < entries; i++)
1006 		tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};
1007 
1008 	bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
1009 	    BUS_DMASYNC_PREWRITE);
1010 }
1011 
1012 void
gve_clear_tx_ring_dqo(struct gve_priv * priv,int i)1013 gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
1014 {
1015 	struct gve_tx_ring *tx = &priv->tx[i];
1016 	int j;
1017 
1018 	tx->dqo.desc_head = 0;
1019 	tx->dqo.desc_tail = 0;
1020 	tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
1021 	tx->dqo.last_re_idx = 0;
1022 
1023 	tx->dqo.compl_head = 0;
1024 	tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
1025 	atomic_store_32(&tx->dqo.hw_tx_head, 0);
1026 	tx->dqo.cur_gen_bit = 0;
1027 
1028 	gve_free_tx_mbufs_dqo(tx);
1029 
1030 	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
1031 		if (gve_is_qpl(tx->com.priv))
1032 			gve_clear_qpl_pending_pkt(&tx->dqo.pending_pkts[j]);
1033 		gve_invalidate_timestamp(
1034 		    &tx->dqo.pending_pkts[j].enqueue_time_sec);
1035 		tx->dqo.pending_pkts[j].next =
1036 		    (j == tx->dqo.num_pending_pkts - 1) ? -1 : j + 1;
1037 		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
1038 	}
1039 	tx->dqo.free_pending_pkts_csm = 0;
1040 	atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);
1041 
1042 	if (gve_is_qpl(priv)) {
1043 		int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
1044 		    tx->com.qpl->num_pages;
1045 
1046 		for (j = 0; j < qpl_buf_cnt - 1; j++)
1047 			tx->dqo.qpl_bufs[j] = j + 1;
1048 		tx->dqo.qpl_bufs[j] = -1;
1049 
1050 		tx->dqo.free_qpl_bufs_csm = 0;
1051 		atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
1052 		atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
1053 		tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
1054 		tx->dqo.qpl_bufs_consumed = 0;
1055 	}
1056 
1057 	gve_tx_clear_desc_ring_dqo(tx);
1058 	gve_tx_clear_compl_ring_dqo(tx);
1059 }
1060 
1061 static uint8_t
gve_tx_get_gen_bit(uint8_t * desc)1062 gve_tx_get_gen_bit(uint8_t *desc)
1063 {
1064 	uint8_t byte;
1065 
1066 	/*
1067 	 * Prevent generation bit from being read after the rest of the
1068 	 * descriptor.
1069 	 */
1070 	byte = atomic_load_acq_8(desc + GVE_TX_DESC_DQO_GEN_BYTE_OFFSET);
1071 	return ((byte & GVE_TX_DESC_DQO_GEN_BIT_MASK) != 0);
1072 }
1073 
1074 static bool
gve_tx_cleanup_dqo(struct gve_priv * priv,struct gve_tx_ring * tx,int budget)1075 gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
1076 {
1077 	struct gve_tx_compl_desc_dqo *compl_desc;
1078 	uint64_t bytes_done = 0;
1079 	uint64_t pkts_done = 0;
1080 	uint16_t compl_tag;
1081 	int work_done = 0;
1082 	uint16_t tx_head;
1083 	uint16_t type;
1084 
1085 	while (work_done < budget) {
1086 		bus_dmamap_sync(tx->dqo.compl_ring_mem.tag,
1087 		    tx->dqo.compl_ring_mem.map,
1088 		    BUS_DMASYNC_POSTREAD);
1089 
1090 		compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
1091 		if (gve_tx_get_gen_bit((uint8_t *)compl_desc) ==
1092 		    tx->dqo.cur_gen_bit)
1093 			break;
1094 
1095 		type = compl_desc->type;
1096 		if (type == GVE_COMPL_TYPE_DQO_DESC) {
1097 			/* This is the last descriptor fetched by HW plus one */
1098 			tx_head = le16toh(compl_desc->tx_head);
1099 			atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
1100 		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
1101 			compl_tag = le16toh(compl_desc->completion_tag);
1102 			bytes_done += gve_handle_packet_completion(priv,
1103 			    tx, compl_tag);
1104 			pkts_done++;
1105 		}
1106 
1107 		tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
1108 		    tx->dqo.compl_mask;
1109 		/* Flip the generation bit when we wrap around */
1110 		tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
1111 		work_done++;
1112 	}
1113 
1114 	/*
1115 	 * Waking the xmit taskqueue has to occur after room has been made in
1116 	 * the queue.
1117 	 */
1118 	atomic_thread_fence_seq_cst();
1119 	if (atomic_load_bool(&tx->stopped) && work_done) {
1120 		atomic_store_bool(&tx->stopped, false);
1121 		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
1122 	}
1123 
1124 	tx->done += work_done; /* tx->done is just a sysctl counter */
1125 	counter_enter();
1126 	counter_u64_add_protected(tx->stats.tbytes, bytes_done);
1127 	counter_u64_add_protected(tx->stats.tpackets, pkts_done);
1128 	counter_exit();
1129 
1130 	return (work_done == budget);
1131 }
1132 
1133 void
gve_tx_cleanup_tq_dqo(void * arg,int pending)1134 gve_tx_cleanup_tq_dqo(void *arg, int pending)
1135 {
1136 	struct gve_tx_ring *tx = arg;
1137 	struct gve_priv *priv = tx->com.priv;
1138 
1139 	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
1140 		return;
1141 
1142 	if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
1143 		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
1144 		return;
1145 	}
1146 
1147 	gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
1148 	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
1149 }
1150