xref: /freebsd/sys/dev/gve/gve_tx.c (revision 031800c786823a9ad4c4d2f79f217d42dad3f5d1)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

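/*
 * Number of bytes carried by the first segment (the one described by the
 * packet descriptor) when the packet is neither TCP nor UDP and the L4
 * payload offset is therefore unknown; capped by the packet length in
 * gve_xmit().
 */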
#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182

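/*
 * The GQI TX FIFO is a bounce buffer carved out of the ring's queue page
 * list, which is mapped contiguously at qpl->kva; packets are copied into
 * it before being described to the device.
 */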
static int
gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_ring *tx)
{
	struct gve_queue_page_list *qpl = tx->com.qpl;
	struct gve_tx_fifo *fifo = &tx->fifo;

	fifo->size = qpl->num_pages * PAGE_SIZE;
	fifo->base = qpl->kva;
	atomic_store_int(&fifo->available, fifo->size);
	fifo->head = 0;

	return (0);
}

static void
gve_tx_free_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];

	if (tx->desc_ring != NULL) {
		gve_dma_free_coherent(&tx->desc_ring_mem);
		tx->desc_ring = NULL;
	}

	if (tx->info != NULL) {
		free(tx->info, M_GVE);
		tx->info = NULL;
	}
}

static void
gve_tx_free_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;

	/* Safe to call even if never alloced */
	gve_free_counters((counter_u64_t *)&tx->stats, NUM_TX_STATS);

	if (mtx_initialized(&tx->ring_mtx))
		mtx_destroy(&tx->ring_mtx);

	if (com->q_resources != NULL) {
		gve_dma_free_coherent(&com->q_resources_mem);
		com->q_resources = NULL;
	}

	if (tx->br != NULL) {
		buf_ring_free(tx->br, M_DEVBUF);
		tx->br = NULL;
	}

	if (gve_is_gqi(priv))
		gve_tx_free_ring_gqi(priv, i);
	else
		gve_tx_free_ring_dqo(priv, i);
}

static int
gve_tx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_tx_desc) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for tx ring %d", i);
		goto abort;
	}
	tx->desc_ring = tx->desc_ring_mem.cpu_addr;

	com->qpl = &priv->qpls[i];
	if (com->qpl == NULL) {
		device_printf(priv->dev, "No QPL left for tx ring %d\n", i);
		err = ENOMEM;
		goto abort;
	}

	err = gve_tx_fifo_init(priv, tx);
	if (err != 0)
		goto abort;

	tx->info = malloc(
	    sizeof(struct gve_tx_buffer_state) * priv->tx_desc_cnt,
	    M_GVE, M_WAITOK | M_ZERO);
	return (0);

abort:
	gve_tx_free_ring_gqi(priv, i);
	return (err);
}

static int
gve_tx_alloc_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;
	char mtx_name[16];
	int err;

	com->priv = priv;
	com->id = i;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_ring_gqi(priv, i);
	else
		err = gve_tx_alloc_ring_dqo(priv, i);
	if (err != 0)
		goto abort;

	sprintf(mtx_name, "gvetx%d", i);
	mtx_init(&tx->ring_mtx, mtx_name, NULL, MTX_DEF);

	tx->br = buf_ring_alloc(GVE_TX_BUFRING_ENTRIES, M_DEVBUF,
	    M_WAITOK, &tx->ring_mtx);

	gve_alloc_counters((counter_u64_t *)&tx->stats, NUM_TX_STATS);

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
	    PAGE_SIZE, &com->q_resources_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc queue resources for tx ring %d", i);
		goto abort;
	}
	com->q_resources = com->q_resources_mem.cpu_addr;

	return (0);

abort:
	gve_tx_free_ring(priv, i);
	return (err);
}

int
gve_alloc_tx_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.num_queues,
	    M_GVE, M_WAITOK | M_ZERO);

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err != 0)
			goto free_rings;
	}

	return (0);

free_rings:
	while (i--)
		gve_tx_free_ring(priv, i);
	free(priv->tx, M_GVE);
	return (err);
}

void
gve_free_tx_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);

	free(priv->tx, M_GVE);
}

static void
gve_tx_clear_desc_ring(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int i;

	for (i = 0; i < com->priv->tx_desc_cnt; i++) {
		tx->desc_ring[i] = (union gve_tx_desc){};
		tx->info[i] = (struct gve_tx_buffer_state){};
	}

	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_clear_tx_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_tx_fifo *fifo = &tx->fifo;

	tx->req = 0;
	tx->done = 0;
	tx->mask = priv->tx_desc_cnt - 1;

	atomic_store_int(&fifo->available, fifo->size);
	fifo->head = 0;

	gve_tx_clear_desc_ring(tx);
}

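/*
 * Create the per-ring cleanup and xmit taskqueues and mark the ring as not
 * stopped; the cleanup handler differs between the GQI and DQO datapaths.
 */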
static void
gve_start_tx_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;

	atomic_store_bool(&tx->stopped, false);
	if (gve_is_gqi(priv))
		NET_TASK_INIT(&com->cleanup_task, 0, gve_tx_cleanup_tq, tx);
	else
		NET_TASK_INIT(&com->cleanup_task, 0, gve_tx_cleanup_tq_dqo, tx);
	com->cleanup_tq = taskqueue_create_fast("gve tx", M_WAITOK,
	    taskqueue_thread_enqueue, &com->cleanup_tq);
	taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET, "%s txq %d",
	    device_get_nameunit(priv->dev), i);

	TASK_INIT(&tx->xmit_task, 0, gve_xmit_tq, tx);
	tx->xmit_tq = taskqueue_create_fast("gve tx xmit",
	    M_WAITOK, taskqueue_thread_enqueue, &tx->xmit_tq);
	taskqueue_start_threads(&tx->xmit_tq, 1, PI_NET, "%s txq %d xmit",
	    device_get_nameunit(priv->dev), i);
}

int
gve_create_tx_rings(struct gve_priv *priv)
{
	struct gve_ring_com *com;
	struct gve_tx_ring *tx;
	int err;
	int i;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK))
		return (0);

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		if (gve_is_gqi(priv))
			gve_clear_tx_ring(priv, i);
		else
			gve_clear_tx_ring_dqo(priv, i);
	}

	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
	if (err != 0)
		return (err);

	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_POSTREAD);

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		tx = &priv->tx[i];
		com = &tx->com;

		com->irq_db_offset = 4 * be32toh(priv->irq_db_indices[com->ntfy_id].index);

		bus_dmamap_sync(com->q_resources_mem.tag, com->q_resources_mem.map,
		    BUS_DMASYNC_POSTREAD);
		com->db_offset = 4 * be32toh(com->q_resources->db_index);
		com->counter_idx = be32toh(com->q_resources->counter_index);

		gve_start_tx_ring(priv, i);
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);
	return (0);
}

static void
gve_stop_tx_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;

	if (com->cleanup_tq != NULL) {
		taskqueue_quiesce(com->cleanup_tq);
		taskqueue_free(com->cleanup_tq);
		com->cleanup_tq = NULL;
	}

	if (tx->xmit_tq != NULL) {
		taskqueue_quiesce(tx->xmit_tq);
		taskqueue_free(tx->xmit_tq);
		tx->xmit_tq = NULL;
	}
}

int
gve_destroy_tx_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_stop_tx_ring(priv, i);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK)) {
		err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
		if (err != 0)
			return (err);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);
	}

	return (0);
}

int
gve_tx_intr(void *arg)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	struct gve_ring_com *com = &tx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	gve_db_bar_write_4(priv, com->irq_db_offset, GVE_IRQ_MASK);
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

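/*
 * The NIC reports TX completions by advancing a per-ring event counter in
 * host memory; sync the counter array and read this ring's counter.
 */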
static uint32_t
gve_tx_load_event_counter(struct gve_priv *priv, struct gve_tx_ring *tx)
{
	bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map,
	    BUS_DMASYNC_POSTREAD);
	uint32_t counter = priv->counters[tx->com.counter_idx];
	return (be32toh(counter));
}

static void
gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add_int(&fifo->available, bytes);
}

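/*
 * Cleanup taskqueue handler for the GQI datapath: reclaims descriptors up to
 * the NIC's event counter, frees the completed mbufs and their FIFO space,
 * re-arms the interrupt, re-checks the counter to catch completions that
 * raced with the re-arm, and restarts the xmit taskqueue if the ring had
 * been stopped for lack of space.
 */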
void
gve_tx_cleanup_tq(void *arg, int pending)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	uint32_t nic_done = gve_tx_load_event_counter(priv, tx);
	uint32_t todo = nic_done - tx->done;
	size_t space_freed = 0;
	int i, j;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	for (j = 0; j < todo; j++) {
		uint32_t idx = tx->done & tx->mask;
		struct gve_tx_buffer_state *info = &tx->info[idx];
		struct mbuf *mbuf = info->mbuf;

		tx->done++;
		if (mbuf == NULL)
			continue;

		info->mbuf = NULL;
		counter_enter();
		counter_u64_add_protected(tx->stats.tbytes, mbuf->m_pkthdr.len);
		counter_u64_add_protected(tx->stats.tpackets, 1);
		counter_exit();
		m_freem(mbuf);

		for (i = 0; i < GVE_TX_MAX_DESCS; i++) {
			space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
			info->iov[i].iov_len = 0;
			info->iov[i].iov_padding = 0;
		}
	}

	gve_tx_free_fifo(&tx->fifo, space_freed);

	gve_db_bar_write_4(priv, tx->com.irq_db_offset,
	    GVE_IRQ_ACK | GVE_IRQ_EVENT);

	/*
	 * Completions born before this barrier MAY NOT cause the NIC to send an
	 * interrupt but they will still be handled by the enqueue below.
	 * Completions born after the barrier WILL trigger an interrupt.
	 */
	atomic_thread_fence_seq_cst();

	nic_done = gve_tx_load_event_counter(priv, tx);
	todo = nic_done - tx->done;
	if (todo != 0) {
		gve_db_bar_write_4(priv, tx->com.irq_db_offset, GVE_IRQ_MASK);
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
	}

	if (atomic_load_bool(&tx->stopped) && space_freed) {
		atomic_store_bool(&tx->stopped, false);
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
	}
}

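/*
 * Sync every QPL page touched by [iov_offset, iov_offset + iov_len) so the
 * device observes the bytes just copied into the FIFO.
 */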
static void
gve_dma_sync_for_device(struct gve_queue_page_list *qpl,
			uint64_t iov_offset, uint64_t iov_len)
{
	uint64_t last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
	uint64_t first_page = iov_offset / PAGE_SIZE;
	struct gve_dma_handle *dma;
	uint64_t page;

	for (page = first_page; page <= last_page; page++) {
		dma = &(qpl->dmas[page]);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);
	}
}

static void
gve_tx_fill_mtd_desc(struct gve_tx_mtd_desc *mtd_desc, struct mbuf *mbuf)
{
	mtd_desc->type_flags = GVE_TXD_MTD | GVE_MTD_SUBTYPE_PATH;
	mtd_desc->path_state = GVE_MTD_PATH_STATE_DEFAULT | GVE_MTD_PATH_HASH_L4;
	mtd_desc->path_hash = htobe32(mbuf->m_pkthdr.flowid);
	mtd_desc->reserved0 = 0;
	mtd_desc->reserved1 = 0;
}

static void
gve_tx_fill_pkt_desc(struct gve_tx_pkt_desc *pkt_desc, bool is_tso,
    uint16_t l4_hdr_offset, uint32_t desc_cnt,
    uint16_t first_seg_len, uint64_t addr, bool has_csum_flag,
    int csum_offset, uint16_t pkt_len)
{
	if (is_tso) {
		pkt_desc->type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		pkt_desc->l4_csum_offset = csum_offset >> 1;
		pkt_desc->l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (has_csum_flag) {
		pkt_desc->type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		pkt_desc->l4_csum_offset = csum_offset >> 1;
		pkt_desc->l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		pkt_desc->type_flags = GVE_TXD_STD;
		pkt_desc->l4_csum_offset = 0;
		pkt_desc->l4_hdr_offset = 0;
	}
	pkt_desc->desc_cnt = desc_cnt;
	pkt_desc->len = htobe16(pkt_len);
	pkt_desc->seg_len = htobe16(first_seg_len);
	pkt_desc->seg_addr = htobe64(addr);
}

static void
gve_tx_fill_seg_desc(struct gve_tx_seg_desc *seg_desc,
    bool is_tso, uint16_t len, uint64_t addr,
    bool is_ipv6, uint8_t l3_off, uint16_t tso_mss)
{
	seg_desc->type_flags = GVE_TXD_SEG;
	if (is_tso) {
		if (is_ipv6)
			seg_desc->type_flags |= GVE_TXSF_IPV6;
		seg_desc->l3_offset = l3_off >> 1;
		seg_desc->mss = htobe16(tso_mss);
	}
	seg_desc->seg_len = htobe16(len);
	seg_desc->seg_addr = htobe64(addr);
}

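/* Number of descriptor slots still free in the ring. */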
static inline uint32_t
gve_tx_avail(struct gve_tx_ring *tx)
{
	return (tx->mask + 1 - (tx->req - tx->done));
}

static bool
gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (atomic_load_int(&fifo->available) >= bytes);
}

static inline bool
gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	return (gve_tx_avail(tx) >= (GVE_TX_MAX_DESCS + 1) &&
	    gve_tx_fifo_can_alloc(&tx->fifo, bytes_required));
}

static int
gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

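/*
 * Worst-case FIFO bytes needed for this packet: padding to the end of the
 * FIFO if the header would otherwise wrap (headers are never split),
 * cache-line alignment padding after the header, and the packet bytes
 * themselves.
 */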
static inline int
gve_fifo_bytes_required(struct gve_tx_ring *tx, uint16_t first_seg_len,
    uint16_t pkt_len)
{
	int pad_bytes, align_hdr_pad;
	int bytes;

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->fifo, first_seg_len);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = roundup2(first_seg_len, CACHE_LINE_SIZE) - first_seg_len;
	bytes = align_hdr_pad + pad_bytes + pkt_len;

	return (bytes);
}

static int
gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
    struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	uint32_t aligned_head;
	int nfrags = 0;

	if (bytes == 0)
		return (0);

	/*
	 * This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine,
	 * because the FIFO head always starts aligned, and the FIFO's
	 * boundaries are aligned, so if there is space for the data, there is
	 * space for the padding to the next alignment.
	 */
	KASSERT(gve_tx_fifo_can_alloc(fifo, bytes),
	    ("Allocating gve tx fifo when there is no room"));

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/*
		 * If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of fifo */
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = roundup2(fifo->head, CACHE_LINE_SIZE);
	padding = aligned_head - fifo->head;
	iov[nfrags - 1].iov_padding = padding;
	atomic_add_int(&fifo->available, -(bytes + padding));
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return (nfrags);
}

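/*
 * Copy-based GQI transmit: the packet is copied into the TX FIFO and
 * described to the NIC with a packet descriptor for the first segment
 * (the protocol headers, or up to GVE_GQ_TX_MIN_PKT_DESC_BYTES for
 * non-TCP/UDP packets), an optional metadata descriptor carrying the RSS
 * hash, and one segment descriptor per payload FIFO fragment.
 */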
/* Only error this returns is ENOBUFS when the tx fifo is short of space */
static int
gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
{
	bool is_tso, has_csum_flag, is_ipv6 = false, is_tcp = false, is_udp = false;
	int csum_flags, csum_offset, mtd_desc_nr, offset, copy_offset;
	uint16_t tso_mss, l4_off, l4_data_off, pkt_len, first_seg_len;
	int pad_bytes, hdr_nfrags, payload_nfrags;
	struct gve_tx_pkt_desc *pkt_desc;
	struct gve_tx_seg_desc *seg_desc;
	struct gve_tx_mtd_desc *mtd_desc;
	struct gve_tx_buffer_state *info;
	uint32_t idx = tx->req & tx->mask;
	struct ether_header *eh;
	struct mbuf *mbuf_next;
	int payload_iov = 2;
	int bytes_required;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	uint32_t next_idx;
	uint8_t l3_off;
	struct ip *ip;
	int i;

	info = &tx->info[idx];
	csum_flags = mbuf->m_pkthdr.csum_flags;
	pkt_len = mbuf->m_pkthdr.len;
	is_tso = csum_flags & CSUM_TSO;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	mtd_desc_nr = M_HASHTYPE_GET(mbuf) != M_HASHTYPE_NONE ? 1 : 0;
	tso_mss = is_tso ? mbuf->m_pkthdr.tso_segsz : 0;

	eh = mtod(mbuf, struct ether_header *);
	KASSERT(eh->ether_type != ETHERTYPE_VLAN,
	    ("VLAN-tagged packets not supported"));

	is_ipv6 = ntohs(eh->ether_type) == ETHERTYPE_IPV6;
	l3_off = ETHER_HDR_LEN;
	mbuf_next = m_getptr(mbuf, l3_off, &offset);

	if (is_ipv6) {
		ip6 = (struct ip6_hdr *)(mtodo(mbuf_next, offset));
		l4_off = l3_off + sizeof(struct ip6_hdr);
		is_tcp = (ip6->ip6_nxt == IPPROTO_TCP);
		is_udp = (ip6->ip6_nxt == IPPROTO_UDP);
		mbuf_next = m_getptr(mbuf, l4_off, &offset);
	} else if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		ip = (struct ip *)(mtodo(mbuf_next, offset));
		l4_off = l3_off + (ip->ip_hl << 2);
		is_tcp = (ip->ip_p == IPPROTO_TCP);
		is_udp = (ip->ip_p == IPPROTO_UDP);
		mbuf_next = m_getptr(mbuf, l4_off, &offset);
	}

	l4_data_off = 0;
	if (is_tcp) {
		th = (struct tcphdr *)(mtodo(mbuf_next, offset));
		l4_data_off = l4_off + (th->th_off << 2);
	} else if (is_udp)
		l4_data_off = l4_off + sizeof(struct udphdr);

	if (has_csum_flag) {
		if ((csum_flags & (CSUM_TSO | CSUM_TCP | CSUM_IP6_TCP)) != 0)
			csum_offset = offsetof(struct tcphdr, th_sum);
		else
			csum_offset = offsetof(struct udphdr, uh_sum);
	}

	/*
	 * If this packet is neither a TCP nor a UDP packet, the first segment,
	 * the one represented by the packet descriptor, will carry the
	 * spec-stipulated minimum of 182B.
	 */
	if (l4_data_off != 0)
		first_seg_len = l4_data_off;
	else
		first_seg_len = MIN(pkt_len, GVE_GQ_TX_MIN_PKT_DESC_BYTES);

	bytes_required = gve_fifo_bytes_required(tx, first_seg_len, pkt_len);
	if (__predict_false(!gve_can_tx(tx, bytes_required))) {
		counter_enter();
		counter_u64_add_protected(tx->stats.tx_delayed_pkt_nospace_device, 1);
		counter_exit();
		return (ENOBUFS);
	}

	/* So that the cleanup taskqueue can free the mbuf eventually. */
	info->mbuf = mbuf;

	/*
	 * We don't want to split the header, so if necessary, pad to the end
	 * of the fifo and then put the header at the beginning of the fifo.
	 */
	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->fifo, first_seg_len);
	hdr_nfrags = gve_tx_alloc_fifo(&tx->fifo, first_seg_len + pad_bytes,
	    &info->iov[0]);
	KASSERT(hdr_nfrags > 0, ("Number of header fragments for gve tx is 0"));
	payload_nfrags = gve_tx_alloc_fifo(&tx->fifo, pkt_len - first_seg_len,
	    &info->iov[payload_iov]);

	pkt_desc = &tx->desc_ring[idx].pkt;
	gve_tx_fill_pkt_desc(pkt_desc, is_tso, l4_off,
	    1 + mtd_desc_nr + payload_nfrags, first_seg_len,
	    info->iov[hdr_nfrags - 1].iov_offset, has_csum_flag, csum_offset,
	    pkt_len);

	m_copydata(mbuf, 0, first_seg_len,
	    (char *)tx->fifo.base + info->iov[hdr_nfrags - 1].iov_offset);
	gve_dma_sync_for_device(tx->com.qpl,
	    info->iov[hdr_nfrags - 1].iov_offset,
	    info->iov[hdr_nfrags - 1].iov_len);
	copy_offset = first_seg_len;

	if (mtd_desc_nr == 1) {
		next_idx = (tx->req + 1) & tx->mask;
		mtd_desc = &tx->desc_ring[next_idx].mtd;
		gve_tx_fill_mtd_desc(mtd_desc, mbuf);
	}

	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
		next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask;
		seg_desc = &tx->desc_ring[next_idx].seg;

		gve_tx_fill_seg_desc(seg_desc, is_tso, info->iov[i].iov_len,
		    info->iov[i].iov_offset, is_ipv6, l3_off, tso_mss);

		m_copydata(mbuf, copy_offset, info->iov[i].iov_len,
		    (char *)tx->fifo.base + info->iov[i].iov_offset);
		gve_dma_sync_for_device(tx->com.qpl,
		    info->iov[i].iov_offset, info->iov[i].iov_len);
		copy_offset += info->iov[i].iov_len;
	}

	tx->req += (1 + mtd_desc_nr + payload_nfrags);
	if (is_tso) {
		counter_enter();
		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
		counter_exit();
	}
	return (0);
}

static int
gve_xmit_mbuf(struct gve_tx_ring *tx,
    struct mbuf **mbuf)
{
	if (gve_is_gqi(tx->com.priv))
		return (gve_xmit(tx, *mbuf));

	if (gve_is_qpl(tx->com.priv))
		return (gve_xmit_dqo_qpl(tx, *mbuf));

	/*
	 * gve_xmit_dqo might attempt to defrag the mbuf chain.
	 * The reference is passed in so that in the case of
	 * errors, the new mbuf chain is what's put back on the br.
	 */
	return (gve_xmit_dqo(tx, mbuf));
}

/*
 * Has the side-effect of stopping the xmit queue by setting tx->stopped
 */
static int
gve_xmit_retry_enobuf_mbuf(struct gve_tx_ring *tx,
    struct mbuf **mbuf)
{
	int err;

	atomic_store_bool(&tx->stopped, true);

	/*
	 * Room made in the queue BEFORE the barrier will be seen by the
	 * gve_xmit_mbuf retry below.
	 *
	 * If room is made in the queue AFTER the barrier, the cleanup tq
	 * iteration creating the room will either see a tx->stopped value
	 * of 0 or the 1 we just wrote:
	 *
	 *   If it sees a 1, then it would enqueue the xmit tq. Enqueue
	 *   implies a retry on the waiting pkt.
	 *
	 *   If it sees a 0, then that implies a previous iteration overwrote
	 *   our 1, and that iteration would enqueue the xmit tq. Enqueue
	 *   implies a retry on the waiting pkt.
	 */
	atomic_thread_fence_seq_cst();

	err = gve_xmit_mbuf(tx, mbuf);
	if (err == 0)
		atomic_store_bool(&tx->stopped, false);

	return (err);
}

static void
gve_xmit_br(struct gve_tx_ring *tx)
{
	struct gve_priv *priv = tx->com.priv;
	struct ifnet *ifp = priv->ifp;
	struct mbuf *mbuf;
	int err;

	while ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0 &&
	    (mbuf = drbr_peek(ifp, tx->br)) != NULL) {
		err = gve_xmit_mbuf(tx, &mbuf);

		/*
		 * We need to stop this taskqueue when we can't xmit the pkt due
		 * to lack of space in the NIC ring (ENOBUFS). The retry exists
		 * to guard against a TOCTTOU bug that could end up freezing the
		 * queue forever.
		 */
		if (__predict_false(mbuf != NULL && err == ENOBUFS))
			err = gve_xmit_retry_enobuf_mbuf(tx, &mbuf);

		if (__predict_false(err != 0 && mbuf != NULL)) {
			if (err == EINVAL) {
				drbr_advance(ifp, tx->br);
				m_freem(mbuf);
			} else
				drbr_putback(ifp, tx->br, mbuf);
			break;
		}

		drbr_advance(ifp, tx->br);
		BPF_MTAP(ifp, mbuf);

		bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);

		if (gve_is_gqi(priv))
			gve_db_bar_write_4(priv, tx->com.db_offset, tx->req);
		else
			gve_db_bar_dqo_write_4(priv, tx->com.db_offset,
			    tx->dqo.desc_tail);
	}
}

void
gve_xmit_tq(void *arg, int pending)
{
	struct gve_tx_ring *tx = (struct gve_tx_ring *)arg;

	GVE_RING_LOCK(tx);
	gve_xmit_br(tx);
	GVE_RING_UNLOCK(tx);
}

static bool
is_vlan_tagged_pkt(struct mbuf *mbuf)
{
	struct ether_header *eh;

	eh = mtod(mbuf, struct ether_header *);
	return (ntohs(eh->ether_type) == ETHERTYPE_VLAN);
}

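/*
 * if_transmit handler: selects a TX ring from the mbuf's flow id (or the
 * current CPU), drops VLAN-tagged packets (not supported), enqueues the mbuf
 * on the ring's buf_ring, and transmits inline when the buf_ring was empty
 * and the ring lock is uncontended, otherwise defers to the xmit taskqueue.
 */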
int
gve_xmit_ifp(if_t ifp, struct mbuf *mbuf)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	struct gve_tx_ring *tx;
	bool is_br_empty;
	int err;
	uint32_t i;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (ENODEV);

	if (M_HASHTYPE_GET(mbuf) != M_HASHTYPE_NONE)
		i = mbuf->m_pkthdr.flowid % priv->tx_cfg.num_queues;
	else
		i = curcpu % priv->tx_cfg.num_queues;
	tx = &priv->tx[i];

	if (__predict_false(is_vlan_tagged_pkt(mbuf))) {
		counter_enter();
		counter_u64_add_protected(tx->stats.tx_dropped_pkt_vlan, 1);
		counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
		counter_exit();
		m_freem(mbuf);
		return (ENODEV);
	}

	is_br_empty = drbr_empty(ifp, tx->br);
	err = drbr_enqueue(ifp, tx->br, mbuf);
	if (__predict_false(err != 0)) {
		if (!atomic_load_bool(&tx->stopped))
			taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
		counter_enter();
		counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_bufring, 1);
		counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
		counter_exit();
		return (err);
	}

	/*
	 * If the mbuf we just enqueued is the only one on the ring, then
	 * transmit it right away in the interests of low latency.
	 */
	if (is_br_empty && (GVE_RING_TRYLOCK(tx) != 0)) {
		gve_xmit_br(tx);
		GVE_RING_UNLOCK(tx);
	} else if (!atomic_load_bool(&tx->stopped))
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);

	return (0);
}

void
gve_qflush(if_t ifp)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	struct gve_tx_ring *tx;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; ++i) {
		tx = &priv->tx[i];
		if (drbr_empty(ifp, tx->br) == 0) {
			GVE_RING_LOCK(tx);
			drbr_flush(ifp, tx->br);
			GVE_RING_UNLOCK(tx);
		}
	}

	if_qflush(ifp);
}
92554dfc97bSShailend Chand }
926