/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

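/*
 * Unloads and frees any mbufs still attached to the ring's buffers. In QPL
 * mode the buffers are backed by pre-registered QPL pages rather than
 * driver-allocated mbufs, so there is nothing to release here.
 */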
static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	if (gve_is_qpl(rx->com.priv))
		return;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);
}

int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d\n", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d\n", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
	    priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		rx->com.qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
		if (rx->com.qpl == NULL) {
			device_printf(priv->dev, "No QPL left for rx ring %d\n", i);
			return (ENOMEM);
		}
		return (0);
	}

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MCLBYTES,			/* maxsize */
	    1,				/* nsegments */
	    MCLBYTES,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating rx buf dmamap %d: %d\n",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	if (gve_is_qpl(priv)) {
		SLIST_INIT(&rx->dqo.free_bufs);
		STAILQ_INIT(&rx->dqo.used_bufs);

		for (j = 0; j < rx->dqo.buf_cnt; j++) {
			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
			u_int ref_count = atomic_load_int(&page->ref_count);

			/*
			 * An ifconfig down+up might see pages still in flight
			 * from the previous innings.
			 */
			if (VPRC_WIRE_COUNT(ref_count) == 1)
				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
				    buf, slist_entry);
			else
				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
				    buf, stailq_entry);

			buf->num_nic_frags = 0;
			buf->next_idx = 0;
		}
	} else {
		SLIST_INIT(&rx->dqo.free_bufs);
		for (j = 0; j < rx->dqo.buf_cnt; j++)
			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
			    &rx->dqo.bufs[j], slist_entry);
	}
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

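/*
 * Advances the buffer-posting index. The doorbell is written only once
 * every GVE_RX_BUF_THRESH_DQO buffers so that descriptor ring syncs and
 * doorbell writes are batched rather than issued for every buffer.
 */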
static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	gve_rx_advance_head_dqo(rx);
}

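/*
 * Pulls a buffer off the free list, attaches a freshly allocated cluster
 * mbuf to it, DMA-loads the cluster and posts the buffer to the NIC.
 * Used only on the non-QPL path.
 */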
static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = MCLBYTES;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}

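/*
 * Posts one fragment of a QPL page. Each page is carved into
 * GVE_DEFAULT_RX_BUFFER_SIZE-sized fragments; the descriptor's buf_id
 * encodes both the page index and the fragment number within the page.
 */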
static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    uint8_t frag_num)
{
	struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;

	composed_id.buf_id = buf - rx->dqo.bufs;
	composed_id.frag_num = frag_num;
	desc->buf_id = htole16(composed_id.all);

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_PREREAD);
	desc->buf_addr = htole64(page_dma_handle->bus_addr +
	    frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	buf->num_nic_frags++;
	gve_rx_advance_head_dqo(rx);
}

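/*
 * Scans used_bufs for pages whose wire count has dropped back to 1,
 * meaning the stack no longer holds mbufs pointing into them, and moves
 * them onto free_bufs. At most one head-of-line blocker is skipped over,
 * so a single long-lived mbuf cannot stall reclamation entirely.
 */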
static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
	struct gve_rx_buf_dqo *hol_blocker = NULL;
	struct gve_rx_buf_dqo *buf;
	u_int ref_count;
	vm_page_t page;

	while (true) {
		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
		if (__predict_false(buf == NULL))
			break;

		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
		ref_count = atomic_load_int(&page->ref_count);

		if (VPRC_WIRE_COUNT(ref_count) != 1) {
			/* Account for one head-of-line blocker */
			if (hol_blocker != NULL)
				break;
			hol_blocker = buf;
			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
			    stailq_entry);
			continue;
		}

		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
		    stailq_entry);
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    buf, slist_entry);
		if (just_one)
			break;
	}

	if (hol_blocker != NULL)
		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
		    hol_blocker, stailq_entry);
}

static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(buf == NULL)) {
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
		buf = SLIST_FIRST(&rx->dqo.free_bufs);
		if (__predict_false(buf == NULL))
			return (ENOBUFS);
	}

	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
	if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
		buf->next_idx = 0;
	else
		buf->next_idx++;

	/*
	 * We have posted all the frags in this buf to the NIC.
	 * - buf will enter used_bufs once the last completion arrives.
	 * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
	 *   when its wire count drops back to 1.
	 */
	if (buf->next_idx == 0)
		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
	return (0);
}

static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		if (gve_is_qpl(rx->com.priv))
			err = gve_rx_post_new_dqo_qpl_buf(rx);
		else
			err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

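/*
 * Translates the NIC-reported packet type into an RSS hash type and
 * records whether the frame is TCP so the caller can attempt LRO.
 */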
static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}

static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
					    CSUM_IP_VALID |
					    CSUM_DATA_VALID |
					    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

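/*
 * Copies a small frame (at most rx_copybreak bytes) into a freshly
 * allocated mbuf so that the receive buffer it arrived in can be
 * reposted immediately by the caller.
 */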
static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, va);
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}

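/*
 * Processes one completion on the non-QPL path: small frames take the
 * copybreak path and their buffer is reposted right away; larger frames
 * hand their mbuf to the packet being assembled in rx->ctx and return
 * the buffer to the free list to be paired with a future mbuf.
 */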
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
		    compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_buf_dqo(rx, buf);
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
	 * has already been read by the NIC.
	 */
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
	int page_idx = buf - rx->dqo.bufs;
	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

	va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
	return (va);
}

static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
	struct mbuf *mbuf;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	m_copyback(mbuf, 0, frag_len, va);
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
	counter_exit();
	return (0);
}

static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	struct mbuf *mbuf;
	void *page_addr;
	vm_page_t page;
	int page_idx;
	void *va;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_get(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	page_idx = buf - rx->dqo.bufs;
	page = rx->com.qpl->pages[page_idx];
	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
	va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	/*
	 * Grab an extra ref to the page so that gve_mextadd_free
	 * does not end up freeing the page while the interface exists.
	 */
	vm_page_wire(page);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
	counter_exit();

	MEXTADD(mbuf, va, frag_len,
	    gve_mextadd_free, page, page_addr,
	    0, EXT_NET_DRV);
	return (0);
}

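/*
 * Processes one completion on the QPL path. The completion's buf_id is
 * decoded into a page and a fragment within it. Small frames take the
 * copybreak path; otherwise the fragment is either copied into a cluster
 * mbuf (when free buffers are running low, so the page can be reposted)
 * or attached to the chain as external storage with an extra wiring on
 * the page.
 */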
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint8_t buf_frag_num;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	composed_id.all = le16toh(compl_desc->buf_id);
	buf_id = composed_id.buf_id;
	buf_frag_num = composed_id.frag_num;

	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->num_nic_frags == 0 ||
	    buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	buf->num_nic_frags--;

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
		return;
	}

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	err = gve_rx_post_new_dqo_qpl_buf(rx);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		/*
		 * Resort to copying this fragment into a cluster mbuf
		 * when the above threshold is breached and repost the
		 * incoming buffer. If we cannot find cluster mbufs,
		 * just drop the packet (to repost its buffer).
		 */
		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (err != 0) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
			counter_exit();
			goto drop_frag;
		}
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
	} else {
		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (__predict_false(err != 0)) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
			counter_exit();
			goto drop_frag;
		}
	}

	/*
	 * Both the counts need to be checked.
	 *
	 * num_nic_frags == 0 implies no pending completions
	 * but not all frags may have yet been posted.
	 *
	 * next_idx == 0 implies all frags have been posted
	 * but there might be pending completions.
	 */
	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

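/*
 * Polls the completion ring for up to budget descriptors. A descriptor
 * is new only if its generation bit differs from cur_gen_bit, which the
 * driver toggles every time the tail index wraps; the device is expected
 * to flip the bit it writes on each pass through the ring. Afterwards,
 * pending LRO is flushed and fresh buffers are posted.
 */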
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (compl_desc->generation == rx->dqo.cur_gen_bit)
			break;
		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		atomic_thread_fence_acq();

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		if (gve_is_qpl(priv))
			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
		else
			gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	if (gve_is_qpl(priv))
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
	return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}