/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

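/*
 * Unload and free the per-buffer mbufs used in the non-QPL datapath.
 * QPL rings carry no individually mapped mbufs, so there is nothing to do.
 */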
static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	if (gve_is_qpl(rx->com.priv))
		return;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

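/*
 * Release everything allocated for RX ring i: the completion ring, the
 * descriptor ring, the buffer bookkeeping array, and, in the non-QPL case,
 * the per-buffer DMA maps and the buffer DMA tag.
 */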
void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);
}

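/*
 * Allocate the descriptor and completion rings for RX ring i along with the
 * buffer bookkeeping array. QPL rings take their pages from a preallocated
 * queue-page-list; non-QPL rings create a DMA tag and one map per buffer so
 * that cluster mbufs can be attached later.
 */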
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
	    priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		rx->com.qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
		if (rx->com.qpl == NULL) {
			device_printf(priv->dev, "No QPL left for rx ring %d", i);
			return (ENOMEM);
		}
		return (0);
	}

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MCLBYTES,			/* maxsize */
	    1,				/* nsegments */
	    MCLBYTES,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating rx buf dmamap %d: %d",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

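/*
 * Reset ring i to its post-allocation state: zero the software indices and
 * both hardware rings, free any non-QPL mbufs, and rebuild the buffer lists.
 * QPL buffers whose pages are still wired by the stack are parked on
 * used_bufs instead of free_bufs so they are not handed back to the NIC
 * while an mbuf still references them.
 */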
void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	if (gve_is_qpl(priv)) {
		SLIST_INIT(&rx->dqo.free_bufs);
		STAILQ_INIT(&rx->dqo.used_bufs);

		for (j = 0; j < rx->dqo.buf_cnt; j++) {
			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
			u_int ref_count = atomic_load_int(&page->ref_count);

			/*
			 * An ifconfig down+up might see pages still in flight
			 * from the previous innings.
			 */
			if (VPRC_WIRE_COUNT(ref_count) == 1)
				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
				    buf, slist_entry);
			else
				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
				    buf, stailq_entry);

			buf->num_nic_frags = 0;
			buf->next_idx = 0;
		}
	} else {
		SLIST_INIT(&rx->dqo.free_bufs);
		for (j = 0; j < rx->dqo.buf_cnt; j++)
			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
			    &rx->dqo.bufs[j], slist_entry);
	}
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

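/*
 * Move the buffer-post head forward by one and, once a full batch of
 * GVE_RX_BUF_THRESH_DQO descriptors has been written, sync the descriptor
 * ring and ring the doorbell to tell the NIC about the new buffers.
 */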
static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

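/* Post a single mbuf-backed buffer (non-QPL mode) at the current head. */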
static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	gve_rx_advance_head_dqo(rx);
}

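/*
 * Take a buffer off the free list, attach a freshly allocated cluster mbuf
 * to it, DMA-load the cluster, and post it to the NIC. On failure the buffer
 * is returned to the free list and an error is reported to the caller.
 */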
static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = MCLBYTES;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}

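/*
 * Post one fragment of a QPL page: the buffer id and the fragment number are
 * packed into the descriptor's buf_id, and the DMA address is offset by
 * frag_num * GVE_DEFAULT_RX_BUFFER_SIZE into the page.
 */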
static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    uint8_t frag_num)
{
	struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;

	composed_id.buf_id = buf - rx->dqo.bufs;
	composed_id.frag_num = frag_num;
	desc->buf_id = htole16(composed_id.all);

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_PREREAD);
	desc->buf_addr = htole64(page_dma_handle->bus_addr +
	    frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	buf->num_nic_frags++;
	gve_rx_advance_head_dqo(rx);
}

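/*
 * Walk used_bufs and move every buffer whose page has dropped back to a
 * wire count of 1 (i.e. no mbuf still points into it) onto free_bufs.
 * At most one head-of-line blocker is skipped over; if a second in-flight
 * page is found the scan stops. With just_one set, stop after reclaiming
 * a single buffer.
 */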
static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
	struct gve_rx_buf_dqo *hol_blocker = NULL;
	struct gve_rx_buf_dqo *buf;
	u_int ref_count;
	vm_page_t page;

	while (true) {
		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
		if (__predict_false(buf == NULL))
			break;

		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
		ref_count = atomic_load_int(&page->ref_count);

		if (VPRC_WIRE_COUNT(ref_count) != 1) {
			/* Account for one head-of-line blocker */
			if (hol_blocker != NULL)
				break;
			hol_blocker = buf;
			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
			    stailq_entry);
			continue;
		}

		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
		    stailq_entry);
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    buf, slist_entry);
		if (just_one)
			break;
	}

	if (hol_blocker != NULL)
		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
		    hol_blocker, stailq_entry);
}

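/*
 * Post the next unposted fragment of a free QPL buffer. If free_bufs is
 * empty, try to reclaim exactly one buffer from used_bufs first.
 */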
static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(buf == NULL)) {
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
		buf = SLIST_FIRST(&rx->dqo.free_bufs);
		if (__predict_false(buf == NULL))
			return (ENOBUFS);
	}

	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
	if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
		buf->next_idx = 0;
	else
		buf->next_idx++;

	/*
	 * We have posted all the frags in this buf to the NIC.
	 * - buf will enter used_bufs once the last completion arrives.
	 * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
	 *   when its wire count drops back to 1.
	 */
	if (buf->next_idx == 0)
		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
	return (0);
}

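/*
 * Refill the descriptor ring: post as many buffers as will fit between the
 * current head and tail, stopping early if buffer or mbuf allocation fails.
 */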
static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		if (gve_is_qpl(rx->com.priv))
			err = gve_rx_post_new_dqo_qpl_buf(rx);
		else
			err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}

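/*
 * Translate the completion descriptor's checksum bits into mbuf csum flags,
 * marking the packet as verified only when the hardware parsed the headers
 * and reported no L3 or L4 checksum errors.
 */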
static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
					    CSUM_IP_VALID |
					    CSUM_DATA_VALID |
					    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

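/*
 * Finish off a completed packet: stamp the RSS hash and checksum flags on
 * the head mbuf, then hand the chain to LRO when possible or directly to
 * if_input(), update the ring counters, and reset the per-packet context.
 */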
static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

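/*
 * Copybreak path for small single-fragment packets: copy the frame out of
 * the receive buffer into a new mbuf so the buffer can be reposted
 * immediately.
 */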
static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, va);
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}

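/*
 * Handle one completion descriptor in the non-QPL datapath: validate the
 * buffer id, take the copybreak shortcut for small packets, otherwise chain
 * the buffer's mbuf onto the in-progress packet and recycle the buffer.
 * Malformed completions trigger a reset; allocation failures below the
 * pending-buffer threshold drop the packet so its buffer can be reposted.
 */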
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
		    compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_buf_dqo(rx, buf);
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
	 * has already been read by the NIC.
	 */
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
	int page_idx = buf - rx->dqo.bufs;
	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

	va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
	return (va);
}

static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
	struct mbuf *mbuf;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	m_copyback(mbuf, 0, frag_len, va);
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
	counter_exit();
	return (0);
}

static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	struct mbuf *mbuf;
	void *page_addr;
	vm_page_t page;
	int page_idx;
	void *va;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_get(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	page_idx = buf - rx->dqo.bufs;
	page = rx->com.qpl->pages[page_idx];
	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
	va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	/*
	 * Grab an extra ref to the page so that gve_mextadd_free
	 * does not end up freeing the page while the interface exists.
	 */
	vm_page_wire(page);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
	counter_exit();

	MEXTADD(mbuf, va, frag_len,
	    gve_mextadd_free, page, page_addr,
	    0, EXT_NET_DRV);
	return (0);
}

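/*
 * Handle one completion descriptor in the QPL datapath. The completion's
 * buf_id encodes both the page and the fragment within it. Small last
 * fragments go through copybreak; otherwise the fragment is attached to the
 * packet either zero-copy (external mbuf pointing into the wired page) or,
 * when the ring is running low on postable buffers, by copying it into a
 * cluster mbuf so the page can be reposted right away.
 */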
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint8_t buf_frag_num;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	composed_id.all = le16toh(compl_desc->buf_id);
	buf_id = composed_id.buf_id;
	buf_frag_num = composed_id.frag_num;

	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->num_nic_frags == 0 ||
	    buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	buf->num_nic_frags--;

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
		return;
	}

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	err = gve_rx_post_new_dqo_qpl_buf(rx);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		/*
		 * Resort to copying this fragment into a cluster mbuf
		 * when the above threshold is breached and repost the
		 * incoming buffer. If we cannot find cluster mbufs,
		 * just drop the packet (to repost its buffer).
		 */
		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (err != 0) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
			counter_exit();
			goto drop_frag;
		}
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
	} else {
		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (__predict_false(err != 0)) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
			counter_exit();
			goto drop_frag;
		}
	}

	/*
	 * Both the counts need to be checked.
	 *
	 * num_nic_frags == 0 implies no pending completions
	 * but not all frags may have yet been posted.
	 *
	 * next_idx == 0 implies all frags have been posted
	 * but there might be pending completions.
	 */
	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

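/*
 * Main completion-processing loop: consume completion descriptors until the
 * generation bit flips or the budget is exhausted, flush LRO, repost
 * buffers, and (for QPL) sweep used_bufs for reclaimable pages. Returns
 * true when the full budget was used, i.e. more work may be pending.
 */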
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (compl_desc->generation == rx->dqo.cur_gen_bit)
			break;
		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		rmb();

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		if (gve_is_qpl(priv))
			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
		else
			gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	if (gve_is_qpl(priv))
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
	return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}