/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

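/*
 * Releases all resources held by rx queue i: the completion and descriptor
 * rings, any mbufs still attached to buffer slots (unloaded and freed before
 * their dmamaps are destroyed), the buffer array, and finally the DMA tag
 * the dmamaps were created from.
 */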
void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);
}

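/*
 * Allocates the descriptor ring, the completion ring, and the per-buffer DMA
 * state for rx queue i. Each buffer slot gets its own dmamap, created from a
 * tag sized for a single MCLBYTES cluster. The ring mask math assumes
 * rx_desc_cnt is a power of two.
 */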
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d\n", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MCLBYTES,			/* maxsize */
	    1,				/* nsegments */
	    MCLBYTES,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	rx->dqo.buf_cnt = priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);
	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "Failed to create dmamap for rx buf %d: %d\n",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d\n", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	SLIST_INIT(&rx->dqo.free_bufs);
	for (j = 0; j < rx->dqo.buf_cnt; j++)
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    &rx->dqo.bufs[j], slist_entry);
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

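/*
 * Writes the given buffer's id and DMA address into the descriptor ring slot
 * at head and advances head. The descriptor ring is synced and the doorbell
 * rung only once every GVE_RX_BUF_THRESH_DQO posts, presumably to batch the
 * MMIO writes.
 */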
static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

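/*
 * Takes a buffer slot off the free list, attaches a freshly allocated cluster
 * mbuf to it, DMA-maps the cluster, and posts the buffer to the descriptor
 * ring. On failure the slot is returned to the free list.
 */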
static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = MCLBYTES;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

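/*
 * Refills the descriptor ring with as many new buffers as it can take.
 * num_to_post is capped at mask (ring size - 1) rather than the full ring
 * size, presumably so at least one slot stays unused and head == tail
 * unambiguously means "nothing pending". For example, with a 1024-entry ring
 * (mask 1023) and 1000 buffers already outstanding, at most 23 more are
 * posted.
 */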
static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}

static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
					    CSUM_IP_VALID |
					    CSUM_DATA_VALID |
					    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

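/*
 * Copybreak path for small single-fragment packets: the payload is copied
 * into a freshly allocated mbuf so the original DMA buffer can be re-posted
 * to the NIC right away instead of being detached from its slot.
 */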
static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, mtod(buf->mbuf, char *));
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_post_buf_dqo(rx, buf);
	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}

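/*
 * Handles one completion descriptor: the buffer it names is looked up by
 * buf_id, its mbuf is appended to the per-ring reassembly context, and once
 * the end-of-packet fragment arrives the chain is handed to the stack. An
 * out-of-range buf_id or a completion for an empty slot triggers a device
 * reset.
 */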
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, buf, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
	 * has already been read by the NIC.
	 */
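	/*
	 * head and tail are kept in [0, mask], so the masked subtraction
	 * below is modular: e.g. on a 1024-entry ring with head = 5 and
	 * tail = 1020, (5 - 1020) & 1023 = 9 buffers are still outstanding.
	 */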
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

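	/*
	 * New completions are detected via the generation bit: the NIC is
	 * expected to write it with a value that alternates on every pass
	 * over the completion ring, while cur_gen_bit tracks the value that
	 * stale (not yet rewritten) entries carry on the current pass. An
	 * entry whose generation still equals cur_gen_bit has therefore not
	 * been written yet, and processing stops there.
	 */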
	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (compl_desc->generation == rx->dqo.cur_gen_bit)
			break;
		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		rmb();

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	return (work_done == budget);
}

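/*
 * Cleanup taskqueue handler: drains up to 64 completions per run, re-enqueues
 * itself if that budget was exhausted, and otherwise re-enables the queue's
 * interrupt by writing the ITR doorbell.
 */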
void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}
633*d438b4efSShailend Chand }
634