xref: /freebsd/sys/dev/gve/gve_rx_dqo.c (revision 71702df6126226b31dc3ec66459388e32b993be1)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2024 Google LLC
5  *
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * 3. Neither the name of the copyright holder nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software without
18  *    specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
24  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
27  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 #include "gve.h"
32 #include "gve_adminq.h"
33 #include "gve_dqo.h"
34 
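/*
 * Unload and free the mbufs backing the RX buffers. Only relevant in RDA
 * mode: in QPL mode the buffers are backed by QPL pages rather than mbufs,
 * so there is nothing to release here.
 */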
35 static void
36 gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
37 {
38 	struct gve_rx_buf_dqo *buf;
39 	int i;
40 
41 	if (gve_is_qpl(rx->com.priv))
42 		return;
43 
44 	for (i = 0; i < rx->dqo.buf_cnt; i++) {
45 		buf = &rx->dqo.bufs[i];
46 		if (!buf->mbuf)
47 			continue;
48 
49 		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
50 		    BUS_DMASYNC_POSTREAD);
51 		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
52 		m_freem(buf->mbuf);
53 		buf->mbuf = NULL;
54 	}
55 }
56 
57 void
58 gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
59 {
60 	struct gve_rx_ring *rx = &priv->rx[i];
61 	struct gve_ring_com *com = &rx->com;
62 	int j;
63 
64 	if (rx->dqo.compl_ring != NULL) {
65 		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
66 		rx->dqo.compl_ring = NULL;
67 	}
68 
69 	if (rx->dqo.desc_ring != NULL) {
70 		gve_dma_free_coherent(&rx->desc_ring_mem);
71 		rx->dqo.desc_ring = NULL;
72 	}
73 
74 	if (rx->dqo.bufs != NULL) {
75 		gve_free_rx_mbufs_dqo(rx);
76 
77 		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
78 			for (j = 0; j < rx->dqo.buf_cnt; j++)
79 				if (rx->dqo.bufs[j].mapped)
80 					bus_dmamap_destroy(rx->dqo.buf_dmatag,
81 					    rx->dqo.bufs[j].dmamap);
82 		}
83 
84 		free(rx->dqo.bufs, M_GVE);
85 		rx->dqo.bufs = NULL;
86 	}
87 
88 	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
89 		bus_dma_tag_destroy(rx->dqo.buf_dmatag);
90 
91 	if (com->qpl != NULL) {
92 		gve_free_qpl(priv, com->qpl);
93 		com->qpl = NULL;
94 	}
95 }
96 
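/*
 * Allocate the descriptor and completion rings for RX queue i, along with
 * the buffer bookkeeping: a QPL in QPL mode, or a DMA tag plus per-buffer
 * DMA maps in RDA mode. On any failure the ring is torn down again via
 * gve_rx_free_ring_dqo().
 */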
97 int
98 gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
99 {
100 	struct gve_rx_ring *rx = &priv->rx[i];
101 	int err;
102 	int j;
103 
104 	err = gve_dma_alloc_coherent(priv,
105 	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
106 	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
107 	if (err != 0) {
108 		device_printf(priv->dev,
109 		    "Failed to alloc desc ring for rx ring %d", i);
110 		goto abort;
111 	}
112 	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
113 	rx->dqo.mask = priv->rx_desc_cnt - 1;
114 
115 	err = gve_dma_alloc_coherent(priv,
116 	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
117 	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
118 	if (err != 0) {
119 		device_printf(priv->dev,
120 		    "Failed to alloc compl ring for rx ring %d", i);
121 		goto abort;
122 	}
123 	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
124 	rx->dqo.mask = priv->rx_desc_cnt - 1;
125 
126 	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
127 	    priv->rx_desc_cnt;
128 	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
129 	    M_GVE, M_WAITOK | M_ZERO);
130 
131 	if (gve_is_qpl(priv)) {
132 		rx->com.qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues,
133 		    GVE_RX_NUM_QPL_PAGES_DQO, /*single_kva=*/false);
134 		if (rx->com.qpl == NULL) {
135 			device_printf(priv->dev,
136 			    "Failed to alloc QPL for rx ring %d", i);
137 			err = ENOMEM;
138 			goto abort;
139 		}
140 		return (0);
141 	}
142 
143 	bus_size_t max_seg_size = gve_rx_dqo_mbuf_segment_size(priv);
144 
145 	err = bus_dma_tag_create(
146 	    bus_get_dma_tag(priv->dev),	/* parent */
147 	    1, 0,			/* alignment, bounds */
148 	    BUS_SPACE_MAXADDR,		/* lowaddr */
149 	    BUS_SPACE_MAXADDR,		/* highaddr */
150 	    NULL, NULL,			/* filter, filterarg */
151 	    max_seg_size,		/* maxsize */
152 	    1,				/* nsegments */
153 	    max_seg_size,		/* maxsegsize */
154 	    0,				/* flags */
155 	    NULL,			/* lockfunc */
156 	    NULL,			/* lockarg */
157 	    &rx->dqo.buf_dmatag);
158 	if (err != 0) {
159 		device_printf(priv->dev,
160 		    "%s: bus_dma_tag_create failed: %d\n",
161 		    __func__, err);
162 		goto abort;
163 	}
164 
165 	for (j = 0; j < rx->dqo.buf_cnt; j++) {
166 		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
167 		    &rx->dqo.bufs[j].dmamap);
168 		if (err != 0) {
169 			device_printf(priv->dev,
170 			    "err in creating rx buf dmamap %d: %d",
171 			    j, err);
172 			goto abort;
173 		}
174 		rx->dqo.bufs[j].mapped = true;
175 	}
176 
177 	return (0);
178 
179 abort:
180 	gve_rx_free_ring_dqo(priv, i);
181 	return (err);
182 }
183 
184 static void
185 gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
186 {
187 	struct gve_ring_com *com = &rx->com;
188 	int entries;
189 	int i;
190 
191 	entries = com->priv->rx_desc_cnt;
192 	for (i = 0; i < entries; i++)
193 		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};
194 
195 	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
196 	    BUS_DMASYNC_PREWRITE);
197 }
198 
199 static void
200 gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
201 {
202 	struct gve_ring_com *com = &rx->com;
203 	int i;
204 
205 	for (i = 0; i < com->priv->rx_desc_cnt; i++)
206 		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};
207 
208 	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
209 	    BUS_DMASYNC_PREWRITE);
210 }
211 
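/*
 * Reset RX queue i to its post-allocation state: clear the counters and
 * generation bit, zero both rings, drop any loose mbufs, and rebuild the
 * buffer lists. In QPL mode a page whose wire count shows it is still in
 * use by the stack is parked on used_bufs instead of free_bufs.
 */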
212 void
213 gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
214 {
215 	struct gve_rx_ring *rx = &priv->rx[i];
216 	int j;
217 
218 	rx->fill_cnt = 0;
219 	rx->cnt = 0;
220 	rx->dqo.mask = priv->rx_desc_cnt - 1;
221 	rx->dqo.head = 0;
222 	rx->dqo.tail = 0;
223 	rx->dqo.cur_gen_bit = 0;
224 
225 	gve_rx_clear_desc_ring_dqo(rx);
226 	gve_rx_clear_compl_ring_dqo(rx);
227 
228 	gve_free_rx_mbufs_dqo(rx);
229 
230 	if (gve_is_qpl(priv)) {
231 		SLIST_INIT(&rx->dqo.free_bufs);
232 		STAILQ_INIT(&rx->dqo.used_bufs);
233 
234 		for (j = 0; j < rx->dqo.buf_cnt; j++) {
235 			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];
236 
237 			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
238 			u_int ref_count = atomic_load_int(&page->ref_count);
239 
240 			/*
241 			 * An ifconfig down+up might see pages still in flight
242 			 * from the previous innings.
243 			 */
244 			if (VPRC_WIRE_COUNT(ref_count) == 1)
245 				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
246 				    buf, slist_entry);
247 			else
248 				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
249 				    buf, stailq_entry);
250 
251 			buf->num_nic_frags = 0;
252 			buf->next_idx = 0;
253 		}
254 	} else {
255 		SLIST_INIT(&rx->dqo.free_bufs);
256 		for (j = 0; j < rx->dqo.buf_cnt; j++)
257 			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
258 			    &rx->dqo.bufs[j], slist_entry);
259 	}
260 }
261 
262 int
263 gve_rx_intr_dqo(void *arg)
264 {
265 	struct gve_rx_ring *rx = arg;
266 	struct gve_priv *priv = rx->com.priv;
267 	struct gve_ring_com *com = &rx->com;
268 
269 	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
270 		return (FILTER_STRAY);
271 
272 	/* Interrupts are automatically masked */
273 	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
274 	return (FILTER_HANDLED);
275 }
276 
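/*
 * Advance the buffer-posting head and, once every GVE_RX_BUF_THRESH_DQO
 * postings, sync the descriptor ring and ring the doorbell so that
 * doorbell writes are batched.
 */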
277 static void
278 gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
279 {
280 	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
281 	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */
282 
283 	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
284 		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
285 		    BUS_DMASYNC_PREWRITE);
286 		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
287 		    rx->dqo.head);
288 	}
289 }
290 
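/*
 * Post an RDA buffer: write its index and pre-loaded DMA address into the
 * descriptor at head, then advance the head.
 */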
291 static void
292 gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
293 {
294 	struct gve_rx_desc_dqo *desc;
295 
296 	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
297 	    BUS_DMASYNC_PREREAD);
298 
299 	desc = &rx->dqo.desc_ring[rx->dqo.head];
300 	desc->buf_id = htole16(buf - rx->dqo.bufs);
301 	desc->buf_addr = htole64(buf->addr);
302 
303 	gve_rx_advance_head_dqo(rx);
304 }
305 
306 static int
307 gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
308 {
309 	struct gve_rx_buf_dqo *buf;
310 	bus_dma_segment_t segs[1];
311 	int nsegs;
312 	int err;
313 
314 	buf = SLIST_FIRST(&rx->dqo.free_bufs);
315 	if (__predict_false(!buf)) {
316 		device_printf(rx->com.priv->dev,
317 		    "Unexpected empty free bufs list\n");
318 		return (ENOBUFS);
319 	}
320 	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
321 
322 	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
323 	buf->mbuf = m_getjcl(how, MT_DATA, M_PKTHDR, segment_size);
324 	if (__predict_false(!buf->mbuf)) {
325 		err = ENOMEM;
326 		counter_enter();
327 		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
328 		counter_exit();
329 		goto abort_with_buf;
330 	}
331 	buf->mbuf->m_len = segment_size;
332 
333 	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
334 	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
335 	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
336 	if (__predict_false(err != 0)) {
337 		counter_enter();
338 		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
339 		counter_exit();
340 		goto abort_with_mbuf;
341 	}
342 	buf->addr = segs[0].ds_addr;
343 
344 	gve_rx_post_buf_dqo(rx, buf);
345 	return (0);
346 
347 abort_with_mbuf:
348 	m_freem(buf->mbuf);
349 	buf->mbuf = NULL;
350 abort_with_buf:
351 	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
352 	return (err);
353 }
354 
355 static struct gve_dma_handle *
356 gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
357 {
358 	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
359 }
360 
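/*
 * Post one fragment of a QPL page: the descriptor's buf_id encodes both
 * the page index and the fragment number, and buf_addr points
 * frag_num * rx_buf_size_dqo bytes into the page.
 */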
361 static void
362 gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
363     uint8_t frag_num)
364 {
365 	struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
366 	union gve_rx_qpl_buf_id_dqo composed_id;
367 	struct gve_dma_handle *page_dma_handle;
368 
369 	composed_id.buf_id = buf - rx->dqo.bufs;
370 	composed_id.frag_num = frag_num;
371 	desc->buf_id = htole16(composed_id.all);
372 
373 	page_dma_handle = gve_get_page_dma_handle(rx, buf);
374 	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
375 	    BUS_DMASYNC_PREREAD);
376 	desc->buf_addr = htole64(page_dma_handle->bus_addr +
377 	    frag_num * rx->com.priv->rx_buf_size_dqo);
378 
379 	buf->num_nic_frags++;
380 	gve_rx_advance_head_dqo(rx);
381 }
382 
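/*
 * Move pages whose wire count has dropped back to 1 (i.e. the stack has
 * freed all mbufs pointing into them) from used_bufs back to free_bufs.
 * A single head-of-line blocker is tolerated by temporarily pulling it
 * off the queue so it does not stall reclamation of the buffers behind it.
 */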
383 static void
384 gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
385 {
386 	struct gve_rx_buf_dqo *hol_blocker = NULL;
387 	struct gve_rx_buf_dqo *buf;
388 	u_int ref_count;
389 	vm_page_t page;
390 
391 	while (true) {
392 		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
393 		if (__predict_false(buf == NULL))
394 			break;
395 
396 		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
397 		ref_count = atomic_load_int(&page->ref_count);
398 
399 		if (VPRC_WIRE_COUNT(ref_count) != 1) {
400 			/* Account for one head-of-line blocker */
401 			if (hol_blocker != NULL)
402 				break;
403 			hol_blocker = buf;
404 			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
405 			    stailq_entry);
406 			continue;
407 		}
408 
409 		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
410 		    stailq_entry);
411 		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
412 		    buf, slist_entry);
413 		if (just_one)
414 			break;
415 	}
416 
417 	if (hol_blocker != NULL)
418 		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
419 		    hol_blocker, stailq_entry);
420 }
421 
422 static int
423 gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
424 {
425 	struct gve_rx_buf_dqo *buf;
426 
427 	buf = SLIST_FIRST(&rx->dqo.free_bufs);
428 	if (__predict_false(buf == NULL)) {
429 		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
430 		buf = SLIST_FIRST(&rx->dqo.free_bufs);
431 		if (__predict_false(buf == NULL))
432 			return (ENOBUFS);
433 	}
434 
435 	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
436 	if (buf->next_idx == gve_get_dq_num_frags_in_page(rx->com.priv) - 1)
437 		buf->next_idx = 0;
438 	else
439 		buf->next_idx++;
440 
441 	/*
442 	 * We have posted all the frags in this buf to the NIC.
443 	 * - buf will enter used_bufs once the last completion arrives.
444  * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
445 	 *   when its wire count drops back to 1.
446 	 */
447 	if (buf->next_idx == 0)
448 		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
449 	return (0);
450 }
451 
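/*
 * Top up the descriptor ring: keep posting buffers until the number of
 * outstanding buffers reaches the ring mask or a posting attempt fails.
 */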
452 static void
453 gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
454 {
455 	uint32_t num_pending_bufs;
456 	uint32_t num_to_post;
457 	uint32_t i;
458 	int err;
459 
460 	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
461 	num_to_post = rx->dqo.mask - num_pending_bufs;
462 
463 	for (i = 0; i < num_to_post; i++) {
464 		if (gve_is_qpl(rx->com.priv))
465 			err = gve_rx_post_new_dqo_qpl_buf(rx);
466 		else
467 			err = gve_rx_post_new_mbuf_dqo(rx, how);
468 		if (err)
469 			break;
470 	}
471 }
472 
473 void
474 gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
475 {
476 	gve_rx_post_buffers_dqo(rx, M_WAITOK);
477 }
478 
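/*
 * Map the parsed packet type onto an RSS hash type for the mbuf and report
 * whether the L4 protocol is TCP, which the caller uses to decide whether
 * to attempt LRO.
 */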
479 static void
480 gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
481 {
482 	switch (ptype->l3_type) {
483 	case GVE_L3_TYPE_IPV4:
484 		switch (ptype->l4_type) {
485 		case GVE_L4_TYPE_TCP:
486 			*is_tcp = true;
487 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
488 			break;
489 		case GVE_L4_TYPE_UDP:
490 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
491 			break;
492 		default:
493 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
494 		}
495 		break;
496 	case GVE_L3_TYPE_IPV6:
497 		switch (ptype->l4_type) {
498 		case GVE_L4_TYPE_TCP:
499 			*is_tcp = true;
500 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
501 			break;
502 		case GVE_L4_TYPE_UDP:
503 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
504 			break;
505 		default:
506 			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
507 		}
508 		break;
509 	default:
510 		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
511 	}
512 }
513 
514 static void
515 gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
516     struct gve_rx_compl_desc_dqo *desc,
517     struct gve_ptype *ptype)
518 {
519 	/* HW did not identify and process L3 and L4 headers. */
520 	if (__predict_false(!desc->l3_l4_processed))
521 		return;
522 
523 	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
524 		if (__predict_false(desc->csum_ip_err ||
525 		    desc->csum_external_ip_err))
526 			return;
527 	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
528 		/* Checksum should be skipped if this flag is set. */
529 		if (__predict_false(desc->ipv6_ex_add))
530 			return;
531 	}
532 
533 	if (__predict_false(desc->csum_l4_err))
534 		return;
535 
536 	switch (ptype->l4_type) {
537 	case GVE_L4_TYPE_TCP:
538 	case GVE_L4_TYPE_UDP:
539 	case GVE_L4_TYPE_ICMP:
540 	case GVE_L4_TYPE_SCTP:
541 		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
542 					    CSUM_IP_VALID |
543 					    CSUM_DATA_VALID |
544 					    CSUM_PSEUDO_HDR;
545 		mbuf->m_pkthdr.csum_data = 0xffff;
546 		break;
547 	default:
548 		break;
549 	}
550 }
551 
552 static void
553 gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
554     struct gve_rx_compl_desc_dqo *compl_desc)
555 {
556 	struct mbuf *mbuf = rx->ctx.mbuf_head;
557 	if_t ifp = rx->com.priv->ifp;
558 	struct gve_ptype *ptype;
559 	bool do_if_input = true;
560 	bool is_tcp = false;
561 
562 	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
563 	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
564 	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
565 	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);
566 
567 	mbuf->m_pkthdr.rcvif = ifp;
568 	mbuf->m_pkthdr.len = rx->ctx.total_size;
569 
570 	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
571 	    is_tcp &&
572 	    (rx->lro.lro_cnt != 0) &&
573 	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
574 		do_if_input = false;
575 
576 	if (do_if_input)
577 		if_input(ifp, mbuf);
578 
579 	counter_enter();
580 	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
581 	counter_u64_add_protected(rx->stats.rpackets, 1);
582 	counter_exit();
583 
584 	rx->ctx = (struct gve_rx_ctx){};
585 }
586 
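/*
 * Copybreak path for small single-fragment packets: copy the payload into
 * a freshly allocated mbuf and hand it up, so the original buffer can be
 * reposted to the NIC immediately.
 */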
587 static int
588 gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
589     struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
590 {
591 	struct mbuf *mbuf;
592 
593 	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
594 	if (__predict_false(mbuf == NULL))
595 		return (ENOMEM);
596 
597 	counter_enter();
598 	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
599 	counter_exit();
600 
601 	m_copyback(mbuf, 0, frag_len, va);
602 	mbuf->m_len = frag_len;
603 
604 	rx->ctx.mbuf_head = mbuf;
605 	rx->ctx.mbuf_tail = mbuf;
606 	rx->ctx.total_size += frag_len;
607 
608 	gve_rx_input_mbuf_dqo(rx, compl_desc);
609 	return (0);
610 }
611 
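/*
 * Process one completion in RDA mode: validate the buffer id, chain the
 * buffer's mbuf into the per-ring packet context, return the buffer to
 * free_bufs, and hand the packet up to the stack on the final fragment.
 */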
612 static void
613 gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
614     struct gve_rx_compl_desc_dqo *compl_desc,
615     int *work_done)
616 {
617 	bool is_last_frag = compl_desc->end_of_packet != 0;
618 	struct gve_rx_ctx *ctx = &rx->ctx;
619 	struct gve_rx_buf_dqo *buf;
620 	uint32_t num_pending_bufs;
621 	uint16_t frag_len;
622 	uint16_t buf_id;
623 	int err;
624 
625 	buf_id = le16toh(compl_desc->buf_id);
626 	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
627 		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
628 		    buf_id, rx->com.id);
629 		gve_schedule_reset(priv);
630 		goto drop_frag_clear_ctx;
631 	}
632 	buf = &rx->dqo.bufs[buf_id];
633 	if (__predict_false(buf->mbuf == NULL)) {
634 		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
635 		    buf_id, rx->com.id);
636 		gve_schedule_reset(priv);
637 		goto drop_frag_clear_ctx;
638 	}
639 
640 	if (__predict_false(ctx->drop_pkt))
641 		goto drop_frag;
642 
643 	if (__predict_false(compl_desc->rx_error)) {
644 		counter_enter();
645 		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
646 		counter_exit();
647 		goto drop_frag;
648 	}
649 
650 	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
651 	    BUS_DMASYNC_POSTREAD);
652 
653 	frag_len = compl_desc->packet_len;
654 	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
655 		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
656 		    compl_desc, frag_len);
657 		if (__predict_false(err != 0))
658 			goto drop_frag;
659 		(*work_done)++;
660 		gve_rx_post_buf_dqo(rx, buf);
661 		return;
662 	}
663 
664 	/*
665 	 * Although buffer completions may arrive out of order, buffer
666 	 * descriptors are consumed by the NIC in order. That is, the
667 	 * buffer at desc_ring[tail] might not be the buffer we got the
668 	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
669 	 * has already been read by the NIC.
670 	 */
671 	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
672 
673 	/*
674 	 * For every fragment received, try to post a new buffer.
675 	 *
676 	 * Failures are okay but only so long as the number of outstanding
677 	 * buffers is above a threshold.
678 	 *
679 	 * Beyond that we drop new packets to reuse their buffers.
680 	 * Without ensuring a minimum number of buffers for the NIC to
681 	 * put packets in, we run the risk of getting the queue stuck
682 	 * for good.
683 	 */
684 	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
685 	if (__predict_false(err != 0 &&
686 	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
687 		counter_enter();
688 		counter_u64_add_protected(
689 		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
690 		counter_exit();
691 		goto drop_frag;
692 	}
693 
694 	buf->mbuf->m_len = frag_len;
695 	ctx->total_size += frag_len;
696 	if (ctx->mbuf_tail == NULL) {
697 		ctx->mbuf_head = buf->mbuf;
698 		ctx->mbuf_tail = buf->mbuf;
699 	} else {
700 		buf->mbuf->m_flags &= ~M_PKTHDR;
701 		ctx->mbuf_tail->m_next = buf->mbuf;
702 		ctx->mbuf_tail = buf->mbuf;
703 	}
704 
705 	/*
706 	 * Disassociate the mbuf from buf and surrender buf to the free list to
707 	 * be used by a future mbuf.
708 	 */
709 	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
710 	buf->mbuf = NULL;
711 	buf->addr = 0;
712 	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
713 
714 	if (is_last_frag) {
715 		gve_rx_input_mbuf_dqo(rx, compl_desc);
716 		(*work_done)++;
717 	}
718 	return;
719 
720 drop_frag:
721 	/* Clear the earlier frags if there were any */
722 	m_freem(ctx->mbuf_head);
723 	rx->ctx = (struct gve_rx_ctx){};
724 	/* Drop the rest of the pkt if there are more frags */
725 	ctx->drop_pkt = true;
726 	/* Reuse the dropped frag's buffer */
727 	gve_rx_post_buf_dqo(rx, buf);
728 
729 	if (is_last_frag)
730 		goto drop_frag_clear_ctx;
731 	return;
732 
733 drop_frag_clear_ctx:
734 	counter_enter();
735 	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
736 	counter_exit();
737 	m_freem(ctx->mbuf_head);
738 	rx->ctx = (struct gve_rx_ctx){};
739 }
740 
741 static void *
742 gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
743     struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
744 {
745 	int page_idx = buf - rx->dqo.bufs;
746 	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;
747 
748 	va = (char *)va + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
749 	return (va);
750 }
751 
752 static int
753 gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
754     struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
755     uint8_t buf_frag_num, uint16_t frag_len)
756 {
757 	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
758 	struct mbuf *mbuf;
759 	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
760 
761 	if (ctx->mbuf_tail == NULL) {
762 		mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, segment_size);
763 		if (mbuf == NULL)
764 			return (ENOMEM);
765 		ctx->mbuf_head = mbuf;
766 		ctx->mbuf_tail = mbuf;
767 	} else {
768 		mbuf = m_getjcl(M_NOWAIT, MT_DATA, 0, segment_size);
769 		if (mbuf == NULL)
770 			return (ENOMEM);
771 		ctx->mbuf_tail->m_next = mbuf;
772 		ctx->mbuf_tail = mbuf;
773 	}
774 
775 	mbuf->m_len = frag_len;
776 	ctx->total_size += frag_len;
777 
778 	m_copyback(mbuf, 0, frag_len, va);
779 	counter_enter();
780 	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
781 	counter_exit();
782 	return (0);
783 }
784 
785 static int
786 gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
787     struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
788     uint8_t buf_frag_num, uint16_t frag_len)
789 {
790 	struct mbuf *mbuf;
791 	void *page_addr;
792 	vm_page_t page;
793 	int page_idx;
794 	void *va;
795 
796 	if (ctx->mbuf_tail == NULL) {
797 		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
798 		if (mbuf == NULL)
799 			return (ENOMEM);
800 		ctx->mbuf_head = mbuf;
801 		ctx->mbuf_tail = mbuf;
802 	} else {
803 		mbuf = m_get(M_NOWAIT, MT_DATA);
804 		if (mbuf == NULL)
805 			return (ENOMEM);
806 		ctx->mbuf_tail->m_next = mbuf;
807 		ctx->mbuf_tail = mbuf;
808 	}
809 
810 	mbuf->m_len = frag_len;
811 	ctx->total_size += frag_len;
812 
813 	page_idx = buf - rx->dqo.bufs;
814 	page = rx->com.qpl->pages[page_idx];
815 	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
816 	va = (char *)page_addr + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
817 
818 	/*
819 	 * Grab an extra ref to the page so that gve_mextadd_free
820 	 * does not end up freeing the page while the interface exists.
821 	 */
822 	vm_page_wire(page);
823 
824 	counter_enter();
825 	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
826 	counter_exit();
827 
828 	MEXTADD(mbuf, va, frag_len,
829 	    gve_mextadd_free, page, page_addr,
830 	    0, EXT_NET_DRV);
831 	return (0);
832 }
833 
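/*
 * QPL-mode counterpart of gve_rx_dqo(): the 16-bit completion buf_id
 * carries both the page index and the fragment number within that page.
 * Fragments are normally attached zero-copy as external mbufs; if buffer
 * posting falls behind, the fragment is copied into a cluster mbuf instead
 * so its page can be reposted right away.
 */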
834 static void
835 gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
836     struct gve_rx_compl_desc_dqo *compl_desc,
837     int *work_done)
838 {
839 	bool is_last_frag = compl_desc->end_of_packet != 0;
840 	union gve_rx_qpl_buf_id_dqo composed_id;
841 	struct gve_dma_handle *page_dma_handle;
842 	struct gve_rx_ctx *ctx = &rx->ctx;
843 	struct gve_rx_buf_dqo *buf;
844 	uint32_t num_pending_bufs;
845 	uint8_t buf_frag_num;
846 	uint16_t frag_len;
847 	uint16_t buf_id;
848 	int err;
849 
850 	composed_id.all = le16toh(compl_desc->buf_id);
851 	buf_id = composed_id.buf_id;
852 	buf_frag_num = composed_id.frag_num;
853 
854 	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
855 		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
856 		    buf_id, rx->com.id);
857 		gve_schedule_reset(priv);
858 		goto drop_frag_clear_ctx;
859 	}
860 	buf = &rx->dqo.bufs[buf_id];
861 	if (__predict_false(buf->num_nic_frags == 0 ||
862 	    buf_frag_num > gve_get_dq_num_frags_in_page(priv) - 1)) {
863 		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
864 		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
865 		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
866 		gve_schedule_reset(priv);
867 		goto drop_frag_clear_ctx;
868 	}
869 
870 	buf->num_nic_frags--;
871 
872 	if (__predict_false(ctx->drop_pkt))
873 		goto drop_frag;
874 
875 	if (__predict_false(compl_desc->rx_error)) {
876 		counter_enter();
877 		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
878 		counter_exit();
879 		goto drop_frag;
880 	}
881 
882 	page_dma_handle = gve_get_page_dma_handle(rx, buf);
883 	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
884 	    BUS_DMASYNC_POSTREAD);
885 
886 	frag_len = compl_desc->packet_len;
887 	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
888 		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
889 
890 		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
891 		if (__predict_false(err != 0))
892 			goto drop_frag;
893 		(*work_done)++;
894 		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
895 		return;
896 	}
897 
898 	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
899 	err = gve_rx_post_new_dqo_qpl_buf(rx);
900 	if (__predict_false(err != 0 &&
901 	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
902 		/*
903 		 * Resort to copying this fragment into a cluster mbuf
904 		 * when the above threshold is breached and repost the
905 		 * incoming buffer. If we cannot find cluster mbufs,
906 		 * just drop the packet (to repost its buffer).
907 		 */
908 		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
909 		    buf_frag_num, frag_len);
910 		if (err != 0) {
911 			counter_enter();
912 			counter_u64_add_protected(
913 			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
914 			counter_exit();
915 			goto drop_frag;
916 		}
917 		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
918 	} else {
919 		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
920 		    buf_frag_num, frag_len);
921 		if (__predict_false(err != 0)) {
922 			counter_enter();
923 			counter_u64_add_protected(
924 			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
925 			counter_exit();
926 			goto drop_frag;
927 		}
928 	}
929 
930 	/*
931 	 * Both the counts need to be checked.
932 	 *
933 	 * num_nic_frags == 0 implies no pending completions
934 	 * but not all frags may have yet been posted.
935 	 *
936 	 * next_idx == 0 implies all frags have been posted
937 	 * but there might be pending completions.
938 	 */
939 	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
940 		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);
941 
942 	if (is_last_frag) {
943 		gve_rx_input_mbuf_dqo(rx, compl_desc);
944 		(*work_done)++;
945 	}
946 	return;
947 
948 drop_frag:
949 	/* Clear the earlier frags if there were any */
950 	m_freem(ctx->mbuf_head);
951 	rx->ctx = (struct gve_rx_ctx){};
952 	/* Drop the rest of the pkt if there are more frags */
953 	ctx->drop_pkt = true;
954 	/* Reuse the dropped frag's buffer */
955 	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
956 
957 	if (is_last_frag)
958 		goto drop_frag_clear_ctx;
959 	return;
960 
961 drop_frag_clear_ctx:
962 	counter_enter();
963 	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
964 	counter_exit();
965 	m_freem(ctx->mbuf_head);
966 	rx->ctx = (struct gve_rx_ctx){};
967 }
968 
969 static uint8_t
970 gve_rx_get_gen_bit(uint8_t *desc)
971 {
972 	uint8_t byte;
973 
974 	/*
975 	 * Prevent generation bit from being read after the rest of the
976 	 * descriptor.
977 	 */
978 	byte = atomic_load_acq_8(desc + GVE_RX_DESC_DQO_GEN_BYTE_OFFSET);
979 	return ((byte & GVE_RX_DESC_DQO_GEN_BIT_MASK) != 0);
980 }
981 
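/*
 * Drain up to budget completions. A completion slot is new when its
 * generation bit differs from cur_gen_bit; cur_gen_bit is flipped each
 * time tail wraps, so every pass over the ring expects the opposite value.
 * Afterwards flush LRO, replenish buffers, and report whether the budget
 * was exhausted so the caller can reschedule.
 */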
982 static bool
983 gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
984 {
985 	struct gve_rx_compl_desc_dqo *compl_desc;
986 	uint32_t work_done = 0;
987 
988 	NET_EPOCH_ASSERT();
989 
990 	while (work_done < budget) {
991 		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag,
992 		    rx->dqo.compl_ring_mem.map,
993 		    BUS_DMASYNC_POSTREAD);
994 
995 		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
996 		if (gve_rx_get_gen_bit((uint8_t *)compl_desc) ==
997 		    rx->dqo.cur_gen_bit)
998 			break;
999 
1000 		rx->cnt++;
1001 		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
1002 		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);
1003 
1004 		if (gve_is_qpl(priv))
1005 			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
1006 		else
1007 			gve_rx_dqo(priv, rx, compl_desc, &work_done);
1008 	}
1009 
1010 	if (work_done != 0)
1011 		tcp_lro_flush_all(&rx->lro);
1012 
1013 	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
1014 	if (gve_is_qpl(priv))
1015 		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
1016 	return (work_done == budget);
1017 }
1018 
1019 void
1020 gve_rx_cleanup_tq_dqo(void *arg, int pending)
1021 {
1022 	struct gve_rx_ring *rx = arg;
1023 	struct gve_priv *priv = rx->com.priv;
1024 
1025 	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
1026 		return;
1027 
1028 	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
1029 		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
1030 		return;
1031 	}
1032 
1033 	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
1034 	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
1035 }
1036