xref: /freebsd/sys/dev/gve/gve_rx.c (revision e0464f74d5579e1538ce741b0a15e6604dbc53c4)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_rx_free_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (rx->page_info != NULL) {
		free(rx->page_info, M_GVE);
		rx->page_info = NULL;
	}

	if (rx->data_ring != NULL) {
		gve_dma_free_coherent(&rx->data_ring_mem);
		rx->data_ring = NULL;
	}

	if (rx->desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->desc_ring = NULL;
	}

	if (com->qpl != NULL) {
		gve_free_qpl(priv, com->qpl);
		com->qpl = NULL;
	}
}

static void
gve_rx_free_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	/* Safe to call even if never allocated */
	gve_free_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, i);
	else
		gve_rx_free_ring_dqo(priv, i);

	if (com->q_resources != NULL) {
		gve_dma_free_coherent(&com->q_resources_mem);
		com->q_resources = NULL;
	}
}

static void
gve_prefill_rx_slots(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	struct gve_dma_handle *dma;
	int i;

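	/*
	 * Point each data slot at the start of its own QPL page and record
	 * the page's kernel virtual address so the completion path can later
	 * build mbufs out of it.
	 */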
	for (i = 0; i < com->priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i);
		rx->page_info[i].page_offset = 0;
		rx->page_info[i].page_address = com->qpl->dmas[i].cpu_addr;
		rx->page_info[i].page = com->qpl->pages[i];

		dma = &com->qpl->dmas[i];
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREREAD);
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static int
gve_rx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}

	rx->mask = priv->rx_pages_per_qpl - 1;
	rx->desc_ring = rx->desc_ring_mem.cpu_addr;

	com->qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues,
	    priv->rx_desc_cnt, /*single_kva=*/false);
	if (com->qpl == NULL) {
		device_printf(priv->dev,
		    "Failed to alloc QPL for rx ring %d", i);
		err = ENOMEM;
		goto abort;
	}

	rx->page_info = malloc(priv->rx_desc_cnt * sizeof(*rx->page_info),
	    M_GVE, M_WAITOK | M_ZERO);

	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_rx_data_slot) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->data_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc data ring for rx ring %d", i);
		goto abort;
	}
	rx->data_ring = rx->data_ring_mem.cpu_addr;

	gve_prefill_rx_slots(rx);
	return (0);

abort:
	gve_rx_free_ring_gqi(priv, i);
	return (err);
}

static int
gve_rx_alloc_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	com->priv = priv;
	com->id = i;

	gve_alloc_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
	    PAGE_SIZE, &com->q_resources_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc queue resources for rx ring %d", i);
		goto abort;
	}
	com->q_resources = com->q_resources_mem.cpu_addr;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, i);
	else
		err = gve_rx_alloc_ring_dqo(priv, i);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_rx_free_ring(priv, i);
	return (err);
}

int
gve_alloc_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
{
	int i;
	int err;

	KASSERT(priv->rx != NULL, ("priv->rx is NULL!"));

	for (i = start_idx; i < stop_idx; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err != 0)
			goto free_rings;
	}

	return (0);
free_rings:
	gve_free_rx_rings(priv, start_idx, i);
	return (err);
}

void
gve_free_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
{
	int i;

	for (i = start_idx; i < stop_idx; i++)
		gve_rx_free_ring(priv, i);
}

static void
gve_rx_clear_data_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	/*
	 * The Rx data ring has this invariant: "the networking stack is not
	 * using the buffer beginning at any page_offset". This invariant is
	 * established initially by gve_prefill_rx_slots at alloc-time and is
	 * maintained by the cleanup taskqueue. This invariant implies that the
	 * ring can be considered to be fully posted with buffers at this point,
	 * even if there are unfreed mbufs still being processed, which is why we
	 * can fill the ring without waiting on can_flip at each slot to become true.
	 */
	for (i = 0; i < priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i +
		    rx->page_info[i].page_offset);
		rx->fill_cnt++;
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_desc_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	for (i = 0; i < priv->rx_desc_cnt; i++)
		rx->desc_ring[i] = (struct gve_rx_desc){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_clear_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];

	if (!gve_is_gqi(priv)) {
		gve_clear_rx_ring_dqo(priv, i);
		return;
	}

	rx->seq_no = 1;
	rx->cnt = 0;
	rx->fill_cnt = 0;
	rx->mask = priv->rx_desc_cnt - 1;

	gve_rx_clear_desc_ring(rx);
	gve_rx_clear_data_ring(rx);
}

static void
gve_start_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if ((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) {
		if (tcp_lro_init(&rx->lro) != 0)
			device_printf(priv->dev, "Failed to init lro for rx ring %d", i);
		rx->lro.ifp = priv->ifp;
	}

	if (gve_is_gqi(priv))
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq, rx);
	else
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq_dqo, rx);
	com->cleanup_tq = taskqueue_create_fast("gve rx", M_WAITOK,
	    taskqueue_thread_enqueue, &com->cleanup_tq);

	taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET,
	    "%s rxq %d", device_get_nameunit(priv->dev), i);

	if (gve_is_gqi(priv)) {
		/* GQ RX bufs are prefilled at ring alloc time */
		gve_db_bar_write_4(priv, com->db_offset, rx->fill_cnt);
	} else
		gve_rx_prefill_buffers_dqo(rx);
}

int
gve_create_rx_rings(struct gve_priv *priv)
{
	struct gve_ring_com *com;
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK))
		return (0);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_clear_rx_ring(priv, i);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err != 0)
		return (err);

	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_POSTREAD);

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		com = &rx->com;

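		/*
		 * The device reports doorbell locations as indices of 32-bit
		 * words; multiply by 4 to get byte offsets into the doorbell BAR.
		 */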
		com->irq_db_offset = 4 * be32toh(priv->irq_db_indices[com->ntfy_id].index);

		bus_dmamap_sync(com->q_resources_mem.tag, com->q_resources_mem.map,
		    BUS_DMASYNC_POSTREAD);
		com->db_offset = 4 * be32toh(com->q_resources->db_index);
		com->counter_idx = be32toh(com->q_resources->counter_index);

		gve_start_rx_ring(priv, i);
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	return (0);
}

static void
gve_stop_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (com->cleanup_tq != NULL) {
		taskqueue_quiesce(com->cleanup_tq);
		taskqueue_free(com->cleanup_tq);
		com->cleanup_tq = NULL;
	}

	tcp_lro_free(&rx->lro);
	rx->ctx = (struct gve_rx_ctx){};
}

int
gve_destroy_rx_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_stop_rx_ring(priv, i);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK)) {
		err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
		if (err != 0)
			return (err);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	}

	return (0);
}

int
gve_rx_intr(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

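	/*
	 * Mask further interrupts for this queue and defer the actual
	 * descriptor processing to the per-ring cleanup taskqueue.
	 */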
	gve_db_bar_write_4(priv, com->irq_db_offset, GVE_IRQ_MASK);
	taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	return (FILTER_HANDLED);
}

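/*
 * Translate the protocol flags in the Rx descriptor into the mbuf RSS hash
 * type so the stack knows how to interpret the NIC-provided flow hash.
 */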
static inline void
gve_set_rss_type(__be16 flag, struct mbuf *mbuf)
{
	if ((flag & GVE_RXF_IPV4) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		return;
	}

	if ((flag & GVE_RXF_IPV6) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		return;
	}
}

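/*
 * Toggle the buffer between the two buffer regions of its backing QPL page
 * by XOR-ing the default buffer offset into both the driver's bookkeeping
 * offset and the device-visible slot address.
 */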
static void
gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = htobe64(GVE_DEFAULT_RX_BUFFER_OFFSET);
	page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET;
	*(slot_addr) ^= offset;
}

static struct mbuf *
gve_rx_create_mbuf(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_slot_page_info *page_info, uint16_t len,
    union gve_rx_data_slot *data_slot, bool is_only_frag)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf;
	u_int ref_count;
	bool can_flip;

	uint32_t offset = page_info->page_offset + page_info->pad;
	void *va = (char *)page_info->page_address + offset;

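	/*
	 * Small single-fragment packets are copied into a freshly allocated
	 * mbuf (the rx_copybreak path) so the device buffer can be reposted
	 * without flipping or wiring the backing page.
	 */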
	if (len <= priv->rx_copybreak && is_only_frag) {
		mbuf = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
		if (__predict_false(mbuf == NULL))
			return (NULL);

		m_copyback(mbuf, 0, len, va);
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
		counter_exit();
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		struct mbuf *mbuf_tail = ctx->mbuf_tail;
		KASSERT(len <= MCLBYTES, ("gve rx fragment bigger than cluster mbuf"));

		/*
		 * This page was created with VM_ALLOC_WIRED, thus the lowest
		 * wire count experienced by the page until the interface is
		 * destroyed is 1.
		 *
		 * We wire the page again before supplying an mbuf pointing to
		 * it to the networking stack, so before the mbuf leaves the
		 * driver, the wire count rises to 2.
		 *
		 * If it is 1 again, it necessarily means that the mbuf has been
		 * consumed and it was gve_mextadd_free that brought the wire
		 * count back down to 1. We only need to eventually observe the 1.
		 */
		ref_count = atomic_load_int(&page_info->page->ref_count);
		can_flip = VPRC_WIRE_COUNT(ref_count) == 1;

		if (mbuf_tail == NULL) {
			if (can_flip)
				mbuf = m_gethdr(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);

			ctx->mbuf_head = mbuf;
			ctx->mbuf_tail = mbuf;
		} else {
			if (can_flip)
				mbuf = m_get(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);

			mbuf_tail->m_next = mbuf;
			ctx->mbuf_tail = mbuf;
		}

		if (__predict_false(mbuf == NULL))
			return (NULL);

		if (can_flip) {
			MEXTADD(mbuf, va, len, gve_mextadd_free,
			    page_info->page, page_info->page_address,
			    0, EXT_NET_DRV);

			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
			counter_exit();

			/*
			 * Grab an extra ref to the page so that gve_mextadd_free
			 * does not end up freeing the page while the interface exists.
			 */
			vm_page_wire(page_info->page);

			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		} else {
			m_copyback(mbuf, 0, len, va);
			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
			counter_exit();
		}
	}

	mbuf->m_len = len;
	ctx->total_size += len;

	return (mbuf);
}

static inline bool
gve_needs_rss(__be16 flag)
{
	if ((flag & GVE_RXF_FRAG) != 0)
		return (false);
	if ((flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) != 0)
		return (true);
	return (false);
}

static void
gve_rx(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_desc *desc,
    uint32_t idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_dma_handle *page_dma_handle;
	union gve_rx_data_slot *data_slot;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf = NULL;
	if_t ifp = priv->ifp;
	bool do_if_input;
	uint16_t len;

	bool is_first_frag = ctx->frag_cnt == 0;
	bool is_last_frag = !(GVE_RXF_PKT_CONT & desc->flags_seq);
	bool is_only_frag = is_first_frag && is_last_frag;

	if (__predict_false(ctx->drop_pkt))
		goto finish_frag;

	if ((desc->flags_seq & GVE_RXF_ERR) != 0) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	page_info = &rx->page_info[idx];
	data_slot = &rx->data_ring[idx];
	page_dma_handle = &(rx->com.qpl->dmas[idx]);

	page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
	len = be16toh(desc->len) - page_info->pad;

	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	mbuf = gve_rx_create_mbuf(priv, rx, page_info, len, data_slot,
	    is_only_frag);
	if (mbuf == NULL) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	if (is_first_frag) {
		mbuf->m_pkthdr.rcvif = priv->ifp;
		ctx->is_tcp = desc->flags_seq & GVE_RXF_TCP;

		if (gve_needs_rss(desc->flags_seq)) {
			gve_set_rss_type(desc->flags_seq, mbuf);
			mbuf->m_pkthdr.flowid = be32toh(desc->rss_hash);
		}

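		/*
		 * A nonzero csum on a non-fragmented packet means the NIC
		 * validated the checksum, so mark both the IP header and the
		 * L4 payload as verified for the stack.
		 */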
		if ((desc->csum != 0) && ((desc->flags_seq & GVE_RXF_FRAG) == 0)) {
			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
			    CSUM_IP_VALID |
			    CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR;
			mbuf->m_pkthdr.csum_data = 0xffff;
		}
	}

	if (is_last_frag) {
		mbuf = ctx->mbuf_head;
		mbuf->m_pkthdr.len = ctx->total_size;
		do_if_input = true;

		if (((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) &&      /* LRO is enabled */
		    (ctx->is_tcp) &&                                        /* pkt is a TCP pkt */
		    ((mbuf->m_pkthdr.csum_flags & CSUM_DATA_VALID) != 0) && /* NIC verified csum */
		    (rx->lro.lro_cnt != 0) &&                               /* LRO resources exist */
		    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
			do_if_input = false;

		if (do_if_input)
			if_input(ifp, mbuf);

		counter_enter();
		counter_u64_add_protected(rx->stats.rbytes, ctx->total_size);
		counter_u64_add_protected(rx->stats.rpackets, 1);
		counter_exit();
	}

finish_frag:
	ctx->frag_cnt++;
	if (is_last_frag)
		rx->ctx = (struct gve_rx_ctx){};
}

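/*
 * A descriptor has been written back by the device when its sequence
 * number matches the sequence number the ring expects next.
 */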
static bool
gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	uint32_t next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->seq_no);
}

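/*
 * Descriptor sequence numbers cycle through 1..7; 0 is skipped so that a
 * zeroed descriptor is never mistaken for one the device has written back.
 */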
static inline uint8_t
gve_next_seqno(uint8_t seq)
{
	return ((seq + 1) == 8 ? 1 : seq + 1);
}

static void
gve_rx_cleanup(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	uint32_t idx = rx->cnt & rx->mask;
	struct gve_rx_desc *desc;
	struct gve_rx_ctx *ctx = &rx->ctx;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_POSTREAD);
	desc = &rx->desc_ring[idx];

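	/*
	 * Keep processing past the budget while in the middle of a
	 * multi-fragment packet so that a partially assembled packet is never
	 * left behind for a later taskqueue run.
	 */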
	while ((work_done < budget || ctx->frag_cnt) &&
	    (GVE_SEQNO(desc->flags_seq) == rx->seq_no)) {

		gve_rx(priv, rx, desc, idx);

		rx->cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc_ring[idx];
		rx->seq_no = gve_next_seqno(rx->seq_no);
		work_done++;
	}

	/* The device will only send whole packets. */
	if (__predict_false(ctx->frag_cnt)) {
		m_freem(ctx->mbuf_head);
		rx->ctx = (struct gve_rx_ctx){};
		device_printf(priv->dev,
		    "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset",
		    GVE_SEQNO(desc->flags_seq), rx->seq_no);
		gve_schedule_reset(priv);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);

	/* Buffers are refilled as the descs are processed */
	rx->fill_cnt += work_done;
	gve_db_bar_write_4(priv, rx->com.db_offset, rx->fill_cnt);
}

void
gve_rx_cleanup_tq(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	gve_rx_cleanup(priv, rx, /*budget=*/128);

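	/*
	 * Write the doorbell to ack the interrupt and re-arm the queue for
	 * event notifications now that this round of cleanup is done.
	 */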
	gve_db_bar_write_4(priv, rx->com.irq_db_offset,
	    GVE_IRQ_ACK | GVE_IRQ_EVENT);

	/*
	 * Fragments received before this barrier MAY NOT cause the NIC to send an
	 * interrupt but they will still be handled by the enqueue below.
	 * Fragments received after the barrier WILL trigger an interrupt.
	 */
	atomic_thread_fence_seq_cst();

	if (gve_rx_work_pending(rx)) {
		gve_db_bar_write_4(priv, rx->com.irq_db_offset, GVE_IRQ_MASK);
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	}
}
719