/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_rx_free_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (rx->page_info != NULL) {
		free(rx->page_info, M_GVE);
		rx->page_info = NULL;
	}

	if (rx->data_ring != NULL) {
		gve_dma_free_coherent(&rx->data_ring_mem);
		rx->data_ring = NULL;
	}

	if (rx->desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->desc_ring = NULL;
	}

	if (com->qpl != NULL) {
		gve_free_qpl(priv, com->qpl);
		com->qpl = NULL;
	}
}

static void
gve_rx_free_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	/* Safe to call even if never allocated */
	gve_free_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, i);
	else
		gve_rx_free_ring_dqo(priv, i);

	if (com->q_resources != NULL) {
		gve_dma_free_coherent(&com->q_resources_mem);
		com->q_resources = NULL;
	}
}

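/*
 * Seed every slot in the GQI data ring with a buffer: slot i is backed by
 * page i of the ring's QPL, and page_info[i] records the host-side view of
 * that same page (page pointer, mapped address, offset within the page).
 */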
static void
gve_prefill_rx_slots(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	struct gve_dma_handle *dma;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i);
		rx->page_info[i].page_offset = 0;
		rx->page_info[i].page_address = com->qpl->dmas[i].cpu_addr;
		rx->page_info[i].page = com->qpl->pages[i];

		dma = &com->qpl->dmas[i];
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREREAD);
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

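/*
 * GQI ring allocation: a DMA-coherent descriptor ring, a queue-page-list
 * (QPL) whose pages provide the actual receive buffers, a page_info array
 * tracking the host view of those pages, and a DMA-coherent data ring of
 * device-visible buffer references. Slots are prefilled before returning.
 */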
static int
gve_rx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}

	rx->mask = priv->rx_pages_per_qpl - 1;
	rx->desc_ring = rx->desc_ring_mem.cpu_addr;

	com->qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues,
	    priv->rx_desc_cnt, /*single_kva=*/false);
	if (com->qpl == NULL) {
		device_printf(priv->dev,
		    "Failed to alloc QPL for rx ring %d", i);
		err = ENOMEM;
		goto abort;
	}

	rx->page_info = malloc(priv->rx_desc_cnt * sizeof(*rx->page_info),
	    M_GVE, M_WAITOK | M_ZERO);

	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_rx_data_slot) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->data_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc data ring for rx ring %d", i);
		goto abort;
	}
	rx->data_ring = rx->data_ring_mem.cpu_addr;

	gve_prefill_rx_slots(rx);
	return (0);

abort:
	gve_rx_free_ring_gqi(priv, i);
	return (err);
}

static int
gve_rx_alloc_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	com->priv = priv;
	com->id = i;

	gve_alloc_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
	    PAGE_SIZE, &com->q_resources_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc queue resources for rx ring %d", i);
		goto abort;
	}
	com->q_resources = com->q_resources_mem.cpu_addr;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, i);
	else
		err = gve_rx_alloc_ring_dqo(priv, i);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_rx_free_ring(priv, i);
	return (err);
}

int
gve_alloc_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
{
	int i;
	int err;

	KASSERT(priv->rx != NULL, ("priv->rx is NULL!"));

	for (i = start_idx; i < stop_idx; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err != 0)
			goto free_rings;
	}

	return (0);
free_rings:
	gve_free_rx_rings(priv, start_idx, i);
	return (err);
}

void
gve_free_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
{
	int i;

	for (i = start_idx; i < stop_idx; i++)
		gve_rx_free_ring(priv, i);
}

static void
gve_rx_clear_data_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	/*
	 * The Rx data ring has this invariant: "the networking stack is not
	 * using the buffer beginning at any page_offset". This invariant is
	 * established initially by gve_prefill_rx_slots at alloc-time and is
	 * maintained by the cleanup taskqueue. This invariant implies that the
	 * ring can be considered to be fully posted with buffers at this point,
	 * even if there are unfreed mbufs still being processed, which is why we
	 * can fill the ring without waiting on can_flip at each slot to become true.
	 */
	for (i = 0; i < priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i +
		    rx->page_info[i].page_offset);
		rx->fill_cnt++;
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_desc_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	for (i = 0; i < priv->rx_desc_cnt; i++)
		rx->desc_ring[i] = (struct gve_rx_desc){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_clear_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];

	if (!gve_is_gqi(priv)) {
		gve_clear_rx_ring_dqo(priv, i);
		return;
	}

	rx->seq_no = 1;
	rx->cnt = 0;
	rx->fill_cnt = 0;
	rx->mask = priv->rx_desc_cnt - 1;

	gve_rx_clear_desc_ring(rx);
	gve_rx_clear_data_ring(rx);
}

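/*
 * Per-ring bring-up: initialize LRO if the interface has it enabled, create
 * the ring's cleanup taskqueue (GQI or DQO flavor), and post buffers to the
 * device. GQI rings were prefilled at alloc time, so only the doorbell is
 * written here; DQO rings are prefilled now.
 */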
static void
gve_start_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if ((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) {
		if (tcp_lro_init(&rx->lro) != 0)
			device_printf(priv->dev, "Failed to init lro for rx ring %d", i);
		rx->lro.ifp = priv->ifp;
	}

	if (gve_is_gqi(priv))
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq, rx);
	else
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq_dqo, rx);
	com->cleanup_tq = taskqueue_create_fast("gve rx", M_WAITOK,
	    taskqueue_thread_enqueue, &com->cleanup_tq);

	taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET,
	    "%s rxq %d", device_get_nameunit(priv->dev), i);

	if (gve_is_gqi(priv)) {
		/* GQ RX bufs are prefilled at ring alloc time */
		gve_db_bar_write_4(priv, com->db_offset, rx->fill_cnt);
	} else
		gve_rx_prefill_buffers_dqo(rx);
}

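/*
 * Bring up all RX rings: reset their software state, ask the device over the
 * admin queue to create the queues, then read back the doorbell and counter
 * indices the device assigned before starting each ring.
 */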
int
gve_create_rx_rings(struct gve_priv *priv)
{
	struct gve_ring_com *com;
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK))
		return (0);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_clear_rx_ring(priv, i);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err != 0)
		return (err);

	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_POSTREAD);

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		com = &rx->com;

		com->irq_db_offset = 4 * be32toh(priv->irq_db_indices[com->ntfy_id].index);

		bus_dmamap_sync(com->q_resources_mem.tag, com->q_resources_mem.map,
		    BUS_DMASYNC_POSTREAD);
		com->db_offset = 4 * be32toh(com->q_resources->db_index);
		com->counter_idx = be32toh(com->q_resources->counter_index);

		gve_start_rx_ring(priv, i);
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	return (0);
}

static void
gve_stop_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (com->cleanup_tq != NULL) {
		taskqueue_quiesce(com->cleanup_tq);
		taskqueue_free(com->cleanup_tq);
		com->cleanup_tq = NULL;
	}

	tcp_lro_free(&rx->lro);
	rx->ctx = (struct gve_rx_ctx){};
}

int
gve_destroy_rx_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_stop_rx_ring(priv, i);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK)) {
		err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
		if (err != 0)
			return (err);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	}

	return (0);
}

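/*
 * Interrupt filter routine: mask further interrupts for this ring and defer
 * the actual descriptor processing to the ring's cleanup taskqueue.
 */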
int
gve_rx_intr(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	gve_db_bar_write_4(priv, com->irq_db_offset, GVE_IRQ_MASK);
	taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	return (FILTER_HANDLED);
}

static inline void
gve_set_rss_type(__be16 flag, struct mbuf *mbuf)
{
	if ((flag & GVE_RXF_IPV4) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		return;
	}

	if ((flag & GVE_RXF_IPV6) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		return;
	}
}

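/*
 * XOR GVE_DEFAULT_RX_BUFFER_OFFSET into both the host-side page_offset and
 * the device-visible slot address, switching future DMA for this slot to the
 * alternate buffer region within the same QPL page; the region just handed
 * to the stack is left untouched.
 */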
static void
gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = htobe64(GVE_DEFAULT_RX_BUFFER_OFFSET);
	page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET;
	*(slot_addr) ^= offset;
}

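/*
 * Turn one received fragment into an mbuf, one of three ways:
 *
 * 1. Copybreak: small, single-fragment packets are copied into a freshly
 *    allocated packet-header mbuf.
 * 2. Flip: if no previously loaned buffer from this page is still out in the
 *    stack (its wire count is back at 1), attach the buffer as external
 *    storage, take an extra wire on the page, and flip the slot so the
 *    device writes the next fragment into the other buffer region.
 * 3. Copy: otherwise fall back to copying the fragment into a cluster mbuf
 *    so the device-owned buffer can be reposted immediately.
 *
 * The resulting mbuf is appended to the chain being assembled in rx->ctx.
 */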
static struct mbuf *
gve_rx_create_mbuf(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_slot_page_info *page_info, uint16_t len,
    union gve_rx_data_slot *data_slot, bool is_only_frag)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf;
	u_int ref_count;
	bool can_flip;

	uint32_t offset = page_info->page_offset + page_info->pad;
	void *va = (char *)page_info->page_address + offset;

	if (len <= priv->rx_copybreak && is_only_frag) {
		mbuf = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
		if (__predict_false(mbuf == NULL))
			return (NULL);

		m_copyback(mbuf, 0, len, va);
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
		counter_exit();
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		struct mbuf *mbuf_tail = ctx->mbuf_tail;
		KASSERT(len <= MCLBYTES, ("gve rx fragment bigger than cluster mbuf"));

		/*
		 * This page was created with VM_ALLOC_WIRED, thus the lowest
		 * wire count experienced by the page until the interface is
		 * destroyed is 1.
		 *
		 * We wire the page again before supplying an mbuf pointing to
		 * it to the networking stack, so before the mbuf leaves the
		 * driver, the wire count rises to 2.
		 *
		 * If it is 1 again, it necessarily means that the mbuf has been
		 * consumed and it was gve_mextadd_free that brought down the wire
		 * count back to 1. We only need to eventually observe the 1.
		 */
		ref_count = atomic_load_int(&page_info->page->ref_count);
		can_flip = VPRC_WIRE_COUNT(ref_count) == 1;

		if (mbuf_tail == NULL) {
			if (can_flip)
				mbuf = m_gethdr(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);

			ctx->mbuf_head = mbuf;
			ctx->mbuf_tail = mbuf;
		} else {
			if (can_flip)
				mbuf = m_get(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);

			mbuf_tail->m_next = mbuf;
			ctx->mbuf_tail = mbuf;
		}

		if (__predict_false(mbuf == NULL))
			return (NULL);

		if (can_flip) {
			MEXTADD(mbuf, va, len, gve_mextadd_free,
			    page_info->page, page_info->page_address,
			    0, EXT_NET_DRV);

			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
			counter_exit();

			/*
			 * Grab an extra ref to the page so that gve_mextadd_free
			 * does not end up freeing the page while the interface exists.
			 */
			vm_page_wire(page_info->page);

			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		} else {
			m_copyback(mbuf, 0, len, va);
			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
			counter_exit();
		}
	}

	mbuf->m_len = len;
	ctx->total_size += len;

	return (mbuf);
}

static inline bool
gve_needs_rss(__be16 flag)
{
	if ((flag & GVE_RXF_FRAG) != 0)
		return (false);
	if ((flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) != 0)
		return (true);
	return (false);
}

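/*
 * Process a single receive descriptor: create or extend the mbuf chain for
 * the packet being assembled in rx->ctx. On the first fragment the packet
 * header is stamped with RSS and checksum information; on the last fragment
 * the completed chain is handed to LRO or if_input(). A descriptor error or
 * mbuf allocation failure marks the rest of the packet as dropped.
 */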
static void
gve_rx(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_desc *desc,
    uint32_t idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_dma_handle *page_dma_handle;
	union gve_rx_data_slot *data_slot;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf = NULL;
	if_t ifp = priv->ifp;
	bool do_if_input;
	uint16_t len;

	bool is_first_frag = ctx->frag_cnt == 0;
	bool is_last_frag = !(GVE_RXF_PKT_CONT & desc->flags_seq);
	bool is_only_frag = is_first_frag && is_last_frag;

	if (__predict_false(ctx->drop_pkt))
		goto finish_frag;

	if ((desc->flags_seq & GVE_RXF_ERR) != 0) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	page_info = &rx->page_info[idx];
	data_slot = &rx->data_ring[idx];
	page_dma_handle = &(rx->com.qpl->dmas[idx]);

	page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
	len = be16toh(desc->len) - page_info->pad;

	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	mbuf = gve_rx_create_mbuf(priv, rx, page_info, len, data_slot,
	    is_only_frag);
	if (mbuf == NULL) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	if (is_first_frag) {
		mbuf->m_pkthdr.rcvif = priv->ifp;
		ctx->is_tcp = desc->flags_seq & GVE_RXF_TCP;

		if (gve_needs_rss(desc->flags_seq)) {
			gve_set_rss_type(desc->flags_seq, mbuf);
			mbuf->m_pkthdr.flowid = be32toh(desc->rss_hash);
		}

		if ((desc->csum != 0) && ((desc->flags_seq & GVE_RXF_FRAG) == 0)) {
			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
			    CSUM_IP_VALID |
			    CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR;
			mbuf->m_pkthdr.csum_data = 0xffff;
		}
	}

	if (is_last_frag) {
		mbuf = ctx->mbuf_head;
		mbuf->m_pkthdr.len = ctx->total_size;
		do_if_input = true;

		if (((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) &&	/* LRO is enabled */
		    (ctx->is_tcp) &&					/* pkt is a TCP pkt */
		    ((mbuf->m_pkthdr.csum_flags & CSUM_DATA_VALID) != 0) && /* NIC verified csum */
		    (rx->lro.lro_cnt != 0) &&				/* LRO resources exist */
		    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
			do_if_input = false;

		if (do_if_input)
			if_input(ifp, mbuf);

		counter_enter();
		counter_u64_add_protected(rx->stats.rbytes, ctx->total_size);
		counter_u64_add_protected(rx->stats.rpackets, 1);
		counter_exit();
	}

finish_frag:
	ctx->frag_cnt++;
	if (is_last_frag)
		rx->ctx = (struct gve_rx_ctx){};
}

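/*
 * A descriptor has been posted by the device if the sequence number embedded
 * in its flags matches the sequence number the ring expects next.
 */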
static bool
gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	uint32_t next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->seq_no);
}

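/*
 * Sequence numbers run from 1 through 7 and wrap back to 1, never taking the
 * value 0; gve_clear_rx_ring() starts the ring expecting 1.
 */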
static inline uint8_t
gve_next_seqno(uint8_t seq)
{
	return ((seq + 1) == 8 ? 1 : seq + 1);
}

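/*
 * Main RX processing loop: consume descriptors in sequence-number order up to
 * the budget, always finishing a partially assembled packet, then advance the
 * fill count and ring the doorbell so the device can reuse the freed slots.
 * Ending the loop with an incomplete packet means the sequence numbers went
 * wrong mid-packet, so a reset is scheduled.
 */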
static void
gve_rx_cleanup(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	uint32_t idx = rx->cnt & rx->mask;
	struct gve_rx_desc *desc;
	struct gve_rx_ctx *ctx = &rx->ctx;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_POSTREAD);
	desc = &rx->desc_ring[idx];

	while ((work_done < budget || ctx->frag_cnt) &&
	    (GVE_SEQNO(desc->flags_seq) == rx->seq_no)) {

		gve_rx(priv, rx, desc, idx);

		rx->cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc_ring[idx];
		rx->seq_no = gve_next_seqno(rx->seq_no);
		work_done++;
	}

	/* The device will only send whole packets. */
	if (__predict_false(ctx->frag_cnt)) {
		m_freem(ctx->mbuf_head);
		rx->ctx = (struct gve_rx_ctx){};
		device_printf(priv->dev,
		    "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset",
		    GVE_SEQNO(desc->flags_seq), rx->seq_no);
		gve_schedule_reset(priv);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);

	/* Buffers are refilled as the descs are processed */
	rx->fill_cnt += work_done;
	gve_db_bar_write_4(priv, rx->com.db_offset, rx->fill_cnt);
}

void
gve_rx_cleanup_tq(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	gve_rx_cleanup(priv, rx, /*budget=*/128);

	gve_db_bar_write_4(priv, rx->com.irq_db_offset,
	    GVE_IRQ_ACK | GVE_IRQ_EVENT);

	/*
	 * Fragments received before this barrier MAY NOT cause the NIC to send an
	 * interrupt but they will still be handled by the enqueue below.
	 * Fragments received after the barrier WILL trigger an interrupt.
	 */
	atomic_thread_fence_seq_cst();

	if (gve_rx_work_pending(rx)) {
		gve_db_bar_write_4(priv, rx->com.irq_db_offset, GVE_IRQ_MASK);
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	}
}