/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

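/*
 * Release the mbufs attached to an RDA (non-QPL) rx ring: sync and unload
 * each buffer's DMA map and free its cluster mbuf. A no-op in QPL mode,
 * where receive buffers live in the queue-page-list pages instead.
 */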
static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	if (gve_is_qpl(rx->com.priv))
		return;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

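/*
 * Free everything gve_rx_alloc_ring_dqo() set up for rx ring i: the
 * completion and descriptor rings, the per-buffer DMA maps and tag
 * (RDA mode only), the buffer array, and the QPL (QPL mode only).
 * Safe to call on a partially constructed ring.
 */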
void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);

	if (com->qpl != NULL) {
		gve_free_qpl(priv, com->qpl);
		com->qpl = NULL;
	}
}

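/*
 * Allocate the DMA-coherent descriptor and completion rings for rx ring i
 * along with its buffer bookkeeping: a QPL in QPL mode, or a DMA tag plus
 * per-buffer DMA maps in RDA mode. On any failure the ring is torn down
 * again via gve_rx_free_ring_dqo().
 */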
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
	    priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		rx->com.qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues,
		    GVE_RX_NUM_QPL_PAGES_DQO, /*single_kva=*/false);
		if (rx->com.qpl == NULL) {
			device_printf(priv->dev,
			    "Failed to alloc QPL for rx ring %d", i);
			err = ENOMEM;
			goto abort;
		}
		return (0);
	}

	bus_size_t max_seg_size = gve_rx_dqo_mbuf_segment_size(priv);

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    max_seg_size,		/* maxsize */
	    1,				/* nsegments */
	    max_seg_size,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating rx buf dmamap %d: %d",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

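/*
 * Reset rx ring i to its post-allocation state: zero the counters and
 * ring indices, clear both rings, free any mbufs still attached, and
 * rebuild the free/used buffer lists. QPL pages that are still wired by
 * the stack from a previous run are parked on used_bufs until their
 * wire count drops back to 1.
 */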
void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	if (gve_is_qpl(priv)) {
		SLIST_INIT(&rx->dqo.free_bufs);
		STAILQ_INIT(&rx->dqo.used_bufs);

		for (j = 0; j < rx->dqo.buf_cnt; j++) {
			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
			u_int ref_count = atomic_load_int(&page->ref_count);

			/*
			 * An ifconfig down+up might see pages still in flight
			 * from the previous innings.
			 */
			if (VPRC_WIRE_COUNT(ref_count) == 1)
				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
				    buf, slist_entry);
			else
				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
				    buf, stailq_entry);

			buf->num_nic_frags = 0;
			buf->next_idx = 0;
		}
	} else {
		SLIST_INIT(&rx->dqo.free_bufs);
		for (j = 0; j < rx->dqo.buf_cnt; j++)
			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
			    &rx->dqo.bufs[j], slist_entry);
	}
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

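/*
 * Bump the buffer ring head and, every GVE_RX_BUF_THRESH_DQO postings,
 * sync the descriptor ring and ring the doorbell so the NIC sees the
 * newly posted buffers.
 */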
static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	gve_rx_advance_head_dqo(rx);
}

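/*
 * RDA buffer posting: take a buffer off free_bufs, attach a freshly
 * allocated cluster mbuf, DMA-load it, and write its address into the
 * next rx descriptor. On failure the buffer is returned to free_bufs
 * and the relevant stat counter is bumped.
 */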
static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
	buf->mbuf = m_getjcl(how, MT_DATA, M_PKTHDR, segment_size);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = segment_size;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}

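/*
 * QPL buffer posting: write one fragment of the QPL page backing buf
 * into the next rx descriptor. The buf_id carried in the descriptor
 * encodes both the page index and the fragment number so the completion
 * path can locate the exact fragment later.
 */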
361 static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring * rx,struct gve_rx_buf_dqo * buf,uint8_t frag_num)362 gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
363 uint8_t frag_num)
364 {
365 struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
366 union gve_rx_qpl_buf_id_dqo composed_id;
367 struct gve_dma_handle *page_dma_handle;
368
369 composed_id.buf_id = buf - rx->dqo.bufs;
370 composed_id.frag_num = frag_num;
371 desc->buf_id = htole16(composed_id.all);
372
373 page_dma_handle = gve_get_page_dma_handle(rx, buf);
374 bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
375 BUS_DMASYNC_PREREAD);
376 desc->buf_addr = htole64(page_dma_handle->bus_addr +
377 frag_num * rx->com.priv->rx_buf_size_dqo);
378
379 buf->num_nic_frags++;
380 gve_rx_advance_head_dqo(rx);
381 }
382
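/*
 * Walk used_bufs and move every buffer whose QPL page has had its extra
 * wire references dropped (wire count back to 1) onto free_bufs. At most
 * one head-of-line blocker, a page the stack still holds, is skipped and
 * reinserted at the head; a second blocker ends the scan. With just_one
 * set, stop after reclaiming a single buffer.
 */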
static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
	struct gve_rx_buf_dqo *hol_blocker = NULL;
	struct gve_rx_buf_dqo *buf;
	u_int ref_count;
	vm_page_t page;

	while (true) {
		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
		if (__predict_false(buf == NULL))
			break;

		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
		ref_count = atomic_load_int(&page->ref_count);

		if (VPRC_WIRE_COUNT(ref_count) != 1) {
			/* Account for one head-of-line blocker */
			if (hol_blocker != NULL)
				break;
			hol_blocker = buf;
			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
			    stailq_entry);
			continue;
		}

		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
		    stailq_entry);
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    buf, slist_entry);
		if (just_one)
			break;
	}

	if (hol_blocker != NULL)
		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
		    hol_blocker, stailq_entry);
}

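/*
 * Post the next unposted fragment of the QPL buffer at the head of
 * free_bufs, reclaiming a buffer from used_bufs if free_bufs is empty.
 * Once every fragment of a page has been handed to the NIC the buffer
 * is removed from free_bufs; see the comment below.
 */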
static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(buf == NULL)) {
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
		buf = SLIST_FIRST(&rx->dqo.free_bufs);
		if (__predict_false(buf == NULL))
			return (ENOBUFS);
	}

	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
	if (buf->next_idx == gve_get_dq_num_frags_in_page(rx->com.priv) - 1)
		buf->next_idx = 0;
	else
		buf->next_idx++;

	/*
	 * We have posted all the frags in this buf to the NIC.
	 * - buf will enter used_bufs once the last completion arrives.
	 * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
	 *   when its wire count drops back to 1.
	 */
	if (buf->next_idx == 0)
		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
	return (0);
}

static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		if (gve_is_qpl(rx->com.priv))
			err = gve_rx_post_new_dqo_qpl_buf(rx);
		else
			err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

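/*
 * Translate the device's parsed L3/L4 packet type into an RSS hash type
 * for the mbuf, and report whether the packet is TCP so the caller can
 * decide on LRO. Unknown L3 types fall back to M_HASHTYPE_OPAQUE_HASH.
 */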
static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}

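/*
 * Set mbuf checksum flags from the completion descriptor, but only when
 * the hardware actually parsed the L3/L4 headers and reported no IP or
 * L4 checksum errors for a protocol we recognize.
 */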
static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
					    CSUM_IP_VALID |
					    CSUM_DATA_VALID |
					    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

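/*
 * Copybreak path for small, single-fragment packets: copy the payload
 * into a freshly allocated mbuf and hand it up immediately, leaving the
 * receive buffer in place so the caller can repost it as-is.
 */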
static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, va);
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}

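/*
 * Process one RDA completion descriptor: validate the buffer id, take
 * the copybreak shortcut for small single-fragment packets, otherwise
 * chain the buffer's mbuf onto the packet under construction in rx->ctx
 * and recycle the buffer slot. Malformed completions trigger a device
 * reset; allocation failures while below the pending-buffer threshold
 * drop the packet so its buffer can be reposted.
 */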
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
		    compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_buf_dqo(rx, buf);
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
	 * has already been read by the NIC.
	 */
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
	int page_idx = buf - rx->dqo.bufs;
	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

	va = (char *)va + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
	return (va);
}

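/*
 * Copy path for QPL fragments: append a cluster mbuf to the packet
 * chain in ctx and copy the fragment's payload into it, so the QPL page
 * itself is free to be reposted right away.
 */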
static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
	struct mbuf *mbuf;
	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, segment_size);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_getjcl(M_NOWAIT, MT_DATA, 0, segment_size);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	m_copyback(mbuf, 0, frag_len, va);
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
	counter_exit();
	return (0);
}

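/*
 * Zero-copy path for QPL fragments: append an mbuf whose external
 * storage points directly into the QPL page. The page is wired once
 * more so it stays around until gve_mextadd_free() releases the
 * reference when the stack is done with the mbuf.
 */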
static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	struct mbuf *mbuf;
	void *page_addr;
	vm_page_t page;
	int page_idx;
	void *va;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_get(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	page_idx = buf - rx->dqo.bufs;
	page = rx->com.qpl->pages[page_idx];
	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
	va = (char *)page_addr + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);

	/*
	 * Grab an extra ref to the page so that gve_mextadd_free
	 * does not end up freeing the page while the interface exists.
	 */
	vm_page_wire(page);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
	counter_exit();

	MEXTADD(mbuf, va, frag_len,
	    gve_mextadd_free, page, page_addr,
	    0, EXT_NET_DRV);
	return (0);
}

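/*
 * Process one QPL completion descriptor: decode the page/fragment id,
 * validate it, and either take the copybreak shortcut or attach the
 * fragment to the packet in rx->ctx, zero-copy when buffers are
 * plentiful and by copy when posting a replacement buffer failed and
 * the ring is running low. Fully consumed pages move to used_bufs until
 * the stack drops its references.
 */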
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint8_t buf_frag_num;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	composed_id.all = le16toh(compl_desc->buf_id);
	buf_id = composed_id.buf_id;
	buf_frag_num = composed_id.frag_num;

	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->num_nic_frags == 0 ||
	    buf_frag_num > gve_get_dq_num_frags_in_page(priv) - 1)) {
		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	buf->num_nic_frags--;

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
		return;
	}

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	err = gve_rx_post_new_dqo_qpl_buf(rx);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		/*
		 * Resort to copying this fragment into a cluster mbuf
		 * when the above threshold is breached and repost the
		 * incoming buffer. If we cannot find cluster mbufs,
		 * just drop the packet (to repost its buffer).
		 */
		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (err != 0) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
			counter_exit();
			goto drop_frag;
		}
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
	} else {
		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (__predict_false(err != 0)) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
			counter_exit();
			goto drop_frag;
		}
	}

	/*
	 * Both counts need to be checked.
	 *
	 * num_nic_frags == 0 implies no pending completions
	 * but not all frags may have yet been posted.
	 *
	 * next_idx == 0 implies all frags have been posted
	 * but there might be pending completions.
	 */
	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static uint8_t
gve_rx_get_gen_bit(uint8_t *desc)
{
	uint8_t byte;

	/*
	 * Prevent generation bit from being read after the rest of the
	 * descriptor.
	 */
	byte = atomic_load_acq_8(desc + GVE_RX_DESC_DQO_GEN_BYTE_OFFSET);
	return ((byte & GVE_RX_DESC_DQO_GEN_BIT_MASK) != 0);
}

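/*
 * Drain up to budget completion descriptors, dispatching each to the
 * QPL or RDA handler, then flush LRO and refill the buffer ring.
 * Returns true if the budget was exhausted, so the caller knows to
 * reschedule itself.
 */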
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag,
		    rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (gve_rx_get_gen_bit((uint8_t *)compl_desc) ==
		    rx->dqo.cur_gen_bit)
			break;

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		if (gve_is_qpl(priv))
			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
		else
			gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	if (gve_is_qpl(priv))
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
	return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}