/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
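
/*
 * Unload and free the mbufs attached to the ring's buffers. Only meaningful
 * in RDA mode; in QPL mode the receive buffers are backed by the
 * pre-registered QPL pages rather than per-buffer mbufs.
 */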
static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
        struct gve_rx_buf_dqo *buf;
        int i;

        if (gve_is_qpl(rx->com.priv))
                return;

        for (i = 0; i < rx->dqo.buf_cnt; i++) {
                buf = &rx->dqo.bufs[i];
                if (!buf->mbuf)
                        continue;

                bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
                    BUS_DMASYNC_POSTREAD);
                bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
                m_freem(buf->mbuf);
                buf->mbuf = NULL;
        }
}
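
/*
 * Release everything allocated by gve_rx_alloc_ring_dqo(): the completion
 * ring, the descriptor ring, the buffer array and its dmamaps, and finally
 * the buffer DMA tag. Also serves as the error-unwind path for a partially
 * constructed ring, so every resource is checked before being freed.
 */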
void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
        struct gve_rx_ring *rx = &priv->rx[i];
        int j;

        if (rx->dqo.compl_ring != NULL) {
                gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
                rx->dqo.compl_ring = NULL;
        }

        if (rx->dqo.desc_ring != NULL) {
                gve_dma_free_coherent(&rx->desc_ring_mem);
                rx->dqo.desc_ring = NULL;
        }

        if (rx->dqo.bufs != NULL) {
                gve_free_rx_mbufs_dqo(rx);

                if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
                        for (j = 0; j < rx->dqo.buf_cnt; j++)
                                if (rx->dqo.bufs[j].mapped)
                                        bus_dmamap_destroy(rx->dqo.buf_dmatag,
                                            rx->dqo.bufs[j].dmamap);
                }

                free(rx->dqo.bufs, M_GVE);
                rx->dqo.bufs = NULL;
        }

        if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
                bus_dma_tag_destroy(rx->dqo.buf_dmatag);
}
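
/*
 * Allocate the descriptor and completion rings for rx ring i. In QPL mode
 * the ring is then bound to its pre-allocated QPL; in RDA mode a DMA tag
 * and one dmamap per buffer are created instead so that cluster mbufs can
 * be loaded on demand.
 */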
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
        struct gve_rx_ring *rx = &priv->rx[i];
        int err;
        int j;

        err = gve_dma_alloc_coherent(priv,
            sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
            CACHE_LINE_SIZE, &rx->desc_ring_mem);
        if (err != 0) {
                device_printf(priv->dev,
                    "Failed to alloc desc ring for rx ring %d", i);
                goto abort;
        }
        rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
        rx->dqo.mask = priv->rx_desc_cnt - 1;

        err = gve_dma_alloc_coherent(priv,
            sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
            CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
        if (err != 0) {
                device_printf(priv->dev,
                    "Failed to alloc compl ring for rx ring %d", i);
                goto abort;
        }
        rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
        rx->dqo.mask = priv->rx_desc_cnt - 1;

        rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
            priv->rx_desc_cnt;
        rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
            M_GVE, M_WAITOK | M_ZERO);

        if (gve_is_qpl(priv)) {
                rx->com.qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
                if (rx->com.qpl == NULL) {
                        device_printf(priv->dev, "No QPL left for rx ring %d", i);
                        return (ENOMEM);
                }
                return (0);
        }

        err = bus_dma_tag_create(
            bus_get_dma_tag(priv->dev), /* parent */
            1, 0,                       /* alignment, bounds */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            MCLBYTES,                   /* maxsize */
            1,                          /* nsegments */
            MCLBYTES,                   /* maxsegsize */
            0,                          /* flags */
            NULL,                       /* lockfunc */
            NULL,                       /* lockarg */
            &rx->dqo.buf_dmatag);
        if (err != 0) {
                device_printf(priv->dev,
                    "%s: bus_dma_tag_create failed: %d\n",
                    __func__, err);
                goto abort;
        }

        for (j = 0; j < rx->dqo.buf_cnt; j++) {
                err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
                    &rx->dqo.bufs[j].dmamap);
                if (err != 0) {
                        device_printf(priv->dev,
                            "err in creating rx buf dmamap %d: %d",
                            j, err);
                        goto abort;
                }
                rx->dqo.bufs[j].mapped = true;
        }

        return (0);

abort:
        gve_rx_free_ring_dqo(priv, i);
        return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
        struct gve_ring_com *com = &rx->com;
        int entries;
        int i;

        entries = com->priv->rx_desc_cnt;
        for (i = 0; i < entries; i++)
                rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

        bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
            BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
        struct gve_ring_com *com = &rx->com;
        int i;

        for (i = 0; i < com->priv->rx_desc_cnt; i++)
                rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

        bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
            BUS_DMASYNC_PREWRITE);
}
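
/*
 * Reset the ring to its pristine state: zero the indices and generation
 * bit, clear both rings, drop any mbufs still attached to buffers, and
 * rebuild the free (and, for QPL, used) buffer lists.
 */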
void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
        struct gve_rx_ring *rx = &priv->rx[i];
        int j;

        rx->fill_cnt = 0;
        rx->cnt = 0;
        rx->dqo.mask = priv->rx_desc_cnt - 1;
        rx->dqo.head = 0;
        rx->dqo.tail = 0;
        rx->dqo.cur_gen_bit = 0;

        gve_rx_clear_desc_ring_dqo(rx);
        gve_rx_clear_compl_ring_dqo(rx);

        gve_free_rx_mbufs_dqo(rx);

        if (gve_is_qpl(priv)) {
                SLIST_INIT(&rx->dqo.free_bufs);
                STAILQ_INIT(&rx->dqo.used_bufs);

                for (j = 0; j < rx->dqo.buf_cnt; j++) {
                        struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

                        vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
                        u_int ref_count = atomic_load_int(&page->ref_count);

                        /*
                         * An ifconfig down+up might see pages still in flight
                         * from the previous innings.
                         */
                        if (VPRC_WIRE_COUNT(ref_count) == 1)
                                SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
                                    buf, slist_entry);
                        else
                                STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
                                    buf, stailq_entry);

                        buf->num_nic_frags = 0;
                        buf->next_idx = 0;
                }
        } else {
                SLIST_INIT(&rx->dqo.free_bufs);
                for (j = 0; j < rx->dqo.buf_cnt; j++)
                        SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
                            &rx->dqo.bufs[j], slist_entry);
        }
}

int
gve_rx_intr_dqo(void *arg)
{
        struct gve_rx_ring *rx = arg;
        struct gve_priv *priv = rx->com.priv;
        struct gve_ring_com *com = &rx->com;

        if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
                return (FILTER_STRAY);

        /* Interrupts are automatically masked */
        taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
        return (FILTER_HANDLED);
}
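
/*
 * Advance the producer index of the buffer/descriptor ring. The doorbell
 * is only written once every GVE_RX_BUF_THRESH_DQO buffers to batch the
 * descriptor-ring sync and the MMIO write.
 */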
static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
        rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
        rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

        if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
                bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
                    BUS_DMASYNC_PREWRITE);
                gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
                    rx->dqo.head);
        }
}

static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
        struct gve_rx_desc_dqo *desc;

        bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
            BUS_DMASYNC_PREREAD);

        desc = &rx->dqo.desc_ring[rx->dqo.head];
        desc->buf_id = htole16(buf - rx->dqo.bufs);
        desc->buf_addr = htole64(buf->addr);

        gve_rx_advance_head_dqo(rx);
}
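
/*
 * RDA buffer posting: take a buffer off the free list, attach a cluster
 * mbuf, DMA-load it, and hand it to the NIC via a buffer descriptor. On
 * failure the buffer is returned to the free list.
 */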
static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
        struct gve_rx_buf_dqo *buf;
        bus_dma_segment_t segs[1];
        int nsegs;
        int err;

        buf = SLIST_FIRST(&rx->dqo.free_bufs);
        if (__predict_false(!buf)) {
                device_printf(rx->com.priv->dev,
                    "Unexpected empty free bufs list\n");
                return (ENOBUFS);
        }
        SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

        buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
        if (__predict_false(!buf->mbuf)) {
                err = ENOMEM;
                counter_enter();
                counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
                counter_exit();
                goto abort_with_buf;
        }
        buf->mbuf->m_len = MCLBYTES;

        err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
            buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
        KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
        if (__predict_false(err != 0)) {
                counter_enter();
                counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
                counter_exit();
                goto abort_with_mbuf;
        }
        buf->addr = segs[0].ds_addr;

        gve_rx_post_buf_dqo(rx, buf);
        return (0);

abort_with_mbuf:
        m_freem(buf->mbuf);
        buf->mbuf = NULL;
abort_with_buf:
        SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
        return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
        return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}
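
/*
 * QPL buffer posting: each QPL page is carved into GVE_DQ_NUM_FRAGS_IN_PAGE
 * fragments of GVE_DEFAULT_RX_BUFFER_SIZE bytes, and each fragment is
 * posted as its own buffer descriptor with the fragment number encoded in
 * the buffer id.
 */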
static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    uint8_t frag_num)
{
        struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
        union gve_rx_qpl_buf_id_dqo composed_id;
        struct gve_dma_handle *page_dma_handle;

        composed_id.buf_id = buf - rx->dqo.bufs;
        composed_id.frag_num = frag_num;
        desc->buf_id = htole16(composed_id.all);

        page_dma_handle = gve_get_page_dma_handle(rx, buf);
        bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
            BUS_DMASYNC_PREREAD);
        desc->buf_addr = htole64(page_dma_handle->bus_addr +
            frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

        buf->num_nic_frags++;
        gve_rx_advance_head_dqo(rx);
}
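
/*
 * Move QPL buffers from used_bufs back to free_bufs once their page's wire
 * count shows the network stack has released all its references. At most
 * one head-of-line blocker is set aside and re-queued at the head, so a
 * single still-referenced page does not stop the pages behind it from
 * being reclaimed.
 */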
static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
        struct gve_rx_buf_dqo *hol_blocker = NULL;
        struct gve_rx_buf_dqo *buf;
        u_int ref_count;
        vm_page_t page;

        while (true) {
                buf = STAILQ_FIRST(&rx->dqo.used_bufs);
                if (__predict_false(buf == NULL))
                        break;

                page = rx->com.qpl->pages[buf - rx->dqo.bufs];
                ref_count = atomic_load_int(&page->ref_count);

                if (VPRC_WIRE_COUNT(ref_count) != 1) {
                        /* Account for one head-of-line blocker */
                        if (hol_blocker != NULL)
                                break;
                        hol_blocker = buf;
                        STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
                            stailq_entry);
                        continue;
                }

                STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
                    stailq_entry);
                SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
                    buf, slist_entry);
                if (just_one)
                        break;
        }

        if (hol_blocker != NULL)
                STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
                    hol_blocker, stailq_entry);
}

static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
        struct gve_rx_buf_dqo *buf;

        buf = SLIST_FIRST(&rx->dqo.free_bufs);
        if (__predict_false(buf == NULL)) {
                gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
                buf = SLIST_FIRST(&rx->dqo.free_bufs);
                if (__predict_false(buf == NULL))
                        return (ENOBUFS);
        }

        gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
        if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
                buf->next_idx = 0;
        else
                buf->next_idx++;

        /*
         * We have posted all the frags in this buf to the NIC.
         * - buf will enter used_bufs once the last completion arrives.
         * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
         *   when its wire count drops back to 1.
         */
        if (buf->next_idx == 0)
                SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
        return (0);
}
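
/*
 * Fill the descriptor ring with fresh buffers, stopping one entry short of
 * completely full (num_to_post = mask - num_pending_bufs). Posting stops
 * early if buffers or mbufs run out.
 */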
static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
        uint32_t num_pending_bufs;
        uint32_t num_to_post;
        uint32_t i;
        int err;

        num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
        num_to_post = rx->dqo.mask - num_pending_bufs;

        for (i = 0; i < num_to_post; i++) {
                if (gve_is_qpl(rx->com.priv))
                        err = gve_rx_post_new_dqo_qpl_buf(rx);
                else
                        err = gve_rx_post_new_mbuf_dqo(rx, how);
                if (err)
                        break;
        }
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
        gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
        switch (ptype->l3_type) {
        case GVE_L3_TYPE_IPV4:
                switch (ptype->l4_type) {
                case GVE_L4_TYPE_TCP:
                        *is_tcp = true;
                        M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
                        break;
                case GVE_L4_TYPE_UDP:
                        M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
                        break;
                default:
                        M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
                }
                break;
        case GVE_L3_TYPE_IPV6:
                switch (ptype->l4_type) {
                case GVE_L4_TYPE_TCP:
                        *is_tcp = true;
                        M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
                        break;
                case GVE_L4_TYPE_UDP:
                        M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
                        break;
                default:
                        M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
                }
                break;
        default:
                M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
        }
}

static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
        /* HW did not identify and process L3 and L4 headers. */
        if (__predict_false(!desc->l3_l4_processed))
                return;

        if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
                if (__predict_false(desc->csum_ip_err ||
                    desc->csum_external_ip_err))
                        return;
        } else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
                /* Checksum should be skipped if this flag is set. */
                if (__predict_false(desc->ipv6_ex_add))
                        return;
        }

        if (__predict_false(desc->csum_l4_err))
                return;

        switch (ptype->l4_type) {
        case GVE_L4_TYPE_TCP:
        case GVE_L4_TYPE_UDP:
        case GVE_L4_TYPE_ICMP:
        case GVE_L4_TYPE_SCTP:
                mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
                    CSUM_IP_VALID |
                    CSUM_DATA_VALID |
                    CSUM_PSEUDO_HDR;
                mbuf->m_pkthdr.csum_data = 0xffff;
                break;
        default:
                break;
        }
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
        struct mbuf *mbuf = rx->ctx.mbuf_head;
        if_t ifp = rx->com.priv->ifp;
        struct gve_ptype *ptype;
        bool do_if_input = true;
        bool is_tcp = false;

        ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
        gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
        mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
        gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

        mbuf->m_pkthdr.rcvif = ifp;
        mbuf->m_pkthdr.len = rx->ctx.total_size;

        if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
            is_tcp &&
            (rx->lro.lro_cnt != 0) &&
            (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
                do_if_input = false;

        if (do_if_input)
                if_input(ifp, mbuf);

        counter_enter();
        counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
        counter_u64_add_protected(rx->stats.rpackets, 1);
        counter_exit();

        rx->ctx = (struct gve_rx_ctx){};
}
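
/*
 * Copybreak path for small frames: copy the frag into a freshly allocated
 * mbuf so that the receive buffer can be reposted to the NIC immediately.
 */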
static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
        struct mbuf *mbuf;

        mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
        if (__predict_false(mbuf == NULL))
                return (ENOMEM);

        counter_enter();
        counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
        counter_exit();

        m_copyback(mbuf, 0, frag_len, va);
        mbuf->m_len = frag_len;

        rx->ctx.mbuf_head = mbuf;
        rx->ctx.mbuf_tail = mbuf;
        rx->ctx.total_size += frag_len;

        gve_rx_input_mbuf_dqo(rx, compl_desc);
        return (0);
}
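
/*
 * Process one RDA completion: validate the buffer id, take the copybreak
 * shortcut for small single-frag packets, otherwise chain the buffer's
 * mbuf onto the packet under construction and recycle the buffer slot.
 * Malformed completions trigger a device reset.
 */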
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
        bool is_last_frag = compl_desc->end_of_packet != 0;
        struct gve_rx_ctx *ctx = &rx->ctx;
        struct gve_rx_buf_dqo *buf;
        uint32_t num_pending_bufs;
        uint16_t frag_len;
        uint16_t buf_id;
        int err;

        buf_id = le16toh(compl_desc->buf_id);
        if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
                device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
                    buf_id, rx->com.id);
                gve_schedule_reset(priv);
                goto drop_frag_clear_ctx;
        }
        buf = &rx->dqo.bufs[buf_id];
        if (__predict_false(buf->mbuf == NULL)) {
                device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
                    buf_id, rx->com.id);
                gve_schedule_reset(priv);
                goto drop_frag_clear_ctx;
        }

        if (__predict_false(ctx->drop_pkt))
                goto drop_frag;

        if (__predict_false(compl_desc->rx_error)) {
                counter_enter();
                counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
                counter_exit();
                goto drop_frag;
        }

        bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
            BUS_DMASYNC_POSTREAD);

        frag_len = compl_desc->packet_len;
        if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
                err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
                    compl_desc, frag_len);
                if (__predict_false(err != 0))
                        goto drop_frag;
                (*work_done)++;
                gve_rx_post_buf_dqo(rx, buf);
                return;
        }

        /*
         * Although buffer completions may arrive out of order, buffer
         * descriptors are consumed by the NIC in order. That is, the
         * buffer at desc_ring[tail] might not be the buffer we got the
         * completion compl_ring[tail] for: but we know that desc_ring[tail]
         * has already been read by the NIC.
         */
        num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

        /*
         * For every fragment received, try to post a new buffer.
         *
         * Failures are okay but only so long as the number of outstanding
         * buffers is above a threshold.
         *
         * Beyond that we drop new packets to reuse their buffers.
         * Without ensuring a minimum number of buffers for the NIC to
         * put packets in, we run the risk of getting the queue stuck
         * for good.
         */
        err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
        if (__predict_false(err != 0 &&
            num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
                counter_enter();
                counter_u64_add_protected(
                    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
                counter_exit();
                goto drop_frag;
        }

        buf->mbuf->m_len = frag_len;
        ctx->total_size += frag_len;
        if (ctx->mbuf_tail == NULL) {
                ctx->mbuf_head = buf->mbuf;
                ctx->mbuf_tail = buf->mbuf;
        } else {
                buf->mbuf->m_flags &= ~M_PKTHDR;
                ctx->mbuf_tail->m_next = buf->mbuf;
                ctx->mbuf_tail = buf->mbuf;
        }

        /*
         * Disassociate the mbuf from buf and surrender buf to the free list to
         * be used by a future mbuf.
         */
        bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
        buf->mbuf = NULL;
        buf->addr = 0;
        SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

        if (is_last_frag) {
                gve_rx_input_mbuf_dqo(rx, compl_desc);
                (*work_done)++;
        }
        return;

drop_frag:
        /* Clear the earlier frags if there were any */
        m_freem(ctx->mbuf_head);
        rx->ctx = (struct gve_rx_ctx){};
        /* Drop the rest of the pkt if there are more frags */
        ctx->drop_pkt = true;
        /* Reuse the dropped frag's buffer */
        gve_rx_post_buf_dqo(rx, buf);

        if (is_last_frag)
                goto drop_frag_clear_ctx;
        return;

drop_frag_clear_ctx:
        counter_enter();
        counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
        counter_exit();
        m_freem(ctx->mbuf_head);
        rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
        int page_idx = buf - rx->dqo.bufs;
        void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

        va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
        return (va);
}
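
/*
 * Copy a QPL fragment into a newly allocated cluster mbuf and append it to
 * the packet under construction. Used when the fragment cannot be loaned
 * out zero-copy because the ring is running low on postable buffers.
 */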
static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
        void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
        struct mbuf *mbuf;

        if (ctx->mbuf_tail == NULL) {
                mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
                if (mbuf == NULL)
                        return (ENOMEM);
                ctx->mbuf_head = mbuf;
                ctx->mbuf_tail = mbuf;
        } else {
                mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
                if (mbuf == NULL)
                        return (ENOMEM);
                ctx->mbuf_tail->m_next = mbuf;
                ctx->mbuf_tail = mbuf;
        }

        mbuf->m_len = frag_len;
        ctx->total_size += frag_len;

        m_copyback(mbuf, 0, frag_len, va);
        counter_enter();
        counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
        counter_exit();
        return (0);
}
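
/*
 * Zero-copy path for a QPL fragment: hand the page fragment to the stack
 * as an external mbuf (MEXTADD) with gve_mextadd_free as the free routine,
 * taking an extra wire on the page first.
 */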
static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
        struct mbuf *mbuf;
        void *page_addr;
        vm_page_t page;
        int page_idx;
        void *va;

        if (ctx->mbuf_tail == NULL) {
                mbuf = m_gethdr(M_NOWAIT, MT_DATA);
                if (mbuf == NULL)
                        return (ENOMEM);
                ctx->mbuf_head = mbuf;
                ctx->mbuf_tail = mbuf;
        } else {
                mbuf = m_get(M_NOWAIT, MT_DATA);
                if (mbuf == NULL)
                        return (ENOMEM);
                ctx->mbuf_tail->m_next = mbuf;
                ctx->mbuf_tail = mbuf;
        }

        mbuf->m_len = frag_len;
        ctx->total_size += frag_len;

        page_idx = buf - rx->dqo.bufs;
        page = rx->com.qpl->pages[page_idx];
        page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
        va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

        /*
         * Grab an extra ref to the page so that gve_mextadd_free
         * does not end up freeing the page while the interface exists.
         */
        vm_page_wire(page);

        counter_enter();
        counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
        counter_exit();

        MEXTADD(mbuf, va, frag_len,
            gve_mextadd_free, page, page_addr,
            0, EXT_NET_DRV);
        return (0);
}
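
/*
 * Process one QPL completion: decode the buffer id and fragment number,
 * validate them, and either copybreak, copy into a cluster mbuf, or loan
 * the page fragment out as an external mbuf. Pages whose fragments have
 * all been posted and completed are parked on used_bufs until the stack
 * releases them.
 */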
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
        bool is_last_frag = compl_desc->end_of_packet != 0;
        union gve_rx_qpl_buf_id_dqo composed_id;
        struct gve_dma_handle *page_dma_handle;
        struct gve_rx_ctx *ctx = &rx->ctx;
        struct gve_rx_buf_dqo *buf;
        uint32_t num_pending_bufs;
        uint8_t buf_frag_num;
        uint16_t frag_len;
        uint16_t buf_id;
        int err;

        composed_id.all = le16toh(compl_desc->buf_id);
        buf_id = composed_id.buf_id;
        buf_frag_num = composed_id.frag_num;

        if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
                device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
                    buf_id, rx->com.id);
                gve_schedule_reset(priv);
                goto drop_frag_clear_ctx;
        }
        buf = &rx->dqo.bufs[buf_id];
        if (__predict_false(buf->num_nic_frags == 0 ||
            buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
                device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
                    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
                    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
                gve_schedule_reset(priv);
                goto drop_frag_clear_ctx;
        }

        buf->num_nic_frags--;

        if (__predict_false(ctx->drop_pkt))
                goto drop_frag;

        if (__predict_false(compl_desc->rx_error)) {
                counter_enter();
                counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
                counter_exit();
                goto drop_frag;
        }

        page_dma_handle = gve_get_page_dma_handle(rx, buf);
        bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
            BUS_DMASYNC_POSTREAD);

        frag_len = compl_desc->packet_len;
        if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
                void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

                err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
                if (__predict_false(err != 0))
                        goto drop_frag;
                (*work_done)++;
                gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
                return;
        }

        num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
        err = gve_rx_post_new_dqo_qpl_buf(rx);
        if (__predict_false(err != 0 &&
            num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
                /*
                 * Resort to copying this fragment into a cluster mbuf
                 * when the above threshold is breached and repost the
                 * incoming buffer. If we cannot find cluster mbufs,
                 * just drop the packet (to repost its buffer).
                 */
                err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
                    buf_frag_num, frag_len);
                if (err != 0) {
                        counter_enter();
                        counter_u64_add_protected(
                            rx->stats.rx_dropped_pkt_buf_post_fail, 1);
                        counter_exit();
                        goto drop_frag;
                }
                gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
        } else {
                err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
                    buf_frag_num, frag_len);
                if (__predict_false(err != 0)) {
                        counter_enter();
                        counter_u64_add_protected(
                            rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
                        counter_exit();
                        goto drop_frag;
                }
        }

        /*
         * Both the counts need to be checked.
         *
         * num_nic_frags == 0 implies no pending completions
         * but not all frags may have yet been posted.
         *
         * next_idx == 0 implies all frags have been posted
         * but there might be pending completions.
         */
        if (buf->num_nic_frags == 0 && buf->next_idx == 0)
                STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

        if (is_last_frag) {
                gve_rx_input_mbuf_dqo(rx, compl_desc);
                (*work_done)++;
        }
        return;

drop_frag:
        /* Clear the earlier frags if there were any */
        m_freem(ctx->mbuf_head);
        rx->ctx = (struct gve_rx_ctx){};
        /* Drop the rest of the pkt if there are more frags */
        ctx->drop_pkt = true;
        /* Reuse the dropped frag's buffer */
        gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

        if (is_last_frag)
                goto drop_frag_clear_ctx;
        return;

drop_frag_clear_ctx:
        counter_enter();
        counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
        counter_exit();
        m_freem(ctx->mbuf_head);
        rx->ctx = (struct gve_rx_ctx){};
}
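
/*
 * Drain up to 'budget' completions from the completion ring. A completion
 * is new when its generation bit differs from cur_gen_bit, which flips
 * every time the tail wraps around the ring. Returns true if the budget
 * was exhausted, so the caller can reschedule itself.
 */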
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
        struct gve_rx_compl_desc_dqo *compl_desc;
        uint32_t work_done = 0;

        NET_EPOCH_ASSERT();

        while (work_done < budget) {
                bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
                    BUS_DMASYNC_POSTREAD);

                compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
                if (compl_desc->generation == rx->dqo.cur_gen_bit)
                        break;
                /*
                 * Prevent generation bit from being read after the rest of the
                 * descriptor.
                 */
                rmb();

                rx->cnt++;
                rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
                rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

                if (gve_is_qpl(priv))
                        gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
                else
                        gve_rx_dqo(priv, rx, compl_desc, &work_done);
        }

        if (work_done != 0)
                tcp_lro_flush_all(&rx->lro);

        gve_rx_post_buffers_dqo(rx, M_NOWAIT);
        if (gve_is_qpl(priv))
                gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
        return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
        struct gve_rx_ring *rx = arg;
        struct gve_priv *priv = rx->com.priv;

        if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
                return;

        if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
                taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
                return;
        }

        gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
            GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}