/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_rx_free_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (rx->page_info != NULL) {
		free(rx->page_info, M_GVE);
		rx->page_info = NULL;
	}

	if (rx->data_ring != NULL) {
		gve_dma_free_coherent(&rx->data_ring_mem);
		rx->data_ring = NULL;
	}

	if (rx->desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->desc_ring = NULL;
	}

	if (com->qpl != NULL) {
		gve_free_qpl(priv, com->qpl);
		com->qpl = NULL;
	}
}

static void
gve_rx_free_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	/* Safe to call even if never allocated */
	gve_free_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, i);
	else
		gve_rx_free_ring_dqo(priv, i);

	if (com->q_resources != NULL) {
		gve_dma_free_coherent(&com->q_resources_mem);
		com->q_resources = NULL;
	}
}

static void
gve_prefill_rx_slots(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	struct gve_dma_handle *dma;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i);
		rx->page_info[i].page_offset = 0;
		rx->page_info[i].page_address = com->qpl->dmas[i].cpu_addr;
		rx->page_info[i].page = com->qpl->pages[i];

		dma = &com->qpl->dmas[i];
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREREAD);
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static int
gve_rx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}

	rx->mask = priv->rx_pages_per_qpl - 1;
	rx->desc_ring = rx->desc_ring_mem.cpu_addr;

	com->qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues,
	    priv->rx_desc_cnt, /*single_kva=*/false);
	if (com->qpl == NULL) {
		device_printf(priv->dev,
		    "Failed to alloc QPL for rx ring %d", i);
		err = ENOMEM;
		goto abort;
	}

	rx->page_info = malloc(priv->rx_desc_cnt * sizeof(*rx->page_info),
	    M_GVE, M_WAITOK | M_ZERO);

	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_rx_data_slot) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->data_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc data ring for rx ring %d", i);
		goto abort;
	}
	rx->data_ring = rx->data_ring_mem.cpu_addr;

	gve_prefill_rx_slots(rx);
	return (0);

abort:
	gve_rx_free_ring_gqi(priv, i);
	return (err);
}

static int
gve_rx_alloc_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	com->priv = priv;
	com->id = i;

	gve_alloc_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
	    PAGE_SIZE, &com->q_resources_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc queue resources for rx ring %d", i);
		goto abort;
	}
	com->q_resources = com->q_resources_mem.cpu_addr;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, i);
	else
		err = gve_rx_alloc_ring_dqo(priv, i);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_rx_free_ring(priv, i);
	return (err);
}

int
gve_alloc_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
{
	int i;
	int err;

	KASSERT(priv->rx != NULL, ("priv->rx is NULL!"));

	for (i = start_idx; i < stop_idx; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err != 0)
			goto free_rings;
	}

	return (0);
free_rings:
	gve_free_rx_rings(priv, start_idx, i);
	return (err);
}

void
gve_free_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
{
	int i;

	for (i = start_idx; i < stop_idx; i++)
		gve_rx_free_ring(priv, i);
}

static void
gve_rx_clear_data_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	/*
	 * The Rx data ring has this invariant: "the networking stack is not
	 * using the buffer beginning at any page_offset". This invariant is
	 * established initially by gve_prefill_rx_slots at alloc-time and is
	 * maintained by the cleanup taskqueue. This invariant implies that the
	 * ring can be considered to be fully posted with buffers at this point,
	 * even if there are unfreed mbufs still being processed, which is why we
	 * can fill the ring without waiting on can_flip at each slot to become true.
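	 *
	 * For example (an illustration of the loop below, not additional
	 * behavior): slot i always refers to page i of the QPL, so if an
	 * earlier flip left page_info[i].page_offset at
	 * GVE_DEFAULT_RX_BUFFER_OFFSET, the loop simply reposts that same
	 * offset, i.e. qpl_offset = i * PAGE_SIZE + GVE_DEFAULT_RX_BUFFER_OFFSET.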
	 */
	for (i = 0; i < priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i +
		    rx->page_info[i].page_offset);
		rx->fill_cnt++;
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_desc_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	for (i = 0; i < priv->rx_desc_cnt; i++)
		rx->desc_ring[i] = (struct gve_rx_desc){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_clear_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];

	if (!gve_is_gqi(priv)) {
		gve_clear_rx_ring_dqo(priv, i);
		return;
	}

	rx->seq_no = 1;
	rx->cnt = 0;
	rx->fill_cnt = 0;
	rx->mask = priv->rx_desc_cnt - 1;

	gve_rx_clear_desc_ring(rx);
	gve_rx_clear_data_ring(rx);
}

static void
gve_start_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if ((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) {
		if (tcp_lro_init(&rx->lro) != 0)
			device_printf(priv->dev, "Failed to init lro for rx ring %d", i);
		rx->lro.ifp = priv->ifp;
	}

	if (gve_is_gqi(priv))
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq, rx);
	else
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq_dqo, rx);
	com->cleanup_tq = taskqueue_create_fast("gve rx", M_WAITOK,
	    taskqueue_thread_enqueue, &com->cleanup_tq);

	taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET,
	    "%s rxq %d", device_get_nameunit(priv->dev), i);

	if (gve_is_gqi(priv)) {
		/* GQ RX bufs are prefilled at ring alloc time */
		gve_db_bar_write_4(priv, com->db_offset, rx->fill_cnt);
	} else
		gve_rx_prefill_buffers_dqo(rx);
}

int
gve_create_rx_rings(struct gve_priv *priv)
{
	struct gve_ring_com *com;
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK))
		return (0);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_clear_rx_ring(priv, i);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err != 0)
		return (err);

	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_POSTREAD);

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		com = &rx->com;

		com->irq_db_offset = 4 * be32toh(priv->irq_db_indices[com->ntfy_id].index);

		bus_dmamap_sync(com->q_resources_mem.tag, com->q_resources_mem.map,
		    BUS_DMASYNC_POSTREAD);
		com->db_offset = 4 * be32toh(com->q_resources->db_index);
		com->counter_idx = be32toh(com->q_resources->counter_index);

		gve_start_rx_ring(priv, i);
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	return (0);
}

static void
gve_stop_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (com->cleanup_tq != NULL) {
		taskqueue_quiesce(com->cleanup_tq);
		taskqueue_free(com->cleanup_tq);
		com->cleanup_tq = NULL;
	}

	tcp_lro_free(&rx->lro);
	rx->ctx = (struct gve_rx_ctx){};
}

int
gve_destroy_rx_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_stop_rx_ring(priv, i);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK)) {
		err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
		if (err != 0)
			return (err);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	}

	return (0);
}

int
gve_rx_intr(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	gve_db_bar_write_4(priv, com->irq_db_offset, GVE_IRQ_MASK);
	taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	return (FILTER_HANDLED);
}

static inline void
gve_set_rss_type(__be16 flag, struct mbuf *mbuf)
{
	if ((flag & GVE_RXF_IPV4) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		return;
	}

	if ((flag & GVE_RXF_IPV6) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		return;
	}
}

static void
gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = htobe64(GVE_DEFAULT_RX_BUFFER_OFFSET);
	page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET;
	*(slot_addr) ^= offset;
}

static struct mbuf *
gve_rx_create_mbuf(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_slot_page_info *page_info, uint16_t len,
    union gve_rx_data_slot *data_slot, bool is_only_frag)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf;
	u_int ref_count;
	bool can_flip;

	uint32_t offset = page_info->page_offset + page_info->pad;
	void *va = (char *)page_info->page_address + offset;

	if (len <= priv->rx_copybreak && is_only_frag) {
		mbuf = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
		if (__predict_false(mbuf == NULL))
			return (NULL);

		m_copyback(mbuf, 0, len, va);
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
		counter_exit();
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		struct mbuf *mbuf_tail = ctx->mbuf_tail;
		KASSERT(len <= MCLBYTES, ("gve rx fragment bigger than cluster mbuf"));

		/*
		 * This page was created with VM_ALLOC_WIRED, thus the lowest
		 * wire count experienced by the page until the interface is
		 * destroyed is 1.
		 *
		 * We wire the page again before supplying an mbuf pointing to
		 * it to the networking stack, so before the mbuf leaves the
		 * driver, the wire count rises to 2.
		 *
		 * If it is 1 again, it necessarily means that the mbuf has been
		 * consumed and it was gve_mextadd_free that brought down the wire
		 * count back to 1. We only need to eventually observe the 1.
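		 *
		 * Put differently (a reading of the check below, not extra
		 * behavior): VPRC_WIRE_COUNT() strips the flag bits out of
		 * ref_count so that only the wiring count is compared. Seeing
		 * 1 means the stack has given the buffer back and the slot can
		 * be flipped to the other buffer offset within the page;
		 * seeing 2 means a previously loaned mbuf may still reference
		 * it, so the fragment is copied into a fresh cluster instead.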
		 */
		ref_count = atomic_load_int(&page_info->page->ref_count);
		can_flip = VPRC_WIRE_COUNT(ref_count) == 1;

		if (mbuf_tail == NULL) {
			if (can_flip)
				mbuf = m_gethdr(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);

			ctx->mbuf_head = mbuf;
			ctx->mbuf_tail = mbuf;
		} else {
			if (can_flip)
				mbuf = m_get(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);

			mbuf_tail->m_next = mbuf;
			ctx->mbuf_tail = mbuf;
		}

		if (__predict_false(mbuf == NULL))
			return (NULL);

		if (can_flip) {
			MEXTADD(mbuf, va, len, gve_mextadd_free,
			    page_info->page, page_info->page_address,
			    0, EXT_NET_DRV);

			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
			counter_exit();

			/*
			 * Grab an extra ref to the page so that gve_mextadd_free
			 * does not end up freeing the page while the interface exists.
			 */
			vm_page_wire(page_info->page);

			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		} else {
			m_copyback(mbuf, 0, len, va);
			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
			counter_exit();
		}
	}

	mbuf->m_len = len;
	ctx->total_size += len;

	return (mbuf);
}

static inline bool
gve_needs_rss(__be16 flag)
{
	if ((flag & GVE_RXF_FRAG) != 0)
		return (false);
	if ((flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) != 0)
		return (true);
	return (false);
}

static void
gve_rx(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_desc *desc,
    uint32_t idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_dma_handle *page_dma_handle;
	union gve_rx_data_slot *data_slot;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf = NULL;
	if_t ifp = priv->ifp;
	bool do_if_input;
	uint16_t len;

	bool is_first_frag = ctx->frag_cnt == 0;
	bool is_last_frag = !(GVE_RXF_PKT_CONT & desc->flags_seq);
	bool is_only_frag = is_first_frag && is_last_frag;

	if (__predict_false(ctx->drop_pkt))
		goto finish_frag;

	if ((desc->flags_seq & GVE_RXF_ERR) != 0) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	page_info = &rx->page_info[idx];
	data_slot = &rx->data_ring[idx];
	page_dma_handle = &(rx->com.qpl->dmas[idx]);

	page_info->pad = is_first_frag ?
	    GVE_RX_PAD : 0;
	len = be16toh(desc->len) - page_info->pad;

	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	mbuf = gve_rx_create_mbuf(priv, rx, page_info, len, data_slot,
	    is_only_frag);
	if (mbuf == NULL) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	if (is_first_frag) {
		mbuf->m_pkthdr.rcvif = priv->ifp;
		ctx->is_tcp = desc->flags_seq & GVE_RXF_TCP;

		if (gve_needs_rss(desc->flags_seq)) {
			gve_set_rss_type(desc->flags_seq, mbuf);
			mbuf->m_pkthdr.flowid = be32toh(desc->rss_hash);
		}

		if ((desc->csum != 0) && ((desc->flags_seq & GVE_RXF_FRAG) == 0)) {
			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
			    CSUM_IP_VALID |
			    CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR;
			mbuf->m_pkthdr.csum_data = 0xffff;
		}
	}

	if (is_last_frag) {
		mbuf = ctx->mbuf_head;
		mbuf->m_pkthdr.len = ctx->total_size;
		do_if_input = true;

		if (((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) &&      /* LRO is enabled */
		    (ctx->is_tcp) &&                                        /* pkt is a TCP pkt */
		    ((mbuf->m_pkthdr.csum_flags & CSUM_DATA_VALID) != 0) && /* NIC verified csum */
		    (rx->lro.lro_cnt != 0) &&                               /* LRO resources exist */
		    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
			do_if_input = false;

		if (do_if_input)
			if_input(ifp, mbuf);

		counter_enter();
		counter_u64_add_protected(rx->stats.rbytes, ctx->total_size);
		counter_u64_add_protected(rx->stats.rpackets, 1);
		counter_exit();
	}

finish_frag:
	ctx->frag_cnt++;
	if (is_last_frag)
		rx->ctx = (struct gve_rx_ctx){};
}

static bool
gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	uint32_t next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->seq_no);
}

static inline uint8_t
gve_next_seqno(uint8_t seq)
{
	return ((seq + 1) == 8 ? 1 : seq + 1);
}

static void
gve_rx_cleanup(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	uint32_t idx = rx->cnt & rx->mask;
	struct gve_rx_desc *desc;
	struct gve_rx_ctx *ctx = &rx->ctx;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_POSTREAD);
	desc = &rx->desc_ring[idx];

	while ((work_done < budget || ctx->frag_cnt) &&
	    (GVE_SEQNO(desc->flags_seq) == rx->seq_no)) {

		gve_rx(priv, rx, desc, idx);

		rx->cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc_ring[idx];
		rx->seq_no = gve_next_seqno(rx->seq_no);
		work_done++;
	}

	/*
	 * The device will only send whole packets.
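	 * If the loop above exits while ctx->frag_cnt is still nonzero, the
	 * descriptor stream and our sequence counter have diverged in the
	 * middle of a packet; the partial mbuf chain is dropped and a device
	 * reset is scheduled rather than attempting to resynchronize.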
	 */
	if (__predict_false(ctx->frag_cnt)) {
		m_freem(ctx->mbuf_head);
		rx->ctx = (struct gve_rx_ctx){};
		device_printf(priv->dev,
		    "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset",
		    GVE_SEQNO(desc->flags_seq), rx->seq_no);
		gve_schedule_reset(priv);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);

	/* Buffers are refilled as the descs are processed */
	rx->fill_cnt += work_done;
	gve_db_bar_write_4(priv, rx->com.db_offset, rx->fill_cnt);
}

void
gve_rx_cleanup_tq(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	gve_rx_cleanup(priv, rx, /*budget=*/128);

	gve_db_bar_write_4(priv, rx->com.irq_db_offset,
	    GVE_IRQ_ACK | GVE_IRQ_EVENT);

	/*
	 * Fragments received before this barrier MAY NOT cause the NIC to send an
	 * interrupt but they will still be handled by the enqueue below.
	 * Fragments received after the barrier WILL trigger an interrupt.
	 */
	atomic_thread_fence_seq_cst();

	if (gve_rx_work_pending(rx)) {
		gve_db_bar_write_4(priv, rx->com.irq_db_offset, GVE_IRQ_MASK);
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	}
}