/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_rx_free_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];

	if (rx->page_info != NULL) {
		free(rx->page_info, M_GVE);
		rx->page_info = NULL;
	}

	if (rx->data_ring != NULL) {
		gve_dma_free_coherent(&rx->data_ring_mem);
		rx->data_ring = NULL;
	}

	if (rx->desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->desc_ring = NULL;
	}
}

static void
gve_rx_free_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	/* Safe to call even if never allocated */
	gve_free_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, i);
	else
		gve_rx_free_ring_dqo(priv, i);

	if (com->q_resources != NULL) {
		gve_dma_free_coherent(&com->q_resources_mem);
		com->q_resources = NULL;
	}
}

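/*
 * Hands every page in the ring's QPL to the device: each data ring slot
 * is pointed at the start of its page, and the driver-side view of that
 * page is recorded in page_info. Called once at alloc time; ring resets
 * repost the same buffers via gve_rx_clear_data_ring.
 */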
static void
gve_prefill_rx_slots(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	struct gve_dma_handle *dma;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i);
		rx->page_info[i].page_offset = 0;
		rx->page_info[i].page_address = com->qpl->dmas[i].cpu_addr;
		rx->page_info[i].page = com->qpl->pages[i];

		dma = &com->qpl->dmas[i];
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREREAD);
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static int
gve_rx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}

	rx->mask = priv->rx_pages_per_qpl - 1;
	rx->desc_ring = rx->desc_ring_mem.cpu_addr;

	com->qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
	if (com->qpl == NULL) {
		device_printf(priv->dev, "No QPL left for rx ring %d", i);
		return (ENOMEM);
	}

	rx->page_info = malloc(priv->rx_desc_cnt * sizeof(*rx->page_info),
	    M_GVE, M_WAITOK | M_ZERO);

	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_rx_data_slot) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->data_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc data ring for rx ring %d", i);
		goto abort;
	}
	rx->data_ring = rx->data_ring_mem.cpu_addr;

	gve_prefill_rx_slots(rx);
	return (0);

abort:
	gve_rx_free_ring_gqi(priv, i);
	return (err);
}

static int
gve_rx_alloc_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	com->priv = priv;
	com->id = i;

	gve_alloc_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
	    PAGE_SIZE, &com->q_resources_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc queue resources for rx ring %d", i);
		goto abort;
	}
	com->q_resources = com->q_resources_mem.cpu_addr;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, i);
	else
		err = gve_rx_alloc_ring_dqo(priv, i);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_rx_free_ring(priv, i);
	return (err);
}

int
gve_alloc_rx_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.num_queues,
	    M_GVE, M_WAITOK | M_ZERO);

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err != 0)
			goto free_rings;
	}

	return (0);

free_rings:
	while (i--)
		gve_rx_free_ring(priv, i);
	free(priv->rx, M_GVE);
	return (err);
}

void
gve_free_rx_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);

	free(priv->rx, M_GVE);
}

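/*
 * Reposts every buffer to the device at its current page_offset and
 * advances fill_cnt for each; the caller has already reset the ring's
 * counters and sequence number.
 */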
static void
gve_rx_clear_data_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	/*
	 * The Rx data ring has this invariant: "the networking stack is not
	 * using the buffer beginning at any page_offset". This invariant is
	 * established initially by gve_prefill_rx_slots at alloc-time and is
	 * maintained by the cleanup taskqueue. This invariant implies that the
	 * ring can be considered to be fully posted with buffers at this point,
	 * even if there are unfreed mbufs still being processed, which is why we
	 * can fill the ring without waiting on can_flip at each slot to become true.
	 */
	for (i = 0; i < priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i +
		    rx->page_info[i].page_offset);
		rx->fill_cnt++;
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_desc_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	for (i = 0; i < priv->rx_desc_cnt; i++)
		rx->desc_ring[i] = (struct gve_rx_desc){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_clear_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];

	if (!gve_is_gqi(priv)) {
		gve_clear_rx_ring_dqo(priv, i);
		return;
	}

	rx->seq_no = 1;
	rx->cnt = 0;
	rx->fill_cnt = 0;
	rx->mask = priv->rx_desc_cnt - 1;

	gve_rx_clear_desc_ring(rx);
	gve_rx_clear_data_ring(rx);
}

static void
gve_start_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if ((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) {
		if (tcp_lro_init(&rx->lro) != 0)
			device_printf(priv->dev, "Failed to init lro for rx ring %d", i);
		rx->lro.ifp = priv->ifp;
	}

	if (gve_is_gqi(priv))
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq, rx);
	else
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq_dqo, rx);
	com->cleanup_tq = taskqueue_create_fast("gve rx", M_WAITOK,
	    taskqueue_thread_enqueue, &com->cleanup_tq);

	taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET,
	    "%s rxq %d", device_get_nameunit(priv->dev), i);

	if (gve_is_gqi(priv)) {
		/* GQ RX bufs are prefilled at ring alloc time */
		gve_db_bar_write_4(priv, com->db_offset, rx->fill_cnt);
	} else
		gve_rx_prefill_buffers_dqo(rx);
}

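/*
 * Creates the rings with the device over the adminq, then reads back
 * each ring's doorbell and counter indices from its queue resources
 * before starting it.
 */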
int
gve_create_rx_rings(struct gve_priv *priv)
{
	struct gve_ring_com *com;
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK))
		return (0);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_clear_rx_ring(priv, i);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err != 0)
		return (err);

	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_POSTREAD);

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		com = &rx->com;

		com->irq_db_offset = 4 * be32toh(priv->irq_db_indices[com->ntfy_id].index);

		bus_dmamap_sync(com->q_resources_mem.tag, com->q_resources_mem.map,
		    BUS_DMASYNC_POSTREAD);
		com->db_offset = 4 * be32toh(com->q_resources->db_index);
		com->counter_idx = be32toh(com->q_resources->counter_index);

		gve_start_rx_ring(priv, i);
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	return (0);
}

static void
gve_stop_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (com->cleanup_tq != NULL) {
		taskqueue_quiesce(com->cleanup_tq);
		taskqueue_free(com->cleanup_tq);
		com->cleanup_tq = NULL;
	}

	tcp_lro_free(&rx->lro);
	rx->ctx = (struct gve_rx_ctx){};
}

int
gve_destroy_rx_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_stop_rx_ring(priv, i);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK)) {
		err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
		if (err != 0)
			return (err);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	}

	return (0);
}

int
gve_rx_intr(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	gve_db_bar_write_4(priv, com->irq_db_offset, GVE_IRQ_MASK);
	taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	return (FILTER_HANDLED);
}

static inline void
gve_set_rss_type(__be16 flag, struct mbuf *mbuf)
{
	if ((flag & GVE_RXF_IPV4) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		return;
	}

	if ((flag & GVE_RXF_IPV6) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		return;
	}
}

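/*
 * Each slot's page hosts two buffer positions. XORing with
 * GVE_DEFAULT_RX_BUFFER_OFFSET toggles both the driver's page_offset and
 * the device-visible qpl_offset between the two, so the buffer just lent
 * to the stack is retired and its sibling is posted to the device.
 */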
static void
gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = htobe64(GVE_DEFAULT_RX_BUFFER_OFFSET);
	page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET;
	*(slot_addr) ^= offset;
}

static struct mbuf *
gve_rx_create_mbuf(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_slot_page_info *page_info, uint16_t len,
    union gve_rx_data_slot *data_slot, bool is_only_frag)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf;
	u_int ref_count;
	bool can_flip;

	uint32_t offset = page_info->page_offset + page_info->pad;
	void *va = (char *)page_info->page_address + offset;

	if (len <= priv->rx_copybreak && is_only_frag) {
		mbuf = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
		if (__predict_false(mbuf == NULL))
			return (NULL);

		m_copyback(mbuf, 0, len, va);
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
		counter_exit();
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		struct mbuf *mbuf_tail = ctx->mbuf_tail;
		KASSERT(len <= MCLBYTES, ("gve rx fragment bigger than cluster mbuf"));

		/*
		 * This page was created with VM_ALLOC_WIRED, thus the lowest
		 * wire count experienced by the page until the interface is
		 * destroyed is 1.
		 *
		 * We wire the page again before supplying an mbuf pointing to
		 * it to the networking stack, so before the mbuf leaves the
		 * driver, the wire count rises to 2.
		 *
		 * If it is 1 again, it necessarily means that the mbuf has been
		 * consumed and it was gve_mextadd_free that brought down the wire
		 * count back to 1. We only need to eventually observe the 1.
		 */
		ref_count = atomic_load_int(&page_info->page->ref_count);
		can_flip = VPRC_WIRE_COUNT(ref_count) == 1;

		if (mbuf_tail == NULL) {
			if (can_flip)
				mbuf = m_gethdr(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);

			ctx->mbuf_head = mbuf;
			ctx->mbuf_tail = mbuf;
		} else {
			if (can_flip)
				mbuf = m_get(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);

			mbuf_tail->m_next = mbuf;
			ctx->mbuf_tail = mbuf;
		}

		if (__predict_false(mbuf == NULL))
			return (NULL);

		if (can_flip) {
			MEXTADD(mbuf, va, len, gve_mextadd_free,
			    page_info->page, page_info->page_address,
			    0, EXT_NET_DRV);

			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
			counter_exit();

			/*
			 * Grab an extra ref to the page so that gve_mextadd_free
			 * does not end up freeing the page while the interface exists.
			 */
			vm_page_wire(page_info->page);

			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		} else {
			m_copyback(mbuf, 0, len, va);
			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
			counter_exit();
		}
	}

	mbuf->m_len = len;
	ctx->total_size += len;

	return (mbuf);
}

static inline bool
gve_needs_rss(__be16 flag)
{
	if ((flag & GVE_RXF_FRAG) != 0)
		return (false);
	if ((flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) != 0)
		return (true);
	return (false);
}

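/*
 * Processes a single descriptor. Fragments of a multi-fragment packet
 * are chained through rx->ctx; when the last fragment arrives, the
 * completed chain is handed to LRO if possible and to if_input()
 * otherwise.
 */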
static void
gve_rx(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_desc *desc,
    uint32_t idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_dma_handle *page_dma_handle;
	union gve_rx_data_slot *data_slot;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf = NULL;
	if_t ifp = priv->ifp;
	bool do_if_input;
	uint16_t len;

	bool is_first_frag = ctx->frag_cnt == 0;
	bool is_last_frag = !(GVE_RXF_PKT_CONT & desc->flags_seq);
	bool is_only_frag = is_first_frag && is_last_frag;

	if (__predict_false(ctx->drop_pkt))
		goto finish_frag;

	if ((desc->flags_seq & GVE_RXF_ERR) != 0) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	page_info = &rx->page_info[idx];
	data_slot = &rx->data_ring[idx];
	page_dma_handle = &(rx->com.qpl->dmas[idx]);

	page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
	len = be16toh(desc->len) - page_info->pad;

	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	mbuf = gve_rx_create_mbuf(priv, rx, page_info, len, data_slot,
	    is_only_frag);
	if (mbuf == NULL) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	if (is_first_frag) {
		mbuf->m_pkthdr.rcvif = priv->ifp;
		ctx->is_tcp = desc->flags_seq & GVE_RXF_TCP;

		if (gve_needs_rss(desc->flags_seq)) {
			gve_set_rss_type(desc->flags_seq, mbuf);
			mbuf->m_pkthdr.flowid = be32toh(desc->rss_hash);
		}

		if ((desc->csum != 0) && ((desc->flags_seq & GVE_RXF_FRAG) == 0)) {
			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
			    CSUM_IP_VALID |
			    CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR;
			mbuf->m_pkthdr.csum_data = 0xffff;
		}
	}

	if (is_last_frag) {
		mbuf = ctx->mbuf_head;
		mbuf->m_pkthdr.len = ctx->total_size;
		do_if_input = true;

		if (((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) &&	/* LRO is enabled */
		    (ctx->is_tcp) &&					/* pkt is a TCP pkt */
		    ((mbuf->m_pkthdr.csum_flags & CSUM_DATA_VALID) != 0) && /* NIC verified csum */
		    (rx->lro.lro_cnt != 0) &&				/* LRO resources exist */
		    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
			do_if_input = false;

		if (do_if_input)
			if_input(ifp, mbuf);

		counter_enter();
		counter_u64_add_protected(rx->stats.rbytes, ctx->total_size);
		counter_u64_add_protected(rx->stats.rpackets, 1);
		counter_exit();
	}

finish_frag:
	ctx->frag_cnt++;
	if (is_last_frag)
		rx->ctx = (struct gve_rx_ctx){};
}

static bool
gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	uint32_t next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->seq_no);
}

static inline uint8_t
gve_next_seqno(uint8_t seq)
{
	return ((seq + 1) == 8 ? 1 : seq + 1);
}

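/*
 * Consumes up to "budget" descriptors, always finishing a packet whose
 * fragments span the budget boundary, then rings the doorbell to return
 * the processed buffers to the device.
 */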
static void
gve_rx_cleanup(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	uint32_t idx = rx->cnt & rx->mask;
	struct gve_rx_desc *desc;
	struct gve_rx_ctx *ctx = &rx->ctx;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_POSTREAD);
	desc = &rx->desc_ring[idx];

	while ((work_done < budget || ctx->frag_cnt) &&
	    (GVE_SEQNO(desc->flags_seq) == rx->seq_no)) {

		gve_rx(priv, rx, desc, idx);

		rx->cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc_ring[idx];
		rx->seq_no = gve_next_seqno(rx->seq_no);
		work_done++;
	}

	/* The device will only send whole packets. */
	if (__predict_false(ctx->frag_cnt)) {
		m_freem(ctx->mbuf_head);
		rx->ctx = (struct gve_rx_ctx){};
		device_printf(priv->dev,
		    "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset",
		    GVE_SEQNO(desc->flags_seq), rx->seq_no);
		gve_schedule_reset(priv);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);

	/* Buffers are refilled as the descs are processed */
	rx->fill_cnt += work_done;
	gve_db_bar_write_4(priv, rx->com.db_offset, rx->fill_cnt);
}

void
gve_rx_cleanup_tq(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	gve_rx_cleanup(priv, rx, /*budget=*/128);

	gve_db_bar_write_4(priv, rx->com.irq_db_offset,
	    GVE_IRQ_ACK | GVE_IRQ_EVENT);

	/*
	 * Fragments received before this barrier MAY NOT cause the NIC to send an
	 * interrupt but they will still be handled by the enqueue below.
	 * Fragments received after the barrier WILL trigger an interrupt.
	 */
	mb();

	if (gve_rx_work_pending(rx)) {
		gve_db_bar_write_4(priv, rx->com.irq_db_offset, GVE_IRQ_MASK);
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	}
}