/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	if (gve_is_qpl(rx->com.priv))
		return;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);

	if (com->qpl != NULL) {
		gve_free_qpl(priv, com->qpl);
		com->qpl = NULL;
	}
}
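
/*
 * Allocates the resources backing rx ring i: the descriptor ring, the
 * completion ring, and the buffer bookkeeping array. In QPL mode the ring
 * is backed by a queue page list; otherwise a DMA tag and a per-buffer
 * dmamap are created so that mbuf clusters can be loaded directly.
 */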
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
	    priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		rx->com.qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues,
		    GVE_RX_NUM_QPL_PAGES_DQO, /*single_kva=*/false);
		if (rx->com.qpl == NULL) {
			device_printf(priv->dev,
			    "Failed to alloc QPL for rx ring %d", i);
			err = ENOMEM;
			goto abort;
		}
		return (0);
	}

	bus_size_t max_seg_size = gve_rx_dqo_mbuf_segment_size(priv);

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    max_seg_size,		/* maxsize */
	    1,				/* nsegments */
	    max_seg_size,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating rx buf dmamap %d: %d",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}
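
/*
 * Returns rx ring i to its just-allocated state: counters and ring indices
 * are zeroed, both rings are cleared, and all buffers are released. In QPL
 * mode, pages whose wire count shows they are still lent out to the stack
 * are parked on used_bufs instead of being made immediately available.
 */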
void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	if (gve_is_qpl(priv)) {
		SLIST_INIT(&rx->dqo.free_bufs);
		STAILQ_INIT(&rx->dqo.used_bufs);

		for (j = 0; j < rx->dqo.buf_cnt; j++) {
			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
			u_int ref_count = atomic_load_int(&page->ref_count);

			/*
			 * An ifconfig down+up might see pages still in flight
			 * from the previous innings.
			 */
			if (VPRC_WIRE_COUNT(ref_count) == 1)
				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
				    buf, slist_entry);
			else
				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
				    buf, stailq_entry);

			buf->num_nic_frags = 0;
			buf->next_idx = 0;
		}
	} else {
		SLIST_INIT(&rx->dqo.free_bufs);
		for (j = 0; j < rx->dqo.buf_cnt; j++)
			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
			    &rx->dqo.bufs[j], slist_entry);
	}
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	gve_rx_advance_head_dqo(rx);
}

static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);
	buf->mbuf = m_getjcl(how, MT_DATA, M_PKTHDR, segment_size);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = segment_size;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}
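
/*
 * Posts one fragment of a QPL page to the NIC. The buffer index and the
 * fragment number are packed into the descriptor's buf_id so the completion
 * path can recover both; the DMA address is offset into the page by
 * frag_num * rx_buf_size_dqo.
 */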
static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    uint8_t frag_num)
{
	struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;

	composed_id.buf_id = buf - rx->dqo.bufs;
	composed_id.frag_num = frag_num;
	desc->buf_id = htole16(composed_id.all);

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_PREREAD);
	desc->buf_addr = htole64(page_dma_handle->bus_addr +
	    frag_num * rx->com.priv->rx_buf_size_dqo);

	buf->num_nic_frags++;
	gve_rx_advance_head_dqo(rx);
}

static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
	struct gve_rx_buf_dqo *hol_blocker = NULL;
	struct gve_rx_buf_dqo *buf;
	u_int ref_count;
	vm_page_t page;

	while (true) {
		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
		if (__predict_false(buf == NULL))
			break;

		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
		ref_count = atomic_load_int(&page->ref_count);

		if (VPRC_WIRE_COUNT(ref_count) != 1) {
			/* Account for one head-of-line blocker */
			if (hol_blocker != NULL)
				break;
			hol_blocker = buf;
			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
			    stailq_entry);
			continue;
		}

		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
		    stailq_entry);
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    buf, slist_entry);
		if (just_one)
			break;
	}

	if (hol_blocker != NULL)
		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
		    hol_blocker, stailq_entry);
}
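
/*
 * Posts the next fragment of the buffer at the head of free_bufs, reclaiming
 * one buffer from used_bufs if the free list is empty. A buffer whose
 * fragments have all been posted is removed from free_bufs; it returns via
 * used_bufs once the wire count of its page drops back to 1.
 */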
static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(buf == NULL)) {
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
		buf = SLIST_FIRST(&rx->dqo.free_bufs);
		if (__predict_false(buf == NULL))
			return (ENOBUFS);
	}

	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
	if (buf->next_idx == gve_get_dq_num_frags_in_page(rx->com.priv) - 1)
		buf->next_idx = 0;
	else
		buf->next_idx++;

	/*
	 * We have posted all the frags in this buf to the NIC.
	 * - buf will enter used_bufs once the last completion arrives.
	 * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
	 *   when its wire count drops back to 1.
	 */
	if (buf->next_idx == 0)
		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
	return (0);
}

static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		if (gve_is_qpl(rx->com.priv))
			err = gve_rx_post_new_dqo_qpl_buf(rx);
		else
			err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}
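
/*
 * Translates the checksum bits of the completion descriptor into mbuf
 * csum_flags. Nothing is set if the NIC did not parse the L3/L4 headers or
 * if any of the relevant checksum-error bits are set.
 */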
static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
					    CSUM_IP_VALID |
					    CSUM_DATA_VALID |
					    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, va);
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}
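
/*
 * Processes one completion descriptor in non-QPL mode. Small single-frag
 * packets take the copybreak path; otherwise the frag's mbuf is chained onto
 * the per-ring rx context, its buffer is returned to the free list, and the
 * completed packet is handed to the stack on the final frag.
 */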
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
		    compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_buf_dqo(rx, buf);
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
	 * has already been read by the NIC.
	 */
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
	int page_idx = buf - rx->dqo.bufs;
	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

	va = (char *)va + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);
	return (va);
}
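
/*
 * Copies a received QPL fragment into a newly allocated cluster mbuf and
 * appends it to the rx context chain. Used when the fragment's page cannot
 * be lent to the stack because too few buffers remain posted to the NIC.
 */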
static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
	struct mbuf *mbuf;
	bus_size_t segment_size = gve_rx_dqo_mbuf_segment_size(rx->com.priv);

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, segment_size);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_getjcl(M_NOWAIT, MT_DATA, 0, segment_size);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	m_copyback(mbuf, 0, frag_len, va);
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
	counter_exit();
	return (0);
}

static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	struct mbuf *mbuf;
	void *page_addr;
	vm_page_t page;
	int page_idx;
	void *va;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_get(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	page_idx = buf - rx->dqo.bufs;
	page = rx->com.qpl->pages[page_idx];
	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
	va = (char *)page_addr + (buf_frag_num * rx->com.priv->rx_buf_size_dqo);

	/*
	 * Grab an extra ref to the page so that gve_mextadd_free
	 * does not end up freeing the page while the interface exists.
	 */
	vm_page_wire(page);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
	counter_exit();

	MEXTADD(mbuf, va, frag_len,
	    gve_mextadd_free, page, page_addr,
	    0, EXT_NET_DRV);
	return (0);
}
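
/*
 * Processes one completion descriptor in QPL mode. The fragment either takes
 * the copybreak path, is copied into a cluster mbuf when the ring is running
 * low on posted buffers, or is attached zero-copy as an external mbuf backed
 * by the wired QPL page.
 */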
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint8_t buf_frag_num;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	composed_id.all = le16toh(compl_desc->buf_id);
	buf_id = composed_id.buf_id;
	buf_frag_num = composed_id.frag_num;

	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->num_nic_frags == 0 ||
	    buf_frag_num > gve_get_dq_num_frags_in_page(priv) - 1)) {
		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	buf->num_nic_frags--;

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
		return;
	}

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	err = gve_rx_post_new_dqo_qpl_buf(rx);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		/*
		 * Resort to copying this fragment into a cluster mbuf
		 * when the above threshold is breached and repost the
		 * incoming buffer. If we cannot find cluster mbufs,
		 * just drop the packet (to repost its buffer).
		 */
		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (err != 0) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
			counter_exit();
			goto drop_frag;
		}
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
	} else {
		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (__predict_false(err != 0)) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
			counter_exit();
			goto drop_frag;
		}
	}

	/*
	 * Both the counts need to be checked.
	 *
	 * num_nic_frags == 0 implies no pending completions
	 * but not all frags may have yet been posted.
	 *
	 * next_idx == 0 implies all frags have been posted
	 * but there might be pending completions.
	 */
	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}
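
/*
 * Completions are recognized by a generation bit rather than a head pointer:
 * a completion descriptor is new when its generation bit differs from
 * rx->dqo.cur_gen_bit, which gve_rx_cleanup_dqo toggles each time the tail
 * wraps around the ring.
 */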
static uint8_t
gve_rx_get_gen_bit(uint8_t *desc)
{
	uint8_t byte;

	/*
	 * Prevent generation bit from being read after the rest of the
	 * descriptor.
	 */
	byte = atomic_load_acq_8(desc + GVE_RX_DESC_DQO_GEN_BYTE_OFFSET);
	return ((byte & GVE_RX_DESC_DQO_GEN_BIT_MASK) != 0);
}

static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag,
		    rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (gve_rx_get_gen_bit((uint8_t *)compl_desc) ==
		    rx->dqo.cur_gen_bit)
			break;

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		if (gve_is_qpl(priv))
			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
		else
			gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	if (gve_is_qpl(priv))
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
	return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}