/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	if (gve_is_qpl(rx->com.priv))
		return;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);
}
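
/*
 * Allocate the descriptor ring, the completion ring, and the receive
 * buffers for rx ring i. In QPL mode the buffers come from the ring's
 * preassigned queue page list (QPL); otherwise each buffer gets its own
 * DMA map for a cluster mbuf.
 *
 * Ring indices are wrapped with a power-of-two mask: assuming the
 * device-negotiated rx_desc_cnt is a power of two (e.g. 1024), the mask
 * is rx_desc_cnt - 1 (0x3ff) and (idx + 1) & mask wraps 1023 back to 0.
 */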
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
	    priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		rx->com.qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
		if (rx->com.qpl == NULL) {
			device_printf(priv->dev, "No QPL left for rx ring %d", i);
			return (ENOMEM);
		}
		return (0);
	}

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MCLBYTES,			/* maxsize */
	    1,				/* nsegments */
	    MCLBYTES,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating rx buf dmamap %d: %d",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}
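
/*
 * Reset ring state for a (re)start: zero the software indices, clear
 * both rings, and rebuild the buffer lists. In QPL mode a page whose
 * wire count shows mbufs still in flight from a previous run is parked
 * on used_bufs instead of free_bufs until the stack lets go of it.
 */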
void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	if (gve_is_qpl(priv)) {
		SLIST_INIT(&rx->dqo.free_bufs);
		STAILQ_INIT(&rx->dqo.used_bufs);

		for (j = 0; j < rx->dqo.buf_cnt; j++) {
			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
			u_int ref_count = atomic_load_int(&page->ref_count);

			/*
			 * An ifconfig down+up might see pages still in flight
			 * from the previous run.
			 */
			if (VPRC_WIRE_COUNT(ref_count) == 1)
				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
				    buf, slist_entry);
			else
				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
				    buf, stailq_entry);

			buf->num_nic_frags = 0;
			buf->next_idx = 0;
		}
	} else {
		SLIST_INIT(&rx->dqo.free_bufs);
		for (j = 0; j < rx->dqo.buf_cnt; j++)
			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
			    &rx->dqo.bufs[j], slist_entry);
	}
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	/*
	 * Ring the doorbell only when head is a multiple of
	 * GVE_RX_BUF_THRESH_DQO so that doorbell writes are batched.
	 */
	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	gve_rx_advance_head_dqo(rx);
}

static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = MCLBYTES;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}
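
/*
 * In QPL mode each registered page is carved into
 * GVE_DQ_NUM_FRAGS_IN_PAGE fragments of GVE_DEFAULT_RX_BUFFER_SIZE
 * bytes, and the 16-bit buf_id written to the descriptor packs both the
 * page index and the fragment index (union gve_rx_qpl_buf_id_dqo).
 * For example, assuming 4 KiB pages split into 2 KiB buffers, fragment
 * 1 of page 5 is posted at bus_addr(page 5) + 2048.
 */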
static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    uint8_t frag_num)
{
	struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;

	composed_id.buf_id = buf - rx->dqo.bufs;
	composed_id.frag_num = frag_num;
	desc->buf_id = htole16(composed_id.all);

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_PREREAD);
	desc->buf_addr = htole64(page_dma_handle->bus_addr +
	    frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	buf->num_nic_frags++;
	gve_rx_advance_head_dqo(rx);
}

static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
	struct gve_rx_buf_dqo *hol_blocker = NULL;
	struct gve_rx_buf_dqo *buf;
	u_int ref_count;
	vm_page_t page;

	while (true) {
		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
		if (__predict_false(buf == NULL))
			break;

		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
		ref_count = atomic_load_int(&page->ref_count);

		if (VPRC_WIRE_COUNT(ref_count) != 1) {
			/* Account for one head-of-line blocker */
			if (hol_blocker != NULL)
				break;
			hol_blocker = buf;
			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
			    stailq_entry);
			continue;
		}

		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
		    stailq_entry);
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    buf, slist_entry);
		if (just_one)
			break;
	}

	if (hol_blocker != NULL)
		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
		    hol_blocker, stailq_entry);
}
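
/*
 * Post the next fragment of a free QPL buffer. If free_bufs is empty,
 * try to reclaim a single buffer from used_bufs whose page wire count
 * shows the stack has released all of its mbufs (skipping at most one
 * head-of-line blocker).
 */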
static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(buf == NULL)) {
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
		buf = SLIST_FIRST(&rx->dqo.free_bufs);
		if (__predict_false(buf == NULL))
			return (ENOBUFS);
	}

	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
	if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
		buf->next_idx = 0;
	else
		buf->next_idx++;

	/*
	 * We have posted all the frags in this buf to the NIC.
	 * - buf will enter used_bufs once the last completion arrives.
	 * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
	 *   when its wire count drops back to 1.
	 */
	if (buf->next_idx == 0)
		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
	return (0);
}

static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		if (gve_is_qpl(rx->com.priv))
			err = gve_rx_post_new_dqo_qpl_buf(rx);
		else
			err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}

static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}
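
/*
 * Mark the mbuf with valid checksum flags only when the NIC parsed the
 * headers (l3_l4_processed), reported no IP or L4 checksum errors, and
 * the L4 protocol is one the stack can use the offload for.
 */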
static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
					    CSUM_IP_VALID |
					    CSUM_DATA_VALID |
					    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, va);
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}
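
/*
 * Process one completion in the non-QPL path: validate the buffer id,
 * take the copybreak shortcut for small single-fragment packets so the
 * original buffer can be reposted immediately, and otherwise chain the
 * received cluster mbuf onto the packet under construction while
 * posting a replacement buffer. If the replacement cannot be posted and
 * the number of buffers still owned by the NIC is at or below
 * GVE_RX_DQO_MIN_PENDING_BUFS, the fragment is dropped so its buffer
 * can be reused.
 */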
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
		    compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_buf_dqo(rx, buf);
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion in compl_ring[tail] for: but we know that
	 * desc_ring[tail] has already been read by the NIC.
	 */
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
	int page_idx = buf - rx->dqo.bufs;
	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

	va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
	return (va);
}
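
/*
 * Two ways to hand a received QPL fragment to the stack: copy it into a
 * freshly allocated cluster mbuf (gve_rx_add_clmbuf_to_ctx), or wrap
 * the page itself as external mbuf storage via MEXTADD and take an
 * extra wire on it (gve_rx_add_extmbuf_to_ctx) so the page survives
 * for as long as the stack still holds the mbuf.
 */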
static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
	struct mbuf *mbuf;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	m_copyback(mbuf, 0, frag_len, va);
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
	counter_exit();
	return (0);
}

static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	struct mbuf *mbuf;
	void *page_addr;
	vm_page_t page;
	int page_idx;
	void *va;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_get(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	page_idx = buf - rx->dqo.bufs;
	page = rx->com.qpl->pages[page_idx];
	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
	va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	/*
	 * Grab an extra ref to the page so that gve_mextadd_free
	 * does not end up freeing the page while the interface exists.
	 */
	vm_page_wire(page);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
	counter_exit();

	MEXTADD(mbuf, va, frag_len,
	    gve_mextadd_free, page, page_addr,
	    0, EXT_NET_DRV);
	return (0);
}
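
/*
 * QPL counterpart of gve_rx_dqo(). The completion carries a composed
 * buf_id, so both the page index and the fragment index are validated.
 * If a replacement buffer cannot be posted while the NIC's
 * pending-buffer count is at or below GVE_RX_DQO_MIN_PENDING_BUFS, the
 * fragment is copied into a cluster mbuf so its QPL buffer can be
 * reposted at once; otherwise the page is flipped into an external
 * mbuf.
 */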
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint8_t buf_frag_num;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	composed_id.all = le16toh(compl_desc->buf_id);
	buf_id = composed_id.buf_id;
	buf_frag_num = composed_id.frag_num;

	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->num_nic_frags == 0 ||
	    buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	buf->num_nic_frags--;

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
		return;
	}

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	err = gve_rx_post_new_dqo_qpl_buf(rx);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		/*
		 * Resort to copying this fragment into a cluster mbuf
		 * when the above threshold is breached and repost the
		 * incoming buffer. If we cannot find cluster mbufs,
		 * just drop the packet (to repost its buffer).
		 */
		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (err != 0) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
			counter_exit();
			goto drop_frag;
		}
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
	} else {
		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (__predict_false(err != 0)) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
			counter_exit();
			goto drop_frag;
		}
	}

	/*
	 * Both counts need to be checked.
	 *
	 * num_nic_frags == 0 implies no pending completions
	 * but not all frags may have yet been posted.
	 *
	 * next_idx == 0 implies all frags have been posted
	 * but there might be pending completions.
	 */
	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}
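
/*
 * The completion ring is consumed by polling a generation bit instead
 * of a head pointer: an entry whose generation field still equals
 * cur_gen_bit is treated as not yet written by the NIC, and cur_gen_bit
 * is toggled every time tail wraps back to 0 so that entries left over
 * from the previous pass are ignored.
 */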
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (compl_desc->generation == rx->dqo.cur_gen_bit)
			break;
		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		rmb();

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		if (gve_is_qpl(priv))
			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
		else
			gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	if (gve_is_qpl(priv))
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
	return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}