/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;
	int i;

	if (gve_is_qpl(rx->com.priv))
		return;

	for (i = 0; i < rx->dqo.buf_cnt; i++) {
		buf = &rx->dqo.bufs[i];
		if (!buf->mbuf)
			continue;

		bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
		m_freem(buf->mbuf);
		buf->mbuf = NULL;
	}
}

void
gve_rx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int j;

	if (rx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&rx->dqo.compl_ring_mem);
		rx->dqo.compl_ring = NULL;
	}

	if (rx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->dqo.desc_ring = NULL;
	}

	if (rx->dqo.bufs != NULL) {
		gve_free_rx_mbufs_dqo(rx);

		if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) {
			for (j = 0; j < rx->dqo.buf_cnt; j++)
				if (rx->dqo.bufs[j].mapped)
					bus_dmamap_destroy(rx->dqo.buf_dmatag,
					    rx->dqo.bufs[j].dmamap);
		}

		free(rx->dqo.bufs, M_GVE);
		rx->dqo.bufs = NULL;
	}

	if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag)
		bus_dma_tag_destroy(rx->dqo.buf_dmatag);

	if (com->qpl != NULL) {
		gve_free_qpl(priv, com->qpl);
		com->qpl = NULL;
	}
}
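
/*
 * Allocates the descriptor ring, the completion ring, and the buffer
 * tracking state for one DQO rx queue. In QPL mode the buffers come from
 * a queue-page-list registered with the device; otherwise a DMA tag and
 * per-buffer maps are created so that cluster mbufs can be loaded directly.
 */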
int
gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int err;
	int j;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for rx ring %d", i);
		goto abort;
	}
	rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr;
	rx->dqo.mask = priv->rx_desc_cnt - 1;

	rx->dqo.buf_cnt = gve_is_qpl(priv) ? GVE_RX_NUM_QPL_PAGES_DQO :
	    priv->rx_desc_cnt;
	rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo),
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		rx->com.qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues,
		    GVE_RX_NUM_QPL_PAGES_DQO, /*single_kva=*/false);
		if (rx->com.qpl == NULL) {
			device_printf(priv->dev,
			    "Failed to alloc QPL for rx ring %d", i);
			err = ENOMEM;
			goto abort;
		}
		return (0);
	}

	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MCLBYTES,			/* maxsize */
	    1,				/* nsegments */
	    MCLBYTES,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &rx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev,
		    "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		goto abort;
	}

	for (j = 0; j < rx->dqo.buf_cnt; j++) {
		err = bus_dmamap_create(rx->dqo.buf_dmatag, 0,
		    &rx->dqo.bufs[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating rx buf dmamap %d: %d",
			    j, err);
			goto abort;
		}
		rx->dqo.bufs[j].mapped = true;
	}

	return (0);

abort:
	gve_rx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int entries;
	int i;

	entries = com->priv->rx_desc_cnt;
	for (i = 0; i < entries; i++)
		rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++)
		rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){};

	bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}
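
/*
 * Resets the ring indices and buffer lists for an rx queue. Called on
 * queue (re)start: any mbufs still attached to buffers are released, and
 * QPL pages whose wire count shows they are still lent out to the stack
 * are parked on used_bufs instead of free_bufs.
 */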
void
gve_clear_rx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	int j;

	rx->fill_cnt = 0;
	rx->cnt = 0;
	rx->dqo.mask = priv->rx_desc_cnt - 1;
	rx->dqo.head = 0;
	rx->dqo.tail = 0;
	rx->dqo.cur_gen_bit = 0;

	gve_rx_clear_desc_ring_dqo(rx);
	gve_rx_clear_compl_ring_dqo(rx);

	gve_free_rx_mbufs_dqo(rx);

	if (gve_is_qpl(priv)) {
		SLIST_INIT(&rx->dqo.free_bufs);
		STAILQ_INIT(&rx->dqo.used_bufs);

		for (j = 0; j < rx->dqo.buf_cnt; j++) {
			struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j];

			vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs];
			u_int ref_count = atomic_load_int(&page->ref_count);

			/*
			 * An ifconfig down+up might see pages still in flight
			 * from the previous innings.
			 */
			if (VPRC_WIRE_COUNT(ref_count) == 1)
				SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
				    buf, slist_entry);
			else
				STAILQ_INSERT_TAIL(&rx->dqo.used_bufs,
				    buf, stailq_entry);

			buf->num_nic_frags = 0;
			buf->next_idx = 0;
		}
	} else {
		SLIST_INIT(&rx->dqo.free_bufs);
		for (j = 0; j < rx->dqo.buf_cnt; j++)
			SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
			    &rx->dqo.bufs[j], slist_entry);
	}
}

int
gve_rx_intr_dqo(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_rx_advance_head_dqo(struct gve_rx_ring *rx)
{
	rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask;
	rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */

	if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) {
		bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset,
		    rx->dqo.head);
	}
}

static void
gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	struct gve_rx_desc_dqo *desc;

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_PREREAD);

	desc = &rx->dqo.desc_ring[rx->dqo.head];
	desc->buf_id = htole16(buf - rx->dqo.bufs);
	desc->buf_addr = htole64(buf->addr);

	gve_rx_advance_head_dqo(rx);
}
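
/*
 * Refill path for the non-QPL mode: take a buffer off free_bufs, attach a
 * fresh cluster mbuf, DMA-load it, and hand the bus address to the NIC via
 * a descriptor at dqo.head. On any failure the buffer is returned to
 * free_bufs so a later refill attempt can retry it.
 */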
static int
gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how)
{
	struct gve_rx_buf_dqo *buf;
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(!buf)) {
		device_printf(rx->com.priv->dev,
		    "Unexpected empty free bufs list\n");
		return (ENOBUFS);
	}
	SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);

	buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR);
	if (__predict_false(!buf->mbuf)) {
		err = ENOMEM;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1);
		counter_exit();
		goto abort_with_buf;
	}
	buf->mbuf->m_len = MCLBYTES;

	err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap,
	    buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1);
		counter_exit();
		goto abort_with_mbuf;
	}
	KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1"));
	buf->addr = segs[0].ds_addr;

	gve_rx_post_buf_dqo(rx, buf);
	return (0);

abort_with_mbuf:
	m_freem(buf->mbuf);
	buf->mbuf = NULL;
abort_with_buf:
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);
	return (err);
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf)
{
	return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs]));
}

static void
gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf,
    uint8_t frag_num)
{
	struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head];
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;

	composed_id.buf_id = buf - rx->dqo.bufs;
	composed_id.frag_num = frag_num;
	desc->buf_id = htole16(composed_id.all);

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_PREREAD);
	desc->buf_addr = htole64(page_dma_handle->bus_addr +
	    frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	buf->num_nic_frags++;
	gve_rx_advance_head_dqo(rx);
}
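
/*
 * Walks used_bufs and moves any page whose wire count has dropped back to
 * 1 (i.e. no mbuf in the stack still references it) onto free_bufs. At most
 * one head-of-line blocker is skipped over so that a single long-lived mbuf
 * does not stall reclamation of the pages queued behind it.
 */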
static void
gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one)
{
	struct gve_rx_buf_dqo *hol_blocker = NULL;
	struct gve_rx_buf_dqo *buf;
	u_int ref_count;
	vm_page_t page;

	while (true) {
		buf = STAILQ_FIRST(&rx->dqo.used_bufs);
		if (__predict_false(buf == NULL))
			break;

		page = rx->com.qpl->pages[buf - rx->dqo.bufs];
		ref_count = atomic_load_int(&page->ref_count);

		if (VPRC_WIRE_COUNT(ref_count) != 1) {
			/* Account for one head-of-line blocker */
			if (hol_blocker != NULL)
				break;
			hol_blocker = buf;
			STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
			    stailq_entry);
			continue;
		}

		STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs,
		    stailq_entry);
		SLIST_INSERT_HEAD(&rx->dqo.free_bufs,
		    buf, slist_entry);
		if (just_one)
			break;
	}

	if (hol_blocker != NULL)
		STAILQ_INSERT_HEAD(&rx->dqo.used_bufs,
		    hol_blocker, stailq_entry);
}

static int
gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx)
{
	struct gve_rx_buf_dqo *buf;

	buf = SLIST_FIRST(&rx->dqo.free_bufs);
	if (__predict_false(buf == NULL)) {
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true);
		buf = SLIST_FIRST(&rx->dqo.free_bufs);
		if (__predict_false(buf == NULL))
			return (ENOBUFS);
	}

	gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx);
	if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1)
		buf->next_idx = 0;
	else
		buf->next_idx++;

	/*
	 * We have posted all the frags in this buf to the NIC.
	 * - buf will enter used_bufs once the last completion arrives.
	 * - It will re-enter free_bufs in gve_rx_maybe_extract_from_used_bufs
	 *   when its wire count drops back to 1.
	 */
	if (buf->next_idx == 0)
		SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry);
	return (0);
}

static void
gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how)
{
	uint32_t num_pending_bufs;
	uint32_t num_to_post;
	uint32_t i;
	int err;

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	num_to_post = rx->dqo.mask - num_pending_bufs;

	for (i = 0; i < num_to_post; i++) {
		if (gve_is_qpl(rx->com.priv))
			err = gve_rx_post_new_dqo_qpl_buf(rx);
		else
			err = gve_rx_post_new_mbuf_dqo(rx, how);
		if (err)
			break;
	}
}

void
gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx)
{
	gve_rx_post_buffers_dqo(rx, M_WAITOK);
}
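
/*
 * Maps the parsed packet type reported by the device to an RSS hash type
 * understood by the stack; *is_tcp is set so that the caller can decide
 * whether the mbuf is a candidate for LRO.
 */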
static void
gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp)
{
	switch (ptype->l3_type) {
	case GVE_L3_TYPE_IPV4:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		}
		break;
	case GVE_L3_TYPE_IPV6:
		switch (ptype->l4_type) {
		case GVE_L4_TYPE_TCP:
			*is_tcp = true;
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
			break;
		case GVE_L4_TYPE_UDP:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
			break;
		default:
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		}
		break;
	default:
		M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
	}
}

static void
gve_rx_set_csum_flags_dqo(struct mbuf *mbuf,
    struct gve_rx_compl_desc_dqo *desc,
    struct gve_ptype *ptype)
{
	/* HW did not identify and process L3 and L4 headers. */
	if (__predict_false(!desc->l3_l4_processed))
		return;

	if (ptype->l3_type == GVE_L3_TYPE_IPV4) {
		if (__predict_false(desc->csum_ip_err ||
		    desc->csum_external_ip_err))
			return;
	} else if (ptype->l3_type == GVE_L3_TYPE_IPV6) {
		/* Checksum should be skipped if this flag is set. */
		if (__predict_false(desc->ipv6_ex_add))
			return;
	}

	if (__predict_false(desc->csum_l4_err))
		return;

	switch (ptype->l4_type) {
	case GVE_L4_TYPE_TCP:
	case GVE_L4_TYPE_UDP:
	case GVE_L4_TYPE_ICMP:
	case GVE_L4_TYPE_SCTP:
		mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
		    CSUM_IP_VALID |
		    CSUM_DATA_VALID |
		    CSUM_PSEUDO_HDR;
		mbuf->m_pkthdr.csum_data = 0xffff;
		break;
	default:
		break;
	}
}

static void
gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc)
{
	struct mbuf *mbuf = rx->ctx.mbuf_head;
	if_t ifp = rx->com.priv->ifp;
	struct gve_ptype *ptype;
	bool do_if_input = true;
	bool is_tcp = false;

	ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type];
	gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp);
	mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash);
	gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype);

	mbuf->m_pkthdr.rcvif = ifp;
	mbuf->m_pkthdr.len = rx->ctx.total_size;

	if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) &&
	    is_tcp &&
	    (rx->lro.lro_cnt != 0) &&
	    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
		do_if_input = false;

	if (do_if_input)
		if_input(ifp, mbuf);

	counter_enter();
	counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size);
	counter_u64_add_protected(rx->stats.rpackets, 1);
	counter_exit();

	rx->ctx = (struct gve_rx_ctx){};
}

static int
gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va,
    struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len)
{
	struct mbuf *mbuf;

	mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (__predict_false(mbuf == NULL))
		return (ENOMEM);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
	counter_exit();

	m_copyback(mbuf, 0, frag_len, va);
	mbuf->m_len = frag_len;

	rx->ctx.mbuf_head = mbuf;
	rx->ctx.mbuf_tail = mbuf;
	rx->ctx.total_size += frag_len;

	gve_rx_input_mbuf_dqo(rx, compl_desc);
	return (0);
}
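
/*
 * Handles one rx completion in the non-QPL mode: validates the buffer id,
 * takes the copybreak shortcut for small single-fragment packets, chains
 * larger fragments onto the per-ring rx context, and hands the completed
 * packet to gve_rx_input_mbuf_dqo() on the end-of-packet fragment.
 */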
static void
gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	buf_id = le16toh(compl_desc->buf_id);
	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->mbuf == NULL)) {
		device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*),
		    compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_buf_dqo(rx, buf);
		return;
	}

	/*
	 * Although buffer completions may arrive out of order, buffer
	 * descriptors are consumed by the NIC in order. That is, the
	 * buffer at desc_ring[tail] might not be the buffer we got the
	 * completion compl_ring[tail] for: but we know that desc_ring[tail]
	 * has already been read by the NIC.
	 */
	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;

	/*
	 * For every fragment received, try to post a new buffer.
	 *
	 * Failures are okay but only so long as the number of outstanding
	 * buffers is above a threshold.
	 *
	 * Beyond that we drop new packets to reuse their buffers.
	 * Without ensuring a minimum number of buffers for the NIC to
	 * put packets in, we run the risk of getting the queue stuck
	 * for good.
	 */
	err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		counter_enter();
		counter_u64_add_protected(
		    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_exit();
		goto drop_frag;
	}

	buf->mbuf->m_len = frag_len;
	ctx->total_size += frag_len;
	if (ctx->mbuf_tail == NULL) {
		ctx->mbuf_head = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	} else {
		buf->mbuf->m_flags &= ~M_PKTHDR;
		ctx->mbuf_tail->m_next = buf->mbuf;
		ctx->mbuf_tail = buf->mbuf;
	}

	/*
	 * Disassociate the mbuf from buf and surrender buf to the free list to
	 * be used by a future mbuf.
	 */
	bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap);
	buf->mbuf = NULL;
	buf->addr = 0;
	SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_buf_dqo(rx, buf);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static void *
gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx,
    struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num)
{
	int page_idx = buf - rx->dqo.bufs;
	void *va = rx->com.qpl->dmas[page_idx].cpu_addr;

	va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);
	return (va);
}
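
/*
 * Fallback for the QPL mode when posting a replacement buffer failed and
 * the ring is running low: copy the fragment out of the QPL page into a
 * freshly allocated cluster mbuf so that the page itself can be reposted
 * immediately.
 */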
static int
gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);
	struct mbuf *mbuf;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	m_copyback(mbuf, 0, frag_len, va);
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
	counter_exit();
	return (0);
}

static int
gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx,
    struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf,
    uint8_t buf_frag_num, uint16_t frag_len)
{
	struct mbuf *mbuf;
	void *page_addr;
	vm_page_t page;
	int page_idx;
	void *va;

	if (ctx->mbuf_tail == NULL) {
		mbuf = m_gethdr(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		mbuf = m_get(M_NOWAIT, MT_DATA);
		if (mbuf == NULL)
			return (ENOMEM);
		ctx->mbuf_tail->m_next = mbuf;
		ctx->mbuf_tail = mbuf;
	}

	mbuf->m_len = frag_len;
	ctx->total_size += frag_len;

	page_idx = buf - rx->dqo.bufs;
	page = rx->com.qpl->pages[page_idx];
	page_addr = rx->com.qpl->dmas[page_idx].cpu_addr;
	va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE);

	/*
	 * Grab an extra ref to the page so that gve_mextadd_free
	 * does not end up freeing the page while the interface exists.
	 */
	vm_page_wire(page);

	counter_enter();
	counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
	counter_exit();

	MEXTADD(mbuf, va, frag_len,
	    gve_mextadd_free, page, page_addr,
	    0, EXT_NET_DRV);
	return (0);
}
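
/*
 * QPL-mode counterpart of gve_rx_dqo(): the completion carries a composed
 * buffer id (page index plus fragment index within the page). Fragments
 * are normally attached zero-copy as external mbufs backed by the wired
 * QPL page; copying into a cluster mbuf is the fallback when buffer
 * posting falls behind.
 */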
static void
gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_compl_desc_dqo *compl_desc,
    int *work_done)
{
	bool is_last_frag = compl_desc->end_of_packet != 0;
	union gve_rx_qpl_buf_id_dqo composed_id;
	struct gve_dma_handle *page_dma_handle;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct gve_rx_buf_dqo *buf;
	uint32_t num_pending_bufs;
	uint8_t buf_frag_num;
	uint16_t frag_len;
	uint16_t buf_id;
	int err;

	composed_id.all = le16toh(compl_desc->buf_id);
	buf_id = composed_id.buf_id;
	buf_frag_num = composed_id.frag_num;

	if (__predict_false(buf_id >= rx->dqo.buf_cnt)) {
		device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n",
		    buf_id, rx->com.id);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}
	buf = &rx->dqo.bufs[buf_id];
	if (__predict_false(buf->num_nic_frags == 0 ||
	    buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) {
		device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d "
		    "with buf_frag_num %d and num_nic_frags %d, issuing reset\n",
		    buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags);
		gve_schedule_reset(priv);
		goto drop_frag_clear_ctx;
	}

	buf->num_nic_frags--;

	if (__predict_false(ctx->drop_pkt))
		goto drop_frag;

	if (__predict_false(compl_desc->rx_error)) {
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_exit();
		goto drop_frag;
	}

	page_dma_handle = gve_get_page_dma_handle(rx, buf);
	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	frag_len = compl_desc->packet_len;
	if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) {
		void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num);

		err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len);
		if (__predict_false(err != 0))
			goto drop_frag;
		(*work_done)++;
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
		return;
	}

	num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask;
	err = gve_rx_post_new_dqo_qpl_buf(rx);
	if (__predict_false(err != 0 &&
	    num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) {
		/*
		 * Resort to copying this fragment into a cluster mbuf
		 * when the above threshold is breached and repost the
		 * incoming buffer. If we cannot find cluster mbufs,
		 * just drop the packet (to repost its buffer).
		 */
		err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (err != 0) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_buf_post_fail, 1);
			counter_exit();
			goto drop_frag;
		}
		gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);
	} else {
		err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf,
		    buf_frag_num, frag_len);
		if (__predict_false(err != 0)) {
			counter_enter();
			counter_u64_add_protected(
			    rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
			counter_exit();
			goto drop_frag;
		}
	}

	/*
	 * Both the counts need to be checked.
	 *
	 * num_nic_frags == 0 implies no pending completions
	 * but not all frags may have yet been posted.
	 *
	 * next_idx == 0 implies all frags have been posted
	 * but there might be pending completions.
	 */
	if (buf->num_nic_frags == 0 && buf->next_idx == 0)
		STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry);

	if (is_last_frag) {
		gve_rx_input_mbuf_dqo(rx, compl_desc);
		(*work_done)++;
	}
	return;

drop_frag:
	/* Clear the earlier frags if there were any */
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
	/* Drop the rest of the pkt if there are more frags */
	ctx->drop_pkt = true;
	/* Reuse the dropped frag's buffer */
	gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num);

	if (is_last_frag)
		goto drop_frag_clear_ctx;
	return;

drop_frag_clear_ctx:
	counter_enter();
	counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
	counter_exit();
	m_freem(ctx->mbuf_head);
	rx->ctx = (struct gve_rx_ctx){};
}

static uint8_t
gve_rx_get_gen_bit(uint8_t *desc)
{
	uint8_t byte;

	/*
	 * Prevent generation bit from being read after the rest of the
	 * descriptor.
	 */
	byte = atomic_load_acq_8(desc + GVE_RX_DESC_DQO_GEN_BYTE_OFFSET);
	return ((byte & GVE_RX_DESC_DQO_GEN_BIT_MASK) != 0);
}
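
/*
 * Completion ring poll loop, run from the cleanup taskqueue. A descriptor
 * belongs to the driver only once its generation bit differs from
 * dqo.cur_gen_bit; the bit flips each time the tail wraps around the ring.
 * Returns true if the budget was exhausted so the caller can reschedule.
 */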
static bool
gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	struct gve_rx_compl_desc_dqo *compl_desc;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	while (work_done < budget) {
		bus_dmamap_sync(rx->dqo.compl_ring_mem.tag,
		    rx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &rx->dqo.compl_ring[rx->dqo.tail];
		if (gve_rx_get_gen_bit((uint8_t *)compl_desc) ==
		    rx->dqo.cur_gen_bit)
			break;

		rx->cnt++;
		rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask;
		rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0);

		if (gve_is_qpl(priv))
			gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done);
		else
			gve_rx_dqo(priv, rx, compl_desc, &work_done);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	gve_rx_post_buffers_dqo(rx, M_NOWAIT);
	if (gve_is_qpl(priv))
		gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false);
	return (work_done == budget);
}

void
gve_rx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) {
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}