/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet6.h"

#include "gve.h"
#include "gve_dqo.h"

static void
gve_unmap_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
	    BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
}

static void
gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	pending_pkt->qpl_buf_head = -1;
	pending_pkt->num_qpl_bufs = 0;
}

static void
gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int i;

	for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
		pending_pkt = &tx->dqo.pending_pkts[i];
		if (!pending_pkt->mbuf)
			continue;

		if (gve_is_qpl(tx->com.priv))
			gve_clear_qpl_pending_pkt(pending_pkt);
		else
			gve_unmap_packet(tx, pending_pkt);

		m_freem(pending_pkt->mbuf);
		pending_pkt->mbuf = NULL;
	}
}

void
gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;
	int j;

	if (tx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&tx->desc_ring_mem);
		tx->dqo.desc_ring = NULL;
	}

	if (tx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
		tx->dqo.compl_ring = NULL;
	}

	if (tx->dqo.pending_pkts != NULL) {
		gve_free_tx_mbufs_dqo(tx);

		if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
			for (j = 0; j < tx->dqo.num_pending_pkts; j++)
				if (tx->dqo.pending_pkts[j].state !=
				    GVE_PACKET_STATE_UNALLOCATED)
					bus_dmamap_destroy(tx->dqo.buf_dmatag,
					    tx->dqo.pending_pkts[j].dmamap);
		}

		free(tx->dqo.pending_pkts, M_GVE);
		tx->dqo.pending_pkts = NULL;
	}

	if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
		bus_dma_tag_destroy(tx->dqo.buf_dmatag);

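	/*
	 * In QPL mode the Tx buffers live inside the QPL pages themselves;
	 * qpl_bufs is only the free-list index array. The QPL itself is
	 * released below.
	 */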
	if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
		free(tx->dqo.qpl_bufs, M_GVE);
		tx->dqo.qpl_bufs = NULL;
	}

	if (com->qpl != NULL) {
		gve_free_qpl(priv, com->qpl);
		com->qpl = NULL;
	}
}

static int
gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
{
	struct gve_priv *priv = tx->com.priv;
	int err;
	int j;

	/*
	 * DMA tag for mapping Tx mbufs
	 * The maxsize, nsegments, and maxsegsize params should match
	 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
	 */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    GVE_TSO_MAXSIZE_DQO,	/* maxsize */
	    GVE_TX_MAX_DATA_DESCS_DQO,	/* nsegments */
	    GVE_TX_MAX_BUF_SIZE_DQO,	/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &tx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		return (err);
	}

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
		    &tx->dqo.pending_pkts[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating pending pkt dmamap %d: %d",
			    j, err);
			return (err);
		}
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}

	return (0);
}

int
gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	uint16_t num_pending_pkts;
	int err;

	/* Descriptor ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;

	/* Completion ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;

	/*
	 * pending_pkts array
	 *
	 * The max number of pending packets determines the maximum number of
	 * descriptors which may be written to the completion queue.
	 *
	 * We must set the number small enough to make sure we never overrun
	 * the completion queue.
	 */
	num_pending_pkts = priv->tx_desc_cnt;
	/*
	 * Reserve space for descriptor completions, which will be reported at
	 * most every GVE_TX_MIN_RE_INTERVAL packets.
	 */
	num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;

	tx->dqo.num_pending_pkts = num_pending_pkts;
	tx->dqo.pending_pkts = malloc(
	    sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt;

		tx->com.qpl = gve_alloc_qpl(priv, i, GVE_TX_NUM_QPL_PAGES_DQO,
		    /*single_kva*/false);
		if (tx->com.qpl == NULL) {
			device_printf(priv->dev,
			    "Failed to alloc QPL for tx ring %d", i);
			err = ENOMEM;
			goto abort;
		}

		qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		tx->dqo.qpl_bufs = malloc(
		    sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
		    M_GVE, M_WAITOK | M_ZERO);
	} else
		gve_tx_alloc_rda_fields_dqo(tx);
	return (0);

abort:
	gve_tx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
    struct gve_tx_metadata_dqo *metadata)
{
	uint32_t hash = mbuf->m_pkthdr.flowid;
	uint16_t path_hash;

	metadata->version = GVE_TX_METADATA_VERSION_DQO;
	if (hash) {
		path_hash = hash ^ (hash >> 16);

		path_hash &= (1 << 15) - 1;
		if (__predict_false(path_hash == 0))
			path_hash = ~path_hash;

		metadata->path_hash = path_hash;
	}
}

static void
gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
    uint32_t *desc_idx, uint32_t len, uint64_t addr,
    int16_t compl_tag, bool eop, bool csum_enabled)
{
	while (len > 0) {
		struct gve_tx_pkt_desc_dqo *desc =
		    &tx->dqo.desc_ring[*desc_idx].pkt;
		uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
		bool cur_eop = eop && cur_len == len;

		*desc = (struct gve_tx_pkt_desc_dqo){
			.buf_addr = htole64(addr),
			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
			.end_of_packet = cur_eop,
			.checksum_offload_enable = csum_enabled,
			.compl_tag = htole16(compl_tag),
			.buf_size = cur_len,
		};

		addr += cur_len;
		len -= cur_len;
		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	}
}

static void
gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
    const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
    int header_len)
{
	*desc = (struct gve_tx_tso_context_desc_dqo){
		.header_len = header_len,
		.cmd_dtype = {
			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
			.tso = 1,
		},
		.flex0 = metadata->bytes[0],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
	};
	desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
	desc->mss = mbuf->m_pkthdr.tso_segsz;
}

static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
    const struct gve_tx_metadata_dqo *metadata)
{
	*desc = (struct gve_tx_general_context_desc_dqo){
		.flex0 = metadata->bytes[0],
		.flex1 = metadata->bytes[1],
		.flex2 = metadata->bytes[2],
		.flex3 = metadata->bytes[3],
		.flex4 = metadata->bytes[4],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
	};
}

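/*
 * Ensure the first 'len' bytes of the chain are contiguous in the leading
 * mbuf so the headers can be read and rewritten through a single pointer;
 * the enclosing function returns EINVAL if the pullup fails.
 */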
#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (EINVAL);		\
	}						\
} while (0)

static int
gve_prep_tso(struct mbuf *mbuf, int *header_len)
{
	uint8_t l3_off, l4_off = 0;
	struct ether_header *eh;
	struct tcphdr *th;
	u_short csum;

	PULLUP_HDR(mbuf, sizeof(*eh));
	eh = mtod(mbuf, struct ether_header *);
	KASSERT(eh->ether_type != ETHERTYPE_VLAN,
	    ("VLAN-tagged packets not supported"));
	l3_off = ETHER_HDR_LEN;

#ifdef INET6
	if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
		struct ip6_hdr *ip6;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
		ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + sizeof(struct ip6_hdr);
		csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
		    /*csum=*/0);
	} else
#endif
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		struct ip *ip;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
		ip = (struct ip *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + (ip->ip_hl << 2);
		csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htons(IPPROTO_TCP));
	}

	PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr));
	th = (struct tcphdr *)(mtodo(mbuf, l4_off));
	*header_len = l4_off + (th->th_off << 2);

	/*
	 * Hardware requires the th->th_sum to not include the TCP payload,
	 * hence we recompute the csum with it excluded.
	 */
	th->th_sum = csum;

	return (0);
}

static int
gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
    bool is_tso, uint32_t *desc_idx)
{
	struct gve_tx_general_context_desc_dqo *gen_desc;
	struct gve_tx_tso_context_desc_dqo *tso_desc;
	struct gve_tx_metadata_dqo metadata;
	int header_len;
	int err;

	metadata = (struct gve_tx_metadata_dqo){0};
	gve_extract_tx_metadata_dqo(mbuf, &metadata);

	if (is_tso) {
		err = gve_prep_tso(mbuf, &header_len);
		if (__predict_false(err)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_delayed_pkt_tsoerr, 1);
			counter_exit();
			return (err);
		}

		tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
		gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);

		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
		counter_enter();
		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
		counter_exit();
	}

	gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
	gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
	*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	return (0);
}

static int
gve_map_mbuf_dqo(struct gve_tx_ring *tx,
    struct mbuf **mbuf, bus_dmamap_t dmamap,
    bus_dma_segment_t *segs, int *nsegs, int attempt)
{
	struct mbuf *m_new = NULL;
	int err;

	err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
	    *mbuf, segs, nsegs, BUS_DMA_NOWAIT);

	switch (err) {
	case __predict_true(0):
		break;
	case EFBIG:
		if (__predict_false(attempt > 0))
			goto abort;

		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_collapse, 1);
		counter_exit();

		/* Try m_collapse before m_defrag */
		m_new = m_collapse(*mbuf, M_NOWAIT,
		    GVE_TX_MAX_DATA_DESCS_DQO);
		if (m_new == NULL) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag, 1);
			counter_exit();
			m_new = m_defrag(*mbuf, M_NOWAIT);
		}

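		/* Neither m_collapse nor m_defrag produced a usable chain. */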
		if (__predict_false(m_new == NULL)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag_err, 1);
			counter_exit();

			m_freem(*mbuf);
			*mbuf = NULL;
			err = ENOMEM;
			goto abort;
		} else {
			*mbuf = m_new;
			return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
			    segs, nsegs, ++attempt));
		}
	case ENOMEM:
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_dmamap_enomem_err, 1);
		counter_exit();
		goto abort;
	default:
		goto abort;
	}

	return (0);

abort:
	counter_enter();
	counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
	counter_exit();
	return (err);
}

static uint32_t
num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
{
	uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
	    tx->dqo.desc_mask;

	return (tx->dqo.desc_mask - num_used);
}

static struct gve_tx_pending_pkt_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
	int32_t index = tx->dqo.free_pending_pkts_csm;
	struct gve_tx_pending_pkt_dqo *pending_pkt;

	/*
	 * No pending packets available in the consumer list,
	 * try to steal the producer list.
	 */
	if (__predict_false(index == -1)) {
		tx->dqo.free_pending_pkts_csm = atomic_swap_32(
		    &tx->dqo.free_pending_pkts_prd, -1);

		index = tx->dqo.free_pending_pkts_csm;
		if (__predict_false(index == -1))
			return (NULL);
	}

	pending_pkt = &tx->dqo.pending_pkts[index];

	/* Remove pending_pkt from the consumer list */
	tx->dqo.free_pending_pkts_csm = pending_pkt->next;
	pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;

	gve_set_timestamp(&pending_pkt->enqueue_time_sec);

	return (pending_pkt);
}

static void
gve_free_pending_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	int index = pending_pkt - tx->dqo.pending_pkts;
	int32_t old_head;

	pending_pkt->state = GVE_PACKET_STATE_FREE;

	gve_invalidate_timestamp(&pending_pkt->enqueue_time_sec);

	/* Add pending_pkt to the producer list */
	while (true) {
		old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);

		pending_pkt->next = old_head;
		if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
		    old_head, index))
			break;
	}
}

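/*
 * The pending-packet free list above is split in two: a consumer list that
 * only the xmit path touches, and a producer list appended to atomically by
 * whichever context frees a packet. When the consumer list runs dry,
 * gve_alloc_pending_packet() steals the whole producer list with a single
 * atomic_swap_32().
 */
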
/*
 * Has the side-effect of retrieving the value of the last desc index
 * processed by the NIC. hw_tx_head is written to by the completions-processing
 * taskqueue upon receiving descriptor-completions.
 */
static bool
gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
{
	if (needed_descs <= num_avail_desc_ring_slots(tx))
		return (true);

	tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
	if (needed_descs > num_avail_desc_ring_slots(tx)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_descring, 1);
		counter_exit();
		return (false);
	}

	return (true);
}

static void
gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
{
	uint32_t last_report_event_interval;
	uint32_t last_desc_idx;

	last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
	last_report_event_interval =
	    (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;

	if (__predict_false(last_report_event_interval >=
	    GVE_TX_MIN_RE_INTERVAL)) {
		tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
		tx->dqo.last_re_idx = last_desc_idx;
	}
}

static bool
gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
{
	uint32_t available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);

	tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
	    &tx->dqo.qpl_bufs_produced);
	available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);
	return (false);
}

static int32_t
gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
{
	int32_t buf = tx->dqo.free_qpl_bufs_csm;

	if (__predict_false(buf == -1)) {
		tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
		    &tx->dqo.free_qpl_bufs_prd, -1);
		buf = tx->dqo.free_qpl_bufs_csm;
		if (__predict_false(buf == -1))
			return (-1);
	}

	tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
	tx->dqo.qpl_bufs_consumed++;
	return (buf);
}

/*
 * Tx buffer i corresponds to
 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
 */
static void
gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
    int32_t index, void **va, bus_addr_t *dma_addr)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
	    GVE_TX_BUF_SHIFT_DQO;

	*va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
	*dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);

	return (&tx->com.qpl->dmas[page_id]);
}

static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
    struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
    bool csum_enabled, int16_t completion_tag,
    uint32_t *desc_idx)
{
	int32_t pkt_len = mbuf->m_pkthdr.len;
	struct gve_dma_handle *dma;
	uint32_t copy_offset = 0;
	int32_t prev_buf = -1;
	uint32_t copy_len;
	bus_addr_t addr;
	int32_t buf;
	void *va;

	MPASS(pkt->num_qpl_bufs == 0);
	MPASS(pkt->qpl_buf_head == -1);

	while (copy_offset < pkt_len) {
		buf = gve_tx_alloc_qpl_buf(tx);
		/* We already checked for availability */
		MPASS(buf != -1);

		gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
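		/* Copy at most one QPL buffer's worth per descriptor. */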
		copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
		m_copydata(mbuf, copy_offset, copy_len, va);
		copy_offset += copy_len;

		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
		    copy_len, addr, completion_tag,
		    /*eop=*/copy_offset == pkt_len,
		    csum_enabled);

		/* Link all the qpl bufs for a packet */
		if (prev_buf == -1)
			pkt->qpl_buf_head = buf;
		else
			tx->dqo.qpl_bufs[prev_buf] = buf;

		prev_buf = buf;
		pkt->num_qpl_bufs++;
	}

	tx->dqo.qpl_bufs[buf] = -1;
}

int
gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
{
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
	/* Check if we have enough room in the desc ring */
	total_descs_needed = 1 + /* general_ctx_desc */
	    nsegs + /* pkt_desc */
	    (is_tso ? 1 : 0); /* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
		counter_exit();
		return (ENOBUFS);
	}

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;
	pkt->mbuf = mbuf;

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort;

	gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
	    has_csum_flag, completion_tag, &desc_idx);

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

int
gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
{
	bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	struct mbuf *mbuf = *mbuf_ptr;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;
	int i;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	/*
	 * This mbuf might end up needing more than 1 pkt desc.
	 * The actual number, `nsegs` is known only after the
	 * expensive gve_map_mbuf_dqo call. This check beneath
	 * exists to fail early when the desc ring is really full.
	 */
	total_descs_needed = 1 + /* general_ctx_desc */
	    1 + /* pkt_desc */
	    (is_tso ? 1 : 0); /* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;

	err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
	    segs, &nsegs, /*attempt=*/0);
	if (err)
		goto abort;
	mbuf = *mbuf_ptr; /* gve_map_mbuf_dqo might replace the mbuf chain */
	pkt->mbuf = mbuf;

	total_descs_needed = 1 + /* general_ctx_desc */
	    nsegs + /* pkt_desc */
	    (is_tso ? 1 : 0); /* tso_ctx_desc */
	if (__predict_false(
	    !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
		err = ENOBUFS;
		goto abort_with_dma;
	}

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort_with_dma;

	bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
	for (i = 0; i < nsegs; i++) {
		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
		    segs[i].ds_len, segs[i].ds_addr,
		    completion_tag, /*eop=*/i == (nsegs - 1),
		    has_csum_flag);
	}

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort_with_dma:
	gve_unmap_packet(tx, pkt);
abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

static void
gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pkt)
{
	int32_t buf = pkt->qpl_buf_head;
	struct gve_dma_handle *dma;
	int32_t qpl_buf_tail;
	int32_t old_head;
	int i;

	for (i = 0; i < pkt->num_qpl_bufs; i++) {
		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
		qpl_buf_tail = buf;
		buf = tx->dqo.qpl_bufs[buf];
	}
	MPASS(buf == -1);
	buf = qpl_buf_tail;

	while (true) {
		old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
		tx->dqo.qpl_bufs[buf] = old_head;

		/*
		 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
		 * is visible only after the linked list from this pkt is
		 * attached above to old_head.
		 */
		if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
		    old_head, pkt->qpl_buf_head))
			break;
	}
	/*
	 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
	 * visible only after the update to dqo.free_qpl_bufs_prd above.
	 */
	atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);

	gve_clear_qpl_pending_pkt(pkt);
}

static uint64_t
gve_handle_packet_completion(struct gve_priv *priv,
    struct gve_tx_ring *tx, uint16_t compl_tag)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int32_t pkt_len;

	if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
		device_printf(priv->dev, "Invalid TX completion tag: %d\n",
		    compl_tag);
		return (0);
	}

	pending_pkt = &tx->dqo.pending_pkts[compl_tag];

	/* Packet is allocated but not pending data completion. */
	if (__predict_false(pending_pkt->state !=
	    GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
		device_printf(priv->dev,
		    "No pending data completion: %d\n", compl_tag);
		return (0);
	}

	pkt_len = pending_pkt->mbuf->m_pkthdr.len;

	if (gve_is_qpl(priv))
		gve_reap_qpl_bufs_dqo(tx, pending_pkt);
	else
		gve_unmap_packet(tx, pending_pkt);

	m_freem(pending_pkt->mbuf);
	pending_pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pending_pkt);
	return (pkt_len);
}

int
gve_check_tx_timeout_dqo(struct gve_priv *priv, struct gve_tx_ring *tx)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int num_timeouts;
	uint16_t pkt_idx;

	num_timeouts = 0;
	for (pkt_idx = 0; pkt_idx < tx->dqo.num_pending_pkts; pkt_idx++) {
		pending_pkt = &tx->dqo.pending_pkts[pkt_idx];

		if (!gve_timestamp_valid(&pending_pkt->enqueue_time_sec))
			continue;

		if (__predict_false(
		    gve_seconds_since(&pending_pkt->enqueue_time_sec) >
		    GVE_TX_TIMEOUT_PKT_SEC))
			num_timeouts += 1;
	}

	return (num_timeouts);
}

int
gve_tx_intr_dqo(void *arg)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	struct gve_ring_com *com = &tx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int i;

	for (i = 0; i < com->priv->tx_desc_cnt; i++)
		tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};

	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int entries;
	int i;

	entries = com->priv->tx_desc_cnt;
	for (i = 0; i < entries; i++)
		tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};

	bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

void
gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	tx->dqo.desc_head = 0;
	tx->dqo.desc_tail = 0;
	tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
	tx->dqo.last_re_idx = 0;

	tx->dqo.compl_head = 0;
	tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
	atomic_store_32(&tx->dqo.hw_tx_head, 0);
	tx->dqo.cur_gen_bit = 0;

	gve_free_tx_mbufs_dqo(tx);

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		if (gve_is_qpl(tx->com.priv))
			gve_clear_qpl_pending_pkt(&tx->dqo.pending_pkts[j]);
		gve_invalidate_timestamp(
		    &tx->dqo.pending_pkts[j].enqueue_time_sec);
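		/* Chain entries into the initial consumer free list. */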
		tx->dqo.pending_pkts[j].next =
		    (j == tx->dqo.num_pending_pkts - 1) ? -1 : j + 1;
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}
	tx->dqo.free_pending_pkts_csm = 0;
	atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		for (j = 0; j < qpl_buf_cnt - 1; j++)
			tx->dqo.qpl_bufs[j] = j + 1;
		tx->dqo.qpl_bufs[j] = -1;

		tx->dqo.free_qpl_bufs_csm = 0;
		atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
		atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
		tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
		tx->dqo.qpl_bufs_consumed = 0;
	}

	gve_tx_clear_desc_ring_dqo(tx);
	gve_tx_clear_compl_ring_dqo(tx);
}

static uint8_t
gve_tx_get_gen_bit(uint8_t *desc)
{
	uint8_t byte;

	/*
	 * Prevent generation bit from being read after the rest of the
	 * descriptor.
	 */
	byte = atomic_load_acq_8(desc + GVE_TX_DESC_DQO_GEN_BYTE_OFFSET);
	return ((byte & GVE_TX_DESC_DQO_GEN_BIT_MASK) != 0);
}

static bool
gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
{
	struct gve_tx_compl_desc_dqo *compl_desc;
	uint64_t bytes_done = 0;
	uint64_t pkts_done = 0;
	uint16_t compl_tag;
	int work_done = 0;
	uint16_t tx_head;
	uint16_t type;

	while (work_done < budget) {
		bus_dmamap_sync(tx->dqo.compl_ring_mem.tag,
		    tx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
		if (gve_tx_get_gen_bit((uint8_t *)compl_desc) ==
		    tx->dqo.cur_gen_bit)
			break;

		type = compl_desc->type;
		if (type == GVE_COMPL_TYPE_DQO_DESC) {
			/* This is the last descriptor fetched by HW plus one */
			tx_head = le16toh(compl_desc->tx_head);
			atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
			compl_tag = le16toh(compl_desc->completion_tag);
			bytes_done += gve_handle_packet_completion(priv,
			    tx, compl_tag);
			pkts_done++;
		}

		tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
		    tx->dqo.compl_mask;
		/* Flip the generation bit when we wrap around */
		tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
		work_done++;
	}

	/*
	 * Waking the xmit taskqueue has to occur after room has been made in
	 * the queue.
	 */
	atomic_thread_fence_seq_cst();
	if (atomic_load_bool(&tx->stopped) && work_done) {
		atomic_store_bool(&tx->stopped, false);
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
	}

	tx->done += work_done; /* tx->done is just a sysctl counter */
	counter_enter();
	counter_u64_add_protected(tx->stats.tbytes, bytes_done);
	counter_u64_add_protected(tx->stats.tpackets, pkts_done);
	counter_exit();

	return (work_done == budget);
}

void
gve_tx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}