/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet6.h"

#include "gve.h"
#include "gve_dqo.h"

static void
gve_unmap_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
	    BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
}

static void
gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	pending_pkt->qpl_buf_head = -1;
	pending_pkt->num_qpl_bufs = 0;
}

static void
gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int i;

	for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
		pending_pkt = &tx->dqo.pending_pkts[i];
		if (!pending_pkt->mbuf)
			continue;

		if (gve_is_qpl(tx->com.priv))
			gve_clear_qpl_pending_pkt(pending_pkt);
		else
			gve_unmap_packet(tx, pending_pkt);

		m_freem(pending_pkt->mbuf);
		pending_pkt->mbuf = NULL;
	}
}

void
gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	if (tx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&tx->desc_ring_mem);
		tx->dqo.desc_ring = NULL;
	}

	if (tx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
		tx->dqo.compl_ring = NULL;
	}

	if (tx->dqo.pending_pkts != NULL) {
		gve_free_tx_mbufs_dqo(tx);

		if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
			for (j = 0; j < tx->dqo.num_pending_pkts; j++)
				if (tx->dqo.pending_pkts[j].state !=
				    GVE_PACKET_STATE_UNALLOCATED)
					bus_dmamap_destroy(tx->dqo.buf_dmatag,
					    tx->dqo.pending_pkts[j].dmamap);
		}

		free(tx->dqo.pending_pkts, M_GVE);
		tx->dqo.pending_pkts = NULL;
	}

	if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
		bus_dma_tag_destroy(tx->dqo.buf_dmatag);

	if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
		free(tx->dqo.qpl_bufs, M_GVE);
		tx->dqo.qpl_bufs = NULL;
	}
}
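
/*
 * Note: the code below is split along the two DQO buffer schemes. In RDA
 * (raw DMA addressing) mode each mbuf chain is DMA-mapped directly through a
 * per-packet dmamap; in QPL (queue page list) mode payloads are copied into
 * fixed-size buffers carved out of pre-registered QPL pages. The alloc,
 * xmit, and cleanup paths all branch on gve_is_qpl() accordingly.
 */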

static int
gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
{
	struct gve_priv *priv = tx->com.priv;
	int err;
	int j;

	/*
	 * DMA tag for mapping Tx mbufs
	 * The maxsize, nsegments, and maxsegsize params should match
	 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
	 */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),		/* parent */
	    1, 0,				/* alignment, bounds */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    GVE_TSO_MAXSIZE_DQO,		/* maxsize */
	    GVE_TX_MAX_DATA_DESCS_DQO,		/* nsegments */
	    GVE_TX_MAX_BUF_SIZE_DQO,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,			/* flags */
	    NULL,				/* lockfunc */
	    NULL,				/* lockarg */
	    &tx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		return (err);
	}

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
		    &tx->dqo.pending_pkts[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating pending pkt dmamap %d: %d",
			    j, err);
			return (err);
		}
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}

	return (0);
}

int
gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	uint16_t num_pending_pkts;
	int err;

	/* Descriptor ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;

	/* Completion ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;

	/*
	 * pending_pkts array
	 *
	 * The max number of pending packets determines the maximum number of
	 * descriptors which may be written to the completion queue.
	 *
	 * We must set the number small enough to make sure we never overrun
	 * the completion queue.
	 */
	num_pending_pkts = priv->tx_desc_cnt;
	/*
	 * Reserve space for descriptor completions, which will be reported at
	 * most every GVE_TX_MIN_RE_INTERVAL packets.
	 */
	num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;
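	/*
	 * Worked example (illustrative values, not necessarily the driver's
	 * actual constants): with tx_desc_cnt = 1024 and
	 * GVE_TX_MIN_RE_INTERVAL = 32, num_pending_pkts = 1024 - 1024 / 32 =
	 * 992, leaving 32 completion-ring slots free for the descriptor
	 * (report-event) completions.
	 */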

	tx->dqo.num_pending_pkts = num_pending_pkts;
	tx->dqo.pending_pkts = malloc(
	    sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt;

		tx->com.qpl = &priv->qpls[i];
		qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		tx->dqo.qpl_bufs = malloc(
		    sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
		    M_GVE, M_WAITOK | M_ZERO);
	} else
		gve_tx_alloc_rda_fields_dqo(tx);
	return (0);

abort:
	gve_tx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
    struct gve_tx_metadata_dqo *metadata)
{
	uint32_t hash = mbuf->m_pkthdr.flowid;
	uint16_t path_hash;

	metadata->version = GVE_TX_METADATA_VERSION_DQO;
	if (hash) {
		path_hash = hash ^ (hash >> 16);

		path_hash &= (1 << 15) - 1;
		if (__predict_false(path_hash == 0))
			path_hash = ~path_hash;

		metadata->path_hash = path_hash;
	}
}

static void
gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
    uint32_t *desc_idx, uint32_t len, uint64_t addr,
    int16_t compl_tag, bool eop, bool csum_enabled)
{
	while (len > 0) {
		struct gve_tx_pkt_desc_dqo *desc =
		    &tx->dqo.desc_ring[*desc_idx].pkt;
		uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
		bool cur_eop = eop && cur_len == len;

		*desc = (struct gve_tx_pkt_desc_dqo){
			.buf_addr = htole64(addr),
			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
			.end_of_packet = cur_eop,
			.checksum_offload_enable = csum_enabled,
			.compl_tag = htole16(compl_tag),
			.buf_size = cur_len,
		};

		addr += cur_len;
		len -= cur_len;
		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	}
}

static void
gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
    const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
    int header_len)
{
	*desc = (struct gve_tx_tso_context_desc_dqo){
		.header_len = header_len,
		.cmd_dtype = {
			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
			.tso = 1,
		},
		.flex0 = metadata->bytes[0],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
	};
	desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
	desc->mss = mbuf->m_pkthdr.tso_segsz;
}

static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
    const struct gve_tx_metadata_dqo *metadata)
{
	*desc = (struct gve_tx_general_context_desc_dqo){
		.flex0 = metadata->bytes[0],
		.flex1 = metadata->bytes[1],
		.flex2 = metadata->bytes[2],
		.flex3 = metadata->bytes[3],
		.flex4 = metadata->bytes[4],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
	};
}

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (EINVAL);		\
	}						\
} while (0)
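
/*
 * For TSO packets, parse the Ethernet and IP headers to locate the TCP
 * header, report the total L2+L3+L4 header length to the caller, and seed
 * th_sum with the pseudo-header checksum (computed without the payload
 * length), which is the form the hardware expects for TSO.
 */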

static int
gve_prep_tso(struct mbuf *mbuf, int *header_len)
{
	uint8_t l3_off, l4_off = 0;
	struct ether_header *eh;
	struct tcphdr *th;
	u_short csum;

	PULLUP_HDR(mbuf, sizeof(*eh));
	eh = mtod(mbuf, struct ether_header *);
	KASSERT(ntohs(eh->ether_type) != ETHERTYPE_VLAN,
	    ("VLAN-tagged packets not supported"));
	l3_off = ETHER_HDR_LEN;

#ifdef INET6
	if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
		struct ip6_hdr *ip6;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
		ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + sizeof(struct ip6_hdr);
		csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
		    /*csum=*/0);
	} else
#endif
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		struct ip *ip;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
		ip = (struct ip *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + (ip->ip_hl << 2);
		csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htons(IPPROTO_TCP));
	}

	PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr));
	th = (struct tcphdr *)(mtodo(mbuf, l4_off));
	*header_len = l4_off + (th->th_off << 2);

	/*
	 * Hardware requires the th->th_sum to not include the TCP payload,
	 * hence we recompute the csum with it excluded.
	 */
	th->th_sum = csum;

	return (0);
}

static int
gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
    bool is_tso, uint32_t *desc_idx)
{
	struct gve_tx_general_context_desc_dqo *gen_desc;
	struct gve_tx_tso_context_desc_dqo *tso_desc;
	struct gve_tx_metadata_dqo metadata;
	int header_len;
	int err;

	metadata = (struct gve_tx_metadata_dqo){0};
	gve_extract_tx_metadata_dqo(mbuf, &metadata);

	if (is_tso) {
		err = gve_prep_tso(mbuf, &header_len);
		if (__predict_false(err)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_delayed_pkt_tsoerr, 1);
			counter_exit();
			return (err);
		}

		tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
		gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);

		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
		counter_enter();
		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
		counter_exit();
	}

	gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
	gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
	*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	return (0);
}
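
/*
 * DMA-map an mbuf chain for the RDA path. On EFBIG (too many segments for
 * GVE_TX_MAX_DATA_DESCS_DQO), try m_collapse() first and fall back to the
 * costlier m_defrag(), then retry the load exactly once; every other failure
 * is counted and returned to the caller.
 */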

static int
gve_map_mbuf_dqo(struct gve_tx_ring *tx,
    struct mbuf **mbuf, bus_dmamap_t dmamap,
    bus_dma_segment_t *segs, int *nsegs, int attempt)
{
	struct mbuf *m_new = NULL;
	int err;

	err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
	    *mbuf, segs, nsegs, BUS_DMA_NOWAIT);

	switch (err) {
	case __predict_true(0):
		break;
	case EFBIG:
		if (__predict_false(attempt > 0))
			goto abort;

		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_collapse, 1);
		counter_exit();

		/* Try m_collapse before m_defrag */
		m_new = m_collapse(*mbuf, M_NOWAIT,
		    GVE_TX_MAX_DATA_DESCS_DQO);
		if (m_new == NULL) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag, 1);
			counter_exit();
			m_new = m_defrag(*mbuf, M_NOWAIT);
		}

		if (__predict_false(m_new == NULL)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag_err, 1);
			counter_exit();

			m_freem(*mbuf);
			*mbuf = NULL;
			err = ENOMEM;
			goto abort;
		} else {
			*mbuf = m_new;
			return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
			    segs, nsegs, ++attempt));
		}
	case ENOMEM:
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_dmamap_enomem_err, 1);
		counter_exit();
		goto abort;
	default:
		goto abort;
	}

	return (0);

abort:
	counter_enter();
	counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
	counter_exit();
	return (err);
}

static uint32_t
num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
{
	uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
	    tx->dqo.desc_mask;

	return (tx->dqo.desc_mask - num_used);
}

static struct gve_tx_pending_pkt_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
	int32_t index = tx->dqo.free_pending_pkts_csm;
	struct gve_tx_pending_pkt_dqo *pending_pkt;

	/*
	 * No pending packets available in the consumer list,
	 * try to steal the producer list.
	 */
	if (__predict_false(index == -1)) {
		tx->dqo.free_pending_pkts_csm = atomic_swap_32(
		    &tx->dqo.free_pending_pkts_prd, -1);

		index = tx->dqo.free_pending_pkts_csm;
		if (__predict_false(index == -1))
			return (NULL);
	}

	pending_pkt = &tx->dqo.pending_pkts[index];

	/* Remove pending_pkt from the consumer list */
	tx->dqo.free_pending_pkts_csm = pending_pkt->next;
	pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;

	return (pending_pkt);
}

static void
gve_free_pending_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	int index = pending_pkt - tx->dqo.pending_pkts;
	int32_t old_head;

	pending_pkt->state = GVE_PACKET_STATE_FREE;

	/* Add pending_pkt to the producer list */
	while (true) {
		old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);

		pending_pkt->next = old_head;
		if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
		    old_head, index))
			break;
	}
}
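
/*
 * The free lists above (and the QPL buffer free lists below) are split into
 * a consumer half and a producer half to avoid a lock between the xmit path
 * and the completion taskqueue: the transmitting thread pops only from the
 * consumer list, the freeing side pushes onto the producer list with an
 * atomic cmpset, and when the consumer list runs dry the whole producer list
 * is stolen in a single atomic_swap_32.
 */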

/*
 * May have the side effect of refreshing desc_head from hw_tx_head, the last
 * descriptor index processed by the NIC. hw_tx_head is written by the
 * completion-processing taskqueue when it receives descriptor completions.
 */
static bool
gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
{
	if (needed_descs <= num_avail_desc_ring_slots(tx))
		return (true);

	tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
	if (needed_descs > num_avail_desc_ring_slots(tx)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_descring, 1);
		counter_exit();
		return (false);
	}

	return (true);
}

static void
gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
{
	uint32_t last_report_event_interval;
	uint32_t last_desc_idx;

	last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
	last_report_event_interval =
	    (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;

	if (__predict_false(last_report_event_interval >=
	    GVE_TX_MIN_RE_INTERVAL)) {
		tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
		tx->dqo.last_re_idx = last_desc_idx;
	}
}

static bool
gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
{
	uint32_t available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);

	tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
	    &tx->dqo.qpl_bufs_produced);
	available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);
	return (false);
}

static int32_t
gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
{
	int32_t buf = tx->dqo.free_qpl_bufs_csm;

	if (__predict_false(buf == -1)) {
		tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
		    &tx->dqo.free_qpl_bufs_prd, -1);
		buf = tx->dqo.free_qpl_bufs_csm;
		if (__predict_false(buf == -1))
			return (-1);
	}

	tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
	tx->dqo.qpl_bufs_consumed++;
	return (buf);
}

/*
 * Tx buffer i corresponds to
 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
 */
static void
gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
    int32_t index, void **va, bus_addr_t *dma_addr)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
	    GVE_TX_BUF_SHIFT_DQO;

	*va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
	*dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);

	return (&tx->com.qpl->dmas[page_id]);
}
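
/*
 * Example of the index math above, assuming 4 KiB pages and 2 KiB DQO Tx
 * buffers (i.e. GVE_TX_BUF_SHIFT_DQO = 11 and GVE_TX_BUFS_PER_PAGE_DQO = 2;
 * see gve_dqo.h for the actual constants): buffer index 5 lives in QPL page
 * 5 >> 1 = 2 at offset (5 & 1) << 11 = 2048.
 */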

static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
    struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
    bool csum_enabled, int16_t completion_tag,
    uint32_t *desc_idx)
{
	int32_t pkt_len = mbuf->m_pkthdr.len;
	struct gve_dma_handle *dma;
	uint32_t copy_offset = 0;
	int32_t prev_buf = -1;
	uint32_t copy_len;
	bus_addr_t addr;
	int32_t buf;
	void *va;

	MPASS(pkt->num_qpl_bufs == 0);
	MPASS(pkt->qpl_buf_head == -1);

	while (copy_offset < pkt_len) {
		buf = gve_tx_alloc_qpl_buf(tx);
		/* We already checked for availability */
		MPASS(buf != -1);

		gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
		copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
		m_copydata(mbuf, copy_offset, copy_len, va);
		copy_offset += copy_len;

		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
		    copy_len, addr, completion_tag,
		    /*eop=*/copy_offset == pkt_len,
		    csum_enabled);

		/* Link all the qpl bufs for a packet */
		if (prev_buf == -1)
			pkt->qpl_buf_head = buf;
		else
			tx->dqo.qpl_bufs[prev_buf] = buf;

		prev_buf = buf;
		pkt->num_qpl_bufs++;
	}

	tx->dqo.qpl_bufs[buf] = -1;
}

int
gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
{
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
	/* Check if we have enough room in the desc ring */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
		counter_exit();
		return (ENOBUFS);
	}

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;
	pkt->mbuf = mbuf;

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort;

	gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
	    has_csum_flag, completion_tag, &desc_idx);

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}
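
/*
 * RDA transmit path. Unlike the QPL path above, the number of packet
 * descriptors is only known after the (comparatively expensive) DMA map, so
 * the descriptor-room check is done twice: once conservatively with a single
 * pkt_desc before mapping, and again with the real nsegs afterwards.
 */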

int
gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
{
	bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	struct mbuf *mbuf = *mbuf_ptr;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;
	int i;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	/*
	 * This mbuf might end up needing more than 1 pkt desc.
	 * The actual number, `nsegs`, is known only after the
	 * expensive gve_map_mbuf_dqo call. The check below
	 * exists to fail early when the desc ring is really full.
	 */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    1 +				/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;

	err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
	    segs, &nsegs, /*attempt=*/0);
	if (err)
		goto abort;
	mbuf = *mbuf_ptr;  /* gve_map_mbuf_dqo might replace the mbuf chain */
	pkt->mbuf = mbuf;

	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(
	    !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
		err = ENOBUFS;
		goto abort_with_dma;
	}

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort_with_dma;

	bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
	for (i = 0; i < nsegs; i++) {
		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
		    segs[i].ds_len, segs[i].ds_addr,
		    completion_tag, /*eop=*/i == (nsegs - 1),
		    has_csum_flag);
	}

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort_with_dma:
	gve_unmap_packet(tx, pkt);
abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

static void
gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pkt)
{
	int32_t buf = pkt->qpl_buf_head;
	struct gve_dma_handle *dma;
	int32_t qpl_buf_tail;
	int32_t old_head;
	int i;

	for (i = 0; i < pkt->num_qpl_bufs; i++) {
		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
		qpl_buf_tail = buf;
		buf = tx->dqo.qpl_bufs[buf];
	}
	MPASS(buf == -1);
	buf = qpl_buf_tail;

	while (true) {
		old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
		tx->dqo.qpl_bufs[buf] = old_head;

		/*
		 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
		 * is visible only after the linked list from this pkt is
		 * attached above to old_head.
		 */
		if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
		    old_head, pkt->qpl_buf_head))
			break;
	}
	/*
	 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
	 * visible only after the update to dqo.free_qpl_bufs_prd above.
	 */
	atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);

	gve_clear_qpl_pending_pkt(pkt);
}
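
/*
 * Each pkt_desc carries a completion tag, which is simply the packet's index
 * in the pending_pkts array. The NIC echoes that tag back in the
 * packet-completion descriptor, letting us find and release the right
 * pending packet here.
 */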

static uint64_t
gve_handle_packet_completion(struct gve_priv *priv,
    struct gve_tx_ring *tx, uint16_t compl_tag)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int32_t pkt_len;

	if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
		device_printf(priv->dev, "Invalid TX completion tag: %d\n",
		    compl_tag);
		return (0);
	}

	pending_pkt = &tx->dqo.pending_pkts[compl_tag];

	/* Packet is allocated but not pending data completion. */
	if (__predict_false(pending_pkt->state !=
	    GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
		device_printf(priv->dev,
		    "No pending data completion: %d\n", compl_tag);
		return (0);
	}

	pkt_len = pending_pkt->mbuf->m_pkthdr.len;

	if (gve_is_qpl(priv))
		gve_reap_qpl_bufs_dqo(tx, pending_pkt);
	else
		gve_unmap_packet(tx, pending_pkt);

	m_freem(pending_pkt->mbuf);
	pending_pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pending_pkt);
	return (pkt_len);
}

int
gve_tx_intr_dqo(void *arg)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	struct gve_ring_com *com = &tx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int i;

	for (i = 0; i < com->priv->tx_desc_cnt; i++)
		tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};

	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int entries;
	int i;

	entries = com->priv->tx_desc_cnt;
	for (i = 0; i < entries; i++)
		tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};

	bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}
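
/*
 * Reset all ring state to its post-allocation defaults: indices and the
 * generation bit back to zero, every pending packet freed and rechained into
 * the consumer free list, and (in QPL mode) all QPL buffers strung back into
 * one free list with the produced/consumed counters reset.
 */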

void
gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	tx->dqo.desc_head = 0;
	tx->dqo.desc_tail = 0;
	tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
	tx->dqo.last_re_idx = 0;

	tx->dqo.compl_head = 0;
	tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
	atomic_store_32(&tx->dqo.hw_tx_head, 0);
	tx->dqo.cur_gen_bit = 0;

	gve_free_tx_mbufs_dqo(tx);

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		if (gve_is_qpl(tx->com.priv))
			gve_clear_qpl_pending_pkt(&tx->dqo.pending_pkts[j]);
		tx->dqo.pending_pkts[j].next =
		    (j == tx->dqo.num_pending_pkts - 1) ? -1 : j + 1;
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}
	tx->dqo.free_pending_pkts_csm = 0;
	atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		for (j = 0; j < qpl_buf_cnt - 1; j++)
			tx->dqo.qpl_bufs[j] = j + 1;
		tx->dqo.qpl_bufs[j] = -1;

		tx->dqo.free_qpl_bufs_csm = 0;
		atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
		atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
		tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
		tx->dqo.qpl_bufs_consumed = 0;
	}

	gve_tx_clear_desc_ring_dqo(tx);
	gve_tx_clear_compl_ring_dqo(tx);
}

static bool
gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
{
	struct gve_tx_compl_desc_dqo *compl_desc;
	uint64_t bytes_done = 0;
	uint64_t pkts_done = 0;
	uint16_t compl_tag;
	int work_done = 0;
	uint16_t tx_head;
	uint16_t type;

	while (work_done < budget) {
		bus_dmamap_sync(tx->dqo.compl_ring_mem.tag,
		    tx->dqo.compl_ring_mem.map, BUS_DMASYNC_POSTREAD);

		compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
		if (compl_desc->generation == tx->dqo.cur_gen_bit)
			break;

		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		atomic_thread_fence_acq();
		type = compl_desc->type;

		if (type == GVE_COMPL_TYPE_DQO_DESC) {
			/* This is the last descriptor fetched by HW plus one */
			tx_head = le16toh(compl_desc->tx_head);
			atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
			compl_tag = le16toh(compl_desc->completion_tag);
			bytes_done += gve_handle_packet_completion(priv,
			    tx, compl_tag);
			pkts_done++;
		}

		tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
		    tx->dqo.compl_mask;
		/* Flip the generation bit when we wrap around */
		tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
		work_done++;
	}

	/*
	 * Waking the xmit taskqueue has to occur after room has been made in
	 * the queue.
	 */
	atomic_thread_fence_seq_cst();
	if (atomic_load_bool(&tx->stopped) && work_done) {
		atomic_store_bool(&tx->stopped, false);
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
	}

	tx->done += work_done; /* tx->done is just a sysctl counter */
	counter_enter();
	counter_u64_add_protected(tx->stats.tbytes, bytes_done);
	counter_u64_add_protected(tx->stats.tpackets, pkts_done);
	counter_exit();

	return (work_done == budget);
}

void
gve_tx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}