/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet6.h"

#include "gve.h"
#include "gve_dqo.h"

static void
gve_unmap_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap,
	    BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap);
}

static void
gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int i;

	for (i = 0; i < tx->dqo.num_pending_pkts; i++) {
		pending_pkt = &tx->dqo.pending_pkts[i];
		if (!pending_pkt->mbuf)
			continue;

		if (gve_is_qpl(tx->com.priv)) {
			pending_pkt->qpl_buf_head = -1;
			pending_pkt->num_qpl_bufs = 0;
		} else
			gve_unmap_packet(tx, pending_pkt);

		m_freem(pending_pkt->mbuf);
		pending_pkt->mbuf = NULL;
	}
}

void
gve_tx_free_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	if (tx->dqo.desc_ring != NULL) {
		gve_dma_free_coherent(&tx->desc_ring_mem);
		tx->dqo.desc_ring = NULL;
	}

	if (tx->dqo.compl_ring != NULL) {
		gve_dma_free_coherent(&tx->dqo.compl_ring_mem);
		tx->dqo.compl_ring = NULL;
	}

	if (tx->dqo.pending_pkts != NULL) {
		gve_free_tx_mbufs_dqo(tx);

		if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) {
			for (j = 0; j < tx->dqo.num_pending_pkts; j++)
				if (tx->dqo.pending_pkts[j].state !=
				    GVE_PACKET_STATE_UNALLOCATED)
					bus_dmamap_destroy(tx->dqo.buf_dmatag,
					    tx->dqo.pending_pkts[j].dmamap);
		}

		free(tx->dqo.pending_pkts, M_GVE);
		tx->dqo.pending_pkts = NULL;
	}

	if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag)
		bus_dma_tag_destroy(tx->dqo.buf_dmatag);

	if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) {
		free(tx->dqo.qpl_bufs, M_GVE);
		tx->dqo.qpl_bufs = NULL;
	}
}

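/*
 * RDA (out-of-order DMA) mode: create the DMA tag used to map Tx mbufs and a
 * dmamap for every pending-packet slot, so that each in-flight packet can be
 * unloaded independently once its completion arrives.
 */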
static int
gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx)
{
	struct gve_priv *priv = tx->com.priv;
	int err;
	int j;

	/*
	 * DMA tag for mapping Tx mbufs
	 * The maxsize, nsegments, and maxsegsize params should match
	 * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c.
	 */
	err = bus_dma_tag_create(
	    bus_get_dma_tag(priv->dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    GVE_TSO_MAXSIZE_DQO,	/* maxsize */
	    GVE_TX_MAX_DATA_DESCS_DQO,	/* nsegments */
	    GVE_TX_MAX_BUF_SIZE_DQO,	/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockarg */
	    &tx->dqo.buf_dmatag);
	if (err != 0) {
		device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n",
		    __func__, err);
		return (err);
	}

	for (j = 0; j < tx->dqo.num_pending_pkts; j++) {
		err = bus_dmamap_create(tx->dqo.buf_dmatag, 0,
		    &tx->dqo.pending_pkts[j].dmamap);
		if (err != 0) {
			device_printf(priv->dev,
			    "err in creating pending pkt dmamap %d: %d",
			    j, err);
			return (err);
		}
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}

	return (0);
}

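/*
 * Allocates the per-ring DQO resources: the descriptor ring, the completion
 * ring, the pending_pkts array, and either the QPL buffer list (QPL mode) or
 * the per-packet DMA maps (RDA mode).
 */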
int
gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	uint16_t num_pending_pkts;
	int err;

	/* Descriptor ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr;

	/* Completion ring */
	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc compl ring for tx ring %d", i);
		goto abort;
	}
	tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr;

	/*
	 * pending_pkts array
	 *
	 * The max number of pending packets determines the maximum number of
	 * descriptors which may be written to the completion queue.
	 *
	 * We must set the number small enough to make sure we never overrun
	 * the completion queue.
	 */
	num_pending_pkts = priv->tx_desc_cnt;
	/*
	 * Reserve space for descriptor completions, which will be reported at
	 * most every GVE_TX_MIN_RE_INTERVAL packets.
	 */
	num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL;

	tx->dqo.num_pending_pkts = num_pending_pkts;
	tx->dqo.pending_pkts = malloc(
	    sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts,
	    M_GVE, M_WAITOK | M_ZERO);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt;

		tx->com.qpl = &priv->qpls[i];
		qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		tx->dqo.qpl_bufs = malloc(
		    sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt,
		    M_GVE, M_WAITOK | M_ZERO);
	} else {
		err = gve_tx_alloc_rda_fields_dqo(tx);
		if (err != 0)
			goto abort;
	}
	return (0);

abort:
	gve_tx_free_ring_dqo(priv, i);
	return (err);
}

static void
gve_extract_tx_metadata_dqo(const struct mbuf *mbuf,
    struct gve_tx_metadata_dqo *metadata)
{
	uint32_t hash = mbuf->m_pkthdr.flowid;
	uint16_t path_hash;

	metadata->version = GVE_TX_METADATA_VERSION_DQO;
	if (hash) {
		path_hash = hash ^ (hash >> 16);

		path_hash &= (1 << 15) - 1;
		if (__predict_false(path_hash == 0))
			path_hash = ~path_hash;

		metadata->path_hash = path_hash;
	}
}

static void
gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx,
    uint32_t *desc_idx, uint32_t len, uint64_t addr,
    int16_t compl_tag, bool eop, bool csum_enabled)
{
	while (len > 0) {
		struct gve_tx_pkt_desc_dqo *desc =
		    &tx->dqo.desc_ring[*desc_idx].pkt;
		uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO);
		bool cur_eop = eop && cur_len == len;

		*desc = (struct gve_tx_pkt_desc_dqo){
			.buf_addr = htole64(addr),
			.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
			.end_of_packet = cur_eop,
			.checksum_offload_enable = csum_enabled,
			.compl_tag = htole16(compl_tag),
			.buf_size = cur_len,
		};

		addr += cur_len;
		len -= cur_len;
		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	}
}

static void
gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc,
    const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata,
    int header_len)
{
	*desc = (struct gve_tx_tso_context_desc_dqo){
		.header_len = header_len,
		.cmd_dtype = {
			.dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO,
			.tso = 1,
		},
		.flex0 = metadata->bytes[0],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
	};
	desc->tso_total_len = mbuf->m_pkthdr.len - header_len;
	desc->mss = mbuf->m_pkthdr.tso_segsz;
}

static void
gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
    const struct gve_tx_metadata_dqo *metadata)
{
	*desc = (struct gve_tx_general_context_desc_dqo){
		.flex0 = metadata->bytes[0],
		.flex1 = metadata->bytes[1],
		.flex2 = metadata->bytes[2],
		.flex3 = metadata->bytes[3],
		.flex4 = metadata->bytes[4],
		.flex5 = metadata->bytes[5],
		.flex6 = metadata->bytes[6],
		.flex7 = metadata->bytes[7],
		.flex8 = metadata->bytes[8],
		.flex9 = metadata->bytes[9],
		.flex10 = metadata->bytes[10],
		.flex11 = metadata->bytes[11],
		.cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO},
	};
}

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (EINVAL);		\
	}						\
} while (0)

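/*
 * For TSO the hardware expects th_sum to hold only the pseudo-header checksum
 * (payload length excluded), so it is recomputed here; the total header
 * length is reported back to the caller for the TSO context descriptor.
 */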
static int
gve_prep_tso(struct mbuf *mbuf, int *header_len)
{
	uint8_t l3_off, l4_off = 0;
	struct ether_header *eh;
	struct tcphdr *th;
	u_short csum;

	PULLUP_HDR(mbuf, sizeof(*eh));
	eh = mtod(mbuf, struct ether_header *);
	KASSERT(eh->ether_type != ETHERTYPE_VLAN,
	    ("VLAN-tagged packets not supported"));
	l3_off = ETHER_HDR_LEN;

#ifdef INET6
	if (ntohs(eh->ether_type) == ETHERTYPE_IPV6) {
		struct ip6_hdr *ip6;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip6));
		ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + sizeof(struct ip6_hdr);
		csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP,
		    /*csum=*/0);
	} else
#endif
	if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		struct ip *ip;

		PULLUP_HDR(mbuf, l3_off + sizeof(*ip));
		ip = (struct ip *)(mtodo(mbuf, l3_off));
		l4_off = l3_off + (ip->ip_hl << 2);
		csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htons(IPPROTO_TCP));
	}

	PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr));
	th = (struct tcphdr *)(mtodo(mbuf, l4_off));
	*header_len = l4_off + (th->th_off << 2);

	/*
	 * Hardware requires th->th_sum to not include the TCP payload,
	 * hence we recompute the csum with it excluded.
	 */
	th->th_sum = csum;

	return (0);
}

static int
gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf,
    bool is_tso, uint32_t *desc_idx)
{
	struct gve_tx_general_context_desc_dqo *gen_desc;
	struct gve_tx_tso_context_desc_dqo *tso_desc;
	struct gve_tx_metadata_dqo metadata;
	int header_len;
	int err;

	metadata = (struct gve_tx_metadata_dqo){0};
	gve_extract_tx_metadata_dqo(mbuf, &metadata);

	if (is_tso) {
		err = gve_prep_tso(mbuf, &header_len);
		if (__predict_false(err)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_delayed_pkt_tsoerr, 1);
			counter_exit();
			return (err);
		}

		tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx;
		gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len);

		*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
		counter_enter();
		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
		counter_exit();
	}

	gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx;
	gve_tx_fill_general_ctx_desc(gen_desc, &metadata);
	*desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask;
	return (0);
}

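/*
 * Loads the mbuf chain for DMA. If the chain has too many segments (EFBIG),
 * it is compacted with m_collapse, falling back to m_defrag, and the load is
 * retried exactly once.
 */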
static int
gve_map_mbuf_dqo(struct gve_tx_ring *tx,
    struct mbuf **mbuf, bus_dmamap_t dmamap,
    bus_dma_segment_t *segs, int *nsegs, int attempt)
{
	struct mbuf *m_new = NULL;
	int err;

	err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap,
	    *mbuf, segs, nsegs, BUS_DMA_NOWAIT);

	switch (err) {
	case __predict_true(0):
		break;
	case EFBIG:
		if (__predict_false(attempt > 0))
			goto abort;

		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_collapse, 1);
		counter_exit();

		/* Try m_collapse before m_defrag */
		m_new = m_collapse(*mbuf, M_NOWAIT,
		    GVE_TX_MAX_DATA_DESCS_DQO);
		if (m_new == NULL) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag, 1);
			counter_exit();
			m_new = m_defrag(*mbuf, M_NOWAIT);
		}

		if (__predict_false(m_new == NULL)) {
			counter_enter();
			counter_u64_add_protected(
			    tx->stats.tx_mbuf_defrag_err, 1);
			counter_exit();

			m_freem(*mbuf);
			*mbuf = NULL;
			err = ENOMEM;
			goto abort;
		} else {
			*mbuf = m_new;
			return (gve_map_mbuf_dqo(tx, mbuf, dmamap,
			    segs, nsegs, ++attempt));
		}
	case ENOMEM:
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_mbuf_dmamap_enomem_err, 1);
		counter_exit();
		goto abort;
	default:
		goto abort;
	}

	return (0);

abort:
	counter_enter();
	counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1);
	counter_exit();
	return (err);
}

static uint32_t
num_avail_desc_ring_slots(const struct gve_tx_ring *tx)
{
	uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) &
	    tx->dqo.desc_mask;

	return (tx->dqo.desc_mask - num_used);
}

static struct gve_tx_pending_pkt_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
	int32_t index = tx->dqo.free_pending_pkts_csm;
	struct gve_tx_pending_pkt_dqo *pending_pkt;

	/*
	 * No pending packets available in the consumer list,
	 * try to steal the producer list.
	 */
	if (__predict_false(index == -1)) {
		tx->dqo.free_pending_pkts_csm = atomic_swap_32(
		    &tx->dqo.free_pending_pkts_prd, -1);

		index = tx->dqo.free_pending_pkts_csm;
		if (__predict_false(index == -1))
			return (NULL);
	}

	pending_pkt = &tx->dqo.pending_pkts[index];

	/* Remove pending_pkt from the consumer list */
	tx->dqo.free_pending_pkts_csm = pending_pkt->next;
	pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL;

	return (pending_pkt);
}

static void
gve_free_pending_packet(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pending_pkt)
{
	int index = pending_pkt - tx->dqo.pending_pkts;
	int32_t old_head;

	pending_pkt->state = GVE_PACKET_STATE_FREE;

	/* Add pending_pkt to the producer list */
	while (true) {
		old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd);

		pending_pkt->next = old_head;
		if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd,
		    old_head, index))
			break;
	}
}

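/*
 * The pending-packet free list above is split in two: a consumer list owned
 * by the xmit path and a producer list appended to by the completion path.
 * When the consumer list runs dry, the whole producer list is claimed with a
 * single atomic swap, keeping the two paths from contending on every
 * alloc/free.
 */
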
/*
 * Has the side-effect of retrieving the value of the last desc index
 * processed by the NIC. hw_tx_head is written to by the completions-processing
 * taskqueue upon receiving descriptor-completions.
 */
static bool
gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs)
{
	if (needed_descs <= num_avail_desc_ring_slots(tx))
		return (true);

	tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head);
	if (needed_descs > num_avail_desc_ring_slots(tx)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_descring, 1);
		counter_exit();
		return (false);
	}

	return (true);
}

static void
gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx)
{
	uint32_t last_report_event_interval;
	uint32_t last_desc_idx;

	last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask;
	last_report_event_interval =
	    (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask;

	if (__predict_false(last_report_event_interval >=
	    GVE_TX_MIN_RE_INTERVAL)) {
		tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true;
		tx->dqo.last_re_idx = last_desc_idx;
	}
}

static bool
gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs)
{
	uint32_t available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);

	tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32(
	    &tx->dqo.qpl_bufs_produced);
	available = tx->dqo.qpl_bufs_produced_cached -
	    tx->dqo.qpl_bufs_consumed;

	if (__predict_true(available >= num_bufs))
		return (true);
	return (false);
}

static int32_t
gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx)
{
	int32_t buf = tx->dqo.free_qpl_bufs_csm;

	if (__predict_false(buf == -1)) {
		tx->dqo.free_qpl_bufs_csm = atomic_swap_32(
		    &tx->dqo.free_qpl_bufs_prd, -1);
		buf = tx->dqo.free_qpl_bufs_csm;
		if (__predict_false(buf == -1))
			return (-1);
	}

	tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf];
	tx->dqo.qpl_bufs_consumed++;
	return (buf);
}

/*
 * Tx buffer i corresponds to
 * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
 * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
 */
static void
gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx,
    int32_t index, void **va, bus_addr_t *dma_addr)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
	    GVE_TX_BUF_SHIFT_DQO;

	*va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset;
	*dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset;
}

static struct gve_dma_handle *
gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index)
{
	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);

	return (&tx->com.qpl->dmas[page_id]);
}

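/*
 * QPL datapath: the mbuf payload is copied into pre-registered QPL buffers,
 * one pkt descriptor is written per buffer, and the buffers are chained into
 * a per-packet list (terminated by -1) so they can be reclaimed when the
 * packet completion arrives.
 */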
static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx,
    struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt,
    bool csum_enabled, int16_t completion_tag,
    uint32_t *desc_idx)
{
	int32_t pkt_len = mbuf->m_pkthdr.len;
	struct gve_dma_handle *dma;
	uint32_t copy_offset = 0;
	int32_t prev_buf = -1;
	uint32_t copy_len;
	bus_addr_t addr;
	int32_t buf;
	void *va;

	MPASS(pkt->num_qpl_bufs == 0);
	MPASS(pkt->qpl_buf_head == -1);

	while (copy_offset < pkt_len) {
		buf = gve_tx_alloc_qpl_buf(tx);
		/* We already checked for availability */
		MPASS(buf != -1);

		gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr);
		copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset);
		m_copydata(mbuf, copy_offset, copy_len, va);
		copy_offset += copy_len;

		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);

		gve_tx_fill_pkt_desc_dqo(tx, desc_idx,
		    copy_len, addr, completion_tag,
		    /*eop=*/copy_offset == pkt_len,
		    csum_enabled);

		/* Link all the qpl bufs for a packet */
		if (prev_buf == -1)
			pkt->qpl_buf_head = buf;
		else
			tx->dqo.qpl_bufs[prev_buf] = buf;

		prev_buf = buf;
		pkt->num_qpl_bufs++;
	}

	tx->dqo.qpl_bufs[buf] = -1;
}

int
gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf)
{
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO);
	/* Check if we have enough room in the desc ring */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1);
		counter_exit();
		return (ENOBUFS);
	}

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;
	pkt->mbuf = mbuf;

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort;

	gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt,
	    has_csum_flag, completion_tag, &desc_idx);

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

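/*
 * RDA transmit path: unlike gve_xmit_dqo_qpl above, the mbuf is not copied;
 * it is DMA-mapped and one pkt descriptor is written per DMA segment. The
 * descriptor-room check is repeated after mapping because the real segment
 * count is only known at that point.
 */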
int
gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr)
{
	bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO];
	uint32_t desc_idx = tx->dqo.desc_tail;
	struct gve_tx_pending_pkt_dqo *pkt;
	struct mbuf *mbuf = *mbuf_ptr;
	int total_descs_needed;
	int16_t completion_tag;
	bool has_csum_flag;
	int csum_flags;
	bool is_tso;
	int nsegs;
	int err;
	int i;

	csum_flags = mbuf->m_pkthdr.csum_flags;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	is_tso = csum_flags & CSUM_TSO;

	/*
	 * This mbuf might end up needing more than 1 pkt desc.
	 * The actual number, `nsegs`, is known only after the
	 * expensive gve_map_mbuf_dqo call. This check below
	 * exists to fail early when the desc ring is really full.
	 */
	total_descs_needed = 1 +	/* general_ctx_desc */
	    1 +				/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed)))
		return (ENOBUFS);

	pkt = gve_alloc_pending_packet(tx);
	if (pkt == NULL) {
		counter_enter();
		counter_u64_add_protected(
		    tx->stats.tx_delayed_pkt_nospace_compring, 1);
		counter_exit();
		return (ENOBUFS);
	}
	completion_tag = pkt - tx->dqo.pending_pkts;

	err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap,
	    segs, &nsegs, /*attempt=*/0);
	if (err)
		goto abort;
	mbuf = *mbuf_ptr;  /* gve_map_mbuf_dqo might replace the mbuf chain */
	pkt->mbuf = mbuf;

	total_descs_needed = 1 +	/* general_ctx_desc */
	    nsegs +			/* pkt_desc */
	    (is_tso ? 1 : 0);		/* tso_ctx_desc */
	if (__predict_false(
	    !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) {
		err = ENOBUFS;
		goto abort_with_dma;
	}

	err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx);
	if (err)
		goto abort_with_dma;

	bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE);
	for (i = 0; i < nsegs; i++) {
		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
		    segs[i].ds_len, segs[i].ds_addr,
		    completion_tag, /*eop=*/i == (nsegs - 1),
		    has_csum_flag);
	}

	/* Remember the index of the last desc written */
	tx->dqo.desc_tail = desc_idx;

	/*
	 * Request a descriptor completion on the last descriptor of the
	 * packet if we are allowed to by the HW enforced interval.
	 */
	gve_tx_request_desc_compl(tx, desc_idx);

	tx->req += total_descs_needed; /* tx->req is just a sysctl counter */
	return (0);

abort_with_dma:
	gve_unmap_packet(tx, pkt);
abort:
	pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pkt);
	return (err);
}

static void
gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx,
    struct gve_tx_pending_pkt_dqo *pkt)
{
	int32_t buf = pkt->qpl_buf_head;
	struct gve_dma_handle *dma;
	int32_t qpl_buf_tail;
	int32_t old_head;
	int i;

	for (i = 0; i < pkt->num_qpl_bufs; i++) {
		dma = gve_get_page_dma_handle(tx, buf);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE);
		qpl_buf_tail = buf;
		buf = tx->dqo.qpl_bufs[buf];
	}
	MPASS(buf == -1);
	buf = qpl_buf_tail;

	while (true) {
		old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd);
		tx->dqo.qpl_bufs[buf] = old_head;

		/*
		 * The "rel" ensures that the update to dqo.free_qpl_bufs_prd
		 * is visible only after the linked list from this pkt is
		 * attached above to old_head.
		 */
		if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd,
		    old_head, pkt->qpl_buf_head))
			break;
	}
	/*
	 * The "rel" ensures that the update to dqo.qpl_bufs_produced is
	 * visible only after the update to dqo.free_qpl_bufs_prd above.
	 */
	atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs);

	pkt->qpl_buf_head = -1;
	pkt->num_qpl_bufs = 0;
}

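/*
 * Handles a packet completion: validates the completion tag, returns the
 * packet's QPL buffers or DMA mapping, frees the mbuf, and releases the
 * pending-packet slot. Returns the packet length for the byte counters.
 */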
static uint64_t
gve_handle_packet_completion(struct gve_priv *priv,
    struct gve_tx_ring *tx, uint16_t compl_tag)
{
	struct gve_tx_pending_pkt_dqo *pending_pkt;
	int32_t pkt_len;

	if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) {
		device_printf(priv->dev, "Invalid TX completion tag: %d\n",
		    compl_tag);
		return (0);
	}

	pending_pkt = &tx->dqo.pending_pkts[compl_tag];

	/* Packet is allocated but not pending data completion. */
	if (__predict_false(pending_pkt->state !=
	    GVE_PACKET_STATE_PENDING_DATA_COMPL)) {
		device_printf(priv->dev,
		    "No pending data completion: %d\n", compl_tag);
		return (0);
	}

	pkt_len = pending_pkt->mbuf->m_pkthdr.len;

	if (gve_is_qpl(priv))
		gve_reap_qpl_bufs_dqo(tx, pending_pkt);
	else
		gve_unmap_packet(tx, pending_pkt);

	m_freem(pending_pkt->mbuf);
	pending_pkt->mbuf = NULL;
	gve_free_pending_packet(tx, pending_pkt);
	return (pkt_len);
}

int
gve_tx_intr_dqo(void *arg)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	struct gve_ring_com *com = &tx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	/* Interrupts are automatically masked */
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static void
gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int i;

	for (i = 0; i < com->priv->tx_desc_cnt; i++)
		tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){};

	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int entries;
	int i;

	entries = com->priv->tx_desc_cnt;
	for (i = 0; i < entries; i++)
		tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){};

	bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

void
gve_clear_tx_ring_dqo(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	int j;

	tx->dqo.desc_head = 0;
	tx->dqo.desc_tail = 0;
	tx->dqo.desc_mask = priv->tx_desc_cnt - 1;
	tx->dqo.last_re_idx = 0;

	tx->dqo.compl_head = 0;
	tx->dqo.compl_mask = priv->tx_desc_cnt - 1;
	atomic_store_32(&tx->dqo.hw_tx_head, 0);
	tx->dqo.cur_gen_bit = 0;

	gve_free_tx_mbufs_dqo(tx);

	for (j = 0; j < tx->dqo.num_pending_pkts - 1; j++) {
		tx->dqo.pending_pkts[j].next = j + 1;
		tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE;
	}
	tx->dqo.pending_pkts[tx->dqo.num_pending_pkts - 1].next = -1;
	tx->dqo.free_pending_pkts_csm = 0;
	atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1);

	if (gve_is_qpl(priv)) {
		int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO *
		    tx->com.qpl->num_pages;

		for (j = 0; j < qpl_buf_cnt - 1; j++)
			tx->dqo.qpl_bufs[j] = j + 1;
		tx->dqo.qpl_bufs[j] = -1;

		tx->dqo.free_qpl_bufs_csm = 0;
		atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1);
		atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt);
		tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt;
		tx->dqo.qpl_bufs_consumed = 0;
	}

	gve_tx_clear_desc_ring_dqo(tx);
	gve_tx_clear_compl_ring_dqo(tx);
}

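/*
 * Processes up to `budget` completion-ring entries. Entries are consumed
 * until a descriptor whose generation bit matches cur_gen_bit is seen;
 * descriptor completions advance hw_tx_head while packet completions free
 * the corresponding pending packet. Returns true if the budget was exhausted
 * so that the caller can reschedule itself.
 */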
static bool
gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget)
{
	struct gve_tx_compl_desc_dqo *compl_desc;
	uint64_t bytes_done = 0;
	uint64_t pkts_done = 0;
	uint16_t compl_tag;
	int work_done = 0;
	uint16_t tx_head;
	uint16_t type;

	while (work_done < budget) {
		bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map,
		    BUS_DMASYNC_POSTREAD);

		compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head];
		if (compl_desc->generation == tx->dqo.cur_gen_bit)
			break;

		/*
		 * Prevent generation bit from being read after the rest of the
		 * descriptor.
		 */
		rmb();
		type = compl_desc->type;

		if (type == GVE_COMPL_TYPE_DQO_DESC) {
			/* This is the last descriptor fetched by HW plus one */
			tx_head = le16toh(compl_desc->tx_head);
			atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head);
		} else if (type == GVE_COMPL_TYPE_DQO_PKT) {
			compl_tag = le16toh(compl_desc->completion_tag);
			bytes_done += gve_handle_packet_completion(priv,
			    tx, compl_tag);
			pkts_done++;
		}

		tx->dqo.compl_head = (tx->dqo.compl_head + 1) &
		    tx->dqo.compl_mask;
		/* Flip the generation bit when we wrap around */
		tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0;
		work_done++;
	}

	/*
	 * Waking the xmit taskqueue has to occur after room has been made in
	 * the queue.
	 */
	atomic_thread_fence_seq_cst();
	if (atomic_load_bool(&tx->stopped) && work_done) {
		atomic_store_bool(&tx->stopped, false);
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
	}

	tx->done += work_done; /* tx->done is just a sysctl counter */
	counter_enter();
	counter_u64_add_protected(tx->stats.tbytes, bytes_done);
	counter_u64_add_protected(tx->stats.tpackets, pkts_done);
	counter_exit();

	return (work_done == budget);
}

void
gve_tx_cleanup_tq_dqo(void *arg, int pending)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) {
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
		return;
	}

	gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset,
	    GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
}