/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"

#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182

static int
gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_ring *tx)
{
	struct gve_queue_page_list *qpl = tx->com.qpl;
	struct gve_tx_fifo *fifo = &tx->fifo;

	fifo->size = qpl->num_pages * PAGE_SIZE;
	fifo->base = qpl->kva;
	atomic_store_int(&fifo->available, fifo->size);
	fifo->head = 0;

	return (0);
}

static void
gve_tx_free_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;

	/* Safe to call even if never alloced */
	gve_free_counters((counter_u64_t *)&tx->stats, NUM_TX_STATS);

	if (tx->br != NULL) {
		buf_ring_free(tx->br, M_DEVBUF);
		tx->br = NULL;
	}

	if (mtx_initialized(&tx->ring_mtx))
		mtx_destroy(&tx->ring_mtx);

	if (tx->info != NULL) {
		free(tx->info, M_GVE);
		tx->info = NULL;
	}

	if (tx->desc_ring != NULL) {
		gve_dma_free_coherent(&tx->desc_ring_mem);
		tx->desc_ring = NULL;
	}

	if (com->q_resources != NULL) {
		gve_dma_free_coherent(&com->q_resources_mem);
		com->q_resources = NULL;
	}
}

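/*
 * Allocates the resources for a single tx ring: the QPL-backed FIFO, the
 * per-descriptor buffer state array, the ring mutex and buf_ring, the stat
 * counters, the queue resources shared with the device, and the descriptor
 * ring itself. The abort path relies on gve_tx_free_ring() being safe to
 * call on a partially constructed ring.
 */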
static int
gve_tx_alloc_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;
	char mtx_name[16];
	int err;

	com->priv = priv;
	com->id = i;

	com->qpl = &priv->qpls[i];
	if (com->qpl == NULL) {
		device_printf(priv->dev, "No QPL left for tx ring %d\n", i);
		return (ENOMEM);
	}

	err = gve_tx_fifo_init(priv, tx);
	if (err != 0)
		goto abort;

	tx->info = malloc(sizeof(struct gve_tx_buffer_state) * priv->tx_desc_cnt,
	    M_GVE, M_WAITOK | M_ZERO);

	sprintf(mtx_name, "gvetx%d", i);
	mtx_init(&tx->ring_mtx, mtx_name, NULL, MTX_DEF);

	tx->br = buf_ring_alloc(GVE_TX_BUFRING_ENTRIES, M_DEVBUF,
	    M_WAITOK, &tx->ring_mtx);

	gve_alloc_counters((counter_u64_t *)&tx->stats, NUM_TX_STATS);

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
	    PAGE_SIZE, &com->q_resources_mem);
	if (err != 0) {
		device_printf(priv->dev, "Failed to alloc queue resources for tx ring %d", i);
		goto abort;
	}
	com->q_resources = com->q_resources_mem.cpu_addr;

	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_tx_desc) * priv->tx_desc_cnt,
	    CACHE_LINE_SIZE, &tx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev, "Failed to alloc desc ring for tx ring %d", i);
		goto abort;
	}
	tx->desc_ring = tx->desc_ring_mem.cpu_addr;

	return (0);

abort:
	gve_tx_free_ring(priv, i);
	return (err);
}

int
gve_alloc_tx_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.num_queues,
	    M_GVE, M_WAITOK | M_ZERO);

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err != 0)
			goto free_rings;
	}

	return (0);

free_rings:
	while (i--)
		gve_tx_free_ring(priv, i);
	free(priv->tx, M_GVE);
	return (err);
}

void
gve_free_tx_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);

	free(priv->tx, M_GVE);
}

static void
gve_tx_clear_desc_ring(struct gve_tx_ring *tx)
{
	struct gve_ring_com *com = &tx->com;
	int i;

	for (i = 0; i < com->priv->tx_desc_cnt; i++) {
		tx->desc_ring[i] = (union gve_tx_desc){};
		tx->info[i] = (struct gve_tx_buffer_state){};
	}

	bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_clear_tx_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_tx_fifo *fifo = &tx->fifo;

	tx->req = 0;
	tx->done = 0;
	tx->mask = priv->tx_desc_cnt - 1;

	atomic_store_int(&fifo->available, fifo->size);
	fifo->head = 0;

	gve_tx_clear_desc_ring(tx);
}

static void
gve_start_tx_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;

	NET_TASK_INIT(&com->cleanup_task, 0, gve_tx_cleanup_tq, tx);
	com->cleanup_tq = taskqueue_create_fast("gve tx", M_WAITOK,
	    taskqueue_thread_enqueue, &com->cleanup_tq);
	taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET, "%s txq %d",
	    device_get_nameunit(priv->dev), i);

	TASK_INIT(&tx->xmit_task, 0, gve_xmit_tq, tx);
	tx->xmit_tq = taskqueue_create_fast("gve tx xmit",
	    M_WAITOK, taskqueue_thread_enqueue, &tx->xmit_tq);
	taskqueue_start_threads(&tx->xmit_tq, 1, PI_NET, "%s txq %d xmit",
	    device_get_nameunit(priv->dev), i);
}

int
gve_create_tx_rings(struct gve_priv *priv)
{
	struct gve_ring_com *com;
	struct gve_tx_ring *tx;
	int err;
	int i;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK))
		return (0);

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_clear_tx_ring(priv, i);

	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
	if (err != 0)
		return (err);

	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_POSTREAD);

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		tx = &priv->tx[i];
		com = &tx->com;

		com->irq_db_offset = 4 * be32toh(priv->irq_db_indices[com->ntfy_id].index);

		bus_dmamap_sync(com->q_resources_mem.tag, com->q_resources_mem.map,
		    BUS_DMASYNC_POSTREAD);
		com->db_offset = 4 * be32toh(com->q_resources->db_index);
		com->counter_idx = be32toh(com->q_resources->counter_index);

		gve_start_tx_ring(priv, i);
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);
	return (0);
}

static void
gve_stop_tx_ring(struct gve_priv *priv, int i)
{
	struct gve_tx_ring *tx = &priv->tx[i];
	struct gve_ring_com *com = &tx->com;

	if (com->cleanup_tq != NULL) {
		taskqueue_quiesce(com->cleanup_tq);
		taskqueue_free(com->cleanup_tq);
		com->cleanup_tq = NULL;
	}

	if (tx->xmit_tq != NULL) {
		taskqueue_quiesce(tx->xmit_tq);
		taskqueue_free(tx->xmit_tq);
		tx->xmit_tq = NULL;
	}
}

int
gve_destroy_tx_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_stop_tx_ring(priv, i);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK)) {
		err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
		if (err != 0)
			return (err);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);
	}

	return (0);
}

int
gve_tx_intr(void *arg)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	struct gve_ring_com *com = &tx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	gve_db_bar_write_4(priv, com->irq_db_offset, GVE_IRQ_MASK);
	taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task);
	return (FILTER_HANDLED);
}

static uint32_t
gve_tx_load_event_counter(struct gve_priv *priv, struct gve_tx_ring *tx)
{
	bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map,
	    BUS_DMASYNC_POSTREAD);
	uint32_t counter = priv->counters[tx->com.counter_idx];
	return (be32toh(counter));
}

static void
gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add_int(&fifo->available, bytes);
}

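/*
 * Cleanup taskqueue handler. The NIC reports completions by advancing the
 * per-queue event counter, so every descriptor between tx->done and that
 * counter can be reclaimed: the associated mbufs are freed, their FIFO bytes
 * are returned, and the interrupt doorbell is acked and re-armed
 * (GVE_IRQ_ACK | GVE_IRQ_EVENT). The counter is then re-read so completions
 * that raced with the re-arm are picked up by re-enqueueing this task.
 */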
void
gve_tx_cleanup_tq(void *arg, int pending)
{
	struct gve_tx_ring *tx = arg;
	struct gve_priv *priv = tx->com.priv;
	uint32_t nic_done = gve_tx_load_event_counter(priv, tx);
	uint32_t todo = nic_done - tx->done;
	size_t space_freed = 0;
	int i, j;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	for (j = 0; j < todo; j++) {
		uint32_t idx = tx->done & tx->mask;
		struct gve_tx_buffer_state *info = &tx->info[idx];
		struct mbuf *mbuf = info->mbuf;

		tx->done++;
		if (mbuf == NULL)
			continue;

		info->mbuf = NULL;
		counter_enter();
		counter_u64_add_protected(tx->stats.tbytes, mbuf->m_pkthdr.len);
		counter_u64_add_protected(tx->stats.tpackets, 1);
		counter_exit();
		m_freem(mbuf);

		for (i = 0; i < GVE_TX_MAX_DESCS; i++) {
			space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
			info->iov[i].iov_len = 0;
			info->iov[i].iov_padding = 0;
		}
	}

	gve_tx_free_fifo(&tx->fifo, space_freed);

	gve_db_bar_write_4(priv, tx->com.irq_db_offset,
	    GVE_IRQ_ACK | GVE_IRQ_EVENT);

	/*
	 * Completions born before this barrier MAY NOT cause the NIC to send an
	 * interrupt but they will still be handled by the enqueue below.
	 * Completions born after the barrier WILL trigger an interrupt.
	 */
	mb();

	nic_done = gve_tx_load_event_counter(priv, tx);
	todo = nic_done - tx->done;
	if (todo != 0) {
		gve_db_bar_write_4(priv, tx->com.irq_db_offset, GVE_IRQ_MASK);
		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
	}
}

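/*
 * Syncs every QPL page spanned by [iov_offset, iov_offset + iov_len) for
 * device access, so the NIC sees the bytes just copied into the FIFO.
 */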
static void
gve_dma_sync_for_device(struct gve_queue_page_list *qpl,
    uint64_t iov_offset, uint64_t iov_len)
{
	uint64_t last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
	uint64_t first_page = iov_offset / PAGE_SIZE;
	struct gve_dma_handle *dma;
	uint64_t page;

	for (page = first_page; page <= last_page; page++) {
		dma = &(qpl->dmas[page]);
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE);
	}
}

static void
gve_tx_fill_mtd_desc(struct gve_tx_mtd_desc *mtd_desc, struct mbuf *mbuf)
{
	mtd_desc->type_flags = GVE_TXD_MTD | GVE_MTD_SUBTYPE_PATH;
	mtd_desc->path_state = GVE_MTD_PATH_STATE_DEFAULT | GVE_MTD_PATH_HASH_L4;
	mtd_desc->path_hash = htobe32(mbuf->m_pkthdr.flowid);
	mtd_desc->reserved0 = 0;
	mtd_desc->reserved1 = 0;
}

static void
gve_tx_fill_pkt_desc(struct gve_tx_pkt_desc *pkt_desc, bool is_tso,
    uint16_t l4_hdr_offset, uint32_t desc_cnt,
    uint16_t first_seg_len, uint64_t addr, bool has_csum_flag,
    int csum_offset, uint16_t pkt_len)
{
	if (is_tso) {
		pkt_desc->type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		pkt_desc->l4_csum_offset = csum_offset >> 1;
		pkt_desc->l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (has_csum_flag) {
		pkt_desc->type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		pkt_desc->l4_csum_offset = csum_offset >> 1;
		pkt_desc->l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		pkt_desc->type_flags = GVE_TXD_STD;
		pkt_desc->l4_csum_offset = 0;
		pkt_desc->l4_hdr_offset = 0;
	}
	pkt_desc->desc_cnt = desc_cnt;
	pkt_desc->len = htobe16(pkt_len);
	pkt_desc->seg_len = htobe16(first_seg_len);
	pkt_desc->seg_addr = htobe64(addr);
}

static void
gve_tx_fill_seg_desc(struct gve_tx_seg_desc *seg_desc,
    bool is_tso, uint16_t len, uint64_t addr,
    bool is_ipv6, uint8_t l3_off, uint16_t tso_mss)
{
	seg_desc->type_flags = GVE_TXD_SEG;
	if (is_tso) {
		if (is_ipv6)
			seg_desc->type_flags |= GVE_TXSF_IPV6;
		seg_desc->l3_offset = l3_off >> 1;
		seg_desc->mss = htobe16(tso_mss);
	}
	seg_desc->seg_len = htobe16(len);
	seg_desc->seg_addr = htobe64(addr);
}

static inline uint32_t
gve_tx_avail(struct gve_tx_ring *tx)
{
	return (tx->mask + 1 - (tx->req - tx->done));
}

static bool
gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (atomic_load_int(&fifo->available) >= bytes);
}

static inline bool
gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	return (gve_tx_avail(tx) >= (GVE_TX_MAX_DESCS + 1) &&
	    gve_tx_fifo_can_alloc(&tx->fifo, bytes_required));
}

static int
gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

static inline int
gve_fifo_bytes_required(struct gve_tx_ring *tx, uint16_t first_seg_len,
    uint16_t pkt_len)
{
	int pad_bytes, align_hdr_pad;
	int bytes;

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->fifo, first_seg_len);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = roundup2(first_seg_len, CACHE_LINE_SIZE) - first_seg_len;
	bytes = align_hdr_pad + pad_bytes + pkt_len;

	return (bytes);
}

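/*
 * Carves `bytes` out of the FIFO at the current head. If the request does not
 * fit before the end of the FIFO, it wraps and the allocation is described by
 * two iovecs (the tail fragment plus a fragment at the start of the FIFO).
 * The head is then padded up to the next cacheline boundary and that padding
 * is charged to the last iovec. For example, with a 4 KiB FIFO, head at 4000
 * and a 200-byte request, iov[0] covers bytes 4000-4095, iov[1] covers bytes
 * 0-103, and the head is then realigned to 128 (assuming a 64-byte cacheline).
 */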
static int
gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
    struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	uint32_t aligned_head;
	int nfrags = 0;

	if (bytes == 0)
		return (0);

	/*
	 * This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine,
	 * because the FIFO head always starts aligned, and the FIFO's
	 * boundaries are aligned, so if there is space for the data, there is
	 * space for the padding to the next alignment.
	 */
	KASSERT(gve_tx_fifo_can_alloc(fifo, bytes),
	    ("Allocating gve tx fifo when there is no room"));

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/*
		 * If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of fifo */
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = roundup2(fifo->head, CACHE_LINE_SIZE);
	padding = aligned_head - fifo->head;
	iov[nfrags - 1].iov_padding = padding;
	atomic_add_int(&fifo->available, -(bytes + padding));
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return (nfrags);
}

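/*
 * Each packet is emitted as one packet descriptor, an optional metadata
 * descriptor carrying the flow hash, and one segment descriptor per payload
 * iovec. The first first_seg_len bytes (the headers) are copied into a FIFO
 * fragment that is never split across the FIFO boundary; the remaining
 * payload follows in up to two more FIFO fragments if it wraps.
 */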
/* The only error this returns is ENOBUFS when the tx fifo is short of space. */
static int
gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
{
	bool is_tso, has_csum_flag, is_ipv6 = false, is_tcp = false, is_udp = false;
	int csum_flags, csum_offset, mtd_desc_nr, offset, copy_offset;
	uint16_t tso_mss, l4_off, l4_data_off, pkt_len, first_seg_len;
	int pad_bytes, hdr_nfrags, payload_nfrags;
	struct gve_tx_pkt_desc *pkt_desc;
	struct gve_tx_seg_desc *seg_desc;
	struct gve_tx_mtd_desc *mtd_desc;
	struct gve_tx_buffer_state *info;
	uint32_t idx = tx->req & tx->mask;
	struct ether_header *eh;
	struct mbuf *mbuf_next;
	int payload_iov = 2;
	int bytes_required;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	uint32_t next_idx;
	uint8_t l3_off;
	struct ip *ip;
	int i;

	info = &tx->info[idx];
	csum_flags = mbuf->m_pkthdr.csum_flags;
	pkt_len = mbuf->m_pkthdr.len;
	is_tso = csum_flags & CSUM_TSO;
	has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP |
	    CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO);
	mtd_desc_nr = M_HASHTYPE_GET(mbuf) != M_HASHTYPE_NONE ? 1 : 0;
	tso_mss = is_tso ? mbuf->m_pkthdr.tso_segsz : 0;

	eh = mtod(mbuf, struct ether_header *);
	KASSERT(eh->ether_type != ETHERTYPE_VLAN,
	    ("VLAN-tagged packets not supported"));

	is_ipv6 = ntohs(eh->ether_type) == ETHERTYPE_IPV6;
	l3_off = ETHER_HDR_LEN;
	mbuf_next = m_getptr(mbuf, l3_off, &offset);

	if (is_ipv6) {
		ip6 = (struct ip6_hdr *)(mtodo(mbuf_next, offset));
		l4_off = l3_off + sizeof(struct ip6_hdr);
		is_tcp = (ip6->ip6_nxt == IPPROTO_TCP);
		is_udp = (ip6->ip6_nxt == IPPROTO_UDP);
		mbuf_next = m_getptr(mbuf, l4_off, &offset);
	} else if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
		ip = (struct ip *)(mtodo(mbuf_next, offset));
		l4_off = l3_off + (ip->ip_hl << 2);
		is_tcp = (ip->ip_p == IPPROTO_TCP);
		is_udp = (ip->ip_p == IPPROTO_UDP);
		mbuf_next = m_getptr(mbuf, l4_off, &offset);
	}

	l4_data_off = 0;
	if (is_tcp) {
		th = (struct tcphdr *)(mtodo(mbuf_next, offset));
		l4_data_off = l4_off + (th->th_off << 2);
	} else if (is_udp)
		l4_data_off = l4_off + sizeof(struct udphdr);

	if (has_csum_flag) {
		if ((csum_flags & (CSUM_TSO | CSUM_TCP | CSUM_IP6_TCP)) != 0)
			csum_offset = offsetof(struct tcphdr, th_sum);
		else
			csum_offset = offsetof(struct udphdr, uh_sum);
	}

	/*
	 * If this packet is neither a TCP nor a UDP packet, the first segment,
	 * the one represented by the packet descriptor, will carry the
	 * spec-stipulated minimum of 182B.
	 */
	if (l4_data_off != 0)
		first_seg_len = l4_data_off;
	else
		first_seg_len = MIN(pkt_len, GVE_GQ_TX_MIN_PKT_DESC_BYTES);

	bytes_required = gve_fifo_bytes_required(tx, first_seg_len, pkt_len);
	if (__predict_false(!gve_can_tx(tx, bytes_required))) {
		counter_enter();
		counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_device, 1);
		counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
		counter_exit();
		return (ENOBUFS);
	}

	/* So that the cleanup taskqueue can free the mbuf eventually. */
	info->mbuf = mbuf;

	/*
	 * We don't want to split the header, so if necessary, pad to the end
	 * of the fifo and then put the header at the beginning of the fifo.
	 */
	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->fifo, first_seg_len);
	hdr_nfrags = gve_tx_alloc_fifo(&tx->fifo, first_seg_len + pad_bytes,
	    &info->iov[0]);
	KASSERT(hdr_nfrags > 0, ("Number of header fragments for gve tx is 0"));
	payload_nfrags = gve_tx_alloc_fifo(&tx->fifo, pkt_len - first_seg_len,
	    &info->iov[payload_iov]);

	pkt_desc = &tx->desc_ring[idx].pkt;
	gve_tx_fill_pkt_desc(pkt_desc, is_tso, l4_off,
	    1 + mtd_desc_nr + payload_nfrags, first_seg_len,
	    info->iov[hdr_nfrags - 1].iov_offset, has_csum_flag, csum_offset,
	    pkt_len);

	m_copydata(mbuf, 0, first_seg_len,
	    (char *)tx->fifo.base + info->iov[hdr_nfrags - 1].iov_offset);
	gve_dma_sync_for_device(tx->com.qpl,
	    info->iov[hdr_nfrags - 1].iov_offset,
	    info->iov[hdr_nfrags - 1].iov_len);
	copy_offset = first_seg_len;

	if (mtd_desc_nr == 1) {
		next_idx = (tx->req + 1) & tx->mask;
		mtd_desc = &tx->desc_ring[next_idx].mtd;
		gve_tx_fill_mtd_desc(mtd_desc, mbuf);
	}

	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
		next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask;
		seg_desc = &tx->desc_ring[next_idx].seg;

		gve_tx_fill_seg_desc(seg_desc, is_tso, info->iov[i].iov_len,
		    info->iov[i].iov_offset, is_ipv6, l3_off, tso_mss);

		m_copydata(mbuf, copy_offset, info->iov[i].iov_len,
		    (char *)tx->fifo.base + info->iov[i].iov_offset);
		gve_dma_sync_for_device(tx->com.qpl,
		    info->iov[i].iov_offset, info->iov[i].iov_len);
		copy_offset += info->iov[i].iov_len;
	}

	tx->req += (1 + mtd_desc_nr + payload_nfrags);
	if (is_tso) {
		counter_enter();
		counter_u64_add_protected(tx->stats.tso_packet_cnt, 1);
		counter_exit();
	}
	return (0);
}

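/*
 * Drains the buf_ring while the interface is running. If gve_xmit() fails
 * because the tx FIFO or descriptor ring is out of space, the mbuf is put
 * back at the head of the ring and the xmit taskqueue is scheduled to retry
 * later; otherwise the descriptor ring is synced and the doorbell is rung
 * with the new producer index (tx->req).
 */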
static void
gve_xmit_br(struct gve_tx_ring *tx)
{
	struct gve_priv *priv = tx->com.priv;
	struct ifnet *ifp = priv->ifp;
	struct mbuf *mbuf;

	while (!drbr_empty(ifp, tx->br) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {

		mbuf = drbr_peek(ifp, tx->br);
		if (__predict_false(gve_xmit(tx, mbuf) != 0)) {
			drbr_putback(ifp, tx->br, mbuf);
			taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
			break;
		}

		bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
		    BUS_DMASYNC_PREWRITE);
		gve_db_bar_write_4(priv, tx->com.db_offset, tx->req);

		drbr_advance(ifp, tx->br);
		BPF_MTAP(ifp, mbuf);
	}
}

void
gve_xmit_tq(void *arg, int pending)
{
	struct gve_tx_ring *tx = (struct gve_tx_ring *)arg;

	GVE_RING_LOCK(tx);
	gve_xmit_br(tx);
	GVE_RING_UNLOCK(tx);
}

static bool
is_vlan_tagged_pkt(struct mbuf *mbuf)
{
	struct ether_header *eh;

	eh = mtod(mbuf, struct ether_header *);
	return (ntohs(eh->ether_type) == ETHERTYPE_VLAN);
}

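/*
 * if_transmit handler. Packets with an RSS flow ID are steered to a queue by
 * flowid modulo the number of queues; others fall back to the current CPU.
 * VLAN-tagged packets are not supported and are dropped. If the selected
 * buf_ring was empty and the ring lock is available, the packet is
 * transmitted inline for lower latency; otherwise the xmit taskqueue picks
 * it up.
 */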
int
gve_xmit_ifp(if_t ifp, struct mbuf *mbuf)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	struct gve_tx_ring *tx;
	bool is_br_empty;
	int err;
	uint32_t i;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (ENODEV);

	if (M_HASHTYPE_GET(mbuf) != M_HASHTYPE_NONE)
		i = mbuf->m_pkthdr.flowid % priv->tx_cfg.num_queues;
	else
		i = curcpu % priv->tx_cfg.num_queues;
	tx = &priv->tx[i];

	if (__predict_false(is_vlan_tagged_pkt(mbuf))) {
		counter_enter();
		counter_u64_add_protected(tx->stats.tx_dropped_pkt_vlan, 1);
		counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
		counter_exit();
		m_freem(mbuf);
		return (ENODEV);
	}

	is_br_empty = drbr_empty(ifp, tx->br);
	err = drbr_enqueue(ifp, tx->br, mbuf);
	if (__predict_false(err != 0)) {
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
		counter_enter();
		counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_bufring, 1);
		counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
		counter_exit();
		return (err);
	}

	/*
	 * If the mbuf we just enqueued is the only one on the ring, then
	 * transmit it right away in the interests of low latency.
	 */
	if (is_br_empty && (GVE_RING_TRYLOCK(tx) != 0)) {
		gve_xmit_br(tx);
		GVE_RING_UNLOCK(tx);
	} else {
		taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
	}

	return (0);
}

void
gve_qflush(if_t ifp)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	struct gve_tx_ring *tx;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; ++i) {
		tx = &priv->tx[i];
		if (drbr_empty(ifp, tx->br) == 0) {
			GVE_RING_LOCK(tx);
			drbr_flush(ifp, tx->br);
			GVE_RING_UNLOCK(tx);
		}
	}

	if_qflush(ifp);
}