1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2010-2016 Solarflare Communications Inc. 5 * All rights reserved. 6 * 7 * This software was developed in part by Philip Paeps under contract for 8 * Solarflare Communications, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright notice, 14 * this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 29 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 * The views and conclusions contained in the software and documentation are 32 * those of the authors and should not be interpreted as representing official 33 * policies, either expressed or implied, of the FreeBSD Project. 34 */ 35 36 /* Theory of operation: 37 * 38 * Tx queues allocation and mapping on Siena 39 * 40 * One Tx queue with enabled checksum offload is allocated per Rx channel 41 * (event queue). Also 2 Tx queues (one without checksum offload and one 42 * with IP checksum offload only) are allocated and bound to event queue 0. 43 * sfxge_txq_type is used as Tx queue label. 44 * 45 * So, event queue plus label mapping to Tx queue index is: 46 * if event queue index is 0, TxQ-index = TxQ-label * [0..SFXGE_TXQ_NTYPES) 47 * else TxQ-index = SFXGE_TXQ_NTYPES + EvQ-index - 1 48 * See sfxge_get_txq_by_label() sfxge_ev.c 49 * 50 * Tx queue allocation and mapping on EF10 51 * 52 * One Tx queue with enabled checksum offload is allocated per Rx 53 * channel (event queue). Checksum offload on all Tx queues is enabled or 54 * disabled dynamically by inserting option descriptors, so the additional 55 * queues used on Siena are not required. 56 * 57 * TxQ label is always set to zero on EF10 hardware. 58 * So, event queue to Tx queue mapping is simple: 59 * TxQ-index = EvQ-index 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_rss.h" 66 67 #include <sys/param.h> 68 #include <sys/malloc.h> 69 #include <sys/mbuf.h> 70 #include <sys/smp.h> 71 #include <sys/socket.h> 72 #include <sys/sysctl.h> 73 #include <sys/syslog.h> 74 #include <sys/limits.h> 75 76 #include <net/bpf.h> 77 #include <net/ethernet.h> 78 #include <net/if.h> 79 #include <net/if_vlan_var.h> 80 81 #include <netinet/in.h> 82 #include <netinet/ip.h> 83 #include <netinet/ip6.h> 84 #include <netinet/tcp.h> 85 86 #ifdef RSS 87 #include <net/rss_config.h> 88 #endif 89 90 #include "common/efx.h" 91 92 #include "sfxge.h" 93 #include "sfxge_tx.h" 94 95 #define SFXGE_PARAM_TX_DPL_GET_MAX SFXGE_PARAM(tx_dpl_get_max) 96 static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT; 97 TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max); 98 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN, 99 &sfxge_tx_dpl_get_max, 0, 100 "Maximum number of any packets in deferred packet get-list"); 101 102 #define SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \ 103 SFXGE_PARAM(tx_dpl_get_non_tcp_max) 104 static int sfxge_tx_dpl_get_non_tcp_max = 105 SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT; 106 TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max); 107 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN, 108 &sfxge_tx_dpl_get_non_tcp_max, 0, 109 "Maximum number of non-TCP packets in deferred packet get-list"); 110 111 #define SFXGE_PARAM_TX_DPL_PUT_MAX SFXGE_PARAM(tx_dpl_put_max) 112 static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT; 113 TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max); 114 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN, 115 &sfxge_tx_dpl_put_max, 0, 116 "Maximum number of any packets in deferred packet put-list"); 117 118 #define SFXGE_PARAM_TSO_FW_ASSISTED SFXGE_PARAM(tso_fw_assisted) 119 static int sfxge_tso_fw_assisted = (SFXGE_FATSOV1 | SFXGE_FATSOV2); 120 TUNABLE_INT(SFXGE_PARAM_TSO_FW_ASSISTED, &sfxge_tso_fw_assisted); 121 SYSCTL_INT(_hw_sfxge, OID_AUTO, tso_fw_assisted, CTLFLAG_RDTUN, 122 &sfxge_tso_fw_assisted, 0, 123 "Bitmask of FW-assisted TSO allowed to use if supported by NIC firmware"); 124 125 static const struct { 126 const char *name; 127 size_t offset; 128 } sfxge_tx_stats[] = { 129 #define SFXGE_TX_STAT(name, member) \ 130 { #name, offsetof(struct sfxge_txq, member) } 131 SFXGE_TX_STAT(tso_bursts, tso_bursts), 132 SFXGE_TX_STAT(tso_packets, tso_packets), 133 SFXGE_TX_STAT(tso_long_headers, tso_long_headers), 134 SFXGE_TX_STAT(tso_pdrop_too_many, tso_pdrop_too_many), 135 SFXGE_TX_STAT(tso_pdrop_no_rsrc, tso_pdrop_no_rsrc), 136 SFXGE_TX_STAT(tx_collapses, collapses), 137 SFXGE_TX_STAT(tx_drops, drops), 138 SFXGE_TX_STAT(tx_get_overflow, get_overflow), 139 SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow), 140 SFXGE_TX_STAT(tx_put_overflow, put_overflow), 141 SFXGE_TX_STAT(tx_netdown_drops, netdown_drops), 142 }; 143 144 /* Forward declarations. */ 145 static void sfxge_tx_qdpl_service(struct sfxge_txq *txq); 146 static void sfxge_tx_qlist_post(struct sfxge_txq *txq); 147 static void sfxge_tx_qunblock(struct sfxge_txq *txq); 148 static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf, 149 const bus_dma_segment_t *dma_seg, int n_dma_seg, 150 int n_extra_descs); 151 152 static inline void 153 sfxge_next_stmp(struct sfxge_txq *txq, struct sfxge_tx_mapping **pstmp) 154 { 155 KASSERT((*pstmp)->flags == 0, ("stmp flags are not 0")); 156 if (__predict_false(*pstmp == 157 &txq->stmp[txq->ptr_mask])) 158 *pstmp = &txq->stmp[0]; 159 else 160 (*pstmp)++; 161 } 162 163 static int 164 sfxge_tx_maybe_toggle_cksum_offload(struct sfxge_txq *txq, struct mbuf *mbuf, 165 struct sfxge_tx_mapping **pstmp) 166 { 167 uint16_t new_hw_cksum_flags; 168 efx_desc_t *desc; 169 170 if (mbuf->m_pkthdr.csum_flags & 171 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6 | CSUM_TSO)) { 172 /* 173 * We always set EFX_TXQ_CKSUM_IPV4 here because this 174 * configuration is the most useful, and this won't 175 * cause any trouble in case of IPv6 traffic anyway. 176 */ 177 new_hw_cksum_flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP; 178 } else if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_IP) { 179 new_hw_cksum_flags = EFX_TXQ_CKSUM_IPV4; 180 } else { 181 new_hw_cksum_flags = 0; 182 } 183 184 if (new_hw_cksum_flags == txq->hw_cksum_flags) 185 return (0); 186 187 desc = &txq->pend_desc[txq->n_pend_desc]; 188 efx_tx_qdesc_checksum_create(txq->common, new_hw_cksum_flags, desc); 189 txq->hw_cksum_flags = new_hw_cksum_flags; 190 txq->n_pend_desc++; 191 192 sfxge_next_stmp(txq, pstmp); 193 194 return (1); 195 } 196 197 static int 198 sfxge_tx_maybe_insert_tag(struct sfxge_txq *txq, struct mbuf *mbuf, 199 struct sfxge_tx_mapping **pstmp) 200 { 201 uint16_t this_tag = ((mbuf->m_flags & M_VLANTAG) ? 202 mbuf->m_pkthdr.ether_vtag : 203 0); 204 efx_desc_t *desc; 205 206 if (this_tag == txq->hw_vlan_tci) 207 return (0); 208 209 desc = &txq->pend_desc[txq->n_pend_desc]; 210 efx_tx_qdesc_vlantci_create(txq->common, bswap16(this_tag), desc); 211 txq->hw_vlan_tci = this_tag; 212 txq->n_pend_desc++; 213 214 sfxge_next_stmp(txq, pstmp); 215 216 return (1); 217 } 218 219 void 220 sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq) 221 { 222 unsigned int completed; 223 224 SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 225 226 completed = txq->completed; 227 while (completed != txq->pending) { 228 struct sfxge_tx_mapping *stmp; 229 unsigned int id; 230 231 id = completed++ & txq->ptr_mask; 232 233 stmp = &txq->stmp[id]; 234 if (stmp->flags & TX_BUF_UNMAP) { 235 bus_dmamap_unload(txq->packet_dma_tag, stmp->map); 236 if (stmp->flags & TX_BUF_MBUF) { 237 struct mbuf *m = stmp->u.mbuf; 238 do 239 m = m_free(m); 240 while (m != NULL); 241 } else { 242 free(stmp->u.heap_buf, M_SFXGE); 243 } 244 stmp->flags = 0; 245 } 246 } 247 txq->completed = completed; 248 249 /* Check whether we need to unblock the queue. */ 250 mb(); 251 if (txq->blocked) { 252 unsigned int level; 253 254 level = txq->added - txq->completed; 255 if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries)) 256 sfxge_tx_qunblock(txq); 257 } 258 } 259 260 static unsigned int 261 sfxge_is_mbuf_non_tcp(struct mbuf *mbuf) 262 { 263 /* Absence of TCP checksum flags does not mean that it is non-TCP 264 * but it should be true if user wants to achieve high throughput. 265 */ 266 return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))); 267 } 268 269 /* 270 * Reorder the put list and append it to the get list. 271 */ 272 static void 273 sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq) 274 { 275 struct sfxge_tx_dpl *stdp; 276 struct mbuf *mbuf, *get_next, **get_tailp; 277 volatile uintptr_t *putp; 278 uintptr_t put; 279 unsigned int count; 280 unsigned int non_tcp_count; 281 282 SFXGE_TXQ_LOCK_ASSERT_OWNED(txq); 283 284 stdp = &txq->dpl; 285 286 /* Acquire the put list. */ 287 putp = &stdp->std_put; 288 put = atomic_readandclear_ptr(putp); 289 mbuf = (void *)put; 290 291 if (mbuf == NULL) 292 return; 293 294 /* Reverse the put list. */ 295 get_tailp = &mbuf->m_nextpkt; 296 get_next = NULL; 297 298 count = 0; 299 non_tcp_count = 0; 300 do { 301 struct mbuf *put_next; 302 303 non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf); 304 put_next = mbuf->m_nextpkt; 305 mbuf->m_nextpkt = get_next; 306 get_next = mbuf; 307 mbuf = put_next; 308 309 count++; 310 } while (mbuf != NULL); 311 312 if (count > stdp->std_put_hiwat) 313 stdp->std_put_hiwat = count; 314 315 /* Append the reversed put list to the get list. */ 316 KASSERT(*get_tailp == NULL, ("*get_tailp != NULL")); 317 *stdp->std_getp = get_next; 318 stdp->std_getp = get_tailp; 319 stdp->std_get_count += count; 320 stdp->std_get_non_tcp_count += non_tcp_count; 321 } 322 323 static void 324 sfxge_tx_qreap(struct sfxge_txq *txq) 325 { 326 SFXGE_TXQ_LOCK_ASSERT_OWNED(txq); 327 328 txq->reaped = txq->completed; 329 } 330 331 static void 332 sfxge_tx_qlist_post(struct sfxge_txq *txq) 333 { 334 unsigned int old_added __diagused; 335 unsigned int block_level; 336 unsigned int level; 337 int rc __diagused; 338 339 SFXGE_TXQ_LOCK_ASSERT_OWNED(txq); 340 341 KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0")); 342 KASSERT(txq->n_pend_desc <= txq->max_pkt_desc, 343 ("txq->n_pend_desc too large")); 344 KASSERT(!txq->blocked, ("txq->blocked")); 345 346 old_added = txq->added; 347 348 /* Post the fragment list. */ 349 rc = efx_tx_qdesc_post(txq->common, txq->pend_desc, txq->n_pend_desc, 350 txq->reaped, &txq->added); 351 KASSERT(rc == 0, ("efx_tx_qdesc_post() failed")); 352 353 /* If efx_tx_qdesc_post() had to refragment, our information about 354 * buffers to free may be associated with the wrong 355 * descriptors. 356 */ 357 KASSERT(txq->added - old_added == txq->n_pend_desc, 358 ("efx_tx_qdesc_post() refragmented descriptors")); 359 360 level = txq->added - txq->reaped; 361 KASSERT(level <= txq->entries, ("overfilled TX queue")); 362 363 /* Clear the fragment list. */ 364 txq->n_pend_desc = 0; 365 366 /* 367 * Set the block level to ensure there is space to generate a 368 * large number of descriptors for TSO. 369 */ 370 block_level = EFX_TXQ_LIMIT(txq->entries) - txq->max_pkt_desc; 371 372 /* Have we reached the block level? */ 373 if (level < block_level) 374 return; 375 376 /* Reap, and check again */ 377 sfxge_tx_qreap(txq); 378 level = txq->added - txq->reaped; 379 if (level < block_level) 380 return; 381 382 txq->blocked = 1; 383 384 /* 385 * Avoid a race with completion interrupt handling that could leave 386 * the queue blocked. 387 */ 388 mb(); 389 sfxge_tx_qreap(txq); 390 level = txq->added - txq->reaped; 391 if (level < block_level) { 392 mb(); 393 txq->blocked = 0; 394 } 395 } 396 397 static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf) 398 { 399 bus_dmamap_t *used_map; 400 bus_dmamap_t map; 401 bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG]; 402 unsigned int id; 403 struct sfxge_tx_mapping *stmp; 404 efx_desc_t *desc; 405 int n_dma_seg; 406 int rc; 407 int i; 408 int eop; 409 uint16_t hw_cksum_flags_prev; 410 uint16_t hw_vlan_tci_prev; 411 int n_extra_descs; 412 413 KASSERT(!txq->blocked, ("txq->blocked")); 414 415 #if SFXGE_TX_PARSE_EARLY 416 /* 417 * If software TSO is used, we still need to copy packet header, 418 * even if we have already parsed it early before enqueue. 419 */ 420 if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) && 421 (txq->tso_fw_assisted == 0)) 422 prefetch_read_many(mbuf->m_data); 423 #else 424 /* 425 * Prefetch packet header since we need to parse it and extract 426 * IP ID, TCP sequence number and flags. 427 */ 428 if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) 429 prefetch_read_many(mbuf->m_data); 430 #endif 431 432 if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED)) { 433 rc = EINTR; 434 goto reject; 435 } 436 437 /* Load the packet for DMA. */ 438 id = txq->added & txq->ptr_mask; 439 stmp = &txq->stmp[id]; 440 rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag, stmp->map, 441 mbuf, dma_seg, &n_dma_seg, 0); 442 if (rc == EFBIG) { 443 /* Try again. */ 444 struct mbuf *new_mbuf = m_collapse(mbuf, M_NOWAIT, 445 SFXGE_TX_MAPPING_MAX_SEG); 446 if (new_mbuf == NULL) 447 goto reject; 448 ++txq->collapses; 449 mbuf = new_mbuf; 450 rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag, 451 stmp->map, mbuf, 452 dma_seg, &n_dma_seg, 0); 453 } 454 if (rc != 0) 455 goto reject; 456 457 /* Make the packet visible to the hardware. */ 458 bus_dmamap_sync(txq->packet_dma_tag, stmp->map, BUS_DMASYNC_PREWRITE); 459 460 used_map = &stmp->map; 461 462 hw_cksum_flags_prev = txq->hw_cksum_flags; 463 hw_vlan_tci_prev = txq->hw_vlan_tci; 464 465 /* 466 * The order of option descriptors, which are used to leverage VLAN tag 467 * and checksum offloads, might be important. Changing checksum offload 468 * between VLAN option and packet descriptors probably does not work. 469 */ 470 n_extra_descs = sfxge_tx_maybe_toggle_cksum_offload(txq, mbuf, &stmp); 471 n_extra_descs += sfxge_tx_maybe_insert_tag(txq, mbuf, &stmp); 472 473 if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) { 474 rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg, 475 n_extra_descs); 476 if (rc < 0) 477 goto reject_mapped; 478 stmp = &txq->stmp[(rc - 1) & txq->ptr_mask]; 479 } else { 480 /* Add the mapping to the fragment list, and set flags 481 * for the buffer. 482 */ 483 484 i = 0; 485 for (;;) { 486 desc = &txq->pend_desc[i + n_extra_descs]; 487 eop = (i == n_dma_seg - 1); 488 efx_tx_qdesc_dma_create(txq->common, 489 dma_seg[i].ds_addr, 490 dma_seg[i].ds_len, 491 eop, 492 desc); 493 if (eop) 494 break; 495 i++; 496 sfxge_next_stmp(txq, &stmp); 497 } 498 txq->n_pend_desc = n_dma_seg + n_extra_descs; 499 } 500 501 /* 502 * If the mapping required more than one descriptor 503 * then we need to associate the DMA map with the last 504 * descriptor, not the first. 505 */ 506 if (used_map != &stmp->map) { 507 map = stmp->map; 508 stmp->map = *used_map; 509 *used_map = map; 510 } 511 512 stmp->u.mbuf = mbuf; 513 stmp->flags = TX_BUF_UNMAP | TX_BUF_MBUF; 514 515 /* Post the fragment list. */ 516 sfxge_tx_qlist_post(txq); 517 518 return (0); 519 520 reject_mapped: 521 txq->hw_vlan_tci = hw_vlan_tci_prev; 522 txq->hw_cksum_flags = hw_cksum_flags_prev; 523 bus_dmamap_unload(txq->packet_dma_tag, *used_map); 524 reject: 525 /* Drop the packet on the floor. */ 526 m_freem(mbuf); 527 ++txq->drops; 528 529 return (rc); 530 } 531 532 /* 533 * Drain the deferred packet list into the transmit queue. 534 */ 535 static void 536 sfxge_tx_qdpl_drain(struct sfxge_txq *txq) 537 { 538 struct sfxge_softc *sc; 539 struct sfxge_tx_dpl *stdp; 540 struct mbuf *mbuf, *next; 541 unsigned int count; 542 unsigned int non_tcp_count; 543 unsigned int pushed; 544 int rc; 545 546 SFXGE_TXQ_LOCK_ASSERT_OWNED(txq); 547 548 sc = txq->sc; 549 stdp = &txq->dpl; 550 pushed = txq->added; 551 552 if (__predict_true(txq->init_state == SFXGE_TXQ_STARTED)) { 553 prefetch_read_many(sc->enp); 554 prefetch_read_many(txq->common); 555 } 556 557 mbuf = stdp->std_get; 558 count = stdp->std_get_count; 559 non_tcp_count = stdp->std_get_non_tcp_count; 560 561 if (count > stdp->std_get_hiwat) 562 stdp->std_get_hiwat = count; 563 564 while (count != 0) { 565 KASSERT(mbuf != NULL, ("mbuf == NULL")); 566 567 next = mbuf->m_nextpkt; 568 mbuf->m_nextpkt = NULL; 569 570 ETHER_BPF_MTAP(sc->ifnet, mbuf); /* packet capture */ 571 572 if (next != NULL) 573 prefetch_read_many(next); 574 575 rc = sfxge_tx_queue_mbuf(txq, mbuf); 576 --count; 577 non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf); 578 mbuf = next; 579 if (rc != 0) 580 continue; 581 582 if (txq->blocked) 583 break; 584 585 /* Push the fragments to the hardware in batches. */ 586 if (txq->added - pushed >= SFXGE_TX_BATCH) { 587 efx_tx_qpush(txq->common, txq->added, pushed); 588 pushed = txq->added; 589 } 590 } 591 592 if (count == 0) { 593 KASSERT(mbuf == NULL, ("mbuf != NULL")); 594 KASSERT(non_tcp_count == 0, 595 ("inconsistent TCP/non-TCP detection")); 596 stdp->std_get = NULL; 597 stdp->std_get_count = 0; 598 stdp->std_get_non_tcp_count = 0; 599 stdp->std_getp = &stdp->std_get; 600 } else { 601 stdp->std_get = mbuf; 602 stdp->std_get_count = count; 603 stdp->std_get_non_tcp_count = non_tcp_count; 604 } 605 606 if (txq->added != pushed) 607 efx_tx_qpush(txq->common, txq->added, pushed); 608 609 KASSERT(txq->blocked || stdp->std_get_count == 0, 610 ("queue unblocked but count is non-zero")); 611 } 612 613 #define SFXGE_TX_QDPL_PENDING(_txq) ((_txq)->dpl.std_put != 0) 614 615 /* 616 * Service the deferred packet list. 617 * 618 * NOTE: drops the txq mutex! 619 */ 620 static void 621 sfxge_tx_qdpl_service(struct sfxge_txq *txq) 622 { 623 SFXGE_TXQ_LOCK_ASSERT_OWNED(txq); 624 625 do { 626 if (SFXGE_TX_QDPL_PENDING(txq)) 627 sfxge_tx_qdpl_swizzle(txq); 628 629 if (!txq->blocked) 630 sfxge_tx_qdpl_drain(txq); 631 632 SFXGE_TXQ_UNLOCK(txq); 633 } while (SFXGE_TX_QDPL_PENDING(txq) && 634 SFXGE_TXQ_TRYLOCK(txq)); 635 } 636 637 /* 638 * Put a packet on the deferred packet get-list. 639 */ 640 static int 641 sfxge_tx_qdpl_put_locked(struct sfxge_txq *txq, struct mbuf *mbuf) 642 { 643 struct sfxge_tx_dpl *stdp; 644 645 stdp = &txq->dpl; 646 647 KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL")); 648 649 SFXGE_TXQ_LOCK_ASSERT_OWNED(txq); 650 651 if (stdp->std_get_count >= stdp->std_get_max) { 652 txq->get_overflow++; 653 return (ENOBUFS); 654 } 655 if (sfxge_is_mbuf_non_tcp(mbuf)) { 656 if (stdp->std_get_non_tcp_count >= 657 stdp->std_get_non_tcp_max) { 658 txq->get_non_tcp_overflow++; 659 return (ENOBUFS); 660 } 661 stdp->std_get_non_tcp_count++; 662 } 663 664 *(stdp->std_getp) = mbuf; 665 stdp->std_getp = &mbuf->m_nextpkt; 666 stdp->std_get_count++; 667 668 return (0); 669 } 670 671 /* 672 * Put a packet on the deferred packet put-list. 673 * 674 * We overload the csum_data field in the mbuf to keep track of this length 675 * because there is no cheap alternative to avoid races. 676 */ 677 static int 678 sfxge_tx_qdpl_put_unlocked(struct sfxge_txq *txq, struct mbuf *mbuf) 679 { 680 struct sfxge_tx_dpl *stdp; 681 volatile uintptr_t *putp; 682 uintptr_t old; 683 uintptr_t new; 684 unsigned int put_count; 685 686 KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL")); 687 688 SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq); 689 690 stdp = &txq->dpl; 691 putp = &stdp->std_put; 692 new = (uintptr_t)mbuf; 693 694 do { 695 old = *putp; 696 if (old != 0) { 697 struct mbuf *mp = (struct mbuf *)old; 698 put_count = mp->m_pkthdr.csum_data; 699 } else 700 put_count = 0; 701 if (put_count >= stdp->std_put_max) { 702 atomic_add_long(&txq->put_overflow, 1); 703 return (ENOBUFS); 704 } 705 mbuf->m_pkthdr.csum_data = put_count + 1; 706 mbuf->m_nextpkt = (void *)old; 707 } while (atomic_cmpset_ptr(putp, old, new) == 0); 708 709 return (0); 710 } 711 712 /* 713 * Called from if_transmit - will try to grab the txq lock and enqueue to the 714 * put list if it succeeds, otherwise try to push onto the defer list if space. 715 */ 716 static int 717 sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m) 718 { 719 int rc; 720 721 if (!SFXGE_LINK_UP(txq->sc)) { 722 atomic_add_long(&txq->netdown_drops, 1); 723 return (ENETDOWN); 724 } 725 726 /* 727 * Try to grab the txq lock. If we are able to get the lock, 728 * the packet will be appended to the "get list" of the deferred 729 * packet list. Otherwise, it will be pushed on the "put list". 730 */ 731 if (SFXGE_TXQ_TRYLOCK(txq)) { 732 /* First swizzle put-list to get-list to keep order */ 733 sfxge_tx_qdpl_swizzle(txq); 734 735 rc = sfxge_tx_qdpl_put_locked(txq, m); 736 737 /* Try to service the list. */ 738 sfxge_tx_qdpl_service(txq); 739 /* Lock has been dropped. */ 740 } else { 741 rc = sfxge_tx_qdpl_put_unlocked(txq, m); 742 743 /* 744 * Try to grab the lock again. 745 * 746 * If we are able to get the lock, we need to process 747 * the deferred packet list. If we are not able to get 748 * the lock, another thread is processing the list. 749 */ 750 if ((rc == 0) && SFXGE_TXQ_TRYLOCK(txq)) { 751 sfxge_tx_qdpl_service(txq); 752 /* Lock has been dropped. */ 753 } 754 } 755 756 SFXGE_TXQ_LOCK_ASSERT_NOTOWNED(txq); 757 758 return (rc); 759 } 760 761 static void 762 sfxge_tx_qdpl_flush(struct sfxge_txq *txq) 763 { 764 struct sfxge_tx_dpl *stdp = &txq->dpl; 765 struct mbuf *mbuf, *next; 766 767 SFXGE_TXQ_LOCK(txq); 768 769 sfxge_tx_qdpl_swizzle(txq); 770 for (mbuf = stdp->std_get; mbuf != NULL; mbuf = next) { 771 next = mbuf->m_nextpkt; 772 m_freem(mbuf); 773 } 774 stdp->std_get = NULL; 775 stdp->std_get_count = 0; 776 stdp->std_get_non_tcp_count = 0; 777 stdp->std_getp = &stdp->std_get; 778 779 SFXGE_TXQ_UNLOCK(txq); 780 } 781 782 void 783 sfxge_if_qflush(if_t ifp) 784 { 785 struct sfxge_softc *sc; 786 unsigned int i; 787 788 sc = if_getsoftc(ifp); 789 790 for (i = 0; i < sc->txq_count; i++) 791 sfxge_tx_qdpl_flush(sc->txq[i]); 792 } 793 794 #if SFXGE_TX_PARSE_EARLY 795 796 /* There is little space for user data in mbuf pkthdr, so we 797 * use l*hlen fields which are not used by the driver otherwise 798 * to store header offsets. 799 * The fields are 8-bit, but it's ok, no header may be longer than 255 bytes. 800 */ 801 802 #define TSO_MBUF_PROTO(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.sixteen[0]) 803 /* We abuse l5hlen here because PH_loc can hold only 64 bits of data */ 804 #define TSO_MBUF_FLAGS(_mbuf) ((_mbuf)->m_pkthdr.l5hlen) 805 #define TSO_MBUF_PACKETID(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.sixteen[1]) 806 #define TSO_MBUF_SEQNUM(_mbuf) ((_mbuf)->m_pkthdr.PH_loc.thirtytwo[1]) 807 808 static void sfxge_parse_tx_packet(struct mbuf *mbuf) 809 { 810 struct ether_header *eh = mtod(mbuf, struct ether_header *); 811 const struct tcphdr *th; 812 struct tcphdr th_copy; 813 814 /* Find network protocol and header */ 815 TSO_MBUF_PROTO(mbuf) = eh->ether_type; 816 if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_VLAN)) { 817 struct ether_vlan_header *veh = 818 mtod(mbuf, struct ether_vlan_header *); 819 TSO_MBUF_PROTO(mbuf) = veh->evl_proto; 820 mbuf->m_pkthdr.l2hlen = sizeof(*veh); 821 } else { 822 mbuf->m_pkthdr.l2hlen = sizeof(*eh); 823 } 824 825 /* Find TCP header */ 826 if (TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IP)) { 827 const struct ip *iph = (const struct ip *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen); 828 829 KASSERT(iph->ip_p == IPPROTO_TCP, 830 ("TSO required on non-TCP packet")); 831 mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + 4 * iph->ip_hl; 832 TSO_MBUF_PACKETID(mbuf) = iph->ip_id; 833 } else { 834 KASSERT(TSO_MBUF_PROTO(mbuf) == htons(ETHERTYPE_IPV6), 835 ("TSO required on non-IP packet")); 836 KASSERT(((const struct ip6_hdr *)mtodo(mbuf, mbuf->m_pkthdr.l2hlen))->ip6_nxt == 837 IPPROTO_TCP, 838 ("TSO required on non-TCP packet")); 839 mbuf->m_pkthdr.l3hlen = mbuf->m_pkthdr.l2hlen + sizeof(struct ip6_hdr); 840 TSO_MBUF_PACKETID(mbuf) = 0; 841 } 842 843 KASSERT(mbuf->m_len >= mbuf->m_pkthdr.l3hlen, 844 ("network header is fragmented in mbuf")); 845 846 /* We need TCP header including flags (window is the next) */ 847 if (mbuf->m_len < mbuf->m_pkthdr.l3hlen + offsetof(struct tcphdr, th_win)) { 848 m_copydata(mbuf, mbuf->m_pkthdr.l3hlen, sizeof(th_copy), 849 (caddr_t)&th_copy); 850 th = &th_copy; 851 } else { 852 th = (const struct tcphdr *)mtodo(mbuf, mbuf->m_pkthdr.l3hlen); 853 } 854 855 mbuf->m_pkthdr.l4hlen = mbuf->m_pkthdr.l3hlen + 4 * th->th_off; 856 TSO_MBUF_SEQNUM(mbuf) = ntohl(th->th_seq); 857 858 /* These flags must not be duplicated */ 859 /* 860 * RST should not be duplicated as well, but FreeBSD kernel 861 * generates TSO packets with RST flag. So, do not assert 862 * its absence. 863 */ 864 KASSERT(!(th->th_flags & (TH_URG | TH_SYN)), 865 ("incompatible TCP flag 0x%x on TSO packet", 866 th->th_flags & (TH_URG | TH_SYN))); 867 TSO_MBUF_FLAGS(mbuf) = th->th_flags; 868 } 869 #endif 870 871 /* 872 * TX start -- called by the stack. 873 */ 874 int 875 sfxge_if_transmit(if_t ifp, struct mbuf *m) 876 { 877 struct sfxge_softc *sc; 878 struct sfxge_txq *txq; 879 int rc; 880 881 sc = (struct sfxge_softc *)if_getsoftc(ifp); 882 883 /* 884 * Transmit may be called when interface is up from the kernel 885 * point of view, but not yet up (in progress) from the driver 886 * point of view. I.e. link aggregation bring up. 887 * Transmit may be called when interface is up from the driver 888 * point of view, but already down from the kernel point of 889 * view. I.e. Rx when interface shutdown is in progress. 890 */ 891 KASSERT((if_getflags(ifp) & IFF_UP) || (sc->if_flags & IFF_UP), 892 ("interface not up")); 893 894 /* Pick the desired transmit queue. */ 895 if (sc->txq_dynamic_cksum_toggle_supported | 896 (m->m_pkthdr.csum_flags & 897 (CSUM_DELAY_DATA | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO))) { 898 int index = 0; 899 900 #ifdef RSS 901 uint32_t bucket_id; 902 903 /* 904 * Select a TX queue which matches the corresponding 905 * RX queue for the hash in order to assign both 906 * TX and RX parts of the flow to the same CPU 907 */ 908 if (rss_m2bucket(m, &bucket_id) == 0) 909 index = bucket_id % (sc->txq_count - (SFXGE_TXQ_NTYPES - 1)); 910 #else 911 /* check if flowid is set */ 912 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 913 uint32_t hash = m->m_pkthdr.flowid; 914 uint32_t idx = hash % nitems(sc->rx_indir_table); 915 916 index = sc->rx_indir_table[idx]; 917 } 918 #endif 919 #if SFXGE_TX_PARSE_EARLY 920 if (m->m_pkthdr.csum_flags & CSUM_TSO) 921 sfxge_parse_tx_packet(m); 922 #endif 923 index += (sc->txq_dynamic_cksum_toggle_supported == B_FALSE) ? 924 SFXGE_TXQ_IP_TCP_UDP_CKSUM : 0; 925 txq = sc->txq[index]; 926 } else if (m->m_pkthdr.csum_flags & CSUM_DELAY_IP) { 927 txq = sc->txq[SFXGE_TXQ_IP_CKSUM]; 928 } else { 929 txq = sc->txq[SFXGE_TXQ_NON_CKSUM]; 930 } 931 932 rc = sfxge_tx_packet_add(txq, m); 933 if (rc != 0) 934 m_freem(m); 935 936 return (rc); 937 } 938 939 /* 940 * Software "TSO". Not quite as good as doing it in hardware, but 941 * still faster than segmenting in the stack. 942 */ 943 944 struct sfxge_tso_state { 945 /* Output position */ 946 unsigned out_len; /* Remaining length in current segment */ 947 unsigned seqnum; /* Current sequence number */ 948 unsigned packet_space; /* Remaining space in current packet */ 949 unsigned segs_space; /* Remaining number of DMA segments 950 for the packet (FATSOv2 only) */ 951 952 /* Input position */ 953 uint64_t dma_addr; /* DMA address of current position */ 954 unsigned in_len; /* Remaining length in current mbuf */ 955 956 const struct mbuf *mbuf; /* Input mbuf (head of chain) */ 957 u_short protocol; /* Network protocol (after VLAN decap) */ 958 ssize_t nh_off; /* Offset of network header */ 959 ssize_t tcph_off; /* Offset of TCP header */ 960 unsigned header_len; /* Number of bytes of header */ 961 unsigned seg_size; /* TCP segment size */ 962 int fw_assisted; /* Use FW-assisted TSO */ 963 u_short packet_id; /* IPv4 packet ID from the original packet */ 964 uint8_t tcp_flags; /* TCP flags */ 965 efx_desc_t header_desc; /* Precomputed header descriptor for 966 * FW-assisted TSO */ 967 }; 968 969 #if !SFXGE_TX_PARSE_EARLY 970 static const struct ip *tso_iph(const struct sfxge_tso_state *tso) 971 { 972 KASSERT(tso->protocol == htons(ETHERTYPE_IP), 973 ("tso_iph() in non-IPv4 state")); 974 return (const struct ip *)(tso->mbuf->m_data + tso->nh_off); 975 } 976 977 static __unused const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso) 978 { 979 KASSERT(tso->protocol == htons(ETHERTYPE_IPV6), 980 ("tso_ip6h() in non-IPv6 state")); 981 return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off); 982 } 983 984 static const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso) 985 { 986 return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off); 987 } 988 #endif 989 990 /* Size of preallocated TSO header buffers. Larger blocks must be 991 * allocated from the heap. 992 */ 993 #define TSOH_STD_SIZE 128 994 995 /* At most half the descriptors in the queue at any time will refer to 996 * a TSO header buffer, since they must always be followed by a 997 * payload descriptor referring to an mbuf. 998 */ 999 #define TSOH_COUNT(_txq_entries) ((_txq_entries) / 2u) 1000 #define TSOH_PER_PAGE (PAGE_SIZE / TSOH_STD_SIZE) 1001 #define TSOH_PAGE_COUNT(_txq_entries) \ 1002 howmany(TSOH_COUNT(_txq_entries), TSOH_PER_PAGE) 1003 1004 static int tso_init(struct sfxge_txq *txq) 1005 { 1006 struct sfxge_softc *sc = txq->sc; 1007 unsigned int tsoh_page_count = TSOH_PAGE_COUNT(sc->txq_entries); 1008 int i, rc; 1009 1010 /* Allocate TSO header buffers */ 1011 txq->tsoh_buffer = malloc(tsoh_page_count * sizeof(txq->tsoh_buffer[0]), 1012 M_SFXGE, M_WAITOK); 1013 1014 for (i = 0; i < tsoh_page_count; i++) { 1015 rc = sfxge_dma_alloc(sc, PAGE_SIZE, &txq->tsoh_buffer[i]); 1016 if (rc != 0) 1017 goto fail; 1018 } 1019 1020 return (0); 1021 1022 fail: 1023 while (i-- > 0) 1024 sfxge_dma_free(&txq->tsoh_buffer[i]); 1025 free(txq->tsoh_buffer, M_SFXGE); 1026 txq->tsoh_buffer = NULL; 1027 return (rc); 1028 } 1029 1030 static void tso_fini(struct sfxge_txq *txq) 1031 { 1032 int i; 1033 1034 if (txq->tsoh_buffer != NULL) { 1035 for (i = 0; i < TSOH_PAGE_COUNT(txq->sc->txq_entries); i++) 1036 sfxge_dma_free(&txq->tsoh_buffer[i]); 1037 free(txq->tsoh_buffer, M_SFXGE); 1038 } 1039 } 1040 1041 static void tso_start(struct sfxge_txq *txq, struct sfxge_tso_state *tso, 1042 const bus_dma_segment_t *hdr_dma_seg, 1043 struct mbuf *mbuf) 1044 { 1045 const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->sc->enp); 1046 #if !SFXGE_TX_PARSE_EARLY 1047 struct ether_header *eh = mtod(mbuf, struct ether_header *); 1048 const struct tcphdr *th; 1049 struct tcphdr th_copy; 1050 #endif 1051 1052 tso->fw_assisted = txq->tso_fw_assisted; 1053 tso->mbuf = mbuf; 1054 1055 /* Find network protocol and header */ 1056 #if !SFXGE_TX_PARSE_EARLY 1057 tso->protocol = eh->ether_type; 1058 if (tso->protocol == htons(ETHERTYPE_VLAN)) { 1059 struct ether_vlan_header *veh = 1060 mtod(mbuf, struct ether_vlan_header *); 1061 tso->protocol = veh->evl_proto; 1062 tso->nh_off = sizeof(*veh); 1063 } else { 1064 tso->nh_off = sizeof(*eh); 1065 } 1066 #else 1067 tso->protocol = TSO_MBUF_PROTO(mbuf); 1068 tso->nh_off = mbuf->m_pkthdr.l2hlen; 1069 tso->tcph_off = mbuf->m_pkthdr.l3hlen; 1070 tso->packet_id = ntohs(TSO_MBUF_PACKETID(mbuf)); 1071 #endif 1072 1073 #if !SFXGE_TX_PARSE_EARLY 1074 /* Find TCP header */ 1075 if (tso->protocol == htons(ETHERTYPE_IP)) { 1076 KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP, 1077 ("TSO required on non-TCP packet")); 1078 tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl; 1079 tso->packet_id = ntohs(tso_iph(tso)->ip_id); 1080 } else { 1081 KASSERT(tso->protocol == htons(ETHERTYPE_IPV6), 1082 ("TSO required on non-IP packet")); 1083 KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP, 1084 ("TSO required on non-TCP packet")); 1085 tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr); 1086 tso->packet_id = 0; 1087 } 1088 #endif 1089 1090 if (tso->fw_assisted && 1091 __predict_false(tso->tcph_off > 1092 encp->enc_tx_tso_tcp_header_offset_limit)) { 1093 tso->fw_assisted = 0; 1094 } 1095 1096 #if !SFXGE_TX_PARSE_EARLY 1097 KASSERT(mbuf->m_len >= tso->tcph_off, 1098 ("network header is fragmented in mbuf")); 1099 /* We need TCP header including flags (window is the next) */ 1100 if (mbuf->m_len < tso->tcph_off + offsetof(struct tcphdr, th_win)) { 1101 m_copydata(tso->mbuf, tso->tcph_off, sizeof(th_copy), 1102 (caddr_t)&th_copy); 1103 th = &th_copy; 1104 } else { 1105 th = tso_tcph(tso); 1106 } 1107 tso->header_len = tso->tcph_off + 4 * th->th_off; 1108 #else 1109 tso->header_len = mbuf->m_pkthdr.l4hlen; 1110 #endif 1111 tso->seg_size = mbuf->m_pkthdr.tso_segsz; 1112 1113 #if !SFXGE_TX_PARSE_EARLY 1114 tso->seqnum = ntohl(th->th_seq); 1115 1116 /* These flags must not be duplicated */ 1117 /* 1118 * RST should not be duplicated as well, but FreeBSD kernel 1119 * generates TSO packets with RST flag. So, do not assert 1120 * its absence. 1121 */ 1122 KASSERT(!(th->th_flags & (TH_URG | TH_SYN)), 1123 ("incompatible TCP flag 0x%x on TSO packet", 1124 th->th_flags & (TH_URG | TH_SYN))); 1125 tso->tcp_flags = th->th_flags; 1126 #else 1127 tso->seqnum = TSO_MBUF_SEQNUM(mbuf); 1128 tso->tcp_flags = TSO_MBUF_FLAGS(mbuf); 1129 #endif 1130 1131 tso->out_len = mbuf->m_pkthdr.len - tso->header_len; 1132 1133 if (tso->fw_assisted) { 1134 if (hdr_dma_seg->ds_len >= tso->header_len) 1135 efx_tx_qdesc_dma_create(txq->common, 1136 hdr_dma_seg->ds_addr, 1137 tso->header_len, 1138 B_FALSE, 1139 &tso->header_desc); 1140 else 1141 tso->fw_assisted = 0; 1142 } 1143 } 1144 1145 /* 1146 * tso_fill_packet_with_fragment - form descriptors for the current fragment 1147 * 1148 * Form descriptors for the current fragment, until we reach the end 1149 * of fragment or end-of-packet. Return 0 on success, 1 if not enough 1150 * space. 1151 */ 1152 static void tso_fill_packet_with_fragment(struct sfxge_txq *txq, 1153 struct sfxge_tso_state *tso) 1154 { 1155 efx_desc_t *desc; 1156 int n; 1157 uint64_t dma_addr = tso->dma_addr; 1158 boolean_t eop; 1159 1160 if (tso->in_len == 0 || tso->packet_space == 0) 1161 return; 1162 1163 KASSERT(tso->in_len > 0, ("TSO input length went negative")); 1164 KASSERT(tso->packet_space > 0, ("TSO packet space went negative")); 1165 1166 if (tso->fw_assisted & SFXGE_FATSOV2) { 1167 n = tso->in_len; 1168 tso->out_len -= n; 1169 tso->seqnum += n; 1170 tso->in_len = 0; 1171 if (n < tso->packet_space) { 1172 tso->packet_space -= n; 1173 tso->segs_space--; 1174 } else { 1175 tso->packet_space = tso->seg_size - 1176 (n - tso->packet_space) % tso->seg_size; 1177 tso->segs_space = 1178 EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1 - 1179 (tso->packet_space != tso->seg_size); 1180 } 1181 } else { 1182 n = min(tso->in_len, tso->packet_space); 1183 tso->packet_space -= n; 1184 tso->out_len -= n; 1185 tso->dma_addr += n; 1186 tso->in_len -= n; 1187 } 1188 1189 /* 1190 * It is OK to use binary OR below to avoid extra branching 1191 * since all conditions may always be checked. 1192 */ 1193 eop = (tso->out_len == 0) | (tso->packet_space == 0) | 1194 (tso->segs_space == 0); 1195 1196 desc = &txq->pend_desc[txq->n_pend_desc++]; 1197 efx_tx_qdesc_dma_create(txq->common, dma_addr, n, eop, desc); 1198 } 1199 1200 /* Callback from bus_dmamap_load() for long TSO headers. */ 1201 static void tso_map_long_header(void *dma_addr_ret, 1202 bus_dma_segment_t *segs, int nseg, 1203 int error) 1204 { 1205 *(uint64_t *)dma_addr_ret = ((__predict_true(error == 0) && 1206 __predict_true(nseg == 1)) ? 1207 segs->ds_addr : 0); 1208 } 1209 1210 /* 1211 * tso_start_new_packet - generate a new header and prepare for the new packet 1212 * 1213 * Generate a new header and prepare for the new packet. Return 0 on 1214 * success, or an error code if failed to alloc header. 1215 */ 1216 static int tso_start_new_packet(struct sfxge_txq *txq, 1217 struct sfxge_tso_state *tso, 1218 unsigned int *idp) 1219 { 1220 unsigned int id = *idp; 1221 struct tcphdr *tsoh_th; 1222 unsigned ip_length; 1223 caddr_t header; 1224 uint64_t dma_addr; 1225 bus_dmamap_t map; 1226 efx_desc_t *desc; 1227 int rc; 1228 1229 if (tso->fw_assisted) { 1230 if (tso->fw_assisted & SFXGE_FATSOV2) { 1231 /* Add 2 FATSOv2 option descriptors */ 1232 desc = &txq->pend_desc[txq->n_pend_desc]; 1233 efx_tx_qdesc_tso2_create(txq->common, 1234 tso->packet_id, 1235 0, 1236 tso->seqnum, 1237 tso->seg_size, 1238 desc, 1239 EFX_TX_FATSOV2_OPT_NDESCS); 1240 desc += EFX_TX_FATSOV2_OPT_NDESCS; 1241 txq->n_pend_desc += EFX_TX_FATSOV2_OPT_NDESCS; 1242 KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); 1243 id = (id + EFX_TX_FATSOV2_OPT_NDESCS) & txq->ptr_mask; 1244 1245 tso->segs_space = 1246 EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1; 1247 } else { 1248 uint8_t tcp_flags = tso->tcp_flags; 1249 1250 if (tso->out_len > tso->seg_size) 1251 tcp_flags &= ~(TH_FIN | TH_PUSH); 1252 1253 /* Add FATSOv1 option descriptor */ 1254 desc = &txq->pend_desc[txq->n_pend_desc++]; 1255 efx_tx_qdesc_tso_create(txq->common, 1256 tso->packet_id, 1257 tso->seqnum, 1258 tcp_flags, 1259 desc++); 1260 KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); 1261 id = (id + 1) & txq->ptr_mask; 1262 1263 tso->seqnum += tso->seg_size; 1264 tso->segs_space = UINT_MAX; 1265 } 1266 1267 /* Header DMA descriptor */ 1268 *desc = tso->header_desc; 1269 txq->n_pend_desc++; 1270 KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); 1271 id = (id + 1) & txq->ptr_mask; 1272 } else { 1273 /* Allocate a DMA-mapped header buffer. */ 1274 if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) { 1275 unsigned int page_index = (id / 2) / TSOH_PER_PAGE; 1276 unsigned int buf_index = (id / 2) % TSOH_PER_PAGE; 1277 1278 header = (txq->tsoh_buffer[page_index].esm_base + 1279 buf_index * TSOH_STD_SIZE); 1280 dma_addr = (txq->tsoh_buffer[page_index].esm_addr + 1281 buf_index * TSOH_STD_SIZE); 1282 map = txq->tsoh_buffer[page_index].esm_map; 1283 1284 KASSERT(txq->stmp[id].flags == 0, 1285 ("stmp flags are not 0")); 1286 } else { 1287 struct sfxge_tx_mapping *stmp = &txq->stmp[id]; 1288 1289 /* We cannot use bus_dmamem_alloc() as that may sleep */ 1290 header = malloc(tso->header_len, M_SFXGE, M_NOWAIT); 1291 if (__predict_false(!header)) 1292 return (ENOMEM); 1293 rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map, 1294 header, tso->header_len, 1295 tso_map_long_header, &dma_addr, 1296 BUS_DMA_NOWAIT); 1297 if (__predict_false(dma_addr == 0)) { 1298 if (rc == 0) { 1299 /* Succeeded but got >1 segment */ 1300 bus_dmamap_unload(txq->packet_dma_tag, 1301 stmp->map); 1302 rc = EINVAL; 1303 } 1304 free(header, M_SFXGE); 1305 return (rc); 1306 } 1307 map = stmp->map; 1308 1309 txq->tso_long_headers++; 1310 stmp->u.heap_buf = header; 1311 stmp->flags = TX_BUF_UNMAP; 1312 } 1313 1314 tsoh_th = (struct tcphdr *)(header + tso->tcph_off); 1315 1316 /* Copy and update the headers. */ 1317 m_copydata(tso->mbuf, 0, tso->header_len, header); 1318 1319 tsoh_th->th_seq = htonl(tso->seqnum); 1320 tso->seqnum += tso->seg_size; 1321 if (tso->out_len > tso->seg_size) { 1322 /* This packet will not finish the TSO burst. */ 1323 ip_length = tso->header_len - tso->nh_off + tso->seg_size; 1324 tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH); 1325 } else { 1326 /* This packet will be the last in the TSO burst. */ 1327 ip_length = tso->header_len - tso->nh_off + tso->out_len; 1328 } 1329 1330 if (tso->protocol == htons(ETHERTYPE_IP)) { 1331 struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off); 1332 tsoh_iph->ip_len = htons(ip_length); 1333 /* XXX We should increment ip_id, but FreeBSD doesn't 1334 * currently allocate extra IDs for multiple segments. 1335 */ 1336 } else { 1337 struct ip6_hdr *tsoh_iph = 1338 (struct ip6_hdr *)(header + tso->nh_off); 1339 tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph)); 1340 } 1341 1342 /* Make the header visible to the hardware. */ 1343 bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE); 1344 1345 /* Form a descriptor for this header. */ 1346 desc = &txq->pend_desc[txq->n_pend_desc++]; 1347 efx_tx_qdesc_dma_create(txq->common, 1348 dma_addr, 1349 tso->header_len, 1350 0, 1351 desc); 1352 id = (id + 1) & txq->ptr_mask; 1353 1354 tso->segs_space = UINT_MAX; 1355 } 1356 tso->packet_space = tso->seg_size; 1357 txq->tso_packets++; 1358 *idp = id; 1359 1360 return (0); 1361 } 1362 1363 static int 1364 sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf, 1365 const bus_dma_segment_t *dma_seg, int n_dma_seg, 1366 int n_extra_descs) 1367 { 1368 struct sfxge_tso_state tso; 1369 unsigned int id; 1370 unsigned skipped = 0; 1371 1372 tso_start(txq, &tso, dma_seg, mbuf); 1373 1374 while (dma_seg->ds_len + skipped <= tso.header_len) { 1375 skipped += dma_seg->ds_len; 1376 --n_dma_seg; 1377 KASSERT(n_dma_seg, ("no payload found in TSO packet")); 1378 ++dma_seg; 1379 } 1380 tso.in_len = dma_seg->ds_len - (tso.header_len - skipped); 1381 tso.dma_addr = dma_seg->ds_addr + (tso.header_len - skipped); 1382 1383 id = (txq->added + n_extra_descs) & txq->ptr_mask; 1384 if (__predict_false(tso_start_new_packet(txq, &tso, &id))) 1385 return (-1); 1386 1387 while (1) { 1388 tso_fill_packet_with_fragment(txq, &tso); 1389 /* Exactly one DMA descriptor is added */ 1390 KASSERT(txq->stmp[id].flags == 0, ("stmp flags are not 0")); 1391 id = (id + 1) & txq->ptr_mask; 1392 1393 /* Move onto the next fragment? */ 1394 if (tso.in_len == 0) { 1395 --n_dma_seg; 1396 if (n_dma_seg == 0) 1397 break; 1398 ++dma_seg; 1399 tso.in_len = dma_seg->ds_len; 1400 tso.dma_addr = dma_seg->ds_addr; 1401 } 1402 1403 /* End of packet? */ 1404 if ((tso.packet_space == 0) | (tso.segs_space == 0)) { 1405 unsigned int n_fatso_opt_desc = 1406 (tso.fw_assisted & SFXGE_FATSOV2) ? 1407 EFX_TX_FATSOV2_OPT_NDESCS : 1408 (tso.fw_assisted & SFXGE_FATSOV1) ? 1 : 0; 1409 1410 /* If the queue is now full due to tiny MSS, 1411 * or we can't create another header, discard 1412 * the remainder of the input mbuf but do not 1413 * roll back the work we have done. 1414 */ 1415 if (txq->n_pend_desc + n_fatso_opt_desc + 1416 1 /* header */ + n_dma_seg > txq->max_pkt_desc) { 1417 txq->tso_pdrop_too_many++; 1418 break; 1419 } 1420 if (__predict_false(tso_start_new_packet(txq, &tso, 1421 &id))) { 1422 txq->tso_pdrop_no_rsrc++; 1423 break; 1424 } 1425 } 1426 } 1427 1428 txq->tso_bursts++; 1429 return (id); 1430 } 1431 1432 static void 1433 sfxge_tx_qunblock(struct sfxge_txq *txq) 1434 { 1435 struct sfxge_softc *sc; 1436 struct sfxge_evq *evq __diagused; 1437 1438 sc = txq->sc; 1439 evq = sc->evq[txq->evq_index]; 1440 1441 SFXGE_EVQ_LOCK_ASSERT_OWNED(evq); 1442 1443 if (__predict_false(txq->init_state != SFXGE_TXQ_STARTED)) 1444 return; 1445 1446 SFXGE_TXQ_LOCK(txq); 1447 1448 if (txq->blocked) { 1449 unsigned int level; 1450 1451 level = txq->added - txq->completed; 1452 if (level <= SFXGE_TXQ_UNBLOCK_LEVEL(txq->entries)) { 1453 /* reaped must be in sync with blocked */ 1454 sfxge_tx_qreap(txq); 1455 txq->blocked = 0; 1456 } 1457 } 1458 1459 sfxge_tx_qdpl_service(txq); 1460 /* note: lock has been dropped */ 1461 } 1462 1463 void 1464 sfxge_tx_qflush_done(struct sfxge_txq *txq) 1465 { 1466 1467 txq->flush_state = SFXGE_FLUSH_DONE; 1468 } 1469 1470 static void 1471 sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index) 1472 { 1473 struct sfxge_txq *txq; 1474 struct sfxge_evq *evq; 1475 unsigned int count; 1476 1477 SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc); 1478 1479 txq = sc->txq[index]; 1480 evq = sc->evq[txq->evq_index]; 1481 1482 SFXGE_EVQ_LOCK(evq); 1483 SFXGE_TXQ_LOCK(txq); 1484 1485 KASSERT(txq->init_state == SFXGE_TXQ_STARTED, 1486 ("txq->init_state != SFXGE_TXQ_STARTED")); 1487 1488 txq->init_state = SFXGE_TXQ_INITIALIZED; 1489 1490 if (txq->flush_state != SFXGE_FLUSH_DONE) { 1491 txq->flush_state = SFXGE_FLUSH_PENDING; 1492 1493 SFXGE_EVQ_UNLOCK(evq); 1494 SFXGE_TXQ_UNLOCK(txq); 1495 1496 /* Flush the transmit queue. */ 1497 if (efx_tx_qflush(txq->common) != 0) { 1498 log(LOG_ERR, "%s: Flushing Tx queue %u failed\n", 1499 device_get_nameunit(sc->dev), index); 1500 txq->flush_state = SFXGE_FLUSH_DONE; 1501 } else { 1502 count = 0; 1503 do { 1504 /* Spin for 100ms. */ 1505 DELAY(100000); 1506 if (txq->flush_state != SFXGE_FLUSH_PENDING) 1507 break; 1508 } while (++count < 20); 1509 } 1510 SFXGE_EVQ_LOCK(evq); 1511 SFXGE_TXQ_LOCK(txq); 1512 1513 KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED, 1514 ("txq->flush_state == SFXGE_FLUSH_FAILED")); 1515 1516 if (txq->flush_state != SFXGE_FLUSH_DONE) { 1517 /* Flush timeout */ 1518 log(LOG_ERR, "%s: Cannot flush Tx queue %u\n", 1519 device_get_nameunit(sc->dev), index); 1520 txq->flush_state = SFXGE_FLUSH_DONE; 1521 } 1522 } 1523 1524 txq->blocked = 0; 1525 txq->pending = txq->added; 1526 1527 sfxge_tx_qcomplete(txq, evq); 1528 KASSERT(txq->completed == txq->added, 1529 ("txq->completed != txq->added")); 1530 1531 sfxge_tx_qreap(txq); 1532 KASSERT(txq->reaped == txq->completed, 1533 ("txq->reaped != txq->completed")); 1534 1535 txq->added = 0; 1536 txq->pending = 0; 1537 txq->completed = 0; 1538 txq->reaped = 0; 1539 1540 /* Destroy the common code transmit queue. */ 1541 efx_tx_qdestroy(txq->common); 1542 txq->common = NULL; 1543 1544 efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id, 1545 EFX_TXQ_NBUFS(sc->txq_entries)); 1546 1547 txq->hw_cksum_flags = 0; 1548 1549 SFXGE_EVQ_UNLOCK(evq); 1550 SFXGE_TXQ_UNLOCK(txq); 1551 } 1552 1553 /* 1554 * Estimate maximum number of Tx descriptors required for TSO packet. 1555 * With minimum MSS and maximum mbuf length we might need more (even 1556 * than a ring-ful of descriptors), but this should not happen in 1557 * practice except due to deliberate attack. In that case we will 1558 * truncate the output at a packet boundary. 1559 */ 1560 static unsigned int 1561 sfxge_tx_max_pkt_desc(const struct sfxge_softc *sc, enum sfxge_txq_type type, 1562 unsigned int tso_fw_assisted) 1563 { 1564 /* One descriptor for every input fragment */ 1565 unsigned int max_descs = SFXGE_TX_MAPPING_MAX_SEG; 1566 unsigned int sw_tso_max_descs; 1567 unsigned int fa_tso_v1_max_descs = 0; 1568 unsigned int fa_tso_v2_max_descs = 0; 1569 1570 /* Checksum offload Tx option descriptor may be required */ 1571 if (sc->txq_dynamic_cksum_toggle_supported) 1572 max_descs++; 1573 1574 /* VLAN tagging Tx option descriptor may be required */ 1575 if (efx_nic_cfg_get(sc->enp)->enc_hw_tx_insert_vlan_enabled) 1576 max_descs++; 1577 1578 if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) { 1579 /* 1580 * Plus header and payload descriptor for each output segment. 1581 * Minus one since header fragment is already counted. 1582 * Even if FATSO is used, we should be ready to fallback 1583 * to do it in the driver. 1584 */ 1585 sw_tso_max_descs = SFXGE_TSO_MAX_SEGS * 2 - 1; 1586 1587 /* FW assisted TSOv1 requires one more descriptor per segment 1588 * in comparison to SW TSO */ 1589 if (tso_fw_assisted & SFXGE_FATSOV1) 1590 fa_tso_v1_max_descs = 1591 sw_tso_max_descs + SFXGE_TSO_MAX_SEGS; 1592 1593 /* FW assisted TSOv2 requires 3 (2 FATSO plus header) extra 1594 * descriptors per superframe limited by number of DMA fetches 1595 * per packet. The first packet header is already counted. 1596 */ 1597 if (tso_fw_assisted & SFXGE_FATSOV2) { 1598 fa_tso_v2_max_descs = 1599 howmany(SFXGE_TX_MAPPING_MAX_SEG, 1600 EFX_TX_FATSOV2_DMA_SEGS_PER_PKT_MAX - 1) * 1601 (EFX_TX_FATSOV2_OPT_NDESCS + 1) - 1; 1602 } 1603 1604 max_descs += MAX(sw_tso_max_descs, 1605 MAX(fa_tso_v1_max_descs, fa_tso_v2_max_descs)); 1606 } 1607 1608 return (max_descs); 1609 } 1610 1611 static int 1612 sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index) 1613 { 1614 struct sfxge_txq *txq; 1615 efsys_mem_t *esmp; 1616 uint16_t flags; 1617 unsigned int tso_fw_assisted; 1618 unsigned int label; 1619 struct sfxge_evq *evq; 1620 unsigned int desc_index; 1621 int rc; 1622 1623 SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc); 1624 1625 txq = sc->txq[index]; 1626 esmp = &txq->mem; 1627 evq = sc->evq[txq->evq_index]; 1628 1629 KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED, 1630 ("txq->init_state != SFXGE_TXQ_INITIALIZED")); 1631 KASSERT(evq->init_state == SFXGE_EVQ_STARTED, 1632 ("evq->init_state != SFXGE_EVQ_STARTED")); 1633 1634 /* Program the buffer table. */ 1635 if ((rc = efx_sram_buf_tbl_set(sc->enp, txq->buf_base_id, esmp, 1636 EFX_TXQ_NBUFS(sc->txq_entries))) != 0) 1637 return (rc); 1638 1639 /* Determine the kind of queue we are creating. */ 1640 tso_fw_assisted = 0; 1641 switch (txq->type) { 1642 case SFXGE_TXQ_NON_CKSUM: 1643 flags = 0; 1644 break; 1645 case SFXGE_TXQ_IP_CKSUM: 1646 flags = EFX_TXQ_CKSUM_IPV4; 1647 break; 1648 case SFXGE_TXQ_IP_TCP_UDP_CKSUM: 1649 flags = EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP; 1650 tso_fw_assisted = sc->tso_fw_assisted; 1651 if (tso_fw_assisted & SFXGE_FATSOV2) 1652 flags |= EFX_TXQ_FATSOV2; 1653 break; 1654 default: 1655 KASSERT(0, ("Impossible TX queue")); 1656 flags = 0; 1657 break; 1658 } 1659 1660 label = (sc->txq_dynamic_cksum_toggle_supported) ? 0 : txq->type; 1661 1662 /* Create the common code transmit queue. */ 1663 if ((rc = efx_tx_qcreate(sc->enp, index, label, esmp, 1664 sc->txq_entries, txq->buf_base_id, flags, evq->common, 1665 &txq->common, &desc_index)) != 0) { 1666 /* Retry if no FATSOv2 resources, otherwise fail */ 1667 if ((rc != ENOSPC) || (~flags & EFX_TXQ_FATSOV2)) 1668 goto fail; 1669 1670 /* Looks like all FATSOv2 contexts are used */ 1671 flags &= ~EFX_TXQ_FATSOV2; 1672 tso_fw_assisted &= ~SFXGE_FATSOV2; 1673 if ((rc = efx_tx_qcreate(sc->enp, index, label, esmp, 1674 sc->txq_entries, txq->buf_base_id, flags, evq->common, 1675 &txq->common, &desc_index)) != 0) 1676 goto fail; 1677 } 1678 1679 /* Initialise queue descriptor indexes */ 1680 txq->added = txq->pending = txq->completed = txq->reaped = desc_index; 1681 1682 SFXGE_TXQ_LOCK(txq); 1683 1684 /* Enable the transmit queue. */ 1685 efx_tx_qenable(txq->common); 1686 1687 txq->init_state = SFXGE_TXQ_STARTED; 1688 txq->flush_state = SFXGE_FLUSH_REQUIRED; 1689 txq->tso_fw_assisted = tso_fw_assisted; 1690 1691 txq->max_pkt_desc = sfxge_tx_max_pkt_desc(sc, txq->type, 1692 tso_fw_assisted); 1693 1694 txq->hw_vlan_tci = 0; 1695 1696 txq->hw_cksum_flags = flags & 1697 (EFX_TXQ_CKSUM_IPV4 | EFX_TXQ_CKSUM_TCPUDP); 1698 1699 SFXGE_TXQ_UNLOCK(txq); 1700 1701 return (0); 1702 1703 fail: 1704 efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id, 1705 EFX_TXQ_NBUFS(sc->txq_entries)); 1706 return (rc); 1707 } 1708 1709 void 1710 sfxge_tx_stop(struct sfxge_softc *sc) 1711 { 1712 int index; 1713 1714 index = sc->txq_count; 1715 while (--index >= 0) 1716 sfxge_tx_qstop(sc, index); 1717 1718 /* Tear down the transmit module */ 1719 efx_tx_fini(sc->enp); 1720 } 1721 1722 int 1723 sfxge_tx_start(struct sfxge_softc *sc) 1724 { 1725 int index; 1726 int rc; 1727 1728 /* Initialize the common code transmit module. */ 1729 if ((rc = efx_tx_init(sc->enp)) != 0) 1730 return (rc); 1731 1732 for (index = 0; index < sc->txq_count; index++) { 1733 if ((rc = sfxge_tx_qstart(sc, index)) != 0) 1734 goto fail; 1735 } 1736 1737 return (0); 1738 1739 fail: 1740 while (--index >= 0) 1741 sfxge_tx_qstop(sc, index); 1742 1743 efx_tx_fini(sc->enp); 1744 1745 return (rc); 1746 } 1747 1748 static int 1749 sfxge_txq_stat_init(struct sfxge_txq *txq, struct sysctl_oid *txq_node) 1750 { 1751 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(txq->sc->dev); 1752 struct sysctl_oid *stat_node; 1753 unsigned int id; 1754 1755 stat_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO, 1756 "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Tx queue statistics"); 1757 if (stat_node == NULL) 1758 return (ENOMEM); 1759 1760 for (id = 0; id < nitems(sfxge_tx_stats); id++) { 1761 SYSCTL_ADD_ULONG( 1762 ctx, SYSCTL_CHILDREN(stat_node), OID_AUTO, 1763 sfxge_tx_stats[id].name, CTLFLAG_RD | CTLFLAG_STATS, 1764 (unsigned long *)((caddr_t)txq + sfxge_tx_stats[id].offset), 1765 ""); 1766 } 1767 1768 return (0); 1769 } 1770 1771 /** 1772 * Destroy a transmit queue. 1773 */ 1774 static void 1775 sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index) 1776 { 1777 struct sfxge_txq *txq; 1778 unsigned int nmaps; 1779 1780 txq = sc->txq[index]; 1781 1782 KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED, 1783 ("txq->init_state != SFXGE_TXQ_INITIALIZED")); 1784 1785 if (txq->type == SFXGE_TXQ_IP_TCP_UDP_CKSUM) 1786 tso_fini(txq); 1787 1788 /* Free the context arrays. */ 1789 free(txq->pend_desc, M_SFXGE); 1790 nmaps = sc->txq_entries; 1791 while (nmaps-- != 0) 1792 bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map); 1793 free(txq->stmp, M_SFXGE); 1794 1795 /* Release DMA memory mapping. */ 1796 sfxge_dma_free(&txq->mem); 1797 1798 sc->txq[index] = NULL; 1799 1800 SFXGE_TXQ_LOCK_DESTROY(txq); 1801 1802 free(txq, M_SFXGE); 1803 } 1804 1805 static int 1806 sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index, 1807 enum sfxge_txq_type type, unsigned int evq_index) 1808 { 1809 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp); 1810 char name[16]; 1811 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); 1812 struct sysctl_oid *txq_node; 1813 struct sfxge_txq *txq; 1814 struct sfxge_tx_dpl *stdp; 1815 struct sysctl_oid *dpl_node; 1816 efsys_mem_t *esmp; 1817 unsigned int nmaps; 1818 int rc; 1819 1820 txq = malloc(sizeof(struct sfxge_txq), M_SFXGE, M_ZERO | M_WAITOK); 1821 txq->sc = sc; 1822 txq->entries = sc->txq_entries; 1823 txq->ptr_mask = txq->entries - 1; 1824 1825 sc->txq[txq_index] = txq; 1826 esmp = &txq->mem; 1827 1828 /* Allocate and zero DMA space for the descriptor ring. */ 1829 if ((rc = sfxge_dma_alloc(sc, EFX_TXQ_SIZE(sc->txq_entries), esmp)) != 0) 1830 return (rc); 1831 1832 /* Allocate buffer table entries. */ 1833 sfxge_sram_buf_tbl_alloc(sc, EFX_TXQ_NBUFS(sc->txq_entries), 1834 &txq->buf_base_id); 1835 1836 /* Create a DMA tag for packet mappings. */ 1837 if (bus_dma_tag_create(sc->parent_dma_tag, 1, 1838 encp->enc_tx_dma_desc_boundary, 1839 MIN(0x3FFFFFFFFFFFUL, BUS_SPACE_MAXADDR), BUS_SPACE_MAXADDR, NULL, 1840 NULL, 0x11000, SFXGE_TX_MAPPING_MAX_SEG, 1841 encp->enc_tx_dma_desc_size_max, 0, NULL, NULL, 1842 &txq->packet_dma_tag) != 0) { 1843 device_printf(sc->dev, "Couldn't allocate txq DMA tag\n"); 1844 rc = ENOMEM; 1845 goto fail; 1846 } 1847 1848 /* Allocate pending descriptor array for batching writes. */ 1849 txq->pend_desc = malloc(sizeof(efx_desc_t) * sc->txq_entries, 1850 M_SFXGE, M_ZERO | M_WAITOK); 1851 1852 /* Allocate and initialise mbuf DMA mapping array. */ 1853 txq->stmp = malloc(sizeof(struct sfxge_tx_mapping) * sc->txq_entries, 1854 M_SFXGE, M_ZERO | M_WAITOK); 1855 for (nmaps = 0; nmaps < sc->txq_entries; nmaps++) { 1856 rc = bus_dmamap_create(txq->packet_dma_tag, 0, 1857 &txq->stmp[nmaps].map); 1858 if (rc != 0) 1859 goto fail2; 1860 } 1861 1862 snprintf(name, sizeof(name), "%u", txq_index); 1863 txq_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->txqs_node), 1864 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); 1865 if (txq_node == NULL) { 1866 rc = ENOMEM; 1867 goto fail_txq_node; 1868 } 1869 1870 if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM && 1871 (rc = tso_init(txq)) != 0) 1872 goto fail3; 1873 1874 /* Initialize the deferred packet list. */ 1875 stdp = &txq->dpl; 1876 stdp->std_put_max = sfxge_tx_dpl_put_max; 1877 stdp->std_get_max = sfxge_tx_dpl_get_max; 1878 stdp->std_get_non_tcp_max = sfxge_tx_dpl_get_non_tcp_max; 1879 stdp->std_getp = &stdp->std_get; 1880 1881 SFXGE_TXQ_LOCK_INIT(txq, device_get_nameunit(sc->dev), txq_index); 1882 1883 dpl_node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(txq_node), OID_AUTO, 1884 "dpl", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 1885 "Deferred packet list statistics"); 1886 if (dpl_node == NULL) { 1887 rc = ENOMEM; 1888 goto fail_dpl_node; 1889 } 1890 1891 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO, 1892 "get_count", CTLFLAG_RD | CTLFLAG_STATS, 1893 &stdp->std_get_count, 0, ""); 1894 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO, 1895 "get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS, 1896 &stdp->std_get_non_tcp_count, 0, ""); 1897 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO, 1898 "get_hiwat", CTLFLAG_RD | CTLFLAG_STATS, 1899 &stdp->std_get_hiwat, 0, ""); 1900 SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(dpl_node), OID_AUTO, 1901 "put_hiwat", CTLFLAG_RD | CTLFLAG_STATS, 1902 &stdp->std_put_hiwat, 0, ""); 1903 1904 rc = sfxge_txq_stat_init(txq, txq_node); 1905 if (rc != 0) 1906 goto fail_txq_stat_init; 1907 1908 txq->type = type; 1909 txq->evq_index = evq_index; 1910 txq->init_state = SFXGE_TXQ_INITIALIZED; 1911 1912 return (0); 1913 1914 fail_txq_stat_init: 1915 fail_dpl_node: 1916 fail3: 1917 fail_txq_node: 1918 free(txq->pend_desc, M_SFXGE); 1919 fail2: 1920 while (nmaps-- != 0) 1921 bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map); 1922 free(txq->stmp, M_SFXGE); 1923 bus_dma_tag_destroy(txq->packet_dma_tag); 1924 1925 fail: 1926 sfxge_dma_free(esmp); 1927 1928 return (rc); 1929 } 1930 1931 static int 1932 sfxge_tx_stat_handler(SYSCTL_HANDLER_ARGS) 1933 { 1934 struct sfxge_softc *sc = arg1; 1935 unsigned int id = arg2; 1936 unsigned long sum; 1937 unsigned int index; 1938 1939 /* Sum across all TX queues */ 1940 sum = 0; 1941 for (index = 0; index < sc->txq_count; index++) 1942 sum += *(unsigned long *)((caddr_t)sc->txq[index] + 1943 sfxge_tx_stats[id].offset); 1944 1945 return (SYSCTL_OUT(req, &sum, sizeof(sum))); 1946 } 1947 1948 static void 1949 sfxge_tx_stat_init(struct sfxge_softc *sc) 1950 { 1951 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); 1952 struct sysctl_oid_list *stat_list; 1953 unsigned int id; 1954 1955 stat_list = SYSCTL_CHILDREN(sc->stats_node); 1956 1957 for (id = 0; id < nitems(sfxge_tx_stats); id++) { 1958 SYSCTL_ADD_PROC(ctx, stat_list, OID_AUTO, 1959 sfxge_tx_stats[id].name, 1960 CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 1961 sc, id, sfxge_tx_stat_handler, "LU", ""); 1962 } 1963 } 1964 1965 uint64_t 1966 sfxge_tx_get_drops(struct sfxge_softc *sc) 1967 { 1968 unsigned int index; 1969 uint64_t drops = 0; 1970 struct sfxge_txq *txq; 1971 1972 /* Sum across all TX queues */ 1973 for (index = 0; index < sc->txq_count; index++) { 1974 txq = sc->txq[index]; 1975 /* 1976 * In theory, txq->put_overflow and txq->netdown_drops 1977 * should use atomic operation and other should be 1978 * obtained under txq lock, but it is just statistics. 1979 */ 1980 drops += txq->drops + txq->get_overflow + 1981 txq->get_non_tcp_overflow + 1982 txq->put_overflow + txq->netdown_drops + 1983 txq->tso_pdrop_too_many + txq->tso_pdrop_no_rsrc; 1984 } 1985 return (drops); 1986 } 1987 1988 void 1989 sfxge_tx_fini(struct sfxge_softc *sc) 1990 { 1991 int index; 1992 1993 index = sc->txq_count; 1994 while (--index >= 0) 1995 sfxge_tx_qfini(sc, index); 1996 1997 sc->txq_count = 0; 1998 } 1999 2000 int 2001 sfxge_tx_init(struct sfxge_softc *sc) 2002 { 2003 const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp); 2004 struct sfxge_intr *intr __diagused; 2005 int index; 2006 int rc; 2007 2008 intr = &sc->intr; 2009 2010 KASSERT(intr->state == SFXGE_INTR_INITIALIZED, 2011 ("intr->state != SFXGE_INTR_INITIALIZED")); 2012 2013 if (sfxge_tx_dpl_get_max <= 0) { 2014 log(LOG_ERR, "%s=%d must be greater than 0", 2015 SFXGE_PARAM_TX_DPL_GET_MAX, sfxge_tx_dpl_get_max); 2016 rc = EINVAL; 2017 goto fail_tx_dpl_get_max; 2018 } 2019 if (sfxge_tx_dpl_get_non_tcp_max <= 0) { 2020 log(LOG_ERR, "%s=%d must be greater than 0", 2021 SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, 2022 sfxge_tx_dpl_get_non_tcp_max); 2023 rc = EINVAL; 2024 goto fail_tx_dpl_get_non_tcp_max; 2025 } 2026 if (sfxge_tx_dpl_put_max < 0) { 2027 log(LOG_ERR, "%s=%d must be greater or equal to 0", 2028 SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max); 2029 rc = EINVAL; 2030 goto fail_tx_dpl_put_max; 2031 } 2032 2033 sc->txq_count = SFXGE_EVQ0_N_TXQ(sc) - 1 + sc->intr.n_alloc; 2034 2035 sc->tso_fw_assisted = sfxge_tso_fw_assisted; 2036 if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO) || 2037 (!encp->enc_fw_assisted_tso_enabled)) 2038 sc->tso_fw_assisted &= ~SFXGE_FATSOV1; 2039 if ((~encp->enc_features & EFX_FEATURE_FW_ASSISTED_TSO_V2) || 2040 (!encp->enc_fw_assisted_tso_v2_enabled)) 2041 sc->tso_fw_assisted &= ~SFXGE_FATSOV2; 2042 2043 sc->txqs_node = SYSCTL_ADD_NODE(device_get_sysctl_ctx(sc->dev), 2044 SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, 2045 "txq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Tx queues"); 2046 if (sc->txqs_node == NULL) { 2047 rc = ENOMEM; 2048 goto fail_txq_node; 2049 } 2050 2051 /* Initialize the transmit queues */ 2052 if (sc->txq_dynamic_cksum_toggle_supported == B_FALSE) { 2053 if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NON_CKSUM, 2054 SFXGE_TXQ_NON_CKSUM, 0)) != 0) 2055 goto fail; 2056 2057 if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_IP_CKSUM, 2058 SFXGE_TXQ_IP_CKSUM, 0)) != 0) 2059 goto fail2; 2060 } 2061 2062 for (index = 0; 2063 index < sc->txq_count - SFXGE_EVQ0_N_TXQ(sc) + 1; 2064 index++) { 2065 if ((rc = sfxge_tx_qinit(sc, SFXGE_EVQ0_N_TXQ(sc) - 1 + index, 2066 SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0) 2067 goto fail3; 2068 } 2069 2070 sfxge_tx_stat_init(sc); 2071 2072 return (0); 2073 2074 fail3: 2075 while (--index >= 0) 2076 sfxge_tx_qfini(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index); 2077 2078 sfxge_tx_qfini(sc, SFXGE_TXQ_IP_CKSUM); 2079 2080 fail2: 2081 sfxge_tx_qfini(sc, SFXGE_TXQ_NON_CKSUM); 2082 2083 fail: 2084 fail_txq_node: 2085 sc->txq_count = 0; 2086 fail_tx_dpl_put_max: 2087 fail_tx_dpl_get_non_tcp_max: 2088 fail_tx_dpl_get_max: 2089 return (rc); 2090 } 2091