/*-
 * Copyright (c) 2010-2011 Solarflare Communications, Inc.
 * All rights reserved.
 *
 * This software was developed in part by Philip Paeps under contract for
 * Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>

#include "common/efx.h"

#include "sfxge.h"
#include "sfxge_tx.h"

/* Set the block level to ensure there is space to generate a
 * large number of descriptors for TSO.  With minimum MSS and
 * maximum mbuf length we might need more than a ring-ful of
 * descriptors, but this should not happen in practice except
 * due to deliberate attack.  In that case we will truncate
 * the output at a packet boundary.  Allow for a reasonable
 * minimum MSS of 512.
 */
#define SFXGE_TSO_MAX_DESC ((65535 / 512) * 2 + SFXGE_TX_MAPPING_MAX_SEG - 1)
#define SFXGE_TXQ_BLOCK_LEVEL (SFXGE_NDESCS - SFXGE_TSO_MAX_DESC)
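
/*
 * Illustrative note (added; not part of the original sources): the bound
 * above appears to budget two descriptors (header plus payload) for each
 * of the roughly 65535/512 output segments of a worst-case TSO burst,
 * plus up to SFXGE_TX_MAPPING_MAX_SEG - 1 additional payload descriptors
 * for output segments that straddle input DMA segment boundaries.
 */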

/* Forward declarations. */
static inline void sfxge_tx_qdpl_service(struct sfxge_txq *txq);
static void sfxge_tx_qlist_post(struct sfxge_txq *txq);
static void sfxge_tx_qunblock(struct sfxge_txq *txq);
static int sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
    const bus_dma_segment_t *dma_seg, int n_dma_seg);

void
sfxge_tx_qcomplete(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc;
	struct sfxge_evq *evq;
	unsigned int completed;

	sc = txq->sc;
	evq = sc->evq[txq->evq_index];

	mtx_assert(&evq->lock, MA_OWNED);

	completed = txq->completed;
	while (completed != txq->pending) {
		struct sfxge_tx_mapping *stmp;
		unsigned int id;

		id = completed++ & (SFXGE_NDESCS - 1);

		stmp = &txq->stmp[id];
		if (stmp->flags & TX_BUF_UNMAP) {
			bus_dmamap_unload(txq->packet_dma_tag, stmp->map);
			if (stmp->flags & TX_BUF_MBUF) {
				struct mbuf *m = stmp->u.mbuf;
				do
					m = m_free(m);
				while (m != NULL);
			} else {
				free(stmp->u.heap_buf, M_SFXGE);
			}
			stmp->flags = 0;
		}
	}
	txq->completed = completed;

	/* Check whether we need to unblock the queue. */
	mb();
	if (txq->blocked) {
		unsigned int level;

		level = txq->added - txq->completed;
		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL)
			sfxge_tx_qunblock(txq);
	}
}

#ifdef SFXGE_HAVE_MQ

/*
 * Reorder the put list and append it to the get list.
 */
static void
sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
{
	struct sfxge_tx_dpl *stdp;
	struct mbuf *mbuf, *get_next, **get_tailp;
	volatile uintptr_t *putp;
	uintptr_t put;
	unsigned int count;

	mtx_assert(&txq->lock, MA_OWNED);

	stdp = &txq->dpl;

	/* Acquire the put list. */
	putp = &stdp->std_put;
	put = atomic_readandclear_ptr(putp);
	mbuf = (void *)put;

	if (mbuf == NULL)
		return;

	/* Reverse the put list. */
	get_tailp = &mbuf->m_nextpkt;
	get_next = NULL;

	count = 0;
	do {
		struct mbuf *put_next;

		put_next = mbuf->m_nextpkt;
		mbuf->m_nextpkt = get_next;
		get_next = mbuf;
		mbuf = put_next;

		count++;
	} while (mbuf != NULL);

	/* Append the reversed put list to the get list. */
	KASSERT(*get_tailp == NULL, ("*get_tailp != NULL"));
	*stdp->std_getp = get_next;
	stdp->std_getp = get_tailp;
	stdp->std_count += count;
}

#endif /* SFXGE_HAVE_MQ */

static void
sfxge_tx_qreap(struct sfxge_txq *txq)
{
	mtx_assert(SFXGE_TXQ_LOCK(txq), MA_OWNED);

	txq->reaped = txq->completed;
}

static void
sfxge_tx_qlist_post(struct sfxge_txq *txq)
{
	unsigned int old_added;
	unsigned int level;
	int rc;

	mtx_assert(SFXGE_TXQ_LOCK(txq), MA_OWNED);

	KASSERT(txq->n_pend_desc != 0, ("txq->n_pend_desc == 0"));
	KASSERT(txq->n_pend_desc <= SFXGE_TSO_MAX_DESC,
	    ("txq->n_pend_desc too large"));
	KASSERT(!txq->blocked, ("txq->blocked"));

	old_added = txq->added;

	/* Post the fragment list. */
	rc = efx_tx_qpost(txq->common, txq->pend_desc, txq->n_pend_desc,
	    txq->reaped, &txq->added);
	KASSERT(rc == 0, ("efx_tx_qpost() failed"));

	/* If efx_tx_qpost() had to refragment, our information about
	 * buffers to free may be associated with the wrong
	 * descriptors.
	 */
	KASSERT(txq->added - old_added == txq->n_pend_desc,
	    ("efx_tx_qpost() refragmented descriptors"));

	level = txq->added - txq->reaped;
	KASSERT(level <= SFXGE_NDESCS, ("overfilled TX queue"));

	/* Clear the fragment list. */
	txq->n_pend_desc = 0;

	/* Have we reached the block level? */
	if (level < SFXGE_TXQ_BLOCK_LEVEL)
		return;

	/* Reap, and check again */
	sfxge_tx_qreap(txq);
	level = txq->added - txq->reaped;
	if (level < SFXGE_TXQ_BLOCK_LEVEL)
		return;

	txq->blocked = 1;

	/*
	 * Avoid a race with completion interrupt handling that could leave
	 * the queue blocked.
	 */
	mb();
	sfxge_tx_qreap(txq);
	level = txq->added - txq->reaped;
	if (level < SFXGE_TXQ_BLOCK_LEVEL) {
		mb();
		txq->blocked = 0;
	}
}

static int sfxge_tx_queue_mbuf(struct sfxge_txq *txq, struct mbuf *mbuf)
{
	bus_dmamap_t *used_map;
	bus_dmamap_t map;
	bus_dma_segment_t dma_seg[SFXGE_TX_MAPPING_MAX_SEG];
	unsigned int id;
	struct sfxge_tx_mapping *stmp;
	efx_buffer_t *desc;
	int n_dma_seg;
	int rc;
	int i;

	KASSERT(!txq->blocked, ("txq->blocked"));

	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO)
		prefetch_read_many(mbuf->m_data);

	if (txq->init_state != SFXGE_TXQ_STARTED) {
		rc = EINTR;
		goto reject;
	}

	/* Load the packet for DMA. */
	id = txq->added & (SFXGE_NDESCS - 1);
	stmp = &txq->stmp[id];
	rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag, stmp->map,
	    mbuf, dma_seg, &n_dma_seg, 0);
	if (rc == EFBIG) {
		/* Try again. */
		struct mbuf *new_mbuf = m_collapse(mbuf, M_NOWAIT,
		    SFXGE_TX_MAPPING_MAX_SEG);
		if (new_mbuf == NULL)
			goto reject;
		++txq->collapses;
		mbuf = new_mbuf;
		rc = bus_dmamap_load_mbuf_sg(txq->packet_dma_tag,
		    stmp->map, mbuf,
		    dma_seg, &n_dma_seg, 0);
	}
	if (rc != 0)
		goto reject;

	/* Make the packet visible to the hardware. */
	bus_dmamap_sync(txq->packet_dma_tag, stmp->map, BUS_DMASYNC_PREWRITE);

	used_map = &stmp->map;

	if (mbuf->m_pkthdr.csum_flags & CSUM_TSO) {
		rc = sfxge_tx_queue_tso(txq, mbuf, dma_seg, n_dma_seg);
		if (rc < 0)
			goto reject_mapped;
		stmp = &txq->stmp[rc];
	} else {
		/* Add the mapping to the fragment list, and set flags
		 * for the buffer.
		 */
		i = 0;
		for (;;) {
			desc = &txq->pend_desc[i];
			desc->eb_addr = dma_seg[i].ds_addr;
			desc->eb_size = dma_seg[i].ds_len;
			if (i == n_dma_seg - 1) {
				desc->eb_eop = 1;
				break;
			}
			desc->eb_eop = 0;
			i++;

			stmp->flags = 0;
			if (__predict_false(stmp ==
			    &txq->stmp[SFXGE_NDESCS - 1]))
				stmp = &txq->stmp[0];
			else
				stmp++;
		}
		txq->n_pend_desc = n_dma_seg;
	}

	/*
	 * If the mapping required more than one descriptor
	 * then we need to associate the DMA map with the last
	 * descriptor, not the first.
	 */
	if (used_map != &stmp->map) {
		map = stmp->map;
		stmp->map = *used_map;
		*used_map = map;
	}

	stmp->u.mbuf = mbuf;
	stmp->flags = TX_BUF_UNMAP | TX_BUF_MBUF;

	/* Post the fragment list. */
	sfxge_tx_qlist_post(txq);

	return 0;

reject_mapped:
	bus_dmamap_unload(txq->packet_dma_tag, *used_map);
reject:
	/* Drop the packet on the floor. */
	m_freem(mbuf);
	++txq->drops;

	return rc;
}

#ifdef SFXGE_HAVE_MQ

/*
 * Drain the deferred packet list into the transmit queue.
 */
static void
sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc;
	struct sfxge_tx_dpl *stdp;
	struct mbuf *mbuf, *next;
	unsigned int count;
	unsigned int pushed;
	int rc;

	mtx_assert(&txq->lock, MA_OWNED);

	sc = txq->sc;
	stdp = &txq->dpl;
	pushed = txq->added;

	prefetch_read_many(sc->enp);
	prefetch_read_many(txq->common);

	mbuf = stdp->std_get;
	count = stdp->std_count;

	while (count != 0) {
		KASSERT(mbuf != NULL, ("mbuf == NULL"));

		next = mbuf->m_nextpkt;
		mbuf->m_nextpkt = NULL;

		ETHER_BPF_MTAP(sc->ifnet, mbuf); /* packet capture */

		if (next != NULL)
			prefetch_read_many(next);

		rc = sfxge_tx_queue_mbuf(txq, mbuf);
		--count;
		mbuf = next;
		if (rc != 0)
			continue;

		if (txq->blocked)
			break;

		/* Push the fragments to the hardware in batches. */
		if (txq->added - pushed >= SFXGE_TX_BATCH) {
			efx_tx_qpush(txq->common, txq->added);
			pushed = txq->added;
		}
	}

	if (count == 0) {
		KASSERT(mbuf == NULL, ("mbuf != NULL"));
		stdp->std_get = NULL;
		stdp->std_count = 0;
		stdp->std_getp = &stdp->std_get;
	} else {
		stdp->std_get = mbuf;
		stdp->std_count = count;
	}

	if (txq->added != pushed)
		efx_tx_qpush(txq->common, txq->added);

	KASSERT(txq->blocked || stdp->std_count == 0,
	    ("queue unblocked but count is non-zero"));
}

#define SFXGE_TX_QDPL_PENDING(_txq) \
	((_txq)->dpl.std_put != 0)

/*
 * Service the deferred packet list.
 *
 * NOTE: drops the txq mutex!
 */
static inline void
sfxge_tx_qdpl_service(struct sfxge_txq *txq)
{
	mtx_assert(&txq->lock, MA_OWNED);

	do {
		if (SFXGE_TX_QDPL_PENDING(txq))
			sfxge_tx_qdpl_swizzle(txq);

		if (!txq->blocked)
			sfxge_tx_qdpl_drain(txq);

		mtx_unlock(&txq->lock);
	} while (SFXGE_TX_QDPL_PENDING(txq) &&
	    mtx_trylock(&txq->lock));
}

/*
 * Put a packet on the deferred packet list.
 *
 * If we are called with the txq lock held, we put the packet on the "get
 * list", otherwise we atomically push it on the "put list".  The swizzle
 * function takes care of ordering.
 *
 * The length of the put list is bounded by SFXGE_TX_MAX_DEFERRED.  We
 * overload the csum_data field in the mbuf to keep track of this length
 * because there is no cheap alternative to avoid races.
 */
static inline int
sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked)
{
	struct sfxge_tx_dpl *stdp;

	stdp = &txq->dpl;

	KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));

	if (locked) {
		mtx_assert(&txq->lock, MA_OWNED);

		sfxge_tx_qdpl_swizzle(txq);

		*(stdp->std_getp) = mbuf;
		stdp->std_getp = &mbuf->m_nextpkt;
		stdp->std_count++;
	} else {
		volatile uintptr_t *putp;
		uintptr_t old;
		uintptr_t new;
		unsigned old_len;

		putp = &stdp->std_put;
		new = (uintptr_t)mbuf;

		do {
			old = *putp;
			if (old) {
				struct mbuf *mp = (struct mbuf *)old;
				old_len = mp->m_pkthdr.csum_data;
			} else
				old_len = 0;
			if (old_len >= SFXGE_TX_MAX_DEFERRED)
				return ENOBUFS;
			mbuf->m_pkthdr.csum_data = old_len + 1;
			mbuf->m_nextpkt = (void *)old;
		} while (atomic_cmpset_ptr(putp, old, new) == 0);
	}

	return (0);
}

/*
 * Called from if_transmit - will try to grab the txq lock and enqueue to the
 * put list if it succeeds, otherwise will push onto the defer list.
 */
int
sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
{
	int locked;
	int rc;

	/*
	 * Try to grab the txq lock.  If we are able to get the lock,
	 * the packet will be appended to the "get list" of the deferred
	 * packet list.  Otherwise, it will be pushed on the "put list".
	 */
	locked = mtx_trylock(&txq->lock);

	/*
	 * Can only fail if we weren't able to get the lock.
	 */
	if (sfxge_tx_qdpl_put(txq, m, locked) != 0) {
		KASSERT(!locked,
		    ("sfxge_tx_qdpl_put() failed locked"));
		rc = ENOBUFS;
		goto fail;
	}

	/*
	 * Try to grab the lock again.
	 *
	 * If we are able to get the lock, we need to process the deferred
	 * packet list.  If we are not able to get the lock, another thread
	 * is processing the list.
	 */
	if (!locked)
		locked = mtx_trylock(&txq->lock);

	if (locked) {
		/* Try to service the list. */
		sfxge_tx_qdpl_service(txq);
		/* Lock has been dropped. */
	}

	return (0);

fail:
	return (rc);
}

static void
sfxge_tx_qdpl_flush(struct sfxge_txq *txq)
{
	struct sfxge_tx_dpl *stdp = &txq->dpl;
	struct mbuf *mbuf, *next;

	mtx_lock(&txq->lock);

	sfxge_tx_qdpl_swizzle(txq);
	for (mbuf = stdp->std_get; mbuf != NULL; mbuf = next) {
		next = mbuf->m_nextpkt;
		m_freem(mbuf);
	}
	stdp->std_get = NULL;
	stdp->std_count = 0;
	stdp->std_getp = &stdp->std_get;

	mtx_unlock(&txq->lock);
}

void
sfxge_if_qflush(struct ifnet *ifp)
{
	struct sfxge_softc *sc;
	int i;

	sc = ifp->if_softc;

	for (i = 0; i < SFXGE_TX_SCALE(sc); i++)
		sfxge_tx_qdpl_flush(sc->txq[i]);
}

/*
 * TX start -- called by the stack.
 */
int
sfxge_if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sfxge_softc *sc;
	struct sfxge_txq *txq;
	int rc;

	sc = (struct sfxge_softc *)ifp->if_softc;

	KASSERT(ifp->if_flags & IFF_UP, ("interface not up"));

	if (!SFXGE_LINK_UP(sc)) {
		m_freem(m);
		return (0);
	}

	/* Pick the desired transmit queue. */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO)) {
		int index = 0;

		if (m->m_flags & M_FLOWID) {
			uint32_t hash = m->m_pkthdr.flowid;

			index = sc->rx_indir_table[hash % SFXGE_RX_SCALE_MAX];
		}
		txq = sc->txq[SFXGE_TXQ_IP_TCP_UDP_CKSUM + index];
	} else if (m->m_pkthdr.csum_flags & CSUM_DELAY_IP) {
		txq = sc->txq[SFXGE_TXQ_IP_CKSUM];
	} else {
		txq = sc->txq[SFXGE_TXQ_NON_CKSUM];
	}

	rc = sfxge_tx_packet_add(txq, m);

	return (rc);
}

#else /* !SFXGE_HAVE_MQ */

static void sfxge_if_start_locked(struct ifnet *ifp)
{
	struct sfxge_softc *sc = ifp->if_softc;
	struct sfxge_txq *txq;
	struct mbuf *mbuf;
	unsigned int pushed[SFXGE_TXQ_NTYPES];
	unsigned int q_index;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!sc->port.link_up)
		return;

	for (q_index = 0; q_index < SFXGE_TXQ_NTYPES; q_index++) {
		txq = sc->txq[q_index];
		pushed[q_index] = txq->added;
	}

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, mbuf);
		if (mbuf == NULL)
			break;

		ETHER_BPF_MTAP(ifp, mbuf); /* packet capture */

		/* Pick the desired transmit queue. */
		if (mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO))
			q_index = SFXGE_TXQ_IP_TCP_UDP_CKSUM;
		else if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_IP)
			q_index = SFXGE_TXQ_IP_CKSUM;
		else
			q_index = SFXGE_TXQ_NON_CKSUM;
		txq = sc->txq[q_index];

		if (sfxge_tx_queue_mbuf(txq, mbuf) != 0)
			continue;

		if (txq->blocked) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}

		/* Push the fragments to the hardware in batches. */
		if (txq->added - pushed[q_index] >= SFXGE_TX_BATCH) {
			efx_tx_qpush(txq->common, txq->added);
			pushed[q_index] = txq->added;
		}
	}

	for (q_index = 0; q_index < SFXGE_TXQ_NTYPES; q_index++) {
		txq = sc->txq[q_index];
		if (txq->added != pushed[q_index])
			efx_tx_qpush(txq->common, txq->added);
	}
}

void sfxge_if_start(struct ifnet *ifp)
{
	struct sfxge_softc *sc = ifp->if_softc;

	mtx_lock(&sc->tx_lock);
	sfxge_if_start_locked(ifp);
	mtx_unlock(&sc->tx_lock);
}

static inline void
sfxge_tx_qdpl_service(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc = txq->sc;
	struct ifnet *ifp = sc->ifnet;

	mtx_assert(&sc->tx_lock, MA_OWNED);
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	sfxge_if_start_locked(ifp);
	mtx_unlock(&sc->tx_lock);
}

#endif /* SFXGE_HAVE_MQ */

/*
 * Software "TSO".  Not quite as good as doing it in hardware, but
 * still faster than segmenting in the stack.
 */
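
/*
 * Overview (added comment, not from the original authors): for every
 * MSS-sized output segment, tso_start_new_packet() below appears to build
 * a fresh DMA-mapped copy of the Ethernet/IP/TCP headers with updated
 * sequence number, length and flags, and tso_fill_packet_with_fragment()
 * then emits payload descriptors sliced from the original mbuf's DMA
 * segments until either the segment or the current input fragment is
 * exhausted.
 */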

struct sfxge_tso_state {
	/* Output position */
	unsigned out_len;	/* Remaining length in current segment */
	unsigned seqnum;	/* Current sequence number */
	unsigned packet_space;	/* Remaining space in current packet */

	/* Input position */
	unsigned dma_seg_i;	/* Current DMA segment number */
	uint64_t dma_addr;	/* DMA address of current position */
	unsigned in_len;	/* Remaining length in current mbuf */

	const struct mbuf *mbuf; /* Input mbuf (head of chain) */
	u_short protocol;	/* Network protocol (after VLAN decap) */
	ssize_t nh_off;		/* Offset of network header */
	ssize_t tcph_off;	/* Offset of TCP header */
	unsigned header_len;	/* Number of bytes of header */
	int full_packet_size;	/* Number of bytes to put in each outgoing
				 * segment */
};

static inline const struct ip *tso_iph(const struct sfxge_tso_state *tso)
{
	KASSERT(tso->protocol == htons(ETHERTYPE_IP),
	    ("tso_iph() in non-IPv4 state"));
	return (const struct ip *)(tso->mbuf->m_data + tso->nh_off);
}
static inline const struct ip6_hdr *tso_ip6h(const struct sfxge_tso_state *tso)
{
	KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
	    ("tso_ip6h() in non-IPv6 state"));
	return (const struct ip6_hdr *)(tso->mbuf->m_data + tso->nh_off);
}
static inline const struct tcphdr *tso_tcph(const struct sfxge_tso_state *tso)
{
	return (const struct tcphdr *)(tso->mbuf->m_data + tso->tcph_off);
}

/* Size of preallocated TSO header buffers.  Larger blocks must be
 * allocated from the heap.
 */
#define TSOH_STD_SIZE	128

/* At most half the descriptors in the queue at any time will refer to
 * a TSO header buffer, since they must always be followed by a
 * payload descriptor referring to an mbuf.
 */
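/*
 * Added note (illustrative, not from the original sources): with the
 * common PAGE_SIZE of 4096 and TSOH_STD_SIZE of 128, each DMA page holds
 * TSOH_PER_PAGE = 32 standard headers, and TSOH_PAGE_COUNT rounds
 * TSOH_COUNT up to a whole number of such pages.
 */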
#define TSOH_COUNT	(SFXGE_NDESCS / 2u)
#define TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
#define TSOH_PAGE_COUNT	((TSOH_COUNT + TSOH_PER_PAGE - 1) / TSOH_PER_PAGE)

static int tso_init(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc = txq->sc;
	int i, rc;

	/* Allocate TSO header buffers */
	txq->tsoh_buffer = malloc(TSOH_PAGE_COUNT * sizeof(txq->tsoh_buffer[0]),
	    M_SFXGE, M_WAITOK);

	for (i = 0; i < TSOH_PAGE_COUNT; i++) {
		rc = sfxge_dma_alloc(sc, PAGE_SIZE, &txq->tsoh_buffer[i]);
		if (rc)
			goto fail;
	}

	return 0;

fail:
	while (i-- > 0)
		sfxge_dma_free(&txq->tsoh_buffer[i]);
	free(txq->tsoh_buffer, M_SFXGE);
	txq->tsoh_buffer = NULL;
	return rc;
}

static void tso_fini(struct sfxge_txq *txq)
{
	int i;

	if (txq->tsoh_buffer) {
		for (i = 0; i < TSOH_PAGE_COUNT; i++)
			sfxge_dma_free(&txq->tsoh_buffer[i]);
		free(txq->tsoh_buffer, M_SFXGE);
	}
}

static void tso_start(struct sfxge_tso_state *tso, struct mbuf *mbuf)
{
	struct ether_header *eh = mtod(mbuf, struct ether_header *);

	tso->mbuf = mbuf;

	/* Find network protocol and header */
	tso->protocol = eh->ether_type;
	if (tso->protocol == htons(ETHERTYPE_VLAN)) {
		struct ether_vlan_header *veh =
		    mtod(mbuf, struct ether_vlan_header *);
		tso->protocol = veh->evl_proto;
		tso->nh_off = sizeof(*veh);
	} else {
		tso->nh_off = sizeof(*eh);
	}

	/* Find TCP header */
	if (tso->protocol == htons(ETHERTYPE_IP)) {
		KASSERT(tso_iph(tso)->ip_p == IPPROTO_TCP,
		    ("TSO required on non-TCP packet"));
		tso->tcph_off = tso->nh_off + 4 * tso_iph(tso)->ip_hl;
	} else {
		KASSERT(tso->protocol == htons(ETHERTYPE_IPV6),
		    ("TSO required on non-IP packet"));
		KASSERT(tso_ip6h(tso)->ip6_nxt == IPPROTO_TCP,
		    ("TSO required on non-TCP packet"));
		tso->tcph_off = tso->nh_off + sizeof(struct ip6_hdr);
	}

	/* We assume all headers are linear in the head mbuf */
	tso->header_len = tso->tcph_off + 4 * tso_tcph(tso)->th_off;
	KASSERT(tso->header_len <= mbuf->m_len, ("packet headers fragmented"));
	tso->full_packet_size = tso->header_len + mbuf->m_pkthdr.tso_segsz;

	tso->seqnum = ntohl(tso_tcph(tso)->th_seq);

	/* These flags must not be duplicated */
	KASSERT(!(tso_tcph(tso)->th_flags & (TH_URG | TH_SYN | TH_RST)),
	    ("incompatible TCP flag on TSO packet"));

	tso->out_len = mbuf->m_pkthdr.len - tso->header_len;
}

/*
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 *
 * Form descriptors for the current fragment, until we reach the end
 * of fragment or end-of-packet.
 */
static void tso_fill_packet_with_fragment(struct sfxge_txq *txq,
    struct sfxge_tso_state *tso)
{
	efx_buffer_t *desc;
	int n;

	if (tso->in_len == 0 || tso->packet_space == 0)
		return;

	KASSERT(tso->in_len > 0, ("TSO input length went negative"));
	KASSERT(tso->packet_space > 0, ("TSO packet space went negative"));

	n = min(tso->in_len, tso->packet_space);

	tso->packet_space -= n;
	tso->out_len -= n;
	tso->in_len -= n;

	desc = &txq->pend_desc[txq->n_pend_desc++];
	desc->eb_addr = tso->dma_addr;
	desc->eb_size = n;
	desc->eb_eop = tso->out_len == 0 || tso->packet_space == 0;

	tso->dma_addr += n;
}

/* Callback from bus_dmamap_load() for long TSO headers. */
static void tso_map_long_header(void *dma_addr_ret,
    bus_dma_segment_t *segs, int nseg,
    int error)
{
	*(uint64_t *)dma_addr_ret = ((__predict_true(error == 0) &&
	    __predict_true(nseg == 1)) ?
	    segs->ds_addr : 0);
}

/*
 * tso_start_new_packet - generate a new header and prepare for the new packet
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or an error code if failed to alloc header.
 */
static int tso_start_new_packet(struct sfxge_txq *txq,
    struct sfxge_tso_state *tso,
    unsigned int id)
{
	struct sfxge_tx_mapping *stmp = &txq->stmp[id];
	struct tcphdr *tsoh_th;
	unsigned ip_length;
	caddr_t header;
	uint64_t dma_addr;
	bus_dmamap_t map;
	efx_buffer_t *desc;
	int rc;

	/* Allocate a DMA-mapped header buffer. */
	if (__predict_true(tso->header_len <= TSOH_STD_SIZE)) {
		unsigned int page_index = (id / 2) / TSOH_PER_PAGE;
		unsigned int buf_index = (id / 2) % TSOH_PER_PAGE;

		header = (txq->tsoh_buffer[page_index].esm_base +
		    buf_index * TSOH_STD_SIZE);
		dma_addr = (txq->tsoh_buffer[page_index].esm_addr +
		    buf_index * TSOH_STD_SIZE);
		map = txq->tsoh_buffer[page_index].esm_map;

		stmp->flags = 0;
	} else {
		/* We cannot use bus_dmamem_alloc() as that may sleep */
		header = malloc(tso->header_len, M_SFXGE, M_NOWAIT);
		if (__predict_false(!header))
			return ENOMEM;
		rc = bus_dmamap_load(txq->packet_dma_tag, stmp->map,
		    header, tso->header_len,
		    tso_map_long_header, &dma_addr,
		    BUS_DMA_NOWAIT);
		if (__predict_false(dma_addr == 0)) {
			if (rc == 0) {
				/* Succeeded but got >1 segment */
				bus_dmamap_unload(txq->packet_dma_tag,
				    stmp->map);
				rc = EINVAL;
			}
			free(header, M_SFXGE);
			return rc;
		}
		map = stmp->map;

		txq->tso_long_headers++;
		stmp->u.heap_buf = header;
		stmp->flags = TX_BUF_UNMAP;
	}

	tsoh_th = (struct tcphdr *)(header + tso->tcph_off);

	/* Copy and update the headers. */
	memcpy(header, tso->mbuf->m_data, tso->header_len);

	tsoh_th->th_seq = htonl(tso->seqnum);
	tso->seqnum += tso->mbuf->m_pkthdr.tso_segsz;
	if (tso->out_len > tso->mbuf->m_pkthdr.tso_segsz) {
		/* This packet will not finish the TSO burst. */
		ip_length = tso->full_packet_size - tso->nh_off;
		tsoh_th->th_flags &= ~(TH_FIN | TH_PUSH);
	} else {
		/* This packet will be the last in the TSO burst. */
		ip_length = tso->header_len - tso->nh_off + tso->out_len;
	}

	if (tso->protocol == htons(ETHERTYPE_IP)) {
		struct ip *tsoh_iph = (struct ip *)(header + tso->nh_off);
		tsoh_iph->ip_len = htons(ip_length);
		/* XXX We should increment ip_id, but FreeBSD doesn't
		 * currently allocate extra IDs for multiple segments.
		 */
	} else {
		struct ip6_hdr *tsoh_iph =
		    (struct ip6_hdr *)(header + tso->nh_off);
		tsoh_iph->ip6_plen = htons(ip_length - sizeof(*tsoh_iph));
	}

	/* Make the header visible to the hardware. */
	bus_dmamap_sync(txq->packet_dma_tag, map, BUS_DMASYNC_PREWRITE);

	tso->packet_space = tso->mbuf->m_pkthdr.tso_segsz;
	txq->tso_packets++;

	/* Form a descriptor for this header. */
	desc = &txq->pend_desc[txq->n_pend_desc++];
	desc->eb_addr = dma_addr;
	desc->eb_size = tso->header_len;
	desc->eb_eop = 0;

	return 0;
}

static int
sfxge_tx_queue_tso(struct sfxge_txq *txq, struct mbuf *mbuf,
    const bus_dma_segment_t *dma_seg, int n_dma_seg)
{
	struct sfxge_tso_state tso;
	unsigned int id, next_id;

	tso_start(&tso, mbuf);

	/* Grab the first payload fragment. */
	if (dma_seg->ds_len == tso.header_len) {
		--n_dma_seg;
		KASSERT(n_dma_seg, ("no payload found in TSO packet"));
		++dma_seg;
		tso.in_len = dma_seg->ds_len;
		tso.dma_addr = dma_seg->ds_addr;
	} else {
		tso.in_len = dma_seg->ds_len - tso.header_len;
		tso.dma_addr = dma_seg->ds_addr + tso.header_len;
	}

	id = txq->added & (SFXGE_NDESCS - 1);
	if (__predict_false(tso_start_new_packet(txq, &tso, id)))
		return -1;

	while (1) {
		id = (id + 1) & (SFXGE_NDESCS - 1);
		tso_fill_packet_with_fragment(txq, &tso);

		/* Move onto the next fragment? */
		if (tso.in_len == 0) {
			--n_dma_seg;
			if (n_dma_seg == 0)
				break;
			++dma_seg;
			tso.in_len = dma_seg->ds_len;
			tso.dma_addr = dma_seg->ds_addr;
		}

		/* End of packet? */
		if (tso.packet_space == 0) {
			/* If the queue is now full due to tiny MSS,
			 * or we can't create another header, discard
			 * the remainder of the input mbuf but do not
			 * roll back the work we have done.
			 */
			if (txq->n_pend_desc >
			    SFXGE_TSO_MAX_DESC - (1 + SFXGE_TX_MAPPING_MAX_SEG))
				break;
			next_id = (id + 1) & (SFXGE_NDESCS - 1);
			if (__predict_false(tso_start_new_packet(txq, &tso,
			    next_id)))
				break;
			id = next_id;
		}
	}

	txq->tso_bursts++;
	return id;
}

static void
sfxge_tx_qunblock(struct sfxge_txq *txq)
{
	struct sfxge_softc *sc;
	struct sfxge_evq *evq;

	sc = txq->sc;
	evq = sc->evq[txq->evq_index];

	mtx_assert(&evq->lock, MA_OWNED);

	if (txq->init_state != SFXGE_TXQ_STARTED)
		return;

	mtx_lock(SFXGE_TXQ_LOCK(txq));

	if (txq->blocked) {
		unsigned int level;

		level = txq->added - txq->completed;
		if (level <= SFXGE_TXQ_UNBLOCK_LEVEL)
			txq->blocked = 0;
	}

	sfxge_tx_qdpl_service(txq);
	/* note: lock has been dropped */
}

void
sfxge_tx_qflush_done(struct sfxge_txq *txq)
{

	txq->flush_state = SFXGE_FLUSH_DONE;
}

static void
sfxge_tx_qstop(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	struct sfxge_evq *evq;
	unsigned int count;

	txq = sc->txq[index];
	evq = sc->evq[txq->evq_index];

	mtx_lock(SFXGE_TXQ_LOCK(txq));

	KASSERT(txq->init_state == SFXGE_TXQ_STARTED,
	    ("txq->init_state != SFXGE_TXQ_STARTED"));

	txq->init_state = SFXGE_TXQ_INITIALIZED;
	txq->flush_state = SFXGE_FLUSH_PENDING;

	/* Flush the transmit queue. */
	efx_tx_qflush(txq->common);

	mtx_unlock(SFXGE_TXQ_LOCK(txq));

	count = 0;
	do {
		/* Spin for 100ms. */
		DELAY(100000);

		if (txq->flush_state != SFXGE_FLUSH_PENDING)
			break;
	} while (++count < 20);

	mtx_lock(&evq->lock);
	mtx_lock(SFXGE_TXQ_LOCK(txq));

	KASSERT(txq->flush_state != SFXGE_FLUSH_FAILED,
	    ("txq->flush_state == SFXGE_FLUSH_FAILED"));

	txq->flush_state = SFXGE_FLUSH_DONE;

	txq->blocked = 0;
	txq->pending = txq->added;

	sfxge_tx_qcomplete(txq);
	KASSERT(txq->completed == txq->added,
	    ("txq->completed != txq->added"));

	sfxge_tx_qreap(txq);
	KASSERT(txq->reaped == txq->completed,
	    ("txq->reaped != txq->completed"));

	txq->added = 0;
	txq->pending = 0;
	txq->completed = 0;
	txq->reaped = 0;

	/* Destroy the common code transmit queue. */
	efx_tx_qdestroy(txq->common);
	txq->common = NULL;

	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
	    EFX_TXQ_NBUFS(SFXGE_NDESCS));

	mtx_unlock(&evq->lock);
	mtx_unlock(SFXGE_TXQ_LOCK(txq));
}

static int
sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	efsys_mem_t *esmp;
	uint16_t flags;
	struct sfxge_evq *evq;
	int rc;

	txq = sc->txq[index];
	esmp = &txq->mem;
	evq = sc->evq[txq->evq_index];

	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));
	KASSERT(evq->init_state == SFXGE_EVQ_STARTED,
	    ("evq->init_state != SFXGE_EVQ_STARTED"));

	/* Program the buffer table. */
	if ((rc = efx_sram_buf_tbl_set(sc->enp, txq->buf_base_id, esmp,
	    EFX_TXQ_NBUFS(SFXGE_NDESCS))) != 0)
		return rc;

	/* Determine the kind of queue we are creating. */
	switch (txq->type) {
	case SFXGE_TXQ_NON_CKSUM:
		flags = 0;
		break;
	case SFXGE_TXQ_IP_CKSUM:
		flags = EFX_CKSUM_IPV4;
		break;
	case SFXGE_TXQ_IP_TCP_UDP_CKSUM:
		flags = EFX_CKSUM_IPV4 | EFX_CKSUM_TCPUDP;
		break;
	default:
		KASSERT(0, ("Impossible TX queue"));
		flags = 0;
		break;
	}

	/* Create the common code transmit queue. */
	if ((rc = efx_tx_qcreate(sc->enp, index, index, esmp,
	    SFXGE_NDESCS, txq->buf_base_id, flags, evq->common,
	    &txq->common)) != 0)
		goto fail;

	mtx_lock(SFXGE_TXQ_LOCK(txq));

	/* Enable the transmit queue. */
	efx_tx_qenable(txq->common);

	txq->init_state = SFXGE_TXQ_STARTED;

	mtx_unlock(SFXGE_TXQ_LOCK(txq));

	return (0);

fail:
	efx_sram_buf_tbl_clear(sc->enp, txq->buf_base_id,
	    EFX_TXQ_NBUFS(SFXGE_NDESCS));
	return rc;
}

void
sfxge_tx_stop(struct sfxge_softc *sc)
{
	const efx_nic_cfg_t *encp;
	int index;

	index = SFXGE_TX_SCALE(sc);
	while (--index >= 0)
		sfxge_tx_qstop(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);

	sfxge_tx_qstop(sc, SFXGE_TXQ_IP_CKSUM);

	encp = efx_nic_cfg_get(sc->enp);
	sfxge_tx_qstop(sc, SFXGE_TXQ_NON_CKSUM);

	/* Tear down the transmit module */
	efx_tx_fini(sc->enp);
}

int
sfxge_tx_start(struct sfxge_softc *sc)
{
	int index;
	int rc;

	/* Initialize the common code transmit module. */
	if ((rc = efx_tx_init(sc->enp)) != 0)
		return (rc);

	if ((rc = sfxge_tx_qstart(sc, SFXGE_TXQ_NON_CKSUM)) != 0)
		goto fail;

	if ((rc = sfxge_tx_qstart(sc, SFXGE_TXQ_IP_CKSUM)) != 0)
		goto fail2;

	for (index = 0; index < SFXGE_TX_SCALE(sc); index++) {
		if ((rc = sfxge_tx_qstart(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM +
		    index)) != 0)
			goto fail3;
	}

	return (0);

fail3:
	while (--index >= 0)
		sfxge_tx_qstop(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);

	sfxge_tx_qstop(sc, SFXGE_TXQ_IP_CKSUM);

fail2:
	sfxge_tx_qstop(sc, SFXGE_TXQ_NON_CKSUM);

fail:
	efx_tx_fini(sc->enp);

	return (rc);
}

/**
 * Destroy a transmit queue.
 */
static void
sfxge_tx_qfini(struct sfxge_softc *sc, unsigned int index)
{
	struct sfxge_txq *txq;
	unsigned int nmaps = SFXGE_NDESCS;

	txq = sc->txq[index];

	KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED,
	    ("txq->init_state != SFXGE_TXQ_INITIALIZED"));

	if (txq->type == SFXGE_TXQ_IP_TCP_UDP_CKSUM)
		tso_fini(txq);

	/* Free the context arrays. */
	free(txq->pend_desc, M_SFXGE);
	while (nmaps--)
		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
	free(txq->stmp, M_SFXGE);

	/* Release DMA memory mapping. */
	sfxge_dma_free(&txq->mem);

	sc->txq[index] = NULL;

#ifdef SFXGE_HAVE_MQ
	mtx_destroy(&txq->lock);
#endif

	free(txq, M_SFXGE);
}

static int
sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
    enum sfxge_txq_type type, unsigned int evq_index)
{
	struct sfxge_txq *txq;
	struct sfxge_evq *evq;
#ifdef SFXGE_HAVE_MQ
	struct sfxge_tx_dpl *stdp;
#endif
	efsys_mem_t *esmp;
	unsigned int nmaps;
	int rc;

	txq = malloc(sizeof(struct sfxge_txq), M_SFXGE, M_ZERO | M_WAITOK);
	txq->sc = sc;

	sc->txq[txq_index] = txq;
	esmp = &txq->mem;

	evq = sc->evq[evq_index];

	/* Allocate and zero DMA space for the descriptor ring. */
	if ((rc = sfxge_dma_alloc(sc, EFX_TXQ_SIZE(SFXGE_NDESCS), esmp)) != 0)
		return (rc);
	(void)memset(esmp->esm_base, 0, EFX_TXQ_SIZE(SFXGE_NDESCS));

	/* Allocate buffer table entries. */
	sfxge_sram_buf_tbl_alloc(sc, EFX_TXQ_NBUFS(SFXGE_NDESCS),
	    &txq->buf_base_id);

	/* Create a DMA tag for packet mappings. */
	if (bus_dma_tag_create(sc->parent_dma_tag, 1, 0x1000,
	    MIN(0x3FFFFFFFFFFFUL, BUS_SPACE_MAXADDR), BUS_SPACE_MAXADDR, NULL,
	    NULL, 0x11000, SFXGE_TX_MAPPING_MAX_SEG, 0x1000, 0, NULL, NULL,
	    &txq->packet_dma_tag) != 0) {
		device_printf(sc->dev, "Couldn't allocate txq DMA tag\n");
		rc = ENOMEM;
		goto fail;
	}

	/* Allocate pending descriptor array for batching writes. */
	txq->pend_desc = malloc(sizeof(efx_buffer_t) * SFXGE_NDESCS,
	    M_SFXGE, M_ZERO | M_WAITOK);

	/* Allocate and initialise mbuf DMA mapping array. */
	txq->stmp = malloc(sizeof(struct sfxge_tx_mapping) * SFXGE_NDESCS,
	    M_SFXGE, M_ZERO | M_WAITOK);
	for (nmaps = 0; nmaps < SFXGE_NDESCS; nmaps++) {
		rc = bus_dmamap_create(txq->packet_dma_tag, 0,
		    &txq->stmp[nmaps].map);
		if (rc != 0)
			goto fail2;
	}

	if (type == SFXGE_TXQ_IP_TCP_UDP_CKSUM &&
	    (rc = tso_init(txq)) != 0)
		goto fail3;

#ifdef SFXGE_HAVE_MQ
	/* Initialize the deferred packet list. */
	stdp = &txq->dpl;
	stdp->std_getp = &stdp->std_get;

	mtx_init(&txq->lock, "txq", NULL, MTX_DEF);
#endif

	txq->type = type;
	txq->evq_index = evq_index;
	txq->txq_index = txq_index;
	txq->init_state = SFXGE_TXQ_INITIALIZED;

	return (0);

fail3:
	free(txq->pend_desc, M_SFXGE);
fail2:
	while (nmaps--)
		bus_dmamap_destroy(txq->packet_dma_tag, txq->stmp[nmaps].map);
	free(txq->stmp, M_SFXGE);
	bus_dma_tag_destroy(txq->packet_dma_tag);

fail:
	sfxge_dma_free(esmp);

	return (rc);
}

static const struct {
	const char *name;
	size_t offset;
} sfxge_tx_stats[] = {
#define SFXGE_TX_STAT(name, member) \
	{ #name, offsetof(struct sfxge_txq, member) }
	SFXGE_TX_STAT(tso_bursts, tso_bursts),
	SFXGE_TX_STAT(tso_packets, tso_packets),
	SFXGE_TX_STAT(tso_long_headers, tso_long_headers),
	SFXGE_TX_STAT(tx_collapses, collapses),
	SFXGE_TX_STAT(tx_drops, drops),
};

static int
sfxge_tx_stat_handler(SYSCTL_HANDLER_ARGS)
{
	struct sfxge_softc *sc = arg1;
	unsigned int id = arg2;
	unsigned long sum;
	unsigned int index;

	/* Sum across all TX queues */
	sum = 0;
	for (index = 0;
	     index < SFXGE_TXQ_IP_TCP_UDP_CKSUM + SFXGE_TX_SCALE(sc);
	     index++)
		sum += *(unsigned long *)((caddr_t)sc->txq[index] +
		    sfxge_tx_stats[id].offset);

	return SYSCTL_OUT(req, &sum, sizeof(sum));
}

static void
sfxge_tx_stat_init(struct sfxge_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
	struct sysctl_oid_list *stat_list;
	unsigned int id;

	stat_list = SYSCTL_CHILDREN(sc->stats_node);

	for (id = 0;
	     id < sizeof(sfxge_tx_stats) / sizeof(sfxge_tx_stats[0]);
	     id++) {
		SYSCTL_ADD_PROC(
			ctx, stat_list,
			OID_AUTO, sfxge_tx_stats[id].name,
			CTLTYPE_ULONG|CTLFLAG_RD,
			sc, id, sfxge_tx_stat_handler, "LU",
			"");
	}
}

void
sfxge_tx_fini(struct sfxge_softc *sc)
{
	int index;

	index = SFXGE_TX_SCALE(sc);
	while (--index >= 0)
		sfxge_tx_qfini(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);

	sfxge_tx_qfini(sc, SFXGE_TXQ_IP_CKSUM);
	sfxge_tx_qfini(sc, SFXGE_TXQ_NON_CKSUM);
}

int
sfxge_tx_init(struct sfxge_softc *sc)
{
	struct sfxge_intr *intr;
	int index;
	int rc;

	intr = &sc->intr;

	KASSERT(intr->state == SFXGE_INTR_INITIALIZED,
	    ("intr->state != SFXGE_INTR_INITIALIZED"));

	/* Initialize the transmit queues */
	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_NON_CKSUM,
	    SFXGE_TXQ_NON_CKSUM, 0)) != 0)
		goto fail;

	if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_IP_CKSUM,
	    SFXGE_TXQ_IP_CKSUM, 0)) != 0)
		goto fail2;

	for (index = 0; index < SFXGE_TX_SCALE(sc); index++) {
		if ((rc = sfxge_tx_qinit(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index,
		    SFXGE_TXQ_IP_TCP_UDP_CKSUM, index)) != 0)
			goto fail3;
	}

	sfxge_tx_stat_init(sc);

	return (0);

fail3:
	sfxge_tx_qfini(sc, SFXGE_TXQ_IP_CKSUM);

	while (--index >= 0)
		sfxge_tx_qfini(sc, SFXGE_TXQ_IP_TCP_UDP_CKSUM + index);

fail2:
	sfxge_tx_qfini(sc, SFXGE_TXQ_NON_CKSUM);

fail:
	return (rc);
}