1 /****************************************************************************** 2 3 Copyright (c) 2001-2014, Intel Corporation 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ******************************************************************************/ 33 /*$FreeBSD$*/ 34 35 36 #ifndef IXGBE_STANDALONE_BUILD 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 #include "opt_rss.h" 40 #endif 41 42 #include "ixgbe.h" 43 44 #ifdef RSS 45 #include <net/rss_config.h> 46 #include <netinet/in_rss.h> 47 #endif 48 49 #ifdef DEV_NETMAP 50 #include <net/netmap.h> 51 #include <sys/selinfo.h> 52 #include <dev/netmap/netmap_kern.h> 53 54 extern int ix_crcstrip; 55 #endif 56 57 /* 58 ** HW RSC control: 59 ** this feature only works with 60 ** IPv4, and only on 82599 and later. 61 ** Also this will cause IP forwarding to 62 ** fail and that can't be controlled by 63 ** the stack as LRO can. For all these 64 ** reasons I've deemed it best to leave 65 ** this off and not bother with a tuneable 66 ** interface, this would need to be compiled 67 ** to enable. 68 */ 69 static bool ixgbe_rsc_enable = FALSE; 70 71 #ifdef IXGBE_FDIR 72 /* 73 ** For Flow Director: this is the 74 ** number of TX packets we sample 75 ** for the filter pool, this means 76 ** every 20th packet will be probed. 77 ** 78 ** This feature can be disabled by 79 ** setting this to 0. 
80 */ 81 static int atr_sample_rate = 20; 82 #endif 83 84 /* Shared PCI config read/write */ 85 inline u16 86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg) 87 { 88 u16 value; 89 90 value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev, 91 reg, 2); 92 93 return (value); 94 } 95 96 inline void 97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value) 98 { 99 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev, 100 reg, value, 2); 101 102 return; 103 } 104 105 /********************************************************************* 106 * Local Function prototypes 107 *********************************************************************/ 108 static void ixgbe_setup_transmit_ring(struct tx_ring *); 109 static void ixgbe_free_transmit_buffers(struct tx_ring *); 110 static int ixgbe_setup_receive_ring(struct rx_ring *); 111 static void ixgbe_free_receive_buffers(struct rx_ring *); 112 113 static void ixgbe_rx_checksum(u32, struct mbuf *, u32); 114 static void ixgbe_refresh_mbufs(struct rx_ring *, int); 115 static int ixgbe_xmit(struct tx_ring *, struct mbuf **); 116 static int ixgbe_tx_ctx_setup(struct tx_ring *, 117 struct mbuf *, u32 *, u32 *); 118 static int ixgbe_tso_setup(struct tx_ring *, 119 struct mbuf *, u32 *, u32 *); 120 #ifdef IXGBE_FDIR 121 static void ixgbe_atr(struct tx_ring *, struct mbuf *); 122 #endif 123 static __inline void ixgbe_rx_discard(struct rx_ring *, int); 124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, 125 struct mbuf *, u32); 126 127 #ifdef IXGBE_LEGACY_TX 128 /********************************************************************* 129 * Transmit entry point 130 * 131 * ixgbe_start is called by the stack to initiate a transmit. 132 * The driver will remain in this routine as long as there are 133 * packets to transmit and transmit resources are available. 134 * In case resources are not available stack is notified and 135 * the packet is requeued. 136 **********************************************************************/ 137 138 void 139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) 140 { 141 struct mbuf *m_head; 142 struct adapter *adapter = txr->adapter; 143 144 IXGBE_TX_LOCK_ASSERT(txr); 145 146 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 147 return; 148 if (!adapter->link_active) 149 return; 150 151 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 152 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) 153 break; 154 155 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 156 if (m_head == NULL) 157 break; 158 159 if (ixgbe_xmit(txr, &m_head)) { 160 if (m_head != NULL) 161 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 162 break; 163 } 164 /* Send a copy of the frame to the BPF listener */ 165 ETHER_BPF_MTAP(ifp, m_head); 166 } 167 return; 168 } 169 170 /* 171 * Legacy TX start - called by the stack, this 172 * always uses the first tx ring, and should 173 * not be used with multiqueue tx enabled. 174 */ 175 void 176 ixgbe_start(struct ifnet *ifp) 177 { 178 struct adapter *adapter = ifp->if_softc; 179 struct tx_ring *txr = adapter->tx_rings; 180 181 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 182 IXGBE_TX_LOCK(txr); 183 ixgbe_start_locked(txr, ifp); 184 IXGBE_TX_UNLOCK(txr); 185 } 186 return; 187 } 188 189 #else /* ! 
IXGBE_LEGACY_TX */ 190 191 /* 192 ** Multiqueue Transmit driver 193 ** 194 */ 195 int 196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) 197 { 198 struct adapter *adapter = ifp->if_softc; 199 struct ix_queue *que; 200 struct tx_ring *txr; 201 int i, err = 0; 202 #ifdef RSS 203 uint32_t bucket_id; 204 #endif 205 206 /* 207 * When doing RSS, map it to the same outbound queue 208 * as the incoming flow would be mapped to. 209 * 210 * If everything is setup correctly, it should be the 211 * same bucket that the current CPU we're on is. 212 */ 213 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 214 #ifdef RSS 215 if (rss_hash2bucket(m->m_pkthdr.flowid, 216 M_HASHTYPE_GET(m), &bucket_id) == 0) 217 /* TODO: spit out something if bucket_id > num_queues? */ 218 i = bucket_id % adapter->num_queues; 219 else 220 #endif 221 i = m->m_pkthdr.flowid % adapter->num_queues; 222 } else 223 i = curcpu % adapter->num_queues; 224 225 /* Check for a hung queue and pick alternative */ 226 if (((1 << i) & adapter->active_queues) == 0) 227 i = ffsl(adapter->active_queues); 228 229 txr = &adapter->tx_rings[i]; 230 que = &adapter->queues[i]; 231 232 err = drbr_enqueue(ifp, txr->br, m); 233 if (err) 234 return (err); 235 if (IXGBE_TX_TRYLOCK(txr)) { 236 ixgbe_mq_start_locked(ifp, txr); 237 IXGBE_TX_UNLOCK(txr); 238 } else 239 taskqueue_enqueue(que->tq, &txr->txq_task); 240 241 return (0); 242 } 243 244 int 245 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 246 { 247 struct adapter *adapter = txr->adapter; 248 struct mbuf *next; 249 int enqueued = 0, err = 0; 250 251 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || 252 adapter->link_active == 0) 253 return (ENETDOWN); 254 255 /* Process the queue */ 256 #if __FreeBSD_version < 901504 257 next = drbr_dequeue(ifp, txr->br); 258 while (next != NULL) { 259 if ((err = ixgbe_xmit(txr, &next)) != 0) { 260 if (next != NULL) 261 err = drbr_enqueue(ifp, txr->br, next); 262 #else 263 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 264 if ((err = ixgbe_xmit(txr, &next)) != 0) { 265 if (next == NULL) { 266 drbr_advance(ifp, txr->br); 267 } else { 268 drbr_putback(ifp, txr->br, next); 269 } 270 #endif 271 break; 272 } 273 #if __FreeBSD_version >= 901504 274 drbr_advance(ifp, txr->br); 275 #endif 276 enqueued++; 277 #if 0 // this is VF-only 278 #if __FreeBSD_version >= 1100036 279 if (next->m_flags & M_MCAST) 280 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 281 #endif 282 #endif 283 /* Send a copy of the frame to the BPF listener */ 284 ETHER_BPF_MTAP(ifp, next); 285 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 286 break; 287 #if __FreeBSD_version < 901504 288 next = drbr_dequeue(ifp, txr->br); 289 #endif 290 } 291 292 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) 293 ixgbe_txeof(txr); 294 295 return (err); 296 } 297 298 /* 299 * Called from a taskqueue to drain queued transmit packets. 
300 */ 301 void 302 ixgbe_deferred_mq_start(void *arg, int pending) 303 { 304 struct tx_ring *txr = arg; 305 struct adapter *adapter = txr->adapter; 306 struct ifnet *ifp = adapter->ifp; 307 308 IXGBE_TX_LOCK(txr); 309 if (!drbr_empty(ifp, txr->br)) 310 ixgbe_mq_start_locked(ifp, txr); 311 IXGBE_TX_UNLOCK(txr); 312 } 313 314 /* 315 ** Flush all ring buffers 316 */ 317 void 318 ixgbe_qflush(struct ifnet *ifp) 319 { 320 struct adapter *adapter = ifp->if_softc; 321 struct tx_ring *txr = adapter->tx_rings; 322 struct mbuf *m; 323 324 for (int i = 0; i < adapter->num_queues; i++, txr++) { 325 IXGBE_TX_LOCK(txr); 326 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 327 m_freem(m); 328 IXGBE_TX_UNLOCK(txr); 329 } 330 if_qflush(ifp); 331 } 332 #endif /* IXGBE_LEGACY_TX */ 333 334 335 /********************************************************************* 336 * 337 * This routine maps the mbufs to tx descriptors, allowing the 338 * TX engine to transmit the packets. 339 * - return 0 on success, positive on failure 340 * 341 **********************************************************************/ 342 343 static int 344 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) 345 { 346 struct adapter *adapter = txr->adapter; 347 u32 olinfo_status = 0, cmd_type_len; 348 int i, j, error, nsegs; 349 int first; 350 bool remap = TRUE; 351 struct mbuf *m_head; 352 bus_dma_segment_t segs[adapter->num_segs]; 353 bus_dmamap_t map; 354 struct ixgbe_tx_buf *txbuf; 355 union ixgbe_adv_tx_desc *txd = NULL; 356 357 m_head = *m_headp; 358 359 /* Basic descriptor defines */ 360 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | 361 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); 362 363 if (m_head->m_flags & M_VLANTAG) 364 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; 365 366 /* 367 * Important to capture the first descriptor 368 * used because it will contain the index of 369 * the one we tell the hardware to report back 370 */ 371 first = txr->next_avail_desc; 372 txbuf = &txr->tx_buffers[first]; 373 map = txbuf->map; 374 375 /* 376 * Map the packet for DMA. 377 */ 378 retry: 379 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 380 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 381 382 if (__predict_false(error)) { 383 struct mbuf *m; 384 385 switch (error) { 386 case EFBIG: 387 /* Try it again? 
- one try */ 388 if (remap == TRUE) { 389 remap = FALSE; 390 m = m_defrag(*m_headp, M_NOWAIT); 391 if (m == NULL) { 392 adapter->mbuf_defrag_failed++; 393 m_freem(*m_headp); 394 *m_headp = NULL; 395 return (ENOBUFS); 396 } 397 *m_headp = m; 398 goto retry; 399 } else 400 return (error); 401 case ENOMEM: 402 txr->no_tx_dma_setup++; 403 return (error); 404 default: 405 txr->no_tx_dma_setup++; 406 m_freem(*m_headp); 407 *m_headp = NULL; 408 return (error); 409 } 410 } 411 412 /* Make certain there are enough descriptors */ 413 if (nsegs > txr->tx_avail - 2) { 414 txr->no_desc_avail++; 415 bus_dmamap_unload(txr->txtag, map); 416 return (ENOBUFS); 417 } 418 m_head = *m_headp; 419 420 /* 421 ** Set up the appropriate offload context 422 ** this will consume the first descriptor 423 */ 424 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); 425 if (__predict_false(error)) { 426 if (error == ENOBUFS) 427 *m_headp = NULL; 428 return (error); 429 } 430 431 #ifdef IXGBE_FDIR 432 /* Do the flow director magic */ 433 if ((txr->atr_sample) && (!adapter->fdir_reinit)) { 434 ++txr->atr_count; 435 if (txr->atr_count >= atr_sample_rate) { 436 ixgbe_atr(txr, m_head); 437 txr->atr_count = 0; 438 } 439 } 440 #endif 441 442 olinfo_status |= IXGBE_ADVTXD_CC; 443 i = txr->next_avail_desc; 444 for (j = 0; j < nsegs; j++) { 445 bus_size_t seglen; 446 bus_addr_t segaddr; 447 448 txbuf = &txr->tx_buffers[i]; 449 txd = &txr->tx_base[i]; 450 seglen = segs[j].ds_len; 451 segaddr = htole64(segs[j].ds_addr); 452 453 txd->read.buffer_addr = segaddr; 454 txd->read.cmd_type_len = htole32(txr->txd_cmd | 455 cmd_type_len |seglen); 456 txd->read.olinfo_status = htole32(olinfo_status); 457 458 if (++i == txr->num_desc) 459 i = 0; 460 } 461 462 txd->read.cmd_type_len |= 463 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); 464 txr->tx_avail -= nsegs; 465 txr->next_avail_desc = i; 466 467 txbuf->m_head = m_head; 468 /* 469 ** Here we swap the map so the last descriptor, 470 ** which gets the completion interrupt has the 471 ** real map, and the first descriptor gets the 472 ** unused map from this descriptor. 473 */ 474 txr->tx_buffers[first].map = txbuf->map; 475 txbuf->map = map; 476 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 477 478 /* Set the EOP descriptor that will be marked done */ 479 txbuf = &txr->tx_buffers[first]; 480 txbuf->eop = txd; 481 482 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 483 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 484 /* 485 * Advance the Transmit Descriptor Tail (Tdt), this tells the 486 * hardware that this frame is available to transmit. 487 */ 488 ++txr->total_packets; 489 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); 490 491 /* Mark queue as having work */ 492 if (txr->busy == 0) 493 txr->busy = 1; 494 495 return (0); 496 497 } 498 499 500 /********************************************************************* 501 * 502 * Allocate memory for tx_buffer structures. The tx_buffer stores all 503 * the information needed to transmit a packet on the wire. This is 504 * called only once at attach, setup is done every reset. 505 * 506 **********************************************************************/ 507 int 508 ixgbe_allocate_transmit_buffers(struct tx_ring *txr) 509 { 510 struct adapter *adapter = txr->adapter; 511 device_t dev = adapter->dev; 512 struct ixgbe_tx_buf *txbuf; 513 int error, i; 514 515 /* 516 * Setup DMA descriptor areas. 
517 */ 518 if ((error = bus_dma_tag_create( 519 bus_get_dma_tag(adapter->dev), /* parent */ 520 1, 0, /* alignment, bounds */ 521 BUS_SPACE_MAXADDR, /* lowaddr */ 522 BUS_SPACE_MAXADDR, /* highaddr */ 523 NULL, NULL, /* filter, filterarg */ 524 IXGBE_TSO_SIZE, /* maxsize */ 525 adapter->num_segs, /* nsegments */ 526 PAGE_SIZE, /* maxsegsize */ 527 0, /* flags */ 528 NULL, /* lockfunc */ 529 NULL, /* lockfuncarg */ 530 &txr->txtag))) { 531 device_printf(dev,"Unable to allocate TX DMA tag\n"); 532 goto fail; 533 } 534 535 if (!(txr->tx_buffers = 536 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * 537 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 538 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 539 error = ENOMEM; 540 goto fail; 541 } 542 543 /* Create the descriptor buffer dma maps */ 544 txbuf = txr->tx_buffers; 545 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 546 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 547 if (error != 0) { 548 device_printf(dev, "Unable to create TX DMA map\n"); 549 goto fail; 550 } 551 } 552 553 return 0; 554 fail: 555 /* We free all, it handles case where we are in the middle */ 556 ixgbe_free_transmit_structures(adapter); 557 return (error); 558 } 559 560 /********************************************************************* 561 * 562 * Initialize a transmit ring. 563 * 564 **********************************************************************/ 565 static void 566 ixgbe_setup_transmit_ring(struct tx_ring *txr) 567 { 568 struct adapter *adapter = txr->adapter; 569 struct ixgbe_tx_buf *txbuf; 570 int i; 571 #ifdef DEV_NETMAP 572 struct netmap_adapter *na = NA(adapter->ifp); 573 struct netmap_slot *slot; 574 #endif /* DEV_NETMAP */ 575 576 /* Clear the old ring contents */ 577 IXGBE_TX_LOCK(txr); 578 #ifdef DEV_NETMAP 579 /* 580 * (under lock): if in netmap mode, do some consistency 581 * checks and set slot to entry 0 of the netmap ring. 582 */ 583 slot = netmap_reset(na, NR_TX, txr->me, 0); 584 #endif /* DEV_NETMAP */ 585 bzero((void *)txr->tx_base, 586 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); 587 /* Reset indices */ 588 txr->next_avail_desc = 0; 589 txr->next_to_clean = 0; 590 591 /* Free any existing tx buffers. */ 592 txbuf = txr->tx_buffers; 593 for (i = 0; i < txr->num_desc; i++, txbuf++) { 594 if (txbuf->m_head != NULL) { 595 bus_dmamap_sync(txr->txtag, txbuf->map, 596 BUS_DMASYNC_POSTWRITE); 597 bus_dmamap_unload(txr->txtag, txbuf->map); 598 m_freem(txbuf->m_head); 599 txbuf->m_head = NULL; 600 } 601 #ifdef DEV_NETMAP 602 /* 603 * In netmap mode, set the map for the packet buffer. 604 * NOTE: Some drivers (not this one) also need to set 605 * the physical buffer address in the NIC ring. 606 * Slots in the netmap ring (indexed by "si") are 607 * kring->nkr_hwofs positions "ahead" wrt the 608 * corresponding slot in the NIC ring. In some drivers 609 * (not here) nkr_hwofs can be negative. Function 610 * netmap_idx_n2k() handles wraparounds properly. 
611 */ 612 if (slot) { 613 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 614 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); 615 } 616 #endif /* DEV_NETMAP */ 617 /* Clear the EOP descriptor pointer */ 618 txbuf->eop = NULL; 619 } 620 621 #ifdef IXGBE_FDIR 622 /* Set the rate at which we sample packets */ 623 if (adapter->hw.mac.type != ixgbe_mac_82598EB) 624 txr->atr_sample = atr_sample_rate; 625 #endif 626 627 /* Set number of descriptors available */ 628 txr->tx_avail = adapter->num_tx_desc; 629 630 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 631 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 632 IXGBE_TX_UNLOCK(txr); 633 } 634 635 /********************************************************************* 636 * 637 * Initialize all transmit rings. 638 * 639 **********************************************************************/ 640 int 641 ixgbe_setup_transmit_structures(struct adapter *adapter) 642 { 643 struct tx_ring *txr = adapter->tx_rings; 644 645 for (int i = 0; i < adapter->num_queues; i++, txr++) 646 ixgbe_setup_transmit_ring(txr); 647 648 return (0); 649 } 650 651 /********************************************************************* 652 * 653 * Free all transmit rings. 654 * 655 **********************************************************************/ 656 void 657 ixgbe_free_transmit_structures(struct adapter *adapter) 658 { 659 struct tx_ring *txr = adapter->tx_rings; 660 661 for (int i = 0; i < adapter->num_queues; i++, txr++) { 662 IXGBE_TX_LOCK(txr); 663 ixgbe_free_transmit_buffers(txr); 664 ixgbe_dma_free(adapter, &txr->txdma); 665 IXGBE_TX_UNLOCK(txr); 666 IXGBE_TX_LOCK_DESTROY(txr); 667 } 668 free(adapter->tx_rings, M_DEVBUF); 669 } 670 671 /********************************************************************* 672 * 673 * Free transmit ring related data structures. 
674 * 675 **********************************************************************/ 676 static void 677 ixgbe_free_transmit_buffers(struct tx_ring *txr) 678 { 679 struct adapter *adapter = txr->adapter; 680 struct ixgbe_tx_buf *tx_buffer; 681 int i; 682 683 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); 684 685 if (txr->tx_buffers == NULL) 686 return; 687 688 tx_buffer = txr->tx_buffers; 689 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { 690 if (tx_buffer->m_head != NULL) { 691 bus_dmamap_sync(txr->txtag, tx_buffer->map, 692 BUS_DMASYNC_POSTWRITE); 693 bus_dmamap_unload(txr->txtag, 694 tx_buffer->map); 695 m_freem(tx_buffer->m_head); 696 tx_buffer->m_head = NULL; 697 if (tx_buffer->map != NULL) { 698 bus_dmamap_destroy(txr->txtag, 699 tx_buffer->map); 700 tx_buffer->map = NULL; 701 } 702 } else if (tx_buffer->map != NULL) { 703 bus_dmamap_unload(txr->txtag, 704 tx_buffer->map); 705 bus_dmamap_destroy(txr->txtag, 706 tx_buffer->map); 707 tx_buffer->map = NULL; 708 } 709 } 710 #ifdef IXGBE_LEGACY_TX 711 if (txr->br != NULL) 712 buf_ring_free(txr->br, M_DEVBUF); 713 #endif 714 if (txr->tx_buffers != NULL) { 715 free(txr->tx_buffers, M_DEVBUF); 716 txr->tx_buffers = NULL; 717 } 718 if (txr->txtag != NULL) { 719 bus_dma_tag_destroy(txr->txtag); 720 txr->txtag = NULL; 721 } 722 return; 723 } 724 725 /********************************************************************* 726 * 727 * Advanced Context Descriptor setup for VLAN, CSUM or TSO 728 * 729 **********************************************************************/ 730 731 static int 732 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, 733 u32 *cmd_type_len, u32 *olinfo_status) 734 { 735 struct ixgbe_adv_tx_context_desc *TXD; 736 struct ether_vlan_header *eh; 737 struct ip *ip; 738 struct ip6_hdr *ip6; 739 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 740 int ehdrlen, ip_hlen = 0; 741 u16 etype; 742 u8 ipproto = 0; 743 int offload = TRUE; 744 int ctxd = txr->next_avail_desc; 745 u16 vtag = 0; 746 747 /* First check if TSO is to be used */ 748 if (mp->m_pkthdr.csum_flags & CSUM_TSO) 749 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); 750 751 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) 752 offload = FALSE; 753 754 /* Indicate the whole packet as payload when not doing TSO */ 755 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; 756 757 /* Now ready a context descriptor */ 758 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 759 760 /* 761 ** In advanced descriptors the vlan tag must 762 ** be placed into the context descriptor. Hence 763 ** we need to make one even if not doing offloads. 764 */ 765 if (mp->m_flags & M_VLANTAG) { 766 vtag = htole16(mp->m_pkthdr.ether_vtag); 767 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 768 } 769 770 /* 771 * Determine where frame payload starts. 772 * Jump over vlan headers if already present, 773 * helpful for QinQ too. 
774 */ 775 eh = mtod(mp, struct ether_vlan_header *); 776 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 777 etype = ntohs(eh->evl_proto); 778 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 779 } else { 780 etype = ntohs(eh->evl_encap_proto); 781 ehdrlen = ETHER_HDR_LEN; 782 } 783 784 /* Set the ether header length */ 785 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 786 787 if (offload == FALSE) 788 goto no_offloads; 789 790 switch (etype) { 791 case ETHERTYPE_IP: 792 ip = (struct ip *)(mp->m_data + ehdrlen); 793 ip_hlen = ip->ip_hl << 2; 794 ipproto = ip->ip_p; 795 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 796 break; 797 case ETHERTYPE_IPV6: 798 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 799 ip_hlen = sizeof(struct ip6_hdr); 800 /* XXX-BZ this will go badly in case of ext hdrs. */ 801 ipproto = ip6->ip6_nxt; 802 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 803 break; 804 default: 805 offload = FALSE; 806 break; 807 } 808 809 vlan_macip_lens |= ip_hlen; 810 811 switch (ipproto) { 812 case IPPROTO_TCP: 813 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 814 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 815 break; 816 817 case IPPROTO_UDP: 818 if (mp->m_pkthdr.csum_flags & CSUM_UDP) 819 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; 820 break; 821 822 #if __FreeBSD_version >= 800000 823 case IPPROTO_SCTP: 824 if (mp->m_pkthdr.csum_flags & CSUM_SCTP) 825 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; 826 break; 827 #endif 828 default: 829 offload = FALSE; 830 break; 831 } 832 833 if (offload) /* For the TX descriptor setup */ 834 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 835 836 no_offloads: 837 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 838 839 /* Now copy bits into descriptor */ 840 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 841 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 842 TXD->seqnum_seed = htole32(0); 843 TXD->mss_l4len_idx = htole32(0); 844 845 /* We've consumed the first desc, adjust counters */ 846 if (++ctxd == txr->num_desc) 847 ctxd = 0; 848 txr->next_avail_desc = ctxd; 849 --txr->tx_avail; 850 851 return (0); 852 } 853 854 /********************************************************************** 855 * 856 * Setup work for hardware segmentation offload (TSO) on 857 * adapters using advanced tx descriptors 858 * 859 **********************************************************************/ 860 static int 861 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, 862 u32 *cmd_type_len, u32 *olinfo_status) 863 { 864 struct ixgbe_adv_tx_context_desc *TXD; 865 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 866 u32 mss_l4len_idx = 0, paylen; 867 u16 vtag = 0, eh_type; 868 int ctxd, ehdrlen, ip_hlen, tcp_hlen; 869 struct ether_vlan_header *eh; 870 #ifdef INET6 871 struct ip6_hdr *ip6; 872 #endif 873 #ifdef INET 874 struct ip *ip; 875 #endif 876 struct tcphdr *th; 877 878 879 /* 880 * Determine where frame payload starts. 881 * Jump over vlan headers if already present 882 */ 883 eh = mtod(mp, struct ether_vlan_header *); 884 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 885 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 886 eh_type = eh->evl_proto; 887 } else { 888 ehdrlen = ETHER_HDR_LEN; 889 eh_type = eh->evl_encap_proto; 890 } 891 892 switch (ntohs(eh_type)) { 893 #ifdef INET6 894 case ETHERTYPE_IPV6: 895 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 896 /* XXX-BZ For now we do not pretend to support ext. hdrs. 
*/ 897 if (ip6->ip6_nxt != IPPROTO_TCP) 898 return (ENXIO); 899 ip_hlen = sizeof(struct ip6_hdr); 900 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 901 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 902 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 903 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 904 break; 905 #endif 906 #ifdef INET 907 case ETHERTYPE_IP: 908 ip = (struct ip *)(mp->m_data + ehdrlen); 909 if (ip->ip_p != IPPROTO_TCP) 910 return (ENXIO); 911 ip->ip_sum = 0; 912 ip_hlen = ip->ip_hl << 2; 913 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 914 th->th_sum = in_pseudo(ip->ip_src.s_addr, 915 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 916 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 917 /* Tell transmit desc to also do IPv4 checksum. */ 918 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 919 break; 920 #endif 921 default: 922 panic("%s: CSUM_TSO but no supported IP version (0x%04x)", 923 __func__, ntohs(eh_type)); 924 break; 925 } 926 927 ctxd = txr->next_avail_desc; 928 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 929 930 tcp_hlen = th->th_off << 2; 931 932 /* This is used in the transmit desc in encap */ 933 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; 934 935 /* VLAN MACLEN IPLEN */ 936 if (mp->m_flags & M_VLANTAG) { 937 vtag = htole16(mp->m_pkthdr.ether_vtag); 938 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 939 } 940 941 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 942 vlan_macip_lens |= ip_hlen; 943 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 944 945 /* ADV DTYPE TUCMD */ 946 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 947 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 948 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 949 950 /* MSS L4LEN IDX */ 951 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); 952 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); 953 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 954 955 TXD->seqnum_seed = htole32(0); 956 957 if (++ctxd == txr->num_desc) 958 ctxd = 0; 959 960 txr->tx_avail--; 961 txr->next_avail_desc = ctxd; 962 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; 963 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 964 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; 965 ++txr->tso_tx; 966 return (0); 967 } 968 969 970 /********************************************************************** 971 * 972 * Examine each tx_buffer in the used queue. If the hardware is done 973 * processing the packet then free associated resources. The 974 * tx_buffer is put back on the free queue. 975 * 976 **********************************************************************/ 977 void 978 ixgbe_txeof(struct tx_ring *txr) 979 { 980 #ifdef DEV_NETMAP 981 struct adapter *adapter = txr->adapter; 982 struct ifnet *ifp = adapter->ifp; 983 #endif 984 u32 work, processed = 0; 985 u16 limit = txr->process_limit; 986 struct ixgbe_tx_buf *buf; 987 union ixgbe_adv_tx_desc *txd; 988 989 mtx_assert(&txr->tx_mtx, MA_OWNED); 990 991 #ifdef DEV_NETMAP 992 if (ifp->if_capenable & IFCAP_NETMAP) { 993 struct netmap_adapter *na = NA(ifp); 994 struct netmap_kring *kring = &na->tx_rings[txr->me]; 995 txd = txr->tx_base; 996 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 997 BUS_DMASYNC_POSTREAD); 998 /* 999 * In netmap mode, all the work is done in the context 1000 * of the client thread. Interrupt handlers only wake up 1001 * clients, which may be sleeping on individual rings 1002 * or on a global resource for all rings. 
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(ifp, txr->me);
		}
		return;
	}
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {
		txr->busy = 0;
		return;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		if (buf->m_head) {
			txr->bytes +=
			    buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag,
			    buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes +=
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;
		}
		++txr->packets;
		++processed;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Queue hang detection: we know there is work
	** outstanding, or the first return above would
	** have been taken, so increment busy if nothing
	** was cleaned; the local timer then checks this
	** counter and marks the queue HUNG if it exceeds
	** the maximum number of attempts.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything was cleaned we reset the state to 1;
	** note this will turn off HUNG if it's set.
	*/
	if (processed)
		txr->busy = 1;

	if (txr->tx_avail == txr->num_desc)
		txr->busy = 0;

	return;
}

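/*
** Worked example (illustrative only) of the negative ring-index scheme
** used by ixgbe_txeof() above, assuming a 2048-descriptor ring with
** next_to_clean == 2040: "work" starts at 2040 - 2048 = -8, so after
** eight descriptors have been walked the increment makes it 0, which is
** the wrap condition; buf/txd are then pointed back at the start of the
** ring and work is reset to -2048.  When the loop exits, work += num_desc
** converts the offset back into an ordinary ring index for next_to_clean.
*/
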
#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry,
** allowing traffic flows to be identified and kept
** on the same CPU.  This would be a performance
** hit, but we only do it for one in every
** IXGBE_FDIR_RATE packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter *adapter = txr->adapter;
	struct ix_queue *que;
	struct ip *ip;
	struct tcphdr *th;
	struct udphdr *uh;
	struct ether_vlan_header *eh;
	union ixgbe_atr_hash_dword input = {.dword = 0};
	union ixgbe_atr_hash_dword common = {.dword = 0};
	int ehdrlen, ip_hlen;
	u16 etype;

	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		break;
	case IPPROTO_UDP:
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
		break;
	default:
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU.
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
#endif /* IXGBE_FDIR */

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}

/*********************************************************************
 *
 *  Initialize the Hardware RSC (LRO) feature on 82599
 *  for an RX ring; this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPv4 and
 *  our testing has shown soft LRO to be as effective,
 *  I have decided to disable this by default.
1224 * 1225 **********************************************************************/ 1226 static void 1227 ixgbe_setup_hw_rsc(struct rx_ring *rxr) 1228 { 1229 struct adapter *adapter = rxr->adapter; 1230 struct ixgbe_hw *hw = &adapter->hw; 1231 u32 rscctrl, rdrxctl; 1232 1233 /* If turning LRO/RSC off we need to disable it */ 1234 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { 1235 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1236 rscctrl &= ~IXGBE_RSCCTL_RSCEN; 1237 return; 1238 } 1239 1240 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); 1241 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; 1242 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */ 1243 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) 1244 #endif /* DEV_NETMAP */ 1245 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; 1246 rdrxctl |= IXGBE_RDRXCTL_RSCACKC; 1247 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); 1248 1249 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1250 rscctrl |= IXGBE_RSCCTL_RSCEN; 1251 /* 1252 ** Limit the total number of descriptors that 1253 ** can be combined, so it does not exceed 64K 1254 */ 1255 if (rxr->mbuf_sz == MCLBYTES) 1256 rscctrl |= IXGBE_RSCCTL_MAXDESC_16; 1257 else if (rxr->mbuf_sz == MJUMPAGESIZE) 1258 rscctrl |= IXGBE_RSCCTL_MAXDESC_8; 1259 else if (rxr->mbuf_sz == MJUM9BYTES) 1260 rscctrl |= IXGBE_RSCCTL_MAXDESC_4; 1261 else /* Using 16K cluster */ 1262 rscctrl |= IXGBE_RSCCTL_MAXDESC_1; 1263 1264 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); 1265 1266 /* Enable TCP header recognition */ 1267 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 1268 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | 1269 IXGBE_PSRTYPE_TCPHDR)); 1270 1271 /* Disable RSC for ACK packets */ 1272 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, 1273 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); 1274 1275 rxr->hw_rsc = TRUE; 1276 } 1277 /********************************************************************* 1278 * 1279 * Refresh mbuf buffers for RX descriptor rings 1280 * - now keeps its own state so discards due to resource 1281 * exhaustion are unnecessary, if an mbuf cannot be obtained 1282 * it just returns, keeping its placeholder, thus it can simply 1283 * be recalled to try again. 1284 * 1285 **********************************************************************/ 1286 static void 1287 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) 1288 { 1289 struct adapter *adapter = rxr->adapter; 1290 bus_dma_segment_t seg[1]; 1291 struct ixgbe_rx_buf *rxbuf; 1292 struct mbuf *mp; 1293 int i, j, nsegs, error; 1294 bool refreshed = FALSE; 1295 1296 i = j = rxr->next_to_refresh; 1297 /* Control the loop with one beyond */ 1298 if (++j == rxr->num_desc) 1299 j = 0; 1300 1301 while (j != limit) { 1302 rxbuf = &rxr->rx_buffers[i]; 1303 if (rxbuf->buf == NULL) { 1304 mp = m_getjcl(M_NOWAIT, MT_DATA, 1305 M_PKTHDR, rxr->mbuf_sz); 1306 if (mp == NULL) 1307 goto update; 1308 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) 1309 m_adj(mp, ETHER_ALIGN); 1310 } else 1311 mp = rxbuf->buf; 1312 1313 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1314 1315 /* If we're dealing with an mbuf that was copied rather 1316 * than replaced, there's no need to go through busdma. 
1317 */ 1318 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { 1319 /* Get the memory mapping */ 1320 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1321 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1322 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); 1323 if (error != 0) { 1324 printf("Refresh mbufs: payload dmamap load" 1325 " failure - %d\n", error); 1326 m_free(mp); 1327 rxbuf->buf = NULL; 1328 goto update; 1329 } 1330 rxbuf->buf = mp; 1331 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1332 BUS_DMASYNC_PREREAD); 1333 rxbuf->addr = rxr->rx_base[i].read.pkt_addr = 1334 htole64(seg[0].ds_addr); 1335 } else { 1336 rxr->rx_base[i].read.pkt_addr = rxbuf->addr; 1337 rxbuf->flags &= ~IXGBE_RX_COPY; 1338 } 1339 1340 refreshed = TRUE; 1341 /* Next is precalculated */ 1342 i = j; 1343 rxr->next_to_refresh = i; 1344 if (++j == rxr->num_desc) 1345 j = 0; 1346 } 1347 update: 1348 if (refreshed) /* Update hardware tail index */ 1349 IXGBE_WRITE_REG(&adapter->hw, 1350 rxr->tail, rxr->next_to_refresh); 1351 return; 1352 } 1353 1354 /********************************************************************* 1355 * 1356 * Allocate memory for rx_buffer structures. Since we use one 1357 * rx_buffer per received packet, the maximum number of rx_buffer's 1358 * that we'll need is equal to the number of receive descriptors 1359 * that we've allocated. 1360 * 1361 **********************************************************************/ 1362 int 1363 ixgbe_allocate_receive_buffers(struct rx_ring *rxr) 1364 { 1365 struct adapter *adapter = rxr->adapter; 1366 device_t dev = adapter->dev; 1367 struct ixgbe_rx_buf *rxbuf; 1368 int i, bsize, error; 1369 1370 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; 1371 if (!(rxr->rx_buffers = 1372 (struct ixgbe_rx_buf *) malloc(bsize, 1373 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1374 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1375 error = ENOMEM; 1376 goto fail; 1377 } 1378 1379 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1380 1, 0, /* alignment, bounds */ 1381 BUS_SPACE_MAXADDR, /* lowaddr */ 1382 BUS_SPACE_MAXADDR, /* highaddr */ 1383 NULL, NULL, /* filter, filterarg */ 1384 MJUM16BYTES, /* maxsize */ 1385 1, /* nsegments */ 1386 MJUM16BYTES, /* maxsegsize */ 1387 0, /* flags */ 1388 NULL, /* lockfunc */ 1389 NULL, /* lockfuncarg */ 1390 &rxr->ptag))) { 1391 device_printf(dev, "Unable to create RX DMA tag\n"); 1392 goto fail; 1393 } 1394 1395 for (i = 0; i < rxr->num_desc; i++, rxbuf++) { 1396 rxbuf = &rxr->rx_buffers[i]; 1397 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); 1398 if (error) { 1399 device_printf(dev, "Unable to create RX dma map\n"); 1400 goto fail; 1401 } 1402 } 1403 1404 return (0); 1405 1406 fail: 1407 /* Frees all, but can handle partial completion */ 1408 ixgbe_free_receive_structures(adapter); 1409 return (error); 1410 } 1411 1412 1413 static void 1414 ixgbe_free_receive_ring(struct rx_ring *rxr) 1415 { 1416 struct ixgbe_rx_buf *rxbuf; 1417 int i; 1418 1419 for (i = 0; i < rxr->num_desc; i++) { 1420 rxbuf = &rxr->rx_buffers[i]; 1421 if (rxbuf->buf != NULL) { 1422 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1423 BUS_DMASYNC_POSTREAD); 1424 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1425 rxbuf->buf->m_flags |= M_PKTHDR; 1426 m_freem(rxbuf->buf); 1427 rxbuf->buf = NULL; 1428 rxbuf->flags = 0; 1429 } 1430 } 1431 } 1432 1433 1434 /********************************************************************* 1435 * 1436 * Initialize a receive ring and its buffers. 
1437 * 1438 **********************************************************************/ 1439 static int 1440 ixgbe_setup_receive_ring(struct rx_ring *rxr) 1441 { 1442 struct adapter *adapter; 1443 struct ifnet *ifp; 1444 device_t dev; 1445 struct ixgbe_rx_buf *rxbuf; 1446 bus_dma_segment_t seg[1]; 1447 struct lro_ctrl *lro = &rxr->lro; 1448 int rsize, nsegs, error = 0; 1449 #ifdef DEV_NETMAP 1450 struct netmap_adapter *na = NA(rxr->adapter->ifp); 1451 struct netmap_slot *slot; 1452 #endif /* DEV_NETMAP */ 1453 1454 adapter = rxr->adapter; 1455 ifp = adapter->ifp; 1456 dev = adapter->dev; 1457 1458 /* Clear the ring contents */ 1459 IXGBE_RX_LOCK(rxr); 1460 #ifdef DEV_NETMAP 1461 /* same as in ixgbe_setup_transmit_ring() */ 1462 slot = netmap_reset(na, NR_RX, rxr->me, 0); 1463 #endif /* DEV_NETMAP */ 1464 rsize = roundup2(adapter->num_rx_desc * 1465 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); 1466 bzero((void *)rxr->rx_base, rsize); 1467 /* Cache the size */ 1468 rxr->mbuf_sz = adapter->rx_mbuf_sz; 1469 1470 /* Free current RX buffer structs and their mbufs */ 1471 ixgbe_free_receive_ring(rxr); 1472 1473 /* Now replenish the mbufs */ 1474 for (int j = 0; j != rxr->num_desc; ++j) { 1475 struct mbuf *mp; 1476 1477 rxbuf = &rxr->rx_buffers[j]; 1478 #ifdef DEV_NETMAP 1479 /* 1480 * In netmap mode, fill the map and set the buffer 1481 * address in the NIC ring, considering the offset 1482 * between the netmap and NIC rings (see comment in 1483 * ixgbe_setup_transmit_ring() ). No need to allocate 1484 * an mbuf, so end the block with a continue; 1485 */ 1486 if (slot) { 1487 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 1488 uint64_t paddr; 1489 void *addr; 1490 1491 addr = PNMB(na, slot + sj, &paddr); 1492 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); 1493 /* Update descriptor and the cached value */ 1494 rxr->rx_base[j].read.pkt_addr = htole64(paddr); 1495 rxbuf->addr = htole64(paddr); 1496 continue; 1497 } 1498 #endif /* DEV_NETMAP */ 1499 rxbuf->flags = 0; 1500 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, 1501 M_PKTHDR, adapter->rx_mbuf_sz); 1502 if (rxbuf->buf == NULL) { 1503 error = ENOBUFS; 1504 goto fail; 1505 } 1506 mp = rxbuf->buf; 1507 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1508 /* Get the memory mapping */ 1509 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1510 rxbuf->pmap, mp, seg, 1511 &nsegs, BUS_DMA_NOWAIT); 1512 if (error != 0) 1513 goto fail; 1514 bus_dmamap_sync(rxr->ptag, 1515 rxbuf->pmap, BUS_DMASYNC_PREREAD); 1516 /* Update the descriptor and the cached value */ 1517 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); 1518 rxbuf->addr = htole64(seg[0].ds_addr); 1519 } 1520 1521 1522 /* Setup our descriptor indices */ 1523 rxr->next_to_check = 0; 1524 rxr->next_to_refresh = 0; 1525 rxr->lro_enabled = FALSE; 1526 rxr->rx_copies = 0; 1527 rxr->rx_bytes = 0; 1528 rxr->vtag_strip = FALSE; 1529 1530 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1531 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1532 1533 /* 1534 ** Now set up the LRO interface: 1535 */ 1536 if (ixgbe_rsc_enable) 1537 ixgbe_setup_hw_rsc(rxr); 1538 else if (ifp->if_capenable & IFCAP_LRO) { 1539 int err = tcp_lro_init(lro); 1540 if (err) { 1541 device_printf(dev, "LRO Initialization failed!\n"); 1542 goto fail; 1543 } 1544 INIT_DEBUGOUT("RX Soft LRO Initialized\n"); 1545 rxr->lro_enabled = TRUE; 1546 lro->ifp = adapter->ifp; 1547 } 1548 1549 IXGBE_RX_UNLOCK(rxr); 1550 return (0); 1551 1552 fail: 1553 ixgbe_free_receive_ring(rxr); 1554 IXGBE_RX_UNLOCK(rxr); 1555 return (error); 1556 } 1557 1558 
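/*
** Illustrative sketch (not compiled, and not part of the original code):
** roughly how the receive-path routines in this file are expected to fit
** together; the actual call sites live in the main driver's attach, init,
** queue-handler and detach paths.
*/
#if 0
	ixgbe_allocate_receive_buffers(rxr);	/* once, at attach */
	ixgbe_setup_receive_structures(adapter); /* at every init/reset */
	while (ixgbe_rxeof(que))		/* from the queue handler */
		;
	ixgbe_free_receive_structures(adapter);	/* at detach */
#endif
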
/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free the RX buffers allocated so far; we only need to handle
	 * the rings that completed, since the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		ixgbe_free_receive_ring(rxr);
	}

	return (ENOBUFS);
}


/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl *lro = &rxr->lro;
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		tcp_lro_free(lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}


/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_rx_buf *rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->buf != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->buf->m_flags |= M_PKTHDR;
				m_freem(rxbuf->buf);
			}
			rxbuf->buf = NULL;
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	return;
}

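/*
** Illustrative sketch only (not compiled): the LRO eligibility test used
** by ixgbe_rx_input() below, factored into a hypothetical predicate so
** the individual conditions are easier to read.
*/
#if 0
static __inline bool
ixgbe_lro_candidate(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
    u32 ptype)
{
	/* Soft LRO must be enabled and the HW must strip any VLAN tag */
	if (!rxr->lro_enabled ||
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
		return (FALSE);
	/* No ethertype-filter (ETQF) matches */
	if (ptype & IXGBE_RXDADV_PKTTYPE_ETQF)
		return (FALSE);
	/* Must be TCP over IPv4 or TCP over IPv6 */
	if ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) !=
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) &&
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) !=
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP))
		return (FALSE);
	/* The hardware must have validated the TCP checksum */
	return ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR));
}
#endif
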
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{

	/*
	 * At the moment LRO is only for IP/TCP packets, and the TCP
	 * checksum of the packet must have been computed by the hardware.
	 * The packet also must not carry a VLAN tag in its Ethernet
	 * header. In the case of IPv6 we do not yet support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 **  - LRO not enabled, or
		 **  - no LRO resources, or
		 **  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXGBE_RX_LOCK(rxr);
}

static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf *rbuf;

	rbuf = &rxr->rx_buffers[i];


	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	bus_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;

	return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  dma'ed into host memory to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter *adapter = que->adapter;
	struct rx_ring *rxr = que->rxr;
	struct ifnet *ifp = adapter->ifp;
	struct lro_ctrl *lro = &rxr->lro;
	struct lro_entry *queued;
	int i, nextp, processed = 0;
	u32 staterr = 0;
	u16 count = rxr->process_limit;
	union ixgbe_adv_rx_desc *cur;
	struct ixgbe_rx_buf *rbuf, *nbuf;
	u16 pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf *sendmp, *mp;
		u32 rsc, ptype;
		u16 len;
		u16 vtag = 0;
		bool eop;

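		/*
		 * Each pass examines one advanced RX write-back descriptor:
		 * status_error carries the DD/EOP and error bits, length is
		 * the number of bytes DMA'ed into this buffer, and the
		 * packet-type/RSS fields feed the checksum, RSS and LRO
		 * decisions below.
		 */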
		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if 0 // VF-only
#if __FreeBSD_version >= 1100036
			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** On the 82599, which supports hardware
		** LRO (called HW RSC), packets need not
		** be fragmented across sequential
		** descriptors; rather, the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before; it
		** required eliminating the global chain
		** pointers in favor of what we are doing
		** here. -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head that tells us
		** whether this is the continuation of a chain.
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {	/* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize. This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
1860 */ 1861 if (eop && len <= IXGBE_RX_COPY_LEN) { 1862 sendmp = m_gethdr(M_NOWAIT, MT_DATA); 1863 if (sendmp != NULL) { 1864 sendmp->m_data += 1865 IXGBE_RX_COPY_ALIGN; 1866 ixgbe_bcopy(mp->m_data, 1867 sendmp->m_data, len); 1868 sendmp->m_len = len; 1869 rxr->rx_copies++; 1870 rbuf->flags |= IXGBE_RX_COPY; 1871 } 1872 } 1873 if (sendmp == NULL) { 1874 rbuf->buf = rbuf->fmp = NULL; 1875 sendmp = mp; 1876 } 1877 1878 /* first desc of a non-ps chain */ 1879 sendmp->m_flags |= M_PKTHDR; 1880 sendmp->m_pkthdr.len = mp->m_len; 1881 } 1882 ++processed; 1883 1884 /* Pass the head pointer on */ 1885 if (eop == 0) { 1886 nbuf->fmp = sendmp; 1887 sendmp = NULL; 1888 mp->m_next = nbuf->buf; 1889 } else { /* Sending this frame */ 1890 sendmp->m_pkthdr.rcvif = ifp; 1891 rxr->rx_packets++; 1892 /* capture data for AIM */ 1893 rxr->bytes += sendmp->m_pkthdr.len; 1894 rxr->rx_bytes += sendmp->m_pkthdr.len; 1895 /* Process vlan info */ 1896 if ((rxr->vtag_strip) && 1897 (staterr & IXGBE_RXD_STAT_VP)) 1898 vtag = le16toh(cur->wb.upper.vlan); 1899 if (vtag) { 1900 sendmp->m_pkthdr.ether_vtag = vtag; 1901 sendmp->m_flags |= M_VLANTAG; 1902 } 1903 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1904 ixgbe_rx_checksum(staterr, sendmp, ptype); 1905 #if __FreeBSD_version >= 800000 1906 #ifdef RSS 1907 sendmp->m_pkthdr.flowid = 1908 le32toh(cur->wb.lower.hi_dword.rss); 1909 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { 1910 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: 1911 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4); 1912 break; 1913 case IXGBE_RXDADV_RSSTYPE_IPV4: 1914 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4); 1915 break; 1916 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: 1917 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6); 1918 break; 1919 case IXGBE_RXDADV_RSSTYPE_IPV6_EX: 1920 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX); 1921 break; 1922 case IXGBE_RXDADV_RSSTYPE_IPV6: 1923 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6); 1924 break; 1925 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: 1926 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX); 1927 break; 1928 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: 1929 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4); 1930 break; 1931 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: 1932 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6); 1933 break; 1934 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: 1935 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX); 1936 break; 1937 default: 1938 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1939 } 1940 #else /* RSS */ 1941 sendmp->m_pkthdr.flowid = que->msix; 1942 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1943 #endif /* RSS */ 1944 #endif /* FreeBSD_version */ 1945 } 1946 next_desc: 1947 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1948 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1949 1950 /* Advance our pointers to the next descriptor. 

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
	bool	sctp = FALSE;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = TRUE;

	if (status & IXGBE_RXD_STAT_IPCS) {
		if (!(errors & IXGBE_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else
			mp->m_pkthdr.csum_flags = 0;
	}
	if (status & IXGBE_RXD_STAT_L4CS) {
		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
		if (sctp)
			type = CSUM_SCTP_VALID;
#endif
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
	return;
}
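
/*
** Layout note (added for clarity): the (u16) cast above picks up the
** status bits (DD, EOP, VP, IPCS, L4CS, ...) from the low half of the
** write-back status_error word, while the (>> 24) shift isolates the
** legacy-style error byte against which IXGBE_RXD_ERR_IPE and
** IXGBE_RXD_ERR_TCPE are compared.
*/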

/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *) arg = segs->ds_addr;
	return;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int      r;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
			       DBA_ALIGN, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       size,			/* maxsize */
			       1,			/* nsegments */
			       size,			/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &dma->dma_tag);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
		    "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		    "error %u\n", r);
		goto fail_1;
	}
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size,
	    ixgbe_dmamap_cb,
	    &dma->dma_paddr,
	    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
		    "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}
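
/*
** Usage sketch (illustrative only, not compiled): a descriptor ring is
** obtained and released with the pair above, e.g.
**
**	struct ixgbe_dma_alloc dma;
**
**	if (ixgbe_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
**		(use dma.dma_vaddr for the KVA, dma.dma_paddr for the
**		 bus address programmed into the hardware)
**		ixgbe_dma_free(adapter, &dma);
**	}
**
** ixgbe_allocate_queues() below follows exactly this pattern for the
** TX and RX descriptor rings.
*/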

/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 **********************************************************************/
int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	int txconf = 0, rxconf = 0;

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Then allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX ring struct memory */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/*
	 * Now set up the TX queues; txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * unwind the allocations gracefully.
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}
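
	/*
	** Sizing note (added for clarity): each advanced TX/RX descriptor
	** is 16 bytes, so e.g. 2048 descriptors take 32KB; roundup2()
	** pads that total to a multiple of DBA_ALIGN, matching the
	** alignment ixgbe_dma_malloc() requests from busdma for the
	** descriptor base address.
	*/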

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = i;
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate Rx Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
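
/*
** Error handling note (summary, added for clarity): txconf and rxconf
** count how many rings have had descriptor DMA memory set up, so the
** err_tx_desc/err_rx_desc paths above free the descriptor memory of
** only those rings before falling through to release the ring and
** queue arrays themselves.
*/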