1 /****************************************************************************** 2 3 Copyright (c) 2001-2015, Intel Corporation 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ******************************************************************************/ 33 /*$FreeBSD$*/ 34 35 36 #ifndef IXGBE_STANDALONE_BUILD 37 #include "opt_inet.h" 38 #include "opt_inet6.h" 39 #include "opt_rss.h" 40 #endif 41 42 #include "ixgbe.h" 43 44 #ifdef RSS 45 #include <net/rss_config.h> 46 #include <netinet/in_rss.h> 47 #endif 48 49 #ifdef DEV_NETMAP 50 #include <net/netmap.h> 51 #include <sys/selinfo.h> 52 #include <dev/netmap/netmap_kern.h> 53 54 extern int ix_crcstrip; 55 #endif 56 57 /* 58 ** HW RSC control: 59 ** this feature only works with 60 ** IPv4, and only on 82599 and later. 61 ** Also this will cause IP forwarding to 62 ** fail and that can't be controlled by 63 ** the stack as LRO can. For all these 64 ** reasons I've deemed it best to leave 65 ** this off and not bother with a tuneable 66 ** interface, this would need to be compiled 67 ** to enable. 68 */ 69 static bool ixgbe_rsc_enable = FALSE; 70 71 #ifdef IXGBE_FDIR 72 /* 73 ** For Flow Director: this is the 74 ** number of TX packets we sample 75 ** for the filter pool, this means 76 ** every 20th packet will be probed. 77 ** 78 ** This feature can be disabled by 79 ** setting this to 0. 
80 */ 81 static int atr_sample_rate = 20; 82 #endif 83 84 /* Shared PCI config read/write */ 85 inline u16 86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg) 87 { 88 u16 value; 89 90 value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev, 91 reg, 2); 92 93 return (value); 94 } 95 96 inline void 97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value) 98 { 99 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev, 100 reg, value, 2); 101 102 return; 103 } 104 105 /********************************************************************* 106 * Local Function prototypes 107 *********************************************************************/ 108 static void ixgbe_setup_transmit_ring(struct tx_ring *); 109 static void ixgbe_free_transmit_buffers(struct tx_ring *); 110 static int ixgbe_setup_receive_ring(struct rx_ring *); 111 static void ixgbe_free_receive_buffers(struct rx_ring *); 112 113 static void ixgbe_rx_checksum(u32, struct mbuf *, u32); 114 static void ixgbe_refresh_mbufs(struct rx_ring *, int); 115 static int ixgbe_xmit(struct tx_ring *, struct mbuf **); 116 static int ixgbe_tx_ctx_setup(struct tx_ring *, 117 struct mbuf *, u32 *, u32 *); 118 static int ixgbe_tso_setup(struct tx_ring *, 119 struct mbuf *, u32 *, u32 *); 120 #ifdef IXGBE_FDIR 121 static void ixgbe_atr(struct tx_ring *, struct mbuf *); 122 #endif 123 static __inline void ixgbe_rx_discard(struct rx_ring *, int); 124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, 125 struct mbuf *, u32); 126 127 #ifdef IXGBE_LEGACY_TX 128 /********************************************************************* 129 * Transmit entry point 130 * 131 * ixgbe_start is called by the stack to initiate a transmit. 132 * The driver will remain in this routine as long as there are 133 * packets to transmit and transmit resources are available. 134 * In case resources are not available stack is notified and 135 * the packet is requeued. 136 **********************************************************************/ 137 138 void 139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) 140 { 141 struct mbuf *m_head; 142 struct adapter *adapter = txr->adapter; 143 144 IXGBE_TX_LOCK_ASSERT(txr); 145 146 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 147 return; 148 if (!adapter->link_active) 149 return; 150 151 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 152 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) 153 break; 154 155 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 156 if (m_head == NULL) 157 break; 158 159 if (ixgbe_xmit(txr, &m_head)) { 160 if (m_head != NULL) 161 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 162 break; 163 } 164 /* Send a copy of the frame to the BPF listener */ 165 ETHER_BPF_MTAP(ifp, m_head); 166 } 167 return; 168 } 169 170 /* 171 * Legacy TX start - called by the stack, this 172 * always uses the first tx ring, and should 173 * not be used with multiqueue tx enabled. 174 */ 175 void 176 ixgbe_start(struct ifnet *ifp) 177 { 178 struct adapter *adapter = ifp->if_softc; 179 struct tx_ring *txr = adapter->tx_rings; 180 181 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 182 IXGBE_TX_LOCK(txr); 183 ixgbe_start_locked(txr, ifp); 184 IXGBE_TX_UNLOCK(txr); 185 } 186 return; 187 } 188 189 #else /* ! 
IXGBE_LEGACY_TX */ 190 191 /* 192 ** Multiqueue Transmit driver 193 ** 194 */ 195 int 196 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) 197 { 198 struct adapter *adapter = ifp->if_softc; 199 struct ix_queue *que; 200 struct tx_ring *txr; 201 int i, err = 0; 202 #ifdef RSS 203 uint32_t bucket_id; 204 #endif 205 206 /* 207 * When doing RSS, map it to the same outbound queue 208 * as the incoming flow would be mapped to. 209 * 210 * If everything is setup correctly, it should be the 211 * same bucket that the current CPU we're on is. 212 */ 213 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 214 #ifdef RSS 215 if (rss_hash2bucket(m->m_pkthdr.flowid, 216 M_HASHTYPE_GET(m), &bucket_id) == 0) 217 /* TODO: spit out something if bucket_id > num_queues? */ 218 i = bucket_id % adapter->num_queues; 219 else 220 #endif 221 i = m->m_pkthdr.flowid % adapter->num_queues; 222 } else 223 i = curcpu % adapter->num_queues; 224 225 /* Check for a hung queue and pick alternative */ 226 if (((1 << i) & adapter->active_queues) == 0) 227 i = ffsl(adapter->active_queues); 228 229 txr = &adapter->tx_rings[i]; 230 que = &adapter->queues[i]; 231 232 err = drbr_enqueue(ifp, txr->br, m); 233 if (err) 234 return (err); 235 if (IXGBE_TX_TRYLOCK(txr)) { 236 ixgbe_mq_start_locked(ifp, txr); 237 IXGBE_TX_UNLOCK(txr); 238 } else 239 taskqueue_enqueue(que->tq, &txr->txq_task); 240 241 return (0); 242 } 243 244 int 245 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 246 { 247 struct adapter *adapter = txr->adapter; 248 struct mbuf *next; 249 int enqueued = 0, err = 0; 250 251 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || 252 adapter->link_active == 0) 253 return (ENETDOWN); 254 255 /* Process the queue */ 256 #if __FreeBSD_version < 901504 257 next = drbr_dequeue(ifp, txr->br); 258 while (next != NULL) { 259 if ((err = ixgbe_xmit(txr, &next)) != 0) { 260 if (next != NULL) 261 err = drbr_enqueue(ifp, txr->br, next); 262 #else 263 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 264 if ((err = ixgbe_xmit(txr, &next)) != 0) { 265 if (next == NULL) { 266 drbr_advance(ifp, txr->br); 267 } else { 268 drbr_putback(ifp, txr->br, next); 269 } 270 #endif 271 break; 272 } 273 #if __FreeBSD_version >= 901504 274 drbr_advance(ifp, txr->br); 275 #endif 276 enqueued++; 277 #if 0 // this is VF-only 278 #if __FreeBSD_version >= 1100036 279 /* 280 * Since we're looking at the tx ring, we can check 281 * to see if we're a VF by examing our tail register 282 * address. 283 */ 284 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST) 285 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 286 #endif 287 #endif 288 /* Send a copy of the frame to the BPF listener */ 289 ETHER_BPF_MTAP(ifp, next); 290 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 291 break; 292 #if __FreeBSD_version < 901504 293 next = drbr_dequeue(ifp, txr->br); 294 #endif 295 } 296 297 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) 298 ixgbe_txeof(txr); 299 300 return (err); 301 } 302 303 /* 304 * Called from a taskqueue to drain queued transmit packets. 
305 */ 306 void 307 ixgbe_deferred_mq_start(void *arg, int pending) 308 { 309 struct tx_ring *txr = arg; 310 struct adapter *adapter = txr->adapter; 311 struct ifnet *ifp = adapter->ifp; 312 313 IXGBE_TX_LOCK(txr); 314 if (!drbr_empty(ifp, txr->br)) 315 ixgbe_mq_start_locked(ifp, txr); 316 IXGBE_TX_UNLOCK(txr); 317 } 318 319 /* 320 * Flush all ring buffers 321 */ 322 void 323 ixgbe_qflush(struct ifnet *ifp) 324 { 325 struct adapter *adapter = ifp->if_softc; 326 struct tx_ring *txr = adapter->tx_rings; 327 struct mbuf *m; 328 329 for (int i = 0; i < adapter->num_queues; i++, txr++) { 330 IXGBE_TX_LOCK(txr); 331 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 332 m_freem(m); 333 IXGBE_TX_UNLOCK(txr); 334 } 335 if_qflush(ifp); 336 } 337 #endif /* IXGBE_LEGACY_TX */ 338 339 340 /********************************************************************* 341 * 342 * This routine maps the mbufs to tx descriptors, allowing the 343 * TX engine to transmit the packets. 344 * - return 0 on success, positive on failure 345 * 346 **********************************************************************/ 347 348 static int 349 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) 350 { 351 struct adapter *adapter = txr->adapter; 352 u32 olinfo_status = 0, cmd_type_len; 353 int i, j, error, nsegs; 354 int first; 355 bool remap = TRUE; 356 struct mbuf *m_head; 357 bus_dma_segment_t segs[adapter->num_segs]; 358 bus_dmamap_t map; 359 struct ixgbe_tx_buf *txbuf; 360 union ixgbe_adv_tx_desc *txd = NULL; 361 362 m_head = *m_headp; 363 364 /* Basic descriptor defines */ 365 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | 366 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); 367 368 if (m_head->m_flags & M_VLANTAG) 369 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; 370 371 /* 372 * Important to capture the first descriptor 373 * used because it will contain the index of 374 * the one we tell the hardware to report back 375 */ 376 first = txr->next_avail_desc; 377 txbuf = &txr->tx_buffers[first]; 378 map = txbuf->map; 379 380 /* 381 * Map the packet for DMA. 382 */ 383 retry: 384 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 385 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 386 387 if (__predict_false(error)) { 388 struct mbuf *m; 389 390 switch (error) { 391 case EFBIG: 392 /* Try it again? 
- one try */ 393 if (remap == TRUE) { 394 remap = FALSE; 395 /* 396 * XXX: m_defrag will choke on 397 * non-MCLBYTES-sized clusters 398 */ 399 m = m_defrag(*m_headp, M_NOWAIT); 400 if (m == NULL) { 401 adapter->mbuf_defrag_failed++; 402 m_freem(*m_headp); 403 *m_headp = NULL; 404 return (ENOBUFS); 405 } 406 *m_headp = m; 407 goto retry; 408 } else 409 return (error); 410 case ENOMEM: 411 txr->no_tx_dma_setup++; 412 return (error); 413 default: 414 txr->no_tx_dma_setup++; 415 m_freem(*m_headp); 416 *m_headp = NULL; 417 return (error); 418 } 419 } 420 421 /* Make certain there are enough descriptors */ 422 if (nsegs > txr->tx_avail - 2) { 423 txr->no_desc_avail++; 424 bus_dmamap_unload(txr->txtag, map); 425 return (ENOBUFS); 426 } 427 m_head = *m_headp; 428 429 /* 430 * Set up the appropriate offload context 431 * this will consume the first descriptor 432 */ 433 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); 434 if (__predict_false(error)) { 435 if (error == ENOBUFS) 436 *m_headp = NULL; 437 return (error); 438 } 439 440 #ifdef IXGBE_FDIR 441 /* Do the flow director magic */ 442 if ((txr->atr_sample) && (!adapter->fdir_reinit)) { 443 ++txr->atr_count; 444 if (txr->atr_count >= atr_sample_rate) { 445 ixgbe_atr(txr, m_head); 446 txr->atr_count = 0; 447 } 448 } 449 #endif 450 451 i = txr->next_avail_desc; 452 for (j = 0; j < nsegs; j++) { 453 bus_size_t seglen; 454 bus_addr_t segaddr; 455 456 txbuf = &txr->tx_buffers[i]; 457 txd = &txr->tx_base[i]; 458 seglen = segs[j].ds_len; 459 segaddr = htole64(segs[j].ds_addr); 460 461 txd->read.buffer_addr = segaddr; 462 txd->read.cmd_type_len = htole32(txr->txd_cmd | 463 cmd_type_len |seglen); 464 txd->read.olinfo_status = htole32(olinfo_status); 465 466 if (++i == txr->num_desc) 467 i = 0; 468 } 469 470 txd->read.cmd_type_len |= 471 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); 472 txr->tx_avail -= nsegs; 473 txr->next_avail_desc = i; 474 475 txbuf->m_head = m_head; 476 /* 477 * Here we swap the map so the last descriptor, 478 * which gets the completion interrupt has the 479 * real map, and the first descriptor gets the 480 * unused map from this descriptor. 481 */ 482 txr->tx_buffers[first].map = txbuf->map; 483 txbuf->map = map; 484 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 485 486 /* Set the EOP descriptor that will be marked done */ 487 txbuf = &txr->tx_buffers[first]; 488 txbuf->eop = txd; 489 490 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 491 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 492 /* 493 * Advance the Transmit Descriptor Tail (Tdt), this tells the 494 * hardware that this frame is available to transmit. 495 */ 496 ++txr->total_packets; 497 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); 498 499 /* Mark queue as having work */ 500 if (txr->busy == 0) 501 txr->busy = 1; 502 503 return (0); 504 } 505 506 507 /********************************************************************* 508 * 509 * Allocate memory for tx_buffer structures. The tx_buffer stores all 510 * the information needed to transmit a packet on the wire. This is 511 * called only once at attach, setup is done every reset. 512 * 513 **********************************************************************/ 514 int 515 ixgbe_allocate_transmit_buffers(struct tx_ring *txr) 516 { 517 struct adapter *adapter = txr->adapter; 518 device_t dev = adapter->dev; 519 struct ixgbe_tx_buf *txbuf; 520 int error, i; 521 522 /* 523 * Setup DMA descriptor areas. 
524 */ 525 if ((error = bus_dma_tag_create( 526 bus_get_dma_tag(adapter->dev), /* parent */ 527 1, 0, /* alignment, bounds */ 528 BUS_SPACE_MAXADDR, /* lowaddr */ 529 BUS_SPACE_MAXADDR, /* highaddr */ 530 NULL, NULL, /* filter, filterarg */ 531 IXGBE_TSO_SIZE, /* maxsize */ 532 adapter->num_segs, /* nsegments */ 533 PAGE_SIZE, /* maxsegsize */ 534 0, /* flags */ 535 NULL, /* lockfunc */ 536 NULL, /* lockfuncarg */ 537 &txr->txtag))) { 538 device_printf(dev,"Unable to allocate TX DMA tag\n"); 539 goto fail; 540 } 541 542 if (!(txr->tx_buffers = 543 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * 544 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 545 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 546 error = ENOMEM; 547 goto fail; 548 } 549 550 /* Create the descriptor buffer dma maps */ 551 txbuf = txr->tx_buffers; 552 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 553 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 554 if (error != 0) { 555 device_printf(dev, "Unable to create TX DMA map\n"); 556 goto fail; 557 } 558 } 559 560 return 0; 561 fail: 562 /* We free all, it handles case where we are in the middle */ 563 ixgbe_free_transmit_structures(adapter); 564 return (error); 565 } 566 567 /********************************************************************* 568 * 569 * Initialize a transmit ring. 570 * 571 **********************************************************************/ 572 static void 573 ixgbe_setup_transmit_ring(struct tx_ring *txr) 574 { 575 struct adapter *adapter = txr->adapter; 576 struct ixgbe_tx_buf *txbuf; 577 #ifdef DEV_NETMAP 578 struct netmap_adapter *na = NA(adapter->ifp); 579 struct netmap_slot *slot; 580 #endif /* DEV_NETMAP */ 581 582 /* Clear the old ring contents */ 583 IXGBE_TX_LOCK(txr); 584 #ifdef DEV_NETMAP 585 /* 586 * (under lock): if in netmap mode, do some consistency 587 * checks and set slot to entry 0 of the netmap ring. 588 */ 589 slot = netmap_reset(na, NR_TX, txr->me, 0); 590 #endif /* DEV_NETMAP */ 591 bzero((void *)txr->tx_base, 592 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); 593 /* Reset indices */ 594 txr->next_avail_desc = 0; 595 txr->next_to_clean = 0; 596 597 /* Free any existing tx buffers. */ 598 txbuf = txr->tx_buffers; 599 for (int i = 0; i < txr->num_desc; i++, txbuf++) { 600 if (txbuf->m_head != NULL) { 601 bus_dmamap_sync(txr->txtag, txbuf->map, 602 BUS_DMASYNC_POSTWRITE); 603 bus_dmamap_unload(txr->txtag, txbuf->map); 604 m_freem(txbuf->m_head); 605 txbuf->m_head = NULL; 606 } 607 #ifdef DEV_NETMAP 608 /* 609 * In netmap mode, set the map for the packet buffer. 610 * NOTE: Some drivers (not this one) also need to set 611 * the physical buffer address in the NIC ring. 612 * Slots in the netmap ring (indexed by "si") are 613 * kring->nkr_hwofs positions "ahead" wrt the 614 * corresponding slot in the NIC ring. In some drivers 615 * (not here) nkr_hwofs can be negative. Function 616 * netmap_idx_n2k() handles wraparounds properly. 
617 */ 618 if (slot) { 619 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 620 netmap_load_map(na, txr->txtag, 621 txbuf->map, NMB(na, slot + si)); 622 } 623 #endif /* DEV_NETMAP */ 624 /* Clear the EOP descriptor pointer */ 625 txbuf->eop = NULL; 626 } 627 628 #ifdef IXGBE_FDIR 629 /* Set the rate at which we sample packets */ 630 if (adapter->hw.mac.type != ixgbe_mac_82598EB) 631 txr->atr_sample = atr_sample_rate; 632 #endif 633 634 /* Set number of descriptors available */ 635 txr->tx_avail = adapter->num_tx_desc; 636 637 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 638 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 639 IXGBE_TX_UNLOCK(txr); 640 } 641 642 /********************************************************************* 643 * 644 * Initialize all transmit rings. 645 * 646 **********************************************************************/ 647 int 648 ixgbe_setup_transmit_structures(struct adapter *adapter) 649 { 650 struct tx_ring *txr = adapter->tx_rings; 651 652 for (int i = 0; i < adapter->num_queues; i++, txr++) 653 ixgbe_setup_transmit_ring(txr); 654 655 return (0); 656 } 657 658 /********************************************************************* 659 * 660 * Free all transmit rings. 661 * 662 **********************************************************************/ 663 void 664 ixgbe_free_transmit_structures(struct adapter *adapter) 665 { 666 struct tx_ring *txr = adapter->tx_rings; 667 668 for (int i = 0; i < adapter->num_queues; i++, txr++) { 669 IXGBE_TX_LOCK(txr); 670 ixgbe_free_transmit_buffers(txr); 671 ixgbe_dma_free(adapter, &txr->txdma); 672 IXGBE_TX_UNLOCK(txr); 673 IXGBE_TX_LOCK_DESTROY(txr); 674 } 675 free(adapter->tx_rings, M_DEVBUF); 676 } 677 678 /********************************************************************* 679 * 680 * Free transmit ring related data structures. 
681 * 682 **********************************************************************/ 683 static void 684 ixgbe_free_transmit_buffers(struct tx_ring *txr) 685 { 686 struct adapter *adapter = txr->adapter; 687 struct ixgbe_tx_buf *tx_buffer; 688 int i; 689 690 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); 691 692 if (txr->tx_buffers == NULL) 693 return; 694 695 tx_buffer = txr->tx_buffers; 696 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { 697 if (tx_buffer->m_head != NULL) { 698 bus_dmamap_sync(txr->txtag, tx_buffer->map, 699 BUS_DMASYNC_POSTWRITE); 700 bus_dmamap_unload(txr->txtag, 701 tx_buffer->map); 702 m_freem(tx_buffer->m_head); 703 tx_buffer->m_head = NULL; 704 if (tx_buffer->map != NULL) { 705 bus_dmamap_destroy(txr->txtag, 706 tx_buffer->map); 707 tx_buffer->map = NULL; 708 } 709 } else if (tx_buffer->map != NULL) { 710 bus_dmamap_unload(txr->txtag, 711 tx_buffer->map); 712 bus_dmamap_destroy(txr->txtag, 713 tx_buffer->map); 714 tx_buffer->map = NULL; 715 } 716 } 717 #ifdef IXGBE_LEGACY_TX 718 if (txr->br != NULL) 719 buf_ring_free(txr->br, M_DEVBUF); 720 #endif 721 if (txr->tx_buffers != NULL) { 722 free(txr->tx_buffers, M_DEVBUF); 723 txr->tx_buffers = NULL; 724 } 725 if (txr->txtag != NULL) { 726 bus_dma_tag_destroy(txr->txtag); 727 txr->txtag = NULL; 728 } 729 return; 730 } 731 732 /********************************************************************* 733 * 734 * Advanced Context Descriptor setup for VLAN, CSUM or TSO 735 * 736 **********************************************************************/ 737 738 static int 739 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, 740 u32 *cmd_type_len, u32 *olinfo_status) 741 { 742 struct adapter *adapter = txr->adapter; 743 struct ixgbe_adv_tx_context_desc *TXD; 744 struct ether_vlan_header *eh; 745 struct ip *ip; 746 struct ip6_hdr *ip6; 747 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 748 int ehdrlen, ip_hlen = 0; 749 u16 etype; 750 u8 ipproto = 0; 751 int offload = TRUE; 752 int ctxd = txr->next_avail_desc; 753 u16 vtag = 0; 754 755 /* First check if TSO is to be used */ 756 if (mp->m_pkthdr.csum_flags & CSUM_TSO) 757 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); 758 759 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) 760 offload = FALSE; 761 762 /* Indicate the whole packet as payload when not doing TSO */ 763 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; 764 765 /* Now ready a context descriptor */ 766 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 767 768 /* 769 ** In advanced descriptors the vlan tag must 770 ** be placed into the context descriptor. Hence 771 ** we need to make one even if not doing offloads. 772 */ 773 if (mp->m_flags & M_VLANTAG) { 774 vtag = htole16(mp->m_pkthdr.ether_vtag); 775 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 776 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE)) 777 return (0); 778 779 /* 780 * Determine where frame payload starts. 781 * Jump over vlan headers if already present, 782 * helpful for QinQ too. 
783 */ 784 eh = mtod(mp, struct ether_vlan_header *); 785 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 786 etype = ntohs(eh->evl_proto); 787 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 788 } else { 789 etype = ntohs(eh->evl_encap_proto); 790 ehdrlen = ETHER_HDR_LEN; 791 } 792 793 /* Set the ether header length */ 794 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 795 796 if (offload == FALSE) 797 goto no_offloads; 798 799 switch (etype) { 800 case ETHERTYPE_IP: 801 ip = (struct ip *)(mp->m_data + ehdrlen); 802 ip_hlen = ip->ip_hl << 2; 803 ipproto = ip->ip_p; 804 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 805 break; 806 case ETHERTYPE_IPV6: 807 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 808 ip_hlen = sizeof(struct ip6_hdr); 809 /* XXX-BZ this will go badly in case of ext hdrs. */ 810 ipproto = ip6->ip6_nxt; 811 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 812 break; 813 default: 814 offload = FALSE; 815 break; 816 } 817 818 vlan_macip_lens |= ip_hlen; 819 820 switch (ipproto) { 821 case IPPROTO_TCP: 822 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 823 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 824 break; 825 826 case IPPROTO_UDP: 827 if (mp->m_pkthdr.csum_flags & CSUM_UDP) 828 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; 829 break; 830 831 #if __FreeBSD_version >= 800000 832 case IPPROTO_SCTP: 833 if (mp->m_pkthdr.csum_flags & CSUM_SCTP) 834 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; 835 break; 836 #endif 837 default: 838 offload = FALSE; 839 break; 840 } 841 842 if (offload) /* For the TX descriptor setup */ 843 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 844 845 no_offloads: 846 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 847 848 /* Now copy bits into descriptor */ 849 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 850 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 851 TXD->seqnum_seed = htole32(0); 852 TXD->mss_l4len_idx = htole32(0); 853 854 /* We've consumed the first desc, adjust counters */ 855 if (++ctxd == txr->num_desc) 856 ctxd = 0; 857 txr->next_avail_desc = ctxd; 858 --txr->tx_avail; 859 860 return (0); 861 } 862 863 /********************************************************************** 864 * 865 * Setup work for hardware segmentation offload (TSO) on 866 * adapters using advanced tx descriptors 867 * 868 **********************************************************************/ 869 static int 870 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, 871 u32 *cmd_type_len, u32 *olinfo_status) 872 { 873 struct ixgbe_adv_tx_context_desc *TXD; 874 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 875 u32 mss_l4len_idx = 0, paylen; 876 u16 vtag = 0, eh_type; 877 int ctxd, ehdrlen, ip_hlen, tcp_hlen; 878 struct ether_vlan_header *eh; 879 #ifdef INET6 880 struct ip6_hdr *ip6; 881 #endif 882 #ifdef INET 883 struct ip *ip; 884 #endif 885 struct tcphdr *th; 886 887 888 /* 889 * Determine where frame payload starts. 890 * Jump over vlan headers if already present 891 */ 892 eh = mtod(mp, struct ether_vlan_header *); 893 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 894 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 895 eh_type = eh->evl_proto; 896 } else { 897 ehdrlen = ETHER_HDR_LEN; 898 eh_type = eh->evl_encap_proto; 899 } 900 901 switch (ntohs(eh_type)) { 902 #ifdef INET6 903 case ETHERTYPE_IPV6: 904 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 905 /* XXX-BZ For now we do not pretend to support ext. hdrs. 
*/ 906 if (ip6->ip6_nxt != IPPROTO_TCP) 907 return (ENXIO); 908 ip_hlen = sizeof(struct ip6_hdr); 909 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 910 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 911 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 912 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 913 break; 914 #endif 915 #ifdef INET 916 case ETHERTYPE_IP: 917 ip = (struct ip *)(mp->m_data + ehdrlen); 918 if (ip->ip_p != IPPROTO_TCP) 919 return (ENXIO); 920 ip->ip_sum = 0; 921 ip_hlen = ip->ip_hl << 2; 922 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 923 th->th_sum = in_pseudo(ip->ip_src.s_addr, 924 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 925 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 926 /* Tell transmit desc to also do IPv4 checksum. */ 927 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 928 break; 929 #endif 930 default: 931 panic("%s: CSUM_TSO but no supported IP version (0x%04x)", 932 __func__, ntohs(eh_type)); 933 break; 934 } 935 936 ctxd = txr->next_avail_desc; 937 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 938 939 tcp_hlen = th->th_off << 2; 940 941 /* This is used in the transmit desc in encap */ 942 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; 943 944 /* VLAN MACLEN IPLEN */ 945 if (mp->m_flags & M_VLANTAG) { 946 vtag = htole16(mp->m_pkthdr.ether_vtag); 947 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 948 } 949 950 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 951 vlan_macip_lens |= ip_hlen; 952 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 953 954 /* ADV DTYPE TUCMD */ 955 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 956 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 957 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 958 959 /* MSS L4LEN IDX */ 960 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); 961 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); 962 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 963 964 TXD->seqnum_seed = htole32(0); 965 966 if (++ctxd == txr->num_desc) 967 ctxd = 0; 968 969 txr->tx_avail--; 970 txr->next_avail_desc = ctxd; 971 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; 972 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 973 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; 974 ++txr->tso_tx; 975 return (0); 976 } 977 978 979 /********************************************************************** 980 * 981 * Examine each tx_buffer in the used queue. If the hardware is done 982 * processing the packet then free associated resources. The 983 * tx_buffer is put back on the free queue. 984 * 985 **********************************************************************/ 986 void 987 ixgbe_txeof(struct tx_ring *txr) 988 { 989 struct adapter *adapter = txr->adapter; 990 #ifdef DEV_NETMAP 991 struct ifnet *ifp = adapter->ifp; 992 #endif 993 u32 work, processed = 0; 994 u32 limit = adapter->tx_process_limit; 995 struct ixgbe_tx_buf *buf; 996 union ixgbe_adv_tx_desc *txd; 997 998 mtx_assert(&txr->tx_mtx, MA_OWNED); 999 1000 #ifdef DEV_NETMAP 1001 if (ifp->if_capenable & IFCAP_NETMAP) { 1002 struct netmap_adapter *na = NA(ifp); 1003 struct netmap_kring *kring = &na->tx_rings[txr->me]; 1004 txd = txr->tx_base; 1005 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1006 BUS_DMASYNC_POSTREAD); 1007 /* 1008 * In netmap mode, all the work is done in the context 1009 * of the client thread. Interrupt handlers only wake up 1010 * clients, which may be sleeping on individual rings 1011 * or on a global resource for all rings. 
1012 * To implement tx interrupt mitigation, we wake up the client 1013 * thread roughly every half ring, even if the NIC interrupts 1014 * more frequently. This is implemented as follows: 1015 * - ixgbe_txsync() sets kring->nr_kflags with the index of 1016 * the slot that should wake up the thread (nkr_num_slots 1017 * means the user thread should not be woken up); 1018 * - the driver ignores tx interrupts unless netmap_mitigate=0 1019 * or the slot has the DD bit set. 1020 */ 1021 if (!netmap_mitigate || 1022 (kring->nr_kflags < kring->nkr_num_slots && 1023 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { 1024 netmap_tx_irq(ifp, txr->me); 1025 } 1026 return; 1027 } 1028 #endif /* DEV_NETMAP */ 1029 1030 if (txr->tx_avail == txr->num_desc) { 1031 txr->busy = 0; 1032 return; 1033 } 1034 1035 /* Get work starting point */ 1036 work = txr->next_to_clean; 1037 buf = &txr->tx_buffers[work]; 1038 txd = &txr->tx_base[work]; 1039 work -= txr->num_desc; /* The distance to ring end */ 1040 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1041 BUS_DMASYNC_POSTREAD); 1042 1043 do { 1044 union ixgbe_adv_tx_desc *eop= buf->eop; 1045 if (eop == NULL) /* No work */ 1046 break; 1047 1048 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) 1049 break; /* I/O not complete */ 1050 1051 if (buf->m_head) { 1052 txr->bytes += 1053 buf->m_head->m_pkthdr.len; 1054 bus_dmamap_sync(txr->txtag, 1055 buf->map, 1056 BUS_DMASYNC_POSTWRITE); 1057 bus_dmamap_unload(txr->txtag, 1058 buf->map); 1059 m_freem(buf->m_head); 1060 buf->m_head = NULL; 1061 } 1062 buf->eop = NULL; 1063 ++txr->tx_avail; 1064 1065 /* We clean the range if multi segment */ 1066 while (txd != eop) { 1067 ++txd; 1068 ++buf; 1069 ++work; 1070 /* wrap the ring? */ 1071 if (__predict_false(!work)) { 1072 work -= txr->num_desc; 1073 buf = txr->tx_buffers; 1074 txd = txr->tx_base; 1075 } 1076 if (buf->m_head) { 1077 txr->bytes += 1078 buf->m_head->m_pkthdr.len; 1079 bus_dmamap_sync(txr->txtag, 1080 buf->map, 1081 BUS_DMASYNC_POSTWRITE); 1082 bus_dmamap_unload(txr->txtag, 1083 buf->map); 1084 m_freem(buf->m_head); 1085 buf->m_head = NULL; 1086 } 1087 ++txr->tx_avail; 1088 buf->eop = NULL; 1089 1090 } 1091 ++txr->packets; 1092 ++processed; 1093 1094 /* Try the next packet */ 1095 ++txd; 1096 ++buf; 1097 ++work; 1098 /* reset with a wrap */ 1099 if (__predict_false(!work)) { 1100 work -= txr->num_desc; 1101 buf = txr->tx_buffers; 1102 txd = txr->tx_base; 1103 } 1104 prefetch(txd); 1105 } while (__predict_true(--limit)); 1106 1107 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1108 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1109 1110 work += txr->num_desc; 1111 txr->next_to_clean = work; 1112 1113 /* 1114 ** Queue Hang detection, we know there's 1115 ** work outstanding or the first return 1116 ** would have been taken, so increment busy 1117 ** if nothing managed to get cleaned, then 1118 ** in local_timer it will be checked and 1119 ** marked as HUNG if it exceeds a MAX attempt. 1120 */ 1121 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) 1122 ++txr->busy; 1123 /* 1124 ** If anything gets cleaned we reset state to 1, 1125 ** note this will turn off HUNG if its set. 1126 */ 1127 if (processed) 1128 txr->busy = 1; 1129 1130 if (txr->tx_avail == txr->num_desc) 1131 txr->busy = 0; 1132 1133 return; 1134 } 1135 1136 1137 #ifdef IXGBE_FDIR 1138 /* 1139 ** This routine parses packet headers so that Flow 1140 ** Director can make a hashed filter table entry 1141 ** allowing traffic flows to be identified and kept 1142 ** on the same cpu. 
This would be a performance 1143 ** hit, but we only do it at IXGBE_FDIR_RATE of 1144 ** packets. 1145 */ 1146 static void 1147 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) 1148 { 1149 struct adapter *adapter = txr->adapter; 1150 struct ix_queue *que; 1151 struct ip *ip; 1152 struct tcphdr *th; 1153 struct udphdr *uh; 1154 struct ether_vlan_header *eh; 1155 union ixgbe_atr_hash_dword input = {.dword = 0}; 1156 union ixgbe_atr_hash_dword common = {.dword = 0}; 1157 int ehdrlen, ip_hlen; 1158 u16 etype; 1159 1160 eh = mtod(mp, struct ether_vlan_header *); 1161 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1162 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1163 etype = eh->evl_proto; 1164 } else { 1165 ehdrlen = ETHER_HDR_LEN; 1166 etype = eh->evl_encap_proto; 1167 } 1168 1169 /* Only handling IPv4 */ 1170 if (etype != htons(ETHERTYPE_IP)) 1171 return; 1172 1173 ip = (struct ip *)(mp->m_data + ehdrlen); 1174 ip_hlen = ip->ip_hl << 2; 1175 1176 /* check if we're UDP or TCP */ 1177 switch (ip->ip_p) { 1178 case IPPROTO_TCP: 1179 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 1180 /* src and dst are inverted */ 1181 common.port.dst ^= th->th_sport; 1182 common.port.src ^= th->th_dport; 1183 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; 1184 break; 1185 case IPPROTO_UDP: 1186 uh = (struct udphdr *)((caddr_t)ip + ip_hlen); 1187 /* src and dst are inverted */ 1188 common.port.dst ^= uh->uh_sport; 1189 common.port.src ^= uh->uh_dport; 1190 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; 1191 break; 1192 default: 1193 return; 1194 } 1195 1196 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); 1197 if (mp->m_pkthdr.ether_vtag) 1198 common.flex_bytes ^= htons(ETHERTYPE_VLAN); 1199 else 1200 common.flex_bytes ^= etype; 1201 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; 1202 1203 que = &adapter->queues[txr->me]; 1204 /* 1205 ** This assumes the Rx queue and Tx 1206 ** queue are bound to the same CPU 1207 */ 1208 ixgbe_fdir_add_signature_filter_82599(&adapter->hw, 1209 input, common, que->msix); 1210 } 1211 #endif /* IXGBE_FDIR */ 1212 1213 /* 1214 ** Used to detect a descriptor that has 1215 ** been merged by Hardware RSC. 1216 */ 1217 static inline u32 1218 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) 1219 { 1220 return (le32toh(rx->wb.lower.lo_dword.data) & 1221 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; 1222 } 1223 1224 /********************************************************************* 1225 * 1226 * Initialize Hardware RSC (LRO) feature on 82599 1227 * for an RX ring, this is toggled by the LRO capability 1228 * even though it is transparent to the stack. 1229 * 1230 * NOTE: since this HW feature only works with IPV4 and 1231 * our testing has shown soft LRO to be as effective 1232 * I have decided to disable this by default. 
1233 * 1234 **********************************************************************/ 1235 static void 1236 ixgbe_setup_hw_rsc(struct rx_ring *rxr) 1237 { 1238 struct adapter *adapter = rxr->adapter; 1239 struct ixgbe_hw *hw = &adapter->hw; 1240 u32 rscctrl, rdrxctl; 1241 1242 /* If turning LRO/RSC off we need to disable it */ 1243 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { 1244 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1245 rscctrl &= ~IXGBE_RSCCTL_RSCEN; 1246 return; 1247 } 1248 1249 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); 1250 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; 1251 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */ 1252 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) 1253 #endif /* DEV_NETMAP */ 1254 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; 1255 rdrxctl |= IXGBE_RDRXCTL_RSCACKC; 1256 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); 1257 1258 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1259 rscctrl |= IXGBE_RSCCTL_RSCEN; 1260 /* 1261 ** Limit the total number of descriptors that 1262 ** can be combined, so it does not exceed 64K 1263 */ 1264 if (rxr->mbuf_sz == MCLBYTES) 1265 rscctrl |= IXGBE_RSCCTL_MAXDESC_16; 1266 else if (rxr->mbuf_sz == MJUMPAGESIZE) 1267 rscctrl |= IXGBE_RSCCTL_MAXDESC_8; 1268 else if (rxr->mbuf_sz == MJUM9BYTES) 1269 rscctrl |= IXGBE_RSCCTL_MAXDESC_4; 1270 else /* Using 16K cluster */ 1271 rscctrl |= IXGBE_RSCCTL_MAXDESC_1; 1272 1273 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); 1274 1275 /* Enable TCP header recognition */ 1276 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 1277 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | 1278 IXGBE_PSRTYPE_TCPHDR)); 1279 1280 /* Disable RSC for ACK packets */ 1281 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, 1282 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); 1283 1284 rxr->hw_rsc = TRUE; 1285 } 1286 /********************************************************************* 1287 * 1288 * Refresh mbuf buffers for RX descriptor rings 1289 * - now keeps its own state so discards due to resource 1290 * exhaustion are unnecessary, if an mbuf cannot be obtained 1291 * it just returns, keeping its placeholder, thus it can simply 1292 * be recalled to try again. 1293 * 1294 **********************************************************************/ 1295 static void 1296 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) 1297 { 1298 struct adapter *adapter = rxr->adapter; 1299 bus_dma_segment_t seg[1]; 1300 struct ixgbe_rx_buf *rxbuf; 1301 struct mbuf *mp; 1302 int i, j, nsegs, error; 1303 bool refreshed = FALSE; 1304 1305 i = j = rxr->next_to_refresh; 1306 /* Control the loop with one beyond */ 1307 if (++j == rxr->num_desc) 1308 j = 0; 1309 1310 while (j != limit) { 1311 rxbuf = &rxr->rx_buffers[i]; 1312 if (rxbuf->buf == NULL) { 1313 mp = m_getjcl(M_NOWAIT, MT_DATA, 1314 M_PKTHDR, rxr->mbuf_sz); 1315 if (mp == NULL) 1316 goto update; 1317 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) 1318 m_adj(mp, ETHER_ALIGN); 1319 } else 1320 mp = rxbuf->buf; 1321 1322 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1323 1324 /* If we're dealing with an mbuf that was copied rather 1325 * than replaced, there's no need to go through busdma. 
1326 */ 1327 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { 1328 /* Get the memory mapping */ 1329 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1330 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1331 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); 1332 if (error != 0) { 1333 printf("Refresh mbufs: payload dmamap load" 1334 " failure - %d\n", error); 1335 m_free(mp); 1336 rxbuf->buf = NULL; 1337 goto update; 1338 } 1339 rxbuf->buf = mp; 1340 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1341 BUS_DMASYNC_PREREAD); 1342 rxbuf->addr = rxr->rx_base[i].read.pkt_addr = 1343 htole64(seg[0].ds_addr); 1344 } else { 1345 rxr->rx_base[i].read.pkt_addr = rxbuf->addr; 1346 rxbuf->flags &= ~IXGBE_RX_COPY; 1347 } 1348 1349 refreshed = TRUE; 1350 /* Next is precalculated */ 1351 i = j; 1352 rxr->next_to_refresh = i; 1353 if (++j == rxr->num_desc) 1354 j = 0; 1355 } 1356 update: 1357 if (refreshed) /* Update hardware tail index */ 1358 IXGBE_WRITE_REG(&adapter->hw, 1359 rxr->tail, rxr->next_to_refresh); 1360 return; 1361 } 1362 1363 /********************************************************************* 1364 * 1365 * Allocate memory for rx_buffer structures. Since we use one 1366 * rx_buffer per received packet, the maximum number of rx_buffer's 1367 * that we'll need is equal to the number of receive descriptors 1368 * that we've allocated. 1369 * 1370 **********************************************************************/ 1371 int 1372 ixgbe_allocate_receive_buffers(struct rx_ring *rxr) 1373 { 1374 struct adapter *adapter = rxr->adapter; 1375 device_t dev = adapter->dev; 1376 struct ixgbe_rx_buf *rxbuf; 1377 int bsize, error; 1378 1379 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; 1380 if (!(rxr->rx_buffers = 1381 (struct ixgbe_rx_buf *) malloc(bsize, 1382 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1383 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1384 error = ENOMEM; 1385 goto fail; 1386 } 1387 1388 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1389 1, 0, /* alignment, bounds */ 1390 BUS_SPACE_MAXADDR, /* lowaddr */ 1391 BUS_SPACE_MAXADDR, /* highaddr */ 1392 NULL, NULL, /* filter, filterarg */ 1393 MJUM16BYTES, /* maxsize */ 1394 1, /* nsegments */ 1395 MJUM16BYTES, /* maxsegsize */ 1396 0, /* flags */ 1397 NULL, /* lockfunc */ 1398 NULL, /* lockfuncarg */ 1399 &rxr->ptag))) { 1400 device_printf(dev, "Unable to create RX DMA tag\n"); 1401 goto fail; 1402 } 1403 1404 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) { 1405 rxbuf = &rxr->rx_buffers[i]; 1406 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); 1407 if (error) { 1408 device_printf(dev, "Unable to create RX dma map\n"); 1409 goto fail; 1410 } 1411 } 1412 1413 return (0); 1414 1415 fail: 1416 /* Frees all, but can handle partial completion */ 1417 ixgbe_free_receive_structures(adapter); 1418 return (error); 1419 } 1420 1421 1422 static void 1423 ixgbe_free_receive_ring(struct rx_ring *rxr) 1424 { 1425 struct ixgbe_rx_buf *rxbuf; 1426 1427 for (int i = 0; i < rxr->num_desc; i++) { 1428 rxbuf = &rxr->rx_buffers[i]; 1429 if (rxbuf->buf != NULL) { 1430 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1431 BUS_DMASYNC_POSTREAD); 1432 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1433 rxbuf->buf->m_flags |= M_PKTHDR; 1434 m_freem(rxbuf->buf); 1435 rxbuf->buf = NULL; 1436 rxbuf->flags = 0; 1437 } 1438 } 1439 } 1440 1441 1442 /********************************************************************* 1443 * 1444 * Initialize a receive ring and its buffers. 
1445 * 1446 **********************************************************************/ 1447 static int 1448 ixgbe_setup_receive_ring(struct rx_ring *rxr) 1449 { 1450 struct adapter *adapter; 1451 struct ifnet *ifp; 1452 device_t dev; 1453 struct ixgbe_rx_buf *rxbuf; 1454 bus_dma_segment_t seg[1]; 1455 struct lro_ctrl *lro = &rxr->lro; 1456 int rsize, nsegs, error = 0; 1457 #ifdef DEV_NETMAP 1458 struct netmap_adapter *na = NA(rxr->adapter->ifp); 1459 struct netmap_slot *slot; 1460 #endif /* DEV_NETMAP */ 1461 1462 adapter = rxr->adapter; 1463 ifp = adapter->ifp; 1464 dev = adapter->dev; 1465 1466 /* Clear the ring contents */ 1467 IXGBE_RX_LOCK(rxr); 1468 #ifdef DEV_NETMAP 1469 /* same as in ixgbe_setup_transmit_ring() */ 1470 slot = netmap_reset(na, NR_RX, rxr->me, 0); 1471 #endif /* DEV_NETMAP */ 1472 rsize = roundup2(adapter->num_rx_desc * 1473 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); 1474 bzero((void *)rxr->rx_base, rsize); 1475 /* Cache the size */ 1476 rxr->mbuf_sz = adapter->rx_mbuf_sz; 1477 1478 /* Free current RX buffer structs and their mbufs */ 1479 ixgbe_free_receive_ring(rxr); 1480 1481 /* Now replenish the mbufs */ 1482 for (int j = 0; j != rxr->num_desc; ++j) { 1483 struct mbuf *mp; 1484 1485 rxbuf = &rxr->rx_buffers[j]; 1486 #ifdef DEV_NETMAP 1487 /* 1488 * In netmap mode, fill the map and set the buffer 1489 * address in the NIC ring, considering the offset 1490 * between the netmap and NIC rings (see comment in 1491 * ixgbe_setup_transmit_ring() ). No need to allocate 1492 * an mbuf, so end the block with a continue; 1493 */ 1494 if (slot) { 1495 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 1496 uint64_t paddr; 1497 void *addr; 1498 1499 addr = PNMB(na, slot + sj, &paddr); 1500 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); 1501 /* Update descriptor and the cached value */ 1502 rxr->rx_base[j].read.pkt_addr = htole64(paddr); 1503 rxbuf->addr = htole64(paddr); 1504 continue; 1505 } 1506 #endif /* DEV_NETMAP */ 1507 rxbuf->flags = 0; 1508 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, 1509 M_PKTHDR, adapter->rx_mbuf_sz); 1510 if (rxbuf->buf == NULL) { 1511 error = ENOBUFS; 1512 goto fail; 1513 } 1514 mp = rxbuf->buf; 1515 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1516 /* Get the memory mapping */ 1517 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1518 rxbuf->pmap, mp, seg, 1519 &nsegs, BUS_DMA_NOWAIT); 1520 if (error != 0) 1521 goto fail; 1522 bus_dmamap_sync(rxr->ptag, 1523 rxbuf->pmap, BUS_DMASYNC_PREREAD); 1524 /* Update the descriptor and the cached value */ 1525 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); 1526 rxbuf->addr = htole64(seg[0].ds_addr); 1527 } 1528 1529 1530 /* Setup our descriptor indices */ 1531 rxr->next_to_check = 0; 1532 rxr->next_to_refresh = 0; 1533 rxr->lro_enabled = FALSE; 1534 rxr->rx_copies = 0; 1535 rxr->rx_bytes = 0; 1536 rxr->vtag_strip = FALSE; 1537 1538 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1539 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1540 1541 /* 1542 ** Now set up the LRO interface: 1543 */ 1544 if (ixgbe_rsc_enable) 1545 ixgbe_setup_hw_rsc(rxr); 1546 else if (ifp->if_capenable & IFCAP_LRO) { 1547 int err = tcp_lro_init(lro); 1548 if (err) { 1549 device_printf(dev, "LRO Initialization failed!\n"); 1550 goto fail; 1551 } 1552 INIT_DEBUGOUT("RX Soft LRO Initialized\n"); 1553 rxr->lro_enabled = TRUE; 1554 lro->ifp = adapter->ifp; 1555 } 1556 1557 IXGBE_RX_UNLOCK(rxr); 1558 return (0); 1559 1560 fail: 1561 ixgbe_free_receive_ring(rxr); 1562 IXGBE_RX_UNLOCK(rxr); 1563 return (error); 1564 } 1565 1566 
/********************************************************************* 1567 * 1568 * Initialize all receive rings. 1569 * 1570 **********************************************************************/ 1571 int 1572 ixgbe_setup_receive_structures(struct adapter *adapter) 1573 { 1574 struct rx_ring *rxr = adapter->rx_rings; 1575 int j; 1576 1577 for (j = 0; j < adapter->num_queues; j++, rxr++) 1578 if (ixgbe_setup_receive_ring(rxr)) 1579 goto fail; 1580 1581 return (0); 1582 fail: 1583 /* 1584 * Free RX buffers allocated so far, we will only handle 1585 * the rings that completed, the failing case will have 1586 * cleaned up for itself. 'j' failed, so its the terminus. 1587 */ 1588 for (int i = 0; i < j; ++i) { 1589 rxr = &adapter->rx_rings[i]; 1590 ixgbe_free_receive_ring(rxr); 1591 } 1592 1593 return (ENOBUFS); 1594 } 1595 1596 1597 /********************************************************************* 1598 * 1599 * Free all receive rings. 1600 * 1601 **********************************************************************/ 1602 void 1603 ixgbe_free_receive_structures(struct adapter *adapter) 1604 { 1605 struct rx_ring *rxr = adapter->rx_rings; 1606 1607 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); 1608 1609 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 1610 struct lro_ctrl *lro = &rxr->lro; 1611 ixgbe_free_receive_buffers(rxr); 1612 /* Free LRO memory */ 1613 tcp_lro_free(lro); 1614 /* Free the ring memory as well */ 1615 ixgbe_dma_free(adapter, &rxr->rxdma); 1616 } 1617 1618 free(adapter->rx_rings, M_DEVBUF); 1619 } 1620 1621 1622 /********************************************************************* 1623 * 1624 * Free receive ring data structures 1625 * 1626 **********************************************************************/ 1627 void 1628 ixgbe_free_receive_buffers(struct rx_ring *rxr) 1629 { 1630 struct adapter *adapter = rxr->adapter; 1631 struct ixgbe_rx_buf *rxbuf; 1632 1633 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); 1634 1635 /* Cleanup any existing buffers */ 1636 if (rxr->rx_buffers != NULL) { 1637 for (int i = 0; i < adapter->num_rx_desc; i++) { 1638 rxbuf = &rxr->rx_buffers[i]; 1639 if (rxbuf->buf != NULL) { 1640 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1641 BUS_DMASYNC_POSTREAD); 1642 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1643 rxbuf->buf->m_flags |= M_PKTHDR; 1644 m_freem(rxbuf->buf); 1645 } 1646 rxbuf->buf = NULL; 1647 if (rxbuf->pmap != NULL) { 1648 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); 1649 rxbuf->pmap = NULL; 1650 } 1651 } 1652 if (rxr->rx_buffers != NULL) { 1653 free(rxr->rx_buffers, M_DEVBUF); 1654 rxr->rx_buffers = NULL; 1655 } 1656 } 1657 1658 if (rxr->ptag != NULL) { 1659 bus_dma_tag_destroy(rxr->ptag); 1660 rxr->ptag = NULL; 1661 } 1662 1663 return; 1664 } 1665 1666 static __inline void 1667 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) 1668 { 1669 1670 /* 1671 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet 1672 * should be computed by hardware. Also it should not have VLAN tag in 1673 * ethernet header. In case of IPv6 we do not yet support ext. hdrs. 
1674 */ 1675 if (rxr->lro_enabled && 1676 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 1677 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && 1678 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == 1679 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || 1680 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == 1681 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && 1682 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 1683 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { 1684 /* 1685 * Send to the stack if: 1686 ** - LRO not enabled, or 1687 ** - no LRO resources, or 1688 ** - lro enqueue fails 1689 */ 1690 if (rxr->lro.lro_cnt != 0) 1691 if (tcp_lro_rx(&rxr->lro, m, 0) == 0) 1692 return; 1693 } 1694 IXGBE_RX_UNLOCK(rxr); 1695 (*ifp->if_input)(ifp, m); 1696 IXGBE_RX_LOCK(rxr); 1697 } 1698 1699 static __inline void 1700 ixgbe_rx_discard(struct rx_ring *rxr, int i) 1701 { 1702 struct ixgbe_rx_buf *rbuf; 1703 1704 rbuf = &rxr->rx_buffers[i]; 1705 1706 1707 /* 1708 ** With advanced descriptors the writeback 1709 ** clobbers the buffer addrs, so its easier 1710 ** to just free the existing mbufs and take 1711 ** the normal refresh path to get new buffers 1712 ** and mapping. 1713 */ 1714 1715 if (rbuf->fmp != NULL) {/* Partial chain ? */ 1716 rbuf->fmp->m_flags |= M_PKTHDR; 1717 m_freem(rbuf->fmp); 1718 rbuf->fmp = NULL; 1719 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */ 1720 } else if (rbuf->buf) { 1721 m_free(rbuf->buf); 1722 rbuf->buf = NULL; 1723 } 1724 bus_dmamap_unload(rxr->ptag, rbuf->pmap); 1725 1726 rbuf->flags = 0; 1727 1728 return; 1729 } 1730 1731 1732 /********************************************************************* 1733 * 1734 * This routine executes in interrupt context. It replenishes 1735 * the mbufs in the descriptor and sends data which has been 1736 * dma'ed into host memory to upper layer. 1737 * 1738 * Return TRUE for more work, FALSE for all clean. 1739 *********************************************************************/ 1740 bool 1741 ixgbe_rxeof(struct ix_queue *que) 1742 { 1743 struct adapter *adapter = que->adapter; 1744 struct rx_ring *rxr = que->rxr; 1745 struct ifnet *ifp = adapter->ifp; 1746 struct lro_ctrl *lro = &rxr->lro; 1747 struct lro_entry *queued; 1748 int i, nextp, processed = 0; 1749 u32 staterr = 0; 1750 u32 count = adapter->rx_process_limit; 1751 union ixgbe_adv_rx_desc *cur; 1752 struct ixgbe_rx_buf *rbuf, *nbuf; 1753 u16 pkt_info; 1754 1755 IXGBE_RX_LOCK(rxr); 1756 1757 #ifdef DEV_NETMAP 1758 /* Same as the txeof routine: wakeup clients on intr. */ 1759 if (netmap_rx_irq(ifp, rxr->me, &processed)) { 1760 IXGBE_RX_UNLOCK(rxr); 1761 return (FALSE); 1762 } 1763 #endif /* DEV_NETMAP */ 1764 1765 for (i = rxr->next_to_check; count != 0;) { 1766 struct mbuf *sendmp, *mp; 1767 u32 rsc, ptype; 1768 u16 len; 1769 u16 vtag = 0; 1770 bool eop; 1771 1772 /* Sync the ring. 
*/ 1773 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1774 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1775 1776 cur = &rxr->rx_base[i]; 1777 staterr = le32toh(cur->wb.upper.status_error); 1778 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); 1779 1780 if ((staterr & IXGBE_RXD_STAT_DD) == 0) 1781 break; 1782 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1783 break; 1784 1785 count--; 1786 sendmp = NULL; 1787 nbuf = NULL; 1788 rsc = 0; 1789 cur->wb.upper.status_error = 0; 1790 rbuf = &rxr->rx_buffers[i]; 1791 mp = rbuf->buf; 1792 1793 len = le16toh(cur->wb.upper.length); 1794 ptype = le32toh(cur->wb.lower.lo_dword.data) & 1795 IXGBE_RXDADV_PKTTYPE_MASK; 1796 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); 1797 1798 /* Make sure bad packets are discarded */ 1799 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { 1800 #if __FreeBSD_version >= 1100036 1801 if (IXGBE_IS_VF(adapter)) 1802 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 1803 #endif 1804 rxr->rx_discarded++; 1805 ixgbe_rx_discard(rxr, i); 1806 goto next_desc; 1807 } 1808 1809 /* 1810 ** On 82599 which supports a hardware 1811 ** LRO (called HW RSC), packets need 1812 ** not be fragmented across sequential 1813 ** descriptors, rather the next descriptor 1814 ** is indicated in bits of the descriptor. 1815 ** This also means that we might proceses 1816 ** more than one packet at a time, something 1817 ** that has never been true before, it 1818 ** required eliminating global chain pointers 1819 ** in favor of what we are doing here. -jfv 1820 */ 1821 if (!eop) { 1822 /* 1823 ** Figure out the next descriptor 1824 ** of this frame. 1825 */ 1826 if (rxr->hw_rsc == TRUE) { 1827 rsc = ixgbe_rsc_count(cur); 1828 rxr->rsc_num += (rsc - 1); 1829 } 1830 if (rsc) { /* Get hardware index */ 1831 nextp = ((staterr & 1832 IXGBE_RXDADV_NEXTP_MASK) >> 1833 IXGBE_RXDADV_NEXTP_SHIFT); 1834 } else { /* Just sequential */ 1835 nextp = i + 1; 1836 if (nextp == adapter->num_rx_desc) 1837 nextp = 0; 1838 } 1839 nbuf = &rxr->rx_buffers[nextp]; 1840 prefetch(nbuf); 1841 } 1842 /* 1843 ** Rather than using the fmp/lmp global pointers 1844 ** we now keep the head of a packet chain in the 1845 ** buffer struct and pass this along from one 1846 ** descriptor to the next, until we get EOP. 1847 */ 1848 mp->m_len = len; 1849 /* 1850 ** See if there is a stored head 1851 ** that determines what we are 1852 */ 1853 sendmp = rbuf->fmp; 1854 if (sendmp != NULL) { /* secondary frag */ 1855 rbuf->buf = rbuf->fmp = NULL; 1856 mp->m_flags &= ~M_PKTHDR; 1857 sendmp->m_pkthdr.len += mp->m_len; 1858 } else { 1859 /* 1860 * Optimize. This might be a small packet, 1861 * maybe just a TCP ACK. Do a fast copy that 1862 * is cache aligned into a new mbuf, and 1863 * leave the old mbuf+cluster for re-use. 
1864 */ 1865 if (eop && len <= IXGBE_RX_COPY_LEN) { 1866 sendmp = m_gethdr(M_NOWAIT, MT_DATA); 1867 if (sendmp != NULL) { 1868 sendmp->m_data += 1869 IXGBE_RX_COPY_ALIGN; 1870 ixgbe_bcopy(mp->m_data, 1871 sendmp->m_data, len); 1872 sendmp->m_len = len; 1873 rxr->rx_copies++; 1874 rbuf->flags |= IXGBE_RX_COPY; 1875 } 1876 } 1877 if (sendmp == NULL) { 1878 rbuf->buf = rbuf->fmp = NULL; 1879 sendmp = mp; 1880 } 1881 1882 /* first desc of a non-ps chain */ 1883 sendmp->m_flags |= M_PKTHDR; 1884 sendmp->m_pkthdr.len = mp->m_len; 1885 } 1886 ++processed; 1887 1888 /* Pass the head pointer on */ 1889 if (eop == 0) { 1890 nbuf->fmp = sendmp; 1891 sendmp = NULL; 1892 mp->m_next = nbuf->buf; 1893 } else { /* Sending this frame */ 1894 sendmp->m_pkthdr.rcvif = ifp; 1895 rxr->rx_packets++; 1896 /* capture data for AIM */ 1897 rxr->bytes += sendmp->m_pkthdr.len; 1898 rxr->rx_bytes += sendmp->m_pkthdr.len; 1899 /* Process vlan info */ 1900 if ((rxr->vtag_strip) && 1901 (staterr & IXGBE_RXD_STAT_VP)) 1902 vtag = le16toh(cur->wb.upper.vlan); 1903 if (vtag) { 1904 sendmp->m_pkthdr.ether_vtag = vtag; 1905 sendmp->m_flags |= M_VLANTAG; 1906 } 1907 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1908 ixgbe_rx_checksum(staterr, sendmp, ptype); 1909 1910 /* 1911 * In case of multiqueue, we have RXCSUM.PCSD bit set 1912 * and never cleared. This means we have RSS hash 1913 * available to be used. 1914 */ 1915 if (adapter->num_queues > 1) { 1916 sendmp->m_pkthdr.flowid = 1917 le32toh(cur->wb.lower.hi_dword.rss); 1918 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { 1919 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: 1920 M_HASHTYPE_SET(sendmp, 1921 M_HASHTYPE_RSS_TCP_IPV4); 1922 break; 1923 case IXGBE_RXDADV_RSSTYPE_IPV4: 1924 M_HASHTYPE_SET(sendmp, 1925 M_HASHTYPE_RSS_IPV4); 1926 break; 1927 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: 1928 M_HASHTYPE_SET(sendmp, 1929 M_HASHTYPE_RSS_TCP_IPV6); 1930 break; 1931 case IXGBE_RXDADV_RSSTYPE_IPV6_EX: 1932 M_HASHTYPE_SET(sendmp, 1933 M_HASHTYPE_RSS_IPV6_EX); 1934 break; 1935 case IXGBE_RXDADV_RSSTYPE_IPV6: 1936 M_HASHTYPE_SET(sendmp, 1937 M_HASHTYPE_RSS_IPV6); 1938 break; 1939 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: 1940 M_HASHTYPE_SET(sendmp, 1941 M_HASHTYPE_RSS_TCP_IPV6_EX); 1942 break; 1943 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: 1944 M_HASHTYPE_SET(sendmp, 1945 M_HASHTYPE_RSS_UDP_IPV4); 1946 break; 1947 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: 1948 M_HASHTYPE_SET(sendmp, 1949 M_HASHTYPE_RSS_UDP_IPV6); 1950 break; 1951 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: 1952 M_HASHTYPE_SET(sendmp, 1953 M_HASHTYPE_RSS_UDP_IPV6_EX); 1954 break; 1955 default: 1956 M_HASHTYPE_SET(sendmp, 1957 M_HASHTYPE_OPAQUE); 1958 } 1959 } else { 1960 sendmp->m_pkthdr.flowid = que->msix; 1961 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1962 } 1963 } 1964 next_desc: 1965 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1966 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1967 1968 /* Advance our pointers to the next descriptor. 
		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we refresh the mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);
	else
		return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  does not spend time verifying it again.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
	bool	sctp = FALSE;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = TRUE;

	if (status & IXGBE_RXD_STAT_IPCS) {
		if (!(errors & IXGBE_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else
			mp->m_pkthdr.csum_flags = 0;
	}
	if (status & IXGBE_RXD_STAT_L4CS) {
		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
		if (sctp)
			type = CSUM_SCTP_VALID;
#endif
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
	return;
}

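/*
 * Reference note (added): in the advanced RX descriptor the status bits
 * (DD, EOP, VP, IPCS, L4CS, ...) live in the low 16 bits of status_error
 * and the error bits (IPE, TCPE, ...) in bits 24-31, which is why the
 * routine above splits staterr into a u16 status and a u8 errors value.
 * A descriptor with IPCS and L4CS set and no error bits therefore leaves
 * the mbuf carrying CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR with csum_data = 0xffff, telling the stack that both
 * checksums were already verified by the hardware.
 */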
/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *) arg = segs->ds_addr;
	return;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
		struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int	 r;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
			       DBA_ALIGN, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       size,			/* maxsize */
			       1,			/* nsegments */
			       size,			/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &dma->dma_tag);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
		    "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		    "error %u\n", r);
		goto fail_1;
	}
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size,
	    ixgbe_dmamap_cb,
	    &dma->dma_paddr,
	    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
		    "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}

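/*
 * Usage sketch (illustrative only, not part of the driver): a caller
 * that needs a contiguous DMA-able region pairs the two helpers above
 * as follows; "ring_dma" is a hypothetical consumer and error handling
 * is elided:
 *
 *	struct ixgbe_dma_alloc ring_dma;
 *
 *	if (ixgbe_dma_malloc(adapter, 4096, &ring_dma, BUS_DMA_NOWAIT) == 0) {
 *		... use ring_dma.dma_vaddr (KVA) and
 *		    ring_dma.dma_paddr (bus address) ...
 *		ixgbe_dma_free(adapter, &ring_dma);
 *	}
 */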
2133 * 2134 **********************************************************************/ 2135 int 2136 ixgbe_allocate_queues(struct adapter *adapter) 2137 { 2138 device_t dev = adapter->dev; 2139 struct ix_queue *que; 2140 struct tx_ring *txr; 2141 struct rx_ring *rxr; 2142 int rsize, tsize, error = IXGBE_SUCCESS; 2143 int txconf = 0, rxconf = 0; 2144 #ifdef PCI_IOV 2145 enum ixgbe_iov_mode iov_mode; 2146 #endif 2147 2148 /* First allocate the top level queue structs */ 2149 if (!(adapter->queues = 2150 (struct ix_queue *) malloc(sizeof(struct ix_queue) * 2151 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2152 device_printf(dev, "Unable to allocate queue memory\n"); 2153 error = ENOMEM; 2154 goto fail; 2155 } 2156 2157 /* First allocate the TX ring struct memory */ 2158 if (!(adapter->tx_rings = 2159 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 2160 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2161 device_printf(dev, "Unable to allocate TX ring memory\n"); 2162 error = ENOMEM; 2163 goto tx_fail; 2164 } 2165 2166 /* Next allocate the RX */ 2167 if (!(adapter->rx_rings = 2168 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 2169 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2170 device_printf(dev, "Unable to allocate RX ring memory\n"); 2171 error = ENOMEM; 2172 goto rx_fail; 2173 } 2174 2175 /* For the ring itself */ 2176 tsize = roundup2(adapter->num_tx_desc * 2177 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); 2178 2179 #ifdef PCI_IOV 2180 iov_mode = ixgbe_get_iov_mode(adapter); 2181 adapter->pool = ixgbe_max_vfs(iov_mode); 2182 #else 2183 adapter->pool = 0; 2184 #endif 2185 /* 2186 * Now set up the TX queues, txconf is needed to handle the 2187 * possibility that things fail midcourse and we need to 2188 * undo memory gracefully 2189 */ 2190 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 2191 /* Set up some basics */ 2192 txr = &adapter->tx_rings[i]; 2193 txr->adapter = adapter; 2194 #ifdef PCI_IOV 2195 txr->me = ixgbe_pf_que_index(iov_mode, i); 2196 #else 2197 txr->me = i; 2198 #endif 2199 txr->num_desc = adapter->num_tx_desc; 2200 2201 /* Initialize the TX side lock */ 2202 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 2203 device_get_nameunit(dev), txr->me); 2204 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 2205 2206 if (ixgbe_dma_malloc(adapter, tsize, 2207 &txr->txdma, BUS_DMA_NOWAIT)) { 2208 device_printf(dev, 2209 "Unable to allocate TX Descriptor memory\n"); 2210 error = ENOMEM; 2211 goto err_tx_desc; 2212 } 2213 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; 2214 bzero((void *)txr->tx_base, tsize); 2215 2216 /* Now allocate transmit buffers for the ring */ 2217 if (ixgbe_allocate_transmit_buffers(txr)) { 2218 device_printf(dev, 2219 "Critical Failure setting up transmit buffers\n"); 2220 error = ENOMEM; 2221 goto err_tx_desc; 2222 } 2223 #ifndef IXGBE_LEGACY_TX 2224 /* Allocate a buf ring */ 2225 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, 2226 M_WAITOK, &txr->tx_mtx); 2227 if (txr->br == NULL) { 2228 device_printf(dev, 2229 "Critical Failure setting up buf ring\n"); 2230 error = ENOMEM; 2231 goto err_tx_desc; 2232 } 2233 #endif 2234 } 2235 2236 /* 2237 * Next the RX queues... 
2238 */ 2239 rsize = roundup2(adapter->num_rx_desc * 2240 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); 2241 for (int i = 0; i < adapter->num_queues; i++, rxconf++) { 2242 rxr = &adapter->rx_rings[i]; 2243 /* Set up some basics */ 2244 rxr->adapter = adapter; 2245 #ifdef PCI_IOV 2246 rxr->me = ixgbe_pf_que_index(iov_mode, i); 2247 #else 2248 rxr->me = i; 2249 #endif 2250 rxr->num_desc = adapter->num_rx_desc; 2251 2252 /* Initialize the RX side lock */ 2253 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", 2254 device_get_nameunit(dev), rxr->me); 2255 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); 2256 2257 if (ixgbe_dma_malloc(adapter, rsize, 2258 &rxr->rxdma, BUS_DMA_NOWAIT)) { 2259 device_printf(dev, 2260 "Unable to allocate RxDescriptor memory\n"); 2261 error = ENOMEM; 2262 goto err_rx_desc; 2263 } 2264 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr; 2265 bzero((void *)rxr->rx_base, rsize); 2266 2267 /* Allocate receive buffers for the ring*/ 2268 if (ixgbe_allocate_receive_buffers(rxr)) { 2269 device_printf(dev, 2270 "Critical Failure setting up receive buffers\n"); 2271 error = ENOMEM; 2272 goto err_rx_desc; 2273 } 2274 } 2275 2276 /* 2277 ** Finally set up the queue holding structs 2278 */ 2279 for (int i = 0; i < adapter->num_queues; i++) { 2280 que = &adapter->queues[i]; 2281 que->adapter = adapter; 2282 que->me = i; 2283 que->txr = &adapter->tx_rings[i]; 2284 que->rxr = &adapter->rx_rings[i]; 2285 } 2286 2287 return (0); 2288 2289 err_rx_desc: 2290 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) 2291 ixgbe_dma_free(adapter, &rxr->rxdma); 2292 err_tx_desc: 2293 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) 2294 ixgbe_dma_free(adapter, &txr->txdma); 2295 free(adapter->rx_rings, M_DEVBUF); 2296 rx_fail: 2297 free(adapter->tx_rings, M_DEVBUF); 2298 tx_fail: 2299 free(adapter->queues, M_DEVBUF); 2300 fail: 2301 return (error); 2302 } 2303