/******************************************************************************

  Copyright (c) 2001-2014, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
#endif

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  Also this will cause IP forwarding to
**  fail and that can't be controlled by
**  the stack as LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tuneable
**  interface; enabling it would require
**  recompiling the driver.
*/
static bool ixgbe_rsc_enable = FALSE;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
#endif

/* Shared PCI config read/write */
inline u16
ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
{
	u16 value;

	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
	    reg, 2);

	return (value);
}

inline void
ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
{
	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
	    reg, value, 2);

	return;
}

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void	ixgbe_free_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void	ixgbe_free_receive_buffers(struct rx_ring *);

static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int	ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
#endif
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	struct mbuf    *m_head;
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (ixgbe_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}
	return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
	return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int		i, err = 0;
#ifdef RSS
	uint32_t bucket_id;
#endif

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU is in.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0)
			/* TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		else
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
	} else
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}

int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		enqueued = 0, err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

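/*
** Deferred start: when ixgbe_mq_start() cannot take the TX lock it
** enqueues txr->txq_task on the queue's taskqueue instead of spinning;
** the handler below later drains whatever was left in the buf_ring
** once the lock is available.
*/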
/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixgbe_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/

static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter	*adapter = txr->adapter;
	u32		olinfo_status = 0, cmd_type_len;
	int		i, j, error, nsegs;
	int		first;
	bool		remap = TRUE;
	struct mbuf	*m_head;
	bus_dma_segment_t segs[adapter->num_segs];
	bus_dmamap_t	map;
	struct ixgbe_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];
	map = txbuf->map;

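	/*
	 * If the load below fails with EFBIG the chain has more segments
	 * than the TX DMA tag allows; it is collapsed once with m_defrag()
	 * and the load retried.  If it still does not fit, or ENOMEM is
	 * returned, the error is passed back with the mbuf intact so the
	 * caller may requeue it; any other error frees the mbuf and drops
	 * the packet.
	 */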
	/*
	 * Map the packet for DMA.
	 */
retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {
		struct mbuf *m;

		switch (error) {
		case EFBIG:
			/* Try it again? - one try */
			if (remap == TRUE) {
				remap = FALSE;
				m = m_defrag(*m_headp, M_NOWAIT);
				if (m == NULL) {
					adapter->mbuf_defrag_failed++;
					m_freem(*m_headp);
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m;
				goto retry;
			} else
				return (error);
		case ENOMEM:
			txr->no_tx_dma_setup++;
			return (error);
		default:
			txr->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->tx_avail - 2) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	** Set up the appropriate offload context;
	** this will consume the first descriptor
	*/
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)
			*m_headp = NULL;
		return (error);
	}

#ifdef IXGBE_FDIR
	/* Do the flow director magic */
	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
		++txr->atr_count;
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);
			txr->atr_count = 0;
		}
	}
#endif

	olinfo_status |= IXGBE_ADVTXD_CC;
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)
			i = 0;
	}

	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	/*
	** Here we swap the map so the last descriptor,
	** which gets the completion interrupt, has the
	** real map, and the first descriptor gets the
	** unused map from this descriptor.
	*/
	txr->tx_buffers[first].map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop = txd;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

	/* Mark queue as having work */
	if (txr->busy == 0)
		txr->busy = 1;

	return (0);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

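	/*
	 * The tag created below bounds what a single packet mapping may
	 * look like: at most IXGBE_TSO_SIZE bytes in total, split across
	 * no more than adapter->num_segs segments of up to PAGE_SIZE each.
	 * Chains that exceed this show up as EFBIG in ixgbe_xmit() and
	 * trigger the m_defrag() retry there.
	 */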
	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
			       bus_get_dma_tag(adapter->dev),	/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXGBE_TSO_SIZE,		/* maxsize */
			       adapter->num_segs,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->txtag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP descriptor pointer */
		txbuf->eop = NULL;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);

	return (0);
}

/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	}
	free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int i;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#ifdef IXGBE_LEGACY_TX
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}

/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/

static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	int	ehdrlen, ip_hlen = 0;
	u16	etype;
	u8	ipproto = 0;
	int	offload = TRUE;
	int	ctxd = txr->next_avail_desc;
	u16	vtag = 0;

	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor. Hence
	** we need to make one even if not doing offloads.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

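	/*
	 * vlan_macip_lens packs three fields of the context descriptor:
	 * the VLAN tag above IXGBE_ADVTXD_VLAN_SHIFT, the Ethernet header
	 * length above IXGBE_ADVTXD_MACLEN_SHIFT, and the IP header length
	 * in the low bits, filled in as the headers are parsed below.
	 */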
	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)
		goto no_offloads;

	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		/* XXX-BZ this will go badly in case of ext hdrs. */
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;

	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		break;

	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		break;

#if __FreeBSD_version >= 800000
	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	if (offload) /* For the TX descriptor setup */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

no_offloads:
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (0);
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;


	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

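	/*
	 * For TSO the hardware inserts the TCP checksum in every segment,
	 * but it expects software to seed th_sum with the pseudo-header
	 * checksum (and to clear ip_sum for IPv4); that is what the
	 * in_pseudo()/in6_cksum_pseudo() calls below are doing.
	 */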
	switch (ntohs(eh_type)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return (ENXIO);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
	++txr->tso_tx;
	return (0);
}


/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
void
ixgbe_txeof(struct tx_ring *txr)
{
#ifdef DEV_NETMAP
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;
#endif
	u32	work, processed = 0;
	u16	limit = txr->process_limit;
	struct ixgbe_tx_buf *buf;
	union ixgbe_adv_tx_desc *txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		txd = txr->tx_base;
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(ifp, txr->me);
		}
		return;
	}
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {
		txr->busy = 0;
		return;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		if (buf->m_head) {
			txr->bytes +=
			    buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag,
			    buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			buf->map = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes +=
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;
		}
		++txr->packets;
		++processed;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Queue Hang detection: we know there's
	** work outstanding or the first return
	** would have been taken, so increment busy
	** if nothing managed to get cleaned; then
	** in local_timer it will be checked and
	** marked as HUNG if it exceeds a MAX attempt.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything gets cleaned we reset state to 1,
	** note this will turn off HUNG if it's set.
	*/
	if (processed)
		txr->busy = 1;

	if (txr->tx_avail == txr->num_desc)
		txr->busy = 0;

	return;
}


#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu.  This would be a performance
** hit, but we only do it at IXGBE_FDIR_RATE of
** packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ip			*ip;
	struct tcphdr			*th;
	struct udphdr			*uh;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				ehdrlen, ip_hlen;
	u16				etype;

	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		break;
	case IPPROTO_UDP:
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
		break;
	default:
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
#endif /* IXGBE_FDIR */

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}

/*********************************************************************
 *
 *  Initialize Hardware RSC (LRO) feature on 82599
 *  for an RX ring, this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPV4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
 *
 **********************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct ixgbe_hw		*hw = &adapter->hw;
	u32			rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[1];
	struct ixgbe_rx_buf	*rxbuf;
	struct mbuf		*mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
		} else
			mp = rxbuf->buf;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

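		/*
		 * IXGBE_RX_COPY is set by ixgbe_rxeof() when a small frame
		 * was copied into a separate header mbuf; the original
		 * cluster and its DMA mapping were left in place, so only
		 * the descriptor's packet address needs to be rewritten
		 * from the cached value.
		 */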
		/*
		 * If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
			if (error != 0) {
				printf("Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
				m_free(mp);
				rxbuf->buf = NULL;
				goto update;
			}
			rxbuf->buf = mp;
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
		} else {
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;
		}

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw,
		    rxr->tail, rxr->next_to_refresh);
	return;
}

/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
int
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_rx_buf	*rxbuf;
	int			i, bsize, error;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	if (!(rxr->rx_buffers =
	    (struct ixgbe_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
				   1, 0,		/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MJUM16BYTES,		/* maxsize */
				   1,			/* nsegments */
				   MJUM16BYTES,		/* maxsegsize */
				   0,			/* flags */
				   NULL,		/* lockfunc */
				   NULL,		/* lockfuncarg */
				   &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &rxbuf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX dma map\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	ixgbe_free_receive_structures(adapter);
	return (error);
}


static void
ixgbe_free_receive_ring(struct rx_ring *rxr)
{
	struct ixgbe_rx_buf	*rxbuf;
	int i;

	for (i = 0; i < rxr->num_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf != NULL) {
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			rxbuf->buf->m_flags |= M_PKTHDR;
			m_freem(rxbuf->buf);
			rxbuf->buf = NULL;
			rxbuf->flags = 0;
		}
	}
}

/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct ixgbe_rx_buf	*rxbuf;
	bus_dma_segment_t	seg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	adapter = rxr->adapter;
	ifp = adapter->ifp;
	dev = adapter->dev;

	/* Clear the ring contents */
	IXGBE_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixgbe_setup_transmit_ring() */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {
		struct mbuf	*mp;

		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->flags = 0;
		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = rxbuf->buf;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);
	}


	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->rx_copies = 0;
	rxr->rx_bytes = 0;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface:
	*/
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IXGBE_RX_UNLOCK(rxr);
	return (0);

fail:
	ixgbe_free_receive_ring(rxr);
	IXGBE_RX_UNLOCK(rxr);
	return (error);
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself.  'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		ixgbe_free_receive_ring(rxr);
	}

	return (ENOBUFS);
}


/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl		*lro = &rxr->lro;
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		tcp_lro_free(lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}


/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct ixgbe_rx_buf	*rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->buf != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->buf->m_flags |= M_PKTHDR;
				m_freem(rxbuf->buf);
			}
			rxbuf->buf = NULL;
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	return;
}

static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{

	/*
	 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXGBE_RX_LOCK(rxr);
}

static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf	*rbuf;

	rbuf = &rxr->rx_buffers[i];


	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}

	rbuf->flags = 0;

	return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u16			count = rxr->process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
	u16			pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if 0 // VF-only
#if __FreeBSD_version >= 1100036
			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** On 82599 which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors, rather the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before; it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head
		** that determines what we are
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies++;
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);
#if __FreeBSD_version >= 800000
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.lower.hi_dword.rss);
			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
				break;
			default:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
#else /* RSS */
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif /* RSS */
#endif /* FreeBSD_version */
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);
	else
		return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack does not spend time verifying it again.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
	bool	sctp = FALSE;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = TRUE;

	if (status & IXGBE_RXD_STAT_IPCS) {
		if (!(errors & IXGBE_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else
			mp->m_pkthdr.csum_flags = 0;
	}
	if (status & IXGBE_RXD_STAT_L4CS) {
		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
		if (sctp)
			type = CSUM_SCTP_VALID;
#endif
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
	return;
}

/********************************************************************
 * Manage DMA'able memory.
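 *
 * The helpers below follow the usual busdma sequence: create a tag,
 * allocate DMA-safe memory, then load the map and let ixgbe_dmamap_cb()
 * capture the bus address.  A caller (for example the ring setup in
 * ixgbe_allocate_queues() further down) uses them roughly like this,
 * as a sketch with error handling elided:
 *
 *	struct ixgbe_dma_alloc dma;
 *
 *	if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr and dma.dma_paddr ...
 *		ixgbe_dma_free(adapter, &dma);
 *	}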
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *) arg = segs->ds_addr;
	return;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t	dev = adapter->dev;
	int		r;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
	    DBA_ALIGN, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    size,			/* maxsize */
	    1,				/* nsegments */
	    size,			/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &dma->dma_tag);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
		    "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		    "error %u\n", r);
		goto fail_1;
	}
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size,
	    ixgbe_dmamap_cb,
	    &dma->dma_paddr,
	    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
		    "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}


/*********************************************************************
 *
 * Allocate memory for the transmit and receive rings, and then
 * the descriptors associated with each, called only once at attach.
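 *
 * The order below is: the ix_queue array, then the tx_ring and rx_ring
 * arrays, then one DBA_ALIGN-rounded descriptor area per ring (via
 * ixgbe_dma_malloc() above), and finally the per-ring buffer structures
 * and, for multiqueue TX, a buf_ring.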
2116 * 2117 **********************************************************************/ 2118 int 2119 ixgbe_allocate_queues(struct adapter *adapter) 2120 { 2121 device_t dev = adapter->dev; 2122 struct ix_queue *que; 2123 struct tx_ring *txr; 2124 struct rx_ring *rxr; 2125 int rsize, tsize, error = IXGBE_SUCCESS; 2126 int txconf = 0, rxconf = 0; 2127 2128 /* First allocate the top level queue structs */ 2129 if (!(adapter->queues = 2130 (struct ix_queue *) malloc(sizeof(struct ix_queue) * 2131 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2132 device_printf(dev, "Unable to allocate queue memory\n"); 2133 error = ENOMEM; 2134 goto fail; 2135 } 2136 2137 /* First allocate the TX ring struct memory */ 2138 if (!(adapter->tx_rings = 2139 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 2140 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2141 device_printf(dev, "Unable to allocate TX ring memory\n"); 2142 error = ENOMEM; 2143 goto tx_fail; 2144 } 2145 2146 /* Next allocate the RX */ 2147 if (!(adapter->rx_rings = 2148 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 2149 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2150 device_printf(dev, "Unable to allocate RX ring memory\n"); 2151 error = ENOMEM; 2152 goto rx_fail; 2153 } 2154 2155 /* For the ring itself */ 2156 tsize = roundup2(adapter->num_tx_desc * 2157 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); 2158 2159 /* 2160 * Now set up the TX queues, txconf is needed to handle the 2161 * possibility that things fail midcourse and we need to 2162 * undo memory gracefully 2163 */ 2164 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 2165 /* Set up some basics */ 2166 txr = &adapter->tx_rings[i]; 2167 txr->adapter = adapter; 2168 txr->me = i; 2169 txr->num_desc = adapter->num_tx_desc; 2170 2171 /* Initialize the TX side lock */ 2172 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 2173 device_get_nameunit(dev), txr->me); 2174 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 2175 2176 if (ixgbe_dma_malloc(adapter, tsize, 2177 &txr->txdma, BUS_DMA_NOWAIT)) { 2178 device_printf(dev, 2179 "Unable to allocate TX Descriptor memory\n"); 2180 error = ENOMEM; 2181 goto err_tx_desc; 2182 } 2183 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; 2184 bzero((void *)txr->tx_base, tsize); 2185 2186 /* Now allocate transmit buffers for the ring */ 2187 if (ixgbe_allocate_transmit_buffers(txr)) { 2188 device_printf(dev, 2189 "Critical Failure setting up transmit buffers\n"); 2190 error = ENOMEM; 2191 goto err_tx_desc; 2192 } 2193 #ifndef IXGBE_LEGACY_TX 2194 /* Allocate a buf ring */ 2195 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, 2196 M_WAITOK, &txr->tx_mtx); 2197 if (txr->br == NULL) { 2198 device_printf(dev, 2199 "Critical Failure setting up buf ring\n"); 2200 error = ENOMEM; 2201 goto err_tx_desc; 2202 } 2203 #endif 2204 } 2205 2206 /* 2207 * Next the RX queues... 
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = i;
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
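
/*
 * Illustrative sketch (hypothetical helper, compiled out): the layout
 * built by ixgbe_allocate_queues() pairs queues[i] with tx_rings[i] and
 * rx_rings[i].  Something like the following could walk that layout for
 * debugging; it relies only on fields initialized above.
 */
#if 0
static void
ixgbe_print_queue_layout(struct adapter *adapter)
{
	for (int i = 0; i < adapter->num_queues; i++) {
		struct ix_queue *que = &adapter->queues[i];

		device_printf(adapter->dev,
		    "queue %d: txr %d (%d desc), rxr %d (%d desc)\n",
		    que->me, que->txr->me, (int)que->txr->num_desc,
		    que->rxr->me, (int)que->rxr->num_desc);
	}
}
#endif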