/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
#endif

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  Also this will cause IP forwarding to
**  fail and that can't be controlled by
**  the stack as LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tuneable
**  interface, this would need to be compiled
**  to enable.
*/
static bool ixgbe_rsc_enable = FALSE;

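/*
** Note (illustrative): as described above there is no tunable for HW RSC;
** enabling it for experimentation means editing the initializer above and
** rebuilding the driver, e.g.:
**
**	static bool ixgbe_rsc_enable = TRUE;
**
** and the interface must also have LRO enabled (ifconfig ix0 lro), since
** ixgbe_setup_hw_rsc() backs off when IFCAP_LRO is clear.
*/
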
#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool, this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
#endif

/* Shared PCI config read/write */
inline u16
ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
{
	u16 value;

	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
	    reg, 2);

	return (value);
}

inline void
ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
{
	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
	    reg, value, 2);

	return;
}

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void	ixgbe_free_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void	ixgbe_free_receive_buffers(struct rx_ring *);

static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int	ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
#endif
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct mbuf	*m_head;
	struct adapter	*adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (ixgbe_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}
	return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
	return;
}

#else /* !IXGBE_LEGACY_TX */
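
/*
** Note (illustrative summary of the policy coded in ixgbe_mq_start()
** below): when the mbuf carries an RSS hash/flowid, the TX ring index is
** derived from it (RSS bucket or flowid modulo num_queues), otherwise the
** current CPU id is used; e.g. with 4 queues a flowid of 6 lands on ring
** 6 % 4 = 2. If the chosen ring is not marked in active_queues, the first
** active ring found via ffsl() is used instead.
*/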

/*
** Multiqueue Transmit driver
**
*/
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int		i, err = 0;
#ifdef RSS
	uint32_t	bucket_id;
#endif

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the CPU we're currently on maps to.
	 */
#if __FreeBSD_version < 1100054
	if (m->m_flags & M_FLOWID) {
#else
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#endif
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0)
			/* TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		else
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
	} else
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}

int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		enqueued = 0, err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixgbe_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
 * Flush all ring buffers
 */
void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

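
/*
** Note (illustrative): a single frame can consume one optional context
** descriptor (for VLAN/checksum/TSO setup) plus one data descriptor per
** DMA segment, which is why ixgbe_xmit() below refuses to start unless
** nsegs <= tx_avail - 2; e.g. a frame split into 5 segments needs 6
** descriptors plus slack.
*/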

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/

static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter	*adapter = txr->adapter;
	u32		olinfo_status = 0, cmd_type_len;
	int		i, j, error, nsegs;
	int		first;
	bool		remap = TRUE;
	struct mbuf	*m_head;
	bus_dma_segment_t segs[adapter->num_segs];
	bus_dmamap_t	map;
	struct ixgbe_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {
		struct mbuf *m;

		switch (error) {
		case EFBIG:
			/* Try it again? - one try */
			if (remap == TRUE) {
				remap = FALSE;
				/*
				 * XXX: m_defrag will choke on
				 * non-MCLBYTES-sized clusters
				 */
				m = m_defrag(*m_headp, M_NOWAIT);
				if (m == NULL) {
					adapter->mbuf_defrag_failed++;
					m_freem(*m_headp);
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m;
				goto retry;
			} else
				return (error);
		case ENOMEM:
			txr->no_tx_dma_setup++;
			return (error);
		default:
			txr->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->tx_avail - 2) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the appropriate offload context
	 * this will consume the first descriptor
	 */
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)
			*m_headp = NULL;
		return (error);
	}

#ifdef IXGBE_FDIR
	/* Do the flow director magic */
	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
		++txr->atr_count;
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);
			txr->atr_count = 0;
		}
	}
#endif

	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)
			i = 0;
	}

	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	/*
	 * Here we swap the map so the last descriptor,
	 * which gets the completion interrupt, has the
	 * real map, and the first descriptor gets the
	 * unused map from this descriptor.
	 */
	txr->tx_buffers[first].map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop = txd;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

	/* Mark queue as having work */
	if (txr->busy == 0)
		txr->busy = 1;

	return (0);
}

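
/*
** Note (illustrative): the IXGBE_WRITE_REG() of txr->tail at the end of
** ixgbe_xmit() above is the TDT doorbell; the hardware then owns the
** descriptors between its head and the tail just written. txr->busy set
** there is the handshake with the watchdog: ixgbe_txeof() bumps it while
** work stays outstanding and clears it when the ring drains, and the
** local timer marks the queue HUNG if it keeps climbing.
*/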

/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
	    bus_get_dma_tag(adapter->dev),	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IXGBE_TSO_SIZE,	/* maxsize */
	    adapter->num_segs,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &txr->txtag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag, txbuf->map,
			    NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP descriptor pointer */
		txbuf->eop = NULL;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);

	return (0);
}

/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	}
	free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int i;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#ifdef IXGBE_LEGACY_TX
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}

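/*
** Note (illustrative): the "advanced context descriptor" written by the
** two setup routines below is just a pair of packed 32-bit words:
** vlan_macip_lens carries the VLAN tag in its upper bits plus the MAC
** and IP header lengths (shifted by IXGBE_ADVTXD_VLAN_SHIFT and
** IXGBE_ADVTXD_MACLEN_SHIFT), while type_tucmd_mlhl selects IPv4/IPv6
** and the L4 protocol so the hardware knows which checksums to insert.
*/
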
/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/

static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	u32	vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	int	ehdrlen, ip_hlen = 0;
	u16	etype;
	u8	ipproto = 0;
	int	offload = TRUE;
	int	ctxd = txr->next_avail_desc;
	u16	vtag = 0;

	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor. Hence
	** we need to make one even if not doing offloads.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
		return (0);

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)
		goto no_offloads;

	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		/* XXX-BZ this will go badly in case of ext hdrs. */
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;

	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		break;

	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		break;

#if __FreeBSD_version >= 800000
	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	if (offload) /* For the TX descriptor setup */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

no_offloads:
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (0);
}

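/*
** Note (illustrative): for TSO the driver seeds the checksum fields the
** hardware expects before handing the frame over; ixgbe_tso_setup()
** below zeroes the IPv4 header checksum and stores a pseudo-header
** checksum (without the length) in th_sum, then programs the MSS and
** TCP header length into mss_l4len_idx so the hardware can rewrite the
** headers of every segment it carves out.
*/
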
/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;


	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

	switch (ntohs(eh_type)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return (ENXIO);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
	++txr->tso_tx;
	return (0);
}


/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
void
ixgbe_txeof(struct tx_ring *txr)
{
#ifdef DEV_NETMAP
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;
#endif
	u32 work, processed = 0;
	u16 limit = txr->process_limit;
	struct ixgbe_tx_buf *buf;
	union ixgbe_adv_tx_desc *txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		txd = txr->tx_base;
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(ifp, txr->me);
		}
		return;
	}
#endif /* DEV_NETMAP */
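
	/*
	 * Note (illustrative): the scan below walks descriptors from
	 * next_to_clean toward the hardware's completion point using a
	 * negative-offset trick: "work" is biased by -num_desc so the
	 * wrap test is simply (!work). For example, with 1024 descriptors
	 * and next_to_clean = 1020, work starts at -4 and reaches zero
	 * after four increments, which is exactly when buf/txd must be
	 * reset to the start of the ring.
	 */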

	if (txr->tx_avail == txr->num_desc) {
		txr->busy = 0;
		return;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		if (buf->m_head) {
			txr->bytes +=
			    buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag,
			    buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes +=
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;
		}
		++txr->packets;
		++processed;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Queue Hang detection, we know there's
	** work outstanding or the first return
	** would have been taken, so increment busy
	** if nothing managed to get cleaned, then
	** in local_timer it will be checked and
	** marked as HUNG if it exceeds a MAX attempt.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything gets cleaned we reset state to 1,
	** note this will turn off HUNG if it's set.
	*/
	if (processed)
		txr->busy = 1;

	if (txr->tx_avail == txr->num_desc)
		txr->busy = 0;

	return;
}


#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu. This could be a performance
** hit, but we only sample one packet in every
** IXGBE_FDIR_RATE.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ip			*ip;
	struct tcphdr			*th;
	struct udphdr			*uh;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				ehdrlen, ip_hlen;
	u16				etype;

	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		break;
	case IPPROTO_UDP:
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
		break;
	default:
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
#endif /* IXGBE_FDIR */

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}

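/*
** Note (illustrative): the MAXDESC choice in ixgbe_setup_hw_rsc() below
** scales with the receive cluster size so a merged frame stays under the
** 64K limit mentioned there; e.g. 4KB (MJUMPAGESIZE) clusters use
** MAXDESC_8, roughly a 32KB aggregate, while 16K clusters are capped at
** a single descriptor.
*/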
/*********************************************************************
 *
 *  Initialize Hardware RSC (LRO) feature on 82599
 *  for an RX ring, this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPV4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
 *
 **********************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
}

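/*
** Note (illustrative): ixgbe_refresh_mbufs() below deliberately trails
** the hardware: it only advances next_to_refresh (and hence the RDT tail
** write) past slots whose mbuf and DMA address have actually been
** replenished, and if an allocation fails it simply returns with its
** state intact so a later call can resume where it left off.
*/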
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[1];
	struct ixgbe_rx_buf	*rxbuf;
	struct mbuf		*mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
		} else
			mp = rxbuf->buf;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/* If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
			if (error != 0) {
				printf("Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
				m_free(mp);
				rxbuf->buf = NULL;
				goto update;
			}
			rxbuf->buf = mp;
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
		} else {
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;
		}

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw,
		    rxr->tail, rxr->next_to_refresh);
	return;
}

/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
int
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_rx_buf	*rxbuf;
	int			i, bsize, error;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	if (!(rxr->rx_buffers =
	    (struct ixgbe_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MJUM16BYTES,	/* maxsize */
	    1,			/* nsegments */
	    MJUM16BYTES,	/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX dma map\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	ixgbe_free_receive_structures(adapter);
	return (error);
}


static void
ixgbe_free_receive_ring(struct rx_ring *rxr)
{
	struct ixgbe_rx_buf	*rxbuf;
	int			i;

	for (i = 0; i < rxr->num_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf != NULL) {
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			rxbuf->buf->m_flags |= M_PKTHDR;
			m_freem(rxbuf->buf);
			rxbuf->buf = NULL;
			rxbuf->flags = 0;
		}
	}
}


/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct ixgbe_rx_buf	*rxbuf;
	bus_dma_segment_t	seg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	adapter = rxr->adapter;
	ifp = adapter->ifp;
	dev = adapter->dev;

	/* Clear the ring contents */
	IXGBE_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixgbe_setup_transmit_ring() */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {
		struct mbuf	*mp;

		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->flags = 0;
		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = rxbuf->buf;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);
	}


	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->rx_copies = 0;
	rxr->rx_bytes = 0;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface:
	*/
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IXGBE_RX_UNLOCK(rxr);
	return (0);

fail:
	ixgbe_free_receive_ring(rxr);
	IXGBE_RX_UNLOCK(rxr);
	return (error);
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		ixgbe_free_receive_ring(rxr);
	}

	return (ENOBUFS);
}


/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl	*lro = &rxr->lro;
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		tcp_lro_free(lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}


/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct ixgbe_rx_buf	*rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->buf != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->buf->m_flags |= M_PKTHDR;
				m_freem(rxbuf->buf);
			}
			rxbuf->buf = NULL;
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	return;
}

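/*
** Note (illustrative): ixgbe_rx_input() below drops the RX lock around
** the call into (*ifp->if_input)() and re-takes it afterwards, so the
** stack never runs with the ring lock held; frames that qualify for
** soft LRO are instead queued with tcp_lro_rx() and flushed later from
** ixgbe_rxeof() while still under the lock.
*/
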
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{

	/*
	 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXGBE_RX_LOCK(rxr);
}

static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf	*rbuf;

	rbuf = &rxr->rx_buffers[i];


	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	bus_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;

	return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u16			count = rxr->process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
	u16			pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (IXGBE_IS_VF(adapter))
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** On 82599 which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors, rather the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before, it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head
		** that determines what we are
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies++;
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);
#if __FreeBSD_version >= 800000
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.lower.hi_dword.rss);
#if __FreeBSD_version < 1100054
			sendmp->m_flags |= M_FLOWID;
#endif
			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
				break;
			default:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
#else /* RSS */
			sendmp->m_pkthdr.flowid = que->msix;
#if __FreeBSD_version >= 1100054
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#else
			sendmp->m_flags |= M_FLOWID;
#endif
#endif /* RSS */
#endif /* FreeBSD_version */
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
next_desc:
        bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        /* Advance our pointers to the next descriptor. */
        if (++i == rxr->num_desc)
            i = 0;

        /* Now send to the stack or do LRO */
        if (sendmp != NULL) {
            rxr->next_to_check = i;
            ixgbe_rx_input(rxr, ifp, sendmp, ptype);
            i = rxr->next_to_check;
        }

        /* Every 8 descriptors we go to refresh mbufs */
        if (processed == 8) {
            ixgbe_refresh_mbufs(rxr, i);
            processed = 0;
        }
    }

    /* Refresh any remaining buf structs */
    if (ixgbe_rx_unrefreshed(rxr))
        ixgbe_refresh_mbufs(rxr, i);

    rxr->next_to_check = i;

    /*
     * Flush any outstanding LRO work
     */
    while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, queued);
    }

    IXGBE_RX_UNLOCK(rxr);

    /*
    ** Still have cleaning to do?
    */
    if ((staterr & IXGBE_RXD_STAT_DD) != 0)
        return (TRUE);
    else
        return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it in software.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
{
    u16  status = (u16) staterr;
    u8   errors = (u8) (staterr >> 24);
    bool sctp = FALSE;

    if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
        (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
        sctp = TRUE;

    if (status & IXGBE_RXD_STAT_IPCS) {
        if (!(errors & IXGBE_RXD_ERR_IPE)) {
            /* IP Checksum Good */
            mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
            mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
        } else
            mp->m_pkthdr.csum_flags = 0;
    }
    if (status & IXGBE_RXD_STAT_L4CS) {
        u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
        if (sctp)
            type = CSUM_SCTP_VALID;
#endif
        if (!(errors & IXGBE_RXD_ERR_TCPE)) {
            mp->m_pkthdr.csum_flags |= type;
            if (!sctp)
                mp->m_pkthdr.csum_data = htons(0xffff);
        }
    }
    return;
}
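/*
** A note on the flags set above: CSUM_DATA_VALID | CSUM_PSEUDO_HDR
** together with csum_data = 0xffff is the FreeBSD convention for
** "the L4 checksum, pseudo-header included, has already been
** verified"; the TCP/UDP input paths XOR csum_data with 0xffff and
** accept the zero result instead of computing a software checksum.
** SCTP gets CSUM_SCTP_VALID instead, which carries no csum_data.
*/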
/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
    if (error)
        return;
    *(bus_addr_t *) arg = segs->ds_addr;
    return;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
    device_t dev = adapter->dev;
    int      r;

    r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
        DBA_ALIGN, 0,          /* alignment, bounds */
        BUS_SPACE_MAXADDR,     /* lowaddr */
        BUS_SPACE_MAXADDR,     /* highaddr */
        NULL, NULL,            /* filter, filterarg */
        size,                  /* maxsize */
        1,                     /* nsegments */
        size,                  /* maxsegsize */
        BUS_DMA_ALLOCNOW,      /* flags */
        NULL,                  /* lockfunc */
        NULL,                  /* lockfuncarg */
        &dma->dma_tag);
    if (r != 0) {
        device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
            "error %u\n", r);
        goto fail_0;
    }
    r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
        BUS_DMA_NOWAIT, &dma->dma_map);
    if (r != 0) {
        device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
            "error %u\n", r);
        goto fail_1;
    }
    r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
        size,
        ixgbe_dmamap_cb,
        &dma->dma_paddr,
        mapflags | BUS_DMA_NOWAIT);
    if (r != 0) {
        device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
            "error %u\n", r);
        goto fail_2;
    }
    dma->dma_size = size;
    return (0);
fail_2:
    bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
    bus_dma_tag_destroy(dma->dma_tag);
fail_0:
    dma->dma_tag = NULL;
    return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
    bus_dmamap_sync(dma->dma_tag, dma->dma_map,
        BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
    bus_dmamap_unload(dma->dma_tag, dma->dma_map);
    bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
    bus_dma_tag_destroy(dma->dma_tag);
}
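/*
** Illustrative sketch only (kept under #if 0, not compiled): the
** typical life cycle of an ixgbe_dma_alloc as used by the ring setup
** below.  The local names example_dma/example_size are hypothetical.
*/
#if 0
    struct ixgbe_dma_alloc example_dma;
    int                    example_size;

    /* Descriptor areas are sized and rounded up to DBA_ALIGN */
    example_size = roundup2(adapter->num_tx_desc *
        sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

    if (ixgbe_dma_malloc(adapter, example_size,
        &example_dma, BUS_DMA_NOWAIT) == 0) {
        /* dma_vaddr is the KVA, dma_paddr the bus address */
        bzero(example_dma.dma_vaddr, example_size);
        /* ... hand example_dma.dma_paddr to the hardware ... */
        ixgbe_dma_free(adapter, &example_dma);
    }
#endif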
2132 * 2133 **********************************************************************/ 2134 int 2135 ixgbe_allocate_queues(struct adapter *adapter) 2136 { 2137 device_t dev = adapter->dev; 2138 struct ix_queue *que; 2139 struct tx_ring *txr; 2140 struct rx_ring *rxr; 2141 int rsize, tsize, error = IXGBE_SUCCESS; 2142 int txconf = 0, rxconf = 0; 2143 2144 /* First allocate the top level queue structs */ 2145 if (!(adapter->queues = 2146 (struct ix_queue *) malloc(sizeof(struct ix_queue) * 2147 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2148 device_printf(dev, "Unable to allocate queue memory\n"); 2149 error = ENOMEM; 2150 goto fail; 2151 } 2152 2153 /* First allocate the TX ring struct memory */ 2154 if (!(adapter->tx_rings = 2155 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 2156 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2157 device_printf(dev, "Unable to allocate TX ring memory\n"); 2158 error = ENOMEM; 2159 goto tx_fail; 2160 } 2161 2162 /* Next allocate the RX */ 2163 if (!(adapter->rx_rings = 2164 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 2165 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2166 device_printf(dev, "Unable to allocate RX ring memory\n"); 2167 error = ENOMEM; 2168 goto rx_fail; 2169 } 2170 2171 /* For the ring itself */ 2172 tsize = roundup2(adapter->num_tx_desc * 2173 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); 2174 2175 /* 2176 * Now set up the TX queues, txconf is needed to handle the 2177 * possibility that things fail midcourse and we need to 2178 * undo memory gracefully 2179 */ 2180 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 2181 /* Set up some basics */ 2182 txr = &adapter->tx_rings[i]; 2183 txr->adapter = adapter; 2184 txr->me = i; 2185 txr->num_desc = adapter->num_tx_desc; 2186 2187 /* Initialize the TX side lock */ 2188 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 2189 device_get_nameunit(dev), txr->me); 2190 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 2191 2192 if (ixgbe_dma_malloc(adapter, tsize, 2193 &txr->txdma, BUS_DMA_NOWAIT)) { 2194 device_printf(dev, 2195 "Unable to allocate TX Descriptor memory\n"); 2196 error = ENOMEM; 2197 goto err_tx_desc; 2198 } 2199 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; 2200 bzero((void *)txr->tx_base, tsize); 2201 2202 /* Now allocate transmit buffers for the ring */ 2203 if (ixgbe_allocate_transmit_buffers(txr)) { 2204 device_printf(dev, 2205 "Critical Failure setting up transmit buffers\n"); 2206 error = ENOMEM; 2207 goto err_tx_desc; 2208 } 2209 #ifndef IXGBE_LEGACY_TX 2210 /* Allocate a buf ring */ 2211 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, 2212 M_WAITOK, &txr->tx_mtx); 2213 if (txr->br == NULL) { 2214 device_printf(dev, 2215 "Critical Failure setting up buf ring\n"); 2216 error = ENOMEM; 2217 goto err_tx_desc; 2218 } 2219 #endif 2220 } 2221 2222 /* 2223 * Next the RX queues... 
    /*
     * Next the RX queues...
     */
    rsize = roundup2(adapter->num_rx_desc *
        sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
    for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
        rxr = &adapter->rx_rings[i];
        /* Set up some basics */
        rxr->adapter = adapter;
        rxr->me = i;
        rxr->num_desc = adapter->num_rx_desc;

        /* Initialize the RX side lock */
        snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
            device_get_nameunit(dev), rxr->me);
        mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

        if (ixgbe_dma_malloc(adapter, rsize,
            &rxr->rxdma, BUS_DMA_NOWAIT)) {
            device_printf(dev,
                "Unable to allocate RX Descriptor memory\n");
            error = ENOMEM;
            goto err_rx_desc;
        }
        rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
        bzero((void *)rxr->rx_base, rsize);

        /* Allocate receive buffers for the ring */
        if (ixgbe_allocate_receive_buffers(rxr)) {
            device_printf(dev,
                "Critical Failure setting up receive buffers\n");
            error = ENOMEM;
            goto err_rx_desc;
        }
    }

    /*
    ** Finally set up the queue holding structs
    */
    for (int i = 0; i < adapter->num_queues; i++) {
        que = &adapter->queues[i];
        que->adapter = adapter;
        que->me = i;
        que->txr = &adapter->tx_rings[i];
        que->rxr = &adapter->rx_rings[i];
    }

    return (0);

err_rx_desc:
    for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
        ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
    for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
        ixgbe_dma_free(adapter, &txr->txdma);
    free(adapter->rx_rings, M_DEVBUF);
rx_fail:
    free(adapter->tx_rings, M_DEVBUF);
tx_fail:
    free(adapter->queues, M_DEVBUF);
fail:
    return (error);
}
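/*
** Unwind order for ixgbe_allocate_queues(): txconf/rxconf count how
** many rings have had descriptor DMA memory attached, so a failure
** part way through (say rxconf == 2 when the third RX descriptor
** allocation fails) releases only the two RX descriptor areas already
** obtained, then falls through to release every TX descriptor area,
** the ring struct arrays, and finally the queue array, i.e. the
** reverse of the order in which they were allocated.
*/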