/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation 
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without 
  modification, are permitted provided that the following conditions are met:
  
   1. Redistributions of source code must retain the above copyright notice, 
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright 
      notice, this list of conditions and the following disclaimer in the 
      documentation and/or other materials provided with the distribution.
  
   3. Neither the name of the Intel Corporation nor the names of its 
      contributors may be used to endorse or promote products derived from 
      this software without specific prior written permission.
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
#endif

/*
** HW RSC control:
**  This feature only works with IPv4, and only on 82599 and later.
**  It also causes IP forwarding to fail, and unlike LRO that cannot
**  be controlled by the stack.  For all these reasons I've deemed it
**  best to leave this off and not bother with a tunable interface;
**  enabling it requires setting this to TRUE and recompiling.
*/
static bool ixgbe_rsc_enable = FALSE;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the sampling
** rate for TX packets fed to the filter
** pool; with the default of 20, every
** 20th packet is probed.
**
** This feature can be disabled by
** setting this to 0.
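**
** (The per-ring counter that implements the sampling, txr->atr_count,
** is advanced in ixgbe_xmit() and reset each time ixgbe_atr() is
** invoked; see ixgbe_xmit() below.)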
80 */ 81 static int atr_sample_rate = 20; 82 #endif 83 84 /* Shared PCI config read/write */ 85 inline u16 86 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg) 87 { 88 u16 value; 89 90 value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev, 91 reg, 2); 92 93 return (value); 94 } 95 96 inline void 97 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value) 98 { 99 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev, 100 reg, value, 2); 101 102 return; 103 } 104 105 /********************************************************************* 106 * Local Function prototypes 107 *********************************************************************/ 108 static void ixgbe_setup_transmit_ring(struct tx_ring *); 109 static void ixgbe_free_transmit_buffers(struct tx_ring *); 110 static int ixgbe_setup_receive_ring(struct rx_ring *); 111 static void ixgbe_free_receive_buffers(struct rx_ring *); 112 113 static void ixgbe_rx_checksum(u32, struct mbuf *, u32); 114 static void ixgbe_refresh_mbufs(struct rx_ring *, int); 115 static int ixgbe_xmit(struct tx_ring *, struct mbuf **); 116 static int ixgbe_tx_ctx_setup(struct tx_ring *, 117 struct mbuf *, u32 *, u32 *); 118 static int ixgbe_tso_setup(struct tx_ring *, 119 struct mbuf *, u32 *, u32 *); 120 #ifdef IXGBE_FDIR 121 static void ixgbe_atr(struct tx_ring *, struct mbuf *); 122 #endif 123 static __inline void ixgbe_rx_discard(struct rx_ring *, int); 124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, 125 struct mbuf *, u32); 126 127 #ifdef IXGBE_LEGACY_TX 128 /********************************************************************* 129 * Transmit entry point 130 * 131 * ixgbe_start is called by the stack to initiate a transmit. 132 * The driver will remain in this routine as long as there are 133 * packets to transmit and transmit resources are available. 134 * In case resources are not available stack is notified and 135 * the packet is requeued. 136 **********************************************************************/ 137 138 void 139 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) 140 { 141 struct mbuf *m_head; 142 struct adapter *adapter = txr->adapter; 143 144 IXGBE_TX_LOCK_ASSERT(txr); 145 146 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 147 return; 148 if (!adapter->link_active) 149 return; 150 151 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 152 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) 153 break; 154 155 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 156 if (m_head == NULL) 157 break; 158 159 if (ixgbe_xmit(txr, &m_head)) { 160 if (m_head != NULL) 161 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 162 break; 163 } 164 /* Send a copy of the frame to the BPF listener */ 165 ETHER_BPF_MTAP(ifp, m_head); 166 } 167 return; 168 } 169 170 /* 171 * Legacy TX start - called by the stack, this 172 * always uses the first tx ring, and should 173 * not be used with multiqueue tx enabled. 174 */ 175 void 176 ixgbe_start(struct ifnet *ifp) 177 { 178 struct adapter *adapter = ifp->if_softc; 179 struct tx_ring *txr = adapter->tx_rings; 180 181 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 182 IXGBE_TX_LOCK(txr); 183 ixgbe_start_locked(txr, ifp); 184 IXGBE_TX_UNLOCK(txr); 185 } 186 return; 187 } 188 189 #else /* ! 
IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
**
*/
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int 		i, err = 0;
#ifdef	RSS
	uint32_t bucket_id;
#endif

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the CPU we are currently
	 * running on belongs to.
	 */
#if __FreeBSD_version < 1100054
	if (m->m_flags & M_FLOWID) {
#else
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#endif
#ifdef	RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0)
			/* TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		else
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
	} else
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}

int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		enqueued = 0, err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
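 * The task is enqueued by ixgbe_mq_start() above whenever it has
 * queued a frame but could not take the TX lock right away
 * (IXGBE_TX_TRYLOCK failed), so the ring gets drained shortly after.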
309 */ 310 void 311 ixgbe_deferred_mq_start(void *arg, int pending) 312 { 313 struct tx_ring *txr = arg; 314 struct adapter *adapter = txr->adapter; 315 struct ifnet *ifp = adapter->ifp; 316 317 IXGBE_TX_LOCK(txr); 318 if (!drbr_empty(ifp, txr->br)) 319 ixgbe_mq_start_locked(ifp, txr); 320 IXGBE_TX_UNLOCK(txr); 321 } 322 323 /* 324 * Flush all ring buffers 325 */ 326 void 327 ixgbe_qflush(struct ifnet *ifp) 328 { 329 struct adapter *adapter = ifp->if_softc; 330 struct tx_ring *txr = adapter->tx_rings; 331 struct mbuf *m; 332 333 for (int i = 0; i < adapter->num_queues; i++, txr++) { 334 IXGBE_TX_LOCK(txr); 335 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 336 m_freem(m); 337 IXGBE_TX_UNLOCK(txr); 338 } 339 if_qflush(ifp); 340 } 341 #endif /* IXGBE_LEGACY_TX */ 342 343 344 /********************************************************************* 345 * 346 * This routine maps the mbufs to tx descriptors, allowing the 347 * TX engine to transmit the packets. 348 * - return 0 on success, positive on failure 349 * 350 **********************************************************************/ 351 352 static int 353 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) 354 { 355 struct adapter *adapter = txr->adapter; 356 u32 olinfo_status = 0, cmd_type_len; 357 int i, j, error, nsegs; 358 int first; 359 bool remap = TRUE; 360 struct mbuf *m_head; 361 bus_dma_segment_t segs[adapter->num_segs]; 362 bus_dmamap_t map; 363 struct ixgbe_tx_buf *txbuf; 364 union ixgbe_adv_tx_desc *txd = NULL; 365 366 m_head = *m_headp; 367 368 /* Basic descriptor defines */ 369 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | 370 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); 371 372 if (m_head->m_flags & M_VLANTAG) 373 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; 374 375 /* 376 * Important to capture the first descriptor 377 * used because it will contain the index of 378 * the one we tell the hardware to report back 379 */ 380 first = txr->next_avail_desc; 381 txbuf = &txr->tx_buffers[first]; 382 map = txbuf->map; 383 384 /* 385 * Map the packet for DMA. 386 */ 387 retry: 388 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 389 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 390 391 if (__predict_false(error)) { 392 struct mbuf *m; 393 394 switch (error) { 395 case EFBIG: 396 /* Try it again? 
- one try */ 397 if (remap == TRUE) { 398 remap = FALSE; 399 /* 400 * XXX: m_defrag will choke on 401 * non-MCLBYTES-sized clusters 402 */ 403 m = m_defrag(*m_headp, M_NOWAIT); 404 if (m == NULL) { 405 adapter->mbuf_defrag_failed++; 406 m_freem(*m_headp); 407 *m_headp = NULL; 408 return (ENOBUFS); 409 } 410 *m_headp = m; 411 goto retry; 412 } else 413 return (error); 414 case ENOMEM: 415 txr->no_tx_dma_setup++; 416 return (error); 417 default: 418 txr->no_tx_dma_setup++; 419 m_freem(*m_headp); 420 *m_headp = NULL; 421 return (error); 422 } 423 } 424 425 /* Make certain there are enough descriptors */ 426 if (nsegs > txr->tx_avail - 2) { 427 txr->no_desc_avail++; 428 bus_dmamap_unload(txr->txtag, map); 429 return (ENOBUFS); 430 } 431 m_head = *m_headp; 432 433 /* 434 * Set up the appropriate offload context 435 * this will consume the first descriptor 436 */ 437 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); 438 if (__predict_false(error)) { 439 if (error == ENOBUFS) 440 *m_headp = NULL; 441 return (error); 442 } 443 444 #ifdef IXGBE_FDIR 445 /* Do the flow director magic */ 446 if ((txr->atr_sample) && (!adapter->fdir_reinit)) { 447 ++txr->atr_count; 448 if (txr->atr_count >= atr_sample_rate) { 449 ixgbe_atr(txr, m_head); 450 txr->atr_count = 0; 451 } 452 } 453 #endif 454 455 i = txr->next_avail_desc; 456 for (j = 0; j < nsegs; j++) { 457 bus_size_t seglen; 458 bus_addr_t segaddr; 459 460 txbuf = &txr->tx_buffers[i]; 461 txd = &txr->tx_base[i]; 462 seglen = segs[j].ds_len; 463 segaddr = htole64(segs[j].ds_addr); 464 465 txd->read.buffer_addr = segaddr; 466 txd->read.cmd_type_len = htole32(txr->txd_cmd | 467 cmd_type_len |seglen); 468 txd->read.olinfo_status = htole32(olinfo_status); 469 470 if (++i == txr->num_desc) 471 i = 0; 472 } 473 474 txd->read.cmd_type_len |= 475 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); 476 txr->tx_avail -= nsegs; 477 txr->next_avail_desc = i; 478 479 txbuf->m_head = m_head; 480 /* 481 * Here we swap the map so the last descriptor, 482 * which gets the completion interrupt has the 483 * real map, and the first descriptor gets the 484 * unused map from this descriptor. 485 */ 486 txr->tx_buffers[first].map = txbuf->map; 487 txbuf->map = map; 488 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 489 490 /* Set the EOP descriptor that will be marked done */ 491 txbuf = &txr->tx_buffers[first]; 492 txbuf->eop = txd; 493 494 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 495 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 496 /* 497 * Advance the Transmit Descriptor Tail (Tdt), this tells the 498 * hardware that this frame is available to transmit. 499 */ 500 ++txr->total_packets; 501 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); 502 503 /* Mark queue as having work */ 504 if (txr->busy == 0) 505 txr->busy = 1; 506 507 return (0); 508 } 509 510 511 /********************************************************************* 512 * 513 * Allocate memory for tx_buffer structures. The tx_buffer stores all 514 * the information needed to transmit a packet on the wire. This is 515 * called only once at attach, setup is done every reset. 516 * 517 **********************************************************************/ 518 int 519 ixgbe_allocate_transmit_buffers(struct tx_ring *txr) 520 { 521 struct adapter *adapter = txr->adapter; 522 device_t dev = adapter->dev; 523 struct ixgbe_tx_buf *txbuf; 524 int error, i; 525 526 /* 527 * Setup DMA descriptor areas. 
528 */ 529 if ((error = bus_dma_tag_create( 530 bus_get_dma_tag(adapter->dev), /* parent */ 531 1, 0, /* alignment, bounds */ 532 BUS_SPACE_MAXADDR, /* lowaddr */ 533 BUS_SPACE_MAXADDR, /* highaddr */ 534 NULL, NULL, /* filter, filterarg */ 535 IXGBE_TSO_SIZE, /* maxsize */ 536 adapter->num_segs, /* nsegments */ 537 PAGE_SIZE, /* maxsegsize */ 538 0, /* flags */ 539 NULL, /* lockfunc */ 540 NULL, /* lockfuncarg */ 541 &txr->txtag))) { 542 device_printf(dev,"Unable to allocate TX DMA tag\n"); 543 goto fail; 544 } 545 546 if (!(txr->tx_buffers = 547 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * 548 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 549 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 550 error = ENOMEM; 551 goto fail; 552 } 553 554 /* Create the descriptor buffer dma maps */ 555 txbuf = txr->tx_buffers; 556 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 557 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 558 if (error != 0) { 559 device_printf(dev, "Unable to create TX DMA map\n"); 560 goto fail; 561 } 562 } 563 564 return 0; 565 fail: 566 /* We free all, it handles case where we are in the middle */ 567 ixgbe_free_transmit_structures(adapter); 568 return (error); 569 } 570 571 /********************************************************************* 572 * 573 * Initialize a transmit ring. 574 * 575 **********************************************************************/ 576 static void 577 ixgbe_setup_transmit_ring(struct tx_ring *txr) 578 { 579 struct adapter *adapter = txr->adapter; 580 struct ixgbe_tx_buf *txbuf; 581 #ifdef DEV_NETMAP 582 struct netmap_adapter *na = NA(adapter->ifp); 583 struct netmap_slot *slot; 584 #endif /* DEV_NETMAP */ 585 586 /* Clear the old ring contents */ 587 IXGBE_TX_LOCK(txr); 588 #ifdef DEV_NETMAP 589 /* 590 * (under lock): if in netmap mode, do some consistency 591 * checks and set slot to entry 0 of the netmap ring. 592 */ 593 slot = netmap_reset(na, NR_TX, txr->me, 0); 594 #endif /* DEV_NETMAP */ 595 bzero((void *)txr->tx_base, 596 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); 597 /* Reset indices */ 598 txr->next_avail_desc = 0; 599 txr->next_to_clean = 0; 600 601 /* Free any existing tx buffers. */ 602 txbuf = txr->tx_buffers; 603 for (int i = 0; i < txr->num_desc; i++, txbuf++) { 604 if (txbuf->m_head != NULL) { 605 bus_dmamap_sync(txr->txtag, txbuf->map, 606 BUS_DMASYNC_POSTWRITE); 607 bus_dmamap_unload(txr->txtag, txbuf->map); 608 m_freem(txbuf->m_head); 609 txbuf->m_head = NULL; 610 } 611 #ifdef DEV_NETMAP 612 /* 613 * In netmap mode, set the map for the packet buffer. 614 * NOTE: Some drivers (not this one) also need to set 615 * the physical buffer address in the NIC ring. 616 * Slots in the netmap ring (indexed by "si") are 617 * kring->nkr_hwofs positions "ahead" wrt the 618 * corresponding slot in the NIC ring. In some drivers 619 * (not here) nkr_hwofs can be negative. Function 620 * netmap_idx_n2k() handles wraparounds properly. 
621 */ 622 if (slot) { 623 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 624 netmap_load_map(na, txr->txtag, 625 txbuf->map, NMB(na, slot + si)); 626 } 627 #endif /* DEV_NETMAP */ 628 /* Clear the EOP descriptor pointer */ 629 txbuf->eop = NULL; 630 } 631 632 #ifdef IXGBE_FDIR 633 /* Set the rate at which we sample packets */ 634 if (adapter->hw.mac.type != ixgbe_mac_82598EB) 635 txr->atr_sample = atr_sample_rate; 636 #endif 637 638 /* Set number of descriptors available */ 639 txr->tx_avail = adapter->num_tx_desc; 640 641 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 642 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 643 IXGBE_TX_UNLOCK(txr); 644 } 645 646 /********************************************************************* 647 * 648 * Initialize all transmit rings. 649 * 650 **********************************************************************/ 651 int 652 ixgbe_setup_transmit_structures(struct adapter *adapter) 653 { 654 struct tx_ring *txr = adapter->tx_rings; 655 656 for (int i = 0; i < adapter->num_queues; i++, txr++) 657 ixgbe_setup_transmit_ring(txr); 658 659 return (0); 660 } 661 662 /********************************************************************* 663 * 664 * Free all transmit rings. 665 * 666 **********************************************************************/ 667 void 668 ixgbe_free_transmit_structures(struct adapter *adapter) 669 { 670 struct tx_ring *txr = adapter->tx_rings; 671 672 for (int i = 0; i < adapter->num_queues; i++, txr++) { 673 IXGBE_TX_LOCK(txr); 674 ixgbe_free_transmit_buffers(txr); 675 ixgbe_dma_free(adapter, &txr->txdma); 676 IXGBE_TX_UNLOCK(txr); 677 IXGBE_TX_LOCK_DESTROY(txr); 678 } 679 free(adapter->tx_rings, M_DEVBUF); 680 } 681 682 /********************************************************************* 683 * 684 * Free transmit ring related data structures. 
685 * 686 **********************************************************************/ 687 static void 688 ixgbe_free_transmit_buffers(struct tx_ring *txr) 689 { 690 struct adapter *adapter = txr->adapter; 691 struct ixgbe_tx_buf *tx_buffer; 692 int i; 693 694 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); 695 696 if (txr->tx_buffers == NULL) 697 return; 698 699 tx_buffer = txr->tx_buffers; 700 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { 701 if (tx_buffer->m_head != NULL) { 702 bus_dmamap_sync(txr->txtag, tx_buffer->map, 703 BUS_DMASYNC_POSTWRITE); 704 bus_dmamap_unload(txr->txtag, 705 tx_buffer->map); 706 m_freem(tx_buffer->m_head); 707 tx_buffer->m_head = NULL; 708 if (tx_buffer->map != NULL) { 709 bus_dmamap_destroy(txr->txtag, 710 tx_buffer->map); 711 tx_buffer->map = NULL; 712 } 713 } else if (tx_buffer->map != NULL) { 714 bus_dmamap_unload(txr->txtag, 715 tx_buffer->map); 716 bus_dmamap_destroy(txr->txtag, 717 tx_buffer->map); 718 tx_buffer->map = NULL; 719 } 720 } 721 #ifdef IXGBE_LEGACY_TX 722 if (txr->br != NULL) 723 buf_ring_free(txr->br, M_DEVBUF); 724 #endif 725 if (txr->tx_buffers != NULL) { 726 free(txr->tx_buffers, M_DEVBUF); 727 txr->tx_buffers = NULL; 728 } 729 if (txr->txtag != NULL) { 730 bus_dma_tag_destroy(txr->txtag); 731 txr->txtag = NULL; 732 } 733 return; 734 } 735 736 /********************************************************************* 737 * 738 * Advanced Context Descriptor setup for VLAN, CSUM or TSO 739 * 740 **********************************************************************/ 741 742 static int 743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, 744 u32 *cmd_type_len, u32 *olinfo_status) 745 { 746 struct adapter *adapter = txr->adapter; 747 struct ixgbe_adv_tx_context_desc *TXD; 748 struct ether_vlan_header *eh; 749 struct ip *ip; 750 struct ip6_hdr *ip6; 751 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 752 int ehdrlen, ip_hlen = 0; 753 u16 etype; 754 u8 ipproto = 0; 755 int offload = TRUE; 756 int ctxd = txr->next_avail_desc; 757 u16 vtag = 0; 758 759 /* First check if TSO is to be used */ 760 if (mp->m_pkthdr.csum_flags & CSUM_TSO) 761 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); 762 763 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) 764 offload = FALSE; 765 766 /* Indicate the whole packet as payload when not doing TSO */ 767 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; 768 769 /* Now ready a context descriptor */ 770 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 771 772 /* 773 ** In advanced descriptors the vlan tag must 774 ** be placed into the context descriptor. Hence 775 ** we need to make one even if not doing offloads. 776 */ 777 if (mp->m_flags & M_VLANTAG) { 778 vtag = htole16(mp->m_pkthdr.ether_vtag); 779 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 780 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE)) 781 return (0); 782 783 /* 784 * Determine where frame payload starts. 785 * Jump over vlan headers if already present, 786 * helpful for QinQ too. 
787 */ 788 eh = mtod(mp, struct ether_vlan_header *); 789 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 790 etype = ntohs(eh->evl_proto); 791 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 792 } else { 793 etype = ntohs(eh->evl_encap_proto); 794 ehdrlen = ETHER_HDR_LEN; 795 } 796 797 /* Set the ether header length */ 798 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 799 800 if (offload == FALSE) 801 goto no_offloads; 802 803 switch (etype) { 804 case ETHERTYPE_IP: 805 ip = (struct ip *)(mp->m_data + ehdrlen); 806 ip_hlen = ip->ip_hl << 2; 807 ipproto = ip->ip_p; 808 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 809 break; 810 case ETHERTYPE_IPV6: 811 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 812 ip_hlen = sizeof(struct ip6_hdr); 813 /* XXX-BZ this will go badly in case of ext hdrs. */ 814 ipproto = ip6->ip6_nxt; 815 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 816 break; 817 default: 818 offload = FALSE; 819 break; 820 } 821 822 vlan_macip_lens |= ip_hlen; 823 824 switch (ipproto) { 825 case IPPROTO_TCP: 826 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 827 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 828 break; 829 830 case IPPROTO_UDP: 831 if (mp->m_pkthdr.csum_flags & CSUM_UDP) 832 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; 833 break; 834 835 #if __FreeBSD_version >= 800000 836 case IPPROTO_SCTP: 837 if (mp->m_pkthdr.csum_flags & CSUM_SCTP) 838 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; 839 break; 840 #endif 841 default: 842 offload = FALSE; 843 break; 844 } 845 846 if (offload) /* For the TX descriptor setup */ 847 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 848 849 no_offloads: 850 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 851 852 /* Now copy bits into descriptor */ 853 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 854 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 855 TXD->seqnum_seed = htole32(0); 856 TXD->mss_l4len_idx = htole32(0); 857 858 /* We've consumed the first desc, adjust counters */ 859 if (++ctxd == txr->num_desc) 860 ctxd = 0; 861 txr->next_avail_desc = ctxd; 862 --txr->tx_avail; 863 864 return (0); 865 } 866 867 /********************************************************************** 868 * 869 * Setup work for hardware segmentation offload (TSO) on 870 * adapters using advanced tx descriptors 871 * 872 **********************************************************************/ 873 static int 874 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, 875 u32 *cmd_type_len, u32 *olinfo_status) 876 { 877 struct ixgbe_adv_tx_context_desc *TXD; 878 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 879 u32 mss_l4len_idx = 0, paylen; 880 u16 vtag = 0, eh_type; 881 int ctxd, ehdrlen, ip_hlen, tcp_hlen; 882 struct ether_vlan_header *eh; 883 #ifdef INET6 884 struct ip6_hdr *ip6; 885 #endif 886 #ifdef INET 887 struct ip *ip; 888 #endif 889 struct tcphdr *th; 890 891 892 /* 893 * Determine where frame payload starts. 894 * Jump over vlan headers if already present 895 */ 896 eh = mtod(mp, struct ether_vlan_header *); 897 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 898 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 899 eh_type = eh->evl_proto; 900 } else { 901 ehdrlen = ETHER_HDR_LEN; 902 eh_type = eh->evl_encap_proto; 903 } 904 905 switch (ntohs(eh_type)) { 906 #ifdef INET6 907 case ETHERTYPE_IPV6: 908 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 909 /* XXX-BZ For now we do not pretend to support ext. hdrs. 
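		 * A frame whose next header is an extension header rather
		 * than TCP will therefore fail the IPPROTO_TCP test just
		 * below, and TSO setup returns ENXIO for it.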
*/ 910 if (ip6->ip6_nxt != IPPROTO_TCP) 911 return (ENXIO); 912 ip_hlen = sizeof(struct ip6_hdr); 913 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 914 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 915 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 916 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 917 break; 918 #endif 919 #ifdef INET 920 case ETHERTYPE_IP: 921 ip = (struct ip *)(mp->m_data + ehdrlen); 922 if (ip->ip_p != IPPROTO_TCP) 923 return (ENXIO); 924 ip->ip_sum = 0; 925 ip_hlen = ip->ip_hl << 2; 926 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 927 th->th_sum = in_pseudo(ip->ip_src.s_addr, 928 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 929 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 930 /* Tell transmit desc to also do IPv4 checksum. */ 931 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 932 break; 933 #endif 934 default: 935 panic("%s: CSUM_TSO but no supported IP version (0x%04x)", 936 __func__, ntohs(eh_type)); 937 break; 938 } 939 940 ctxd = txr->next_avail_desc; 941 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 942 943 tcp_hlen = th->th_off << 2; 944 945 /* This is used in the transmit desc in encap */ 946 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; 947 948 /* VLAN MACLEN IPLEN */ 949 if (mp->m_flags & M_VLANTAG) { 950 vtag = htole16(mp->m_pkthdr.ether_vtag); 951 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 952 } 953 954 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 955 vlan_macip_lens |= ip_hlen; 956 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 957 958 /* ADV DTYPE TUCMD */ 959 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 960 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 961 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 962 963 /* MSS L4LEN IDX */ 964 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); 965 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); 966 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 967 968 TXD->seqnum_seed = htole32(0); 969 970 if (++ctxd == txr->num_desc) 971 ctxd = 0; 972 973 txr->tx_avail--; 974 txr->next_avail_desc = ctxd; 975 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; 976 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 977 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; 978 ++txr->tso_tx; 979 return (0); 980 } 981 982 983 /********************************************************************** 984 * 985 * Examine each tx_buffer in the used queue. If the hardware is done 986 * processing the packet then free associated resources. The 987 * tx_buffer is put back on the free queue. 988 * 989 **********************************************************************/ 990 void 991 ixgbe_txeof(struct tx_ring *txr) 992 { 993 #ifdef DEV_NETMAP 994 struct adapter *adapter = txr->adapter; 995 struct ifnet *ifp = adapter->ifp; 996 #endif 997 u32 work, processed = 0; 998 u16 limit = txr->process_limit; 999 struct ixgbe_tx_buf *buf; 1000 union ixgbe_adv_tx_desc *txd; 1001 1002 mtx_assert(&txr->tx_mtx, MA_OWNED); 1003 1004 #ifdef DEV_NETMAP 1005 if (ifp->if_capenable & IFCAP_NETMAP) { 1006 struct netmap_adapter *na = NA(ifp); 1007 struct netmap_kring *kring = &na->tx_rings[txr->me]; 1008 txd = txr->tx_base; 1009 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1010 BUS_DMASYNC_POSTREAD); 1011 /* 1012 * In netmap mode, all the work is done in the context 1013 * of the client thread. Interrupt handlers only wake up 1014 * clients, which may be sleeping on individual rings 1015 * or on a global resource for all rings. 
1016 * To implement tx interrupt mitigation, we wake up the client 1017 * thread roughly every half ring, even if the NIC interrupts 1018 * more frequently. This is implemented as follows: 1019 * - ixgbe_txsync() sets kring->nr_kflags with the index of 1020 * the slot that should wake up the thread (nkr_num_slots 1021 * means the user thread should not be woken up); 1022 * - the driver ignores tx interrupts unless netmap_mitigate=0 1023 * or the slot has the DD bit set. 1024 */ 1025 if (!netmap_mitigate || 1026 (kring->nr_kflags < kring->nkr_num_slots && 1027 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { 1028 netmap_tx_irq(ifp, txr->me); 1029 } 1030 return; 1031 } 1032 #endif /* DEV_NETMAP */ 1033 1034 if (txr->tx_avail == txr->num_desc) { 1035 txr->busy = 0; 1036 return; 1037 } 1038 1039 /* Get work starting point */ 1040 work = txr->next_to_clean; 1041 buf = &txr->tx_buffers[work]; 1042 txd = &txr->tx_base[work]; 1043 work -= txr->num_desc; /* The distance to ring end */ 1044 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1045 BUS_DMASYNC_POSTREAD); 1046 1047 do { 1048 union ixgbe_adv_tx_desc *eop= buf->eop; 1049 if (eop == NULL) /* No work */ 1050 break; 1051 1052 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) 1053 break; /* I/O not complete */ 1054 1055 if (buf->m_head) { 1056 txr->bytes += 1057 buf->m_head->m_pkthdr.len; 1058 bus_dmamap_sync(txr->txtag, 1059 buf->map, 1060 BUS_DMASYNC_POSTWRITE); 1061 bus_dmamap_unload(txr->txtag, 1062 buf->map); 1063 m_freem(buf->m_head); 1064 buf->m_head = NULL; 1065 } 1066 buf->eop = NULL; 1067 ++txr->tx_avail; 1068 1069 /* We clean the range if multi segment */ 1070 while (txd != eop) { 1071 ++txd; 1072 ++buf; 1073 ++work; 1074 /* wrap the ring? */ 1075 if (__predict_false(!work)) { 1076 work -= txr->num_desc; 1077 buf = txr->tx_buffers; 1078 txd = txr->tx_base; 1079 } 1080 if (buf->m_head) { 1081 txr->bytes += 1082 buf->m_head->m_pkthdr.len; 1083 bus_dmamap_sync(txr->txtag, 1084 buf->map, 1085 BUS_DMASYNC_POSTWRITE); 1086 bus_dmamap_unload(txr->txtag, 1087 buf->map); 1088 m_freem(buf->m_head); 1089 buf->m_head = NULL; 1090 } 1091 ++txr->tx_avail; 1092 buf->eop = NULL; 1093 1094 } 1095 ++txr->packets; 1096 ++processed; 1097 1098 /* Try the next packet */ 1099 ++txd; 1100 ++buf; 1101 ++work; 1102 /* reset with a wrap */ 1103 if (__predict_false(!work)) { 1104 work -= txr->num_desc; 1105 buf = txr->tx_buffers; 1106 txd = txr->tx_base; 1107 } 1108 prefetch(txd); 1109 } while (__predict_true(--limit)); 1110 1111 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1112 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1113 1114 work += txr->num_desc; 1115 txr->next_to_clean = work; 1116 1117 /* 1118 ** Queue Hang detection, we know there's 1119 ** work outstanding or the first return 1120 ** would have been taken, so increment busy 1121 ** if nothing managed to get cleaned, then 1122 ** in local_timer it will be checked and 1123 ** marked as HUNG if it exceeds a MAX attempt. 1124 */ 1125 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) 1126 ++txr->busy; 1127 /* 1128 ** If anything gets cleaned we reset state to 1, 1129 ** note this will turn off HUNG if its set. 1130 */ 1131 if (processed) 1132 txr->busy = 1; 1133 1134 if (txr->tx_avail == txr->num_desc) 1135 txr->busy = 0; 1136 1137 return; 1138 } 1139 1140 1141 #ifdef IXGBE_FDIR 1142 /* 1143 ** This routine parses packet headers so that Flow 1144 ** Director can make a hashed filter table entry 1145 ** allowing traffic flows to be identified and kept 1146 ** on the same cpu. 
This would be a performance 1147 ** hit, but we only do it at IXGBE_FDIR_RATE of 1148 ** packets. 1149 */ 1150 static void 1151 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) 1152 { 1153 struct adapter *adapter = txr->adapter; 1154 struct ix_queue *que; 1155 struct ip *ip; 1156 struct tcphdr *th; 1157 struct udphdr *uh; 1158 struct ether_vlan_header *eh; 1159 union ixgbe_atr_hash_dword input = {.dword = 0}; 1160 union ixgbe_atr_hash_dword common = {.dword = 0}; 1161 int ehdrlen, ip_hlen; 1162 u16 etype; 1163 1164 eh = mtod(mp, struct ether_vlan_header *); 1165 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1166 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1167 etype = eh->evl_proto; 1168 } else { 1169 ehdrlen = ETHER_HDR_LEN; 1170 etype = eh->evl_encap_proto; 1171 } 1172 1173 /* Only handling IPv4 */ 1174 if (etype != htons(ETHERTYPE_IP)) 1175 return; 1176 1177 ip = (struct ip *)(mp->m_data + ehdrlen); 1178 ip_hlen = ip->ip_hl << 2; 1179 1180 /* check if we're UDP or TCP */ 1181 switch (ip->ip_p) { 1182 case IPPROTO_TCP: 1183 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 1184 /* src and dst are inverted */ 1185 common.port.dst ^= th->th_sport; 1186 common.port.src ^= th->th_dport; 1187 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; 1188 break; 1189 case IPPROTO_UDP: 1190 uh = (struct udphdr *)((caddr_t)ip + ip_hlen); 1191 /* src and dst are inverted */ 1192 common.port.dst ^= uh->uh_sport; 1193 common.port.src ^= uh->uh_dport; 1194 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; 1195 break; 1196 default: 1197 return; 1198 } 1199 1200 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); 1201 if (mp->m_pkthdr.ether_vtag) 1202 common.flex_bytes ^= htons(ETHERTYPE_VLAN); 1203 else 1204 common.flex_bytes ^= etype; 1205 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; 1206 1207 que = &adapter->queues[txr->me]; 1208 /* 1209 ** This assumes the Rx queue and Tx 1210 ** queue are bound to the same CPU 1211 */ 1212 ixgbe_fdir_add_signature_filter_82599(&adapter->hw, 1213 input, common, que->msix); 1214 } 1215 #endif /* IXGBE_FDIR */ 1216 1217 /* 1218 ** Used to detect a descriptor that has 1219 ** been merged by Hardware RSC. 1220 */ 1221 static inline u32 1222 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) 1223 { 1224 return (le32toh(rx->wb.lower.lo_dword.data) & 1225 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; 1226 } 1227 1228 /********************************************************************* 1229 * 1230 * Initialize Hardware RSC (LRO) feature on 82599 1231 * for an RX ring, this is toggled by the LRO capability 1232 * even though it is transparent to the stack. 1233 * 1234 * NOTE: since this HW feature only works with IPV4 and 1235 * our testing has shown soft LRO to be as effective 1236 * I have decided to disable this by default. 
 *
 **********************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back to the ring's RSCCTL */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
}
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be called again later to retry.
 *
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[1];
	struct ixgbe_rx_buf	*rxbuf;
	struct mbuf		*mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
		} else
			mp = rxbuf->buf;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/* If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
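		 * (IXGBE_RX_COPY is set in ixgbe_rxeof() when a small frame
		 * is copied into a fresh mbuf; the original cluster and its
		 * DMA mapping stay in place, so only the cached descriptor
		 * address needs to be written back in that case.)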
1330 */ 1331 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { 1332 /* Get the memory mapping */ 1333 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1334 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1335 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); 1336 if (error != 0) { 1337 printf("Refresh mbufs: payload dmamap load" 1338 " failure - %d\n", error); 1339 m_free(mp); 1340 rxbuf->buf = NULL; 1341 goto update; 1342 } 1343 rxbuf->buf = mp; 1344 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1345 BUS_DMASYNC_PREREAD); 1346 rxbuf->addr = rxr->rx_base[i].read.pkt_addr = 1347 htole64(seg[0].ds_addr); 1348 } else { 1349 rxr->rx_base[i].read.pkt_addr = rxbuf->addr; 1350 rxbuf->flags &= ~IXGBE_RX_COPY; 1351 } 1352 1353 refreshed = TRUE; 1354 /* Next is precalculated */ 1355 i = j; 1356 rxr->next_to_refresh = i; 1357 if (++j == rxr->num_desc) 1358 j = 0; 1359 } 1360 update: 1361 if (refreshed) /* Update hardware tail index */ 1362 IXGBE_WRITE_REG(&adapter->hw, 1363 rxr->tail, rxr->next_to_refresh); 1364 return; 1365 } 1366 1367 /********************************************************************* 1368 * 1369 * Allocate memory for rx_buffer structures. Since we use one 1370 * rx_buffer per received packet, the maximum number of rx_buffer's 1371 * that we'll need is equal to the number of receive descriptors 1372 * that we've allocated. 1373 * 1374 **********************************************************************/ 1375 int 1376 ixgbe_allocate_receive_buffers(struct rx_ring *rxr) 1377 { 1378 struct adapter *adapter = rxr->adapter; 1379 device_t dev = adapter->dev; 1380 struct ixgbe_rx_buf *rxbuf; 1381 int bsize, error; 1382 1383 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; 1384 if (!(rxr->rx_buffers = 1385 (struct ixgbe_rx_buf *) malloc(bsize, 1386 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1387 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1388 error = ENOMEM; 1389 goto fail; 1390 } 1391 1392 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1393 1, 0, /* alignment, bounds */ 1394 BUS_SPACE_MAXADDR, /* lowaddr */ 1395 BUS_SPACE_MAXADDR, /* highaddr */ 1396 NULL, NULL, /* filter, filterarg */ 1397 MJUM16BYTES, /* maxsize */ 1398 1, /* nsegments */ 1399 MJUM16BYTES, /* maxsegsize */ 1400 0, /* flags */ 1401 NULL, /* lockfunc */ 1402 NULL, /* lockfuncarg */ 1403 &rxr->ptag))) { 1404 device_printf(dev, "Unable to create RX DMA tag\n"); 1405 goto fail; 1406 } 1407 1408 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) { 1409 rxbuf = &rxr->rx_buffers[i]; 1410 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); 1411 if (error) { 1412 device_printf(dev, "Unable to create RX dma map\n"); 1413 goto fail; 1414 } 1415 } 1416 1417 return (0); 1418 1419 fail: 1420 /* Frees all, but can handle partial completion */ 1421 ixgbe_free_receive_structures(adapter); 1422 return (error); 1423 } 1424 1425 1426 static void 1427 ixgbe_free_receive_ring(struct rx_ring *rxr) 1428 { 1429 struct ixgbe_rx_buf *rxbuf; 1430 1431 for (int i = 0; i < rxr->num_desc; i++) { 1432 rxbuf = &rxr->rx_buffers[i]; 1433 if (rxbuf->buf != NULL) { 1434 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1435 BUS_DMASYNC_POSTREAD); 1436 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1437 rxbuf->buf->m_flags |= M_PKTHDR; 1438 m_freem(rxbuf->buf); 1439 rxbuf->buf = NULL; 1440 rxbuf->flags = 0; 1441 } 1442 } 1443 } 1444 1445 1446 /********************************************************************* 1447 * 1448 * Initialize a receive ring and its buffers. 
1449 * 1450 **********************************************************************/ 1451 static int 1452 ixgbe_setup_receive_ring(struct rx_ring *rxr) 1453 { 1454 struct adapter *adapter; 1455 struct ifnet *ifp; 1456 device_t dev; 1457 struct ixgbe_rx_buf *rxbuf; 1458 bus_dma_segment_t seg[1]; 1459 struct lro_ctrl *lro = &rxr->lro; 1460 int rsize, nsegs, error = 0; 1461 #ifdef DEV_NETMAP 1462 struct netmap_adapter *na = NA(rxr->adapter->ifp); 1463 struct netmap_slot *slot; 1464 #endif /* DEV_NETMAP */ 1465 1466 adapter = rxr->adapter; 1467 ifp = adapter->ifp; 1468 dev = adapter->dev; 1469 1470 /* Clear the ring contents */ 1471 IXGBE_RX_LOCK(rxr); 1472 #ifdef DEV_NETMAP 1473 /* same as in ixgbe_setup_transmit_ring() */ 1474 slot = netmap_reset(na, NR_RX, rxr->me, 0); 1475 #endif /* DEV_NETMAP */ 1476 rsize = roundup2(adapter->num_rx_desc * 1477 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); 1478 bzero((void *)rxr->rx_base, rsize); 1479 /* Cache the size */ 1480 rxr->mbuf_sz = adapter->rx_mbuf_sz; 1481 1482 /* Free current RX buffer structs and their mbufs */ 1483 ixgbe_free_receive_ring(rxr); 1484 1485 /* Now replenish the mbufs */ 1486 for (int j = 0; j != rxr->num_desc; ++j) { 1487 struct mbuf *mp; 1488 1489 rxbuf = &rxr->rx_buffers[j]; 1490 #ifdef DEV_NETMAP 1491 /* 1492 * In netmap mode, fill the map and set the buffer 1493 * address in the NIC ring, considering the offset 1494 * between the netmap and NIC rings (see comment in 1495 * ixgbe_setup_transmit_ring() ). No need to allocate 1496 * an mbuf, so end the block with a continue; 1497 */ 1498 if (slot) { 1499 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 1500 uint64_t paddr; 1501 void *addr; 1502 1503 addr = PNMB(na, slot + sj, &paddr); 1504 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); 1505 /* Update descriptor and the cached value */ 1506 rxr->rx_base[j].read.pkt_addr = htole64(paddr); 1507 rxbuf->addr = htole64(paddr); 1508 continue; 1509 } 1510 #endif /* DEV_NETMAP */ 1511 rxbuf->flags = 0; 1512 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, 1513 M_PKTHDR, adapter->rx_mbuf_sz); 1514 if (rxbuf->buf == NULL) { 1515 error = ENOBUFS; 1516 goto fail; 1517 } 1518 mp = rxbuf->buf; 1519 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1520 /* Get the memory mapping */ 1521 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1522 rxbuf->pmap, mp, seg, 1523 &nsegs, BUS_DMA_NOWAIT); 1524 if (error != 0) 1525 goto fail; 1526 bus_dmamap_sync(rxr->ptag, 1527 rxbuf->pmap, BUS_DMASYNC_PREREAD); 1528 /* Update the descriptor and the cached value */ 1529 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); 1530 rxbuf->addr = htole64(seg[0].ds_addr); 1531 } 1532 1533 1534 /* Setup our descriptor indices */ 1535 rxr->next_to_check = 0; 1536 rxr->next_to_refresh = 0; 1537 rxr->lro_enabled = FALSE; 1538 rxr->rx_copies = 0; 1539 rxr->rx_bytes = 0; 1540 rxr->vtag_strip = FALSE; 1541 1542 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1543 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1544 1545 /* 1546 ** Now set up the LRO interface: 1547 */ 1548 if (ixgbe_rsc_enable) 1549 ixgbe_setup_hw_rsc(rxr); 1550 else if (ifp->if_capenable & IFCAP_LRO) { 1551 int err = tcp_lro_init(lro); 1552 if (err) { 1553 device_printf(dev, "LRO Initialization failed!\n"); 1554 goto fail; 1555 } 1556 INIT_DEBUGOUT("RX Soft LRO Initialized\n"); 1557 rxr->lro_enabled = TRUE; 1558 lro->ifp = adapter->ifp; 1559 } 1560 1561 IXGBE_RX_UNLOCK(rxr); 1562 return (0); 1563 1564 fail: 1565 ixgbe_free_receive_ring(rxr); 1566 IXGBE_RX_UNLOCK(rxr); 1567 return (error); 1568 } 1569 1570 
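#if 0	/* Illustrative sketch only, not compiled into the driver. */
/*
 * The RX and TX paths above all step through their rings with the same
 * wrap-at-num_desc index arithmetic (see ixgbe_refresh_mbufs() and
 * ixgbe_rxeof()).  A hypothetical helper, shown here purely to make
 * that ring math explicit for readers of this file:
 */
static inline int
ixgbe_ring_next_idx(int idx, int num_desc)
{
	/* Advance one slot, wrapping back to 0 at the end of the ring */
	return ((idx + 1 == num_desc) ? 0 : idx + 1);
}
#endif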
/********************************************************************* 1571 * 1572 * Initialize all receive rings. 1573 * 1574 **********************************************************************/ 1575 int 1576 ixgbe_setup_receive_structures(struct adapter *adapter) 1577 { 1578 struct rx_ring *rxr = adapter->rx_rings; 1579 int j; 1580 1581 for (j = 0; j < adapter->num_queues; j++, rxr++) 1582 if (ixgbe_setup_receive_ring(rxr)) 1583 goto fail; 1584 1585 return (0); 1586 fail: 1587 /* 1588 * Free RX buffers allocated so far, we will only handle 1589 * the rings that completed, the failing case will have 1590 * cleaned up for itself. 'j' failed, so its the terminus. 1591 */ 1592 for (int i = 0; i < j; ++i) { 1593 rxr = &adapter->rx_rings[i]; 1594 ixgbe_free_receive_ring(rxr); 1595 } 1596 1597 return (ENOBUFS); 1598 } 1599 1600 1601 /********************************************************************* 1602 * 1603 * Free all receive rings. 1604 * 1605 **********************************************************************/ 1606 void 1607 ixgbe_free_receive_structures(struct adapter *adapter) 1608 { 1609 struct rx_ring *rxr = adapter->rx_rings; 1610 1611 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); 1612 1613 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 1614 struct lro_ctrl *lro = &rxr->lro; 1615 ixgbe_free_receive_buffers(rxr); 1616 /* Free LRO memory */ 1617 tcp_lro_free(lro); 1618 /* Free the ring memory as well */ 1619 ixgbe_dma_free(adapter, &rxr->rxdma); 1620 } 1621 1622 free(adapter->rx_rings, M_DEVBUF); 1623 } 1624 1625 1626 /********************************************************************* 1627 * 1628 * Free receive ring data structures 1629 * 1630 **********************************************************************/ 1631 void 1632 ixgbe_free_receive_buffers(struct rx_ring *rxr) 1633 { 1634 struct adapter *adapter = rxr->adapter; 1635 struct ixgbe_rx_buf *rxbuf; 1636 1637 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); 1638 1639 /* Cleanup any existing buffers */ 1640 if (rxr->rx_buffers != NULL) { 1641 for (int i = 0; i < adapter->num_rx_desc; i++) { 1642 rxbuf = &rxr->rx_buffers[i]; 1643 if (rxbuf->buf != NULL) { 1644 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 1645 BUS_DMASYNC_POSTREAD); 1646 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 1647 rxbuf->buf->m_flags |= M_PKTHDR; 1648 m_freem(rxbuf->buf); 1649 } 1650 rxbuf->buf = NULL; 1651 if (rxbuf->pmap != NULL) { 1652 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); 1653 rxbuf->pmap = NULL; 1654 } 1655 } 1656 if (rxr->rx_buffers != NULL) { 1657 free(rxr->rx_buffers, M_DEVBUF); 1658 rxr->rx_buffers = NULL; 1659 } 1660 } 1661 1662 if (rxr->ptag != NULL) { 1663 bus_dma_tag_destroy(rxr->ptag); 1664 rxr->ptag = NULL; 1665 } 1666 1667 return; 1668 } 1669 1670 static __inline void 1671 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) 1672 { 1673 1674 /* 1675 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet 1676 * should be computed by hardware. Also it should not have VLAN tag in 1677 * ethernet header. In case of IPv6 we do not yet support ext. hdrs. 
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXGBE_RX_LOCK(rxr);
}

static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf	*rbuf;

	rbuf = &rxr->rx_buffers[i];

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	bus_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;

	return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  DMA'ed into host memory up to the upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u16			count = rxr->process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
	u16			pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
			IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (IXGBE_IS_VF(adapter))
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** On 82599, which supports hardware LRO (called HW RSC),
		** a packet's fragments need not sit in sequential
		** descriptors; instead, the next descriptor is indicated
		** in bits of the current descriptor.  This also means we
		** might process more than one packet at a time, something
		** that has never been true before; it required eliminating
		** global chain pointers in favor of what we are doing
		** here. -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head that tells us
		** which packet this buffer belongs to.
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK.  Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
1868 */ 1869 if (eop && len <= IXGBE_RX_COPY_LEN) { 1870 sendmp = m_gethdr(M_NOWAIT, MT_DATA); 1871 if (sendmp != NULL) { 1872 sendmp->m_data += 1873 IXGBE_RX_COPY_ALIGN; 1874 ixgbe_bcopy(mp->m_data, 1875 sendmp->m_data, len); 1876 sendmp->m_len = len; 1877 rxr->rx_copies++; 1878 rbuf->flags |= IXGBE_RX_COPY; 1879 } 1880 } 1881 if (sendmp == NULL) { 1882 rbuf->buf = rbuf->fmp = NULL; 1883 sendmp = mp; 1884 } 1885 1886 /* first desc of a non-ps chain */ 1887 sendmp->m_flags |= M_PKTHDR; 1888 sendmp->m_pkthdr.len = mp->m_len; 1889 } 1890 ++processed; 1891 1892 /* Pass the head pointer on */ 1893 if (eop == 0) { 1894 nbuf->fmp = sendmp; 1895 sendmp = NULL; 1896 mp->m_next = nbuf->buf; 1897 } else { /* Sending this frame */ 1898 sendmp->m_pkthdr.rcvif = ifp; 1899 rxr->rx_packets++; 1900 /* capture data for AIM */ 1901 rxr->bytes += sendmp->m_pkthdr.len; 1902 rxr->rx_bytes += sendmp->m_pkthdr.len; 1903 /* Process vlan info */ 1904 if ((rxr->vtag_strip) && 1905 (staterr & IXGBE_RXD_STAT_VP)) 1906 vtag = le16toh(cur->wb.upper.vlan); 1907 if (vtag) { 1908 sendmp->m_pkthdr.ether_vtag = vtag; 1909 sendmp->m_flags |= M_VLANTAG; 1910 } 1911 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1912 ixgbe_rx_checksum(staterr, sendmp, ptype); 1913 #if __FreeBSD_version >= 800000 1914 #ifdef RSS 1915 sendmp->m_pkthdr.flowid = 1916 le32toh(cur->wb.lower.hi_dword.rss); 1917 #if __FreeBSD_version < 1100054 1918 sendmp->m_flags |= M_FLOWID; 1919 #endif 1920 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { 1921 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: 1922 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4); 1923 break; 1924 case IXGBE_RXDADV_RSSTYPE_IPV4: 1925 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4); 1926 break; 1927 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: 1928 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6); 1929 break; 1930 case IXGBE_RXDADV_RSSTYPE_IPV6_EX: 1931 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX); 1932 break; 1933 case IXGBE_RXDADV_RSSTYPE_IPV6: 1934 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6); 1935 break; 1936 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: 1937 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX); 1938 break; 1939 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: 1940 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4); 1941 break; 1942 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: 1943 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6); 1944 break; 1945 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: 1946 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX); 1947 break; 1948 default: 1949 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1950 } 1951 #else /* RSS */ 1952 sendmp->m_pkthdr.flowid = que->msix; 1953 #if __FreeBSD_version >= 1100054 1954 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1955 #else 1956 sendmp->m_flags |= M_FLOWID; 1957 #endif 1958 #endif /* RSS */ 1959 #endif /* FreeBSD_version */ 1960 } 1961 next_desc: 1962 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1963 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1964 1965 /* Advance our pointers to the next descriptor. 
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);
	else
		return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack of the checksum status so that it does not
 *  spend time verifying it again.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
	bool	sctp = FALSE;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = TRUE;

	if (status & IXGBE_RXD_STAT_IPCS) {
		if (!(errors & IXGBE_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else
			mp->m_pkthdr.csum_flags = 0;
	}
	if (status & IXGBE_RXD_STAT_L4CS) {
		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
		if (sctp)
			type = CSUM_SCTP_VALID;
#endif
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
	return;
}
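/*
 * Illustrative sketch only, kept under "#if 0" so it is never built:
 * it shows how a hypothetical consumer could read back the mbuf flags
 * that ixgbe_rx_checksum() sets above.  The helper name is an
 * assumption for illustration; the real stack consumes these flags in
 * its own protocol input paths.
 */
#if 0
static bool
example_rx_csum_ok(struct mbuf *m)
{
	/* Hardware checked the IP header checksum and found it good. */
	bool ip_ok = (m->m_pkthdr.csum_flags &
	    (CSUM_IP_CHECKED | CSUM_IP_VALID)) ==
	    (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/*
	 * Hardware verified the TCP/UDP checksum including the pseudo
	 * header; the driver stores 0xffff in csum_data in that case.
	 */
	bool l4_ok = ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) &&
	    (m->m_pkthdr.csum_data == 0xffff);

	return (ip_ok && l4_ok);
}
#endif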
/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *) arg = segs->ds_addr;
	return;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int r;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
	    DBA_ALIGN, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    size,			/* maxsize */
	    1,				/* nsegments */
	    size,			/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &dma->dma_tag);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; "
		    "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		    "error %u\n", r);
		goto fail_1;
	}
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size,
	    ixgbe_dmamap_cb,
	    &dma->dma_paddr,
	    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; "
		    "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}

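/*
 * Illustrative sketch only, kept under "#if 0" so it is never built:
 * the expected lifecycle of the two DMA helpers above.  The function
 * is hypothetical; real callers such as ixgbe_allocate_queues() below
 * keep the ixgbe_dma_alloc embedded in a ring structure and free it
 * only at detach or on an error path.
 */
#if 0
static int
example_dma_lifecycle(struct adapter *adapter)
{
	struct ixgbe_dma_alloc dma;
	bus_size_t size;

	/* Descriptor areas are rounded up to a multiple of DBA_ALIGN. */
	size = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/* dma_vaddr and dma_paddr are valid only after this succeeds. */
	if (ixgbe_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);

	bzero(dma.dma_vaddr, size);
	/* ... hand dma.dma_paddr to the hardware and use the ring ... */

	/* Teardown unloads the map and frees both the memory and the tag. */
	ixgbe_dma_free(adapter, &dma);
	return (0);
}
#endif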
/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 **********************************************************************/
int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		rsize, tsize, error = IXGBE_SUCCESS;
	int		txconf = 0, rxconf = 0;
#ifdef PCI_IOV
	enum ixgbe_iov_mode iov_mode;
#endif

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Next allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Then allocate the RX ring struct memory */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the TX descriptor ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

#ifdef PCI_IOV
	iov_mode = ixgbe_get_iov_mode(adapter);
	adapter->pool = ixgbe_max_vfs(iov_mode);
#else
	adapter->pool = 0;
#endif
	/*
	 * Now set up the TX queues.  txconf counts how many have been
	 * configured so that, if anything fails midway, we can unwind
	 * exactly the memory that was already allocated.
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
#ifdef PCI_IOV
		txr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		txr->me = i;
#endif
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
#ifdef PCI_IOV
		rxr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		rxr->me = i;
#endif
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
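/*
 * Illustrative sketch only, kept under "#if 0" so it is never built:
 * the "count what was built, unwind exactly that many" error-handling
 * idiom that ixgbe_allocate_queues() uses above with txconf/rxconf,
 * reduced to a single resource type.  All names here are hypothetical.
 */
#if 0
static int
example_alloc_desc_areas(struct adapter *adapter,
    struct ixgbe_dma_alloc *areas, int nareas, bus_size_t size)
{
	int built, error = 0;

	for (built = 0; built < nareas; built++) {
		if (ixgbe_dma_malloc(adapter, size, &areas[built],
		    BUS_DMA_NOWAIT) != 0) {
			error = ENOMEM;
			goto unwind;
		}
	}
	return (0);

unwind:
	/* Free only the areas that were successfully set up. */
	while (built-- > 0)
		ixgbe_dma_free(adapter, &areas[built]);
	return (error);
}
#endif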