/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
#endif

/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  It will also cause IP forwarding to
**  fail, and that can't be controlled by
**  the stack the way LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tunable
**  interface; enabling it requires a rebuild
**  with this set to TRUE.
*/
static bool ixgbe_rsc_enable = FALSE;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; with the
** default of 20, every 20th packet
** is probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void	ixgbe_free_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void	ixgbe_free_receive_buffers(struct rx_ring *);

static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int	ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
#endif
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);

#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	struct mbuf	*m_head;
	struct adapter	*adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (ixgbe_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}
	return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
	return;
}

#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit Entry Point
** (if_transmit function)
*/
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int		i, err = 0;
#ifdef RSS
	uint32_t	bucket_id;
#endif

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the CPU we are currently running on.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % adapter->num_queues;
#ifdef IXGBE_DEBUG
			if (bucket_id > adapter->num_queues)
				if_printf(ifp, "bucket_id (%d) > num_queues "
				    "(%d)\n", bucket_id, adapter->num_queues);
#endif
		} else
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
	} else
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}

int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		enqueued = 0, err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixgbe_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring	*txr = arg;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
 * Flush all ring buffers
 */
void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/

static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter	*adapter = txr->adapter;
	u32		olinfo_status = 0, cmd_type_len;
	int		i, j, error, nsegs;
	int		first;
	bool		remap = TRUE;
	struct mbuf	*m_head;
	bus_dma_segment_t segs[adapter->num_segs];
	bus_dmamap_t	map;
	struct ixgbe_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {
		struct mbuf *m;

		switch (error) {
		case EFBIG:
			/* Try it again? - one try */
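			/*
			 * EFBIG here means the chain needed more DMA
			 * segments than this tag allows
			 * (adapter->num_segs); m_defrag() below copies
			 * the data into as few clusters as possible so
			 * the reload can fit, which is why a single
			 * retry is all that makes sense.
			 */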
			if (remap == TRUE) {
				remap = FALSE;
				/*
				 * XXX: m_defrag will choke on
				 * non-MCLBYTES-sized clusters
				 */
				m = m_defrag(*m_headp, M_NOWAIT);
				if (m == NULL) {
					adapter->mbuf_defrag_failed++;
					m_freem(*m_headp);
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m;
				goto retry;
			} else
				return (error);
		case ENOMEM:
			txr->no_tx_dma_setup++;
			return (error);
		default:
			txr->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	}

	/* Make certain there are enough descriptors */
	if (txr->tx_avail < (nsegs + 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the appropriate offload context;
	 * this will consume the first descriptor.
	 */
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)
			*m_headp = NULL;
		return (error);
	}

#ifdef IXGBE_FDIR
	/* Do the flow director magic */
	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
		++txr->atr_count;
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);
			txr->atr_count = 0;
		}
	}
#endif

	olinfo_status |= IXGBE_ADVTXD_CC;
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)
			i = 0;
	}

	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	/*
	 * Here we swap the map so the last descriptor,
	 * which gets the completion interrupt, has the
	 * real map, and the first descriptor gets the
	 * unused map from this descriptor.
	 */
	txr->tx_buffers[first].map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop = txd;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

	/* Mark queue as having work */
	if (txr->busy == 0)
		txr->busy = 1;

	return (0);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_tx_buf	*txbuf;
	int			error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
			bus_get_dma_tag(adapter->dev),	/* parent */
			1, 0,			/* alignment, bounds */
			BUS_SPACE_MAXADDR,	/* lowaddr */
			BUS_SPACE_MAXADDR,	/* highaddr */
			NULL, NULL,		/* filter, filterarg */
			IXGBE_TSO_SIZE,		/* maxsize */
			adapter->num_segs,	/* nsegments */
			PAGE_SIZE,		/* maxsegsize */
			0,			/* flags */
			NULL,			/* lockfunc */
			NULL,			/* lockfuncarg */
			&txr->txtag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all; this handles the case where we are partway through */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
	struct ixgbe_tx_buf	*txbuf;
#ifdef DEV_NETMAP
	struct netmap_adapter	*na = NA(adapter->ifp);
	struct netmap_slot	*slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag,
			    txbuf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP descriptor pointer */
		txbuf->eop = NULL;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);

	return (0);
}

/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	}
	free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
	struct ixgbe_tx_buf	*tx_buffer;
	int			i;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#ifdef IXGBE_LEGACY_TX
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}

/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/

static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	u32	vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	int	ehdrlen, ip_hlen = 0;
	u16	etype;
	u8	ipproto = 0;
	int	offload = TRUE;
	int	ctxd = txr->next_avail_desc;
	u16	vtag = 0;
	caddr_t	l3d;


	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor. Hence
	** we need to make one even if not doing offloads.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
		return (0);

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)
		goto no_offloads;

	/*
	 * If the first mbuf only includes the ethernet header, jump to the next one
	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
	 * XXX: And assumes the entire IP header is contained in one mbuf
	 */
	if (mp->m_len == ehdrlen && mp->m_next)
		l3d = mtod(mp->m_next, caddr_t);
	else
		l3d = mtod(mp, caddr_t) + ehdrlen;

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(l3d);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Insert IPv4 checksum into data descriptors */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			ip->ip_sum = 0;
			*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		}
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(l3d);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;

	/* No support for offloads for non-L4 next headers */
	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		else
			offload = false;
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		else
			offload = false;
		break;
	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
		else
			offload = false;
		break;
	default:
		offload = false;
		break;
	}

	if (offload) /* Insert L4 checksum into data descriptors */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

no_offloads:
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (0);
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

	switch (ntohs(eh_type)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return (ENXIO);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
	++txr->tso_tx;
	return (0);
}


/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
void
ixgbe_txeof(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
#ifdef DEV_NETMAP
	struct ifnet		*ifp = adapter->ifp;
#endif
	u32			work, processed = 0;
	u32			limit = adapter->tx_process_limit;
	struct ixgbe_tx_buf	*buf;
	union ixgbe_adv_tx_desc	*txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		txd = txr->tx_base;
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(ifp, txr->me);
		}
		return;
	}
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {
		txr->busy = 0;
		return;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		if (buf->m_head) {
			txr->bytes +=
			    buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag,
			    buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
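			/*
			 * 'work' runs from -(distance to ring end) up to
			 * zero: e.g. with num_desc = 1024 and
			 * next_to_clean = 1000 it starts at -24, so hitting
			 * zero below means the ring wrapped and txd/buf
			 * must be reset to the base of the ring.
			 */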
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes +=
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;

		}
		++txr->packets;
		++processed;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Queue Hang detection: we know there's
	** work outstanding or the first return
	** would have been taken, so increment busy
	** if nothing managed to get cleaned; then
	** local_timer checks it and marks the queue
	** as HUNG if it exceeds the MAX attempts.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything gets cleaned we reset state to 1;
	** note this will turn off HUNG if it's set.
	*/
	if (processed)
		txr->busy = 1;

	if (txr->tx_avail == txr->num_desc)
		txr->busy = 0;

	return;
}


#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu. This would be a performance
** hit, but we only do it at IXGBE_FDIR_RATE of
** packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ip			*ip;
	struct tcphdr			*th;
	struct udphdr			*uh;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				ehdrlen, ip_hlen;
	u16				etype;

	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		break;
	case IPPROTO_UDP:
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
		break;
	default:
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
#endif /* IXGBE_FDIR */

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}

/*********************************************************************
 *
 *  Initialize Hardware RSC (LRO) feature on 82599
 *  for an RX ring; this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPv4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
 *
 **********************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		return;
	}

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[1];
	struct ixgbe_rx_buf	*rxbuf;
	struct mbuf		*mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
		} else
			mp = rxbuf->buf;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/*
		 * If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
			if (error != 0) {
				printf("Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
				m_free(mp);
				rxbuf->buf = NULL;
				goto update;
			}
			rxbuf->buf = mp;
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
		} else {
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;
		}

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw,
		    rxr->tail, rxr->next_to_refresh);
	return;
}

/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
int
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_rx_buf	*rxbuf;
	int			bsize, error;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	if (!(rxr->rx_buffers =
	    (struct ixgbe_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
			1, 0,			/* alignment, bounds */
			BUS_SPACE_MAXADDR,	/* lowaddr */
			BUS_SPACE_MAXADDR,	/* highaddr */
			NULL, NULL,		/* filter, filterarg */
			MJUM16BYTES,		/* maxsize */
			1,			/* nsegments */
			MJUM16BYTES,		/* maxsegsize */
			0,			/* flags */
			NULL,			/* lockfunc */
			NULL,			/* lockfuncarg */
			&rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX dma map\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	ixgbe_free_receive_structures(adapter);
	return (error);
}

static void
ixgbe_free_receive_ring(struct rx_ring *rxr)
{

	for (int i = 0; i < rxr->num_desc; i++) {
		ixgbe_rx_discard(rxr, i);
	}
}

/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct ixgbe_rx_buf	*rxbuf;
	bus_dma_segment_t	seg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter	*na = NA(rxr->adapter->ifp);
	struct netmap_slot	*slot;
#endif /* DEV_NETMAP */

	adapter = rxr->adapter;
	ifp = adapter->ifp;
	dev = adapter->dev;

	/* Clear the ring contents */
	IXGBE_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixgbe_setup_transmit_ring() */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {
		struct mbuf	*mp;

		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->flags = 0;
		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = rxbuf->buf;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);
	}


	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->rx_copies = 0;
	rxr->rx_bytes = 0;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface:
	*/
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IXGBE_RX_UNLOCK(rxr);
	return (0);

fail:
	ixgbe_free_receive_ring(rxr);
	IXGBE_RX_UNLOCK(rxr);
	return (error);
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, since the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		IXGBE_RX_LOCK(rxr);
		ixgbe_free_receive_ring(rxr);
		IXGBE_RX_UNLOCK(rxr);
	}

	return (ENOBUFS);
}


/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl *lro = &rxr->lro;
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		tcp_lro_free(lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}


/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct ixgbe_rx_buf	*rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			ixgbe_rx_discard(rxr, i);
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	return;
}

static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{

	/*
	 * ATM LRO is only for IP/TCP packets and the TCP checksum of the
	 * packet should be computed by hardware. Also it should not have
	 * a VLAN tag in the ethernet header. In case of IPv6 we do not yet
	 * support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXGBE_RX_LOCK(rxr);
}

static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf *rbuf;

	rbuf = &rxr->rx_buffers[i];


	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	bus_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;

	return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data that has been
 *  DMA'ed into host memory to the upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u32			count = adapter->rx_process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
	u16			pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (IXGBE_IS_VF(adapter))
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);

		/*
		** On 82599, which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors; rather, the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before; it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head
		** that determines what we are
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies++;
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);

			/*
			 * In case of multiqueue, we have RXCSUM.PCSD bit set
			 * and never cleared. This means we have RSS hash
			 * available to be used.
			 */
			if (adapter->num_queues > 1) {
				sendmp->m_pkthdr.flowid =
				    le32toh(cur->wb.lower.hi_dword.rss);
				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
				case IXGBE_RXDADV_RSSTYPE_IPV4:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6_EX);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6_EX);
					break;
#if __FreeBSD_version > 1100000
				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6_EX);
					break;
#endif
				default:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_OPAQUE_HASH);
				}
			} else {
				sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	tcp_lro_flush_all(lro);

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);
	else
		return (FALSE);
}
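
/*
 * The flowid and hash type recorded by the receive loop above travel
 * with the mbuf to the stack.  The sketch below is illustrative only
 * and is not part of the driver: it shows how a consumer could fold
 * the recorded RSS flow back onto a bounded set of buckets, trusting
 * the flowid only when a real hash type was stored.  The helper name
 * and the nbuckets parameter are hypothetical.
 */
static __unused u32
ixgbe_flow_bucket_sketch(struct mbuf *m, u32 nbuckets)
{
	/* A recorded hash type means m_pkthdr.flowid is meaningful */
	if (nbuckets != 0 && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		return (m->m_pkthdr.flowid % nbuckets);

	/* Fall back to bucket 0 when no usable hash was recorded */
	return (0);
}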

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  does not spend time verifying it again.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
	bool	sctp = false;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = true;

	/* IPv4 checksum */
	if (status & IXGBE_RXD_STAT_IPCS) {
		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
		/* IP Checksum Good */
		if (!(errors & IXGBE_RXD_ERR_IPE))
			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
	}
	/* TCP/UDP/SCTP checksum */
	if (status & IXGBE_RXD_STAT_L4CS) {
		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}

/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *)arg = segs->ds_addr;
	return;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int r;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
	    DBA_ALIGN, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    size,			/* maxsize */
	    1,				/* nsegments */
	    size,			/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &dma->dma_tag);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create "
		    "failed; error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc "
		    "failed; error %u\n", r);
		goto fail_1;
	}
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size,
	    ixgbe_dmamap_cb,
	    &dma->dma_paddr,
	    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load "
		    "failed; error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}
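
/*
 * Minimal usage sketch for the two DMA helpers above, illustrative
 * only (the scratch allocation and the function name are
 * hypothetical).  The descriptor rings allocated below follow
 * exactly this pattern, with sizes rounded up to DBA_ALIGN.
 */
static __unused int
ixgbe_dma_usage_sketch(struct adapter *adapter)
{
	struct ixgbe_dma_alloc	scratch;
	int			error;

	/* One contiguous, bus-visible page; wired and mapped for DMA */
	error = ixgbe_dma_malloc(adapter, PAGE_SIZE, &scratch,
	    BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);

	/* dma_vaddr is the kernel mapping, dma_paddr the bus address */
	bzero(scratch.dma_vaddr, scratch.dma_size);

	/* Sync, unload and release the map, memory and tag */
	ixgbe_dma_free(adapter, &scratch);

	return (0);
}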
2122 * 2123 **********************************************************************/ 2124 int 2125 ixgbe_allocate_queues(struct adapter *adapter) 2126 { 2127 device_t dev = adapter->dev; 2128 struct ix_queue *que; 2129 struct tx_ring *txr; 2130 struct rx_ring *rxr; 2131 int rsize, tsize, error = IXGBE_SUCCESS; 2132 int txconf = 0, rxconf = 0; 2133 #ifdef PCI_IOV 2134 enum ixgbe_iov_mode iov_mode; 2135 #endif 2136 2137 /* First allocate the top level queue structs */ 2138 if (!(adapter->queues = 2139 (struct ix_queue *) malloc(sizeof(struct ix_queue) * 2140 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2141 device_printf(dev, "Unable to allocate queue memory\n"); 2142 error = ENOMEM; 2143 goto fail; 2144 } 2145 2146 /* First allocate the TX ring struct memory */ 2147 if (!(adapter->tx_rings = 2148 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 2149 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2150 device_printf(dev, "Unable to allocate TX ring memory\n"); 2151 error = ENOMEM; 2152 goto tx_fail; 2153 } 2154 2155 /* Next allocate the RX */ 2156 if (!(adapter->rx_rings = 2157 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 2158 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 2159 device_printf(dev, "Unable to allocate RX ring memory\n"); 2160 error = ENOMEM; 2161 goto rx_fail; 2162 } 2163 2164 /* For the ring itself */ 2165 tsize = roundup2(adapter->num_tx_desc * 2166 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); 2167 2168 #ifdef PCI_IOV 2169 iov_mode = ixgbe_get_iov_mode(adapter); 2170 adapter->pool = ixgbe_max_vfs(iov_mode); 2171 #else 2172 adapter->pool = 0; 2173 #endif 2174 /* 2175 * Now set up the TX queues, txconf is needed to handle the 2176 * possibility that things fail midcourse and we need to 2177 * undo memory gracefully 2178 */ 2179 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 2180 /* Set up some basics */ 2181 txr = &adapter->tx_rings[i]; 2182 txr->adapter = adapter; 2183 #ifdef PCI_IOV 2184 txr->me = ixgbe_pf_que_index(iov_mode, i); 2185 #else 2186 txr->me = i; 2187 #endif 2188 txr->num_desc = adapter->num_tx_desc; 2189 2190 /* Initialize the TX side lock */ 2191 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 2192 device_get_nameunit(dev), txr->me); 2193 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 2194 2195 if (ixgbe_dma_malloc(adapter, tsize, 2196 &txr->txdma, BUS_DMA_NOWAIT)) { 2197 device_printf(dev, 2198 "Unable to allocate TX Descriptor memory\n"); 2199 error = ENOMEM; 2200 goto err_tx_desc; 2201 } 2202 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; 2203 bzero((void *)txr->tx_base, tsize); 2204 2205 /* Now allocate transmit buffers for the ring */ 2206 if (ixgbe_allocate_transmit_buffers(txr)) { 2207 device_printf(dev, 2208 "Critical Failure setting up transmit buffers\n"); 2209 error = ENOMEM; 2210 goto err_tx_desc; 2211 } 2212 #ifndef IXGBE_LEGACY_TX 2213 /* Allocate a buf ring */ 2214 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, 2215 M_WAITOK, &txr->tx_mtx); 2216 if (txr->br == NULL) { 2217 device_printf(dev, 2218 "Critical Failure setting up buf ring\n"); 2219 error = ENOMEM; 2220 goto err_tx_desc; 2221 } 2222 #endif 2223 } 2224 2225 /* 2226 * Next the RX queues... 
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
#ifdef PCI_IOV
		rxr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		rxr->me = i;
#endif
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
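
/*
 * The per-queue wiring established above is a set of parallel arrays:
 * queues[i] points at tx_rings[i] and rx_rings[i].  The sketch below
 * is illustrative only (the function name is hypothetical) and simply
 * walks that layout as a diagnostic.
 */
static __unused void
ixgbe_queue_layout_sketch(struct adapter *adapter)
{
	for (int i = 0; i < adapter->num_queues; i++) {
		struct ix_queue *que = &adapter->queues[i];

		/*
		 * que->me matches the array index; the ring "me" values
		 * are the hardware ring indices, which may differ from
		 * the array index under PCI_IOV.
		 */
		device_printf(adapter->dev, "queue %d: txr me=%d, rxr me=%d\n",
		    que->me, que->txr->me, que->rxr->me);
	}
}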