/******************************************************************************

  Copyright (c) 2001-2017, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/


#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on 82599 and later.
 *  It also causes IP forwarding to fail, and unlike LRO that cannot
 *  be controlled by the stack.  For all these reasons I've deemed it
 *  best to leave this off and not bother with a tunable interface;
 *  enabling it requires recompiling with this set to TRUE.
 */
static bool ixgbe_rsc_enable = FALSE;

/*
 * For Flow Director: this is the number of TX packets we sample
 * for the filter pool; with the default of 20, every 20th packet
 * will be probed.
 *
 * This feature can be disabled by setting this to 0.
 */
static int atr_sample_rate = 20;

/************************************************************************
 * Local Function prototypes
 ************************************************************************/
static void ixgbe_setup_transmit_ring(struct tx_ring *);
static void ixgbe_free_transmit_buffers(struct tx_ring *);
static int ixgbe_setup_receive_ring(struct rx_ring *);
static void ixgbe_free_receive_buffers(struct rx_ring *);
static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void ixgbe_refresh_mbufs(struct rx_ring *, int);
static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int ixgbe_tx_ctx_setup(struct tx_ring *,
    struct mbuf *, u32 *, u32 *);
static int ixgbe_tso_setup(struct tx_ring *,
    struct mbuf *, u32 *, u32 *);
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
    struct mbuf *, u32);
static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
    struct ixgbe_dma_alloc *, int);
static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);

/************************************************************************
 * ixgbe_legacy_start_locked - Transmit entry point
 *
 *   Called by the stack to initiate a transmit.
 *   The driver will remain in this routine as long as there are
 *   packets to transmit and transmit resources are available.
 *   In case resources are not available, the stack is notified
 *   and the packet is requeued.
 ************************************************************************/
int
ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct mbuf *m_head;
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return (ENETDOWN);
	if (!adapter->link_active)
		return (ENETDOWN);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (ixgbe_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}

	return IXGBE_SUCCESS;
} /* ixgbe_legacy_start_locked */

/************************************************************************
 * ixgbe_legacy_start
 *
 *   Called by the stack, this always uses the first tx ring,
 *   and should not be used with multiqueue tx enabled.
 ************************************************************************/
void
ixgbe_legacy_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_legacy_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	}
} /* ixgbe_legacy_start */

/************************************************************************
 * ixgbe_mq_start - Multiqueue Transmit Entry Point
 *
 *   (if_transmit function)
 ************************************************************************/
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter *adapter = ifp->if_softc;
	struct ix_queue *que;
	struct tx_ring *txr;
	int i, err = 0;
	uint32_t bucket_id;

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the CPU we are currently running on.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
		    &bucket_id) == 0)) {
			i = bucket_id % adapter->num_queues;
#ifdef IXGBE_DEBUG
			if (bucket_id > adapter->num_queues)
				if_printf(ifp,
				    "bucket_id (%d) > num_queues (%d)\n",
				    bucket_id, adapter->num_queues);
#endif
		} else
			i = m->m_pkthdr.flowid % adapter->num_queues;
	} else
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
} /* ixgbe_mq_start */

/************************************************************************
 * ixgbe_mq_start_locked
 ************************************************************************/
int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct mbuf *next;
	int enqueued = 0, err = 0;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return (ENETDOWN);
	if (txr->adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		err = ixgbe_xmit(txr, &next);
		if (err != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
		    (next->m_flags & M_MCAST))
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
		ixgbe_txeof(txr);

	return (err);
} /* ixgbe_mq_start_locked */

/************************************************************************
 * ixgbe_deferred_mq_start
 *
 *   Called from a taskqueue to drain queued transmit packets.
 ************************************************************************/
void
ixgbe_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
} /* ixgbe_deferred_mq_start */

/************************************************************************
 * ixgbe_qflush - Flush all ring buffers
 ************************************************************************/
void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = adapter->tx_rings;
	struct mbuf *m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
} /* ixgbe_qflush */


/************************************************************************
 * ixgbe_xmit
 *
 *   Maps the mbufs to tx descriptors, allowing the
 *   TX engine to transmit the packets.
 *
 *   Return 0 on success, positive on failure
 ************************************************************************/
static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;
	struct mbuf *m_head;
	int i, j, error, nsegs;
	int first;
	u32 olinfo_status = 0, cmd_type_len;
	bool remap = TRUE;
	bus_dma_segment_t segs[adapter->num_segs];
	bus_dmamap_t map;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {
		struct mbuf *m;

		switch (error) {
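		/*
		 * EFBIG: the chain maps to more DMA segments than the
		 * tag allows, so defragment the mbuf once and retry.
		 */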
		case EFBIG:
			/* Try it again? - one try */
			if (remap == TRUE) {
				remap = FALSE;
				/*
				 * XXX: m_defrag will choke on
				 * non-MCLBYTES-sized clusters
				 */
				m = m_defrag(*m_headp, M_NOWAIT);
				if (m == NULL) {
					adapter->mbuf_defrag_failed++;
					m_freem(*m_headp);
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m;
				goto retry;
			} else
				return (error);
		case ENOMEM:
			txr->no_tx_dma_setup++;
			return (error);
		default:
			txr->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	}

	/* Make certain there are enough descriptors */
	if (txr->tx_avail < (nsegs + 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the appropriate offload context;
	 * this will consume the first descriptor.
	 */
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)
			*m_headp = NULL;
		return (error);
	}

	/* Do the flow director magic */
	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
		++txr->atr_count;
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);
			txr->atr_count = 0;
		}
	}

	olinfo_status |= IXGBE_ADVTXD_CC;
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)
			i = 0;
	}

	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	/*
	 * Here we swap the map so the last descriptor,
	 * which gets the completion interrupt, has the
	 * real map, and the first descriptor gets the
	 * unused map from this descriptor.
	 */
	txr->tx_buffers[first].map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop = txd;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

	/* Mark queue as having work */
	if (txr->busy == 0)
		txr->busy = 1;

	return (0);
} /* ixgbe_xmit */


/************************************************************************
 * ixgbe_allocate_transmit_buffers
 *
 *   Allocate memory for tx_buffer structures. The tx_buffer stores all
 *   the information needed to transmit a packet on the wire. This is
 *   called only once at attach; setup is done on every reset.
 ************************************************************************/
static int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	error = bus_dma_tag_create(
	    /* parent */ bus_get_dma_tag(adapter->dev),
	    /* alignment */ 1,
	    /* bounds */ 0,
	    /* lowaddr */ BUS_SPACE_MAXADDR,
	    /* highaddr */ BUS_SPACE_MAXADDR,
	    /* filter */ NULL,
	    /* filterarg */ NULL,
	    /* maxsize */ IXGBE_TSO_SIZE,
	    /* nsegments */ adapter->num_segs,
	    /* maxsegsize */ PAGE_SIZE,
	    /* flags */ 0,
	    /* lockfunc */ NULL,
	    /* lockfuncarg */ NULL,
	    &txr->txtag);
	if (error != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	txr->tx_buffers =
	    (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (txr->tx_buffers == NULL) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);

	return (error);
} /* ixgbe_allocate_transmit_buffers */

/************************************************************************
 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
 ************************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);

#ifdef DEV_NETMAP
	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
		/*
		 * (under lock): if in netmap mode, do some consistency
		 * checks and set slot to entry 0 of the netmap ring.
		 */
		slot = netmap_reset(na, NR_TX, txr->me, 0);
	}
#endif /* DEV_NETMAP */

	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}

#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag,
			    txbuf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */

		/* Clear the EOP descriptor pointer */
		txbuf->eop = NULL;
	}

	/* Set the rate at which we sample packets */
	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
		txr->atr_sample = atr_sample_rate;

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
} /* ixgbe_setup_transmit_ring */

/************************************************************************
 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
 ************************************************************************/
int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);

	return (0);
} /* ixgbe_setup_transmit_structures */

/************************************************************************
 * ixgbe_free_transmit_structures - Free all transmit rings.
 ************************************************************************/
void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	}
	free(adapter->tx_rings, M_DEVBUF);
} /* ixgbe_free_transmit_structures */

/************************************************************************
 * ixgbe_free_transmit_buffers
 *
 *   Free transmit ring related data structures.
 ************************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int i;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag, tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag, tx_buffer->map);
			bus_dmamap_destroy(txr->txtag, tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
} /* ixgbe_free_transmit_buffers */

/************************************************************************
 * ixgbe_tx_ctx_setup
 *
 *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
 ************************************************************************/
static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	int ehdrlen, ip_hlen = 0;
	int offload = TRUE;
	int ctxd = txr->next_avail_desc;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd_mlhl = 0;
	u16 vtag = 0;
	u16 etype;
	u8 ipproto = 0;
	caddr_t l3d;


	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];

	/*
	 * In advanced descriptors the vlan tag must
	 * be placed into the context descriptor. Hence
	 * we need to make one even if not doing offloads.
	 */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
	    (offload == FALSE))
		return (0);

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)
		goto no_offloads;

	/*
	 * If the first mbuf only includes the ethernet header,
	 * jump to the next one
	 * XXX: This assumes the stack splits mbufs containing headers
	 *      on header boundaries
	 * XXX: And assumes the entire IP header is contained in one mbuf
	 */
	if (mp->m_len == ehdrlen && mp->m_next)
		l3d = mtod(mp->m_next, caddr_t);
	else
		l3d = mtod(mp, caddr_t) + ehdrlen;

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(l3d);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Insert IPv4 checksum into data descriptors */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			ip->ip_sum = 0;
			*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		}
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(l3d);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;

	/* No support for offloads for non-L4 next headers */
	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags &
		    (CSUM_IP_TCP | CSUM_IP6_TCP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		else
			offload = false;
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP6_UDP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		else
			offload = false;
		break;
	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags &
		    (CSUM_IP_SCTP | CSUM_IP6_SCTP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
		else
			offload = false;
		break;
	default:
		offload = false;
		break;
	}

	if (offload) /* Insert L4 checksum into data descriptors */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

no_offloads:
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (0);
} /* ixgbe_tx_ctx_setup */

/************************************************************************
 * ixgbe_tso_setup
 *
 *   Setup work for hardware segmentation offload (TSO) on
 *   adapters using advanced tx descriptors
 ************************************************************************/
static int
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
    u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

	switch (ntohs(eh_type)) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return (ENXIO);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
	++txr->tso_tx;

	return (0);
} /* ixgbe_tso_setup */


/************************************************************************
 * ixgbe_txeof
 *
 *   Examine each tx_buffer in the used queue. If the hardware is done
 *   processing the packet then free associated resources. The
 *   tx_buffer is put back on the free queue.
 ************************************************************************/
void
ixgbe_txeof(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *buf;
	union ixgbe_adv_tx_desc *txd;
	u32 work, processed = 0;
	u32 limit = adapter->tx_process_limit;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
		struct netmap_adapter *na = NA(adapter->ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		txd = txr->tx_base;
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(adapter->ifp, txr->me);
		}
		return;
	}
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {
		txr->busy = 0;
		return;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break; /* I/O not complete */

		if (buf->m_head) {
			txr->bytes += buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
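			/*
			 * 'work' runs from -num_desc up toward zero;
			 * reaching zero means the index has gone past
			 * the end of the ring and must wrap.
			 */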
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes += buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag, buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag, buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;

		}
		++txr->packets;
		++processed;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	 * Queue hang detection: we know there's work outstanding
	 * or the first return would have been taken, so increment
	 * busy if nothing managed to get cleaned; local_timer will
	 * then check it and mark the queue HUNG if it exceeds the
	 * maximum number of attempts.
	 */
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	 * If anything gets cleaned we reset state to 1;
	 * note this will turn off HUNG if it's set.
	 */
	if (processed)
		txr->busy = 1;

	if (txr->tx_avail == txr->num_desc)
		txr->busy = 0;

	return;
} /* ixgbe_txeof */

/************************************************************************
 * ixgbe_rsc_count
 *
 *   Used to detect a descriptor that has been merged by Hardware RSC.
 ************************************************************************/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
} /* ixgbe_rsc_count */

/************************************************************************
 * ixgbe_setup_hw_rsc
 *
 *   Initialize the Hardware RSC (LRO) feature on 82599 for an RX ring;
 *   this is toggled by the LRO capability even though it is
 *   transparent to the stack.
 *
 *   NOTE: Since this HW feature only works with IPv4 and
 *         testing has shown soft LRO to be as effective,
 *         this feature will be disabled by default.
 ************************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_hw *hw = &adapter->hw;
	u32 rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		return;
	}

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP
	/* Always strip CRC unless Netmap disabled it */
	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
	    ix_crcstrip)
#endif /* DEV_NETMAP */
		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	 * Limit the total number of descriptors that
	 * can be combined, so it does not exceed 64K
	 */
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
} /* ixgbe_setup_hw_rsc */

/************************************************************************
 * ixgbe_refresh_mbufs
 *
 *   Refresh mbuf buffers for RX descriptor rings.
 *   - Now keeps its own state, so discards due to resource exhaustion
 *     are unnecessary; if an mbuf cannot be obtained it just returns,
 *     keeping its placeholder, and can simply be called again later.
 ************************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_rx_buf *rxbuf;
	struct mbuf *mp;
	bus_dma_segment_t seg[1];
	int i, j, nsegs, error;
	bool refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
			    rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
		} else
			mp = rxbuf->buf;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/*
		 * If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
			    mp, seg, &nsegs, BUS_DMA_NOWAIT);
			if (error != 0) {
				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
				m_free(mp);
				rxbuf->buf = NULL;
				goto update;
			}
			rxbuf->buf = mp;
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
		} else {
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;
		}

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)
			j = 0;
	}

update:
	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);

	return;
} /* ixgbe_refresh_mbufs */

/************************************************************************
 * ixgbe_allocate_receive_buffers
 *
 *   Allocate memory for rx_buffer structures. Since we use one
 *   rx_buffer per received packet, the maximum number of rx_buffer's
 *   that we'll need is equal to the number of receive descriptors
 *   that we've allocated.
 ************************************************************************/
static int
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter *adapter = rxr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_rx_buf *rxbuf;
	int bsize, error;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (rxr->rx_buffers == NULL) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	error = bus_dma_tag_create(
	    /* parent */ bus_get_dma_tag(dev),
	    /* alignment */ 1,
	    /* bounds */ 0,
	    /* lowaddr */ BUS_SPACE_MAXADDR,
	    /* highaddr */ BUS_SPACE_MAXADDR,
	    /* filter */ NULL,
	    /* filterarg */ NULL,
	    /* maxsize */ MJUM16BYTES,
	    /* nsegments */ 1,
	    /* maxsegsize */ MJUM16BYTES,
	    /* flags */ 0,
	    /* lockfunc */ NULL,
	    /* lockfuncarg */ NULL,
	    &rxr->ptag);
	if (error != 0) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX dma map\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	ixgbe_free_receive_structures(adapter);

	return (error);
} /* ixgbe_allocate_receive_buffers */

/************************************************************************
 * ixgbe_free_receive_ring
 ************************************************************************/
static void
ixgbe_free_receive_ring(struct rx_ring *rxr)
{
	for (int i = 0; i < rxr->num_desc; i++) {
		ixgbe_rx_discard(rxr, i);
	}
} /* ixgbe_free_receive_ring */

/************************************************************************
 * ixgbe_setup_receive_ring
 *
 *   Initialize a receive ring and its buffers.
 ************************************************************************/
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter *adapter;
	struct ifnet *ifp;
	device_t dev;
	struct ixgbe_rx_buf *rxbuf;
	struct lro_ctrl *lro = &rxr->lro;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	bus_dma_segment_t seg[1];
	int rsize, nsegs, error = 0;

	adapter = rxr->adapter;
	ifp = adapter->ifp;
	dev = adapter->dev;

	/* Clear the ring contents */
	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
		slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */

	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {
		struct mbuf *mp;

		rxbuf = &rxr->rx_buffers[j];

#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */

		rxbuf->flags = 0;
		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
		    adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = rxbuf->buf;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);
	}


	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->rx_copies = 0;
	rxr->rx_bytes = 0;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	 * Now set up the LRO interface
	 */
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IXGBE_RX_UNLOCK(rxr);

	return (0);

fail:
	ixgbe_free_receive_ring(rxr);
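	/* Drop the ring lock before returning the error to the caller */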
	IXGBE_RX_UNLOCK(rxr);

	return (error);
} /* ixgbe_setup_receive_ring */

/************************************************************************
 * ixgbe_setup_receive_structures - Initialize all receive rings.
 ************************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, since the failing case will
	 * have cleaned up for itself. 'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		IXGBE_RX_LOCK(rxr);
		ixgbe_free_receive_ring(rxr);
		IXGBE_RX_UNLOCK(rxr);
	}

	return (ENOBUFS);
} /* ixgbe_setup_receive_structures */


/************************************************************************
 * ixgbe_free_receive_structures - Free all receive rings.
 ************************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		tcp_lro_free(&rxr->lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
} /* ixgbe_free_receive_structures */


/************************************************************************
 * ixgbe_free_receive_buffers - Free receive ring data structures
 ************************************************************************/
static void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_rx_buf *rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			ixgbe_rx_discard(rxr, i);
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	return;
} /* ixgbe_free_receive_buffers */

/************************************************************************
 * ixgbe_rx_input
 ************************************************************************/
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
    u32 ptype)
{
	/*
	 * ATM LRO is only for IP/TCP packets, and the TCP checksum of the
	 * packet should be computed by hardware. Also, it should not have
	 * a VLAN tag in the ethernet header. In case of IPv6 we do not yet
	 * support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	(*ifp->if_input)(ifp, m);
} /* ixgbe_rx_input */

/************************************************************************
 * ixgbe_rx_discard
 ************************************************************************/
static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf *rbuf;

	rbuf = &rxr->rx_buffers[i];

	/*
	 * With advanced descriptors the writeback clobbers the buffer
	 * addrs, so it's easier to just free the existing mbufs and
	 * take the normal refresh path to get new buffers and mapping.
	 */

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	bus_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;

	return;
} /* ixgbe_rx_discard */


/************************************************************************
 * ixgbe_rxeof
 *
 *   Executes in interrupt context. It replenishes the
 *   mbufs in the descriptor ring and sends data which has
 *   been dma'ed into host memory to the upper layer.
 *
 *   Return TRUE for more work, FALSE for all clean.
 ************************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter *adapter = que->adapter;
	struct rx_ring *rxr = que->rxr;
	struct ifnet *ifp = adapter->ifp;
	struct lro_ctrl *lro = &rxr->lro;
	union ixgbe_adv_rx_desc *cur;
	struct ixgbe_rx_buf *rbuf, *nbuf;
	int i, nextp, processed = 0;
	u32 staterr = 0;
	u32 count = adapter->rx_process_limit;
	u16 pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
		/* Same as the txeof routine: wakeup clients on intr. */
		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
			IXGBE_RX_UNLOCK(rxr);
			return (FALSE);
		}
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf *sendmp, *mp;
		u32 rsc, ptype;
		u16 len;
		u16 vtag = 0;
		bool eop;
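		/*
		 * Each pass through this loop consumes one descriptor;
		 * 'count' caps the work per call at rx_process_limit.
		 */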
		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (adapter->feat_en & IXGBE_FEATURE_VF)
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);

		/*
		 * On 82599, which supports a hardware LRO (called HW RSC),
		 * packets need not be fragmented across sequential
		 * descriptors; rather, the next descriptor is indicated in
		 * bits of the descriptor. This also means that we might
		 * process more than one packet at a time, something that
		 * has never been true before; it required eliminating
		 * global chain pointers in favor of what we are doing
		 * here. -jfv
		 */
		if (!eop) {
			/*
			 * Figure out the next descriptor
			 * of this frame.
			 */
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		 * Rather than using the fmp/lmp global pointers
		 * we now keep the head of a packet chain in the
		 * buffer struct and pass this along from one
		 * descriptor to the next, until we get EOP.
		 */
		mp->m_len = len;
		/*
		 * See if there is a stored head
		 * that determines what we are
		 */
		sendmp = rbuf->fmp;
		if (sendmp != NULL) { /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize. This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data, sendmp->m_data,
					    len);
					sendmp->m_len = len;
					rxr->rx_copies++;
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);

			/*
			 * In case of multiqueue, we have RXCSUM.PCSD bit set
			 * and never cleared. This means we have RSS hash
			 * available to be used.
			 */
			if (adapter->num_queues > 1) {
				sendmp->m_pkthdr.flowid =
				    le32toh(cur->wb.lower.hi_dword.rss);
				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
				case IXGBE_RXDADV_RSSTYPE_IPV4:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6_EX);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6_EX);
					break;
#if __FreeBSD_version > 1100000
				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6_EX);
					break;
#endif
				default:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_OPAQUE_HASH);
				}
			} else {
				sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			IXGBE_RX_UNLOCK(rxr);
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			IXGBE_RX_LOCK(rxr);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	IXGBE_RX_UNLOCK(rxr);

	/* Flush any outstanding LRO work */
	tcp_lro_flush_all(lro);

	/* Still have cleaning to do? */
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);

	return (FALSE);
} /* ixgbe_rxeof */


/************************************************************************
 * ixgbe_rx_checksum
 *
 *   Verify that the hardware indicated that the checksum is valid.
 *   Inform the stack about the status of the checksum so that it
 *   doesn't spend time verifying it again.
 ************************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
{
	u16  status = (u16)staterr;
	u8   errors = (u8)(staterr >> 24);
	bool sctp = false;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = true;

	/* IPv4 checksum */
	if (status & IXGBE_RXD_STAT_IPCS) {
		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
		/* IP Checksum Good */
		if (!(errors & IXGBE_RXD_ERR_IPE))
			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
	}
	/* TCP/UDP/SCTP checksum */
	if (status & IXGBE_RXD_STAT_L4CS) {
		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
} /* ixgbe_rx_checksum */
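/*
 * Illustrative note on the fields consumed above (a sketch of the layout
 * as this function uses it, not a spec excerpt): the low bits of the
 * write-back status_error word carry the STAT bits (DD, EOP, VP, IPCS,
 * L4CS, ...) while bits 24-31 carry the error bits (IPE, TCPE, ...),
 * which is why staterr is split into a u16 status and a u8 taken from
 * "staterr >> 24".  Setting csum_data to 0xffff for TCP/UDP tells the
 * stack the checksum was fully verified, so no partial sum remains to be
 * folded in.
 */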
/************************************************************************
 * ixgbe_dmamap_cb - Manage DMA'able memory
 ************************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error)
		return;

	*(bus_addr_t *)arg = segs->ds_addr;
} /* ixgbe_dmamap_cb */

/************************************************************************
 * ixgbe_dma_malloc
 ************************************************************************/
static int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
                 struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int      r;

	r = bus_dma_tag_create(
	     /* parent */      bus_get_dma_tag(adapter->dev),
	     /* alignment */   DBA_ALIGN,
	     /* bounds */      0,
	     /* lowaddr */     BUS_SPACE_MAXADDR,
	     /* highaddr */    BUS_SPACE_MAXADDR,
	     /* filter */      NULL,
	     /* filterarg */   NULL,
	     /* maxsize */     size,
	     /* nsegments */   1,
	     /* maxsegsize */  size,
	     /* flags */       BUS_DMA_ALLOCNOW,
	     /* lockfunc */    NULL,
	     /* lockfuncarg */ NULL,
	     &dma->dma_tag);
	if (r != 0) {
		device_printf(dev,
		    "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
		    r);
		goto fail_0;
	}

	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev,
		    "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n",
		    r);
		goto fail_1;
	}

	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev,
		    "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
		goto fail_2;
	}

	dma->dma_size = size;

	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;

	return (r);
} /* ixgbe_dma_malloc */

/************************************************************************
 * ixgbe_dma_free
 ************************************************************************/
static void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
} /* ixgbe_dma_free */
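
/*
 * Illustrative usage of the two helpers above (a sketch only; "ring" is a
 * hypothetical local, not driver state):
 *
 *	struct ixgbe_dma_alloc ring;
 *
 *	if (ixgbe_dma_malloc(adapter, 4096, &ring, BUS_DMA_NOWAIT) == 0) {
 *		// ring.dma_vaddr is the kernel mapping to bzero()/fill,
 *		// ring.dma_paddr is the bus address to program into the
 *		// hardware ring-base registers.
 *		...
 *		ixgbe_dma_free(adapter, &ring);
 *	}
 */
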
/************************************************************************
 * ixgbe_allocate_queues
 *
 *   Allocate memory for the transmit and receive rings, and then
 *   the descriptors associated with each; called only once at attach.
 ************************************************************************/
int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t        dev = adapter->dev;
	struct ix_queue *que;
	struct tx_ring  *txr;
	struct rx_ring  *rxr;
	int             rsize, tsize, error = IXGBE_SUCCESS;
	int             txconf = 0, rxconf = 0;

	/* First, allocate the top level queue structs */
	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->queues == NULL) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Second, allocate the TX ring struct memory */
	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->tx_rings == NULL) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Third, allocate the RX ring */
	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->rx_rings == NULL) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
	    DBA_ALIGN);

	/*
	 * Now set up the TX queues; txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->br = NULL;
		/* In case SR-IOV is enabled, align the index properly */
		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
		    i);
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
		    BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
			/* Allocate a buf ring */
			txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
			    M_WAITOK, &txr->tx_mtx);
			if (txr->br == NULL) {
				device_printf(dev,
				    "Critical Failure setting up buf ring\n");
				error = ENOMEM;
				goto err_tx_desc;
			}
		}
	}
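	/*
	 * Illustrative note (a sketch, not driver code): when the legacy
	 * transmit path is disabled, the buf_ring allocated above is the
	 * per-queue staging area for the multiqueue transmit path; senders
	 * enqueue with drbr_enqueue(ifp, txr->br, m) and the start routine
	 * drains it under the TX lock with drbr_peek()/drbr_advance()
	 * while descriptors remain available.
	 */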
	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
	    DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		/* In case SR-IOV is enabled, align the index properly */
		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
		    i);
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
		    BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	 * Finally set up the queue holding structs
	 */
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
} /* ixgbe_allocate_queues */
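
/*
 * Illustrative note on the unwind above (not driver code): txconf and
 * rxconf count how many rings have had their descriptor DMA areas
 * allocated, so the fall-through error labels release resources in
 * reverse order of allocation.  For example, a descriptor-allocation
 * failure on the third RX ring frees the two RX descriptor areas already
 * set up, then every TX descriptor area, then the three malloc()ed
 * arrays, so the caller only needs to check the return value:
 *
 *	if (ixgbe_allocate_queues(adapter) != 0)
 *		return (ENOMEM);	// nothing further to clean up here
 */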