/******************************************************************************

  Copyright (c) 2013-2017, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**    This was separated to allow usage by
**    both the PF and VF drivers.
39 */ 40 41 #ifndef IXL_STANDALONE_BUILD 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 #include "opt_rss.h" 45 #endif 46 47 #include "ixl.h" 48 49 #ifdef RSS 50 #include <net/rss_config.h> 51 #endif 52 53 /* Local Prototypes */ 54 static void ixl_rx_checksum(struct mbuf *, u32, u32, u8); 55 static void ixl_refresh_mbufs(struct ixl_queue *, int); 56 static int ixl_xmit(struct ixl_queue *, struct mbuf **); 57 static int ixl_tx_setup_offload(struct ixl_queue *, 58 struct mbuf *, u32 *, u32 *); 59 static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *); 60 static void ixl_queue_sw_irq(struct ixl_vsi *, int); 61 62 static inline void ixl_rx_discard(struct rx_ring *, int); 63 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *, 64 struct mbuf *, u8); 65 66 static inline bool ixl_tso_detect_sparse(struct mbuf *mp); 67 static inline u32 ixl_get_tx_head(struct ixl_queue *que); 68 69 #ifdef DEV_NETMAP 70 #include <dev/netmap/if_ixl_netmap.h> 71 #if __FreeBSD_version >= 1200000 72 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1; 73 #endif 74 #endif /* DEV_NETMAP */ 75 76 /* 77 * @key key is saved into this parameter 78 */ 79 void 80 ixl_get_default_rss_key(u32 *key) 81 { 82 MPASS(key != NULL); 83 84 u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 85 0x183cfd8c, 0xce880440, 0x580cbc3c, 86 0x35897377, 0x328b25e1, 0x4fa98922, 87 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 88 0x0, 0x0, 0x0}; 89 90 bcopy(rss_seed, key, IXL_RSS_KEY_SIZE); 91 } 92 93 /** 94 * i40e_vc_stat_str - convert virtchnl status err code to a string 95 * @hw: pointer to the HW structure 96 * @stat_err: the status error code to convert 97 **/ 98 const char * 99 i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err) 100 { 101 switch (stat_err) { 102 case VIRTCHNL_STATUS_SUCCESS: 103 return "OK"; 104 case VIRTCHNL_ERR_PARAM: 105 return "VIRTCHNL_ERR_PARAM"; 106 case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH: 107 return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH"; 108 case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR: 109 return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR"; 110 case VIRTCHNL_STATUS_ERR_INVALID_VF_ID: 111 return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID"; 112 case VIRTCHNL_STATUS_NOT_SUPPORTED: 113 return "VIRTCHNL_STATUS_NOT_SUPPORTED"; 114 } 115 116 snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err); 117 return hw->err_str; 118 } 119 120 /* 121 * PCI BUSMASTER needs to be set for proper operation. 122 */ 123 void 124 ixl_set_busmaster(device_t dev) 125 { 126 u16 pci_cmd_word; 127 128 pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); 129 pci_cmd_word |= PCIM_CMD_BUSMASTEREN; 130 pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); 131 } 132 133 /* 134 * Rewrite the ENABLE bit in the MSIX control register 135 */ 136 void 137 ixl_set_msix_enable(device_t dev) 138 { 139 int msix_ctrl, rid; 140 141 pci_find_cap(dev, PCIY_MSIX, &rid); 142 rid += PCIR_MSIX_CTRL; 143 msix_ctrl = pci_read_config(dev, rid, 2); 144 msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; 145 pci_write_config(dev, rid, msix_ctrl, 2); 146 } 147 148 149 /* 150 ** Multiqueue Transmit driver 151 */ 152 int 153 ixl_mq_start(struct ifnet *ifp, struct mbuf *m) 154 { 155 struct ixl_vsi *vsi = ifp->if_softc; 156 struct ixl_queue *que; 157 struct tx_ring *txr; 158 int err, i; 159 #ifdef RSS 160 u32 bucket_id; 161 #endif 162 163 /* 164 * Which queue to use: 165 * 166 * When doing RSS, map it to the same outbound 167 * queue as the incoming flow would be mapped to. 
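	 * A rough sketch of the mapping done just below (only names that
	 * already appear in this function, nothing new): the stack's flow
	 * hash picks an RSS bucket, and the bucket picks the ring:
	 *
	 *	rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
	 *	    &bucket_id);
	 *	i = bucket_id % vsi->num_queues;
	 *
	 * with a plain flowid (or curcpu) modulo as the fallback.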
	 * If everything is set up correctly, that will be the same
	 * RSS bucket the current CPU maps to.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue	*que = txr->que;
	struct ixl_vsi		*vsi = que->vsi;
	struct mbuf		*next;
	int			err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue	*que = arg;
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	struct ifnet		*ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi	*vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring	*txr = &que->txr;
		struct mbuf	*m;
		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf	*m;
	int		num, mss;

	num = 0;
	mss = mp->m_pkthdr.tso_segsz;

	/* Exclude first mbuf; assume it contains all headers */
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len % mp->m_pkthdr.tso_segsz;

		if (num > IXL_SPARSE_CHAIN)
			return (true);
		if (mss < 1) {
			num = (mss == 0) ? 0 : 1;
			mss += mp->m_pkthdr.tso_segsz;
		}
	}

	return (false);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
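 *  Each data descriptor's second quadword packs the command bits,
 *  header offsets, buffer length and VLAN tag together; roughly (a
 *  sketch of what the descriptor loop below builds, using only the
 *  local variables of ixl_xmit):
 *
 *	I40E_TX_DESC_DTYPE_DATA
 *	    | ((u64)cmd    << I40E_TXD_QW1_CMD_SHIFT)
 *	    | ((u64)off    << I40E_TXD_QW1_OFFSET_SHIFT)
 *	    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
 *	    | ((u64)vtag   << I40E_TXD_QW1_L2TAG1_SHIFT)
 *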
301 * - return 0 on success, positive on failure 302 * 303 **********************************************************************/ 304 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 305 306 static int 307 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp) 308 { 309 struct ixl_vsi *vsi = que->vsi; 310 struct i40e_hw *hw = vsi->hw; 311 struct tx_ring *txr = &que->txr; 312 struct ixl_tx_buf *buf; 313 struct i40e_tx_desc *txd = NULL; 314 struct mbuf *m_head, *m; 315 int i, j, error, nsegs; 316 int first, last = 0; 317 u16 vtag = 0; 318 u32 cmd, off; 319 bus_dmamap_t map; 320 bus_dma_tag_t tag; 321 bus_dma_segment_t segs[IXL_MAX_TSO_SEGS]; 322 323 cmd = off = 0; 324 m_head = *m_headp; 325 326 /* 327 * Important to capture the first descriptor 328 * used because it will contain the index of 329 * the one we tell the hardware to report back 330 */ 331 first = txr->next_avail; 332 buf = &txr->buffers[first]; 333 map = buf->map; 334 tag = txr->tx_tag; 335 336 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 337 /* Use larger mapping for TSO */ 338 tag = txr->tso_tag; 339 if (ixl_tso_detect_sparse(m_head)) { 340 m = m_defrag(m_head, M_NOWAIT); 341 if (m == NULL) { 342 m_freem(*m_headp); 343 *m_headp = NULL; 344 return (ENOBUFS); 345 } 346 *m_headp = m; 347 } 348 } 349 350 /* 351 * Map the packet for DMA. 352 */ 353 error = bus_dmamap_load_mbuf_sg(tag, map, 354 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 355 356 if (error == EFBIG) { 357 struct mbuf *m; 358 359 m = m_defrag(*m_headp, M_NOWAIT); 360 if (m == NULL) { 361 que->mbuf_defrag_failed++; 362 m_freem(*m_headp); 363 *m_headp = NULL; 364 return (ENOBUFS); 365 } 366 *m_headp = m; 367 368 /* Try it again */ 369 error = bus_dmamap_load_mbuf_sg(tag, map, 370 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 371 372 if (error != 0) { 373 que->tx_dmamap_failed++; 374 m_freem(*m_headp); 375 *m_headp = NULL; 376 return (error); 377 } 378 } else if (error != 0) { 379 que->tx_dmamap_failed++; 380 m_freem(*m_headp); 381 *m_headp = NULL; 382 return (error); 383 } 384 385 /* Make certain there are enough descriptors */ 386 if (nsegs > txr->avail - 2) { 387 txr->no_desc++; 388 error = ENOBUFS; 389 goto xmit_fail; 390 } 391 m_head = *m_headp; 392 393 /* Set up the TSO/CSUM offload */ 394 if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 395 error = ixl_tx_setup_offload(que, m_head, &cmd, &off); 396 if (error) 397 goto xmit_fail; 398 } 399 400 cmd |= I40E_TX_DESC_CMD_ICRC; 401 /* Grab the VLAN tag */ 402 if (m_head->m_flags & M_VLANTAG) { 403 cmd |= I40E_TX_DESC_CMD_IL2TAG1; 404 vtag = htole16(m_head->m_pkthdr.ether_vtag); 405 } 406 407 i = txr->next_avail; 408 for (j = 0; j < nsegs; j++) { 409 bus_size_t seglen; 410 411 buf = &txr->buffers[i]; 412 buf->tag = tag; /* Keep track of the type tag */ 413 txd = &txr->base[i]; 414 seglen = segs[j].ds_len; 415 416 txd->buffer_addr = htole64(segs[j].ds_addr); 417 txd->cmd_type_offset_bsz = 418 htole64(I40E_TX_DESC_DTYPE_DATA 419 | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT) 420 | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT) 421 | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) 422 | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT)); 423 424 last = i; /* descriptor that will get completion IRQ */ 425 426 if (++i == que->num_tx_desc) 427 i = 0; 428 429 buf->m_head = NULL; 430 buf->eop_index = -1; 431 } 432 /* Set the last descriptor for report */ 433 txd->cmd_type_offset_bsz |= 434 htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT)); 435 txr->avail -= nsegs; 436 txr->next_avail = i; 437 438 buf->m_head = m_head; 439 /* Swap the dma 
map between the first and last descriptor. 440 * The descriptor that gets checked on completion will now 441 * have the real map from the first descriptor. 442 */ 443 txr->buffers[first].map = buf->map; 444 buf->map = map; 445 bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE); 446 447 /* Set the index of the descriptor that will be marked done */ 448 buf = &txr->buffers[first]; 449 buf->eop_index = last; 450 451 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 452 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 453 /* 454 * Advance the Transmit Descriptor Tail (Tdt), this tells the 455 * hardware that this frame is available to transmit. 456 */ 457 ++txr->total_packets; 458 wr32(hw, txr->tail, i); 459 460 /* Mark outstanding work */ 461 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); 462 return (0); 463 464 xmit_fail: 465 bus_dmamap_unload(tag, buf->map); 466 return (error); 467 } 468 469 470 /********************************************************************* 471 * 472 * Allocate memory for tx_buffer structures. The tx_buffer stores all 473 * the information needed to transmit a packet on the wire. This is 474 * called only once at attach, setup is done every reset. 475 * 476 **********************************************************************/ 477 int 478 ixl_allocate_tx_data(struct ixl_queue *que) 479 { 480 struct tx_ring *txr = &que->txr; 481 struct ixl_vsi *vsi = que->vsi; 482 device_t dev = vsi->dev; 483 struct ixl_tx_buf *buf; 484 int i, error = 0; 485 486 /* 487 * Setup DMA descriptor areas. 488 */ 489 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 490 1, 0, /* alignment, bounds */ 491 BUS_SPACE_MAXADDR, /* lowaddr */ 492 BUS_SPACE_MAXADDR, /* highaddr */ 493 NULL, NULL, /* filter, filterarg */ 494 IXL_TSO_SIZE, /* maxsize */ 495 IXL_MAX_TX_SEGS, /* nsegments */ 496 IXL_MAX_DMA_SEG_SIZE, /* maxsegsize */ 497 0, /* flags */ 498 NULL, /* lockfunc */ 499 NULL, /* lockfuncarg */ 500 &txr->tx_tag))) { 501 device_printf(dev,"Unable to allocate TX DMA tag\n"); 502 return (error); 503 } 504 505 /* Make a special tag for TSO */ 506 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 507 1, 0, /* alignment, bounds */ 508 BUS_SPACE_MAXADDR, /* lowaddr */ 509 BUS_SPACE_MAXADDR, /* highaddr */ 510 NULL, NULL, /* filter, filterarg */ 511 IXL_TSO_SIZE, /* maxsize */ 512 IXL_MAX_TSO_SEGS, /* nsegments */ 513 IXL_MAX_DMA_SEG_SIZE, /* maxsegsize */ 514 0, /* flags */ 515 NULL, /* lockfunc */ 516 NULL, /* lockfuncarg */ 517 &txr->tso_tag))) { 518 device_printf(dev,"Unable to allocate TX TSO DMA tag\n"); 519 goto free_tx_dma; 520 } 521 522 if (!(txr->buffers = 523 (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) * 524 que->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 525 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 526 error = ENOMEM; 527 goto free_tx_tso_dma; 528 } 529 530 /* Create the descriptor buffer default dma maps */ 531 buf = txr->buffers; 532 for (i = 0; i < que->num_tx_desc; i++, buf++) { 533 buf->tag = txr->tx_tag; 534 error = bus_dmamap_create(buf->tag, 0, &buf->map); 535 if (error != 0) { 536 device_printf(dev, "Unable to create TX DMA map\n"); 537 goto free_buffers; 538 } 539 } 540 541 return 0; 542 543 free_buffers: 544 while (i--) { 545 buf--; 546 bus_dmamap_destroy(buf->tag, buf->map); 547 } 548 549 free(txr->buffers, M_DEVBUF); 550 txr->buffers = NULL; 551 free_tx_tso_dma: 552 bus_dma_tag_destroy(txr->tso_tag); 553 txr->tso_tag = NULL; 554 free_tx_dma: 555 bus_dma_tag_destroy(txr->tx_tag); 556 txr->tx_tag = NULL; 557 558 
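	/*
	 * Every failure path above jumps to one of the labels so that the
	 * resources created before the failure (buffer DMA maps, the
	 * buffer array, the TSO tag, the TX tag) are torn down in reverse
	 * order of creation before the error is returned.
	 */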
return (error); 559 } 560 561 562 /********************************************************************* 563 * 564 * (Re)Initialize a queue transmit ring. 565 * - called by init, it clears the descriptor ring, 566 * and frees any stale mbufs 567 * 568 **********************************************************************/ 569 void 570 ixl_init_tx_ring(struct ixl_queue *que) 571 { 572 #ifdef DEV_NETMAP 573 struct netmap_adapter *na = NA(que->vsi->ifp); 574 struct netmap_slot *slot; 575 #endif /* DEV_NETMAP */ 576 struct tx_ring *txr = &que->txr; 577 struct ixl_tx_buf *buf; 578 579 /* Clear the old ring contents */ 580 IXL_TX_LOCK(txr); 581 582 #ifdef DEV_NETMAP 583 /* 584 * (under lock): if in netmap mode, do some consistency 585 * checks and set slot to entry 0 of the netmap ring. 586 */ 587 slot = netmap_reset(na, NR_TX, que->me, 0); 588 #endif /* DEV_NETMAP */ 589 590 bzero((void *)txr->base, 591 (sizeof(struct i40e_tx_desc)) * que->num_tx_desc); 592 593 /* Reset indices */ 594 txr->next_avail = 0; 595 txr->next_to_clean = 0; 596 597 /* Reset watchdog status */ 598 txr->watchdog_timer = 0; 599 600 /* Free any existing tx mbufs. */ 601 buf = txr->buffers; 602 for (int i = 0; i < que->num_tx_desc; i++, buf++) { 603 if (buf->m_head != NULL) { 604 bus_dmamap_sync(buf->tag, buf->map, 605 BUS_DMASYNC_POSTWRITE); 606 bus_dmamap_unload(buf->tag, buf->map); 607 m_freem(buf->m_head); 608 buf->m_head = NULL; 609 } 610 #ifdef DEV_NETMAP 611 /* 612 * In netmap mode, set the map for the packet buffer. 613 * NOTE: Some drivers (not this one) also need to set 614 * the physical buffer address in the NIC ring. 615 * netmap_idx_n2k() maps a nic index, i, into the corresponding 616 * netmap slot index, si 617 */ 618 if (slot) { 619 int si = netmap_idx_n2k(na->tx_rings[que->me], i); 620 netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si)); 621 } 622 #endif /* DEV_NETMAP */ 623 /* Clear the EOP index */ 624 buf->eop_index = -1; 625 } 626 627 /* Set number of descriptors available */ 628 txr->avail = que->num_tx_desc; 629 630 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 631 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 632 IXL_TX_UNLOCK(txr); 633 } 634 635 636 /********************************************************************* 637 * 638 * Free transmit ring related data structures. 
639 * 640 **********************************************************************/ 641 void 642 ixl_free_que_tx(struct ixl_queue *que) 643 { 644 struct tx_ring *txr = &que->txr; 645 struct ixl_tx_buf *buf; 646 647 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 648 649 for (int i = 0; i < que->num_tx_desc; i++) { 650 buf = &txr->buffers[i]; 651 if (buf->m_head != NULL) { 652 bus_dmamap_sync(buf->tag, buf->map, 653 BUS_DMASYNC_POSTWRITE); 654 m_freem(buf->m_head); 655 buf->m_head = NULL; 656 } 657 bus_dmamap_unload(buf->tag, buf->map); 658 bus_dmamap_destroy(buf->tag, buf->map); 659 } 660 if (txr->buffers != NULL) { 661 free(txr->buffers, M_DEVBUF); 662 txr->buffers = NULL; 663 } 664 if (txr->tx_tag != NULL) { 665 bus_dma_tag_destroy(txr->tx_tag); 666 txr->tx_tag = NULL; 667 } 668 if (txr->tso_tag != NULL) { 669 bus_dma_tag_destroy(txr->tso_tag); 670 txr->tso_tag = NULL; 671 } 672 673 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 674 return; 675 } 676 677 /********************************************************************* 678 * 679 * Setup descriptor for hw offloads 680 * 681 **********************************************************************/ 682 683 static int 684 ixl_tx_setup_offload(struct ixl_queue *que, 685 struct mbuf *mp, u32 *cmd, u32 *off) 686 { 687 struct ether_vlan_header *eh; 688 #ifdef INET 689 struct ip *ip = NULL; 690 #endif 691 struct tcphdr *th = NULL; 692 #ifdef INET6 693 struct ip6_hdr *ip6; 694 #endif 695 int elen, ip_hlen = 0, tcp_hlen; 696 u16 etype; 697 u8 ipproto = 0; 698 bool tso = FALSE; 699 700 /* Set up the TSO context descriptor if required */ 701 if (mp->m_pkthdr.csum_flags & CSUM_TSO) { 702 tso = ixl_tso_setup(que, mp); 703 if (tso) 704 ++que->tso; 705 else 706 return (ENXIO); 707 } 708 709 /* 710 * Determine where frame payload starts. 711 * Jump over vlan headers if already present, 712 * helpful for QinQ too. 
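	 * The header lengths found here feed the descriptor "offset"
	 * field, which counts the MAC header in 2-byte words and the IP
	 * and L4 headers in 4-byte words; a sketch of the packing done
	 * further down (elen, ip_hlen and tcp_hlen are the locals below):
	 *
	 *	off |= (elen >> 1)     << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	 *	off |= (ip_hlen >> 2)  << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
	 *	off |= (tcp_hlen >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
	 *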
713 */ 714 eh = mtod(mp, struct ether_vlan_header *); 715 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 716 etype = ntohs(eh->evl_proto); 717 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 718 } else { 719 etype = ntohs(eh->evl_encap_proto); 720 elen = ETHER_HDR_LEN; 721 } 722 723 switch (etype) { 724 #ifdef INET 725 case ETHERTYPE_IP: 726 ip = (struct ip *)(mp->m_data + elen); 727 ip_hlen = ip->ip_hl << 2; 728 ipproto = ip->ip_p; 729 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 730 /* The IP checksum must be recalculated with TSO */ 731 if (tso) 732 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 733 else 734 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 735 break; 736 #endif 737 #ifdef INET6 738 case ETHERTYPE_IPV6: 739 ip6 = (struct ip6_hdr *)(mp->m_data + elen); 740 ip_hlen = sizeof(struct ip6_hdr); 741 ipproto = ip6->ip6_nxt; 742 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 743 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 744 break; 745 #endif 746 default: 747 break; 748 } 749 750 *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 751 *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 752 753 switch (ipproto) { 754 case IPPROTO_TCP: 755 tcp_hlen = th->th_off << 2; 756 if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) { 757 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 758 *off |= (tcp_hlen >> 2) << 759 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 760 } 761 break; 762 case IPPROTO_UDP: 763 if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) { 764 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 765 *off |= (sizeof(struct udphdr) >> 2) << 766 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 767 } 768 break; 769 case IPPROTO_SCTP: 770 if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) { 771 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 772 *off |= (sizeof(struct sctphdr) >> 2) << 773 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 774 } 775 /* Fall Thru */ 776 default: 777 break; 778 } 779 780 return (0); 781 } 782 783 784 /********************************************************************** 785 * 786 * Setup context for hardware segmentation offload (TSO) 787 * 788 **********************************************************************/ 789 static bool 790 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp) 791 { 792 struct tx_ring *txr = &que->txr; 793 struct i40e_tx_context_desc *TXD; 794 struct ixl_tx_buf *buf; 795 u32 cmd, mss, type, tsolen; 796 u16 etype; 797 int idx, elen, ip_hlen, tcp_hlen; 798 struct ether_vlan_header *eh; 799 #ifdef INET 800 struct ip *ip; 801 #endif 802 #ifdef INET6 803 struct ip6_hdr *ip6; 804 #endif 805 #if defined(INET6) || defined(INET) 806 struct tcphdr *th; 807 #endif 808 u64 type_cmd_tso_mss; 809 810 /* 811 * Determine where frame payload starts. 812 * Jump over vlan headers if already present 813 */ 814 eh = mtod(mp, struct ether_vlan_header *); 815 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 816 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 817 etype = eh->evl_proto; 818 } else { 819 elen = ETHER_HDR_LEN; 820 etype = eh->evl_encap_proto; 821 } 822 823 switch (ntohs(etype)) { 824 #ifdef INET6 825 case ETHERTYPE_IPV6: 826 ip6 = (struct ip6_hdr *)(mp->m_data + elen); 827 if (ip6->ip6_nxt != IPPROTO_TCP) 828 return (ENXIO); 829 ip_hlen = sizeof(struct ip6_hdr); 830 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 831 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 832 tcp_hlen = th->th_off << 2; 833 /* 834 * The corresponding flag is set by the stack in the IPv4 835 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). 
836 * So, set it here because the rest of the flow requires it. 837 */ 838 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 839 break; 840 #endif 841 #ifdef INET 842 case ETHERTYPE_IP: 843 ip = (struct ip *)(mp->m_data + elen); 844 if (ip->ip_p != IPPROTO_TCP) 845 return (ENXIO); 846 ip->ip_sum = 0; 847 ip_hlen = ip->ip_hl << 2; 848 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 849 th->th_sum = in_pseudo(ip->ip_src.s_addr, 850 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 851 tcp_hlen = th->th_off << 2; 852 break; 853 #endif 854 default: 855 printf("%s: CSUM_TSO but no supported IP version (0x%04x)", 856 __func__, ntohs(etype)); 857 return FALSE; 858 } 859 860 /* Ensure we have at least the IP+TCP header in the first mbuf. */ 861 if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr)) 862 return FALSE; 863 864 idx = txr->next_avail; 865 buf = &txr->buffers[idx]; 866 TXD = (struct i40e_tx_context_desc *) &txr->base[idx]; 867 tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen); 868 869 type = I40E_TX_DESC_DTYPE_CONTEXT; 870 cmd = I40E_TX_CTX_DESC_TSO; 871 /* TSO MSS must not be less than 64 */ 872 if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) { 873 que->mss_too_small++; 874 mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS; 875 } 876 mss = mp->m_pkthdr.tso_segsz; 877 878 type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) | 879 ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | 880 ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 881 ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT); 882 TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss); 883 884 TXD->tunneling_params = htole32(0); 885 buf->m_head = NULL; 886 buf->eop_index = -1; 887 888 if (++idx == que->num_tx_desc) 889 idx = 0; 890 891 txr->avail--; 892 txr->next_avail = idx; 893 894 return TRUE; 895 } 896 897 /* 898 * ixl_get_tx_head - Retrieve the value from the 899 * location the HW records its HEAD index 900 */ 901 static inline u32 902 ixl_get_tx_head(struct ixl_queue *que) 903 { 904 struct tx_ring *txr = &que->txr; 905 void *head = &txr->base[que->num_tx_desc]; 906 return LE32_TO_CPU(*(volatile __le32 *)head); 907 } 908 909 /********************************************************************** 910 * 911 * Get index of last used descriptor/buffer from hardware, and clean 912 * the descriptors/buffers up to that index. 913 * 914 **********************************************************************/ 915 static bool 916 ixl_txeof_hwb(struct ixl_queue *que) 917 { 918 struct tx_ring *txr = &que->txr; 919 u32 first, last, head, done; 920 struct ixl_tx_buf *buf; 921 struct i40e_tx_desc *tx_desc, *eop_desc; 922 923 mtx_assert(&txr->mtx, MA_OWNED); 924 925 #ifdef DEV_NETMAP 926 // XXX todo: implement moderation 927 if (netmap_tx_irq(que->vsi->ifp, que->me)) 928 return FALSE; 929 #endif /* DEF_NETMAP */ 930 931 /* These are not the descriptors you seek, move along :) */ 932 if (txr->avail == que->num_tx_desc) { 933 atomic_store_rel_32(&txr->watchdog_timer, 0); 934 return FALSE; 935 } 936 937 first = txr->next_to_clean; 938 buf = &txr->buffers[first]; 939 tx_desc = (struct i40e_tx_desc *)&txr->base[first]; 940 last = buf->eop_index; 941 if (last == -1) 942 return FALSE; 943 eop_desc = (struct i40e_tx_desc *)&txr->base[last]; 944 945 /* Sync DMA before reading head index from ring */ 946 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 947 BUS_DMASYNC_POSTREAD); 948 949 /* Get the Head WB value */ 950 head = ixl_get_tx_head(que); 951 952 /* 953 ** Get the index of the first descriptor 954 ** BEYOND the EOP and call that 'done'. 
955 ** I do this so the comparison in the 956 ** inner while loop below can be simple 957 */ 958 if (++last == que->num_tx_desc) last = 0; 959 done = last; 960 961 /* 962 ** The HEAD index of the ring is written in a 963 ** defined location, this rather than a done bit 964 ** is what is used to keep track of what must be 965 ** 'cleaned'. 966 */ 967 while (first != head) { 968 /* We clean the range of the packet */ 969 while (first != done) { 970 ++txr->avail; 971 972 if (buf->m_head) { 973 txr->bytes += /* for ITR adjustment */ 974 buf->m_head->m_pkthdr.len; 975 txr->tx_bytes += /* for TX stats */ 976 buf->m_head->m_pkthdr.len; 977 bus_dmamap_sync(buf->tag, 978 buf->map, 979 BUS_DMASYNC_POSTWRITE); 980 bus_dmamap_unload(buf->tag, 981 buf->map); 982 m_freem(buf->m_head); 983 buf->m_head = NULL; 984 } 985 buf->eop_index = -1; 986 987 if (++first == que->num_tx_desc) 988 first = 0; 989 990 buf = &txr->buffers[first]; 991 tx_desc = &txr->base[first]; 992 } 993 ++txr->packets; 994 /* If a packet was successfully cleaned, reset the watchdog timer */ 995 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); 996 /* See if there is more work now */ 997 last = buf->eop_index; 998 if (last != -1) { 999 eop_desc = &txr->base[last]; 1000 /* Get next done point */ 1001 if (++last == que->num_tx_desc) last = 0; 1002 done = last; 1003 } else 1004 break; 1005 } 1006 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 1007 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1008 1009 txr->next_to_clean = first; 1010 1011 /* 1012 * If there are no pending descriptors, clear the timeout. 1013 */ 1014 if (txr->avail == que->num_tx_desc) { 1015 atomic_store_rel_32(&txr->watchdog_timer, 0); 1016 return FALSE; 1017 } 1018 1019 return TRUE; 1020 } 1021 1022 /********************************************************************** 1023 * 1024 * Use index kept by driver and the flag on each descriptor to find used 1025 * descriptor/buffers and clean them up for re-use. 1026 * 1027 * This method of reclaiming descriptors is current incompatible with 1028 * DEV_NETMAP. 1029 * 1030 * Returns TRUE if there are more descriptors to be cleaned after this 1031 * function exits. 1032 * 1033 **********************************************************************/ 1034 static bool 1035 ixl_txeof_dwb(struct ixl_queue *que) 1036 { 1037 struct tx_ring *txr = &que->txr; 1038 u32 first, last, done; 1039 u32 limit = 256; 1040 struct ixl_tx_buf *buf; 1041 struct i40e_tx_desc *tx_desc, *eop_desc; 1042 1043 mtx_assert(&txr->mtx, MA_OWNED); 1044 1045 /* There are no descriptors to clean */ 1046 if (txr->avail == que->num_tx_desc) { 1047 atomic_store_rel_32(&txr->watchdog_timer, 0); 1048 return FALSE; 1049 } 1050 1051 /* Set starting index/descriptor/buffer */ 1052 first = txr->next_to_clean; 1053 buf = &txr->buffers[first]; 1054 tx_desc = &txr->base[first]; 1055 1056 /* 1057 * This function operates per-packet -- identifies the start of the 1058 * packet and gets the index of the last descriptor of the packet from 1059 * it, from eop_index. 1060 * 1061 * If the last descriptor is marked "done" by the hardware, then all 1062 * of the descriptors for the packet are cleaned. 1063 */ 1064 last = buf->eop_index; 1065 if (last == -1) 1066 return FALSE; 1067 eop_desc = &txr->base[last]; 1068 1069 /* Sync DMA before reading from ring */ 1070 bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD); 1071 1072 /* 1073 * Get the index of the first descriptor beyond the EOP and call that 1074 * 'done'. Simplifies the comparison for the inner loop below. 
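	 * Unlike the head-writeback path above, completion here is
	 * detected from the descriptor itself: the hardware rewrites the
	 * EOP descriptor's DTYPE field, so the loop below keys off
	 *
	 *	(cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK)
	 *	    == I40E_TX_DESC_DTYPE_DESC_DONE
	 *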
1075 */ 1076 if (++last == que->num_tx_desc) 1077 last = 0; 1078 done = last; 1079 1080 /* 1081 * We find the last completed descriptor by examining each 1082 * descriptor's status bits to see if it's done. 1083 */ 1084 do { 1085 /* Break if last descriptor in packet isn't marked done */ 1086 if ((eop_desc->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK) 1087 != I40E_TX_DESC_DTYPE_DESC_DONE) 1088 break; 1089 1090 /* Clean the descriptors that make up the processed packet */ 1091 while (first != done) { 1092 /* 1093 * If there was a buffer attached to this descriptor, 1094 * prevent the adapter from accessing it, and add its 1095 * length to the queue's TX stats. 1096 */ 1097 if (buf->m_head) { 1098 txr->bytes += buf->m_head->m_pkthdr.len; 1099 txr->tx_bytes += buf->m_head->m_pkthdr.len; 1100 bus_dmamap_sync(buf->tag, buf->map, 1101 BUS_DMASYNC_POSTWRITE); 1102 bus_dmamap_unload(buf->tag, buf->map); 1103 m_freem(buf->m_head); 1104 buf->m_head = NULL; 1105 } 1106 buf->eop_index = -1; 1107 ++txr->avail; 1108 1109 if (++first == que->num_tx_desc) 1110 first = 0; 1111 buf = &txr->buffers[first]; 1112 tx_desc = &txr->base[first]; 1113 } 1114 ++txr->packets; 1115 /* If a packet was successfully cleaned, reset the watchdog timer */ 1116 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); 1117 1118 /* 1119 * Since buf is the first buffer after the one that was just 1120 * cleaned, check if the packet it starts is done, too. 1121 */ 1122 last = buf->eop_index; 1123 if (last != -1) { 1124 eop_desc = &txr->base[last]; 1125 /* Get next done point */ 1126 if (++last == que->num_tx_desc) last = 0; 1127 done = last; 1128 } else 1129 break; 1130 } while (--limit); 1131 1132 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 1133 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1134 1135 txr->next_to_clean = first; 1136 1137 /* 1138 * If there are no pending descriptors, clear the watchdog timer. 1139 */ 1140 if (txr->avail == que->num_tx_desc) { 1141 atomic_store_rel_32(&txr->watchdog_timer, 0); 1142 return FALSE; 1143 } 1144 1145 return TRUE; 1146 } 1147 1148 bool 1149 ixl_txeof(struct ixl_queue *que) 1150 { 1151 struct ixl_vsi *vsi = que->vsi; 1152 1153 return (vsi->enable_head_writeback) ? ixl_txeof_hwb(que) 1154 : ixl_txeof_dwb(que); 1155 } 1156 1157 1158 /********************************************************************* 1159 * 1160 * Refresh mbuf buffers for RX descriptor rings 1161 * - now keeps its own state so discards due to resource 1162 * exhaustion are unnecessary, if an mbuf cannot be obtained 1163 * it just returns, keeping its placeholder, thus it can simply 1164 * be recalled to try again. 
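 *  - the caller passes the ring index it has processed up to as
 *    'limit' (ixl_rxeof() does this every few cleaned descriptors),
 *    and the refresh loop below stops before reaching it, so
 *    descriptors that are still being examined are never handed back
 *    to the hardware.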
1165 * 1166 **********************************************************************/ 1167 static void 1168 ixl_refresh_mbufs(struct ixl_queue *que, int limit) 1169 { 1170 struct ixl_vsi *vsi = que->vsi; 1171 struct rx_ring *rxr = &que->rxr; 1172 bus_dma_segment_t hseg[1]; 1173 bus_dma_segment_t pseg[1]; 1174 struct ixl_rx_buf *buf; 1175 struct mbuf *mh, *mp; 1176 int i, j, nsegs, error; 1177 bool refreshed = FALSE; 1178 1179 i = j = rxr->next_refresh; 1180 /* Control the loop with one beyond */ 1181 if (++j == que->num_rx_desc) 1182 j = 0; 1183 1184 while (j != limit) { 1185 buf = &rxr->buffers[i]; 1186 if (rxr->hdr_split == FALSE) 1187 goto no_split; 1188 1189 if (buf->m_head == NULL) { 1190 mh = m_gethdr(M_NOWAIT, MT_DATA); 1191 if (mh == NULL) 1192 goto update; 1193 } else 1194 mh = buf->m_head; 1195 1196 mh->m_pkthdr.len = mh->m_len = MHLEN; 1197 mh->m_len = MHLEN; 1198 mh->m_flags |= M_PKTHDR; 1199 /* Get the memory mapping */ 1200 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1201 buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); 1202 if (error != 0) { 1203 printf("Refresh mbufs: hdr dmamap load" 1204 " failure - %d\n", error); 1205 m_free(mh); 1206 buf->m_head = NULL; 1207 goto update; 1208 } 1209 buf->m_head = mh; 1210 bus_dmamap_sync(rxr->htag, buf->hmap, 1211 BUS_DMASYNC_PREREAD); 1212 rxr->base[i].read.hdr_addr = 1213 htole64(hseg[0].ds_addr); 1214 1215 no_split: 1216 if (buf->m_pack == NULL) { 1217 mp = m_getjcl(M_NOWAIT, MT_DATA, 1218 M_PKTHDR, rxr->mbuf_sz); 1219 if (mp == NULL) 1220 goto update; 1221 } else 1222 mp = buf->m_pack; 1223 1224 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1225 /* Get the memory mapping */ 1226 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1227 buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); 1228 if (error != 0) { 1229 printf("Refresh mbufs: payload dmamap load" 1230 " failure - %d\n", error); 1231 m_free(mp); 1232 buf->m_pack = NULL; 1233 goto update; 1234 } 1235 buf->m_pack = mp; 1236 bus_dmamap_sync(rxr->ptag, buf->pmap, 1237 BUS_DMASYNC_PREREAD); 1238 rxr->base[i].read.pkt_addr = 1239 htole64(pseg[0].ds_addr); 1240 /* Used only when doing header split */ 1241 rxr->base[i].read.hdr_addr = 0; 1242 1243 refreshed = TRUE; 1244 /* Next is precalculated */ 1245 i = j; 1246 rxr->next_refresh = i; 1247 if (++j == que->num_rx_desc) 1248 j = 0; 1249 } 1250 update: 1251 if (refreshed) /* Update hardware tail index */ 1252 wr32(vsi->hw, rxr->tail, rxr->next_refresh); 1253 return; 1254 } 1255 1256 1257 /********************************************************************* 1258 * 1259 * Allocate memory for rx_buffer structures. Since we use one 1260 * rx_buffer per descriptor, the maximum number of rx_buffer's 1261 * that we'll need is equal to the number of receive descriptors 1262 * that we've defined. 
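 *  Two DMA tags are created per ring: a small one (htag, MSIZE) used
 *  only for header-split header mbufs, and a larger one (ptag, up to
 *  MJUM16BYTES) for the payload clusters; every descriptor gets one
 *  map from each tag.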
1263 * 1264 **********************************************************************/ 1265 int 1266 ixl_allocate_rx_data(struct ixl_queue *que) 1267 { 1268 struct rx_ring *rxr = &que->rxr; 1269 struct ixl_vsi *vsi = que->vsi; 1270 device_t dev = vsi->dev; 1271 struct ixl_rx_buf *buf; 1272 int i, bsize, error; 1273 1274 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1275 1, 0, /* alignment, bounds */ 1276 BUS_SPACE_MAXADDR, /* lowaddr */ 1277 BUS_SPACE_MAXADDR, /* highaddr */ 1278 NULL, NULL, /* filter, filterarg */ 1279 MSIZE, /* maxsize */ 1280 1, /* nsegments */ 1281 MSIZE, /* maxsegsize */ 1282 0, /* flags */ 1283 NULL, /* lockfunc */ 1284 NULL, /* lockfuncarg */ 1285 &rxr->htag))) { 1286 device_printf(dev, "Unable to create RX DMA htag\n"); 1287 return (error); 1288 } 1289 1290 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1291 1, 0, /* alignment, bounds */ 1292 BUS_SPACE_MAXADDR, /* lowaddr */ 1293 BUS_SPACE_MAXADDR, /* highaddr */ 1294 NULL, NULL, /* filter, filterarg */ 1295 MJUM16BYTES, /* maxsize */ 1296 1, /* nsegments */ 1297 MJUM16BYTES, /* maxsegsize */ 1298 0, /* flags */ 1299 NULL, /* lockfunc */ 1300 NULL, /* lockfuncarg */ 1301 &rxr->ptag))) { 1302 device_printf(dev, "Unable to create RX DMA ptag\n"); 1303 goto free_rx_htag; 1304 } 1305 1306 bsize = sizeof(struct ixl_rx_buf) * que->num_rx_desc; 1307 if (!(rxr->buffers = 1308 (struct ixl_rx_buf *) malloc(bsize, 1309 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1310 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1311 error = ENOMEM; 1312 goto free_rx_ptag; 1313 } 1314 1315 for (i = 0; i < que->num_rx_desc; i++) { 1316 buf = &rxr->buffers[i]; 1317 error = bus_dmamap_create(rxr->htag, 1318 BUS_DMA_NOWAIT, &buf->hmap); 1319 if (error) { 1320 device_printf(dev, "Unable to create RX head map\n"); 1321 goto free_buffers; 1322 } 1323 error = bus_dmamap_create(rxr->ptag, 1324 BUS_DMA_NOWAIT, &buf->pmap); 1325 if (error) { 1326 bus_dmamap_destroy(rxr->htag, buf->hmap); 1327 device_printf(dev, "Unable to create RX pkt map\n"); 1328 goto free_buffers; 1329 } 1330 } 1331 1332 return 0; 1333 free_buffers: 1334 while (i--) { 1335 buf = &rxr->buffers[i]; 1336 bus_dmamap_destroy(rxr->ptag, buf->pmap); 1337 bus_dmamap_destroy(rxr->htag, buf->hmap); 1338 } 1339 free(rxr->buffers, M_DEVBUF); 1340 rxr->buffers = NULL; 1341 free_rx_ptag: 1342 bus_dma_tag_destroy(rxr->ptag); 1343 rxr->ptag = NULL; 1344 free_rx_htag: 1345 bus_dma_tag_destroy(rxr->htag); 1346 rxr->htag = NULL; 1347 return (error); 1348 } 1349 1350 1351 /********************************************************************* 1352 * 1353 * (Re)Initialize the queue receive ring and its buffers. 
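 *  - frees any stale mbufs, replenishes every descriptor with a fresh
 *    payload cluster, sets up software LRO when IFCAP_LRO is enabled,
 *    and finally writes the RX tail register so the hardware can
 *    start filling the ring.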
1354 * 1355 **********************************************************************/ 1356 int 1357 ixl_init_rx_ring(struct ixl_queue *que) 1358 { 1359 struct rx_ring *rxr = &que->rxr; 1360 struct ixl_vsi *vsi = que->vsi; 1361 #if defined(INET6) || defined(INET) 1362 struct ifnet *ifp = vsi->ifp; 1363 struct lro_ctrl *lro = &rxr->lro; 1364 #endif 1365 struct ixl_rx_buf *buf; 1366 bus_dma_segment_t pseg[1], hseg[1]; 1367 int rsize, nsegs, error = 0; 1368 #ifdef DEV_NETMAP 1369 struct netmap_adapter *na = NA(que->vsi->ifp); 1370 struct netmap_slot *slot; 1371 #endif /* DEV_NETMAP */ 1372 1373 IXL_RX_LOCK(rxr); 1374 #ifdef DEV_NETMAP 1375 /* same as in ixl_init_tx_ring() */ 1376 slot = netmap_reset(na, NR_RX, que->me, 0); 1377 #endif /* DEV_NETMAP */ 1378 /* Clear the ring contents */ 1379 rsize = roundup2(que->num_rx_desc * 1380 sizeof(union i40e_rx_desc), DBA_ALIGN); 1381 bzero((void *)rxr->base, rsize); 1382 /* Cleanup any existing buffers */ 1383 for (int i = 0; i < que->num_rx_desc; i++) { 1384 buf = &rxr->buffers[i]; 1385 if (buf->m_head != NULL) { 1386 bus_dmamap_sync(rxr->htag, buf->hmap, 1387 BUS_DMASYNC_POSTREAD); 1388 bus_dmamap_unload(rxr->htag, buf->hmap); 1389 buf->m_head->m_flags |= M_PKTHDR; 1390 m_freem(buf->m_head); 1391 } 1392 if (buf->m_pack != NULL) { 1393 bus_dmamap_sync(rxr->ptag, buf->pmap, 1394 BUS_DMASYNC_POSTREAD); 1395 bus_dmamap_unload(rxr->ptag, buf->pmap); 1396 buf->m_pack->m_flags |= M_PKTHDR; 1397 m_freem(buf->m_pack); 1398 } 1399 buf->m_head = NULL; 1400 buf->m_pack = NULL; 1401 } 1402 1403 /* header split is off */ 1404 rxr->hdr_split = FALSE; 1405 1406 /* Now replenish the mbufs */ 1407 for (int j = 0; j != que->num_rx_desc; ++j) { 1408 struct mbuf *mh, *mp; 1409 1410 buf = &rxr->buffers[j]; 1411 #ifdef DEV_NETMAP 1412 /* 1413 * In netmap mode, fill the map and set the buffer 1414 * address in the NIC ring, considering the offset 1415 * between the netmap and NIC rings (see comment in 1416 * ixgbe_setup_transmit_ring() ). 
No need to allocate 1417 * an mbuf, so end the block with a continue; 1418 */ 1419 if (slot) { 1420 int sj = netmap_idx_n2k(na->rx_rings[que->me], j); 1421 uint64_t paddr; 1422 void *addr; 1423 1424 addr = PNMB(na, slot + sj, &paddr); 1425 netmap_load_map(na, rxr->dma.tag, buf->pmap, addr); 1426 /* Update descriptor and the cached value */ 1427 rxr->base[j].read.pkt_addr = htole64(paddr); 1428 rxr->base[j].read.hdr_addr = 0; 1429 continue; 1430 } 1431 #endif /* DEV_NETMAP */ 1432 /* 1433 ** Don't allocate mbufs if not 1434 ** doing header split, its wasteful 1435 */ 1436 if (rxr->hdr_split == FALSE) 1437 goto skip_head; 1438 1439 /* First the header */ 1440 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA); 1441 if (buf->m_head == NULL) { 1442 error = ENOBUFS; 1443 goto fail; 1444 } 1445 m_adj(buf->m_head, ETHER_ALIGN); 1446 mh = buf->m_head; 1447 mh->m_len = mh->m_pkthdr.len = MHLEN; 1448 mh->m_flags |= M_PKTHDR; 1449 /* Get the memory mapping */ 1450 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1451 buf->hmap, buf->m_head, hseg, 1452 &nsegs, BUS_DMA_NOWAIT); 1453 if (error != 0) /* Nothing elegant to do here */ 1454 goto fail; 1455 bus_dmamap_sync(rxr->htag, 1456 buf->hmap, BUS_DMASYNC_PREREAD); 1457 /* Update descriptor */ 1458 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr); 1459 1460 skip_head: 1461 /* Now the payload cluster */ 1462 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, 1463 M_PKTHDR, rxr->mbuf_sz); 1464 if (buf->m_pack == NULL) { 1465 error = ENOBUFS; 1466 goto fail; 1467 } 1468 mp = buf->m_pack; 1469 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1470 /* Get the memory mapping */ 1471 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1472 buf->pmap, mp, pseg, 1473 &nsegs, BUS_DMA_NOWAIT); 1474 if (error != 0) 1475 goto fail; 1476 bus_dmamap_sync(rxr->ptag, 1477 buf->pmap, BUS_DMASYNC_PREREAD); 1478 /* Update descriptor */ 1479 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr); 1480 rxr->base[j].read.hdr_addr = 0; 1481 } 1482 1483 1484 /* Setup our descriptor indices */ 1485 rxr->next_check = 0; 1486 rxr->next_refresh = 0; 1487 rxr->lro_enabled = FALSE; 1488 rxr->split = 0; 1489 rxr->bytes = 0; 1490 rxr->discard = FALSE; 1491 1492 wr32(vsi->hw, rxr->tail, que->num_rx_desc - 1); 1493 ixl_flush(vsi->hw); 1494 1495 #if defined(INET6) || defined(INET) 1496 /* 1497 ** Now set up the LRO interface: 1498 */ 1499 if (ifp->if_capenable & IFCAP_LRO) { 1500 int err = tcp_lro_init(lro); 1501 if (err) { 1502 if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me); 1503 goto fail; 1504 } 1505 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me); 1506 rxr->lro_enabled = TRUE; 1507 lro->ifp = vsi->ifp; 1508 } 1509 #endif 1510 1511 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1512 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1513 1514 fail: 1515 IXL_RX_UNLOCK(rxr); 1516 return (error); 1517 } 1518 1519 1520 /********************************************************************* 1521 * 1522 * Free station receive ring data structures 1523 * 1524 **********************************************************************/ 1525 void 1526 ixl_free_que_rx(struct ixl_queue *que) 1527 { 1528 struct rx_ring *rxr = &que->rxr; 1529 struct ixl_rx_buf *buf; 1530 1531 /* Cleanup any existing buffers */ 1532 if (rxr->buffers != NULL) { 1533 for (int i = 0; i < que->num_rx_desc; i++) { 1534 buf = &rxr->buffers[i]; 1535 1536 /* Free buffers and unload dma maps */ 1537 ixl_rx_discard(rxr, i); 1538 1539 bus_dmamap_destroy(rxr->htag, buf->hmap); 1540 bus_dmamap_destroy(rxr->ptag, buf->pmap); 1541 } 1542 
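		/*
		 * ixl_rx_discard() above already freed any partial packet
		 * chain and unloaded the head/payload maps, and the maps
		 * themselves were just destroyed; only the array is left.
		 */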
free(rxr->buffers, M_DEVBUF); 1543 rxr->buffers = NULL; 1544 } 1545 1546 if (rxr->htag != NULL) { 1547 bus_dma_tag_destroy(rxr->htag); 1548 rxr->htag = NULL; 1549 } 1550 if (rxr->ptag != NULL) { 1551 bus_dma_tag_destroy(rxr->ptag); 1552 rxr->ptag = NULL; 1553 } 1554 } 1555 1556 static inline void 1557 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype) 1558 { 1559 1560 #if defined(INET6) || defined(INET) 1561 /* 1562 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet 1563 * should be computed by hardware. Also it should not have VLAN tag in 1564 * ethernet header. 1565 */ 1566 if (rxr->lro_enabled && 1567 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 1568 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 1569 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { 1570 /* 1571 * Send to the stack if: 1572 ** - LRO not enabled, or 1573 ** - no LRO resources, or 1574 ** - lro enqueue fails 1575 */ 1576 if (rxr->lro.lro_cnt != 0) 1577 if (tcp_lro_rx(&rxr->lro, m, 0) == 0) 1578 return; 1579 } 1580 #endif 1581 (*ifp->if_input)(ifp, m); 1582 } 1583 1584 1585 static inline void 1586 ixl_rx_discard(struct rx_ring *rxr, int i) 1587 { 1588 struct ixl_rx_buf *rbuf; 1589 1590 KASSERT(rxr != NULL, ("Receive ring pointer cannot be null")); 1591 KASSERT(i < rxr->que->num_rx_desc, ("Descriptor index must be less than que->num_desc")); 1592 1593 rbuf = &rxr->buffers[i]; 1594 1595 /* Free the mbufs in the current chain for the packet */ 1596 if (rbuf->fmp != NULL) { 1597 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); 1598 m_freem(rbuf->fmp); 1599 rbuf->fmp = NULL; 1600 } 1601 1602 /* 1603 * Free the mbufs for the current descriptor; and let ixl_refresh_mbufs() 1604 * assign new mbufs to these. 1605 */ 1606 if (rbuf->m_head) { 1607 bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD); 1608 bus_dmamap_unload(rxr->htag, rbuf->hmap); 1609 m_free(rbuf->m_head); 1610 rbuf->m_head = NULL; 1611 } 1612 1613 if (rbuf->m_pack) { 1614 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); 1615 bus_dmamap_unload(rxr->ptag, rbuf->pmap); 1616 m_free(rbuf->m_pack); 1617 rbuf->m_pack = NULL; 1618 } 1619 } 1620 1621 #ifdef RSS 1622 /* 1623 ** i40e_ptype_to_hash: parse the packet type 1624 ** to determine the appropriate hash. 1625 */ 1626 static inline int 1627 ixl_ptype_to_hash(u8 ptype) 1628 { 1629 struct i40e_rx_ptype_decoded decoded; 1630 1631 decoded = decode_rx_desc_ptype(ptype); 1632 1633 if (!decoded.known) 1634 return M_HASHTYPE_OPAQUE_HASH; 1635 1636 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 1637 return M_HASHTYPE_OPAQUE_HASH; 1638 1639 /* Note: anything that gets to this point is IP */ 1640 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 1641 switch (decoded.inner_prot) { 1642 case I40E_RX_PTYPE_INNER_PROT_TCP: 1643 return M_HASHTYPE_RSS_TCP_IPV6; 1644 case I40E_RX_PTYPE_INNER_PROT_UDP: 1645 return M_HASHTYPE_RSS_UDP_IPV6; 1646 default: 1647 return M_HASHTYPE_RSS_IPV6; 1648 } 1649 } 1650 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 1651 switch (decoded.inner_prot) { 1652 case I40E_RX_PTYPE_INNER_PROT_TCP: 1653 return M_HASHTYPE_RSS_TCP_IPV4; 1654 case I40E_RX_PTYPE_INNER_PROT_UDP: 1655 return M_HASHTYPE_RSS_UDP_IPV4; 1656 default: 1657 return M_HASHTYPE_RSS_IPV4; 1658 } 1659 } 1660 /* We should never get here!! 
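	 * (every known ptype is either non-IP L2, IPv4 or IPv6 and was
	 * handled above; OPAQUE_HASH still lets the stack use the value
	 * as a flow ID, it just makes no claim about the hash type)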
*/ 1661 return M_HASHTYPE_OPAQUE_HASH; 1662 } 1663 #endif /* RSS */ 1664 1665 /********************************************************************* 1666 * 1667 * This routine executes in interrupt context. It replenishes 1668 * the mbufs in the descriptor and sends data which has been 1669 * dma'ed into host memory to upper layer. 1670 * 1671 * We loop at most count times if count is > 0, or until done if 1672 * count < 0. 1673 * 1674 * Return TRUE for more work, FALSE for all clean. 1675 *********************************************************************/ 1676 bool 1677 ixl_rxeof(struct ixl_queue *que, int count) 1678 { 1679 struct ixl_vsi *vsi = que->vsi; 1680 struct rx_ring *rxr = &que->rxr; 1681 struct ifnet *ifp = vsi->ifp; 1682 #if defined(INET6) || defined(INET) 1683 struct lro_ctrl *lro = &rxr->lro; 1684 #endif 1685 int i, nextp, processed = 0; 1686 union i40e_rx_desc *cur; 1687 struct ixl_rx_buf *rbuf, *nbuf; 1688 1689 IXL_RX_LOCK(rxr); 1690 1691 #ifdef DEV_NETMAP 1692 if (netmap_rx_irq(ifp, que->me, &count)) { 1693 IXL_RX_UNLOCK(rxr); 1694 return (FALSE); 1695 } 1696 #endif /* DEV_NETMAP */ 1697 1698 for (i = rxr->next_check; count != 0;) { 1699 struct mbuf *sendmp, *mh, *mp; 1700 u32 status, error; 1701 u16 hlen, plen, vtag; 1702 u64 qword; 1703 u8 ptype; 1704 bool eop; 1705 1706 /* Sync the ring. */ 1707 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1708 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1709 1710 cur = &rxr->base[i]; 1711 qword = le64toh(cur->wb.qword1.status_error_len); 1712 status = (qword & I40E_RXD_QW1_STATUS_MASK) 1713 >> I40E_RXD_QW1_STATUS_SHIFT; 1714 error = (qword & I40E_RXD_QW1_ERROR_MASK) 1715 >> I40E_RXD_QW1_ERROR_SHIFT; 1716 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) 1717 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1718 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) 1719 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1720 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) 1721 >> I40E_RXD_QW1_PTYPE_SHIFT; 1722 1723 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) { 1724 ++rxr->not_done; 1725 break; 1726 } 1727 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1728 break; 1729 1730 count--; 1731 sendmp = NULL; 1732 nbuf = NULL; 1733 cur->wb.qword1.status_error_len = 0; 1734 rbuf = &rxr->buffers[i]; 1735 mh = rbuf->m_head; 1736 mp = rbuf->m_pack; 1737 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)); 1738 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) 1739 vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); 1740 else 1741 vtag = 0; 1742 1743 /* Remove device access to the rx buffers. */ 1744 if (rbuf->m_head != NULL) { 1745 bus_dmamap_sync(rxr->htag, rbuf->hmap, 1746 BUS_DMASYNC_POSTREAD); 1747 bus_dmamap_unload(rxr->htag, rbuf->hmap); 1748 } 1749 if (rbuf->m_pack != NULL) { 1750 bus_dmamap_sync(rxr->ptag, rbuf->pmap, 1751 BUS_DMASYNC_POSTREAD); 1752 bus_dmamap_unload(rxr->ptag, rbuf->pmap); 1753 } 1754 1755 /* 1756 ** Make sure bad packets are discarded, 1757 ** note that only EOP descriptor has valid 1758 ** error results. 1759 */ 1760 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1761 rxr->desc_errs++; 1762 ixl_rx_discard(rxr, i); 1763 goto next_desc; 1764 } 1765 1766 /* Prefetch the next buffer */ 1767 if (!eop) { 1768 nextp = i + 1; 1769 if (nextp == que->num_rx_desc) 1770 nextp = 0; 1771 nbuf = &rxr->buffers[nextp]; 1772 prefetch(nbuf); 1773 } 1774 1775 /* 1776 ** The header mbuf is ONLY used when header 1777 ** split is enabled, otherwise we get normal 1778 ** behavior, ie, both header and payload 1779 ** are DMA'd into the payload buffer. 
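		** For a split packet spanning several descriptors the chain
		** built below ends up roughly as
		**
		**	mh (header) -> mp (payload) -> next rbuf's m_pack -> ...
		**
		** with the head stashed in the next buffer's fmp until EOP.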
1780 ** 1781 ** Rather than using the fmp/lmp global pointers 1782 ** we now keep the head of a packet chain in the 1783 ** buffer struct and pass this along from one 1784 ** descriptor to the next, until we get EOP. 1785 */ 1786 if (rxr->hdr_split && (rbuf->fmp == NULL)) { 1787 if (hlen > IXL_RX_HDR) 1788 hlen = IXL_RX_HDR; 1789 mh->m_len = hlen; 1790 mh->m_flags |= M_PKTHDR; 1791 mh->m_next = NULL; 1792 mh->m_pkthdr.len = mh->m_len; 1793 /* Null buf pointer so it is refreshed */ 1794 rbuf->m_head = NULL; 1795 /* 1796 ** Check the payload length, this 1797 ** could be zero if its a small 1798 ** packet. 1799 */ 1800 if (plen > 0) { 1801 mp->m_len = plen; 1802 mp->m_next = NULL; 1803 mp->m_flags &= ~M_PKTHDR; 1804 mh->m_next = mp; 1805 mh->m_pkthdr.len += mp->m_len; 1806 /* Null buf pointer so it is refreshed */ 1807 rbuf->m_pack = NULL; 1808 rxr->split++; 1809 } 1810 /* 1811 ** Now create the forward 1812 ** chain so when complete 1813 ** we wont have to. 1814 */ 1815 if (eop == 0) { 1816 /* stash the chain head */ 1817 nbuf->fmp = mh; 1818 /* Make forward chain */ 1819 if (plen) 1820 mp->m_next = nbuf->m_pack; 1821 else 1822 mh->m_next = nbuf->m_pack; 1823 } else { 1824 /* Singlet, prepare to send */ 1825 sendmp = mh; 1826 if (vtag) { 1827 sendmp->m_pkthdr.ether_vtag = vtag; 1828 sendmp->m_flags |= M_VLANTAG; 1829 } 1830 } 1831 } else { 1832 /* 1833 ** Either no header split, or a 1834 ** secondary piece of a fragmented 1835 ** split packet. 1836 */ 1837 mp->m_len = plen; 1838 /* 1839 ** See if there is a stored head 1840 ** that determines what we are 1841 */ 1842 sendmp = rbuf->fmp; 1843 rbuf->m_pack = rbuf->fmp = NULL; 1844 1845 if (sendmp != NULL) /* secondary frag */ 1846 sendmp->m_pkthdr.len += mp->m_len; 1847 else { 1848 /* first desc of a non-ps chain */ 1849 sendmp = mp; 1850 sendmp->m_flags |= M_PKTHDR; 1851 sendmp->m_pkthdr.len = mp->m_len; 1852 } 1853 /* Pass the head pointer on */ 1854 if (eop == 0) { 1855 nbuf->fmp = sendmp; 1856 sendmp = NULL; 1857 mp->m_next = nbuf->m_pack; 1858 } 1859 } 1860 ++processed; 1861 /* Sending this frame? */ 1862 if (eop) { 1863 sendmp->m_pkthdr.rcvif = ifp; 1864 /* gather stats */ 1865 rxr->rx_packets++; 1866 rxr->rx_bytes += sendmp->m_pkthdr.len; 1867 /* capture data for dynamic ITR adjustment */ 1868 rxr->packets++; 1869 rxr->bytes += sendmp->m_pkthdr.len; 1870 /* Set VLAN tag (field only valid in eop desc) */ 1871 if (vtag) { 1872 sendmp->m_pkthdr.ether_vtag = vtag; 1873 sendmp->m_flags |= M_VLANTAG; 1874 } 1875 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1876 ixl_rx_checksum(sendmp, status, error, ptype); 1877 #ifdef RSS 1878 sendmp->m_pkthdr.flowid = 1879 le32toh(cur->wb.qword0.hi_dword.rss); 1880 M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype)); 1881 #else 1882 sendmp->m_pkthdr.flowid = que->msix; 1883 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1884 #endif 1885 } 1886 next_desc: 1887 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1888 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1889 1890 /* Advance our pointers to the next descriptor. 
*/ 1891 if (++i == que->num_rx_desc) 1892 i = 0; 1893 1894 /* Now send to the stack or do LRO */ 1895 if (sendmp != NULL) { 1896 rxr->next_check = i; 1897 IXL_RX_UNLOCK(rxr); 1898 ixl_rx_input(rxr, ifp, sendmp, ptype); 1899 IXL_RX_LOCK(rxr); 1900 /* 1901 * Update index used in loop in case another 1902 * ixl_rxeof() call executes when lock is released 1903 */ 1904 i = rxr->next_check; 1905 } 1906 1907 /* Every 8 descriptors we go to refresh mbufs */ 1908 if (processed == 8) { 1909 ixl_refresh_mbufs(que, i); 1910 processed = 0; 1911 } 1912 } 1913 1914 /* Refresh any remaining buf structs */ 1915 if (ixl_rx_unrefreshed(que)) 1916 ixl_refresh_mbufs(que, i); 1917 1918 rxr->next_check = i; 1919 1920 IXL_RX_UNLOCK(rxr); 1921 1922 #if defined(INET6) || defined(INET) 1923 /* 1924 * Flush any outstanding LRO work 1925 */ 1926 #if __FreeBSD_version >= 1100105 1927 tcp_lro_flush_all(lro); 1928 #else 1929 struct lro_entry *queued; 1930 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { 1931 SLIST_REMOVE_HEAD(&lro->lro_active, next); 1932 tcp_lro_flush(lro, queued); 1933 } 1934 #endif 1935 #endif /* defined(INET6) || defined(INET) */ 1936 1937 return (FALSE); 1938 } 1939 1940 1941 /********************************************************************* 1942 * 1943 * Verify that the hardware indicated that the checksum is valid. 1944 * Inform the stack about the status of checksum so that stack 1945 * doesn't spend time verifying the checksum. 1946 * 1947 *********************************************************************/ 1948 static void 1949 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype) 1950 { 1951 struct i40e_rx_ptype_decoded decoded; 1952 1953 decoded = decode_rx_desc_ptype(ptype); 1954 1955 /* Errors? */ 1956 if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 1957 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { 1958 mp->m_pkthdr.csum_flags = 0; 1959 return; 1960 } 1961 1962 /* IPv6 with extension headers likely have bad csum */ 1963 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1964 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 1965 if (status & 1966 (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { 1967 mp->m_pkthdr.csum_flags = 0; 1968 return; 1969 } 1970 1971 1972 /* IP Checksum Good */ 1973 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 1974 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; 1975 1976 if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) { 1977 mp->m_pkthdr.csum_flags |= 1978 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1979 mp->m_pkthdr.csum_data |= htons(0xffff); 1980 } 1981 return; 1982 } 1983 1984 #if __FreeBSD_version >= 1100000 1985 uint64_t 1986 ixl_get_counter(if_t ifp, ift_counter cnt) 1987 { 1988 struct ixl_vsi *vsi; 1989 1990 vsi = if_getsoftc(ifp); 1991 1992 switch (cnt) { 1993 case IFCOUNTER_IPACKETS: 1994 return (vsi->ipackets); 1995 case IFCOUNTER_IERRORS: 1996 return (vsi->ierrors); 1997 case IFCOUNTER_OPACKETS: 1998 return (vsi->opackets); 1999 case IFCOUNTER_OERRORS: 2000 return (vsi->oerrors); 2001 case IFCOUNTER_COLLISIONS: 2002 /* Collisions are by standard impossible in 40G/10G Ethernet */ 2003 return (0); 2004 case IFCOUNTER_IBYTES: 2005 return (vsi->ibytes); 2006 case IFCOUNTER_OBYTES: 2007 return (vsi->obytes); 2008 case IFCOUNTER_IMCASTS: 2009 return (vsi->imcasts); 2010 case IFCOUNTER_OMCASTS: 2011 return (vsi->omcasts); 2012 case IFCOUNTER_IQDROPS: 2013 return (vsi->iqdrops); 2014 case IFCOUNTER_OQDROPS: 2015 return (vsi->oqdrops); 2016 case IFCOUNTER_NOPROTO: 2017 return (vsi->noproto); 2018 default: 2019 return (if_get_counter_default(ifp, 
		    cnt));
	}
}
#endif

/*
 * Set TX and RX ring size adjusting value to supported range
 */
void
ixl_vsi_setup_rings_size(struct ixl_vsi * vsi, int tx_ring_size, int rx_ring_size)
{
	struct device * dev = vsi->dev;

	if (tx_ring_size < IXL_MIN_RING
	     || tx_ring_size > IXL_MAX_RING
	     || tx_ring_size % IXL_RING_INCREMENT != 0) {
		device_printf(dev, "Invalid tx_ring_size value of %d set!\n",
		    tx_ring_size);
		device_printf(dev, "tx_ring_size must be between %d and %d, "
		    "inclusive, and must be a multiple of %d\n",
		    IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
		device_printf(dev, "Using default value of %d instead\n",
		    IXL_DEFAULT_RING);
		vsi->num_tx_desc = IXL_DEFAULT_RING;
	} else
		vsi->num_tx_desc = tx_ring_size;

	if (rx_ring_size < IXL_MIN_RING
	     || rx_ring_size > IXL_MAX_RING
	     || rx_ring_size % IXL_RING_INCREMENT != 0) {
		device_printf(dev, "Invalid rx_ring_size value of %d set!\n",
		    rx_ring_size);
		device_printf(dev, "rx_ring_size must be between %d and %d, "
		    "inclusive, and must be a multiple of %d\n",
		    IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
		device_printf(dev, "Using default value of %d instead\n",
		    IXL_DEFAULT_RING);
		vsi->num_rx_desc = IXL_DEFAULT_RING;
	} else
		vsi->num_rx_desc = rx_ring_size;

	device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
	    vsi->num_tx_desc, vsi->num_rx_desc);
}

static void
ixl_queue_sw_irq(struct ixl_vsi *vsi, int qidx)
{
	struct i40e_hw	*hw = vsi->hw;
	u32		reg, mask;

	if ((vsi->flags & IXL_FLAGS_IS_VF) != 0) {
		mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK |
		    I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
		    I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK);

		reg = I40E_VFINT_DYN_CTLN1(qidx);
	} else {
		mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
		    I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
		    I40E_PFINT_DYN_CTLN_ITR_INDX_MASK);

		reg = ((vsi->flags & IXL_FLAGS_USES_MSIX) != 0) ?
		    I40E_PFINT_DYN_CTLN(qidx) : I40E_PFINT_DYN_CTL0;
	}

	wr32(hw, reg, mask);
}

int
ixl_queue_hang_check(struct ixl_vsi *vsi)
{
	struct ixl_queue *que = vsi->queues;
	device_t dev = vsi->dev;
	struct tx_ring *txr;
	s32 timer, new_timer;
	int hung = 0;

	for (int i = 0; i < vsi->num_queues; i++, que++) {
		txr = &que->txr;
		/*
		 * If watchdog_timer is equal to the default value set by
		 * ixl_txeof, just subtract hz and move on - the queue is
		 * most probably running. Otherwise check the value.
		 */
		if (atomic_cmpset_rel_32(&txr->watchdog_timer,
		    IXL_WATCHDOG, (IXL_WATCHDOG) - hz) == 0) {
			timer = atomic_load_acq_32(&txr->watchdog_timer);
			/*
			 * Again - if the timer was reset to the default value
			 * then the queue is running. Otherwise check if the
			 * watchdog expired and act accordingly.
			 */
			if (timer > 0 && timer != IXL_WATCHDOG) {
				new_timer = timer - hz;
				if (new_timer <= 0) {
					atomic_store_rel_32(&txr->watchdog_timer, -1);
					device_printf(dev, "WARNING: queue %d "
					    "appears to be hung!\n", que->me);
					++hung;
					/* Try to unblock the queue with SW IRQ */
					ixl_queue_sw_irq(vsi, i);
				} else {
					/*
					 * If this fails, that means something
					 * in the TX path has updated the
					 * watchdog, so it means the TX path is
					 * still working and the watchdog
					 * doesn't need to count down.
					 */
					atomic_cmpset_rel_32(&txr->watchdog_timer,
					    timer, new_timer);
				}
			}
		}
	}

	return (hung);
}