/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the PF and VF drivers.
39 */ 40 41 #ifndef IXL_STANDALONE_BUILD 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 #include "opt_rss.h" 45 #endif 46 47 #include "ixl.h" 48 49 #ifdef RSS 50 #include <net/rss_config.h> 51 #endif 52 53 /* Local Prototypes */ 54 static void ixl_rx_checksum(struct mbuf *, u32, u32, u8); 55 static void ixl_refresh_mbufs(struct ixl_queue *, int); 56 static int ixl_xmit(struct ixl_queue *, struct mbuf **); 57 static int ixl_tx_setup_offload(struct ixl_queue *, 58 struct mbuf *, u32 *, u32 *); 59 static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *); 60 61 static inline void ixl_rx_discard(struct rx_ring *, int); 62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *, 63 struct mbuf *, u8); 64 65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp); 66 static int ixl_tx_setup_offload(struct ixl_queue *que, 67 struct mbuf *mp, u32 *cmd, u32 *off); 68 static inline u32 ixl_get_tx_head(struct ixl_queue *que); 69 70 #ifdef DEV_NETMAP 71 #include <dev/netmap/if_ixl_netmap.h> 72 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1; 73 #endif /* DEV_NETMAP */ 74 75 /* 76 * @key key is saved into this parameter 77 */ 78 void 79 ixl_get_default_rss_key(u32 *key) 80 { 81 MPASS(key != NULL); 82 83 u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 84 0x183cfd8c, 0xce880440, 0x580cbc3c, 85 0x35897377, 0x328b25e1, 0x4fa98922, 86 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 87 0x0, 0x0, 0x0}; 88 89 bcopy(rss_seed, key, IXL_RSS_KEY_SIZE); 90 } 91 92 /* 93 ** Multiqueue Transmit driver 94 */ 95 int 96 ixl_mq_start(struct ifnet *ifp, struct mbuf *m) 97 { 98 struct ixl_vsi *vsi = ifp->if_softc; 99 struct ixl_queue *que; 100 struct tx_ring *txr; 101 int err, i; 102 #ifdef RSS 103 u32 bucket_id; 104 #endif 105 106 /* 107 ** Which queue to use: 108 ** 109 ** When doing RSS, map it to the same outbound 110 ** queue as the incoming flow would be mapped to. 111 ** If everything is setup correctly, it should be 112 ** the same bucket that the current CPU we're on is. 
113 */ 114 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 115 #ifdef RSS 116 if (rss_hash2bucket(m->m_pkthdr.flowid, 117 M_HASHTYPE_GET(m), &bucket_id) == 0) { 118 i = bucket_id % vsi->num_queues; 119 } else 120 #endif 121 i = m->m_pkthdr.flowid % vsi->num_queues; 122 } else 123 i = curcpu % vsi->num_queues; 124 125 que = &vsi->queues[i]; 126 txr = &que->txr; 127 128 err = drbr_enqueue(ifp, txr->br, m); 129 if (err) 130 return (err); 131 if (IXL_TX_TRYLOCK(txr)) { 132 ixl_mq_start_locked(ifp, txr); 133 IXL_TX_UNLOCK(txr); 134 } else 135 taskqueue_enqueue(que->tq, &que->tx_task); 136 137 return (0); 138 } 139 140 int 141 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 142 { 143 struct ixl_queue *que = txr->que; 144 struct ixl_vsi *vsi = que->vsi; 145 struct mbuf *next; 146 int err = 0; 147 148 149 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || 150 vsi->link_active == 0) 151 return (ENETDOWN); 152 153 /* Process the transmit queue */ 154 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 155 if ((err = ixl_xmit(que, &next)) != 0) { 156 if (next == NULL) 157 drbr_advance(ifp, txr->br); 158 else 159 drbr_putback(ifp, txr->br, next); 160 break; 161 } 162 drbr_advance(ifp, txr->br); 163 /* Send a copy of the frame to the BPF listener */ 164 ETHER_BPF_MTAP(ifp, next); 165 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 166 break; 167 } 168 169 if (txr->avail < IXL_TX_CLEANUP_THRESHOLD) 170 ixl_txeof(que); 171 172 return (err); 173 } 174 175 /* 176 * Called from a taskqueue to drain queued transmit packets. 
177 */ 178 void 179 ixl_deferred_mq_start(void *arg, int pending) 180 { 181 struct ixl_queue *que = arg; 182 struct tx_ring *txr = &que->txr; 183 struct ixl_vsi *vsi = que->vsi; 184 struct ifnet *ifp = vsi->ifp; 185 186 IXL_TX_LOCK(txr); 187 if (!drbr_empty(ifp, txr->br)) 188 ixl_mq_start_locked(ifp, txr); 189 IXL_TX_UNLOCK(txr); 190 } 191 192 /* 193 ** Flush all queue ring buffers 194 */ 195 void 196 ixl_qflush(struct ifnet *ifp) 197 { 198 struct ixl_vsi *vsi = ifp->if_softc; 199 200 for (int i = 0; i < vsi->num_queues; i++) { 201 struct ixl_queue *que = &vsi->queues[i]; 202 struct tx_ring *txr = &que->txr; 203 struct mbuf *m; 204 IXL_TX_LOCK(txr); 205 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 206 m_freem(m); 207 IXL_TX_UNLOCK(txr); 208 } 209 if_qflush(ifp); 210 } 211 212 /* 213 ** Find mbuf chains passed to the driver 214 ** that are 'sparse', using more than 8 215 ** mbufs to deliver an mss-size chunk of data 216 */ 217 static inline bool 218 ixl_tso_detect_sparse(struct mbuf *mp) 219 { 220 struct mbuf *m; 221 int num = 0, mss; 222 bool ret = FALSE; 223 224 mss = mp->m_pkthdr.tso_segsz; 225 for (m = mp->m_next; m != NULL; m = m->m_next) { 226 num++; 227 mss -= m->m_len; 228 if (mss < 1) 229 break; 230 if (m->m_next == NULL) 231 break; 232 } 233 if (num > IXL_SPARSE_CHAIN) 234 ret = TRUE; 235 236 return (ret); 237 } 238 239 240 /********************************************************************* 241 * 242 * This routine maps the mbufs to tx descriptors, allowing the 243 * TX engine to transmit the packets. 
 * 	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

/*
 * Map *m_headp for DMA and post it to the hardware TX descriptor ring.
 * May replace *m_headp (m_defrag) or free it and set it NULL on error,
 * so the caller must re-read the pointer afterwards.
 */
static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*txd = NULL;
	struct mbuf		*m_head, *m;
	int			i, j, error, nsegs;
	int			first, last = 0;
	u16			vtag = 0;
	u32			cmd, off;
	bus_dmamap_t		map;
	bus_dma_tag_t		tag;
	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];

	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		if (ixl_tso_detect_sparse(m_head)) {
			/* Chain too fragmented per MSS; compact it first. */
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		/* Too many segments for the tag: defragment and retry once. */
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			/* Transient: keep the mbuf for a later retry. */
			que->tx_dmamap_failed++;
			return (error);
		} else if (error != 0) {
			que->tx_dmamap_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		/* Transient: keep the mbuf for a later retry. */
		que->tx_dmamap_failed++;
		return (error);
	} else if (error != 0) {
		que->tx_dmamap_failed++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		/* NOTE: may consume one ring descriptor (TSO context). */
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	/* Write one data descriptor per DMA segment. */
	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report (EOP + RS) */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	/*
	 * The mbuf and the loaded map are parked on the LAST buffer so
	 * they are released only when the whole packet completes.
	 */
	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	if (que->busy == 0)
		que->busy = 1;
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_tx_buf	*buf;
	int			error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	/*
	 * NOTE(review): this non-TSO tag uses IXL_TSO_SIZE as maxsize and
	 * IXL_MAX_TX_SEGS segments — presumably intentional headroom, but
	 * verify against the TSO tag below which differs only in nsegments.
	 */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TX_SEGS,		/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tx_tag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TSO_SEGS,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tso_tag))) {
		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	/* One ixl_tx_buf per descriptor. */
	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	/*
	 * NOTE(review): partial allocations are returned as-is here;
	 * presumably the caller tears down via ixl_free_que_tx() — verify.
	 */
	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	      (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif

	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
		 * netmap slot index, si
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Release any mbufs and per-buffer DMA maps still held. */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/********************************************************************* 624 * 625 * Setup descriptor for hw offloads 626 * 627 **********************************************************************/ 628 629 static int 630 ixl_tx_setup_offload(struct ixl_queue *que, 631 struct mbuf *mp, u32 *cmd, u32 *off) 632 { 633 struct ether_vlan_header *eh; 634 #ifdef INET 635 struct ip *ip = NULL; 636 #endif 637 struct tcphdr *th = NULL; 638 #ifdef INET6 639 struct ip6_hdr *ip6; 640 #endif 641 int elen, ip_hlen = 0, tcp_hlen; 642 u16 etype; 643 u8 ipproto = 0; 644 bool tso = FALSE; 645 646 /* Set up the TSO context descriptor if required */ 647 if (mp->m_pkthdr.csum_flags & CSUM_TSO) { 648 tso = ixl_tso_setup(que, mp); 649 if (tso) 650 ++que->tso; 651 else 652 return (ENXIO); 653 } 654 655 /* 656 * Determine where frame payload starts. 657 * Jump over vlan headers if already present, 658 * helpful for QinQ too. 659 */ 660 eh = mtod(mp, struct ether_vlan_header *); 661 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 662 etype = ntohs(eh->evl_proto); 663 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 664 } else { 665 etype = ntohs(eh->evl_encap_proto); 666 elen = ETHER_HDR_LEN; 667 } 668 669 switch (etype) { 670 #ifdef INET 671 case ETHERTYPE_IP: 672 ip = (struct ip *)(mp->m_data + elen); 673 ip_hlen = ip->ip_hl << 2; 674 ipproto = ip->ip_p; 675 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 676 /* The IP checksum must be recalculated with TSO */ 677 if (tso) 678 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 679 else 680 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 681 break; 682 #endif 683 #ifdef INET6 684 case ETHERTYPE_IPV6: 685 ip6 = (struct ip6_hdr *)(mp->m_data + elen); 686 ip_hlen = sizeof(struct ip6_hdr); 687 ipproto = ip6->ip6_nxt; 688 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 689 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 690 break; 691 #endif 692 default: 693 break; 694 } 695 696 *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 697 *off |= (ip_hlen >> 2) << 
I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 698 699 switch (ipproto) { 700 case IPPROTO_TCP: 701 tcp_hlen = th->th_off << 2; 702 if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) { 703 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 704 *off |= (tcp_hlen >> 2) << 705 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 706 } 707 #ifdef IXL_FDIR 708 ixl_atr(que, th, etype); 709 #endif 710 break; 711 case IPPROTO_UDP: 712 if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) { 713 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 714 *off |= (sizeof(struct udphdr) >> 2) << 715 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 716 } 717 break; 718 719 case IPPROTO_SCTP: 720 if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) { 721 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 722 *off |= (sizeof(struct sctphdr) >> 2) << 723 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 724 } 725 /* Fall Thru */ 726 default: 727 break; 728 } 729 730 return (0); 731 } 732 733 734 /********************************************************************** 735 * 736 * Setup context for hardware segmentation offload (TSO) 737 * 738 **********************************************************************/ 739 static bool 740 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp) 741 { 742 struct tx_ring *txr = &que->txr; 743 struct i40e_tx_context_desc *TXD; 744 struct ixl_tx_buf *buf; 745 u32 cmd, mss, type, tsolen; 746 u16 etype; 747 int idx, elen, ip_hlen, tcp_hlen; 748 struct ether_vlan_header *eh; 749 #ifdef INET 750 struct ip *ip; 751 #endif 752 #ifdef INET6 753 struct ip6_hdr *ip6; 754 #endif 755 #if defined(INET6) || defined(INET) 756 struct tcphdr *th; 757 #endif 758 u64 type_cmd_tso_mss; 759 760 /* 761 * Determine where frame payload starts. 
762 * Jump over vlan headers if already present 763 */ 764 eh = mtod(mp, struct ether_vlan_header *); 765 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 766 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 767 etype = eh->evl_proto; 768 } else { 769 elen = ETHER_HDR_LEN; 770 etype = eh->evl_encap_proto; 771 } 772 773 switch (ntohs(etype)) { 774 #ifdef INET6 775 case ETHERTYPE_IPV6: 776 ip6 = (struct ip6_hdr *)(mp->m_data + elen); 777 if (ip6->ip6_nxt != IPPROTO_TCP) 778 return (ENXIO); 779 ip_hlen = sizeof(struct ip6_hdr); 780 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 781 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 782 tcp_hlen = th->th_off << 2; 783 /* 784 * The corresponding flag is set by the stack in the IPv4 785 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). 786 * So, set it here because the rest of the flow requires it. 787 */ 788 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 789 break; 790 #endif 791 #ifdef INET 792 case ETHERTYPE_IP: 793 ip = (struct ip *)(mp->m_data + elen); 794 if (ip->ip_p != IPPROTO_TCP) 795 return (ENXIO); 796 ip->ip_sum = 0; 797 ip_hlen = ip->ip_hl << 2; 798 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 799 th->th_sum = in_pseudo(ip->ip_src.s_addr, 800 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 801 tcp_hlen = th->th_off << 2; 802 break; 803 #endif 804 default: 805 printf("%s: CSUM_TSO but no supported IP version (0x%04x)", 806 __func__, ntohs(etype)); 807 return FALSE; 808 } 809 810 /* Ensure we have at least the IP+TCP header in the first mbuf. 
 */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	/* Consume one ring descriptor for the TSO context. */
	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/* ERJ: this must not be less than 64 */
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
**
** (Head writeback: the slot just past the last descriptor holds the
** HW head index.)
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring  *txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 *  Uses the head-writeback index (not a per-descriptor done bit) to
 *  decide how far to clean. Returns TRUE while work remains, FALSE
 *  when the ring is fully cleaned.
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, head, done, processed;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*tx_desc, *eop_desc;

	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEF_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	/*
	 * NOTE(review): eop_index is compared against -1 although 'last'
	 * is u32; this relies on -1 wrapping to 0xffffffff consistently on
	 * both sides — works, but worth confirming eop_index's type.
	 */
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				/*
				 * NOTE(review): clearing buf->map here drops
				 * the handle to a created dmamap without
				 * destroying it — looks like a leak unless the
				 * map was swapped elsewhere; confirm against
				 * the map-swap in ixl_xmit().
				 */
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Hang detection, we know there's
	** work outstanding or the first return
	** would have been taken, so indicate an
	** unsuccessful pass, in local_timer if
	** the value is too great the queue will
	** be considered hung. If anything has been
	** cleaned then reset the state.
	*/
	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
		++que->busy;

	if (processed)
		que->busy = 1; /* Note this turns off HUNG */

	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct ixl_rx_buf	*buf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		/* Header-split path is skipped when hdr_split is off. */
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			/* Reuse the placeholder mbuf kept from last time. */
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		}
		else
			/* Reuse the placeholder cluster kept from last time. */
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_rx_buf	*buf;
	int			i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	/* Tag for (small) header mbufs used with header split. */
	if ((error = bus_dma_tag_create(NULL,	/* parent */
			       1, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,	/* filter, filterarg */
			       MSIZE,		/* maxsize */
			       1,		/* nsegments */
			       MSIZE,		/* maxsegsize */
			       0,		/* flags */
			       NULL,		/* lockfunc */
			       NULL,		/* lockfuncarg */
			       &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	/* Tag for packet/payload clusters (up to 16KB jumbo). */
	if ((error = bus_dma_tag_create(NULL,	/* parent */
			       1, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,	/* filter, filterarg */
			       MJUM16BYTES,	/* maxsize */
			       1,		/* nsegments */
			       MJUM16BYTES,	/* maxsegsize */
			       0,		/* flags */
			       NULL,		/* lockfunc */
			       NULL,		/* lockfuncarg */
			       &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	/*
	 * NOTE(review): on map-create failure this breaks and returns the
	 * error without undoing earlier allocations; presumably the caller
	 * tears down via the queue free path — verify.
	 */
	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct	rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet		*ifp = vsi->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	struct ixl_rx_buf	*buf;
	bus_dma_segment_t	pseg[1], hseg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf	*mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
1225 * address in the NIC ring, considering the offset 1226 * between the netmap and NIC rings (see comment in 1227 * ixgbe_setup_transmit_ring() ). No need to allocate 1228 * an mbuf, so end the block with a continue; 1229 */ 1230 if (slot) { 1231 int sj = netmap_idx_n2k(&na->rx_rings[que->me], j); 1232 uint64_t paddr; 1233 void *addr; 1234 1235 addr = PNMB(na, slot + sj, &paddr); 1236 netmap_load_map(na, rxr->dma.tag, buf->pmap, addr); 1237 /* Update descriptor and the cached value */ 1238 rxr->base[j].read.pkt_addr = htole64(paddr); 1239 rxr->base[j].read.hdr_addr = 0; 1240 continue; 1241 } 1242 #endif /* DEV_NETMAP */ 1243 /* 1244 ** Don't allocate mbufs if not 1245 ** doing header split, its wasteful 1246 */ 1247 if (rxr->hdr_split == FALSE) 1248 goto skip_head; 1249 1250 /* First the header */ 1251 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA); 1252 if (buf->m_head == NULL) { 1253 error = ENOBUFS; 1254 goto fail; 1255 } 1256 m_adj(buf->m_head, ETHER_ALIGN); 1257 mh = buf->m_head; 1258 mh->m_len = mh->m_pkthdr.len = MHLEN; 1259 mh->m_flags |= M_PKTHDR; 1260 /* Get the memory mapping */ 1261 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1262 buf->hmap, buf->m_head, hseg, 1263 &nsegs, BUS_DMA_NOWAIT); 1264 if (error != 0) /* Nothing elegant to do here */ 1265 goto fail; 1266 bus_dmamap_sync(rxr->htag, 1267 buf->hmap, BUS_DMASYNC_PREREAD); 1268 /* Update descriptor */ 1269 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr); 1270 1271 skip_head: 1272 /* Now the payload cluster */ 1273 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, 1274 M_PKTHDR, rxr->mbuf_sz); 1275 if (buf->m_pack == NULL) { 1276 error = ENOBUFS; 1277 goto fail; 1278 } 1279 mp = buf->m_pack; 1280 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1281 /* Get the memory mapping */ 1282 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1283 buf->pmap, mp, pseg, 1284 &nsegs, BUS_DMA_NOWAIT); 1285 if (error != 0) 1286 goto fail; 1287 bus_dmamap_sync(rxr->ptag, 1288 buf->pmap, BUS_DMASYNC_PREREAD); 1289 /* Update descriptor 
*/ 1290 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr); 1291 rxr->base[j].read.hdr_addr = 0; 1292 } 1293 1294 1295 /* Setup our descriptor indices */ 1296 rxr->next_check = 0; 1297 rxr->next_refresh = 0; 1298 rxr->lro_enabled = FALSE; 1299 rxr->split = 0; 1300 rxr->bytes = 0; 1301 rxr->discard = FALSE; 1302 1303 wr32(vsi->hw, rxr->tail, que->num_desc - 1); 1304 ixl_flush(vsi->hw); 1305 1306 #if defined(INET6) || defined(INET) 1307 /* 1308 ** Now set up the LRO interface: 1309 */ 1310 if (ifp->if_capenable & IFCAP_LRO) { 1311 int err = tcp_lro_init(lro); 1312 if (err) { 1313 if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me); 1314 goto fail; 1315 } 1316 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me); 1317 rxr->lro_enabled = TRUE; 1318 lro->ifp = vsi->ifp; 1319 } 1320 #endif 1321 1322 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1323 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1324 1325 fail: 1326 IXL_RX_UNLOCK(rxr); 1327 return (error); 1328 } 1329 1330 1331 /********************************************************************* 1332 * 1333 * Free station receive ring data structures 1334 * 1335 **********************************************************************/ 1336 void 1337 ixl_free_que_rx(struct ixl_queue *que) 1338 { 1339 struct rx_ring *rxr = &que->rxr; 1340 struct ixl_rx_buf *buf; 1341 1342 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 1343 1344 /* Cleanup any existing buffers */ 1345 if (rxr->buffers != NULL) { 1346 for (int i = 0; i < que->num_desc; i++) { 1347 buf = &rxr->buffers[i]; 1348 if (buf->m_head != NULL) { 1349 bus_dmamap_sync(rxr->htag, buf->hmap, 1350 BUS_DMASYNC_POSTREAD); 1351 bus_dmamap_unload(rxr->htag, buf->hmap); 1352 buf->m_head->m_flags |= M_PKTHDR; 1353 m_freem(buf->m_head); 1354 } 1355 if (buf->m_pack != NULL) { 1356 bus_dmamap_sync(rxr->ptag, buf->pmap, 1357 BUS_DMASYNC_POSTREAD); 1358 bus_dmamap_unload(rxr->ptag, buf->pmap); 1359 buf->m_pack->m_flags |= M_PKTHDR; 1360 
m_freem(buf->m_pack); 1361 } 1362 buf->m_head = NULL; 1363 buf->m_pack = NULL; 1364 if (buf->hmap != NULL) { 1365 bus_dmamap_destroy(rxr->htag, buf->hmap); 1366 buf->hmap = NULL; 1367 } 1368 if (buf->pmap != NULL) { 1369 bus_dmamap_destroy(rxr->ptag, buf->pmap); 1370 buf->pmap = NULL; 1371 } 1372 } 1373 if (rxr->buffers != NULL) { 1374 free(rxr->buffers, M_DEVBUF); 1375 rxr->buffers = NULL; 1376 } 1377 } 1378 1379 if (rxr->htag != NULL) { 1380 bus_dma_tag_destroy(rxr->htag); 1381 rxr->htag = NULL; 1382 } 1383 if (rxr->ptag != NULL) { 1384 bus_dma_tag_destroy(rxr->ptag); 1385 rxr->ptag = NULL; 1386 } 1387 1388 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 1389 return; 1390 } 1391 1392 static inline void 1393 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype) 1394 { 1395 1396 #if defined(INET6) || defined(INET) 1397 /* 1398 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet 1399 * should be computed by hardware. Also it should not have VLAN tag in 1400 * ethernet header. 1401 */ 1402 if (rxr->lro_enabled && 1403 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 1404 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 1405 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { 1406 /* 1407 * Send to the stack if: 1408 ** - LRO not enabled, or 1409 ** - no LRO resources, or 1410 ** - lro enqueue fails 1411 */ 1412 if (rxr->lro.lro_cnt != 0) 1413 if (tcp_lro_rx(&rxr->lro, m, 0) == 0) 1414 return; 1415 } 1416 #endif 1417 IXL_RX_UNLOCK(rxr); 1418 (*ifp->if_input)(ifp, m); 1419 IXL_RX_LOCK(rxr); 1420 } 1421 1422 1423 static inline void 1424 ixl_rx_discard(struct rx_ring *rxr, int i) 1425 { 1426 struct ixl_rx_buf *rbuf; 1427 1428 rbuf = &rxr->buffers[i]; 1429 1430 if (rbuf->fmp != NULL) {/* Partial chain ? 
*/ 1431 rbuf->fmp->m_flags |= M_PKTHDR; 1432 m_freem(rbuf->fmp); 1433 rbuf->fmp = NULL; 1434 } 1435 1436 /* 1437 ** With advanced descriptors the writeback 1438 ** clobbers the buffer addrs, so its easier 1439 ** to just free the existing mbufs and take 1440 ** the normal refresh path to get new buffers 1441 ** and mapping. 1442 */ 1443 if (rbuf->m_head) { 1444 m_free(rbuf->m_head); 1445 rbuf->m_head = NULL; 1446 } 1447 1448 if (rbuf->m_pack) { 1449 m_free(rbuf->m_pack); 1450 rbuf->m_pack = NULL; 1451 } 1452 1453 return; 1454 } 1455 1456 #ifdef RSS 1457 /* 1458 ** i40e_ptype_to_hash: parse the packet type 1459 ** to determine the appropriate hash. 1460 */ 1461 static inline int 1462 ixl_ptype_to_hash(u8 ptype) 1463 { 1464 struct i40e_rx_ptype_decoded decoded; 1465 u8 ex = 0; 1466 1467 decoded = decode_rx_desc_ptype(ptype); 1468 ex = decoded.outer_frag; 1469 1470 if (!decoded.known) 1471 return M_HASHTYPE_OPAQUE_HASH; 1472 1473 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 1474 return M_HASHTYPE_OPAQUE_HASH; 1475 1476 /* Note: anything that gets to this point is IP */ 1477 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 1478 switch (decoded.inner_prot) { 1479 case I40E_RX_PTYPE_INNER_PROT_TCP: 1480 if (ex) 1481 return M_HASHTYPE_RSS_TCP_IPV6_EX; 1482 else 1483 return M_HASHTYPE_RSS_TCP_IPV6; 1484 case I40E_RX_PTYPE_INNER_PROT_UDP: 1485 if (ex) 1486 return M_HASHTYPE_RSS_UDP_IPV6_EX; 1487 else 1488 return M_HASHTYPE_RSS_UDP_IPV6; 1489 default: 1490 if (ex) 1491 return M_HASHTYPE_RSS_IPV6_EX; 1492 else 1493 return M_HASHTYPE_RSS_IPV6; 1494 } 1495 } 1496 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 1497 switch (decoded.inner_prot) { 1498 case I40E_RX_PTYPE_INNER_PROT_TCP: 1499 return M_HASHTYPE_RSS_TCP_IPV4; 1500 case I40E_RX_PTYPE_INNER_PROT_UDP: 1501 if (ex) 1502 return M_HASHTYPE_RSS_UDP_IPV4_EX; 1503 else 1504 return M_HASHTYPE_RSS_UDP_IPV4; 1505 default: 1506 return M_HASHTYPE_RSS_IPV4; 1507 } 1508 } 1509 /* We should never get here!! 
*/ 1510 return M_HASHTYPE_OPAQUE_HASH; 1511 } 1512 #endif /* RSS */ 1513 1514 /********************************************************************* 1515 * 1516 * This routine executes in interrupt context. It replenishes 1517 * the mbufs in the descriptor and sends data which has been 1518 * dma'ed into host memory to upper layer. 1519 * 1520 * We loop at most count times if count is > 0, or until done if 1521 * count < 0. 1522 * 1523 * Return TRUE for more work, FALSE for all clean. 1524 *********************************************************************/ 1525 bool 1526 ixl_rxeof(struct ixl_queue *que, int count) 1527 { 1528 struct ixl_vsi *vsi = que->vsi; 1529 struct rx_ring *rxr = &que->rxr; 1530 struct ifnet *ifp = vsi->ifp; 1531 #if defined(INET6) || defined(INET) 1532 struct lro_ctrl *lro = &rxr->lro; 1533 #endif 1534 int i, nextp, processed = 0; 1535 union i40e_rx_desc *cur; 1536 struct ixl_rx_buf *rbuf, *nbuf; 1537 1538 1539 IXL_RX_LOCK(rxr); 1540 1541 #ifdef DEV_NETMAP 1542 if (netmap_rx_irq(ifp, que->me, &count)) { 1543 IXL_RX_UNLOCK(rxr); 1544 return (FALSE); 1545 } 1546 #endif /* DEV_NETMAP */ 1547 1548 for (i = rxr->next_check; count != 0;) { 1549 struct mbuf *sendmp, *mh, *mp; 1550 u32 status, error; 1551 u16 hlen, plen, vtag; 1552 u64 qword; 1553 u8 ptype; 1554 bool eop; 1555 1556 /* Sync the ring. 
*/ 1557 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1558 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1559 1560 cur = &rxr->base[i]; 1561 qword = le64toh(cur->wb.qword1.status_error_len); 1562 status = (qword & I40E_RXD_QW1_STATUS_MASK) 1563 >> I40E_RXD_QW1_STATUS_SHIFT; 1564 error = (qword & I40E_RXD_QW1_ERROR_MASK) 1565 >> I40E_RXD_QW1_ERROR_SHIFT; 1566 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) 1567 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1568 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) 1569 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1570 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) 1571 >> I40E_RXD_QW1_PTYPE_SHIFT; 1572 1573 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) { 1574 ++rxr->not_done; 1575 break; 1576 } 1577 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1578 break; 1579 1580 count--; 1581 sendmp = NULL; 1582 nbuf = NULL; 1583 cur->wb.qword1.status_error_len = 0; 1584 rbuf = &rxr->buffers[i]; 1585 mh = rbuf->m_head; 1586 mp = rbuf->m_pack; 1587 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)); 1588 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) 1589 vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); 1590 else 1591 vtag = 0; 1592 1593 /* 1594 ** Make sure bad packets are discarded, 1595 ** note that only EOP descriptor has valid 1596 ** error results. 1597 */ 1598 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1599 rxr->desc_errs++; 1600 ixl_rx_discard(rxr, i); 1601 goto next_desc; 1602 } 1603 1604 /* Prefetch the next buffer */ 1605 if (!eop) { 1606 nextp = i + 1; 1607 if (nextp == que->num_desc) 1608 nextp = 0; 1609 nbuf = &rxr->buffers[nextp]; 1610 prefetch(nbuf); 1611 } 1612 1613 /* 1614 ** The header mbuf is ONLY used when header 1615 ** split is enabled, otherwise we get normal 1616 ** behavior, ie, both header and payload 1617 ** are DMA'd into the payload buffer. 
1618 ** 1619 ** Rather than using the fmp/lmp global pointers 1620 ** we now keep the head of a packet chain in the 1621 ** buffer struct and pass this along from one 1622 ** descriptor to the next, until we get EOP. 1623 */ 1624 if (rxr->hdr_split && (rbuf->fmp == NULL)) { 1625 if (hlen > IXL_RX_HDR) 1626 hlen = IXL_RX_HDR; 1627 mh->m_len = hlen; 1628 mh->m_flags |= M_PKTHDR; 1629 mh->m_next = NULL; 1630 mh->m_pkthdr.len = mh->m_len; 1631 /* Null buf pointer so it is refreshed */ 1632 rbuf->m_head = NULL; 1633 /* 1634 ** Check the payload length, this 1635 ** could be zero if its a small 1636 ** packet. 1637 */ 1638 if (plen > 0) { 1639 mp->m_len = plen; 1640 mp->m_next = NULL; 1641 mp->m_flags &= ~M_PKTHDR; 1642 mh->m_next = mp; 1643 mh->m_pkthdr.len += mp->m_len; 1644 /* Null buf pointer so it is refreshed */ 1645 rbuf->m_pack = NULL; 1646 rxr->split++; 1647 } 1648 /* 1649 ** Now create the forward 1650 ** chain so when complete 1651 ** we wont have to. 1652 */ 1653 if (eop == 0) { 1654 /* stash the chain head */ 1655 nbuf->fmp = mh; 1656 /* Make forward chain */ 1657 if (plen) 1658 mp->m_next = nbuf->m_pack; 1659 else 1660 mh->m_next = nbuf->m_pack; 1661 } else { 1662 /* Singlet, prepare to send */ 1663 sendmp = mh; 1664 if (vtag) { 1665 sendmp->m_pkthdr.ether_vtag = vtag; 1666 sendmp->m_flags |= M_VLANTAG; 1667 } 1668 } 1669 } else { 1670 /* 1671 ** Either no header split, or a 1672 ** secondary piece of a fragmented 1673 ** split packet. 
1674 */ 1675 mp->m_len = plen; 1676 /* 1677 ** See if there is a stored head 1678 ** that determines what we are 1679 */ 1680 sendmp = rbuf->fmp; 1681 rbuf->m_pack = rbuf->fmp = NULL; 1682 1683 if (sendmp != NULL) /* secondary frag */ 1684 sendmp->m_pkthdr.len += mp->m_len; 1685 else { 1686 /* first desc of a non-ps chain */ 1687 sendmp = mp; 1688 sendmp->m_flags |= M_PKTHDR; 1689 sendmp->m_pkthdr.len = mp->m_len; 1690 } 1691 /* Pass the head pointer on */ 1692 if (eop == 0) { 1693 nbuf->fmp = sendmp; 1694 sendmp = NULL; 1695 mp->m_next = nbuf->m_pack; 1696 } 1697 } 1698 ++processed; 1699 /* Sending this frame? */ 1700 if (eop) { 1701 sendmp->m_pkthdr.rcvif = ifp; 1702 /* gather stats */ 1703 rxr->rx_packets++; 1704 rxr->rx_bytes += sendmp->m_pkthdr.len; 1705 /* capture data for dynamic ITR adjustment */ 1706 rxr->packets++; 1707 rxr->bytes += sendmp->m_pkthdr.len; 1708 /* Set VLAN tag (field only valid in eop desc) */ 1709 if (vtag) { 1710 sendmp->m_pkthdr.ether_vtag = vtag; 1711 sendmp->m_flags |= M_VLANTAG; 1712 } 1713 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1714 ixl_rx_checksum(sendmp, status, error, ptype); 1715 #ifdef RSS 1716 sendmp->m_pkthdr.flowid = 1717 le32toh(cur->wb.qword0.hi_dword.rss); 1718 M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype)); 1719 #else 1720 sendmp->m_pkthdr.flowid = que->msix; 1721 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1722 #endif 1723 } 1724 next_desc: 1725 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1726 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1727 1728 /* Advance our pointers to the next descriptor. 
*/ 1729 if (++i == que->num_desc) 1730 i = 0; 1731 1732 /* Now send to the stack or do LRO */ 1733 if (sendmp != NULL) { 1734 rxr->next_check = i; 1735 ixl_rx_input(rxr, ifp, sendmp, ptype); 1736 i = rxr->next_check; 1737 } 1738 1739 /* Every 8 descriptors we go to refresh mbufs */ 1740 if (processed == 8) { 1741 ixl_refresh_mbufs(que, i); 1742 processed = 0; 1743 } 1744 } 1745 1746 /* Refresh any remaining buf structs */ 1747 if (ixl_rx_unrefreshed(que)) 1748 ixl_refresh_mbufs(que, i); 1749 1750 rxr->next_check = i; 1751 1752 #if defined(INET6) || defined(INET) 1753 /* 1754 * Flush any outstanding LRO work 1755 */ 1756 tcp_lro_flush_all(lro); 1757 #endif 1758 1759 IXL_RX_UNLOCK(rxr); 1760 return (FALSE); 1761 } 1762 1763 1764 /********************************************************************* 1765 * 1766 * Verify that the hardware indicated that the checksum is valid. 1767 * Inform the stack about the status of checksum so that stack 1768 * doesn't spend time verifying the checksum. 1769 * 1770 *********************************************************************/ 1771 static void 1772 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype) 1773 { 1774 struct i40e_rx_ptype_decoded decoded; 1775 1776 decoded = decode_rx_desc_ptype(ptype); 1777 1778 /* Errors? 
*/ 1779 if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 1780 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { 1781 mp->m_pkthdr.csum_flags = 0; 1782 return; 1783 } 1784 1785 /* IPv6 with extension headers likely have bad csum */ 1786 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1787 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 1788 if (status & 1789 (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { 1790 mp->m_pkthdr.csum_flags = 0; 1791 return; 1792 } 1793 1794 1795 /* IP Checksum Good */ 1796 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 1797 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; 1798 1799 if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) { 1800 mp->m_pkthdr.csum_flags |= 1801 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1802 mp->m_pkthdr.csum_data |= htons(0xffff); 1803 } 1804 return; 1805 } 1806 1807 #if __FreeBSD_version >= 1100000 1808 uint64_t 1809 ixl_get_counter(if_t ifp, ift_counter cnt) 1810 { 1811 struct ixl_vsi *vsi; 1812 1813 vsi = if_getsoftc(ifp); 1814 1815 switch (cnt) { 1816 case IFCOUNTER_IPACKETS: 1817 return (vsi->ipackets); 1818 case IFCOUNTER_IERRORS: 1819 return (vsi->ierrors); 1820 case IFCOUNTER_OPACKETS: 1821 return (vsi->opackets); 1822 case IFCOUNTER_OERRORS: 1823 return (vsi->oerrors); 1824 case IFCOUNTER_COLLISIONS: 1825 /* Collisions are by standard impossible in 40G/10G Ethernet */ 1826 return (0); 1827 case IFCOUNTER_IBYTES: 1828 return (vsi->ibytes); 1829 case IFCOUNTER_OBYTES: 1830 return (vsi->obytes); 1831 case IFCOUNTER_IMCASTS: 1832 return (vsi->imcasts); 1833 case IFCOUNTER_OMCASTS: 1834 return (vsi->omcasts); 1835 case IFCOUNTER_IQDROPS: 1836 return (vsi->iqdrops); 1837 case IFCOUNTER_OQDROPS: 1838 return (vsi->oqdrops); 1839 case IFCOUNTER_NOPROTO: 1840 return (vsi->noproto); 1841 default: 1842 return (if_get_counter_default(ifp, cnt)); 1843 } 1844 } 1845 #endif 1846 1847