/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the PF and VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static inline void ixl_rx_discard(struct rx_ring *, int);
static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
static inline u32 ixl_get_tx_head(struct ixl_queue *que);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
#endif /* DEV_NETMAP */

/*
 * @key key is saved into this parameter
 */
void
ixl_get_default_rss_key(u32 *key)
{
	MPASS(key != NULL);

	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
	    0x183cfd8c, 0xce880440, 0x580cbc3c,
	    0x35897377, 0x328b25e1, 0x4fa98922,
	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
	    0x0, 0x0, 0x0};

	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
}

/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi *vsi = ifp->if_softc;
	struct ixl_queue *que;
	struct tx_ring *txr;
	int err, i;
#ifdef RSS
	u32 bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is set up correctly, it should be
	** the same bucket that the current CPU we're on is.
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue *que = txr->que;
	struct ixl_vsi *vsi = que->vsi;
	struct mbuf *next;
	int err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
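 *
 * For example, ixl_mq_start() above defers to this handler when it cannot
 * take the TX lock immediately:
 *
 *	if (IXL_TX_TRYLOCK(txr)) {
 *		ixl_mq_start_locked(ifp, txr);
 *		IXL_TX_UNLOCK(txr);
 *	} else
 *		taskqueue_enqueue(que->tq, &que->tx_task);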
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue *que = arg;
	struct tx_ring *txr = &que->txr;
	struct ixl_vsi *vsi = que->vsi;
	struct ifnet *ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi *vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring *txr = &que->txr;
		struct mbuf *m;
		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf *m;
	int num = 0, mss;
	bool ret = FALSE;

	mss = mp->m_pkthdr.tso_segsz;
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len;
		if (mss < 1)
			break;
		if (m->m_next == NULL)
			break;
	}
	if (num > IXL_SPARSE_CHAIN)
		ret = TRUE;

	return (ret);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi *vsi = que->vsi;
	struct i40e_hw *hw = vsi->hw;
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;
	struct i40e_tx_desc *txd = NULL;
	struct mbuf *m_head, *m;
	int i, j, error, nsegs;
	int first, last = 0;
	u16 vtag = 0;
	u32 cmd, off;
	bus_dmamap_t map;
	bus_dma_tag_t tag;
	bus_dma_segment_t segs[IXL_MAX_TSO_SEGS];

	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
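	 *
	 * On EFBIG (too many segments for the tag) the chain is defragmented
	 * once with m_defrag() and the load retried; any other failure frees
	 * the mbuf and counts a tx_dmamap_failed event.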
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			que->tx_dmamap_failed++;
			return (error);
		} else if (error != 0) {
			que->tx_dmamap_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		que->tx_dmamap_failed++;
		return (error);
	} else if (error != 0) {
		que->tx_dmamap_failed++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	if (que->busy == 0)
		que->busy = 1;
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
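 *
 *  Two DMA tags are created below: txr->tx_tag (up to IXL_MAX_TX_SEGS
 *  segments) for ordinary frames and txr->tso_tag (up to IXL_MAX_TSO_SEGS
 *  segments) for TSO frames; ixl_xmit() selects one per packet.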
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_vsi *vsi = que->vsi;
	device_t dev = vsi->dev;
	struct ixl_tx_buf *buf;
	int error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TX_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tx_tag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TSO_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tso_tag))) {
		device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif

	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
		 * netmap slot index, si
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip = NULL;
#endif
	struct tcphdr *th = NULL;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	int elen, ip_hlen = 0, tcp_hlen;
	u16 etype;
	u8 ipproto = 0;
	bool tso = FALSE;

	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
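	 * For example, with a single 802.1Q tag the L3 header begins at
	 * ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN (14 + 4) bytes; untagged
	 * frames place it at ETHER_HDR_LEN (14) bytes.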
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* The IP checksum must be recalculated with TSO */
		if (tso)
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	switch (ipproto) {
	case IPPROTO_TCP:
		tcp_hlen = th->th_off << 2;
		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (tcp_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
#ifdef IXL_FDIR
		ixl_atr(que, th, etype);
#endif
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		break;

	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		/* Fall Thru */
	default:
		break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring *txr = &que->txr;
	struct i40e_tx_context_desc *TXD;
	struct ixl_tx_buf *buf;
	u32 cmd, mss, type, tsolen;
	u16 etype;
	int idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr *th;
#endif
	u64 type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		/*
		 * The corresponding flag is set by the stack in the IPv4
		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
		 * So, set it here because the rest of the flow requires it.
		 */
		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
		    __func__, ntohs(etype));
		return FALSE;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/* ERJ: this must not be less than 64 */
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue.  If the hardware is done
 *  processing the packet then free associated resources.  The
 *  tx_buffer is put back on the free queue.
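 *
 *  Completion is tracked via the head write-back word that the hardware
 *  DMAs just past the last descriptor, read by ixl_get_tx_head() above as
 *
 *	head = LE32_TO_CPU(*(volatile __le32 *)&txr->base[que->num_desc]);
 *
 *  rather than by polling a DD bit in each descriptor.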
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	u32 first, last, head, done, processed;
	struct ixl_tx_buf *buf;
	struct i40e_tx_desc *tx_desc, *eop_desc;

	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEV_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Hang detection: we know there's
	** work outstanding or the first return
	** would have been taken, so indicate an
	** unsuccessful pass; in local_timer, if
	** the value is too great the queue will
	** be considered hung. If anything has been
	** cleaned then reset the state.
	*/
	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
		++que->busy;

	if (processed)
		que->busy = 1; /* Note this turns off HUNG */

	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi *vsi = que->vsi;
	struct rx_ring *rxr = &que->rxr;
	bus_dma_segment_t hseg[1];
	bus_dma_segment_t pseg[1];
	struct ixl_rx_buf *buf;
	struct mbuf *mh, *mp;
	int i, j, nsegs, error;
	bool refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
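 *
 *  Two DMA tags back each ring: rxr->htag (MSIZE, used only when header
 *  split is enabled) and rxr->ptag (up to MJUM16BYTES for the payload
 *  cluster); each rx_buffer gets one map from each tag.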
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_vsi *vsi = que->vsi;
	device_t dev = vsi->dev;
	struct ixl_rx_buf *buf;
	int i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MSIZE,		/* maxsize */
	    1,			/* nsegments */
	    MSIZE,		/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MJUM16BYTES,	/* maxsize */
	    1,			/* nsegments */
	    MJUM16BYTES,	/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
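 *
 *  Header split is left disabled here, so only the payload cluster is
 *  allocated per descriptor; once the ring is replenished the tail
 *  register is primed to the last descriptor index (que->num_desc - 1).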
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_vsi *vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet *ifp = vsi->ifp;
	struct lro_ctrl *lro = &rxr->lro;
#endif
	struct ixl_rx_buf *buf;
	bus_dma_segment_t pseg[1], hseg[1];
	int rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf *mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ).
		 * No need to allocate an mbuf, so end the block with
		 * a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate mbufs if not
		** doing header split, it's wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}

	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring *rxr = &que->rxr;
	struct ixl_rx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf *rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

#ifdef RSS
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
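**
** For example, a known IPv4/TCP ptype maps to M_HASHTYPE_RSS_TCP_IPV4,
** while unknown or non-IP ptypes fall back to M_HASHTYPE_OPAQUE_HASH so
** the stack still gets a usable flowid.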
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;
	u8 ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE_HASH;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE_HASH;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			if (ex)
				return M_HASHTYPE_RSS_TCP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_TCP_IPV6;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			if (ex)
				return M_HASHTYPE_RSS_IPV6_EX;
			else
				return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV4_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE_HASH;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi *vsi = que->vsi;
	struct rx_ring *rxr = &que->rxr;
	struct ifnet *ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl *lro = &rxr->lro;
#endif
	int i, nextp, processed = 0;
	union i40e_rx_desc *cur;
	struct ixl_rx_buf *rbuf, *nbuf;

	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf *sendmp, *mh, *mp;
		u32 status, error;
		u16 hlen, plen, vtag;
		u64 qword;
		u8 ptype;
		bool eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded,
		** note that only EOP descriptor has valid
		** error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->desc_errs++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we won't have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			/* Set VLAN tag (field only valid in eop desc) */
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
	tcp_lro_flush_all(lro);
#endif

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf *mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/* Errors? */
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}

	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif