/******************************************************************************

  Copyright (c) 2013-2014, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the BASE and the VF drivers.
*/

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static __inline void ixl_rx_discard(struct rx_ring *, int);
static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

/*
** Multiqueue Transmit driver
**
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi		*vsi = ifp->if_softc;
	struct ixl_queue	*que;
	struct tx_ring		*txr;
	int			err, i;
#ifdef RSS
	u32			bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is set up correctly, it should be
	** the same bucket that the current CPU maps to.
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;
	/*
	** This may not be perfect, but until something
	** better comes along it will keep from scheduling
	** on stalled queues.
	*/
	if (((1 << i) & vsi->active_queues) == 0)
		i = ffsl(vsi->active_queues);

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue	*que = txr->que;
	struct ixl_vsi		*vsi = que->vsi;
	struct mbuf		*next;
	int			err = 0;


	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue	*que = arg;
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	struct ifnet		*ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi	*vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring	*txr = &que->txr;
		struct mbuf	*m;
		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf	*m;
	int		num = 0, mss;
	bool		ret = FALSE;

	mss = mp->m_pkthdr.tso_segsz;
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len;
		if (mss < 1)
			break;
		if (m->m_next == NULL)
			break;
	}
	if (num > IXL_SPARSE_CHAIN)
		ret = TRUE;

	return (ret);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*txd = NULL;
	struct mbuf		*m_head, *m;
	int			i, j, error, nsegs, maxsegs;
	int			first, last = 0;
	u16			vtag = 0;
	u32			cmd, off;
	bus_dmamap_t		map;
	bus_dma_tag_t		tag;
	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];


	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;
	maxsegs = IXL_MAX_TX_SEGS;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		maxsegs = IXL_MAX_TSO_SEGS;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			que->tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			que->tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		que->tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		que->tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	ixl_flush(hw);
	/* Mark outstanding work */
	if (que->busy == 0)
		que->busy = 1;
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_tx_buf	*buf;
	int			error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TX_SEGS,		/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tx_tag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TSO_SEGS,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tso_tag))) {
		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);
	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif

	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip = NULL;
#endif
	struct tcphdr			*th = NULL;
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
	int				elen, ip_hlen = 0, tcp_hlen;
	u16				etype;
	u8				ipproto = 0;
	bool				tso = FALSE;


	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* The IP checksum must be recalculated with TSO */
		if (tso)
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	switch (ipproto) {
	case IPPROTO_TCP:
		tcp_hlen = th->th_off << 2;
		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (tcp_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
#ifdef IXL_FDIR
		ixl_atr(que, th, etype);
#endif
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		break;

	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		/* Fall Thru */
	default:
		break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring			*txr = &que->txr;
	struct i40e_tx_context_desc	*TXD;
	struct ixl_tx_buf		*buf;
	u32				cmd, mss, type, tsolen;
	u16				etype;
	int				idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip;
#endif
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr			*th;
#endif
	u64				type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(etype));
		return FALSE;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring	*txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, head, done, processed;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*tx_desc, *eop_desc;


	mtx_assert(&txr->mtx, MA_OWNED);


	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;


	/*
	** Hang detection: we know there's work outstanding
	** or the first return would have been taken, so
	** indicate an unsuccessful pass. In local_timer,
	** if the value grows too large the queue will be
	** considered hung. If anything has been cleaned
	** then reset the state.
	*/
	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
		++que->busy;

	if (processed)
		que->busy = 1; /* Note this turns off HUNG */

	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct ixl_rx_buf	*buf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_rx_buf	*buf;
	int			i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       MSIZE,			/* maxsize */
			       1,			/* nsegments */
			       MSIZE,			/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       MJUM16BYTES,		/* maxsize */
			       1,			/* nsegments */
			       MJUM16BYTES,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet		*ifp = vsi->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	struct ixl_rx_buf	*buf;
	bus_dma_segment_t	pseg[1], hseg[1];
	int			rsize, nsegs, error = 0;

	IXL_RX_LOCK(rxr);
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf	*mh, *mp;

		buf = &rxr->buffers[j];
		/*
		** Don't allocate mbufs if not
		** doing header split, it's wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}


	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_rx_buf	*buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static __inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static __inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf	*rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

#ifdef RSS
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded	decoded;
	u8				ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			if (ex)
				return M_HASHTYPE_RSS_TCP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_TCP_IPV6;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			if (ex)
				return M_HASHTYPE_RSS_IPV6_EX;
			else
				return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV4_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	struct ifnet		*ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
#endif
	int			i, nextp, processed = 0;
	union i40e_rx_desc	*cur;
	struct ixl_rx_buf	*rbuf, *nbuf;


	IXL_RX_LOCK(rxr);


	for (i = rxr->next_check; count != 0;) {
		struct mbuf	*sendmp, *mh, *mp;
		u32		rsc, status, error;
		u16		hlen, plen, vtag;
		u64		qword;
		u8		ptype;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded,
		** note that only EOP descriptor has valid
		** error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->discarded++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we won't have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that
 *  the stack doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/* Errors? */
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}


	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif