1 /******************************************************************************
2
3 Copyright (c) 2013-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36 ** IXL driver TX/RX Routines:
37 ** This was separated to allow usage by
38 ** both the BASE and the VF drivers.
39 */
40
41 #ifndef IXL_STANDALONE_BUILD
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_rss.h"
45 #endif
46
47 #include "ixl.h"
48
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 /* Local Prototypes */
54 static void ixl_rx_checksum(struct mbuf *, u32, u32, u8);
55 static void ixl_refresh_mbufs(struct ixl_queue *, int);
56 static int ixl_xmit(struct ixl_queue *, struct mbuf **);
57 static int ixl_tx_setup_offload(struct ixl_queue *,
58 struct mbuf *, u32 *, u32 *);
59 static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *);
60
61 static __inline void ixl_rx_discard(struct rx_ring *, int);
62 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
63 struct mbuf *, u8);
64
65 #ifdef DEV_NETMAP
66 #include <dev/netmap/if_ixl_netmap.h>
67 #endif /* DEV_NETMAP */
68
69 /*
70 ** Multiqueue Transmit driver
71 */
72 int
73 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
74 {
75 struct ixl_vsi *vsi = ifp->if_softc;
76 struct ixl_queue *que;
77 struct tx_ring *txr;
78 int err, i;
79 #ifdef RSS
80 u32 bucket_id;
81 #endif
82
83 /*
84 ** Which queue to use:
85 **
86 ** When doing RSS, map it to the same outbound
87 ** queue as the incoming flow would be mapped to.
88 ** If everything is set up correctly, it should be
89 ** the same bucket that the current CPU is assigned to.
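**
** For example, with vsi->num_queues equal to 8, a flow whose RSS
** hash falls in bucket 13 is transmitted on queue 13 % 8 = 5,
** which with the default indirection table should also be the
** queue on which the flow was received.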
90 */ 91 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 92 #ifdef RSS 93 if (rss_hash2bucket(m->m_pkthdr.flowid, 94 M_HASHTYPE_GET(m), &bucket_id) == 0) { 95 i = bucket_id % vsi->num_queues; 96 } else 97 #endif 98 i = m->m_pkthdr.flowid % vsi->num_queues; 99 } else 100 i = curcpu % vsi->num_queues; 101 /* 102 ** This may not be perfect, but until something 103 ** better comes along it will keep from scheduling 104 ** on stalled queues. 105 */ 106 if (((1 << i) & vsi->active_queues) == 0) 107 i = ffsl(vsi->active_queues); 108 109 que = &vsi->queues[i]; 110 txr = &que->txr; 111 112 err = drbr_enqueue(ifp, txr->br, m); 113 if (err) 114 return (err); 115 if (IXL_TX_TRYLOCK(txr)) { 116 ixl_mq_start_locked(ifp, txr); 117 IXL_TX_UNLOCK(txr); 118 } else 119 taskqueue_enqueue(que->tq, &que->tx_task); 120 121 return (0); 122 } 123 124 int 125 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 126 { 127 struct ixl_queue *que = txr->que; 128 struct ixl_vsi *vsi = que->vsi; 129 struct mbuf *next; 130 int err = 0; 131 132 133 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || 134 vsi->link_active == 0) 135 return (ENETDOWN); 136 137 /* Process the transmit queue */ 138 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 139 if ((err = ixl_xmit(que, &next)) != 0) { 140 if (next == NULL) 141 drbr_advance(ifp, txr->br); 142 else 143 drbr_putback(ifp, txr->br, next); 144 break; 145 } 146 drbr_advance(ifp, txr->br); 147 /* Send a copy of the frame to the BPF listener */ 148 ETHER_BPF_MTAP(ifp, next); 149 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 150 break; 151 } 152 153 if (txr->avail < IXL_TX_CLEANUP_THRESHOLD) 154 ixl_txeof(que); 155 156 return (err); 157 } 158 159 /* 160 * Called from a taskqueue to drain queued transmit packets. 161 */ 162 void 163 ixl_deferred_mq_start(void *arg, int pending) 164 { 165 struct ixl_queue *que = arg; 166 struct tx_ring *txr = &que->txr; 167 struct ixl_vsi *vsi = que->vsi; 168 struct ifnet *ifp = vsi->ifp; 169 170 IXL_TX_LOCK(txr); 171 if (!drbr_empty(ifp, txr->br)) 172 ixl_mq_start_locked(ifp, txr); 173 IXL_TX_UNLOCK(txr); 174 } 175 176 /* 177 ** Flush all queue ring buffers 178 */ 179 void 180 ixl_qflush(struct ifnet *ifp) 181 { 182 struct ixl_vsi *vsi = ifp->if_softc; 183 184 for (int i = 0; i < vsi->num_queues; i++) { 185 struct ixl_queue *que = &vsi->queues[i]; 186 struct tx_ring *txr = &que->txr; 187 struct mbuf *m; 188 IXL_TX_LOCK(txr); 189 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 190 m_freem(m); 191 IXL_TX_UNLOCK(txr); 192 } 193 if_qflush(ifp); 194 } 195 196 /* 197 ** Find mbuf chains passed to the driver 198 ** that are 'sparse', using more than 8 199 ** mbufs to deliver an mss-size chunk of data 200 */ 201 static inline bool 202 ixl_tso_detect_sparse(struct mbuf *mp) 203 { 204 struct mbuf *m; 205 int num = 0, mss; 206 bool ret = FALSE; 207 208 mss = mp->m_pkthdr.tso_segsz; 209 for (m = mp->m_next; m != NULL; m = m->m_next) { 210 num++; 211 mss -= m->m_len; 212 if (mss < 1) 213 break; 214 if (m->m_next == NULL) 215 break; 216 } 217 if (num > IXL_SPARSE_CHAIN) 218 ret = TRUE; 219 220 return (ret); 221 } 222 223 224 /********************************************************************* 225 * 226 * This routine maps the mbufs to tx descriptors, allowing the 227 * TX engine to transmit the packets. 
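 * Each DMA segment consumes one data descriptor; a TSO frame is
 * additionally preceded by a context descriptor written by
 * ixl_tso_setup().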
228 * - return 0 on success, positive on failure 229 * 230 **********************************************************************/ 231 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 232 233 static int 234 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp) 235 { 236 struct ixl_vsi *vsi = que->vsi; 237 struct i40e_hw *hw = vsi->hw; 238 struct tx_ring *txr = &que->txr; 239 struct ixl_tx_buf *buf; 240 struct i40e_tx_desc *txd = NULL; 241 struct mbuf *m_head, *m; 242 int i, j, error, nsegs, maxsegs; 243 int first, last = 0; 244 u16 vtag = 0; 245 u32 cmd, off; 246 bus_dmamap_t map; 247 bus_dma_tag_t tag; 248 bus_dma_segment_t segs[IXL_MAX_TSO_SEGS]; 249 250 cmd = off = 0; 251 m_head = *m_headp; 252 253 /* 254 * Important to capture the first descriptor 255 * used because it will contain the index of 256 * the one we tell the hardware to report back 257 */ 258 first = txr->next_avail; 259 buf = &txr->buffers[first]; 260 map = buf->map; 261 tag = txr->tx_tag; 262 maxsegs = IXL_MAX_TX_SEGS; 263 264 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 265 /* Use larger mapping for TSO */ 266 tag = txr->tso_tag; 267 maxsegs = IXL_MAX_TSO_SEGS; 268 if (ixl_tso_detect_sparse(m_head)) { 269 m = m_defrag(m_head, M_NOWAIT); 270 if (m == NULL) { 271 m_freem(*m_headp); 272 *m_headp = NULL; 273 return (ENOBUFS); 274 } 275 *m_headp = m; 276 } 277 } 278 279 /* 280 * Map the packet for DMA. 281 */ 282 error = bus_dmamap_load_mbuf_sg(tag, map, 283 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 284 285 if (error == EFBIG) { 286 struct mbuf *m; 287 288 m = m_defrag(*m_headp, M_NOWAIT); 289 if (m == NULL) { 290 que->mbuf_defrag_failed++; 291 m_freem(*m_headp); 292 *m_headp = NULL; 293 return (ENOBUFS); 294 } 295 *m_headp = m; 296 297 /* Try it again */ 298 error = bus_dmamap_load_mbuf_sg(tag, map, 299 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 300 301 if (error == ENOMEM) { 302 que->tx_dma_setup++; 303 return (error); 304 } else if (error != 0) { 305 que->tx_dma_setup++; 306 m_freem(*m_headp); 307 *m_headp = NULL; 308 return (error); 309 } 310 } else if (error == ENOMEM) { 311 que->tx_dma_setup++; 312 return (error); 313 } else if (error != 0) { 314 que->tx_dma_setup++; 315 m_freem(*m_headp); 316 *m_headp = NULL; 317 return (error); 318 } 319 320 /* Make certain there are enough descriptors */ 321 if (nsegs > txr->avail - 2) { 322 txr->no_desc++; 323 error = ENOBUFS; 324 goto xmit_fail; 325 } 326 m_head = *m_headp; 327 328 /* Set up the TSO/CSUM offload */ 329 if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 330 error = ixl_tx_setup_offload(que, m_head, &cmd, &off); 331 if (error) 332 goto xmit_fail; 333 } 334 335 cmd |= I40E_TX_DESC_CMD_ICRC; 336 /* Grab the VLAN tag */ 337 if (m_head->m_flags & M_VLANTAG) { 338 cmd |= I40E_TX_DESC_CMD_IL2TAG1; 339 vtag = htole16(m_head->m_pkthdr.ether_vtag); 340 } 341 342 i = txr->next_avail; 343 for (j = 0; j < nsegs; j++) { 344 bus_size_t seglen; 345 346 buf = &txr->buffers[i]; 347 buf->tag = tag; /* Keep track of the type tag */ 348 txd = &txr->base[i]; 349 seglen = segs[j].ds_len; 350 351 txd->buffer_addr = htole64(segs[j].ds_addr); 352 txd->cmd_type_offset_bsz = 353 htole64(I40E_TX_DESC_DTYPE_DATA 354 | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT) 355 | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT) 356 | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) 357 | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT)); 358 359 last = i; /* descriptor that will get completion IRQ */ 360 361 if (++i == que->num_desc) 362 i = 0; 363 364 buf->m_head = NULL; 365 buf->eop_index = -1; 366 } 367 /* Set the 
last descriptor for report */ 368 txd->cmd_type_offset_bsz |= 369 htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT)); 370 txr->avail -= nsegs; 371 txr->next_avail = i; 372 373 buf->m_head = m_head; 374 /* Swap the dma map between the first and last descriptor */ 375 txr->buffers[first].map = buf->map; 376 buf->map = map; 377 bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE); 378 379 /* Set the index of the descriptor that will be marked done */ 380 buf = &txr->buffers[first]; 381 buf->eop_index = last; 382 383 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 384 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 385 /* 386 * Advance the Transmit Descriptor Tail (Tdt), this tells the 387 * hardware that this frame is available to transmit. 388 */ 389 ++txr->total_packets; 390 wr32(hw, txr->tail, i); 391 392 /* Mark outstanding work */ 393 if (que->busy == 0) 394 que->busy = 1; 395 return (0); 396 397 xmit_fail: 398 bus_dmamap_unload(tag, buf->map); 399 return (error); 400 } 401 402 403 /********************************************************************* 404 * 405 * Allocate memory for tx_buffer structures. The tx_buffer stores all 406 * the information needed to transmit a packet on the wire. This is 407 * called only once at attach, setup is done every reset. 408 * 409 **********************************************************************/ 410 int 411 ixl_allocate_tx_data(struct ixl_queue *que) 412 { 413 struct tx_ring *txr = &que->txr; 414 struct ixl_vsi *vsi = que->vsi; 415 device_t dev = vsi->dev; 416 struct ixl_tx_buf *buf; 417 int error = 0; 418 419 /* 420 * Setup DMA descriptor areas. 421 */ 422 if ((error = bus_dma_tag_create(NULL, /* parent */ 423 1, 0, /* alignment, bounds */ 424 BUS_SPACE_MAXADDR, /* lowaddr */ 425 BUS_SPACE_MAXADDR, /* highaddr */ 426 NULL, NULL, /* filter, filterarg */ 427 IXL_TSO_SIZE, /* maxsize */ 428 IXL_MAX_TX_SEGS, /* nsegments */ 429 PAGE_SIZE, /* maxsegsize */ 430 0, /* flags */ 431 NULL, /* lockfunc */ 432 NULL, /* lockfuncarg */ 433 &txr->tx_tag))) { 434 device_printf(dev,"Unable to allocate TX DMA tag\n"); 435 goto fail; 436 } 437 438 /* Make a special tag for TSO */ 439 if ((error = bus_dma_tag_create(NULL, /* parent */ 440 1, 0, /* alignment, bounds */ 441 BUS_SPACE_MAXADDR, /* lowaddr */ 442 BUS_SPACE_MAXADDR, /* highaddr */ 443 NULL, NULL, /* filter, filterarg */ 444 IXL_TSO_SIZE, /* maxsize */ 445 IXL_MAX_TSO_SEGS, /* nsegments */ 446 PAGE_SIZE, /* maxsegsize */ 447 0, /* flags */ 448 NULL, /* lockfunc */ 449 NULL, /* lockfuncarg */ 450 &txr->tso_tag))) { 451 device_printf(dev,"Unable to allocate TX TSO DMA tag\n"); 452 goto fail; 453 } 454 455 if (!(txr->buffers = 456 (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) * 457 que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 458 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 459 error = ENOMEM; 460 goto fail; 461 } 462 463 /* Create the descriptor buffer default dma maps */ 464 buf = txr->buffers; 465 for (int i = 0; i < que->num_desc; i++, buf++) { 466 buf->tag = txr->tx_tag; 467 error = bus_dmamap_create(buf->tag, 0, &buf->map); 468 if (error != 0) { 469 device_printf(dev, "Unable to create TX DMA map\n"); 470 goto fail; 471 } 472 } 473 fail: 474 return (error); 475 } 476 477 478 /********************************************************************* 479 * 480 * (Re)Initialize a queue transmit ring. 
481 * - called by init, it clears the descriptor ring, 482 * and frees any stale mbufs 483 * 484 **********************************************************************/ 485 void 486 ixl_init_tx_ring(struct ixl_queue *que) 487 { 488 #ifdef DEV_NETMAP 489 struct netmap_adapter *na = NA(que->vsi->ifp); 490 struct netmap_slot *slot; 491 #endif /* DEV_NETMAP */ 492 struct tx_ring *txr = &que->txr; 493 struct ixl_tx_buf *buf; 494 495 /* Clear the old ring contents */ 496 IXL_TX_LOCK(txr); 497 498 #ifdef DEV_NETMAP 499 /* 500 * (under lock): if in netmap mode, do some consistency 501 * checks and set slot to entry 0 of the netmap ring. 502 */ 503 slot = netmap_reset(na, NR_TX, que->me, 0); 504 #endif /* DEV_NETMAP */ 505 506 bzero((void *)txr->base, 507 (sizeof(struct i40e_tx_desc)) * que->num_desc); 508 509 /* Reset indices */ 510 txr->next_avail = 0; 511 txr->next_to_clean = 0; 512 513 #ifdef IXL_FDIR 514 /* Initialize flow director */ 515 txr->atr_rate = ixl_atr_rate; 516 txr->atr_count = 0; 517 #endif 518 519 /* Free any existing tx mbufs. */ 520 buf = txr->buffers; 521 for (int i = 0; i < que->num_desc; i++, buf++) { 522 if (buf->m_head != NULL) { 523 bus_dmamap_sync(buf->tag, buf->map, 524 BUS_DMASYNC_POSTWRITE); 525 bus_dmamap_unload(buf->tag, buf->map); 526 m_freem(buf->m_head); 527 buf->m_head = NULL; 528 } 529 #ifdef DEV_NETMAP 530 /* 531 * In netmap mode, set the map for the packet buffer. 532 * NOTE: Some drivers (not this one) also need to set 533 * the physical buffer address in the NIC ring. 534 * netmap_idx_n2k() maps a nic index, i, into the corresponding 535 * netmap slot index, si 536 */ 537 if (slot) { 538 int si = netmap_idx_n2k(&na->tx_rings[que->me], i); 539 netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si)); 540 } 541 #endif /* DEV_NETMAP */ 542 /* Clear the EOP index */ 543 buf->eop_index = -1; 544 } 545 546 /* Set number of descriptors available */ 547 txr->avail = que->num_desc; 548 549 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 550 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 551 IXL_TX_UNLOCK(txr); 552 } 553 554 555 /********************************************************************* 556 * 557 * Free transmit ring related data structures. 
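 * (per-buffer DMA maps, the buf_ring, the buffer array and both
 * DMA tags are released)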
558 * 559 **********************************************************************/ 560 void 561 ixl_free_que_tx(struct ixl_queue *que) 562 { 563 struct tx_ring *txr = &que->txr; 564 struct ixl_tx_buf *buf; 565 566 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 567 568 for (int i = 0; i < que->num_desc; i++) { 569 buf = &txr->buffers[i]; 570 if (buf->m_head != NULL) { 571 bus_dmamap_sync(buf->tag, buf->map, 572 BUS_DMASYNC_POSTWRITE); 573 bus_dmamap_unload(buf->tag, 574 buf->map); 575 m_freem(buf->m_head); 576 buf->m_head = NULL; 577 if (buf->map != NULL) { 578 bus_dmamap_destroy(buf->tag, 579 buf->map); 580 buf->map = NULL; 581 } 582 } else if (buf->map != NULL) { 583 bus_dmamap_unload(buf->tag, 584 buf->map); 585 bus_dmamap_destroy(buf->tag, 586 buf->map); 587 buf->map = NULL; 588 } 589 } 590 if (txr->br != NULL) 591 buf_ring_free(txr->br, M_DEVBUF); 592 if (txr->buffers != NULL) { 593 free(txr->buffers, M_DEVBUF); 594 txr->buffers = NULL; 595 } 596 if (txr->tx_tag != NULL) { 597 bus_dma_tag_destroy(txr->tx_tag); 598 txr->tx_tag = NULL; 599 } 600 if (txr->tso_tag != NULL) { 601 bus_dma_tag_destroy(txr->tso_tag); 602 txr->tso_tag = NULL; 603 } 604 605 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 606 return; 607 } 608 609 /********************************************************************* 610 * 611 * Setup descriptor for hw offloads 612 * 613 **********************************************************************/ 614 615 static int 616 ixl_tx_setup_offload(struct ixl_queue *que, 617 struct mbuf *mp, u32 *cmd, u32 *off) 618 { 619 struct ether_vlan_header *eh; 620 #ifdef INET 621 struct ip *ip = NULL; 622 #endif 623 struct tcphdr *th = NULL; 624 #ifdef INET6 625 struct ip6_hdr *ip6; 626 #endif 627 int elen, ip_hlen = 0, tcp_hlen; 628 u16 etype; 629 u8 ipproto = 0; 630 bool tso = FALSE; 631 632 /* Set up the TSO context descriptor if required */ 633 if (mp->m_pkthdr.csum_flags & CSUM_TSO) { 634 tso = ixl_tso_setup(que, mp); 635 if (tso) 636 ++que->tso; 637 else 638 return (ENXIO); 639 } 640 641 /* 642 * Determine where frame payload starts. 643 * Jump over vlan headers if already present, 644 * helpful for QinQ too. 
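 * (elen works out to ETHER_HDR_LEN, 14 bytes, for an untagged frame
 * and 18 bytes when an 802.1Q tag is present)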
645 */ 646 eh = mtod(mp, struct ether_vlan_header *); 647 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 648 etype = ntohs(eh->evl_proto); 649 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 650 } else { 651 etype = ntohs(eh->evl_encap_proto); 652 elen = ETHER_HDR_LEN; 653 } 654 655 switch (etype) { 656 #ifdef INET 657 case ETHERTYPE_IP: 658 ip = (struct ip *)(mp->m_data + elen); 659 ip_hlen = ip->ip_hl << 2; 660 ipproto = ip->ip_p; 661 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 662 /* The IP checksum must be recalculated with TSO */ 663 if (tso) 664 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 665 else 666 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 667 break; 668 #endif 669 #ifdef INET6 670 case ETHERTYPE_IPV6: 671 ip6 = (struct ip6_hdr *)(mp->m_data + elen); 672 ip_hlen = sizeof(struct ip6_hdr); 673 ipproto = ip6->ip6_nxt; 674 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 675 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 676 break; 677 #endif 678 default: 679 break; 680 } 681 682 *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 683 *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 684 685 switch (ipproto) { 686 case IPPROTO_TCP: 687 tcp_hlen = th->th_off << 2; 688 if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) { 689 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 690 *off |= (tcp_hlen >> 2) << 691 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 692 } 693 #ifdef IXL_FDIR 694 ixl_atr(que, th, etype); 695 #endif 696 break; 697 case IPPROTO_UDP: 698 if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) { 699 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 700 *off |= (sizeof(struct udphdr) >> 2) << 701 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 702 } 703 break; 704 705 case IPPROTO_SCTP: 706 if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) { 707 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 708 *off |= (sizeof(struct sctphdr) >> 2) << 709 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 710 } 711 /* Fall Thru */ 712 default: 713 break; 714 } 715 716 return (0); 717 } 718 719 720 /********************************************************************** 721 * 722 * Setup context for hardware segmentation offload (TSO) 723 * 724 **********************************************************************/ 725 static bool 726 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp) 727 { 728 struct tx_ring *txr = &que->txr; 729 struct i40e_tx_context_desc *TXD; 730 struct ixl_tx_buf *buf; 731 u32 cmd, mss, type, tsolen; 732 u16 etype; 733 int idx, elen, ip_hlen, tcp_hlen; 734 struct ether_vlan_header *eh; 735 #ifdef INET 736 struct ip *ip; 737 #endif 738 #ifdef INET6 739 struct ip6_hdr *ip6; 740 #endif 741 #if defined(INET6) || defined(INET) 742 struct tcphdr *th; 743 #endif 744 u64 type_cmd_tso_mss; 745 746 /* 747 * Determine where frame payload starts. 
748 * Jump over vlan headers if already present
749 */
750 eh = mtod(mp, struct ether_vlan_header *);
751 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
752 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
753 etype = eh->evl_proto;
754 } else {
755 elen = ETHER_HDR_LEN;
756 etype = eh->evl_encap_proto;
757 }
758
759 switch (ntohs(etype)) {
760 #ifdef INET6
761 case ETHERTYPE_IPV6:
762 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
763 if (ip6->ip6_nxt != IPPROTO_TCP)
764 return (FALSE);
765 ip_hlen = sizeof(struct ip6_hdr);
766 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
767 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
768 tcp_hlen = th->th_off << 2;
769 /*
770 * The corresponding flag is set by the stack in the IPv4
771 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
772 * So, set it here because the rest of the flow requires it.
773 */
774 mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
775 break;
776 #endif
777 #ifdef INET
778 case ETHERTYPE_IP:
779 ip = (struct ip *)(mp->m_data + elen);
780 if (ip->ip_p != IPPROTO_TCP)
781 return (FALSE);
782 ip->ip_sum = 0;
783 ip_hlen = ip->ip_hl << 2;
784 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
785 th->th_sum = in_pseudo(ip->ip_src.s_addr,
786 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
787 tcp_hlen = th->th_off << 2;
788 break;
789 #endif
790 default:
791 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
792 __func__, ntohs(etype));
793 return FALSE;
794 }
795
796 /* Ensure we have at least the IP+TCP header in the first mbuf. */
797 if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
798 return FALSE;
799
800 idx = txr->next_avail;
801 buf = &txr->buffers[idx];
802 TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
803 tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
804
805 type = I40E_TX_DESC_DTYPE_CONTEXT;
806 cmd = I40E_TX_CTX_DESC_TSO;
807 mss = mp->m_pkthdr.tso_segsz;
808
809 type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
810 ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
811 ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
812 ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
813 TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
814
815 TXD->tunneling_params = htole32(0);
816 buf->m_head = NULL;
817 buf->eop_index = -1;
818
819 if (++idx == que->num_desc)
820 idx = 0;
821
822 txr->avail--;
823 txr->next_avail = idx;
824
825 return TRUE;
826 }
827
828 /*
829 ** ixl_get_tx_head - Retrieve the value from the
830 ** location where the HW records its HEAD index
831 */
832 static inline u32
833 ixl_get_tx_head(struct ixl_queue *que)
834 {
835 struct tx_ring *txr = &que->txr;
836 void *head = &txr->base[que->num_desc];
837 return LE32_TO_CPU(*(volatile __le32 *)head);
838 }
839
840 /**********************************************************************
841 *
842 * Examine each tx_buffer in the used queue. If the hardware is done
843 * processing the packet then free associated resources. The
844 * tx_buffer is put back on the free queue.
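 * Completion is detected from the head write-back value read by
 * ixl_get_tx_head() rather than from a done bit in each descriptor.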
845 * 846 **********************************************************************/ 847 bool 848 ixl_txeof(struct ixl_queue *que) 849 { 850 struct tx_ring *txr = &que->txr; 851 u32 first, last, head, done, processed; 852 struct ixl_tx_buf *buf; 853 struct i40e_tx_desc *tx_desc, *eop_desc; 854 855 856 mtx_assert(&txr->mtx, MA_OWNED); 857 858 #ifdef DEV_NETMAP 859 // XXX todo: implement moderation 860 if (netmap_tx_irq(que->vsi->ifp, que->me)) 861 return FALSE; 862 #endif /* DEF_NETMAP */ 863 864 /* These are not the descriptors you seek, move along :) */ 865 if (txr->avail == que->num_desc) { 866 que->busy = 0; 867 return FALSE; 868 } 869 870 processed = 0; 871 first = txr->next_to_clean; 872 buf = &txr->buffers[first]; 873 tx_desc = (struct i40e_tx_desc *)&txr->base[first]; 874 last = buf->eop_index; 875 if (last == -1) 876 return FALSE; 877 eop_desc = (struct i40e_tx_desc *)&txr->base[last]; 878 879 /* Get the Head WB value */ 880 head = ixl_get_tx_head(que); 881 882 /* 883 ** Get the index of the first descriptor 884 ** BEYOND the EOP and call that 'done'. 885 ** I do this so the comparison in the 886 ** inner while loop below can be simple 887 */ 888 if (++last == que->num_desc) last = 0; 889 done = last; 890 891 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 892 BUS_DMASYNC_POSTREAD); 893 /* 894 ** The HEAD index of the ring is written in a 895 ** defined location, this rather than a done bit 896 ** is what is used to keep track of what must be 897 ** 'cleaned'. 898 */ 899 while (first != head) { 900 /* We clean the range of the packet */ 901 while (first != done) { 902 ++txr->avail; 903 ++processed; 904 905 if (buf->m_head) { 906 txr->bytes += /* for ITR adjustment */ 907 buf->m_head->m_pkthdr.len; 908 txr->tx_bytes += /* for TX stats */ 909 buf->m_head->m_pkthdr.len; 910 bus_dmamap_sync(buf->tag, 911 buf->map, 912 BUS_DMASYNC_POSTWRITE); 913 bus_dmamap_unload(buf->tag, 914 buf->map); 915 m_freem(buf->m_head); 916 buf->m_head = NULL; 917 buf->map = NULL; 918 } 919 buf->eop_index = -1; 920 921 if (++first == que->num_desc) 922 first = 0; 923 924 buf = &txr->buffers[first]; 925 tx_desc = &txr->base[first]; 926 } 927 ++txr->packets; 928 /* See if there is more work now */ 929 last = buf->eop_index; 930 if (last != -1) { 931 eop_desc = &txr->base[last]; 932 /* Get next done point */ 933 if (++last == que->num_desc) last = 0; 934 done = last; 935 } else 936 break; 937 } 938 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 939 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 940 941 txr->next_to_clean = first; 942 943 944 /* 945 ** Hang detection, we know there's 946 ** work outstanding or the first return 947 ** would have been taken, so indicate an 948 ** unsuccessful pass, in local_timer if 949 ** the value is too great the queue will 950 ** be considered hung. If anything has been 951 ** cleaned then reset the state. 952 */ 953 if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG)) 954 ++que->busy; 955 956 if (processed) 957 que->busy = 1; /* Note this turns off HUNG */ 958 959 /* 960 * If there are no pending descriptors, clear the timeout. 
961 */ 962 if (txr->avail == que->num_desc) { 963 que->busy = 0; 964 return FALSE; 965 } 966 967 return TRUE; 968 } 969 970 /********************************************************************* 971 * 972 * Refresh mbuf buffers for RX descriptor rings 973 * - now keeps its own state so discards due to resource 974 * exhaustion are unnecessary, if an mbuf cannot be obtained 975 * it just returns, keeping its placeholder, thus it can simply 976 * be recalled to try again. 977 * 978 **********************************************************************/ 979 static void 980 ixl_refresh_mbufs(struct ixl_queue *que, int limit) 981 { 982 struct ixl_vsi *vsi = que->vsi; 983 struct rx_ring *rxr = &que->rxr; 984 bus_dma_segment_t hseg[1]; 985 bus_dma_segment_t pseg[1]; 986 struct ixl_rx_buf *buf; 987 struct mbuf *mh, *mp; 988 int i, j, nsegs, error; 989 bool refreshed = FALSE; 990 991 i = j = rxr->next_refresh; 992 /* Control the loop with one beyond */ 993 if (++j == que->num_desc) 994 j = 0; 995 996 while (j != limit) { 997 buf = &rxr->buffers[i]; 998 if (rxr->hdr_split == FALSE) 999 goto no_split; 1000 1001 if (buf->m_head == NULL) { 1002 mh = m_gethdr(M_NOWAIT, MT_DATA); 1003 if (mh == NULL) 1004 goto update; 1005 } else 1006 mh = buf->m_head; 1007 1008 mh->m_pkthdr.len = mh->m_len = MHLEN; 1009 mh->m_len = MHLEN; 1010 mh->m_flags |= M_PKTHDR; 1011 /* Get the memory mapping */ 1012 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1013 buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); 1014 if (error != 0) { 1015 printf("Refresh mbufs: hdr dmamap load" 1016 " failure - %d\n", error); 1017 m_free(mh); 1018 buf->m_head = NULL; 1019 goto update; 1020 } 1021 buf->m_head = mh; 1022 bus_dmamap_sync(rxr->htag, buf->hmap, 1023 BUS_DMASYNC_PREREAD); 1024 rxr->base[i].read.hdr_addr = 1025 htole64(hseg[0].ds_addr); 1026 1027 no_split: 1028 if (buf->m_pack == NULL) { 1029 mp = m_getjcl(M_NOWAIT, MT_DATA, 1030 M_PKTHDR, rxr->mbuf_sz); 1031 if (mp == NULL) 1032 goto update; 1033 } else 1034 mp = buf->m_pack; 1035 1036 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1037 /* Get the memory mapping */ 1038 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1039 buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); 1040 if (error != 0) { 1041 printf("Refresh mbufs: payload dmamap load" 1042 " failure - %d\n", error); 1043 m_free(mp); 1044 buf->m_pack = NULL; 1045 goto update; 1046 } 1047 buf->m_pack = mp; 1048 bus_dmamap_sync(rxr->ptag, buf->pmap, 1049 BUS_DMASYNC_PREREAD); 1050 rxr->base[i].read.pkt_addr = 1051 htole64(pseg[0].ds_addr); 1052 /* Used only when doing header split */ 1053 rxr->base[i].read.hdr_addr = 0; 1054 1055 refreshed = TRUE; 1056 /* Next is precalculated */ 1057 i = j; 1058 rxr->next_refresh = i; 1059 if (++j == que->num_desc) 1060 j = 0; 1061 } 1062 update: 1063 if (refreshed) /* Update hardware tail index */ 1064 wr32(vsi->hw, rxr->tail, rxr->next_refresh); 1065 return; 1066 } 1067 1068 1069 /********************************************************************* 1070 * 1071 * Allocate memory for rx_buffer structures. Since we use one 1072 * rx_buffer per descriptor, the maximum number of rx_buffer's 1073 * that we'll need is equal to the number of receive descriptors 1074 * that we've defined. 
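 * Two DMA tags are created per ring: a small one (MSIZE) for the
 * optional header buffers and a cluster-sized one (up to MJUM16BYTES)
 * for the payload buffers.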
1075 * 1076 **********************************************************************/ 1077 int 1078 ixl_allocate_rx_data(struct ixl_queue *que) 1079 { 1080 struct rx_ring *rxr = &que->rxr; 1081 struct ixl_vsi *vsi = que->vsi; 1082 device_t dev = vsi->dev; 1083 struct ixl_rx_buf *buf; 1084 int i, bsize, error; 1085 1086 bsize = sizeof(struct ixl_rx_buf) * que->num_desc; 1087 if (!(rxr->buffers = 1088 (struct ixl_rx_buf *) malloc(bsize, 1089 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1090 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1091 error = ENOMEM; 1092 return (error); 1093 } 1094 1095 if ((error = bus_dma_tag_create(NULL, /* parent */ 1096 1, 0, /* alignment, bounds */ 1097 BUS_SPACE_MAXADDR, /* lowaddr */ 1098 BUS_SPACE_MAXADDR, /* highaddr */ 1099 NULL, NULL, /* filter, filterarg */ 1100 MSIZE, /* maxsize */ 1101 1, /* nsegments */ 1102 MSIZE, /* maxsegsize */ 1103 0, /* flags */ 1104 NULL, /* lockfunc */ 1105 NULL, /* lockfuncarg */ 1106 &rxr->htag))) { 1107 device_printf(dev, "Unable to create RX DMA htag\n"); 1108 return (error); 1109 } 1110 1111 if ((error = bus_dma_tag_create(NULL, /* parent */ 1112 1, 0, /* alignment, bounds */ 1113 BUS_SPACE_MAXADDR, /* lowaddr */ 1114 BUS_SPACE_MAXADDR, /* highaddr */ 1115 NULL, NULL, /* filter, filterarg */ 1116 MJUM16BYTES, /* maxsize */ 1117 1, /* nsegments */ 1118 MJUM16BYTES, /* maxsegsize */ 1119 0, /* flags */ 1120 NULL, /* lockfunc */ 1121 NULL, /* lockfuncarg */ 1122 &rxr->ptag))) { 1123 device_printf(dev, "Unable to create RX DMA ptag\n"); 1124 return (error); 1125 } 1126 1127 for (i = 0; i < que->num_desc; i++) { 1128 buf = &rxr->buffers[i]; 1129 error = bus_dmamap_create(rxr->htag, 1130 BUS_DMA_NOWAIT, &buf->hmap); 1131 if (error) { 1132 device_printf(dev, "Unable to create RX head map\n"); 1133 break; 1134 } 1135 error = bus_dmamap_create(rxr->ptag, 1136 BUS_DMA_NOWAIT, &buf->pmap); 1137 if (error) { 1138 device_printf(dev, "Unable to create RX pkt map\n"); 1139 break; 1140 } 1141 } 1142 1143 return (error); 1144 } 1145 1146 1147 /********************************************************************* 1148 * 1149 * (Re)Initialize the queue receive ring and its buffers. 
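 * Header split is left disabled here (hdr_split = FALSE), so only the
 * payload cluster is mapped for each descriptor.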
1150 * 1151 **********************************************************************/ 1152 int 1153 ixl_init_rx_ring(struct ixl_queue *que) 1154 { 1155 struct rx_ring *rxr = &que->rxr; 1156 struct ixl_vsi *vsi = que->vsi; 1157 #if defined(INET6) || defined(INET) 1158 struct ifnet *ifp = vsi->ifp; 1159 struct lro_ctrl *lro = &rxr->lro; 1160 #endif 1161 struct ixl_rx_buf *buf; 1162 bus_dma_segment_t pseg[1], hseg[1]; 1163 int rsize, nsegs, error = 0; 1164 #ifdef DEV_NETMAP 1165 struct netmap_adapter *na = NA(que->vsi->ifp); 1166 struct netmap_slot *slot; 1167 #endif /* DEV_NETMAP */ 1168 1169 IXL_RX_LOCK(rxr); 1170 #ifdef DEV_NETMAP 1171 /* same as in ixl_init_tx_ring() */ 1172 slot = netmap_reset(na, NR_RX, que->me, 0); 1173 #endif /* DEV_NETMAP */ 1174 /* Clear the ring contents */ 1175 rsize = roundup2(que->num_desc * 1176 sizeof(union i40e_rx_desc), DBA_ALIGN); 1177 bzero((void *)rxr->base, rsize); 1178 /* Cleanup any existing buffers */ 1179 for (int i = 0; i < que->num_desc; i++) { 1180 buf = &rxr->buffers[i]; 1181 if (buf->m_head != NULL) { 1182 bus_dmamap_sync(rxr->htag, buf->hmap, 1183 BUS_DMASYNC_POSTREAD); 1184 bus_dmamap_unload(rxr->htag, buf->hmap); 1185 buf->m_head->m_flags |= M_PKTHDR; 1186 m_freem(buf->m_head); 1187 } 1188 if (buf->m_pack != NULL) { 1189 bus_dmamap_sync(rxr->ptag, buf->pmap, 1190 BUS_DMASYNC_POSTREAD); 1191 bus_dmamap_unload(rxr->ptag, buf->pmap); 1192 buf->m_pack->m_flags |= M_PKTHDR; 1193 m_freem(buf->m_pack); 1194 } 1195 buf->m_head = NULL; 1196 buf->m_pack = NULL; 1197 } 1198 1199 /* header split is off */ 1200 rxr->hdr_split = FALSE; 1201 1202 /* Now replenish the mbufs */ 1203 for (int j = 0; j != que->num_desc; ++j) { 1204 struct mbuf *mh, *mp; 1205 1206 buf = &rxr->buffers[j]; 1207 #ifdef DEV_NETMAP 1208 /* 1209 * In netmap mode, fill the map and set the buffer 1210 * address in the NIC ring, considering the offset 1211 * between the netmap and NIC rings (see comment in 1212 * ixgbe_setup_transmit_ring() ). 
No need to allocate 1213 * an mbuf, so end the block with a continue; 1214 */ 1215 if (slot) { 1216 int sj = netmap_idx_n2k(&na->rx_rings[que->me], j); 1217 uint64_t paddr; 1218 void *addr; 1219 1220 addr = PNMB(na, slot + sj, &paddr); 1221 netmap_load_map(na, rxr->dma.tag, buf->pmap, addr); 1222 /* Update descriptor and the cached value */ 1223 rxr->base[j].read.pkt_addr = htole64(paddr); 1224 rxr->base[j].read.hdr_addr = 0; 1225 continue; 1226 } 1227 #endif /* DEV_NETMAP */ 1228 /* 1229 ** Don't allocate mbufs if not 1230 ** doing header split, its wasteful 1231 */ 1232 if (rxr->hdr_split == FALSE) 1233 goto skip_head; 1234 1235 /* First the header */ 1236 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA); 1237 if (buf->m_head == NULL) { 1238 error = ENOBUFS; 1239 goto fail; 1240 } 1241 m_adj(buf->m_head, ETHER_ALIGN); 1242 mh = buf->m_head; 1243 mh->m_len = mh->m_pkthdr.len = MHLEN; 1244 mh->m_flags |= M_PKTHDR; 1245 /* Get the memory mapping */ 1246 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1247 buf->hmap, buf->m_head, hseg, 1248 &nsegs, BUS_DMA_NOWAIT); 1249 if (error != 0) /* Nothing elegant to do here */ 1250 goto fail; 1251 bus_dmamap_sync(rxr->htag, 1252 buf->hmap, BUS_DMASYNC_PREREAD); 1253 /* Update descriptor */ 1254 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr); 1255 1256 skip_head: 1257 /* Now the payload cluster */ 1258 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, 1259 M_PKTHDR, rxr->mbuf_sz); 1260 if (buf->m_pack == NULL) { 1261 error = ENOBUFS; 1262 goto fail; 1263 } 1264 mp = buf->m_pack; 1265 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1266 /* Get the memory mapping */ 1267 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1268 buf->pmap, mp, pseg, 1269 &nsegs, BUS_DMA_NOWAIT); 1270 if (error != 0) 1271 goto fail; 1272 bus_dmamap_sync(rxr->ptag, 1273 buf->pmap, BUS_DMASYNC_PREREAD); 1274 /* Update descriptor */ 1275 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr); 1276 rxr->base[j].read.hdr_addr = 0; 1277 } 1278 1279 1280 /* Setup our descriptor indices */ 1281 rxr->next_check = 0; 1282 rxr->next_refresh = 0; 1283 rxr->lro_enabled = FALSE; 1284 rxr->split = 0; 1285 rxr->bytes = 0; 1286 rxr->discard = FALSE; 1287 1288 wr32(vsi->hw, rxr->tail, que->num_desc - 1); 1289 ixl_flush(vsi->hw); 1290 1291 #if defined(INET6) || defined(INET) 1292 /* 1293 ** Now set up the LRO interface: 1294 */ 1295 if (ifp->if_capenable & IFCAP_LRO) { 1296 int err = tcp_lro_init(lro); 1297 if (err) { 1298 if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me); 1299 goto fail; 1300 } 1301 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me); 1302 rxr->lro_enabled = TRUE; 1303 lro->ifp = vsi->ifp; 1304 } 1305 #endif 1306 1307 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1308 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1309 1310 fail: 1311 IXL_RX_UNLOCK(rxr); 1312 return (error); 1313 } 1314 1315 1316 /********************************************************************* 1317 * 1318 * Free station receive ring data structures 1319 * 1320 **********************************************************************/ 1321 void 1322 ixl_free_que_rx(struct ixl_queue *que) 1323 { 1324 struct rx_ring *rxr = &que->rxr; 1325 struct ixl_rx_buf *buf; 1326 1327 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 1328 1329 /* Cleanup any existing buffers */ 1330 if (rxr->buffers != NULL) { 1331 for (int i = 0; i < que->num_desc; i++) { 1332 buf = &rxr->buffers[i]; 1333 if (buf->m_head != NULL) { 1334 bus_dmamap_sync(rxr->htag, buf->hmap, 1335 BUS_DMASYNC_POSTREAD); 1336 
bus_dmamap_unload(rxr->htag, buf->hmap); 1337 buf->m_head->m_flags |= M_PKTHDR; 1338 m_freem(buf->m_head); 1339 } 1340 if (buf->m_pack != NULL) { 1341 bus_dmamap_sync(rxr->ptag, buf->pmap, 1342 BUS_DMASYNC_POSTREAD); 1343 bus_dmamap_unload(rxr->ptag, buf->pmap); 1344 buf->m_pack->m_flags |= M_PKTHDR; 1345 m_freem(buf->m_pack); 1346 } 1347 buf->m_head = NULL; 1348 buf->m_pack = NULL; 1349 if (buf->hmap != NULL) { 1350 bus_dmamap_destroy(rxr->htag, buf->hmap); 1351 buf->hmap = NULL; 1352 } 1353 if (buf->pmap != NULL) { 1354 bus_dmamap_destroy(rxr->ptag, buf->pmap); 1355 buf->pmap = NULL; 1356 } 1357 } 1358 if (rxr->buffers != NULL) { 1359 free(rxr->buffers, M_DEVBUF); 1360 rxr->buffers = NULL; 1361 } 1362 } 1363 1364 if (rxr->htag != NULL) { 1365 bus_dma_tag_destroy(rxr->htag); 1366 rxr->htag = NULL; 1367 } 1368 if (rxr->ptag != NULL) { 1369 bus_dma_tag_destroy(rxr->ptag); 1370 rxr->ptag = NULL; 1371 } 1372 1373 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 1374 return; 1375 } 1376 1377 static __inline void 1378 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype) 1379 { 1380 1381 #if defined(INET6) || defined(INET) 1382 /* 1383 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet 1384 * should be computed by hardware. Also it should not have VLAN tag in 1385 * ethernet header. 1386 */ 1387 if (rxr->lro_enabled && 1388 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 1389 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 1390 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { 1391 /* 1392 * Send to the stack if: 1393 ** - LRO not enabled, or 1394 ** - no LRO resources, or 1395 ** - lro enqueue fails 1396 */ 1397 if (rxr->lro.lro_cnt != 0) 1398 if (tcp_lro_rx(&rxr->lro, m, 0) == 0) 1399 return; 1400 } 1401 #endif 1402 IXL_RX_UNLOCK(rxr); 1403 (*ifp->if_input)(ifp, m); 1404 IXL_RX_LOCK(rxr); 1405 } 1406 1407 1408 static __inline void 1409 ixl_rx_discard(struct rx_ring *rxr, int i) 1410 { 1411 struct ixl_rx_buf *rbuf; 1412 1413 rbuf = &rxr->buffers[i]; 1414 1415 if (rbuf->fmp != NULL) {/* Partial chain ? */ 1416 rbuf->fmp->m_flags |= M_PKTHDR; 1417 m_freem(rbuf->fmp); 1418 rbuf->fmp = NULL; 1419 } 1420 1421 /* 1422 ** With advanced descriptors the writeback 1423 ** clobbers the buffer addrs, so its easier 1424 ** to just free the existing mbufs and take 1425 ** the normal refresh path to get new buffers 1426 ** and mapping. 1427 */ 1428 if (rbuf->m_head) { 1429 m_free(rbuf->m_head); 1430 rbuf->m_head = NULL; 1431 } 1432 1433 if (rbuf->m_pack) { 1434 m_free(rbuf->m_pack); 1435 rbuf->m_pack = NULL; 1436 } 1437 1438 return; 1439 } 1440 1441 #ifdef RSS 1442 /* 1443 ** i40e_ptype_to_hash: parse the packet type 1444 ** to determine the appropriate hash. 
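** For example, a known IPv4/TCP ptype maps to M_HASHTYPE_RSS_TCP_IPV4,
** while an unknown or L2-only ptype falls back to M_HASHTYPE_OPAQUE.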
1445 */ 1446 static inline int 1447 ixl_ptype_to_hash(u8 ptype) 1448 { 1449 struct i40e_rx_ptype_decoded decoded; 1450 u8 ex = 0; 1451 1452 decoded = decode_rx_desc_ptype(ptype); 1453 ex = decoded.outer_frag; 1454 1455 if (!decoded.known) 1456 return M_HASHTYPE_OPAQUE; 1457 1458 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 1459 return M_HASHTYPE_OPAQUE; 1460 1461 /* Note: anything that gets to this point is IP */ 1462 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 1463 switch (decoded.inner_prot) { 1464 case I40E_RX_PTYPE_INNER_PROT_TCP: 1465 if (ex) 1466 return M_HASHTYPE_RSS_TCP_IPV6_EX; 1467 else 1468 return M_HASHTYPE_RSS_TCP_IPV6; 1469 case I40E_RX_PTYPE_INNER_PROT_UDP: 1470 if (ex) 1471 return M_HASHTYPE_RSS_UDP_IPV6_EX; 1472 else 1473 return M_HASHTYPE_RSS_UDP_IPV6; 1474 default: 1475 if (ex) 1476 return M_HASHTYPE_RSS_IPV6_EX; 1477 else 1478 return M_HASHTYPE_RSS_IPV6; 1479 } 1480 } 1481 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 1482 switch (decoded.inner_prot) { 1483 case I40E_RX_PTYPE_INNER_PROT_TCP: 1484 return M_HASHTYPE_RSS_TCP_IPV4; 1485 case I40E_RX_PTYPE_INNER_PROT_UDP: 1486 if (ex) 1487 return M_HASHTYPE_RSS_UDP_IPV4_EX; 1488 else 1489 return M_HASHTYPE_RSS_UDP_IPV4; 1490 default: 1491 return M_HASHTYPE_RSS_IPV4; 1492 } 1493 } 1494 /* We should never get here!! */ 1495 return M_HASHTYPE_OPAQUE; 1496 } 1497 #endif /* RSS */ 1498 1499 /********************************************************************* 1500 * 1501 * This routine executes in interrupt context. It replenishes 1502 * the mbufs in the descriptor and sends data which has been 1503 * dma'ed into host memory to upper layer. 1504 * 1505 * We loop at most count times if count is > 0, or until done if 1506 * count < 0. 1507 * 1508 * Return TRUE for more work, FALSE for all clean. 1509 *********************************************************************/ 1510 bool 1511 ixl_rxeof(struct ixl_queue *que, int count) 1512 { 1513 struct ixl_vsi *vsi = que->vsi; 1514 struct rx_ring *rxr = &que->rxr; 1515 struct ifnet *ifp = vsi->ifp; 1516 #if defined(INET6) || defined(INET) 1517 struct lro_ctrl *lro = &rxr->lro; 1518 #endif 1519 int i, nextp, processed = 0; 1520 union i40e_rx_desc *cur; 1521 struct ixl_rx_buf *rbuf, *nbuf; 1522 1523 1524 IXL_RX_LOCK(rxr); 1525 1526 #ifdef DEV_NETMAP 1527 if (netmap_rx_irq(ifp, que->me, &count)) { 1528 IXL_RX_UNLOCK(rxr); 1529 return (FALSE); 1530 } 1531 #endif /* DEV_NETMAP */ 1532 1533 for (i = rxr->next_check; count != 0;) { 1534 struct mbuf *sendmp, *mh, *mp; 1535 u32 rsc, status, error; 1536 u16 hlen, plen, vtag; 1537 u64 qword; 1538 u8 ptype; 1539 bool eop; 1540 1541 /* Sync the ring. 
*/ 1542 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1543 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1544 1545 cur = &rxr->base[i]; 1546 qword = le64toh(cur->wb.qword1.status_error_len); 1547 status = (qword & I40E_RXD_QW1_STATUS_MASK) 1548 >> I40E_RXD_QW1_STATUS_SHIFT; 1549 error = (qword & I40E_RXD_QW1_ERROR_MASK) 1550 >> I40E_RXD_QW1_ERROR_SHIFT; 1551 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) 1552 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1553 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) 1554 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1555 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) 1556 >> I40E_RXD_QW1_PTYPE_SHIFT; 1557 1558 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) { 1559 ++rxr->not_done; 1560 break; 1561 } 1562 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1563 break; 1564 1565 count--; 1566 sendmp = NULL; 1567 nbuf = NULL; 1568 rsc = 0; 1569 cur->wb.qword1.status_error_len = 0; 1570 rbuf = &rxr->buffers[i]; 1571 mh = rbuf->m_head; 1572 mp = rbuf->m_pack; 1573 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)); 1574 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) 1575 vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); 1576 else 1577 vtag = 0; 1578 1579 /* 1580 ** Make sure bad packets are discarded, 1581 ** note that only EOP descriptor has valid 1582 ** error results. 1583 */ 1584 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1585 rxr->desc_errs++; 1586 ixl_rx_discard(rxr, i); 1587 goto next_desc; 1588 } 1589 1590 /* Prefetch the next buffer */ 1591 if (!eop) { 1592 nextp = i + 1; 1593 if (nextp == que->num_desc) 1594 nextp = 0; 1595 nbuf = &rxr->buffers[nextp]; 1596 prefetch(nbuf); 1597 } 1598 1599 /* 1600 ** The header mbuf is ONLY used when header 1601 ** split is enabled, otherwise we get normal 1602 ** behavior, ie, both header and payload 1603 ** are DMA'd into the payload buffer. 1604 ** 1605 ** Rather than using the fmp/lmp global pointers 1606 ** we now keep the head of a packet chain in the 1607 ** buffer struct and pass this along from one 1608 ** descriptor to the next, until we get EOP. 1609 */ 1610 if (rxr->hdr_split && (rbuf->fmp == NULL)) { 1611 if (hlen > IXL_RX_HDR) 1612 hlen = IXL_RX_HDR; 1613 mh->m_len = hlen; 1614 mh->m_flags |= M_PKTHDR; 1615 mh->m_next = NULL; 1616 mh->m_pkthdr.len = mh->m_len; 1617 /* Null buf pointer so it is refreshed */ 1618 rbuf->m_head = NULL; 1619 /* 1620 ** Check the payload length, this 1621 ** could be zero if its a small 1622 ** packet. 1623 */ 1624 if (plen > 0) { 1625 mp->m_len = plen; 1626 mp->m_next = NULL; 1627 mp->m_flags &= ~M_PKTHDR; 1628 mh->m_next = mp; 1629 mh->m_pkthdr.len += mp->m_len; 1630 /* Null buf pointer so it is refreshed */ 1631 rbuf->m_pack = NULL; 1632 rxr->split++; 1633 } 1634 /* 1635 ** Now create the forward 1636 ** chain so when complete 1637 ** we wont have to. 1638 */ 1639 if (eop == 0) { 1640 /* stash the chain head */ 1641 nbuf->fmp = mh; 1642 /* Make forward chain */ 1643 if (plen) 1644 mp->m_next = nbuf->m_pack; 1645 else 1646 mh->m_next = nbuf->m_pack; 1647 } else { 1648 /* Singlet, prepare to send */ 1649 sendmp = mh; 1650 if (vtag) { 1651 sendmp->m_pkthdr.ether_vtag = vtag; 1652 sendmp->m_flags |= M_VLANTAG; 1653 } 1654 } 1655 } else { 1656 /* 1657 ** Either no header split, or a 1658 ** secondary piece of a fragmented 1659 ** split packet. 
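** (With hdr_split disabled in ixl_init_rx_ring() this is the normal
** path taken for every received descriptor.)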
1660 */ 1661 mp->m_len = plen; 1662 /* 1663 ** See if there is a stored head 1664 ** that determines what we are 1665 */ 1666 sendmp = rbuf->fmp; 1667 rbuf->m_pack = rbuf->fmp = NULL; 1668 1669 if (sendmp != NULL) /* secondary frag */ 1670 sendmp->m_pkthdr.len += mp->m_len; 1671 else { 1672 /* first desc of a non-ps chain */ 1673 sendmp = mp; 1674 sendmp->m_flags |= M_PKTHDR; 1675 sendmp->m_pkthdr.len = mp->m_len; 1676 if (vtag) { 1677 sendmp->m_pkthdr.ether_vtag = vtag; 1678 sendmp->m_flags |= M_VLANTAG; 1679 } 1680 } 1681 /* Pass the head pointer on */ 1682 if (eop == 0) { 1683 nbuf->fmp = sendmp; 1684 sendmp = NULL; 1685 mp->m_next = nbuf->m_pack; 1686 } 1687 } 1688 ++processed; 1689 /* Sending this frame? */ 1690 if (eop) { 1691 sendmp->m_pkthdr.rcvif = ifp; 1692 /* gather stats */ 1693 rxr->rx_packets++; 1694 rxr->rx_bytes += sendmp->m_pkthdr.len; 1695 /* capture data for dynamic ITR adjustment */ 1696 rxr->packets++; 1697 rxr->bytes += sendmp->m_pkthdr.len; 1698 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1699 ixl_rx_checksum(sendmp, status, error, ptype); 1700 #ifdef RSS 1701 sendmp->m_pkthdr.flowid = 1702 le32toh(cur->wb.qword0.hi_dword.rss); 1703 M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype)); 1704 #else 1705 sendmp->m_pkthdr.flowid = que->msix; 1706 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1707 #endif 1708 } 1709 next_desc: 1710 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1711 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1712 1713 /* Advance our pointers to the next descriptor. */ 1714 if (++i == que->num_desc) 1715 i = 0; 1716 1717 /* Now send to the stack or do LRO */ 1718 if (sendmp != NULL) { 1719 rxr->next_check = i; 1720 ixl_rx_input(rxr, ifp, sendmp, ptype); 1721 i = rxr->next_check; 1722 } 1723 1724 /* Every 8 descriptors we go to refresh mbufs */ 1725 if (processed == 8) { 1726 ixl_refresh_mbufs(que, i); 1727 processed = 0; 1728 } 1729 } 1730 1731 /* Refresh any remaining buf structs */ 1732 if (ixl_rx_unrefreshed(que)) 1733 ixl_refresh_mbufs(que, i); 1734 1735 rxr->next_check = i; 1736 1737 #if defined(INET6) || defined(INET) 1738 /* 1739 * Flush any outstanding LRO work 1740 */ 1741 tcp_lro_flush_all(lro); 1742 #endif 1743 1744 IXL_RX_UNLOCK(rxr); 1745 return (FALSE); 1746 } 1747 1748 1749 /********************************************************************* 1750 * 1751 * Verify that the hardware indicated that the checksum is valid. 1752 * Inform the stack about the status of checksum so that stack 1753 * doesn't spend time verifying the checksum. 1754 * 1755 *********************************************************************/ 1756 static void 1757 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype) 1758 { 1759 struct i40e_rx_ptype_decoded decoded; 1760 1761 decoded = decode_rx_desc_ptype(ptype); 1762 1763 /* Errors? 
*/ 1764 if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 1765 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { 1766 mp->m_pkthdr.csum_flags = 0; 1767 return; 1768 } 1769 1770 /* IPv6 with extension headers likely have bad csum */ 1771 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1772 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 1773 if (status & 1774 (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { 1775 mp->m_pkthdr.csum_flags = 0; 1776 return; 1777 } 1778 1779 1780 /* IP Checksum Good */ 1781 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 1782 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; 1783 1784 if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) { 1785 mp->m_pkthdr.csum_flags |= 1786 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1787 mp->m_pkthdr.csum_data |= htons(0xffff); 1788 } 1789 return; 1790 } 1791 1792 #if __FreeBSD_version >= 1100000 1793 uint64_t 1794 ixl_get_counter(if_t ifp, ift_counter cnt) 1795 { 1796 struct ixl_vsi *vsi; 1797 1798 vsi = if_getsoftc(ifp); 1799 1800 switch (cnt) { 1801 case IFCOUNTER_IPACKETS: 1802 return (vsi->ipackets); 1803 case IFCOUNTER_IERRORS: 1804 return (vsi->ierrors); 1805 case IFCOUNTER_OPACKETS: 1806 return (vsi->opackets); 1807 case IFCOUNTER_OERRORS: 1808 return (vsi->oerrors); 1809 case IFCOUNTER_COLLISIONS: 1810 /* Collisions are by standard impossible in 40G/10G Ethernet */ 1811 return (0); 1812 case IFCOUNTER_IBYTES: 1813 return (vsi->ibytes); 1814 case IFCOUNTER_OBYTES: 1815 return (vsi->obytes); 1816 case IFCOUNTER_IMCASTS: 1817 return (vsi->imcasts); 1818 case IFCOUNTER_OMCASTS: 1819 return (vsi->omcasts); 1820 case IFCOUNTER_IQDROPS: 1821 return (vsi->iqdrops); 1822 case IFCOUNTER_OQDROPS: 1823 return (vsi->oqdrops); 1824 case IFCOUNTER_NOPROTO: 1825 return (vsi->noproto); 1826 default: 1827 return (if_get_counter_default(ifp, cnt)); 1828 } 1829 } 1830 #endif 1831 1832
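/*
** Illustrative sketch, kept out of the build: roughly how the attach
** path is expected to wire up the routines in this file, based only on
** how they are used here. The helper name ixl_txrx_example_attach() is
** invented for the example; the ifnet methods and the per-queue
** deferred-start task are the pieces this file actually implements.
*/
#if 0
static void
ixl_txrx_example_attach(struct ixl_vsi *vsi)
{
	struct ifnet *ifp = vsi->ifp;
	struct ixl_queue *que = vsi->queues;

	/* Multiqueue transmit entry points used by the network stack */
	ifp->if_transmit = ixl_mq_start;
	ifp->if_qflush = ixl_qflush;
#if __FreeBSD_version >= 1100000
	ifp->if_get_counter = ixl_get_counter;
#endif

	/* Deferred transmit runs from each queue's taskqueue */
	for (int i = 0; i < vsi->num_queues; i++, que++)
		TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
}
#endif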