/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the BASE and the VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static __inline void ixl_rx_discard(struct rx_ring *, int);
static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
#endif /* DEV_NETMAP */

/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi		*vsi = ifp->if_softc;
	struct ixl_queue	*que;
	struct tx_ring		*txr;
	int			err, i;
#ifdef RSS
	u32			bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is set up correctly, it should be
	** the same bucket that the current CPU maps to.
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;
	/*
	** This may not be perfect, but until something
	** better comes along it will keep from scheduling
	** on stalled queues.
	*/
	if (((1 << i) & vsi->active_queues) == 0)
		i = ffsl(vsi->active_queues);

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue	*que = txr->que;
	struct ixl_vsi		*vsi = que->vsi;
	struct mbuf		*next;
	int			err = 0;


	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue	*que = arg;
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	struct ifnet		*ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi	*vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring	*txr = &que->txr;
		struct mbuf	*m;
		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf	*m;
	int		num = 0, mss;
	bool		ret = FALSE;

	mss = mp->m_pkthdr.tso_segsz;
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len;
		if (mss < 1)
			break;
		if (m->m_next == NULL)
			break;
	}
	if (num > IXL_SPARSE_CHAIN)
		ret = TRUE;

	return (ret);
}

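/*
** A chain flagged as sparse above is run through m_defrag() by
** ixl_xmit() before DMA mapping, since the hardware limits how many
** data descriptors may be used to carry a single TSO segment.
*/
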
/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*txd = NULL;
	struct mbuf		*m_head, *m;
	int			i, j, error, nsegs, maxsegs;
	int			first, last = 0;
	u16			vtag = 0;
	u32			cmd, off;
	bus_dmamap_t		map;
	bus_dma_tag_t		tag;
	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];


	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;
	maxsegs = IXL_MAX_TX_SEGS;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		maxsegs = IXL_MAX_TSO_SEGS;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			que->tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			que->tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		que->tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		que->tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
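	/*
	** Only the last descriptor of the chain carries the EOP/RS bits
	** (IXL_TXD_CMD, OR'd in below); the index of that descriptor is
	** recorded in the first buffer's eop_index so ixl_txeof() knows
	** where each packet ends.
	*/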
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	if (que->busy == 0)
		que->busy = 1;
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_tx_buf	*buf;
	int			error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			   1, 0,			/* alignment, bounds */
			   BUS_SPACE_MAXADDR,		/* lowaddr */
			   BUS_SPACE_MAXADDR,		/* highaddr */
			   NULL, NULL,			/* filter, filterarg */
			   IXL_TSO_SIZE,		/* maxsize */
			   IXL_MAX_TX_SEGS,		/* nsegments */
			   PAGE_SIZE,			/* maxsegsize */
			   0,				/* flags */
			   NULL,			/* lockfunc */
			   NULL,			/* lockfuncarg */
			   &txr->tx_tag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			   1, 0,			/* alignment, bounds */
			   BUS_SPACE_MAXADDR,		/* lowaddr */
			   BUS_SPACE_MAXADDR,		/* highaddr */
			   NULL, NULL,			/* filter, filterarg */
			   IXL_TSO_SIZE,		/* maxsize */
			   IXL_MAX_TSO_SEGS,		/* nsegments */
			   PAGE_SIZE,			/* maxsegsize */
			   0,				/* flags */
			   NULL,			/* lockfunc */
			   NULL,			/* lockfuncarg */
			   &txr->tso_tag))) {
		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}

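/*
** The tags and per-buffer maps created above live for the lifetime of
** the queue: ixl_init_tx_ring() below only clears stale mbufs on each
** reinit, and ixl_free_que_tx() destroys the maps and tags at detach.
*/
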
/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif

	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
		 * netmap slot index, si
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip = NULL;
#endif
	struct tcphdr			*th = NULL;
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
	int				elen, ip_hlen = 0, tcp_hlen;
	u16				etype;
	u8				ipproto = 0;
	bool				tso = FALSE;


	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
		case ETHERTYPE_IP:
			ip = (struct ip *)(mp->m_data + elen);
			ip_hlen = ip->ip_hl << 2;
			ipproto = ip->ip_p;
			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
			/* The IP checksum must be recalculated with TSO */
			if (tso)
				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
			else
				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
			break;
#endif
#ifdef INET6
		case ETHERTYPE_IPV6:
			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
			ip_hlen = sizeof(struct ip6_hdr);
			ipproto = ip6->ip6_nxt;
			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
			break;
#endif
		default:
			break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	switch (ipproto) {
		case IPPROTO_TCP:
			tcp_hlen = th->th_off << 2;
			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
				*off |= (tcp_hlen >> 2) <<
				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
			}
#ifdef IXL_FDIR
			ixl_atr(que, th, etype);
#endif
			break;
		case IPPROTO_UDP:
			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
				*off |= (sizeof(struct udphdr) >> 2) <<
				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
			}
			break;

		case IPPROTO_SCTP:
			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
				*off |= (sizeof(struct sctphdr) >> 2) <<
				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
			}
			/* Fall Thru */
		default:
			break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring			*txr = &que->txr;
	struct i40e_tx_context_desc	*TXD;
	struct ixl_tx_buf		*buf;
	u32				cmd, mss, type, tsolen;
	u16				etype;
	int				idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip;
#endif
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr			*th;
#endif
	u64				type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
		    __func__, ntohs(etype));
		return FALSE;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring  *txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

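/*
** The queue uses head writeback: the hardware DMA-writes its current
** head index into the slot just past the last descriptor, which is
** what ixl_get_tx_head() reads. ixl_txeof() below relies on that
** value, rather than per-descriptor done bits, to decide how far it
** may clean.
*/
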
/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, head, done, processed;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*tx_desc, *eop_desc;


	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEV_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;


	/*
	** Hang detection: we know there is work outstanding
	** or the first return would have been taken, so
	** indicate an unsuccessful pass. If the count grows
	** too large in local_timer, the queue will be
	** considered hung. If anything has been cleaned,
	** reset the state.
	*/
	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
		++que->busy;

	if (processed)
		que->busy = 1; /* Note this turns off HUNG */

	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary: if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct ixl_rx_buf	*buf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}

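/*
** Both the refresh path above and the allocation below follow the
** header-split buffer layout: every descriptor owns a small header
** mbuf (htag/hmap) and a cluster-sized payload mbuf (ptag/pmap),
** although header split itself is currently left disabled by
** ixl_init_rx_ring().
*/
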
/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffers
 *  we'll need is equal to the number of receive descriptors
 *  we've defined.
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_rx_buf	*buf;
	int			i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
				   1, 0,	/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MSIZE,		/* maxsize */
				   1,			/* nsegments */
				   MSIZE,		/* maxsegsize */
				   0,			/* flags */
				   NULL,		/* lockfunc */
				   NULL,		/* lockfuncarg */
				   &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
				   1, 0,	/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MJUM16BYTES,		/* maxsize */
				   1,			/* nsegments */
				   MJUM16BYTES,		/* maxsegsize */
				   0,			/* flags */
				   NULL,		/* lockfunc */
				   NULL,		/* lockfuncarg */
				   &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}

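/*
** The htag/ptag pair created above is sized for the worst case
** (MSIZE header mbufs, up to MJUM16BYTES payload clusters); the
** cluster size actually requested at run time is rxr->mbuf_sz.
*/
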
/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet		*ifp = vsi->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	struct ixl_rx_buf	*buf;
	bus_dma_segment_t	pseg[1], hseg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf	*mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ).
		 * No need to allocate an mbuf, so end the
		 * block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate mbufs if not
		** doing header split, it's wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}


	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_rx_buf	*buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static __inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * ATM LRO is only for IPv4/TCP packets and the TCP checksum of
	 * the packet should have been computed by hardware. Also it
	 * should not have a VLAN tag in its ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 **  - LRO not enabled, or
		 **  - no LRO resources, or
		 **  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static __inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf	*rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

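/*
** ixl_ptype_to_hash() below translates the hardware-decoded packet
** type into the mbuf hash type (M_HASHTYPE_RSS_*) expected by the
** stack, falling back to M_HASHTYPE_OPAQUE for anything it does not
** recognize.
*/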
#ifdef RSS
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded	decoded;
	u8				ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
			case I40E_RX_PTYPE_INNER_PROT_TCP:
				if (ex)
					return M_HASHTYPE_RSS_TCP_IPV6_EX;
				else
					return M_HASHTYPE_RSS_TCP_IPV6;
			case I40E_RX_PTYPE_INNER_PROT_UDP:
				if (ex)
					return M_HASHTYPE_RSS_UDP_IPV6_EX;
				else
					return M_HASHTYPE_RSS_UDP_IPV6;
			default:
				if (ex)
					return M_HASHTYPE_RSS_IPV6_EX;
				else
					return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
			case I40E_RX_PTYPE_INNER_PROT_TCP:
				return M_HASHTYPE_RSS_TCP_IPV4;
			case I40E_RX_PTYPE_INNER_PROT_UDP:
				if (ex)
					return M_HASHTYPE_RSS_UDP_IPV4_EX;
				else
					return M_HASHTYPE_RSS_UDP_IPV4;
			default:
				return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data that has been
 *  DMA'd into host memory up to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	struct ifnet		*ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
#endif
	int			i, nextp, processed = 0;
	union i40e_rx_desc	*cur;
	struct ixl_rx_buf	*rbuf, *nbuf;


	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf	*sendmp, *mh, *mp;
		u32		rsc, status, error;
		u16		hlen, plen, vtag;
		u64		qword;
		u8		ptype;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded,
		** note that only EOP descriptor has valid
		** error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->discarded++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we won't have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/* Errors? */
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}


	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif