/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the BASE and the VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void ixl_refresh_mbufs(struct ixl_queue *, int);
static int  ixl_xmit(struct ixl_queue *, struct mbuf **);
static int  ixl_tx_setup_offload(struct ixl_queue *,
    struct mbuf *, u32 *, u32 *);
static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static __inline void ixl_rx_discard(struct rx_ring *, int);
static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
    struct mbuf *, u8);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
#endif /* DEV_NETMAP */

/*
** Multiqueue Transmit driver
**
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
    struct ixl_vsi *vsi = ifp->if_softc;
    struct ixl_queue *que;
    struct tx_ring *txr;
    int err, i;
#ifdef RSS
    u32 bucket_id;
#endif

    /*
    ** Which queue to use:
    **
    ** When doing RSS, map it to the same outbound
    ** queue as the incoming flow would be mapped to.
    ** If everything is set up correctly, it should be
    ** the same bucket that the current CPU we're on is.
    */
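    /*
     * Illustrative example (numbers are hypothetical): with RSS and
     * vsi->num_queues == 8, a flow whose hash falls in RSS bucket 11
     * is transmitted on queue 11 % 8 == 3, which is intended to match
     * the queue its receive traffic is steered to (per the comment
     * above).
     */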
    if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
        if (rss_hash2bucket(m->m_pkthdr.flowid,
            M_HASHTYPE_GET(m), &bucket_id) == 0) {
            i = bucket_id % vsi->num_queues;
        } else
#endif
            i = m->m_pkthdr.flowid % vsi->num_queues;
    } else
        i = curcpu % vsi->num_queues;
    /*
    ** This may not be perfect, but until something
    ** better comes along it will keep from scheduling
    ** on stalled queues.
    */
    if (((1 << i) & vsi->active_queues) == 0)
        i = ffsl(vsi->active_queues);

    que = &vsi->queues[i];
    txr = &que->txr;

    err = drbr_enqueue(ifp, txr->br, m);
    if (err)
        return (err);
    if (IXL_TX_TRYLOCK(txr)) {
        ixl_mq_start_locked(ifp, txr);
        IXL_TX_UNLOCK(txr);
    } else
        taskqueue_enqueue(que->tq, &que->tx_task);

    return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
    struct ixl_queue *que = txr->que;
    struct ixl_vsi *vsi = que->vsi;
    struct mbuf *next;
    int err = 0;

    if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
        vsi->link_active == 0)
        return (ENETDOWN);

    /* Process the transmit queue */
    while ((next = drbr_peek(ifp, txr->br)) != NULL) {
        if ((err = ixl_xmit(que, &next)) != 0) {
            if (next == NULL)
                drbr_advance(ifp, txr->br);
            else
                drbr_putback(ifp, txr->br, next);
            break;
        }
        drbr_advance(ifp, txr->br);
        /* Send a copy of the frame to the BPF listener */
        ETHER_BPF_MTAP(ifp, next);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
            break;
    }

    if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
        ixl_txeof(que);

    return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
    struct ixl_queue *que = arg;
    struct tx_ring *txr = &que->txr;
    struct ixl_vsi *vsi = que->vsi;
    struct ifnet *ifp = vsi->ifp;

    IXL_TX_LOCK(txr);
    if (!drbr_empty(ifp, txr->br))
        ixl_mq_start_locked(ifp, txr);
    IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
    struct ixl_vsi *vsi = ifp->if_softc;

    for (int i = 0; i < vsi->num_queues; i++) {
        struct ixl_queue *que = &vsi->queues[i];
        struct tx_ring *txr = &que->txr;
        struct mbuf *m;

        IXL_TX_LOCK(txr);
        while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
            m_freem(m);
        IXL_TX_UNLOCK(txr);
    }
    if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
    struct mbuf *m;
    int num = 0, mss;
    bool ret = FALSE;

    mss = mp->m_pkthdr.tso_segsz;
    for (m = mp->m_next; m != NULL; m = m->m_next) {
        num++;
        mss -= m->m_len;
        if (mss < 1)
            break;
        if (m->m_next == NULL)
            break;
    }
    if (num > IXL_SPARSE_CHAIN)
        ret = TRUE;

    return (ret);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *   - return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
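/*
 * Note: each data descriptor's second quadword (cmd_type_offset_bsz)
 * is assembled in ixl_xmit() below by OR-ing together the descriptor
 * type (I40E_TX_DESC_DTYPE_DATA), the command flags, the
 * MACLEN/IPLEN/L4LEN offsets produced by ixl_tx_setup_offload(), the
 * buffer length, and the L2TAG1 VLAN tag, each shifted into place with
 * its I40E_TXD_QW1_*_SHIFT constant.
 */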
static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
    struct ixl_vsi *vsi = que->vsi;
    struct i40e_hw *hw = vsi->hw;
    struct tx_ring *txr = &que->txr;
    struct ixl_tx_buf *buf;
    struct i40e_tx_desc *txd = NULL;
    struct mbuf *m_head, *m;
    int i, j, error, nsegs, maxsegs;
    int first, last = 0;
    u16 vtag = 0;
    u32 cmd, off;
    bus_dmamap_t map;
    bus_dma_tag_t tag;
    bus_dma_segment_t segs[IXL_MAX_TSO_SEGS];

    cmd = off = 0;
    m_head = *m_headp;

    /*
     * Important to capture the first descriptor
     * used because it will contain the index of
     * the one we tell the hardware to report back
     */
    first = txr->next_avail;
    buf = &txr->buffers[first];
    map = buf->map;
    tag = txr->tx_tag;
    maxsegs = IXL_MAX_TX_SEGS;

    if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
        /* Use larger mapping for TSO */
        tag = txr->tso_tag;
        maxsegs = IXL_MAX_TSO_SEGS;
        if (ixl_tso_detect_sparse(m_head)) {
            m = m_defrag(m_head, M_NOWAIT);
            if (m == NULL) {
                m_freem(*m_headp);
                *m_headp = NULL;
                return (ENOBUFS);
            }
            *m_headp = m;
        }
    }

    /*
     * Map the packet for DMA.
     */
    error = bus_dmamap_load_mbuf_sg(tag, map,
        *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

    if (error == EFBIG) {
        struct mbuf *m;

        m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
        if (m == NULL) {
            que->mbuf_defrag_failed++;
            m_freem(*m_headp);
            *m_headp = NULL;
            return (ENOBUFS);
        }
        *m_headp = m;

        /* Try it again */
        error = bus_dmamap_load_mbuf_sg(tag, map,
            *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

        if (error == ENOMEM) {
            que->tx_dma_setup++;
            return (error);
        } else if (error != 0) {
            que->tx_dma_setup++;
            m_freem(*m_headp);
            *m_headp = NULL;
            return (error);
        }
    } else if (error == ENOMEM) {
        que->tx_dma_setup++;
        return (error);
    } else if (error != 0) {
        que->tx_dma_setup++;
        m_freem(*m_headp);
        *m_headp = NULL;
        return (error);
    }

    /* Make certain there are enough descriptors */
    if (nsegs > txr->avail - 2) {
        txr->no_desc++;
        error = ENOBUFS;
        goto xmit_fail;
    }
    m_head = *m_headp;

    /* Set up the TSO/CSUM offload */
    if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
        error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
        if (error)
            goto xmit_fail;
    }

    cmd |= I40E_TX_DESC_CMD_ICRC;
    /* Grab the VLAN tag */
    if (m_head->m_flags & M_VLANTAG) {
        cmd |= I40E_TX_DESC_CMD_IL2TAG1;
        vtag = htole16(m_head->m_pkthdr.ether_vtag);
    }

    i = txr->next_avail;
    for (j = 0; j < nsegs; j++) {
        bus_size_t seglen;

        buf = &txr->buffers[i];
        buf->tag = tag; /* Keep track of the type tag */
        txd = &txr->base[i];
        seglen = segs[j].ds_len;

        txd->buffer_addr = htole64(segs[j].ds_addr);
        txd->cmd_type_offset_bsz =
            htole64(I40E_TX_DESC_DTYPE_DATA
            | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
            | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
            | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
            | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

        last = i; /* descriptor that will get completion IRQ */

        if (++i == que->num_desc)
            i = 0;

        buf->m_head = NULL;
        buf->eop_index = -1;
    }
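    /*
     * Note: IXL_TXD_CMD below sets both EOP and RS (Report Status) on
     * the frame's final descriptor. Completion is later detected in
     * ixl_txeof() through the head writeback value read by
     * ixl_get_tx_head(), not by polling per-descriptor done bits.
     */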
    /* Set the last descriptor for report */
    txd->cmd_type_offset_bsz |=
        htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
    txr->avail -= nsegs;
    txr->next_avail = i;

    buf->m_head = m_head;
    /* Swap the dma map between the first and last descriptor */
    txr->buffers[first].map = buf->map;
    buf->map = map;
    bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

    /* Set the index of the descriptor that will be marked done */
    buf = &txr->buffers[first];
    buf->eop_index = last;

    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    /*
     * Advance the Transmit Descriptor Tail (Tdt), this tells the
     * hardware that this frame is available to transmit.
     */
    ++txr->total_packets;
    wr32(hw, txr->tail, i);

    ixl_flush(hw);
    /* Mark outstanding work */
    if (que->busy == 0)
        que->busy = 1;
    return (0);

xmit_fail:
    bus_dmamap_unload(tag, buf->map);
    return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
    struct tx_ring *txr = &que->txr;
    struct ixl_vsi *vsi = que->vsi;
    device_t dev = vsi->dev;
    struct ixl_tx_buf *buf;
    int error = 0;

    /*
     * Setup DMA descriptor areas.
     */
    if ((error = bus_dma_tag_create(NULL,   /* parent */
        1, 0,                   /* alignment, bounds */
        BUS_SPACE_MAXADDR,      /* lowaddr */
        BUS_SPACE_MAXADDR,      /* highaddr */
        NULL, NULL,             /* filter, filterarg */
        IXL_TSO_SIZE,           /* maxsize */
        IXL_MAX_TX_SEGS,        /* nsegments */
        PAGE_SIZE,              /* maxsegsize */
        0,                      /* flags */
        NULL,                   /* lockfunc */
        NULL,                   /* lockfuncarg */
        &txr->tx_tag))) {
        device_printf(dev, "Unable to allocate TX DMA tag\n");
        goto fail;
    }

    /* Make a special tag for TSO */
    if ((error = bus_dma_tag_create(NULL,   /* parent */
        1, 0,                   /* alignment, bounds */
        BUS_SPACE_MAXADDR,      /* lowaddr */
        BUS_SPACE_MAXADDR,      /* highaddr */
        NULL, NULL,             /* filter, filterarg */
        IXL_TSO_SIZE,           /* maxsize */
        IXL_MAX_TSO_SEGS,       /* nsegments */
        PAGE_SIZE,              /* maxsegsize */
        0,                      /* flags */
        NULL,                   /* lockfunc */
        NULL,                   /* lockfuncarg */
        &txr->tso_tag))) {
        device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
        goto fail;
    }

    if (!(txr->buffers =
        (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
        que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
        device_printf(dev, "Unable to allocate tx_buffer memory\n");
        error = ENOMEM;
        goto fail;
    }

    /* Create the descriptor buffer default dma maps */
    buf = txr->buffers;
    for (int i = 0; i < que->num_desc; i++, buf++) {
        buf->tag = txr->tx_tag;
        error = bus_dmamap_create(buf->tag, 0, &buf->map);
        if (error != 0) {
            device_printf(dev, "Unable to create TX DMA map\n");
            goto fail;
        }
    }
fail:
    return (error);
}
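/*
 * Note: the two tags above differ only in the number of scatter/gather
 * segments they permit (IXL_MAX_TX_SEGS vs. IXL_MAX_TSO_SEGS).
 * ixl_xmit() selects tso_tag for TSO frames and records whichever tag
 * was used in buf->tag, so the unload and destroy paths later operate
 * on the tag that last mapped the buffer.
 */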
/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *   - called by init, it clears the descriptor ring,
 *     and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
    struct tx_ring *txr = &que->txr;
    struct ixl_tx_buf *buf;
#ifdef DEV_NETMAP
    struct netmap_adapter *na = NA(que->vsi->ifp);
    struct netmap_slot *slot;
#endif /* DEV_NETMAP */

    /* Clear the old ring contents */
    IXL_TX_LOCK(txr);
#ifdef DEV_NETMAP
    /*
     * (under lock): if in netmap mode, do some consistency
     * checks and set slot to entry 0 of the netmap ring.
     */
    slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

    bzero((void *)txr->base,
        (sizeof(struct i40e_tx_desc)) * que->num_desc);

    /* Reset indices */
    txr->next_avail = 0;
    txr->next_to_clean = 0;

#ifdef IXL_FDIR
    /* Initialize flow director */
    txr->atr_rate = ixl_atr_rate;
    txr->atr_count = 0;
#endif

    /* Free any existing tx mbufs. */
    buf = txr->buffers;
    for (int i = 0; i < que->num_desc; i++, buf++) {
        if (buf->m_head != NULL) {
            bus_dmamap_sync(buf->tag, buf->map,
                BUS_DMASYNC_POSTWRITE);
            bus_dmamap_unload(buf->tag, buf->map);
            m_freem(buf->m_head);
            buf->m_head = NULL;
        }
#ifdef DEV_NETMAP
        /*
         * In netmap mode, set the map for the packet buffer.
         * NOTE: Some drivers (not this one) also need to set
         * the physical buffer address in the NIC ring.
         * netmap_idx_n2k() maps a nic index, i, into the corresponding
         * netmap slot index, si
         */
        if (slot) {
            int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
            netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
        }
#endif /* DEV_NETMAP */
        /* Clear the EOP index */
        buf->eop_index = -1;
    }

    /* Set number of descriptors available */
    txr->avail = que->num_desc;

    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    IXL_TX_UNLOCK(txr);
}
/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
    struct tx_ring *txr = &que->txr;
    struct ixl_tx_buf *buf;

    INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

    for (int i = 0; i < que->num_desc; i++) {
        buf = &txr->buffers[i];
        if (buf->m_head != NULL) {
            bus_dmamap_sync(buf->tag, buf->map,
                BUS_DMASYNC_POSTWRITE);
            bus_dmamap_unload(buf->tag,
                buf->map);
            m_freem(buf->m_head);
            buf->m_head = NULL;
            if (buf->map != NULL) {
                bus_dmamap_destroy(buf->tag,
                    buf->map);
                buf->map = NULL;
            }
        } else if (buf->map != NULL) {
            bus_dmamap_unload(buf->tag,
                buf->map);
            bus_dmamap_destroy(buf->tag,
                buf->map);
            buf->map = NULL;
        }
    }
    if (txr->br != NULL)
        buf_ring_free(txr->br, M_DEVBUF);
    if (txr->buffers != NULL) {
        free(txr->buffers, M_DEVBUF);
        txr->buffers = NULL;
    }
    if (txr->tx_tag != NULL) {
        bus_dma_tag_destroy(txr->tx_tag);
        txr->tx_tag = NULL;
    }
    if (txr->tso_tag != NULL) {
        bus_dma_tag_destroy(txr->tso_tag);
        txr->tso_tag = NULL;
    }

    INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
    return;
}
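/*
 * Note on the offload offsets built below: in the descriptor offset
 * field the MAC header length (MACLEN) is expressed in 2-byte words,
 * while the IP and L4 header lengths (IPLEN/L4LEN) are expressed in
 * 4-byte dwords. That is why ixl_tx_setup_offload() shifts the
 * Ethernet header length right by 1 and the IP/TCP header lengths
 * right by 2 before applying the I40E_TX_DESC_LENGTH_*_SHIFT values.
 */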
/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
    struct ether_vlan_header *eh;
#ifdef INET
    struct ip *ip = NULL;
#endif
    struct tcphdr *th = NULL;
#ifdef INET6
    struct ip6_hdr *ip6;
#endif
    int elen, ip_hlen = 0, tcp_hlen;
    u16 etype;
    u8 ipproto = 0;
    bool tso = FALSE;

    /* Set up the TSO context descriptor if required */
    if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
        tso = ixl_tso_setup(que, mp);
        if (tso)
            ++que->tso;
        else
            return (ENXIO);
    }

    /*
     * Determine where frame payload starts.
     * Jump over vlan headers if already present,
     * helpful for QinQ too.
     */
    eh = mtod(mp, struct ether_vlan_header *);
    if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
        etype = ntohs(eh->evl_proto);
        elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    } else {
        etype = ntohs(eh->evl_encap_proto);
        elen = ETHER_HDR_LEN;
    }

    switch (etype) {
#ifdef INET
    case ETHERTYPE_IP:
        ip = (struct ip *)(mp->m_data + elen);
        ip_hlen = ip->ip_hl << 2;
        ipproto = ip->ip_p;
        th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
        /* The IP checksum must be recalculated with TSO */
        if (tso)
            *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
        else
            *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
        break;
#endif
#ifdef INET6
    case ETHERTYPE_IPV6:
        ip6 = (struct ip6_hdr *)(mp->m_data + elen);
        ip_hlen = sizeof(struct ip6_hdr);
        ipproto = ip6->ip6_nxt;
        th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
        *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
        break;
#endif
    default:
        break;
    }

    *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
    *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

    switch (ipproto) {
    case IPPROTO_TCP:
        tcp_hlen = th->th_off << 2;
        if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
            *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
            *off |= (tcp_hlen >> 2) <<
                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
        }
#ifdef IXL_FDIR
        ixl_atr(que, th, etype);
#endif
        break;
    case IPPROTO_UDP:
        if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
            *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
            *off |= (sizeof(struct udphdr) >> 2) <<
                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
        }
        break;

    case IPPROTO_SCTP:
        if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
            *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
            *off |= (sizeof(struct sctphdr) >> 2) <<
                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
        }
        /* Fall Thru */
    default:
        break;
    }

    return (0);
}
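/*
 * Note: ixl_tso_setup() below consumes one ring slot for a context
 * descriptor placed ahead of the frame's data descriptors. It packs
 * the TSO payload length (packet length minus the L2/L3/L4 headers)
 * and the MSS into type_cmd_tso_mss, and decrements txr->avail for the
 * slot it uses.
 */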
/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
    struct tx_ring *txr = &que->txr;
    struct i40e_tx_context_desc *TXD;
    struct ixl_tx_buf *buf;
    u32 cmd, mss, type, tsolen;
    u16 etype;
    int idx, elen, ip_hlen, tcp_hlen;
    struct ether_vlan_header *eh;
#ifdef INET
    struct ip *ip;
#endif
#ifdef INET6
    struct ip6_hdr *ip6;
#endif
#if defined(INET6) || defined(INET)
    struct tcphdr *th;
#endif
    u64 type_cmd_tso_mss;

    /*
     * Determine where frame payload starts.
     * Jump over vlan headers if already present
     */
    eh = mtod(mp, struct ether_vlan_header *);
    if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
        elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
        etype = eh->evl_proto;
    } else {
        elen = ETHER_HDR_LEN;
        etype = eh->evl_encap_proto;
    }

    switch (ntohs(etype)) {
#ifdef INET6
    case ETHERTYPE_IPV6:
        ip6 = (struct ip6_hdr *)(mp->m_data + elen);
        if (ip6->ip6_nxt != IPPROTO_TCP)
            return (FALSE);
        ip_hlen = sizeof(struct ip6_hdr);
        th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
        th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
        tcp_hlen = th->th_off << 2;
        break;
#endif
#ifdef INET
    case ETHERTYPE_IP:
        ip = (struct ip *)(mp->m_data + elen);
        if (ip->ip_p != IPPROTO_TCP)
            return (FALSE);
        ip->ip_sum = 0;
        ip_hlen = ip->ip_hl << 2;
        th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
        th->th_sum = in_pseudo(ip->ip_src.s_addr,
            ip->ip_dst.s_addr, htons(IPPROTO_TCP));
        tcp_hlen = th->th_off << 2;
        break;
#endif
    default:
        printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
            __func__, ntohs(etype));
        return (FALSE);
    }

    /* Ensure we have at least the IP+TCP header in the first mbuf. */
    if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
        return (FALSE);

    idx = txr->next_avail;
    buf = &txr->buffers[idx];
    TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
    tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

    type = I40E_TX_DESC_DTYPE_CONTEXT;
    cmd = I40E_TX_CTX_DESC_TSO;
    mss = mp->m_pkthdr.tso_segsz;

    type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
        ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
        ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
        ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
    TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

    TXD->tunneling_params = htole32(0);
    buf->m_head = NULL;
    buf->eop_index = -1;

    if (++idx == que->num_desc)
        idx = 0;

    txr->avail--;
    txr->next_avail = idx;

    return (TRUE);
}
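/*
 * Note: this driver uses the head-writeback completion scheme: the
 * hardware writes the index of the next descriptor it will process
 * into host memory just past the last descriptor
 * (&txr->base[que->num_desc]). The ring DMA allocation is expected to
 * reserve room for that u32; ixl_txeof() compares this value against
 * next_to_clean instead of checking per-descriptor done bits.
 */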
/*
** ixl_get_tx_head - Retrieve the value from the
**    location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
    struct tx_ring *txr = &que->txr;
    void *head = &txr->base[que->num_desc];
    return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
    struct tx_ring *txr = &que->txr;
    u32 first, last, head, done, processed;
    struct ixl_tx_buf *buf;
    struct i40e_tx_desc *tx_desc, *eop_desc;

    mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
    // XXX todo: implement moderation
    if (netmap_tx_irq(que->vsi->ifp, que->me))
        return FALSE;
#endif /* DEV_NETMAP */

    /* These are not the descriptors you seek, move along :) */
    if (txr->avail == que->num_desc) {
        que->busy = 0;
        return FALSE;
    }

    processed = 0;
    first = txr->next_to_clean;
    buf = &txr->buffers[first];
    tx_desc = (struct i40e_tx_desc *)&txr->base[first];
    last = buf->eop_index;
    if (last == -1)
        return FALSE;
    eop_desc = (struct i40e_tx_desc *)&txr->base[last];

    /* Get the Head WB value */
    head = ixl_get_tx_head(que);

    /*
    ** Get the index of the first descriptor
    ** BEYOND the EOP and call that 'done'.
    ** I do this so the comparison in the
    ** inner while loop below can be simple
    */
    if (++last == que->num_desc) last = 0;
    done = last;

    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_POSTREAD);
    /*
    ** The HEAD index of the ring is written in a
    ** defined location, this rather than a done bit
    ** is what is used to keep track of what must be
    ** 'cleaned'.
    */
    while (first != head) {
        /* We clean the range of the packet */
        while (first != done) {
            ++txr->avail;
            ++processed;

            if (buf->m_head) {
                txr->bytes += /* for ITR adjustment */
                    buf->m_head->m_pkthdr.len;
                txr->tx_bytes += /* for TX stats */
                    buf->m_head->m_pkthdr.len;
                bus_dmamap_sync(buf->tag,
                    buf->map,
                    BUS_DMASYNC_POSTWRITE);
                bus_dmamap_unload(buf->tag,
                    buf->map);
                m_freem(buf->m_head);
                buf->m_head = NULL;
                buf->map = NULL;
            }
            buf->eop_index = -1;

            if (++first == que->num_desc)
                first = 0;

            buf = &txr->buffers[first];
            tx_desc = &txr->base[first];
        }
        ++txr->packets;
        /* See if there is more work now */
        last = buf->eop_index;
        if (last != -1) {
            eop_desc = &txr->base[last];
            /* Get next done point */
            if (++last == que->num_desc) last = 0;
            done = last;
        } else
            break;
    }
    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

    txr->next_to_clean = first;

    /*
    ** Hang detection: we know there's
    ** work outstanding or the first return
    ** would have been taken, so indicate an
    ** unsuccessful pass; in local_timer, if
    ** the value grows too large the queue will
    ** be considered hung. If anything has been
    ** cleaned then reset the state.
    */
    if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
        ++que->busy;

    if (processed)
        que->busy = 1; /* Note this turns off HUNG */

    /*
     * If there are no pending descriptors, clear the timeout.
     */
    if (txr->avail == que->num_desc) {
        que->busy = 0;
        return FALSE;
    }

    return TRUE;
}
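/*
 * Note: ixl_refresh_mbufs() below advances rxr->next_refresh as it
 * refills descriptors and, on exit, writes the hardware tail with that
 * value, i.e. the index of the first descriptor it has not yet
 * refilled (one past the last descriptor handed back to the hardware).
 */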
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
    struct ixl_vsi *vsi = que->vsi;
    struct rx_ring *rxr = &que->rxr;
    bus_dma_segment_t hseg[1];
    bus_dma_segment_t pseg[1];
    struct ixl_rx_buf *buf;
    struct mbuf *mh, *mp;
    int i, j, nsegs, error;
    bool refreshed = FALSE;

    i = j = rxr->next_refresh;
    /* Control the loop with one beyond */
    if (++j == que->num_desc)
        j = 0;

    while (j != limit) {
        buf = &rxr->buffers[i];
        if (rxr->hdr_split == FALSE)
            goto no_split;

        if (buf->m_head == NULL) {
            mh = m_gethdr(M_NOWAIT, MT_DATA);
            if (mh == NULL)
                goto update;
        } else
            mh = buf->m_head;

        mh->m_pkthdr.len = mh->m_len = MHLEN;
        mh->m_flags |= M_PKTHDR;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->htag,
            buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) {
            printf("Refresh mbufs: hdr dmamap load"
                " failure - %d\n", error);
            m_free(mh);
            buf->m_head = NULL;
            goto update;
        }
        buf->m_head = mh;
        bus_dmamap_sync(rxr->htag, buf->hmap,
            BUS_DMASYNC_PREREAD);
        rxr->base[i].read.hdr_addr =
            htole64(hseg[0].ds_addr);

no_split:
        if (buf->m_pack == NULL) {
            mp = m_getjcl(M_NOWAIT, MT_DATA,
                M_PKTHDR, rxr->mbuf_sz);
            if (mp == NULL)
                goto update;
        } else
            mp = buf->m_pack;

        mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->ptag,
            buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) {
            printf("Refresh mbufs: payload dmamap load"
                " failure - %d\n", error);
            m_free(mp);
            buf->m_pack = NULL;
            goto update;
        }
        buf->m_pack = mp;
        bus_dmamap_sync(rxr->ptag, buf->pmap,
            BUS_DMASYNC_PREREAD);
        rxr->base[i].read.pkt_addr =
            htole64(pseg[0].ds_addr);
        /* Used only when doing header split */
        rxr->base[i].read.hdr_addr = 0;

        refreshed = TRUE;
        /* Next is precalculated */
        i = j;
        rxr->next_refresh = i;
        if (++j == que->num_desc)
            j = 0;
    }
update:
    if (refreshed) /* Update hardware tail index */
        wr32(vsi->hw, rxr->tail, rxr->next_refresh);
    return;
}
/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
    struct rx_ring *rxr = &que->rxr;
    struct ixl_vsi *vsi = que->vsi;
    device_t dev = vsi->dev;
    struct ixl_rx_buf *buf;
    int i, bsize, error;

    bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
    if (!(rxr->buffers =
        (struct ixl_rx_buf *) malloc(bsize,
        M_DEVBUF, M_NOWAIT | M_ZERO))) {
        device_printf(dev, "Unable to allocate rx_buffer memory\n");
        error = ENOMEM;
        return (error);
    }

    if ((error = bus_dma_tag_create(NULL,   /* parent */
        1, 0,                   /* alignment, bounds */
        BUS_SPACE_MAXADDR,      /* lowaddr */
        BUS_SPACE_MAXADDR,      /* highaddr */
        NULL, NULL,             /* filter, filterarg */
        MSIZE,                  /* maxsize */
        1,                      /* nsegments */
        MSIZE,                  /* maxsegsize */
        0,                      /* flags */
        NULL,                   /* lockfunc */
        NULL,                   /* lockfuncarg */
        &rxr->htag))) {
        device_printf(dev, "Unable to create RX DMA htag\n");
        return (error);
    }

    if ((error = bus_dma_tag_create(NULL,   /* parent */
        1, 0,                   /* alignment, bounds */
        BUS_SPACE_MAXADDR,      /* lowaddr */
        BUS_SPACE_MAXADDR,      /* highaddr */
        NULL, NULL,             /* filter, filterarg */
        MJUM16BYTES,            /* maxsize */
        1,                      /* nsegments */
        MJUM16BYTES,            /* maxsegsize */
        0,                      /* flags */
        NULL,                   /* lockfunc */
        NULL,                   /* lockfuncarg */
        &rxr->ptag))) {
        device_printf(dev, "Unable to create RX DMA ptag\n");
        return (error);
    }

    for (i = 0; i < que->num_desc; i++) {
        buf = &rxr->buffers[i];
        error = bus_dmamap_create(rxr->htag,
            BUS_DMA_NOWAIT, &buf->hmap);
        if (error) {
            device_printf(dev, "Unable to create RX head map\n");
            break;
        }
        error = bus_dmamap_create(rxr->ptag,
            BUS_DMA_NOWAIT, &buf->pmap);
        if (error) {
            device_printf(dev, "Unable to create RX pkt map\n");
            break;
        }
    }

    return (error);
}
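/*
 * Note: htag/hmap above cover the small header mbufs (at most MSIZE
 * bytes) that are only used when header split is enabled, while
 * ptag/pmap cover the payload clusters and are sized up to
 * MJUM16BYTES so that rxr->mbuf_sz can grow with larger frames.
 */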
/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
    struct rx_ring *rxr = &que->rxr;
    struct ixl_vsi *vsi = que->vsi;
#if defined(INET6) || defined(INET)
    struct ifnet *ifp = vsi->ifp;
    struct lro_ctrl *lro = &rxr->lro;
#endif
    struct ixl_rx_buf *buf;
    bus_dma_segment_t pseg[1], hseg[1];
    int rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
    struct netmap_adapter *na = NA(que->vsi->ifp);
    struct netmap_slot *slot;
#endif /* DEV_NETMAP */

    IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
    /* same as in ixl_init_tx_ring() */
    slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
    /* Clear the ring contents */
    rsize = roundup2(que->num_desc *
        sizeof(union i40e_rx_desc), DBA_ALIGN);
    bzero((void *)rxr->base, rsize);
    /* Cleanup any existing buffers */
    for (int i = 0; i < que->num_desc; i++) {
        buf = &rxr->buffers[i];
        if (buf->m_head != NULL) {
            bus_dmamap_sync(rxr->htag, buf->hmap,
                BUS_DMASYNC_POSTREAD);
            bus_dmamap_unload(rxr->htag, buf->hmap);
            buf->m_head->m_flags |= M_PKTHDR;
            m_freem(buf->m_head);
        }
        if (buf->m_pack != NULL) {
            bus_dmamap_sync(rxr->ptag, buf->pmap,
                BUS_DMASYNC_POSTREAD);
            bus_dmamap_unload(rxr->ptag, buf->pmap);
            buf->m_pack->m_flags |= M_PKTHDR;
            m_freem(buf->m_pack);
        }
        buf->m_head = NULL;
        buf->m_pack = NULL;
    }

    /* header split is off */
    rxr->hdr_split = FALSE;
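    /*
     * Note: because hdr_split is forced off here, the header-mbuf
     * (m_head/hmap) paths in ixl_refresh_mbufs() and ixl_rxeof() are
     * effectively dormant and each received frame is DMA'd entirely
     * into the payload cluster (m_pack).
     */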
    /* Now replenish the mbufs */
    for (int j = 0; j != que->num_desc; ++j) {
        struct mbuf *mh, *mp;

        buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
        /*
         * In netmap mode, fill the map and set the buffer
         * address in the NIC ring, considering the offset
         * between the netmap and NIC rings (see comment in
         * ixgbe_setup_transmit_ring() ). No need to allocate
         * an mbuf, so end the block with a continue;
         */
        if (slot) {
            int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
            uint64_t paddr;
            void *addr;

            addr = PNMB(na, slot + sj, &paddr);
            netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
            /* Update descriptor and the cached value */
            rxr->base[j].read.pkt_addr = htole64(paddr);
            rxr->base[j].read.hdr_addr = 0;
            continue;
        }
#endif /* DEV_NETMAP */

        /*
        ** Don't allocate mbufs if not
        ** doing header split, it's wasteful
        */
        if (rxr->hdr_split == FALSE)
            goto skip_head;

        /* First the header */
        buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
        if (buf->m_head == NULL) {
            error = ENOBUFS;
            goto fail;
        }
        m_adj(buf->m_head, ETHER_ALIGN);
        mh = buf->m_head;
        mh->m_len = mh->m_pkthdr.len = MHLEN;
        mh->m_flags |= M_PKTHDR;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->htag,
            buf->hmap, buf->m_head, hseg,
            &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) /* Nothing elegant to do here */
            goto fail;
        bus_dmamap_sync(rxr->htag,
            buf->hmap, BUS_DMASYNC_PREREAD);
        /* Update descriptor */
        rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
        /* Now the payload cluster */
        buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
            M_PKTHDR, rxr->mbuf_sz);
        if (buf->m_pack == NULL) {
            error = ENOBUFS;
            goto fail;
        }
        mp = buf->m_pack;
        mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->ptag,
            buf->pmap, mp, pseg,
            &nsegs, BUS_DMA_NOWAIT);
        if (error != 0)
            goto fail;
        bus_dmamap_sync(rxr->ptag,
            buf->pmap, BUS_DMASYNC_PREREAD);
        /* Update descriptor */
        rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
        rxr->base[j].read.hdr_addr = 0;
    }

    /* Setup our descriptor indices */
    rxr->next_check = 0;
    rxr->next_refresh = 0;
    rxr->lro_enabled = FALSE;
    rxr->split = 0;
    rxr->bytes = 0;
    rxr->discard = FALSE;

    wr32(vsi->hw, rxr->tail, que->num_desc - 1);
    ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
    /*
    ** Now set up the LRO interface:
    */
    if (ifp->if_capenable & IFCAP_LRO) {
        int err = tcp_lro_init(lro);
        if (err) {
            if_printf(ifp, "queue %d: LRO Initialization failed!\n",
                que->me);
            goto fail;
        }
        INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
        rxr->lro_enabled = TRUE;
        lro->ifp = vsi->ifp;
    }
#endif

    bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
    IXL_RX_UNLOCK(rxr);
    return (error);
}
/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
    struct rx_ring *rxr = &que->rxr;
    struct ixl_rx_buf *buf;

    INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

    /* Cleanup any existing buffers */
    if (rxr->buffers != NULL) {
        for (int i = 0; i < que->num_desc; i++) {
            buf = &rxr->buffers[i];
            if (buf->m_head != NULL) {
                bus_dmamap_sync(rxr->htag, buf->hmap,
                    BUS_DMASYNC_POSTREAD);
                bus_dmamap_unload(rxr->htag, buf->hmap);
                buf->m_head->m_flags |= M_PKTHDR;
                m_freem(buf->m_head);
            }
            if (buf->m_pack != NULL) {
                bus_dmamap_sync(rxr->ptag, buf->pmap,
                    BUS_DMASYNC_POSTREAD);
                bus_dmamap_unload(rxr->ptag, buf->pmap);
                buf->m_pack->m_flags |= M_PKTHDR;
                m_freem(buf->m_pack);
            }
            buf->m_head = NULL;
            buf->m_pack = NULL;
            if (buf->hmap != NULL) {
                bus_dmamap_destroy(rxr->htag, buf->hmap);
                buf->hmap = NULL;
            }
            if (buf->pmap != NULL) {
                bus_dmamap_destroy(rxr->ptag, buf->pmap);
                buf->pmap = NULL;
            }
        }
        if (rxr->buffers != NULL) {
            free(rxr->buffers, M_DEVBUF);
            rxr->buffers = NULL;
        }
    }

    if (rxr->htag != NULL) {
        bus_dma_tag_destroy(rxr->htag);
        rxr->htag = NULL;
    }
    if (rxr->ptag != NULL) {
        bus_dma_tag_destroy(rxr->ptag);
        rxr->ptag = NULL;
    }

    INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
    return;
}

static __inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
    /*
     * ATM LRO is only for IPv4/TCP packets and the TCP checksum of the
     * packet should be computed by hardware. Also it should not have
     * a VLAN tag in its ethernet header.
     */
    if (rxr->lro_enabled &&
        (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
        (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
        (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
        /*
         * Send to the stack if:
         *  - LRO not enabled, or
         *  - no LRO resources, or
         *  - lro enqueue fails
         */
        if (rxr->lro.lro_cnt != 0)
            if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
                return;
    }
#endif
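    /*
     * Note: the ring lock is dropped across the if_input() hand-off
     * below so the network stack never runs with the RX ring lock
     * held; it is re-taken before returning to the receive loop.
     */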
    IXL_RX_UNLOCK(rxr);
    (*ifp->if_input)(ifp, m);
    IXL_RX_LOCK(rxr);
}


static __inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
    struct ixl_rx_buf *rbuf;

    rbuf = &rxr->buffers[i];

    if (rbuf->fmp != NULL) {/* Partial chain ? */
        rbuf->fmp->m_flags |= M_PKTHDR;
        m_freem(rbuf->fmp);
        rbuf->fmp = NULL;
    }

    /*
    ** With advanced descriptors the writeback
    ** clobbers the buffer addrs, so it's easier
    ** to just free the existing mbufs and take
    ** the normal refresh path to get new buffers
    ** and mapping.
    */
    if (rbuf->m_head) {
        m_free(rbuf->m_head);
        rbuf->m_head = NULL;
    }

    if (rbuf->m_pack) {
        m_free(rbuf->m_pack);
        rbuf->m_pack = NULL;
    }

    return;
}

#ifdef RSS
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
    struct i40e_rx_ptype_decoded decoded;
    u8 ex = 0;

    decoded = decode_rx_desc_ptype(ptype);
    ex = decoded.outer_frag;

    if (!decoded.known)
        return M_HASHTYPE_OPAQUE;

    if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
        return M_HASHTYPE_OPAQUE;

    /* Note: anything that gets to this point is IP */
    if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
        switch (decoded.inner_prot) {
        case I40E_RX_PTYPE_INNER_PROT_TCP:
            if (ex)
                return M_HASHTYPE_RSS_TCP_IPV6_EX;
            else
                return M_HASHTYPE_RSS_TCP_IPV6;
        case I40E_RX_PTYPE_INNER_PROT_UDP:
            if (ex)
                return M_HASHTYPE_RSS_UDP_IPV6_EX;
            else
                return M_HASHTYPE_RSS_UDP_IPV6;
        default:
            if (ex)
                return M_HASHTYPE_RSS_IPV6_EX;
            else
                return M_HASHTYPE_RSS_IPV6;
        }
    }
    if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
        switch (decoded.inner_prot) {
        case I40E_RX_PTYPE_INNER_PROT_TCP:
            return M_HASHTYPE_RSS_TCP_IPV4;
        case I40E_RX_PTYPE_INNER_PROT_UDP:
            if (ex)
                return M_HASHTYPE_RSS_UDP_IPV4_EX;
            else
                return M_HASHTYPE_RSS_UDP_IPV4;
        default:
            return M_HASHTYPE_RSS_IPV4;
        }
    }
    /* We should never get here!! */
    return M_HASHTYPE_OPAQUE;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  DMA'd into host memory to the upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
    struct ixl_vsi *vsi = que->vsi;
    struct rx_ring *rxr = &que->rxr;
    struct ifnet *ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
    struct lro_ctrl *lro = &rxr->lro;
    struct lro_entry *queued;
#endif
    int i, nextp, processed = 0;
    union i40e_rx_desc *cur;
    struct ixl_rx_buf *rbuf, *nbuf;

    IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
    if (netmap_rx_irq(ifp, que->me, &count)) {
        IXL_RX_UNLOCK(rxr);
        return (FALSE);
    }
#endif /* DEV_NETMAP */
    for (i = rxr->next_check; count != 0;) {
        struct mbuf *sendmp, *mh, *mp;
        u32 rsc, status, error;
        u16 hlen, plen, vtag;
        u64 qword;
        u8 ptype;
        bool eop;

        /* Sync the ring. */
        bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

        cur = &rxr->base[i];
        qword = le64toh(cur->wb.qword1.status_error_len);
        status = (qword & I40E_RXD_QW1_STATUS_MASK)
            >> I40E_RXD_QW1_STATUS_SHIFT;
        error = (qword & I40E_RXD_QW1_ERROR_MASK)
            >> I40E_RXD_QW1_ERROR_SHIFT;
        plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
            >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
        hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
            >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
        ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
            >> I40E_RXD_QW1_PTYPE_SHIFT;

        if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
            ++rxr->not_done;
            break;
        }
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
            break;

        count--;
        sendmp = NULL;
        nbuf = NULL;
        rsc = 0;
        cur->wb.qword1.status_error_len = 0;
        rbuf = &rxr->buffers[i];
        mh = rbuf->m_head;
        mp = rbuf->m_pack;
        eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
        if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
            vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
        else
            vtag = 0;

        /*
        ** Make sure bad packets are discarded,
        ** note that only EOP descriptor has valid
        ** error results.
        */
        if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
            rxr->discarded++;
            ixl_rx_discard(rxr, i);
            goto next_desc;
        }

        /* Prefetch the next buffer */
        if (!eop) {
            nextp = i + 1;
            if (nextp == que->num_desc)
                nextp = 0;
            nbuf = &rxr->buffers[nextp];
            prefetch(nbuf);
        }

        /*
        ** The header mbuf is ONLY used when header
        ** split is enabled, otherwise we get normal
        ** behavior, ie, both header and payload
        ** are DMA'd into the payload buffer.
        **
        ** Rather than using the fmp/lmp global pointers
        ** we now keep the head of a packet chain in the
        ** buffer struct and pass this along from one
        ** descriptor to the next, until we get EOP.
        */
        if (rxr->hdr_split && (rbuf->fmp == NULL)) {
            if (hlen > IXL_RX_HDR)
                hlen = IXL_RX_HDR;
            mh->m_len = hlen;
            mh->m_flags |= M_PKTHDR;
            mh->m_next = NULL;
            mh->m_pkthdr.len = mh->m_len;
            /* Null buf pointer so it is refreshed */
            rbuf->m_head = NULL;
            /*
            ** Check the payload length, this
            ** could be zero if it's a small
            ** packet.
            */
            if (plen > 0) {
                mp->m_len = plen;
                mp->m_next = NULL;
                mp->m_flags &= ~M_PKTHDR;
                mh->m_next = mp;
                mh->m_pkthdr.len += mp->m_len;
                /* Null buf pointer so it is refreshed */
                rbuf->m_pack = NULL;
                rxr->split++;
            }
            /*
            ** Now create the forward
            ** chain so when complete
            ** we won't have to.
            */
            if (eop == 0) {
                /* stash the chain head */
                nbuf->fmp = mh;
                /* Make forward chain */
                if (plen)
                    mp->m_next = nbuf->m_pack;
                else
                    mh->m_next = nbuf->m_pack;
            } else {
                /* Singlet, prepare to send */
                sendmp = mh;
                if (vtag) {
                    sendmp->m_pkthdr.ether_vtag = vtag;
                    sendmp->m_flags |= M_VLANTAG;
                }
            }
        } else {
            /*
            ** Either no header split, or a
            ** secondary piece of a fragmented
            ** split packet.
            */
            mp->m_len = plen;
            /*
            ** See if there is a stored head
            ** that determines what we are
            */
            sendmp = rbuf->fmp;
            rbuf->m_pack = rbuf->fmp = NULL;

            if (sendmp != NULL) /* secondary frag */
                sendmp->m_pkthdr.len += mp->m_len;
            else {
                /* first desc of a non-ps chain */
                sendmp = mp;
                sendmp->m_flags |= M_PKTHDR;
                sendmp->m_pkthdr.len = mp->m_len;
                if (vtag) {
                    sendmp->m_pkthdr.ether_vtag = vtag;
                    sendmp->m_flags |= M_VLANTAG;
                }
            }
            /* Pass the head pointer on */
            if (eop == 0) {
                nbuf->fmp = sendmp;
                sendmp = NULL;
                mp->m_next = nbuf->m_pack;
            }
        }
        ++processed;
        /* Sending this frame? */
        if (eop) {
            sendmp->m_pkthdr.rcvif = ifp;
            /* gather stats */
            rxr->rx_packets++;
            rxr->rx_bytes += sendmp->m_pkthdr.len;
            /* capture data for dynamic ITR adjustment */
            rxr->packets++;
            rxr->bytes += sendmp->m_pkthdr.len;
            if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
                ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
            sendmp->m_pkthdr.flowid =
                le32toh(cur->wb.qword0.hi_dword.rss);
            M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
            sendmp->m_pkthdr.flowid = que->msix;
            M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
        }
next_desc:
        bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        /* Advance our pointers to the next descriptor. */
        if (++i == que->num_desc)
            i = 0;

        /* Now send to the stack or do LRO */
        if (sendmp != NULL) {
            rxr->next_check = i;
            ixl_rx_input(rxr, ifp, sendmp, ptype);
            i = rxr->next_check;
        }

        /* Every 8 descriptors we go to refresh mbufs */
        if (processed == 8) {
            ixl_refresh_mbufs(que, i);
            processed = 0;
        }
    }

    /* Refresh any remaining buf structs */
    if (ixl_rx_unrefreshed(que))
        ixl_refresh_mbufs(que, i);

    rxr->next_check = i;

#if defined(INET6) || defined(INET)
    /*
     * Flush any outstanding LRO work
     */
    while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, queued);
    }
#endif

    IXL_RX_UNLOCK(rxr);
    return (FALSE);
}
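/*
 * Note: on success the flags set in ixl_rx_checksum() below are what
 * the stack expects from a device that has verified the checksums:
 * CSUM_IP_CHECKED | CSUM_IP_VALID for the IP header, and
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with a csum_data of
 * 0xffff for the L4 payload, so the checksums are not recomputed in
 * software.
 */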
/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf *mp, u32 status, u32 error, u8 ptype)
{
    struct i40e_rx_ptype_decoded decoded;

    decoded = decode_rx_desc_ptype(ptype);

    /* Errors? */
    if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
        (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
        mp->m_pkthdr.csum_flags = 0;
        return;
    }

    /* IPv6 with extension headers likely have bad csum */
    if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
        decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
        if (status &
            (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
            mp->m_pkthdr.csum_flags = 0;
            return;
        }

    /* IP Checksum Good */
    mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
    mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

    if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
        mp->m_pkthdr.csum_flags |=
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
        mp->m_pkthdr.csum_data |= htons(0xffff);
    }
    return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
    struct ixl_vsi *vsi;

    vsi = if_getsoftc(ifp);

    switch (cnt) {
    case IFCOUNTER_IPACKETS:
        return (vsi->ipackets);
    case IFCOUNTER_IERRORS:
        return (vsi->ierrors);
    case IFCOUNTER_OPACKETS:
        return (vsi->opackets);
    case IFCOUNTER_OERRORS:
        return (vsi->oerrors);
    case IFCOUNTER_COLLISIONS:
        /* Collisions are by standard impossible in 40G/10G Ethernet */
        return (0);
    case IFCOUNTER_IBYTES:
        return (vsi->ibytes);
    case IFCOUNTER_OBYTES:
        return (vsi->obytes);
    case IFCOUNTER_IMCASTS:
        return (vsi->imcasts);
    case IFCOUNTER_OMCASTS:
        return (vsi->omcasts);
    case IFCOUNTER_IQDROPS:
        return (vsi->iqdrops);
    case IFCOUNTER_OQDROPS:
        return (vsi->oqdrops);
    case IFCOUNTER_NOPROTO:
        return (vsi->noproto);
    default:
        return (if_get_counter_default(ifp, cnt));
    }
}
#endif