/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated out to allow usage by
**  both the BASE and the VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static __inline void ixl_rx_discard(struct rx_ring *, int);
static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
#endif /* DEV_NETMAP */

/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi		*vsi = ifp->if_softc;
	struct ixl_queue	*que;
	struct tx_ring		*txr;
	int			err, i;
#ifdef RSS
	u32			bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is set up correctly, it should be
	** the same bucket as the CPU we are currently on.
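	**
	** (Note: rss_hash2bucket() is only compiled in when the kernel
	** is built with "options RSS"; without it the raw flowid is
	** simply taken modulo the number of queues, as the fallback
	** below does.)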
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;
	/*
	** This may not be perfect, but until something
	** better comes along it will keep from scheduling
	** on stalled queues.
	*/
	if (((1 << i) & vsi->active_queues) == 0)
		i = ffsl(vsi->active_queues);

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue	*que = txr->que;
	struct ixl_vsi		*vsi = que->vsi;
	struct mbuf		*next;
	int			err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue	*que = arg;
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	struct ifnet		*ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi	*vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring	*txr = &que->txr;
		struct mbuf	*m;
		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf	*m;
	int		num = 0, mss;
	bool		ret = FALSE;

	mss = mp->m_pkthdr.tso_segsz;
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len;
		if (mss < 1)
			break;
		if (m->m_next == NULL)
			break;
	}
	if (num > IXL_SPARSE_CHAIN)
		ret = TRUE;

	return (ret);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
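 *  	(Called from ixl_mq_start_locked() above, with the TX ring
 *  	lock held.)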
 *  	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*txd = NULL;
	struct mbuf		*m_head, *m;
	int			i, j, error, nsegs, maxsegs;
	int			first, last = 0;
	u16			vtag = 0;
	u32			cmd, off;
	bus_dmamap_t		map;
	bus_dma_tag_t		tag;
	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];

	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;
	maxsegs = IXL_MAX_TX_SEGS;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		maxsegs = IXL_MAX_TSO_SEGS;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			que->tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			que->tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		que->tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		que->tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
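	/*
	 * (The second quad word built above packs, from low to high
	 * bits: the descriptor type, the CMD flags, the MAC/IP/L4
	 * header offsets, the buffer size and the L2TAG1 (VLAN) field;
	 * see the I40E_TXD_QW1_* definitions in the shared i40e code.)
	 */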
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	ixl_flush(hw);
	/* Mark outstanding work */
	if (que->busy == 0)
		que->busy = 1;
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_tx_buf	*buf;
	int			error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TX_SEGS,		/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tx_tag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXL_TSO_SIZE,		/* maxsize */
			       IXL_MAX_TSO_SEGS,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txr->tso_tag))) {
		device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *  	- called by init, it clears the descriptor ring,
 *  	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif

	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the
		 * corresponding netmap slot index, si.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map,
			    NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip = NULL;
#endif
	struct tcphdr			*th = NULL;
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
	int				elen, ip_hlen = 0, tcp_hlen;
	u16				etype;
	u8				ipproto = 0;
	bool				tso = FALSE;

	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
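	 * (elen ends up as the Ethernet header length in bytes; it is
	 * written into the descriptor MACLEN field in 2-byte units and
	 * ip_hlen into IPLEN in 4-byte units, which is why they are
	 * shifted right by 1 and by 2 further below.)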
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* The IP checksum must be recalculated with TSO */
		if (tso)
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	switch (ipproto) {
	case IPPROTO_TCP:
		tcp_hlen = th->th_off << 2;
		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (tcp_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
#ifdef IXL_FDIR
		ixl_atr(que, th, etype);
#endif
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		break;

	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		/* Fall Thru */
	default:
		break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring			*txr = &que->txr;
	struct i40e_tx_context_desc	*TXD;
	struct ixl_tx_buf		*buf;
	u32				cmd, mss, type, tsolen;
	u16				etype;
	int				idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip;
#endif
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr			*th;
#endif
	u64				type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
		    __func__, ntohs(etype));
		return (FALSE);
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return (FALSE);

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return (TRUE);
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring	*txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
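 *
 *  (Completion is detected via head write-back rather than a DD bit:
 *  the hardware DMAs its current HEAD index into the word just past
 *  the last descriptor, which ixl_get_tx_head() reads back.)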
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, head, done, processed;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*tx_desc, *eop_desc;

	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEV_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Hang detection: we know there is work outstanding,
	** or the early return above would have been taken, so
	** count an unsuccessful pass; if the count grows too
	** large the local timer will consider the queue hung.
	** If anything was cleaned, reset the state.
	*/
	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
		++que->busy;

	if (processed)
		que->busy = 1; /* Note this turns off HUNG */

	/*
	 * If there are no pending descriptors, clear the timeout.
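	 * (que->busy doubles as the hang-detection state: 0 means idle,
	 * 1 means work is outstanding, and it counts up on passes that
	 * clean nothing until the local timer declares the queue hung.)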
	 */
	if (txr->avail == que->num_desc) {
		que->busy = 0;
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, and can simply
 *     be called again later to retry.
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct ixl_rx_buf	*buf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
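 *
 *  (Two DMA tags are created below: a small header tag sized to
 *  MSIZE for the optional header-split buffer, and a payload tag
 *  sized for up to a 16KB jumbo cluster.)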
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_rx_buf	*buf;
	int			i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       MSIZE,			/* maxsize */
			       1,			/* nsegments */
			       MSIZE,			/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,		/* parent */
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       MJUM16BYTES,		/* maxsize */
			       1,			/* nsegments */
			       MJUM16BYTES,		/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
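 *
 *  (Header split is left disabled here (rxr->hdr_split = FALSE), so
 *  only the payload cluster is posted for each descriptor.)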
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet		*ifp = vsi->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	struct ixl_rx_buf	*buf;
	bus_dma_segment_t	pseg[1], hseg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf	*mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ).
		 * No need to allocate an mbuf, so end the block with
		 * a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate mbufs if not
		** doing header split, it's wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}

	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n",
			    que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_rx_buf	*buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static __inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * At the moment LRO is only for IPv4/TCP packets, and the TCP
	 * checksum of the packet should have been computed by hardware.
	 * Also, it should not have a VLAN tag in its ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static __inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf	*rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {	/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

#ifdef RSS
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
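**
** (ptype is the 8-bit packet type from the RX descriptor; it is
** decoded with decode_rx_desc_ptype(), and anything unknown or
** non-IP falls back to M_HASHTYPE_OPAQUE.)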
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded	decoded;
	u8				ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			if (ex)
				return M_HASHTYPE_RSS_TCP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_TCP_IPV6;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			if (ex)
				return M_HASHTYPE_RSS_IPV6_EX;
			else
				return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV4_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	struct ifnet		*ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
#endif
	int			i, nextp, processed = 0;
	union i40e_rx_desc	*cur;
	struct ixl_rx_buf	*rbuf, *nbuf;

	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf	*sendmp, *mh, *mp;
		u32		rsc, status, error;
		u16		hlen, plen, vtag;
		u64		qword;
		u8		ptype;
		bool		eop;

		/* Sync the ring. */
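		/*
		 * (Each iteration parses the descriptor's writeback
		 * qword1 into status, error, lengths and ptype using the
		 * I40E_RXD_QW1_* masks and shifts, and only proceeds once
		 * the DD bit is set.)
		 */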
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded;
		** note that only the EOP descriptor has valid
		** error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->discarded++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length; this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we won't have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/* Errors? */
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}

	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif