/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the PF and VF drivers.
39 */ 40 41 #ifndef IXL_STANDALONE_BUILD 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 #include "opt_rss.h" 45 #endif 46 47 #include "ixl.h" 48 49 #ifdef RSS 50 #include <net/rss_config.h> 51 #endif 52 53 /* Local Prototypes */ 54 static void ixl_rx_checksum(struct mbuf *, u32, u32, u8); 55 static void ixl_refresh_mbufs(struct ixl_queue *, int); 56 static int ixl_xmit(struct ixl_queue *, struct mbuf **); 57 static int ixl_tx_setup_offload(struct ixl_queue *, 58 struct mbuf *, u32 *, u32 *); 59 static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *); 60 61 static inline void ixl_rx_discard(struct rx_ring *, int); 62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *, 63 struct mbuf *, u8); 64 65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp); 66 static inline u32 ixl_get_tx_head(struct ixl_queue *que); 67 68 #ifdef DEV_NETMAP 69 #include <dev/netmap/if_ixl_netmap.h> 70 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1; 71 #endif /* DEV_NETMAP */ 72 73 /* 74 * @key key is saved into this parameter 75 */ 76 void 77 ixl_get_default_rss_key(u32 *key) 78 { 79 MPASS(key != NULL); 80 81 u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 82 0x183cfd8c, 0xce880440, 0x580cbc3c, 83 0x35897377, 0x328b25e1, 0x4fa98922, 84 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 85 0x0, 0x0, 0x0}; 86 87 bcopy(rss_seed, key, IXL_RSS_KEY_SIZE); 88 } 89 90 /* 91 ** Multiqueue Transmit driver 92 */ 93 int 94 ixl_mq_start(struct ifnet *ifp, struct mbuf *m) 95 { 96 struct ixl_vsi *vsi = ifp->if_softc; 97 struct ixl_queue *que; 98 struct tx_ring *txr; 99 int err, i; 100 #ifdef RSS 101 u32 bucket_id; 102 #endif 103 104 /* 105 ** Which queue to use: 106 ** 107 ** When doing RSS, map it to the same outbound 108 ** queue as the incoming flow would be mapped to. 109 ** If everything is setup correctly, it should be 110 ** the same bucket that the current CPU we're on is. 111 */ 112 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 113 #ifdef RSS 114 if (rss_hash2bucket(m->m_pkthdr.flowid, 115 M_HASHTYPE_GET(m), &bucket_id) == 0) { 116 i = bucket_id % vsi->num_queues; 117 } else 118 #endif 119 i = m->m_pkthdr.flowid % vsi->num_queues; 120 } else 121 i = curcpu % vsi->num_queues; 122 123 que = &vsi->queues[i]; 124 txr = &que->txr; 125 126 err = drbr_enqueue(ifp, txr->br, m); 127 if (err) 128 return (err); 129 if (IXL_TX_TRYLOCK(txr)) { 130 ixl_mq_start_locked(ifp, txr); 131 IXL_TX_UNLOCK(txr); 132 } else 133 taskqueue_enqueue(que->tq, &que->tx_task); 134 135 return (0); 136 } 137 138 int 139 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 140 { 141 struct ixl_queue *que = txr->que; 142 struct ixl_vsi *vsi = que->vsi; 143 struct mbuf *next; 144 int err = 0; 145 146 147 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || 148 vsi->link_active == 0) 149 return (ENETDOWN); 150 151 /* Process the transmit queue */ 152 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 153 if ((err = ixl_xmit(que, &next)) != 0) { 154 if (next == NULL) 155 drbr_advance(ifp, txr->br); 156 else 157 drbr_putback(ifp, txr->br, next); 158 break; 159 } 160 drbr_advance(ifp, txr->br); 161 /* Send a copy of the frame to the BPF listener */ 162 ETHER_BPF_MTAP(ifp, next); 163 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 164 break; 165 } 166 167 if (txr->avail < IXL_TX_CLEANUP_THRESHOLD) 168 ixl_txeof(que); 169 170 return (err); 171 } 172 173 /* 174 * Called from a taskqueue to drain queued transmit packets. 
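 *
 * A minimal sketch (not code from this file) of how the deferred-start
 * task is expected to be wired up by the PF/VF attach code; que->tq and
 * que->tx_task are the fields referenced by ixl_mq_start() above:
 *
 *	TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
 *	que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT,
 *	    taskqueue_thread_enqueue, &que->tq);
 *	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
 *	    device_get_nameunit(dev));
 *
 * With that in place, ixl_mq_start() can fall back to
 * taskqueue_enqueue(que->tq, &que->tx_task) whenever the TX lock is
 * already held.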
175 */ 176 void 177 ixl_deferred_mq_start(void *arg, int pending) 178 { 179 struct ixl_queue *que = arg; 180 struct tx_ring *txr = &que->txr; 181 struct ixl_vsi *vsi = que->vsi; 182 struct ifnet *ifp = vsi->ifp; 183 184 IXL_TX_LOCK(txr); 185 if (!drbr_empty(ifp, txr->br)) 186 ixl_mq_start_locked(ifp, txr); 187 IXL_TX_UNLOCK(txr); 188 } 189 190 /* 191 ** Flush all queue ring buffers 192 */ 193 void 194 ixl_qflush(struct ifnet *ifp) 195 { 196 struct ixl_vsi *vsi = ifp->if_softc; 197 198 for (int i = 0; i < vsi->num_queues; i++) { 199 struct ixl_queue *que = &vsi->queues[i]; 200 struct tx_ring *txr = &que->txr; 201 struct mbuf *m; 202 IXL_TX_LOCK(txr); 203 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 204 m_freem(m); 205 IXL_TX_UNLOCK(txr); 206 } 207 if_qflush(ifp); 208 } 209 210 /* 211 ** Find mbuf chains passed to the driver 212 ** that are 'sparse', using more than 8 213 ** mbufs to deliver an mss-size chunk of data 214 */ 215 static inline bool 216 ixl_tso_detect_sparse(struct mbuf *mp) 217 { 218 struct mbuf *m; 219 int num, mss; 220 221 num = 0; 222 mss = mp->m_pkthdr.tso_segsz; 223 224 /* Exclude first mbuf; assume it contains all headers */ 225 for (m = mp->m_next; m != NULL; m = m->m_next) { 226 if (m == NULL) 227 break; 228 num++; 229 mss -= m->m_len % mp->m_pkthdr.tso_segsz; 230 231 if (mss < 1) { 232 if (num > IXL_SPARSE_CHAIN) 233 return (true); 234 num = (mss == 0) ? 0 : 1; 235 mss += mp->m_pkthdr.tso_segsz; 236 } 237 } 238 239 return (false); 240 } 241 242 243 /********************************************************************* 244 * 245 * This routine maps the mbufs to tx descriptors, allowing the 246 * TX engine to transmit the packets. 247 * - return 0 on success, positive on failure 248 * 249 **********************************************************************/ 250 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 251 252 static int 253 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp) 254 { 255 struct ixl_vsi *vsi = que->vsi; 256 struct i40e_hw *hw = vsi->hw; 257 struct tx_ring *txr = &que->txr; 258 struct ixl_tx_buf *buf; 259 struct i40e_tx_desc *txd = NULL; 260 struct mbuf *m_head, *m; 261 int i, j, error, nsegs; 262 int first, last = 0; 263 u16 vtag = 0; 264 u32 cmd, off; 265 bus_dmamap_t map; 266 bus_dma_tag_t tag; 267 bus_dma_segment_t segs[IXL_MAX_TSO_SEGS]; 268 269 cmd = off = 0; 270 m_head = *m_headp; 271 272 /* 273 * Important to capture the first descriptor 274 * used because it will contain the index of 275 * the one we tell the hardware to report back 276 */ 277 first = txr->next_avail; 278 buf = &txr->buffers[first]; 279 map = buf->map; 280 tag = txr->tx_tag; 281 282 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 283 /* Use larger mapping for TSO */ 284 tag = txr->tso_tag; 285 if (ixl_tso_detect_sparse(m_head)) { 286 m = m_defrag(m_head, M_NOWAIT); 287 if (m == NULL) { 288 m_freem(*m_headp); 289 *m_headp = NULL; 290 return (ENOBUFS); 291 } 292 *m_headp = m; 293 } 294 } 295 296 /* 297 * Map the packet for DMA. 
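 *
 * The code below is the usual busdma two-pass load: try
 * bus_dmamap_load_mbuf_sg(), and if the chain has more segments than the
 * tag allows (EFBIG), linearize it with m_defrag() and try once more.
 * A generic sketch of the same idiom:
 *
 *	error = bus_dmamap_load_mbuf_sg(tag, map, *mp, segs, &nsegs,
 *	    BUS_DMA_NOWAIT);
 *	if (error == EFBIG) {
 *		struct mbuf *m = m_defrag(*mp, M_NOWAIT);
 *		if (m == NULL)
 *			return (ENOBUFS);	/* give up, drop the packet */
 *		*mp = m;
 *		error = bus_dmamap_load_mbuf_sg(tag, map, *mp, segs,
 *		    &nsegs, BUS_DMA_NOWAIT);
 *	}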
298 */ 299 error = bus_dmamap_load_mbuf_sg(tag, map, 300 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 301 302 if (error == EFBIG) { 303 struct mbuf *m; 304 305 m = m_defrag(*m_headp, M_NOWAIT); 306 if (m == NULL) { 307 que->mbuf_defrag_failed++; 308 m_freem(*m_headp); 309 *m_headp = NULL; 310 return (ENOBUFS); 311 } 312 *m_headp = m; 313 314 /* Try it again */ 315 error = bus_dmamap_load_mbuf_sg(tag, map, 316 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 317 318 if (error != 0) { 319 que->tx_dmamap_failed++; 320 m_freem(*m_headp); 321 *m_headp = NULL; 322 return (error); 323 } 324 } else if (error != 0) { 325 que->tx_dmamap_failed++; 326 m_freem(*m_headp); 327 *m_headp = NULL; 328 return (error); 329 } 330 331 /* Make certain there are enough descriptors */ 332 if (nsegs > txr->avail - 2) { 333 txr->no_desc++; 334 error = ENOBUFS; 335 goto xmit_fail; 336 } 337 m_head = *m_headp; 338 339 /* Set up the TSO/CSUM offload */ 340 if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 341 error = ixl_tx_setup_offload(que, m_head, &cmd, &off); 342 if (error) 343 goto xmit_fail; 344 } 345 346 cmd |= I40E_TX_DESC_CMD_ICRC; 347 /* Grab the VLAN tag */ 348 if (m_head->m_flags & M_VLANTAG) { 349 cmd |= I40E_TX_DESC_CMD_IL2TAG1; 350 vtag = htole16(m_head->m_pkthdr.ether_vtag); 351 } 352 353 i = txr->next_avail; 354 for (j = 0; j < nsegs; j++) { 355 bus_size_t seglen; 356 357 buf = &txr->buffers[i]; 358 buf->tag = tag; /* Keep track of the type tag */ 359 txd = &txr->base[i]; 360 seglen = segs[j].ds_len; 361 362 txd->buffer_addr = htole64(segs[j].ds_addr); 363 txd->cmd_type_offset_bsz = 364 htole64(I40E_TX_DESC_DTYPE_DATA 365 | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT) 366 | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT) 367 | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) 368 | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT)); 369 370 last = i; /* descriptor that will get completion IRQ */ 371 372 if (++i == que->num_desc) 373 i = 0; 374 375 buf->m_head = NULL; 376 buf->eop_index = -1; 377 } 378 /* Set the last descriptor for report */ 379 txd->cmd_type_offset_bsz |= 380 htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT)); 381 txr->avail -= nsegs; 382 txr->next_avail = i; 383 384 buf->m_head = m_head; 385 /* Swap the dma map between the first and last descriptor */ 386 txr->buffers[first].map = buf->map; 387 buf->map = map; 388 bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE); 389 390 /* Set the index of the descriptor that will be marked done */ 391 buf = &txr->buffers[first]; 392 buf->eop_index = last; 393 394 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 395 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 396 /* 397 * Advance the Transmit Descriptor Tail (Tdt), this tells the 398 * hardware that this frame is available to transmit. 399 */ 400 ++txr->total_packets; 401 wr32(hw, txr->tail, i); 402 403 /* Mark outstanding work */ 404 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); 405 return (0); 406 407 xmit_fail: 408 bus_dmamap_unload(tag, buf->map); 409 return (error); 410 } 411 412 413 /********************************************************************* 414 * 415 * Allocate memory for tx_buffer structures. The tx_buffer stores all 416 * the information needed to transmit a packet on the wire. This is 417 * called only once at attach, setup is done every reset. 
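 *
 * For reference, the data descriptor word built by ixl_xmit() above packs
 * its fields as follows (a restatement of that code, not new behavior):
 *
 *	qw1 = I40E_TX_DESC_DTYPE_DATA
 *	    | ((u64)cmd    << I40E_TXD_QW1_CMD_SHIFT)
 *	    | ((u64)off    << I40E_TXD_QW1_OFFSET_SHIFT)
 *	    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
 *	    | ((u64)vtag   << I40E_TXD_QW1_L2TAG1_SHIFT);
 *
 * Each segment carries its own length, while the EOP/RS bits (IXL_TXD_CMD)
 * are OR'd into the last descriptor of the frame only.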
418 * 419 **********************************************************************/ 420 int 421 ixl_allocate_tx_data(struct ixl_queue *que) 422 { 423 struct tx_ring *txr = &que->txr; 424 struct ixl_vsi *vsi = que->vsi; 425 device_t dev = vsi->dev; 426 struct ixl_tx_buf *buf; 427 int error = 0; 428 429 /* 430 * Setup DMA descriptor areas. 431 */ 432 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 433 1, 0, /* alignment, bounds */ 434 BUS_SPACE_MAXADDR, /* lowaddr */ 435 BUS_SPACE_MAXADDR, /* highaddr */ 436 NULL, NULL, /* filter, filterarg */ 437 IXL_TSO_SIZE, /* maxsize */ 438 IXL_MAX_TX_SEGS, /* nsegments */ 439 PAGE_SIZE, /* maxsegsize */ 440 0, /* flags */ 441 NULL, /* lockfunc */ 442 NULL, /* lockfuncarg */ 443 &txr->tx_tag))) { 444 device_printf(dev,"Unable to allocate TX DMA tag\n"); 445 goto fail; 446 } 447 448 /* Make a special tag for TSO */ 449 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 450 1, 0, /* alignment, bounds */ 451 BUS_SPACE_MAXADDR, /* lowaddr */ 452 BUS_SPACE_MAXADDR, /* highaddr */ 453 NULL, NULL, /* filter, filterarg */ 454 IXL_TSO_SIZE, /* maxsize */ 455 IXL_MAX_TSO_SEGS, /* nsegments */ 456 PAGE_SIZE, /* maxsegsize */ 457 0, /* flags */ 458 NULL, /* lockfunc */ 459 NULL, /* lockfuncarg */ 460 &txr->tso_tag))) { 461 device_printf(dev,"Unable to allocate TX TSO DMA tag\n"); 462 goto fail; 463 } 464 465 if (!(txr->buffers = 466 (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) * 467 que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 468 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 469 error = ENOMEM; 470 goto fail; 471 } 472 473 /* Create the descriptor buffer default dma maps */ 474 buf = txr->buffers; 475 for (int i = 0; i < que->num_desc; i++, buf++) { 476 buf->tag = txr->tx_tag; 477 error = bus_dmamap_create(buf->tag, 0, &buf->map); 478 if (error != 0) { 479 device_printf(dev, "Unable to create TX DMA map\n"); 480 goto fail; 481 } 482 } 483 fail: 484 return (error); 485 } 486 487 488 /********************************************************************* 489 * 490 * (Re)Initialize a queue transmit ring. 491 * - called by init, it clears the descriptor ring, 492 * and frees any stale mbufs 493 * 494 **********************************************************************/ 495 void 496 ixl_init_tx_ring(struct ixl_queue *que) 497 { 498 #ifdef DEV_NETMAP 499 struct netmap_adapter *na = NA(que->vsi->ifp); 500 struct netmap_slot *slot; 501 #endif /* DEV_NETMAP */ 502 struct tx_ring *txr = &que->txr; 503 struct ixl_tx_buf *buf; 504 505 /* Clear the old ring contents */ 506 IXL_TX_LOCK(txr); 507 508 #ifdef DEV_NETMAP 509 /* 510 * (under lock): if in netmap mode, do some consistency 511 * checks and set slot to entry 0 of the netmap ring. 512 */ 513 slot = netmap_reset(na, NR_TX, que->me, 0); 514 #endif /* DEV_NETMAP */ 515 516 bzero((void *)txr->base, 517 (sizeof(struct i40e_tx_desc)) * que->num_desc); 518 519 /* Reset indices */ 520 txr->next_avail = 0; 521 txr->next_to_clean = 0; 522 523 /* Reset watchdog status */ 524 txr->watchdog_timer = 0; 525 526 #ifdef IXL_FDIR 527 /* Initialize flow director */ 528 txr->atr_rate = ixl_atr_rate; 529 txr->atr_count = 0; 530 #endif 531 /* Free any existing tx mbufs. 
*/ 532 buf = txr->buffers; 533 for (int i = 0; i < que->num_desc; i++, buf++) { 534 if (buf->m_head != NULL) { 535 bus_dmamap_sync(buf->tag, buf->map, 536 BUS_DMASYNC_POSTWRITE); 537 bus_dmamap_unload(buf->tag, buf->map); 538 m_freem(buf->m_head); 539 buf->m_head = NULL; 540 } 541 #ifdef DEV_NETMAP 542 /* 543 * In netmap mode, set the map for the packet buffer. 544 * NOTE: Some drivers (not this one) also need to set 545 * the physical buffer address in the NIC ring. 546 * netmap_idx_n2k() maps a nic index, i, into the corresponding 547 * netmap slot index, si 548 */ 549 if (slot) { 550 int si = netmap_idx_n2k(&na->tx_rings[que->me], i); 551 netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si)); 552 } 553 #endif /* DEV_NETMAP */ 554 /* Clear the EOP index */ 555 buf->eop_index = -1; 556 } 557 558 /* Set number of descriptors available */ 559 txr->avail = que->num_desc; 560 561 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 562 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 563 IXL_TX_UNLOCK(txr); 564 } 565 566 567 /********************************************************************* 568 * 569 * Free transmit ring related data structures. 570 * 571 **********************************************************************/ 572 void 573 ixl_free_que_tx(struct ixl_queue *que) 574 { 575 struct tx_ring *txr = &que->txr; 576 struct ixl_tx_buf *buf; 577 578 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 579 580 for (int i = 0; i < que->num_desc; i++) { 581 buf = &txr->buffers[i]; 582 if (buf->m_head != NULL) { 583 bus_dmamap_sync(buf->tag, buf->map, 584 BUS_DMASYNC_POSTWRITE); 585 bus_dmamap_unload(buf->tag, 586 buf->map); 587 m_freem(buf->m_head); 588 buf->m_head = NULL; 589 if (buf->map != NULL) { 590 bus_dmamap_destroy(buf->tag, 591 buf->map); 592 buf->map = NULL; 593 } 594 } else if (buf->map != NULL) { 595 bus_dmamap_unload(buf->tag, 596 buf->map); 597 bus_dmamap_destroy(buf->tag, 598 buf->map); 599 buf->map = NULL; 600 } 601 } 602 if (txr->br != NULL) 603 buf_ring_free(txr->br, M_DEVBUF); 604 if (txr->buffers != NULL) { 605 free(txr->buffers, M_DEVBUF); 606 txr->buffers = NULL; 607 } 608 if (txr->tx_tag != NULL) { 609 bus_dma_tag_destroy(txr->tx_tag); 610 txr->tx_tag = NULL; 611 } 612 if (txr->tso_tag != NULL) { 613 bus_dma_tag_destroy(txr->tso_tag); 614 txr->tso_tag = NULL; 615 } 616 617 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 618 return; 619 } 620 621 /********************************************************************* 622 * 623 * Setup descriptor for hw offloads 624 * 625 **********************************************************************/ 626 627 static int 628 ixl_tx_setup_offload(struct ixl_queue *que, 629 struct mbuf *mp, u32 *cmd, u32 *off) 630 { 631 struct ether_vlan_header *eh; 632 #ifdef INET 633 struct ip *ip = NULL; 634 #endif 635 struct tcphdr *th = NULL; 636 #ifdef INET6 637 struct ip6_hdr *ip6; 638 #endif 639 int elen, ip_hlen = 0, tcp_hlen; 640 u16 etype; 641 u8 ipproto = 0; 642 bool tso = FALSE; 643 644 /* Set up the TSO context descriptor if required */ 645 if (mp->m_pkthdr.csum_flags & CSUM_TSO) { 646 tso = ixl_tso_setup(que, mp); 647 if (tso) 648 ++que->tso; 649 else 650 return (ENXIO); 651 } 652 653 /* 654 * Determine where frame payload starts. 655 * Jump over vlan headers if already present, 656 * helpful for QinQ too. 
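 *
 * Note on units (a reading of the shifts used below, not a spec quote):
 * MACLEN is written in 2-byte words and IPLEN/L4LEN in 4-byte words,
 * which is why the code does
 *
 *	*off |= (elen >> 1)     << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
 *	*off |= (ip_hlen >> 2)  << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
 *	*off |= (tcp_hlen >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 *
 * so a plain 14-byte Ethernet header encodes as 7 and a 20-byte IPv4
 * header as 5.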
657 */ 658 eh = mtod(mp, struct ether_vlan_header *); 659 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 660 etype = ntohs(eh->evl_proto); 661 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 662 } else { 663 etype = ntohs(eh->evl_encap_proto); 664 elen = ETHER_HDR_LEN; 665 } 666 667 switch (etype) { 668 #ifdef INET 669 case ETHERTYPE_IP: 670 ip = (struct ip *)(mp->m_data + elen); 671 ip_hlen = ip->ip_hl << 2; 672 ipproto = ip->ip_p; 673 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 674 /* The IP checksum must be recalculated with TSO */ 675 if (tso) 676 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 677 else 678 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 679 break; 680 #endif 681 #ifdef INET6 682 case ETHERTYPE_IPV6: 683 ip6 = (struct ip6_hdr *)(mp->m_data + elen); 684 ip_hlen = sizeof(struct ip6_hdr); 685 ipproto = ip6->ip6_nxt; 686 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 687 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 688 break; 689 #endif 690 default: 691 break; 692 } 693 694 *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 695 *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 696 697 switch (ipproto) { 698 case IPPROTO_TCP: 699 tcp_hlen = th->th_off << 2; 700 if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) { 701 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 702 *off |= (tcp_hlen >> 2) << 703 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 704 } 705 #ifdef IXL_FDIR 706 ixl_atr(que, th, etype); 707 #endif 708 break; 709 case IPPROTO_UDP: 710 if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) { 711 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 712 *off |= (sizeof(struct udphdr) >> 2) << 713 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 714 } 715 break; 716 717 case IPPROTO_SCTP: 718 if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) { 719 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 720 *off |= (sizeof(struct sctphdr) >> 2) << 721 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 722 } 723 /* Fall Thru */ 724 default: 725 break; 726 } 727 728 return (0); 729 } 730 731 732 /********************************************************************** 733 * 734 * Setup context for hardware segmentation offload (TSO) 735 * 736 **********************************************************************/ 737 static bool 738 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp) 739 { 740 struct tx_ring *txr = &que->txr; 741 struct i40e_tx_context_desc *TXD; 742 struct ixl_tx_buf *buf; 743 u32 cmd, mss, type, tsolen; 744 u16 etype; 745 int idx, elen, ip_hlen, tcp_hlen; 746 struct ether_vlan_header *eh; 747 #ifdef INET 748 struct ip *ip; 749 #endif 750 #ifdef INET6 751 struct ip6_hdr *ip6; 752 #endif 753 #if defined(INET6) || defined(INET) 754 struct tcphdr *th; 755 #endif 756 u64 type_cmd_tso_mss; 757 758 /* 759 * Determine where frame payload starts. 
 * Jump over vlan headers if already present
 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);	/* only TCP can be segmented */
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		/*
		 * The corresponding flag is set by the stack in the IPv4
		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
		 * So, set it here because the rest of the flow requires it.
		 */
		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);	/* only TCP can be segmented */
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
		    __func__, ntohs(etype));
		return FALSE;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/* TSO MSS must not be less than 64 */
	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
		que->mss_too_small++;
		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
	}
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
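 *
 * Completion is driven by head write-back rather than per-descriptor done
 * bits: the hardware periodically DMAs its HEAD index into the word just
 * past the descriptor ring, which ixl_get_tx_head() above reads back as
 *
 *	void *head = &txr->base[que->num_desc];   (one entry past the ring)
 *	return LE32_TO_CPU(*(volatile __le32 *)head);
 *
 * The clean loop below simply walks next_to_clean forward until it
 * catches up with that reported HEAD value.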
862 * 863 **********************************************************************/ 864 bool 865 ixl_txeof(struct ixl_queue *que) 866 { 867 struct tx_ring *txr = &que->txr; 868 u32 first, last, head, done, processed; 869 struct ixl_tx_buf *buf; 870 struct i40e_tx_desc *tx_desc, *eop_desc; 871 872 873 mtx_assert(&txr->mtx, MA_OWNED); 874 875 #ifdef DEV_NETMAP 876 // XXX todo: implement moderation 877 if (netmap_tx_irq(que->vsi->ifp, que->me)) 878 return FALSE; 879 #endif /* DEF_NETMAP */ 880 881 /* These are not the descriptors you seek, move along :) */ 882 if (txr->avail == que->num_desc) { 883 atomic_store_rel_32(&txr->watchdog_timer, 0); 884 return FALSE; 885 } 886 887 processed = 0; 888 first = txr->next_to_clean; 889 buf = &txr->buffers[first]; 890 tx_desc = (struct i40e_tx_desc *)&txr->base[first]; 891 last = buf->eop_index; 892 if (last == -1) 893 return FALSE; 894 eop_desc = (struct i40e_tx_desc *)&txr->base[last]; 895 896 /* Get the Head WB value */ 897 head = ixl_get_tx_head(que); 898 899 /* 900 ** Get the index of the first descriptor 901 ** BEYOND the EOP and call that 'done'. 902 ** I do this so the comparison in the 903 ** inner while loop below can be simple 904 */ 905 if (++last == que->num_desc) last = 0; 906 done = last; 907 908 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 909 BUS_DMASYNC_POSTREAD); 910 /* 911 ** The HEAD index of the ring is written in a 912 ** defined location, this rather than a done bit 913 ** is what is used to keep track of what must be 914 ** 'cleaned'. 915 */ 916 while (first != head) { 917 /* We clean the range of the packet */ 918 while (first != done) { 919 ++txr->avail; 920 ++processed; 921 922 if (buf->m_head) { 923 txr->bytes += /* for ITR adjustment */ 924 buf->m_head->m_pkthdr.len; 925 txr->tx_bytes += /* for TX stats */ 926 buf->m_head->m_pkthdr.len; 927 bus_dmamap_sync(buf->tag, 928 buf->map, 929 BUS_DMASYNC_POSTWRITE); 930 bus_dmamap_unload(buf->tag, 931 buf->map); 932 m_freem(buf->m_head); 933 buf->m_head = NULL; 934 } 935 buf->eop_index = -1; 936 937 if (++first == que->num_desc) 938 first = 0; 939 940 buf = &txr->buffers[first]; 941 tx_desc = &txr->base[first]; 942 } 943 ++txr->packets; 944 /* See if there is more work now */ 945 last = buf->eop_index; 946 if (last != -1) { 947 eop_desc = &txr->base[last]; 948 /* Get next done point */ 949 if (++last == que->num_desc) last = 0; 950 done = last; 951 } else 952 break; 953 } 954 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 955 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 956 957 txr->next_to_clean = first; 958 959 960 /* 961 * If there are no pending descriptors, clear the timeout. 962 */ 963 if (txr->avail == que->num_desc) { 964 atomic_store_rel_32(&txr->watchdog_timer, 0); 965 return FALSE; 966 } 967 968 return TRUE; 969 } 970 971 /********************************************************************* 972 * 973 * Refresh mbuf buffers for RX descriptor rings 974 * - now keeps its own state so discards due to resource 975 * exhaustion are unnecessary, if an mbuf cannot be obtained 976 * it just returns, keeping its placeholder, thus it can simply 977 * be recalled to try again. 
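 *
 * Callers pass the ring index they are currently working at as 'limit';
 * the loop below replenishes buffers from next_refresh up to, but not
 * including, that index.  In ixl_rxeof() this looks roughly like:
 *
 *	if (processed == 8) {	 (batch tail/doorbell updates)
 *		ixl_refresh_mbufs(que, i);
 *		processed = 0;
 *	}
 *
 * and the hardware tail register is only written when at least one
 * buffer was actually refreshed.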
978 * 979 **********************************************************************/ 980 static void 981 ixl_refresh_mbufs(struct ixl_queue *que, int limit) 982 { 983 struct ixl_vsi *vsi = que->vsi; 984 struct rx_ring *rxr = &que->rxr; 985 bus_dma_segment_t hseg[1]; 986 bus_dma_segment_t pseg[1]; 987 struct ixl_rx_buf *buf; 988 struct mbuf *mh, *mp; 989 int i, j, nsegs, error; 990 bool refreshed = FALSE; 991 992 i = j = rxr->next_refresh; 993 /* Control the loop with one beyond */ 994 if (++j == que->num_desc) 995 j = 0; 996 997 while (j != limit) { 998 buf = &rxr->buffers[i]; 999 if (rxr->hdr_split == FALSE) 1000 goto no_split; 1001 1002 if (buf->m_head == NULL) { 1003 mh = m_gethdr(M_NOWAIT, MT_DATA); 1004 if (mh == NULL) 1005 goto update; 1006 } else 1007 mh = buf->m_head; 1008 1009 mh->m_pkthdr.len = mh->m_len = MHLEN; 1010 mh->m_len = MHLEN; 1011 mh->m_flags |= M_PKTHDR; 1012 /* Get the memory mapping */ 1013 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1014 buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); 1015 if (error != 0) { 1016 printf("Refresh mbufs: hdr dmamap load" 1017 " failure - %d\n", error); 1018 m_free(mh); 1019 buf->m_head = NULL; 1020 goto update; 1021 } 1022 buf->m_head = mh; 1023 bus_dmamap_sync(rxr->htag, buf->hmap, 1024 BUS_DMASYNC_PREREAD); 1025 rxr->base[i].read.hdr_addr = 1026 htole64(hseg[0].ds_addr); 1027 1028 no_split: 1029 if (buf->m_pack == NULL) { 1030 mp = m_getjcl(M_NOWAIT, MT_DATA, 1031 M_PKTHDR, rxr->mbuf_sz); 1032 if (mp == NULL) 1033 goto update; 1034 } else 1035 mp = buf->m_pack; 1036 1037 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1038 /* Get the memory mapping */ 1039 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1040 buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); 1041 if (error != 0) { 1042 printf("Refresh mbufs: payload dmamap load" 1043 " failure - %d\n", error); 1044 m_free(mp); 1045 buf->m_pack = NULL; 1046 goto update; 1047 } 1048 buf->m_pack = mp; 1049 bus_dmamap_sync(rxr->ptag, buf->pmap, 1050 BUS_DMASYNC_PREREAD); 1051 rxr->base[i].read.pkt_addr = 1052 htole64(pseg[0].ds_addr); 1053 /* Used only when doing header split */ 1054 rxr->base[i].read.hdr_addr = 0; 1055 1056 refreshed = TRUE; 1057 /* Next is precalculated */ 1058 i = j; 1059 rxr->next_refresh = i; 1060 if (++j == que->num_desc) 1061 j = 0; 1062 } 1063 update: 1064 if (refreshed) /* Update hardware tail index */ 1065 wr32(vsi->hw, rxr->tail, rxr->next_refresh); 1066 return; 1067 } 1068 1069 1070 /********************************************************************* 1071 * 1072 * Allocate memory for rx_buffer structures. Since we use one 1073 * rx_buffer per descriptor, the maximum number of rx_buffer's 1074 * that we'll need is equal to the number of receive descriptors 1075 * that we've defined. 
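 *
 * Two DMA tags are created below: a small one (MSIZE) for the optional
 * header buffer mapped through buf->hmap, and a large one (MJUM16BYTES)
 * for the payload cluster mapped through buf->pmap.  The cluster actually
 * handed out at refresh time is sized by rxr->mbuf_sz, which is configured
 * outside this routine, e.g. (from ixl_refresh_mbufs() above):
 *
 *	mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rxr->mbuf_sz);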
1076 * 1077 **********************************************************************/ 1078 int 1079 ixl_allocate_rx_data(struct ixl_queue *que) 1080 { 1081 struct rx_ring *rxr = &que->rxr; 1082 struct ixl_vsi *vsi = que->vsi; 1083 device_t dev = vsi->dev; 1084 struct ixl_rx_buf *buf; 1085 int i, bsize, error; 1086 1087 bsize = sizeof(struct ixl_rx_buf) * que->num_desc; 1088 if (!(rxr->buffers = 1089 (struct ixl_rx_buf *) malloc(bsize, 1090 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1091 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1092 error = ENOMEM; 1093 return (error); 1094 } 1095 1096 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1097 1, 0, /* alignment, bounds */ 1098 BUS_SPACE_MAXADDR, /* lowaddr */ 1099 BUS_SPACE_MAXADDR, /* highaddr */ 1100 NULL, NULL, /* filter, filterarg */ 1101 MSIZE, /* maxsize */ 1102 1, /* nsegments */ 1103 MSIZE, /* maxsegsize */ 1104 0, /* flags */ 1105 NULL, /* lockfunc */ 1106 NULL, /* lockfuncarg */ 1107 &rxr->htag))) { 1108 device_printf(dev, "Unable to create RX DMA htag\n"); 1109 return (error); 1110 } 1111 1112 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1113 1, 0, /* alignment, bounds */ 1114 BUS_SPACE_MAXADDR, /* lowaddr */ 1115 BUS_SPACE_MAXADDR, /* highaddr */ 1116 NULL, NULL, /* filter, filterarg */ 1117 MJUM16BYTES, /* maxsize */ 1118 1, /* nsegments */ 1119 MJUM16BYTES, /* maxsegsize */ 1120 0, /* flags */ 1121 NULL, /* lockfunc */ 1122 NULL, /* lockfuncarg */ 1123 &rxr->ptag))) { 1124 device_printf(dev, "Unable to create RX DMA ptag\n"); 1125 return (error); 1126 } 1127 1128 for (i = 0; i < que->num_desc; i++) { 1129 buf = &rxr->buffers[i]; 1130 error = bus_dmamap_create(rxr->htag, 1131 BUS_DMA_NOWAIT, &buf->hmap); 1132 if (error) { 1133 device_printf(dev, "Unable to create RX head map\n"); 1134 break; 1135 } 1136 error = bus_dmamap_create(rxr->ptag, 1137 BUS_DMA_NOWAIT, &buf->pmap); 1138 if (error) { 1139 device_printf(dev, "Unable to create RX pkt map\n"); 1140 break; 1141 } 1142 } 1143 1144 return (error); 1145 } 1146 1147 1148 /********************************************************************* 1149 * 1150 * (Re)Initialize the queue receive ring and its buffers. 
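 *
 * With header split disabled (the default set below), only the payload
 * cluster is posted to the hardware; each descriptor is primed in its
 * "read" format roughly as
 *
 *	rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
 *	rxr->base[j].read.hdr_addr = 0;
 *
 * and the same two stores are repeated by ixl_refresh_mbufs() whenever a
 * buffer is returned to the ring after completion.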
1151 * 1152 **********************************************************************/ 1153 int 1154 ixl_init_rx_ring(struct ixl_queue *que) 1155 { 1156 struct rx_ring *rxr = &que->rxr; 1157 struct ixl_vsi *vsi = que->vsi; 1158 #if defined(INET6) || defined(INET) 1159 struct ifnet *ifp = vsi->ifp; 1160 struct lro_ctrl *lro = &rxr->lro; 1161 #endif 1162 struct ixl_rx_buf *buf; 1163 bus_dma_segment_t pseg[1], hseg[1]; 1164 int rsize, nsegs, error = 0; 1165 #ifdef DEV_NETMAP 1166 struct netmap_adapter *na = NA(que->vsi->ifp); 1167 struct netmap_slot *slot; 1168 #endif /* DEV_NETMAP */ 1169 1170 IXL_RX_LOCK(rxr); 1171 #ifdef DEV_NETMAP 1172 /* same as in ixl_init_tx_ring() */ 1173 slot = netmap_reset(na, NR_RX, que->me, 0); 1174 #endif /* DEV_NETMAP */ 1175 /* Clear the ring contents */ 1176 rsize = roundup2(que->num_desc * 1177 sizeof(union i40e_rx_desc), DBA_ALIGN); 1178 bzero((void *)rxr->base, rsize); 1179 /* Cleanup any existing buffers */ 1180 for (int i = 0; i < que->num_desc; i++) { 1181 buf = &rxr->buffers[i]; 1182 if (buf->m_head != NULL) { 1183 bus_dmamap_sync(rxr->htag, buf->hmap, 1184 BUS_DMASYNC_POSTREAD); 1185 bus_dmamap_unload(rxr->htag, buf->hmap); 1186 buf->m_head->m_flags |= M_PKTHDR; 1187 m_freem(buf->m_head); 1188 } 1189 if (buf->m_pack != NULL) { 1190 bus_dmamap_sync(rxr->ptag, buf->pmap, 1191 BUS_DMASYNC_POSTREAD); 1192 bus_dmamap_unload(rxr->ptag, buf->pmap); 1193 buf->m_pack->m_flags |= M_PKTHDR; 1194 m_freem(buf->m_pack); 1195 } 1196 buf->m_head = NULL; 1197 buf->m_pack = NULL; 1198 } 1199 1200 /* header split is off */ 1201 rxr->hdr_split = FALSE; 1202 1203 /* Now replenish the mbufs */ 1204 for (int j = 0; j != que->num_desc; ++j) { 1205 struct mbuf *mh, *mp; 1206 1207 buf = &rxr->buffers[j]; 1208 #ifdef DEV_NETMAP 1209 /* 1210 * In netmap mode, fill the map and set the buffer 1211 * address in the NIC ring, considering the offset 1212 * between the netmap and NIC rings (see comment in 1213 * ixgbe_setup_transmit_ring() ). 
No need to allocate 1214 * an mbuf, so end the block with a continue; 1215 */ 1216 if (slot) { 1217 int sj = netmap_idx_n2k(&na->rx_rings[que->me], j); 1218 uint64_t paddr; 1219 void *addr; 1220 1221 addr = PNMB(na, slot + sj, &paddr); 1222 netmap_load_map(na, rxr->dma.tag, buf->pmap, addr); 1223 /* Update descriptor and the cached value */ 1224 rxr->base[j].read.pkt_addr = htole64(paddr); 1225 rxr->base[j].read.hdr_addr = 0; 1226 continue; 1227 } 1228 #endif /* DEV_NETMAP */ 1229 /* 1230 ** Don't allocate mbufs if not 1231 ** doing header split, its wasteful 1232 */ 1233 if (rxr->hdr_split == FALSE) 1234 goto skip_head; 1235 1236 /* First the header */ 1237 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA); 1238 if (buf->m_head == NULL) { 1239 error = ENOBUFS; 1240 goto fail; 1241 } 1242 m_adj(buf->m_head, ETHER_ALIGN); 1243 mh = buf->m_head; 1244 mh->m_len = mh->m_pkthdr.len = MHLEN; 1245 mh->m_flags |= M_PKTHDR; 1246 /* Get the memory mapping */ 1247 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1248 buf->hmap, buf->m_head, hseg, 1249 &nsegs, BUS_DMA_NOWAIT); 1250 if (error != 0) /* Nothing elegant to do here */ 1251 goto fail; 1252 bus_dmamap_sync(rxr->htag, 1253 buf->hmap, BUS_DMASYNC_PREREAD); 1254 /* Update descriptor */ 1255 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr); 1256 1257 skip_head: 1258 /* Now the payload cluster */ 1259 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, 1260 M_PKTHDR, rxr->mbuf_sz); 1261 if (buf->m_pack == NULL) { 1262 error = ENOBUFS; 1263 goto fail; 1264 } 1265 mp = buf->m_pack; 1266 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1267 /* Get the memory mapping */ 1268 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1269 buf->pmap, mp, pseg, 1270 &nsegs, BUS_DMA_NOWAIT); 1271 if (error != 0) 1272 goto fail; 1273 bus_dmamap_sync(rxr->ptag, 1274 buf->pmap, BUS_DMASYNC_PREREAD); 1275 /* Update descriptor */ 1276 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr); 1277 rxr->base[j].read.hdr_addr = 0; 1278 } 1279 1280 1281 /* Setup our descriptor indices */ 1282 rxr->next_check = 0; 1283 rxr->next_refresh = 0; 1284 rxr->lro_enabled = FALSE; 1285 rxr->split = 0; 1286 rxr->bytes = 0; 1287 rxr->discard = FALSE; 1288 1289 wr32(vsi->hw, rxr->tail, que->num_desc - 1); 1290 ixl_flush(vsi->hw); 1291 1292 #if defined(INET6) || defined(INET) 1293 /* 1294 ** Now set up the LRO interface: 1295 */ 1296 if (ifp->if_capenable & IFCAP_LRO) { 1297 int err = tcp_lro_init(lro); 1298 if (err) { 1299 if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me); 1300 goto fail; 1301 } 1302 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me); 1303 rxr->lro_enabled = TRUE; 1304 lro->ifp = vsi->ifp; 1305 } 1306 #endif 1307 1308 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1309 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1310 1311 fail: 1312 IXL_RX_UNLOCK(rxr); 1313 return (error); 1314 } 1315 1316 1317 /********************************************************************* 1318 * 1319 * Free station receive ring data structures 1320 * 1321 **********************************************************************/ 1322 void 1323 ixl_free_que_rx(struct ixl_queue *que) 1324 { 1325 struct rx_ring *rxr = &que->rxr; 1326 struct ixl_rx_buf *buf; 1327 1328 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 1329 1330 /* Cleanup any existing buffers */ 1331 if (rxr->buffers != NULL) { 1332 for (int i = 0; i < que->num_desc; i++) { 1333 buf = &rxr->buffers[i]; 1334 if (buf->m_head != NULL) { 1335 bus_dmamap_sync(rxr->htag, buf->hmap, 1336 BUS_DMASYNC_POSTREAD); 1337 
bus_dmamap_unload(rxr->htag, buf->hmap); 1338 buf->m_head->m_flags |= M_PKTHDR; 1339 m_freem(buf->m_head); 1340 } 1341 if (buf->m_pack != NULL) { 1342 bus_dmamap_sync(rxr->ptag, buf->pmap, 1343 BUS_DMASYNC_POSTREAD); 1344 bus_dmamap_unload(rxr->ptag, buf->pmap); 1345 buf->m_pack->m_flags |= M_PKTHDR; 1346 m_freem(buf->m_pack); 1347 } 1348 buf->m_head = NULL; 1349 buf->m_pack = NULL; 1350 if (buf->hmap != NULL) { 1351 bus_dmamap_destroy(rxr->htag, buf->hmap); 1352 buf->hmap = NULL; 1353 } 1354 if (buf->pmap != NULL) { 1355 bus_dmamap_destroy(rxr->ptag, buf->pmap); 1356 buf->pmap = NULL; 1357 } 1358 } 1359 if (rxr->buffers != NULL) { 1360 free(rxr->buffers, M_DEVBUF); 1361 rxr->buffers = NULL; 1362 } 1363 } 1364 1365 if (rxr->htag != NULL) { 1366 bus_dma_tag_destroy(rxr->htag); 1367 rxr->htag = NULL; 1368 } 1369 if (rxr->ptag != NULL) { 1370 bus_dma_tag_destroy(rxr->ptag); 1371 rxr->ptag = NULL; 1372 } 1373 1374 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 1375 return; 1376 } 1377 1378 static inline void 1379 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype) 1380 { 1381 1382 #if defined(INET6) || defined(INET) 1383 /* 1384 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet 1385 * should be computed by hardware. Also it should not have VLAN tag in 1386 * ethernet header. 1387 */ 1388 if (rxr->lro_enabled && 1389 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 1390 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 1391 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { 1392 /* 1393 * Send to the stack if: 1394 ** - LRO not enabled, or 1395 ** - no LRO resources, or 1396 ** - lro enqueue fails 1397 */ 1398 if (rxr->lro.lro_cnt != 0) 1399 if (tcp_lro_rx(&rxr->lro, m, 0) == 0) 1400 return; 1401 } 1402 #endif 1403 IXL_RX_UNLOCK(rxr); 1404 (*ifp->if_input)(ifp, m); 1405 IXL_RX_LOCK(rxr); 1406 } 1407 1408 1409 static inline void 1410 ixl_rx_discard(struct rx_ring *rxr, int i) 1411 { 1412 struct ixl_rx_buf *rbuf; 1413 1414 rbuf = &rxr->buffers[i]; 1415 1416 if (rbuf->fmp != NULL) {/* Partial chain ? */ 1417 rbuf->fmp->m_flags |= M_PKTHDR; 1418 m_freem(rbuf->fmp); 1419 rbuf->fmp = NULL; 1420 } 1421 1422 /* 1423 ** With advanced descriptors the writeback 1424 ** clobbers the buffer addrs, so its easier 1425 ** to just free the existing mbufs and take 1426 ** the normal refresh path to get new buffers 1427 ** and mapping. 1428 */ 1429 if (rbuf->m_head) { 1430 m_free(rbuf->m_head); 1431 rbuf->m_head = NULL; 1432 } 1433 1434 if (rbuf->m_pack) { 1435 m_free(rbuf->m_pack); 1436 rbuf->m_pack = NULL; 1437 } 1438 1439 return; 1440 } 1441 1442 #ifdef RSS 1443 /* 1444 ** i40e_ptype_to_hash: parse the packet type 1445 ** to determine the appropriate hash. 
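**
** The value returned here is consumed at EOP time in ixl_rxeof(), which
** (under RSS) stamps each completed mbuf roughly as
**
**	sendmp->m_pkthdr.flowid =
**	    le32toh(cur->wb.qword0.hi_dword.rss);
**	M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
**
** so that the stack, and ixl_mq_start() at the top of this file, can keep
** the flow on the matching RSS bucket.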
1446 */ 1447 static inline int 1448 ixl_ptype_to_hash(u8 ptype) 1449 { 1450 struct i40e_rx_ptype_decoded decoded; 1451 u8 ex = 0; 1452 1453 decoded = decode_rx_desc_ptype(ptype); 1454 ex = decoded.outer_frag; 1455 1456 if (!decoded.known) 1457 return M_HASHTYPE_OPAQUE_HASH; 1458 1459 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 1460 return M_HASHTYPE_OPAQUE_HASH; 1461 1462 /* Note: anything that gets to this point is IP */ 1463 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 1464 switch (decoded.inner_prot) { 1465 case I40E_RX_PTYPE_INNER_PROT_TCP: 1466 if (ex) 1467 return M_HASHTYPE_RSS_TCP_IPV6_EX; 1468 else 1469 return M_HASHTYPE_RSS_TCP_IPV6; 1470 case I40E_RX_PTYPE_INNER_PROT_UDP: 1471 if (ex) 1472 return M_HASHTYPE_RSS_UDP_IPV6_EX; 1473 else 1474 return M_HASHTYPE_RSS_UDP_IPV6; 1475 default: 1476 if (ex) 1477 return M_HASHTYPE_RSS_IPV6_EX; 1478 else 1479 return M_HASHTYPE_RSS_IPV6; 1480 } 1481 } 1482 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 1483 switch (decoded.inner_prot) { 1484 case I40E_RX_PTYPE_INNER_PROT_TCP: 1485 return M_HASHTYPE_RSS_TCP_IPV4; 1486 case I40E_RX_PTYPE_INNER_PROT_UDP: 1487 if (ex) 1488 return M_HASHTYPE_RSS_UDP_IPV4_EX; 1489 else 1490 return M_HASHTYPE_RSS_UDP_IPV4; 1491 default: 1492 return M_HASHTYPE_RSS_IPV4; 1493 } 1494 } 1495 /* We should never get here!! */ 1496 return M_HASHTYPE_OPAQUE_HASH; 1497 } 1498 #endif /* RSS */ 1499 1500 /********************************************************************* 1501 * 1502 * This routine executes in interrupt context. It replenishes 1503 * the mbufs in the descriptor and sends data which has been 1504 * dma'ed into host memory to upper layer. 1505 * 1506 * We loop at most count times if count is > 0, or until done if 1507 * count < 0. 1508 * 1509 * Return TRUE for more work, FALSE for all clean. 1510 *********************************************************************/ 1511 bool 1512 ixl_rxeof(struct ixl_queue *que, int count) 1513 { 1514 struct ixl_vsi *vsi = que->vsi; 1515 struct rx_ring *rxr = &que->rxr; 1516 struct ifnet *ifp = vsi->ifp; 1517 #if defined(INET6) || defined(INET) 1518 struct lro_ctrl *lro = &rxr->lro; 1519 #endif 1520 int i, nextp, processed = 0; 1521 union i40e_rx_desc *cur; 1522 struct ixl_rx_buf *rbuf, *nbuf; 1523 1524 1525 IXL_RX_LOCK(rxr); 1526 1527 #ifdef DEV_NETMAP 1528 if (netmap_rx_irq(ifp, que->me, &count)) { 1529 IXL_RX_UNLOCK(rxr); 1530 return (FALSE); 1531 } 1532 #endif /* DEV_NETMAP */ 1533 1534 for (i = rxr->next_check; count != 0;) { 1535 struct mbuf *sendmp, *mh, *mp; 1536 u32 status, error; 1537 u16 hlen, plen, vtag; 1538 u64 qword; 1539 u8 ptype; 1540 bool eop; 1541 1542 /* Sync the ring. 
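 *
 * A condensed restatement of the decode that follows: the per-packet
 * status, error, buffer-length and packet-type fields all live in the
 * single write-back qword, and the loop stops at the first descriptor
 * whose DD (descriptor done) bit is still clear:
 *
 *	qword  = le64toh(cur->wb.qword1.status_error_len);
 *	status = (qword & I40E_RXD_QW1_STATUS_MASK)
 *	    >> I40E_RXD_QW1_STATUS_SHIFT;
 *	if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0)
 *		break;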
*/ 1543 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1544 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1545 1546 cur = &rxr->base[i]; 1547 qword = le64toh(cur->wb.qword1.status_error_len); 1548 status = (qword & I40E_RXD_QW1_STATUS_MASK) 1549 >> I40E_RXD_QW1_STATUS_SHIFT; 1550 error = (qword & I40E_RXD_QW1_ERROR_MASK) 1551 >> I40E_RXD_QW1_ERROR_SHIFT; 1552 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) 1553 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1554 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) 1555 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1556 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) 1557 >> I40E_RXD_QW1_PTYPE_SHIFT; 1558 1559 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) { 1560 ++rxr->not_done; 1561 break; 1562 } 1563 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1564 break; 1565 1566 count--; 1567 sendmp = NULL; 1568 nbuf = NULL; 1569 cur->wb.qword1.status_error_len = 0; 1570 rbuf = &rxr->buffers[i]; 1571 mh = rbuf->m_head; 1572 mp = rbuf->m_pack; 1573 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)); 1574 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) 1575 vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); 1576 else 1577 vtag = 0; 1578 1579 /* Remove device access to the rx buffers. */ 1580 if (rbuf->m_head != NULL) { 1581 bus_dmamap_sync(rxr->htag, rbuf->hmap, 1582 BUS_DMASYNC_POSTREAD); 1583 bus_dmamap_unload(rxr->htag, rbuf->hmap); 1584 } 1585 if (rbuf->m_pack != NULL) { 1586 bus_dmamap_sync(rxr->ptag, rbuf->pmap, 1587 BUS_DMASYNC_POSTREAD); 1588 bus_dmamap_unload(rxr->ptag, rbuf->pmap); 1589 } 1590 1591 /* 1592 ** Make sure bad packets are discarded, 1593 ** note that only EOP descriptor has valid 1594 ** error results. 1595 */ 1596 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1597 rxr->desc_errs++; 1598 ixl_rx_discard(rxr, i); 1599 goto next_desc; 1600 } 1601 1602 /* Prefetch the next buffer */ 1603 if (!eop) { 1604 nextp = i + 1; 1605 if (nextp == que->num_desc) 1606 nextp = 0; 1607 nbuf = &rxr->buffers[nextp]; 1608 prefetch(nbuf); 1609 } 1610 1611 /* 1612 ** The header mbuf is ONLY used when header 1613 ** split is enabled, otherwise we get normal 1614 ** behavior, ie, both header and payload 1615 ** are DMA'd into the payload buffer. 1616 ** 1617 ** Rather than using the fmp/lmp global pointers 1618 ** we now keep the head of a packet chain in the 1619 ** buffer struct and pass this along from one 1620 ** descriptor to the next, until we get EOP. 1621 */ 1622 if (rxr->hdr_split && (rbuf->fmp == NULL)) { 1623 if (hlen > IXL_RX_HDR) 1624 hlen = IXL_RX_HDR; 1625 mh->m_len = hlen; 1626 mh->m_flags |= M_PKTHDR; 1627 mh->m_next = NULL; 1628 mh->m_pkthdr.len = mh->m_len; 1629 /* Null buf pointer so it is refreshed */ 1630 rbuf->m_head = NULL; 1631 /* 1632 ** Check the payload length, this 1633 ** could be zero if its a small 1634 ** packet. 1635 */ 1636 if (plen > 0) { 1637 mp->m_len = plen; 1638 mp->m_next = NULL; 1639 mp->m_flags &= ~M_PKTHDR; 1640 mh->m_next = mp; 1641 mh->m_pkthdr.len += mp->m_len; 1642 /* Null buf pointer so it is refreshed */ 1643 rbuf->m_pack = NULL; 1644 rxr->split++; 1645 } 1646 /* 1647 ** Now create the forward 1648 ** chain so when complete 1649 ** we wont have to. 
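**
** In the non-split case further below the same hand-off looks roughly
** like:
**
**	if (eop == 0) {
**		nbuf->fmp = sendmp;	  (stash chain head in next buf)
**		sendmp = NULL;		  (nothing to hand up yet)
**		mp->m_next = nbuf->m_pack;
**	}
**
** so a partially assembled chain rides along in the rx_buf structs until
** the EOP descriptor finally passes 'sendmp' to ixl_rx_input().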
1650 */ 1651 if (eop == 0) { 1652 /* stash the chain head */ 1653 nbuf->fmp = mh; 1654 /* Make forward chain */ 1655 if (plen) 1656 mp->m_next = nbuf->m_pack; 1657 else 1658 mh->m_next = nbuf->m_pack; 1659 } else { 1660 /* Singlet, prepare to send */ 1661 sendmp = mh; 1662 if (vtag) { 1663 sendmp->m_pkthdr.ether_vtag = vtag; 1664 sendmp->m_flags |= M_VLANTAG; 1665 } 1666 } 1667 } else { 1668 /* 1669 ** Either no header split, or a 1670 ** secondary piece of a fragmented 1671 ** split packet. 1672 */ 1673 mp->m_len = plen; 1674 /* 1675 ** See if there is a stored head 1676 ** that determines what we are 1677 */ 1678 sendmp = rbuf->fmp; 1679 rbuf->m_pack = rbuf->fmp = NULL; 1680 1681 if (sendmp != NULL) /* secondary frag */ 1682 sendmp->m_pkthdr.len += mp->m_len; 1683 else { 1684 /* first desc of a non-ps chain */ 1685 sendmp = mp; 1686 sendmp->m_flags |= M_PKTHDR; 1687 sendmp->m_pkthdr.len = mp->m_len; 1688 } 1689 /* Pass the head pointer on */ 1690 if (eop == 0) { 1691 nbuf->fmp = sendmp; 1692 sendmp = NULL; 1693 mp->m_next = nbuf->m_pack; 1694 } 1695 } 1696 ++processed; 1697 /* Sending this frame? */ 1698 if (eop) { 1699 sendmp->m_pkthdr.rcvif = ifp; 1700 /* gather stats */ 1701 rxr->rx_packets++; 1702 rxr->rx_bytes += sendmp->m_pkthdr.len; 1703 /* capture data for dynamic ITR adjustment */ 1704 rxr->packets++; 1705 rxr->bytes += sendmp->m_pkthdr.len; 1706 /* Set VLAN tag (field only valid in eop desc) */ 1707 if (vtag) { 1708 sendmp->m_pkthdr.ether_vtag = vtag; 1709 sendmp->m_flags |= M_VLANTAG; 1710 } 1711 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1712 ixl_rx_checksum(sendmp, status, error, ptype); 1713 #ifdef RSS 1714 sendmp->m_pkthdr.flowid = 1715 le32toh(cur->wb.qword0.hi_dword.rss); 1716 M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype)); 1717 #else 1718 sendmp->m_pkthdr.flowid = que->msix; 1719 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1720 #endif 1721 } 1722 next_desc: 1723 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1724 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1725 1726 /* Advance our pointers to the next descriptor. */ 1727 if (++i == que->num_desc) 1728 i = 0; 1729 1730 /* Now send to the stack or do LRO */ 1731 if (sendmp != NULL) { 1732 rxr->next_check = i; 1733 ixl_rx_input(rxr, ifp, sendmp, ptype); 1734 i = rxr->next_check; 1735 } 1736 1737 /* Every 8 descriptors we go to refresh mbufs */ 1738 if (processed == 8) { 1739 ixl_refresh_mbufs(que, i); 1740 processed = 0; 1741 } 1742 } 1743 1744 /* Refresh any remaining buf structs */ 1745 if (ixl_rx_unrefreshed(que)) 1746 ixl_refresh_mbufs(que, i); 1747 1748 rxr->next_check = i; 1749 1750 #if defined(INET6) || defined(INET) 1751 /* 1752 * Flush any outstanding LRO work 1753 */ 1754 #if __FreeBSD_version >= 1100105 1755 tcp_lro_flush_all(lro); 1756 #else 1757 struct lro_entry *queued; 1758 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { 1759 SLIST_REMOVE_HEAD(&lro->lro_active, next); 1760 tcp_lro_flush(lro, queued); 1761 } 1762 #endif 1763 #endif /* defined(INET6) || defined(INET) */ 1764 1765 IXL_RX_UNLOCK(rxr); 1766 return (FALSE); 1767 } 1768 1769 1770 /********************************************************************* 1771 * 1772 * Verify that the hardware indicated that the checksum is valid. 1773 * Inform the stack about the status of checksum so that stack 1774 * doesn't spend time verifying the checksum. 
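 *
 * On a clean result the function below tells the stack it can skip both
 * checksum passes; condensed, the flags it sets are
 *
 *	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;
 *	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
 *		mp->m_pkthdr.csum_flags |=
 *		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 *		mp->m_pkthdr.csum_data = 0xffff;
 *	}
 *
 * while any hardware-reported IP or L4 error clears csum_flags so the
 * stack re-verifies the packet in software.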
1775 * 1776 *********************************************************************/ 1777 static void 1778 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype) 1779 { 1780 struct i40e_rx_ptype_decoded decoded; 1781 1782 decoded = decode_rx_desc_ptype(ptype); 1783 1784 /* Errors? */ 1785 if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 1786 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { 1787 mp->m_pkthdr.csum_flags = 0; 1788 return; 1789 } 1790 1791 /* IPv6 with extension headers likely have bad csum */ 1792 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1793 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 1794 if (status & 1795 (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { 1796 mp->m_pkthdr.csum_flags = 0; 1797 return; 1798 } 1799 1800 1801 /* IP Checksum Good */ 1802 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 1803 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; 1804 1805 if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) { 1806 mp->m_pkthdr.csum_flags |= 1807 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1808 mp->m_pkthdr.csum_data |= htons(0xffff); 1809 } 1810 return; 1811 } 1812 1813 #if __FreeBSD_version >= 1100000 1814 uint64_t 1815 ixl_get_counter(if_t ifp, ift_counter cnt) 1816 { 1817 struct ixl_vsi *vsi; 1818 1819 vsi = if_getsoftc(ifp); 1820 1821 switch (cnt) { 1822 case IFCOUNTER_IPACKETS: 1823 return (vsi->ipackets); 1824 case IFCOUNTER_IERRORS: 1825 return (vsi->ierrors); 1826 case IFCOUNTER_OPACKETS: 1827 return (vsi->opackets); 1828 case IFCOUNTER_OERRORS: 1829 return (vsi->oerrors); 1830 case IFCOUNTER_COLLISIONS: 1831 /* Collisions are by standard impossible in 40G/10G Ethernet */ 1832 return (0); 1833 case IFCOUNTER_IBYTES: 1834 return (vsi->ibytes); 1835 case IFCOUNTER_OBYTES: 1836 return (vsi->obytes); 1837 case IFCOUNTER_IMCASTS: 1838 return (vsi->imcasts); 1839 case IFCOUNTER_OMCASTS: 1840 return (vsi->omcasts); 1841 case IFCOUNTER_IQDROPS: 1842 return (vsi->iqdrops); 1843 case IFCOUNTER_OQDROPS: 1844 return (vsi->oqdrops); 1845 case IFCOUNTER_NOPROTO: 1846 return (vsi->noproto); 1847 default: 1848 return (if_get_counter_default(ifp, cnt)); 1849 } 1850 } 1851 #endif 1852 1853
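
/*
** The counter method above is not called directly from this file; on
** FreeBSD 11 and later the attach code is expected to register it on the
** ifnet, roughly as follows (a sketch, the real hook lives in the PF/VF
** attach paths, not here):
**
**	if_setsoftc(ifp, vsi);
**	if_setgetcounterfn(ifp, ixl_get_counter);
**
** after which if_get_counter(ifp, IFCOUNTER_IPACKETS) and friends resolve
** to the VSI statistics returned above.
*/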