/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the PF and VF drivers.
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static inline void ixl_rx_discard(struct rx_ring *, int);
static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
static inline u32 ixl_get_tx_head(struct ixl_queue *que);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
#endif /* DEV_NETMAP */

/*
 * @key: the default RSS hash key is copied into this buffer
 */
void
ixl_get_default_rss_key(u32 *key)
{
	MPASS(key != NULL);

	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
	    0x183cfd8c, 0xce880440, 0x580cbc3c,
	    0x35897377, 0x328b25e1, 0x4fa98922,
	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
	    0x0, 0x0, 0x0};

	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
}

/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi		*vsi = ifp->if_softc;
	struct ixl_queue	*que;
	struct tx_ring		*txr;
	int			err, i;
#ifdef RSS
	u32			bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is set up correctly, it should be
	** the same bucket the current CPU is assigned to.
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue	*que = txr->que;
	struct ixl_vsi		*vsi = que->vsi;
	struct mbuf		*next;
	int			err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
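 * (Scheduled by ixl_mq_start() when the TX lock could not be taken
 * immediately.)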
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue	*que = arg;
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	struct ifnet		*ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi	*vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring	*txr = &que->txr;
		struct mbuf	*m;

		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf	*m;
	int		num, mss;

	num = 0;
	mss = mp->m_pkthdr.tso_segsz;

	/* Exclude first mbuf; assume it contains all headers */
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len % mp->m_pkthdr.tso_segsz;

		if (mss < 1) {
			if (num > IXL_SPARSE_CHAIN)
				return (true);
			num = (mss == 0) ? 0 : 1;
			mss += mp->m_pkthdr.tso_segsz;
		}
	}

	return (false);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*txd = NULL;
	struct mbuf		*m_head, *m;
	int			i, j, error, nsegs;
	int			first, last = 0;
	u16			vtag = 0;
	u32			cmd, off;
	bus_dmamap_t		map;
	bus_dma_tag_t		tag;
	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];

	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
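	 * If the chain is too fragmented for the tag (EFBIG), it is
	 * defragmented once and the load is retried.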
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error != 0) {
			que->tx_dmamap_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		que->tx_dmamap_failed++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
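 *  Two DMA tags are created: tx_tag for ordinary frames and tso_tag,
 *  which allows the longer scatter/gather lists a TSO chain may need
 *  (IXL_MAX_TSO_SEGS segments).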
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_tx_buf	*buf;
	int			error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TX_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tx_tag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IXL_TSO_SIZE,		/* maxsize */
	    IXL_MAX_TSO_SEGS,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->tso_tag))) {
		device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init; it clears the descriptor ring
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

	/* Reset watchdog status */
	txr->watchdog_timer = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif
	/*
	 * Free any existing tx mbufs.
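	 * They are stale leftovers from a previous run; their DMA maps
	 * are unloaded before the mbufs are freed.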
	 */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the
		 * corresponding netmap slot index, si.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map,
			    NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip = NULL;
#endif
	struct tcphdr			*th = NULL;
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
	int				elen, ip_hlen = 0, tcp_hlen;
	u16				etype;
	u8				ipproto = 0;
	bool				tso = FALSE;

	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
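	 * The resulting L2 header length (elen) and IP header length feed
	 * the MACLEN/IPLEN offload fields programmed further down.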
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* The IP checksum must be recalculated with TSO */
		if (tso)
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	switch (ipproto) {
	case IPPROTO_TCP:
		tcp_hlen = th->th_off << 2;
		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (tcp_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
#ifdef IXL_FDIR
		ixl_atr(que, th, etype);
#endif
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		break;

	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		/* Fall Thru */
	default:
		break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring			*txr = &que->txr;
	struct i40e_tx_context_desc	*TXD;
	struct ixl_tx_buf		*buf;
	u32				cmd, mss, type, tsolen;
	u16				etype;
	int				idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip;
#endif
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr			*th;
#endif
	u64				type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		/*
		 * The corresponding flag is set by the stack in the IPv4
		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
		 * So, set it here because the rest of the flow requires it.
		 */
		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
		    __func__, ntohs(etype));
		return (FALSE);
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return (FALSE);

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/* TSO MSS must not be less than 64 */
	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
		que->mss_too_small++;
		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
	}
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return (TRUE);
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring	*txr = &que->txr;
	void *head = &txr->base[que->num_desc];

	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
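 *  Completion is detected from the head write-back value returned by
 *  ixl_get_tx_head(), not from a DD bit in the descriptors.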
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, head, done, processed;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*tx_desc, *eop_desc;

	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEV_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple.
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written to a
	** defined location; this, rather than a done bit,
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be recalled to try again.
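 *   - the hardware tail register is only written when at least one
 *     descriptor was actually refreshed.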
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct ixl_rx_buf	*buf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffers
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
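 *  Each rx_buffer gets two DMA maps: hmap for a header mbuf (MSIZE)
 *  and pmap for a payload cluster (up to MJUM16BYTES).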
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_rx_buf	*buf;
	int			i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MSIZE,			/* maxsize */
	    1,				/* nsegments */
	    MSIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM16BYTES,		/* maxsize */
	    1,				/* nsegments */
	    MJUM16BYTES,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
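 *  The RX lock is taken internally; stale mbufs are freed before
 *  fresh clusters are allocated and mapped.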
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet		*ifp = vsi->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	struct ixl_rx_buf	*buf;
	bus_dma_segment_t	pseg[1], hseg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf	*mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring()). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate header mbufs if not
		** doing header split, it's wasteful.
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}


	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n",
			    que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_rx_buf	*buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * At the moment LRO is only for IPv4/TCP packets, and the TCP
	 * checksum of the packet must have been computed by hardware.
	 * Also it should not have a VLAN tag in the ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf	*rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {	/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

#ifdef RSS
/*
** i40e_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
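** (The decoded hardware ptype is mapped to an M_HASHTYPE_RSS_* value
** for the network stack.)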
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded	decoded;
	u8				ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE_HASH;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE_HASH;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			if (ex)
				return M_HASHTYPE_RSS_TCP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_TCP_IPV6;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			if (ex)
				return M_HASHTYPE_RSS_IPV6_EX;
			else
				return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV4_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE_HASH;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	struct ifnet		*ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	int			i, nextp, processed = 0;
	union i40e_rx_desc	*cur;
	struct ixl_rx_buf	*rbuf, *nbuf;


	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf	*sendmp, *mh, *mp;
		u32		status, error;
		u16		hlen, plen, vtag;
		u64		qword;
		u8		ptype;
		bool		eop;

		/*
		 * Sync the ring.
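		 * POSTREAD ensures the CPU sees the descriptor fields
		 * the hardware has written back before we read them.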
		 */
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded;
		** note that only the EOP descriptor has
		** valid error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->desc_errs++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, i.e., both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we won't have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are.
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			/* Set VLAN tag (field only valid in eop desc) */
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
#if __FreeBSD_version >= 1100105
	tcp_lro_flush_all(lro);
#else
	struct lro_entry *queued;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif
#endif /* defined(INET6) || defined(INET) */

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf *mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/*
	 * Errors?
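	 * If the hardware flagged an IP or L4 checksum error, report no
	 * checksum information to the stack at all.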
	 */
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}


	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif