/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the PF and VF drivers.
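**  It covers TX queue setup and teardown, the multiqueue transmit
**  path, TSO/checksum offload programming, TX completion handling,
**  and RX buffer allocation, refresh and receive processing.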
39 */ 40 41 #ifndef IXL_STANDALONE_BUILD 42 #include "opt_inet.h" 43 #include "opt_inet6.h" 44 #include "opt_rss.h" 45 #endif 46 47 #include "ixl.h" 48 49 #ifdef RSS 50 #include <net/rss_config.h> 51 #endif 52 53 /* Local Prototypes */ 54 static void ixl_rx_checksum(struct mbuf *, u32, u32, u8); 55 static void ixl_refresh_mbufs(struct ixl_queue *, int); 56 static int ixl_xmit(struct ixl_queue *, struct mbuf **); 57 static int ixl_tx_setup_offload(struct ixl_queue *, 58 struct mbuf *, u32 *, u32 *); 59 static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *); 60 61 static inline void ixl_rx_discard(struct rx_ring *, int); 62 static inline void ixl_rx_input(struct rx_ring *, struct ifnet *, 63 struct mbuf *, u8); 64 65 static inline bool ixl_tso_detect_sparse(struct mbuf *mp); 66 static inline u32 ixl_get_tx_head(struct ixl_queue *que); 67 68 #ifdef DEV_NETMAP 69 #include <dev/netmap/if_ixl_netmap.h> 70 int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1; 71 #endif /* DEV_NETMAP */ 72 73 /* 74 * @key key is saved into this parameter 75 */ 76 void 77 ixl_get_default_rss_key(u32 *key) 78 { 79 MPASS(key != NULL); 80 81 u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 82 0x183cfd8c, 0xce880440, 0x580cbc3c, 83 0x35897377, 0x328b25e1, 0x4fa98922, 84 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 85 0x0, 0x0, 0x0}; 86 87 bcopy(rss_seed, key, IXL_RSS_KEY_SIZE); 88 } 89 90 /* 91 ** Multiqueue Transmit driver 92 */ 93 int 94 ixl_mq_start(struct ifnet *ifp, struct mbuf *m) 95 { 96 struct ixl_vsi *vsi = ifp->if_softc; 97 struct ixl_queue *que; 98 struct tx_ring *txr; 99 int err, i; 100 #ifdef RSS 101 u32 bucket_id; 102 #endif 103 104 /* 105 ** Which queue to use: 106 ** 107 ** When doing RSS, map it to the same outbound 108 ** queue as the incoming flow would be mapped to. 109 ** If everything is setup correctly, it should be 110 ** the same bucket that the current CPU we're on is. 111 */ 112 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 113 #ifdef RSS 114 if (rss_hash2bucket(m->m_pkthdr.flowid, 115 M_HASHTYPE_GET(m), &bucket_id) == 0) { 116 i = bucket_id % vsi->num_queues; 117 } else 118 #endif 119 i = m->m_pkthdr.flowid % vsi->num_queues; 120 } else 121 i = curcpu % vsi->num_queues; 122 123 que = &vsi->queues[i]; 124 txr = &que->txr; 125 126 err = drbr_enqueue(ifp, txr->br, m); 127 if (err) 128 return (err); 129 if (IXL_TX_TRYLOCK(txr)) { 130 ixl_mq_start_locked(ifp, txr); 131 IXL_TX_UNLOCK(txr); 132 } else 133 taskqueue_enqueue(que->tq, &que->tx_task); 134 135 return (0); 136 } 137 138 int 139 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 140 { 141 struct ixl_queue *que = txr->que; 142 struct ixl_vsi *vsi = que->vsi; 143 struct mbuf *next; 144 int err = 0; 145 146 147 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || 148 vsi->link_active == 0) 149 return (ENETDOWN); 150 151 /* Process the transmit queue */ 152 while ((next = drbr_peek(ifp, txr->br)) != NULL) { 153 if ((err = ixl_xmit(que, &next)) != 0) { 154 if (next == NULL) 155 drbr_advance(ifp, txr->br); 156 else 157 drbr_putback(ifp, txr->br, next); 158 break; 159 } 160 drbr_advance(ifp, txr->br); 161 /* Send a copy of the frame to the BPF listener */ 162 ETHER_BPF_MTAP(ifp, next); 163 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 164 break; 165 } 166 167 if (txr->avail < IXL_TX_CLEANUP_THRESHOLD) 168 ixl_txeof(que); 169 170 return (err); 171 } 172 173 /* 174 * Called from a taskqueue to drain queued transmit packets. 
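 * ixl_mq_start() falls back to the queue's taskqueue when it cannot
 * take the TX lock, so any frames left in the buf_ring are drained here.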
175 */ 176 void 177 ixl_deferred_mq_start(void *arg, int pending) 178 { 179 struct ixl_queue *que = arg; 180 struct tx_ring *txr = &que->txr; 181 struct ixl_vsi *vsi = que->vsi; 182 struct ifnet *ifp = vsi->ifp; 183 184 IXL_TX_LOCK(txr); 185 if (!drbr_empty(ifp, txr->br)) 186 ixl_mq_start_locked(ifp, txr); 187 IXL_TX_UNLOCK(txr); 188 } 189 190 /* 191 ** Flush all queue ring buffers 192 */ 193 void 194 ixl_qflush(struct ifnet *ifp) 195 { 196 struct ixl_vsi *vsi = ifp->if_softc; 197 198 for (int i = 0; i < vsi->num_queues; i++) { 199 struct ixl_queue *que = &vsi->queues[i]; 200 struct tx_ring *txr = &que->txr; 201 struct mbuf *m; 202 IXL_TX_LOCK(txr); 203 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) 204 m_freem(m); 205 IXL_TX_UNLOCK(txr); 206 } 207 if_qflush(ifp); 208 } 209 210 /* 211 ** Find mbuf chains passed to the driver 212 ** that are 'sparse', using more than 8 213 ** mbufs to deliver an mss-size chunk of data 214 */ 215 static inline bool 216 ixl_tso_detect_sparse(struct mbuf *mp) 217 { 218 struct mbuf *m; 219 int num, mss; 220 221 num = 0; 222 mss = mp->m_pkthdr.tso_segsz; 223 224 /* Exclude first mbuf; assume it contains all headers */ 225 for (m = mp->m_next; m != NULL; m = m->m_next) { 226 if (m == NULL) 227 break; 228 num++; 229 mss -= m->m_len % mp->m_pkthdr.tso_segsz; 230 231 if (mss < 1) { 232 if (num > IXL_SPARSE_CHAIN) 233 return (true); 234 num = (mss == 0) ? 0 : 1; 235 mss += mp->m_pkthdr.tso_segsz; 236 } 237 } 238 239 return (false); 240 } 241 242 243 /********************************************************************* 244 * 245 * This routine maps the mbufs to tx descriptors, allowing the 246 * TX engine to transmit the packets. 247 * - return 0 on success, positive on failure 248 * 249 **********************************************************************/ 250 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) 251 252 static int 253 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp) 254 { 255 struct ixl_vsi *vsi = que->vsi; 256 struct i40e_hw *hw = vsi->hw; 257 struct tx_ring *txr = &que->txr; 258 struct ixl_tx_buf *buf; 259 struct i40e_tx_desc *txd = NULL; 260 struct mbuf *m_head, *m; 261 int i, j, error, nsegs; 262 int first, last = 0; 263 u16 vtag = 0; 264 u32 cmd, off; 265 bus_dmamap_t map; 266 bus_dma_tag_t tag; 267 bus_dma_segment_t segs[IXL_MAX_TSO_SEGS]; 268 269 cmd = off = 0; 270 m_head = *m_headp; 271 272 /* 273 * Important to capture the first descriptor 274 * used because it will contain the index of 275 * the one we tell the hardware to report back 276 */ 277 first = txr->next_avail; 278 buf = &txr->buffers[first]; 279 map = buf->map; 280 tag = txr->tx_tag; 281 282 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 283 /* Use larger mapping for TSO */ 284 tag = txr->tso_tag; 285 if (ixl_tso_detect_sparse(m_head)) { 286 m = m_defrag(m_head, M_NOWAIT); 287 if (m == NULL) { 288 m_freem(*m_headp); 289 *m_headp = NULL; 290 return (ENOBUFS); 291 } 292 *m_headp = m; 293 } 294 } 295 296 /* 297 * Map the packet for DMA. 
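 * If the initial load fails with EFBIG the mbuf chain is defragmented
 * once and the load is retried before giving up.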
298 */ 299 error = bus_dmamap_load_mbuf_sg(tag, map, 300 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 301 302 if (error == EFBIG) { 303 struct mbuf *m; 304 305 m = m_defrag(*m_headp, M_NOWAIT); 306 if (m == NULL) { 307 que->mbuf_defrag_failed++; 308 m_freem(*m_headp); 309 *m_headp = NULL; 310 return (ENOBUFS); 311 } 312 *m_headp = m; 313 314 /* Try it again */ 315 error = bus_dmamap_load_mbuf_sg(tag, map, 316 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 317 318 if (error != 0) { 319 que->tx_dmamap_failed++; 320 m_freem(*m_headp); 321 *m_headp = NULL; 322 return (error); 323 } 324 } else if (error != 0) { 325 que->tx_dmamap_failed++; 326 m_freem(*m_headp); 327 *m_headp = NULL; 328 return (error); 329 } 330 331 /* Make certain there are enough descriptors */ 332 if (nsegs > txr->avail - 2) { 333 txr->no_desc++; 334 error = ENOBUFS; 335 goto xmit_fail; 336 } 337 m_head = *m_headp; 338 339 /* Set up the TSO/CSUM offload */ 340 if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { 341 error = ixl_tx_setup_offload(que, m_head, &cmd, &off); 342 if (error) 343 goto xmit_fail; 344 } 345 346 cmd |= I40E_TX_DESC_CMD_ICRC; 347 /* Grab the VLAN tag */ 348 if (m_head->m_flags & M_VLANTAG) { 349 cmd |= I40E_TX_DESC_CMD_IL2TAG1; 350 vtag = htole16(m_head->m_pkthdr.ether_vtag); 351 } 352 353 i = txr->next_avail; 354 for (j = 0; j < nsegs; j++) { 355 bus_size_t seglen; 356 357 buf = &txr->buffers[i]; 358 buf->tag = tag; /* Keep track of the type tag */ 359 txd = &txr->base[i]; 360 seglen = segs[j].ds_len; 361 362 txd->buffer_addr = htole64(segs[j].ds_addr); 363 txd->cmd_type_offset_bsz = 364 htole64(I40E_TX_DESC_DTYPE_DATA 365 | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT) 366 | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT) 367 | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) 368 | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT)); 369 370 last = i; /* descriptor that will get completion IRQ */ 371 372 if (++i == que->num_desc) 373 i = 0; 374 375 buf->m_head = NULL; 376 buf->eop_index = -1; 377 } 378 /* Set the last descriptor for report */ 379 txd->cmd_type_offset_bsz |= 380 htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT)); 381 txr->avail -= nsegs; 382 txr->next_avail = i; 383 384 buf->m_head = m_head; 385 /* Swap the dma map between the first and last descriptor */ 386 txr->buffers[first].map = buf->map; 387 buf->map = map; 388 bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE); 389 390 /* Set the index of the descriptor that will be marked done */ 391 buf = &txr->buffers[first]; 392 buf->eop_index = last; 393 394 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 395 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 396 /* 397 * Advance the Transmit Descriptor Tail (Tdt), this tells the 398 * hardware that this frame is available to transmit. 399 */ 400 ++txr->total_packets; 401 wr32(hw, txr->tail, i); 402 403 /* Mark outstanding work */ 404 atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); 405 return (0); 406 407 xmit_fail: 408 bus_dmamap_unload(tag, buf->map); 409 return (error); 410 } 411 412 413 /********************************************************************* 414 * 415 * Allocate memory for tx_buffer structures. The tx_buffer stores all 416 * the information needed to transmit a packet on the wire. This is 417 * called only once at attach, setup is done every reset. 
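 * Two DMA tags are created here: a default TX tag limited to
 * IXL_MAX_TX_SEGS segments and a separate TSO tag that allows up to
 * IXL_MAX_TSO_SEGS segments.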
418 * 419 **********************************************************************/ 420 int 421 ixl_allocate_tx_data(struct ixl_queue *que) 422 { 423 struct tx_ring *txr = &que->txr; 424 struct ixl_vsi *vsi = que->vsi; 425 device_t dev = vsi->dev; 426 struct ixl_tx_buf *buf; 427 int error = 0; 428 429 /* 430 * Setup DMA descriptor areas. 431 */ 432 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 433 1, 0, /* alignment, bounds */ 434 BUS_SPACE_MAXADDR, /* lowaddr */ 435 BUS_SPACE_MAXADDR, /* highaddr */ 436 NULL, NULL, /* filter, filterarg */ 437 IXL_TSO_SIZE, /* maxsize */ 438 IXL_MAX_TX_SEGS, /* nsegments */ 439 PAGE_SIZE, /* maxsegsize */ 440 0, /* flags */ 441 NULL, /* lockfunc */ 442 NULL, /* lockfuncarg */ 443 &txr->tx_tag))) { 444 device_printf(dev,"Unable to allocate TX DMA tag\n"); 445 goto fail; 446 } 447 448 /* Make a special tag for TSO */ 449 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 450 1, 0, /* alignment, bounds */ 451 BUS_SPACE_MAXADDR, /* lowaddr */ 452 BUS_SPACE_MAXADDR, /* highaddr */ 453 NULL, NULL, /* filter, filterarg */ 454 IXL_TSO_SIZE, /* maxsize */ 455 IXL_MAX_TSO_SEGS, /* nsegments */ 456 PAGE_SIZE, /* maxsegsize */ 457 0, /* flags */ 458 NULL, /* lockfunc */ 459 NULL, /* lockfuncarg */ 460 &txr->tso_tag))) { 461 device_printf(dev,"Unable to allocate TX TSO DMA tag\n"); 462 goto fail; 463 } 464 465 if (!(txr->buffers = 466 (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) * 467 que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 468 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 469 error = ENOMEM; 470 goto fail; 471 } 472 473 /* Create the descriptor buffer default dma maps */ 474 buf = txr->buffers; 475 for (int i = 0; i < que->num_desc; i++, buf++) { 476 buf->tag = txr->tx_tag; 477 error = bus_dmamap_create(buf->tag, 0, &buf->map); 478 if (error != 0) { 479 device_printf(dev, "Unable to create TX DMA map\n"); 480 goto fail; 481 } 482 } 483 fail: 484 return (error); 485 } 486 487 488 /********************************************************************* 489 * 490 * (Re)Initialize a queue transmit ring. 491 * - called by init, it clears the descriptor ring, 492 * and frees any stale mbufs 493 * 494 **********************************************************************/ 495 void 496 ixl_init_tx_ring(struct ixl_queue *que) 497 { 498 #ifdef DEV_NETMAP 499 struct netmap_adapter *na = NA(que->vsi->ifp); 500 struct netmap_slot *slot; 501 #endif /* DEV_NETMAP */ 502 struct tx_ring *txr = &que->txr; 503 struct ixl_tx_buf *buf; 504 505 /* Clear the old ring contents */ 506 IXL_TX_LOCK(txr); 507 508 #ifdef DEV_NETMAP 509 /* 510 * (under lock): if in netmap mode, do some consistency 511 * checks and set slot to entry 0 of the netmap ring. 512 */ 513 slot = netmap_reset(na, NR_TX, que->me, 0); 514 #endif /* DEV_NETMAP */ 515 516 bzero((void *)txr->base, 517 (sizeof(struct i40e_tx_desc)) * que->num_desc); 518 519 /* Reset indices */ 520 txr->next_avail = 0; 521 txr->next_to_clean = 0; 522 523 /* Reset watchdog status */ 524 txr->watchdog_timer = 0; 525 526 #ifdef IXL_FDIR 527 /* Initialize flow director */ 528 txr->atr_rate = ixl_atr_rate; 529 txr->atr_count = 0; 530 #endif 531 /* Free any existing tx mbufs. 
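         * In netmap mode the buffer map is pointed at the netmap-owned
         * buffer instead; each buffer's EOP index is cleared either way.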
*/ 532 buf = txr->buffers; 533 for (int i = 0; i < que->num_desc; i++, buf++) { 534 if (buf->m_head != NULL) { 535 bus_dmamap_sync(buf->tag, buf->map, 536 BUS_DMASYNC_POSTWRITE); 537 bus_dmamap_unload(buf->tag, buf->map); 538 m_freem(buf->m_head); 539 buf->m_head = NULL; 540 } 541 #ifdef DEV_NETMAP 542 /* 543 * In netmap mode, set the map for the packet buffer. 544 * NOTE: Some drivers (not this one) also need to set 545 * the physical buffer address in the NIC ring. 546 * netmap_idx_n2k() maps a nic index, i, into the corresponding 547 * netmap slot index, si 548 */ 549 if (slot) { 550 int si = netmap_idx_n2k(&na->tx_rings[que->me], i); 551 netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si)); 552 } 553 #endif /* DEV_NETMAP */ 554 /* Clear the EOP index */ 555 buf->eop_index = -1; 556 } 557 558 /* Set number of descriptors available */ 559 txr->avail = que->num_desc; 560 561 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 562 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 563 IXL_TX_UNLOCK(txr); 564 } 565 566 567 /********************************************************************* 568 * 569 * Free transmit ring related data structures. 570 * 571 **********************************************************************/ 572 void 573 ixl_free_que_tx(struct ixl_queue *que) 574 { 575 struct tx_ring *txr = &que->txr; 576 struct ixl_tx_buf *buf; 577 578 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 579 580 for (int i = 0; i < que->num_desc; i++) { 581 buf = &txr->buffers[i]; 582 if (buf->m_head != NULL) { 583 bus_dmamap_sync(buf->tag, buf->map, 584 BUS_DMASYNC_POSTWRITE); 585 bus_dmamap_unload(buf->tag, 586 buf->map); 587 m_freem(buf->m_head); 588 buf->m_head = NULL; 589 if (buf->map != NULL) { 590 bus_dmamap_destroy(buf->tag, 591 buf->map); 592 buf->map = NULL; 593 } 594 } else if (buf->map != NULL) { 595 bus_dmamap_unload(buf->tag, 596 buf->map); 597 bus_dmamap_destroy(buf->tag, 598 buf->map); 599 buf->map = NULL; 600 } 601 } 602 if (txr->br != NULL) 603 buf_ring_free(txr->br, M_DEVBUF); 604 if (txr->buffers != NULL) { 605 free(txr->buffers, M_DEVBUF); 606 txr->buffers = NULL; 607 } 608 if (txr->tx_tag != NULL) { 609 bus_dma_tag_destroy(txr->tx_tag); 610 txr->tx_tag = NULL; 611 } 612 if (txr->tso_tag != NULL) { 613 bus_dma_tag_destroy(txr->tso_tag); 614 txr->tso_tag = NULL; 615 } 616 617 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 618 return; 619 } 620 621 /********************************************************************* 622 * 623 * Setup descriptor for hw offloads 624 * 625 **********************************************************************/ 626 627 static int 628 ixl_tx_setup_offload(struct ixl_queue *que, 629 struct mbuf *mp, u32 *cmd, u32 *off) 630 { 631 struct ether_vlan_header *eh; 632 #ifdef INET 633 struct ip *ip = NULL; 634 #endif 635 struct tcphdr *th = NULL; 636 #ifdef INET6 637 struct ip6_hdr *ip6; 638 #endif 639 int elen, ip_hlen = 0, tcp_hlen; 640 u16 etype; 641 u8 ipproto = 0; 642 bool tso = FALSE; 643 644 /* Set up the TSO context descriptor if required */ 645 if (mp->m_pkthdr.csum_flags & CSUM_TSO) { 646 tso = ixl_tso_setup(que, mp); 647 if (tso) 648 ++que->tso; 649 else 650 return (ENXIO); 651 } 652 653 /* 654 * Determine where frame payload starts. 655 * Jump over vlan headers if already present, 656 * helpful for QinQ too. 
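         * The resulting MAC and IP header lengths are encoded into the
         * descriptor offset field further below.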
657 */ 658 eh = mtod(mp, struct ether_vlan_header *); 659 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 660 etype = ntohs(eh->evl_proto); 661 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 662 } else { 663 etype = ntohs(eh->evl_encap_proto); 664 elen = ETHER_HDR_LEN; 665 } 666 667 switch (etype) { 668 #ifdef INET 669 case ETHERTYPE_IP: 670 ip = (struct ip *)(mp->m_data + elen); 671 ip_hlen = ip->ip_hl << 2; 672 ipproto = ip->ip_p; 673 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 674 /* The IP checksum must be recalculated with TSO */ 675 if (tso) 676 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 677 else 678 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; 679 break; 680 #endif 681 #ifdef INET6 682 case ETHERTYPE_IPV6: 683 ip6 = (struct ip6_hdr *)(mp->m_data + elen); 684 ip_hlen = sizeof(struct ip6_hdr); 685 ipproto = ip6->ip6_nxt; 686 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 687 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; 688 break; 689 #endif 690 default: 691 break; 692 } 693 694 *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 695 *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 696 697 switch (ipproto) { 698 case IPPROTO_TCP: 699 tcp_hlen = th->th_off << 2; 700 if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) { 701 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; 702 *off |= (tcp_hlen >> 2) << 703 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 704 } 705 #ifdef IXL_FDIR 706 ixl_atr(que, th, etype); 707 #endif 708 break; 709 case IPPROTO_UDP: 710 if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) { 711 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; 712 *off |= (sizeof(struct udphdr) >> 2) << 713 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 714 } 715 break; 716 717 case IPPROTO_SCTP: 718 if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) { 719 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 720 *off |= (sizeof(struct sctphdr) >> 2) << 721 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 722 } 723 /* Fall Thru */ 724 default: 725 break; 726 } 727 728 return (0); 729 } 730 731 732 /********************************************************************** 733 * 734 * Setup context for hardware segmentation offload (TSO) 735 * 736 **********************************************************************/ 737 static bool 738 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp) 739 { 740 struct tx_ring *txr = &que->txr; 741 struct i40e_tx_context_desc *TXD; 742 struct ixl_tx_buf *buf; 743 u32 cmd, mss, type, tsolen; 744 u16 etype; 745 int idx, elen, ip_hlen, tcp_hlen; 746 struct ether_vlan_header *eh; 747 #ifdef INET 748 struct ip *ip; 749 #endif 750 #ifdef INET6 751 struct ip6_hdr *ip6; 752 #endif 753 #if defined(INET6) || defined(INET) 754 struct tcphdr *th; 755 #endif 756 u64 type_cmd_tso_mss; 757 758 /* 759 * Determine where frame payload starts. 
 * Jump over vlan headers if already present
 */
        eh = mtod(mp, struct ether_vlan_header *);
        if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
                etype = eh->evl_proto;
        } else {
                elen = ETHER_HDR_LEN;
                etype = eh->evl_encap_proto;
        }

        switch (ntohs(etype)) {
#ifdef INET6
        case ETHERTYPE_IPV6:
                ip6 = (struct ip6_hdr *)(mp->m_data + elen);
                if (ip6->ip6_nxt != IPPROTO_TCP)
                        return (FALSE);
                ip_hlen = sizeof(struct ip6_hdr);
                th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
                th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
                tcp_hlen = th->th_off << 2;
                /*
                 * The corresponding flag is set by the stack in the IPv4
                 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
                 * So, set it here because the rest of the flow requires it.
                 */
                mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
                break;
#endif
#ifdef INET
        case ETHERTYPE_IP:
                ip = (struct ip *)(mp->m_data + elen);
                if (ip->ip_p != IPPROTO_TCP)
                        return (FALSE);
                ip->ip_sum = 0;
                ip_hlen = ip->ip_hl << 2;
                th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
                th->th_sum = in_pseudo(ip->ip_src.s_addr,
                    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
                tcp_hlen = th->th_off << 2;
                break;
#endif
        default:
                printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
                    __func__, ntohs(etype));
                return FALSE;
        }

        /* Ensure we have at least the IP+TCP header in the first mbuf. */
        if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
                return FALSE;

        idx = txr->next_avail;
        buf = &txr->buffers[idx];
        TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
        tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

        type = I40E_TX_DESC_DTYPE_CONTEXT;
        cmd = I40E_TX_CTX_DESC_TSO;
        /* TSO MSS must not be less than 64 */
        if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
                que->mss_too_small++;
                mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
        }
        mss = mp->m_pkthdr.tso_segsz;

        type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
            ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
            ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
            ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
        TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

        TXD->tunneling_params = htole32(0);
        buf->m_head = NULL;
        buf->eop_index = -1;

        if (++idx == que->num_desc)
                idx = 0;

        txr->avail--;
        txr->next_avail = idx;

        return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
        struct tx_ring *txr = &que->txr;
        void *head = &txr->base[que->num_desc];
        return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
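 *  Completion is detected via the head write-back value (see
 *  ixl_get_tx_head()) rather than a done bit in each descriptor.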
862 * 863 **********************************************************************/ 864 bool 865 ixl_txeof(struct ixl_queue *que) 866 { 867 struct tx_ring *txr = &que->txr; 868 u32 first, last, head, done, processed; 869 struct ixl_tx_buf *buf; 870 struct i40e_tx_desc *tx_desc, *eop_desc; 871 872 873 mtx_assert(&txr->mtx, MA_OWNED); 874 875 #ifdef DEV_NETMAP 876 // XXX todo: implement moderation 877 if (netmap_tx_irq(que->vsi->ifp, que->me)) 878 return FALSE; 879 #endif /* DEF_NETMAP */ 880 881 /* These are not the descriptors you seek, move along :) */ 882 if (txr->avail == que->num_desc) { 883 atomic_store_rel_32(&txr->watchdog_timer, 0); 884 return FALSE; 885 } 886 887 processed = 0; 888 first = txr->next_to_clean; 889 buf = &txr->buffers[first]; 890 tx_desc = (struct i40e_tx_desc *)&txr->base[first]; 891 last = buf->eop_index; 892 if (last == -1) 893 return FALSE; 894 eop_desc = (struct i40e_tx_desc *)&txr->base[last]; 895 896 /* Get the Head WB value */ 897 head = ixl_get_tx_head(que); 898 899 /* 900 ** Get the index of the first descriptor 901 ** BEYOND the EOP and call that 'done'. 902 ** I do this so the comparison in the 903 ** inner while loop below can be simple 904 */ 905 if (++last == que->num_desc) last = 0; 906 done = last; 907 908 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 909 BUS_DMASYNC_POSTREAD); 910 /* 911 ** The HEAD index of the ring is written in a 912 ** defined location, this rather than a done bit 913 ** is what is used to keep track of what must be 914 ** 'cleaned'. 915 */ 916 while (first != head) { 917 /* We clean the range of the packet */ 918 while (first != done) { 919 ++txr->avail; 920 ++processed; 921 922 if (buf->m_head) { 923 txr->bytes += /* for ITR adjustment */ 924 buf->m_head->m_pkthdr.len; 925 txr->tx_bytes += /* for TX stats */ 926 buf->m_head->m_pkthdr.len; 927 bus_dmamap_sync(buf->tag, 928 buf->map, 929 BUS_DMASYNC_POSTWRITE); 930 bus_dmamap_unload(buf->tag, 931 buf->map); 932 m_freem(buf->m_head); 933 buf->m_head = NULL; 934 } 935 buf->eop_index = -1; 936 937 if (++first == que->num_desc) 938 first = 0; 939 940 buf = &txr->buffers[first]; 941 tx_desc = &txr->base[first]; 942 } 943 ++txr->packets; 944 /* See if there is more work now */ 945 last = buf->eop_index; 946 if (last != -1) { 947 eop_desc = &txr->base[last]; 948 /* Get next done point */ 949 if (++last == que->num_desc) last = 0; 950 done = last; 951 } else 952 break; 953 } 954 bus_dmamap_sync(txr->dma.tag, txr->dma.map, 955 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 956 957 txr->next_to_clean = first; 958 959 960 /* 961 * If there are no pending descriptors, clear the timeout. 962 */ 963 if (txr->avail == que->num_desc) { 964 atomic_store_rel_32(&txr->watchdog_timer, 0); 965 return FALSE; 966 } 967 968 return TRUE; 969 } 970 971 /********************************************************************* 972 * 973 * Refresh mbuf buffers for RX descriptor rings 974 * - now keeps its own state so discards due to resource 975 * exhaustion are unnecessary, if an mbuf cannot be obtained 976 * it just returns, keeping its placeholder, thus it can simply 977 * be recalled to try again. 
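 *  The hardware tail register is only written when at least one
 *  descriptor was actually refreshed.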
978 * 979 **********************************************************************/ 980 static void 981 ixl_refresh_mbufs(struct ixl_queue *que, int limit) 982 { 983 struct ixl_vsi *vsi = que->vsi; 984 struct rx_ring *rxr = &que->rxr; 985 bus_dma_segment_t hseg[1]; 986 bus_dma_segment_t pseg[1]; 987 struct ixl_rx_buf *buf; 988 struct mbuf *mh, *mp; 989 int i, j, nsegs, error; 990 bool refreshed = FALSE; 991 992 i = j = rxr->next_refresh; 993 /* Control the loop with one beyond */ 994 if (++j == que->num_desc) 995 j = 0; 996 997 while (j != limit) { 998 buf = &rxr->buffers[i]; 999 if (rxr->hdr_split == FALSE) 1000 goto no_split; 1001 1002 if (buf->m_head == NULL) { 1003 mh = m_gethdr(M_NOWAIT, MT_DATA); 1004 if (mh == NULL) 1005 goto update; 1006 } else 1007 mh = buf->m_head; 1008 1009 mh->m_pkthdr.len = mh->m_len = MHLEN; 1010 mh->m_len = MHLEN; 1011 mh->m_flags |= M_PKTHDR; 1012 /* Get the memory mapping */ 1013 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1014 buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); 1015 if (error != 0) { 1016 printf("Refresh mbufs: hdr dmamap load" 1017 " failure - %d\n", error); 1018 m_free(mh); 1019 buf->m_head = NULL; 1020 goto update; 1021 } 1022 buf->m_head = mh; 1023 bus_dmamap_sync(rxr->htag, buf->hmap, 1024 BUS_DMASYNC_PREREAD); 1025 rxr->base[i].read.hdr_addr = 1026 htole64(hseg[0].ds_addr); 1027 1028 no_split: 1029 if (buf->m_pack == NULL) { 1030 mp = m_getjcl(M_NOWAIT, MT_DATA, 1031 M_PKTHDR, rxr->mbuf_sz); 1032 if (mp == NULL) 1033 goto update; 1034 } else 1035 mp = buf->m_pack; 1036 1037 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1038 /* Get the memory mapping */ 1039 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1040 buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); 1041 if (error != 0) { 1042 printf("Refresh mbufs: payload dmamap load" 1043 " failure - %d\n", error); 1044 m_free(mp); 1045 buf->m_pack = NULL; 1046 goto update; 1047 } 1048 buf->m_pack = mp; 1049 bus_dmamap_sync(rxr->ptag, buf->pmap, 1050 BUS_DMASYNC_PREREAD); 1051 rxr->base[i].read.pkt_addr = 1052 htole64(pseg[0].ds_addr); 1053 /* Used only when doing header split */ 1054 rxr->base[i].read.hdr_addr = 0; 1055 1056 refreshed = TRUE; 1057 /* Next is precalculated */ 1058 i = j; 1059 rxr->next_refresh = i; 1060 if (++j == que->num_desc) 1061 j = 0; 1062 } 1063 update: 1064 if (refreshed) /* Update hardware tail index */ 1065 wr32(vsi->hw, rxr->tail, rxr->next_refresh); 1066 return; 1067 } 1068 1069 1070 /********************************************************************* 1071 * 1072 * Allocate memory for rx_buffer structures. Since we use one 1073 * rx_buffer per descriptor, the maximum number of rx_buffer's 1074 * that we'll need is equal to the number of receive descriptors 1075 * that we've defined. 
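 *  Each rx_buffer carries two DMA maps: one on the header tag (htag,
 *  MSIZE) and one on the payload tag (ptag, up to MJUM16BYTES).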
1076 * 1077 **********************************************************************/ 1078 int 1079 ixl_allocate_rx_data(struct ixl_queue *que) 1080 { 1081 struct rx_ring *rxr = &que->rxr; 1082 struct ixl_vsi *vsi = que->vsi; 1083 device_t dev = vsi->dev; 1084 struct ixl_rx_buf *buf; 1085 int i, bsize, error; 1086 1087 bsize = sizeof(struct ixl_rx_buf) * que->num_desc; 1088 if (!(rxr->buffers = 1089 (struct ixl_rx_buf *) malloc(bsize, 1090 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1091 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 1092 error = ENOMEM; 1093 return (error); 1094 } 1095 1096 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1097 1, 0, /* alignment, bounds */ 1098 BUS_SPACE_MAXADDR, /* lowaddr */ 1099 BUS_SPACE_MAXADDR, /* highaddr */ 1100 NULL, NULL, /* filter, filterarg */ 1101 MSIZE, /* maxsize */ 1102 1, /* nsegments */ 1103 MSIZE, /* maxsegsize */ 1104 0, /* flags */ 1105 NULL, /* lockfunc */ 1106 NULL, /* lockfuncarg */ 1107 &rxr->htag))) { 1108 device_printf(dev, "Unable to create RX DMA htag\n"); 1109 return (error); 1110 } 1111 1112 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1113 1, 0, /* alignment, bounds */ 1114 BUS_SPACE_MAXADDR, /* lowaddr */ 1115 BUS_SPACE_MAXADDR, /* highaddr */ 1116 NULL, NULL, /* filter, filterarg */ 1117 MJUM16BYTES, /* maxsize */ 1118 1, /* nsegments */ 1119 MJUM16BYTES, /* maxsegsize */ 1120 0, /* flags */ 1121 NULL, /* lockfunc */ 1122 NULL, /* lockfuncarg */ 1123 &rxr->ptag))) { 1124 device_printf(dev, "Unable to create RX DMA ptag\n"); 1125 return (error); 1126 } 1127 1128 for (i = 0; i < que->num_desc; i++) { 1129 buf = &rxr->buffers[i]; 1130 error = bus_dmamap_create(rxr->htag, 1131 BUS_DMA_NOWAIT, &buf->hmap); 1132 if (error) { 1133 device_printf(dev, "Unable to create RX head map\n"); 1134 break; 1135 } 1136 error = bus_dmamap_create(rxr->ptag, 1137 BUS_DMA_NOWAIT, &buf->pmap); 1138 if (error) { 1139 device_printf(dev, "Unable to create RX pkt map\n"); 1140 break; 1141 } 1142 } 1143 1144 return (error); 1145 } 1146 1147 1148 /********************************************************************* 1149 * 1150 * (Re)Initialize the queue receive ring and its buffers. 
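 *  Any stale mbufs are freed, the ring is replenished, and soft LRO
 *  is initialized when IFCAP_LRO is enabled on the interface.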
1151 * 1152 **********************************************************************/ 1153 int 1154 ixl_init_rx_ring(struct ixl_queue *que) 1155 { 1156 struct rx_ring *rxr = &que->rxr; 1157 struct ixl_vsi *vsi = que->vsi; 1158 #if defined(INET6) || defined(INET) 1159 struct ifnet *ifp = vsi->ifp; 1160 struct lro_ctrl *lro = &rxr->lro; 1161 #endif 1162 struct ixl_rx_buf *buf; 1163 bus_dma_segment_t pseg[1], hseg[1]; 1164 int rsize, nsegs, error = 0; 1165 #ifdef DEV_NETMAP 1166 struct netmap_adapter *na = NA(que->vsi->ifp); 1167 struct netmap_slot *slot; 1168 #endif /* DEV_NETMAP */ 1169 1170 IXL_RX_LOCK(rxr); 1171 #ifdef DEV_NETMAP 1172 /* same as in ixl_init_tx_ring() */ 1173 slot = netmap_reset(na, NR_RX, que->me, 0); 1174 #endif /* DEV_NETMAP */ 1175 /* Clear the ring contents */ 1176 rsize = roundup2(que->num_desc * 1177 sizeof(union i40e_rx_desc), DBA_ALIGN); 1178 bzero((void *)rxr->base, rsize); 1179 /* Cleanup any existing buffers */ 1180 for (int i = 0; i < que->num_desc; i++) { 1181 buf = &rxr->buffers[i]; 1182 if (buf->m_head != NULL) { 1183 bus_dmamap_sync(rxr->htag, buf->hmap, 1184 BUS_DMASYNC_POSTREAD); 1185 bus_dmamap_unload(rxr->htag, buf->hmap); 1186 buf->m_head->m_flags |= M_PKTHDR; 1187 m_freem(buf->m_head); 1188 } 1189 if (buf->m_pack != NULL) { 1190 bus_dmamap_sync(rxr->ptag, buf->pmap, 1191 BUS_DMASYNC_POSTREAD); 1192 bus_dmamap_unload(rxr->ptag, buf->pmap); 1193 buf->m_pack->m_flags |= M_PKTHDR; 1194 m_freem(buf->m_pack); 1195 } 1196 buf->m_head = NULL; 1197 buf->m_pack = NULL; 1198 } 1199 1200 /* header split is off */ 1201 rxr->hdr_split = FALSE; 1202 1203 /* Now replenish the mbufs */ 1204 for (int j = 0; j != que->num_desc; ++j) { 1205 struct mbuf *mh, *mp; 1206 1207 buf = &rxr->buffers[j]; 1208 #ifdef DEV_NETMAP 1209 /* 1210 * In netmap mode, fill the map and set the buffer 1211 * address in the NIC ring, considering the offset 1212 * between the netmap and NIC rings (see comment in 1213 * ixgbe_setup_transmit_ring() ). 
No need to allocate 1214 * an mbuf, so end the block with a continue; 1215 */ 1216 if (slot) { 1217 int sj = netmap_idx_n2k(&na->rx_rings[que->me], j); 1218 uint64_t paddr; 1219 void *addr; 1220 1221 addr = PNMB(na, slot + sj, &paddr); 1222 netmap_load_map(na, rxr->dma.tag, buf->pmap, addr); 1223 /* Update descriptor and the cached value */ 1224 rxr->base[j].read.pkt_addr = htole64(paddr); 1225 rxr->base[j].read.hdr_addr = 0; 1226 continue; 1227 } 1228 #endif /* DEV_NETMAP */ 1229 /* 1230 ** Don't allocate mbufs if not 1231 ** doing header split, its wasteful 1232 */ 1233 if (rxr->hdr_split == FALSE) 1234 goto skip_head; 1235 1236 /* First the header */ 1237 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA); 1238 if (buf->m_head == NULL) { 1239 error = ENOBUFS; 1240 goto fail; 1241 } 1242 m_adj(buf->m_head, ETHER_ALIGN); 1243 mh = buf->m_head; 1244 mh->m_len = mh->m_pkthdr.len = MHLEN; 1245 mh->m_flags |= M_PKTHDR; 1246 /* Get the memory mapping */ 1247 error = bus_dmamap_load_mbuf_sg(rxr->htag, 1248 buf->hmap, buf->m_head, hseg, 1249 &nsegs, BUS_DMA_NOWAIT); 1250 if (error != 0) /* Nothing elegant to do here */ 1251 goto fail; 1252 bus_dmamap_sync(rxr->htag, 1253 buf->hmap, BUS_DMASYNC_PREREAD); 1254 /* Update descriptor */ 1255 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr); 1256 1257 skip_head: 1258 /* Now the payload cluster */ 1259 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, 1260 M_PKTHDR, rxr->mbuf_sz); 1261 if (buf->m_pack == NULL) { 1262 error = ENOBUFS; 1263 goto fail; 1264 } 1265 mp = buf->m_pack; 1266 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1267 /* Get the memory mapping */ 1268 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 1269 buf->pmap, mp, pseg, 1270 &nsegs, BUS_DMA_NOWAIT); 1271 if (error != 0) 1272 goto fail; 1273 bus_dmamap_sync(rxr->ptag, 1274 buf->pmap, BUS_DMASYNC_PREREAD); 1275 /* Update descriptor */ 1276 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr); 1277 rxr->base[j].read.hdr_addr = 0; 1278 } 1279 1280 1281 /* Setup our descriptor indices */ 1282 rxr->next_check = 0; 1283 rxr->next_refresh = 0; 1284 rxr->lro_enabled = FALSE; 1285 rxr->split = 0; 1286 rxr->bytes = 0; 1287 rxr->discard = FALSE; 1288 1289 wr32(vsi->hw, rxr->tail, que->num_desc - 1); 1290 ixl_flush(vsi->hw); 1291 1292 #if defined(INET6) || defined(INET) 1293 /* 1294 ** Now set up the LRO interface: 1295 */ 1296 if (ifp->if_capenable & IFCAP_LRO) { 1297 int err = tcp_lro_init(lro); 1298 if (err) { 1299 if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me); 1300 goto fail; 1301 } 1302 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me); 1303 rxr->lro_enabled = TRUE; 1304 lro->ifp = vsi->ifp; 1305 } 1306 #endif 1307 1308 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1309 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1310 1311 fail: 1312 IXL_RX_UNLOCK(rxr); 1313 return (error); 1314 } 1315 1316 1317 /********************************************************************* 1318 * 1319 * Free station receive ring data structures 1320 * 1321 **********************************************************************/ 1322 void 1323 ixl_free_que_rx(struct ixl_queue *que) 1324 { 1325 struct rx_ring *rxr = &que->rxr; 1326 struct ixl_rx_buf *buf; 1327 1328 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); 1329 1330 /* Cleanup any existing buffers */ 1331 if (rxr->buffers != NULL) { 1332 for (int i = 0; i < que->num_desc; i++) { 1333 buf = &rxr->buffers[i]; 1334 if (buf->m_head != NULL) { 1335 bus_dmamap_sync(rxr->htag, buf->hmap, 1336 BUS_DMASYNC_POSTREAD); 1337 
bus_dmamap_unload(rxr->htag, buf->hmap); 1338 buf->m_head->m_flags |= M_PKTHDR; 1339 m_freem(buf->m_head); 1340 } 1341 if (buf->m_pack != NULL) { 1342 bus_dmamap_sync(rxr->ptag, buf->pmap, 1343 BUS_DMASYNC_POSTREAD); 1344 bus_dmamap_unload(rxr->ptag, buf->pmap); 1345 buf->m_pack->m_flags |= M_PKTHDR; 1346 m_freem(buf->m_pack); 1347 } 1348 buf->m_head = NULL; 1349 buf->m_pack = NULL; 1350 if (buf->hmap != NULL) { 1351 bus_dmamap_destroy(rxr->htag, buf->hmap); 1352 buf->hmap = NULL; 1353 } 1354 if (buf->pmap != NULL) { 1355 bus_dmamap_destroy(rxr->ptag, buf->pmap); 1356 buf->pmap = NULL; 1357 } 1358 } 1359 if (rxr->buffers != NULL) { 1360 free(rxr->buffers, M_DEVBUF); 1361 rxr->buffers = NULL; 1362 } 1363 } 1364 1365 if (rxr->htag != NULL) { 1366 bus_dma_tag_destroy(rxr->htag); 1367 rxr->htag = NULL; 1368 } 1369 if (rxr->ptag != NULL) { 1370 bus_dma_tag_destroy(rxr->ptag); 1371 rxr->ptag = NULL; 1372 } 1373 1374 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); 1375 return; 1376 } 1377 1378 static inline void 1379 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype) 1380 { 1381 1382 #if defined(INET6) || defined(INET) 1383 /* 1384 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet 1385 * should be computed by hardware. Also it should not have VLAN tag in 1386 * ethernet header. 1387 */ 1388 if (rxr->lro_enabled && 1389 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 1390 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 1391 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { 1392 /* 1393 * Send to the stack if: 1394 ** - LRO not enabled, or 1395 ** - no LRO resources, or 1396 ** - lro enqueue fails 1397 */ 1398 if (rxr->lro.lro_cnt != 0) 1399 if (tcp_lro_rx(&rxr->lro, m, 0) == 0) 1400 return; 1401 } 1402 #endif 1403 (*ifp->if_input)(ifp, m); 1404 } 1405 1406 1407 static inline void 1408 ixl_rx_discard(struct rx_ring *rxr, int i) 1409 { 1410 struct ixl_rx_buf *rbuf; 1411 1412 rbuf = &rxr->buffers[i]; 1413 1414 if (rbuf->fmp != NULL) {/* Partial chain ? */ 1415 rbuf->fmp->m_flags |= M_PKTHDR; 1416 m_freem(rbuf->fmp); 1417 rbuf->fmp = NULL; 1418 } 1419 1420 /* 1421 ** With advanced descriptors the writeback 1422 ** clobbers the buffer addrs, so its easier 1423 ** to just free the existing mbufs and take 1424 ** the normal refresh path to get new buffers 1425 ** and mapping. 1426 */ 1427 if (rbuf->m_head) { 1428 m_free(rbuf->m_head); 1429 rbuf->m_head = NULL; 1430 } 1431 1432 if (rbuf->m_pack) { 1433 m_free(rbuf->m_pack); 1434 rbuf->m_pack = NULL; 1435 } 1436 1437 return; 1438 } 1439 1440 #ifdef RSS 1441 /* 1442 ** i40e_ptype_to_hash: parse the packet type 1443 ** to determine the appropriate hash. 
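** Unknown and non-IP packet types fall back to M_HASHTYPE_OPAQUE_HASH.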
1444 */ 1445 static inline int 1446 ixl_ptype_to_hash(u8 ptype) 1447 { 1448 struct i40e_rx_ptype_decoded decoded; 1449 1450 decoded = decode_rx_desc_ptype(ptype); 1451 1452 if (!decoded.known) 1453 return M_HASHTYPE_OPAQUE_HASH; 1454 1455 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 1456 return M_HASHTYPE_OPAQUE_HASH; 1457 1458 /* Note: anything that gets to this point is IP */ 1459 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 1460 switch (decoded.inner_prot) { 1461 case I40E_RX_PTYPE_INNER_PROT_TCP: 1462 return M_HASHTYPE_RSS_TCP_IPV6; 1463 case I40E_RX_PTYPE_INNER_PROT_UDP: 1464 return M_HASHTYPE_RSS_UDP_IPV6; 1465 default: 1466 return M_HASHTYPE_RSS_IPV6; 1467 } 1468 } 1469 if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 1470 switch (decoded.inner_prot) { 1471 case I40E_RX_PTYPE_INNER_PROT_TCP: 1472 return M_HASHTYPE_RSS_TCP_IPV4; 1473 case I40E_RX_PTYPE_INNER_PROT_UDP: 1474 return M_HASHTYPE_RSS_UDP_IPV4; 1475 default: 1476 return M_HASHTYPE_RSS_IPV4; 1477 } 1478 } 1479 /* We should never get here!! */ 1480 return M_HASHTYPE_OPAQUE_HASH; 1481 } 1482 #endif /* RSS */ 1483 1484 /********************************************************************* 1485 * 1486 * This routine executes in interrupt context. It replenishes 1487 * the mbufs in the descriptor and sends data which has been 1488 * dma'ed into host memory to upper layer. 1489 * 1490 * We loop at most count times if count is > 0, or until done if 1491 * count < 0. 1492 * 1493 * Return TRUE for more work, FALSE for all clean. 1494 *********************************************************************/ 1495 bool 1496 ixl_rxeof(struct ixl_queue *que, int count) 1497 { 1498 struct ixl_vsi *vsi = que->vsi; 1499 struct rx_ring *rxr = &que->rxr; 1500 struct ifnet *ifp = vsi->ifp; 1501 #if defined(INET6) || defined(INET) 1502 struct lro_ctrl *lro = &rxr->lro; 1503 #endif 1504 int i, nextp, processed = 0; 1505 union i40e_rx_desc *cur; 1506 struct ixl_rx_buf *rbuf, *nbuf; 1507 1508 1509 IXL_RX_LOCK(rxr); 1510 1511 #ifdef DEV_NETMAP 1512 if (netmap_rx_irq(ifp, que->me, &count)) { 1513 IXL_RX_UNLOCK(rxr); 1514 return (FALSE); 1515 } 1516 #endif /* DEV_NETMAP */ 1517 1518 for (i = rxr->next_check; count != 0;) { 1519 struct mbuf *sendmp, *mh, *mp; 1520 u32 status, error; 1521 u16 hlen, plen, vtag; 1522 u64 qword; 1523 u8 ptype; 1524 bool eop; 1525 1526 /* Sync the ring. 
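                 * Then read the status, error, length and ptype fields
                 * from qword1 of the current descriptor.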
*/ 1527 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1528 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1529 1530 cur = &rxr->base[i]; 1531 qword = le64toh(cur->wb.qword1.status_error_len); 1532 status = (qword & I40E_RXD_QW1_STATUS_MASK) 1533 >> I40E_RXD_QW1_STATUS_SHIFT; 1534 error = (qword & I40E_RXD_QW1_ERROR_MASK) 1535 >> I40E_RXD_QW1_ERROR_SHIFT; 1536 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) 1537 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1538 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) 1539 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; 1540 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) 1541 >> I40E_RXD_QW1_PTYPE_SHIFT; 1542 1543 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) { 1544 ++rxr->not_done; 1545 break; 1546 } 1547 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1548 break; 1549 1550 count--; 1551 sendmp = NULL; 1552 nbuf = NULL; 1553 cur->wb.qword1.status_error_len = 0; 1554 rbuf = &rxr->buffers[i]; 1555 mh = rbuf->m_head; 1556 mp = rbuf->m_pack; 1557 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)); 1558 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) 1559 vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); 1560 else 1561 vtag = 0; 1562 1563 /* Remove device access to the rx buffers. */ 1564 if (rbuf->m_head != NULL) { 1565 bus_dmamap_sync(rxr->htag, rbuf->hmap, 1566 BUS_DMASYNC_POSTREAD); 1567 bus_dmamap_unload(rxr->htag, rbuf->hmap); 1568 } 1569 if (rbuf->m_pack != NULL) { 1570 bus_dmamap_sync(rxr->ptag, rbuf->pmap, 1571 BUS_DMASYNC_POSTREAD); 1572 bus_dmamap_unload(rxr->ptag, rbuf->pmap); 1573 } 1574 1575 /* 1576 ** Make sure bad packets are discarded, 1577 ** note that only EOP descriptor has valid 1578 ** error results. 1579 */ 1580 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { 1581 rxr->desc_errs++; 1582 ixl_rx_discard(rxr, i); 1583 goto next_desc; 1584 } 1585 1586 /* Prefetch the next buffer */ 1587 if (!eop) { 1588 nextp = i + 1; 1589 if (nextp == que->num_desc) 1590 nextp = 0; 1591 nbuf = &rxr->buffers[nextp]; 1592 prefetch(nbuf); 1593 } 1594 1595 /* 1596 ** The header mbuf is ONLY used when header 1597 ** split is enabled, otherwise we get normal 1598 ** behavior, ie, both header and payload 1599 ** are DMA'd into the payload buffer. 1600 ** 1601 ** Rather than using the fmp/lmp global pointers 1602 ** we now keep the head of a packet chain in the 1603 ** buffer struct and pass this along from one 1604 ** descriptor to the next, until we get EOP. 1605 */ 1606 if (rxr->hdr_split && (rbuf->fmp == NULL)) { 1607 if (hlen > IXL_RX_HDR) 1608 hlen = IXL_RX_HDR; 1609 mh->m_len = hlen; 1610 mh->m_flags |= M_PKTHDR; 1611 mh->m_next = NULL; 1612 mh->m_pkthdr.len = mh->m_len; 1613 /* Null buf pointer so it is refreshed */ 1614 rbuf->m_head = NULL; 1615 /* 1616 ** Check the payload length, this 1617 ** could be zero if its a small 1618 ** packet. 1619 */ 1620 if (plen > 0) { 1621 mp->m_len = plen; 1622 mp->m_next = NULL; 1623 mp->m_flags &= ~M_PKTHDR; 1624 mh->m_next = mp; 1625 mh->m_pkthdr.len += mp->m_len; 1626 /* Null buf pointer so it is refreshed */ 1627 rbuf->m_pack = NULL; 1628 rxr->split++; 1629 } 1630 /* 1631 ** Now create the forward 1632 ** chain so when complete 1633 ** we wont have to. 
1634 */ 1635 if (eop == 0) { 1636 /* stash the chain head */ 1637 nbuf->fmp = mh; 1638 /* Make forward chain */ 1639 if (plen) 1640 mp->m_next = nbuf->m_pack; 1641 else 1642 mh->m_next = nbuf->m_pack; 1643 } else { 1644 /* Singlet, prepare to send */ 1645 sendmp = mh; 1646 if (vtag) { 1647 sendmp->m_pkthdr.ether_vtag = vtag; 1648 sendmp->m_flags |= M_VLANTAG; 1649 } 1650 } 1651 } else { 1652 /* 1653 ** Either no header split, or a 1654 ** secondary piece of a fragmented 1655 ** split packet. 1656 */ 1657 mp->m_len = plen; 1658 /* 1659 ** See if there is a stored head 1660 ** that determines what we are 1661 */ 1662 sendmp = rbuf->fmp; 1663 rbuf->m_pack = rbuf->fmp = NULL; 1664 1665 if (sendmp != NULL) /* secondary frag */ 1666 sendmp->m_pkthdr.len += mp->m_len; 1667 else { 1668 /* first desc of a non-ps chain */ 1669 sendmp = mp; 1670 sendmp->m_flags |= M_PKTHDR; 1671 sendmp->m_pkthdr.len = mp->m_len; 1672 } 1673 /* Pass the head pointer on */ 1674 if (eop == 0) { 1675 nbuf->fmp = sendmp; 1676 sendmp = NULL; 1677 mp->m_next = nbuf->m_pack; 1678 } 1679 } 1680 ++processed; 1681 /* Sending this frame? */ 1682 if (eop) { 1683 sendmp->m_pkthdr.rcvif = ifp; 1684 /* gather stats */ 1685 rxr->rx_packets++; 1686 rxr->rx_bytes += sendmp->m_pkthdr.len; 1687 /* capture data for dynamic ITR adjustment */ 1688 rxr->packets++; 1689 rxr->bytes += sendmp->m_pkthdr.len; 1690 /* Set VLAN tag (field only valid in eop desc) */ 1691 if (vtag) { 1692 sendmp->m_pkthdr.ether_vtag = vtag; 1693 sendmp->m_flags |= M_VLANTAG; 1694 } 1695 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 1696 ixl_rx_checksum(sendmp, status, error, ptype); 1697 #ifdef RSS 1698 sendmp->m_pkthdr.flowid = 1699 le32toh(cur->wb.qword0.hi_dword.rss); 1700 M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype)); 1701 #else 1702 sendmp->m_pkthdr.flowid = que->msix; 1703 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); 1704 #endif 1705 } 1706 next_desc: 1707 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, 1708 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1709 1710 /* Advance our pointers to the next descriptor. */ 1711 if (++i == que->num_desc) 1712 i = 0; 1713 1714 /* Now send to the stack or do LRO */ 1715 if (sendmp != NULL) { 1716 rxr->next_check = i; 1717 IXL_RX_UNLOCK(rxr); 1718 ixl_rx_input(rxr, ifp, sendmp, ptype); 1719 IXL_RX_LOCK(rxr); 1720 i = rxr->next_check; 1721 } 1722 1723 /* Every 8 descriptors we go to refresh mbufs */ 1724 if (processed == 8) { 1725 ixl_refresh_mbufs(que, i); 1726 processed = 0; 1727 } 1728 } 1729 1730 /* Refresh any remaining buf structs */ 1731 if (ixl_rx_unrefreshed(que)) 1732 ixl_refresh_mbufs(que, i); 1733 1734 rxr->next_check = i; 1735 1736 IXL_RX_UNLOCK(rxr); 1737 1738 #if defined(INET6) || defined(INET) 1739 /* 1740 * Flush any outstanding LRO work 1741 */ 1742 #if __FreeBSD_version >= 1100105 1743 tcp_lro_flush_all(lro); 1744 #else 1745 struct lro_entry *queued; 1746 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { 1747 SLIST_REMOVE_HEAD(&lro->lro_active, next); 1748 tcp_lro_flush(lro, queued); 1749 } 1750 #endif 1751 #endif /* defined(INET6) || defined(INET) */ 1752 1753 return (FALSE); 1754 } 1755 1756 1757 /********************************************************************* 1758 * 1759 * Verify that the hardware indicated that the checksum is valid. 1760 * Inform the stack about the status of checksum so that stack 1761 * doesn't spend time verifying the checksum. 
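 *  IPv6 packets with extension headers are left unmarked since the
 *  hardware checksum result may be unreliable for them.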
1762 * 1763 *********************************************************************/ 1764 static void 1765 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype) 1766 { 1767 struct i40e_rx_ptype_decoded decoded; 1768 1769 decoded = decode_rx_desc_ptype(ptype); 1770 1771 /* Errors? */ 1772 if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | 1773 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { 1774 mp->m_pkthdr.csum_flags = 0; 1775 return; 1776 } 1777 1778 /* IPv6 with extension headers likely have bad csum */ 1779 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1780 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) 1781 if (status & 1782 (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { 1783 mp->m_pkthdr.csum_flags = 0; 1784 return; 1785 } 1786 1787 1788 /* IP Checksum Good */ 1789 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 1790 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; 1791 1792 if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) { 1793 mp->m_pkthdr.csum_flags |= 1794 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1795 mp->m_pkthdr.csum_data |= htons(0xffff); 1796 } 1797 return; 1798 } 1799 1800 #if __FreeBSD_version >= 1100000 1801 uint64_t 1802 ixl_get_counter(if_t ifp, ift_counter cnt) 1803 { 1804 struct ixl_vsi *vsi; 1805 1806 vsi = if_getsoftc(ifp); 1807 1808 switch (cnt) { 1809 case IFCOUNTER_IPACKETS: 1810 return (vsi->ipackets); 1811 case IFCOUNTER_IERRORS: 1812 return (vsi->ierrors); 1813 case IFCOUNTER_OPACKETS: 1814 return (vsi->opackets); 1815 case IFCOUNTER_OERRORS: 1816 return (vsi->oerrors); 1817 case IFCOUNTER_COLLISIONS: 1818 /* Collisions are by standard impossible in 40G/10G Ethernet */ 1819 return (0); 1820 case IFCOUNTER_IBYTES: 1821 return (vsi->ibytes); 1822 case IFCOUNTER_OBYTES: 1823 return (vsi->obytes); 1824 case IFCOUNTER_IMCASTS: 1825 return (vsi->imcasts); 1826 case IFCOUNTER_OMCASTS: 1827 return (vsi->omcasts); 1828 case IFCOUNTER_IQDROPS: 1829 return (vsi->iqdrops); 1830 case IFCOUNTER_OQDROPS: 1831 return (vsi->oqdrops); 1832 case IFCOUNTER_NOPROTO: 1833 return (vsi->noproto); 1834 default: 1835 return (if_get_counter_default(ifp, cnt)); 1836 } 1837 } 1838 #endif 1839 1840