/******************************************************************************

  Copyright (c) 2013-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD$*/

/*
** IXL driver TX/RX Routines:
** This was separated to allow usage by
** both the PF and VF drivers.
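** The routines here cover descriptor ring setup, transmit mapping
** and offload, and receive cleanup for each queue pair.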
*/

#ifndef IXL_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif

/* Local Prototypes */
static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void	ixl_refresh_mbufs(struct ixl_queue *, int);
static int	ixl_xmit(struct ixl_queue *, struct mbuf **);
static int	ixl_tx_setup_offload(struct ixl_queue *,
		    struct mbuf *, u32 *, u32 *);
static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);

static inline void ixl_rx_discard(struct rx_ring *, int);
static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u8);

static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
static inline u32 ixl_get_tx_head(struct ixl_queue *que);

#ifdef DEV_NETMAP
#include <dev/netmap/if_ixl_netmap.h>
int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
#endif /* DEV_NETMAP */

/*
 * ixl_get_default_rss_key - copy the driver's default RSS hash key into @key
 */
void
ixl_get_default_rss_key(u32 *key)
{
	MPASS(key != NULL);

	u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
	    0x183cfd8c, 0xce880440, 0x580cbc3c,
	    0x35897377, 0x328b25e1, 0x4fa98922,
	    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,
	    0x0, 0x0, 0x0};

	bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
}

/*
** Multiqueue Transmit driver
*/
int
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct ixl_vsi		*vsi = ifp->if_softc;
	struct ixl_queue	*que;
	struct tx_ring		*txr;
	int			err, i;
#ifdef RSS
	u32			bucket_id;
#endif

	/*
	** Which queue to use:
	**
	** When doing RSS, map it to the same outbound
	** queue as the incoming flow would be mapped to.
	** If everything is set up correctly, it should be
	** the same bucket that the current CPU we're on is.
	*/
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % vsi->num_queues;
		} else
#endif
			i = m->m_pkthdr.flowid % vsi->num_queues;
	} else
		i = curcpu % vsi->num_queues;

	que = &vsi->queues[i];
	txr = &que->txr;

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXL_TX_TRYLOCK(txr)) {
		ixl_mq_start_locked(ifp, txr);
		IXL_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &que->tx_task);

	return (0);
}

int
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct ixl_queue	*que = txr->que;
	struct ixl_vsi		*vsi = que->vsi;
	struct mbuf		*next;
	int			err = 0;


	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    vsi->link_active == 0)
		return (ENETDOWN);

	/* Process the transmit queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixl_xmit(que, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
		ixl_txeof(que);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
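 * This runs when ixl_mq_start() could not take the TX lock and
 * handed the work off to the queue's taskqueue.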
 */
void
ixl_deferred_mq_start(void *arg, int pending)
{
	struct ixl_queue	*que = arg;
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	struct ifnet		*ifp = vsi->ifp;

	IXL_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixl_mq_start_locked(ifp, txr);
	IXL_TX_UNLOCK(txr);
}

/*
** Flush all queue ring buffers
*/
void
ixl_qflush(struct ifnet *ifp)
{
	struct ixl_vsi	*vsi = ifp->if_softc;

	for (int i = 0; i < vsi->num_queues; i++) {
		struct ixl_queue *que = &vsi->queues[i];
		struct tx_ring	*txr = &que->txr;
		struct mbuf	*m;
		IXL_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXL_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}

/*
** Find mbuf chains passed to the driver
** that are 'sparse', using more than 8
** mbufs to deliver an mss-size chunk of data
*/
static inline bool
ixl_tso_detect_sparse(struct mbuf *mp)
{
	struct mbuf	*m;
	int		num, mss;

	num = 0;
	mss = mp->m_pkthdr.tso_segsz;

	/* Exclude first mbuf; assume it contains all headers */
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len % mp->m_pkthdr.tso_segsz;

		if (mss < 1) {
			if (num > IXL_SPARSE_CHAIN)
				return (true);
			num = (mss == 0) ? 0 : 1;
			mss += mp->m_pkthdr.tso_segsz;
		}
	}

	return (false);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)

static int
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*txd = NULL;
	struct mbuf		*m_head, *m;
	int			i, j, error, nsegs;
	int			first, last = 0;
	u16			vtag = 0;
	u32			cmd, off;
	bus_dmamap_t		map;
	bus_dma_tag_t		tag;
	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];

	cmd = off = 0;
	m_head = *m_headp;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail;
	buf = &txr->buffers[first];
	map = buf->map;
	tag = txr->tx_tag;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		/* Use larger mapping for TSO */
		tag = txr->tso_tag;
		if (ixl_tso_detect_sparse(m_head)) {
			m = m_defrag(m_head, M_NOWAIT);
			if (m == NULL) {
				m_freem(*m_headp);
				*m_headp = NULL;
				return (ENOBUFS);
			}
			*m_headp = m;
		}
	}

	/*
	 * Map the packet for DMA.
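	 * If the scatter list is too long (EFBIG), the chain is
	 * defragmented once and the load is retried before giving up.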
	 */
	error = bus_dmamap_load_mbuf_sg(tag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_NOWAIT);
		if (m == NULL) {
			que->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(tag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error != 0) {
			que->tx_dmamap_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		que->tx_dmamap_failed++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->avail - 2) {
		txr->no_desc++;
		error = ENOBUFS;
		goto xmit_fail;
	}
	m_head = *m_headp;

	/* Set up the TSO/CSUM offload */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
		if (error)
			goto xmit_fail;
	}

	cmd |= I40E_TX_DESC_CMD_ICRC;
	/* Grab the VLAN tag */
	if (m_head->m_flags & M_VLANTAG) {
		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
		vtag = htole16(m_head->m_pkthdr.ether_vtag);
	}

	i = txr->next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		buf = &txr->buffers[i];
		buf->tag = tag; /* Keep track of the type tag */
		txd = &txr->base[i];
		seglen = segs[j].ds_len;

		txd->buffer_addr = htole64(segs[j].ds_addr);
		txd->cmd_type_offset_bsz =
		    htole64(I40E_TX_DESC_DTYPE_DATA
		    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
		    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
		    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

		last = i; /* descriptor that will get completion IRQ */

		if (++i == que->num_desc)
			i = 0;

		buf->m_head = NULL;
		buf->eop_index = -1;
	}
	/* Set the last descriptor for report */
	txd->cmd_type_offset_bsz |=
	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
	txr->avail -= nsegs;
	txr->next_avail = i;

	buf->m_head = m_head;
	/* Swap the dma map between the first and last descriptor */
	txr->buffers[first].map = buf->map;
	buf->map = map;
	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

	/* Set the index of the descriptor that will be marked done */
	buf = &txr->buffers[first];
	buf->eop_index = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	wr32(hw, txr->tail, i);

	/* Mark outstanding work */
	atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
	return (0);

xmit_fail:
	bus_dmamap_unload(tag, buf->map);
	return (error);
}


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixl_allocate_tx_data(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_tx_buf	*buf;
	int			error = 0;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			   1, 0,			/* alignment, bounds */
			   BUS_SPACE_MAXADDR,		/* lowaddr */
			   BUS_SPACE_MAXADDR,		/* highaddr */
			   NULL, NULL,			/* filter, filterarg */
			   IXL_TSO_SIZE,		/* maxsize */
			   IXL_MAX_TX_SEGS,		/* nsegments */
			   PAGE_SIZE,			/* maxsegsize */
			   0,				/* flags */
			   NULL,			/* lockfunc */
			   NULL,			/* lockfuncarg */
			   &txr->tx_tag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* Make a special tag for TSO */
	if ((error = bus_dma_tag_create(NULL,		/* parent */
			   1, 0,			/* alignment, bounds */
			   BUS_SPACE_MAXADDR,		/* lowaddr */
			   BUS_SPACE_MAXADDR,		/* highaddr */
			   NULL, NULL,			/* filter, filterarg */
			   IXL_TSO_SIZE,		/* maxsize */
			   IXL_MAX_TSO_SEGS,		/* nsegments */
			   PAGE_SIZE,			/* maxsegsize */
			   0,				/* flags */
			   NULL,			/* lockfunc */
			   NULL,			/* lockfuncarg */
			   &txr->tso_tag))) {
		device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
		goto fail;
	}

	if (!(txr->buffers =
	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer default dma maps */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		buf->tag = txr->tx_tag;
		error = bus_dmamap_create(buf->tag, 0, &buf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
fail:
	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
void
ixl_init_tx_ring(struct ixl_queue *que)
{
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	struct tx_ring		*txr = &que->txr;
	struct ixl_tx_buf	*buf;

	/* Clear the old ring contents */
	IXL_TX_LOCK(txr);

#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->base,
	    (sizeof(struct i40e_tx_desc)) * que->num_desc);

	/* Reset indices */
	txr->next_avail = 0;
	txr->next_to_clean = 0;

	/* Reset watchdog status */
	txr->watchdog_timer = 0;

#ifdef IXL_FDIR
	/* Initialize flow director */
	txr->atr_rate = ixl_atr_rate;
	txr->atr_count = 0;
#endif
	/* Free any existing tx mbufs. */
	buf = txr->buffers;
	for (int i = 0; i < que->num_desc; i++, buf++) {
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
		 * netmap slot index, si
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[que->me], i);
			netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		buf->eop_index = -1;
	}

	/* Set number of descriptors available */
	txr->avail = que->num_desc;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXL_TX_UNLOCK(txr);
}


/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
void
ixl_free_que_tx(struct ixl_queue *que)
{
	struct tx_ring *txr = &que->txr;
	struct ixl_tx_buf *buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	for (int i = 0; i < que->num_desc; i++) {
		buf = &txr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(buf->tag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(buf->tag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
			if (buf->map != NULL) {
				bus_dmamap_destroy(buf->tag,
				    buf->map);
				buf->map = NULL;
			}
		} else if (buf->map != NULL) {
			bus_dmamap_unload(buf->tag,
			    buf->map);
			bus_dmamap_destroy(buf->tag,
			    buf->map);
			buf->map = NULL;
		}
	}
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->buffers != NULL) {
		free(txr->buffers, M_DEVBUF);
		txr->buffers = NULL;
	}
	if (txr->tx_tag != NULL) {
		bus_dma_tag_destroy(txr->tx_tag);
		txr->tx_tag = NULL;
	}
	if (txr->tso_tag != NULL) {
		bus_dma_tag_destroy(txr->tso_tag);
		txr->tso_tag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

/*********************************************************************
 *
 *  Setup descriptor for hw offloads
 *
 **********************************************************************/

static int
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
{
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip = NULL;
#endif
	struct tcphdr			*th = NULL;
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
	int				elen, ip_hlen = 0, tcp_hlen;
	u16				etype;
	u8				ipproto = 0;
	bool				tso = FALSE;

	/* Set up the TSO context descriptor if required */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		tso = ixl_tso_setup(que, mp);
		if (tso)
			++que->tso;
		else
			return (ENXIO);
	}

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
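	 * The MACLEN and IPLEN fields in the descriptor offset word
	 * are derived from these offsets below.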
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		elen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* The IP checksum must be recalculated with TSO */
		if (tso)
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
		else
			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
		break;
#endif
	default:
		break;
	}

	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

	switch (ipproto) {
	case IPPROTO_TCP:
		tcp_hlen = th->th_off << 2;
		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
			*off |= (tcp_hlen >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
#ifdef IXL_FDIR
		ixl_atr(que, th, etype);
#endif
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
			*off |= (sizeof(struct udphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		break;

	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
			*off |= (sizeof(struct sctphdr) >> 2) <<
			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
		}
		/* Fall Thru */
	default:
		break;
	}

	return (0);
}


/**********************************************************************
 *
 *  Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
{
	struct tx_ring			*txr = &que->txr;
	struct i40e_tx_context_desc	*TXD;
	struct ixl_tx_buf		*buf;
	u32				cmd, mss, type, tsolen;
	u16				etype;
	int				idx, elen, ip_hlen, tcp_hlen;
	struct ether_vlan_header	*eh;
#ifdef INET
	struct ip			*ip;
#endif
#ifdef INET6
	struct ip6_hdr			*ip6;
#endif
#if defined(INET6) || defined(INET)
	struct tcphdr			*th;
#endif
	u64				type_cmd_tso_mss;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		elen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	switch (ntohs(etype)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (FALSE);
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		tcp_hlen = th->th_off << 2;
		/*
		 * The corresponding flag is set by the stack in the IPv4
		 * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
		 * So, set it here because the rest of the flow requires it.
		 */
		mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + elen);
		if (ip->ip_p != IPPROTO_TCP)
			return (FALSE);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		tcp_hlen = th->th_off << 2;
		break;
#endif
	default:
		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
		    __func__, ntohs(etype));
		return FALSE;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
		return FALSE;

	idx = txr->next_avail;
	buf = &txr->buffers[idx];
	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

	type = I40E_TX_DESC_DTYPE_CONTEXT;
	cmd = I40E_TX_CTX_DESC_TSO;
	/* TSO MSS must not be less than 64 */
	if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
		que->mss_too_small++;
		mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
	}
	mss = mp->m_pkthdr.tso_segsz;

	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

	TXD->tunneling_params = htole32(0);
	buf->m_head = NULL;
	buf->eop_index = -1;

	if (++idx == que->num_desc)
		idx = 0;

	txr->avail--;
	txr->next_avail = idx;

	return TRUE;
}

/*
** ixl_get_tx_head - Retrieve the value from the
** location the HW records its HEAD index
*/
static inline u32
ixl_get_tx_head(struct ixl_queue *que)
{
	struct tx_ring	*txr = &que->txr;
	void *head = &txr->base[que->num_desc];
	return LE32_TO_CPU(*(volatile __le32 *)head);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
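 *  Completion is detected via the head write-back value returned by
 *  ixl_get_tx_head() rather than per-descriptor done bits.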
 *
 **********************************************************************/
bool
ixl_txeof(struct ixl_queue *que)
{
	struct tx_ring		*txr = &que->txr;
	u32			first, last, head, done, processed;
	struct ixl_tx_buf	*buf;
	struct i40e_tx_desc	*tx_desc, *eop_desc;


	mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
	// XXX todo: implement moderation
	if (netmap_tx_irq(que->vsi->ifp, que->me))
		return FALSE;
#endif /* DEV_NETMAP */

	/* These are not the descriptors you seek, move along :) */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	buf = &txr->buffers[first];
	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
	last = buf->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct i40e_tx_desc *)&txr->base[last];

	/* Get the Head WB value */
	head = ixl_get_tx_head(que);

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == que->num_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** The HEAD index of the ring is written in a
	** defined location, this rather than a done bit
	** is what is used to keep track of what must be
	** 'cleaned'.
	*/
	while (first != head) {
		/* We clean the range of the packet */
		while (first != done) {
			++txr->avail;
			++processed;

			if (buf->m_head) {
				txr->bytes += /* for ITR adjustment */
				    buf->m_head->m_pkthdr.len;
				txr->tx_bytes += /* for TX stats */
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(buf->tag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(buf->tag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
				buf->map = NULL;
			}
			buf->eop_index = -1;

			if (++first == que->num_desc)
				first = 0;

			buf = &txr->buffers[first];
			tx_desc = &txr->base[first];
		}
		++txr->packets;
		/* See if there is more work now */
		last = buf->eop_index;
		if (last != -1) {
			eop_desc = &txr->base[last];
			/* Get next done point */
			if (++last == que->num_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;


	/*
	 * If there are no pending descriptors, clear the timeout.
	 */
	if (txr->avail == que->num_desc) {
		atomic_store_rel_32(&txr->watchdog_timer, 0);
		return FALSE;
	}

	return TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
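 *   - the hardware tail register is only written when at least one
 *     descriptor was successfully refreshed.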
 *
 **********************************************************************/
static void
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct ixl_rx_buf	*buf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_refresh;
	/* Control the loop with one beyond */
	if (++j == que->num_desc)
		j = 0;

	while (j != limit) {
		buf = &rxr->buffers[i];
		if (rxr->hdr_split == FALSE)
			goto no_split;

		if (buf->m_head == NULL) {
			mh = m_gethdr(M_NOWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = buf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			buf->m_head = NULL;
			goto update;
		}
		buf->m_head = mh;
		bus_dmamap_sync(rxr->htag, buf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);

no_split:
		if (buf->m_pack == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = buf->m_pack;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			buf->m_pack = NULL;
			goto update;
		}
		buf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, buf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		/* Used only when doing header split */
		rxr->base[i].read.hdr_addr = 0;

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_refresh = i;
		if (++j == que->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
	return;
}


/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per descriptor, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've defined.
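 *  Two DMA tags are created here as well: rxr->htag for header
 *  buffers (MSIZE) and rxr->ptag for payload clusters (up to
 *  MJUM16BYTES).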
 *
 **********************************************************************/
int
ixl_allocate_rx_data(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
	device_t		dev = vsi->dev;
	struct ixl_rx_buf	*buf;
	int			i, bsize, error;

	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
	if (!(rxr->buffers =
	    (struct ixl_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
			   1, 0,		/* alignment, bounds */
			   BUS_SPACE_MAXADDR,	/* lowaddr */
			   BUS_SPACE_MAXADDR,	/* highaddr */
			   NULL, NULL,		/* filter, filterarg */
			   MSIZE,		/* maxsize */
			   1,			/* nsegments */
			   MSIZE,		/* maxsegsize */
			   0,			/* flags */
			   NULL,		/* lockfunc */
			   NULL,		/* lockfuncarg */
			   &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA htag\n");
		return (error);
	}

	if ((error = bus_dma_tag_create(NULL,	/* parent */
			   1, 0,		/* alignment, bounds */
			   BUS_SPACE_MAXADDR,	/* lowaddr */
			   BUS_SPACE_MAXADDR,	/* highaddr */
			   NULL, NULL,		/* filter, filterarg */
			   MJUM16BYTES,		/* maxsize */
			   1,			/* nsegments */
			   MJUM16BYTES,		/* maxsegsize */
			   0,			/* flags */
			   NULL,		/* lockfunc */
			   NULL,		/* lockfuncarg */
			   &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA ptag\n");
		return (error);
	}

	for (i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &buf->hmap);
		if (error) {
			device_printf(dev, "Unable to create RX head map\n");
			break;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &buf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX pkt map\n");
			break;
		}
	}

	return (error);
}


/*********************************************************************
 *
 *  (Re)Initialize the queue receive ring and its buffers.
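 *   - frees any stale mbufs, replenishes the ring, and sets up
 *     software LRO when the interface has IFCAP_LRO enabled.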
 *
 **********************************************************************/
int
ixl_init_rx_ring(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_vsi		*vsi = que->vsi;
#if defined(INET6) || defined(INET)
	struct ifnet		*ifp = vsi->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	struct ixl_rx_buf	*buf;
	bus_dma_segment_t	pseg[1], hseg[1];
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(que->vsi->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	IXL_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixl_init_tx_ring() */
	slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
	/* Clear the ring contents */
	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->base, rsize);
	/* Cleanup any existing buffers */
	for (int i = 0; i < que->num_desc; i++) {
		buf = &rxr->buffers[i];
		if (buf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, buf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, buf->hmap);
			buf->m_head->m_flags |= M_PKTHDR;
			m_freem(buf->m_head);
		}
		if (buf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, buf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, buf->pmap);
			buf->m_pack->m_flags |= M_PKTHDR;
			m_freem(buf->m_pack);
		}
		buf->m_head = NULL;
		buf->m_pack = NULL;
	}

	/* header split is off */
	rxr->hdr_split = FALSE;

	/* Now replenish the mbufs */
	for (int j = 0; j != que->num_desc; ++j) {
		struct mbuf	*mh, *mp;

		buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ).
		 * No need to allocate an mbuf, so end the block with
		 * a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[que->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->base[j].read.pkt_addr = htole64(paddr);
			rxr->base[j].read.hdr_addr = 0;
			continue;
		}
#endif /* DEV_NETMAP */
		/*
		** Don't allocate mbufs if not
		** doing header split, it's wasteful
		*/
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
		if (buf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(buf->m_head, ETHER_ALIGN);
		mh = buf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    buf->hmap, buf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    buf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, rxr->mbuf_sz);
		if (buf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = buf->m_pack;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    buf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    buf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
		rxr->base[j].read.hdr_addr = 0;
	}


	/* Setup our descriptor indices */
	rxr->next_check = 0;
	rxr->next_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->split = 0;
	rxr->bytes = 0;
	rxr->discard = FALSE;

	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
	ixl_flush(vsi->hw);

#if defined(INET6) || defined(INET)
	/*
	** Now set up the LRO interface:
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
			goto fail;
		}
		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
		rxr->lro_enabled = TRUE;
		lro->ifp = vsi->ifp;
	}
#endif

	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

fail:
	IXL_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Free station receive ring data structures
 *
 **********************************************************************/
void
ixl_free_que_rx(struct ixl_queue *que)
{
	struct rx_ring		*rxr = &que->rxr;
	struct ixl_rx_buf	*buf;

	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

	/* Cleanup any existing buffers */
	if (rxr->buffers != NULL) {
		for (int i = 0; i < que->num_desc; i++) {
			buf = &rxr->buffers[i];
			if (buf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, buf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, buf->hmap);
				buf->m_head->m_flags |= M_PKTHDR;
				m_freem(buf->m_head);
			}
			if (buf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, buf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, buf->pmap);
				buf->m_pack->m_flags |= M_PKTHDR;
				m_freem(buf->m_pack);
			}
			buf->m_head = NULL;
			buf->m_pack = NULL;
			if (buf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, buf->hmap);
				buf->hmap = NULL;
			}
			if (buf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, buf->pmap);
				buf->pmap = NULL;
			}
		}
		if (rxr->buffers != NULL) {
			free(rxr->buffers, M_DEVBUF);
			rxr->buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
	return;
}

static inline void
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
{

#if defined(INET6) || defined(INET)
	/*
	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 **  - LRO not enabled, or
		 **  - no LRO resources, or
		 **  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif
	IXL_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXL_RX_LOCK(rxr);
}


static inline void
ixl_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixl_rx_buf	*rbuf;

	rbuf = &rxr->buffers[i];

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

#ifdef RSS
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
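** Unknown or non-IP (L2-only) packet types fall back to
** M_HASHTYPE_OPAQUE_HASH.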
*/
static inline int
ixl_ptype_to_hash(u8 ptype)
{
	struct i40e_rx_ptype_decoded	decoded;
	u8				ex = 0;

	decoded = decode_rx_desc_ptype(ptype);
	ex = decoded.outer_frag;

	if (!decoded.known)
		return M_HASHTYPE_OPAQUE_HASH;

	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
		return M_HASHTYPE_OPAQUE_HASH;

	/* Note: anything that gets to this point is IP */
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			if (ex)
				return M_HASHTYPE_RSS_TCP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_TCP_IPV6;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV6_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV6;
		default:
			if (ex)
				return M_HASHTYPE_RSS_IPV6_EX;
			else
				return M_HASHTYPE_RSS_IPV6;
		}
	}
	if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
		switch (decoded.inner_prot) {
		case I40E_RX_PTYPE_INNER_PROT_TCP:
			return M_HASHTYPE_RSS_TCP_IPV4;
		case I40E_RX_PTYPE_INNER_PROT_UDP:
			if (ex)
				return M_HASHTYPE_RSS_UDP_IPV4_EX;
			else
				return M_HASHTYPE_RSS_UDP_IPV4;
		default:
			return M_HASHTYPE_RSS_IPV4;
		}
	}
	/* We should never get here!! */
	return M_HASHTYPE_OPAQUE_HASH;
}
#endif /* RSS */

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixl_rxeof(struct ixl_queue *que, int count)
{
	struct ixl_vsi		*vsi = que->vsi;
	struct rx_ring		*rxr = &que->rxr;
	struct ifnet		*ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
	struct lro_ctrl		*lro = &rxr->lro;
#endif
	int			i, nextp, processed = 0;
	union i40e_rx_desc	*cur;
	struct ixl_rx_buf	*rbuf, *nbuf;


	IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, que->me, &count)) {
		IXL_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_check; count != 0;) {
		struct mbuf	*sendmp, *mh, *mp;
		u32		status, error;
		u16		hlen, plen, vtag;
		u64		qword;
		u8		ptype;
		bool		eop;

		/* Sync the ring. */
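		/* Done on each pass so later descriptor write-backs are visible. */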
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->base[i];
		qword = le64toh(cur->wb.qword1.status_error_len);
		status = (qword & I40E_RXD_QW1_STATUS_MASK)
		    >> I40E_RXD_QW1_STATUS_SHIFT;
		error = (qword & I40E_RXD_QW1_ERROR_MASK)
		    >> I40E_RXD_QW1_ERROR_SHIFT;
		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
		    >> I40E_RXD_QW1_PTYPE_SHIFT;

		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
			++rxr->not_done;
			break;
		}
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		cur->wb.qword1.status_error_len = 0;
		rbuf = &rxr->buffers[i];
		mh = rbuf->m_head;
		mp = rbuf->m_pack;
		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
		else
			vtag = 0;

		/*
		** Make sure bad packets are discarded,
		** note that only EOP descriptor has valid
		** error results.
		*/
		if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
			rxr->desc_errs++;
			ixl_rx_discard(rxr, i);
			goto next_desc;
		}

		/* Prefetch the next buffer */
		if (!eop) {
			nextp = i + 1;
			if (nextp == que->num_desc)
				nextp = 0;
			nbuf = &rxr->buffers[nextp];
			prefetch(nbuf);
		}

		/*
		** The header mbuf is ONLY used when header
		** split is enabled, otherwise we get normal
		** behavior, ie, both header and payload
		** are DMA'd into the payload buffer.
		**
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
			if (hlen > IXL_RX_HDR)
				hlen = IXL_RX_HDR;
			mh->m_len = hlen;
			mh->m_flags |= M_PKTHDR;
			mh->m_next = NULL;
			mh->m_pkthdr.len = mh->m_len;
			/* Null buf pointer so it is refreshed */
			rbuf->m_head = NULL;
			/*
			** Check the payload length, this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp->m_len = plen;
				mp->m_next = NULL;
				mp->m_flags &= ~M_PKTHDR;
				mh->m_next = mp;
				mh->m_pkthdr.len += mp->m_len;
				/* Null buf pointer so it is refreshed */
				rbuf->m_pack = NULL;
				rxr->split++;
			}
			/*
			** Now create the forward
			** chain so when complete
			** we won't have to.
			*/
			if (eop == 0) {
				/* stash the chain head */
				nbuf->fmp = mh;
				/* Make forward chain */
				if (plen)
					mp->m_next = nbuf->m_pack;
				else
					mh->m_next = nbuf->m_pack;
			} else {
				/* Singlet, prepare to send */
				sendmp = mh;
				if (vtag) {
					sendmp->m_pkthdr.ether_vtag = vtag;
					sendmp->m_flags |= M_VLANTAG;
				}
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
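			** Either way the data for this descriptor is in the
			** payload buffer (m_pack); chain it onto any stored head.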
			*/
			mp->m_len = plen;
			/*
			** See if there is a stored head
			** that determines what we are
			*/
			sendmp = rbuf->fmp;
			rbuf->m_pack = rbuf->fmp = NULL;

			if (sendmp != NULL) /* secondary frag */
				sendmp->m_pkthdr.len += mp->m_len;
			else {
				/* first desc of a non-ps chain */
				sendmp = mp;
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;
			}
			/* Pass the head pointer on */
			if (eop == 0) {
				nbuf->fmp = sendmp;
				sendmp = NULL;
				mp->m_next = nbuf->m_pack;
			}
		}
		++processed;
		/* Sending this frame? */
		if (eop) {
			sendmp->m_pkthdr.rcvif = ifp;
			/* gather stats */
			rxr->rx_packets++;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* capture data for dynamic ITR adjustment */
			rxr->packets++;
			rxr->bytes += sendmp->m_pkthdr.len;
			/* Set VLAN tag (field only valid in eop desc) */
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.qword0.hi_dword.rss);
			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
			sendmp->m_pkthdr.flowid = que->msix;
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
		}
next_desc:
		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == que->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_check = i;
			ixl_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixl_refresh_mbufs(que, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixl_rx_unrefreshed(que))
		ixl_refresh_mbufs(que, i);

	rxr->next_check = i;

#if defined(INET6) || defined(INET)
	/*
	 * Flush any outstanding LRO work
	 */
#if __FreeBSD_version >= 1100105
	tcp_lro_flush_all(lro);
#else
	struct lro_entry *queued;
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif
#endif /* defined(INET6) || defined(INET) */

	IXL_RX_UNLOCK(rxr);
	return (FALSE);
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that stack
 *  doesn't spend time verifying the checksum.
 *
 *********************************************************************/
static void
ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
{
	struct i40e_rx_ptype_decoded decoded;

	decoded = decode_rx_desc_ptype(ptype);

	/* Errors? */
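	/*
	 * IPE/L4E mean the hardware saw a bad IP or L4 checksum,
	 * so leave csum_flags clear and let the stack verify it.
	 */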
	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	/* IPv6 with extension headers likely have bad csum */
	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
		if (status &
		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
			mp->m_pkthdr.csum_flags = 0;
			return;
		}


	/* IP Checksum Good */
	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
		mp->m_pkthdr.csum_flags |=
		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		mp->m_pkthdr.csum_data |= htons(0xffff);
	}
	return;
}

#if __FreeBSD_version >= 1100000
uint64_t
ixl_get_counter(if_t ifp, ift_counter cnt)
{
	struct ixl_vsi *vsi;

	vsi = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (vsi->ipackets);
	case IFCOUNTER_IERRORS:
		return (vsi->ierrors);
	case IFCOUNTER_OPACKETS:
		return (vsi->opackets);
	case IFCOUNTER_OERRORS:
		return (vsi->oerrors);
	case IFCOUNTER_COLLISIONS:
		/* Collisions are by standard impossible in 40G/10G Ethernet */
		return (0);
	case IFCOUNTER_IBYTES:
		return (vsi->ibytes);
	case IFCOUNTER_OBYTES:
		return (vsi->obytes);
	case IFCOUNTER_IMCASTS:
		return (vsi->imcasts);
	case IFCOUNTER_OMCASTS:
		return (vsi->omcasts);
	case IFCOUNTER_IQDROPS:
		return (vsi->iqdrops);
	case IFCOUNTER_OQDROPS:
		return (vsi->oqdrops);
	case IFCOUNTER_NOPROTO:
		return (vsi->noproto);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif