1 /***************************************************************************** 2 3 Copyright (c) 2001-2017, Intel Corporation 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 *****************************************************************************/ 33 34 #ifndef IXGBE_STANDALONE_BUILD 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 #include "opt_rss.h" 38 #endif 39 40 #include "ixgbe.h" 41 42 /************************************************************************ 43 * Local Function prototypes 44 ************************************************************************/ 45 static int ixgbe_isc_txd_encap(void *, if_pkt_info_t); 46 static void ixgbe_isc_txd_flush(void *, uint16_t, qidx_t); 47 static int ixgbe_isc_txd_credits_update(void *, uint16_t, bool); 48 49 static void ixgbe_isc_rxd_refill(void *, if_rxd_update_t); 50 static void ixgbe_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); 51 static int ixgbe_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); 52 static int ixgbe_isc_rxd_pkt_get(void *, if_rxd_info_t); 53 54 static void ixgbe_rx_checksum(uint32_t, if_rxd_info_t, uint32_t); 55 static int ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *, 56 if_pkt_info_t); 57 58 extern void ixgbe_if_enable_intr(if_ctx_t ctx); 59 static int ixgbe_determine_rsstype(uint16_t pkt_info); 60 61 struct if_txrx ixgbe_txrx = { 62 .ift_txd_encap = ixgbe_isc_txd_encap, 63 .ift_txd_flush = ixgbe_isc_txd_flush, 64 .ift_txd_credits_update = ixgbe_isc_txd_credits_update, 65 .ift_rxd_available = ixgbe_isc_rxd_available, 66 .ift_rxd_pkt_get = ixgbe_isc_rxd_pkt_get, 67 .ift_rxd_refill = ixgbe_isc_rxd_refill, 68 .ift_rxd_flush = ixgbe_isc_rxd_flush, 69 .ift_legacy_intr = NULL 70 }; 71 72 /************************************************************************ 73 * ixgbe_tx_ctx_setup 74 * 75 * Advanced Context Descriptor setup for VLAN, CSUM or TSO 76 * 77 ************************************************************************/ 78 static int 79 ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi) 80 { 81 uint32_t vlan_macip_lens, type_tucmd_mlhl; 82 uint32_t olinfo_status, mss_l4len_idx, pktlen, offload; 83 u8 ehdrlen; 84 85 offload = true; 86 olinfo_status = mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; 87 /* VLAN MACLEN IPLEN */ 88 vlan_macip_lens |= (htole16(pi->ipi_vtag) << IXGBE_ADVTXD_VLAN_SHIFT); 89 90 /* 91 * Some of our VF devices need a context descriptor for every 92 * packet. That means the ehdrlen needs to be non-zero in order 93 * for the host driver not to flag a malicious event. The stack 94 * will most likely populate this for all other reasons of why 95 * this function was called. 96 */ 97 if (pi->ipi_ehdrlen == 0) { 98 ehdrlen = ETHER_HDR_LEN; 99 ehdrlen += (pi->ipi_vtag != 0) ? ETHER_VLAN_ENCAP_LEN : 0; 100 } else 101 ehdrlen = pi->ipi_ehdrlen; 102 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 103 104 pktlen = pi->ipi_len; 105 /* First check if TSO is to be used */ 106 if (pi->ipi_csum_flags & CSUM_TSO) { 107 /* This is used in the transmit desc in encap */ 108 pktlen = pi->ipi_len - ehdrlen - pi->ipi_ip_hlen - 109 pi->ipi_tcp_hlen; 110 mss_l4len_idx |= 111 (pi->ipi_tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); 112 mss_l4len_idx |= 113 (pi->ipi_tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); 114 } 115 116 olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT; 117 118 if (pi->ipi_flags & IPI_TX_IPV4) { 119 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 120 /* Tell transmit desc to also do IPv4 checksum. */ 121 if (pi->ipi_csum_flags & (CSUM_IP|CSUM_TSO)) 122 olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 123 } else if (pi->ipi_flags & IPI_TX_IPV6) 124 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 125 else 126 offload = false; 127 128 vlan_macip_lens |= pi->ipi_ip_hlen; 129 130 switch (pi->ipi_ipproto) { 131 case IPPROTO_TCP: 132 if (pi->ipi_csum_flags & 133 (CSUM_IP_TCP | CSUM_IP6_TCP | CSUM_TSO)) 134 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 135 else 136 offload = false; 137 break; 138 case IPPROTO_UDP: 139 if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) 140 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; 141 else 142 offload = false; 143 break; 144 case IPPROTO_SCTP: 145 if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) 146 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; 147 else 148 offload = false; 149 break; 150 default: 151 offload = false; 152 break; 153 } 154 /* Insert L4 checksum into data descriptors */ 155 if (offload) 156 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 157 158 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 159 160 /* Now copy bits into descriptor */ 161 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 162 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 163 TXD->seqnum_seed = htole32(0); 164 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 165 166 return (olinfo_status); 167 } /* ixgbe_tx_ctx_setup */ 168 169 /************************************************************************ 170 * ixgbe_isc_txd_encap 171 ************************************************************************/ 172 static int 173 ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi) 174 { 175 struct ixgbe_softc *sc = arg; 176 if_softc_ctx_t scctx = sc->shared; 177 struct ix_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; 178 struct tx_ring *txr = &que->txr; 179 int nsegs = pi->ipi_nsegs; 180 bus_dma_segment_t *segs = pi->ipi_segs; 181 union ixgbe_adv_tx_desc *txd = NULL; 182 struct ixgbe_adv_tx_context_desc *TXD; 183 int i, j, first, pidx_last; 184 uint32_t olinfo_status, cmd, flags; 185 qidx_t ntxd; 186 187 cmd = (IXGBE_ADVTXD_DTYP_DATA | 188 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); 189 190 if (pi->ipi_mflags & M_VLANTAG) 191 cmd |= IXGBE_ADVTXD_DCMD_VLE; 192 193 i = first = pi->ipi_pidx; 194 flags = (pi->ipi_flags & IPI_TX_INTR) ? IXGBE_TXD_CMD_RS : 0; 195 ntxd = scctx->isc_ntxd[0]; 196 197 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[first]; 198 if ((pi->ipi_csum_flags & CSUM_OFFLOAD) || 199 (sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) || 200 pi->ipi_vtag) { 201 /********************************************* 202 * Set up the appropriate offload context 203 * this will consume the first descriptor 204 *********************************************/ 205 olinfo_status = ixgbe_tx_ctx_setup(TXD, pi); 206 if (pi->ipi_csum_flags & CSUM_TSO) { 207 cmd |= IXGBE_ADVTXD_DCMD_TSE; 208 ++txr->tso_tx; 209 } 210 211 if (++i == scctx->isc_ntxd[0]) 212 i = 0; 213 } else { 214 /* Indicate the whole packet as payload when not doing TSO */ 215 olinfo_status = pi->ipi_len << IXGBE_ADVTXD_PAYLEN_SHIFT; 216 } 217 218 olinfo_status |= IXGBE_ADVTXD_CC; 219 pidx_last = 0; 220 for (j = 0; j < nsegs; j++) { 221 bus_size_t seglen; 222 223 txd = &txr->tx_base[i]; 224 seglen = segs[j].ds_len; 225 226 txd->read.buffer_addr = htole64(segs[j].ds_addr); 227 txd->read.cmd_type_len = htole32(cmd | seglen); 228 txd->read.olinfo_status = htole32(olinfo_status); 229 230 pidx_last = i; 231 if (++i == scctx->isc_ntxd[0]) { 232 i = 0; 233 } 234 } 235 236 if (flags) { 237 txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; 238 txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1); 239 } 240 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | flags); 241 242 txr->bytes += pi->ipi_len; 243 pi->ipi_new_pidx = i; 244 245 ++txr->total_packets; 246 247 return (0); 248 } /* ixgbe_isc_txd_encap */ 249 250 /************************************************************************ 251 * ixgbe_isc_txd_flush 252 ************************************************************************/ 253 static void 254 ixgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) 255 { 256 struct ixgbe_softc *sc = arg; 257 struct ix_tx_queue *que = &sc->tx_queues[txqid]; 258 struct tx_ring *txr = &que->txr; 259 260 IXGBE_WRITE_REG(&sc->hw, txr->tail, pidx); 261 } /* ixgbe_isc_txd_flush */ 262 263 /************************************************************************ 264 * ixgbe_isc_txd_credits_update 265 ************************************************************************/ 266 static int 267 ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) 268 { 269 struct ixgbe_softc *sc = arg; 270 if_softc_ctx_t scctx = sc->shared; 271 struct ix_tx_queue *que = &sc->tx_queues[txqid]; 272 struct tx_ring *txr = &que->txr; 273 qidx_t processed = 0; 274 int updated; 275 qidx_t cur, prev, ntxd, rs_cidx; 276 int32_t delta; 277 uint8_t status; 278 279 rs_cidx = txr->tx_rs_cidx; 280 if (rs_cidx == txr->tx_rs_pidx) 281 return (0); 282 283 cur = txr->tx_rsq[rs_cidx]; 284 status = txr->tx_base[cur].wb.status; 285 updated = !!(status & IXGBE_TXD_STAT_DD); 286 287 if (!updated) 288 return (0); 289 290 /* If clear is false just let caller know that there 291 * are descriptors to reclaim */ 292 if (!clear) 293 return (1); 294 295 prev = txr->tx_cidx_processed; 296 ntxd = scctx->isc_ntxd[0]; 297 do { 298 MPASS(prev != cur); 299 delta = (int32_t)cur - (int32_t)prev; 300 if (delta < 0) 301 delta += ntxd; 302 MPASS(delta > 0); 303 304 processed += delta; 305 prev = cur; 306 rs_cidx = (rs_cidx + 1) & (ntxd - 1); 307 if (rs_cidx == txr->tx_rs_pidx) 308 break; 309 310 cur = txr->tx_rsq[rs_cidx]; 311 status = txr->tx_base[cur].wb.status; 312 } while ((status & IXGBE_TXD_STAT_DD)); 313 314 txr->tx_rs_cidx = rs_cidx; 315 txr->tx_cidx_processed = prev; 316 317 return (processed); 318 } /* ixgbe_isc_txd_credits_update */ 319 320 /************************************************************************ 321 * ixgbe_isc_rxd_refill 322 ************************************************************************/ 323 static void 324 ixgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) 325 { 326 struct ixgbe_softc *sc = arg; 327 struct ix_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; 328 struct rx_ring *rxr = &que->rxr; 329 uint64_t *paddrs; 330 int i; 331 uint32_t next_pidx, pidx; 332 uint16_t count; 333 334 paddrs = iru->iru_paddrs; 335 pidx = iru->iru_pidx; 336 count = iru->iru_count; 337 338 for (i = 0, next_pidx = pidx; i < count; i++) { 339 rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); 340 if (++next_pidx == sc->shared->isc_nrxd[0]) 341 next_pidx = 0; 342 } 343 } /* ixgbe_isc_rxd_refill */ 344 345 /************************************************************************ 346 * ixgbe_isc_rxd_flush 347 ************************************************************************/ 348 static void 349 ixgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx __unused, 350 qidx_t pidx) 351 { 352 struct ixgbe_softc *sc = arg; 353 struct ix_rx_queue *que = &sc->rx_queues[qsidx]; 354 struct rx_ring *rxr = &que->rxr; 355 356 IXGBE_WRITE_REG(&sc->hw, rxr->tail, pidx); 357 } /* ixgbe_isc_rxd_flush */ 358 359 /************************************************************************ 360 * ixgbe_isc_rxd_available 361 ************************************************************************/ 362 static int 363 ixgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t pidx, qidx_t budget) 364 { 365 struct ixgbe_softc *sc = arg; 366 struct ix_rx_queue *que = &sc->rx_queues[qsidx]; 367 struct rx_ring *rxr = &que->rxr; 368 union ixgbe_adv_rx_desc *rxd; 369 uint32_t staterr; 370 int cnt, i, nrxd; 371 372 nrxd = sc->shared->isc_nrxd[0]; 373 for (cnt = 0, i = pidx; cnt < nrxd && cnt <= budget;) { 374 rxd = &rxr->rx_base[i]; 375 staterr = le32toh(rxd->wb.upper.status_error); 376 377 if ((staterr & IXGBE_RXD_STAT_DD) == 0) 378 break; 379 if (++i == nrxd) 380 i = 0; 381 if (staterr & IXGBE_RXD_STAT_EOP) 382 cnt++; 383 } 384 return (cnt); 385 } /* ixgbe_isc_rxd_available */ 386 387 /************************************************************************ 388 * ixgbe_isc_rxd_pkt_get 389 * 390 * Routine sends data which has been dma'ed into host memory 391 * to upper layer. Initialize ri structure. 392 * 393 * Returns 0 upon success, errno on failure 394 ************************************************************************/ 395 396 static int 397 ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) 398 { 399 struct ixgbe_softc *sc = arg; 400 if_softc_ctx_t scctx = sc->shared; 401 struct ix_rx_queue *que = &sc->rx_queues[ri->iri_qsidx]; 402 struct rx_ring *rxr = &que->rxr; 403 union ixgbe_adv_rx_desc *rxd; 404 405 uint16_t pkt_info, len, cidx, i; 406 uint32_t ptype; 407 uint32_t staterr = 0; 408 bool eop; 409 410 i = 0; 411 cidx = ri->iri_cidx; 412 do { 413 rxd = &rxr->rx_base[cidx]; 414 staterr = le32toh(rxd->wb.upper.status_error); 415 pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); 416 417 /* Error Checking then decrement count */ 418 MPASS ((staterr & IXGBE_RXD_STAT_DD) != 0); 419 420 len = le16toh(rxd->wb.upper.length); 421 ptype = le32toh(rxd->wb.lower.lo_dword.data) & 422 IXGBE_RXDADV_PKTTYPE_MASK; 423 424 ri->iri_len += len; 425 rxr->bytes += len; 426 427 rxd->wb.upper.status_error = 0; 428 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); 429 430 /* Make sure bad packets are discarded */ 431 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { 432 if (sc->feat_en & IXGBE_FEATURE_VF) 433 if_inc_counter(ri->iri_ifp, IFCOUNTER_IERRORS, 434 1); 435 436 rxr->rx_discarded++; 437 return (EBADMSG); 438 } 439 ri->iri_frags[i].irf_flid = 0; 440 ri->iri_frags[i].irf_idx = cidx; 441 ri->iri_frags[i].irf_len = len; 442 if (++cidx == sc->shared->isc_nrxd[0]) 443 cidx = 0; 444 i++; 445 /* even a 16K packet shouldn't consume more than 8 clusters */ 446 MPASS(i < 9); 447 } while (!eop); 448 449 rxr->rx_packets++; 450 rxr->packets++; 451 rxr->rx_bytes += ri->iri_len; 452 453 if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0) 454 ixgbe_rx_checksum(staterr, ri, ptype); 455 456 ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); 457 ri->iri_rsstype = ixgbe_determine_rsstype(pkt_info); 458 if ((sc->feat_en & IXGBE_FEATURE_RSS) == 0) { 459 if (ri->iri_rsstype == M_HASHTYPE_OPAQUE) 460 ri->iri_rsstype = M_HASHTYPE_NONE; 461 else 462 ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; 463 } 464 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP)) { 465 ri->iri_vtag = le16toh(rxd->wb.upper.vlan); 466 ri->iri_flags |= M_VLANTAG; 467 } 468 469 ri->iri_nfrags = i; 470 return (0); 471 } /* ixgbe_isc_rxd_pkt_get */ 472 473 /************************************************************************ 474 * ixgbe_rx_checksum 475 * 476 * Verify that the hardware indicated that the checksum is valid. 477 * Inform the stack about the status of checksum so that stack 478 * doesn't spend time verifying the checksum. 479 ************************************************************************/ 480 static void 481 ixgbe_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype) 482 { 483 uint16_t status = (uint16_t)staterr; 484 uint8_t errors = (uint8_t)(staterr >> 24); 485 486 /* If there is a layer 3 or 4 error we are done */ 487 if (__predict_false(errors & 488 (IXGBE_RXD_ERR_IPE | IXGBE_RXD_ERR_TCPE))) 489 return; 490 491 /* IP Checksum Good */ 492 if (status & IXGBE_RXD_STAT_IPCS) 493 ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); 494 495 /* Valid L4E checksum */ 496 if (__predict_true(status & IXGBE_RXD_STAT_L4CS)) { 497 /* SCTP header present. */ 498 if (__predict_false((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && 499 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)) { 500 ri->iri_csum_flags |= CSUM_SCTP_VALID; 501 } else { 502 ri->iri_csum_flags |= 503 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 504 ri->iri_csum_data = htons(0xffff); 505 } 506 } 507 } /* ixgbe_rx_checksum */ 508 509 /************************************************************************ 510 * ixgbe_determine_rsstype 511 * 512 * Parse the packet type to determine the appropriate hash 513 ************************************************************************/ 514 static int 515 ixgbe_determine_rsstype(uint16_t pkt_info) 516 { 517 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { 518 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: 519 return M_HASHTYPE_RSS_TCP_IPV4; 520 case IXGBE_RXDADV_RSSTYPE_IPV4: 521 return M_HASHTYPE_RSS_IPV4; 522 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: 523 return M_HASHTYPE_RSS_TCP_IPV6; 524 case IXGBE_RXDADV_RSSTYPE_IPV6_EX: 525 return M_HASHTYPE_RSS_IPV6_EX; 526 case IXGBE_RXDADV_RSSTYPE_IPV6: 527 return M_HASHTYPE_RSS_IPV6; 528 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: 529 return M_HASHTYPE_RSS_TCP_IPV6_EX; 530 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: 531 return M_HASHTYPE_RSS_UDP_IPV4; 532 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: 533 return M_HASHTYPE_RSS_UDP_IPV6; 534 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: 535 return M_HASHTYPE_RSS_UDP_IPV6_EX; 536 default: 537 return M_HASHTYPE_OPAQUE; 538 } 539 } /* ixgbe_determine_rsstype */ 540