/*-
 * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_kern_tls.h"

#include "en.h"
#include <machine/atomic.h>

static inline bool
mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
{
	sq->cev_counter++;
	/* interleave the CQEs */
	if (sq->cev_counter >= sq->cev_factor) {
		sq->cev_counter = 0;
		return (true);
	}
	return (false);
}

bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{

	return (mlx5e_do_send_cqe_inline(sq));
}

void
mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
{
	u16 pi = sq->pc & sq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	sq->mbuf[pi].mbuf = NULL;
	sq->mbuf[pi].num_bytes = 0;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += sq->mbuf[pi].num_wqebbs;
}

#if (__FreeBSD_version >= 1100000)
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make kernel call mlx5e_hash_init after the random stack finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
#endif

static struct mlx5e_sq *
mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb)
{
	struct m_snd_tag *mb_tag;
	struct mlx5e_snd_tag *ptag;
	struct mlx5e_sq *sq;

	mb_tag = mb->m_pkthdr.snd_tag;

#ifdef KERN_TLS
top:
#endif
	/* get pointer to sendqueue */
	ptag = container_of(mb_tag, struct mlx5e_snd_tag, m_snd_tag);

	switch (ptag->type) {
#ifdef RATELIMIT
	case IF_SND_TAG_TYPE_RATE_LIMIT:
		sq = container_of(ptag,
		    struct mlx5e_rl_channel, tag)->sq;
		break;
#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT)
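	/*
	 * A TLS rate-limit send tag wraps an ordinary rate-limit tag;
	 * resolve the wrapped tag and restart the classification.
	 */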
	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
		mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
#endif
	case IF_SND_TAG_TYPE_UNLIMITED:
		sq = &container_of(ptag,
		    struct mlx5e_channel, tag)->sq[0];
		KASSERT((ptag->m_snd_tag.refcount > 0),
		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
		break;
#ifdef KERN_TLS
	case IF_SND_TAG_TYPE_TLS:
		mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
	default:
		sq = NULL;
		break;
	}

	/* check if valid */
	if (sq != NULL && READ_ONCE(sq->running) != 0)
		return (sq);

	return (NULL);
}

static struct mlx5e_sq *
mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	struct mlx5e_sq *sq;
	u32 ch;
	u32 tc;

	/* obtain VLAN information if present */
	if (mb->m_flags & M_VLANTAG) {
		tc = (mb->m_pkthdr.ether_vtag >> 13);
		if (tc >= priv->num_tc)
			tc = priv->default_vlan_prio;
	} else {
		tc = priv->default_vlan_prio;
	}

	ch = priv->params.num_channels;

	/* check if flowid is set */
	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
		u32 temp;

		if (rss_hash2bucket(mb->m_pkthdr.flowid,
		    M_HASHTYPE_GET(mb), &temp) == 0)
			ch = temp % ch;
		else
#endif
			ch = (mb->m_pkthdr.flowid % 128) % ch;
	} else {
#if (__FreeBSD_version >= 1100000)
		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
#else
		/*
		 * m_ether_tcpip_hash not present in stable, so just
		 * throw unhashed mbufs on queue 0
		 */
		ch = 0;
#endif
	}

	/* check if send queue is running */
	sq = &priv->channel[ch].sq[tc];
	if (likely(READ_ONCE(sq->running) != 0))
		return (sq);
	return (NULL);
}

static inline u16
mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
	struct ether_vlan_header *eh;
	uint16_t eth_type;
	int min_inline;

	eh = mtod(mb, struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
		goto max_inline;
	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
			goto max_inline;
		eth_type = ntohs(eh->evl_proto);
		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		min_inline = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		/*
		 * Make sure the TOS(IPv4) or traffic class(IPv6)
		 * field gets inlined. Else the SQ may stall.
		 */
		min_inline += 4;
		break;
	default:
		goto max_inline;
	}

	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (mb->m_pkthdr.len < min_inline)
		goto max_inline;
	return (min_inline);

max_inline:
	return (MIN(mb->m_pkthdr.len, sq->max_inline));
}

/*
 * This function parses IPv4 and IPv6 packets looking for TCP and UDP
 * headers.
 *
 * Upon return, the pointer at which the "ppth" argument points is set
 * to the location of the TCP header. NULL is used if no TCP header is
 * present.
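 *
 * For UDP packets the TCP header pointer is set to NULL and the
 * returned length includes the eight byte UDP header.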
 *
 * The return value indicates the number of bytes from the beginning
 * of the packet until the first byte after the TCP or UDP header. If
 * this function returns zero, the parsing failed.
 */
int
mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
{
	const struct ether_vlan_header *eh;
	const struct tcphdr *th;
	const struct ip *ip;
	int ip_hlen, tcp_hlen;
	const struct ip6_hdr *ip6;
	uint16_t eth_type;
	int eth_hdr_len;

	eh = mtod(mb, const struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN))
		goto failure;
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
			goto failure;
		eth_type = ntohs(eh->evl_proto);
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		eth_hdr_len = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
			goto failure;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen;
			goto tcp_packet;
		case IPPROTO_UDP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen + 8;
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	case ETHERTYPE_IPV6:
		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
			goto failure;
		switch (ip6->ip6_nxt) {
		case IPPROTO_TCP:
			eth_hdr_len += sizeof(*ip6);
			goto tcp_packet;
		case IPPROTO_UDP:
			eth_hdr_len += sizeof(*ip6) + 8;
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	default:
		goto failure;
	}
tcp_packet:
	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
		const struct mbuf *m_th = mb->m_next;
		if (unlikely(mb->m_len != eth_hdr_len ||
		    m_th == NULL || m_th->m_len < sizeof(*th)))
			goto failure;
		th = (const struct tcphdr *)(m_th->m_data);
	} else {
		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
	}
	tcp_hlen = th->th_off << 2;
	eth_hdr_len += tcp_hlen;
udp_packet:
	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
		goto failure;
	if (ppth != NULL)
		*ppth = th;
	return (eth_hdr_len);
failure:
	if (ppth != NULL)
		*ppth = NULL;
	return (0);
}

struct mlx5_wqe_dump_seg {
	struct mlx5_wqe_ctrl_seg ctrl;
	struct mlx5_wqe_data_seg data;
} __aligned(MLX5_SEND_WQE_BB);

CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);

int
mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5_wqe_dump_seg *wqe;
	struct mlx5_wqe_dump_seg *wqe_last;
	int nsegs;
	int xsegs;
	u32 off;
	u32 msb;
	int err;
	int x;
	struct mbuf *mb;
	const u32 ds_cnt = 2;
	u16 pi;
	const u8 opcode = MLX5_OPCODE_DUMP;

	/* get pointer to mbuf */
	mb = *mbp;

	/* get producer index */
	pi = sq->pc & sq->wq.sz_m1;

	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
	sq->mbuf[pi].num_wqebbs = 0;

	/* check number of segments in mbuf */
	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
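	/*
	 * EFBIG means the mbuf chain has more fragments than the DMA
	 * tag allows; defragment the chain and retry the mapping once.
	 */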
	if (err == EFBIG) {
		/* update statistics */
		sq->stats.defragged++;
		/* too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}

	if (err != 0)
		goto tx_drop;

	/* make sure all mbuf data, if any, is visible to the bus */
	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
	    BUS_DMASYNC_PREWRITE);

	/* compute number of real DUMP segments */
	msb = sq->priv->params_ethtool.hw_mtu_msb;
	for (x = xsegs = 0; x != nsegs; x++)
		xsegs += howmany((u32)segs[x].ds_len, msb);

	/* check if there are no segments */
	if (unlikely(xsegs == 0)) {
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (0);
	}

	/* return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
		sq->stats.enobuf++;
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (ENOBUFS);
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);

	for (x = 0; x != nsegs; x++) {
		for (off = 0; off < segs[x].ds_len; off += msb) {
			u32 len = segs[x].ds_len - off;

			/* limit length */
			if (likely(len > msb))
				len = msb;

			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

			/* fill control segment */
			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);

			/* fill data segment */
			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
			wqe->data.lkey = sq->mkey_be;
			wqe->data.byte_count = cpu_to_be32(len);

			/* advance to next building block */
			if (unlikely(wqe == wqe_last))
				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
			else
				wqe++;

			sq->mbuf[pi].num_wqebbs++;
			sq->pc++;
		}
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);

	/* put in place data fence */
	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;

	/* check if we should generate a completion event */
	if (mlx5e_do_send_cqe_inline(sq))
		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;

	/* copy data for doorbell */
	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));

	/* store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].p_refcount = parg->pref;
	atomic_add_int(parg->pref, 1);

	/* count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}

int
mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5e_xmit_args args = {};
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe *wqe;
	struct ifnet *ifp;
	int nsegs;
	int err;
	int x;
	struct mbuf *mb;
	u16 ds_cnt;
	u16 pi;
	u8 opcode;

#ifdef KERN_TLS
top:
#endif
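	/*
	 * Space is reserved for a worst-case sized WQE plus the NOP
	 * padding that may be emitted below to keep a WQE from
	 * wrapping around the end of the send queue.
	 */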
	/* Return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
		sq->stats.enobuf++;
		return (ENOBUFS);
	}

	/* Align SQ edge with NOPs to avoid WQE wrap around */
	pi = ((~sq->pc) & sq->wq.sz_m1);
	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
		/* Send one multi NOP message instead of many */
		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~sq->pc) & sq->wq.sz_m1);
		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
			sq->stats.enobuf++;
			return (ENOMEM);
		}
	}

#ifdef KERN_TLS
	/* Special handling for TLS packets, if any */
	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
	case MLX5E_TLS_LOOP:
		goto top;
	case MLX5E_TLS_FAILURE:
		mb = *mbp;
		err = ENOMEM;
		goto tx_drop;
	case MLX5E_TLS_DEFERRED:
		return (0);
	case MLX5E_TLS_CONTINUE:
	default:
		break;
	}
#endif

	/* Setup local variables */
	pi = sq->pc & sq->wq.sz_m1;
	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	ifp = sq->ifp;

	memset(wqe, 0, sizeof(*wqe));

	/* get pointer to mbuf */
	mb = *mbp;

	/* Send a copy of the frame to the BPF listener, if any */
	if (ifp != NULL && ifp->if_bpf != NULL)
		ETHER_BPF_MTAP(ifp, mb);

	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
	}
	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
	}
	if (wqe->eth.cs_flags == 0) {
		sq->stats.csum_offload_none++;
	}
	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
		u32 payload_len;
		u32 mss = mb->m_pkthdr.tso_segsz;
		u32 num_pkts;

		wqe->eth.mss = cpu_to_be16(mss);
		opcode = MLX5_OPCODE_LSO;
		if (args.ihs == 0)
			args.ihs = mlx5e_get_full_header_size(mb, NULL);
		if (unlikely(args.ihs == 0)) {
			err = EINVAL;
			goto tx_drop;
		}
		payload_len = mb->m_pkthdr.len - args.ihs;
		if (payload_len == 0)
			num_pkts = 1;
		else
			num_pkts = DIV_ROUND_UP(payload_len, mss);
		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);

		sq->stats.tso_packets++;
		sq->stats.tso_bytes += payload_len;
	} else {
		opcode = MLX5_OPCODE_SEND;

		if (args.ihs == 0) {
			switch (sq->min_inline_mode) {
			case MLX5_INLINE_MODE_IP:
			case MLX5_INLINE_MODE_TCP_UDP:
				args.ihs = mlx5e_get_full_header_size(mb, NULL);
				if (unlikely(args.ihs == 0))
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_L2:
				args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_NONE:
				/* FALLTHROUGH */
			default:
				if ((mb->m_flags & M_VLANTAG) != 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
					/* inlining VLAN data is not required */
					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
					args.ihs = 0;
				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
					/* inlining non-VLAN data is not required */
					args.ihs = 0;
				} else {
					/* we are forced to inline the L2 header, if any */
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				}
				break;
			}
		}
		sq->mbuf[pi].num_bytes = max_t(unsigned int,
		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
	}

	if (likely(args.ihs == 0)) {
		/* nothing to inline */
	} else if ((mb->m_flags & M_VLANTAG) != 0) {
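		/*
		 * The mbuf carries an out-of-band VLAN tag and the L2
		 * header must be inlined; construct the 802.1Q header
		 * by hand in the inline header buffer.
		 */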
		struct ether_vlan_header *eh = (struct ether_vlan_header *)
		    wqe->eth.inline_hdr_start;

		/* Range checks */
		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
			if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
				err = EINVAL;
				goto tx_drop;
			}
			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
			err = EINVAL;
			goto tx_drop;
		}
		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
		m_adj(mb, ETHER_HDR_LEN);
		/* Insert 4 bytes of VLAN tag into the data stream */
		eh->evl_proto = eh->evl_encap_proto;
		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
		/* Copy rest of header data, if any */
		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
		m_adj(mb, args.ihs - ETHER_HDR_LEN);
		/* Extend header by 4 bytes */
		args.ihs += ETHER_VLAN_ENCAP_LEN;
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	} else {
		/* check if inline header size is too big */
		if (unlikely(args.ihs > sq->max_inline)) {
			if (unlikely(mb->m_pkthdr.csum_flags & CSUM_TSO)) {
				err = EINVAL;
				goto tx_drop;
			}
			args.ihs = sq->max_inline;
		}
		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
		m_adj(mb, args.ihs);
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	}

	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
		    MLX5_SEND_WQE_DS);
	}
	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;

	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Update statistics */
		sq->stats.defragged++;
		/* Too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* Try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}
	/* Catch errors */
	if (err != 0)
		goto tx_drop;

	/* Make sure all mbuf data, if any, is visible to the bus */
	if (nsegs != 0) {
		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
		    BUS_DMASYNC_PREWRITE);
	} else {
		/* All data was inlined, free the mbuf. */
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		mb = NULL;
	}

	for (x = 0; x != nsegs; x++) {
		if (segs[x].ds_len == 0)
			continue;
		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
		dseg++;
	}

	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);

	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	/* Store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->mbuf[pi].p_refcount = args.pref;
	if (unlikely(args.pref != NULL))
		atomic_add_int(args.pref, 1);
	sq->pc += sq->mbuf[pi].num_wqebbs;

	/* Count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}

static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
	u16 sqcc;

	/*
	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	while (budget > 0) {
		struct mlx5_cqe64 *cqe;
		struct mbuf *mb;
		u16 x;
		u16 ci;

		cqe = mlx5e_get_cqe(&sq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&sq->cq.wq);

		/* update budget according to the event factor */
		budget -= sq->cev_factor;

		for (x = 0; x != sq->cev_factor; x++) {
			ci = sqcc & sq->wq.sz_m1;
			mb = sq->mbuf[ci].mbuf;
			sq->mbuf[ci].mbuf = NULL;

			if (unlikely(sq->mbuf[ci].p_refcount != NULL)) {
				atomic_add_int(sq->mbuf[ci].p_refcount, -1);
				sq->mbuf[ci].p_refcount = NULL;
			}

			if (mb == NULL) {
				if (sq->mbuf[ci].num_bytes == 0) {
					/* NOP */
					sq->stats.nop++;
				}
			} else {
				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

				/* Free transmitted mbuf */
				m_freem(mb);
			}
			sqcc += sq->mbuf[ci].num_wqebbs;
		}
	}

	mlx5_cqwq_update_db_record(&sq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	sq->cc = sqcc;
}

static int
mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
{
	int err = 0;

	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    READ_ONCE(sq->running) == 0)) {
		m_freem(mb);
		return (ENETDOWN);
	}

	/* Do transmit */
	if (mlx5e_sq_xmit(sq, &mb) != 0) {
		/* NOTE: m_freem() is NULL safe */
		m_freem(mb);
		err = ENOBUFS;
	}

	/* Check if we need to write the doorbell */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}

	/*
	 * Check if we need to start the event timer which flushes the
	 * transmit ring on timeout:
	 */
	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
	    sq->cev_factor != 1)) {
		/* start the timer */
		mlx5e_sq_cev_timeout(sq);
	} else {
		/* don't send NOPs yet */
		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	}
	return (err);
}

int
mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_sq *sq;
	int ret;

	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
		if (unlikely(sq == NULL)) {
			goto select_queue;
		}
	} else {
select_queue:
		sq = mlx5e_select_queue(ifp, mb);
		if (unlikely(sq == NULL)) {
			/* Free mbuf */
			m_freem(mb);

			/* Invalid send queue */
			return (ENXIO);
		}
	}

	mtx_lock(&sq->lock);
	ret = mlx5e_xmit_locked(ifp, sq, mb);
	mtx_unlock(&sq->lock);

	return (ret);
}

void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
{
	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

	mtx_lock(&sq->comp_lock);
	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
	mtx_unlock(&sq->comp_lock);
}