1 /*- 2 * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD$ 26 */ 27 28 #include "opt_kern_tls.h" 29 30 #include "en.h" 31 #include <machine/atomic.h> 32 33 static inline bool 34 mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq) 35 { 36 sq->cev_counter++; 37 /* interleave the CQEs */ 38 if (sq->cev_counter >= sq->cev_factor) { 39 sq->cev_counter = 0; 40 return (true); 41 } 42 return (false); 43 } 44 45 bool 46 mlx5e_do_send_cqe(struct mlx5e_sq *sq) 47 { 48 49 return (mlx5e_do_send_cqe_inline(sq)); 50 } 51 52 void 53 mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt) 54 { 55 u16 pi = sq->pc & sq->wq.sz_m1; 56 struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); 57 58 memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); 59 60 wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); 61 wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); 62 if (mlx5e_do_send_cqe_inline(sq)) 63 wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; 64 else 65 wqe->ctrl.fm_ce_se = 0; 66 67 /* Copy data for doorbell */ 68 memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); 69 70 sq->mbuf[pi].mbuf = NULL; 71 sq->mbuf[pi].num_bytes = 0; 72 sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); 73 sq->pc += sq->mbuf[pi].num_wqebbs; 74 } 75 76 #if (__FreeBSD_version >= 1100000) 77 static uint32_t mlx5e_hash_value; 78 79 static void 80 mlx5e_hash_init(void *arg) 81 { 82 mlx5e_hash_value = m_ether_tcpip_hash_init(); 83 } 84 85 /* Make kernel call mlx5e_hash_init after the random stack finished initializing */ 86 SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL); 87 #endif 88 89 static struct mlx5e_sq * 90 mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb) 91 { 92 struct m_snd_tag *mb_tag; 93 struct mlx5e_sq *sq; 94 95 mb_tag = mb->m_pkthdr.snd_tag; 96 97 #ifdef KERN_TLS 98 top: 99 #endif 100 /* get pointer to sendqueue */ 101 switch (mb_tag->type) { 102 #ifdef RATELIMIT 103 case IF_SND_TAG_TYPE_RATE_LIMIT: 104 sq = container_of(mb_tag, 105 struct mlx5e_rl_channel, tag)->sq; 106 break; 107 #if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT) 108 case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: 109 mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; 110 goto top; 111 #endif 112 #endif 113 case IF_SND_TAG_TYPE_UNLIMITED: 114 sq = &container_of(mb_tag, 115 struct mlx5e_channel, tag)->sq[0]; 116 KASSERT((mb_tag->refcount > 0), 117 ("mlx5e_select_queue: Channel refs are zero for unlimited tag")); 118 break; 119 #ifdef KERN_TLS 120 case IF_SND_TAG_TYPE_TLS: 121 mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag; 122 goto top; 123 #endif 124 default: 125 sq = NULL; 126 break; 127 } 128 129 /* check if valid */ 130 if (sq != NULL && READ_ONCE(sq->running) != 0) 131 return (sq); 132 133 return (NULL); 134 } 135 136 static struct mlx5e_sq * 137 mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb) 138 { 139 struct mlx5e_priv *priv = ifp->if_softc; 140 struct mlx5e_sq *sq; 141 u32 ch; 142 u32 tc; 143 144 /* obtain VLAN information if present */ 145 if (mb->m_flags & M_VLANTAG) { 146 tc = (mb->m_pkthdr.ether_vtag >> 13); 147 if (tc >= priv->num_tc) 148 tc = priv->default_vlan_prio; 149 } else { 150 tc = priv->default_vlan_prio; 151 } 152 153 ch = priv->params.num_channels; 154 155 /* check if flowid is set */ 156 if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) { 157 #ifdef RSS 158 u32 temp; 159 160 if (rss_hash2bucket(mb->m_pkthdr.flowid, 161 M_HASHTYPE_GET(mb), &temp) == 0) 162 ch = temp % ch; 163 else 164 #endif 165 ch = (mb->m_pkthdr.flowid % 128) % ch; 166 } else { 167 #if (__FreeBSD_version >= 1100000) 168 ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | 169 MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch; 170 #else 171 /* 172 * m_ether_tcpip_hash not present in stable, so just 173 * throw unhashed mbufs on queue 0 174 */ 175 ch = 0; 176 #endif 177 } 178 179 /* check if send queue is running */ 180 sq = &priv->channel[ch].sq[tc]; 181 if (likely(READ_ONCE(sq->running) != 0)) 182 return (sq); 183 return (NULL); 184 } 185 186 static inline u16 187 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb) 188 { 189 struct ether_vlan_header *eh; 190 uint16_t eth_type; 191 int min_inline; 192 193 eh = mtod(mb, struct ether_vlan_header *); 194 if (unlikely(mb->m_len < ETHER_HDR_LEN)) { 195 goto max_inline; 196 } else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 197 if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))) 198 goto max_inline; 199 eth_type = ntohs(eh->evl_proto); 200 min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 201 } else { 202 eth_type = ntohs(eh->evl_encap_proto); 203 min_inline = ETHER_HDR_LEN; 204 } 205 206 switch (eth_type) { 207 case ETHERTYPE_IP: 208 case ETHERTYPE_IPV6: 209 /* 210 * Make sure the TOS(IPv4) or traffic class(IPv6) 211 * field gets inlined. Else the SQ may stall. 212 */ 213 min_inline += 4; 214 break; 215 default: 216 goto max_inline; 217 } 218 219 /* 220 * m_copydata() will be used on the remaining header which 221 * does not need to reside within the first m_len bytes of 222 * data: 223 */ 224 if (mb->m_pkthdr.len < min_inline) 225 goto max_inline; 226 return (min_inline); 227 228 max_inline: 229 return (MIN(mb->m_pkthdr.len, sq->max_inline)); 230 } 231 232 /* 233 * This function parse IPv4 and IPv6 packets looking for TCP and UDP 234 * headers. 235 * 236 * Upon return the pointer at which the "ppth" argument points, is set 237 * to the location of the TCP header. NULL is used if no TCP header is 238 * present. 239 * 240 * The return value indicates the number of bytes from the beginning 241 * of the packet until the first byte after the TCP or UDP header. If 242 * this function returns zero, the parsing failed. 243 */ 244 int 245 mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth) 246 { 247 const struct ether_vlan_header *eh; 248 const struct tcphdr *th; 249 const struct ip *ip; 250 int ip_hlen, tcp_hlen; 251 const struct ip6_hdr *ip6; 252 uint16_t eth_type; 253 int eth_hdr_len; 254 255 eh = mtod(mb, const struct ether_vlan_header *); 256 if (unlikely(mb->m_len < ETHER_HDR_LEN)) 257 goto failure; 258 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 259 if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))) 260 goto failure; 261 eth_type = ntohs(eh->evl_proto); 262 eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 263 } else { 264 eth_type = ntohs(eh->evl_encap_proto); 265 eth_hdr_len = ETHER_HDR_LEN; 266 } 267 268 switch (eth_type) { 269 case ETHERTYPE_IP: 270 ip = (const struct ip *)(mb->m_data + eth_hdr_len); 271 if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip))) 272 goto failure; 273 switch (ip->ip_p) { 274 case IPPROTO_TCP: 275 ip_hlen = ip->ip_hl << 2; 276 eth_hdr_len += ip_hlen; 277 goto tcp_packet; 278 case IPPROTO_UDP: 279 ip_hlen = ip->ip_hl << 2; 280 eth_hdr_len += ip_hlen + 8; 281 th = NULL; 282 goto udp_packet; 283 default: 284 goto failure; 285 } 286 break; 287 case ETHERTYPE_IPV6: 288 ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len); 289 if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6))) 290 goto failure; 291 switch (ip6->ip6_nxt) { 292 case IPPROTO_TCP: 293 eth_hdr_len += sizeof(*ip6); 294 goto tcp_packet; 295 case IPPROTO_UDP: 296 eth_hdr_len += sizeof(*ip6) + 8; 297 th = NULL; 298 goto udp_packet; 299 default: 300 goto failure; 301 } 302 break; 303 default: 304 goto failure; 305 } 306 tcp_packet: 307 if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) { 308 const struct mbuf *m_th = mb->m_next; 309 if (unlikely(mb->m_len != eth_hdr_len || 310 m_th == NULL || m_th->m_len < sizeof(*th))) 311 goto failure; 312 th = (const struct tcphdr *)(m_th->m_data); 313 } else { 314 th = (const struct tcphdr *)(mb->m_data + eth_hdr_len); 315 } 316 tcp_hlen = th->th_off << 2; 317 eth_hdr_len += tcp_hlen; 318 udp_packet: 319 /* 320 * m_copydata() will be used on the remaining header which 321 * does not need to reside within the first m_len bytes of 322 * data: 323 */ 324 if (unlikely(mb->m_pkthdr.len < eth_hdr_len)) 325 goto failure; 326 if (ppth != NULL) 327 *ppth = th; 328 return (eth_hdr_len); 329 failure: 330 if (ppth != NULL) 331 *ppth = NULL; 332 return (0); 333 } 334 335 struct mlx5_wqe_dump_seg { 336 struct mlx5_wqe_ctrl_seg ctrl; 337 struct mlx5_wqe_data_seg data; 338 } __aligned(MLX5_SEND_WQE_BB); 339 340 CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1); 341 342 int 343 mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp) 344 { 345 bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; 346 struct mlx5_wqe_dump_seg *wqe; 347 struct mlx5_wqe_dump_seg *wqe_last; 348 int nsegs; 349 int xsegs; 350 u32 off; 351 u32 msb; 352 int err; 353 int x; 354 struct mbuf *mb; 355 const u32 ds_cnt = 2; 356 u16 pi; 357 const u8 opcode = MLX5_OPCODE_DUMP; 358 359 /* get pointer to mbuf */ 360 mb = *mbp; 361 362 /* get producer index */ 363 pi = sq->pc & sq->wq.sz_m1; 364 365 sq->mbuf[pi].num_bytes = mb->m_pkthdr.len; 366 sq->mbuf[pi].num_wqebbs = 0; 367 368 /* check number of segments in mbuf */ 369 err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, 370 mb, segs, &nsegs, BUS_DMA_NOWAIT); 371 if (err == EFBIG) { 372 /* update statistics */ 373 sq->stats.defragged++; 374 /* too many mbuf fragments */ 375 mb = m_defrag(*mbp, M_NOWAIT); 376 if (mb == NULL) { 377 mb = *mbp; 378 goto tx_drop; 379 } 380 /* try again */ 381 err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, 382 mb, segs, &nsegs, BUS_DMA_NOWAIT); 383 } 384 385 if (err != 0) 386 goto tx_drop; 387 388 /* make sure all mbuf data, if any, is visible to the bus */ 389 bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, 390 BUS_DMASYNC_PREWRITE); 391 392 /* compute number of real DUMP segments */ 393 msb = sq->priv->params_ethtool.hw_mtu_msb; 394 for (x = xsegs = 0; x != nsegs; x++) 395 xsegs += howmany((u32)segs[x].ds_len, msb); 396 397 /* check if there are no segments */ 398 if (unlikely(xsegs == 0)) { 399 bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); 400 m_freem(mb); 401 *mbp = NULL; /* safety clear */ 402 return (0); 403 } 404 405 /* return ENOBUFS if the queue is full */ 406 if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) { 407 sq->stats.enobuf++; 408 bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); 409 m_freem(mb); 410 *mbp = NULL; /* safety clear */ 411 return (ENOBUFS); 412 } 413 414 wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); 415 wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1); 416 417 for (x = 0; x != nsegs; x++) { 418 for (off = 0; off < segs[x].ds_len; off += msb) { 419 u32 len = segs[x].ds_len - off; 420 421 /* limit length */ 422 if (likely(len > msb)) 423 len = msb; 424 425 memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); 426 427 /* fill control segment */ 428 wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); 429 wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); 430 wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8); 431 432 /* fill data segment */ 433 wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off); 434 wqe->data.lkey = sq->mkey_be; 435 wqe->data.byte_count = cpu_to_be32(len); 436 437 /* advance to next building block */ 438 if (unlikely(wqe == wqe_last)) 439 wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0); 440 else 441 wqe++; 442 443 sq->mbuf[pi].num_wqebbs++; 444 sq->pc++; 445 } 446 } 447 448 wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); 449 wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1); 450 451 /* put in place data fence */ 452 wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL; 453 454 /* check if we should generate a completion event */ 455 if (mlx5e_do_send_cqe_inline(sq)) 456 wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; 457 458 /* copy data for doorbell */ 459 memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32)); 460 461 /* store pointer to mbuf */ 462 sq->mbuf[pi].mbuf = mb; 463 sq->mbuf[pi].p_refcount = parg->pref; 464 atomic_add_int(parg->pref, 1); 465 466 /* count all traffic going out */ 467 sq->stats.packets++; 468 sq->stats.bytes += sq->mbuf[pi].num_bytes; 469 470 *mbp = NULL; /* safety clear */ 471 return (0); 472 473 tx_drop: 474 sq->stats.dropped++; 475 *mbp = NULL; 476 m_freem(mb); 477 return err; 478 } 479 480 int 481 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) 482 { 483 bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; 484 struct mlx5e_xmit_args args = {}; 485 struct mlx5_wqe_data_seg *dseg; 486 struct mlx5e_tx_wqe *wqe; 487 struct ifnet *ifp; 488 int nsegs; 489 int err; 490 int x; 491 struct mbuf *mb; 492 u16 ds_cnt; 493 u16 pi; 494 u8 opcode; 495 496 #ifdef KERN_TLS 497 top: 498 #endif 499 /* Return ENOBUFS if the queue is full */ 500 if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) { 501 sq->stats.enobuf++; 502 return (ENOBUFS); 503 } 504 505 /* Align SQ edge with NOPs to avoid WQE wrap around */ 506 pi = ((~sq->pc) & sq->wq.sz_m1); 507 if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { 508 /* Send one multi NOP message instead of many */ 509 mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS); 510 pi = ((~sq->pc) & sq->wq.sz_m1); 511 if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { 512 sq->stats.enobuf++; 513 return (ENOMEM); 514 } 515 } 516 517 #ifdef KERN_TLS 518 /* Special handling for TLS packets, if any */ 519 switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) { 520 case MLX5E_TLS_LOOP: 521 goto top; 522 case MLX5E_TLS_FAILURE: 523 mb = *mbp; 524 err = ENOMEM; 525 goto tx_drop; 526 case MLX5E_TLS_DEFERRED: 527 return (0); 528 case MLX5E_TLS_CONTINUE: 529 default: 530 break; 531 } 532 #endif 533 534 /* Setup local variables */ 535 pi = sq->pc & sq->wq.sz_m1; 536 wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); 537 ifp = sq->ifp; 538 539 memset(wqe, 0, sizeof(*wqe)); 540 541 /* get pointer to mbuf */ 542 mb = *mbp; 543 544 /* Send a copy of the frame to the BPF listener, if any */ 545 if (ifp != NULL && ifp->if_bpf != NULL) 546 ETHER_BPF_MTAP(ifp, mb); 547 548 if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) { 549 wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM; 550 } 551 if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) { 552 wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM; 553 } 554 if (wqe->eth.cs_flags == 0) { 555 sq->stats.csum_offload_none++; 556 } 557 if (mb->m_pkthdr.csum_flags & CSUM_TSO) { 558 u32 payload_len; 559 u32 mss = mb->m_pkthdr.tso_segsz; 560 u32 num_pkts; 561 562 wqe->eth.mss = cpu_to_be16(mss); 563 opcode = MLX5_OPCODE_LSO; 564 if (args.ihs == 0) 565 args.ihs = mlx5e_get_full_header_size(mb, NULL); 566 if (unlikely(args.ihs == 0)) { 567 err = EINVAL; 568 goto tx_drop; 569 } 570 payload_len = mb->m_pkthdr.len - args.ihs; 571 if (payload_len == 0) 572 num_pkts = 1; 573 else 574 num_pkts = DIV_ROUND_UP(payload_len, mss); 575 sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs); 576 577 sq->stats.tso_packets++; 578 sq->stats.tso_bytes += payload_len; 579 } else { 580 opcode = MLX5_OPCODE_SEND; 581 582 if (args.ihs == 0) { 583 switch (sq->min_inline_mode) { 584 case MLX5_INLINE_MODE_IP: 585 case MLX5_INLINE_MODE_TCP_UDP: 586 args.ihs = mlx5e_get_full_header_size(mb, NULL); 587 if (unlikely(args.ihs == 0)) 588 args.ihs = mlx5e_get_l2_header_size(sq, mb); 589 break; 590 case MLX5_INLINE_MODE_L2: 591 args.ihs = mlx5e_get_l2_header_size(sq, mb); 592 break; 593 case MLX5_INLINE_MODE_NONE: 594 /* FALLTHROUGH */ 595 default: 596 if ((mb->m_flags & M_VLANTAG) != 0 && 597 (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) { 598 /* inlining VLAN data is not required */ 599 wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */ 600 wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag); 601 args.ihs = 0; 602 } else if ((mb->m_flags & M_VLANTAG) == 0 && 603 (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) { 604 /* inlining non-VLAN data is not required */ 605 args.ihs = 0; 606 } else { 607 /* we are forced to inlining L2 header, if any */ 608 args.ihs = mlx5e_get_l2_header_size(sq, mb); 609 } 610 break; 611 } 612 } 613 sq->mbuf[pi].num_bytes = max_t (unsigned int, 614 mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); 615 } 616 617 if (likely(args.ihs == 0)) { 618 /* nothing to inline */ 619 } else if ((mb->m_flags & M_VLANTAG) != 0) { 620 struct ether_vlan_header *eh = (struct ether_vlan_header *) 621 wqe->eth.inline_hdr_start; 622 623 /* Range checks */ 624 if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) { 625 if (mb->m_pkthdr.csum_flags & CSUM_TSO) { 626 err = EINVAL; 627 goto tx_drop; 628 } 629 args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN); 630 } else if (unlikely(args.ihs < ETHER_HDR_LEN)) { 631 err = EINVAL; 632 goto tx_drop; 633 } 634 m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh); 635 m_adj(mb, ETHER_HDR_LEN); 636 /* Insert 4 bytes VLAN tag into data stream */ 637 eh->evl_proto = eh->evl_encap_proto; 638 eh->evl_encap_proto = htons(ETHERTYPE_VLAN); 639 eh->evl_tag = htons(mb->m_pkthdr.ether_vtag); 640 /* Copy rest of header data, if any */ 641 m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); 642 m_adj(mb, args.ihs - ETHER_HDR_LEN); 643 /* Extend header by 4 bytes */ 644 args.ihs += ETHER_VLAN_ENCAP_LEN; 645 wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); 646 } else { 647 /* check if inline header size is too big */ 648 if (unlikely(args.ihs > sq->max_inline)) { 649 if (unlikely(mb->m_pkthdr.csum_flags & CSUM_TSO)) { 650 err = EINVAL; 651 goto tx_drop; 652 } 653 args.ihs = sq->max_inline; 654 } 655 m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start); 656 m_adj(mb, args.ihs); 657 wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs); 658 } 659 660 ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; 661 if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) { 662 ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start), 663 MLX5_SEND_WQE_DS); 664 } 665 dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt; 666 667 err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, 668 mb, segs, &nsegs, BUS_DMA_NOWAIT); 669 if (err == EFBIG) { 670 /* Update statistics */ 671 sq->stats.defragged++; 672 /* Too many mbuf fragments */ 673 mb = m_defrag(*mbp, M_NOWAIT); 674 if (mb == NULL) { 675 mb = *mbp; 676 goto tx_drop; 677 } 678 /* Try again */ 679 err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, 680 mb, segs, &nsegs, BUS_DMA_NOWAIT); 681 } 682 /* Catch errors */ 683 if (err != 0) 684 goto tx_drop; 685 686 /* Make sure all mbuf data, if any, is visible to the bus */ 687 if (nsegs != 0) { 688 bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, 689 BUS_DMASYNC_PREWRITE); 690 } else { 691 /* All data was inlined, free the mbuf. */ 692 bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map); 693 m_freem(mb); 694 mb = NULL; 695 } 696 697 for (x = 0; x != nsegs; x++) { 698 if (segs[x].ds_len == 0) 699 continue; 700 dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr); 701 dseg->lkey = sq->mkey_be; 702 dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len); 703 dseg++; 704 } 705 706 ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl)); 707 708 wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); 709 wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); 710 wqe->ctrl.imm = cpu_to_be32(args.tisn << 8); 711 712 if (mlx5e_do_send_cqe_inline(sq)) 713 wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; 714 else 715 wqe->ctrl.fm_ce_se = 0; 716 717 /* Copy data for doorbell */ 718 memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32)); 719 720 /* Store pointer to mbuf */ 721 sq->mbuf[pi].mbuf = mb; 722 sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); 723 sq->mbuf[pi].p_refcount = args.pref; 724 if (unlikely(args.pref != NULL)) 725 atomic_add_int(args.pref, 1); 726 sq->pc += sq->mbuf[pi].num_wqebbs; 727 728 /* Count all traffic going out */ 729 sq->stats.packets++; 730 sq->stats.bytes += sq->mbuf[pi].num_bytes; 731 732 *mbp = NULL; /* safety clear */ 733 return (0); 734 735 tx_drop: 736 sq->stats.dropped++; 737 *mbp = NULL; 738 m_freem(mb); 739 return err; 740 } 741 742 static void 743 mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget) 744 { 745 u16 sqcc; 746 747 /* 748 * sq->cc must be updated only after mlx5_cqwq_update_db_record(), 749 * otherwise a cq overrun may occur 750 */ 751 sqcc = sq->cc; 752 753 while (budget > 0) { 754 struct mlx5_cqe64 *cqe; 755 struct mbuf *mb; 756 u16 x; 757 u16 ci; 758 759 cqe = mlx5e_get_cqe(&sq->cq); 760 if (!cqe) 761 break; 762 763 mlx5_cqwq_pop(&sq->cq.wq); 764 765 /* update budget according to the event factor */ 766 budget -= sq->cev_factor; 767 768 for (x = 0; x != sq->cev_factor; x++) { 769 ci = sqcc & sq->wq.sz_m1; 770 mb = sq->mbuf[ci].mbuf; 771 sq->mbuf[ci].mbuf = NULL; 772 773 if (unlikely(sq->mbuf[ci].p_refcount != NULL)) { 774 atomic_add_int(sq->mbuf[ci].p_refcount, -1); 775 sq->mbuf[ci].p_refcount = NULL; 776 } 777 778 if (mb == NULL) { 779 if (sq->mbuf[ci].num_bytes == 0) { 780 /* NOP */ 781 sq->stats.nop++; 782 } 783 } else { 784 bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map, 785 BUS_DMASYNC_POSTWRITE); 786 bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map); 787 788 /* Free transmitted mbuf */ 789 m_freem(mb); 790 } 791 sqcc += sq->mbuf[ci].num_wqebbs; 792 } 793 } 794 795 mlx5_cqwq_update_db_record(&sq->cq.wq); 796 797 /* Ensure cq space is freed before enabling more cqes */ 798 atomic_thread_fence_rel(); 799 800 sq->cc = sqcc; 801 } 802 803 static int 804 mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb) 805 { 806 int err = 0; 807 808 if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 809 READ_ONCE(sq->running) == 0)) { 810 m_freem(mb); 811 return (ENETDOWN); 812 } 813 814 /* Do transmit */ 815 if (mlx5e_sq_xmit(sq, &mb) != 0) { 816 /* NOTE: m_freem() is NULL safe */ 817 m_freem(mb); 818 err = ENOBUFS; 819 } 820 821 /* Check if we need to write the doorbell */ 822 if (likely(sq->doorbell.d64 != 0)) { 823 mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); 824 sq->doorbell.d64 = 0; 825 } 826 827 /* 828 * Check if we need to start the event timer which flushes the 829 * transmit ring on timeout: 830 */ 831 if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL && 832 sq->cev_factor != 1)) { 833 /* start the timer */ 834 mlx5e_sq_cev_timeout(sq); 835 } else { 836 /* don't send NOPs yet */ 837 sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; 838 } 839 return (err); 840 } 841 842 int 843 mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb) 844 { 845 struct mlx5e_sq *sq; 846 int ret; 847 848 if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) { 849 MPASS(mb->m_pkthdr.snd_tag->ifp == ifp); 850 sq = mlx5e_select_queue_by_send_tag(ifp, mb); 851 if (unlikely(sq == NULL)) { 852 goto select_queue; 853 } 854 } else { 855 select_queue: 856 sq = mlx5e_select_queue(ifp, mb); 857 if (unlikely(sq == NULL)) { 858 /* Free mbuf */ 859 m_freem(mb); 860 861 /* Invalid send queue */ 862 return (ENXIO); 863 } 864 } 865 866 mtx_lock(&sq->lock); 867 ret = mlx5e_xmit_locked(ifp, sq, mb); 868 mtx_unlock(&sq->lock); 869 870 return (ret); 871 } 872 873 void 874 mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq) 875 { 876 struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq); 877 878 mtx_lock(&sq->comp_lock); 879 mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX); 880 mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock)); 881 mtx_unlock(&sq->comp_lock); 882 } 883