/*-
 * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_kern_tls.h"

#include "en.h"
#include <machine/atomic.h>

static inline bool
mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
{
	sq->cev_counter++;
	/* interleave the CQEs */
	if (sq->cev_counter >= sq->cev_factor) {
		sq->cev_counter = 0;
		return (true);
	}
	return (false);
}

bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{

	return (mlx5e_do_send_cqe_inline(sq));
}

void
mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
{
	u16 pi = sq->pc & sq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	sq->mbuf[pi].mbuf = NULL;
	sq->mbuf[pi].num_bytes = 0;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += sq->mbuf[pi].num_wqebbs;
}

#if (__FreeBSD_version >= 1100000)
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make kernel call mlx5e_hash_init after the random stack finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
#endif
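
/*
 * Worked example (illustrative only, assuming cev_factor == 4): with the
 * completion event factor logic above, only every cev_factor-th WQE
 * requests a CQE.  Eight consecutive calls to mlx5e_do_send_cqe_inline()
 * would then behave as follows:
 *
 *	call #:			1  2  3  4  5  6  7  8
 *	cev_counter after:	1  2  3  0  1  2  3  0
 *	returns true:		-  -  -  X  -  -  -  X
 *
 * so the hardware generates roughly one completion per cev_factor
 * transmitted work requests, reducing completion queue overhead.
 */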

static struct mlx5e_sq *
mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb)
{
	struct m_snd_tag *mb_tag;
	struct mlx5e_snd_tag *ptag;
	struct mlx5e_sq *sq;

	mb_tag = mb->m_pkthdr.snd_tag;

#ifdef KERN_TLS
top:
#endif
	/* get pointer to sendqueue */
	ptag = container_of(mb_tag, struct mlx5e_snd_tag, m_snd_tag);

	switch (ptag->type) {
#ifdef RATELIMIT
	case IF_SND_TAG_TYPE_RATE_LIMIT:
		sq = container_of(ptag,
		    struct mlx5e_rl_channel, tag)->sq;
		break;
#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT)
	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
		mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
#endif
	case IF_SND_TAG_TYPE_UNLIMITED:
		sq = &container_of(ptag,
		    struct mlx5e_channel, tag)->sq[0];
		KASSERT((ptag->m_snd_tag.refcount > 0),
		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
		break;
#ifdef KERN_TLS
	case IF_SND_TAG_TYPE_TLS:
		mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
	default:
		sq = NULL;
		break;
	}

	/* check if valid */
	if (sq != NULL && READ_ONCE(sq->running) != 0)
		return (sq);

	return (NULL);
}

static struct mlx5e_sq *
mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	struct mlx5e_sq *sq;
	u32 ch;
	u32 tc;

	/* obtain VLAN information if present */
	if (mb->m_flags & M_VLANTAG) {
		tc = (mb->m_pkthdr.ether_vtag >> 13);
		if (tc >= priv->num_tc)
			tc = priv->default_vlan_prio;
	} else {
		tc = priv->default_vlan_prio;
	}

	ch = priv->params.num_channels;

	/* check if flowid is set */
	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
		u32 temp;

		if (rss_hash2bucket(mb->m_pkthdr.flowid,
		    M_HASHTYPE_GET(mb), &temp) == 0)
			ch = temp % ch;
		else
#endif
			ch = (mb->m_pkthdr.flowid % 128) % ch;
	} else {
#if (__FreeBSD_version >= 1100000)
		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
#else
		/*
		 * m_ether_tcpip_hash not present in stable, so just
		 * throw unhashed mbufs on queue 0
		 */
		ch = 0;
#endif
	}

	/* check if send queue is running */
	sq = &priv->channel[ch].sq[tc];
	if (likely(READ_ONCE(sq->running) != 0))
		return (sq);
	return (NULL);
}

static inline u16
mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
	struct ether_vlan_header *eh;
	uint16_t eth_type;
	int min_inline;

	eh = mtod(mb, struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
		goto max_inline;
	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
			goto max_inline;
		eth_type = ntohs(eh->evl_proto);
		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		min_inline = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		/*
		 * Make sure the TOS(IPv4) or traffic class(IPv6)
		 * field gets inlined. Else the SQ may stall.
		 */
		min_inline += 4;
		break;
	default:
		goto max_inline;
	}

	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (mb->m_pkthdr.len < min_inline)
		goto max_inline;
	return (min_inline);

max_inline:
	return (MIN(mb->m_pkthdr.len, sq->max_inline));
}
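
/*
 * Worked example (illustrative only): for a VLAN tagged IPv4 frame,
 * mlx5e_get_l2_header_size() above returns ETHER_HDR_LEN (14) +
 * ETHER_VLAN_ENCAP_LEN (4) + 4 = 22 bytes.  The extra 4 bytes cover the
 * IPv4 version/IHL, TOS and total length fields, so the TOS byte is
 * always part of the inlined header as required to keep the SQ from
 * stalling.
 */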

/*
 * This function parses IPv4 and IPv6 packets looking for TCP and UDP
 * headers.
 *
 * Upon return, the pointer at which the "ppth" argument points is set
 * to the location of the TCP header. NULL is used if no TCP header is
 * present.
 *
 * The return value indicates the number of bytes from the beginning
 * of the packet until the first byte after the TCP or UDP header. If
 * this function returns zero, the parsing failed.
 */
int
mlx5e_get_full_header_size(struct mbuf *mb, struct tcphdr **ppth)
{
	struct ether_vlan_header *eh;
	struct tcphdr *th;
	struct ip *ip;
	int ip_hlen, tcp_hlen;
	struct ip6_hdr *ip6;
	uint16_t eth_type;
	int eth_hdr_len;

	eh = mtod(mb, struct ether_vlan_header *);
	if (mb->m_len < ETHER_HDR_LEN)
		goto failure;
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			goto failure;
		eth_type = ntohs(eh->evl_proto);
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		eth_hdr_len = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mb->m_data + eth_hdr_len);
		if (mb->m_len < eth_hdr_len + sizeof(*ip))
			goto failure;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen;
			goto tcp_packet;
		case IPPROTO_UDP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen + 8;
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
		if (mb->m_len < eth_hdr_len + sizeof(*ip6))
			goto failure;
		switch (ip6->ip6_nxt) {
		case IPPROTO_TCP:
			eth_hdr_len += sizeof(*ip6);
			goto tcp_packet;
		case IPPROTO_UDP:
			eth_hdr_len += sizeof(*ip6) + 8;
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	default:
		goto failure;
	}
tcp_packet:
	if (mb->m_len < eth_hdr_len + sizeof(*th))
		goto failure;
	th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
	tcp_hlen = th->th_off << 2;
	eth_hdr_len += tcp_hlen;
udp_packet:
	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (mb->m_pkthdr.len < eth_hdr_len)
		goto failure;
	if (ppth != NULL)
		*ppth = th;
	return (eth_hdr_len);
failure:
	if (ppth != NULL)
		*ppth = NULL;
	return (0);
}

struct mlx5_wqe_dump_seg {
	struct mlx5_wqe_ctrl_seg ctrl;
	struct mlx5_wqe_data_seg data;
} __aligned(MLX5_SEND_WQE_BB);

CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);
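
/*
 * Worked example (illustrative only, assuming hw_mtu_msb == 2048): the
 * DUMP transmit routine below splits every DMA segment into chunks no
 * larger than "msb".  Two segments of 3000 and 512 bytes would produce
 * howmany(3000, 2048) + howmany(512, 2048) == 2 + 1 == 3 DUMP WQEs,
 * each consisting of one control and one data segment (ds_cnt == 2).
 */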

int
mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5_wqe_dump_seg *wqe;
	struct mlx5_wqe_dump_seg *wqe_last;
	int nsegs;
	int xsegs;
	u32 off;
	u32 msb;
	int err;
	int x;
	struct mbuf *mb;
	const u32 ds_cnt = 2;
	u16 pi;
	const u8 opcode = MLX5_OPCODE_DUMP;

	/* get pointer to mbuf */
	mb = *mbp;

	/* get producer index */
	pi = sq->pc & sq->wq.sz_m1;

	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
	sq->mbuf[pi].num_wqebbs = 0;

	/* check number of segments in mbuf */
	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* update statistics */
		sq->stats.defragged++;
		/* too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}

	if (err != 0)
		goto tx_drop;

	/* make sure all mbuf data, if any, is visible to the bus */
	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
	    BUS_DMASYNC_PREWRITE);

	/* compute number of real DUMP segments */
	msb = sq->priv->params_ethtool.hw_mtu_msb;
	for (x = xsegs = 0; x != nsegs; x++)
		xsegs += howmany((u32)segs[x].ds_len, msb);

	/* check if there are no segments */
	if (unlikely(xsegs == 0)) {
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (0);
	}

	/* return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (ENOBUFS);
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);

	for (x = 0; x != nsegs; x++) {
		for (off = 0; off < segs[x].ds_len; off += msb) {
			u32 len = segs[x].ds_len - off;

			/* limit length */
			if (likely(len > msb))
				len = msb;

			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

			/* fill control segment */
			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);

			/* fill data segment */
			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
			wqe->data.lkey = sq->mkey_be;
			wqe->data.byte_count = cpu_to_be32(len);

			/* advance to next building block */
			if (unlikely(wqe == wqe_last))
				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
			else
				wqe++;

			sq->mbuf[pi].num_wqebbs++;
			sq->pc++;
		}
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);

	/* put in place data fence */
	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;

	/* check if we should generate a completion event */
	if (mlx5e_do_send_cqe_inline(sq))
		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;

	/* copy data for doorbell */
	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));

	/* store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].p_refcount = parg->pref;
	atomic_add_int(parg->pref, 1);

	/* count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}
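
/*
 * Worked example (illustrative only, assuming a 1024 entry ring, i.e.
 * sz_m1 == 1023): if sq->pc == 1022 when mlx5e_sq_xmit() below is
 * entered, then pi = (~pc) & sz_m1 == 1, which is less than
 * MLX5_SEND_WQE_MAX_WQEBBS - 1, so a single NOP covering the remaining
 * two WQEBBs is posted first and the real WQE is then built at the
 * start of the ring.
 */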

int
mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5e_xmit_args args = {};
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe *wqe;
	struct ifnet *ifp;
	int nsegs;
	int err;
	int x;
	struct mbuf *mb;
	u16 ds_cnt;
	u16 pi;
	u8 opcode;

#ifdef KERN_TLS
top:
#endif
	/* Return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
		return (ENOBUFS);

	/* Align SQ edge with NOPs to avoid WQE wrap around */
	pi = ((~sq->pc) & sq->wq.sz_m1);
	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
		/* Send one multi NOP message instead of many */
		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~sq->pc) & sq->wq.sz_m1);
		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))
			return (ENOMEM);
	}

#ifdef KERN_TLS
	/* Special handling for TLS packets, if any */
	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
	case MLX5E_TLS_LOOP:
		goto top;
	case MLX5E_TLS_FAILURE:
		mb = *mbp;
		err = ENOMEM;
		goto tx_drop;
	case MLX5E_TLS_DEFERRED:
		return (0);
	case MLX5E_TLS_CONTINUE:
	default:
		break;
	}
#endif

	/* Setup local variables */
	pi = sq->pc & sq->wq.sz_m1;
	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	ifp = sq->ifp;

	memset(wqe, 0, sizeof(*wqe));

	/* get pointer to mbuf */
	mb = *mbp;

	/* Send a copy of the frame to the BPF listener, if any */
	if (ifp != NULL && ifp->if_bpf != NULL)
		ETHER_BPF_MTAP(ifp, mb);

	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
	}
	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
	}
	if (wqe->eth.cs_flags == 0) {
		sq->stats.csum_offload_none++;
	}
	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
		u32 payload_len;
		u32 mss = mb->m_pkthdr.tso_segsz;
		u32 num_pkts;

		wqe->eth.mss = cpu_to_be16(mss);
		opcode = MLX5_OPCODE_LSO;
		if (args.ihs == 0)
			args.ihs = mlx5e_get_full_header_size(mb, NULL);
		if (unlikely(args.ihs == 0)) {
			err = EINVAL;
			goto tx_drop;
		}
		payload_len = mb->m_pkthdr.len - args.ihs;
		if (payload_len == 0)
			num_pkts = 1;
		else
			num_pkts = DIV_ROUND_UP(payload_len, mss);
		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);

		sq->stats.tso_packets++;
		sq->stats.tso_bytes += payload_len;
	} else {
		opcode = MLX5_OPCODE_SEND;

		if (args.ihs == 0) {
			switch (sq->min_inline_mode) {
			case MLX5_INLINE_MODE_IP:
			case MLX5_INLINE_MODE_TCP_UDP:
				args.ihs = mlx5e_get_full_header_size(mb, NULL);
				if (unlikely(args.ihs == 0))
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_L2:
				args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_NONE:
				/* FALLTHROUGH */
			default:
				if ((mb->m_flags & M_VLANTAG) != 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
					/* inlining VLAN data is not required */
					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
					args.ihs = 0;
				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
					/* inlining non-VLAN data is not required */
					args.ihs = 0;
				} else {
					/* we are forced to inlining L2 header, if any */
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				}
				break;
			}
		}
		sq->mbuf[pi].num_bytes = max_t(unsigned int,
		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
	}

	if (likely(args.ihs == 0)) {
		/* nothing to inline */
	} else if (unlikely(args.ihs > sq->max_inline)) {
		/* inline header size is too big */
		err = EINVAL;
		goto tx_drop;
	} else if ((mb->m_flags & M_VLANTAG) != 0) {
		struct ether_vlan_header *eh = (struct ether_vlan_header *)
		    wqe->eth.inline_hdr_start;

		/* Range checks */
		if (unlikely(args.ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN)))
			args.ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN);
		else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
			err = EINVAL;
			goto tx_drop;
		}
		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
		m_adj(mb, ETHER_HDR_LEN);
		/* Insert 4 bytes VLAN tag into data stream */
		eh->evl_proto = eh->evl_encap_proto;
		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
		/* Copy rest of header data, if any */
		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
		m_adj(mb, args.ihs - ETHER_HDR_LEN);
		/* Extend header by 4 bytes */
		args.ihs += ETHER_VLAN_ENCAP_LEN;
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	} else {
		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
		m_adj(mb, args.ihs);
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	}

	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
		    MLX5_SEND_WQE_DS);
	}
	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;

	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Update statistics */
		sq->stats.defragged++;
		/* Too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* Try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}
	/* Catch errors */
	if (err != 0)
		goto tx_drop;

	/* Make sure all mbuf data, if any, is visible to the bus */
	if (nsegs != 0) {
		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
		    BUS_DMASYNC_PREWRITE);
	} else {
		/* All data was inlined, free the mbuf. */
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		mb = NULL;
	}

	for (x = 0; x != nsegs; x++) {
		if (segs[x].ds_len == 0)
			continue;
		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
		dseg++;
	}

	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);

	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	/* Store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->mbuf[pi].p_refcount = args.pref;
	if (unlikely(args.pref != NULL))
		atomic_add_int(args.pref, 1);
	sq->pc += sq->mbuf[pi].num_wqebbs;

	/* Count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}
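
/*
 * Worked example (illustrative only): the TSO branch of mlx5e_sq_xmit()
 * above charges one replicated header per produced segment.  Assuming a
 * 65536 byte TCP payload, mss == 1448 and args.ihs == 66, then
 * num_pkts == DIV_ROUND_UP(65536, 1448) == 46 and
 * num_bytes == 65536 + 46 * 66 == 68572, which approximates what the
 * wire actually carries after segmentation.
 */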

static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
	u16 sqcc;

	/*
	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	while (budget > 0) {
		struct mlx5_cqe64 *cqe;
		struct mbuf *mb;
		u16 x;
		u16 ci;

		cqe = mlx5e_get_cqe(&sq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&sq->cq.wq);

		/* update budget according to the event factor */
		budget -= sq->cev_factor;

		for (x = 0; x != sq->cev_factor; x++) {
			ci = sqcc & sq->wq.sz_m1;
			mb = sq->mbuf[ci].mbuf;
			sq->mbuf[ci].mbuf = NULL;

			if (unlikely(sq->mbuf[ci].p_refcount != NULL)) {
				atomic_add_int(sq->mbuf[ci].p_refcount, -1);
				sq->mbuf[ci].p_refcount = NULL;
			}

			if (mb == NULL) {
				if (sq->mbuf[ci].num_bytes == 0) {
					/* NOP */
					sq->stats.nop++;
				}
			} else {
				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

				/* Free transmitted mbuf */
				m_freem(mb);
			}
			sqcc += sq->mbuf[ci].num_wqebbs;
		}
	}

	mlx5_cqwq_update_db_record(&sq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	sq->cc = sqcc;
}
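
/*
 * Illustrative note: because only every cev_factor-th WQE requests a
 * completion, each CQE reaped by mlx5e_poll_tx_cq() above retires
 * cev_factor send queue entries at once.  Assuming, for example,
 * cev_factor == 4 and a budget of 64, the loop consumes at most 16 CQEs
 * and frees up to 64 queued entries (mbufs or NOPs) before the
 * completion queue doorbell record is updated.
 */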

static int
mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
{
	int err = 0;

	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    READ_ONCE(sq->running) == 0)) {
		m_freem(mb);
		return (ENETDOWN);
	}

	/* Do transmit */
	if (mlx5e_sq_xmit(sq, &mb) != 0) {
		/* NOTE: m_freem() is NULL safe */
		m_freem(mb);
		err = ENOBUFS;
	}

	/* Check if we need to write the doorbell */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}

	/*
	 * Check if we need to start the event timer which flushes the
	 * transmit ring on timeout:
	 */
	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
	    sq->cev_factor != 1)) {
		/* start the timer */
		mlx5e_sq_cev_timeout(sq);
	} else {
		/* don't send NOPs yet */
		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	}
	return (err);
}
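
/*
 * Illustrative note: mlx5e_xmit_locked() above writes the hardware
 * doorbell only when sq->doorbell.d64 is non-zero, i.e. when at least
 * one WQE (a real send or a padding NOP) copied its control segment
 * into sq->doorbell.  Clearing d64 afterwards means a later call that
 * queues nothing will not ring a stale record, and the completion event
 * timer is armed only while CQE moderation is in effect
 * (cev_factor != 1).
 */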

int
mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_sq *sq;
	int ret;

	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
		if (unlikely(sq == NULL)) {
			goto select_queue;
		}
	} else {
select_queue:
		sq = mlx5e_select_queue(ifp, mb);
		if (unlikely(sq == NULL)) {
			/* Free mbuf */
			m_freem(mb);

			/* Invalid send queue */
			return (ENXIO);
		}
	}

	mtx_lock(&sq->lock);
	ret = mlx5e_xmit_locked(ifp, sq, mb);
	mtx_unlock(&sq->lock);

	return (ret);
}

void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
{
	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

	mtx_lock(&sq->comp_lock);
	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
	mtx_unlock(&sq->comp_lock);
}