/*-
 * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_kern_tls.h"

#include "en.h"
#include <machine/atomic.h>

static inline bool
mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
{
	sq->cev_counter++;
	/* interleave the CQEs */
	if (sq->cev_counter >= sq->cev_factor) {
		sq->cev_counter = 0;
		return (true);
	}
	return (false);
}

bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{

	return (mlx5e_do_send_cqe_inline(sq));
}

void
mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
{
	u16 pi = sq->pc & sq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	sq->mbuf[pi].mbuf = NULL;
	sq->mbuf[pi].num_bytes = 0;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += sq->mbuf[pi].num_wqebbs;
}

#if (__FreeBSD_version >= 1100000)
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make kernel call mlx5e_hash_init after the random stack finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
#endif

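/*
 * Look up the transmit queue from the send tag attached to the mbuf.
 * TLS tags are resolved to the rate-limit or unlimited tag they wrap,
 * which is why the lookup may loop back to "top". NULL is returned if
 * the tag type is unknown or the selected queue is not running.
 */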
static struct mlx5e_sq *
mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb)
{
	struct m_snd_tag *mb_tag;
	struct mlx5e_snd_tag *ptag;
	struct mlx5e_sq *sq;

	mb_tag = mb->m_pkthdr.snd_tag;

#ifdef KERN_TLS
top:
#endif
	/* get pointer to sendqueue */
	ptag = container_of(mb_tag, struct mlx5e_snd_tag, m_snd_tag);

	switch (ptag->type) {
#ifdef RATELIMIT
	case IF_SND_TAG_TYPE_RATE_LIMIT:
		sq = container_of(ptag,
		    struct mlx5e_rl_channel, tag)->sq;
		break;
#if defined(KERN_TLS) && defined(IF_SND_TAG_TYPE_TLS_RATE_LIMIT)
	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
		mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
#endif
	case IF_SND_TAG_TYPE_UNLIMITED:
		sq = &container_of(ptag,
		    struct mlx5e_channel, tag)->sq[0];
		KASSERT((ptag->m_snd_tag.refcount > 0),
		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
		break;
#ifdef KERN_TLS
	case IF_SND_TAG_TYPE_TLS:
		mb_tag = container_of(ptag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
	default:
		sq = NULL;
		break;
	}

	/* check if valid */
	if (sq != NULL && READ_ONCE(sq->running) != 0)
		return (sq);

	return (NULL);
}

static struct mlx5e_sq *
mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	struct mlx5e_sq *sq;
	u32 ch;
	u32 tc;

	/* obtain VLAN information if present */
	if (mb->m_flags & M_VLANTAG) {
		tc = (mb->m_pkthdr.ether_vtag >> 13);
		if (tc >= priv->num_tc)
			tc = priv->default_vlan_prio;
	} else {
		tc = priv->default_vlan_prio;
	}

	ch = priv->params.num_channels;

	/* check if flowid is set */
	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
		u32 temp;

		if (rss_hash2bucket(mb->m_pkthdr.flowid,
		    M_HASHTYPE_GET(mb), &temp) == 0)
			ch = temp % ch;
		else
#endif
			ch = (mb->m_pkthdr.flowid % 128) % ch;
	} else {
#if (__FreeBSD_version >= 1100000)
		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
#else
		/*
		 * m_ether_tcpip_hash not present in stable, so just
		 * throw unhashed mbufs on queue 0
		 */
		ch = 0;
#endif
	}

	/* check if send queue is running */
	sq = &priv->channel[ch].sq[tc];
	if (likely(READ_ONCE(sq->running) != 0))
		return (sq);
	return (NULL);
}

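/*
 * Compute how many bytes of the L2 header to inline into the WQE.
 * For IPv4 and IPv6 frames four extra bytes are added so that the
 * TOS or traffic class field is covered as well. If the header
 * cannot be parsed, inline up to "max_inline" bytes instead.
 */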
static inline u16
mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
	struct ether_vlan_header *eh;
	uint16_t eth_type;
	int min_inline;

	eh = mtod(mb, struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
		goto max_inline;
	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
			goto max_inline;
		eth_type = ntohs(eh->evl_proto);
		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		min_inline = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		/*
		 * Make sure the TOS(IPv4) or traffic class(IPv6)
		 * field gets inlined. Else the SQ may stall.
		 */
		min_inline += 4;
		break;
	default:
		goto max_inline;
	}

	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (mb->m_pkthdr.len < min_inline)
		goto max_inline;
	return (min_inline);

max_inline:
	return (MIN(mb->m_pkthdr.len, sq->max_inline));
}

/*
 * This function parses IPv4 and IPv6 packets looking for TCP and UDP
 * headers.
 *
 * Upon return, the pointer at which the "ppth" argument points is set
 * to the location of the TCP header. NULL is used if no TCP header is
 * present.
 *
 * The return value indicates the number of bytes from the beginning
 * of the packet until the first byte after the TCP or UDP header. If
 * this function returns zero, the parsing failed.
 */
int
mlx5e_get_full_header_size(struct mbuf *mb, struct tcphdr **ppth)
{
	struct ether_vlan_header *eh;
	struct tcphdr *th;
	struct ip *ip;
	int ip_hlen, tcp_hlen;
	struct ip6_hdr *ip6;
	uint16_t eth_type;
	int eth_hdr_len;

	eh = mtod(mb, struct ether_vlan_header *);
	if (mb->m_len < ETHER_HDR_LEN)
		goto failure;
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			goto failure;
		eth_type = ntohs(eh->evl_proto);
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		eth_hdr_len = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mb->m_data + eth_hdr_len);
		if (mb->m_len < eth_hdr_len + sizeof(*ip))
			goto failure;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen;
			goto tcp_packet;
		case IPPROTO_UDP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen + 8;
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
		if (mb->m_len < eth_hdr_len + sizeof(*ip6))
			goto failure;
		switch (ip6->ip6_nxt) {
		case IPPROTO_TCP:
			eth_hdr_len += sizeof(*ip6);
			goto tcp_packet;
		case IPPROTO_UDP:
			eth_hdr_len += sizeof(*ip6) + 8;
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	default:
		goto failure;
	}
tcp_packet:
	if (mb->m_len < eth_hdr_len + sizeof(*th))
		goto failure;
	th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
	tcp_hlen = th->th_off << 2;
	eth_hdr_len += tcp_hlen;
udp_packet:
	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (mb->m_pkthdr.len < eth_hdr_len)
		goto failure;
	if (ppth != NULL)
		*ppth = th;
	return (eth_hdr_len);
failure:
	if (ppth != NULL)
		*ppth = NULL;
	return (0);
}

struct mlx5_wqe_dump_seg {
	struct mlx5_wqe_ctrl_seg ctrl;
	struct mlx5_wqe_data_seg data;
} __aligned(MLX5_SEND_WQE_BB);

CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);

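/*
 * Post the given mbuf as a series of DUMP work requests, one per
 * "hw_mtu_msb" sized chunk of every DMA segment. The data is not
 * inlined; each WQE points directly at the mapped mbuf data and the
 * TIS number from "parg" is placed in the immediate field. The
 * reference counter supplied through "parg" tracks outstanding
 * completions.
 */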
int
mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5_wqe_dump_seg *wqe;
	struct mlx5_wqe_dump_seg *wqe_last;
	int nsegs;
	int xsegs;
	u32 off;
	u32 msb;
	int err;
	int x;
	struct mbuf *mb;
	const u32 ds_cnt = 2;
	u16 pi;
	const u8 opcode = MLX5_OPCODE_DUMP;

	/* get pointer to mbuf */
	mb = *mbp;

	/* get producer index */
	pi = sq->pc & sq->wq.sz_m1;

	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
	sq->mbuf[pi].num_wqebbs = 0;

	/* check number of segments in mbuf */
	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* update statistics */
		sq->stats.defragged++;
		/* too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}

	if (err != 0)
		goto tx_drop;

	/* make sure all mbuf data, if any, is visible to the bus */
	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
	    BUS_DMASYNC_PREWRITE);

	/* compute number of real DUMP segments */
	msb = sq->priv->params_ethtool.hw_mtu_msb;
	for (x = xsegs = 0; x != nsegs; x++)
		xsegs += howmany((u32)segs[x].ds_len, msb);

	/* check if there are no segments */
	if (unlikely(xsegs == 0)) {
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (0);
	}

	/* return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
		sq->stats.enobuf++;
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		*mbp = NULL;	/* safety clear */
		return (ENOBUFS);
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);

	for (x = 0; x != nsegs; x++) {
		for (off = 0; off < segs[x].ds_len; off += msb) {
			u32 len = segs[x].ds_len - off;

			/* limit length */
			if (likely(len > msb))
				len = msb;

			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

			/* fill control segment */
			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);

			/* fill data segment */
			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
			wqe->data.lkey = sq->mkey_be;
			wqe->data.byte_count = cpu_to_be32(len);

			/* advance to next building block */
			if (unlikely(wqe == wqe_last))
				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
			else
				wqe++;

			sq->mbuf[pi].num_wqebbs++;
			sq->pc++;
		}
	}

	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);

	/* put in place data fence */
	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;

	/* check if we should generate a completion event */
	if (mlx5e_do_send_cqe_inline(sq))
		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;

	/* copy data for doorbell */
	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));

	/* store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].p_refcount = parg->pref;
	atomic_add_int(parg->pref, 1);

	/* count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}

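/*
 * Transmit a single mbuf using a regular SEND or LSO work request.
 * TLS packets are handed to mlx5e_sq_tls_xmit() first, which may
 * defer or retry them. The header is inlined according to the
 * queue's minimum inline mode and the remaining payload is attached
 * as DMA data segments.
 */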
int
mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
{
	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
	struct mlx5e_xmit_args args = {};
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe *wqe;
	struct ifnet *ifp;
	int nsegs;
	int err;
	int x;
	struct mbuf *mb;
	u16 ds_cnt;
	u16 pi;
	u8 opcode;

#ifdef KERN_TLS
top:
#endif
	/* Return ENOBUFS if the queue is full */
	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
		sq->stats.enobuf++;
		return (ENOBUFS);
	}

	/* Align SQ edge with NOPs to avoid WQE wrap around */
	pi = ((~sq->pc) & sq->wq.sz_m1);
	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
		/* Send one multi NOP message instead of many */
		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
		pi = ((~sq->pc) & sq->wq.sz_m1);
		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
			sq->stats.enobuf++;
			return (ENOMEM);
		}
	}

#ifdef KERN_TLS
	/* Special handling for TLS packets, if any */
	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
	case MLX5E_TLS_LOOP:
		goto top;
	case MLX5E_TLS_FAILURE:
		mb = *mbp;
		err = ENOMEM;
		goto tx_drop;
	case MLX5E_TLS_DEFERRED:
		return (0);
	case MLX5E_TLS_CONTINUE:
	default:
		break;
	}
#endif

	/* Setup local variables */
	pi = sq->pc & sq->wq.sz_m1;
	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	ifp = sq->ifp;

	memset(wqe, 0, sizeof(*wqe));

	/* get pointer to mbuf */
	mb = *mbp;

	/* Send a copy of the frame to the BPF listener, if any */
	if (ifp != NULL && ifp->if_bpf != NULL)
		ETHER_BPF_MTAP(ifp, mb);

	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
	}
	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
	}
	if (wqe->eth.cs_flags == 0) {
		sq->stats.csum_offload_none++;
	}
	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
		u32 payload_len;
		u32 mss = mb->m_pkthdr.tso_segsz;
		u32 num_pkts;

		wqe->eth.mss = cpu_to_be16(mss);
		opcode = MLX5_OPCODE_LSO;
		if (args.ihs == 0)
			args.ihs = mlx5e_get_full_header_size(mb, NULL);
		if (unlikely(args.ihs == 0)) {
			err = EINVAL;
			goto tx_drop;
		}
		payload_len = mb->m_pkthdr.len - args.ihs;
		if (payload_len == 0)
			num_pkts = 1;
		else
			num_pkts = DIV_ROUND_UP(payload_len, mss);
		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);

		sq->stats.tso_packets++;
		sq->stats.tso_bytes += payload_len;
	} else {
		opcode = MLX5_OPCODE_SEND;

		if (args.ihs == 0) {
			switch (sq->min_inline_mode) {
			case MLX5_INLINE_MODE_IP:
			case MLX5_INLINE_MODE_TCP_UDP:
				args.ihs = mlx5e_get_full_header_size(mb, NULL);
				if (unlikely(args.ihs == 0))
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_L2:
				args.ihs = mlx5e_get_l2_header_size(sq, mb);
				break;
			case MLX5_INLINE_MODE_NONE:
				/* FALLTHROUGH */
			default:
				if ((mb->m_flags & M_VLANTAG) != 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
					/* inlining VLAN data is not required */
					wqe->eth.vlan_cmd = htons(0x8000);	/* bit 0 CVLAN */
					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
					args.ihs = 0;
				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
					/* inlining non-VLAN data is not required */
					args.ihs = 0;
				} else {
					/* we are forced to inline the L2 header, if any */
					args.ihs = mlx5e_get_l2_header_size(sq, mb);
				}
				break;
			}
		}
		sq->mbuf[pi].num_bytes = max_t(unsigned int,
		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
	}

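	/*
	 * Copy the inline header, if any, into the WQE. For VLAN tagged
	 * mbufs the 4-byte 802.1Q header is constructed in the inline
	 * data, because the tag is only carried in the mbuf packet
	 * header. The inlined bytes are trimmed from the mbuf so they
	 * are not sent twice.
	 */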
	if (likely(args.ihs == 0)) {
		/* nothing to inline */
	} else if ((mb->m_flags & M_VLANTAG) != 0) {
		struct ether_vlan_header *eh = (struct ether_vlan_header *)
		    wqe->eth.inline_hdr_start;

		/* Range checks */
		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
			if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
				err = EINVAL;
				goto tx_drop;
			}
			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
			err = EINVAL;
			goto tx_drop;
		}
		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
		m_adj(mb, ETHER_HDR_LEN);
		/* Insert 4 bytes VLAN tag into data stream */
		eh->evl_proto = eh->evl_encap_proto;
		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
		/* Copy rest of header data, if any */
		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
		m_adj(mb, args.ihs - ETHER_HDR_LEN);
		/* Extend header by 4 bytes */
		args.ihs += ETHER_VLAN_ENCAP_LEN;
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	} else {
		/* check if inline header size is too big */
		if (unlikely(args.ihs > sq->max_inline)) {
			if (unlikely(mb->m_pkthdr.csum_flags & CSUM_TSO)) {
				err = EINVAL;
				goto tx_drop;
			}
			args.ihs = sq->max_inline;
		}
		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
		m_adj(mb, args.ihs);
		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
	}

	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
		    MLX5_SEND_WQE_DS);
	}
	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;

	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Update statistics */
		sq->stats.defragged++;
		/* Too many mbuf fragments */
		mb = m_defrag(*mbp, M_NOWAIT);
		if (mb == NULL) {
			mb = *mbp;
			goto tx_drop;
		}
		/* Try again */
		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	}
	/* Catch errors */
	if (err != 0)
		goto tx_drop;

	/* Make sure all mbuf data, if any, is visible to the bus */
	if (nsegs != 0) {
		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
		    BUS_DMASYNC_PREWRITE);
	} else {
		/* All data was inlined, free the mbuf. */
		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
		m_freem(mb);
		mb = NULL;
	}

	for (x = 0; x != nsegs; x++) {
		if (segs[x].ds_len == 0)
			continue;
		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
		dseg++;
	}

	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));

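	/*
	 * Fill in the control segment: the producer counter and opcode,
	 * the SQ number with the total number of data segments, and the
	 * TIS number used by TLS offload (zero unless set by
	 * mlx5e_sq_tls_xmit()).
	 */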
	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);

	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	/* Store pointer to mbuf */
	sq->mbuf[pi].mbuf = mb;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->mbuf[pi].p_refcount = args.pref;
	if (unlikely(args.pref != NULL))
		atomic_add_int(args.pref, 1);
	sq->pc += sq->mbuf[pi].num_wqebbs;

	/* Count all traffic going out */
	sq->stats.packets++;
	sq->stats.bytes += sq->mbuf[pi].num_bytes;

	*mbp = NULL;	/* safety clear */
	return (0);

tx_drop:
	sq->stats.dropped++;
	*mbp = NULL;
	m_freem(mb);
	return (err);
}

static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
	u16 sqcc;

	/*
	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	while (budget > 0) {
		struct mlx5_cqe64 *cqe;
		struct mbuf *mb;
		u16 x;
		u16 ci;

		cqe = mlx5e_get_cqe(&sq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&sq->cq.wq);

		/* update budget according to the event factor */
		budget -= sq->cev_factor;

		for (x = 0; x != sq->cev_factor; x++) {
			ci = sqcc & sq->wq.sz_m1;
			mb = sq->mbuf[ci].mbuf;
			sq->mbuf[ci].mbuf = NULL;

			if (unlikely(sq->mbuf[ci].p_refcount != NULL)) {
				atomic_add_int(sq->mbuf[ci].p_refcount, -1);
				sq->mbuf[ci].p_refcount = NULL;
			}

			if (mb == NULL) {
				if (sq->mbuf[ci].num_bytes == 0) {
					/* NOP */
					sq->stats.nop++;
				}
			} else {
				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

				/* Free transmitted mbuf */
				m_freem(mb);
			}
			sqcc += sq->mbuf[ci].num_wqebbs;
		}
	}

	mlx5_cqwq_update_db_record(&sq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	sq->cc = sqcc;
}

static int
mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
{
	int err = 0;

	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
	    READ_ONCE(sq->running) == 0)) {
		m_freem(mb);
		return (ENETDOWN);
	}

	/* Do transmit */
	if (mlx5e_sq_xmit(sq, &mb) != 0) {
		/* NOTE: m_freem() is NULL safe */
		m_freem(mb);
		err = ENOBUFS;
	}

	/* Check if we need to write the doorbell */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}

	/*
	 * Check if we need to start the event timer which flushes the
	 * transmit ring on timeout:
	 */
	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
	    sq->cev_factor != 1)) {
		/* start the timer */
		mlx5e_sq_cev_timeout(sq);
	} else {
		/* don't send NOPs yet */
		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	}
	return (err);
}

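/*
 * Main transmit entry point. Select a send queue, either from the
 * mbuf's send tag or by hashing the packet, and transmit while
 * holding the queue lock. The mbuf is consumed in all cases.
 */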
int
mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
{
	struct mlx5e_sq *sq;
	int ret;

	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
		if (unlikely(sq == NULL)) {
			goto select_queue;
		}
	} else {
select_queue:
		sq = mlx5e_select_queue(ifp, mb);
		if (unlikely(sq == NULL)) {
			/* Free mbuf */
			m_freem(mb);

			/* Invalid send queue */
			return (ENXIO);
		}
	}

	mtx_lock(&sq->lock);
	ret = mlx5e_xmit_locked(ifp, sq, mb);
	mtx_unlock(&sq->lock);

	return (ret);
}

void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
{
	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

	mtx_lock(&sq->comp_lock);
	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
	mtx_unlock(&sq->comp_lock);
}