xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision fe75646a0234a261c0013bf1840fdac4acaf0cec)
1 /*-
2  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
3  * Copyright (c) 2022 NVIDIA corporation & affiliates.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "opt_kern_tls.h"
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30 
31 #include <dev/mlx5/mlx5_en/en.h>
32 #include <machine/atomic.h>
33 
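/*
 * Completion event moderation: a completion queue entry, CQE, is only
 * requested for every "cev_factor"-th transmit operation. For example,
 * with cev_factor = 4 only every fourth operation sets
 * MLX5_WQE_CTRL_CQ_UPDATE, so a single CQE completes several WQEs (see
 * mlx5e_poll_tx_cq() below). This reduces completion processing
 * overhead at the cost of delayed mbuf reclamation.
 */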
34 static inline bool
35 mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
36 {
37 	sq->cev_counter++;
38 	/* interleave the CQEs */
39 	if (sq->cev_counter >= sq->cev_factor) {
40 		sq->cev_counter = 0;
41 		return (true);
42 	}
43 	return (false);
44 }
45 
46 bool
47 mlx5e_do_send_cqe(struct mlx5e_sq *sq)
48 {
49 
50 	return (mlx5e_do_send_cqe_inline(sq));
51 }
52 
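/*
 * Post a NOP work request spanning "ds_cnt" data segments. NOP WQEs
 * are used to pad the send queue, for example to align the producer
 * index to the queue edge before a multi-WQEBB request is built (see
 * mlx5e_sq_xmit() below). The slot is accounted with a NULL mbuf and
 * zero bytes, which the completion path counts as a "nop".
 */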
53 void
54 mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
55 {
56 	u16 pi = sq->pc & sq->wq.sz_m1;
57 	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
58 
59 	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
60 
61 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
62 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
63 	if (mlx5e_do_send_cqe_inline(sq))
64 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
65 	else
66 		wqe->ctrl.fm_ce_se = 0;
67 
68 	/* Copy data for doorbell */
69 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
70 
71 	sq->mbuf[pi].mbuf = NULL;
72 	sq->mbuf[pi].num_bytes = 0;
73 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
74 	sq->pc += sq->mbuf[pi].num_wqebbs;
75 }
76 
77 static uint32_t mlx5e_hash_value;
78 
79 static void
80 mlx5e_hash_init(void *arg)
81 {
82 	mlx5e_hash_value = m_ether_tcpip_hash_init();
83 }
84 
85 /* Make the kernel call mlx5e_hash_init() after the random stack has finished initializing */
86 SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
87 
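/*
 * Resolve the send queue from the mbuf's send tag. TLS send tags are
 * unwrapped to the rate-limit or unlimited channel tag they are
 * stacked upon before the lookup is retried.
 */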
88 static struct mlx5e_sq *
89 mlx5e_select_queue_by_send_tag(if_t ifp, struct mbuf *mb)
90 {
91 	struct m_snd_tag *mb_tag;
92 	struct mlx5e_sq *sq;
93 
94 	mb_tag = mb->m_pkthdr.snd_tag;
95 
96 #ifdef KERN_TLS
97 top:
98 #endif
99 	/* get pointer to sendqueue */
100 	switch (mb_tag->sw->type) {
101 #ifdef RATELIMIT
102 	case IF_SND_TAG_TYPE_RATE_LIMIT:
103 		sq = container_of(mb_tag,
104 		    struct mlx5e_rl_channel, tag)->sq;
105 		break;
106 #ifdef KERN_TLS
107 	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
108 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
109 		goto top;
110 #endif
111 #endif
112 	case IF_SND_TAG_TYPE_UNLIMITED:
113 		sq = &container_of(mb_tag,
114 		    struct mlx5e_channel, tag)->sq[0];
115 		KASSERT((mb_tag->refcount > 0),
116 		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
117 		break;
118 #ifdef KERN_TLS
119 	case IF_SND_TAG_TYPE_TLS:
120 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
121 		goto top;
122 #endif
123 	default:
124 		sq = NULL;
125 		break;
126 	}
127 
128 	/* check if valid */
129 	if (sq != NULL && READ_ONCE(sq->running) != 0)
130 		return (sq);
131 
132 	return (NULL);
133 }
134 
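/*
 * Default send queue selection: the traffic class is derived from the
 * 3-bit VLAN priority (PCP) field when a VLAN tag is present, and the
 * channel is derived from the mbuf flow ID, or from a software hash
 * of the packet headers when no flow ID is available.
 */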
135 static struct mlx5e_sq *
136 mlx5e_select_queue(if_t ifp, struct mbuf *mb)
137 {
138 	struct mlx5e_priv *priv = if_getsoftc(ifp);
139 	struct mlx5e_sq *sq;
140 	u32 ch;
141 	u32 tc;
142 
143 	/* obtain VLAN information if present */
144 	if (mb->m_flags & M_VLANTAG) {
145 		tc = (mb->m_pkthdr.ether_vtag >> 13);
146 		if (tc >= priv->num_tc)
147 			tc = priv->default_vlan_prio;
148 	} else {
149 		tc = priv->default_vlan_prio;
150 	}
151 
152 	ch = priv->params.num_channels;
153 
154 	/* check if flowid is set */
155 	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
156 #ifdef RSS
157 		u32 temp;
158 
159 		if (rss_hash2bucket(mb->m_pkthdr.flowid,
160 		    M_HASHTYPE_GET(mb), &temp) == 0)
161 			ch = temp % ch;
162 		else
163 #endif
164 			ch = (mb->m_pkthdr.flowid % 128) % ch;
165 	} else {
166 		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
167 		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
168 	}
169 
170 	/* check if send queue is running */
171 	sq = &priv->channel[ch].sq[tc];
172 	if (likely(READ_ONCE(sq->running) != 0))
173 		return (sq);
174 	return (NULL);
175 }
176 
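/*
 * Compute the number of L2 header bytes to inline into the WQE. For
 * IP packets this includes the first four bytes of the IP header, so
 * that the TOS / traffic class field is always inlined. When the
 * headers cannot be parsed, fall back to inlining at most
 * "sq->max_inline" bytes.
 */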
177 static inline u16
178 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
179 {
180 	struct ether_vlan_header *eh;
181 	uint16_t eth_type;
182 	int min_inline;
183 
184 	eh = mtod(mb, struct ether_vlan_header *);
185 	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
186 		goto max_inline;
187 	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
188 		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
189 			goto max_inline;
190 		eth_type = ntohs(eh->evl_proto);
191 		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
192 	} else {
193 		eth_type = ntohs(eh->evl_encap_proto);
194 		min_inline = ETHER_HDR_LEN;
195 	}
196 
197 	switch (eth_type) {
198 	case ETHERTYPE_IP:
199 	case ETHERTYPE_IPV6:
200 		/*
201 		 * Make sure the TOS (IPv4) or traffic class (IPv6)
202 		 * field gets inlined. Otherwise the SQ may stall.
203 		 */
204 		min_inline += 4;
205 		break;
206 	default:
207 		goto max_inline;
208 	}
209 
210 	/*
211 	 * m_copydata() will be used on the remaining header which
212 	 * does not need to reside within the first m_len bytes of
213 	 * data:
214 	 */
215 	if (mb->m_pkthdr.len < min_inline)
216 		goto max_inline;
217 	return (min_inline);
218 
219 max_inline:
220 	return (MIN(mb->m_pkthdr.len, sq->max_inline));
221 }
222 
223 /*
224  * This function parses IPv4 and IPv6 packets looking for TCP and UDP
225  * headers.
226  *
227  * Upon return, the pointer to which the "ppth" argument points is set
228  * to the location of the TCP header, or to NULL if no TCP header is
229  * present.
230  *
231  * The return value indicates the number of bytes from the beginning
232  * of the packet until the first byte after the TCP or UDP header. If
233  * this function returns zero, the parsing failed.
234  */
235 int
236 mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
237 {
238 	const struct ether_vlan_header *eh;
239 	const struct tcphdr *th;
240 	const struct ip *ip;
241 	int ip_hlen, tcp_hlen;
242 	const struct ip6_hdr *ip6;
243 	uint16_t eth_type;
244 	int eth_hdr_len;
245 
246 	eh = mtod(mb, const struct ether_vlan_header *);
247 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
248 		goto failure;
249 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
250 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
251 			goto failure;
252 		eth_type = ntohs(eh->evl_proto);
253 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
254 	} else {
255 		eth_type = ntohs(eh->evl_encap_proto);
256 		eth_hdr_len = ETHER_HDR_LEN;
257 	}
258 
259 	switch (eth_type) {
260 	case ETHERTYPE_IP:
261 		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
262 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
263 			goto failure;
264 		switch (ip->ip_p) {
265 		case IPPROTO_TCP:
266 			ip_hlen = ip->ip_hl << 2;
267 			eth_hdr_len += ip_hlen;
268 			goto tcp_packet;
269 		case IPPROTO_UDP:
270 			ip_hlen = ip->ip_hl << 2;
271 			eth_hdr_len += ip_hlen + sizeof(struct udphdr);
272 			th = NULL;
273 			goto udp_packet;
274 		default:
275 			goto failure;
276 		}
277 		break;
278 	case ETHERTYPE_IPV6:
279 		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
280 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
281 			goto failure;
282 		switch (ip6->ip6_nxt) {
283 		case IPPROTO_TCP:
284 			eth_hdr_len += sizeof(*ip6);
285 			goto tcp_packet;
286 		case IPPROTO_UDP:
287 			eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr);
288 			th = NULL;
289 			goto udp_packet;
290 		default:
291 			goto failure;
292 		}
293 		break;
294 	default:
295 		goto failure;
296 	}
297 tcp_packet:
298 	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
299 		const struct mbuf *m_th = mb->m_next;
300 		if (unlikely(mb->m_len != eth_hdr_len ||
301 		    m_th == NULL || m_th->m_len < sizeof(*th)))
302 			goto failure;
303 		th = (const struct tcphdr *)(m_th->m_data);
304 	} else {
305 		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
306 	}
307 	tcp_hlen = th->th_off << 2;
308 	eth_hdr_len += tcp_hlen;
309 udp_packet:
310 	/*
311 	 * m_copydata() will be used on the remaining header which
312 	 * does not need to reside within the first m_len bytes of
313 	 * data:
314 	 */
315 	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
316 		goto failure;
317 	if (ppth != NULL)
318 		*ppth = th;
319 	return (eth_hdr_len);
320 failure:
321 	if (ppth != NULL)
322 		*ppth = NULL;
323 	return (0);
324 }
325 
326 /*
327  * Locate a pointer inside a mbuf chain. Returns NULL upon failure.
328  */
329 static inline void *
330 mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len,
331     int min_len)
332 {
333 	if (unlikely(mb[0]->m_len == eth_hdr_len)) {
334 		poffset[0] = eth_hdr_len;
335 		if (unlikely((mb[0] = mb[0]->m_next) == NULL))
336 			return (NULL);
337 	}
338 	if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len))
339 		return (NULL);
340 	return (mb[0]->m_data + eth_hdr_len - poffset[0]);
341 }
342 
343 /*
344  * This function parses IPv4 and IPv6 packets looking for UDP, VXLAN,
345  * and TCP headers.
346  *
347  * The return value indicates the number of bytes from the beginning
348  * of the packet until the first byte after the TCP header. If this
349  * function returns zero, the parsing failed.
350  */
351 static int
352 mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe,
353     uint8_t cs_mask, uint8_t opcode)
354 {
355 	const struct ether_vlan_header *eh;
356 	struct ip *ip4;
357 	struct ip6_hdr *ip6;
358 	struct tcphdr *th;
359 	struct udphdr *udp;
360 	bool has_outer_vlan_tag;
361 	uint16_t eth_type;
362 	uint8_t ip_type;
363 	int pkt_hdr_len;
364 	int eth_hdr_len;
365 	int tcp_hlen;
366 	int ip_hlen;
367 	int offset;
368 
369 	pkt_hdr_len = mb->m_pkthdr.len;
370 	has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0;
371 	offset = 0;
372 
373 	eh = mtod(mb, const struct ether_vlan_header *);
374 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
375 		return (0);
376 
377 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
378 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
379 			return (0);
380 		eth_type = eh->evl_proto;
381 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
382 	} else {
383 		eth_type = eh->evl_encap_proto;
384 		eth_hdr_len = ETHER_HDR_LEN;
385 	}
386 
387 	switch (eth_type) {
388 	case htons(ETHERTYPE_IP):
389 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
390 		    sizeof(*ip4));
391 		if (unlikely(ip4 == NULL))
392 			return (0);
393 		ip_type = ip4->ip_p;
394 		if (unlikely(ip_type != IPPROTO_UDP))
395 			return (0);
396 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
397 		wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
398 		ip_hlen = ip4->ip_hl << 2;
399 		eth_hdr_len += ip_hlen;
400 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
401 		    sizeof(*udp));
402 		if (unlikely(udp == NULL))
403 			return (0);
404 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
405 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE;
406 		eth_hdr_len += sizeof(*udp);
407 		break;
408 	case htons(ETHERTYPE_IPV6):
409 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
410 		    sizeof(*ip6));
411 		if (unlikely(ip6 == NULL))
412 			return (0);
413 		ip_type = ip6->ip6_nxt;
414 		if (unlikely(ip_type != IPPROTO_UDP))
415 			return (0);
416 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
417 		wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM;
418 		eth_hdr_len += sizeof(*ip6);
419 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
420 		    sizeof(*udp));
421 		if (unlikely(udp == NULL))
422 			return (0);
423 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
424 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE |
425 		    MLX5_ETH_WQE_SWP_OUTER_L3_TYPE;
426 		eth_hdr_len += sizeof(*udp);
427 		break;
428 	default:
429 		return (0);
430 	}
431 
432 	/*
433 	 * If the hardware is not computing the inner IP checksum, then
434 	 * skip inlining the VXLAN header and the inner headers:
435 	 */
436 	if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0))
437 		goto done;
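	/* Account for the 8-byte VXLAN header following the outer UDP header. */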
438 	if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
439 	    8) == NULL))
440 		return (0);
441 	eth_hdr_len += 8;
442 
443 	/* Check for ethernet header again. */
444 	eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN);
445 	if (unlikely(eh == NULL))
446 		return (0);
447 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
448 		if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN +
449 		    ETHER_VLAN_ENCAP_LEN))
450 			return (0);
451 		eth_type = eh->evl_proto;
452 		eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
453 	} else {
454 		eth_type = eh->evl_encap_proto;
455 		eth_hdr_len += ETHER_HDR_LEN;
456 	}
457 
458 	/* Check for IP header again. */
459 	switch (eth_type) {
460 	case htons(ETHERTYPE_IP):
461 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
462 		    sizeof(*ip4));
463 		if (unlikely(ip4 == NULL))
464 			return (0);
465 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
466 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
467 		ip_type = ip4->ip_p;
468 		ip_hlen = ip4->ip_hl << 2;
469 		eth_hdr_len += ip_hlen;
470 		break;
471 	case htons(ETHERTYPE_IPV6):
472 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
473 		    sizeof(*ip6));
474 		if (unlikely(ip6 == NULL))
475 			return (0);
476 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
477 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE;
478 		ip_type = ip6->ip6_nxt;
479 		eth_hdr_len += sizeof(*ip6);
480 		break;
481 	default:
482 		return (0);
483 	}
484 
485 	/*
486 	 * If the hardware is not computing inner UDP/TCP checksum,
487 	 * then skip inlining the inner UDP/TCP header:
488 	 */
489 	if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0))
490 		goto done;
491 
492 	switch (ip_type) {
493 	case IPPROTO_UDP:
494 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
495 		    sizeof(*udp));
496 		if (unlikely(udp == NULL))
497 			return (0);
498 		wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2);
499 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
500 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
501 		eth_hdr_len += sizeof(*udp);
502 		break;
503 	case IPPROTO_TCP:
504 		th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
505 		    sizeof(*th));
506 		if (unlikely(th == NULL))
507 			return (0);
508 		wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2;
509 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
510 		tcp_hlen = th->th_off << 2;
511 		eth_hdr_len += tcp_hlen;
512 		break;
513 	default:
514 		return (0);
515 	}
516 done:
517 	if (unlikely(pkt_hdr_len < eth_hdr_len))
518 		return (0);
519 
520 	/* Account for software inserted VLAN tag, if any. */
521 	if (unlikely(has_outer_vlan_tag)) {
522 		wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
523 		wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
524 		wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
525 		wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
526 	}
527 
528 	/*
529 	 * When inner checksums are set, the outer L4 checksum flag must
530 	 * be disabled.
531 	 */
532 	if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM |
533 	    MLX5_ETH_WQE_L4_INNER_CSUM))
534 		wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM;
535 
536 	return (eth_hdr_len);
537 }
538 
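/*
 * A DUMP work request consists of a control segment and a single data
 * segment, which together occupy exactly one send WQEBB (see the
 * CTASSERT below). mlx5e_sq_dump_xmit() splits an mbuf into a chain of
 * such DUMP WQEs, each carrying at most "hw_mtu_msb" bytes of payload.
 * The TIS number and send tag are supplied by the caller through the
 * transmit arguments (in practice the kernel TLS send offload path).
 */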
539 struct mlx5_wqe_dump_seg {
540 	struct mlx5_wqe_ctrl_seg ctrl;
541 	struct mlx5_wqe_data_seg data;
542 } __aligned(MLX5_SEND_WQE_BB);
543 
544 CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);
545 
546 int
547 mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
548 {
549 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
550 	struct mlx5_wqe_dump_seg *wqe;
551 	struct mlx5_wqe_dump_seg *wqe_last;
552 	int nsegs;
553 	int xsegs;
554 	u32 off;
555 	u32 msb;
556 	int err;
557 	int x;
558 	struct mbuf *mb;
559 	const u32 ds_cnt = 2;
560 	u16 pi;
561 	const u8 opcode = MLX5_OPCODE_DUMP;
562 
563 	/* get pointer to mbuf */
564 	mb = *mbp;
565 
566 	/* get producer index */
567 	pi = sq->pc & sq->wq.sz_m1;
568 
569 	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
570 	sq->mbuf[pi].num_wqebbs = 0;
571 
572 	/* check number of segments in mbuf */
573 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
574 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
575 	if (err == EFBIG) {
576 		/* update statistics */
577 		sq->stats.defragged++;
578 		/* too many mbuf fragments */
579 		mb = m_defrag(*mbp, M_NOWAIT);
580 		if (mb == NULL) {
581 			mb = *mbp;
582 			goto tx_drop;
583 		}
584 		/* try again */
585 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
586 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
587 	}
588 
589 	if (err != 0)
590 		goto tx_drop;
591 
592 	/* make sure all mbuf data, if any, is visible to the bus */
593 	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
594 	    BUS_DMASYNC_PREWRITE);
595 
596 	/* compute number of real DUMP segments */
597 	msb = sq->priv->params_ethtool.hw_mtu_msb;
598 	for (x = xsegs = 0; x != nsegs; x++)
599 		xsegs += howmany((u32)segs[x].ds_len, msb);
600 
601 	/* check if there are no segments */
602 	if (unlikely(xsegs == 0)) {
603 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
604 		m_freem(mb);
605 		*mbp = NULL;	/* safety clear */
606 		return (0);
607 	}
608 
609 	/* return ENOBUFS if the queue is full */
610 	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
611 		sq->stats.enobuf++;
612 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
613 		m_freem(mb);
614 		*mbp = NULL;	/* safety clear */
615 		return (ENOBUFS);
616 	}
617 
618 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
619 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);
620 
621 	for (x = 0; x != nsegs; x++) {
622 		for (off = 0; off < segs[x].ds_len; off += msb) {
623 			u32 len = segs[x].ds_len - off;
624 
625 			/* limit length */
626 			if (likely(len > msb))
627 				len = msb;
628 
629 			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
630 
631 			/* fill control segment */
632 			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
633 			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
634 			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);
635 
636 			/* fill data segment */
637 			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
638 			wqe->data.lkey = sq->mkey_be;
639 			wqe->data.byte_count = cpu_to_be32(len);
640 
641 			/* advance to next building block */
642 			if (unlikely(wqe == wqe_last))
643 				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
644 			else
645 				wqe++;
646 
647 			sq->mbuf[pi].num_wqebbs++;
648 			sq->pc++;
649 		}
650 	}
651 
652 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
653 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);
654 
655 	/* put in place data fence */
656 	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;
657 
658 	/* check if we should generate a completion event */
659 	if (mlx5e_do_send_cqe_inline(sq))
660 		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
661 
662 	/* copy data for doorbell */
663 	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));
664 
665 	/* store pointer to mbuf */
666 	sq->mbuf[pi].mbuf = mb;
667 	sq->mbuf[pi].mst = m_snd_tag_ref(parg->mst);
668 
669 	/* count all traffic going out */
670 	sq->stats.packets++;
671 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
672 
673 	*mbp = NULL;	/* safety clear */
674 	return (0);
675 
676 tx_drop:
677 	sq->stats.dropped++;
678 	*mbp = NULL;
679 	m_freem(mb);
680 	return (err);
681 }
682 
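/*
 * Transmit a single mbuf on the given send queue. This builds the
 * control, Ethernet and data segments of the WQE, inlining headers as
 * dictated by the queue's minimum inline mode, and handles checksum
 * offload, (VXLAN) TSO and software VLAN insertion. On success the
 * mbuf is owned by the send queue until its completion is processed;
 * on failure it is freed and counted as dropped.
 */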
683 int
684 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
685 {
686 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
687 	struct mlx5e_xmit_args args = {};
688 	struct mlx5_wqe_data_seg *dseg;
689 	struct mlx5e_tx_wqe *wqe;
690 	if_t ifp;
691 	int nsegs;
692 	int err;
693 	int x;
694 	struct mbuf *mb;
695 	u16 ds_cnt;
696 	u16 pi;
697 	u8 opcode;
698 
699 #ifdef KERN_TLS
700 top:
701 #endif
702 	/* Return ENOBUFS if the queue is full */
703 	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
704 		sq->stats.enobuf++;
705 		return (ENOBUFS);
706 	}
707 
708 	/* Align SQ edge with NOPs to avoid WQE wrap around */
709 	pi = ((~sq->pc) & sq->wq.sz_m1);
710 	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
711 		/* Send one multi NOP message instead of many */
712 		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
713 		pi = ((~sq->pc) & sq->wq.sz_m1);
714 		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
715 			sq->stats.enobuf++;
716 			return (ENOMEM);
717 		}
718 	}
719 
720 #ifdef KERN_TLS
721 	/* Special handling for TLS packets, if any */
722 	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
723 	case MLX5E_TLS_LOOP:
724 		goto top;
725 	case MLX5E_TLS_FAILURE:
726 		mb = *mbp;
727 		err = ENOMEM;
728 		goto tx_drop;
729 	case MLX5E_TLS_DEFERRED:
730 		return (0);
731 	case MLX5E_TLS_CONTINUE:
732 	default:
733 		break;
734 	}
735 #endif
736 
737 	/* Setup local variables */
738 	pi = sq->pc & sq->wq.sz_m1;
739 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
740 	ifp = sq->ifp;
741 
742 	memset(wqe, 0, sizeof(*wqe));
743 
744 	/* get pointer to mbuf */
745 	mb = *mbp;
746 
747 	/* Send a copy of the frame to the BPF listener, if any */
748 	if (ifp != NULL && if_getbpf(ifp) != NULL)
749 		ETHER_BPF_MTAP(ifp, mb);
750 
751 	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
752 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
753 	}
754 	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
755 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
756 	}
757 	if (wqe->eth.cs_flags == 0) {
758 		sq->stats.csum_offload_none++;
759 	}
760 	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
761 		u32 payload_len;
762 		u32 mss = mb->m_pkthdr.tso_segsz;
763 		u32 num_pkts;
764 
765 		wqe->eth.mss = cpu_to_be16(mss);
766 		opcode = MLX5_OPCODE_LSO;
767 		if (args.ihs == 0)
768 			args.ihs = mlx5e_get_full_header_size(mb, NULL);
769 		if (unlikely(args.ihs == 0)) {
770 			err = EINVAL;
771 			goto tx_drop;
772 		}
773 		payload_len = mb->m_pkthdr.len - args.ihs;
774 		if (payload_len == 0)
775 			num_pkts = 1;
776 		else
777 			num_pkts = DIV_ROUND_UP(payload_len, mss);
778 		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);
779 
780 
781 		sq->stats.tso_packets++;
782 		sq->stats.tso_bytes += payload_len;
783 	} else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
784 		/* check for inner TCP TSO first */
785 		if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO |
786 		    CSUM_INNER_IP6_TSO)) {
787 			u32 payload_len;
788 			u32 mss = mb->m_pkthdr.tso_segsz;
789 			u32 num_pkts;
790 
791 			wqe->eth.mss = cpu_to_be16(mss);
792 			opcode = MLX5_OPCODE_LSO;
793 
794 			if (likely(args.ihs == 0)) {
795 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
796 				       MLX5_ETH_WQE_L3_INNER_CSUM |
797 				       MLX5_ETH_WQE_L4_INNER_CSUM |
798 				       MLX5_ETH_WQE_L4_CSUM |
799 				       MLX5_ETH_WQE_L3_CSUM,
800 				       opcode);
801 				if (unlikely(args.ihs == 0)) {
802 					err = EINVAL;
803 					goto tx_drop;
804 				}
805 			}
806 
807 			payload_len = mb->m_pkthdr.len - args.ihs;
808 			if (payload_len == 0)
809 				num_pkts = 1;
810 			else
811 				num_pkts = DIV_ROUND_UP(payload_len, mss);
812 			sq->mbuf[pi].num_bytes = payload_len +
813 			    num_pkts * args.ihs;
814 
815 			sq->stats.tso_packets++;
816 			sq->stats.tso_bytes += payload_len;
817 		} else {
818 			opcode = MLX5_OPCODE_SEND;
819 
820 			if (likely(args.ihs == 0)) {
821 				uint8_t cs_mask;
822 
823 				if (mb->m_pkthdr.csum_flags &
824 				    (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP |
825 				     CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) {
826 					cs_mask =
827 					    MLX5_ETH_WQE_L3_INNER_CSUM |
828 					    MLX5_ETH_WQE_L4_INNER_CSUM |
829 					    MLX5_ETH_WQE_L4_CSUM |
830 					    MLX5_ETH_WQE_L3_CSUM;
831 				} else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) {
832 					cs_mask =
833 					    MLX5_ETH_WQE_L3_INNER_CSUM |
834 					    MLX5_ETH_WQE_L4_CSUM |
835 					    MLX5_ETH_WQE_L3_CSUM;
836 				} else {
837 					cs_mask =
838 					    MLX5_ETH_WQE_L4_CSUM |
839 					    MLX5_ETH_WQE_L3_CSUM;
840 				}
841 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
842 				    cs_mask, opcode);
843 				if (unlikely(args.ihs == 0)) {
844 					err = EINVAL;
845 					goto tx_drop;
846 				}
847 			}
848 
849 			sq->mbuf[pi].num_bytes = max_t(unsigned int,
850 			    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
851 		}
852 	} else {
853 		opcode = MLX5_OPCODE_SEND;
854 
855 		if (args.ihs == 0) {
856 			switch (sq->min_inline_mode) {
857 			case MLX5_INLINE_MODE_IP:
858 			case MLX5_INLINE_MODE_TCP_UDP:
859 				args.ihs = mlx5e_get_full_header_size(mb, NULL);
860 				if (unlikely(args.ihs == 0))
861 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
862 				break;
863 			case MLX5_INLINE_MODE_L2:
864 				args.ihs = mlx5e_get_l2_header_size(sq, mb);
865 				break;
866 			case MLX5_INLINE_MODE_NONE:
867 				/* FALLTHROUGH */
868 			default:
869 				if ((mb->m_flags & M_VLANTAG) != 0 &&
870 				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
871 					/* inlining VLAN data is not required */
872 					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
873 					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
874 					args.ihs = 0;
875 				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
876 				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
877 					/* inlining non-VLAN data is not required */
878 					args.ihs = 0;
879 				} else {
880 					/* we are forced to inline the L2 header, if any */
881 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
882 				}
883 				break;
884 			}
885 		}
886 		sq->mbuf[pi].num_bytes = max_t(unsigned int,
887 		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
888 	}
889 
890 	if (likely(args.ihs == 0)) {
891 		/* nothing to inline */
892 	} else if ((mb->m_flags & M_VLANTAG) != 0) {
893 		struct ether_vlan_header *eh = (struct ether_vlan_header *)
894 		    wqe->eth.inline_hdr_start;
895 
896 		/* Range checks */
897 		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
898 			if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) {
899 				err = EINVAL;
900 				goto tx_drop;
901 			}
902 			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
903 		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
904 			err = EINVAL;
905 			goto tx_drop;
906 		}
907 		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
908 		m_adj(mb, ETHER_HDR_LEN);
909 		/* Insert a 4-byte VLAN tag into the data stream */
910 		eh->evl_proto = eh->evl_encap_proto;
911 		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
912 		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
913 		/* Copy rest of header data, if any */
914 		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
915 		m_adj(mb, args.ihs - ETHER_HDR_LEN);
916 		/* Extend header by 4 bytes */
917 		args.ihs += ETHER_VLAN_ENCAP_LEN;
918 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
919 	} else {
920 		/* check if inline header size is too big */
921 		if (unlikely(args.ihs > sq->max_inline)) {
922 			if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO |
923 			    CSUM_ENCAP_VXLAN))) {
924 				err = EINVAL;
925 				goto tx_drop;
926 			}
927 			args.ihs = sq->max_inline;
928 		}
929 		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
930 		m_adj(mb, args.ihs);
931 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
932 	}
933 
934 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
935 	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
936 		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
937 		    MLX5_SEND_WQE_DS);
938 	}
939 	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
940 
941 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
942 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
943 	if (err == EFBIG) {
944 		/* Update statistics */
945 		sq->stats.defragged++;
946 		/* Too many mbuf fragments */
947 		mb = m_defrag(*mbp, M_NOWAIT);
948 		if (mb == NULL) {
949 			mb = *mbp;
950 			goto tx_drop;
951 		}
952 		/* Try again */
953 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
954 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
955 	}
956 	/* Catch errors */
957 	if (err != 0)
958 		goto tx_drop;
959 
960 	/* Make sure all mbuf data, if any, is visible to the bus */
961 	if (nsegs != 0) {
962 		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
963 		    BUS_DMASYNC_PREWRITE);
964 	} else {
965 		/* All data was inlined, free the mbuf. */
966 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
967 		m_freem(mb);
968 		mb = NULL;
969 	}
970 
971 	for (x = 0; x != nsegs; x++) {
972 		if (segs[x].ds_len == 0)
973 			continue;
974 		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
975 		dseg->lkey = sq->mkey_be;
976 		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
977 		dseg++;
978 	}
979 
980 	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
981 
982 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
983 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
984 	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);
985 
986 	if (mlx5e_do_send_cqe_inline(sq))
987 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
988 	else
989 		wqe->ctrl.fm_ce_se = 0;
990 
991 	/* Copy data for doorbell */
992 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
993 
994 	/* Store pointer to mbuf */
995 	sq->mbuf[pi].mbuf = mb;
996 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
997 	if (unlikely(args.mst != NULL))
998 		sq->mbuf[pi].mst = m_snd_tag_ref(args.mst);
999 	else
1000 		MPASS(sq->mbuf[pi].mst == NULL);
1001 
1002 	sq->pc += sq->mbuf[pi].num_wqebbs;
1003 
1004 	/* Count all traffic going out */
1005 	sq->stats.packets++;
1006 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
1007 
1008 	*mbp = NULL;	/* safety clear */
1009 	return (0);
1010 
1011 tx_drop:
1012 	sq->stats.dropped++;
1013 	*mbp = NULL;
1014 	m_freem(mb);
1015 	return (err);
1016 }
1017 
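/*
 * Reclaim completed send work requests. Due to completion event
 * moderation a single CQE may complete up to "cev_factor" transmit
 * operations, so the WQE counter reported by the CQE is matched
 * against the consumer index while transmitted mbufs are unloaded and
 * freed and send tag references are released.
 */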
1018 static void
1019 mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
1020 {
1021 	u16 sqcc;
1022 
1023 	/*
1024 	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
1025 	 * otherwise a cq overrun may occur
1026 	 */
1027 	sqcc = sq->cc;
1028 
1029 	while (budget > 0) {
1030 		struct mlx5_cqe64 *cqe;
1031 		struct m_snd_tag *mst;
1032 		struct mbuf *mb;
1033 		bool match;
1034 		u16 sqcc_this;
1035 		u16 delta;
1036 		u16 x;
1037 		u16 ci;
1038 
1039 		cqe = mlx5e_get_cqe(&sq->cq);
1040 		if (!cqe)
1041 			break;
1042 
1043 		mlx5_cqwq_pop(&sq->cq.wq);
1044 
1045 		/* check if the completion event indicates an error */
1046 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
1047 			mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
1048 			sq->stats.cqe_err++;
1049 		}
1050 
1051 		/* setup local variables */
1052 		sqcc_this = be16toh(cqe->wqe_counter);
1053 		match = false;
1054 
1055 		/* update budget according to the event factor */
1056 		budget -= sq->cev_factor;
1057 
1058 		for (x = 0;; x++) {
1059 			if (unlikely(match != false)) {
1060 				break;
1061 			} else if (unlikely(x == sq->cev_factor)) {
1062 				/* WQE counter match not found */
1063 				sq->stats.cqe_err++;
1064 				break;
1065 			}
1066 			ci = sqcc & sq->wq.sz_m1;
1067 			delta = sqcc_this - sqcc;
1068 			match = (delta < sq->mbuf[ci].num_wqebbs);
1069 			mb = sq->mbuf[ci].mbuf;
1070 			sq->mbuf[ci].mbuf = NULL;
1071 			mst = sq->mbuf[ci].mst;
1072 			sq->mbuf[ci].mst = NULL;
1073 
1074 			if (unlikely(mb == NULL)) {
1075 				if (unlikely(sq->mbuf[ci].num_bytes == 0))
1076 					sq->stats.nop++;
1077 			} else {
1078 				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
1079 				    BUS_DMASYNC_POSTWRITE);
1080 				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
1081 
1082 				/* Free transmitted mbuf */
1083 				m_freem(mb);
1084 			}
1085 
1086 			if (unlikely(mst != NULL))
1087 				m_snd_tag_rele(mst);
1088 
1089 			sqcc += sq->mbuf[ci].num_wqebbs;
1090 		}
1091 	}
1092 
1093 	mlx5_cqwq_update_db_record(&sq->cq.wq);
1094 
1095 	/* Ensure cq space is freed before enabling more cqes */
1096 	atomic_thread_fence_rel();
1097 
1098 	sq->cc = sqcc;
1099 }
1100 
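/*
 * Transmit one mbuf with the send queue lock held and ring the
 * doorbell. When completion events are moderated (cev_factor != 1)
 * the completion event timer is armed, which is expected to flush any
 * held-back completion events once transmission pauses.
 */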
1101 static int
1102 mlx5e_xmit_locked(if_t ifp, struct mlx5e_sq *sq, struct mbuf *mb)
1103 {
1104 	int err = 0;
1105 
1106 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
1107 	    READ_ONCE(sq->running) == 0)) {
1108 		m_freem(mb);
1109 		return (ENETDOWN);
1110 	}
1111 
1112 	/* Do transmit */
1113 	if (mlx5e_sq_xmit(sq, &mb) != 0) {
1114 		/* NOTE: m_freem() is NULL safe */
1115 		m_freem(mb);
1116 		err = ENOBUFS;
1117 	}
1118 
1119 	/* Write the doorbell record, if any. */
1120 	mlx5e_tx_notify_hw(sq, false);
1121 
1122 	/*
1123 	 * Check if we need to start the event timer which flushes the
1124 	 * transmit ring on timeout:
1125 	 */
1126 	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
1127 	    sq->cev_factor != 1)) {
1128 		/* start the timer */
1129 		mlx5e_sq_cev_timeout(sq);
1130 	} else {
1131 		/* don't send NOPs yet */
1132 		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1133 	}
1134 	return (err);
1135 }
1136 
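/*
 * Network stack transmit entry point: select a send queue, either
 * from the mbuf's send tag or by hashing the packet headers, and
 * transmit with the send queue lock held.
 */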
1137 int
1138 mlx5e_xmit(if_t ifp, struct mbuf *mb)
1139 {
1140 	struct mlx5e_sq *sq;
1141 	int ret;
1142 
1143 	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
1144 		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
1145 		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
1146 		if (unlikely(sq == NULL)) {
1147 			goto select_queue;
1148 		}
1149 	} else {
1150 select_queue:
1151 		sq = mlx5e_select_queue(ifp, mb);
1152 		if (unlikely(sq == NULL)) {
1153 			/* Free mbuf */
1154 			m_freem(mb);
1155 
1156 			/* Invalid send queue */
1157 			return (ENXIO);
1158 		}
1159 	}
1160 
1161 	mtx_lock(&sq->lock);
1162 	ret = mlx5e_xmit_locked(ifp, sq, mb);
1163 	mtx_unlock(&sq->lock);
1164 
1165 	return (ret);
1166 }
1167 
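/*
 * Transmit completion interrupt handler: process pending completions
 * within the MLX5E_BUDGET_MAX budget and re-arm the completion queue.
 */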
1168 void
1169 mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
1170 {
1171 	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
1172 
1173 	mtx_lock(&sq->comp_lock);
1174 	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
1175 	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
1176 	mtx_unlock(&sq->comp_lock);
1177 }
1178