xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision 7be9a3b45356747f9fcb6d69a722c1c95f8060bf)
1 /*-
2  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_kern_tls.h"
29 #include "opt_rss.h"
30 #include "opt_ratelimit.h"
31 
32 #include <dev/mlx5/mlx5_en/en.h>
33 #include <machine/atomic.h>
34 
35 static inline bool
36 mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
37 {
38 	sq->cev_counter++;
39 	/* interleave the CQEs */
40 	if (sq->cev_counter >= sq->cev_factor) {
41 		sq->cev_counter = 0;
42 		return (true);
43 	}
44 	return (false);
45 }
46 
47 bool
48 mlx5e_do_send_cqe(struct mlx5e_sq *sq)
49 {
50 
51 	return (mlx5e_do_send_cqe_inline(sq));
52 }
53 
54 void
55 mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
56 {
57 	u16 pi = sq->pc & sq->wq.sz_m1;
58 	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
59 
60 	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
61 
62 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
63 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
64 	if (mlx5e_do_send_cqe_inline(sq))
65 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
66 	else
67 		wqe->ctrl.fm_ce_se = 0;
68 
69 	/* Copy data for doorbell */
70 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
71 
72 	sq->mbuf[pi].mbuf = NULL;
73 	sq->mbuf[pi].num_bytes = 0;
74 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
75 	sq->pc += sq->mbuf[pi].num_wqebbs;
76 }
77 
/* Seed used for software flow hashing of outgoing packets. */
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make kernel call mlx5e_hash_init after the random stack finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
88 
/*
 * Resolve the send queue bound to the mbuf's send tag.
 *
 * TLS tags wrap an underlying rate-limit or plain channel tag; they
 * are unwrapped and the lookup restarts from "top". Returns NULL when
 * the tag type is unknown or the resolved queue is not running.
 */
static struct mlx5e_sq *
mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb)
{
	struct m_snd_tag *mb_tag;
	struct mlx5e_sq *sq;

	mb_tag = mb->m_pkthdr.snd_tag;

#ifdef KERN_TLS
top:
#endif
	/* get pointer to sendqueue */
	switch (mb_tag->sw->type) {
#ifdef RATELIMIT
	case IF_SND_TAG_TYPE_RATE_LIMIT:
		sq = container_of(mb_tag,
		    struct mlx5e_rl_channel, tag)->sq;
		break;
#ifdef KERN_TLS
	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
		/* Unwrap the TLS tag and retry with the rate-limit tag. */
		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
#endif
	case IF_SND_TAG_TYPE_UNLIMITED:
		/* Unlimited tags always map to the channel's first queue. */
		sq = &container_of(mb_tag,
		    struct mlx5e_channel, tag)->sq[0];
		KASSERT((mb_tag->refcount > 0),
		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
		break;
#ifdef KERN_TLS
	case IF_SND_TAG_TYPE_TLS:
		/* Unwrap the TLS tag and retry with the underlying tag. */
		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
		goto top;
#endif
	default:
		sq = NULL;
		break;
	}

	/* check if valid */
	if (sq != NULL && READ_ONCE(sq->running) != 0)
		return (sq);

	return (NULL);
}
135 
136 static struct mlx5e_sq *
137 mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
138 {
139 	struct mlx5e_priv *priv = ifp->if_softc;
140 	struct mlx5e_sq *sq;
141 	u32 ch;
142 	u32 tc;
143 
144 	/* obtain VLAN information if present */
145 	if (mb->m_flags & M_VLANTAG) {
146 		tc = (mb->m_pkthdr.ether_vtag >> 13);
147 		if (tc >= priv->num_tc)
148 			tc = priv->default_vlan_prio;
149 	} else {
150 		tc = priv->default_vlan_prio;
151 	}
152 
153 	ch = priv->params.num_channels;
154 
155 	/* check if flowid is set */
156 	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
157 #ifdef RSS
158 		u32 temp;
159 
160 		if (rss_hash2bucket(mb->m_pkthdr.flowid,
161 		    M_HASHTYPE_GET(mb), &temp) == 0)
162 			ch = temp % ch;
163 		else
164 #endif
165 			ch = (mb->m_pkthdr.flowid % 128) % ch;
166 	} else {
167 		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
168 		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
169 	}
170 
171 	/* check if send queue is running */
172 	sq = &priv->channel[ch].sq[tc];
173 	if (likely(READ_ONCE(sq->running) != 0))
174 		return (sq);
175 	return (NULL);
176 }
177 
178 static inline u16
179 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
180 {
181 	struct ether_vlan_header *eh;
182 	uint16_t eth_type;
183 	int min_inline;
184 
185 	eh = mtod(mb, struct ether_vlan_header *);
186 	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
187 		goto max_inline;
188 	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
189 		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
190 			goto max_inline;
191 		eth_type = ntohs(eh->evl_proto);
192 		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
193 	} else {
194 		eth_type = ntohs(eh->evl_encap_proto);
195 		min_inline = ETHER_HDR_LEN;
196 	}
197 
198 	switch (eth_type) {
199 	case ETHERTYPE_IP:
200 	case ETHERTYPE_IPV6:
201 		/*
202 		 * Make sure the TOS(IPv4) or traffic class(IPv6)
203 		 * field gets inlined. Else the SQ may stall.
204 		 */
205 		min_inline += 4;
206 		break;
207 	default:
208 		goto max_inline;
209 	}
210 
211 	/*
212 	 * m_copydata() will be used on the remaining header which
213 	 * does not need to reside within the first m_len bytes of
214 	 * data:
215 	 */
216 	if (mb->m_pkthdr.len < min_inline)
217 		goto max_inline;
218 	return (min_inline);
219 
220 max_inline:
221 	return (MIN(mb->m_pkthdr.len, sq->max_inline));
222 }
223 
224 /*
225  * This function parse IPv4 and IPv6 packets looking for TCP and UDP
226  * headers.
227  *
228  * Upon return the pointer at which the "ppth" argument points, is set
229  * to the location of the TCP header. NULL is used if no TCP header is
230  * present.
231  *
232  * The return value indicates the number of bytes from the beginning
233  * of the packet until the first byte after the TCP or UDP header. If
234  * this function returns zero, the parsing failed.
235  */
int
mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
{
	const struct ether_vlan_header *eh;
	const struct tcphdr *th;
	const struct ip *ip;
	int ip_hlen, tcp_hlen;
	const struct ip6_hdr *ip6;
	uint16_t eth_type;
	int eth_hdr_len;

	/* Parse the Ethernet header, honoring a VLAN encapsulation. */
	eh = mtod(mb, const struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN))
		goto failure;
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			goto failure;
		eth_type = ntohs(eh->evl_proto);
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		eth_hdr_len = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
			goto failure;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen;
			goto tcp_packet;
		case IPPROTO_UDP:
			/* UDP has a fixed-size header; account for it here. */
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen + sizeof(struct udphdr);
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	case ETHERTYPE_IPV6:
		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
			goto failure;
		/* NOTE(review): extension headers are not walked; only the
		 * immediate next header is checked. */
		switch (ip6->ip6_nxt) {
		case IPPROTO_TCP:
			eth_hdr_len += sizeof(*ip6);
			goto tcp_packet;
		case IPPROTO_UDP:
			eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr);
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	default:
		goto failure;
	}
tcp_packet:
	/*
	 * The TCP header may start exactly at the end of the first
	 * mbuf; in that case read it from the second mbuf of the chain.
	 */
	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
		const struct mbuf *m_th = mb->m_next;
		if (unlikely(mb->m_len != eth_hdr_len ||
		    m_th == NULL || m_th->m_len < sizeof(*th)))
			goto failure;
		th = (const struct tcphdr *)(m_th->m_data);
	} else {
		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
	}
	tcp_hlen = th->th_off << 2;
	eth_hdr_len += tcp_hlen;
udp_packet:
	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
		goto failure;
	if (ppth != NULL)
		*ppth = th;
	return (eth_hdr_len);
failure:
	if (ppth != NULL)
		*ppth = NULL;
	return (0);
}
326 
327 /*
328  * Locate a pointer inside a mbuf chain. Returns NULL upon failure.
329  */
330 static inline void *
331 mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len,
332     int min_len)
333 {
334 	if (unlikely(mb[0]->m_len == eth_hdr_len)) {
335 		poffset[0] = eth_hdr_len;
336 		if (unlikely((mb[0] = mb[0]->m_next) == NULL))
337 			return (NULL);
338 	}
339 	if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len))
340 		return (NULL);
341 	return (mb[0]->m_data + eth_hdr_len - poffset[0]);
342 }
343 
344 /*
345  * This function parse IPv4 and IPv6 packets looking for UDP, VXLAN
346  * and TCP headers.
347  *
348  * The return value indicates the number of bytes from the beginning
349  * of the packet until the first byte after the TCP header. If this
350  * function returns zero, the parsing failed.
351  */
static int
mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe,
    uint8_t cs_mask, uint8_t opcode)
{
	const struct ether_vlan_header *eh;
	struct ip *ip4;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	struct udphdr *udp;
	bool has_outer_vlan_tag;
	uint16_t eth_type;
	uint8_t ip_type;
	int pkt_hdr_len;
	int eth_hdr_len;
	int tcp_hlen;
	int ip_hlen;
	int offset;

	pkt_hdr_len = mb->m_pkthdr.len;
	has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0;
	offset = 0;

	/* Parse the outer Ethernet header. */
	eh = mtod(mb, const struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN))
		return (0);

	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			return (0);
		/* NOTE: eth_type is kept in network byte order; the switch
		 * below compares against htons()ed case labels. */
		eth_type = eh->evl_proto;
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = eh->evl_encap_proto;
		eth_hdr_len = ETHER_HDR_LEN;
	}

	/* Parse the outer IP header; only UDP transport is accepted. */
	switch (eth_type) {
	case htons(ETHERTYPE_IP):
		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip4));
		if (unlikely(ip4 == NULL))
			return (0);
		ip_type = ip4->ip_p;
		if (unlikely(ip_type != IPPROTO_UDP))
			return (0);
		/* SWP offsets are expressed in 16-bit words. */
		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		ip_hlen = ip4->ip_hl << 2;
		eth_hdr_len += ip_hlen;
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	case htons(ETHERTYPE_IPV6):
		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip6));
		if (unlikely(ip6 == NULL))
			return (0);
		ip_type = ip6->ip6_nxt;
		if (unlikely(ip_type != IPPROTO_UDP))
			return (0);
		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM;
		eth_hdr_len += sizeof(*ip6);
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE |
		    MLX5_ETH_WQE_SWP_OUTER_L3_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	default:
		return (0);
	}

	/*
	 * If the hardware is not computing the inner IP checksum, then
	 * skip inlining the VXLAN header and the inner headers:
	 */
	if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0))
		goto done;
	/* Skip over the 8-byte VXLAN header. */
	if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
	    8) == NULL))
		return (0);
	eth_hdr_len += 8;

	/* Check for ethernet header again. */
	eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN);
	if (unlikely(eh == NULL))
		return (0);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN +
		    ETHER_VLAN_ENCAP_LEN))
			return (0);
		eth_type = eh->evl_proto;
		eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = eh->evl_encap_proto;
		eth_hdr_len += ETHER_HDR_LEN;
	}

	/* Check for IP header again. */
	switch (eth_type) {
	case htons(ETHERTYPE_IP):
		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip4));
		if (unlikely(ip4 == NULL))
			return (0);
		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
		ip_type = ip4->ip_p;
		ip_hlen = ip4->ip_hl << 2;
		eth_hdr_len += ip_hlen;
		break;
	case htons(ETHERTYPE_IPV6):
		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip6));
		if (unlikely(ip6 == NULL))
			return (0);
		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE;
		ip_type = ip6->ip6_nxt;
		eth_hdr_len += sizeof(*ip6);
		break;
	default:
		return (0);
	}

	/*
	 * If the hardware is not computing inner UDP/TCP checksum,
	 * then skip inlining the inner UDP/TCP header:
	 */
	if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0))
		goto done;

	/* Parse the inner transport header. */
	switch (ip_type) {
	case IPPROTO_UDP:
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2);
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	case IPPROTO_TCP:
		th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*th));
		if (unlikely(th == NULL))
			return (0);
		wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
		tcp_hlen = th->th_off << 2;
		eth_hdr_len += tcp_hlen;
		break;
	default:
		return (0);
	}
done:
	if (unlikely(pkt_hdr_len < eth_hdr_len))
		return (0);

	/* Account for software inserted VLAN tag, if any. */
	if (unlikely(has_outer_vlan_tag)) {
		wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
	}

	/*
	 * When inner checksums are set, outer L4 checksum flag must
	 * be disabled.
	 */
	if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM |
	    MLX5_ETH_WQE_L4_INNER_CSUM))
		wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM;

	return (eth_hdr_len);
}
539 
/*
 * A DUMP work request: one control segment plus one data segment,
 * aligned to a send WQE building block.
 */
struct mlx5_wqe_dump_seg {
	struct mlx5_wqe_ctrl_seg ctrl;
	struct mlx5_wqe_data_seg data;
} __aligned(MLX5_SEND_WQE_BB);

/* A two-segment DUMP WQE must fit in a single WQE building block. */
CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);
546 
547 int
548 mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
549 {
550 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
551 	struct mlx5_wqe_dump_seg *wqe;
552 	struct mlx5_wqe_dump_seg *wqe_last;
553 	int nsegs;
554 	int xsegs;
555 	u32 off;
556 	u32 msb;
557 	int err;
558 	int x;
559 	struct mbuf *mb;
560 	const u32 ds_cnt = 2;
561 	u16 pi;
562 	const u8 opcode = MLX5_OPCODE_DUMP;
563 
564 	/* get pointer to mbuf */
565 	mb = *mbp;
566 
567 	/* get producer index */
568 	pi = sq->pc & sq->wq.sz_m1;
569 
570 	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
571 	sq->mbuf[pi].num_wqebbs = 0;
572 
573 	/* check number of segments in mbuf */
574 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
575 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
576 	if (err == EFBIG) {
577 		/* update statistics */
578 		sq->stats.defragged++;
579 		/* too many mbuf fragments */
580 		mb = m_defrag(*mbp, M_NOWAIT);
581 		if (mb == NULL) {
582 			mb = *mbp;
583 			goto tx_drop;
584 		}
585 		/* try again */
586 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
587 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
588 	}
589 
590 	if (err != 0)
591 		goto tx_drop;
592 
593 	/* make sure all mbuf data, if any, is visible to the bus */
594 	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
595 	    BUS_DMASYNC_PREWRITE);
596 
597 	/* compute number of real DUMP segments */
598 	msb = sq->priv->params_ethtool.hw_mtu_msb;
599 	for (x = xsegs = 0; x != nsegs; x++)
600 		xsegs += howmany((u32)segs[x].ds_len, msb);
601 
602 	/* check if there are no segments */
603 	if (unlikely(xsegs == 0)) {
604 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
605 		m_freem(mb);
606 		*mbp = NULL;	/* safety clear */
607 		return (0);
608 	}
609 
610 	/* return ENOBUFS if the queue is full */
611 	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
612 		sq->stats.enobuf++;
613 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
614 		m_freem(mb);
615 		*mbp = NULL;	/* safety clear */
616 		return (ENOBUFS);
617 	}
618 
619 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
620 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);
621 
622 	for (x = 0; x != nsegs; x++) {
623 		for (off = 0; off < segs[x].ds_len; off += msb) {
624 			u32 len = segs[x].ds_len - off;
625 
626 			/* limit length */
627 			if (likely(len > msb))
628 				len = msb;
629 
630 			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
631 
632 			/* fill control segment */
633 			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
634 			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
635 			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);
636 
637 			/* fill data segment */
638 			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
639 			wqe->data.lkey = sq->mkey_be;
640 			wqe->data.byte_count = cpu_to_be32(len);
641 
642 			/* advance to next building block */
643 			if (unlikely(wqe == wqe_last))
644 				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
645 			else
646 				wqe++;
647 
648 			sq->mbuf[pi].num_wqebbs++;
649 			sq->pc++;
650 		}
651 	}
652 
653 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
654 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);
655 
656 	/* put in place data fence */
657 	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;
658 
659 	/* check if we should generate a completion event */
660 	if (mlx5e_do_send_cqe_inline(sq))
661 		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
662 
663 	/* copy data for doorbell */
664 	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));
665 
666 	/* store pointer to mbuf */
667 	sq->mbuf[pi].mbuf = mb;
668 	sq->mbuf[pi].p_refcount = parg->pref;
669 	atomic_add_int(parg->pref, 1);
670 
671 	/* count all traffic going out */
672 	sq->stats.packets++;
673 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
674 
675 	*mbp = NULL;	/* safety clear */
676 	return (0);
677 
678 tx_drop:
679 	sq->stats.dropped++;
680 	*mbp = NULL;
681 	m_freem(mb);
682 	return err;
683 }
684 
685 int
686 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
687 {
688 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
689 	struct mlx5e_xmit_args args = {};
690 	struct mlx5_wqe_data_seg *dseg;
691 	struct mlx5e_tx_wqe *wqe;
692 	struct ifnet *ifp;
693 	int nsegs;
694 	int err;
695 	int x;
696 	struct mbuf *mb;
697 	u16 ds_cnt;
698 	u16 pi;
699 	u8 opcode;
700 
701 #ifdef KERN_TLS
702 top:
703 #endif
704 	/* Return ENOBUFS if the queue is full */
705 	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
706 		sq->stats.enobuf++;
707 		return (ENOBUFS);
708 	}
709 
710 	/* Align SQ edge with NOPs to avoid WQE wrap around */
711 	pi = ((~sq->pc) & sq->wq.sz_m1);
712 	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
713 		/* Send one multi NOP message instead of many */
714 		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
715 		pi = ((~sq->pc) & sq->wq.sz_m1);
716 		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
717 			sq->stats.enobuf++;
718 			return (ENOMEM);
719 		}
720 	}
721 
722 #ifdef KERN_TLS
723 	/* Special handling for TLS packets, if any */
724 	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
725 	case MLX5E_TLS_LOOP:
726 		goto top;
727 	case MLX5E_TLS_FAILURE:
728 		mb = *mbp;
729 		err = ENOMEM;
730 		goto tx_drop;
731 	case MLX5E_TLS_DEFERRED:
732 		return (0);
733 	case MLX5E_TLS_CONTINUE:
734 	default:
735 		break;
736 	}
737 #endif
738 
739 	/* Setup local variables */
740 	pi = sq->pc & sq->wq.sz_m1;
741 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
742 	ifp = sq->ifp;
743 
744 	memset(wqe, 0, sizeof(*wqe));
745 
746 	/* get pointer to mbuf */
747 	mb = *mbp;
748 
749 	/* Send a copy of the frame to the BPF listener, if any */
750 	if (ifp != NULL && ifp->if_bpf != NULL)
751 		ETHER_BPF_MTAP(ifp, mb);
752 
753 	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
754 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
755 	}
756 	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
757 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
758 	}
759 	if (wqe->eth.cs_flags == 0) {
760 		sq->stats.csum_offload_none++;
761 	}
762 	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
763 		u32 payload_len;
764 		u32 mss = mb->m_pkthdr.tso_segsz;
765 		u32 num_pkts;
766 
767 		wqe->eth.mss = cpu_to_be16(mss);
768 		opcode = MLX5_OPCODE_LSO;
769 		if (args.ihs == 0)
770 			args.ihs = mlx5e_get_full_header_size(mb, NULL);
771 		if (unlikely(args.ihs == 0)) {
772 			err = EINVAL;
773 			goto tx_drop;
774 		}
775 		payload_len = mb->m_pkthdr.len - args.ihs;
776 		if (payload_len == 0)
777 			num_pkts = 1;
778 		else
779 			num_pkts = DIV_ROUND_UP(payload_len, mss);
780 		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);
781 
782 
783 		sq->stats.tso_packets++;
784 		sq->stats.tso_bytes += payload_len;
785 	} else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
786 		/* check for inner TCP TSO first */
787 		if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO |
788 		    CSUM_INNER_IP6_TSO)) {
789 			u32 payload_len;
790 			u32 mss = mb->m_pkthdr.tso_segsz;
791 			u32 num_pkts;
792 
793 			wqe->eth.mss = cpu_to_be16(mss);
794 			opcode = MLX5_OPCODE_LSO;
795 
796 			if (likely(args.ihs == 0)) {
797 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
798 				       MLX5_ETH_WQE_L3_INNER_CSUM |
799 				       MLX5_ETH_WQE_L4_INNER_CSUM |
800 				       MLX5_ETH_WQE_L4_CSUM |
801 				       MLX5_ETH_WQE_L3_CSUM,
802 				       opcode);
803 				if (unlikely(args.ihs == 0)) {
804 					err = EINVAL;
805 					goto tx_drop;
806 				}
807 			}
808 
809 			payload_len = mb->m_pkthdr.len - args.ihs;
810 			if (payload_len == 0)
811 				num_pkts = 1;
812 			else
813 				num_pkts = DIV_ROUND_UP(payload_len, mss);
814 			sq->mbuf[pi].num_bytes = payload_len +
815 			    num_pkts * args.ihs;
816 
817 			sq->stats.tso_packets++;
818 			sq->stats.tso_bytes += payload_len;
819 		} else {
820 			opcode = MLX5_OPCODE_SEND;
821 
822 			if (likely(args.ihs == 0)) {
823 				uint8_t cs_mask;
824 
825 				if (mb->m_pkthdr.csum_flags &
826 				    (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP |
827 				     CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) {
828 					cs_mask =
829 					    MLX5_ETH_WQE_L3_INNER_CSUM |
830 					    MLX5_ETH_WQE_L4_INNER_CSUM |
831 					    MLX5_ETH_WQE_L4_CSUM |
832 					    MLX5_ETH_WQE_L3_CSUM;
833 				} else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) {
834 					cs_mask =
835 					    MLX5_ETH_WQE_L3_INNER_CSUM |
836 					    MLX5_ETH_WQE_L4_CSUM |
837 					    MLX5_ETH_WQE_L3_CSUM;
838 				} else {
839 					cs_mask =
840 					    MLX5_ETH_WQE_L4_CSUM |
841 					    MLX5_ETH_WQE_L3_CSUM;
842 				}
843 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
844 				    cs_mask, opcode);
845 				if (unlikely(args.ihs == 0)) {
846 					err = EINVAL;
847 					goto tx_drop;
848 				}
849 			}
850 
851 			sq->mbuf[pi].num_bytes = max_t (unsigned int,
852 			    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
853 		}
854 	} else {
855 		opcode = MLX5_OPCODE_SEND;
856 
857 		if (args.ihs == 0) {
858 			switch (sq->min_inline_mode) {
859 			case MLX5_INLINE_MODE_IP:
860 			case MLX5_INLINE_MODE_TCP_UDP:
861 				args.ihs = mlx5e_get_full_header_size(mb, NULL);
862 				if (unlikely(args.ihs == 0))
863 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
864 				break;
865 			case MLX5_INLINE_MODE_L2:
866 				args.ihs = mlx5e_get_l2_header_size(sq, mb);
867 				break;
868 			case MLX5_INLINE_MODE_NONE:
869 				/* FALLTHROUGH */
870 			default:
871 				if ((mb->m_flags & M_VLANTAG) != 0 &&
872 				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
873 					/* inlining VLAN data is not required */
874 					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
875 					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
876 					args.ihs = 0;
877 				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
878 				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
879 					/* inlining non-VLAN data is not required */
880 					args.ihs = 0;
881 				} else {
882 					/* we are forced to inlining L2 header, if any */
883 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
884 				}
885 				break;
886 			}
887 		}
888 		sq->mbuf[pi].num_bytes = max_t (unsigned int,
889 		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
890 	}
891 
892 	if (likely(args.ihs == 0)) {
893 		/* nothing to inline */
894 	} else if ((mb->m_flags & M_VLANTAG) != 0) {
895 		struct ether_vlan_header *eh = (struct ether_vlan_header *)
896 		    wqe->eth.inline_hdr_start;
897 
898 		/* Range checks */
899 		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
900 			if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) {
901 				err = EINVAL;
902 				goto tx_drop;
903 			}
904 			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
905 		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
906 			err = EINVAL;
907 			goto tx_drop;
908 		}
909 		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
910 		m_adj(mb, ETHER_HDR_LEN);
911 		/* Insert 4 bytes VLAN tag into data stream */
912 		eh->evl_proto = eh->evl_encap_proto;
913 		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
914 		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
915 		/* Copy rest of header data, if any */
916 		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
917 		m_adj(mb, args.ihs - ETHER_HDR_LEN);
918 		/* Extend header by 4 bytes */
919 		args.ihs += ETHER_VLAN_ENCAP_LEN;
920 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
921 	} else {
922 		/* check if inline header size is too big */
923 		if (unlikely(args.ihs > sq->max_inline)) {
924 			if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO |
925 			    CSUM_ENCAP_VXLAN))) {
926 				err = EINVAL;
927 				goto tx_drop;
928 			}
929 			args.ihs = sq->max_inline;
930 		}
931 		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
932 		m_adj(mb, args.ihs);
933 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
934 	}
935 
936 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
937 	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
938 		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
939 		    MLX5_SEND_WQE_DS);
940 	}
941 	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
942 
943 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
944 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
945 	if (err == EFBIG) {
946 		/* Update statistics */
947 		sq->stats.defragged++;
948 		/* Too many mbuf fragments */
949 		mb = m_defrag(*mbp, M_NOWAIT);
950 		if (mb == NULL) {
951 			mb = *mbp;
952 			goto tx_drop;
953 		}
954 		/* Try again */
955 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
956 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
957 	}
958 	/* Catch errors */
959 	if (err != 0)
960 		goto tx_drop;
961 
962 	/* Make sure all mbuf data, if any, is visible to the bus */
963 	if (nsegs != 0) {
964 		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
965 		    BUS_DMASYNC_PREWRITE);
966 	} else {
967 		/* All data was inlined, free the mbuf. */
968 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
969 		m_freem(mb);
970 		mb = NULL;
971 	}
972 
973 	for (x = 0; x != nsegs; x++) {
974 		if (segs[x].ds_len == 0)
975 			continue;
976 		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
977 		dseg->lkey = sq->mkey_be;
978 		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
979 		dseg++;
980 	}
981 
982 	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
983 
984 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
985 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
986 	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);
987 
988 	if (mlx5e_do_send_cqe_inline(sq))
989 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
990 	else
991 		wqe->ctrl.fm_ce_se = 0;
992 
993 	/* Copy data for doorbell */
994 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
995 
996 	/* Store pointer to mbuf */
997 	sq->mbuf[pi].mbuf = mb;
998 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
999 	sq->mbuf[pi].p_refcount = args.pref;
1000 	if (unlikely(args.pref != NULL))
1001 		atomic_add_int(args.pref, 1);
1002 	sq->pc += sq->mbuf[pi].num_wqebbs;
1003 
1004 	/* Count all traffic going out */
1005 	sq->stats.packets++;
1006 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
1007 
1008 	*mbp = NULL;	/* safety clear */
1009 	return (0);
1010 
1011 tx_drop:
1012 	sq->stats.dropped++;
1013 	*mbp = NULL;
1014 	m_freem(mb);
1015 	return err;
1016 }
1017 
/*
 * Reap up to "budget" worth of TX completions, unloading DMA maps and
 * freeing transmitted mbufs. Because completions are interleaved
 * (one CQE per "cev_factor" WQEs), each CQE may complete multiple
 * queue entries.
 */
static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
	u16 sqcc;

	/*
	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	while (budget > 0) {
		struct mlx5_cqe64 *cqe;
		struct mbuf *mb;
		bool match;
		u16 sqcc_this;
		u16 delta;
		u16 x;
		u16 ci;

		cqe = mlx5e_get_cqe(&sq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&sq->cq.wq);

		/* check if the completion event indicates an error */
		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
			sq->stats.cqe_err++;

		/* setup local variables */
		sqcc_this = be16toh(cqe->wqe_counter);
		match = false;

		/* update budget according to the event factor */
		budget -= sq->cev_factor;

		/*
		 * Walk forward from the consumer counter, completing
		 * entries until the one containing the CQE's WQE
		 * counter has been processed.
		 */
		for (x = 0;; x++) {
			if (unlikely(match != false)) {
				break;
			} else if (unlikely(x == sq->cev_factor)) {
				/* WQE counter match not found */
				sq->stats.cqe_err++;
				break;
			}
			ci = sqcc & sq->wq.sz_m1;
			delta = sqcc_this - sqcc;
			match = (delta < sq->mbuf[ci].num_wqebbs);
			mb = sq->mbuf[ci].mbuf;
			sq->mbuf[ci].mbuf = NULL;

			/* Release the external reference, if any. */
			if (unlikely(sq->mbuf[ci].p_refcount != NULL)) {
				atomic_add_int(sq->mbuf[ci].p_refcount, -1);
				sq->mbuf[ci].p_refcount = NULL;
			}

			if (mb == NULL) {
				/* Entries with no mbuf and no bytes are NOPs. */
				if (unlikely(sq->mbuf[ci].num_bytes == 0))
					sq->stats.nop++;
			} else {
				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

				/* Free transmitted mbuf */
				m_freem(mb);
			}
			sqcc += sq->mbuf[ci].num_wqebbs;
		}
	}

	mlx5_cqwq_update_db_record(&sq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	sq->cc = sqcc;
}
1096 
1097 static int
1098 mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
1099 {
1100 	int err = 0;
1101 
1102 	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1103 	    READ_ONCE(sq->running) == 0)) {
1104 		m_freem(mb);
1105 		return (ENETDOWN);
1106 	}
1107 
1108 	/* Do transmit */
1109 	if (mlx5e_sq_xmit(sq, &mb) != 0) {
1110 		/* NOTE: m_freem() is NULL safe */
1111 		m_freem(mb);
1112 		err = ENOBUFS;
1113 	}
1114 
1115 	/* Write the doorbell record, if any. */
1116 	mlx5e_tx_notify_hw(sq, false);
1117 
1118 	/*
1119 	 * Check if we need to start the event timer which flushes the
1120 	 * transmit ring on timeout:
1121 	 */
1122 	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
1123 	    sq->cev_factor != 1)) {
1124 		/* start the timer */
1125 		mlx5e_sq_cev_timeout(sq);
1126 	} else {
1127 		/* don't send NOPs yet */
1128 		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1129 	}
1130 	return (err);
1131 }
1132 
1133 int
1134 mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
1135 {
1136 	struct mlx5e_sq *sq;
1137 	int ret;
1138 
1139 	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
1140 		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
1141 		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
1142 		if (unlikely(sq == NULL)) {
1143 			goto select_queue;
1144 		}
1145 	} else {
1146 select_queue:
1147 		sq = mlx5e_select_queue(ifp, mb);
1148 		if (unlikely(sq == NULL)) {
1149 			/* Free mbuf */
1150 			m_freem(mb);
1151 
1152 			/* Invalid send queue */
1153 			return (ENXIO);
1154 		}
1155 	}
1156 
1157 	mtx_lock(&sq->lock);
1158 	ret = mlx5e_xmit_locked(ifp, sq, mb);
1159 	mtx_unlock(&sq->lock);
1160 
1161 	return (ret);
1162 }
1163 
/*
 * TX completion queue event handler: poll the completion queue under
 * the completion lock and re-arm it for the next event.
 */
void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

	mtx_lock(&sq->comp_lock);
	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
	/* Re-arm the CQ so further completions generate events. */
	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
	mtx_unlock(&sq->comp_lock);
}
1174