xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision 914752d0f7f874ab4fc8393aee28c22df87324f2)
1 /*-
2  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
3  * Copyright (c) 2022 NVIDIA corporation & affiliates.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "opt_kern_tls.h"
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30 
31 #include <dev/mlx5/mlx5_en/en.h>
32 #include <machine/atomic.h>
33 #include <dev/mlx5/mlx5_accel/ipsec.h>
34 
35 static inline bool
36 mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
37 {
38 	sq->cev_counter++;
39 	/* interleave the CQEs */
40 	if (sq->cev_counter >= sq->cev_factor) {
41 		sq->cev_counter = 0;
42 		return (true);
43 	}
44 	return (false);
45 }
46 
/*
 * Non-inline entry point with external linkage which forwards to
 * mlx5e_do_send_cqe_inline() and returns its result.
 */
bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{
	const bool request_cqe = mlx5e_do_send_cqe_inline(sq);

	return (request_cqe);
}
53 
54 void
55 mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
56 {
57 	u16 pi = sq->pc & sq->wq.sz_m1;
58 	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
59 
60 	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
61 
62 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
63 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
64 	if (mlx5e_do_send_cqe_inline(sq))
65 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
66 	else
67 		wqe->ctrl.fm_ce_se = 0;
68 
69 	/* Copy data for doorbell */
70 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
71 
72 	sq->mbuf[pi].mbuf = NULL;
73 	sq->mbuf[pi].num_bytes = 0;
74 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
75 	sq->pc += sq->mbuf[pi].num_wqebbs;
76 }
77 
78 static uint32_t mlx5e_hash_value;
79 
80 static void
81 mlx5e_hash_init(void *arg)
82 {
83 	mlx5e_hash_value = m_ether_tcpip_hash_init();
84 }
85 
86 /* Make kernel call mlx5e_hash_init after the random stack finished initializing */
87 SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
88 
89 static struct mlx5e_sq *
90 mlx5e_select_queue_by_send_tag(if_t ifp, struct mbuf *mb)
91 {
92 	struct m_snd_tag *mb_tag;
93 	struct mlx5e_sq *sq;
94 
95 	mb_tag = mb->m_pkthdr.snd_tag;
96 
97 #ifdef KERN_TLS
98 top:
99 #endif
100 	/* get pointer to sendqueue */
101 	switch (mb_tag->sw->type) {
102 #ifdef RATELIMIT
103 	case IF_SND_TAG_TYPE_RATE_LIMIT:
104 		sq = container_of(mb_tag,
105 		    struct mlx5e_rl_channel, tag)->sq;
106 		break;
107 #ifdef KERN_TLS
108 	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
109 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
110 		goto top;
111 #endif
112 #endif
113 	case IF_SND_TAG_TYPE_UNLIMITED:
114 		sq = &container_of(mb_tag,
115 		    struct mlx5e_channel, tag)->sq[0];
116 		KASSERT((mb_tag->refcount > 0),
117 		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
118 		break;
119 #ifdef KERN_TLS
120 	case IF_SND_TAG_TYPE_TLS:
121 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
122 		goto top;
123 #endif
124 	default:
125 		sq = NULL;
126 		break;
127 	}
128 
129 	/* check if valid */
130 	if (sq != NULL && READ_ONCE(sq->running) != 0)
131 		return (sq);
132 
133 	return (NULL);
134 }
135 
136 static struct mlx5e_sq *
137 mlx5e_select_queue(if_t ifp, struct mbuf *mb)
138 {
139 	struct mlx5e_priv *priv = if_getsoftc(ifp);
140 	struct mlx5e_sq *sq;
141 	u32 ch;
142 	u32 tc;
143 
144 	/* obtain VLAN information if present */
145 	if (mb->m_flags & M_VLANTAG) {
146 		tc = (mb->m_pkthdr.ether_vtag >> 13);
147 		if (tc >= priv->num_tc)
148 			tc = priv->default_vlan_prio;
149 	} else {
150 		tc = priv->default_vlan_prio;
151 	}
152 
153 	ch = priv->params.num_channels;
154 
155 	/* check if flowid is set */
156 	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
157 #ifdef RSS
158 		u32 temp;
159 
160 		if (rss_hash2bucket(mb->m_pkthdr.flowid,
161 		    M_HASHTYPE_GET(mb), &temp) == 0)
162 			ch = temp % ch;
163 		else
164 #endif
165 			ch = (mb->m_pkthdr.flowid % 128) % ch;
166 	} else {
167 		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
168 		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
169 	}
170 
171 	/* check if send queue is running */
172 	sq = &priv->channel[ch].sq[tc];
173 	if (likely(READ_ONCE(sq->running) != 0))
174 		return (sq);
175 	return (NULL);
176 }
177 
178 static inline u16
179 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
180 {
181 	struct ether_vlan_header *eh;
182 	uint16_t eth_type;
183 	int min_inline;
184 
185 	eh = mtod(mb, struct ether_vlan_header *);
186 	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
187 		goto max_inline;
188 	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
189 		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
190 			goto max_inline;
191 		eth_type = ntohs(eh->evl_proto);
192 		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
193 	} else {
194 		eth_type = ntohs(eh->evl_encap_proto);
195 		min_inline = ETHER_HDR_LEN;
196 	}
197 
198 	switch (eth_type) {
199 	case ETHERTYPE_IP:
200 	case ETHERTYPE_IPV6:
201 		/*
202 		 * Make sure the TOS(IPv4) or traffic class(IPv6)
203 		 * field gets inlined. Else the SQ may stall.
204 		 */
205 		min_inline += 4;
206 		break;
207 	default:
208 		goto max_inline;
209 	}
210 
211 	/*
212 	 * m_copydata() will be used on the remaining header which
213 	 * does not need to reside within the first m_len bytes of
214 	 * data:
215 	 */
216 	if (mb->m_pkthdr.len < min_inline)
217 		goto max_inline;
218 	return (min_inline);
219 
220 max_inline:
221 	return (MIN(mb->m_pkthdr.len, sq->max_inline));
222 }
223 
224 /*
225  * This function parse IPv4 and IPv6 packets looking for TCP and UDP
226  * headers.
227  *
228  * Upon return the pointer at which the "ppth" argument points, is set
229  * to the location of the TCP header. NULL is used if no TCP header is
230  * present.
231  *
232  * The return value indicates the number of bytes from the beginning
233  * of the packet until the first byte after the TCP or UDP header. If
234  * this function returns zero, the parsing failed.
235  */
236 int
237 mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
238 {
239 	const struct ether_vlan_header *eh;
240 	const struct tcphdr *th;
241 	const struct ip *ip;
242 	int ip_hlen, tcp_hlen;
243 	const struct ip6_hdr *ip6;
244 	uint16_t eth_type;
245 	int eth_hdr_len;
246 
247 	eh = mtod(mb, const struct ether_vlan_header *);
248 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
249 		goto failure;
250 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
251 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
252 			goto failure;
253 		eth_type = ntohs(eh->evl_proto);
254 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
255 	} else {
256 		eth_type = ntohs(eh->evl_encap_proto);
257 		eth_hdr_len = ETHER_HDR_LEN;
258 	}
259 
260 	switch (eth_type) {
261 	case ETHERTYPE_IP:
262 		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
263 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
264 			goto failure;
265 		switch (ip->ip_p) {
266 		case IPPROTO_TCP:
267 			ip_hlen = ip->ip_hl << 2;
268 			eth_hdr_len += ip_hlen;
269 			goto tcp_packet;
270 		case IPPROTO_UDP:
271 			ip_hlen = ip->ip_hl << 2;
272 			eth_hdr_len += ip_hlen + sizeof(struct udphdr);
273 			th = NULL;
274 			goto udp_packet;
275 		default:
276 			goto failure;
277 		}
278 		break;
279 	case ETHERTYPE_IPV6:
280 		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
281 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
282 			goto failure;
283 		switch (ip6->ip6_nxt) {
284 		case IPPROTO_TCP:
285 			eth_hdr_len += sizeof(*ip6);
286 			goto tcp_packet;
287 		case IPPROTO_UDP:
288 			eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr);
289 			th = NULL;
290 			goto udp_packet;
291 		default:
292 			goto failure;
293 		}
294 		break;
295 	default:
296 		goto failure;
297 	}
298 tcp_packet:
299 	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
300 		const struct mbuf *m_th = mb->m_next;
301 		if (unlikely(mb->m_len != eth_hdr_len ||
302 		    m_th == NULL || m_th->m_len < sizeof(*th)))
303 			goto failure;
304 		th = (const struct tcphdr *)(m_th->m_data);
305 	} else {
306 		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
307 	}
308 	tcp_hlen = th->th_off << 2;
309 	eth_hdr_len += tcp_hlen;
310 udp_packet:
311 	/*
312 	 * m_copydata() will be used on the remaining header which
313 	 * does not need to reside within the first m_len bytes of
314 	 * data:
315 	 */
316 	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
317 		goto failure;
318 	if (ppth != NULL)
319 		*ppth = th;
320 	return (eth_hdr_len);
321 failure:
322 	if (ppth != NULL)
323 		*ppth = NULL;
324 	return (0);
325 }
326 
327 /*
328  * Locate a pointer inside a mbuf chain. Returns NULL upon failure.
329  */
330 static inline void *
331 mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len,
332     int min_len)
333 {
334 	if (unlikely(mb[0]->m_len == eth_hdr_len)) {
335 		poffset[0] = eth_hdr_len;
336 		if (unlikely((mb[0] = mb[0]->m_next) == NULL))
337 			return (NULL);
338 	}
339 	if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len))
340 		return (NULL);
341 	return (mb[0]->m_data + eth_hdr_len - poffset[0]);
342 }
343 
344 /*
345  * This function parses IPv4 and IPv6 packets looking for UDP, VXLAN
346  * and TCP headers.
347  *
348  * The return value indicates the number of bytes from the beginning
349  * of the packet until the first byte after the last header parsed.
350  * If this function returns zero, the parsing failed.
 *
 * "mb" is the packet to parse; its data is only read.
 *
 * "wqe" receives the results: the swp_*_offset fields, swp_flags and
 * cs_flags of wqe->eth are written while the headers are recognized.
 * Fields written before a failure is detected are not restored when
 * zero is returned.
 *
 * "cs_mask" limits how far parsing goes. Without
 * MLX5_ETH_WQE_L3_INNER_CSUM parsing stops after the outer UDP
 * header, and without MLX5_ETH_WQE_L4_INNER_CSUM it stops after the
 * inner IP header. With both bits set the inner UDP or TCP header is
 * parsed, too.
 *
 * "opcode" is not referenced by this function.
351  */
352 static int
353 mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe,
354     uint8_t cs_mask, uint8_t opcode)
355 {
356 	const struct ether_vlan_header *eh;
357 	struct ip *ip4;
358 	struct ip6_hdr *ip6;
359 	struct tcphdr *th;
360 	struct udphdr *udp;
361 	bool has_outer_vlan_tag;
362 	uint16_t eth_type;
363 	uint8_t ip_type;
364 	int pkt_hdr_len;
365 	int eth_hdr_len;
366 	int tcp_hlen;
367 	int ip_hlen;
368 	int offset;
369 
	/*
	 * Sample the packet length and VLAN flag from the first mbuf,
	 * because "mb" may be advanced to a later mbuf in the chain by
	 * mlx5e_parse_mbuf_chain() below. "offset" is the packet offset
	 * at which the data of the mbuf pointed to by "mb" starts.
	 */
370 	pkt_hdr_len = mb->m_pkthdr.len;
371 	has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0;
372 	offset = 0;
373 
374 	eh = mtod(mb, const struct ether_vlan_header *);
375 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
376 		return (0);
377 
	/*
	 * NOTE: "eth_type" is kept in network byte order in this
	 * function; the case labels below use htons() accordingly.
	 */
378 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
379 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
380 			return (0);
381 		eth_type = eh->evl_proto;
382 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
383 	} else {
384 		eth_type = eh->evl_encap_proto;
385 		eth_hdr_len = ETHER_HDR_LEN;
386 	}
387 
	/*
	 * Outer IP and UDP header. Only UDP is accepted as the outer
	 * L4 protocol. The swp offsets are stored as the byte offset
	 * divided by two. Note that cs_flags is assigned here, which
	 * replaces any flags set by the caller.
	 */
388 	switch (eth_type) {
389 	case htons(ETHERTYPE_IP):
390 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
391 		    sizeof(*ip4));
392 		if (unlikely(ip4 == NULL))
393 			return (0);
394 		ip_type = ip4->ip_p;
395 		if (unlikely(ip_type != IPPROTO_UDP))
396 			return (0);
397 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
398 		wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
399 		ip_hlen = ip4->ip_hl << 2;
400 		eth_hdr_len += ip_hlen;
401 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
402 		    sizeof(*udp));
403 		if (unlikely(udp == NULL))
404 			return (0);
405 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
406 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE;
407 		eth_hdr_len += sizeof(*udp);
408 		break;
409 	case htons(ETHERTYPE_IPV6):
410 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
411 		    sizeof(*ip6));
412 		if (unlikely(ip6 == NULL))
413 			return (0);
414 		ip_type = ip6->ip6_nxt;
415 		if (unlikely(ip_type != IPPROTO_UDP))
416 			return (0);
417 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
418 		wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM;
419 		eth_hdr_len += sizeof(*ip6);
420 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
421 		    sizeof(*udp));
422 		if (unlikely(udp == NULL))
423 			return (0);
424 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
425 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE |
426 		    MLX5_ETH_WQE_SWP_OUTER_L3_TYPE;
427 		eth_hdr_len += sizeof(*udp);
428 		break;
429 	default:
430 		return (0);
431 	}
432 
433 	/*
434 	 * If the hardware is not computing inner IP checksum, then
435 	 * stop after the outer UDP header and skip the VXLAN and
	 * inner headers:
436 	 */
437 	if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0))
438 		goto done;
	/*
	 * Step over the 8 bytes which follow the outer UDP header;
	 * going by the comment above this is the VXLAN header.
	 */
439 	if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
440 	    8) == NULL))
441 		return (0);
442 	eth_hdr_len += 8;
443 
444 	/* Check for ethernet header again. */
445 	eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN);
446 	if (unlikely(eh == NULL))
447 		return (0);
448 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		/* the inner VLAN header must be in the current mbuf, too */
449 		if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN +
450 		    ETHER_VLAN_ENCAP_LEN))
451 			return (0);
452 		eth_type = eh->evl_proto;
453 		eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
454 	} else {
455 		eth_type = eh->evl_encap_proto;
456 		eth_hdr_len += ETHER_HDR_LEN;
457 	}
458 
459 	/* Check for IP header again. */
460 	switch (eth_type) {
461 	case htons(ETHERTYPE_IP):
462 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
463 		    sizeof(*ip4));
464 		if (unlikely(ip4 == NULL))
465 			return (0);
466 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
467 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
468 		ip_type = ip4->ip_p;
469 		ip_hlen = ip4->ip_hl << 2;
470 		eth_hdr_len += ip_hlen;
471 		break;
472 	case htons(ETHERTYPE_IPV6):
473 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
474 		    sizeof(*ip6));
475 		if (unlikely(ip6 == NULL))
476 			return (0);
477 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
478 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE;
479 		ip_type = ip6->ip6_nxt;
480 		eth_hdr_len += sizeof(*ip6);
481 		break;
482 	default:
483 		return (0);
484 	}
485 
486 	/*
487 	 * If the hardware is not computing inner UDP/TCP checksum,
488 	 * then skip inlining the inner UDP/TCP header:
489 	 */
490 	if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0))
491 		goto done;
492 
	/* Inner L4 header; only UDP and TCP are accepted. */
493 	switch (ip_type) {
494 	case IPPROTO_UDP:
495 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
496 		    sizeof(*udp));
497 		if (unlikely(udp == NULL))
498 			return (0);
499 		wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2);
500 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
501 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
502 		eth_hdr_len += sizeof(*udp);
503 		break;
504 	case IPPROTO_TCP:
505 		th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
506 		    sizeof(*th));
507 		if (unlikely(th == NULL))
508 			return (0);
509 		wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2;
510 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
511 		tcp_hlen = th->th_off << 2;
512 		eth_hdr_len += tcp_hlen;
513 		break;
514 	default:
515 		return (0);
516 	}
517 done:
	/* The packet must be at least as long as the headers found. */
518 	if (unlikely(pkt_hdr_len < eth_hdr_len))
519 		return (0);
520 
521 	/* Account for software inserted VLAN tag, if any. */
522 	if (unlikely(has_outer_vlan_tag)) {
523 		wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
524 		wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
525 		wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
526 		wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
527 	}
528 
529 	/*
530 	 * When inner checksums are set, outer L4 checksum flag must
531 	 * be disabled.
532 	 */
533 	if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM |
534 	    MLX5_ETH_WQE_L4_INNER_CSUM))
535 		wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM;
536 
537 	return (eth_hdr_len);
538 }
539 
/*
 * Layout of one DUMP work queue entry as filled in by
 * mlx5e_sq_dump_xmit(): a control segment followed by a single data
 * segment, aligned to MLX5_SEND_WQE_BB.
 */
540 struct mlx5_wqe_dump_seg {
541 	struct mlx5_wqe_ctrl_seg ctrl;
542 	struct mlx5_wqe_data_seg data;
543 } __aligned(MLX5_SEND_WQE_BB);
544 
/* A WQE having a DS count of two must occupy exactly one building block. */
545 CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);
546 
/*
 * Transmit the mbuf chain "*mbp" on send queue "sq" as a sequence of
 * DUMP work queue entries.
 *
 * The mbuf chain is DMA loaded and every DMA segment is split into
 * pieces of at most "hw_mtu_msb" bytes. One work queue building block
 * is produced per piece. "parg->tisn" is written to the "imm" field
 * of every control segment and a reference on "parg->mst" is stored
 * along with the mbuf.
 *
 * On return "*mbp" is always NULL: the mbuf chain has either been
 * stored in the producer slot of sq->mbuf[] or been freed.
 *
 * Returns zero on success and when there is no data to transmit,
 * ENOBUFS when the send queue has no room, else the error code from
 * bus_dmamap_load_mbuf_sg(). Only the doorbell data in
 * sq->doorbell.d32 is updated; no doorbell is written here.
 */
547 int
548 mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
549 {
550 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
551 	struct mlx5_wqe_dump_seg *wqe;
552 	struct mlx5_wqe_dump_seg *wqe_last;
553 	int nsegs;
554 	int xsegs;
555 	u32 off;
556 	u32 msb;
557 	int err;
558 	int x;
559 	struct mbuf *mb;
	/* one control segment plus one data segment per DUMP WQE */
560 	const u32 ds_cnt = 2;
561 	u16 pi;
562 	const u8 opcode = MLX5_OPCODE_DUMP;
563 
564 	/* get pointer to mbuf */
565 	mb = *mbp;
566 
567 	/* get producer index */
568 	pi = sq->pc & sq->wq.sz_m1;
569 
	/* num_wqebbs is counted up while the WQEs are written below */
570 	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
571 	sq->mbuf[pi].num_wqebbs = 0;
572 
573 	/* check number of segments in mbuf */
574 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
575 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
576 	if (err == EFBIG) {
577 		/* update statistics */
578 		sq->stats.defragged++;
579 		/* too many mbuf fragments */
580 		mb = m_defrag(*mbp, M_NOWAIT);
581 		if (mb == NULL) {
			/* drop the original chain; "err" is still EFBIG */
582 			mb = *mbp;
583 			goto tx_drop;
584 		}
585 		/* try again */
586 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
587 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
588 	}
589 
590 	if (err != 0)
591 		goto tx_drop;
592 
593 	/* make sure all mbuf data, if any, is visible to the bus */
594 	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
595 	    BUS_DMASYNC_PREWRITE);
596 
597 	/* compute number of real DUMP segments */
598 	msb = sq->priv->params_ethtool.hw_mtu_msb;
599 	for (x = xsegs = 0; x != nsegs; x++)
600 		xsegs += howmany((u32)segs[x].ds_len, msb);
601 
602 	/* check if there are no segments */
603 	if (unlikely(xsegs == 0)) {
604 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
605 		m_freem(mb);
606 		*mbp = NULL;	/* safety clear */
607 		return (0);
608 	}
609 
610 	/* return ENOBUFS if the queue is full */
611 	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
612 		sq->stats.enobuf++;
613 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
614 		m_freem(mb);
615 		*mbp = NULL;	/* safety clear */
616 		return (ENOBUFS);
617 	}
618 
	/*
	 * "wqe_last" refers to the last entry of the ring for now, so
	 * that the loop below can wrap around to the first entry.
	 */
619 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
620 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);
621 
	/* write one DUMP WQE per piece of at most "msb" bytes */
622 	for (x = 0; x != nsegs; x++) {
623 		for (off = 0; off < segs[x].ds_len; off += msb) {
624 			u32 len = segs[x].ds_len - off;
625 
626 			/* limit length */
627 			if (likely(len > msb))
628 				len = msb;
629 
630 			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
631 
632 			/* fill control segment */
633 			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
634 			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
635 			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);
636 
637 			/* fill data segment */
638 			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
639 			wqe->data.lkey = sq->mkey_be;
640 			wqe->data.byte_count = cpu_to_be32(len);
641 
642 			/* advance to next building block */
643 			if (unlikely(wqe == wqe_last))
644 				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
645 			else
646 				wqe++;
647 
648 			sq->mbuf[pi].num_wqebbs++;
649 			sq->pc++;
650 		}
651 	}
652 
	/*
	 * From here on "wqe" is the first and "wqe_last" the last
	 * DUMP WQE written above.
	 */
653 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
654 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);
655 
656 	/* put in place data fence */
657 	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;
658 
659 	/* check if we should generate a completion event */
660 	if (mlx5e_do_send_cqe_inline(sq))
661 		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
662 
663 	/* copy data for doorbell */
664 	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));
665 
666 	/* store pointer to mbuf */
667 	sq->mbuf[pi].mbuf = mb;
668 	sq->mbuf[pi].mst = m_snd_tag_ref(parg->mst);
669 
670 	/* count all traffic going out */
671 	sq->stats.packets++;
672 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
673 
674 	*mbp = NULL;	/* safety clear */
675 	return (0);
676 
677 tx_drop:
	/* no DMA mapping is loaded when getting here */
678 	sq->stats.dropped++;
679 	*mbp = NULL;
680 	m_freem(mb);
681 	return err;
682 }
683 
/*
 * Build a single SEND or LSO work queue entry for the packet "*mbp"
 * on send queue "sq".
 *
 * The function goes through the following steps:
 *  - check for room and pad the end of the ring with a NOP, so that
 *    the WQE does not wrap around
 *  - give the TLS code a chance to handle the packet (KERN_TLS only)
 *  - translate the mbuf checksum flags into WQE checksum flags
 *  - compute the number of header bytes to inline, "args.ihs",
 *    depending on TSO, VXLAN encapsulation and the minimum inline
 *    mode of the send queue
 *  - copy the inline header into the WQE, inserting the VLAN tag
 *    when M_VLANTAG is set, and trim these bytes from the mbuf
 *  - DMA load the remaining data and write one data segment per
 *    non-empty DMA segment
 *  - fill in the control segment, stage the doorbell data, record
 *    the mbuf in sq->mbuf[] and update the statistics
 *
 * Returns zero on success, and when the TLS code deferred the packet.
 * When ENOBUFS or ENOMEM is returned by the room checks at the top,
 * the mbuf is not consumed and "*mbp" is left untouched. On all other
 * failures the mbuf is freed, "*mbp" is set to NULL and the error
 * code is returned. No doorbell is written here; only the doorbell
 * data in sq->doorbell.d32 is updated.
 */
684 int
685 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
686 {
687 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
688 	struct mlx5e_xmit_args args = {};
689 	struct mlx5_wqe_data_seg *dseg;
690 	struct mlx5e_tx_wqe *wqe;
691 	if_t ifp;
692 	int nsegs;
693 	int err;
694 	int x;
695 	struct mbuf *mb;
696 	u16 ds_cnt;
697 	u16 pi;
698 	u8 opcode;
699 
700 #ifdef KERN_TLS
701 top:
702 #endif
703 	/* Return ENOBUFS if the queue is full */
704 	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
705 		sq->stats.enobuf++;
706 		return (ENOBUFS);
707 	}
708 
709 	/* Align SQ edge with NOPs to avoid WQE wrap around */
	/* here "pi" is the number of slots left until the ring end, minus one */
710 	pi = ((~sq->pc) & sq->wq.sz_m1);
711 	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
712 		/* Send one multi NOP message instead of many */
713 		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
714 		pi = ((~sq->pc) & sq->wq.sz_m1);
715 		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
716 			sq->stats.enobuf++;
717 			return (ENOMEM);
718 		}
719 	}
720 
721 #ifdef KERN_TLS
722 	/* Special handling for TLS packets, if any */
723 	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
724 	case MLX5E_TLS_LOOP:
725 		goto top;
726 	case MLX5E_TLS_FAILURE:
727 		mb = *mbp;
728 		err = ENOMEM;
729 		goto tx_drop;
730 	case MLX5E_TLS_DEFERRED:
731 		return (0);
732 	case MLX5E_TLS_CONTINUE:
733 	default:
734 		break;
735 	}
736 #endif
737 
738 	/* Setup local variables */
739 	pi = sq->pc & sq->wq.sz_m1;
740 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
741 	ifp = sq->ifp;
742 
743 	memset(wqe, 0, sizeof(*wqe));
744 
745 	/* get pointer to mbuf */
746 	mb = *mbp;
747 
748 	mlx5e_accel_ipsec_handle_tx(mb, wqe);
749 
750 	/* Send a copy of the frame to the BPF listener, if any */
751 	if (ifp != NULL)
752 		ETHER_BPF_MTAP(ifp, mb);
753 
	/* Translate the mbuf checksum offload flags into WQE flags */
754 	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
755 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
756 	}
757 	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
758 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
759 	}
760 	if (wqe->eth.cs_flags == 0) {
761 		sq->stats.csum_offload_none++;
762 	}
	/*
	 * Select the opcode and compute the inline header size,
	 * "args.ihs". The "args" structure is zero initialized and is
	 * only passed to mlx5e_sq_tls_xmit() before this point, so
	 * "args.ihs" is zero here unless the TLS code has set it.
	 */
763 	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
764 		u32 payload_len;
765 		u32 mss = mb->m_pkthdr.tso_segsz;
766 		u32 num_pkts;
767 
768 		wqe->eth.mss = cpu_to_be16(mss);
769 		opcode = MLX5_OPCODE_LSO;
770 		if (args.ihs == 0)
771 			args.ihs = mlx5e_get_full_header_size(mb, NULL);
772 		if (unlikely(args.ihs == 0)) {
773 			err = EINVAL;
774 			goto tx_drop;
775 		}
		/* the headers are counted once per resulting packet */
776 		payload_len = mb->m_pkthdr.len - args.ihs;
777 		if (payload_len == 0)
778 			num_pkts = 1;
779 		else
780 			num_pkts = DIV_ROUND_UP(payload_len, mss);
781 		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);
782 
783 
784 		sq->stats.tso_packets++;
785 		sq->stats.tso_bytes += payload_len;
786 	} else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
787 		/* check for inner TCP TSO first */
788 		if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO |
789 		    CSUM_INNER_IP6_TSO)) {
790 			u32 payload_len;
791 			u32 mss = mb->m_pkthdr.tso_segsz;
792 			u32 num_pkts;
793 
794 			wqe->eth.mss = cpu_to_be16(mss);
795 			opcode = MLX5_OPCODE_LSO;
796 
797 			if (likely(args.ihs == 0)) {
798 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
799 				       MLX5_ETH_WQE_L3_INNER_CSUM |
800 				       MLX5_ETH_WQE_L4_INNER_CSUM |
801 				       MLX5_ETH_WQE_L4_CSUM |
802 				       MLX5_ETH_WQE_L3_CSUM,
803 				       opcode);
804 				if (unlikely(args.ihs == 0)) {
805 					err = EINVAL;
806 					goto tx_drop;
807 				}
808 			}
809 
810 			payload_len = mb->m_pkthdr.len - args.ihs;
811 			if (payload_len == 0)
812 				num_pkts = 1;
813 			else
814 				num_pkts = DIV_ROUND_UP(payload_len, mss);
815 			sq->mbuf[pi].num_bytes = payload_len +
816 			    num_pkts * args.ihs;
817 
818 			sq->stats.tso_packets++;
819 			sq->stats.tso_bytes += payload_len;
820 		} else {
821 			opcode = MLX5_OPCODE_SEND;
822 
823 			if (likely(args.ihs == 0)) {
824 				uint8_t cs_mask;
825 
				/*
				 * The checksum mask tells the VXLAN
				 * parser how deep to parse, based on
				 * the requested inner checksums.
				 */
826 				if (mb->m_pkthdr.csum_flags &
827 				    (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP |
828 				     CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) {
829 					cs_mask =
830 					    MLX5_ETH_WQE_L3_INNER_CSUM |
831 					    MLX5_ETH_WQE_L4_INNER_CSUM |
832 					    MLX5_ETH_WQE_L4_CSUM |
833 					    MLX5_ETH_WQE_L3_CSUM;
834 				} else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) {
835 					cs_mask =
836 					    MLX5_ETH_WQE_L3_INNER_CSUM |
837 					    MLX5_ETH_WQE_L4_CSUM |
838 					    MLX5_ETH_WQE_L3_CSUM;
839 				} else {
840 					cs_mask =
841 					    MLX5_ETH_WQE_L4_CSUM |
842 					    MLX5_ETH_WQE_L3_CSUM;
843 				}
844 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
845 				    cs_mask, opcode);
846 				if (unlikely(args.ihs == 0)) {
847 					err = EINVAL;
848 					goto tx_drop;
849 				}
850 			}
851 
852 			sq->mbuf[pi].num_bytes = max_t (unsigned int,
853 			    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
854 		}
855 	} else {
856 		opcode = MLX5_OPCODE_SEND;
857 
858 		if (args.ihs == 0) {
859 			switch (sq->min_inline_mode) {
860 			case MLX5_INLINE_MODE_IP:
861 			case MLX5_INLINE_MODE_TCP_UDP:
				/* fall back to the L2 size if parsing fails */
862 				args.ihs = mlx5e_get_full_header_size(mb, NULL);
863 				if (unlikely(args.ihs == 0))
864 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
865 				break;
866 			case MLX5_INLINE_MODE_L2:
867 				args.ihs = mlx5e_get_l2_header_size(sq, mb);
868 				break;
869 			case MLX5_INLINE_MODE_NONE:
870 				/* FALLTHROUGH */
871 			default:
872 				if ((mb->m_flags & M_VLANTAG) != 0 &&
873 				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
874 					/* inlining VLAN data is not required */
875 					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
876 					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
877 					args.ihs = 0;
878 				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
879 				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
880 					/* inlining non-VLAN data is not required */
881 					args.ihs = 0;
882 				} else {
883 					/* we are forced to inlining L2 header, if any */
884 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
885 				}
886 				break;
887 			}
888 		}
889 		sq->mbuf[pi].num_bytes = max_t (unsigned int,
890 		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
891 	}
892 
	/*
	 * Copy "args.ihs" bytes of header into the WQE and remove
	 * them from the mbuf chain.
	 */
893 	if (likely(args.ihs == 0)) {
894 		/* nothing to inline */
895 	} else if ((mb->m_flags & M_VLANTAG) != 0) {
896 		struct ether_vlan_header *eh = (struct ether_vlan_header *)
897 		    wqe->eth.inline_hdr_start;
898 
899 		/* Range checks */
		/* leave room for the VLAN tag which is inserted below */
900 		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
901 			if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) {
902 				err = EINVAL;
903 				goto tx_drop;
904 			}
905 			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
906 		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
907 			err = EINVAL;
908 			goto tx_drop;
909 		}
910 		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
911 		m_adj(mb, ETHER_HDR_LEN);
912 		/* Insert 4 bytes VLAN tag into data stream */
913 		eh->evl_proto = eh->evl_encap_proto;
914 		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
915 		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
916 		/* Copy rest of header data, if any */
917 		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
918 		m_adj(mb, args.ihs - ETHER_HDR_LEN);
919 		/* Extend header by 4 bytes */
920 		args.ihs += ETHER_VLAN_ENCAP_LEN;
921 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
922 	} else {
923 		/* check if inline header size is too big */
924 		if (unlikely(args.ihs > sq->max_inline)) {
925 			if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO |
926 			    CSUM_ENCAP_VXLAN))) {
927 				err = EINVAL;
928 				goto tx_drop;
929 			}
930 			args.ihs = sq->max_inline;
931 		}
932 		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
933 		m_adj(mb, args.ihs);
934 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
935 	}
936 
	/*
	 * The data segments start after the fixed part of the WQE,
	 * plus the inline header bytes which do not fit into
	 * "inline_hdr_start", rounded up to whole MLX5_SEND_WQE_DS
	 * units.
	 */
937 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
938 	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
939 		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
940 		    MLX5_SEND_WQE_DS);
941 	}
942 	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
943 
944 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
945 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
946 	if (err == EFBIG) {
947 		/* Update statistics */
948 		sq->stats.defragged++;
949 		/* Too many mbuf fragments */
950 		mb = m_defrag(*mbp, M_NOWAIT);
951 		if (mb == NULL) {
			/* drop the original chain; "err" is still EFBIG */
952 			mb = *mbp;
953 			goto tx_drop;
954 		}
955 		/* Try again */
956 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
957 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
958 	}
959 	/* Catch errors */
960 	if (err != 0)
961 		goto tx_drop;
962 
963 	/* Make sure all mbuf data, if any, is visible to the bus */
964 	if (nsegs != 0) {
965 		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
966 		    BUS_DMASYNC_PREWRITE);
967 	} else {
968 		/* All data was inlined, free the mbuf. */
969 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
970 		m_freem(mb);
971 		mb = NULL;
972 	}
973 
	/* Write one data segment per DMA segment, skipping empty ones */
974 	for (x = 0; x != nsegs; x++) {
975 		if (segs[x].ds_len == 0)
976 			continue;
977 		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
978 		dseg->lkey = sq->mkey_be;
979 		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
980 		dseg++;
981 	}
982 
	/* Total size of the WQE, counted in data segment units */
983 	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
984 
985 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
986 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
987 	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);
988 
989 	if (mlx5e_do_send_cqe_inline(sq))
990 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
991 	else
992 		wqe->ctrl.fm_ce_se = 0;
993 
994 	/* Copy data for doorbell */
995 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
996 
997 	/* Store pointer to mbuf */
998 	sq->mbuf[pi].mbuf = mb;
999 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
1000 	if (unlikely(args.mst != NULL))
1001 		sq->mbuf[pi].mst = m_snd_tag_ref(args.mst);
1002 	else
1003 		MPASS(sq->mbuf[pi].mst == NULL);
1004 
1005 	sq->pc += sq->mbuf[pi].num_wqebbs;
1006 
1007 	/* Count all traffic going out */
1008 	sq->stats.packets++;
1009 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
1010 
1011 	*mbp = NULL;	/* safety clear */
1012 	return (0);
1013 
1014 tx_drop:
	/* no DMA mapping is loaded when getting here */
1015 	sq->stats.dropped++;
1016 	*mbp = NULL;
1017 	m_freem(mb);
1018 	return err;
1019 }
1020 
1021 static void
1022 mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
1023 {
1024 	u16 sqcc;
1025 
1026 	/*
1027 	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
1028 	 * otherwise a cq overrun may occur
1029 	 */
1030 	sqcc = sq->cc;
1031 
1032 	while (budget > 0) {
1033 		struct mlx5_cqe64 *cqe;
1034 		struct m_snd_tag *mst;
1035 		struct mbuf *mb;
1036 		bool match;
1037 		u16 sqcc_this;
1038 		u16 delta;
1039 		u16 x;
1040 		u16 ci;
1041 
1042 		cqe = mlx5e_get_cqe(&sq->cq);
1043 		if (!cqe)
1044 			break;
1045 
1046 		mlx5_cqwq_pop(&sq->cq.wq);
1047 
1048 		/* check if the completion event indicates an error */
1049 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
1050 			mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
1051 			sq->stats.cqe_err++;
1052 		}
1053 
1054 		/* setup local variables */
1055 		sqcc_this = be16toh(cqe->wqe_counter);
1056 		match = false;
1057 
1058 		/* update budget according to the event factor */
1059 		budget -= sq->cev_factor;
1060 
1061 		for (x = 0;; x++) {
1062 			if (unlikely(match != false)) {
1063 				break;
1064 			} else if (unlikely(x == sq->cev_factor)) {
1065 				/* WQE counter match not found */
1066 				sq->stats.cqe_err++;
1067 				break;
1068 			}
1069 			ci = sqcc & sq->wq.sz_m1;
1070 			delta = sqcc_this - sqcc;
1071 			match = (delta < sq->mbuf[ci].num_wqebbs);
1072 			mb = sq->mbuf[ci].mbuf;
1073 			sq->mbuf[ci].mbuf = NULL;
1074 			mst = sq->mbuf[ci].mst;
1075 			sq->mbuf[ci].mst = NULL;
1076 
1077 			if (unlikely(mb == NULL)) {
1078 				if (unlikely(sq->mbuf[ci].num_bytes == 0))
1079 					sq->stats.nop++;
1080 			} else {
1081 				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
1082 				    BUS_DMASYNC_POSTWRITE);
1083 				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
1084 
1085 				/* Free transmitted mbuf */
1086 				m_freem(mb);
1087 			}
1088 
1089 			if (unlikely(mst != NULL))
1090 				m_snd_tag_rele(mst);
1091 
1092 			sqcc += sq->mbuf[ci].num_wqebbs;
1093 		}
1094 	}
1095 
1096 	mlx5_cqwq_update_db_record(&sq->cq.wq);
1097 
1098 	/* Ensure cq space is freed before enabling more cqes */
1099 	atomic_thread_fence_rel();
1100 
1101 	sq->cc = sqcc;
1102 }
1103 
1104 static int
1105 mlx5e_xmit_locked(if_t ifp, struct mlx5e_sq *sq, struct mbuf *mb)
1106 {
1107 	int err = 0;
1108 
1109 	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
1110 	    READ_ONCE(sq->running) == 0)) {
1111 		m_freem(mb);
1112 		return (ENETDOWN);
1113 	}
1114 
1115 	/* Do transmit */
1116 	if (mlx5e_sq_xmit(sq, &mb) != 0) {
1117 		/* NOTE: m_freem() is NULL safe */
1118 		m_freem(mb);
1119 		err = ENOBUFS;
1120 	}
1121 
1122 	/* Write the doorbell record, if any. */
1123 	mlx5e_tx_notify_hw(sq, false);
1124 
1125 	/*
1126 	 * Check if we need to start the event timer which flushes the
1127 	 * transmit ring on timeout:
1128 	 */
1129 	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
1130 	    sq->cev_factor != 1)) {
1131 		/* start the timer */
1132 		mlx5e_sq_cev_timeout(sq);
1133 	} else {
1134 		/* don't send NOPs yet */
1135 		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1136 	}
1137 	return (err);
1138 }
1139 
1140 int
1141 mlx5e_xmit(if_t ifp, struct mbuf *mb)
1142 {
1143 	struct mlx5e_sq *sq;
1144 	int ret;
1145 
1146 	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
1147 		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
1148 		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
1149 		if (unlikely(sq == NULL)) {
1150 			goto select_queue;
1151 		}
1152 	} else {
1153 select_queue:
1154 		sq = mlx5e_select_queue(ifp, mb);
1155 		if (unlikely(sq == NULL)) {
1156 			/* Free mbuf */
1157 			m_freem(mb);
1158 
1159 			/* Invalid send queue */
1160 			return (ENXIO);
1161 		}
1162 	}
1163 
1164 	mtx_lock(&sq->lock);
1165 	ret = mlx5e_xmit_locked(ifp, sq, mb);
1166 	mtx_unlock(&sq->lock);
1167 
1168 	return (ret);
1169 }
1170 
1171 void
1172 mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
1173 {
1174 	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
1175 
1176 	mtx_lock(&sq->comp_lock);
1177 	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
1178 	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
1179 	mtx_unlock(&sq->comp_lock);
1180 }
1181