xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision 3332f1b444d4a73238e9f59cca27bfc95fe936bd)
1 /*-
2  * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_kern_tls.h"
29 
30 #include "en.h"
31 #include <machine/atomic.h>
32 
33 static inline bool
34 mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
35 {
36 	sq->cev_counter++;
37 	/* interleave the CQEs */
38 	if (sq->cev_counter >= sq->cev_factor) {
39 		sq->cev_counter = 0;
40 		return (true);
41 	}
42 	return (false);
43 }
44 
/*
 * Externally visible wrapper around mlx5e_do_send_cqe_inline().
 * Returns true when the next WQE should request a completion event.
 */
bool
mlx5e_do_send_cqe(struct mlx5e_sq *sq)
{
	const bool request_cqe = mlx5e_do_send_cqe_inline(sq);

	return (request_cqe);
}
51 
52 void
53 mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
54 {
55 	u16 pi = sq->pc & sq->wq.sz_m1;
56 	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
57 
58 	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
59 
60 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
61 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
62 	if (mlx5e_do_send_cqe_inline(sq))
63 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
64 	else
65 		wqe->ctrl.fm_ce_se = 0;
66 
67 	/* Copy data for doorbell */
68 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
69 
70 	sq->mbuf[pi].mbuf = NULL;
71 	sq->mbuf[pi].num_bytes = 0;
72 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
73 	sq->pc += sq->mbuf[pi].num_wqebbs;
74 }
75 
#if (__FreeBSD_version >= 1100000)
/* Seed passed to m_ether_tcpip_hash() by mlx5e_select_queue(). */
static uint32_t mlx5e_hash_value;

/*
 * SYSINIT callback storing the value returned by
 * m_ether_tcpip_hash_init(). The argument is not used.
 */
static void
mlx5e_hash_init(void *arg)
{
	const uint32_t seed = m_ether_tcpip_hash_init();

	mlx5e_hash_value = seed;
}

/*
 * Have the kernel call mlx5e_hash_init() after the random subsystem
 * has finished initializing.
 */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
#endif
88 
89 static struct mlx5e_sq *
90 mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb)
91 {
92 	struct m_snd_tag *mb_tag;
93 	struct mlx5e_sq *sq;
94 
95 	mb_tag = mb->m_pkthdr.snd_tag;
96 
97 #ifdef KERN_TLS
98 top:
99 #endif
100 	/* get pointer to sendqueue */
101 	switch (mb_tag->sw->type) {
102 #ifdef RATELIMIT
103 	case IF_SND_TAG_TYPE_RATE_LIMIT:
104 		sq = container_of(mb_tag,
105 		    struct mlx5e_rl_channel, tag)->sq;
106 		break;
107 #ifdef KERN_TLS
108 	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
109 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
110 		goto top;
111 #endif
112 #endif
113 	case IF_SND_TAG_TYPE_UNLIMITED:
114 		sq = &container_of(mb_tag,
115 		    struct mlx5e_channel, tag)->sq[0];
116 		KASSERT((mb_tag->refcount > 0),
117 		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
118 		break;
119 #ifdef KERN_TLS
120 	case IF_SND_TAG_TYPE_TLS:
121 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
122 		goto top;
123 #endif
124 	default:
125 		sq = NULL;
126 		break;
127 	}
128 
129 	/* check if valid */
130 	if (sq != NULL && READ_ONCE(sq->running) != 0)
131 		return (sq);
132 
133 	return (NULL);
134 }
135 
136 static struct mlx5e_sq *
137 mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
138 {
139 	struct mlx5e_priv *priv = ifp->if_softc;
140 	struct mlx5e_sq *sq;
141 	u32 ch;
142 	u32 tc;
143 
144 	/* obtain VLAN information if present */
145 	if (mb->m_flags & M_VLANTAG) {
146 		tc = (mb->m_pkthdr.ether_vtag >> 13);
147 		if (tc >= priv->num_tc)
148 			tc = priv->default_vlan_prio;
149 	} else {
150 		tc = priv->default_vlan_prio;
151 	}
152 
153 	ch = priv->params.num_channels;
154 
155 	/* check if flowid is set */
156 	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
157 #ifdef RSS
158 		u32 temp;
159 
160 		if (rss_hash2bucket(mb->m_pkthdr.flowid,
161 		    M_HASHTYPE_GET(mb), &temp) == 0)
162 			ch = temp % ch;
163 		else
164 #endif
165 			ch = (mb->m_pkthdr.flowid % 128) % ch;
166 	} else {
167 #if (__FreeBSD_version >= 1100000)
168 		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
169 		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
170 #else
171 		/*
172 		 * m_ether_tcpip_hash not present in stable, so just
173 		 * throw unhashed mbufs on queue 0
174 		 */
175 		ch = 0;
176 #endif
177 	}
178 
179 	/* check if send queue is running */
180 	sq = &priv->channel[ch].sq[tc];
181 	if (likely(READ_ONCE(sq->running) != 0))
182 		return (sq);
183 	return (NULL);
184 }
185 
186 static inline u16
187 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
188 {
189 	struct ether_vlan_header *eh;
190 	uint16_t eth_type;
191 	int min_inline;
192 
193 	eh = mtod(mb, struct ether_vlan_header *);
194 	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
195 		goto max_inline;
196 	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
197 		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
198 			goto max_inline;
199 		eth_type = ntohs(eh->evl_proto);
200 		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
201 	} else {
202 		eth_type = ntohs(eh->evl_encap_proto);
203 		min_inline = ETHER_HDR_LEN;
204 	}
205 
206 	switch (eth_type) {
207 	case ETHERTYPE_IP:
208 	case ETHERTYPE_IPV6:
209 		/*
210 		 * Make sure the TOS(IPv4) or traffic class(IPv6)
211 		 * field gets inlined. Else the SQ may stall.
212 		 */
213 		min_inline += 4;
214 		break;
215 	default:
216 		goto max_inline;
217 	}
218 
219 	/*
220 	 * m_copydata() will be used on the remaining header which
221 	 * does not need to reside within the first m_len bytes of
222 	 * data:
223 	 */
224 	if (mb->m_pkthdr.len < min_inline)
225 		goto max_inline;
226 	return (min_inline);
227 
228 max_inline:
229 	return (MIN(mb->m_pkthdr.len, sq->max_inline));
230 }
231 
232 /*
233  * This function parse IPv4 and IPv6 packets looking for TCP and UDP
234  * headers.
235  *
236  * Upon return the pointer at which the "ppth" argument points, is set
237  * to the location of the TCP header. NULL is used if no TCP header is
238  * present.
239  *
240  * The return value indicates the number of bytes from the beginning
241  * of the packet until the first byte after the TCP or UDP header. If
242  * this function returns zero, the parsing failed.
243  */
244 int
245 mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
246 {
247 	const struct ether_vlan_header *eh;
248 	const struct tcphdr *th;
249 	const struct ip *ip;
250 	int ip_hlen, tcp_hlen;
251 	const struct ip6_hdr *ip6;
252 	uint16_t eth_type;
253 	int eth_hdr_len;
254 
255 	eh = mtod(mb, const struct ether_vlan_header *);
256 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
257 		goto failure;
258 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
259 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
260 			goto failure;
261 		eth_type = ntohs(eh->evl_proto);
262 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
263 	} else {
264 		eth_type = ntohs(eh->evl_encap_proto);
265 		eth_hdr_len = ETHER_HDR_LEN;
266 	}
267 
268 	switch (eth_type) {
269 	case ETHERTYPE_IP:
270 		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
271 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
272 			goto failure;
273 		switch (ip->ip_p) {
274 		case IPPROTO_TCP:
275 			ip_hlen = ip->ip_hl << 2;
276 			eth_hdr_len += ip_hlen;
277 			goto tcp_packet;
278 		case IPPROTO_UDP:
279 			ip_hlen = ip->ip_hl << 2;
280 			eth_hdr_len += ip_hlen + sizeof(struct udphdr);
281 			th = NULL;
282 			goto udp_packet;
283 		default:
284 			goto failure;
285 		}
286 		break;
287 	case ETHERTYPE_IPV6:
288 		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
289 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
290 			goto failure;
291 		switch (ip6->ip6_nxt) {
292 		case IPPROTO_TCP:
293 			eth_hdr_len += sizeof(*ip6);
294 			goto tcp_packet;
295 		case IPPROTO_UDP:
296 			eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr);
297 			th = NULL;
298 			goto udp_packet;
299 		default:
300 			goto failure;
301 		}
302 		break;
303 	default:
304 		goto failure;
305 	}
306 tcp_packet:
307 	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
308 		const struct mbuf *m_th = mb->m_next;
309 		if (unlikely(mb->m_len != eth_hdr_len ||
310 		    m_th == NULL || m_th->m_len < sizeof(*th)))
311 			goto failure;
312 		th = (const struct tcphdr *)(m_th->m_data);
313 	} else {
314 		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
315 	}
316 	tcp_hlen = th->th_off << 2;
317 	eth_hdr_len += tcp_hlen;
318 udp_packet:
319 	/*
320 	 * m_copydata() will be used on the remaining header which
321 	 * does not need to reside within the first m_len bytes of
322 	 * data:
323 	 */
324 	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
325 		goto failure;
326 	if (ppth != NULL)
327 		*ppth = th;
328 	return (eth_hdr_len);
329 failure:
330 	if (ppth != NULL)
331 		*ppth = NULL;
332 	return (0);
333 }
334 
335 /*
336  * Locate a pointer inside a mbuf chain. Returns NULL upon failure.
337  */
338 static inline void *
339 mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len,
340     int min_len)
341 {
342 	if (unlikely(mb[0]->m_len == eth_hdr_len)) {
343 		poffset[0] = eth_hdr_len;
344 		if (unlikely((mb[0] = mb[0]->m_next) == NULL))
345 			return (NULL);
346 	}
347 	if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len))
348 		return (NULL);
349 	return (mb[0]->m_data + eth_hdr_len - poffset[0]);
350 }
351 
352 /*
353  * This function parse IPv4 and IPv6 packets looking for UDP, VXLAN
354  * and TCP headers.
355  *
356  * The return value indicates the number of bytes from the beginning
357  * of the packet until the first byte after the TCP header. If this
358  * function returns zero, the parsing failed.
359  */
360 static int
361 mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe,
362     uint8_t cs_mask, uint8_t opcode)
363 {
364 	const struct ether_vlan_header *eh;
365 	struct ip *ip4;
366 	struct ip6_hdr *ip6;
367 	struct tcphdr *th;
368 	struct udphdr *udp;
369 	bool has_outer_vlan_tag;
370 	uint16_t eth_type;
371 	uint8_t ip_type;
372 	int pkt_hdr_len;
373 	int eth_hdr_len;
374 	int tcp_hlen;
375 	int ip_hlen;
376 	int offset;
377 
378 	pkt_hdr_len = mb->m_pkthdr.len;
379 	has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0;
380 	offset = 0;
381 
382 	eh = mtod(mb, const struct ether_vlan_header *);
383 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
384 		return (0);
385 
386 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
387 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
388 			return (0);
389 		eth_type = eh->evl_proto;
390 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
391 	} else {
392 		eth_type = eh->evl_encap_proto;
393 		eth_hdr_len = ETHER_HDR_LEN;
394 	}
395 
396 	switch (eth_type) {
397 	case htons(ETHERTYPE_IP):
398 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
399 		    sizeof(*ip4));
400 		if (unlikely(ip4 == NULL))
401 			return (0);
402 		ip_type = ip4->ip_p;
403 		if (unlikely(ip_type != IPPROTO_UDP))
404 			return (0);
405 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
406 		wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
407 		ip_hlen = ip4->ip_hl << 2;
408 		eth_hdr_len += ip_hlen;
409 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
410 		    sizeof(*udp));
411 		if (unlikely(udp == NULL))
412 			return (0);
413 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
414 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE;
415 		eth_hdr_len += sizeof(*udp);
416 		break;
417 	case htons(ETHERTYPE_IPV6):
418 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
419 		    sizeof(*ip6));
420 		if (unlikely(ip6 == NULL))
421 			return (0);
422 		ip_type = ip6->ip6_nxt;
423 		if (unlikely(ip_type != IPPROTO_UDP))
424 			return (0);
425 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
426 		wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM;
427 		eth_hdr_len += sizeof(*ip6);
428 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
429 		    sizeof(*udp));
430 		if (unlikely(udp == NULL))
431 			return (0);
432 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
433 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE |
434 		    MLX5_ETH_WQE_SWP_OUTER_L3_TYPE;
435 		eth_hdr_len += sizeof(*udp);
436 		break;
437 	default:
438 		return (0);
439 	}
440 
441 	/*
442 	 * If the hardware is not computing inner IP checksum, then
443 	 * skip inlining the inner outer UDP and VXLAN header:
444 	 */
445 	if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0))
446 		goto done;
447 	if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
448 	    8) == NULL))
449 		return (0);
450 	eth_hdr_len += 8;
451 
452 	/* Check for ethernet header again. */
453 	eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN);
454 	if (unlikely(eh == NULL))
455 		return (0);
456 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
457 		if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN +
458 		    ETHER_VLAN_ENCAP_LEN))
459 			return (0);
460 		eth_type = eh->evl_proto;
461 		eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
462 	} else {
463 		eth_type = eh->evl_encap_proto;
464 		eth_hdr_len += ETHER_HDR_LEN;
465 	}
466 
467 	/* Check for IP header again. */
468 	switch (eth_type) {
469 	case htons(ETHERTYPE_IP):
470 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
471 		    sizeof(*ip4));
472 		if (unlikely(ip4 == NULL))
473 			return (0);
474 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
475 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
476 		ip_type = ip4->ip_p;
477 		ip_hlen = ip4->ip_hl << 2;
478 		eth_hdr_len += ip_hlen;
479 		break;
480 	case htons(ETHERTYPE_IPV6):
481 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
482 		    sizeof(*ip6));
483 		if (unlikely(ip6 == NULL))
484 			return (0);
485 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
486 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE;
487 		ip_type = ip6->ip6_nxt;
488 		eth_hdr_len += sizeof(*ip6);
489 		break;
490 	default:
491 		return (0);
492 	}
493 
494 	/*
495 	 * If the hardware is not computing inner UDP/TCP checksum,
496 	 * then skip inlining the inner UDP/TCP header:
497 	 */
498 	if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0))
499 		goto done;
500 
501 	switch (ip_type) {
502 	case IPPROTO_UDP:
503 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
504 		    sizeof(*udp));
505 		if (unlikely(udp == NULL))
506 			return (0);
507 		wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2);
508 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
509 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
510 		eth_hdr_len += sizeof(*udp);
511 		break;
512 	case IPPROTO_TCP:
513 		th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
514 		    sizeof(*th));
515 		if (unlikely(th == NULL))
516 			return (0);
517 		wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2;
518 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
519 		tcp_hlen = th->th_off << 2;
520 		eth_hdr_len += tcp_hlen;
521 		break;
522 	default:
523 		return (0);
524 	}
525 done:
526 	if (unlikely(pkt_hdr_len < eth_hdr_len))
527 		return (0);
528 
529 	/* Account for software inserted VLAN tag, if any. */
530 	if (unlikely(has_outer_vlan_tag)) {
531 		wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
532 		wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
533 		wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
534 		wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
535 	}
536 
537 	/*
538 	 * When inner checksums are set, outer L4 checksum flag must
539 	 * be disabled.
540 	 */
541 	if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM |
542 	    MLX5_ETH_WQE_L4_INNER_CSUM))
543 		wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM;
544 
545 	return (eth_hdr_len);
546 }
547 
548 struct mlx5_wqe_dump_seg {
549 	struct mlx5_wqe_ctrl_seg ctrl;
550 	struct mlx5_wqe_data_seg data;
551 } __aligned(MLX5_SEND_WQE_BB);
552 
553 CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);
554 
555 int
556 mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
557 {
558 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
559 	struct mlx5_wqe_dump_seg *wqe;
560 	struct mlx5_wqe_dump_seg *wqe_last;
561 	int nsegs;
562 	int xsegs;
563 	u32 off;
564 	u32 msb;
565 	int err;
566 	int x;
567 	struct mbuf *mb;
568 	const u32 ds_cnt = 2;
569 	u16 pi;
570 	const u8 opcode = MLX5_OPCODE_DUMP;
571 
572 	/* get pointer to mbuf */
573 	mb = *mbp;
574 
575 	/* get producer index */
576 	pi = sq->pc & sq->wq.sz_m1;
577 
578 	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
579 	sq->mbuf[pi].num_wqebbs = 0;
580 
581 	/* check number of segments in mbuf */
582 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
583 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
584 	if (err == EFBIG) {
585 		/* update statistics */
586 		sq->stats.defragged++;
587 		/* too many mbuf fragments */
588 		mb = m_defrag(*mbp, M_NOWAIT);
589 		if (mb == NULL) {
590 			mb = *mbp;
591 			goto tx_drop;
592 		}
593 		/* try again */
594 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
595 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
596 	}
597 
598 	if (err != 0)
599 		goto tx_drop;
600 
601 	/* make sure all mbuf data, if any, is visible to the bus */
602 	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
603 	    BUS_DMASYNC_PREWRITE);
604 
605 	/* compute number of real DUMP segments */
606 	msb = sq->priv->params_ethtool.hw_mtu_msb;
607 	for (x = xsegs = 0; x != nsegs; x++)
608 		xsegs += howmany((u32)segs[x].ds_len, msb);
609 
610 	/* check if there are no segments */
611 	if (unlikely(xsegs == 0)) {
612 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
613 		m_freem(mb);
614 		*mbp = NULL;	/* safety clear */
615 		return (0);
616 	}
617 
618 	/* return ENOBUFS if the queue is full */
619 	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
620 		sq->stats.enobuf++;
621 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
622 		m_freem(mb);
623 		*mbp = NULL;	/* safety clear */
624 		return (ENOBUFS);
625 	}
626 
627 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
628 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);
629 
630 	for (x = 0; x != nsegs; x++) {
631 		for (off = 0; off < segs[x].ds_len; off += msb) {
632 			u32 len = segs[x].ds_len - off;
633 
634 			/* limit length */
635 			if (likely(len > msb))
636 				len = msb;
637 
638 			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
639 
640 			/* fill control segment */
641 			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
642 			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
643 			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);
644 
645 			/* fill data segment */
646 			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
647 			wqe->data.lkey = sq->mkey_be;
648 			wqe->data.byte_count = cpu_to_be32(len);
649 
650 			/* advance to next building block */
651 			if (unlikely(wqe == wqe_last))
652 				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
653 			else
654 				wqe++;
655 
656 			sq->mbuf[pi].num_wqebbs++;
657 			sq->pc++;
658 		}
659 	}
660 
661 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
662 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);
663 
664 	/* put in place data fence */
665 	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;
666 
667 	/* check if we should generate a completion event */
668 	if (mlx5e_do_send_cqe_inline(sq))
669 		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
670 
671 	/* copy data for doorbell */
672 	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));
673 
674 	/* store pointer to mbuf */
675 	sq->mbuf[pi].mbuf = mb;
676 	sq->mbuf[pi].p_refcount = parg->pref;
677 	atomic_add_int(parg->pref, 1);
678 
679 	/* count all traffic going out */
680 	sq->stats.packets++;
681 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
682 
683 	*mbp = NULL;	/* safety clear */
684 	return (0);
685 
686 tx_drop:
687 	sq->stats.dropped++;
688 	*mbp = NULL;
689 	m_freem(mb);
690 	return err;
691 }
692 
693 int
694 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
695 {
696 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
697 	struct mlx5e_xmit_args args = {};
698 	struct mlx5_wqe_data_seg *dseg;
699 	struct mlx5e_tx_wqe *wqe;
700 	struct ifnet *ifp;
701 	int nsegs;
702 	int err;
703 	int x;
704 	struct mbuf *mb;
705 	u16 ds_cnt;
706 	u16 pi;
707 	u8 opcode;
708 
709 #ifdef KERN_TLS
710 top:
711 #endif
712 	/* Return ENOBUFS if the queue is full */
713 	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
714 		sq->stats.enobuf++;
715 		return (ENOBUFS);
716 	}
717 
718 	/* Align SQ edge with NOPs to avoid WQE wrap around */
719 	pi = ((~sq->pc) & sq->wq.sz_m1);
720 	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
721 		/* Send one multi NOP message instead of many */
722 		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
723 		pi = ((~sq->pc) & sq->wq.sz_m1);
724 		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
725 			sq->stats.enobuf++;
726 			return (ENOMEM);
727 		}
728 	}
729 
730 #ifdef KERN_TLS
731 	/* Special handling for TLS packets, if any */
732 	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
733 	case MLX5E_TLS_LOOP:
734 		goto top;
735 	case MLX5E_TLS_FAILURE:
736 		mb = *mbp;
737 		err = ENOMEM;
738 		goto tx_drop;
739 	case MLX5E_TLS_DEFERRED:
740 		return (0);
741 	case MLX5E_TLS_CONTINUE:
742 	default:
743 		break;
744 	}
745 #endif
746 
747 	/* Setup local variables */
748 	pi = sq->pc & sq->wq.sz_m1;
749 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
750 	ifp = sq->ifp;
751 
752 	memset(wqe, 0, sizeof(*wqe));
753 
754 	/* get pointer to mbuf */
755 	mb = *mbp;
756 
757 	/* Send a copy of the frame to the BPF listener, if any */
758 	if (ifp != NULL && ifp->if_bpf != NULL)
759 		ETHER_BPF_MTAP(ifp, mb);
760 
761 	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
762 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
763 	}
764 	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
765 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
766 	}
767 	if (wqe->eth.cs_flags == 0) {
768 		sq->stats.csum_offload_none++;
769 	}
770 	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
771 		u32 payload_len;
772 		u32 mss = mb->m_pkthdr.tso_segsz;
773 		u32 num_pkts;
774 
775 		wqe->eth.mss = cpu_to_be16(mss);
776 		opcode = MLX5_OPCODE_LSO;
777 		if (args.ihs == 0)
778 			args.ihs = mlx5e_get_full_header_size(mb, NULL);
779 		if (unlikely(args.ihs == 0)) {
780 			err = EINVAL;
781 			goto tx_drop;
782 		}
783 		payload_len = mb->m_pkthdr.len - args.ihs;
784 		if (payload_len == 0)
785 			num_pkts = 1;
786 		else
787 			num_pkts = DIV_ROUND_UP(payload_len, mss);
788 		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);
789 
790 
791 		sq->stats.tso_packets++;
792 		sq->stats.tso_bytes += payload_len;
793 	} else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
794 		/* check for inner TCP TSO first */
795 		if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO |
796 		    CSUM_INNER_IP6_TSO)) {
797 			u32 payload_len;
798 			u32 mss = mb->m_pkthdr.tso_segsz;
799 			u32 num_pkts;
800 
801 			wqe->eth.mss = cpu_to_be16(mss);
802 			opcode = MLX5_OPCODE_LSO;
803 
804 			if (likely(args.ihs == 0)) {
805 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
806 				       MLX5_ETH_WQE_L3_INNER_CSUM |
807 				       MLX5_ETH_WQE_L4_INNER_CSUM |
808 				       MLX5_ETH_WQE_L4_CSUM |
809 				       MLX5_ETH_WQE_L3_CSUM,
810 				       opcode);
811 				if (unlikely(args.ihs == 0)) {
812 					err = EINVAL;
813 					goto tx_drop;
814 				}
815 			}
816 
817 			payload_len = mb->m_pkthdr.len - args.ihs;
818 			if (payload_len == 0)
819 				num_pkts = 1;
820 			else
821 				num_pkts = DIV_ROUND_UP(payload_len, mss);
822 			sq->mbuf[pi].num_bytes = payload_len +
823 			    num_pkts * args.ihs;
824 
825 			sq->stats.tso_packets++;
826 			sq->stats.tso_bytes += payload_len;
827 		} else {
828 			opcode = MLX5_OPCODE_SEND;
829 
830 			if (likely(args.ihs == 0)) {
831 				uint8_t cs_mask;
832 
833 				if (mb->m_pkthdr.csum_flags &
834 				    (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP |
835 				     CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) {
836 					cs_mask =
837 					    MLX5_ETH_WQE_L3_INNER_CSUM |
838 					    MLX5_ETH_WQE_L4_INNER_CSUM |
839 					    MLX5_ETH_WQE_L4_CSUM |
840 					    MLX5_ETH_WQE_L3_CSUM;
841 				} else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) {
842 					cs_mask =
843 					    MLX5_ETH_WQE_L3_INNER_CSUM |
844 					    MLX5_ETH_WQE_L4_CSUM |
845 					    MLX5_ETH_WQE_L3_CSUM;
846 				} else {
847 					cs_mask =
848 					    MLX5_ETH_WQE_L4_CSUM |
849 					    MLX5_ETH_WQE_L3_CSUM;
850 				}
851 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
852 				    cs_mask, opcode);
853 				if (unlikely(args.ihs == 0)) {
854 					err = EINVAL;
855 					goto tx_drop;
856 				}
857 			}
858 
859 			sq->mbuf[pi].num_bytes = max_t (unsigned int,
860 			    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
861 		}
862 	} else {
863 		opcode = MLX5_OPCODE_SEND;
864 
865 		if (args.ihs == 0) {
866 			switch (sq->min_inline_mode) {
867 			case MLX5_INLINE_MODE_IP:
868 			case MLX5_INLINE_MODE_TCP_UDP:
869 				args.ihs = mlx5e_get_full_header_size(mb, NULL);
870 				if (unlikely(args.ihs == 0))
871 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
872 				break;
873 			case MLX5_INLINE_MODE_L2:
874 				args.ihs = mlx5e_get_l2_header_size(sq, mb);
875 				break;
876 			case MLX5_INLINE_MODE_NONE:
877 				/* FALLTHROUGH */
878 			default:
879 				if ((mb->m_flags & M_VLANTAG) != 0 &&
880 				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
881 					/* inlining VLAN data is not required */
882 					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
883 					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
884 					args.ihs = 0;
885 				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
886 				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
887 					/* inlining non-VLAN data is not required */
888 					args.ihs = 0;
889 				} else {
890 					/* we are forced to inlining L2 header, if any */
891 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
892 				}
893 				break;
894 			}
895 		}
896 		sq->mbuf[pi].num_bytes = max_t (unsigned int,
897 		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
898 	}
899 
900 	if (likely(args.ihs == 0)) {
901 		/* nothing to inline */
902 	} else if ((mb->m_flags & M_VLANTAG) != 0) {
903 		struct ether_vlan_header *eh = (struct ether_vlan_header *)
904 		    wqe->eth.inline_hdr_start;
905 
906 		/* Range checks */
907 		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
908 			if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) {
909 				err = EINVAL;
910 				goto tx_drop;
911 			}
912 			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
913 		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
914 			err = EINVAL;
915 			goto tx_drop;
916 		}
917 		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
918 		m_adj(mb, ETHER_HDR_LEN);
919 		/* Insert 4 bytes VLAN tag into data stream */
920 		eh->evl_proto = eh->evl_encap_proto;
921 		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
922 		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
923 		/* Copy rest of header data, if any */
924 		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
925 		m_adj(mb, args.ihs - ETHER_HDR_LEN);
926 		/* Extend header by 4 bytes */
927 		args.ihs += ETHER_VLAN_ENCAP_LEN;
928 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
929 	} else {
930 		/* check if inline header size is too big */
931 		if (unlikely(args.ihs > sq->max_inline)) {
932 			if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO |
933 			    CSUM_ENCAP_VXLAN))) {
934 				err = EINVAL;
935 				goto tx_drop;
936 			}
937 			args.ihs = sq->max_inline;
938 		}
939 		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
940 		m_adj(mb, args.ihs);
941 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
942 	}
943 
944 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
945 	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
946 		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
947 		    MLX5_SEND_WQE_DS);
948 	}
949 	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
950 
951 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
952 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
953 	if (err == EFBIG) {
954 		/* Update statistics */
955 		sq->stats.defragged++;
956 		/* Too many mbuf fragments */
957 		mb = m_defrag(*mbp, M_NOWAIT);
958 		if (mb == NULL) {
959 			mb = *mbp;
960 			goto tx_drop;
961 		}
962 		/* Try again */
963 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
964 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
965 	}
966 	/* Catch errors */
967 	if (err != 0)
968 		goto tx_drop;
969 
970 	/* Make sure all mbuf data, if any, is visible to the bus */
971 	if (nsegs != 0) {
972 		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
973 		    BUS_DMASYNC_PREWRITE);
974 	} else {
975 		/* All data was inlined, free the mbuf. */
976 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
977 		m_freem(mb);
978 		mb = NULL;
979 	}
980 
981 	for (x = 0; x != nsegs; x++) {
982 		if (segs[x].ds_len == 0)
983 			continue;
984 		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
985 		dseg->lkey = sq->mkey_be;
986 		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
987 		dseg++;
988 	}
989 
990 	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
991 
992 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
993 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
994 	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);
995 
996 	if (mlx5e_do_send_cqe_inline(sq))
997 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
998 	else
999 		wqe->ctrl.fm_ce_se = 0;
1000 
1001 	/* Copy data for doorbell */
1002 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
1003 
1004 	/* Store pointer to mbuf */
1005 	sq->mbuf[pi].mbuf = mb;
1006 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
1007 	sq->mbuf[pi].p_refcount = args.pref;
1008 	if (unlikely(args.pref != NULL))
1009 		atomic_add_int(args.pref, 1);
1010 	sq->pc += sq->mbuf[pi].num_wqebbs;
1011 
1012 	/* Count all traffic going out */
1013 	sq->stats.packets++;
1014 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
1015 
1016 	*mbp = NULL;	/* safety clear */
1017 	return (0);
1018 
1019 tx_drop:
1020 	sq->stats.dropped++;
1021 	*mbp = NULL;
1022 	m_freem(mb);
1023 	return err;
1024 }
1025 
1026 static void
1027 mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
1028 {
1029 	u16 sqcc;
1030 
1031 	/*
1032 	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
1033 	 * otherwise a cq overrun may occur
1034 	 */
1035 	sqcc = sq->cc;
1036 
1037 	while (budget > 0) {
1038 		struct mlx5_cqe64 *cqe;
1039 		struct mbuf *mb;
1040 		bool match;
1041 		u16 sqcc_this;
1042 		u16 delta;
1043 		u16 x;
1044 		u16 ci;
1045 
1046 		cqe = mlx5e_get_cqe(&sq->cq);
1047 		if (!cqe)
1048 			break;
1049 
1050 		mlx5_cqwq_pop(&sq->cq.wq);
1051 
1052 		/* check if the completion event indicates an error */
1053 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
1054 			sq->stats.cqe_err++;
1055 
1056 		/* setup local variables */
1057 		sqcc_this = be16toh(cqe->wqe_counter);
1058 		match = false;
1059 
1060 		/* update budget according to the event factor */
1061 		budget -= sq->cev_factor;
1062 
1063 		for (x = 0;; x++) {
1064 			if (unlikely(match != false)) {
1065 				break;
1066 			} else if (unlikely(x == sq->cev_factor)) {
1067 				/* WQE counter match not found */
1068 				sq->stats.cqe_err++;
1069 				break;
1070 			}
1071 			ci = sqcc & sq->wq.sz_m1;
1072 			delta = sqcc_this - sqcc;
1073 			match = (delta < sq->mbuf[ci].num_wqebbs);
1074 			mb = sq->mbuf[ci].mbuf;
1075 			sq->mbuf[ci].mbuf = NULL;
1076 
1077 			if (unlikely(sq->mbuf[ci].p_refcount != NULL)) {
1078 				atomic_add_int(sq->mbuf[ci].p_refcount, -1);
1079 				sq->mbuf[ci].p_refcount = NULL;
1080 			}
1081 
1082 			if (mb == NULL) {
1083 				if (unlikely(sq->mbuf[ci].num_bytes == 0))
1084 					sq->stats.nop++;
1085 			} else {
1086 				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
1087 				    BUS_DMASYNC_POSTWRITE);
1088 				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
1089 
1090 				/* Free transmitted mbuf */
1091 				m_freem(mb);
1092 			}
1093 			sqcc += sq->mbuf[ci].num_wqebbs;
1094 		}
1095 	}
1096 
1097 	mlx5_cqwq_update_db_record(&sq->cq.wq);
1098 
1099 	/* Ensure cq space is freed before enabling more cqes */
1100 	atomic_thread_fence_rel();
1101 
1102 	sq->cc = sqcc;
1103 }
1104 
1105 static int
1106 mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
1107 {
1108 	int err = 0;
1109 
1110 	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1111 	    READ_ONCE(sq->running) == 0)) {
1112 		m_freem(mb);
1113 		return (ENETDOWN);
1114 	}
1115 
1116 	/* Do transmit */
1117 	if (mlx5e_sq_xmit(sq, &mb) != 0) {
1118 		/* NOTE: m_freem() is NULL safe */
1119 		m_freem(mb);
1120 		err = ENOBUFS;
1121 	}
1122 
1123 	/* Check if we need to write the doorbell */
1124 	if (likely(sq->doorbell.d64 != 0)) {
1125 		mlx5e_tx_notify_hw(sq, sq->doorbell.d32);
1126 		sq->doorbell.d64 = 0;
1127 	}
1128 
1129 	/*
1130 	 * Check if we need to start the event timer which flushes the
1131 	 * transmit ring on timeout:
1132 	 */
1133 	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
1134 	    sq->cev_factor != 1)) {
1135 		/* start the timer */
1136 		mlx5e_sq_cev_timeout(sq);
1137 	} else {
1138 		/* don't send NOPs yet */
1139 		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1140 	}
1141 	return (err);
1142 }
1143 
1144 int
1145 mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
1146 {
1147 	struct mlx5e_sq *sq;
1148 	int ret;
1149 
1150 	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
1151 		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
1152 		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
1153 		if (unlikely(sq == NULL)) {
1154 			goto select_queue;
1155 		}
1156 	} else {
1157 select_queue:
1158 		sq = mlx5e_select_queue(ifp, mb);
1159 		if (unlikely(sq == NULL)) {
1160 			/* Free mbuf */
1161 			m_freem(mb);
1162 
1163 			/* Invalid send queue */
1164 			return (ENXIO);
1165 		}
1166 	}
1167 
1168 	mtx_lock(&sq->lock);
1169 	ret = mlx5e_xmit_locked(ifp, sq, mb);
1170 	mtx_unlock(&sq->lock);
1171 
1172 	return (ret);
1173 }
1174 
1175 void
1176 mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
1177 {
1178 	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
1179 
1180 	mtx_lock(&sq->comp_lock);
1181 	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
1182 	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
1183 	mtx_unlock(&sq->comp_lock);
1184 }
1185