xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision 5eb61f6c6549f134a4f3bed4c164345d4f616bad)
1 /*-
2  * Copyright (c) 2015-2019 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_kern_tls.h"
29 
30 #include "en.h"
31 #include <machine/atomic.h>
32 
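/*
 * Completion event moderation: request a completion queue entry (CQE)
 * only for every "cev_factor"-th work request instead of for each one,
 * to reduce completion processing overhead.
 */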
33 static inline bool
34 mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
35 {
36 	sq->cev_counter++;
37 	/* interleave the CQEs */
38 	if (sq->cev_counter >= sq->cev_factor) {
39 		sq->cev_counter = 0;
40 		return (true);
41 	}
42 	return (false);
43 }
44 
45 bool
46 mlx5e_do_send_cqe(struct mlx5e_sq *sq)
47 {
48 
49 	return (mlx5e_do_send_cqe_inline(sq));
50 }
51 
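/*
 * Post a NOP work request consuming "ds_cnt" data segments. This is
 * used to pad the send queue up to a WQE boundary and to flush out
 * pending completion events.
 */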
52 void
53 mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
54 {
55 	u16 pi = sq->pc & sq->wq.sz_m1;
56 	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
57 
58 	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
59 
60 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
61 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
62 	if (mlx5e_do_send_cqe_inline(sq))
63 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
64 	else
65 		wqe->ctrl.fm_ce_se = 0;
66 
67 	/* Copy data for doorbell */
68 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
69 
70 	sq->mbuf[pi].mbuf = NULL;
71 	sq->mbuf[pi].num_bytes = 0;
72 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
73 	sq->pc += sq->mbuf[pi].num_wqebbs;
74 }
75 
76 #if (__FreeBSD_version >= 1100000)
77 static uint32_t mlx5e_hash_value;
78 
79 static void
80 mlx5e_hash_init(void *arg)
81 {
82 	mlx5e_hash_value = m_ether_tcpip_hash_init();
83 }
84 
85 /* Make the kernel call mlx5e_hash_init() after the random subsystem has finished initializing */
86 SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
87 #endif
88 
89 static struct mlx5e_sq *
90 mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb)
91 {
92 	struct m_snd_tag *mb_tag;
93 	struct mlx5e_sq *sq;
94 
95 	mb_tag = mb->m_pkthdr.snd_tag;
96 
97 #ifdef KERN_TLS
98 top:
99 #endif
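	/*
	 * TLS send tags wrap another send tag (a rate-limit or an
	 * unlimited channel tag); for those, unwrap the inner tag and
	 * restart the lookup at "top".
	 */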
100 	/* get pointer to the send queue */
101 	switch (mb_tag->type) {
102 #ifdef RATELIMIT
103 	case IF_SND_TAG_TYPE_RATE_LIMIT:
104 		sq = container_of(mb_tag,
105 		    struct mlx5e_rl_channel, tag)->sq;
106 		break;
107 #ifdef KERN_TLS
108 	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
109 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
110 		goto top;
111 #endif
112 #endif
113 	case IF_SND_TAG_TYPE_UNLIMITED:
114 		sq = &container_of(mb_tag,
115 		    struct mlx5e_channel, tag)->sq[0];
116 		KASSERT((mb_tag->refcount > 0),
117 		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
118 		break;
119 #ifdef KERN_TLS
120 	case IF_SND_TAG_TYPE_TLS:
121 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
122 		goto top;
123 #endif
124 	default:
125 		sq = NULL;
126 		break;
127 	}
128 
129 	/* check that the send queue is valid and running */
130 	if (sq != NULL && READ_ONCE(sq->running) != 0)
131 		return (sq);
132 
133 	return (NULL);
134 }
135 
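/*
 * Select a send queue for the given mbuf: the traffic class is taken
 * from the VLAN priority bits, if present, and the channel is derived
 * from the flow ID (RSS hash) or, failing that, from a software hash
 * of the packet headers.
 */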
136 static struct mlx5e_sq *
137 mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
138 {
139 	struct mlx5e_priv *priv = ifp->if_softc;
140 	struct mlx5e_sq *sq;
141 	u32 ch;
142 	u32 tc;
143 
144 	/* obtain VLAN information if present */
145 	if (mb->m_flags & M_VLANTAG) {
146 		tc = (mb->m_pkthdr.ether_vtag >> 13);
147 		if (tc >= priv->num_tc)
148 			tc = priv->default_vlan_prio;
149 	} else {
150 		tc = priv->default_vlan_prio;
151 	}
152 
153 	ch = priv->params.num_channels;
154 
155 	/* check if flowid is set */
156 	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
157 #ifdef RSS
158 		u32 temp;
159 
160 		if (rss_hash2bucket(mb->m_pkthdr.flowid,
161 		    M_HASHTYPE_GET(mb), &temp) == 0)
162 			ch = temp % ch;
163 		else
164 #endif
165 			ch = (mb->m_pkthdr.flowid % 128) % ch;
166 	} else {
167 #if (__FreeBSD_version >= 1100000)
168 		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
169 		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
170 #else
171 		/*
172 		 * m_ether_tcpip_hash() is not present in older stable
173 		 * branches, so just put unhashed mbufs on queue 0
174 		 */
175 		ch = 0;
176 #endif
177 	}
178 
179 	/* check if send queue is running */
180 	sq = &priv->channel[ch].sq[tc];
181 	if (likely(READ_ONCE(sq->running) != 0))
182 		return (sq);
183 	return (NULL);
184 }
185 
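/*
 * Compute how many bytes of the L2 header (and, for IP packets, the
 * start of the L3 header) to inline into the WQE.
 */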
186 static inline u16
187 mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
188 {
189 	struct ether_vlan_header *eh;
190 	uint16_t eth_type;
191 	int min_inline;
192 
193 	eh = mtod(mb, struct ether_vlan_header *);
194 	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
195 		goto max_inline;
196 	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
197 		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
198 			goto max_inline;
199 		eth_type = ntohs(eh->evl_proto);
200 		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
201 	} else {
202 		eth_type = ntohs(eh->evl_encap_proto);
203 		min_inline = ETHER_HDR_LEN;
204 	}
205 
206 	switch (eth_type) {
207 	case ETHERTYPE_IP:
208 	case ETHERTYPE_IPV6:
209 		/*
210 		 * Make sure the TOS (IPv4) or traffic class (IPv6)
211 		 * field gets inlined. Otherwise the SQ may stall.
212 		 */
213 		min_inline += 4;
214 		break;
215 	default:
216 		goto max_inline;
217 	}
218 
219 	/*
220 	 * m_copydata() will be used on the remaining header which
221 	 * does not need to reside within the first m_len bytes of
222 	 * data:
223 	 */
224 	if (mb->m_pkthdr.len < min_inline)
225 		goto max_inline;
226 	return (min_inline);
227 
228 max_inline:
229 	return (MIN(mb->m_pkthdr.len, sq->max_inline));
230 }
231 
232 /*
233  * This function parses IPv4 and IPv6 packets, looking for TCP and UDP
234  * headers.
235  *
236  * Upon return, the pointer that the "ppth" argument points to is set
237  * to the location of the TCP header. NULL is stored if no TCP header
238  * is present.
239  *
240  * The return value indicates the number of bytes from the beginning
241  * of the packet until the first byte after the TCP or UDP header. If
242  * this function returns zero, the parsing failed.
243  */
244 int
245 mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
246 {
247 	const struct ether_vlan_header *eh;
248 	const struct tcphdr *th;
249 	const struct ip *ip;
250 	int ip_hlen, tcp_hlen;
251 	const struct ip6_hdr *ip6;
252 	uint16_t eth_type;
253 	int eth_hdr_len;
254 
255 	eh = mtod(mb, const struct ether_vlan_header *);
256 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
257 		goto failure;
258 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
259 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
260 			goto failure;
261 		eth_type = ntohs(eh->evl_proto);
262 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
263 	} else {
264 		eth_type = ntohs(eh->evl_encap_proto);
265 		eth_hdr_len = ETHER_HDR_LEN;
266 	}
267 
268 	switch (eth_type) {
269 	case ETHERTYPE_IP:
270 		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
271 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
272 			goto failure;
273 		switch (ip->ip_p) {
274 		case IPPROTO_TCP:
275 			ip_hlen = ip->ip_hl << 2;
276 			eth_hdr_len += ip_hlen;
277 			goto tcp_packet;
278 		case IPPROTO_UDP:
279 			ip_hlen = ip->ip_hl << 2;
280 			eth_hdr_len += ip_hlen + sizeof(struct udphdr);
281 			th = NULL;
282 			goto udp_packet;
283 		default:
284 			goto failure;
285 		}
286 		break;
287 	case ETHERTYPE_IPV6:
288 		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
289 		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
290 			goto failure;
291 		switch (ip6->ip6_nxt) {
292 		case IPPROTO_TCP:
293 			eth_hdr_len += sizeof(*ip6);
294 			goto tcp_packet;
295 		case IPPROTO_UDP:
296 			eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr);
297 			th = NULL;
298 			goto udp_packet;
299 		default:
300 			goto failure;
301 		}
302 		break;
303 	default:
304 		goto failure;
305 	}
306 tcp_packet:
307 	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
308 		const struct mbuf *m_th = mb->m_next;
309 		if (unlikely(mb->m_len != eth_hdr_len ||
310 		    m_th == NULL || m_th->m_len < sizeof(*th)))
311 			goto failure;
312 		th = (const struct tcphdr *)(m_th->m_data);
313 	} else {
314 		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
315 	}
316 	tcp_hlen = th->th_off << 2;
317 	eth_hdr_len += tcp_hlen;
318 udp_packet:
319 	/*
320 	 * m_copydata() will be used on the remaining header which
321 	 * does not need to reside within the first m_len bytes of
322 	 * data:
323 	 */
324 	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
325 		goto failure;
326 	if (ppth != NULL)
327 		*ppth = th;
328 	return (eth_hdr_len);
329 failure:
330 	if (ppth != NULL)
331 		*ppth = NULL;
332 	return (0);
333 }
334 
335 /*
336  * Locate a pointer inside a mbuf chain. Returns NULL upon failure.
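 *
 * The "poffset" argument tracks the packet offset at which the current
 * mbuf begins and is updated when the parser advances to the next mbuf
 * in the chain.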
337  */
338 static inline void *
339 mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len,
340     int min_len)
341 {
342 	if (unlikely(mb[0]->m_len == eth_hdr_len)) {
343 		poffset[0] = eth_hdr_len;
344 		if (unlikely((mb[0] = mb[0]->m_next) == NULL))
345 			return (NULL);
346 	}
347 	if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len))
348 		return (NULL);
349 	return (mb[0]->m_data + eth_hdr_len - poffset[0]);
350 }
351 
352 /*
353  * This function parses IPv4 and IPv6 packets, looking for UDP, VXLAN
354  * and TCP headers.
355  *
356  * The return value indicates the number of bytes from the beginning
357  * of the packet until the first byte after the last parsed header.
358  * If this function returns zero, the parsing failed.
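 *
 * As a side effect, the software parser (SWP) offsets and checksum
 * flags in the given WQE are filled in to match the parsed headers.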
359  */
360 static int
361 mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe,
362     uint8_t cs_mask, uint8_t opcode)
363 {
364 	const struct ether_vlan_header *eh;
365 	struct ip *ip4;
366 	struct ip6_hdr *ip6;
367 	struct tcphdr *th;
368 	struct udphdr *udp;
369 	bool has_outer_vlan_tag;
370 	uint16_t eth_type;
371 	uint8_t ip_type;
372 	int pkt_hdr_len;
373 	int eth_hdr_len;
374 	int tcp_hlen;
375 	int ip_hlen;
376 	int offset;
377 
378 	pkt_hdr_len = mb->m_pkthdr.len;
379 	has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0;
380 	offset = 0;
381 
382 	eh = mtod(mb, const struct ether_vlan_header *);
383 	if (unlikely(mb->m_len < ETHER_HDR_LEN))
384 		return (0);
385 
386 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
387 		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
388 			return (0);
389 		eth_type = eh->evl_proto;
390 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
391 	} else {
392 		eth_type = eh->evl_encap_proto;
393 		eth_hdr_len = ETHER_HDR_LEN;
394 	}
395 
396 	switch (eth_type) {
397 	case htons(ETHERTYPE_IP):
398 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
399 		    sizeof(*ip4));
400 		if (unlikely(ip4 == NULL))
401 			return (0);
402 		ip_type = ip4->ip_p;
403 		if (unlikely(ip_type != IPPROTO_UDP))
404 			return (0);
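		/* SWP offsets are expressed in units of two bytes */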
405 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
406 		wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
407 		ip_hlen = ip4->ip_hl << 2;
408 		eth_hdr_len += ip_hlen;
409 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
410 		    sizeof(*udp));
411 		if (unlikely(udp == NULL))
412 			return (0);
413 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
414 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE;
415 		eth_hdr_len += sizeof(*udp);
416 		break;
417 	case htons(ETHERTYPE_IPV6):
418 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
419 		    sizeof(*ip6));
420 		if (unlikely(ip6 == NULL))
421 			return (0);
422 		ip_type = ip6->ip6_nxt;
423 		if (unlikely(ip_type != IPPROTO_UDP))
424 			return (0);
425 		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
426 		wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM;
427 		eth_hdr_len += sizeof(*ip6);
428 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
429 		    sizeof(*udp));
430 		if (unlikely(udp == NULL))
431 			return (0);
432 		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
433 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE |
434 		    MLX5_ETH_WQE_SWP_OUTER_L3_TYPE;
435 		eth_hdr_len += sizeof(*udp);
436 		break;
437 	default:
438 		return (0);
439 	}
440 
441 	/*
442 	 * If the hardware is not computing the inner IP checksum, then
443 	 * skip inlining the VXLAN header and the inner headers:
444 	 */
445 	if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0))
446 		goto done;
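	/* the VXLAN header is 8 bytes long */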
447 	if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
448 	    8) == NULL))
449 		return (0);
450 	eth_hdr_len += 8;
451 
452 	/* Check for ethernet header again. */
453 	eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN);
454 	if (unlikely(eh == NULL))
455 		return (0);
456 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
457 		if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN +
458 		    ETHER_VLAN_ENCAP_LEN))
459 			return (0);
460 		eth_type = eh->evl_proto;
461 		eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
462 	} else {
463 		eth_type = eh->evl_encap_proto;
464 		eth_hdr_len += ETHER_HDR_LEN;
465 	}
466 
467 	/* Check for IP header again. */
468 	switch (eth_type) {
469 	case htons(ETHERTYPE_IP):
470 		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
471 		    sizeof(*ip4));
472 		if (unlikely(ip4 == NULL))
473 			return (0);
474 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
475 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
476 		ip_type = ip4->ip_p;
477 		ip_hlen = ip4->ip_hl << 2;
478 		eth_hdr_len += ip_hlen;
479 		break;
480 	case htons(ETHERTYPE_IPV6):
481 		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
482 		    sizeof(*ip6));
483 		if (unlikely(ip6 == NULL))
484 			return (0);
485 		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
486 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE;
487 		ip_type = ip6->ip6_nxt;
488 		eth_hdr_len += sizeof(*ip6);
489 		break;
490 	default:
491 		return (0);
492 	}
493 
494 	/*
495 	 * If the hardware is not computing the inner UDP/TCP checksum,
496 	 * then skip inlining the inner UDP/TCP header:
497 	 */
498 	if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0))
499 		goto done;
500 
501 	switch (ip_type) {
502 	case IPPROTO_UDP:
503 		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
504 		    sizeof(*udp));
505 		if (unlikely(udp == NULL))
506 			return (0);
507 		wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2);
508 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
509 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
510 		eth_hdr_len += sizeof(*udp);
511 		break;
512 	case IPPROTO_TCP:
513 		th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
514 		    sizeof(*th));
515 		if (unlikely(th == NULL))
516 			return (0);
517 		wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2;
518 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
519 		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
520 		tcp_hlen = th->th_off << 2;
521 		eth_hdr_len += tcp_hlen;
522 		break;
523 	default:
524 		return (0);
525 	}
526 done:
527 	if (unlikely(pkt_hdr_len < eth_hdr_len))
528 		return (0);
529 
530 	/* Account for the software-inserted VLAN tag, if any. */
531 	if (unlikely(has_outer_vlan_tag)) {
532 		wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
533 		wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
534 		wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
535 		wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
536 	}
537 
538 	/*
539 	 * When inner checksums are set, the outer L4 checksum flag
540 	 * must be disabled.
541 	 */
542 	if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM |
543 	    MLX5_ETH_WQE_L4_INNER_CSUM))
544 		wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM;
545 
546 	return (eth_hdr_len);
547 }
548 
549 struct mlx5_wqe_dump_seg {
550 	struct mlx5_wqe_ctrl_seg ctrl;
551 	struct mlx5_wqe_data_seg data;
552 } __aligned(MLX5_SEND_WQE_BB);
553 
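/* a DUMP WQE (control + one data segment, ds_cnt == 2) fits in a single WQEBB */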
554 CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);
555 
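/*
 * Post one or more DUMP work requests for the given mbuf. Each DMA
 * segment is split into chunks of at most "hw_mtu_msb" bytes, and each
 * chunk is posted as its own DUMP WQE. This is used by the TLS
 * transmit offload path.
 */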
556 int
557 mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
558 {
559 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
560 	struct mlx5_wqe_dump_seg *wqe;
561 	struct mlx5_wqe_dump_seg *wqe_last;
562 	int nsegs;
563 	int xsegs;
564 	u32 off;
565 	u32 msb;
566 	int err;
567 	int x;
568 	struct mbuf *mb;
569 	const u32 ds_cnt = 2;
570 	u16 pi;
571 	const u8 opcode = MLX5_OPCODE_DUMP;
572 
573 	/* get pointer to mbuf */
574 	mb = *mbp;
575 
576 	/* get producer index */
577 	pi = sq->pc & sq->wq.sz_m1;
578 
579 	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
580 	sq->mbuf[pi].num_wqebbs = 0;
581 
582 	/* check number of segments in mbuf */
583 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
584 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
585 	if (err == EFBIG) {
586 		/* update statistics */
587 		sq->stats.defragged++;
588 		/* too many mbuf fragments */
589 		mb = m_defrag(*mbp, M_NOWAIT);
590 		if (mb == NULL) {
591 			mb = *mbp;
592 			goto tx_drop;
593 		}
594 		/* try again */
595 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
596 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
597 	}
598 
599 	if (err != 0)
600 		goto tx_drop;
601 
602 	/* make sure all mbuf data, if any, is visible to the bus */
603 	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
604 	    BUS_DMASYNC_PREWRITE);
605 
606 	/* compute number of real DUMP segments */
607 	msb = sq->priv->params_ethtool.hw_mtu_msb;
608 	for (x = xsegs = 0; x != nsegs; x++)
609 		xsegs += howmany((u32)segs[x].ds_len, msb);
610 
611 	/* check if there are no segments */
612 	if (unlikely(xsegs == 0)) {
613 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
614 		m_freem(mb);
615 		*mbp = NULL;	/* safety clear */
616 		return (0);
617 	}
618 
619 	/* return ENOBUFS if the queue is full */
620 	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
621 		sq->stats.enobuf++;
622 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
623 		m_freem(mb);
624 		*mbp = NULL;	/* safety clear */
625 		return (ENOBUFS);
626 	}
627 
628 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
629 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);
630 
631 	for (x = 0; x != nsegs; x++) {
632 		for (off = 0; off < segs[x].ds_len; off += msb) {
633 			u32 len = segs[x].ds_len - off;
634 
635 			/* limit length */
636 			if (likely(len > msb))
637 				len = msb;
638 
639 			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
640 
641 			/* fill control segment */
642 			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
643 			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
644 			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);
645 
646 			/* fill data segment */
647 			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
648 			wqe->data.lkey = sq->mkey_be;
649 			wqe->data.byte_count = cpu_to_be32(len);
650 
651 			/* advance to next building block */
652 			if (unlikely(wqe == wqe_last))
653 				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
654 			else
655 				wqe++;
656 
657 			sq->mbuf[pi].num_wqebbs++;
658 			sq->pc++;
659 		}
660 	}
661 
662 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
663 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);
664 
665 	/* put in place data fence */
666 	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;
667 
668 	/* check if we should generate a completion event */
669 	if (mlx5e_do_send_cqe_inline(sq))
670 		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
671 
672 	/* copy data for doorbell */
673 	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));
674 
675 	/* store pointer to mbuf */
676 	sq->mbuf[pi].mbuf = mb;
677 	sq->mbuf[pi].p_refcount = parg->pref;
678 	atomic_add_int(parg->pref, 1);
679 
680 	/* count all traffic going out */
681 	sq->stats.packets++;
682 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
683 
684 	*mbp = NULL;	/* safety clear */
685 	return (0);
686 
687 tx_drop:
688 	sq->stats.dropped++;
689 	*mbp = NULL;
690 	m_freem(mb);
691 	return (err);
692 }
693 
694 int
695 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
696 {
697 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
698 	struct mlx5e_xmit_args args = {};
699 	struct mlx5_wqe_data_seg *dseg;
700 	struct mlx5e_tx_wqe *wqe;
701 	struct ifnet *ifp;
702 	int nsegs;
703 	int err;
704 	int x;
705 	struct mbuf *mb;
706 	u16 ds_cnt;
707 	u16 pi;
708 	u8 opcode;
709 
710 #ifdef KERN_TLS
711 top:
712 #endif
713 	/* Return ENOBUFS if the queue is full */
714 	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
715 		sq->stats.enobuf++;
716 		return (ENOBUFS);
717 	}
718 
719 	/* Align SQ edge with NOPs to avoid WQE wrap around */
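	/*
	 * Here "pi" is the number of whole WQE building blocks remaining
	 * before the ring wraps, minus one.
	 */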
720 	pi = ((~sq->pc) & sq->wq.sz_m1);
721 	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
722 		/* Send one multi NOP message instead of many */
723 		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
724 		pi = ((~sq->pc) & sq->wq.sz_m1);
725 		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
726 			sq->stats.enobuf++;
727 			return (ENOMEM);
728 		}
729 	}
730 
731 #ifdef KERN_TLS
732 	/* Special handling for TLS packets, if any */
733 	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
734 	case MLX5E_TLS_LOOP:
735 		goto top;
736 	case MLX5E_TLS_FAILURE:
737 		mb = *mbp;
738 		err = ENOMEM;
739 		goto tx_drop;
740 	case MLX5E_TLS_DEFERRED:
741 		return (0);
742 	case MLX5E_TLS_CONTINUE:
743 	default:
744 		break;
745 	}
746 #endif
747 
748 	/* Setup local variables */
749 	pi = sq->pc & sq->wq.sz_m1;
750 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
751 	ifp = sq->ifp;
752 
753 	memset(wqe, 0, sizeof(*wqe));
754 
755 	/* get pointer to mbuf */
756 	mb = *mbp;
757 
758 	/* Send a copy of the frame to the BPF listener, if any */
759 	if (ifp != NULL && ifp->if_bpf != NULL)
760 		ETHER_BPF_MTAP(ifp, mb);
761 
762 	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
763 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
764 	}
765 	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
766 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
767 	}
768 	if (wqe->eth.cs_flags == 0) {
769 		sq->stats.csum_offload_none++;
770 	}
771 	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
772 		u32 payload_len;
773 		u32 mss = mb->m_pkthdr.tso_segsz;
774 		u32 num_pkts;
775 
776 		wqe->eth.mss = cpu_to_be16(mss);
777 		opcode = MLX5_OPCODE_LSO;
778 		if (args.ihs == 0)
779 			args.ihs = mlx5e_get_full_header_size(mb, NULL);
780 		if (unlikely(args.ihs == 0)) {
781 			err = EINVAL;
782 			goto tx_drop;
783 		}
784 		payload_len = mb->m_pkthdr.len - args.ihs;
785 		if (payload_len == 0)
786 			num_pkts = 1;
787 		else
788 			num_pkts = DIV_ROUND_UP(payload_len, mss);
789 		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);
790 
791 
792 		sq->stats.tso_packets++;
793 		sq->stats.tso_bytes += payload_len;
794 	} else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
795 		/* check for inner TCP TSO first */
796 		if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO |
797 		    CSUM_INNER_IP6_TSO)) {
798 			u32 payload_len;
799 			u32 mss = mb->m_pkthdr.tso_segsz;
800 			u32 num_pkts;
801 
802 			wqe->eth.mss = cpu_to_be16(mss);
803 			opcode = MLX5_OPCODE_LSO;
804 
805 			if (likely(args.ihs == 0)) {
806 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
807 				       MLX5_ETH_WQE_L3_INNER_CSUM |
808 				       MLX5_ETH_WQE_L4_INNER_CSUM |
809 				       MLX5_ETH_WQE_L4_CSUM |
810 				       MLX5_ETH_WQE_L3_CSUM,
811 				       opcode);
812 				if (unlikely(args.ihs == 0)) {
813 					err = EINVAL;
814 					goto tx_drop;
815 				}
816 			}
817 
818 			payload_len = mb->m_pkthdr.len - args.ihs;
819 			if (payload_len == 0)
820 				num_pkts = 1;
821 			else
822 				num_pkts = DIV_ROUND_UP(payload_len, mss);
823 			sq->mbuf[pi].num_bytes = payload_len +
824 			    num_pkts * args.ihs;
825 
826 			sq->stats.tso_packets++;
827 			sq->stats.tso_bytes += payload_len;
828 		} else {
829 			opcode = MLX5_OPCODE_SEND;
830 
831 			if (likely(args.ihs == 0)) {
832 				uint8_t cs_mask;
833 
834 				if (mb->m_pkthdr.csum_flags &
835 				    (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP |
836 				     CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) {
837 					cs_mask =
838 					    MLX5_ETH_WQE_L3_INNER_CSUM |
839 					    MLX5_ETH_WQE_L4_INNER_CSUM |
840 					    MLX5_ETH_WQE_L4_CSUM |
841 					    MLX5_ETH_WQE_L3_CSUM;
842 				} else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) {
843 					cs_mask =
844 					    MLX5_ETH_WQE_L3_INNER_CSUM |
845 					    MLX5_ETH_WQE_L4_CSUM |
846 					    MLX5_ETH_WQE_L3_CSUM;
847 				} else {
848 					cs_mask =
849 					    MLX5_ETH_WQE_L4_CSUM |
850 					    MLX5_ETH_WQE_L3_CSUM;
851 				}
852 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
853 				    cs_mask, opcode);
854 				if (unlikely(args.ihs == 0)) {
855 					err = EINVAL;
856 					goto tx_drop;
857 				}
858 			}
859 
860 			sq->mbuf[pi].num_bytes = max_t (unsigned int,
861 			    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
862 		}
863 	} else {
864 		opcode = MLX5_OPCODE_SEND;
865 
866 		if (args.ihs == 0) {
867 			switch (sq->min_inline_mode) {
868 			case MLX5_INLINE_MODE_IP:
869 			case MLX5_INLINE_MODE_TCP_UDP:
870 				args.ihs = mlx5e_get_full_header_size(mb, NULL);
871 				if (unlikely(args.ihs == 0))
872 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
873 				break;
874 			case MLX5_INLINE_MODE_L2:
875 				args.ihs = mlx5e_get_l2_header_size(sq, mb);
876 				break;
877 			case MLX5_INLINE_MODE_NONE:
878 				/* FALLTHROUGH */
879 			default:
880 				if ((mb->m_flags & M_VLANTAG) != 0 &&
881 				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
882 					/* inlining VLAN data is not required */
883 					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
884 					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
885 					args.ihs = 0;
886 				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
887 				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
888 					/* inlining non-VLAN data is not required */
889 					args.ihs = 0;
890 				} else {
891 					/* we are forced to inline the L2 header, if any */
892 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
893 				}
894 				break;
895 			}
896 		}
897 		sq->mbuf[pi].num_bytes = max_t (unsigned int,
898 		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
899 	}
900 
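	/*
	 * Inline the packet header into the WQE, if any. When a VLAN tag
	 * is pending insertion, the Ethernet header is rebuilt in the
	 * inline area with the 4-byte 802.1Q tag inserted, and the
	 * inlined bytes are trimmed from the mbuf.
	 */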
901 	if (likely(args.ihs == 0)) {
902 		/* nothing to inline */
903 	} else if ((mb->m_flags & M_VLANTAG) != 0) {
904 		struct ether_vlan_header *eh = (struct ether_vlan_header *)
905 		    wqe->eth.inline_hdr_start;
906 
907 		/* Range checks */
908 		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
909 			if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) {
910 				err = EINVAL;
911 				goto tx_drop;
912 			}
913 			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
914 		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
915 			err = EINVAL;
916 			goto tx_drop;
917 		}
918 		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
919 		m_adj(mb, ETHER_HDR_LEN);
920 		/* Insert the 4-byte VLAN tag into the data stream */
921 		eh->evl_proto = eh->evl_encap_proto;
922 		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
923 		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
924 		/* Copy rest of header data, if any */
925 		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
926 		m_adj(mb, args.ihs - ETHER_HDR_LEN);
927 		/* Extend header by 4 bytes */
928 		args.ihs += ETHER_VLAN_ENCAP_LEN;
929 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
930 	} else {
931 		/* check if inline header size is too big */
932 		if (unlikely(args.ihs > sq->max_inline)) {
933 			if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO |
934 			    CSUM_ENCAP_VXLAN))) {
935 				err = EINVAL;
936 				goto tx_drop;
937 			}
938 			args.ihs = sq->max_inline;
939 		}
940 		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
941 		m_adj(mb, args.ihs);
942 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
943 	}
944 
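	/*
	 * Compute how many 16-byte data segments the control, Ethernet
	 * and inline header parts occupy; the scatter/gather pointers are
	 * placed immediately after them.
	 */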
945 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
946 	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
947 		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
948 		    MLX5_SEND_WQE_DS);
949 	}
950 	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
951 
952 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
953 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
954 	if (err == EFBIG) {
955 		/* Update statistics */
956 		sq->stats.defragged++;
957 		/* Too many mbuf fragments */
958 		mb = m_defrag(*mbp, M_NOWAIT);
959 		if (mb == NULL) {
960 			mb = *mbp;
961 			goto tx_drop;
962 		}
963 		/* Try again */
964 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
965 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
966 	}
967 	/* Catch errors */
968 	if (err != 0)
969 		goto tx_drop;
970 
971 	/* Make sure all mbuf data, if any, is visible to the bus */
972 	if (nsegs != 0) {
973 		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
974 		    BUS_DMASYNC_PREWRITE);
975 	} else {
976 		/* All data was inlined, free the mbuf. */
977 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
978 		m_freem(mb);
979 		mb = NULL;
980 	}
981 
982 	for (x = 0; x != nsegs; x++) {
983 		if (segs[x].ds_len == 0)
984 			continue;
985 		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
986 		dseg->lkey = sq->mkey_be;
987 		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
988 		dseg++;
989 	}
990 
991 	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
992 
993 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
994 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
995 	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);
996 
997 	if (mlx5e_do_send_cqe_inline(sq))
998 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
999 	else
1000 		wqe->ctrl.fm_ce_se = 0;
1001 
1002 	/* Copy data for doorbell */
1003 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
1004 
1005 	/* Store pointer to mbuf */
1006 	sq->mbuf[pi].mbuf = mb;
1007 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
1008 	sq->mbuf[pi].p_refcount = args.pref;
1009 	if (unlikely(args.pref != NULL))
1010 		atomic_add_int(args.pref, 1);
1011 	sq->pc += sq->mbuf[pi].num_wqebbs;
1012 
1013 	/* Count all traffic going out */
1014 	sq->stats.packets++;
1015 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
1016 
1017 	*mbp = NULL;	/* safety clear */
1018 	return (0);
1019 
1020 tx_drop:
1021 	sq->stats.dropped++;
1022 	*mbp = NULL;
1023 	m_freem(mb);
1024 	return (err);
1025 }
1026 
1027 static void
1028 mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
1029 {
1030 	u16 sqcc;
1031 
1032 	/*
1033 	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
1034 	 * otherwise a cq overrun may occur
1035 	 */
1036 	sqcc = sq->cc;
1037 
1038 	while (budget > 0) {
1039 		struct mlx5_cqe64 *cqe;
1040 		struct mbuf *mb;
1041 		bool match;
1042 		u16 sqcc_this;
1043 		u16 delta;
1044 		u16 x;
1045 		u16 ci;
1046 
1047 		cqe = mlx5e_get_cqe(&sq->cq);
1048 		if (!cqe)
1049 			break;
1050 
1051 		mlx5_cqwq_pop(&sq->cq.wq);
1052 
1053 		/* check if the completion event indicates an error */
1054 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
1055 			sq->stats.cqe_err++;
1056 
1057 		/* setup local variables */
1058 		sqcc_this = be16toh(cqe->wqe_counter);
1059 		match = false;
1060 
1061 		/* update budget according to the event factor */
1062 		budget -= sq->cev_factor;
1063 
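		/*
		 * Due to completion event moderation, a single CQE may
		 * acknowledge several send WQEs. Free completed mbufs up to
		 * and including the WQE indicated by the CQE's
		 * "wqe_counter".
		 */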
1064 		for (x = 0;; x++) {
1065 			if (unlikely(match != false)) {
1066 				break;
1067 			} else if (unlikely(x == sq->cev_factor)) {
1068 				/* WQE counter match not found */
1069 				sq->stats.cqe_err++;
1070 				break;
1071 			}
1072 			ci = sqcc & sq->wq.sz_m1;
1073 			delta = sqcc_this - sqcc;
1074 			match = (delta < sq->mbuf[ci].num_wqebbs);
1075 			mb = sq->mbuf[ci].mbuf;
1076 			sq->mbuf[ci].mbuf = NULL;
1077 
1078 			if (unlikely(sq->mbuf[ci].p_refcount != NULL)) {
1079 				atomic_add_int(sq->mbuf[ci].p_refcount, -1);
1080 				sq->mbuf[ci].p_refcount = NULL;
1081 			}
1082 
1083 			if (mb == NULL) {
1084 				if (unlikely(sq->mbuf[ci].num_bytes == 0))
1085 					sq->stats.nop++;
1086 			} else {
1087 				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
1088 				    BUS_DMASYNC_POSTWRITE);
1089 				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
1090 
1091 				/* Free transmitted mbuf */
1092 				m_freem(mb);
1093 			}
1094 			sqcc += sq->mbuf[ci].num_wqebbs;
1095 		}
1096 	}
1097 
1098 	mlx5_cqwq_update_db_record(&sq->cq.wq);
1099 
1100 	/* Ensure cq space is freed before enabling more cqes */
1101 	atomic_thread_fence_rel();
1102 
1103 	sq->cc = sqcc;
1104 }
1105 
1106 static int
1107 mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
1108 {
1109 	int err = 0;
1110 
1111 	if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1112 	    READ_ONCE(sq->running) == 0)) {
1113 		m_freem(mb);
1114 		return (ENETDOWN);
1115 	}
1116 
1117 	/* Do transmit */
1118 	if (mlx5e_sq_xmit(sq, &mb) != 0) {
1119 		/* NOTE: m_freem() is NULL safe */
1120 		m_freem(mb);
1121 		err = ENOBUFS;
1122 	}
1123 
1124 	/* Check if we need to write the doorbell */
1125 	if (likely(sq->doorbell.d64 != 0)) {
1126 		mlx5e_tx_notify_hw(sq, sq->doorbell.d32);
1127 		sq->doorbell.d64 = 0;
1128 	}
1129 
1130 	/*
1131 	 * Check if we need to start the event timer which flushes the
1132 	 * transmit ring on timeout:
1133 	 */
1134 	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
1135 	    sq->cev_factor != 1)) {
1136 		/* start the timer */
1137 		mlx5e_sq_cev_timeout(sq);
1138 	} else {
1139 		/* don't send NOPs yet */
1140 		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1141 	}
1142 	return (err);
1143 }
1144 
1145 int
1146 mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
1147 {
1148 	struct mlx5e_sq *sq;
1149 	int ret;
1150 
1151 	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
1152 		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
1153 		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
1154 		if (unlikely(sq == NULL)) {
1155 			goto select_queue;
1156 		}
1157 	} else {
1158 select_queue:
1159 		sq = mlx5e_select_queue(ifp, mb);
1160 		if (unlikely(sq == NULL)) {
1161 			/* Free mbuf */
1162 			m_freem(mb);
1163 
1164 			/* Invalid send queue */
1165 			return (ENXIO);
1166 		}
1167 	}
1168 
1169 	mtx_lock(&sq->lock);
1170 	ret = mlx5e_xmit_locked(ifp, sq, mb);
1171 	mtx_unlock(&sq->lock);
1172 
1173 	return (ret);
1174 }
1175 
1176 void
1177 mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
1178 {
1179 	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
1180 
1181 	mtx_lock(&sq->comp_lock);
1182 	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
1183 	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
1184 	mtx_unlock(&sq->comp_lock);
1185 }
1186