xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision c1cdf6a42f0d951ba720688dfc6ce07608b02f6e)
1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "en.h"
29 
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32 
/* Default driver version, unless one was already defined. */
#if !defined(ETH_DRIVER_VERSION)
#define	ETH_DRIVER_VERSION	"3.4.2"
#endif

/* Driver identification string: product name followed by the version. */
char mlx5e_version[] =
    "Mellanox Ethernet driver (" ETH_DRIVER_VERSION ")";
39 
40 static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
41 
42 struct mlx5e_channel_param {
43 	struct mlx5e_rq_param rq;
44 	struct mlx5e_sq_param sq;
45 	struct mlx5e_cq_param rx_cq;
46 	struct mlx5e_cq_param tx_cq;
47 };
48 
49 static const struct {
50 	u32	subtype;
51 	u64	baudrate;
52 }	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
53 
54 	[MLX5E_1000BASE_CX_SGMII] = {
55 		.subtype = IFM_1000_CX_SGMII,
56 		.baudrate = IF_Mbps(1000ULL),
57 	},
58 	[MLX5E_1000BASE_KX] = {
59 		.subtype = IFM_1000_KX,
60 		.baudrate = IF_Mbps(1000ULL),
61 	},
62 	[MLX5E_10GBASE_CX4] = {
63 		.subtype = IFM_10G_CX4,
64 		.baudrate = IF_Gbps(10ULL),
65 	},
66 	[MLX5E_10GBASE_KX4] = {
67 		.subtype = IFM_10G_KX4,
68 		.baudrate = IF_Gbps(10ULL),
69 	},
70 	[MLX5E_10GBASE_KR] = {
71 		.subtype = IFM_10G_KR,
72 		.baudrate = IF_Gbps(10ULL),
73 	},
74 	[MLX5E_20GBASE_KR2] = {
75 		.subtype = IFM_20G_KR2,
76 		.baudrate = IF_Gbps(20ULL),
77 	},
78 	[MLX5E_40GBASE_CR4] = {
79 		.subtype = IFM_40G_CR4,
80 		.baudrate = IF_Gbps(40ULL),
81 	},
82 	[MLX5E_40GBASE_KR4] = {
83 		.subtype = IFM_40G_KR4,
84 		.baudrate = IF_Gbps(40ULL),
85 	},
86 	[MLX5E_56GBASE_R4] = {
87 		.subtype = IFM_56G_R4,
88 		.baudrate = IF_Gbps(56ULL),
89 	},
90 	[MLX5E_10GBASE_CR] = {
91 		.subtype = IFM_10G_CR1,
92 		.baudrate = IF_Gbps(10ULL),
93 	},
94 	[MLX5E_10GBASE_SR] = {
95 		.subtype = IFM_10G_SR,
96 		.baudrate = IF_Gbps(10ULL),
97 	},
98 	[MLX5E_10GBASE_ER] = {
99 		.subtype = IFM_10G_ER,
100 		.baudrate = IF_Gbps(10ULL),
101 	},
102 	[MLX5E_40GBASE_SR4] = {
103 		.subtype = IFM_40G_SR4,
104 		.baudrate = IF_Gbps(40ULL),
105 	},
106 	[MLX5E_40GBASE_LR4] = {
107 		.subtype = IFM_40G_LR4,
108 		.baudrate = IF_Gbps(40ULL),
109 	},
110 	[MLX5E_100GBASE_CR4] = {
111 		.subtype = IFM_100G_CR4,
112 		.baudrate = IF_Gbps(100ULL),
113 	},
114 	[MLX5E_100GBASE_SR4] = {
115 		.subtype = IFM_100G_SR4,
116 		.baudrate = IF_Gbps(100ULL),
117 	},
118 	[MLX5E_100GBASE_KR4] = {
119 		.subtype = IFM_100G_KR4,
120 		.baudrate = IF_Gbps(100ULL),
121 	},
122 	[MLX5E_100GBASE_LR4] = {
123 		.subtype = IFM_100G_LR4,
124 		.baudrate = IF_Gbps(100ULL),
125 	},
126 	[MLX5E_100BASE_TX] = {
127 		.subtype = IFM_100_TX,
128 		.baudrate = IF_Mbps(100ULL),
129 	},
130 	[MLX5E_1000BASE_T] = {
131 		.subtype = IFM_1000_T,
132 		.baudrate = IF_Mbps(1000ULL),
133 	},
134 	[MLX5E_10GBASE_T] = {
135 		.subtype = IFM_10G_T,
136 		.baudrate = IF_Gbps(10ULL),
137 	},
138 	[MLX5E_25GBASE_CR] = {
139 		.subtype = IFM_25G_CR,
140 		.baudrate = IF_Gbps(25ULL),
141 	},
142 	[MLX5E_25GBASE_KR] = {
143 		.subtype = IFM_25G_KR,
144 		.baudrate = IF_Gbps(25ULL),
145 	},
146 	[MLX5E_25GBASE_SR] = {
147 		.subtype = IFM_25G_SR,
148 		.baudrate = IF_Gbps(25ULL),
149 	},
150 	[MLX5E_50GBASE_CR2] = {
151 		.subtype = IFM_50G_CR2,
152 		.baudrate = IF_Gbps(50ULL),
153 	},
154 	[MLX5E_50GBASE_KR2] = {
155 		.subtype = IFM_50G_KR2,
156 		.baudrate = IF_Gbps(50ULL),
157 	},
158 };
159 
160 MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
161 
162 static SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "MLX5 driver parameters");
163 
164 static void
165 mlx5e_update_carrier(struct mlx5e_priv *priv)
166 {
167 	struct mlx5_core_dev *mdev = priv->mdev;
168 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
169 	u32 eth_proto_oper;
170 	int error;
171 	u8 port_state;
172 	u8 i;
173 
174 	port_state = mlx5_query_vport_state(mdev,
175 	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
176 
177 	if (port_state == VPORT_STATE_UP) {
178 		priv->media_status_last |= IFM_ACTIVE;
179 	} else {
180 		priv->media_status_last &= ~IFM_ACTIVE;
181 		priv->media_active_last = IFM_ETHER;
182 		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
183 		return;
184 	}
185 
186 	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
187 	if (error) {
188 		priv->media_active_last = IFM_ETHER;
189 		priv->ifp->if_baudrate = 1;
190 		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
191 		    __func__, error);
192 		return;
193 	}
194 	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
195 
196 	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
197 		if (mlx5e_mode_table[i].baudrate == 0)
198 			continue;
199 		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
200 			priv->ifp->if_baudrate =
201 			    mlx5e_mode_table[i].baudrate;
202 			priv->media_active_last =
203 			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
204 		}
205 	}
206 	if_link_state_change(priv->ifp, LINK_STATE_UP);
207 }
208 
209 static void
210 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
211 {
212 	struct mlx5e_priv *priv = dev->if_softc;
213 
214 	ifmr->ifm_status = priv->media_status_last;
215 	ifmr->ifm_active = priv->media_active_last |
216 	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
217 	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
218 
219 }
220 
221 static u32
222 mlx5e_find_link_mode(u32 subtype)
223 {
224 	u32 i;
225 	u32 link_mode = 0;
226 
227 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
228 		if (mlx5e_mode_table[i].baudrate == 0)
229 			continue;
230 		if (mlx5e_mode_table[i].subtype == subtype)
231 			link_mode |= MLX5E_PROT_MASK(i);
232 	}
233 
234 	return (link_mode);
235 }
236 
237 static int
238 mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
239 {
240 	return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
241 	    priv->params.rx_pauseframe_control,
242 	    priv->params.tx_pauseframe_control,
243 	    priv->params.rx_priority_flow_control,
244 	    priv->params.tx_priority_flow_control));
245 }
246 
247 static int
248 mlx5e_set_port_pfc(struct mlx5e_priv *priv)
249 {
250 	int error;
251 
252 	if (priv->params.rx_pauseframe_control ||
253 	    priv->params.tx_pauseframe_control) {
254 		if_printf(priv->ifp,
255 		    "Global pauseframes must be disabled before enabling PFC.\n");
256 		error = -EINVAL;
257 	} else {
258 		error = mlx5e_set_port_pause_and_pfc(priv);
259 	}
260 	return (error);
261 }
262 
263 static int
264 mlx5e_media_change(struct ifnet *dev)
265 {
266 	struct mlx5e_priv *priv = dev->if_softc;
267 	struct mlx5_core_dev *mdev = priv->mdev;
268 	u32 eth_proto_cap;
269 	u32 link_mode;
270 	int was_opened;
271 	int locked;
272 	int error;
273 
274 	locked = PRIV_LOCKED(priv);
275 	if (!locked)
276 		PRIV_LOCK(priv);
277 
278 	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
279 		error = EINVAL;
280 		goto done;
281 	}
282 	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
283 
284 	/* query supported capabilities */
285 	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
286 	if (error != 0) {
287 		if_printf(dev, "Query port media capability failed\n");
288 		goto done;
289 	}
290 	/* check for autoselect */
291 	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
292 		link_mode = eth_proto_cap;
293 		if (link_mode == 0) {
294 			if_printf(dev, "Port media capability is zero\n");
295 			error = EINVAL;
296 			goto done;
297 		}
298 	} else {
299 		link_mode = link_mode & eth_proto_cap;
300 		if (link_mode == 0) {
301 			if_printf(dev, "Not supported link mode requested\n");
302 			error = EINVAL;
303 			goto done;
304 		}
305 	}
306 	if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
307 		/* check if PFC is enabled */
308 		if (priv->params.rx_priority_flow_control ||
309 		    priv->params.tx_priority_flow_control) {
310 			if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
311 			error = EINVAL;
312 			goto done;
313 		}
314 	}
315 	/* update pauseframe control bits */
316 	priv->params.rx_pauseframe_control =
317 	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
318 	priv->params.tx_pauseframe_control =
319 	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
320 
321 	/* check if device is opened */
322 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
323 
324 	/* reconfigure the hardware */
325 	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
326 	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
327 	error = -mlx5e_set_port_pause_and_pfc(priv);
328 	if (was_opened)
329 		mlx5_set_port_status(mdev, MLX5_PORT_UP);
330 
331 done:
332 	if (!locked)
333 		PRIV_UNLOCK(priv);
334 	return (error);
335 }
336 
337 static void
338 mlx5e_update_carrier_work(struct work_struct *work)
339 {
340 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
341 	    update_carrier_work);
342 
343 	PRIV_LOCK(priv);
344 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
345 		mlx5e_update_carrier(priv);
346 	PRIV_UNLOCK(priv);
347 }
348 
349 /*
350  * This function reads the physical port counters from the firmware
351  * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
352  * macros. The output is converted from big-endian 64-bit values into
353  * host endian ones and stored in the "priv->stats.pport" structure.
354  */
355 static void
356 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
357 {
358 	struct mlx5_core_dev *mdev = priv->mdev;
359 	struct mlx5e_pport_stats *s = &priv->stats.pport;
360 	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
361 	u32 *in;
362 	u32 *out;
363 	const u64 *ptr;
364 	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
365 	unsigned x;
366 	unsigned y;
367 	unsigned z;
368 
369 	/* allocate firmware request structures */
370 	in = mlx5_vzalloc(sz);
371 	out = mlx5_vzalloc(sz);
372 	if (in == NULL || out == NULL)
373 		goto free_out;
374 
375 	/*
376 	 * Get pointer to the 64-bit counter set which is located at a
377 	 * fixed offset in the output firmware request structure:
378 	 */
379 	ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
380 
381 	MLX5_SET(ppcnt_reg, in, local_port, 1);
382 
383 	/* read IEEE802_3 counter group using predefined counter layout */
384 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
385 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
386 	for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
387 	     x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
388 		s->arg[y] = be64toh(ptr[x]);
389 
390 	/* read RFC2819 counter group using predefined counter layout */
391 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
392 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
393 	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
394 		s->arg[y] = be64toh(ptr[x]);
395 	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
396 	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
397 		s_debug->arg[y] = be64toh(ptr[x]);
398 
399 	/* read RFC2863 counter group using predefined counter layout */
400 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
401 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
402 	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
403 		s_debug->arg[y] = be64toh(ptr[x]);
404 
405 	/* read physical layer stats counter group using predefined counter layout */
406 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
407 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
408 	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
409 		s_debug->arg[y] = be64toh(ptr[x]);
410 
411 	/* read per-priority counters */
412 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
413 
414 	/* iterate all the priorities */
415 	for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
416 		MLX5_SET(ppcnt_reg, in, prio_tc, z);
417 		mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
418 
419 		/* read per priority stats counter group using predefined counter layout */
420 		for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
421 		    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
422 			s->arg[y] = be64toh(ptr[x]);
423 	}
424 free_out:
425 	/* free firmware request structures */
426 	kvfree(in);
427 	kvfree(out);
428 }
429 
430 /*
431  * This function is called regularly to collect all statistics
432  * counters from the firmware. The values can be viewed through the
433  * sysctl interface. Execution is serialized using the priv's global
434  * configuration lock.
435  */
436 static void
437 mlx5e_update_stats_work(struct work_struct *work)
438 {
439 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
440 	    update_stats_work);
441 	struct mlx5_core_dev *mdev = priv->mdev;
442 	struct mlx5e_vport_stats *s = &priv->stats.vport;
443 	struct mlx5e_rq_stats *rq_stats;
444 	struct mlx5e_sq_stats *sq_stats;
445 	struct buf_ring *sq_br;
446 #if (__FreeBSD_version < 1100000)
447 	struct ifnet *ifp = priv->ifp;
448 #endif
449 
450 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
451 	u32 *out;
452 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
453 	u64 tso_packets = 0;
454 	u64 tso_bytes = 0;
455 	u64 tx_queue_dropped = 0;
456 	u64 tx_defragged = 0;
457 	u64 tx_offload_none = 0;
458 	u64 lro_packets = 0;
459 	u64 lro_bytes = 0;
460 	u64 sw_lro_queued = 0;
461 	u64 sw_lro_flushed = 0;
462 	u64 rx_csum_none = 0;
463 	u64 rx_wqe_err = 0;
464 	u32 rx_out_of_buffer = 0;
465 	int i;
466 	int j;
467 
468 	PRIV_LOCK(priv);
469 	out = mlx5_vzalloc(outlen);
470 	if (out == NULL)
471 		goto free_out;
472 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
473 		goto free_out;
474 
475 	/* Collect firts the SW counters and then HW for consistency */
476 	for (i = 0; i < priv->params.num_channels; i++) {
477 		struct mlx5e_rq *rq = &priv->channel[i]->rq;
478 
479 		rq_stats = &priv->channel[i]->rq.stats;
480 
481 		/* collect stats from LRO */
482 		rq_stats->sw_lro_queued = rq->lro.lro_queued;
483 		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
484 		sw_lro_queued += rq_stats->sw_lro_queued;
485 		sw_lro_flushed += rq_stats->sw_lro_flushed;
486 		lro_packets += rq_stats->lro_packets;
487 		lro_bytes += rq_stats->lro_bytes;
488 		rx_csum_none += rq_stats->csum_none;
489 		rx_wqe_err += rq_stats->wqe_err;
490 
491 		for (j = 0; j < priv->num_tc; j++) {
492 			sq_stats = &priv->channel[i]->sq[j].stats;
493 			sq_br = priv->channel[i]->sq[j].br;
494 
495 			tso_packets += sq_stats->tso_packets;
496 			tso_bytes += sq_stats->tso_bytes;
497 			tx_queue_dropped += sq_stats->dropped;
498 			if (sq_br != NULL)
499 				tx_queue_dropped += sq_br->br_drops;
500 			tx_defragged += sq_stats->defragged;
501 			tx_offload_none += sq_stats->csum_offload_none;
502 		}
503 	}
504 
505 	/* update counters */
506 	s->tso_packets = tso_packets;
507 	s->tso_bytes = tso_bytes;
508 	s->tx_queue_dropped = tx_queue_dropped;
509 	s->tx_defragged = tx_defragged;
510 	s->lro_packets = lro_packets;
511 	s->lro_bytes = lro_bytes;
512 	s->sw_lro_queued = sw_lro_queued;
513 	s->sw_lro_flushed = sw_lro_flushed;
514 	s->rx_csum_none = rx_csum_none;
515 	s->rx_wqe_err = rx_wqe_err;
516 
517 	/* HW counters */
518 	memset(in, 0, sizeof(in));
519 
520 	MLX5_SET(query_vport_counter_in, in, opcode,
521 	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
522 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
523 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
524 
525 	memset(out, 0, outlen);
526 
527 	/* get number of out-of-buffer drops first */
528 	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
529 	    &rx_out_of_buffer))
530 		goto free_out;
531 
532 	/* accumulate difference into a 64-bit counter */
533 	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
534 	s->rx_out_of_buffer_prev = rx_out_of_buffer;
535 
536 	/* get port statistics */
537 	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
538 		goto free_out;
539 
540 #define	MLX5_GET_CTR(out, x) \
541 	MLX5_GET64(query_vport_counter_out, out, x)
542 
543 	s->rx_error_packets =
544 	    MLX5_GET_CTR(out, received_errors.packets);
545 	s->rx_error_bytes =
546 	    MLX5_GET_CTR(out, received_errors.octets);
547 	s->tx_error_packets =
548 	    MLX5_GET_CTR(out, transmit_errors.packets);
549 	s->tx_error_bytes =
550 	    MLX5_GET_CTR(out, transmit_errors.octets);
551 
552 	s->rx_unicast_packets =
553 	    MLX5_GET_CTR(out, received_eth_unicast.packets);
554 	s->rx_unicast_bytes =
555 	    MLX5_GET_CTR(out, received_eth_unicast.octets);
556 	s->tx_unicast_packets =
557 	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
558 	s->tx_unicast_bytes =
559 	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
560 
561 	s->rx_multicast_packets =
562 	    MLX5_GET_CTR(out, received_eth_multicast.packets);
563 	s->rx_multicast_bytes =
564 	    MLX5_GET_CTR(out, received_eth_multicast.octets);
565 	s->tx_multicast_packets =
566 	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
567 	s->tx_multicast_bytes =
568 	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
569 
570 	s->rx_broadcast_packets =
571 	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
572 	s->rx_broadcast_bytes =
573 	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
574 	s->tx_broadcast_packets =
575 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
576 	s->tx_broadcast_bytes =
577 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
578 
579 	s->rx_packets =
580 	    s->rx_unicast_packets +
581 	    s->rx_multicast_packets +
582 	    s->rx_broadcast_packets -
583 	    s->rx_out_of_buffer;
584 	s->rx_bytes =
585 	    s->rx_unicast_bytes +
586 	    s->rx_multicast_bytes +
587 	    s->rx_broadcast_bytes;
588 	s->tx_packets =
589 	    s->tx_unicast_packets +
590 	    s->tx_multicast_packets +
591 	    s->tx_broadcast_packets;
592 	s->tx_bytes =
593 	    s->tx_unicast_bytes +
594 	    s->tx_multicast_bytes +
595 	    s->tx_broadcast_bytes;
596 
597 	/* Update calculated offload counters */
598 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
599 	s->rx_csum_good = s->rx_packets - s->rx_csum_none;
600 
601 	/* Get physical port counters */
602 	mlx5e_update_pport_counters(priv);
603 
604 #if (__FreeBSD_version < 1100000)
605 	/* no get_counters interface in fbsd 10 */
606 	ifp->if_ipackets = s->rx_packets;
607 	ifp->if_ierrors = s->rx_error_packets +
608 	    priv->stats.pport.alignment_err +
609 	    priv->stats.pport.check_seq_err +
610 	    priv->stats.pport.crc_align_errors +
611 	    priv->stats.pport.in_range_len_errors +
612 	    priv->stats.pport.jabbers +
613 	    priv->stats.pport.out_of_range_len +
614 	    priv->stats.pport.oversize_pkts +
615 	    priv->stats.pport.symbol_err +
616 	    priv->stats.pport.too_long_errors +
617 	    priv->stats.pport.undersize_pkts +
618 	    priv->stats.pport.unsupported_op_rx;
619 	ifp->if_iqdrops = s->rx_out_of_buffer +
620 	    priv->stats.pport.drop_events;
621 	ifp->if_opackets = s->tx_packets;
622 	ifp->if_oerrors = s->tx_error_packets;
623 	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
624 	ifp->if_ibytes = s->rx_bytes;
625 	ifp->if_obytes = s->tx_bytes;
626 	ifp->if_collisions =
627 	    priv->stats.pport.collisions;
628 #endif
629 
630 free_out:
631 	kvfree(out);
632 
633 	/* Update diagnostics, if any */
634 	if (priv->params_ethtool.diag_pci_enable ||
635 	    priv->params_ethtool.diag_general_enable) {
636 		int error = mlx5_core_get_diagnostics_full(mdev,
637 		    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
638 		    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
639 		if (error != 0)
640 			if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
641 	}
642 	PRIV_UNLOCK(priv);
643 }
644 
645 static void
646 mlx5e_update_stats(void *arg)
647 {
648 	struct mlx5e_priv *priv = arg;
649 
650 	queue_work(priv->wq, &priv->update_stats_work);
651 
652 	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
653 }
654 
655 static void
656 mlx5e_async_event_sub(struct mlx5e_priv *priv,
657     enum mlx5_dev_event event)
658 {
659 	switch (event) {
660 	case MLX5_DEV_EVENT_PORT_UP:
661 	case MLX5_DEV_EVENT_PORT_DOWN:
662 		queue_work(priv->wq, &priv->update_carrier_work);
663 		break;
664 
665 	default:
666 		break;
667 	}
668 }
669 
670 static void
671 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
672     enum mlx5_dev_event event, unsigned long param)
673 {
674 	struct mlx5e_priv *priv = vpriv;
675 
676 	mtx_lock(&priv->async_events_mtx);
677 	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
678 		mlx5e_async_event_sub(priv, event);
679 	mtx_unlock(&priv->async_events_mtx);
680 }
681 
682 static void
683 mlx5e_enable_async_events(struct mlx5e_priv *priv)
684 {
685 	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
686 }
687 
688 static void
689 mlx5e_disable_async_events(struct mlx5e_priv *priv)
690 {
691 	mtx_lock(&priv->async_events_mtx);
692 	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
693 	mtx_unlock(&priv->async_events_mtx);
694 }
695 
696 static void mlx5e_calibration_callout(void *arg);
697 static int mlx5e_calibration_duration = 20;
698 static int mlx5e_fast_calibration = 1;
699 static int mlx5e_normal_calibration = 30;
700 
701 static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
702     "MLX5 timestamp calibration parameteres");
703 
704 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
705     &mlx5e_calibration_duration, 0,
706     "Duration of initial calibration");
707 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
708     &mlx5e_fast_calibration, 0,
709     "Recalibration interval during initial calibration");
710 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
711     &mlx5e_normal_calibration, 0,
712     "Recalibration interval during normal operations");
713 
714 /*
715  * Ignites the calibration process.
716  */
717 static void
718 mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
719 {
720 
721 	if (priv->clbr_done == 0)
722 		mlx5e_calibration_callout(priv);
723 	else
724 		callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
725 		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
726 		    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
727 		    priv);
728 }
729 
/*
 * Flatten a timespec into a single 64-bit count.
 *
 * NOTE: despite the "usec" in the name, the seconds are scaled by
 * 10^9 and the nanosecond field is added as is, so the result is in
 * nanoseconds.
 */
static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{
	uint64_t total;

	total = (uint64_t)ts->tv_sec * 1000000000;
	total += ts->tv_nsec;
	return (total);
}
736 
737 static uint64_t
738 mlx5e_hw_clock(struct mlx5e_priv *priv)
739 {
740 	struct mlx5_init_seg *iseg;
741 	uint32_t hw_h, hw_h1, hw_l;
742 
743 	iseg = priv->mdev->iseg;
744 	do {
745 		hw_h = ioread32be(&iseg->internal_timer_h);
746 		hw_l = ioread32be(&iseg->internal_timer_l);
747 		hw_h1 = ioread32be(&iseg->internal_timer_h);
748 	} while (hw_h1 != hw_h);
749 	return (((uint64_t)hw_h << 32) | hw_l);
750 }
751 
752 /*
753  * The calibration callout, it runs either in the context of the
754  * thread which enables calibration, or in callout.  It takes the
755  * snapshot of system and adapter clocks, then advances the pointers to
756  * the calibration point to allow rx path to read the consistent data
757  * lockless.
758  */
759 static void
760 mlx5e_calibration_callout(void *arg)
761 {
762 	struct mlx5e_priv *priv;
763 	struct mlx5e_clbr_point *next, *curr;
764 	struct timespec ts;
765 	int clbr_curr_next;
766 
767 	priv = arg;
768 	curr = &priv->clbr_points[priv->clbr_curr];
769 	clbr_curr_next = priv->clbr_curr + 1;
770 	if (clbr_curr_next >= nitems(priv->clbr_points))
771 		clbr_curr_next = 0;
772 	next = &priv->clbr_points[clbr_curr_next];
773 
774 	next->base_prev = curr->base_curr;
775 	next->clbr_hw_prev = curr->clbr_hw_curr;
776 
777 	next->clbr_hw_curr = mlx5e_hw_clock(priv);
778 	if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
779 	    0) {
780 		if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx,"
781 		    "disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev);
782 		priv->clbr_done = 0;
783 		return;
784 	}
785 
786 	nanouptime(&ts);
787 	next->base_curr = mlx5e_timespec2usec(&ts);
788 
789 	curr->clbr_gen = 0;
790 	atomic_thread_fence_rel();
791 	priv->clbr_curr = clbr_curr_next;
792 	atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));
793 
794 	if (priv->clbr_done < mlx5e_calibration_duration)
795 		priv->clbr_done++;
796 	mlx5e_reset_calibration_callout(priv);
797 }
798 
799 static const char *mlx5e_rq_stats_desc[] = {
800 	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
801 };
802 
803 static int
804 mlx5e_create_rq(struct mlx5e_channel *c,
805     struct mlx5e_rq_param *param,
806     struct mlx5e_rq *rq)
807 {
808 	struct mlx5e_priv *priv = c->priv;
809 	struct mlx5_core_dev *mdev = priv->mdev;
810 	char buffer[16];
811 	void *rqc = param->rqc;
812 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
813 	int wq_sz;
814 	int err;
815 	int i;
816 	u32 nsegs, wqe_sz;
817 
818 	err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
819 	if (err != 0)
820 		goto done;
821 
822 	/* Create DMA descriptor TAG */
823 	if ((err = -bus_dma_tag_create(
824 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
825 	    1,				/* any alignment */
826 	    0,				/* no boundary */
827 	    BUS_SPACE_MAXADDR,		/* lowaddr */
828 	    BUS_SPACE_MAXADDR,		/* highaddr */
829 	    NULL, NULL,			/* filter, filterarg */
830 	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsize */
831 	    nsegs,			/* nsegments */
832 	    nsegs * MLX5E_MAX_RX_BYTES,	/* maxsegsize */
833 	    0,				/* flags */
834 	    NULL, NULL,			/* lockfunc, lockfuncarg */
835 	    &rq->dma_tag)))
836 		goto done;
837 
838 	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
839 	    &rq->wq_ctrl);
840 	if (err)
841 		goto err_free_dma_tag;
842 
843 	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
844 
845 	err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
846 	if (err != 0)
847 		goto err_rq_wq_destroy;
848 
849 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
850 
851 	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
852 	if (err)
853 		goto err_rq_wq_destroy;
854 
855 	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
856 	for (i = 0; i != wq_sz; i++) {
857 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
858 #if (MLX5E_MAX_RX_SEGS == 1)
859 		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
860 #else
861 		int j;
862 #endif
863 
864 		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
865 		if (err != 0) {
866 			while (i--)
867 				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
868 			goto err_rq_mbuf_free;
869 		}
870 
871 		/* set value for constant fields */
872 #if (MLX5E_MAX_RX_SEGS == 1)
873 		wqe->data[0].lkey = c->mkey_be;
874 		wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
875 #else
876 		for (j = 0; j < rq->nsegs; j++)
877 			wqe->data[j].lkey = c->mkey_be;
878 #endif
879 	}
880 
881 	rq->ifp = c->ifp;
882 	rq->channel = c;
883 	rq->ix = c->ix;
884 
885 	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
886 	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
887 	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
888 	    rq->stats.arg);
889 	return (0);
890 
891 err_rq_mbuf_free:
892 	free(rq->mbuf, M_MLX5EN);
893 	tcp_lro_free(&rq->lro);
894 err_rq_wq_destroy:
895 	mlx5_wq_destroy(&rq->wq_ctrl);
896 err_free_dma_tag:
897 	bus_dma_tag_destroy(rq->dma_tag);
898 done:
899 	return (err);
900 }
901 
902 static void
903 mlx5e_destroy_rq(struct mlx5e_rq *rq)
904 {
905 	int wq_sz;
906 	int i;
907 
908 	/* destroy all sysctl nodes */
909 	sysctl_ctx_free(&rq->stats.ctx);
910 
911 	/* free leftover LRO packets, if any */
912 	tcp_lro_free(&rq->lro);
913 
914 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
915 	for (i = 0; i != wq_sz; i++) {
916 		if (rq->mbuf[i].mbuf != NULL) {
917 			bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
918 			m_freem(rq->mbuf[i].mbuf);
919 		}
920 		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
921 	}
922 	free(rq->mbuf, M_MLX5EN);
923 	mlx5_wq_destroy(&rq->wq_ctrl);
924 }
925 
926 static int
927 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
928 {
929 	struct mlx5e_channel *c = rq->channel;
930 	struct mlx5e_priv *priv = c->priv;
931 	struct mlx5_core_dev *mdev = priv->mdev;
932 
933 	void *in;
934 	void *rqc;
935 	void *wq;
936 	int inlen;
937 	int err;
938 
939 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
940 	    sizeof(u64) * rq->wq_ctrl.buf.npages;
941 	in = mlx5_vzalloc(inlen);
942 	if (in == NULL)
943 		return (-ENOMEM);
944 
945 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
946 	wq = MLX5_ADDR_OF(rqc, rqc, wq);
947 
948 	memcpy(rqc, param->rqc, sizeof(param->rqc));
949 
950 	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
951 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
952 	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
953 	if (priv->counter_set_id >= 0)
954 		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
955 	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
956 	    PAGE_SHIFT);
957 	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
958 
959 	mlx5_fill_page_array(&rq->wq_ctrl.buf,
960 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
961 
962 	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
963 
964 	kvfree(in);
965 
966 	return (err);
967 }
968 
969 static int
970 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
971 {
972 	struct mlx5e_channel *c = rq->channel;
973 	struct mlx5e_priv *priv = c->priv;
974 	struct mlx5_core_dev *mdev = priv->mdev;
975 
976 	void *in;
977 	void *rqc;
978 	int inlen;
979 	int err;
980 
981 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
982 	in = mlx5_vzalloc(inlen);
983 	if (in == NULL)
984 		return (-ENOMEM);
985 
986 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
987 
988 	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
989 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
990 	MLX5_SET(rqc, rqc, state, next_state);
991 
992 	err = mlx5_core_modify_rq(mdev, in, inlen);
993 
994 	kvfree(in);
995 
996 	return (err);
997 }
998 
999 static void
1000 mlx5e_disable_rq(struct mlx5e_rq *rq)
1001 {
1002 	struct mlx5e_channel *c = rq->channel;
1003 	struct mlx5e_priv *priv = c->priv;
1004 	struct mlx5_core_dev *mdev = priv->mdev;
1005 
1006 	mlx5_core_destroy_rq(mdev, rq->rqn);
1007 }
1008 
1009 static int
1010 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
1011 {
1012 	struct mlx5e_channel *c = rq->channel;
1013 	struct mlx5e_priv *priv = c->priv;
1014 	struct mlx5_wq_ll *wq = &rq->wq;
1015 	int i;
1016 
1017 	for (i = 0; i < 1000; i++) {
1018 		if (wq->cur_sz >= priv->params.min_rx_wqes)
1019 			return (0);
1020 
1021 		msleep(4);
1022 	}
1023 	return (-ETIMEDOUT);
1024 }
1025 
1026 static int
1027 mlx5e_open_rq(struct mlx5e_channel *c,
1028     struct mlx5e_rq_param *param,
1029     struct mlx5e_rq *rq)
1030 {
1031 	int err;
1032 
1033 	err = mlx5e_create_rq(c, param, rq);
1034 	if (err)
1035 		return (err);
1036 
1037 	err = mlx5e_enable_rq(rq, param);
1038 	if (err)
1039 		goto err_destroy_rq;
1040 
1041 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
1042 	if (err)
1043 		goto err_disable_rq;
1044 
1045 	c->rq.enabled = 1;
1046 
1047 	return (0);
1048 
1049 err_disable_rq:
1050 	mlx5e_disable_rq(rq);
1051 err_destroy_rq:
1052 	mlx5e_destroy_rq(rq);
1053 
1054 	return (err);
1055 }
1056 
1057 static void
1058 mlx5e_close_rq(struct mlx5e_rq *rq)
1059 {
1060 	mtx_lock(&rq->mtx);
1061 	rq->enabled = 0;
1062 	callout_stop(&rq->watchdog);
1063 	mtx_unlock(&rq->mtx);
1064 
1065 	callout_drain(&rq->watchdog);
1066 
1067 	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
1068 }
1069 
1070 static void
1071 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
1072 {
1073 	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;
1074 
1075 	/* wait till RQ is empty */
1076 	while (!mlx5_wq_ll_is_empty(&rq->wq) &&
1077 	       (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
1078 		msleep(4);
1079 		rq->cq.mcq.comp(&rq->cq.mcq);
1080 	}
1081 
1082 	mlx5e_disable_rq(rq);
1083 	mlx5e_destroy_rq(rq);
1084 }
1085 
1086 void
1087 mlx5e_free_sq_db(struct mlx5e_sq *sq)
1088 {
1089 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1090 	int x;
1091 
1092 	for (x = 0; x != wq_sz; x++)
1093 		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1094 	free(sq->mbuf, M_MLX5EN);
1095 }
1096 
1097 int
1098 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
1099 {
1100 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1101 	int err;
1102 	int x;
1103 
1104 	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
1105 
1106 	/* Create DMA descriptor MAPs */
1107 	for (x = 0; x != wq_sz; x++) {
1108 		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
1109 		if (err != 0) {
1110 			while (x--)
1111 				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1112 			free(sq->mbuf, M_MLX5EN);
1113 			return (err);
1114 		}
1115 	}
1116 	return (0);
1117 }
1118 
/*
 * String table for the per-SQ statistics.  It is handed to
 * mlx5e_create_stats() together with MLX5E_SQ_STATS_NUM when a send
 * queue is created.  The entries are generated by applying the
 * MLX5E_SQ_STATS() list macro to MLX5E_STATS_DESC; both macros are
 * defined outside this part of the file.
 */
static const char *mlx5e_sq_stats_desc[] = {
	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};
1122 
1123 static int
1124 mlx5e_create_sq(struct mlx5e_channel *c,
1125     int tc,
1126     struct mlx5e_sq_param *param,
1127     struct mlx5e_sq *sq)
1128 {
1129 	struct mlx5e_priv *priv = c->priv;
1130 	struct mlx5_core_dev *mdev = priv->mdev;
1131 	char buffer[16];
1132 
1133 	void *sqc = param->sqc;
1134 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
1135 #ifdef RSS
1136 	cpuset_t cpu_mask;
1137 	int cpu_id;
1138 #endif
1139 	int err;
1140 
1141 	/* Create DMA descriptor TAG */
1142 	if ((err = -bus_dma_tag_create(
1143 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
1144 	    1,				/* any alignment */
1145 	    0,				/* no boundary */
1146 	    BUS_SPACE_MAXADDR,		/* lowaddr */
1147 	    BUS_SPACE_MAXADDR,		/* highaddr */
1148 	    NULL, NULL,			/* filter, filterarg */
1149 	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
1150 	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
1151 	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
1152 	    0,				/* flags */
1153 	    NULL, NULL,			/* lockfunc, lockfuncarg */
1154 	    &sq->dma_tag)))
1155 		goto done;
1156 
1157 	err = mlx5_alloc_map_uar(mdev, &sq->uar);
1158 	if (err)
1159 		goto err_free_dma_tag;
1160 
1161 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
1162 	    &sq->wq_ctrl);
1163 	if (err)
1164 		goto err_unmap_free_uar;
1165 
1166 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1167 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
1168 
1169 	err = mlx5e_alloc_sq_db(sq);
1170 	if (err)
1171 		goto err_sq_wq_destroy;
1172 
1173 	sq->mkey_be = c->mkey_be;
1174 	sq->ifp = priv->ifp;
1175 	sq->priv = priv;
1176 	sq->tc = tc;
1177 	sq->max_inline = priv->params.tx_max_inline;
1178 	sq->min_inline_mode = priv->params.tx_min_inline_mode;
1179 	sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert);
1180 
1181 	/* check if we should allocate a second packet buffer */
1182 	if (priv->params_ethtool.tx_bufring_disable == 0) {
1183 		sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
1184 		    M_WAITOK, &sq->lock);
1185 		if (sq->br == NULL) {
1186 			if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
1187 			    __func__);
1188 			err = -ENOMEM;
1189 			goto err_free_sq_db;
1190 		}
1191 
1192 		sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
1193 		    taskqueue_thread_enqueue, &sq->sq_tq);
1194 		if (sq->sq_tq == NULL) {
1195 			if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
1196 			    __func__);
1197 			err = -ENOMEM;
1198 			goto err_free_drbr;
1199 		}
1200 
1201 		TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
1202 #ifdef RSS
1203 		cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
1204 		CPU_SETOF(cpu_id, &cpu_mask);
1205 		taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
1206 		    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
1207 #else
1208 		taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
1209 		    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
1210 #endif
1211 	}
1212 	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1213 	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1214 	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1215 	    sq->stats.arg);
1216 
1217 	return (0);
1218 
1219 err_free_drbr:
1220 	buf_ring_free(sq->br, M_MLX5EN);
1221 err_free_sq_db:
1222 	mlx5e_free_sq_db(sq);
1223 err_sq_wq_destroy:
1224 	mlx5_wq_destroy(&sq->wq_ctrl);
1225 
1226 err_unmap_free_uar:
1227 	mlx5_unmap_free_uar(mdev, &sq->uar);
1228 
1229 err_free_dma_tag:
1230 	bus_dma_tag_destroy(sq->dma_tag);
1231 done:
1232 	return (err);
1233 }
1234 
1235 static void
1236 mlx5e_destroy_sq(struct mlx5e_sq *sq)
1237 {
1238 	/* destroy all sysctl nodes */
1239 	sysctl_ctx_free(&sq->stats.ctx);
1240 
1241 	mlx5e_free_sq_db(sq);
1242 	mlx5_wq_destroy(&sq->wq_ctrl);
1243 	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
1244 	if (sq->sq_tq != NULL) {
1245 		taskqueue_drain(sq->sq_tq, &sq->sq_task);
1246 		taskqueue_free(sq->sq_tq);
1247 	}
1248 	if (sq->br != NULL)
1249 		buf_ring_free(sq->br, M_MLX5EN);
1250 }
1251 
1252 int
1253 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
1254     int tis_num)
1255 {
1256 	void *in;
1257 	void *sqc;
1258 	void *wq;
1259 	int inlen;
1260 	int err;
1261 
1262 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1263 	    sizeof(u64) * sq->wq_ctrl.buf.npages;
1264 	in = mlx5_vzalloc(inlen);
1265 	if (in == NULL)
1266 		return (-ENOMEM);
1267 
1268 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1269 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
1270 
1271 	memcpy(sqc, param->sqc, sizeof(param->sqc));
1272 
1273 	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
1274 	MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
1275 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1276 	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1277 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1278 
1279 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1280 	MLX5_SET(wq, wq, uar_page, sq->uar.index);
1281 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1282 	    PAGE_SHIFT);
1283 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1284 
1285 	mlx5_fill_page_array(&sq->wq_ctrl.buf,
1286 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1287 
1288 	err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
1289 
1290 	kvfree(in);
1291 
1292 	return (err);
1293 }
1294 
1295 int
1296 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1297 {
1298 	void *in;
1299 	void *sqc;
1300 	int inlen;
1301 	int err;
1302 
1303 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1304 	in = mlx5_vzalloc(inlen);
1305 	if (in == NULL)
1306 		return (-ENOMEM);
1307 
1308 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1309 
1310 	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1311 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1312 	MLX5_SET(sqc, sqc, state, next_state);
1313 
1314 	err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
1315 
1316 	kvfree(in);
1317 
1318 	return (err);
1319 }
1320 
1321 void
1322 mlx5e_disable_sq(struct mlx5e_sq *sq)
1323 {
1324 
1325 	mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
1326 }
1327 
1328 static int
1329 mlx5e_open_sq(struct mlx5e_channel *c,
1330     int tc,
1331     struct mlx5e_sq_param *param,
1332     struct mlx5e_sq *sq)
1333 {
1334 	int err;
1335 
1336 	err = mlx5e_create_sq(c, tc, param, sq);
1337 	if (err)
1338 		return (err);
1339 
1340 	err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
1341 	if (err)
1342 		goto err_destroy_sq;
1343 
1344 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1345 	if (err)
1346 		goto err_disable_sq;
1347 
1348 	WRITE_ONCE(sq->queue_state, MLX5E_SQ_READY);
1349 
1350 	return (0);
1351 
1352 err_disable_sq:
1353 	mlx5e_disable_sq(sq);
1354 err_destroy_sq:
1355 	mlx5e_destroy_sq(sq);
1356 
1357 	return (err);
1358 }
1359 
1360 static void
1361 mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
1362 {
1363 	/* fill up remainder with NOPs */
1364 	while (sq->cev_counter != 0) {
1365 		while (!mlx5e_sq_has_room_for(sq, 1)) {
1366 			if (can_sleep != 0) {
1367 				mtx_unlock(&sq->lock);
1368 				msleep(4);
1369 				mtx_lock(&sq->lock);
1370 			} else {
1371 				goto done;
1372 			}
1373 		}
1374 		/* send a single NOP */
1375 		mlx5e_send_nop(sq, 1);
1376 		atomic_thread_fence_rel();
1377 	}
1378 done:
1379 	/* Check if we need to write the doorbell */
1380 	if (likely(sq->doorbell.d64 != 0)) {
1381 		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
1382 		sq->doorbell.d64 = 0;
1383 	}
1384 }
1385 
1386 void
1387 mlx5e_sq_cev_timeout(void *arg)
1388 {
1389 	struct mlx5e_sq *sq = arg;
1390 
1391 	mtx_assert(&sq->lock, MA_OWNED);
1392 
1393 	/* check next state */
1394 	switch (sq->cev_next_state) {
1395 	case MLX5E_CEV_STATE_SEND_NOPS:
1396 		/* fill TX ring with NOPs, if any */
1397 		mlx5e_sq_send_nops_locked(sq, 0);
1398 
1399 		/* check if completed */
1400 		if (sq->cev_counter == 0) {
1401 			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1402 			return;
1403 		}
1404 		break;
1405 	default:
1406 		/* send NOPs on next timeout */
1407 		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1408 		break;
1409 	}
1410 
1411 	/* restart timer */
1412 	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1413 }
1414 
1415 void
1416 mlx5e_drain_sq(struct mlx5e_sq *sq)
1417 {
1418 	int error;
1419 	struct mlx5_core_dev *mdev= sq->priv->mdev;
1420 
1421 	/*
1422 	 * Check if already stopped.
1423 	 *
1424 	 * NOTE: The "stopped" variable is only written when both the
1425 	 * priv's configuration lock and the SQ's lock is locked. It
1426 	 * can therefore safely be read when only one of the two locks
1427 	 * is locked. This function is always called when the priv's
1428 	 * configuration lock is locked.
1429 	 */
1430 	if (sq->stopped != 0)
1431 		return;
1432 
1433 	mtx_lock(&sq->lock);
1434 
1435 	/* don't put more packets into the SQ */
1436 	sq->stopped = 1;
1437 
1438 	/* teardown event factor timer, if any */
1439 	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1440 	callout_stop(&sq->cev_callout);
1441 
1442 	/* send dummy NOPs in order to flush the transmit ring */
1443 	mlx5e_sq_send_nops_locked(sq, 1);
1444 	mtx_unlock(&sq->lock);
1445 
1446 	/* make sure it is safe to free the callout */
1447 	callout_drain(&sq->cev_callout);
1448 
1449 	/* wait till SQ is empty or link is down */
1450 	mtx_lock(&sq->lock);
1451 	while (sq->cc != sq->pc &&
1452 	    (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
1453 	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1454 		mtx_unlock(&sq->lock);
1455 		msleep(1);
1456 		sq->cq.mcq.comp(&sq->cq.mcq);
1457 		mtx_lock(&sq->lock);
1458 	}
1459 	mtx_unlock(&sq->lock);
1460 
1461 	/* error out remaining requests */
1462 	error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1463 	if (error != 0) {
1464 		if_printf(sq->ifp,
1465 		    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
1466 	}
1467 
1468 	/* wait till SQ is empty */
1469 	mtx_lock(&sq->lock);
1470 	while (sq->cc != sq->pc &&
1471 	       mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1472 		mtx_unlock(&sq->lock);
1473 		msleep(1);
1474 		sq->cq.mcq.comp(&sq->cq.mcq);
1475 		mtx_lock(&sq->lock);
1476 	}
1477 	mtx_unlock(&sq->lock);
1478 }
1479 
/*
 * Full send queue teardown: drain the queue, destroy the hardware
 * object, then release the software state.
 */
static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{
	mlx5e_drain_sq(sq);	/* stop and flush */
	mlx5e_disable_sq(sq);	/* hardware object */
	mlx5e_destroy_sq(sq);	/* software state */
}
1488 
1489 static int
1490 mlx5e_create_cq(struct mlx5e_priv *priv,
1491     struct mlx5e_cq_param *param,
1492     struct mlx5e_cq *cq,
1493     mlx5e_cq_comp_t *comp,
1494     int eq_ix)
1495 {
1496 	struct mlx5_core_dev *mdev = priv->mdev;
1497 	struct mlx5_core_cq *mcq = &cq->mcq;
1498 	int eqn_not_used;
1499 	int irqn;
1500 	int err;
1501 	u32 i;
1502 
1503 	param->wq.buf_numa_node = 0;
1504 	param->wq.db_numa_node = 0;
1505 
1506 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1507 	    &cq->wq_ctrl);
1508 	if (err)
1509 		return (err);
1510 
1511 	mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
1512 
1513 	mcq->cqe_sz = 64;
1514 	mcq->set_ci_db = cq->wq_ctrl.db.db;
1515 	mcq->arm_db = cq->wq_ctrl.db.db + 1;
1516 	*mcq->set_ci_db = 0;
1517 	*mcq->arm_db = 0;
1518 	mcq->vector = eq_ix;
1519 	mcq->comp = comp;
1520 	mcq->event = mlx5e_cq_error_event;
1521 	mcq->irqn = irqn;
1522 	mcq->uar = &priv->cq_uar;
1523 
1524 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1525 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1526 
1527 		cqe->op_own = 0xf1;
1528 	}
1529 
1530 	cq->priv = priv;
1531 
1532 	return (0);
1533 }
1534 
1535 static void
1536 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1537 {
1538 	mlx5_wq_destroy(&cq->wq_ctrl);
1539 }
1540 
1541 static int
1542 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1543 {
1544 	struct mlx5_core_cq *mcq = &cq->mcq;
1545 	void *in;
1546 	void *cqc;
1547 	int inlen;
1548 	int irqn_not_used;
1549 	int eqn;
1550 	int err;
1551 
1552 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1553 	    sizeof(u64) * cq->wq_ctrl.buf.npages;
1554 	in = mlx5_vzalloc(inlen);
1555 	if (in == NULL)
1556 		return (-ENOMEM);
1557 
1558 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1559 
1560 	memcpy(cqc, param->cqc, sizeof(param->cqc));
1561 
1562 	mlx5_fill_page_array(&cq->wq_ctrl.buf,
1563 	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1564 
1565 	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1566 
1567 	MLX5_SET(cqc, cqc, c_eqn, eqn);
1568 	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1569 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1570 	    PAGE_SHIFT);
1571 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1572 
1573 	err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1574 
1575 	kvfree(in);
1576 
1577 	if (err)
1578 		return (err);
1579 
1580 	mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
1581 
1582 	return (0);
1583 }
1584 
1585 static void
1586 mlx5e_disable_cq(struct mlx5e_cq *cq)
1587 {
1588 
1589 	mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1590 }
1591 
1592 int
1593 mlx5e_open_cq(struct mlx5e_priv *priv,
1594     struct mlx5e_cq_param *param,
1595     struct mlx5e_cq *cq,
1596     mlx5e_cq_comp_t *comp,
1597     int eq_ix)
1598 {
1599 	int err;
1600 
1601 	err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1602 	if (err)
1603 		return (err);
1604 
1605 	err = mlx5e_enable_cq(cq, param, eq_ix);
1606 	if (err)
1607 		goto err_destroy_cq;
1608 
1609 	return (0);
1610 
1611 err_destroy_cq:
1612 	mlx5e_destroy_cq(cq);
1613 
1614 	return (err);
1615 }
1616 
/*
 * Tear down a completion queue opened by mlx5e_open_cq(): hardware
 * object first, software state second.
 */
void
mlx5e_close_cq(struct mlx5e_cq *cq)
{

	mlx5e_disable_cq(cq);	/* hardware object */
	mlx5e_destroy_cq(cq);	/* software state */
}
1623 
1624 static int
1625 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1626     struct mlx5e_channel_param *cparam)
1627 {
1628 	int err;
1629 	int tc;
1630 
1631 	for (tc = 0; tc < c->num_tc; tc++) {
1632 		/* open completion queue */
1633 		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1634 		    &mlx5e_tx_cq_comp, c->ix);
1635 		if (err)
1636 			goto err_close_tx_cqs;
1637 	}
1638 	return (0);
1639 
1640 err_close_tx_cqs:
1641 	for (tc--; tc >= 0; tc--)
1642 		mlx5e_close_cq(&c->sq[tc].cq);
1643 
1644 	return (err);
1645 }
1646 
1647 static void
1648 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1649 {
1650 	int tc;
1651 
1652 	for (tc = 0; tc < c->num_tc; tc++)
1653 		mlx5e_close_cq(&c->sq[tc].cq);
1654 }
1655 
1656 static int
1657 mlx5e_open_sqs(struct mlx5e_channel *c,
1658     struct mlx5e_channel_param *cparam)
1659 {
1660 	int err;
1661 	int tc;
1662 
1663 	for (tc = 0; tc < c->num_tc; tc++) {
1664 		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1665 		if (err)
1666 			goto err_close_sqs;
1667 	}
1668 
1669 	return (0);
1670 
1671 err_close_sqs:
1672 	for (tc--; tc >= 0; tc--)
1673 		mlx5e_close_sq_wait(&c->sq[tc]);
1674 
1675 	return (err);
1676 }
1677 
1678 static void
1679 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1680 {
1681 	int tc;
1682 
1683 	for (tc = 0; tc < c->num_tc; tc++)
1684 		mlx5e_close_sq_wait(&c->sq[tc]);
1685 }
1686 
1687 static void
1688 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1689 {
1690 	int tc;
1691 
1692 	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1693 
1694 	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1695 
1696 	for (tc = 0; tc < c->num_tc; tc++) {
1697 		struct mlx5e_sq *sq = c->sq + tc;
1698 
1699 		mtx_init(&sq->lock, "mlx5tx",
1700 		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1701 		mtx_init(&sq->comp_lock, "mlx5comp",
1702 		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1703 
1704 		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1705 
1706 		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1707 
1708 		/* ensure the TX completion event factor is not zero */
1709 		if (sq->cev_factor == 0)
1710 			sq->cev_factor = 1;
1711 	}
1712 }
1713 
1714 static void
1715 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1716 {
1717 	int tc;
1718 
1719 	mtx_destroy(&c->rq.mtx);
1720 
1721 	for (tc = 0; tc < c->num_tc; tc++) {
1722 		mtx_destroy(&c->sq[tc].lock);
1723 		mtx_destroy(&c->sq[tc].comp_lock);
1724 	}
1725 }
1726 
1727 static int
1728 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1729     struct mlx5e_channel_param *cparam,
1730     struct mlx5e_channel *volatile *cp)
1731 {
1732 	struct mlx5e_channel *c;
1733 	int err;
1734 
1735 	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1736 	c->priv = priv;
1737 	c->ix = ix;
1738 	c->cpu = 0;
1739 	c->ifp = priv->ifp;
1740 	c->mkey_be = cpu_to_be32(priv->mr.key);
1741 	c->num_tc = priv->num_tc;
1742 
1743 	/* init mutexes */
1744 	mlx5e_chan_mtx_init(c);
1745 
1746 	/* open transmit completion queue */
1747 	err = mlx5e_open_tx_cqs(c, cparam);
1748 	if (err)
1749 		goto err_free;
1750 
1751 	/* open receive completion queue */
1752 	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1753 	    &mlx5e_rx_cq_comp, c->ix);
1754 	if (err)
1755 		goto err_close_tx_cqs;
1756 
1757 	err = mlx5e_open_sqs(c, cparam);
1758 	if (err)
1759 		goto err_close_rx_cq;
1760 
1761 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1762 	if (err)
1763 		goto err_close_sqs;
1764 
1765 	/* store channel pointer */
1766 	*cp = c;
1767 
1768 	/* poll receive queue initially */
1769 	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1770 
1771 	return (0);
1772 
1773 err_close_sqs:
1774 	mlx5e_close_sqs_wait(c);
1775 
1776 err_close_rx_cq:
1777 	mlx5e_close_cq(&c->rq.cq);
1778 
1779 err_close_tx_cqs:
1780 	mlx5e_close_tx_cqs(c);
1781 
1782 err_free:
1783 	/* destroy mutexes */
1784 	mlx5e_chan_mtx_destroy(c);
1785 	free(c, M_MLX5EN);
1786 	return (err);
1787 }
1788 
1789 static void
1790 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1791 {
1792 	struct mlx5e_channel *c = *pp;
1793 
1794 	/* check if channel is already closed */
1795 	if (c == NULL)
1796 		return;
1797 	mlx5e_close_rq(&c->rq);
1798 }
1799 
1800 static void
1801 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1802 {
1803 	struct mlx5e_channel *c = *pp;
1804 
1805 	/* check if channel is already closed */
1806 	if (c == NULL)
1807 		return;
1808 	/* ensure channel pointer is no longer used */
1809 	*pp = NULL;
1810 
1811 	mlx5e_close_rq_wait(&c->rq);
1812 	mlx5e_close_sqs_wait(c);
1813 	mlx5e_close_cq(&c->rq.cq);
1814 	mlx5e_close_tx_cqs(c);
1815 	/* destroy mutexes */
1816 	mlx5e_chan_mtx_destroy(c);
1817 	free(c, M_MLX5EN);
1818 }
1819 
1820 static int
1821 mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
1822 {
1823 	u32 r, n;
1824 
1825 	r = priv->params.hw_lro_en ? priv->params.lro_wqe_sz :
1826 	    MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
1827 	if (r > MJUM16BYTES)
1828 		return (-ENOMEM);
1829 
1830 	if (r > MJUM9BYTES)
1831 		r = MJUM16BYTES;
1832 	else if (r > MJUMPAGESIZE)
1833 		r = MJUM9BYTES;
1834 	else if (r > MCLBYTES)
1835 		r = MJUMPAGESIZE;
1836 	else
1837 		r = MCLBYTES;
1838 
1839 	/*
1840 	 * n + 1 must be a power of two, because stride size must be.
1841 	 * Stride size is 16 * (n + 1), as the first segment is
1842 	 * control.
1843 	 */
1844 	for (n = howmany(r, MLX5E_MAX_RX_BYTES); !powerof2(n + 1); n++)
1845 		;
1846 
1847 	*wqe_sz = r;
1848 	*nsegs = n;
1849 	return (0);
1850 }
1851 
1852 static void
1853 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1854     struct mlx5e_rq_param *param)
1855 {
1856 	void *rqc = param->rqc;
1857 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1858 	u32 wqe_sz, nsegs;
1859 
1860 	mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
1861 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1862 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1863 	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
1864 	    nsegs * sizeof(struct mlx5_wqe_data_seg)));
1865 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1866 	MLX5_SET(wq, wq, pd, priv->pdn);
1867 
1868 	param->wq.buf_numa_node = 0;
1869 	param->wq.db_numa_node = 0;
1870 	param->wq.linear = 1;
1871 }
1872 
1873 static void
1874 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1875     struct mlx5e_sq_param *param)
1876 {
1877 	void *sqc = param->sqc;
1878 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1879 
1880 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1881 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1882 	MLX5_SET(wq, wq, pd, priv->pdn);
1883 
1884 	param->wq.buf_numa_node = 0;
1885 	param->wq.db_numa_node = 0;
1886 	param->wq.linear = 1;
1887 }
1888 
1889 static void
1890 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1891     struct mlx5e_cq_param *param)
1892 {
1893 	void *cqc = param->cqc;
1894 
1895 	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1896 }
1897 
1898 static void
1899 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1900     struct mlx5e_cq_param *param)
1901 {
1902 	void *cqc = param->cqc;
1903 
1904 
1905 	/*
1906 	 * TODO The sysctl to control on/off is a bool value for now, which means
1907 	 * we only support CSUM, once HASH is implemnted we'll need to address that.
1908 	 */
1909 	if (priv->params.cqe_zipping_en) {
1910 		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1911 		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1912 	}
1913 
1914 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1915 	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1916 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1917 
1918 	switch (priv->params.rx_cq_moderation_mode) {
1919 	case 0:
1920 		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1921 		break;
1922 	default:
1923 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1924 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1925 		else
1926 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1927 		break;
1928 	}
1929 
1930 	mlx5e_build_common_cq_param(priv, param);
1931 }
1932 
1933 static void
1934 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1935     struct mlx5e_cq_param *param)
1936 {
1937 	void *cqc = param->cqc;
1938 
1939 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1940 	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1941 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1942 
1943 	switch (priv->params.tx_cq_moderation_mode) {
1944 	case 0:
1945 		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1946 		break;
1947 	default:
1948 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1949 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1950 		else
1951 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1952 		break;
1953 	}
1954 
1955 	mlx5e_build_common_cq_param(priv, param);
1956 }
1957 
1958 static void
1959 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1960     struct mlx5e_channel_param *cparam)
1961 {
1962 	memset(cparam, 0, sizeof(*cparam));
1963 
1964 	mlx5e_build_rq_param(priv, &cparam->rq);
1965 	mlx5e_build_sq_param(priv, &cparam->sq);
1966 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1967 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1968 }
1969 
1970 static int
1971 mlx5e_open_channels(struct mlx5e_priv *priv)
1972 {
1973 	struct mlx5e_channel_param cparam;
1974 	void *ptr;
1975 	int err;
1976 	int i;
1977 	int j;
1978 
1979 	priv->channel = malloc(priv->params.num_channels *
1980 	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1981 
1982 	mlx5e_build_channel_param(priv, &cparam);
1983 	for (i = 0; i < priv->params.num_channels; i++) {
1984 		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1985 		if (err)
1986 			goto err_close_channels;
1987 	}
1988 
1989 	for (j = 0; j < priv->params.num_channels; j++) {
1990 		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1991 		if (err)
1992 			goto err_close_channels;
1993 	}
1994 
1995 	return (0);
1996 
1997 err_close_channels:
1998 	for (i--; i >= 0; i--) {
1999 		mlx5e_close_channel(&priv->channel[i]);
2000 		mlx5e_close_channel_wait(&priv->channel[i]);
2001 	}
2002 
2003 	/* remove "volatile" attribute from "channel" pointer */
2004 	ptr = __DECONST(void *, priv->channel);
2005 	priv->channel = NULL;
2006 
2007 	free(ptr, M_MLX5EN);
2008 
2009 	return (err);
2010 }
2011 
2012 static void
2013 mlx5e_close_channels(struct mlx5e_priv *priv)
2014 {
2015 	void *ptr;
2016 	int i;
2017 
2018 	if (priv->channel == NULL)
2019 		return;
2020 
2021 	for (i = 0; i < priv->params.num_channels; i++)
2022 		mlx5e_close_channel(&priv->channel[i]);
2023 	for (i = 0; i < priv->params.num_channels; i++)
2024 		mlx5e_close_channel_wait(&priv->channel[i]);
2025 
2026 	/* remove "volatile" attribute from "channel" pointer */
2027 	ptr = __DECONST(void *, priv->channel);
2028 	priv->channel = NULL;
2029 
2030 	free(ptr, M_MLX5EN);
2031 }
2032 
2033 static int
2034 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
2035 {
2036 
2037 	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2038 		uint8_t cq_mode;
2039 
2040 		switch (priv->params.tx_cq_moderation_mode) {
2041 		case 0:
2042 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2043 			break;
2044 		default:
2045 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2046 			break;
2047 		}
2048 
2049 		return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2050 		    priv->params.tx_cq_moderation_usec,
2051 		    priv->params.tx_cq_moderation_pkts,
2052 		    cq_mode));
2053 	}
2054 
2055 	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2056 	    priv->params.tx_cq_moderation_usec,
2057 	    priv->params.tx_cq_moderation_pkts));
2058 }
2059 
2060 static int
2061 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2062 {
2063 
2064 	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2065 		uint8_t cq_mode;
2066 		int retval;
2067 
2068 		switch (priv->params.rx_cq_moderation_mode) {
2069 		case 0:
2070 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2071 			break;
2072 		default:
2073 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2074 			break;
2075 		}
2076 
2077 		retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2078 		    priv->params.rx_cq_moderation_usec,
2079 		    priv->params.rx_cq_moderation_pkts,
2080 		    cq_mode);
2081 
2082 		return (retval);
2083 	}
2084 
2085 	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2086 	    priv->params.rx_cq_moderation_usec,
2087 	    priv->params.rx_cq_moderation_pkts));
2088 }
2089 
2090 static int
2091 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2092 {
2093 	int err;
2094 	int i;
2095 
2096 	if (c == NULL)
2097 		return (EINVAL);
2098 
2099 	err = mlx5e_refresh_rq_params(priv, &c->rq);
2100 	if (err)
2101 		goto done;
2102 
2103 	for (i = 0; i != c->num_tc; i++) {
2104 		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2105 		if (err)
2106 			goto done;
2107 	}
2108 done:
2109 	return (err);
2110 }
2111 
2112 int
2113 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2114 {
2115 	int i;
2116 
2117 	if (priv->channel == NULL)
2118 		return (EINVAL);
2119 
2120 	for (i = 0; i < priv->params.num_channels; i++) {
2121 		int err;
2122 
2123 		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
2124 		if (err)
2125 			return (err);
2126 	}
2127 	return (0);
2128 }
2129 
2130 static int
2131 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2132 {
2133 	struct mlx5_core_dev *mdev = priv->mdev;
2134 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2135 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2136 
2137 	memset(in, 0, sizeof(in));
2138 
2139 	MLX5_SET(tisc, tisc, prio, tc);
2140 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2141 
2142 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2143 }
2144 
2145 static void
2146 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2147 {
2148 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2149 }
2150 
2151 static int
2152 mlx5e_open_tises(struct mlx5e_priv *priv)
2153 {
2154 	int num_tc = priv->num_tc;
2155 	int err;
2156 	int tc;
2157 
2158 	for (tc = 0; tc < num_tc; tc++) {
2159 		err = mlx5e_open_tis(priv, tc);
2160 		if (err)
2161 			goto err_close_tises;
2162 	}
2163 
2164 	return (0);
2165 
2166 err_close_tises:
2167 	for (tc--; tc >= 0; tc--)
2168 		mlx5e_close_tis(priv, tc);
2169 
2170 	return (err);
2171 }
2172 
2173 static void
2174 mlx5e_close_tises(struct mlx5e_priv *priv)
2175 {
2176 	int num_tc = priv->num_tc;
2177 	int tc;
2178 
2179 	for (tc = 0; tc < num_tc; tc++)
2180 		mlx5e_close_tis(priv, tc);
2181 }
2182 
2183 static int
2184 mlx5e_open_rqt(struct mlx5e_priv *priv)
2185 {
2186 	struct mlx5_core_dev *mdev = priv->mdev;
2187 	u32 *in;
2188 	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
2189 	void *rqtc;
2190 	int inlen;
2191 	int err;
2192 	int sz;
2193 	int i;
2194 
2195 	sz = 1 << priv->params.rx_hash_log_tbl_sz;
2196 
2197 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2198 	in = mlx5_vzalloc(inlen);
2199 	if (in == NULL)
2200 		return (-ENOMEM);
2201 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2202 
2203 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2204 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2205 
2206 	for (i = 0; i < sz; i++) {
2207 		int ix = i;
2208 #ifdef RSS
2209 		ix = rss_get_indirection_to_bucket(ix);
2210 #endif
2211 		/* ensure we don't overflow */
2212 		ix %= priv->params.num_channels;
2213 
2214 		/* apply receive side scaling stride, if any */
2215 		ix -= ix % (int)priv->params.channels_rsss;
2216 
2217 		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
2218 	}
2219 
2220 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2221 
2222 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
2223 	if (!err)
2224 		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2225 
2226 	kvfree(in);
2227 
2228 	return (err);
2229 }
2230 
2231 static void
2232 mlx5e_close_rqt(struct mlx5e_priv *priv)
2233 {
2234 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
2235 	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
2236 
2237 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2238 	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2239 
2240 	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
2241 }
2242 
/*
 * Fill in the TIR context "tirc" for traffic type "tt".
 *
 * The transport domain is always set.  When hardware LRO is enabled
 * the LRO fields are programmed as well.  MLX5E_TT_ANY gets direct
 * dispatch to the RQ of channel 0; every other traffic type gets
 * indirect dispatch through priv->rqtn using a Toeplitz hash, and the
 * second switch selects which packet fields feed that hash.
 */
static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	__be32 *hkey;

	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);

/* subtracted from the LRO WQE size when computing lro_max_msg_sz */
#define	ROUGH_MAX_L2_L3_HDR_SZ 256

/* hash on source and destination IP address only */
#define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
			  MLX5_HASH_FIELD_SEL_DST_IP)

/* hash on IP addresses and L4 ports */
#define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
			  MLX5_HASH_FIELD_SEL_DST_IP   |\
			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
			  MLX5_HASH_FIELD_SEL_L4_DPORT)

/* hash on IP addresses and the IPsec SPI */
#define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
				 MLX5_HASH_FIELD_SEL_DST_IP   |\
				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)

	if (priv->params.hw_lro_en) {
		MLX5_SET(tirc, tirc, lro_enable_mask,
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
		/* the field is written as (size - header allowance) >> 8 */
		MLX5_SET(tirc, tirc, lro_max_msg_sz,
		    (priv->params.lro_wqe_sz -
		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
		/* TODO: add the option to choose timer value dynamically */
		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
		    MLX5_CAP_ETH(priv->mdev,
		    lro_timer_supported_periods[2]));
	}

	/* setup parameters for hashing TIR type, if any */
	switch (tt) {
	case MLX5E_TT_ANY:
		/* no hashing: everything goes to the RQ of channel 0 */
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_DIRECT);
		MLX5_SET(tirc, tirc, inline_rqn,
		    priv->channel[0]->rq.rqn);
		break;
	default:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table,
		    priv->rqtn);
		MLX5_SET(tirc, tirc, rx_hash_fn,
		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
#ifdef RSS
		/*
		 * The FreeBSD RSS implementation does currently not
		 * support symmetric Toeplitz hashes:
		 */
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
		rss_getkey((uint8_t *)hkey);
#else
		/* without RSS: symmetric hashing with a fixed ten word key */
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
		hkey[0] = cpu_to_be32(0xD181C62C);
		hkey[1] = cpu_to_be32(0xF7F4DB5B);
		hkey[2] = cpu_to_be32(0x1983A2FC);
		hkey[3] = cpu_to_be32(0x943E1ADB);
		hkey[4] = cpu_to_be32(0xD9389E6B);
		hkey[5] = cpu_to_be32(0xD1039C2C);
		hkey[6] = cpu_to_be32(0xA74499AD);
		hkey[7] = cpu_to_be32(0x593D56D9);
		hkey[8] = cpu_to_be32(0xF3253C06);
		hkey[9] = cpu_to_be32(0x2ADC1FFC);
#endif
		break;
	}

	/*
	 * Select the hash input fields per traffic type.  In the TCP and
	 * UDP cases below, when RSS is defined the trailing "else" binds
	 * to the MLX5_SET() that follows the #endif: IP-only hashing is
	 * used if the RSS hash configuration lacks the matching L4 hash
	 * type, otherwise (and always without RSS) IP plus L4 ports.
	 */
	switch (tt) {
	case MLX5E_TT_IPV4_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	/* IPsec traffic types hash on the IP addresses plus the SPI */
	case MLX5E_TT_IPV4_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	/* plain IP traffic types hash on the IP addresses only */
	case MLX5E_TT_IPV4:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	case MLX5E_TT_IPV6:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	default:
		/* no hash field selection for any other traffic type */
		break;
	}
}
2424 
2425 static int
2426 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2427 {
2428 	struct mlx5_core_dev *mdev = priv->mdev;
2429 	u32 *in;
2430 	void *tirc;
2431 	int inlen;
2432 	int err;
2433 
2434 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2435 	in = mlx5_vzalloc(inlen);
2436 	if (in == NULL)
2437 		return (-ENOMEM);
2438 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2439 
2440 	mlx5e_build_tir_ctx(priv, tirc, tt);
2441 
2442 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2443 
2444 	kvfree(in);
2445 
2446 	return (err);
2447 }
2448 
2449 static void
2450 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2451 {
2452 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2453 }
2454 
2455 static int
2456 mlx5e_open_tirs(struct mlx5e_priv *priv)
2457 {
2458 	int err;
2459 	int i;
2460 
2461 	for (i = 0; i < MLX5E_NUM_TT; i++) {
2462 		err = mlx5e_open_tir(priv, i);
2463 		if (err)
2464 			goto err_close_tirs;
2465 	}
2466 
2467 	return (0);
2468 
2469 err_close_tirs:
2470 	for (i--; i >= 0; i--)
2471 		mlx5e_close_tir(priv, i);
2472 
2473 	return (err);
2474 }
2475 
2476 static void
2477 mlx5e_close_tirs(struct mlx5e_priv *priv)
2478 {
2479 	int i;
2480 
2481 	for (i = 0; i < MLX5E_NUM_TT; i++)
2482 		mlx5e_close_tir(priv, i);
2483 }
2484 
2485 /*
2486  * SW MTU does not include headers,
2487  * HW MTU includes all headers and checksums.
2488  */
2489 static int
2490 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2491 {
2492 	struct mlx5e_priv *priv = ifp->if_softc;
2493 	struct mlx5_core_dev *mdev = priv->mdev;
2494 	int hw_mtu;
2495 	int err;
2496 
2497 	hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2498 
2499 	err = mlx5_set_port_mtu(mdev, hw_mtu);
2500 	if (err) {
2501 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2502 		    __func__, sw_mtu, err);
2503 		return (err);
2504 	}
2505 
2506 	/* Update vport context MTU */
2507 	err = mlx5_set_vport_mtu(mdev, hw_mtu);
2508 	if (err) {
2509 		if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2510 		    __func__, err);
2511 	}
2512 
2513 	ifp->if_mtu = sw_mtu;
2514 
2515 	err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2516 	if (err || !hw_mtu) {
2517 		/* fallback to port oper mtu */
2518 		err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2519 	}
2520 	if (err) {
2521 		if_printf(ifp, "Query port MTU, after setting new "
2522 		    "MTU value, failed\n");
2523 		return (err);
2524 	} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2525 		err = -E2BIG,
2526 		if_printf(ifp, "Port MTU %d is smaller than "
2527                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2528 	} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2529 		err = -EINVAL;
2530                 if_printf(ifp, "Port MTU %d is bigger than "
2531                     "ifp mtu %d\n", hw_mtu, sw_mtu);
2532 	}
2533 	priv->params_ethtool.hw_mtu = hw_mtu;
2534 
2535 	return (err);
2536 }
2537 
2538 int
2539 mlx5e_open_locked(struct ifnet *ifp)
2540 {
2541 	struct mlx5e_priv *priv = ifp->if_softc;
2542 	int err;
2543 	u16 set_id;
2544 
2545 	/* check if already opened */
2546 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2547 		return (0);
2548 
2549 #ifdef RSS
2550 	if (rss_getnumbuckets() > priv->params.num_channels) {
2551 		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
2552 		    "channels(%u) available\n", rss_getnumbuckets(),
2553 		    priv->params.num_channels);
2554 	}
2555 #endif
2556 	err = mlx5e_open_tises(priv);
2557 	if (err) {
2558 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2559 		    __func__, err);
2560 		return (err);
2561 	}
2562 	err = mlx5_vport_alloc_q_counter(priv->mdev,
2563 	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2564 	if (err) {
2565 		if_printf(priv->ifp,
2566 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2567 		    __func__, err);
2568 		goto err_close_tises;
2569 	}
2570 	/* store counter set ID */
2571 	priv->counter_set_id = set_id;
2572 
2573 	err = mlx5e_open_channels(priv);
2574 	if (err) {
2575 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2576 		    __func__, err);
2577 		goto err_dalloc_q_counter;
2578 	}
2579 	err = mlx5e_open_rqt(priv);
2580 	if (err) {
2581 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2582 		    __func__, err);
2583 		goto err_close_channels;
2584 	}
2585 	err = mlx5e_open_tirs(priv);
2586 	if (err) {
2587 		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
2588 		    __func__, err);
2589 		goto err_close_rqls;
2590 	}
2591 	err = mlx5e_open_flow_table(priv);
2592 	if (err) {
2593 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2594 		    __func__, err);
2595 		goto err_close_tirs;
2596 	}
2597 	err = mlx5e_add_all_vlan_rules(priv);
2598 	if (err) {
2599 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2600 		    __func__, err);
2601 		goto err_close_flow_table;
2602 	}
2603 	set_bit(MLX5E_STATE_OPENED, &priv->state);
2604 
2605 	mlx5e_update_carrier(priv);
2606 	mlx5e_set_rx_mode_core(priv);
2607 
2608 	return (0);
2609 
2610 err_close_flow_table:
2611 	mlx5e_close_flow_table(priv);
2612 
2613 err_close_tirs:
2614 	mlx5e_close_tirs(priv);
2615 
2616 err_close_rqls:
2617 	mlx5e_close_rqt(priv);
2618 
2619 err_close_channels:
2620 	mlx5e_close_channels(priv);
2621 
2622 err_dalloc_q_counter:
2623 	mlx5_vport_dealloc_q_counter(priv->mdev,
2624 	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2625 
2626 err_close_tises:
2627 	mlx5e_close_tises(priv);
2628 
2629 	return (err);
2630 }
2631 
2632 static void
2633 mlx5e_open(void *arg)
2634 {
2635 	struct mlx5e_priv *priv = arg;
2636 
2637 	PRIV_LOCK(priv);
2638 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2639 		if_printf(priv->ifp,
2640 		    "%s: Setting port status to up failed\n",
2641 		    __func__);
2642 
2643 	mlx5e_open_locked(priv->ifp);
2644 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2645 	PRIV_UNLOCK(priv);
2646 }
2647 
2648 int
2649 mlx5e_close_locked(struct ifnet *ifp)
2650 {
2651 	struct mlx5e_priv *priv = ifp->if_softc;
2652 
2653 	/* check if already closed */
2654 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2655 		return (0);
2656 
2657 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2658 
2659 	mlx5e_set_rx_mode_core(priv);
2660 	mlx5e_del_all_vlan_rules(priv);
2661 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2662 	mlx5e_close_flow_table(priv);
2663 	mlx5e_close_tirs(priv);
2664 	mlx5e_close_rqt(priv);
2665 	mlx5e_close_channels(priv);
2666 	mlx5_vport_dealloc_q_counter(priv->mdev,
2667 	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2668 	mlx5e_close_tises(priv);
2669 
2670 	return (0);
2671 }
2672 
#if (__FreeBSD_version >= 1100000)
/*
 * Return the value of interface counter "cnt" from the cached vport
 * and pport statistics in the private structure.  Counters without a
 * case below are served by if_get_counter_default().
 *
 * The statistics are read without taking the private lock.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (priv->stats.vport.rx_packets);
	case IFCOUNTER_IERRORS:
		/* vport receive errors plus all physical port RX errors */
		return (priv->stats.vport.rx_error_packets +
		    priv->stats.pport.alignment_err +
		    priv->stats.pport.check_seq_err +
		    priv->stats.pport.crc_align_errors +
		    priv->stats.pport.in_range_len_errors +
		    priv->stats.pport.jabbers +
		    priv->stats.pport.out_of_range_len +
		    priv->stats.pport.oversize_pkts +
		    priv->stats.pport.symbol_err +
		    priv->stats.pport.too_long_errors +
		    priv->stats.pport.undersize_pkts +
		    priv->stats.pport.unsupported_op_rx);
	case IFCOUNTER_IQDROPS:
		return (priv->stats.vport.rx_out_of_buffer +
		    priv->stats.pport.drop_events);
	case IFCOUNTER_OPACKETS:
		return (priv->stats.vport.tx_packets);
	case IFCOUNTER_OERRORS:
		return (priv->stats.vport.tx_error_packets);
	case IFCOUNTER_IBYTES:
		return (priv->stats.vport.rx_bytes);
	case IFCOUNTER_OBYTES:
		return (priv->stats.vport.tx_bytes);
	case IFCOUNTER_IMCASTS:
		return (priv->stats.vport.rx_multicast_packets);
	case IFCOUNTER_OMCASTS:
		return (priv->stats.vport.tx_multicast_packets);
	case IFCOUNTER_OQDROPS:
		return (priv->stats.vport.tx_queue_dropped);
	case IFCOUNTER_COLLISIONS:
		return (priv->stats.pport.collisions);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif
2735 
2736 static void
2737 mlx5e_set_rx_mode(struct ifnet *ifp)
2738 {
2739 	struct mlx5e_priv *priv = ifp->if_softc;
2740 
2741 	queue_work(priv->wq, &priv->set_rx_mode_work);
2742 }
2743 
/*
 * Interface ioctl handler.
 *
 * Handles MTU changes (SIOCSIFMTU), interface flag changes
 * (SIOCSIFFLAGS), multicast list updates, media requests, capability
 * toggles (SIOCSIFCAP) and module EEPROM reads (SIOCGI2C).  Every
 * other command is passed to ether_ioctl().
 *
 * Returns 0 on success or an error code; ENXIO is returned when the
 * private structure is missing or marked as gone.
 */
static int
mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct mlx5e_priv *priv;
	struct ifreq *ifr;
	struct ifi2creq i2c;
	int error = 0;
	int mask = 0;
	int size_read = 0;
	int module_status;
	int module_num;
	int max_mtu;
	uint8_t read_addr;

	priv = ifp->if_softc;

	/* check if detaching */
	if (priv == NULL || priv->gone != 0)
		return (ENXIO);

	switch (command) {
	case SIOCSIFMTU:
		ifr = (struct ifreq *)data;

		PRIV_LOCK(priv);
		/* NOTE(review): the result of this query is not checked */
		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);

		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
			int was_opened;

			/* an opened interface is closed and reopened around the change */
			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			if (was_opened)
				mlx5e_close_locked(ifp);

			/* set new MTU */
			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);

			if (was_opened)
				mlx5e_open_locked(ifp);
		} else {
			error = EINVAL;
			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCSIFFLAGS:
		/* already up and running: only the RX mode may have changed */
		if ((ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			mlx5e_set_rx_mode(ifp);
			break;
		}
		PRIV_LOCK(priv);
		if (ifp->if_flags & IFF_UP) {
			/* going up: open the data path, then raise the port */
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
					mlx5e_open_locked(ifp);
				ifp->if_drv_flags |= IFF_DRV_RUNNING;
				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
			}
		} else {
			/* going down: lower the port, then close the data path */
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mlx5_set_port_status(priv->mdev,
				    MLX5_PORT_DOWN);
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
					mlx5e_close_locked(ifp);
				mlx5e_update_carrier(priv);
				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			}
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mlx5e_set_rx_mode(ifp);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		ifr = (struct ifreq *)data;
		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;
	case SIOCSIFCAP:
		ifr = (struct ifreq *)data;
		PRIV_LOCK(priv);
		/* bits set in "mask" are the capabilities being toggled */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			/* TSO4 cannot stay enabled without TX checksumming */
			if (IFCAP_TSO4 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_IP_TSO;
				if_printf(ifp,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			/* TSO6 cannot stay enabled without IPv6 TX checksumming */
			if (IFCAP_TSO6 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO6;
				ifp->if_hwassist &= ~CSUM_IP6_TSO;
				if_printf(ifp,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
		if (mask & IFCAP_TSO4) {
			/* refuse to enable TSO4 while TX checksumming is off */
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
			ifp->if_hwassist ^= CSUM_IP_TSO;
		}
		if (mask & IFCAP_TSO6) {
			/* refuse to enable TSO6 while IPv6 TX checksumming is off */
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
			ifp->if_hwassist ^= CSUM_IP6_TSO;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
				mlx5e_disable_vlan_filter(priv);
			else
				mlx5e_enable_vlan_filter(priv);

			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_WOL_MAGIC)
			ifp->if_capenable ^= IFCAP_WOL_MAGIC;

		VLAN_CAPABILITIES(ifp);
		/* turn off LRO means also turn of HW LRO - if it's on */
		if (mask & IFCAP_LRO) {
			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			bool need_restart = false;

			ifp->if_capenable ^= IFCAP_LRO;
			if (!(ifp->if_capenable & IFCAP_LRO)) {
				if (priv->params.hw_lro_en) {
					priv->params.hw_lro_en = false;
					need_restart = true;
					/* Not sure this is the correct way */
					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
				}
			}
			/* reopen so that the new HW LRO setting takes effect */
			if (was_opened && need_restart) {
				mlx5e_close_locked(ifp);
				mlx5e_open_locked(ifp);
			}
		}
		if (mask & IFCAP_HWRXTSTMP) {
			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
			if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
				if (priv->clbr_done == 0)
					mlx5e_reset_calibration_callout(priv);
			} else {
				callout_drain(&priv->tstmp_clbr);
				priv->clbr_done = 0;
			}
		}
out:
		PRIV_UNLOCK(priv);
		break;

	case SIOCGI2C:
		ifr = (struct ifreq *)data;

		/*
		 * Copy from the user-space address ifr_data to the
		 * kernel-space address i2c
		 */
		error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (error)
			break;

		/* the requested length must fit into the reply buffer */
		if (i2c.len > sizeof(i2c.data)) {
			error = EINVAL;
			break;
		}

		PRIV_LOCK(priv);
		/* Get module_num which is required for the query_eeprom */
		error = mlx5_query_module_num(priv->mdev, &module_num);
		if (error) {
			if_printf(ifp, "Query module num failed, eeprom "
			    "reading is not supported\n");
			error = EINVAL;
			goto err_i2c;
		}
		/* Check if module is present before doing an access */
		module_status = mlx5_query_module_status(priv->mdev, module_num);
		if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
		    module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
			error = EINVAL;
			goto err_i2c;
		}
		/*
		 * Currently 0XA0 and 0xA2 are the only addresses permitted.
		 * The internal conversion is as follows:
		 */
		if (i2c.dev_addr == 0xA0)
			read_addr = MLX5E_I2C_ADDR_LOW;
		else if (i2c.dev_addr == 0xA2)
			read_addr = MLX5E_I2C_ADDR_HIGH;
		else {
			if_printf(ifp, "Query eeprom failed, "
			    "Invalid Address: %X\n", i2c.dev_addr);
			error = EINVAL;
			goto err_i2c;
		}
		error = mlx5_query_eeprom(priv->mdev,
		    read_addr, MLX5E_EEPROM_LOW_PAGE,
		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
		    (uint32_t *)i2c.data, &size_read);
		if (error) {
			if_printf(ifp, "Query eeprom failed, eeprom "
			    "reading is not supported\n");
			error = EINVAL;
			goto err_i2c;
		}

		/*
		 * Requests longer than MLX5_EEPROM_MAX_BYTES get a second
		 * query for the part following the "size_read" bytes
		 * reported by the first one.
		 */
		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
			error = mlx5_query_eeprom(priv->mdev,
			    read_addr, MLX5E_EEPROM_LOW_PAGE,
			    (uint32_t)(i2c.offset + size_read),
			    (uint32_t)(i2c.len - size_read), module_num,
			    (uint32_t *)(i2c.data + size_read), &size_read);
		}
		if (error) {
			if_printf(ifp, "Query eeprom failed, eeprom "
			    "reading is not supported\n");
			error = EINVAL;
			goto err_i2c;
		}

		/* hand the filled request back to user space */
		error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
err_i2c:
		PRIV_UNLOCK(priv);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return (error);
}
3009 
3010 static int
3011 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
3012 {
3013 	/*
3014 	 * TODO: uncoment once FW really sets all these bits if
3015 	 * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
3016 	 * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
3017 	 * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
3018 	 * -ENOTSUPP;
3019 	 */
3020 
3021 	/* TODO: add more must-to-have features */
3022 
3023 	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
3024 		return (-ENODEV);
3025 
3026 	return (0);
3027 }
3028 
3029 static u16
3030 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
3031 {
3032 	int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
3033 
3034 	return bf_buf_size -
3035 	       sizeof(struct mlx5e_tx_wqe) +
3036 	       2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
3037 }
3038 
3039 static void
3040 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
3041     struct mlx5e_priv *priv,
3042     int num_comp_vectors)
3043 {
3044 	/*
3045 	 * TODO: Consider link speed for setting "log_sq_size",
3046 	 * "log_rq_size" and "cq_moderation_xxx":
3047 	 */
3048 	priv->params.log_sq_size =
3049 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
3050 	priv->params.log_rq_size =
3051 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
3052 	priv->params.rx_cq_moderation_usec =
3053 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
3054 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
3055 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
3056 	priv->params.rx_cq_moderation_mode =
3057 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
3058 	priv->params.rx_cq_moderation_pkts =
3059 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
3060 	priv->params.tx_cq_moderation_usec =
3061 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
3062 	priv->params.tx_cq_moderation_pkts =
3063 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
3064 	priv->params.min_rx_wqes =
3065 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
3066 	priv->params.rx_hash_log_tbl_sz =
3067 	    (order_base_2(num_comp_vectors) >
3068 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
3069 	    order_base_2(num_comp_vectors) :
3070 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
3071 	priv->params.num_tc = 1;
3072 	priv->params.default_vlan_prio = 0;
3073 	priv->counter_set_id = -1;
3074 	priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
3075 	mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
3076 
3077 	/*
3078 	 * hw lro is currently defaulted to off. when it won't anymore we
3079 	 * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
3080 	 */
3081 	priv->params.hw_lro_en = false;
3082 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
3083 
3084 	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
3085 
3086 	priv->mdev = mdev;
3087 	priv->params.num_channels = num_comp_vectors;
3088 	priv->params.channels_rsss = 1;
3089 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
3090 	priv->queue_mapping_channel_mask =
3091 	    roundup_pow_of_two(num_comp_vectors) - 1;
3092 	priv->num_tc = priv->params.num_tc;
3093 	priv->default_vlan_prio = priv->params.default_vlan_prio;
3094 
3095 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
3096 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
3097 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
3098 }
3099 
3100 static int
3101 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
3102 		  struct mlx5_core_mr *mkey)
3103 {
3104 	struct ifnet *ifp = priv->ifp;
3105 	struct mlx5_core_dev *mdev = priv->mdev;
3106 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
3107 	void *mkc;
3108 	u32 *in;
3109 	int err;
3110 
3111 	in = mlx5_vzalloc(inlen);
3112 	if (in == NULL) {
3113 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
3114 		return (-ENOMEM);
3115 	}
3116 
3117 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
3118 	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
3119 	MLX5_SET(mkc, mkc, lw, 1);
3120 	MLX5_SET(mkc, mkc, lr, 1);
3121 
3122 	MLX5_SET(mkc, mkc, pd, pdn);
3123 	MLX5_SET(mkc, mkc, length64, 1);
3124 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
3125 
3126 	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
3127 	if (err)
3128 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
3129 		    __func__, err);
3130 
3131 	kvfree(in);
3132 	return (err);
3133 }
3134 
/*
 * String table built by expanding MLX5E_VPORT_STATS() with
 * MLX5E_STATS_DESC; presumably the names and descriptions of the vport
 * statistics -- confirm against the macro definitions.
 */
static const char *mlx5e_vport_stats_desc[] = {
	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};

/*
 * String table built by expanding MLX5E_PPORT_STATS() with
 * MLX5E_STATS_DESC; presumably the names and descriptions of the pport
 * statistics -- confirm against the macro definitions.
 */
static const char *mlx5e_pport_stats_desc[] = {
	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};
3142 
3143 static void
3144 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3145 {
3146 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3147 	sx_init(&priv->state_lock, "mlx5state");
3148 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3149 	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3150 }
3151 
3152 static void
3153 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3154 {
3155 	mtx_destroy(&priv->async_events_mtx);
3156 	sx_destroy(&priv->state_lock);
3157 }
3158 
3159 static int
3160 sysctl_firmware(SYSCTL_HANDLER_ARGS)
3161 {
3162 	/*
3163 	 * %d.%d%.d the string format.
3164 	 * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
3165 	 * We need at most 5 chars to store that.
3166 	 * It also has: two "." and NULL at the end, which means we need 18
3167 	 * (5*3 + 3) chars at most.
3168 	 */
3169 	char fw[18];
3170 	struct mlx5e_priv *priv = arg1;
3171 	int error;
3172 
3173 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
3174 	    fw_rev_sub(priv->mdev));
3175 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3176 	return (error);
3177 }
3178 
3179 static void
3180 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3181 {
3182 	int i;
3183 
3184 	for (i = 0; i < ch->num_tc; i++)
3185 		mlx5e_drain_sq(&ch->sq[i]);
3186 }
3187 
3188 static void
3189 mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3190 {
3191 
3192 	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3193 	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3194 	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3195 	sq->doorbell.d64 = 0;
3196 }
3197 
3198 void
3199 mlx5e_resume_sq(struct mlx5e_sq *sq)
3200 {
3201 	int err;
3202 
3203 	/* check if already enabled */
3204 	if (sq->stopped == 0)
3205 		return;
3206 
3207 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3208 	    MLX5_SQC_STATE_RST);
3209 	if (err != 0) {
3210 		if_printf(sq->ifp,
3211 		    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3212 	}
3213 
3214 	sq->cc = 0;
3215 	sq->pc = 0;
3216 
3217 	/* reset doorbell prior to moving from RST to RDY */
3218 	mlx5e_reset_sq_doorbell_record(sq);
3219 
3220 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3221 	    MLX5_SQC_STATE_RDY);
3222 	if (err != 0) {
3223 		if_printf(sq->ifp,
3224 		    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3225 	}
3226 
3227 	mtx_lock(&sq->lock);
3228 	sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3229 	sq->stopped = 0;
3230 	mtx_unlock(&sq->lock);
3231 
3232 }
3233 
3234 static void
3235 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3236 {
3237         int i;
3238 
3239 	for (i = 0; i < ch->num_tc; i++)
3240 		mlx5e_resume_sq(&ch->sq[i]);
3241 }
3242 
3243 static void
3244 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3245 {
3246 	struct mlx5e_rq *rq = &ch->rq;
3247 	int err;
3248 
3249 	mtx_lock(&rq->mtx);
3250 	rq->enabled = 0;
3251 	callout_stop(&rq->watchdog);
3252 	mtx_unlock(&rq->mtx);
3253 
3254 	callout_drain(&rq->watchdog);
3255 
3256 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3257 	if (err != 0) {
3258 		if_printf(rq->ifp,
3259 		    "mlx5e_modify_rq() from RDY to RST failed: %d\n", err);
3260 	}
3261 
3262 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3263 		msleep(1);
3264 		rq->cq.mcq.comp(&rq->cq.mcq);
3265 	}
3266 
3267 	/*
3268 	 * Transitioning into RST state will allow the FW to track less ERR state queues,
3269 	 * thus reducing the recv queue flushing time
3270 	 */
3271 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3272 	if (err != 0) {
3273 		if_printf(rq->ifp,
3274 		    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3275 	}
3276 }
3277 
3278 static void
3279 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3280 {
3281 	struct mlx5e_rq *rq = &ch->rq;
3282 	int err;
3283 
3284 	rq->wq.wqe_ctr = 0;
3285 	mlx5_wq_ll_update_db_record(&rq->wq);
3286 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3287 	if (err != 0) {
3288 		if_printf(rq->ifp,
3289 		    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
3290         }
3291 
3292 	rq->enabled = 1;
3293 
3294 	rq->cq.mcq.comp(&rq->cq.mcq);
3295 }
3296 
3297 void
3298 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3299 {
3300 	int i;
3301 
3302 	if (priv->channel == NULL)
3303 		return;
3304 
3305 	for (i = 0; i < priv->params.num_channels; i++) {
3306 
3307 		if (!priv->channel[i])
3308 			continue;
3309 
3310 		if (value)
3311 			mlx5e_disable_tx_dma(priv->channel[i]);
3312 		else
3313 			mlx5e_enable_tx_dma(priv->channel[i]);
3314 	}
3315 }
3316 
3317 void
3318 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3319 {
3320 	int i;
3321 
3322 	if (priv->channel == NULL)
3323 		return;
3324 
3325 	for (i = 0; i < priv->params.num_channels; i++) {
3326 
3327 		if (!priv->channel[i])
3328 			continue;
3329 
3330 		if (value)
3331 			mlx5e_disable_rx_dma(priv->channel[i]);
3332 		else
3333 			mlx5e_enable_rx_dma(priv->channel[i]);
3334 	}
3335 }
3336 
3337 u8
3338 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
3339 {
3340 	u8 min_inline_mode;
3341 
3342 	min_inline_mode = MLX5_INLINE_MODE_L2;
3343 	mlx5_query_min_inline(mdev, &min_inline_mode);
3344 	if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
3345 	    !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
3346 		min_inline_mode = MLX5_INLINE_MODE_L2;
3347 
3348 	return (min_inline_mode);
3349 }
3350 
3351 static void
3352 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3353 {
3354 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3355 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3356 	    sysctl_firmware, "A", "HCA firmware version");
3357 
3358 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3359 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3360 	    "Board ID");
3361 }
3362 
3363 static int
3364 mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3365 {
3366 	struct mlx5e_priv *priv = arg1;
3367 	uint32_t tx_pfc;
3368 	uint32_t value;
3369 	int error;
3370 
3371 	PRIV_LOCK(priv);
3372 
3373 	tx_pfc = priv->params.tx_priority_flow_control;
3374 
3375 	/* get current value */
3376 	value = (tx_pfc >> arg2) & 1;
3377 
3378 	error = sysctl_handle_32(oidp, &value, 0, req);
3379 
3380 	/* range check value */
3381 	if (value != 0)
3382 		priv->params.tx_priority_flow_control |= (1 << arg2);
3383 	else
3384 		priv->params.tx_priority_flow_control &= ~(1 << arg2);
3385 
3386 	/* check if update is required */
3387 	if (error == 0 && priv->gone == 0 &&
3388 	    tx_pfc != priv->params.tx_priority_flow_control) {
3389 		error = -mlx5e_set_port_pfc(priv);
3390 		/* restore previous value */
3391 		if (error != 0)
3392 			priv->params.tx_priority_flow_control= tx_pfc;
3393 	}
3394 	PRIV_UNLOCK(priv);
3395 
3396 	return (error);
3397 }
3398 
3399 static int
3400 mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3401 {
3402 	struct mlx5e_priv *priv = arg1;
3403 	uint32_t rx_pfc;
3404 	uint32_t value;
3405 	int error;
3406 
3407 	PRIV_LOCK(priv);
3408 
3409 	rx_pfc = priv->params.rx_priority_flow_control;
3410 
3411 	/* get current value */
3412 	value = (rx_pfc >> arg2) & 1;
3413 
3414 	error = sysctl_handle_32(oidp, &value, 0, req);
3415 
3416 	/* range check value */
3417 	if (value != 0)
3418 		priv->params.rx_priority_flow_control |= (1 << arg2);
3419 	else
3420 		priv->params.rx_priority_flow_control &= ~(1 << arg2);
3421 
3422 	/* check if update is required */
3423 	if (error == 0 && priv->gone == 0 &&
3424 	    rx_pfc != priv->params.rx_priority_flow_control) {
3425 		error = -mlx5e_set_port_pfc(priv);
3426 		/* restore previous value */
3427 		if (error != 0)
3428 			priv->params.rx_priority_flow_control= rx_pfc;
3429 	}
3430 	PRIV_UNLOCK(priv);
3431 
3432 	return (error);
3433 }
3434 
3435 static void
3436 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3437 {
3438 	unsigned int x;
3439 	char path[96];
3440 	int error;
3441 
3442 	/* enable pauseframes by default */
3443 	priv->params.tx_pauseframe_control = 1;
3444 	priv->params.rx_pauseframe_control = 1;
3445 
3446 	/* disable ports flow control, PFC, by default */
3447 	priv->params.tx_priority_flow_control = 0;
3448 	priv->params.rx_priority_flow_control = 0;
3449 
3450 #if (__FreeBSD_version < 1100000)
3451 	/* compute path for sysctl */
3452 	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3453 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3454 
3455 	/* try to fetch tunable, if any */
3456 	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3457 
3458 	/* compute path for sysctl */
3459 	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3460 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3461 
3462 	/* try to fetch tunable, if any */
3463 	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3464 
3465 	for (x = 0; x != 8; x++) {
3466 
3467 		/* compute path for sysctl */
3468 		snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
3469 		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3470 
3471 		/* try to fetch tunable, if any */
3472 		if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
3473 			priv->params.tx_priority_flow_control |= 1 << x;
3474 
3475 		/* compute path for sysctl */
3476 		snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
3477 		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);
3478 
3479 		/* try to fetch tunable, if any */
3480 		if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0)
3481 			priv->params.rx_priority_flow_control |= 1 << x;
3482 	}
3483 #endif
3484 
3485 	/* register pauseframe SYSCTLs */
3486 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3487 	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3488 	    &priv->params.tx_pauseframe_control, 0,
3489 	    "Set to enable TX pause frames. Clear to disable.");
3490 
3491 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3492 	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3493 	    &priv->params.rx_pauseframe_control, 0,
3494 	    "Set to enable RX pause frames. Clear to disable.");
3495 
3496 	/* register priority_flow control, PFC, SYSCTLs */
3497 	for (x = 0; x != 8; x++) {
3498 		snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
3499 
3500 		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3501 		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3502 		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
3503 		    "Set to enable TX ports flow control frames for given priority. Clear to disable.");
3504 
3505 		snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
3506 
3507 		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3508 		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3509 		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
3510 		    "Set to enable RX ports flow control frames for given priority. Clear to disable.");
3511 	}
3512 
3513 	PRIV_LOCK(priv);
3514 
3515 	/* range check */
3516 	priv->params.tx_pauseframe_control =
3517 	    priv->params.tx_pauseframe_control ? 1 : 0;
3518 	priv->params.rx_pauseframe_control =
3519 	    priv->params.rx_pauseframe_control ? 1 : 0;
3520 
3521 	/* update firmware */
3522 	error = mlx5e_set_port_pause_and_pfc(priv);
3523 	if (error == -EINVAL) {
3524 		if_printf(priv->ifp,
3525 		    "Global pauseframes must be disabled before enabling PFC.\n");
3526 		priv->params.rx_priority_flow_control = 0;
3527 		priv->params.tx_priority_flow_control = 0;
3528 
3529 		/* update firmware */
3530 		(void) mlx5e_set_port_pause_and_pfc(priv);
3531 	}
3532 	PRIV_UNLOCK(priv);
3533 }
3534 
3535 static void *
3536 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3537 {
3538 	struct ifnet *ifp;
3539 	struct mlx5e_priv *priv;
3540 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3541 	struct sysctl_oid_list *child;
3542 	int ncv = mdev->priv.eq_table.num_comp_vectors;
3543 	char unit[16];
3544 	int err;
3545 	int i;
3546 	u32 eth_proto_cap;
3547 
3548 	if (mlx5e_check_required_hca_cap(mdev)) {
3549 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3550 		return (NULL);
3551 	}
3552 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
3553 	mlx5e_priv_mtx_init(priv);
3554 
3555 	ifp = priv->ifp = if_alloc(IFT_ETHER);
3556 	if (ifp == NULL) {
3557 		mlx5_core_err(mdev, "if_alloc() failed\n");
3558 		goto err_free_priv;
3559 	}
3560 	ifp->if_softc = priv;
3561 	if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
3562 	ifp->if_mtu = ETHERMTU;
3563 	ifp->if_init = mlx5e_open;
3564 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3565 	ifp->if_ioctl = mlx5e_ioctl;
3566 	ifp->if_transmit = mlx5e_xmit;
3567 	ifp->if_qflush = if_qflush;
3568 #if (__FreeBSD_version >= 1100000)
3569 	ifp->if_get_counter = mlx5e_get_counter;
3570 #endif
3571 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
3572 	/*
3573          * Set driver features
3574          */
3575 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3576 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3577 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3578 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3579 	ifp->if_capabilities |= IFCAP_LRO;
3580 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3581 	ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
3582 #ifdef RATELIMIT
3583 	ifp->if_capabilities |= IFCAP_TXRTLMT;
3584 	ifp->if_snd_tag_alloc = mlx5e_rl_snd_tag_alloc;
3585 	ifp->if_snd_tag_free = mlx5e_rl_snd_tag_free;
3586 	ifp->if_snd_tag_modify = mlx5e_rl_snd_tag_modify;
3587 	ifp->if_snd_tag_query = mlx5e_rl_snd_tag_query;
3588 #endif
3589 
3590 	/* set TSO limits so that we don't have to drop TX packets */
3591 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3592 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3593 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3594 
3595 	ifp->if_capenable = ifp->if_capabilities;
3596 	ifp->if_hwassist = 0;
3597 	if (ifp->if_capenable & IFCAP_TSO)
3598 		ifp->if_hwassist |= CSUM_TSO;
3599 	if (ifp->if_capenable & IFCAP_TXCSUM)
3600 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3601 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3602 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3603 
3604 	sysctl_ctx_init(&priv->sysctl_ctx_channel_debug);
3605 
3606 	/* ifnet sysctl tree */
3607 	sysctl_ctx_init(&priv->sysctl_ctx);
3608 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3609 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3610 	if (priv->sysctl_ifnet == NULL) {
3611 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3612 		goto err_free_sysctl;
3613 	}
3614 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3615 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3616 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3617 	if (priv->sysctl_ifnet == NULL) {
3618 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3619 		goto err_free_sysctl;
3620 	}
3621 
3622 	/* HW sysctl tree */
3623 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3624 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3625 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3626 	if (priv->sysctl_hw == NULL) {
3627 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3628 		goto err_free_sysctl;
3629 	}
3630 	mlx5e_build_ifp_priv(mdev, priv, ncv);
3631 
3632 	snprintf(unit, sizeof(unit), "mce%u_wq",
3633 	    device_get_unit(mdev->pdev->dev.bsddev));
3634 	priv->wq = alloc_workqueue(unit, 0, 1);
3635 	if (priv->wq == NULL) {
3636 		if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
3637 		goto err_free_sysctl;
3638 	}
3639 
3640 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3641 	if (err) {
3642 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3643 		    __func__, err);
3644 		goto err_free_wq;
3645 	}
3646 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3647 	if (err) {
3648 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3649 		    __func__, err);
3650 		goto err_unmap_free_uar;
3651 	}
3652 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3653 	if (err) {
3654 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3655 		    __func__, err);
3656 		goto err_dealloc_pd;
3657 	}
3658 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3659 	if (err) {
3660 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3661 		    __func__, err);
3662 		goto err_dealloc_transport_domain;
3663 	}
3664 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3665 
3666 	/* check if we should generate a random MAC address */
3667 	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3668 	    is_zero_ether_addr(dev_addr)) {
3669 		random_ether_addr(dev_addr);
3670 		if_printf(ifp, "Assigned random MAC address\n");
3671 	}
3672 #ifdef RATELIMIT
3673 	err = mlx5e_rl_init(priv);
3674 	if (err) {
3675 		if_printf(ifp, "%s: mlx5e_rl_init failed, %d\n",
3676 		    __func__, err);
3677 		goto err_create_mkey;
3678 	}
3679 #endif
3680 
3681 	/* set default MTU */
3682 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3683 
3684 	/* Set desc */
3685 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
3686 
3687 	/* Set default media status */
3688 	priv->media_status_last = IFM_AVALID;
3689 	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3690 	    IFM_ETH_RXPAUSE | IFM_FDX;
3691 
3692 	/* setup default pauseframes configuration */
3693 	mlx5e_setup_pauseframes(priv);
3694 
3695 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3696 	if (err) {
3697 		eth_proto_cap = 0;
3698 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3699 		    __func__, err);
3700 	}
3701 
3702 	/* Setup supported medias */
3703 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3704 	    mlx5e_media_change, mlx5e_media_status);
3705 
3706 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3707 		if (mlx5e_mode_table[i].baudrate == 0)
3708 			continue;
3709 		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3710 			ifmedia_add(&priv->media,
3711 			    mlx5e_mode_table[i].subtype |
3712 			    IFM_ETHER, 0, NULL);
3713 			ifmedia_add(&priv->media,
3714 			    mlx5e_mode_table[i].subtype |
3715 			    IFM_ETHER | IFM_FDX |
3716 			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3717 		}
3718 	}
3719 
3720 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3721 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3722 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3723 
3724 	/* Set autoselect by default */
3725 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3726 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3727 	ether_ifattach(ifp, dev_addr);
3728 
3729 	/* Register for VLAN events */
3730 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3731 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3732 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3733 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3734 
3735 	/* Link is down by default */
3736 	if_link_state_change(ifp, LINK_STATE_DOWN);
3737 
3738 	mlx5e_enable_async_events(priv);
3739 
3740 	mlx5e_add_hw_stats(priv);
3741 
3742 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3743 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3744 	    priv->stats.vport.arg);
3745 
3746 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3747 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3748 	    priv->stats.pport.arg);
3749 
3750 	mlx5e_create_ethtool(priv);
3751 
3752 	mtx_lock(&priv->async_events_mtx);
3753 	mlx5e_update_stats(priv);
3754 	mtx_unlock(&priv->async_events_mtx);
3755 
3756 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3757 	    OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
3758 	    &priv->clbr_done, 0,
3759 	    "RX timestamps calibration state");
3760 	callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
3761 	mlx5e_reset_calibration_callout(priv);
3762 
3763 	return (priv);
3764 
3765 #ifdef RATELIMIT
3766 err_create_mkey:
3767 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3768 #endif
3769 err_dealloc_transport_domain:
3770 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3771 
3772 err_dealloc_pd:
3773 	mlx5_core_dealloc_pd(mdev, priv->pdn);
3774 
3775 err_unmap_free_uar:
3776 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3777 
3778 err_free_wq:
3779 	destroy_workqueue(priv->wq);
3780 
3781 err_free_sysctl:
3782 	sysctl_ctx_free(&priv->sysctl_ctx);
3783 	sysctl_ctx_free(&priv->sysctl_ctx_channel_debug);
3784 
3785 	if_free(ifp);
3786 
3787 err_free_priv:
3788 	mlx5e_priv_mtx_destroy(priv);
3789 	free(priv, M_MLX5EN);
3790 	return (NULL);
3791 }
3792 
3793 static void
3794 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3795 {
3796 	struct mlx5e_priv *priv = vpriv;
3797 	struct ifnet *ifp = priv->ifp;
3798 
3799 	/* don't allow more IOCTLs */
3800 	priv->gone = 1;
3801 
3802 	/*
3803 	 * Clear the device description to avoid use after free,
3804 	 * because the bsddev is not destroyed when this module is
3805 	 * unloaded:
3806 	 */
3807 	device_set_desc(mdev->pdev->dev.bsddev, NULL);
3808 
3809 	/* XXX wait a bit to allow IOCTL handlers to complete */
3810 	pause("W", hz);
3811 
3812 #ifdef RATELIMIT
3813 	/*
3814 	 * The kernel can have reference(s) via the m_snd_tag's into
3815 	 * the ratelimit channels, and these must go away before
3816 	 * detaching:
3817 	 */
3818 	while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
3819 		if_printf(priv->ifp, "Waiting for all ratelimit connections "
3820 		    "to terminate\n");
3821 		pause("W", hz);
3822 	}
3823 #endif
3824 	/* stop watchdog timer */
3825 	callout_drain(&priv->watchdog);
3826 
3827 	callout_drain(&priv->tstmp_clbr);
3828 
3829 	if (priv->vlan_attach != NULL)
3830 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3831 	if (priv->vlan_detach != NULL)
3832 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3833 
3834 	/* make sure device gets closed */
3835 	PRIV_LOCK(priv);
3836 	mlx5e_close_locked(ifp);
3837 	PRIV_UNLOCK(priv);
3838 
3839 	/* unregister device */
3840 	ifmedia_removeall(&priv->media);
3841 	ether_ifdetach(ifp);
3842 	if_free(ifp);
3843 
3844 #ifdef RATELIMIT
3845 	mlx5e_rl_cleanup(priv);
3846 #endif
3847 	/* destroy all remaining sysctl nodes */
3848 	if (priv->sysctl_debug) {
3849 		sysctl_ctx_free(&priv->sysctl_ctx_channel_debug);
3850 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3851 	}
3852 	sysctl_ctx_free(&priv->stats.vport.ctx);
3853 	sysctl_ctx_free(&priv->stats.pport.ctx);
3854 	sysctl_ctx_free(&priv->sysctl_ctx);
3855 
3856 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3857 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3858 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3859 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3860 	mlx5e_disable_async_events(priv);
3861 	destroy_workqueue(priv->wq);
3862 	mlx5e_priv_mtx_destroy(priv);
3863 	free(priv, M_MLX5EN);
3864 }
3865 
3866 static void *
3867 mlx5e_get_ifp(void *vpriv)
3868 {
3869 	struct mlx5e_priv *priv = vpriv;
3870 
3871 	return (priv->ifp);
3872 }
3873 
3874 static struct mlx5_interface mlx5e_interface = {
3875 	.add = mlx5e_create_ifp,
3876 	.remove = mlx5e_destroy_ifp,
3877 	.event = mlx5e_async_event,
3878 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3879 	.get_dev = mlx5e_get_ifp,
3880 };
3881 
3882 void
3883 mlx5e_init(void)
3884 {
3885 	mlx5_register_interface(&mlx5e_interface);
3886 }
3887 
3888 void
3889 mlx5e_cleanup(void)
3890 {
3891 	mlx5_unregister_interface(&mlx5e_interface);
3892 }
3893 
3894 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3895 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3896 
3897 #if (__FreeBSD_version >= 1100000)
3898 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3899 #endif
3900 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3901 MODULE_VERSION(mlx5en, 1);
3902