xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision d06955f9bdb1416d9196043ed781f9b36dae9adc)
/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#include <sys/sockio.h>
#include <machine/atomic.h>

#define	ETH_DRIVER_VERSION	"3.1.0-dev"
char mlx5e_version[] = "Mellanox Ethernet driver"
    " (" ETH_DRIVER_VERSION ")";

struct mlx5e_channel_param {
	struct mlx5e_rq_param rq;
	struct mlx5e_sq_param sq;
	struct mlx5e_cq_param rx_cq;
	struct mlx5e_cq_param tx_cq;
};

static const struct {
	u32	subtype;
	u64	baudrate;
}	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {

	[MLX5E_1000BASE_CX_SGMII] = {
		.subtype = IFM_1000_CX_SGMII,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_1000BASE_KX] = {
		.subtype = IFM_1000_KX,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_10GBASE_CX4] = {
		.subtype = IFM_10G_CX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KX4] = {
		.subtype = IFM_10G_KX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KR] = {
		.subtype = IFM_10G_KR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_20GBASE_KR2] = {
		.subtype = IFM_20G_KR2,
		.baudrate = IF_Gbps(20ULL),
	},
	[MLX5E_40GBASE_CR4] = {
		.subtype = IFM_40G_CR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_KR4] = {
		.subtype = IFM_40G_KR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_56GBASE_R4] = {
		.subtype = IFM_56G_R4,
		.baudrate = IF_Gbps(56ULL),
	},
	[MLX5E_10GBASE_CR] = {
		.subtype = IFM_10G_CR1,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_SR] = {
		.subtype = IFM_10G_SR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_LR] = {
		.subtype = IFM_10G_LR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_40GBASE_SR4] = {
		.subtype = IFM_40G_SR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_LR4] = {
		.subtype = IFM_40G_LR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_100GBASE_CR4] = {
		.subtype = IFM_100G_CR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_SR4] = {
		.subtype = IFM_100G_SR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_KR4] = {
		.subtype = IFM_100G_KR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_LR4] = {
		.subtype = IFM_100G_LR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100BASE_TX] = {
		.subtype = IFM_100_TX,
		.baudrate = IF_Mbps(100ULL),
	},
	[MLX5E_100BASE_T] = {
		.subtype = IFM_100_T,
		.baudrate = IF_Mbps(100ULL),
	},
	[MLX5E_10GBASE_T] = {
		.subtype = IFM_10G_T,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_25GBASE_CR] = {
		.subtype = IFM_25G_CR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_KR] = {
		.subtype = IFM_25G_KR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_SR] = {
		.subtype = IFM_25G_SR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_50GBASE_CR2] = {
		.subtype = IFM_50G_CR2,
		.baudrate = IF_Gbps(50ULL),
	},
	[MLX5E_50GBASE_KR2] = {
		.subtype = IFM_50G_KR2,
		.baudrate = IF_Gbps(50ULL),
	},
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

static SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "MLX5 driver parameters");

static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
	u32 eth_proto_oper;
	int error;
	u8 port_state;
	u8 i;

	port_state = mlx5_query_vport_state(mdev,
	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

	if (port_state == VPORT_STATE_UP) {
		priv->media_status_last |= IFM_ACTIVE;
	} else {
		priv->media_status_last &= ~IFM_ACTIVE;
		priv->media_active_last = IFM_ETHER;
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		return;
	}

	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
	if (error) {
		priv->media_active_last = IFM_ETHER;
		priv->ifp->if_baudrate = 1;
		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
		    __func__, error);
		return;
	}
	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
			priv->ifp->if_baudrate =
			    mlx5e_mode_table[i].baudrate;
			priv->media_active_last =
			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
		}
	}
	if_link_state_change(priv->ifp, LINK_STATE_UP);
}

static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
	struct mlx5e_priv *priv = dev->if_softc;

	ifmr->ifm_status = priv->media_status_last;
	ifmr->ifm_active = priv->media_active_last |
	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}

static u32
mlx5e_find_link_mode(u32 subtype)
{
	u32 i;
	u32 link_mode = 0;

	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (mlx5e_mode_table[i].subtype == subtype)
			link_mode |= MLX5E_PROT_MASK(i);
	}

	return (link_mode);
}
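
/*
 * Illustrative note, not driver logic: each index into
 * mlx5e_mode_table[] corresponds to one bit in the PTYS register's
 * "eth_proto" fields, since MLX5E_PROT_MASK(i) expands to (1 << i) in
 * the mlx5 headers.  A minimal sketch, assuming that definition:
 *
 *	u32 oper = MLX5E_PROT_MASK(MLX5E_40GBASE_CR4);
 *
 *	// mlx5e_update_carrier() would match this bit and report
 *	// subtype IFM_40G_CR4 with baudrate IF_Gbps(40) from the
 *	// corresponding table entry.
 */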

static int
mlx5e_media_change(struct ifnet *dev)
{
	struct mlx5e_priv *priv = dev->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 eth_proto_cap;
	u32 link_mode;
	int was_opened;
	int locked;
	int error;

	locked = PRIV_LOCKED(priv);
	if (!locked)
		PRIV_LOCK(priv);

	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
		error = EINVAL;
		goto done;
	}
	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

	/* query supported capabilities */
	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (error != 0) {
		if_printf(dev, "Query port media capability failed\n");
		goto done;
	}
	/* check for autoselect */
	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
		link_mode = eth_proto_cap;
		if (link_mode == 0) {
			if_printf(dev, "Port media capability is zero\n");
			error = EINVAL;
			goto done;
		}
	} else {
		link_mode = link_mode & eth_proto_cap;
		if (link_mode == 0) {
			if_printf(dev, "Unsupported link mode requested\n");
			error = EINVAL;
			goto done;
		}
	}
	/* update pauseframe control bits */
	priv->params.rx_pauseframe_control =
	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
	priv->params.tx_pauseframe_control =
	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;

	/* check if device is opened */
	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

	/* reconfigure the hardware */
	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
	mlx5_set_port_pause(mdev, 1,
	    priv->params.rx_pauseframe_control,
	    priv->params.tx_pauseframe_control);
	if (was_opened)
		mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
	if (!locked)
		PRIV_UNLOCK(priv);
	return (error);
}

static void
mlx5e_update_carrier_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
	    update_carrier_work);

	PRIV_LOCK(priv);
	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
		mlx5e_update_carrier(priv);
	PRIV_UNLOCK(priv);
}

/*
 * This function reads the physical port counters from the firmware
 * using a pre-defined layout defined by various MLX5E_PPORT_XXX()
 * macros. The output is converted from big-endian 64-bit values into
 * host endian ones and stored in the "priv->stats.pport" structure.
 */
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_pport_stats *s = &priv->stats.pport;
	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
	u32 *in;
	u32 *out;
	const u64 *ptr;
	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	unsigned x;
	unsigned y;

	/* allocate firmware request structures */
	in = mlx5_vzalloc(sz);
	out = mlx5_vzalloc(sz);
	if (in == NULL || out == NULL)
		goto free_out;

	/*
	 * Get pointer to the 64-bit counter set which is located at a
	 * fixed offset in the output firmware request structure:
	 */
	ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

	MLX5_SET(ppcnt_reg, in, local_port, 1);

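	/*
	 * Note on the indexing below: "x" walks the 64-bit counters
	 * within each firmware counter group, while "y" walks the flat
	 * "arg[]" destination arrays and is only reset when the
	 * destination switches from "s" to "s_debug".
	 */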
	/* read IEEE802_3 counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);

	/* read RFC2819 counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);
	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	/* read RFC2863 counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	/* read physical layer stats counter group using predefined counter layout */
	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);
free_out:
	/* free firmware request structures */
	kvfree(in);
	kvfree(out);
}

/*
 * This function is called regularly to collect all statistics
 * counters from the firmware. The values can be viewed through the
 * sysctl interface. Execution is serialized using the priv's global
 * configuration lock.
 */
static void
mlx5e_update_stats_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
	    update_stats_work);
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_vport_stats *s = &priv->stats.vport;
	struct mlx5e_rq_stats *rq_stats;
	struct mlx5e_sq_stats *sq_stats;
	struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
	struct ifnet *ifp = priv->ifp;
#endif

	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
	u32 *out;
	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
	u64 tso_packets = 0;
	u64 tso_bytes = 0;
	u64 tx_queue_dropped = 0;
	u64 tx_defragged = 0;
	u64 tx_offload_none = 0;
	u64 lro_packets = 0;
	u64 lro_bytes = 0;
	u64 sw_lro_queued = 0;
	u64 sw_lro_flushed = 0;
	u64 rx_csum_none = 0;
	u64 rx_wqe_err = 0;
	u32 rx_out_of_buffer = 0;
	int i;
	int j;

	PRIV_LOCK(priv);
	out = mlx5_vzalloc(outlen);
	if (out == NULL)
		goto free_out;
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		goto free_out;

	/* Collect the SW counters first and then the HW counters for consistency */
	for (i = 0; i < priv->params.num_channels; i++) {
		struct mlx5e_rq *rq = &priv->channel[i]->rq;

		rq_stats = &priv->channel[i]->rq.stats;

		/* collect stats from LRO */
		rq_stats->sw_lro_queued = rq->lro.lro_queued;
		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
		sw_lro_queued += rq_stats->sw_lro_queued;
		sw_lro_flushed += rq_stats->sw_lro_flushed;
		lro_packets += rq_stats->lro_packets;
		lro_bytes += rq_stats->lro_bytes;
		rx_csum_none += rq_stats->csum_none;
		rx_wqe_err += rq_stats->wqe_err;

		for (j = 0; j < priv->num_tc; j++) {
			sq_stats = &priv->channel[i]->sq[j].stats;
			sq_br = priv->channel[i]->sq[j].br;

			tso_packets += sq_stats->tso_packets;
			tso_bytes += sq_stats->tso_bytes;
			tx_queue_dropped += sq_stats->dropped;
			if (sq_br != NULL)
				tx_queue_dropped += sq_br->br_drops;
			tx_defragged += sq_stats->defragged;
			tx_offload_none += sq_stats->csum_offload_none;
		}
	}

	/* update counters */
	s->tso_packets = tso_packets;
	s->tso_bytes = tso_bytes;
	s->tx_queue_dropped = tx_queue_dropped;
	s->tx_defragged = tx_defragged;
	s->lro_packets = lro_packets;
	s->lro_bytes = lro_bytes;
	s->sw_lro_queued = sw_lro_queued;
	s->sw_lro_flushed = sw_lro_flushed;
	s->rx_csum_none = rx_csum_none;
	s->rx_wqe_err = rx_wqe_err;

	/* HW counters */
	memset(in, 0, sizeof(in));

	MLX5_SET(query_vport_counter_in, in, opcode,
	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
	MLX5_SET(query_vport_counter_in, in, other_vport, 0);

	memset(out, 0, outlen);

	/* get number of out-of-buffer drops first */
	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
	    &rx_out_of_buffer))
		goto free_out;

	/* accumulate difference into a 64-bit counter */
	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
	s->rx_out_of_buffer_prev = rx_out_of_buffer;
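	/*
	 * Worked example of the wrap-safe accumulation above: with a
	 * 32-bit hardware counter, prev = 0xfffffff0 and curr =
	 * 0x00000010 give (u32)(curr - prev) = 0x20, so the 64-bit
	 * software counter still advances by 32 across the wrap.
	 */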

	/* get port statistics */
	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
		goto free_out;

#define	MLX5_GET_CTR(out, x) \
	MLX5_GET64(query_vport_counter_out, out, x)

	s->rx_error_packets =
	    MLX5_GET_CTR(out, received_errors.packets);
	s->rx_error_bytes =
	    MLX5_GET_CTR(out, received_errors.octets);
	s->tx_error_packets =
	    MLX5_GET_CTR(out, transmit_errors.packets);
	s->tx_error_bytes =
	    MLX5_GET_CTR(out, transmit_errors.octets);

	s->rx_unicast_packets =
	    MLX5_GET_CTR(out, received_eth_unicast.packets);
	s->rx_unicast_bytes =
	    MLX5_GET_CTR(out, received_eth_unicast.octets);
	s->tx_unicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
	s->tx_unicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

	s->rx_multicast_packets =
	    MLX5_GET_CTR(out, received_eth_multicast.packets);
	s->rx_multicast_bytes =
	    MLX5_GET_CTR(out, received_eth_multicast.octets);
	s->tx_multicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
	s->tx_multicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

	s->rx_broadcast_packets =
	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
	s->rx_broadcast_bytes =
	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
	s->tx_broadcast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
	s->tx_broadcast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

	s->rx_packets =
	    s->rx_unicast_packets +
	    s->rx_multicast_packets +
	    s->rx_broadcast_packets -
	    s->rx_out_of_buffer;
	s->rx_bytes =
	    s->rx_unicast_bytes +
	    s->rx_multicast_bytes +
	    s->rx_broadcast_bytes;
	s->tx_packets =
	    s->tx_unicast_packets +
	    s->tx_multicast_packets +
	    s->tx_broadcast_packets;
	s->tx_bytes =
	    s->tx_unicast_bytes +
	    s->tx_multicast_bytes +
	    s->tx_broadcast_bytes;

	/* Update calculated offload counters */
	s->tx_csum_offload = s->tx_packets - tx_offload_none;
	s->rx_csum_good = s->rx_packets - s->rx_csum_none;

	/* Get physical port counters */
	mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
	/* no get_counters interface in fbsd 10 */
	ifp->if_ipackets = s->rx_packets;
	ifp->if_ierrors = s->rx_error_packets +
	    priv->stats.pport.alignment_err +
	    priv->stats.pport.check_seq_err +
	    priv->stats.pport.crc_align_errors +
	    priv->stats.pport.in_range_len_errors +
	    priv->stats.pport.jabbers +
	    priv->stats.pport.out_of_range_len +
	    priv->stats.pport.oversize_pkts +
	    priv->stats.pport.symbol_err +
	    priv->stats.pport.too_long_errors +
	    priv->stats.pport.undersize_pkts +
	    priv->stats.pport.unsupported_op_rx;
	ifp->if_iqdrops = s->rx_out_of_buffer +
	    priv->stats.pport.drop_events;
	ifp->if_opackets = s->tx_packets;
	ifp->if_oerrors = s->tx_error_packets;
	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
	ifp->if_ibytes = s->rx_bytes;
	ifp->if_obytes = s->tx_bytes;
	ifp->if_collisions =
	    priv->stats.pport.collisions;
#endif

free_out:
	kvfree(out);

	/* Update diagnostics, if any */
	if (priv->params_ethtool.diag_pci_enable ||
	    priv->params_ethtool.diag_general_enable) {
		int error = mlx5_core_get_diagnostics_full(mdev,
		    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
		    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
		if (error != 0)
			if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
	}
	PRIV_UNLOCK(priv);
}

static void
mlx5e_update_stats(void *arg)
{
	struct mlx5e_priv *priv = arg;

	schedule_work(&priv->update_stats_work);

	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}
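
/*
 * Note: the callout above re-arms itself every "hz" ticks (once per
 * second) and only schedules the work structure; the actual firmware
 * queries run in mlx5e_update_stats_work(), which may sleep, unlike
 * the callout context.
 */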

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
	switch (event) {
	case MLX5_DEV_EVENT_PORT_UP:
	case MLX5_DEV_EVENT_PORT_DOWN:
		schedule_work(&priv->update_carrier_work);
		break;

	default:
		break;
	}
}

static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5e_priv *priv = vpriv;

	mtx_lock(&priv->async_events_mtx);
	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
		mlx5e_async_event_sub(priv, event);
	mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
	mtx_lock(&priv->async_events_mtx);
	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
	mtx_unlock(&priv->async_events_mtx);
}

static void mlx5e_calibration_callout(void *arg);
static int mlx5e_calibration_duration = 20;
static int mlx5e_fast_calibration = 1;
static int mlx5e_normal_calibration = 30;

static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
    "MLX5 timestamp calibration parameters");

SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
    &mlx5e_calibration_duration, 0,
    "Duration of initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
    &mlx5e_fast_calibration, 0,
    "Recalibration interval during initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
    &mlx5e_normal_calibration, 0,
    "Recalibration interval during normal operations");

/*
 * Starts the calibration process, or re-arms the calibration callout
 * at the appropriate interval.
 */
static void
mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
{

	if (priv->clbr_done == 0)
		mlx5e_calibration_callout(priv);
	else
		callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
		    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
		    priv);
}

static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{
	/*
	 * Note: despite the "usec" in the name, this converts the
	 * timespec to nanoseconds, which is the resolution used by
	 * the calibration points below.
	 */
	return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
}

static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
	struct mlx5_init_seg *iseg;
	uint32_t hw_h, hw_h1, hw_l;

	iseg = priv->mdev->iseg;
	do {
		hw_h = ioread32be(&iseg->internal_timer_h);
		hw_l = ioread32be(&iseg->internal_timer_l);
		hw_h1 = ioread32be(&iseg->internal_timer_h);
	} while (hw_h1 != hw_h);
	return (((uint64_t)hw_h << 32) | hw_l);
}
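
/*
 * The high/low/high read sequence above is the usual technique for
 * sampling a 64-bit free-running counter through two 32-bit
 * registers: if the high word changed while the low word was being
 * read, the loop retries.  A generic sketch of the same idea, with
 * hypothetical read_hi()/read_lo() accessors:
 *
 *	uint32_t hi, lo;
 *
 *	do {
 *		hi = read_hi();
 *		lo = read_lo();
 *	} while (read_hi() != hi);
 *	return (((uint64_t)hi << 32) | lo);
 */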

/*
 * The calibration callout runs either in the context of the thread
 * which enables calibration, or in callout context.  It takes a
 * snapshot of the system and adapter clocks, then advances the
 * pointer to the current calibration point so that the RX path can
 * read consistent data without taking a lock.
 */
static void
mlx5e_calibration_callout(void *arg)
{
	struct mlx5e_priv *priv;
	struct mlx5e_clbr_point *next, *curr;
	struct timespec ts;
	int clbr_curr_next;

	priv = arg;
	curr = &priv->clbr_points[priv->clbr_curr];
	clbr_curr_next = priv->clbr_curr + 1;
	if (clbr_curr_next >= nitems(priv->clbr_points))
		clbr_curr_next = 0;
	next = &priv->clbr_points[clbr_curr_next];

	next->base_prev = curr->base_curr;
	next->clbr_hw_prev = curr->clbr_hw_curr;

	next->clbr_hw_curr = mlx5e_hw_clock(priv);
	if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
	    0) {
		if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx, "
		    "disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev);
		priv->clbr_done = 0;
		return;
	}

	nanouptime(&ts);
	next->base_curr = mlx5e_timespec2usec(&ts);

	curr->clbr_gen = 0;
	atomic_thread_fence_rel();
	priv->clbr_curr = clbr_curr_next;
	atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));

	if (priv->clbr_done < mlx5e_calibration_duration)
		priv->clbr_done++;
	mlx5e_reset_calibration_callout(priv);
}
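
/*
 * Sketch of the lockless reader that the publication protocol above
 * enables; the real consumer is the RX timestamp path, which is not
 * in this file.  The generation number is read before and after
 * copying a calibration point, seqlock style, and the copy is retried
 * if the generation was zero or changed in between:
 *
 *	struct mlx5e_clbr_point *point, cp;
 *	u_int gen;
 *
 *	do {
 *		point = &priv->clbr_points[priv->clbr_curr];
 *		gen = atomic_load_acq_int(&point->clbr_gen);
 *		cp = *point;
 *		atomic_thread_fence_acq();
 *	} while (gen == 0 || gen != point->clbr_gen);
 */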

static const char *mlx5e_rq_stats_desc[] = {
	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];
	void *rqc = param->rqc;
	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
	int wq_sz;
	int err;
	int i;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM16BYTES,		/* maxsize */
	    1,				/* nsegments */
	    MJUM16BYTES,		/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &rq->dma_tag)))
		goto done;

	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
	    &rq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

	if (priv->params.hw_lro_en) {
		rq->wqe_sz = priv->params.lro_wqe_sz;
	} else {
		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
	}
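	/*
	 * Round the receive WQE size up to the smallest mbuf cluster
	 * size that can hold it, failing if it does not fit into a
	 * 16KB jumbo cluster (MJUM16BYTES):
	 */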
	if (rq->wqe_sz > MJUM16BYTES) {
		err = -ENOMEM;
		goto err_rq_wq_destroy;
	} else if (rq->wqe_sz > MJUM9BYTES) {
		rq->wqe_sz = MJUM16BYTES;
	} else if (rq->wqe_sz > MJUMPAGESIZE) {
		rq->wqe_sz = MJUM9BYTES;
	} else if (rq->wqe_sz > MCLBYTES) {
		rq->wqe_sz = MJUMPAGESIZE;
	} else {
		rq->wqe_sz = MCLBYTES;
	}

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);

	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
	if (err)
		goto err_rq_wq_destroy;

	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
	for (i = 0; i != wq_sz; i++) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;

		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
		if (err != 0) {
			while (i--)
				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
			goto err_rq_mbuf_free;
		}
		wqe->data.lkey = c->mkey_be;
		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
	}

	rq->ifp = c->ifp;
	rq->channel = c;
	rq->ix = c->ix;

	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
	    rq->stats.arg);
	return (0);

err_rq_mbuf_free:
	free(rq->mbuf, M_MLX5EN);
	tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
	mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(rq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
	int wq_sz;
	int i;

	/* destroy all sysctl nodes */
	sysctl_ctx_free(&rq->stats.ctx);

	/* free leftover LRO packets, if any */
	tcp_lro_free(&rq->lro);

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
	for (i = 0; i != wq_sz; i++) {
		if (rq->mbuf[i].mbuf != NULL) {
			bus_dmamap_unload(rq->dma_tag,
			    rq->mbuf[i].dma_map);
			m_freem(rq->mbuf[i].mbuf);
		}
		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
	}
	free(rq->mbuf, M_MLX5EN);
	mlx5_wq_destroy(&rq->wq_ctrl);
}

static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
	    sizeof(u64) * rq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
	wq = MLX5_ADDR_OF(rqc, rqc, wq);

	memcpy(rqc, param->rqc, sizeof(param->rqc));

	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
	if (priv->counter_set_id >= 0)
		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&rq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
	MLX5_SET(rqc, rqc, state, next_state);

	err = mlx5_core_modify_rq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_rq(mdev, rq->rqn);
}

static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_wq_ll *wq = &rq->wq;
	int i;

	for (i = 0; i < 1000; i++) {
		if (wq->cur_sz >= priv->params.min_rx_wqes)
			return (0);

		msleep(4);
	}
	return (-ETIMEDOUT);
}

static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	int err;

	err = mlx5e_create_rq(c, param, rq);
	if (err)
		return (err);

	err = mlx5e_enable_rq(rq, param);
	if (err)
		goto err_destroy_rq;

	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
	if (err)
		goto err_disable_rq;

	c->rq.enabled = 1;

	return (0);

err_disable_rq:
	mlx5e_disable_rq(rq);
err_destroy_rq:
	mlx5e_destroy_rq(rq);

	return (err);
}

static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
	mtx_lock(&rq->mtx);
	rq->enabled = 0;
	callout_stop(&rq->watchdog);
	mtx_unlock(&rq->mtx);

	callout_drain(&rq->watchdog);

	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}

static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
	/* wait till RQ is empty */
	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
		msleep(4);
		rq->cq.mcq.comp(&rq->cq.mcq);
	}

	mlx5e_disable_rq(rq);
	mlx5e_destroy_rq(rq);
}

void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int x;

	for (x = 0; x != wq_sz; x++)
		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
	free(sq->mbuf, M_MLX5EN);
}

int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int err;
	int x;

	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);

	/* Create DMA descriptor MAPs */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
			free(sq->mbuf, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

static const char *mlx5e_sq_stats_desc[] = {
	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};

static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];

	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
#ifdef RSS
	cpuset_t cpu_mask;
	int cpu_id;
#endif
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	err = mlx5_alloc_map_uar(mdev, &sq->uar);
	if (err)
		goto err_free_dma_tag;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_unmap_free_uar;

	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	sq->mkey_be = c->mkey_be;
	sq->ifp = priv->ifp;
	sq->priv = priv;
	sq->tc = tc;

	/* check if we should allocate a second packet buffer */
	if (priv->params_ethtool.tx_bufring_disable == 0) {
		sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
		    M_WAITOK, &sq->lock);
		if (sq->br == NULL) {
			if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
			    __func__);
			err = -ENOMEM;
			goto err_free_sq_db;
		}

		sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
		    taskqueue_thread_enqueue, &sq->sq_tq);
		if (sq->sq_tq == NULL) {
			if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
			    __func__);
			err = -ENOMEM;
			goto err_free_drbr;
		}

		TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
#ifdef RSS
		cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
		CPU_SETOF(cpu_id, &cpu_mask);
		taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
		    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
#else
		taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
		    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
#endif
	}
	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
	    sq->stats.arg);

	return (0);

err_free_drbr:
	buf_ring_free(sq->br, M_MLX5EN);
err_free_sq_db:
	mlx5e_free_sq_db(sq);
err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
	mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
	/* destroy all sysctl nodes */
	sysctl_ctx_free(&sq->stats.ctx);

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
	if (sq->sq_tq != NULL) {
		taskqueue_drain(sq->sq_tq, &sq->sq_task);
		taskqueue_free(sq->sq_tq);
	}
	if (sq->br != NULL)
		buf_ring_free(sq->br, M_MLX5EN);
}

int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
    int tis_num)
{
	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * sq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
	MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, sq->uar.index);
	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&sq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);

	kvfree(in);

	return (err);
}

int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);

	kvfree(in);

	return (err);
}

void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{

	mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}

static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	int err;

	err = mlx5e_create_sq(c, tc, param, sq);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_destroy_sq(sq);

	return (err);
}

static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
	/* fill up remainder with NOPs */
	while (sq->cev_counter != 0) {
		while (!mlx5e_sq_has_room_for(sq, 1)) {
			if (can_sleep != 0) {
				mtx_unlock(&sq->lock);
				msleep(4);
				mtx_lock(&sq->lock);
			} else {
				goto done;
			}
		}
		/* send a single NOP */
		mlx5e_send_nop(sq, 1);
		wmb();
	}
done:
	/* Check if we need to write the doorbell */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}
}
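
/*
 * Note: the doorbell write above is deferred; NOPs are only queued on
 * the send ring inside the loop, and the hardware doorbell register
 * is written at most once per call, at the "done" label, which keeps
 * MMIO traffic low while the ring is being filled.
 */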

void
mlx5e_sq_cev_timeout(void *arg)
{
	struct mlx5e_sq *sq = arg;

	mtx_assert(&sq->lock, MA_OWNED);

	/* check next state */
	switch (sq->cev_next_state) {
	case MLX5E_CEV_STATE_SEND_NOPS:
		/* fill TX ring with NOPs, if any */
		mlx5e_sq_send_nops_locked(sq, 0);

		/* check if completed */
		if (sq->cev_counter == 0) {
			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
			return;
		}
		break;
	default:
		/* send NOPs on next timeout */
		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
		break;
	}

	/* restart timer */
	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}

void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
	int error;

	/*
	 * Check if already stopped.
	 *
	 * NOTE: The "stopped" variable is only written when both the
	 * priv's configuration lock and the SQ's lock are held. It
	 * can therefore safely be read while holding only one of the
	 * two locks. This function is always called with the priv's
	 * configuration lock held.
	 */
	if (sq->stopped != 0)
		return;

	mtx_lock(&sq->lock);

	/* don't put more packets into the SQ */
	sq->stopped = 1;

	/* teardown event factor timer, if any */
	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	callout_stop(&sq->cev_callout);

	/* send dummy NOPs in order to flush the transmit ring */
	mlx5e_sq_send_nops_locked(sq, 1);
	mtx_unlock(&sq->lock);

	/* make sure it is safe to free the callout */
	callout_drain(&sq->cev_callout);

	/* wait till SQ is empty or link is down */
	mtx_lock(&sq->lock);
	while (sq->cc != sq->pc &&
	    (sq->priv->media_status_last & IFM_ACTIVE) != 0) {
		mtx_unlock(&sq->lock);
		msleep(1);
		sq->cq.mcq.comp(&sq->cq.mcq);
		mtx_lock(&sq->lock);
	}
	mtx_unlock(&sq->lock);

	/* error out remaining requests */
	error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
	if (error != 0) {
		if_printf(sq->ifp,
		    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
	}

	/* wait till SQ is empty */
	mtx_lock(&sq->lock);
	while (sq->cc != sq->pc) {
		mtx_unlock(&sq->lock);
		msleep(1);
		sq->cq.mcq.comp(&sq->cq.mcq);
		mtx_lock(&sq->lock);
	}
	mtx_unlock(&sq->lock);
}

static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{

	mlx5e_drain_sq(sq);
	mlx5e_disable_sq(sq);
	mlx5e_destroy_sq(sq);
}

static int
mlx5e_create_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	int eqn_not_used;
	int irqn;
	int err;
	u32 i;

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;

	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
	    &cq->wq_ctrl);
	if (err)
		return (err);

	mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;
	*mcq->arm_db = 0;
	mcq->vector = eq_ix;
	mcq->comp = comp;
	mcq->event = mlx5e_cq_error_event;
	mcq->irqn = irqn;
	mcq->uar = &priv->cq_uar;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

		cqe->op_own = 0xf1;
	}

	cq->priv = priv;

	return (0);
}

static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
	mlx5_wq_destroy(&cq->wq_ctrl);
}

static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
{
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in;
	void *cqc;
	int inlen;
	int irqn_not_used;
	int eqn;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
	    sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

	memcpy(cqc, param->cqc, sizeof(param->cqc));

	mlx5_fill_page_array(&cq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));

	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);

	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);

	kvfree(in);

	if (err)
		return (err);

	mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));

	return (0);
}

static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{

	mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
}

int
mlx5e_open_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
	int err;

	err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
	if (err)
		return (err);

	err = mlx5e_enable_cq(cq, param, eq_ix);
	if (err)
		goto err_destroy_cq;

	return (0);

err_destroy_cq:
	mlx5e_destroy_cq(cq);

	return (err);
}

void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}

static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, c->ix);
		if (err)
			goto err_close_tx_cqs;
	}
	return (0);

err_close_tx_cqs:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_cq(&c->sq[tc].cq);

	return (err);
}

static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_cq(&c->sq[tc].cq);
}

static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (err)
			goto err_close_sqs;
	}

	return (0);

err_close_sqs:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_sq_wait(&c->sq[tc]);

	return (err);
}

static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq_wait(&c->sq[tc]);
}

static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	int tc;

	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);

	for (tc = 0; tc < c->num_tc; tc++) {
		struct mlx5e_sq *sq = c->sq + tc;

		mtx_init(&sq->lock, "mlx5tx",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);
		mtx_init(&sq->comp_lock, "mlx5comp",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);

		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;

		/* ensure the TX completion event factor is not zero */
		if (sq->cev_factor == 0)
			sq->cev_factor = 1;
	}
}

static void
mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
{
	int tc;

	mtx_destroy(&c->rq.mtx);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_destroy(&c->sq[tc].lock);
		mtx_destroy(&c->sq[tc].comp_lock);
	}
}

static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel *volatile *cp)
{
	struct mlx5e_channel *c;
	int err;

	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
	c->priv = priv;
	c->ix = ix;
	c->cpu = 0;
	c->ifp = priv->ifp;
	c->mkey_be = cpu_to_be32(priv->mr.key);
	c->num_tc = priv->num_tc;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	err = mlx5e_open_tx_cqs(c, cparam);
	if (err)
		goto err_free;

	/* open receive completion queue */
	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, c->ix);
	if (err)
		goto err_close_tx_cqs;

	err = mlx5e_open_sqs(c, cparam);
	if (err)
		goto err_close_rx_cq;

	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (err)
		goto err_close_sqs;

	/* store channel pointer */
	*cp = c;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

err_close_sqs:
	mlx5e_close_sqs_wait(c);

err_close_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
	mlx5e_close_tx_cqs(c);

err_free:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
	return (err);
}

static void
mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	mlx5e_close_rq(&c->rq);
}

static void
mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}

static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
    struct mlx5e_rq_param *param)
{
	void *rqc = param->rqc;
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);

	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}

static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	/*
	 * TODO: The sysctl controlling this is a boolean for now, which
	 * means we only support the CSUM format.  Once HASH is
	 * implemented we will need to revisit this.
	 */
	if (priv->params.cqe_zipping_en) {
		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
	}

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);

	switch (priv->params.rx_cq_moderation_mode) {
	case 0:
		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	default:
		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
		else
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	}

	mlx5e_build_common_cq_param(priv, param);
}

static void
mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);

	switch (priv->params.tx_cq_moderation_mode) {
	case 0:
		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	default:
		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
		else
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	}

	mlx5e_build_common_cq_param(priv, param);
}

static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
    struct mlx5e_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

	mlx5e_build_rq_param(priv, &cparam->rq);
	mlx5e_build_sq_param(priv, &cparam->sq);
	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}

static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
	struct mlx5e_channel_param cparam;
	void *ptr;
	int err;
	int i;
	int j;

	priv->channel = malloc(priv->params.num_channels *
	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);

	mlx5e_build_channel_param(priv, &cparam);
	for (i = 0; i < priv->params.num_channels; i++) {
		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
		if (err)
			goto err_close_channels;
	}

	for (j = 0; j < priv->params.num_channels; j++) {
		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
		if (err)
			goto err_close_channels;
	}

	return (0);

err_close_channels:
	for (i--; i >= 0; i--) {
		mlx5e_close_channel(&priv->channel[i]);
		mlx5e_close_channel_wait(&priv->channel[i]);
	}

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);

	return (err);
}

static void
mlx5e_close_channels(struct mlx5e_priv *priv)
{
	void *ptr;
	int i;

	if (priv->channel == NULL)
		return;

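	/*
	 * Close in two passes: first initiate close on every channel,
	 * then wait for each one to finish, so the per-channel drain
	 * delays overlap instead of adding up serially.
	 */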
1922 	for (i = 0; i < priv->params.num_channels; i++)
1923 		mlx5e_close_channel(&priv->channel[i]);
1924 	for (i = 0; i < priv->params.num_channels; i++)
1925 		mlx5e_close_channel_wait(&priv->channel[i]);
1926 
1927 	/* remove "volatile" attribute from "channel" pointer */
1928 	ptr = __DECONST(void *, priv->channel);
1929 	priv->channel = NULL;
1930 
1931 	free(ptr, M_MLX5EN);
1932 }
1933 
1934 static int
1935 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1936 {
1937 
1938 	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1939 		uint8_t cq_mode;
1940 
1941 		switch (priv->params.tx_cq_moderation_mode) {
1942 		case 0:
1943 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1944 			break;
1945 		default:
1946 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1947 			break;
1948 		}
1949 
1950 		return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
1951 		    priv->params.tx_cq_moderation_usec,
1952 		    priv->params.tx_cq_moderation_pkts,
1953 		    cq_mode));
1954 	}
1955 
1956 	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
1957 	    priv->params.tx_cq_moderation_usec,
1958 	    priv->params.tx_cq_moderation_pkts));
1959 }
1960 
1961 static int
1962 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
1963 {
1964 
1965 	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1966 		uint8_t cq_mode;
1967 		int retval;
1968 
1969 		switch (priv->params.rx_cq_moderation_mode) {
1970 		case 0:
1971 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1972 			break;
1973 		default:
1974 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1975 			break;
1976 		}
1977 
1978 		retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
1979 		    priv->params.rx_cq_moderation_usec,
1980 		    priv->params.rx_cq_moderation_pkts,
1981 		    cq_mode);
1982 
1983 		return (retval);
1984 	}
1985 
1986 	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
1987 	    priv->params.rx_cq_moderation_usec,
1988 	    priv->params.rx_cq_moderation_pkts));
1989 }
1990 
1991 static int
1992 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1993 {
1994 	int err;
1995 	int i;
1996 
1997 	if (c == NULL)
1998 		return (EINVAL);
1999 
2000 	err = mlx5e_refresh_rq_params(priv, &c->rq);
2001 	if (err)
2002 		goto done;
2003 
2004 	for (i = 0; i != c->num_tc; i++) {
2005 		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2006 		if (err)
2007 			goto done;
2008 	}
2009 done:
2010 	return (err);
2011 }
2012 
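/* Re-apply the CQ moderation settings to all open channels: */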
2013 int
2014 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2015 {
2016 	int i;
2017 
2018 	if (priv->channel == NULL)
2019 		return (EINVAL);
2020 
2021 	for (i = 0; i < priv->params.num_channels; i++) {
2022 		int err;
2023 
2024 		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
2025 		if (err)
2026 			return (err);
2027 	}
2028 	return (0);
2029 }
2030 
2031 static int
2032 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2033 {
2034 	struct mlx5_core_dev *mdev = priv->mdev;
2035 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2036 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2037 
2038 	memset(in, 0, sizeof(in));
2039 
2040 	MLX5_SET(tisc, tisc, prio, tc);
2041 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2042 
2043 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2044 }
2045 
2046 static void
2047 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2048 {
2049 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2050 }
2051 
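/*
 * Open one TIS (transport interface send) per traffic class, undoing
 * any partial progress on failure:
 */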
2052 static int
2053 mlx5e_open_tises(struct mlx5e_priv *priv)
2054 {
2055 	int num_tc = priv->num_tc;
2056 	int err;
2057 	int tc;
2058 
2059 	for (tc = 0; tc < num_tc; tc++) {
2060 		err = mlx5e_open_tis(priv, tc);
2061 		if (err)
2062 			goto err_close_tises;
2063 	}
2064 
2065 	return (0);
2066 
2067 err_close_tises:
2068 	for (tc--; tc >= 0; tc--)
2069 		mlx5e_close_tis(priv, tc);
2070 
2071 	return (err);
2072 }
2073 
2074 static void
2075 mlx5e_close_tises(struct mlx5e_priv *priv)
2076 {
2077 	int num_tc = priv->num_tc;
2078 	int tc;
2079 
2080 	for (tc = 0; tc < num_tc; tc++)
2081 		mlx5e_close_tis(priv, tc);
2082 }
2083 
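/*
 * Create the receive queue table (RQT) used for RSS. Every table entry
 * maps an indirection slot to the RQ number of one of the open
 * channels:
 */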
2084 static int
2085 mlx5e_open_rqt(struct mlx5e_priv *priv)
2086 {
2087 	struct mlx5_core_dev *mdev = priv->mdev;
2088 	u32 *in;
2089 	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
2090 	void *rqtc;
2091 	int inlen;
2092 	int err;
2093 	int sz;
2094 	int i;
2095 
2096 	sz = 1 << priv->params.rx_hash_log_tbl_sz;
2097 
2098 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2099 	in = mlx5_vzalloc(inlen);
2100 	if (in == NULL)
2101 		return (-ENOMEM);
2102 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2103 
2104 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2105 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2106 
2107 	for (i = 0; i < sz; i++) {
2108 		int ix;
2109 #ifdef RSS
2110 		ix = rss_get_indirection_to_bucket(i);
2111 #else
2112 		ix = i;
2113 #endif
2114 		/* ensure we don't overflow */
2115 		ix %= priv->params.num_channels;
2116 		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
2117 	}
2118 
2119 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2120 
2121 	memset(out, 0, sizeof(out));
2122 	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
2123 	if (!err)
2124 		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2125 
2126 	kvfree(in);
2127 
2128 	return (err);
2129 }
2130 
2131 static void
2132 mlx5e_close_rqt(struct mlx5e_priv *priv)
2133 {
2134 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
2135 	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
2136 
2137 	memset(in, 0, sizeof(in));
2138 
2139 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2140 	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2141 
2142 	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
2143 	    sizeof(out));
2144 }
2145 
2146 static void
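/*
 * Build the TIR (transport interface receive) context for the given
 * traffic type. MLX5E_TT_ANY dispatches directly to the first RQ;
 * all other traffic types are spread over the RQT by a Toeplitz RSS
 * hash:
 */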
2147 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
2148 {
2149 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2150 	__be32 *hkey;
2151 
2152 	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2153 
2154 #define	ROUGH_MAX_L2_L3_HDR_SZ 256
2155 
2156 #define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2157 			  MLX5_HASH_FIELD_SEL_DST_IP)
2158 
2159 #define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2160 			  MLX5_HASH_FIELD_SEL_DST_IP   |\
2161 			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
2162 			  MLX5_HASH_FIELD_SEL_L4_DPORT)
2163 
2164 #define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
2165 				 MLX5_HASH_FIELD_SEL_DST_IP   |\
2166 				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2167 
2168 	if (priv->params.hw_lro_en) {
2169 		MLX5_SET(tirc, tirc, lro_enable_mask,
2170 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2171 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
2172 		MLX5_SET(tirc, tirc, lro_max_msg_sz,
2173 		    (priv->params.lro_wqe_sz -
2174 		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2175 		/* TODO: add the option to choose timer value dynamically */
2176 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2177 		    MLX5_CAP_ETH(priv->mdev,
2178 		    lro_timer_supported_periods[2]));
2179 	}
2180 
2181 	/* setup parameters for hashing TIR type, if any */
2182 	switch (tt) {
2183 	case MLX5E_TT_ANY:
2184 		MLX5_SET(tirc, tirc, disp_type,
2185 		    MLX5_TIRC_DISP_TYPE_DIRECT);
2186 		MLX5_SET(tirc, tirc, inline_rqn,
2187 		    priv->channel[0]->rq.rqn);
2188 		break;
2189 	default:
2190 		MLX5_SET(tirc, tirc, disp_type,
2191 		    MLX5_TIRC_DISP_TYPE_INDIRECT);
2192 		MLX5_SET(tirc, tirc, indirect_table,
2193 		    priv->rqtn);
2194 		MLX5_SET(tirc, tirc, rx_hash_fn,
2195 		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2196 		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2197 #ifdef RSS
2198 		/*
2199 		 * The FreeBSD RSS implementation does not currently
2200 		 * support symmetric Toeplitz hashes:
2201 		 */
2202 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2203 		rss_getkey((uint8_t *)hkey);
2204 #else
2205 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2206 		hkey[0] = cpu_to_be32(0xD181C62C);
2207 		hkey[1] = cpu_to_be32(0xF7F4DB5B);
2208 		hkey[2] = cpu_to_be32(0x1983A2FC);
2209 		hkey[3] = cpu_to_be32(0x943E1ADB);
2210 		hkey[4] = cpu_to_be32(0xD9389E6B);
2211 		hkey[5] = cpu_to_be32(0xD1039C2C);
2212 		hkey[6] = cpu_to_be32(0xA74499AD);
2213 		hkey[7] = cpu_to_be32(0x593D56D9);
2214 		hkey[8] = cpu_to_be32(0xF3253C06);
2215 		hkey[9] = cpu_to_be32(0x2ADC1FFC);
2216 #endif
2217 		break;
2218 	}
2219 
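	/* select which packet header fields contribute to the RX hash */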
2220 	switch (tt) {
2221 	case MLX5E_TT_IPV4_TCP:
2222 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2223 		    MLX5_L3_PROT_TYPE_IPV4);
2224 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2225 		    MLX5_L4_PROT_TYPE_TCP);
2226 #ifdef RSS
2227 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2228 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2229 			    MLX5_HASH_IP);
2230 		} else
2231 #endif
2232 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2233 		    MLX5_HASH_ALL);
2234 		break;
2235 
2236 	case MLX5E_TT_IPV6_TCP:
2237 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2238 		    MLX5_L3_PROT_TYPE_IPV6);
2239 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2240 		    MLX5_L4_PROT_TYPE_TCP);
2241 #ifdef RSS
2242 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2243 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2244 			    MLX5_HASH_IP);
2245 		} else
2246 #endif
2247 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2248 		    MLX5_HASH_ALL);
2249 		break;
2250 
2251 	case MLX5E_TT_IPV4_UDP:
2252 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2253 		    MLX5_L3_PROT_TYPE_IPV4);
2254 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2255 		    MLX5_L4_PROT_TYPE_UDP);
2256 #ifdef RSS
2257 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2258 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2259 			    MLX5_HASH_IP);
2260 		} else
2261 #endif
2262 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2263 		    MLX5_HASH_ALL);
2264 		break;
2265 
2266 	case MLX5E_TT_IPV6_UDP:
2267 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2268 		    MLX5_L3_PROT_TYPE_IPV6);
2269 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2270 		    MLX5_L4_PROT_TYPE_UDP);
2271 #ifdef RSS
2272 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2273 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2274 			    MLX5_HASH_IP);
2275 		} else
2276 #endif
2277 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2278 		    MLX5_HASH_ALL);
2279 		break;
2280 
2281 	case MLX5E_TT_IPV4_IPSEC_AH:
2282 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2283 		    MLX5_L3_PROT_TYPE_IPV4);
2284 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2285 		    MLX5_HASH_IP_IPSEC_SPI);
2286 		break;
2287 
2288 	case MLX5E_TT_IPV6_IPSEC_AH:
2289 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2290 		    MLX5_L3_PROT_TYPE_IPV6);
2291 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2292 		    MLX5_HASH_IP_IPSEC_SPI);
2293 		break;
2294 
2295 	case MLX5E_TT_IPV4_IPSEC_ESP:
2296 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2297 		    MLX5_L3_PROT_TYPE_IPV4);
2298 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2299 		    MLX5_HASH_IP_IPSEC_SPI);
2300 		break;
2301 
2302 	case MLX5E_TT_IPV6_IPSEC_ESP:
2303 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2304 		    MLX5_L3_PROT_TYPE_IPV6);
2305 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2306 		    MLX5_HASH_IP_IPSEC_SPI);
2307 		break;
2308 
2309 	case MLX5E_TT_IPV4:
2310 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2311 		    MLX5_L3_PROT_TYPE_IPV4);
2312 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2313 		    MLX5_HASH_IP);
2314 		break;
2315 
2316 	case MLX5E_TT_IPV6:
2317 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2318 		    MLX5_L3_PROT_TYPE_IPV6);
2319 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2320 		    MLX5_HASH_IP);
2321 		break;
2322 
2323 	default:
2324 		break;
2325 	}
2326 }
2327 
2328 static int
2329 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2330 {
2331 	struct mlx5_core_dev *mdev = priv->mdev;
2332 	u32 *in;
2333 	void *tirc;
2334 	int inlen;
2335 	int err;
2336 
2337 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2338 	in = mlx5_vzalloc(inlen);
2339 	if (in == NULL)
2340 		return (-ENOMEM);
2341 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2342 
2343 	mlx5e_build_tir_ctx(priv, tirc, tt);
2344 
2345 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2346 
2347 	kvfree(in);
2348 
2349 	return (err);
2350 }
2351 
2352 static void
2353 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2354 {
2355 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2356 }
2357 
2358 static int
2359 mlx5e_open_tirs(struct mlx5e_priv *priv)
2360 {
2361 	int err;
2362 	int i;
2363 
2364 	for (i = 0; i < MLX5E_NUM_TT; i++) {
2365 		err = mlx5e_open_tir(priv, i);
2366 		if (err)
2367 			goto err_close_tirs;
2368 	}
2369 
2370 	return (0);
2371 
2372 err_close_tirs:
2373 	for (i--; i >= 0; i--)
2374 		mlx5e_close_tir(priv, i);
2375 
2376 	return (err);
2377 }
2378 
2379 static void
2380 mlx5e_close_tirs(struct mlx5e_priv *priv)
2381 {
2382 	int i;
2383 
2384 	for (i = 0; i < MLX5E_NUM_TT; i++)
2385 		mlx5e_close_tir(priv, i);
2386 }
2387 
2388 /*
2389  * SW MTU does not include headers,
2390  * HW MTU includes all headers and checksums.
2391  */
2392 static int
2393 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2394 {
2395 	struct mlx5e_priv *priv = ifp->if_softc;
2396 	struct mlx5_core_dev *mdev = priv->mdev;
2397 	int hw_mtu;
2398 	int err;
2399 
2400 	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
2401 	if (err) {
2402 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2403 		    __func__, sw_mtu, err);
2404 		return (err);
2405 	}
2406 
2407 	ifp->if_mtu = sw_mtu;
2408 	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2409 	if (err) {
2410 		if_printf(ifp, "Querying port MTU after setting new "
2411 		    "MTU value failed\n");
2412 		return (err);
2413 	} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
2414 		err = -E2BIG;
2415 		if_printf(ifp, "Port MTU %d is smaller than "
2416 		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2417 	} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
2418 		err = -EINVAL;
2419 		if_printf(ifp, "Port MTU %d is bigger than "
2420 		    "ifp mtu %d\n", hw_mtu, sw_mtu);
2421 	}
2422 	priv->params_ethtool.hw_mtu = hw_mtu;
2423 
2424 	return (err);
2425 }
2426 
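/*
 * Bring the interface up with the private state lock held. The HW
 * resources are created in order: TISes, queue counter, channels, RQT,
 * TIRs, flow table and VLAN rules; on error they are torn down in
 * reverse:
 */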
2427 int
2428 mlx5e_open_locked(struct ifnet *ifp)
2429 {
2430 	struct mlx5e_priv *priv = ifp->if_softc;
2431 	int err;
2432 	u16 set_id;
2433 
2434 	/* check if already opened */
2435 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2436 		return (0);
2437 
2438 #ifdef RSS
2439 	if (rss_getnumbuckets() > priv->params.num_channels) {
2440 		if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
2441 		    "channels (%u) available\n", rss_getnumbuckets(),
2442 		    priv->params.num_channels);
2443 	}
2444 #endif
2445 	err = mlx5e_open_tises(priv);
2446 	if (err) {
2447 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2448 		    __func__, err);
2449 		return (err);
2450 	}
2451 	err = mlx5_vport_alloc_q_counter(priv->mdev,
2452 	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2453 	if (err) {
2454 		if_printf(priv->ifp,
2455 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2456 		    __func__, err);
2457 		goto err_close_tises;
2458 	}
2459 	/* store counter set ID */
2460 	priv->counter_set_id = set_id;
2461 
2462 	err = mlx5e_open_channels(priv);
2463 	if (err) {
2464 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2465 		    __func__, err);
2466 		goto err_dalloc_q_counter;
2467 	}
2468 	err = mlx5e_open_rqt(priv);
2469 	if (err) {
2470 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2471 		    __func__, err);
2472 		goto err_close_channels;
2473 	}
2474 	err = mlx5e_open_tirs(priv);
2475 	if (err) {
2476 		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
2477 		    __func__, err);
2478 		goto err_close_rqt;
2479 	}
2480 	err = mlx5e_open_flow_table(priv);
2481 	if (err) {
2482 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2483 		    __func__, err);
2484 		goto err_close_tirs;
2485 	}
2486 	err = mlx5e_add_all_vlan_rules(priv);
2487 	if (err) {
2488 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2489 		    __func__, err);
2490 		goto err_close_flow_table;
2491 	}
2492 	set_bit(MLX5E_STATE_OPENED, &priv->state);
2493 
2494 	mlx5e_update_carrier(priv);
2495 	mlx5e_set_rx_mode_core(priv);
2496 
2497 	return (0);
2498 
2499 err_close_flow_table:
2500 	mlx5e_close_flow_table(priv);
2501 
2502 err_close_tirs:
2503 	mlx5e_close_tirs(priv);
2504 
2505 err_close_rqt:
2506 	mlx5e_close_rqt(priv);
2507 
2508 err_close_channels:
2509 	mlx5e_close_channels(priv);
2510 
2511 err_dalloc_q_counter:
2512 	mlx5_vport_dealloc_q_counter(priv->mdev,
2513 	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2514 
2515 err_close_tises:
2516 	mlx5e_close_tises(priv);
2517 
2518 	return (err);
2519 }
2520 
2521 static void
2522 mlx5e_open(void *arg)
2523 {
2524 	struct mlx5e_priv *priv = arg;
2525 
2526 	PRIV_LOCK(priv);
2527 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2528 		if_printf(priv->ifp,
2529 		    "%s: Setting port status to up failed\n",
2530 		    __func__);
2531 
2532 	mlx5e_open_locked(priv->ifp);
2533 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2534 	PRIV_UNLOCK(priv);
2535 }
2536 
2537 int
2538 mlx5e_close_locked(struct ifnet *ifp)
2539 {
2540 	struct mlx5e_priv *priv = ifp->if_softc;
2541 
2542 	/* check if already closed */
2543 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2544 		return (0);
2545 
2546 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2547 
2548 	mlx5e_set_rx_mode_core(priv);
2549 	mlx5e_del_all_vlan_rules(priv);
2550 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2551 	mlx5e_close_flow_table(priv);
2552 	mlx5e_close_tirs(priv);
2553 	mlx5e_close_rqt(priv);
2554 	mlx5e_close_channels(priv);
2555 	mlx5_vport_dealloc_q_counter(priv->mdev,
2556 	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2557 	mlx5e_close_tises(priv);
2558 
2559 	return (0);
2560 }
2561 
2562 #if (__FreeBSD_version >= 1100000)
2563 static uint64_t
2564 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2565 {
2566 	struct mlx5e_priv *priv = ifp->if_softc;
2567 	u64 retval;
2568 
2569 	/* PRIV_LOCK(priv); XXX not allowed */
2570 	switch (cnt) {
2571 	case IFCOUNTER_IPACKETS:
2572 		retval = priv->stats.vport.rx_packets;
2573 		break;
2574 	case IFCOUNTER_IERRORS:
2575 		retval = priv->stats.vport.rx_error_packets +
2576 		    priv->stats.pport.alignment_err +
2577 		    priv->stats.pport.check_seq_err +
2578 		    priv->stats.pport.crc_align_errors +
2579 		    priv->stats.pport.in_range_len_errors +
2580 		    priv->stats.pport.jabbers +
2581 		    priv->stats.pport.out_of_range_len +
2582 		    priv->stats.pport.oversize_pkts +
2583 		    priv->stats.pport.symbol_err +
2584 		    priv->stats.pport.too_long_errors +
2585 		    priv->stats.pport.undersize_pkts +
2586 		    priv->stats.pport.unsupported_op_rx;
2587 		break;
2588 	case IFCOUNTER_IQDROPS:
2589 		retval = priv->stats.vport.rx_out_of_buffer +
2590 		    priv->stats.pport.drop_events;
2591 		break;
2592 	case IFCOUNTER_OPACKETS:
2593 		retval = priv->stats.vport.tx_packets;
2594 		break;
2595 	case IFCOUNTER_OERRORS:
2596 		retval = priv->stats.vport.tx_error_packets;
2597 		break;
2598 	case IFCOUNTER_IBYTES:
2599 		retval = priv->stats.vport.rx_bytes;
2600 		break;
2601 	case IFCOUNTER_OBYTES:
2602 		retval = priv->stats.vport.tx_bytes;
2603 		break;
2604 	case IFCOUNTER_IMCASTS:
2605 		retval = priv->stats.vport.rx_multicast_packets;
2606 		break;
2607 	case IFCOUNTER_OMCASTS:
2608 		retval = priv->stats.vport.tx_multicast_packets;
2609 		break;
2610 	case IFCOUNTER_OQDROPS:
2611 		retval = priv->stats.vport.tx_queue_dropped;
2612 		break;
2613 	case IFCOUNTER_COLLISIONS:
2614 		retval = priv->stats.pport.collisions;
2615 		break;
2616 	default:
2617 		retval = if_get_counter_default(ifp, cnt);
2618 		break;
2619 	}
2620 	/* PRIV_UNLOCK(priv); XXX not allowed */
2621 	return (retval);
2622 }
2623 #endif
2624 
2625 static void
2626 mlx5e_set_rx_mode(struct ifnet *ifp)
2627 {
2628 	struct mlx5e_priv *priv = ifp->if_softc;
2629 
2630 	schedule_work(&priv->set_rx_mode_work);
2631 }
2632 
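/*
 * Interface ioctl handler. Configuration changes are serialized with
 * open and close by taking the private state lock:
 */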
2633 static int
2634 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2635 {
2636 	struct mlx5e_priv *priv;
2637 	struct ifreq *ifr;
2638 	struct ifi2creq i2c;
2639 	int error = 0;
2640 	int mask = 0;
2641 	int size_read = 0;
2642 	int module_num;
2643 	int max_mtu;
2644 	uint8_t read_addr;
2645 
2646 	priv = ifp->if_softc;
2647 
2648 	/* check if detaching */
2649 	if (priv == NULL || priv->gone != 0)
2650 		return (ENXIO);
2651 
2652 	switch (command) {
2653 	case SIOCSIFMTU:
2654 		ifr = (struct ifreq *)data;
2655 
2656 		PRIV_LOCK(priv);
2657 		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2658 
2659 		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2660 		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2661 			int was_opened;
2662 
2663 			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2664 			if (was_opened)
2665 				mlx5e_close_locked(ifp);
2666 
2667 			/* set new MTU */
2668 			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2669 
2670 			if (was_opened)
2671 				mlx5e_open_locked(ifp);
2672 		} else {
2673 			error = EINVAL;
2674 			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2675 			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2676 		}
2677 		PRIV_UNLOCK(priv);
2678 		break;
2679 	case SIOCSIFFLAGS:
2680 		if ((ifp->if_flags & IFF_UP) &&
2681 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2682 			mlx5e_set_rx_mode(ifp);
2683 			break;
2684 		}
2685 		PRIV_LOCK(priv);
2686 		if (ifp->if_flags & IFF_UP) {
2687 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2688 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2689 					mlx5e_open_locked(ifp);
2690 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2691 				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2692 			}
2693 		} else {
2694 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2695 				mlx5_set_port_status(priv->mdev,
2696 				    MLX5_PORT_DOWN);
2697 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2698 					mlx5e_close_locked(ifp);
2699 				mlx5e_update_carrier(priv);
2700 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2701 			}
2702 		}
2703 		PRIV_UNLOCK(priv);
2704 		break;
2705 	case SIOCADDMULTI:
2706 	case SIOCDELMULTI:
2707 		mlx5e_set_rx_mode(ifp);
2708 		break;
2709 	case SIOCSIFMEDIA:
2710 	case SIOCGIFMEDIA:
2711 	case SIOCGIFXMEDIA:
2712 		ifr = (struct ifreq *)data;
2713 		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2714 		break;
2715 	case SIOCSIFCAP:
2716 		ifr = (struct ifreq *)data;
2717 		PRIV_LOCK(priv);
2718 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2719 
2720 		if (mask & IFCAP_TXCSUM) {
2721 			ifp->if_capenable ^= IFCAP_TXCSUM;
2722 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2723 
2724 			if (IFCAP_TSO4 & ifp->if_capenable &&
2725 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2726 				ifp->if_capenable &= ~IFCAP_TSO4;
2727 				ifp->if_hwassist &= ~CSUM_IP_TSO;
2728 				if_printf(ifp,
2729 				    "tso4 disabled due to -txcsum.\n");
2730 			}
2731 		}
2732 		if (mask & IFCAP_TXCSUM_IPV6) {
2733 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2734 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2735 
2736 			if (IFCAP_TSO6 & ifp->if_capenable &&
2737 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2738 				ifp->if_capenable &= ~IFCAP_TSO6;
2739 				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2740 				if_printf(ifp,
2741 				    "tso6 disabled due to -txcsum6.\n");
2742 			}
2743 		}
2744 		if (mask & IFCAP_RXCSUM)
2745 			ifp->if_capenable ^= IFCAP_RXCSUM;
2746 		if (mask & IFCAP_RXCSUM_IPV6)
2747 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2748 		if (mask & IFCAP_TSO4) {
2749 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2750 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2751 				if_printf(ifp, "enable txcsum first.\n");
2752 				error = EAGAIN;
2753 				goto out;
2754 			}
2755 			ifp->if_capenable ^= IFCAP_TSO4;
2756 			ifp->if_hwassist ^= CSUM_IP_TSO;
2757 		}
2758 		if (mask & IFCAP_TSO6) {
2759 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2760 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2761 				if_printf(ifp, "enable txcsum6 first.\n");
2762 				error = EAGAIN;
2763 				goto out;
2764 			}
2765 			ifp->if_capenable ^= IFCAP_TSO6;
2766 			ifp->if_hwassist ^= CSUM_IP6_TSO;
2767 		}
2768 		if (mask & IFCAP_VLAN_HWFILTER) {
2769 			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2770 				mlx5e_disable_vlan_filter(priv);
2771 			else
2772 				mlx5e_enable_vlan_filter(priv);
2773 
2774 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2775 		}
2776 		if (mask & IFCAP_VLAN_HWTAGGING)
2777 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2778 		if (mask & IFCAP_WOL_MAGIC)
2779 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2780 
2781 		VLAN_CAPABILITIES(ifp);
2782 		/* Turning off LRO also turns off HW LRO, if it is enabled. */
2783 		if (mask & IFCAP_LRO) {
2784 			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2785 			bool need_restart = false;
2786 
2787 			ifp->if_capenable ^= IFCAP_LRO;
2788 			if (!(ifp->if_capenable & IFCAP_LRO)) {
2789 				if (priv->params.hw_lro_en) {
2790 					priv->params.hw_lro_en = false;
2791 					need_restart = true;
2792 					/* Not sure this is the correct way */
2793 					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2794 				}
2795 			}
2796 			if (was_opened && need_restart) {
2797 				mlx5e_close_locked(ifp);
2798 				mlx5e_open_locked(ifp);
2799 			}
2800 		}
2801 		if (mask & IFCAP_HWRXTSTMP) {
2802 			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
2803 			if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
2804 				if (priv->clbr_done == 0)
2805 					mlx5e_reset_calibration_callout(priv);
2806 			} else {
2807 				callout_drain(&priv->tstmp_clbr);
2808 				priv->clbr_done = 0;
2809 			}
2810 		}
2811 out:
2812 		PRIV_UNLOCK(priv);
2813 		break;
2814 
2815 	case SIOCGI2C:
2816 		ifr = (struct ifreq *)data;
2817 
2818 		/*
2819 		 * Copy from the user-space address ifr_data to the
2820 		 * kernel-space address i2c
2821 		 */
2822 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2823 		if (error)
2824 			break;
2825 
2826 		if (i2c.len > sizeof(i2c.data)) {
2827 			error = EINVAL;
2828 			break;
2829 		}
2830 
2831 		PRIV_LOCK(priv);
2832 		/* Get module_num which is required for the query_eeprom */
2833 		error = mlx5_query_module_num(priv->mdev, &module_num);
2834 		if (error) {
2835 			if_printf(ifp, "Query module num failed, eeprom "
2836 			    "reading is not supported\n");
2837 			error = EINVAL;
2838 			goto err_i2c;
2839 		}
2840 		/* Check if module is present before doing an access */
2841 		if (mlx5_query_module_status(priv->mdev, module_num) !=
2842 		    MLX5_MODULE_STATUS_PLUGGED) {
2843 			error = EINVAL;
2844 			goto err_i2c;
2845 		}
2846 		/*
2847 		 * Currently 0XA0 and 0xA2 are the only addresses permitted.
2848 		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
2849 		 */
2850 		if (i2c.dev_addr == 0xA0)
2851 			read_addr = MLX5E_I2C_ADDR_LOW;
2852 		else if (i2c.dev_addr == 0xA2)
2853 			read_addr = MLX5E_I2C_ADDR_HIGH;
2854 		else {
2855 			if_printf(ifp, "Query eeprom failed, "
2856 			    "Invalid Address: %X\n", i2c.dev_addr);
2857 			error = EINVAL;
2858 			goto err_i2c;
2859 		}
2860 		error = mlx5_query_eeprom(priv->mdev,
2861 		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2862 		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2863 		    (uint32_t *)i2c.data, &size_read);
2864 		if (error) {
2865 			if_printf(ifp, "Query eeprom failed, eeprom "
2866 			    "reading is not supported\n");
2867 			error = EINVAL;
2868 			goto err_i2c;
2869 		}
2870 
2871 		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2872 			error = mlx5_query_eeprom(priv->mdev,
2873 			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2874 			    (uint32_t)(i2c.offset + size_read),
2875 			    (uint32_t)(i2c.len - size_read), module_num,
2876 			    (uint32_t *)(i2c.data + size_read), &size_read);
2877 		}
2878 		if (error) {
2879 			if_printf(ifp, "Query eeprom failed, eeprom "
2880 			    "reading is not supported\n");
2881 			error = EINVAL;
2882 			goto err_i2c;
2883 		}
2884 
2885 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2886 err_i2c:
2887 		PRIV_UNLOCK(priv);
2888 		break;
2889 
2890 	default:
2891 		error = ether_ioctl(ifp, command, data);
2892 		break;
2893 	}
2894 	return (error);
2895 }
2896 
2897 static int
2898 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2899 {
2900 	/*
2901 	 * TODO: uncomment once FW really sets all these bits:
2902 	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2903 	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2904 	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2905 	 *	return (-ENOTSUPP);
2906 	 */
2907 
2908 	/* TODO: add more must-have features */
2909 
2910 	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2911 		return (-ENODEV);
2912 
2913 	return (0);
2914 }
2915 
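/*
 * Initialize the default parameters in "priv->params" from the device
 * capabilities and the number of available completion vectors:
 */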
2916 static void
2917 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2918     struct mlx5e_priv *priv,
2919     int num_comp_vectors)
2920 {
2921 	/*
2922 	 * TODO: Consider link speed for setting "log_sq_size",
2923 	 * "log_rq_size" and "cq_moderation_xxx":
2924 	 */
2925 	priv->params.log_sq_size =
2926 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2927 	priv->params.log_rq_size =
2928 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2929 	priv->params.rx_cq_moderation_usec =
2930 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2931 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2932 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2933 	priv->params.rx_cq_moderation_mode =
2934 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2935 	priv->params.rx_cq_moderation_pkts =
2936 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2937 	priv->params.tx_cq_moderation_usec =
2938 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2939 	priv->params.tx_cq_moderation_pkts =
2940 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2941 	priv->params.min_rx_wqes =
2942 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
2943 	priv->params.rx_hash_log_tbl_sz =
2944 	    (order_base_2(num_comp_vectors) >
2945 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2946 	    order_base_2(num_comp_vectors) :
2947 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2948 	priv->params.num_tc = 1;
2949 	priv->params.default_vlan_prio = 0;
2950 	priv->counter_set_id = -1;
2951 
2952 	/*
2953 	 * HW LRO is currently defaulted to off. Once that changes, the HW
2954 	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" should be considered.
2955 	 */
2956 	priv->params.hw_lro_en = false;
2957 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2958 
2959 	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
2960 
2961 	priv->mdev = mdev;
2962 	priv->params.num_channels = num_comp_vectors;
2963 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
2964 	priv->queue_mapping_channel_mask =
2965 	    roundup_pow_of_two(num_comp_vectors) - 1;
2966 	priv->num_tc = priv->params.num_tc;
2967 	priv->default_vlan_prio = priv->params.default_vlan_prio;
2968 
2969 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2970 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2971 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2972 }
2973 
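/*
 * Create the memory key used for mapping packet buffers: local
 * read/write access in physical address (PA) mode:
 */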
2974 static int
2975 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2976     struct mlx5_core_mr *mr)
2977 {
2978 	struct ifnet *ifp = priv->ifp;
2979 	struct mlx5_core_dev *mdev = priv->mdev;
2980 	struct mlx5_create_mkey_mbox_in *in;
2981 	int err;
2982 
2983 	in = mlx5_vzalloc(sizeof(*in));
2984 	if (in == NULL) {
2985 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2986 		return (-ENOMEM);
2987 	}
2988 	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2989 	    MLX5_PERM_LOCAL_READ |
2990 	    MLX5_ACCESS_MODE_PA;
2991 	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2992 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2993 
2994 	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2995 	    NULL);
2996 	if (err)
2997 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2998 		    __func__, err);
2999 
3000 	kvfree(in);
3001 
3002 	return (err);
3003 }
3004 
3005 static const char *mlx5e_vport_stats_desc[] = {
3006 	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
3007 };
3008 
3009 static const char *mlx5e_pport_stats_desc[] = {
3010 	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
3011 };
3012 
3013 static void
3014 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3015 {
3016 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3017 	sx_init(&priv->state_lock, "mlx5state");
3018 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3019 	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3020 }
3021 
3022 static void
3023 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3024 {
3025 	mtx_destroy(&priv->async_events_mtx);
3026 	sx_destroy(&priv->state_lock);
3027 }
3028 
3029 static int
3030 sysctl_firmware(SYSCTL_HANDLER_ARGS)
3031 {
3032 	/*
3033 	 * "%d.%d.%d" is the string format.
3034 	 * fw_rev_{maj,min,sub} each return a u16, and 2^16 = 65536,
3035 	 * so we need at most 5 characters to store each of them.
3036 	 * Add the two "." separators and the terminating NUL, which means
3037 	 * we need at most 18 (5*3 + 3) characters.
3038 	 */
3039 	char fw[18];
3040 	struct mlx5e_priv *priv = arg1;
3041 	int error;
3042 
3043 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
3044 	    fw_rev_sub(priv->mdev));
3045 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3046 	return (error);
3047 }
3048 
3049 static void
3050 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3051 {
3052 	int i;
3053 
3054 	for (i = 0; i < ch->num_tc; i++)
3055 		mlx5e_drain_sq(&ch->sq[i]);
3056 }
3057 
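/*
 * Post a NOP doorbell so that the hardware doorbell state matches the
 * freshly reset send queue counters:
 */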
3058 static void
3059 mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3060 {
3061 
3062 	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3063 	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3064 	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3065 	sq->doorbell.d64 = 0;
3066 }
3067 
3068 void
3069 mlx5e_resume_sq(struct mlx5e_sq *sq)
3070 {
3071 	int err;
3072 
3073 	/* check if already enabled */
3074 	if (sq->stopped == 0)
3075 		return;
3076 
3077 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3078 	    MLX5_SQC_STATE_RST);
3079 	if (err != 0) {
3080 		if_printf(sq->ifp,
3081 		    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3082 	}
3083 
3084 	sq->cc = 0;
3085 	sq->pc = 0;
3086 
3087 	/* reset doorbell prior to moving from RST to RDY */
3088 	mlx5e_reset_sq_doorbell_record(sq);
3089 
3090 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3091 	    MLX5_SQC_STATE_RDY);
3092 	if (err != 0) {
3093 		if_printf(sq->ifp,
3094 		    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3095 	}
3096 
3097 	mtx_lock(&sq->lock);
3098 	sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3099 	sq->stopped = 0;
3100 	mtx_unlock(&sq->lock);
3102 }
3103 
3104 static void
3105 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3106 {
3107 	int i;
3108 
3109 	for (i = 0; i < ch->num_tc; i++)
3110 		mlx5e_resume_sq(&ch->sq[i]);
3111 }
3112 
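/*
 * Stop RX DMA on the given channel: disable the RQ, move it into the
 * ERR state, poll its completion queue until the work queue is empty
 * and finally park the RQ in the RST state:
 */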
3113 static void
3114 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3115 {
3116 	struct mlx5e_rq *rq = &ch->rq;
3117 	int err;
3118 
3119 	mtx_lock(&rq->mtx);
3120 	rq->enabled = 0;
3121 	callout_stop(&rq->watchdog);
3122 	mtx_unlock(&rq->mtx);
3123 
3124 	callout_drain(&rq->watchdog);
3125 
3126 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3127 	if (err != 0) {
3128 		if_printf(rq->ifp,
3129 		    "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
3130 	}
3131 
3132 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3133 		msleep(1);
3134 		rq->cq.mcq.comp(&rq->cq.mcq);
3135 	}
3136 
3137 	/*
3138 	 * Transitioning into the RST state allows the FW to track fewer
3139 	 * ERR-state queues, thus reducing the receive queue flushing time.
3140 	 */
3141 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3142 	if (err != 0) {
3143 		if_printf(rq->ifp,
3144 		    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3145 	}
3146 }
3147 
3148 static void
3149 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3150 {
3151 	struct mlx5e_rq *rq = &ch->rq;
3152 	int err;
3153 
3154 	rq->wq.wqe_ctr = 0;
3155 	mlx5_wq_ll_update_db_record(&rq->wq);
3156 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3157 	if (err != 0) {
3158 		if_printf(rq->ifp,
3159 		    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
3160 	}
3161 
3162 	rq->enabled = 1;
3163 
3164 	rq->cq.mcq.comp(&rq->cq.mcq);
3165 }
3166 
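/*
 * Disable (value != 0) or enable (value == 0) TX DMA on all open
 * channels; mlx5e_modify_rx_dma() below does the same for RX:
 */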
3167 void
3168 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3169 {
3170 	int i;
3171 
3172 	if (priv->channel == NULL)
3173 		return;
3174 
3175 	for (i = 0; i < priv->params.num_channels; i++) {
3176 
3177 		if (!priv->channel[i])
3178 			continue;
3179 
3180 		if (value)
3181 			mlx5e_disable_tx_dma(priv->channel[i]);
3182 		else
3183 			mlx5e_enable_tx_dma(priv->channel[i]);
3184 	}
3185 }
3186 
3187 void
3188 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3189 {
3190 	int i;
3191 
3192 	if (priv->channel == NULL)
3193 		return;
3194 
3195 	for (i = 0; i < priv->params.num_channels; i++) {
3196 
3197 		if (!priv->channel[i])
3198 			continue;
3199 
3200 		if (value)
3201 			mlx5e_disable_rx_dma(priv->channel[i]);
3202 		else
3203 			mlx5e_enable_rx_dma(priv->channel[i]);
3204 	}
3205 }
3206 
3207 static void
3208 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3209 {
3210 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3211 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3212 	    sysctl_firmware, "A", "HCA firmware version");
3213 
3214 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3215 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3216 	    "Board ID");
3217 }
3218 
3219 static void
3220 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3221 {
3222 #if (__FreeBSD_version < 1100000)
3223 	char path[64];
3224 
3225 #endif
3226 	/* Only receiving pauseframes is enabled by default */
3227 	priv->params.tx_pauseframe_control = 0;
3228 	priv->params.rx_pauseframe_control = 1;
3229 
3230 #if (__FreeBSD_version < 1100000)
3231 	/* compute path for sysctl */
3232 	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3233 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3234 
3235 	/* try to fetch tunable, if any */
3236 	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3237 
3238 	/* compute path for sysctl */
3239 	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3240 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3241 
3242 	/* try to fetch tunable, if any */
3243 	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3244 #endif
3245 
3246 	/* register pauseframe SYSCTLs */
3247 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3248 	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3249 	    &priv->params.tx_pauseframe_control, 0,
3250 	    "Set to enable TX pause frames. Clear to disable.");
3251 
3252 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3253 	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3254 	    &priv->params.rx_pauseframe_control, 0,
3255 	    "Set to enable RX pause frames. Clear to disable.");
3256 
3257 	/* range check */
3258 	priv->params.tx_pauseframe_control =
3259 	    priv->params.tx_pauseframe_control ? 1 : 0;
3260 	priv->params.rx_pauseframe_control =
3261 	    priv->params.rx_pauseframe_control ? 1 : 0;
3262 
3263 	/* update firmware */
3264 	mlx5_set_port_pause(priv->mdev, 1,
3265 	    priv->params.rx_pauseframe_control,
3266 	    priv->params.tx_pauseframe_control);
3267 }
3268 
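/*
 * Attach callback for the mlx5 core driver: allocate the private
 * state, create and configure the ifnet, allocate the HW resources
 * (UAR, PD, transport domain, memory key), register the supported
 * media and attach to the network stack:
 */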
3269 static void *
3270 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3271 {
3272 	static volatile int mlx5_en_unit;
3273 	struct ifnet *ifp;
3274 	struct mlx5e_priv *priv;
3275 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3276 	struct sysctl_oid_list *child;
3277 	int ncv = mdev->priv.eq_table.num_comp_vectors;
3278 	char unit[16];
3279 	int err;
3280 	int i;
3281 	u32 eth_proto_cap;
3282 
3283 	if (mlx5e_check_required_hca_cap(mdev)) {
3284 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3285 		return (NULL);
3286 	}
3287 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
3288 	mlx5e_priv_mtx_init(priv);
3289 
3290 	ifp = priv->ifp = if_alloc(IFT_ETHER);
3291 	if (ifp == NULL) {
3292 		mlx5_core_err(mdev, "if_alloc() failed\n");
3293 		goto err_free_priv;
3294 	}
3295 	ifp->if_softc = priv;
3296 	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
3297 	ifp->if_mtu = ETHERMTU;
3298 	ifp->if_init = mlx5e_open;
3299 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3300 	ifp->if_ioctl = mlx5e_ioctl;
3301 	ifp->if_transmit = mlx5e_xmit;
3302 	ifp->if_qflush = if_qflush;
3303 #if (__FreeBSD_version >= 1100000)
3304 	ifp->if_get_counter = mlx5e_get_counter;
3305 #endif
3306 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
3307 	/*
3308 	 * Set driver features
3309 	 */
3310 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3311 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3312 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3313 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3314 	ifp->if_capabilities |= IFCAP_LRO;
3315 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3316 	ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
3317 
3318 	/* set TSO limits so that we don't have to drop TX packets */
3319 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3320 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3321 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3322 
3323 	ifp->if_capenable = ifp->if_capabilities;
3324 	ifp->if_hwassist = 0;
3325 	if (ifp->if_capenable & IFCAP_TSO)
3326 		ifp->if_hwassist |= CSUM_TSO;
3327 	if (ifp->if_capenable & IFCAP_TXCSUM)
3328 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3329 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3330 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3331 
3332 	/* ifnet sysctl tree */
3333 	sysctl_ctx_init(&priv->sysctl_ctx);
3334 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3335 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3336 	if (priv->sysctl_ifnet == NULL) {
3337 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3338 		goto err_free_sysctl;
3339 	}
3340 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3341 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3342 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3343 	if (priv->sysctl_ifnet == NULL) {
3344 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3345 		goto err_free_sysctl;
3346 	}
3347 
3348 	/* HW sysctl tree */
3349 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3350 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3351 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3352 	if (priv->sysctl_hw == NULL) {
3353 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3354 		goto err_free_sysctl;
3355 	}
3356 	mlx5e_build_ifp_priv(mdev, priv, ncv);
3357 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3358 	if (err) {
3359 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3360 		    __func__, err);
3361 		goto err_free_sysctl;
3362 	}
3363 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3364 	if (err) {
3365 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3366 		    __func__, err);
3367 		goto err_unmap_free_uar;
3368 	}
3369 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3370 	if (err) {
3371 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3372 		    __func__, err);
3373 		goto err_dealloc_pd;
3374 	}
3375 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3376 	if (err) {
3377 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3378 		    __func__, err);
3379 		goto err_dealloc_transport_domain;
3380 	}
3381 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3382 
3383 	/* check if we should generate a random MAC address */
3384 	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3385 	    is_zero_ether_addr(dev_addr)) {
3386 		random_ether_addr(dev_addr);
3387 		if_printf(ifp, "Assigned random MAC address\n");
3388 	}
3389 
3390 	/* set default MTU */
3391 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3392 
3393 	/* Set desc */
3394 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
3395 
3396 	/* Set default media status */
3397 	priv->media_status_last = IFM_AVALID;
3398 	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3399 	    IFM_ETH_RXPAUSE | IFM_FDX;
3400 
3401 	/* setup default pauseframes configuration */
3402 	mlx5e_setup_pauseframes(priv);
3403 
3404 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3405 	if (err) {
3406 		eth_proto_cap = 0;
3407 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3408 		    __func__, err);
3409 	}
3410 
3411 	/* Setup supported medias */
3412 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3413 	    mlx5e_media_change, mlx5e_media_status);
3414 
3415 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3416 		if (mlx5e_mode_table[i].baudrate == 0)
3417 			continue;
3418 		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3419 			ifmedia_add(&priv->media,
3420 			    mlx5e_mode_table[i].subtype |
3421 			    IFM_ETHER, 0, NULL);
3422 			ifmedia_add(&priv->media,
3423 			    mlx5e_mode_table[i].subtype |
3424 			    IFM_ETHER | IFM_FDX |
3425 			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3426 		}
3427 	}
3428 
3429 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3430 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3431 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3432 
3433 	/* Set autoselect by default */
3434 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3435 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3436 	ether_ifattach(ifp, dev_addr);
3437 
3438 	/* Register for VLAN events */
3439 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3440 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3441 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3442 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3443 
3444 	/* Link is down by default */
3445 	if_link_state_change(ifp, LINK_STATE_DOWN);
3446 
3447 	mlx5e_enable_async_events(priv);
3448 
3449 	mlx5e_add_hw_stats(priv);
3450 
3451 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3452 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3453 	    priv->stats.vport.arg);
3454 
3455 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3456 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3457 	    priv->stats.pport.arg);
3458 
3459 	mlx5e_create_ethtool(priv);
3460 
3461 	mtx_lock(&priv->async_events_mtx);
3462 	mlx5e_update_stats(priv);
3463 	mtx_unlock(&priv->async_events_mtx);
3464 
3465 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3466 	    OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
3467 	    &priv->clbr_done, 0,
3468 	    "RX timestamps calibration state");
3469 	callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
3470 	mlx5e_reset_calibration_callout(priv);
3471 
3472 	return (priv);
3473 
3474 err_dealloc_transport_domain:
3475 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3476 
3477 err_dealloc_pd:
3478 	mlx5_core_dealloc_pd(mdev, priv->pdn);
3479 
3480 err_unmap_free_uar:
3481 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3482 
3483 err_free_sysctl:
3484 	sysctl_ctx_free(&priv->sysctl_ctx);
3485 
3486 	if_free(ifp);
3487 
3488 err_free_priv:
3489 	mlx5e_priv_mtx_destroy(priv);
3490 	free(priv, M_MLX5EN);
3491 	return (NULL);
3492 }
3493 
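/*
 * Detach callback: mark the interface as gone to block new ioctls,
 * close the device, detach from the network stack and release all HW
 * resources in reverse order of creation:
 */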
3494 static void
3495 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3496 {
3497 	struct mlx5e_priv *priv = vpriv;
3498 	struct ifnet *ifp = priv->ifp;
3499 
3500 	/* don't allow more IOCTLs */
3501 	priv->gone = 1;
3502 
3503 	/*
3504 	 * Clear the device description to avoid use after free,
3505 	 * because the bsddev is not destroyed when this module is
3506 	 * unloaded:
3507 	 */
3508 	device_set_desc(mdev->pdev->dev.bsddev, NULL);
3509 
3510 	/* XXX wait a bit to allow IOCTL handlers to complete */
3511 	pause("W", hz);
3512 
3513 	/* stop watchdog timer */
3514 	callout_drain(&priv->watchdog);
3515 
3516 	callout_drain(&priv->tstmp_clbr);
3517 
3518 	if (priv->vlan_attach != NULL)
3519 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3520 	if (priv->vlan_detach != NULL)
3521 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3522 
3523 	/* make sure device gets closed */
3524 	PRIV_LOCK(priv);
3525 	mlx5e_close_locked(ifp);
3526 	PRIV_UNLOCK(priv);
3527 
3528 	/* unregister device */
3529 	ifmedia_removeall(&priv->media);
3530 	ether_ifdetach(ifp);
3531 	if_free(ifp);
3532 
3533 	/* destroy all remaining sysctl nodes */
3534 	if (priv->sysctl_debug)
3535 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3536 	sysctl_ctx_free(&priv->stats.vport.ctx);
3537 	sysctl_ctx_free(&priv->stats.pport.ctx);
3538 	sysctl_ctx_free(&priv->sysctl_ctx);
3539 
3540 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3541 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3542 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3543 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3544 	mlx5e_disable_async_events(priv);
3545 	flush_scheduled_work();
3546 	mlx5e_priv_mtx_destroy(priv);
3547 	free(priv, M_MLX5EN);
3548 }
3549 
3550 static void *
3551 mlx5e_get_ifp(void *vpriv)
3552 {
3553 	struct mlx5e_priv *priv = vpriv;
3554 
3555 	return (priv->ifp);
3556 }
3557 
3558 static struct mlx5_interface mlx5e_interface = {
3559 	.add = mlx5e_create_ifp,
3560 	.remove = mlx5e_destroy_ifp,
3561 	.event = mlx5e_async_event,
3562 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3563 	.get_dev = mlx5e_get_ifp,
3564 };
3565 
3566 void
3567 mlx5e_init(void)
3568 {
3569 	mlx5_register_interface(&mlx5e_interface);
3570 }
3571 
3572 void
3573 mlx5e_cleanup(void)
3574 {
3575 	mlx5_unregister_interface(&mlx5e_interface);
3576 }
3577 
3578 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3579 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3580 
3581 #if (__FreeBSD_version >= 1100000)
3582 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3583 #endif
3584 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3585 MODULE_VERSION(mlx5en, 1);
3586